#107: TSA Data 2020 vs 2019
The TSA has started publishing the daily number of passengers passing through its checkpoints on its website. The data set also includes the corresponding 2019 figures, making it possible to measure the impact of COVID-19 on air travel.
https://www.tsa.gov/coronavirus/passenger-throughput
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from bs4 import BeautifulSoup
from matplotlib import pyplot as plt
from matplotlib import ticker
# ---- Download the TSA throughput table and load it into a DataFrame ----
# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops us from parsing an HTTP error page as data.
response = requests.get(
    'https://www.tsa.gov/coronavirus/passenger-throughput',
    timeout=30,
)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, 'lxml')
row = soup.find_all("tr")

# Read each row cell by cell instead of splitting the row text on newlines,
# so the result does not depend on incidental whitespace in the page markup.
data = []
for i in row[1:]:  # the first <tr> is the table header
    cells = [cell.get_text(strip=True) for cell in i.find_all('td')]
    if len(cells) < 3:
        # Not a data row (e.g. a header or spacer row); skip it.
        continue
    data.append(cells[:3])
print(data)

df = pd.DataFrame(data, columns=['Date', 'This Year', 'Last Year'])
# Placeholder column kept so the column layout (and therefore any positional
# lookups later in the script) stays Date / This Year / Last Year / Empty.
df['Empty'] = ''
print(df.dtypes)

# The counts arrive as strings with thousands separators ("1,234,567").
for column in ('This Year', 'Last Year'):
    df[column] = pd.to_numeric(df[column].str.replace(',', '', regex=False))
df['Date'] = pd.to_datetime(df['Date'])

# Fractional year-over-year change: -0.9 means 90% fewer passengers.
df['Percent Change'] = df['This Year'] / df['Last Year'] - 1
# ---- Figure setup and top chart: raw passenger counts ----
sns.set_style("whitegrid")
sns.set_context("poster", font_scale=1, rc={"grid.linewidth": 0.6})

# Create the two stacked axes in one call. Creating a single full-figure axes
# first and then overlaying subplots on it can leave a stray empty axes
# behind the charts (recent Matplotlib no longer deletes overlapping axes),
# and calling plt.clf() before any figure exists opens an extra blank figure.
f, (ax, _bottom_axes) = plt.subplots(2, 1, figsize=(20, 14))
plt.subplots_adjust(top=0.95, hspace=0.3)  # adds padding in between both charts

# Label each line directly so the legend cannot drift out of sync with the
# order in which the lines were drawn.
sns.lineplot(data=df, x='Date', y='This Year', label='This Year', ax=ax)
sns.lineplot(data=df, x='Date', y='Last Year', label='Last Year', ax=ax)
ax.set_ylabel('Number of Passengers')
ax.set_xlabel('Date')
ax.set_title('Passenger Throughput')
ax.legend()

# Use a formatter rather than fixed tick-label strings so the labels keep
# their thousands separators even if the tick positions are recomputed.
ax.yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))
# ---- Bottom chart: year-over-year percent change ----
# Select (or create) the lower of the two stacked subplots exactly once.
ax2 = plt.subplot(2, 1, 2)
sns.lineplot(
    data=df,
    x='Date',
    y='Percent Change',
    label='% Change in Passengers',
    ax=ax2,
)
ax2.set_ylabel('')
ax2.set_xlabel('Date')
ax2.set_title('% Change 2020 vs 2019')
ax2.legend()

# Ticks every 10 percentage points from 0% down to -100%. Positions and
# labels are derived from the same integer range so they always line up and
# cannot be thrown off by floating-point step accumulation.
percent_marks = range(0, -110, -10)
ax2.set_yticks([mark / 100 for mark in percent_marks])
ax2.set_yticklabels([f'{mark}%' for mark in percent_marks])
# ---- Annotate the lowest and most recent points, then save and show ----
# Lowest percent change; if several days tie, annotate the earliest one.
ymin = df['Percent Change'].min()
xmin = df.loc[df['Percent Change'] == ymin, 'Date'].min()

# Most recent observation: look it up by column name and by latest date
# rather than by hard-coded row/column positions, and ignore rows whose
# percent change could not be computed.
valid_rows = df.dropna(subset=['Date', 'Percent Change'])
latest_index = valid_rows['Date'].idxmax()
yrecent = valid_rows.at[latest_index, 'Percent Change']
xrecent = valid_rows.at[latest_index, 'Date']

arrow_style = dict(facecolor='black', shrink=0.05)

# show the min value
ax2.annotate(
    f'Lowest Point {ymin * 100:.1f}%',
    xy=(xmin, ymin),
    xytext=(xmin, ymin + .2),  # place the text 20 points above the marker
    arrowprops=arrow_style,
    horizontalalignment='center',
    verticalalignment='top',
)

# show the most recent value
ax2.annotate(
    f'Yesterday {yrecent * 100:.1f}%',
    xy=(xrecent, yrecent),
    xytext=(xrecent, yrecent + .2),
    arrowprops=arrow_style,
    horizontalalignment='right',  # keep the text inside the right edge
    verticalalignment='top',
)

plt.savefig('ex107.png')
plt.show()
Last Updated on 2020-09-28 by gantovnik

Recent Comments