#107: TSA Data 2020 vs 2019

The TSA has started publishing the daily number of passengers passing through its checkpoints on its website. The data set also includes the corresponding figures from 2019, which makes it possible to measure the impact of COVID-19 on air travel.

https://www.tsa.gov/coronavirus/passenger-throughput

from bs4 import BeautifulSoup
import requests
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
# Page that publishes the daily checkpoint throughput table.
TSA_URL = 'https://www.tsa.gov/coronavirus/passenger-throughput'


def fetch_rows(url=TSA_URL, timeout=30):
    """Download the throughput page and return its table rows.

    Args:
        url: Address of the page holding the throughput table.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with one entry per table row; each entry is the list of
        that row's cell texts with surrounding whitespace stripped.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page as if it were data.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    rows = []
    # The first <tr> is skipped as the header row (assumption about the
    # page layout -- confirm against the live markup).
    for tr in soup.find_all('tr')[1:]:
        # Read the cells themselves rather than splitting the row text on
        # newlines, so the result does not depend on page whitespace.
        cells = [cell.get_text(strip=True) for cell in tr.find_all(['td', 'th'])]
        if cells:
            rows.append(cells)
    return rows


def build_frame(rows):
    """Convert raw string rows into a typed DataFrame.

    Args:
        rows: Iterable of ``[date, this_year, last_year]`` string lists.
            Trailing empty strings beyond the third cell are ignored.

    Returns:
        DataFrame with columns 'Date' (datetime), 'This Year' and
        'Last Year' (numeric) and 'Percent Change' (this / last - 1).

    Raises:
        ValueError: If a row does not reduce to exactly three cells.
    """
    records = []
    for row in rows:
        cells = list(row)
        # Drop empty padding past the third cell (the junk that used to
        # become an 'Empty' column).
        while len(cells) > 3 and cells[-1] == '':
            cells.pop()
        if len(cells) != 3:
            raise ValueError(
                'expected 3 cells (date, this year, last year), got {!r}'.format(row))
        records.append(cells)

    df = pd.DataFrame(records, columns=['Date', 'This Year', 'Last Year'])
    for column in ('This Year', 'Last Year'):
        # Counts arrive with thousands separators, e.g. "2,594,171".
        df[column] = pd.to_numeric(df[column].str.replace(',', '', regex=False))
    df['Date'] = pd.to_datetime(df['Date'])
    df['Percent Change'] = df['This Year'] / df['Last Year'] - 1
    return df


def plot_throughput(df, outfile='ex107.png'):
    """Draw the two-panel throughput figure, save it and show it.

    The top panel plots both years' passenger counts; the bottom panel
    plots the year-over-year change and annotates its lowest point and
    the most recent date.

    Args:
        df: Frame as returned by ``build_frame``.
        outfile: Path the figure is written to.

    Raises:
        ValueError: If ``df`` has no rows.
    """
    # Local import keeps this block self-contained; matplotlib is already
    # a dependency of the script.
    from matplotlib.ticker import FuncFormatter, PercentFormatter

    if df.empty:
        raise ValueError('no passenger-throughput rows to plot')

    sns.set_style("whitegrid")
    sns.set_context("poster", font_scale=1, rc={"grid.linewidth": 0.6})

    # Create both panels in one call so no extra full-figure axes is left
    # underneath them.
    fig, (ax, ax2) = plt.subplots(2, 1, figsize=(20, 14))
    fig.subplots_adjust(top=0.95, hspace=0.3)  # padding between both charts

    # top chart
    sns.lineplot(data=df, x='Date', y='This Year', label='This Year', ax=ax)
    sns.lineplot(data=df, x='Date', y='Last Year', label='Last Year', ax=ax)
    ax.set_ylabel('Number of Passengers')
    ax.set_xlabel('Date')
    ax.set_title('Passenger Throughput')
    ax.legend()
    # A formatter (not fixed label strings) keeps labels in sync with
    # wherever the tick locator puts the ticks.
    ax.yaxis.set_major_formatter(
        FuncFormatter(lambda value, _pos: '{:,}'.format(int(value))))

    # bottom chart
    sns.lineplot(data=df, x='Date', y='Percent Change',
                 label='% Change in Passengers', ax=ax2)
    ax2.set_ylabel('')
    ax2.set_xlabel('Date')
    ax2.set_title('% Change 2020 vs 2019')
    ax2.legend()
    # Ticks every 10% from -100% to 0%; labels are derived from the tick
    # values so the two cannot drift apart.
    ax2.set_yticks(np.linspace(-1.0, 0.0, 11).round(1))
    ax2.yaxis.set_major_formatter(PercentFormatter(xmax=1.0, decimals=0))

    change = df['Percent Change']
    ymin = change.min()
    xmin = df.loc[change == ymin, 'Date'].min()

    # Pick the latest date by value instead of relying on row/column position.
    latest = df['Date'].idxmax()
    xrecent = df.at[latest, 'Date']
    yrecent = df.at[latest, 'Percent Change']

    # show the min value
    ax2.annotate('Lowest Point {:.1f}%'.format(ymin * 100),
                 xy=(xmin, ymin), xytext=(xmin, ymin + .2),
                 arrowprops=dict(facecolor='black', shrink=0.05),
                 horizontalalignment='center', verticalalignment='top')

    # show the most recent value
    ax2.annotate('Yesterday {:.1f}%'.format(yrecent * 100),
                 xy=(xrecent, yrecent), xytext=(xrecent, yrecent + .2),
                 arrowprops=dict(facecolor='black', shrink=0.05),
                 horizontalalignment='right', verticalalignment='top')

    fig.savefig(outfile)
    plt.show()


if __name__ == "__main__":
    data = fetch_rows()
    print(data)

    df = build_frame(data)
    print(df.dtypes)

    plot_throughput(df)

Discover more from Tips and Hints for Aerospace Engineers

Subscribe now to keep reading and get access to the full archive.

Continue reading