#107: TSA Data 2020 vs 2019
The TSA has started publishing the daily number of passengers passing through its checkpoints on its website. The data set also includes the corresponding figures from 2019, which makes it possible to measure the impact of COVID-19 on travel.
https://www.tsa.gov/coronavirus/passenger-throughput
# Scrape the TSA checkpoint throughput table, compute the change of this
# year's passenger counts against last year's, and save a two-panel chart.
import numpy as np
import pandas as pd

TSA_URL = 'https://www.tsa.gov/coronavirus/passenger-throughput'
COLUMNS = ['Date', 'This Year', 'Last Year']
OUTPUT_PATH = 'ex107.png'


def fetch_rows(url=TSA_URL, timeout=30):
    """Download the throughput page and return its table rows.

    Args:
        url: Page that contains the throughput table.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of rows; each row is the list of stripped ``<td>`` texts.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Imported here so the data-cleaning code below can be used (and tested)
    # without the scraping stack installed.
    import requests
    from bs4 import BeautifulSoup

    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page as if it were data.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    rows = []
    # The first <tr> is skipped as the header row.
    for table_row in soup.find_all('tr')[1:]:
        # Reading the <td> cells directly keeps the column count independent
        # of the whitespace between tags, so no dummy column is needed.
        cells = [cell.get_text(strip=True) for cell in table_row.find_all('td')]
        if cells:
            rows.append(cells)
    return rows


def build_frame(rows):
    """Turn raw table rows into a typed DataFrame with a percent change.

    Args:
        rows: Rows of three strings: date, this year's count and last year's
            count. Counts may contain thousands separators.

    Returns:
        A DataFrame with the columns ``Date``, ``This Year``, ``Last Year``
        and ``Percent Change`` (this year relative to last year, as a
        fraction: -0.5 means a 50% drop).

    A table whose width is not three columns makes the DataFrame constructor
    fail rather than silently misaligning the data.
    """
    df = pd.DataFrame(rows, columns=COLUMNS)
    for column in ('This Year', 'Last Year'):
        without_commas = df[column].str.replace(',', '', regex=False)
        df[column] = pd.to_numeric(without_commas)
    df['Date'] = pd.to_datetime(df['Date'])
    df['Percent Change'] = df['This Year'] / df['Last Year'] - 1
    return df


def _annotate_point(ax, caption, x, y, align):
    """Label the point (x, y) on ``ax`` with ``caption`` and y as a percent."""
    ax.annotate(
        '{} {:.1f}%'.format(caption, y * 100),
        xy=(x, y),
        xytext=(x, y + 0.2),  # place the text slightly above the point
        arrowprops=dict(facecolor='black', shrink=0.05),
        horizontalalignment=align,
        verticalalignment='top',
    )


def plot_throughput(df, output_path=OUTPUT_PATH):
    """Draw the throughput and percent-change charts and save them.

    Args:
        df: Frame produced by ``build_frame``; must contain at least one row.
        output_path: File the figure is written to before it is shown.
    """
    import seaborn as sns
    from matplotlib import pyplot as plt
    from matplotlib.ticker import StrMethodFormatter

    sns.set_style('whitegrid')
    sns.set_context('poster', font_scale=1, rc={'grid.linewidth': 0.6})

    # Create both panels in one call: no stray empty figure and no leftover
    # full-figure axes underneath the two charts.
    fig, (ax_top, ax_bottom) = plt.subplots(2, 1, figsize=(20, 14))
    fig.subplots_adjust(top=0.95, hspace=0.3)  # padding between the charts

    # Top chart: absolute passenger counts.
    sns.lineplot(data=df, x='Date', y='This Year', label='This Year', ax=ax_top)
    sns.lineplot(data=df, x='Date', y='Last Year', label='Last Year', ax=ax_top)
    ax_top.set_ylabel('Number of Passengers')
    ax_top.set_xlabel('Date')
    ax_top.set_title('Passenger Throughput')
    ax_top.legend()
    # A formatter keeps the labels in sync with the ticks, unlike a fixed
    # list of label strings.
    ax_top.yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))

    # Bottom chart: change relative to last year.
    sns.lineplot(data=df, x='Date', y='Percent Change',
                 label='% Change in Passengers', ax=ax_bottom)
    ax_bottom.set_ylabel('')
    ax_bottom.set_xlabel('Date')
    ax_bottom.set_title('% Change 2020 vs 2019')
    ax_bottom.legend()
    percents = np.arange(0, -101, -10)  # 0, -10, ..., -100
    ax_bottom.set_yticks(percents / 100)
    ax_bottom.set_yticklabels(['{}%'.format(pct) for pct in percents])

    lowest = df['Percent Change'].min()
    lowest_date = df.loc[df['Percent Change'] == lowest, 'Date'].min()
    _annotate_point(ax_bottom, 'Lowest Point', lowest_date, lowest, 'center')

    # The first table row is treated as the most recent day.
    # NOTE(review): assumes the page lists newest dates first - confirm.
    _annotate_point(ax_bottom, 'Yesterday', df['Date'].iloc[0],
                    df['Percent Change'].iloc[0], 'right')

    fig.savefig(output_path)
    plt.show()


def main():
    """Fetch the table, build the frame and render the chart."""
    rows = fetch_rows()
    if not rows:
        raise SystemExit('No throughput rows found at {}'.format(TSA_URL))
    plot_throughput(build_frame(rows))


if __name__ == '__main__':
    main()
Recent Comments