%%capture
import warnings
warnings.filterwarnings('ignore')
import altair as alt
import calitp_data_analysis.magics
import pandas as pd
from IPython.display import display, HTML, Markdown
import sys
sys.path.append("../../bus_service_increase")
sys.path.append("../")
# from shared_utils.portfolio_utils import label_visualization
from calitp_data_analysis import calitp_color_palette as cp
from update_vars import GCS_FILE_PATH, PUBLIC_FILENAME, YEAR, MONTH
from functools import cache
from calitp_data_analysis.gcs_pandas import GCSPandas
#from monthly_ridership_by_rtpa import sum_by_group
import _01_ntd_ridership_utils
@cache
def gcs_pandas():
return GCSPandas()
#alt.renderers.enable("html")
alt.data_transformers.enable('default', max_rows=None)# parameters cell for local
rtpa = "Merced County Association of Governments"# Parameters
rtpa = "Transportation Agency for Monterey County"
%%capture_parameters
rtpaTransportation Agency for Monterey County¶
Monthly Ridership Trends¶
Download data from our public folder by navigating to ntd_monthly_ridership and selecting a file.
Transit operators/agencies that are Urban full reporters, that submit monthly ridership data to NTD from 2018 to present, are included in this report.
Operators/agencies that do not appear in the report may be due to:
Were previously Urban full reporters, but are currently not
Non-monthly reporters (small system/rural/reduced reporters)
Has not reported data since 2018
Has reported “0” data since 2018
Examples:
Reporter A is an urban full reporter from 2019-2022, then became a reduced reporter for 2023. Reporter A’s ridership data will be displayed for 2019-2022 only.
Reporter B is an urban full reporter from 2000-2017, then became a reduced reporter for 2018. Reporter B will not display ridership data.
Reporter C was a reduced reporter form 2015-2020, then became an urban full reporter and began submitting monthly ridership data to NTD for 2021. Reporter C’s ridership data will be displayed for 2021-present.
URL = ("https://console.cloud.google.com/storage/"
"browser/calitp-publish-data-analysis"
)
display(
HTML(
f"""
<a href={URL}>
Download the latest month of data: {PUBLIC_FILENAME}</a>
"""
)
)
MIN_YEAR = 2018
#updated to filter for period_year greater than 2018
df = gcs_pandas().read_parquet(
f"{GCS_FILE_PATH}ca_monthly_ridership_{YEAR}_{MONTH}.parquet",
filters = [[("rtpa_name", "==", rtpa), ("period_year", ">=", MIN_YEAR)]]
).drop(
#updated columns names to match new df
columns = ["mode", "tos"]
).rename(columns = {"Mode_full": "Mode", "TOS_full": "TOS"})def remove_zero_upt_rows(df: pd.DataFrame) -> pd.DataFrame:
"""
takes the by_agency_long df, filters for rows with zero UPT AND zero change_1yr.
then removes these rows from the by_agency_long df.
resulting df should only contain rows with change in UPT not due to (0 UPT - 0 UPT) and net zero change in UPT
zero UPT and zero change_1yr occurs when (0 upt - 0upt = 0change), dont need to show this
but a net zero change (100upt - 100upt = 0 change), should be shown
"""
#df of rows with zero UPT for 2 years
zero_UPT_2years = df[(df["upt"] == 0) & (df["change_1yr"] == 0)]
merge = df.merge(zero_UPT_2years, how="left", indicator=True)
no_zero_UPT_rows = merge[merge["_merge"] == "left_only"].drop(columns=["_merge"]).dropna(subset="change_1yr")
return no_zero_UPT_rows
def group_by_agency(df):
"""
Take in the 'by_ageny_long' df and aggregatese by rtpa, and calculates upt % of total.
To be used in pie chart
"""
initial_agg = df.groupby("agency").agg(
total_upt=("upt","sum")
).reset_index()
# % total columns
initial_agg["pct_of_total_upt"] = (initial_agg["total_upt"]/initial_agg["total_upt"].sum())*100
# cleaning data types and rounding
initial_agg["total_upt"] = initial_agg["total_upt"].astype("int64")
initial_agg["pct_of_total_upt"] = initial_agg["pct_of_total_upt"].round(decimals=2)
cleaned_agg = initial_agg.sort_values(by="total_upt", ascending = False)
return cleaned_aggagency_cols = ["ntd_id", "agency", "rtpa_name"]
mode_cols = ["Mode", "rtpa_name"]
tos_cols = ["TOS", "rtpa_name"]
# by_agency_long = _01_ntd_ridership_utils.sum_by_group(df, agency_cols)
# by_mode_long = _01_ntd_ridership_utils.sum_by_group(df, mode_cols)
# by_tos_long = _01_ntd_ridership_utils.sum_by_group(df, tos_cols)
monthly_group_col_2 = [
'period_year',
'period_month',
'period_year_month']
monthly_agg_col = {
"upt":"sum",
"previous_y_m_upt":"sum",
"change_1yr":"sum"
}
monthly_change_col ="previous_y_m_upt"
by_agency_long = _01_ntd_ridership_utils.sum_by_group(
df = df,
group_cols= agency_cols,
group_col2= monthly_group_col_2,# look into combingin with base grou_cols
agg_cols = monthly_agg_col,
change_col= monthly_change_col
)
by_mode_long = _01_ntd_ridership_utils.sum_by_group(
df = df,
group_cols= mode_cols,
group_col2= monthly_group_col_2,# look into combingin with base grou_cols
agg_cols = monthly_agg_col,
change_col= monthly_change_col
)
by_tos_long = _01_ntd_ridership_utils.sum_by_group(
df = df,
group_cols= tos_cols,
group_col2= monthly_group_col_2,# look into combingin with base grou_cols
agg_cols = monthly_agg_col,
change_col= monthly_change_col
)
# remove zero UPT rows
by_agency_long_no_zero_upt = remove_zero_upt_rows(by_agency_long)
# agg by agency, for pie chart
agency_agg_yr = group_by_agency(by_agency_long)
# total UPT check
total_upt = by_agency_long["upt"].sum()
agency_count = by_agency_long["agency"].nunique()def label_visualization(word: str, labeling_dict: dict = {}) -> str:
"""
Supply a labeling dictionary where
keys are existing column names and
values are what's to be displayed on visualization.
If not in dict, replace underscores with spaces and Title Case.
"""
if word in labeling_dict.keys():
word = labeling_dict[word]
else:
word = word.replace("_", " ").title()
return word
LABELING_DICT = {
"upt": "Unlinked Passenger Trips",
"change_1yr": "Change in Unlinked Passenger Trips from Prior Year",
"TOS": "Type of Service",
"year_month": "Date"
}
def labeling(word: str) -> str:
return label_visualization(word, LABELING_DICT)
WIDTH = 325
HEIGHT = 150def make_line_chart(
df: pd.DataFrame,
y_col: str,
color_col: str,
) -> alt.Chart:
df = df[df[y_col] > 0].dropna(subset = y_col)
x_label = [i for i in df.period_year_month.unique() if
any(substring in i for substring in
["-01", "-06"])
]
chart = (alt.Chart(df)
.mark_line()
.encode(
x = alt.X("period_year_month:O",
axis=alt.Axis(values = x_label),
title = "Date"
),
y = alt.Y(y_col, title = labeling(y_col)),
color = alt.Color(color_col, title = "",
scale = alt.Scale(
range = cp.CALITP_CATEGORY_BRIGHT_COLORS +
cp.CALITP_CATEGORY_BOLD_COLORS
)),
tooltip = ["period_year_month", y_col, color_col, "rtpa_name"]
).properties(width = WIDTH, height = HEIGHT)
.facet(color_col, columns=2, title = "")
.resolve_scale(y="independent")
).properties(
title = f"{labeling(y_col)} by {labeling(color_col)}"
).interactive()
return chartdef make_bar_chart(
df: pd.DataFrame,
y_col: str,
color_col: str,
) -> alt.Chart:
def short_label(word):
shorten_dict = {
"change_1yr": "Change",
"pct_change_1yr":"Change",
}
return shorten_dict[word]
# For change column, we are missing everything prior to 2023
#df = df.dropna(subset = y_col)
#need flag for y_col >,<, 0, missing?
#count function to how many agencies fall in those categories, then look at those agencies
#present table
x_label = [i for i in df.period_year_month.unique() if
any(substring in i for substring in
["-01", "-03", "-06", "-09"])
]
chart = (alt.Chart(df)
.mark_bar()
.encode(
x = alt.X("period_year_month:O",
axis=alt.Axis(values = x_label),
title = "Date"
),
y = alt.Y(y_col, title = short_label(y_col)),
color = alt.Color(color_col, title = "",
scale = alt.Scale(
range = cp.CALITP_CATEGORY_BRIGHT_COLORS +
cp.CALITP_CATEGORY_BOLD_COLORS
)),
tooltip = ["period_year_month", y_col, color_col, "rtpa_name"]
).properties(width = WIDTH, height = HEIGHT)
.facet(color_col, columns=2, title = "")
.resolve_scale(x="shared",
y="independent")
).properties(
title = f"{labeling(y_col)} by {labeling(color_col)}"
).interactive()
return chartdef make_pie_chart(df,
col,
color_col):
pie = alt.Chart(df).mark_arc(radius = 150).encode(
theta=col,
color=color_col,
tooltip = ["agency","total_upt","pct_of_total_upt"]
).properties(
title=f"Total Unlinked Passenger Trips per Reporter in RTPA since {MIN_YEAR}",
)
return pie#simple bar chart for total agencies and UPT
def total_upt_chart(df: pd.DataFrame, x_col:str, y_col:str,tool_tip:list):
bar_chart = alt.Chart(df).mark_bar().encode(
x=alt.X(x_col).sort("-y"),
y=alt.Y(y_col),
tooltip = tool_tip,
color = alt.Color(x_col, title = "",
scale = alt.Scale(
range = cp.CALITP_CATEGORY_BRIGHT_COLORS +
cp.CALITP_CATEGORY_BOLD_COLORS
))
).properties(
title=f"Total Unlinked Passenger Trips per Reporter in RTPA since {MIN_YEAR}",
width = WIDTH,
height = HEIGHT,
).resolve_scale(y="independent").interactive()
return bar_chartReport Totals¶
Markdown(f"""
Within {rtpa}:
- Number of Reporters: <b>{agency_count}</b>.
- Total Unlinked Passenger Trips since {MIN_YEAR}: <b>{total_upt:,}</b>.
- Individual Reporters ridership breakdown:
""")display(agency_agg_yr.reset_index(drop=True))tooltip_list = ["agency","total_upt","pct_of_total_upt"]
total_upt_chart(
agency_agg_yr,
x_col="agency",
y_col="total_upt",
tool_tip=tooltip_list)Reporter¶
make_line_chart(by_agency_long, y_col = "upt", color_col = "agency")Change in Unlinked Passenger Trips from the prior year. For example, July 2023’s change would be the change in July 2023’s reported values against July 2022’s reported values.
make_bar_chart(by_agency_long_no_zero_upt, y_col = "change_1yr", color_col = "agency")Transit Mode¶
make_line_chart(by_mode_long, y_col = "upt", color_col = "Mode")make_bar_chart(by_mode_long, y_col = "change_1yr", color_col = "Mode")Type of Service¶
make_line_chart(by_tos_long, y_col = "upt", color_col = "TOS")make_bar_chart(by_tos_long, y_col = "change_1yr", color_col = "TOS")