import warnings
import pandas as pd
import time
import logging as lg

from urbanaccess.utils import log
from urbanaccess.gtfs.network import _time_selector

warnings.simplefilter(action="ignore", category=FutureWarning)


def _calc_headways_by_route_stop(df):
    """
    Calculate headways by route stop

    Parameters
    ----------
    df : pandas.DataFrame
        interpolated stop times dataframe for stop times within the time
        range with appended trip and route information. Must contain the
        columns ``unique_stop_id``, ``unique_route_id`` and
        ``departure_time_sec_interpolate``. A ``unique_stop_route`` column
        is added to this dataframe in place.

    Returns
    -------
    dataframe : pandas.DataFrame
        dataframe of statistics of route stop headways in units of minutes,
        indexed by the ``unique_stop_route`` key, with one column per
        statistic returned by ``pandas.Series.describe``
    """

    # TODO: Optimize for speed

    start_time = time.time()

    # key identifying each (stop, route) pair; written onto df itself
    # because the caller reads this column back after this function returns
    df['unique_stop_route'] = (
        df['unique_stop_id'].str.cat(
            df['unique_route_id'].astype('str'), sep=','))

    stop_route_groups = df.groupby('unique_stop_route')
    log('Starting route stop headway calculation for {:,} route '
        'stops...'.format(len(stop_route_groups)))

    results = {}

    # suppress RuntimeWarning: Mean of empty slice. for this code block
    with warnings.catch_warnings():
        # the category must be the warning class itself: a string here makes
        # the warnings machinery raise TypeError when a warning is emitted
        warnings.simplefilter("ignore", category=RuntimeWarning)

        for unique_stop_route, stop_route_group in stop_route_groups:
            # sort a copy of the departure times rather than sorting the
            # group in place
            departures = (
                stop_route_group['departure_time_sec_interpolate']
                .sort_values(ascending=True).values)
            # difference between consecutive departures, seconds -> minutes
            stop_route_group_headways = (
                departures[1:] - departures[:-1]) / 60
            results[unique_stop_route] = (
                pd.Series(stop_route_group_headways).describe())

    log('Route stop headway calculation complete. '
        'Took {:,.2f} seconds'.format(time.time() - start_time))

    return pd.DataFrame(results).T


def _headway_handler(interpolated_stop_times_df, trips_df,
                     routes_df, headway_timerange):
    """
    route stop headway calculator handler

    Builds the unique trip and route ids, selects the stop times inside the
    time range, joins trip and route information onto them, computes the
    headway statistics and attaches the stop and route ids to the result.

    Parameters
    ----------
    interpolated_stop_times_df : pandas.DataFrame
        interpolated stop times dataframe for stop times within the time range
    trips_df : pandas.DataFrame
        trips dataframe. ``unique_trip_id`` and ``unique_route_id`` columns
        are added to this dataframe in place.
    routes_df : pandas.DataFrame
        routes dataframe. A ``unique_route_id`` column is added to this
        dataframe in place.
    headway_timerange : list
        time range for which to calculate headways between as a
        list of time 1 and time 2 where times are 24 hour clock strings
        such as:
        ['07:00:00', '10:00:00']

    Returns
    -------
    headway_by_routestop_df : pandas.DataFrame
        dataframe of statistics of route stop headways in units of minutes
        with relevant route and stop information
    """
    start_time = time.time()

    # add unique trip and route id
    trips_df['unique_trip_id'] = (
        trips_df['trip_id'].str.cat(
            trips_df['unique_agency_id'].astype('str'), sep='_'))
    trips_df['unique_route_id'] = (
        trips_df['route_id'].str.cat(
            trips_df['unique_agency_id'].astype('str'), sep='_'))

    columns = ['unique_route_id', 'service_id', 'unique_trip_id',
               'unique_agency_id']
    # if these optional cols exist then keep those that do
    optional_cols = ['direction_id', 'shape_id']
    for item in optional_cols:
        if item in trips_df.columns:
            columns.append(item)

    trips_df = trips_df[columns]

    # add unique route id
    routes_df['unique_route_id'] = (
        routes_df['route_id'].str.cat(
            routes_df['unique_agency_id'].astype('str'), sep='_'))

    columns = ['unique_route_id', 'route_long_name', 'route_type',
               'unique_agency_id']
    routes_df = routes_df[columns]

    selected_interpolated_stop_times_df = _time_selector(
        df=interpolated_stop_times_df, starttime=headway_timerange[0],
        endtime=headway_timerange[1])

    # trips and routes both carry unique_agency_id, so this merge produces
    # the suffixed unique_agency_id_x / unique_agency_id_y columns that are
    # dropped below
    tmp1 = pd.merge(trips_df, routes_df, how='left', left_on='unique_route_id',
                    right_on='unique_route_id', sort=False)
    merge_df = pd.merge(selected_interpolated_stop_times_df, tmp1, how='left',
                        left_on='unique_trip_id', right_on='unique_trip_id',
                        sort=False)
    cols_to_drop = ['unique_agency_id_y', 'unique_agency_id_x']
    merge_df.drop(cols_to_drop, axis=1, inplace=True)

    # also adds the 'unique_stop_route' column to merge_df, used just below
    headway_by_routestop_df = _calc_headways_by_route_stop(df=merge_df)

    # add unique route stop node_id
    # NOTE(review): merge_df can hold several rows per unique_stop_route, in
    # which case this left merge repeats the statistics once per matching
    # row - confirm this is what downstream consumers expect
    headway_by_routestop_df = pd.merge(
        headway_by_routestop_df,
        merge_df[['unique_stop_route', 'unique_stop_id', 'unique_route_id']],
        how='left', left_index=True, right_on='unique_stop_route', sort=False)
    headway_by_routestop_df.drop('unique_stop_route', axis=1, inplace=True)
    headway_by_routestop_df['node_id_route'] = (
        headway_by_routestop_df['unique_stop_id'].str.cat(
            headway_by_routestop_df['unique_route_id'].astype('str'), sep='_'))

    log('headway calculation complete. Took {:,.2f} seconds'.format(
        time.time() - start_time))

    return headway_by_routestop_df


def headways(gtfsfeeds_df, headway_timerange):
    """
    Calculate headways by route stop for a specific time range

    Parameters
    ----------
    gtfsfeeds_df : object
        gtfsfeeds_dfs object with all processed GTFS data tables
    headway_timerange : list
        time range for which to calculate headways between as a list of
        time 1 and time 2 where times are 24 hour clock strings such as:
        ['07:00:00', '10:00:00']

    Returns
    -------
    gtfsfeeds_dfs.headways : pandas.DataFrame
        gtfsfeeds_dfs object for the headways dataframe with statistics of
        route stop headways in units of minutes
        with relevant route and stop information

    Raises
    ------
    ValueError
        if headway_timerange is not a list of two 8 character time strings
        with an integer hour field, if the start hour is later than the end
        hour, if gtfsfeeds_df is None, or if any of the stop_times_int,
        trips, or routes tables are empty
    """

    time_error_statement = (
        '{} starttime and endtime are not in the correct format. '
        'Format should be a 24 hour clock in following format: 08:00:00 '
        'or 17:00:00'.format(headway_timerange))
    if not isinstance(headway_timerange, list) or len(headway_timerange) != 2:
        raise ValueError('timerange must be a list of length 2')

    # check type and length before touching string methods so that bad input
    # always surfaces as the documented ValueError
    for t in headway_timerange:
        if not isinstance(t, str) or len(t) != 8:
            raise ValueError(time_error_statement)

    # compare the hour fields numerically rather than as strings
    try:
        start_hour = int(headway_timerange[0].split(':')[0])
        end_hour = int(headway_timerange[1].split(':')[0])
    except ValueError:
        raise ValueError(time_error_statement)

    if start_hour > end_hour:
        raise ValueError('starttime is greater than endtime')

    hour_span = end_hour - start_hour
    if hour_span > 3:
        long_time_range_msg = (
            'WARNING: Time range passed: {} is a {} hour period. Long periods '
            'over 3 hours may take a significant amount of time to process.')
        log(long_time_range_msg.format(headway_timerange, hour_span),
            level=lg.WARNING)

    if gtfsfeeds_df is None:
        raise ValueError('gtfsfeeds_df cannot be None')
    if gtfsfeeds_df.stop_times_int.empty or gtfsfeeds_df.trips.empty or \
            gtfsfeeds_df.routes.empty:
        raise ValueError(
            'one of the gtfsfeeds_dfs objects: stop_times_int, trips, '
            'or routes were found to be empty.')

    headways_df = _headway_handler(
        interpolated_stop_times_df=gtfsfeeds_df.stop_times_int,
        trips_df=gtfsfeeds_df.trips,
        routes_df=gtfsfeeds_df.routes,
        headway_timerange=headway_timerange)

    gtfsfeeds_df.headways = headways_df

    return gtfsfeeds_df