1import warnings
2import pandas as pd
3import time
4import logging as lg
5
6from urbanaccess.utils import log
7from urbanaccess.gtfs.network import _time_selector
8
# Importing this module installs a filter on the global warnings filter list
# that silences every FutureWarning, not only those raised by this module.
warnings.simplefilter(action="ignore", category=FutureWarning)
10
11
def _calc_headways_by_route_stop(df):
    """
    Calculate headways by route stop

    Parameters
    ----------
    df : pandas.DataFrame
        interpolated stop times dataframe for stop times within the time
        range with appended trip and route information. The columns
        'unique_stop_id', 'unique_route_id' and
        'departure_time_sec_interpolate' are read. A 'unique_stop_route'
        column ('<unique_stop_id>,<unique_route_id>') is added to this
        dataframe in place

    Returns
    -------
    dataframe : pandas.DataFrame
        dataframe of statistics of route stop headways in units of minutes.
        The index holds the 'unique_stop_route' keys and the columns are
        the statistics produced by pandas.Series.describe(). Route stops
        with fewer than two departures have a count of 0
    """

    # TODO: Optimize for speed

    start_time = time.time()

    # NOTE: this column is deliberately written to the dataframe that was
    # passed in; _headway_handler reads it from the same dataframe after
    # this function returns
    df['unique_stop_route'] = (
        df['unique_stop_id'].str.cat(
            df['unique_route_id'].astype('str'), sep=','))

    stop_route_groups = df.groupby('unique_stop_route')
    log('Starting route stop headway calculation for {:,} route '
        'stops...'.format(len(stop_route_groups)))

    results = {}

    # suppress RuntimeWarning: Mean of empty slice. for this code block
    with warnings.catch_warnings():
        # the category has to be the warning class itself: a string is
        # accepted by simplefilter() but raises a TypeError as soon as a
        # warning is matched against the filter
        warnings.simplefilter("ignore", category=RuntimeWarning)

        for unique_stop_route, stop_route_group in stop_route_groups:
            # sort only the departure times and do not mutate the group
            departure_times = (
                stop_route_group['departure_time_sec_interpolate']
                .sort_values(ascending=True).values)
            # gap between each pair of consecutive departures, converted
            # from seconds to minutes
            headways_in_minutes = (
                (departure_times[1:] - departure_times[:-1]) / 60.0)
            results[unique_stop_route] = (
                pd.Series(headways_in_minutes).describe())

    log('Route stop headway calculation complete. Took {:,.2f} seconds'.format(
        time.time() - start_time))

    return pd.DataFrame(results).T
61
62
def _headway_handler(interpolated_stop_times_df, trips_df,
                     routes_df, headway_timerange):
    """
    Prepare the GTFS tables and run the route stop headway calculation

    The passed trips and routes dataframes are modified in place:
    'unique_trip_id' and 'unique_route_id' columns are added to trips_df
    and a 'unique_route_id' column is added to routes_df.

    Parameters
    ----------
    interpolated_stop_times_df : pandas.DataFrame
        interpolated stop times dataframe for stop times within the time range
    trips_df : pandas.DataFrame
        trips dataframe
    routes_df : pandas.DataFrame
        routes dataframe
    headway_timerange : list
        time range for which to calculate headways between as a
        list of time 1 and time 2 where times are 24 hour clock strings
        such as:
        ['07:00:00', '10:00:00']

    Returns
    -------
    headway_by_routestop_df : pandas.DataFrame
        dataframe of statistics of route stop headways in units of minutes
        with the 'unique_stop_id', 'unique_route_id' and 'node_id_route'
        columns attached
    """
    begin = time.time()

    # unique ids are built as '<id>_<unique_agency_id>'
    make_unique_id = lambda table, id_col: table[id_col].str.cat(  # noqa: E731
        table['unique_agency_id'].astype('str'), sep='_')

    # add unique trip and route id to the trips table
    trips_df['unique_trip_id'] = make_unique_id(trips_df, 'trip_id')
    trips_df['unique_route_id'] = make_unique_id(trips_df, 'route_id')

    # required trip columns plus whichever optional ones are present
    trip_cols = ['unique_route_id', 'service_id', 'unique_trip_id',
                 'unique_agency_id']
    trip_cols.extend(
        col for col in ['direction_id', 'shape_id']
        if col in trips_df.columns)
    trips_df = trips_df[trip_cols]

    # add unique route id to the routes table
    routes_df['unique_route_id'] = make_unique_id(routes_df, 'route_id')
    routes_df = routes_df[['unique_route_id', 'route_long_name',
                           'route_type', 'unique_agency_id']]

    start, end = headway_timerange[0], headway_timerange[1]
    stop_times_in_range = _time_selector(
        df=interpolated_stop_times_df, starttime=start, endtime=end)

    # attach route attributes to trips, then trips to the stop times
    trips_with_routes = trips_df.merge(
        routes_df, how='left', on='unique_route_id', sort=False)
    merge_df = stop_times_in_range.merge(
        trips_with_routes, how='left', on='unique_trip_id', sort=False)
    # both the trips and routes tables carried 'unique_agency_id', so the
    # first merge suffixed them; neither copy is needed any further
    merge_df = merge_df.drop(
        ['unique_agency_id_y', 'unique_agency_id_x'], axis=1)

    # this call also adds the 'unique_stop_route' column to merge_df
    headway_stats = _calc_headways_by_route_stop(df=merge_df)

    # add unique route stop node_id; the key columns come straight from
    # the merged stop times records
    route_stop_keys = merge_df[
        ['unique_stop_route', 'unique_stop_id', 'unique_route_id']]
    headway_by_routestop_df = headway_stats.merge(
        route_stop_keys, how='left', left_index=True,
        right_on='unique_stop_route', sort=False)
    headway_by_routestop_df = headway_by_routestop_df.drop(
        'unique_stop_route', axis=1)
    route_id_as_str = headway_by_routestop_df['unique_route_id'].astype('str')
    headway_by_routestop_df['node_id_route'] = (
        headway_by_routestop_df['unique_stop_id'].str.cat(
            route_id_as_str, sep='_'))

    log('headway calculation complete. Took {:,.2f} seconds'.format(
        time.time() - begin))

    return headway_by_routestop_df
145
146
def headways(gtfsfeeds_df, headway_timerange):
    """
    Calculate headways by route stop for a specific time range

    Parameters
    ----------
    gtfsfeeds_df : object
        gtfsfeeds_dfs object with all processed GTFS data tables. Its
        ``stop_times_int``, ``trips`` and ``routes`` dataframes are used
        and must not be empty
    headway_timerange : list
        time range for which to calculate headways between as a list of
        time 1 and time 2 where times are 24 hour clock strings such as:
        ['07:00:00', '10:00:00']

    Returns
    -------
    gtfsfeeds_df : object
        the gtfsfeeds_dfs object that was passed in, with its ``headways``
        attribute set to the dataframe of statistics of route stop
        headways in units of minutes with relevant route and stop
        information

    Raises
    ------
    ValueError
        if headway_timerange is not a list of two 'HH:MM:SS' strings, if
        the start time is later than the end time, if gtfsfeeds_df is
        None, or if one of the required dataframes is empty
    """

    time_error_statement = (
        '{} starttime and endtime are not in the correct format. '
        'Format should be a 24 hour clock in following format: 08:00:00 '
        'or 17:00:00'.format(headway_timerange))
    if not isinstance(headway_timerange, list) or len(headway_timerange) != 2:
        raise ValueError('timerange must be a list of length 2')

    # check the type and format of both times BEFORE comparing them, so
    # malformed input always fails with the format message instead of an
    # AttributeError or a misleading 'starttime is greater' error
    digits = '0123456789'
    parsed_times = []
    for t in headway_timerange:
        if not isinstance(t, str) or len(t) != 8:
            raise ValueError(time_error_statement)
        parts = t.split(':')
        if len(parts) != 3 or not all(
                len(part) == 2 and all(char in digits for char in part)
                for part in parts):
            raise ValueError(time_error_statement)
        parsed_times.append(tuple(int(part) for part in parts))

    # compare as numeric (hour, minute, second) tuples rather than as hour
    # strings so minutes and seconds are taken into account as well
    start_hms, end_hms = parsed_times
    if start_hms > end_hms:
        raise ValueError('starttime is greater than endtime')

    hour_span = end_hms[0] - start_hms[0]
    if hour_span > 3:
        long_time_range_msg = (
            'WARNING: Time range passed: {} is a {} hour period. Long periods '
            'over 3 hours may take a significant amount of time to process.')
        log(long_time_range_msg.format(headway_timerange, hour_span),
            level=lg.WARNING)

    if gtfsfeeds_df is None:
        raise ValueError('gtfsfeeds_df cannot be None')
    if gtfsfeeds_df.stop_times_int.empty or gtfsfeeds_df.trips.empty or \
            gtfsfeeds_df.routes.empty:
        raise ValueError(
            'one of the gtfsfeeds_dfs objects: stop_times_int, trips, '
            'or routes were found to be empty.')

    headways_df = _headway_handler(
        interpolated_stop_times_df=gtfsfeeds_df.stop_times_int,
        trips_df=gtfsfeeds_df.trips,
        routes_df=gtfsfeeds_df.routes,
        headway_timerange=headway_timerange)

    # the result is stored on the object that was passed in, which is
    # also what gets returned
    gtfsfeeds_df.headways = headways_df

    return gtfsfeeds_df
210