1# Licensed under a 3-clause BSD style license - see LICENSE.rst
2
3from copy import deepcopy
4
5import numpy as np
6
7from astropy.table import groups, Table, QTable
8from astropy.time import Time, TimeDelta
9from astropy import units as u
10from astropy.units import Quantity
11
12from astropy.timeseries.core import BaseTimeSeries, autocheck_required_columns
13
# Public names exported by this module on ``from ... import *``.
__all__ = ['BinnedTimeSeries']
15
16
@autocheck_required_columns
class BinnedTimeSeries(BaseTimeSeries):
    """
    A class to represent binned time series data in tabular form.

    `~astropy.timeseries.BinnedTimeSeries` provides a class for
    representing time series as a collection of values of different
    quantities measured in time bins (for time series with values
    sampled at specific times, see the `~astropy.timeseries.TimeSeries`
    class). `~astropy.timeseries.BinnedTimeSeries` is a sub-class of
    `~astropy.table.QTable` and thus provides all the standard table
    manipulation methods available to tables, but it also provides
    additional conveniences for dealing with time series, such as a
    flexible initializer for setting up the times, and attributes to
    access the start/center/end time of bins.

    See also: https://docs.astropy.org/en/stable/timeseries/

    Parameters
    ----------
    data : numpy ndarray, dict, list, table-like object, optional
        Data to initialize time series. This does not need to contain the
        times, which can be provided separately, but if it does contain the
        times they should be in columns called ``'time_bin_start'`` and
        ``'time_bin_size'`` to be automatically recognized.
    time_bin_start : `~astropy.time.Time` or iterable
        The times of the start of each bin - this can be either given
        directly as a `~astropy.time.Time` array or as any iterable that
        initializes the `~astropy.time.Time` class. If this is given, then
        the remaining time-related arguments should not be used. This can also
        be a scalar value if ``time_bin_size`` is provided.
    time_bin_end : `~astropy.time.Time` or iterable
        The times of the end of each bin - this can be either given directly
        as a `~astropy.time.Time` array or as any value or iterable that
        initializes the `~astropy.time.Time` class. If this is given, then the
        remaining time-related arguments should not be used. This can only be
        given if ``time_bin_start`` is an array of values. If ``time_bin_end``
        is a scalar, time bins are assumed to be contiguous, such that the end
        of each bin is the start of the next one, and ``time_bin_end`` gives
        the end time for the last bin. If ``time_bin_end`` is an array, the
        time bins do not need to be contiguous. If this argument is provided,
        ``time_bin_size`` should not be provided.
    time_bin_size : `~astropy.time.TimeDelta` or `~astropy.units.Quantity`
        The size of the time bins, either as a scalar value (in which case all
        time bins will be assumed to have the same duration) or as an array of
        values (in which case each time bin can have a different duration).
        If this argument is provided, ``time_bin_end`` should not be provided.
    n_bins : int
        The number of time bins for the series. This is only used if both
        ``time_bin_start`` and ``time_bin_size`` are provided and are scalar
        values.
    **kwargs : dict, optional
        Additional keyword arguments are passed to `~astropy.table.QTable`.

    Raises
    ------
    TypeError
        If the time-related arguments are missing, duplicated between the
        table data and the keyword arguments, of the wrong type, or
        inconsistent with each other (e.g. ``n_bins`` given with non-scalar
        ``time_bin_start``).
    ValueError
        If a non-scalar ``time_bin_start`` does not have the same length as
        the table.
    """

    _required_columns = ['time_bin_start', 'time_bin_size']

    def __init__(self, data=None, *, time_bin_start=None, time_bin_end=None,
                 time_bin_size=None, n_bins=None, **kwargs):
        # The parameters are documented in the class docstring.

        super().__init__(data=data, **kwargs)

        # For some operations, an empty time series needs to be created, then
        # columns added one by one. We should check that when columns are added
        # manually, time is added first and is of the right type.
        if (data is None and time_bin_start is None and time_bin_end is None and
                time_bin_size is None and n_bins is None):
            self._required_columns_relax = True
            return

        # First, if time_bin_start and time_bin_size have been given in the
        # table data, we should extract them and treat them as if they had
        # been passed as keyword arguments.

        if 'time_bin_start' in self.colnames:
            if time_bin_start is None:
                time_bin_start = self.columns['time_bin_start']
            else:
                raise TypeError("'time_bin_start' has been given both in the table "
                                "and as a keyword argument")

        if 'time_bin_size' in self.colnames:
            if time_bin_size is None:
                time_bin_size = self.columns['time_bin_size']
            else:
                raise TypeError("'time_bin_size' has been given both in the table "
                                "and as a keyword argument")

        if time_bin_start is None:
            raise TypeError("'time_bin_start' has not been specified")

        if time_bin_end is None and time_bin_size is None:
            raise TypeError("Either 'time_bin_size' or 'time_bin_end' should be specified")

        # Normalize the inputs: start/end become Time objects (unless they
        # already are Time/TimeDelta) and the bin size becomes a Quantity.

        if not isinstance(time_bin_start, (Time, TimeDelta)):
            time_bin_start = Time(time_bin_start)

        if time_bin_end is not None and not isinstance(time_bin_end, (Time, TimeDelta)):
            time_bin_end = Time(time_bin_end)

        if time_bin_size is not None and not isinstance(time_bin_size, (Quantity, TimeDelta)):
            raise TypeError("'time_bin_size' should be a Quantity or a TimeDelta")

        if isinstance(time_bin_size, TimeDelta):
            time_bin_size = time_bin_size.sec * u.s

        if n_bins is not None and time_bin_size is not None:
            if not (time_bin_start.isscalar and time_bin_size.isscalar):
                raise TypeError("'n_bins' cannot be specified if 'time_bin_start' or "
                                "'time_bin_size' are not scalar'")

        if time_bin_start.isscalar:

            # We interpret this as meaning that this is the start of the
            # first bin and that the bins are contiguous. In this case,
            # we require time_bin_size to be specified.

            if time_bin_size is None:
                raise TypeError("'time_bin_start' is scalar, so 'time_bin_size' is required")

            if time_bin_size.isscalar:
                if data is not None:
                    if n_bins is not None:
                        if n_bins != len(self):
                            raise TypeError("'n_bins' has been given and it is not the "
                                            "same length as the input data.")
                    else:
                        n_bins = len(self)
                elif n_bins is None:
                    # Without data or n_bins there is no way to know how many
                    # bins to create; fail with an explicit message instead of
                    # letting np.repeat choke on ``None``.
                    raise TypeError("'n_bins' is required when 'time_bin_start' and "
                                    "'time_bin_size' are scalar and no data is given")

                time_bin_size = np.repeat(time_bin_size, n_bins)

            # Cumulative bin sizes are the offsets of each bin end relative
            # to the start of the first bin.
            time_delta = np.cumsum(time_bin_size)
            time_bin_end = time_bin_start + time_delta

            # Now shift the array so that the first entry is 0
            time_delta = np.roll(time_delta, 1)
            time_delta[0] = 0. * u.s

            # Make time_bin_start into an array
            time_bin_start = time_bin_start + time_delta

        else:

            if len(self.colnames) > 0 and len(time_bin_start) != len(self):
                raise ValueError(f"Length of 'time_bin_start' ({len(time_bin_start)}) "
                                 f"should match table length ({len(self)})")

            if time_bin_end is not None:
                if time_bin_end.isscalar:
                    # Contiguous bins: each bin ends where the next one
                    # starts, and the scalar gives the end of the last bin.
                    times = time_bin_start.copy()
                    times[:-1] = times[1:]
                    times[-1] = time_bin_end
                    time_bin_end = times
                time_bin_size = (time_bin_end - time_bin_start).sec * u.s

        if time_bin_size.isscalar:
            # time_bin_start is always an array at this point. Its length is
            # used rather than len(self) because the table is still empty
            # (length 0) when no data columns were supplied; when the table
            # does have columns the two lengths were checked to be equal.
            time_bin_size = np.repeat(time_bin_size, len(time_bin_start))

        with self._delay_required_column_checks():

            # Drop any columns that came in with the data so that the
            # normalized versions can be inserted as the first two columns.

            if 'time_bin_start' in self.colnames:
                self.remove_column('time_bin_start')

            if 'time_bin_size' in self.colnames:
                self.remove_column('time_bin_size')

            self.add_column(time_bin_start, index=0, name='time_bin_start')
            self.add_index('time_bin_start')
            self.add_column(time_bin_size, index=1, name='time_bin_size')

    @property
    def time_bin_start(self):
        """
        The start times of all the time bins.
        """
        return self['time_bin_start']

    @property
    def time_bin_center(self):
        """
        The center times of all the time bins.
        """
        return self['time_bin_start'] + self['time_bin_size'] * 0.5

    @property
    def time_bin_end(self):
        """
        The end times of all the time bins.
        """
        return self['time_bin_start'] + self['time_bin_size']

    @property
    def time_bin_size(self):
        """
        The sizes of all the time bins.
        """
        return self['time_bin_size']

    def __getitem__(self, item):
        """
        Return the selected item(s) of the time series.

        If ``item`` is a list or tuple of column names that does not include
        both ``'time_bin_start'`` and ``'time_bin_size'``, a plain
        `~astropy.table.QTable` holding the requested columns (with a deep
        copy of the metadata and the same grouping) is returned. All other
        kinds of ``item`` are delegated to the parent class.
        """
        if self._is_list_or_tuple_of_str(item):
            if 'time_bin_start' not in item or 'time_bin_size' not in item:
                out = QTable([self[x] for x in item],
                             meta=deepcopy(self.meta),
                             copy_indices=self._copy_indices)
                out._groups = groups.TableGroups(out, indices=self.groups._indices,
                                                 keys=self.groups._keys)
                return out
        return super().__getitem__(item)

    @classmethod
    def read(cls, filename, time_bin_start_column=None, time_bin_end_column=None,
             time_bin_size_column=None, time_bin_size_unit=None, time_format=None, time_scale=None,
             format=None, *args, **kwargs):
        """
        Read and parse a file and returns a `astropy.timeseries.BinnedTimeSeries`.

        This method uses the unified I/O infrastructure in Astropy which makes
        it easy to define readers/writers for various classes
        (https://docs.astropy.org/en/stable/io/unified.html). By default, this
        method will try and use readers defined specifically for the
        `astropy.timeseries.BinnedTimeSeries` class - however, it is also
        possible to use the ``format`` keyword to specify formats defined for
        the `astropy.table.Table` class - in this case, you will need to also
        provide the column names for column containing the start times for the
        bins, as well as other column names (see the Parameters section below
        for details)::

            >>> from astropy.timeseries.binned import BinnedTimeSeries
            >>> ts = BinnedTimeSeries.read('binned.dat', format='ascii.ecsv',
            ...                            time_bin_start_column='date_start',
            ...                            time_bin_end_column='date_end')  # doctest: +SKIP

        Parameters
        ----------
        filename : str
            File to parse.
        format : str
            File format specifier.
        time_bin_start_column : str
            The name of the column with the start time for each bin.
        time_bin_end_column : str, optional
            The name of the column with the end time for each bin. Either this
            option or ``time_bin_size_column`` should be specified.
        time_bin_size_column : str, optional
            The name of the column with the size for each bin. Either this
            option or ``time_bin_end_column`` should be specified.
        time_bin_size_unit : `astropy.units.Unit`, optional
            If ``time_bin_size_column`` is specified but does not have a unit
            set in the table, you can specify the unit manually.
        time_format : str, optional
            The time format for the start and end columns.
        time_scale : str, optional
            The time scale for the start and end columns.
        *args : tuple, optional
            Positional arguments passed through to the data reader.
        **kwargs : dict, optional
            Keyword arguments passed through to the data reader.

        Returns
        -------
        out : `astropy.timeseries.binned.BinnedTimeSeries`
            BinnedTimeSeries corresponding to the file.

        Raises
        ------
        ValueError
            When falling back to the `astropy.table.Table` readers: if
            ``time_bin_start_column`` is missing, if neither or both of
            ``time_bin_end_column`` and ``time_bin_size_column`` are given,
            if a named column is not present in the file, or if the bin size
            column has no unit and ``time_bin_size_unit`` is not an astropy
            unit.

        """

        try:

            # First we try the readers defined for the BinnedTimeSeries class
            return super().read(filename, *args, format=format, **kwargs)

        except TypeError:

            # Otherwise we fall back to the default Table readers

            if time_bin_start_column is None:
                raise ValueError("``time_bin_start_column`` should be provided since the default Table readers are being used.")
            if time_bin_end_column is None and time_bin_size_column is None:
                raise ValueError("Either `time_bin_end_column` or `time_bin_size_column` should be provided.")
            elif time_bin_end_column is not None and time_bin_size_column is not None:
                raise ValueError("Cannot specify both `time_bin_end_column` and `time_bin_size_column`.")

            table = Table.read(filename, *args, format=format, **kwargs)

            if time_bin_start_column in table.colnames:
                time_bin_start = Time(table.columns[time_bin_start_column],
                                      scale=time_scale, format=time_format)
                table.remove_column(time_bin_start_column)
            else:
                raise ValueError(f"Bin start time column '{time_bin_start_column}' not found in the input data.")

            if time_bin_end_column is not None:

                if time_bin_end_column in table.colnames:
                    time_bin_end = Time(table.columns[time_bin_end_column],
                                        scale=time_scale, format=time_format)
                    table.remove_column(time_bin_end_column)
                else:
                    raise ValueError(f"Bin end time column '{time_bin_end_column}' not found in the input data.")

                time_bin_size = None

            else:

                # The validation above guarantees that exactly one of the
                # end/size column names is set, so the size column is the
                # one in use here.

                if time_bin_size_column in table.colnames:
                    time_bin_size = table.columns[time_bin_size_column]
                    table.remove_column(time_bin_size_column)
                else:
                    raise ValueError(f"Bin size column '{time_bin_size_column}' not found in the input data.")

                if time_bin_size.unit is None:
                    if not isinstance(time_bin_size_unit, u.UnitBase):
                        raise ValueError("The bin size unit should be specified as an astropy Unit using ``time_bin_size_unit``.")
                    time_bin_size = time_bin_size * time_bin_size_unit
                else:
                    time_bin_size = u.Quantity(time_bin_size)

                time_bin_end = None

            init_kwargs = dict(data=table,
                               time_bin_start=time_bin_start,
                               time_bin_end=time_bin_end,
                               time_bin_size=time_bin_size)

            # n_bins is only meaningful when both start and size are scalar.
            # time_bin_size is None when an end column was used, so guard
            # against that before querying ``isscalar``.
            if (time_bin_size is not None and time_bin_start.isscalar
                    and time_bin_size.isscalar):
                init_kwargs['n_bins'] = len(table)

            # Build through ``cls`` so that subclasses calling ``read`` get
            # an instance of their own class.
            return cls(**init_kwargs)
346