1 /*
2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 #ifndef DATASET_H
21 #define DATASET_H
22 
23 #include "gretl_matrix.h"
24 #include "gretl_bundle.h"
25 
/*
 * DatasetMarkerType: three codes numbered consecutively from zero.
 * NOTE(review): presumably these identify the kind of observation
 * markers attached to a dataset -- confirm against the users of
 * this type.
 */
typedef enum {
    NO_MARKERS         = 0,
    REGULAR_MARKERS    = 1,
    DAILY_DATE_STRINGS = 2
} DatasetMarkerType;
31 
/*
 * VarFlags: bit flags. Each enumerator is a distinct power of two
 * (bits 0 through 6), so several of them can be OR-ed together
 * into a single value.
 */
typedef enum {
    VAR_DISCRETE   = 0x01,
    VAR_HIDDEN     = 0x02,
    VAR_GENERATED  = 0x04,
    VAR_LISTARG    = 0x08,
    VAR_TIMECOL    = 0x10,
    VAR_HFANCHOR   = 0x20,
    VAR_CODED      = 0x40
} VarFlags;
41 
/*
 * DatasetOp: operation codes, numbered consecutively from zero
 * (DS_NONE) to 14 (DS_PAD_DAILY).
 * NOTE(review): presumably the codes accepted by modify_dataset()
 * and produced by dataset_op_from_string() -- both are declared
 * with plain int, so confirm against their definitions.
 */
typedef enum {
    DS_NONE      = 0,
    DS_ADDOBS    = 1,
    DS_COMPACT   = 2,
    DS_EXPAND    = 3,
    DS_TRANSPOSE = 4,
    DS_DELETE    = 5,
    DS_KEEP      = 6,
    DS_SORTBY    = 7,
    DS_DSORTBY   = 8,
    DS_RESAMPLE  = 9,
    DS_RESTORE   = 10,
    DS_CLEAR     = 11,
    DS_RENUMBER  = 12,
    DS_INSOBS    = 13,
    DS_PAD_DAILY = 14
} DatasetOp;
59 
/*
 * DataCopyFlag: two codes, 0 and 1; this is the type of the last
 * parameter of dataset_replace_series().
 * NOTE(review): the names suggest "copy the supplied values" versus
 * "take over the supplied array" -- confirm against the definition
 * of dataset_replace_series().
 */
typedef enum {
    DS_COPY_VALUES = 0,
    DS_GRAB_VALUES = 1
} DataCopyFlag;
64 
/**
 * CompactMethod:
 * @COMPACT_NONE:    no data compaction
 * @COMPACT_SUM:     take sum of higher frequency data
 * @COMPACT_AVG:     take mean of higher frequency data
 * @COMPACT_SOP:     use start-of-period value
 * @COMPACT_EOP:     use end-of-period value
 * @COMPACT_WDAY:    use a specified day of the week
 * @COMPACT_SPREAD:  spread out into multiple series
 * @COMPACT_MAX:     sentinel value (one past the last real method)
 *
 * Symbolic codes for the available methods of compacting data
 * series, that is, converting them from a higher to a lower
 * frequency. %COMPACT_WDAY applies only to conversion from
 * daily to weekly frequency.
 */

typedef enum {
    COMPACT_NONE   = 0,
    COMPACT_SUM    = 1,
    COMPACT_AVG    = 2,
    COMPACT_SOP    = 3,
    COMPACT_EOP    = 4,
    COMPACT_WDAY   = 5,
    COMPACT_SPREAD = 6,
    COMPACT_MAX    = 7
} CompactMethod;
92 
/* Forward declaration only: struct series_table_ is not defined in
   this header, so unless an included header completes the type,
   series_table can be handled here only through pointers. */
typedef struct series_table_ series_table;
94 
/**
 * dataset_is_cross_section:
 * @p: pointer to data information struct (may be NULL).
 *
 * Tests whether @p is non-NULL and its structure member equals
 * CROSS_SECTION. Evaluates to 1 if so, otherwise 0.
 *
 * @p is evaluated more than once, so it must be free of side
 * effects; it is parenthesized in the expansion so that any
 * pointer-valued expression may be passed.
 */
#define dataset_is_cross_section(p) ((p) != NULL && \
                                     (p)->structure == CROSS_SECTION)
103 
/**
 * dataset_is_time_series:
 * @p: pointer to data information struct (may be NULL).
 *
 * Tests whether @p is non-NULL and its structure member equals
 * either TIME_SERIES or SPECIAL_TIME_SERIES. Evaluates to 1 if
 * so, otherwise 0.
 *
 * @p is evaluated more than once, so it must be free of side
 * effects; it is parenthesized in the expansion so that any
 * pointer-valued expression may be passed.
 */
#define dataset_is_time_series(p) ((p) != NULL && \
                                   ((p)->structure == TIME_SERIES || \
                                    (p)->structure == SPECIAL_TIME_SERIES))
113 
/**
 * dataset_is_seasonal:
 * @p: pointer to data information struct (may be NULL).
 *
 * Tests whether @p is non-NULL, its structure member equals
 * TIME_SERIES or SPECIAL_TIME_SERIES, and its pd member is
 * greater than 1. Evaluates to 1 if so, otherwise 0.
 *
 * @p is evaluated more than once, so it must be free of side
 * effects; it is parenthesized in the expansion so that any
 * pointer-valued expression may be passed.
 */
#define dataset_is_seasonal(p) ((p) != NULL && \
                                ((p)->structure == TIME_SERIES || \
                                 (p)->structure == SPECIAL_TIME_SERIES) && \
                                (p)->pd > 1)
124 
/**
 * custom_time_series:
 * @p: pointer to data information struct (may be NULL).
 *
 * Tests whether @p is non-NULL and its structure member equals
 * SPECIAL_TIME_SERIES (time series with custom, non-standard
 * frequency). Evaluates to 1 if so, otherwise 0.
 *
 * @p is evaluated more than once, so it must be free of side
 * effects; it is parenthesized in the expansion so that any
 * pointer-valued expression may be passed.
 */
#define custom_time_series(p) ((p) != NULL && \
                               (p)->structure == SPECIAL_TIME_SERIES)
133 
/**
 * dataset_is_daily:
 * @p: pointer to data information struct (may be NULL).
 *
 * Tests whether @p is non-NULL, its structure member equals
 * TIME_SERIES, and its pd member is 5, 6 or 7 (daily data).
 * Evaluates to 1 if so, otherwise 0.
 *
 * @p is evaluated more than once, so it must be free of side
 * effects; it is parenthesized in the expansion so that any
 * pointer-valued expression may be passed.
 */
#define dataset_is_daily(p) ((p) != NULL && \
                             (p)->structure == TIME_SERIES && \
                             ((p)->pd == 5 || (p)->pd == 6 || (p)->pd == 7))
143 
/**
 * dataset_is_weekly:
 * @p: pointer to data information struct (may be NULL).
 *
 * Tests whether @p is non-NULL, its structure member equals
 * TIME_SERIES, and its pd member is 52 (weekly data).
 * Evaluates to 1 if so, otherwise 0.
 *
 * @p is evaluated more than once, so it must be free of side
 * effects; it is parenthesized in the expansion so that any
 * pointer-valued expression may be passed.
 */
#define dataset_is_weekly(p) ((p) != NULL && \
                              (p)->structure == TIME_SERIES && \
                              (p)->pd == 52)
153 
/**
 * dataset_is_hourly:
 * @p: pointer to data information struct (may be NULL).
 *
 * Tests whether @p is non-NULL, its structure member equals
 * TIME_SERIES, and its pd member is 24 (hourly data).
 * Evaluates to 1 if so, otherwise 0.
 *
 * @p is evaluated more than once, so it must be free of side
 * effects; it is parenthesized in the expansion so that any
 * pointer-valued expression may be passed.
 */
#define dataset_is_hourly(p) ((p) != NULL && \
                              (p)->structure == TIME_SERIES && \
                              (p)->pd == 24)
163 
/**
 * dataset_is_decennial:
 * @p: pointer to data information struct (may be NULL).
 *
 * Tests whether @p is non-NULL, its structure member equals
 * TIME_SERIES, and its pd member is 10 (decennial data).
 * Evaluates to 1 if so, otherwise 0.
 *
 * @p is evaluated more than once, so it must be free of side
 * effects; it is parenthesized in the expansion so that any
 * pointer-valued expression may be passed.
 */
#define dataset_is_decennial(p) ((p) != NULL && \
                                 (p)->structure == TIME_SERIES && \
                                 (p)->pd == 10)
173 
/**
 * dated_daily_data:
 * @p: pointer to data information struct (may be NULL).
 *
 * Tests whether @p is non-NULL, its structure member equals
 * TIME_SERIES, its pd member is 5, 6 or 7, and its sd0 member
 * exceeds 10000.0 (dated daily data). Evaluates to 1 if so,
 * otherwise 0.
 *
 * @p is evaluated more than once, so it must be free of side
 * effects; it is parenthesized in the expansion so that any
 * pointer-valued expression may be passed.
 */
#define dated_daily_data(p) ((p) != NULL && \
                             (p)->structure == TIME_SERIES && \
                             ((p)->pd == 5 || (p)->pd == 6 || (p)->pd == 7) && \
                             (p)->sd0 > 10000.0)
184 
/**
 * dated_seven_day_data:
 * @p: pointer to data information struct (may be NULL).
 *
 * Tests whether @p is non-NULL, its structure member equals
 * TIME_SERIES, its pd member is 7, and its sd0 member exceeds
 * 10000.0 (dated seven-day daily data). Evaluates to 1 if so,
 * otherwise 0.
 *
 * @p is evaluated more than once, so it must be free of side
 * effects; it is parenthesized in the expansion so that any
 * pointer-valued expression may be passed.
 */
#define dated_seven_day_data(p) ((p) != NULL && \
                                 (p)->structure == TIME_SERIES && \
                                 (p)->pd == 7 && \
                                 (p)->sd0 > 10000.0)
195 
/**
 * dated_weekly_data:
 * @p: pointer to data information struct (may be NULL).
 *
 * Tests whether @p is non-NULL, its structure member equals
 * TIME_SERIES, its pd member is 52, and its sd0 member exceeds
 * 10000.0 (dated weekly data). Evaluates to 1 if so,
 * otherwise 0.
 *
 * @p is evaluated more than once, so it must be free of side
 * effects; it is parenthesized in the expansion so that any
 * pointer-valued expression may be passed.
 */
#define dated_weekly_data(p) ((p) != NULL && \
                              (p)->structure == TIME_SERIES && \
                              (p)->pd == 52 && \
                              (p)->sd0 > 10000.0)
206 
/**
 * calendar_data:
 * @p: pointer to data information struct (may be NULL).
 *
 * Tests whether a dataset uses calendar dates for observation
 * strings: @p must be non-NULL, its structure member must equal
 * TIME_SERIES, its pd member must be 5, 6, 7 or 52, and its
 * stobs string must contain a '-' character. Evaluates to 1 if
 * all of these hold, otherwise 0.
 *
 * @p is evaluated more than once, so it must be free of side
 * effects; it is parenthesized in the expansion so that any
 * pointer-valued expression may be passed. The expansion calls
 * strchr(), so <string.h> must be in effect wherever the macro
 * is used.
 */
#define calendar_data(p) ((p) != NULL && \
                          (p)->structure == TIME_SERIES && \
                          ((p)->pd == 5 || (p)->pd == 6 || \
                           (p)->pd == 7 || (p)->pd == 52) && \
                          strchr((p)->stobs, '-') != NULL)
217 
/**
 * quarterly_or_monthly:
 * @p: pointer to data information struct (may be NULL).
 *
 * Tests whether @p is non-NULL, its structure member equals
 * TIME_SERIES, and its pd member is 4 (quarterly) or 12
 * (monthly). Evaluates to 1 if so, otherwise 0.
 *
 * @p is evaluated more than once, so it must be free of side
 * effects; it is parenthesized in the expansion so that any
 * pointer-valued expression may be passed.
 */
#define quarterly_or_monthly(p) ((p) != NULL && \
                                 (p)->structure == TIME_SERIES && \
                                 ((p)->pd == 4 || (p)->pd == 12))
227 
/**
 * annual_data:
 * @p: pointer to data information struct (may be NULL).
 *
 * Tests whether @p is non-NULL, its structure member equals
 * TIME_SERIES, and its pd member is 1 (annual time series).
 * Evaluates to 1 if so, otherwise 0.
 *
 * @p is evaluated more than once, so it must be free of side
 * effects; it is parenthesized in the expansion so that any
 * pointer-valued expression may be passed.
 */
#define annual_data(p) ((p) != NULL && \
                        (p)->structure == TIME_SERIES && \
                        (p)->pd == 1)
237 
/**
 * decennial_data:
 * @p: pointer to data information struct (may be NULL).
 *
 * Tests whether a dataset is a decennial time series: @p must
 * be non-NULL, its structure member must equal TIME_SERIES, its
 * pd member must be 10, and its sd0 member must exceed 1000.
 * Evaluates to 1 if all of these hold, otherwise 0.
 *
 * @p is evaluated more than once, so it must be free of side
 * effects; it is parenthesized in the expansion so that any
 * pointer-valued expression may be passed.
 */
#define decennial_data(p) ((p) != NULL && \
                           (p)->structure == TIME_SERIES && \
                           (p)->pd == 10 && (p)->sd0 > 1000)
247 
/**
 * dataset_is_panel:
 * @p: pointer to data information struct (may be NULL).
 *
 * Tests whether @p is non-NULL and its structure member equals
 * STACKED_TIME_SERIES (panel data). Evaluates to 1 if so,
 * otherwise 0.
 *
 * @p is evaluated more than once, so it must be free of side
 * effects; it is parenthesized in the expansion so that any
 * pointer-valued expression may be passed.
 */
#define dataset_is_panel(p) ((p) != NULL && \
                             (p)->structure == STACKED_TIME_SERIES)
256 
/**
 * dataset_is_seasonal_panel:
 * @p: pointer to data information struct (may be NULL).
 *
 * Tests whether @p is non-NULL, its structure member equals
 * STACKED_TIME_SERIES, and its panel_pd member is greater than
 * 1 (panel data with a seasonal time-series dimension).
 * Evaluates to 1 if so, otherwise 0.
 *
 * @p is evaluated more than once, so it must be free of side
 * effects; it is parenthesized in the expansion so that any
 * pointer-valued expression may be passed.
 */
#define dataset_is_seasonal_panel(p) ((p) != NULL && \
                                      (p)->structure == STACKED_TIME_SERIES && \
                                      (p)->panel_pd > 1)
267 
/**
 * dataset_has_markers:
 * @p: pointer to data information struct (may be NULL).
 *
 * Determines whether a dataset has observation marker strings:
 * @p must be non-NULL, its markers member must be non-zero and
 * its S member must be non-NULL. Evaluates to 1 if so,
 * otherwise 0.
 *
 * @p is evaluated more than once, so it must be free of side
 * effects; it is parenthesized in the expansion so that any
 * pointer-valued expression may be passed.
 */
#define dataset_has_markers(p) ((p) != NULL && (p)->markers && \
                                (p)->S != NULL)
276 
/**
 * dataset_has_panel_time:
 * @p: pointer to data information struct (may be NULL).
 *
 * Determines whether a panel dataset has information on its
 * time dimension recorded: @p must be non-NULL, its structure
 * member must equal STACKED_TIME_SERIES, and its panel_pd and
 * panel_sd0 members must both be positive. Evaluates to 1 if
 * so, otherwise 0.
 *
 * @p is evaluated more than once, so it must be free of side
 * effects; it is parenthesized in the expansion so that any
 * pointer-valued expression may be passed.
 */
#define dataset_has_panel_time(p) ((p) != NULL && \
                                   (p)->structure == STACKED_TIME_SERIES && \
                                   (p)->panel_pd > 0 && \
                                   (p)->panel_sd0 > 0.0)
287 
/**
 * sample_size:
 * @p: pointer to data information struct (may be NULL).
 *
 * Retrieves the length of the current sample range, computed as
 * t2 - t1 + 1 from the members of @p, or 0 if @p is NULL.
 *
 * @p is evaluated more than once, so it must be free of side
 * effects; it is parenthesized in the expansion so that any
 * pointer-valued expression may be passed.
 */
#define sample_size(p) (((p) == NULL) ? 0 : ((p)->t2 - (p)->t1 + 1))
295 
/**
 * dset_get_data:
 * @d: pointer to dataset (must not be NULL: no check is made).
 * @i: index number of variable.
 * @t: observation number.
 *
 * Gets the value of series @i at observation @t, that is,
 * element [@i][@t] of the Z member of @d. No bounds checking
 * is performed. All arguments are parenthesized in the
 * expansion so that arbitrary expressions may be passed.
 */
#define dset_get_data(d,i,t) ((d)->Z[(i)][(t)])
305 
/**
 * dset_set_data:
 * @d: pointer to dataset (must not be NULL: no check is made).
 * @i: index number of variable.
 * @t: observation number.
 * @x: value to set.
 *
 * Sets the value of series @i at observation @t, that is,
 * assigns @x to element [@i][@t] of the Z member of @d. The
 * expansion is an assignment expression, so its value is the
 * value stored. No bounds checking is performed. All arguments
 * are parenthesized in the expansion so that arbitrary
 * expressions may be passed.
 */
#define dset_set_data(d,i,t,x) ((d)->Z[(i)][(t)] = (x))
316 
317 void free_Z (DATASET *dset);
318 
319 DATASET *datainfo_new (void);
320 
321 void datainfo_init (DATASET *dset);
322 
323 DATASET *create_new_dataset (int nvar,     /* number of variables */
324 			     int nobs,     /* observations per variable */
325 			     int markers   /* case markers or not? */
326 			     );
327 
328 DATASET *create_auxiliary_dataset (int nvar, int nobs, gretlopt opt);
329 
330 void destroy_dataset (DATASET *dset);
331 
332 void clear_datainfo (DATASET *dset, int code);
333 
334 int allocate_Z (DATASET *dset, gretlopt opt);
335 
336 int dataset_allocate_varnames (DATASET *dset);
337 
338 int dataset_allocate_obs_markers (DATASET *dset);
339 
340 void dataset_destroy_obs_markers (DATASET *dset);
341 
342 void dataset_obs_info_default (DATASET *dset);
343 
344 void copy_dataset_obs_info (DATASET *targ, const DATASET *src);
345 
346 void copy_varinfo (VARINFO *targ, const VARINFO *src);
347 
348 int shrink_varinfo (DATASET *dset, int nv);
349 
350 void set_sorted_markers (DATASET *dset, int v, char **S);
351 
352 void dataset_set_regular_markers (DATASET *dset);
353 
354 int start_new_Z (DATASET *dset, gretlopt opt);
355 
356 int is_trend_variable (const double *x, int n);
357 
358 int is_periodic_dummy (const double *x, const DATASET *dset);
359 
360 int dataset_add_observations (DATASET *dset, int n, gretlopt opt);
361 
362 int dataset_drop_observations (DATASET *dset, int n);
363 
364 int dataset_shrink_obs_range (DATASET *dset);
365 
366 int dataset_add_series (DATASET *dset, int newvars);
367 
368 int dataset_add_NA_series (DATASET *dset, int newvars);
369 
370 int dataset_add_allocated_series (DATASET *dset, double *x);
371 
372 int dataset_add_series_as (DATASET *dset, double *x, const char *name);
373 
374 int dataset_copy_series_as (DATASET *dset, int v, const char *name);
375 
376 int overwrite_err (const char *name);
377 
378 int series_is_parent (const DATASET *dset, int v);
379 
380 int dataset_replace_series (DATASET *dset, int v,
381 			    double *x, const char *descrip,
382 			    DataCopyFlag flag);
383 
384 int dataset_replace_series_data (DATASET *dset, int v,
385 				 const double *x,
386 				 int t1, int t2,
387 				 const char *descrip);
388 
389 int dataset_rename_series (DATASET *dset, int v, const char *name);
390 
391 int dataset_drop_listed_variables (int *list, DATASET *dset,
392 				   int *renumber, PRN *prn);
393 
394 int dataset_drop_variable (int v, DATASET *dset);
395 
396 int dataset_destroy_hidden_variables (DATASET *dset, int vmin);
397 
398 int dataset_drop_last_variables (DATASET *dset, int delvars);
399 
400 int dataset_renumber_variable (int v_old, int v_new,
401 			       DATASET *dset);
402 
403 int renumber_series_with_checks (const int *list,
404 				 const char *param,
405 				 int fixmax,
406 				 DATASET *dset,
407 				 PRN *prn);
408 
409 int maybe_prune_dataset (DATASET **pdset, gretl_string_table *st);
410 
411 int build_stacked_series (double **pstack, int *list,
412 			  int length, int offset,
413 			  DATASET *dset);
414 
415 int dataset_sort_by (DATASET *dset, const int *list, gretlopt opt);
416 
417 int dataset_set_matrix_name (DATASET *dset, const char *name);
418 
419 const char *dataset_get_matrix_name (const DATASET *dset);
420 
421 const char *dataset_period_label (const DATASET *dset);
422 
423 const char *dataset_get_mapfile (const DATASET *dset);
424 
425 void dataset_set_mapfile (DATASET *dset, const char *fname);
426 
427 int series_is_log (const DATASET *dset, int i, char *parent);
428 
429 void series_set_discrete (DATASET *dset, int i, int s);
430 
431 int series_record_display_name (DATASET *dset, int i,
432 				const char *s);
433 
434 int series_record_label (DATASET *dset, int i,
435 			 const char *s);
436 
437 const char *series_get_graph_name (const DATASET *dset, int i);
438 
439 unsigned int get_resampling_seed (void);
440 
441 int dataset_resample (DATASET *dset, int n, unsigned int seed);
442 
443 int dataset_op_from_string (const char *s);
444 
445 int modify_dataset (DATASET *dset, int op, const int *list,
446 		    const char *s, gretlopt opt, PRN *prn);
447 
448 int dataset_get_structure (const DATASET *dset);
449 
450 int panel_sample_size (const DATASET *dset);
451 
452 int multi_unit_panel_sample (const DATASET *dset);
453 
454 int dataset_purge_missing_rows (DATASET *dset);
455 
456 int check_dataset_is_changed (DATASET *dset);
457 
458 void set_dataset_is_changed (DATASET *dset, int s);
459 
460 void dataset_clear_sample_record (DATASET *dset);
461 
462 int dataset_set_time_series (DATASET *dset, int pd,
463 			     int yr0, int minor0);
464 
465 int series_is_discrete (const DATASET *dset, int i);
466 
467 int series_is_hidden (const DATASET *dset, int i);
468 
469 int series_is_generated (const DATASET *dset, int i);
470 
471 int series_is_listarg (const DATASET *dset, int i,
472 		       const char **lname);
473 
474 int series_is_coded (const DATASET *dset, int i);
475 
476 int series_is_integer_valued (const DATASET *dset, int i);
477 
478 VarFlags series_get_flags (const DATASET *dset, int i);
479 
480 void series_set_flag (DATASET *dset, int i, VarFlags flag);
481 
482 void series_unset_flag (DATASET *dset, int i, VarFlags flag);
483 
484 void series_zero_flags (DATASET *dset, int i);
485 
486 const char *series_get_label (const DATASET *dset, int i);
487 
488 const char *series_get_display_name (const DATASET *dset, int i);
489 
490 const char *series_get_parent_name (const DATASET *dset, int i);
491 
492 int series_get_parent_id (const DATASET *dset, int i);
493 
494 int series_get_compact_method (const DATASET *dset, int i);
495 
496 int series_get_stack_level (const DATASET *dset, int i);
497 
498 int series_get_transform (const DATASET *dset, int i);
499 
500 int series_get_lag (const DATASET *dset, int i);
501 
502 int series_get_string_width (const DATASET *dset, int i);
503 
504 void series_set_mtime (DATASET *dset, int i);
505 
506 gint64 series_get_mtime (const DATASET *dset, int i);
507 
508 void series_set_label (DATASET *dset, int i,
509 		       const char *s);
510 
511 void series_set_display_name (DATASET *dset, int i,
512 			      const char *s);
513 
514 void series_set_compact_method (DATASET *dset, int i,
515 				int method);
516 
517 void series_set_parent (DATASET *dset, int i,
518 			const char *parent);
519 
520 void series_set_transform (DATASET *dset, int i,
521 			   int transform);
522 
523 void series_delete_metadata (DATASET *dset, int i);
524 
525 void series_set_lag (DATASET *dset, int i, int lag);
526 
527 void series_set_stack_level (DATASET *dset, int i, int level);
528 
529 void series_increment_stack_level (DATASET *dset, int i);
530 
531 void series_decrement_stack_level (DATASET *dset, int i);
532 
533 void series_ensure_level_zero (DATASET *dset);
534 
535 void series_attach_string_table (DATASET *dset, int i,
536 				 series_table *st);
537 
538 void series_destroy_string_table (DATASET *dset, int i);
539 
540 int is_string_valued (const DATASET *dset, int i);
541 
542 series_table *series_get_string_table (const DATASET *dset, int i);
543 
544 const char *series_get_string_for_obs (const DATASET *dset, int i,
545 				       int t);
546 
547 const char *series_get_string_for_value (const DATASET *dset, int i,
548 					 double val);
549 
550 int series_set_string_val (DATASET *dset, int i, int t, const char *s);
551 
552 int string_series_assign_value (DATASET *dset, int i,
553 				int t, double x);
554 
555 int series_set_string_vals (DATASET *dset, int i, gretl_array *a);
556 
557 int series_set_string_vals_direct (DATASET *dset, int i,
558 				   char **S, int ns);
559 
560 int series_recode_strings (DATASET *dset, int v, gretlopt opt,
561 			   int *changed);
562 
563 double series_decode_string (const DATASET *dset, int i, const char *s);
564 
565 char **series_get_string_vals (const DATASET *dset, int i,
566 			       int *n_strs, int subsample);
567 
568 int steal_string_table (DATASET *l_dset, int lvar,
569 			DATASET *r_dset, int rvar);
570 
571 int merge_string_tables (DATASET *l_dset, int lvar,
572 			 DATASET *r_dset, int rvar);
573 
574 int set_panel_groups_name (DATASET *dset, const char *vname);
575 
576 const char *get_panel_group_name (const DATASET *dset, int obs);
577 
578 int panel_group_names_ok (const DATASET *dset, int maxlen);
579 
580 const char *panel_group_names_varname (const DATASET *dset);
581 
582 int is_panel_group_names_series (const DATASET *dset, int v);
583 
584 series_table *get_panel_group_table (const DATASET *dset,
585 				     int maxlen, int *pv);
586 
587 int is_dataset_series (const DATASET *dset, const double *x);
588 
589 int postprocess_daily_data (DATASET *dset, const int *list);
590 
591 int series_get_midas_period (const DATASET *dset, int i);
592 
593 void series_set_midas_period (const DATASET *dset, int i,
594 			      int period);
595 
596 int series_get_midas_freq (const DATASET *dset, int i);
597 
598 int series_set_midas_freq (const DATASET *dset, int i,
599 			   int freq);
600 
601 int series_is_midas_anchor (const DATASET *dset, int i);
602 
603 void series_set_midas_anchor (const DATASET *dset, int i);
604 
605 int series_get_orig_pd (const DATASET *dset, int i);
606 
607 void series_set_orig_pd (const DATASET *dset, int i, int pd);
608 
609 void series_unset_orig_pd (const DATASET *dset, int i);
610 
611 gretl_bundle *series_info_bundle (const DATASET *dset, int i,
612 				  int *err);
613 
614 gretl_matrix *list_info_matrix (const int *list,
615 				const DATASET *dset,
616 				gretlopt opt,
617 				int *err);
618 
619 gretl_bundle *get_current_map (const DATASET *dset,
620 			       const int *list,
621 			       int *err);
622 
623 #endif /* DATASET_H */
624