1 /* 2 * gretl -- Gnu Regression, Econometrics and Time-series Library 3 * Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti 4 * 5 * This program is free software: you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation, either version 3 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 #ifndef DATASET_H 21 #define DATASET_H 22 23 #include "gretl_matrix.h" 24 #include "gretl_bundle.h" 25 26 typedef enum { 27 NO_MARKERS = 0, 28 REGULAR_MARKERS, 29 DAILY_DATE_STRINGS 30 } DatasetMarkerType; 31 32 typedef enum { 33 VAR_DISCRETE = 1 << 0, 34 VAR_HIDDEN = 1 << 1, 35 VAR_GENERATED = 1 << 2, 36 VAR_LISTARG = 1 << 3, 37 VAR_TIMECOL = 1 << 4, 38 VAR_HFANCHOR = 1 << 5, 39 VAR_CODED = 1 << 6 40 } VarFlags; 41 42 typedef enum { 43 DS_NONE, 44 DS_ADDOBS, 45 DS_COMPACT, 46 DS_EXPAND, 47 DS_TRANSPOSE, 48 DS_DELETE, 49 DS_KEEP, 50 DS_SORTBY, 51 DS_DSORTBY, 52 DS_RESAMPLE, 53 DS_RESTORE, 54 DS_CLEAR, 55 DS_RENUMBER, 56 DS_INSOBS, 57 DS_PAD_DAILY 58 } DatasetOp; 59 60 typedef enum { 61 DS_COPY_VALUES, 62 DS_GRAB_VALUES 63 } DataCopyFlag; 64 65 /** 66 * CompactMethod: 67 * @COMPACT_NONE: no data compaction 68 * @COMPACT_SUM: take sum of higher frequency data 69 * @COMPACT_AVG: take mean of higher frequency data 70 * @COMPACT_SOP: use start-of-period value 71 * @COMPACT_EOP: use end-of-period value 72 * @COMPACT_WDAY: use a specified day of the week 73 * @COMPACT_SPREAD: spread out into multiple series 74 * @COMPACT_MAX: sentinel value 75 * 76 * Symbolic codes for various methods of compacting data 77 * series (i.e. converting from a higher to a lower 78 * frequency). %COMPACT_WDAY is applicable only when 79 * converting from daily to weekly frequency. 80 */ 81 82 typedef enum { 83 COMPACT_NONE, 84 COMPACT_SUM, 85 COMPACT_AVG, 86 COMPACT_SOP, 87 COMPACT_EOP, 88 COMPACT_WDAY, 89 COMPACT_SPREAD, 90 COMPACT_MAX 91 } CompactMethod; 92 93 typedef struct series_table_ series_table; 94 95 /** 96 * dataset_is_cross_section: 97 * @p: pointer to data information struct. 98 * 99 * Attempt to determine whether a dataset contains cross-sectional 100 * data (1) or not (0). 101 */ 102 #define dataset_is_cross_section(p) (p != NULL && p->structure == CROSS_SECTION) 103 104 /** 105 * dataset_is_time_series: 106 * @p: pointer to data information struct. 107 * 108 * Attempt to determine whether a dataset contains time series 109 * data (1) or not (0). 110 */ 111 #define dataset_is_time_series(p) (p != NULL && (p->structure == TIME_SERIES || \ 112 p->structure == SPECIAL_TIME_SERIES)) 113 114 /** 115 * dataset_is_seasonal: 116 * @p: pointer to data information struct. 117 * 118 * Attempt to determine whether a dataset contains seasonal time series 119 * data (1) or not (0). 120 */ 121 #define dataset_is_seasonal(p) (p != NULL && (p->structure == TIME_SERIES || \ 122 p->structure == SPECIAL_TIME_SERIES) && \ 123 p->pd > 1) 124 125 /** 126 * custom_time_series: 127 * @p: pointer to data information struct. 128 * 129 * Attempt to determine whether a dataset contains time series 130 * data with custom (non-standard) frequency (1) or not (0). 131 */ 132 #define custom_time_series(p) (p != NULL && p->structure == SPECIAL_TIME_SERIES) 133 134 /** 135 * dataset_is_daily: 136 * @p: pointer to data information struct. 137 * 138 * Attempt to determine whether a dataset contains daily time series 139 * data (1) or not (0). 140 */ 141 #define dataset_is_daily(p) (p != NULL && p->structure == TIME_SERIES \ 142 && (p->pd == 5 || p->pd == 6 || p->pd == 7)) 143 144 /** 145 * dataset_is_weekly: 146 * @p: pointer to data information struct. 147 * 148 * Attempt to determine whether a dataset contains weekly time series 149 * data (1) or not (0). 150 */ 151 #define dataset_is_weekly(p) (p != NULL && p->structure == TIME_SERIES \ 152 && p->pd == 52) 153 154 /** 155 * dataset_is_hourly: 156 * @p: pointer to data information struct. 157 * 158 * Attempt to determine whether a dataset contains hourly time series 159 * data (1) or not (0). 160 */ 161 #define dataset_is_hourly(p) (p != NULL && p->structure == TIME_SERIES \ 162 && p->pd == 24) 163 164 /** 165 * dataset_is_decennial: 166 * @p: pointer to data information struct. 167 * 168 * Attempt to determine whether a dataset contains decennial time series 169 * data (1) or not (0). 170 */ 171 #define dataset_is_decennial(p) (p != NULL && p->structure == TIME_SERIES \ 172 && p->pd == 10) 173 174 /** 175 * dated_daily_data: 176 * @p: pointer to data information struct. 177 * 178 * Attempt to determine whether a dataset contains dated daily time series 179 * data (1) or not (0). 180 */ 181 #define dated_daily_data(p) (p != NULL && p->structure == TIME_SERIES \ 182 && (p->pd == 5 || p->pd == 6 || p->pd == 7) \ 183 && p->sd0 > 10000.0) 184 185 /** 186 * dated_seven_day_data: 187 * @p: pointer to data information struct. 188 * 189 * Attempt to determine whether a dataset contains dated daily 190 * (seven-day) time series data (1) or not (0). 191 */ 192 #define dated_seven_day_data(p) (p != NULL && p->structure == TIME_SERIES \ 193 && p->pd == 7 && \ 194 p->sd0 > 10000.0) 195 196 /** 197 * dated_weekly_data: 198 * @p: pointer to data information struct. 199 * 200 * Attempt to determine whether a dataset contains dated weekly 201 * time series data (1) or not (0). 202 */ 203 #define dated_weekly_data(p) (p != NULL && p->structure == TIME_SERIES \ 204 && p->pd == 52 && \ 205 p->sd0 > 10000.0) 206 207 /** 208 * calendar_data: 209 * @p: pointer to data information struct. 210 * 211 * Attempt to determine whether a dataset uses calendar 212 * dates for observation strings (1) or not (0). 213 */ 214 #define calendar_data(p) (p != NULL && p->structure == TIME_SERIES && \ 215 (p->pd == 5 || p->pd == 6 || p->pd == 7 \ 216 || p->pd == 52) && strchr(p->stobs, '-')) 217 218 /** 219 * quarterly_or_monthly: 220 * @p: pointer to data information struct. 221 * 222 * Attempt to determine whether a dataset is a quarterly 223 * or monthly time series (1), or something else (0). 224 */ 225 #define quarterly_or_monthly(p) (p != NULL && p->structure == TIME_SERIES && \ 226 (p->pd == 4 || p->pd == 12)) 227 228 /** 229 * annual_data: 230 * @p: pointer to data information struct. 231 * 232 * Attempt to determine whether a dataset is an annual 233 * time series (1), or something else (0). 234 */ 235 #define annual_data(p) (p != NULL && p->structure == TIME_SERIES && \ 236 p->pd == 1) 237 238 /** 239 * decennial_data: 240 * @p: pointer to data information struct. 241 * 242 * Attempt to determine whether a dataset is a decemmial 243 * time series (1), or something else (0). 244 */ 245 #define decennial_data(p) (p != NULL && p->structure == TIME_SERIES && \ 246 p->pd == 10 && p->sd0 > 1000) 247 248 /** 249 * dataset_is_panel: 250 * @p: pointer to data information struct. 251 * 252 * Attempt to determine whether a dataset contains panel 253 * data (1) or not (0). 254 */ 255 #define dataset_is_panel(p) (p != NULL && p->structure == STACKED_TIME_SERIES) 256 257 /** 258 * dataset_is_seasonal_panel: 259 * @p: pointer to data information struct. 260 * 261 * Attempt to determine whether a dataset contains panel 262 * data with a seasonal time-series dimension (1) or not (0). 263 */ 264 #define dataset_is_seasonal_panel(p) (p != NULL && \ 265 p->structure == STACKED_TIME_SERIES && \ 266 p->panel_pd > 1) 267 268 /** 269 * dataset_has_markers: 270 * @p: pointer to data information struct. 271 * 272 * Determine whether a dataset has observation marker strings (1) 273 * or not (0). 274 */ 275 #define dataset_has_markers(p) (p != NULL && p->markers && p->S != NULL) 276 277 /** 278 * dataset_has_panel_time: 279 * @p: pointer to data information struct. 280 * 281 * Determine whether a panel dataset has information on its time 282 * dimension recorded (1) or not (0). 283 */ 284 #define dataset_has_panel_time(p) (p != NULL && \ 285 p->structure == STACKED_TIME_SERIES && \ 286 p->panel_pd > 0 && p->panel_sd0 > 0.0) 287 288 /** 289 * sample_size: 290 * @p: pointer to data information struct. 291 * 292 * Retrieves the length of the current sample range. 293 */ 294 #define sample_size(p) ((p == NULL)? 0 : (p->t2 - p->t1 + 1)) 295 296 /** 297 * dset_get_data: 298 * @d: pointer to dataset. 299 * @i: index number of variable. 300 * @t: observation number. 301 * 302 * Gets the value of series @i at observation @t. 303 */ 304 #define dset_get_data(d,i,t) (d->Z[i][t]) 305 306 /** 307 * dset_set_data: 308 * @d: pointer to dataset. 309 * @i: index number of variable. 310 * @t: observation number. 311 * @x: value to set. 312 * 313 * Sets the value of series @i at observation @t. 314 */ 315 #define dset_set_data(d,i,t,x) (d->Z[i][t]=x) 316 317 void free_Z (DATASET *dset); 318 319 DATASET *datainfo_new (void); 320 321 void datainfo_init (DATASET *dset); 322 323 DATASET *create_new_dataset (int nvar, /* number of variables */ 324 int nobs, /* observations per variable */ 325 int markers /* case markers or not? */ 326 ); 327 328 DATASET *create_auxiliary_dataset (int nvar, int nobs, gretlopt opt); 329 330 void destroy_dataset (DATASET *dset); 331 332 void clear_datainfo (DATASET *dset, int code); 333 334 int allocate_Z (DATASET *dset, gretlopt opt); 335 336 int dataset_allocate_varnames (DATASET *dset); 337 338 int dataset_allocate_obs_markers (DATASET *dset); 339 340 void dataset_destroy_obs_markers (DATASET *dset); 341 342 void dataset_obs_info_default (DATASET *dset); 343 344 void copy_dataset_obs_info (DATASET *targ, const DATASET *src); 345 346 void copy_varinfo (VARINFO *targ, const VARINFO *src); 347 348 int shrink_varinfo (DATASET *dset, int nv); 349 350 void set_sorted_markers (DATASET *dset, int v, char **S); 351 352 void dataset_set_regular_markers (DATASET *dset); 353 354 int start_new_Z (DATASET *dset, gretlopt opt); 355 356 int is_trend_variable (const double *x, int n); 357 358 int is_periodic_dummy (const double *x, const DATASET *dset); 359 360 int dataset_add_observations (DATASET *dset, int n, gretlopt opt); 361 362 int dataset_drop_observations (DATASET *dset, int n); 363 364 int dataset_shrink_obs_range (DATASET *dset); 365 366 int dataset_add_series (DATASET *dset, int newvars); 367 368 int dataset_add_NA_series (DATASET *dset, int newvars); 369 370 int dataset_add_allocated_series (DATASET *dset, double *x); 371 372 int dataset_add_series_as (DATASET *dset, double *x, const char *name); 373 374 int dataset_copy_series_as (DATASET *dset, int v, const char *name); 375 376 int overwrite_err (const char *name); 377 378 int series_is_parent (const DATASET *dset, int v); 379 380 int dataset_replace_series (DATASET *dset, int v, 381 double *x, const char *descrip, 382 DataCopyFlag flag); 383 384 int dataset_replace_series_data (DATASET *dset, int v, 385 const double *x, 386 int t1, int t2, 387 const char *descrip); 388 389 int dataset_rename_series (DATASET *dset, int v, const char *name); 390 391 int dataset_drop_listed_variables (int *list, DATASET *dset, 392 int *renumber, PRN *prn); 393 394 int dataset_drop_variable (int v, DATASET *dset); 395 396 int dataset_destroy_hidden_variables (DATASET *dset, int vmin); 397 398 int dataset_drop_last_variables (DATASET *dset, int delvars); 399 400 int dataset_renumber_variable (int v_old, int v_new, 401 DATASET *dset); 402 403 int renumber_series_with_checks (const int *list, 404 const char *param, 405 int fixmax, 406 DATASET *dset, 407 PRN *prn); 408 409 int maybe_prune_dataset (DATASET **pdset, gretl_string_table *st); 410 411 int build_stacked_series (double **pstack, int *list, 412 int length, int offset, 413 DATASET *dset); 414 415 int dataset_sort_by (DATASET *dset, const int *list, gretlopt opt); 416 417 int dataset_set_matrix_name (DATASET *dset, const char *name); 418 419 const char *dataset_get_matrix_name (const DATASET *dset); 420 421 const char *dataset_period_label (const DATASET *dset); 422 423 const char *dataset_get_mapfile (const DATASET *dset); 424 425 void dataset_set_mapfile (DATASET *dset, const char *fname); 426 427 int series_is_log (const DATASET *dset, int i, char *parent); 428 429 void series_set_discrete (DATASET *dset, int i, int s); 430 431 int series_record_display_name (DATASET *dset, int i, 432 const char *s); 433 434 int series_record_label (DATASET *dset, int i, 435 const char *s); 436 437 const char *series_get_graph_name (const DATASET *dset, int i); 438 439 unsigned int get_resampling_seed (void); 440 441 int dataset_resample (DATASET *dset, int n, unsigned int seed); 442 443 int dataset_op_from_string (const char *s); 444 445 int modify_dataset (DATASET *dset, int op, const int *list, 446 const char *s, gretlopt opt, PRN *prn); 447 448 int dataset_get_structure (const DATASET *dset); 449 450 int panel_sample_size (const DATASET *dset); 451 452 int multi_unit_panel_sample (const DATASET *dset); 453 454 int dataset_purge_missing_rows (DATASET *dset); 455 456 int check_dataset_is_changed (DATASET *dset); 457 458 void set_dataset_is_changed (DATASET *dset, int s); 459 460 void dataset_clear_sample_record (DATASET *dset); 461 462 int dataset_set_time_series (DATASET *dset, int pd, 463 int yr0, int minor0); 464 465 int series_is_discrete (const DATASET *dset, int i); 466 467 int series_is_hidden (const DATASET *dset, int i); 468 469 int series_is_generated (const DATASET *dset, int i); 470 471 int series_is_listarg (const DATASET *dset, int i, 472 const char **lname); 473 474 int series_is_coded (const DATASET *dset, int i); 475 476 int series_is_integer_valued (const DATASET *dset, int i); 477 478 VarFlags series_get_flags (const DATASET *dset, int i); 479 480 void series_set_flag (DATASET *dset, int i, VarFlags flag); 481 482 void series_unset_flag (DATASET *dset, int i, VarFlags flag); 483 484 void series_zero_flags (DATASET *dset, int i); 485 486 const char *series_get_label (const DATASET *dset, int i); 487 488 const char *series_get_display_name (const DATASET *dset, int i); 489 490 const char *series_get_parent_name (const DATASET *dset, int i); 491 492 int series_get_parent_id (const DATASET *dset, int i); 493 494 int series_get_compact_method (const DATASET *dset, int i); 495 496 int series_get_stack_level (const DATASET *dset, int i); 497 498 int series_get_transform (const DATASET *dset, int i); 499 500 int series_get_lag (const DATASET *dset, int i); 501 502 int series_get_string_width (const DATASET *dset, int i); 503 504 void series_set_mtime (DATASET *dset, int i); 505 506 gint64 series_get_mtime (const DATASET *dset, int i); 507 508 void series_set_label (DATASET *dset, int i, 509 const char *s); 510 511 void series_set_display_name (DATASET *dset, int i, 512 const char *s); 513 514 void series_set_compact_method (DATASET *dset, int i, 515 int method); 516 517 void series_set_parent (DATASET *dset, int i, 518 const char *parent); 519 520 void series_set_transform (DATASET *dset, int i, 521 int transform); 522 523 void series_delete_metadata (DATASET *dset, int i); 524 525 void series_set_lag (DATASET *dset, int i, int lag); 526 527 void series_set_stack_level (DATASET *dset, int i, int level); 528 529 void series_increment_stack_level (DATASET *dset, int i); 530 531 void series_decrement_stack_level (DATASET *dset, int i); 532 533 void series_ensure_level_zero (DATASET *dset); 534 535 void series_attach_string_table (DATASET *dset, int i, 536 series_table *st); 537 538 void series_destroy_string_table (DATASET *dset, int i); 539 540 int is_string_valued (const DATASET *dset, int i); 541 542 series_table *series_get_string_table (const DATASET *dset, int i); 543 544 const char *series_get_string_for_obs (const DATASET *dset, int i, 545 int t); 546 547 const char *series_get_string_for_value (const DATASET *dset, int i, 548 double val); 549 550 int series_set_string_val (DATASET *dset, int i, int t, const char *s); 551 552 int string_series_assign_value (DATASET *dset, int i, 553 int t, double x); 554 555 int series_set_string_vals (DATASET *dset, int i, gretl_array *a); 556 557 int series_set_string_vals_direct (DATASET *dset, int i, 558 char **S, int ns); 559 560 int series_recode_strings (DATASET *dset, int v, gretlopt opt, 561 int *changed); 562 563 double series_decode_string (const DATASET *dset, int i, const char *s); 564 565 char **series_get_string_vals (const DATASET *dset, int i, 566 int *n_strs, int subsample); 567 568 int steal_string_table (DATASET *l_dset, int lvar, 569 DATASET *r_dset, int rvar); 570 571 int merge_string_tables (DATASET *l_dset, int lvar, 572 DATASET *r_dset, int rvar); 573 574 int set_panel_groups_name (DATASET *dset, const char *vname); 575 576 const char *get_panel_group_name (const DATASET *dset, int obs); 577 578 int panel_group_names_ok (const DATASET *dset, int maxlen); 579 580 const char *panel_group_names_varname (const DATASET *dset); 581 582 int is_panel_group_names_series (const DATASET *dset, int v); 583 584 series_table *get_panel_group_table (const DATASET *dset, 585 int maxlen, int *pv); 586 587 int is_dataset_series (const DATASET *dset, const double *x); 588 589 int postprocess_daily_data (DATASET *dset, const int *list); 590 591 int series_get_midas_period (const DATASET *dset, int i); 592 593 void series_set_midas_period (const DATASET *dset, int i, 594 int period); 595 596 int series_get_midas_freq (const DATASET *dset, int i); 597 598 int series_set_midas_freq (const DATASET *dset, int i, 599 int freq); 600 601 int series_is_midas_anchor (const DATASET *dset, int i); 602 603 void series_set_midas_anchor (const DATASET *dset, int i); 604 605 int series_get_orig_pd (const DATASET *dset, int i); 606 607 void series_set_orig_pd (const DATASET *dset, int i, int pd); 608 609 void series_unset_orig_pd (const DATASET *dset, int i); 610 611 gretl_bundle *series_info_bundle (const DATASET *dset, int i, 612 int *err); 613 614 gretl_matrix *list_info_matrix (const int *list, 615 const DATASET *dset, 616 gretlopt opt, 617 int *err); 618 619 gretl_bundle *get_current_map (const DATASET *dset, 620 const int *list, 621 int *err); 622 623 #endif /* DATASET_H */ 624