1/* PSPP - a program for statistical analysis. 2 Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2013, 2014, 2016 Free Software Foundation, Inc. 3 4 This program is free software: you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation, either version 3 of the License, or 7 (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 16 17/* FIXME: 18 19 - How to calculate significance of some symmetric and directional measures? 20 - How to calculate ASE for symmetric Somers ' d? 21 - How to calculate ASE for Goodman and Kruskal's tau? 22 - How to calculate approx. T of symmetric uncertainty coefficient? 23 24*/ 25 26#include <config.h> 27 28#include <ctype.h> 29#include <float.h> 30#include <gsl/gsl_cdf.h> 31#include <stdlib.h> 32#include <stdio.h> 33 34#include "data/case.h" 35#include "data/casegrouper.h" 36#include "data/casereader.h" 37#include "data/data-out.h" 38#include "data/dataset.h" 39#include "data/dictionary.h" 40#include "data/format.h" 41#include "data/value-labels.h" 42#include "data/variable.h" 43#include "language/command.h" 44#include "language/stats/freq.h" 45#include "language/dictionary/split-file.h" 46#include "language/lexer/lexer.h" 47#include "language/lexer/variable-parser.h" 48#include "libpspp/array.h" 49#include "libpspp/assertion.h" 50#include "libpspp/compiler.h" 51#include "libpspp/hash-functions.h" 52#include "libpspp/hmap.h" 53#include "libpspp/hmapx.h" 54#include "libpspp/message.h" 55#include "libpspp/misc.h" 56#include "libpspp/pool.h" 57#include "libpspp/str.h" 58#include "output/pivot-table.h" 59#include "output/chart-item.h" 60#include "output/charts/barchart.h" 61 62#include "gl/minmax.h" 63#include "gl/xalloc.h" 64#include "gl/xsize.h" 65 66#include "gettext.h" 67#define _(msgid) gettext (msgid) 68#define N_(msgid) msgid 69 70/* (headers) */ 71 72/* (specification) 73 crosstabs (crs_): 74 *^tables=custom; 75 +variables=custom; 76 missing=miss:!table/include/report; 77 count=roundwhat:asis/case/!cell, 78 roundhow:!round/truncate; 79 +write[wr_]=none,cells,all; 80 +format=val:!avalue/dvalue, 81 indx:!noindex/index, 82 tabl:!tables/notables, 83 box:!box/nobox, 84 pivot:!pivot/nopivot; 85 +barchart=; 86 +cells[cl_]=count,expected,row,column,total,residual,sresidual, 87 asresidual,all,none; 88 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d, 89 kappa,eta,corr,all. 90*/ 91/* (declarations) */ 92/* (functions) */ 93 94/* Number of chi-square statistics. */ 95#define N_CHISQ 5 96 97/* Number of symmetric statistics. */ 98#define N_SYMMETRIC 9 99 100/* Number of directional statistics. */ 101#define N_DIRECTIONAL 13 102 103 104/* Indexes into the 'vars' member of struct crosstabulation and 105 struct crosstab member. */ 106enum 107 { 108 ROW_VAR = 0, /* Row variable. */ 109 COL_VAR = 1 /* Column variable. */ 110 /* Higher indexes cause multiple tables to be output. */ 111 }; 112 113struct xtab_var 114 { 115 const struct variable *var; 116 union value *values; 117 size_t n_values; 118 }; 119 120/* A crosstabulation of 2 or more variables. */ 121struct crosstabulation 122 { 123 struct crosstabs_proc *proc; 124 struct fmt_spec weight_format; /* Format for weight variable. */ 125 double missing; /* Weight of missing cases. */ 126 127 /* Variables (2 or more). */ 128 int n_vars; 129 struct xtab_var *vars; 130 131 /* Constants (0 or more). */ 132 int n_consts; 133 struct xtab_var *const_vars; 134 size_t *const_indexes; 135 136 /* Data. */ 137 struct hmap data; 138 struct freq **entries; 139 size_t n_entries; 140 141 /* Number of statistically interesting columns/rows 142 (columns/rows with data in them). */ 143 int ns_cols, ns_rows; 144 145 /* Matrix contents. */ 146 double *mat; /* Matrix proper. */ 147 double *row_tot; /* Row totals. */ 148 double *col_tot; /* Column totals. */ 149 double total; /* Grand total. */ 150 }; 151 152/* Integer mode variable info. */ 153struct var_range 154 { 155 struct hmap_node hmap_node; /* In struct crosstabs_proc var_ranges map. */ 156 const struct variable *var; /* The variable. */ 157 int min; /* Minimum value. */ 158 int max; /* Maximum value + 1. */ 159 int count; /* max - min. */ 160 }; 161 162struct crosstabs_proc 163 { 164 const struct dictionary *dict; 165 enum { INTEGER, GENERAL } mode; 166 enum mv_class exclude; 167 bool pivot; 168 bool barchart; 169 bool bad_warn; 170 struct fmt_spec weight_format; 171 172 /* Variables specifies on VARIABLES. */ 173 const struct variable **variables; 174 size_t n_variables; 175 struct hmap var_ranges; 176 177 /* TABLES. */ 178 struct crosstabulation *pivots; 179 int n_pivots; 180 181 /* CELLS. */ 182 int n_cells; /* Number of cells requested. */ 183 unsigned int cells; /* Bit k is 1 if cell k is requested. */ 184 int a_cells[CRS_CL_count]; /* 0...n_cells-1 are the requested cells. */ 185 186 /* Rounding of cells. */ 187 bool round_case_weights; /* Round case weights? */ 188 bool round_cells; /* If !round_case_weights, round cells? */ 189 bool round_down; /* Round down? (otherwise to nearest) */ 190 191 /* STATISTICS. */ 192 unsigned int statistics; /* Bit k is 1 if statistic k is requested. */ 193 194 bool descending; /* True if descending sort order is requested. */ 195 }; 196 197const struct var_range *get_var_range (const struct crosstabs_proc *, 198 const struct variable *); 199 200static bool should_tabulate_case (const struct crosstabulation *, 201 const struct ccase *, enum mv_class exclude); 202static void tabulate_general_case (struct crosstabulation *, const struct ccase *, 203 double weight); 204static void tabulate_integer_case (struct crosstabulation *, const struct ccase *, 205 double weight); 206static void postcalc (struct crosstabs_proc *); 207 208static double 209round_weight (const struct crosstabs_proc *proc, double weight) 210{ 211 return proc->round_down ? floor (weight) : floor (weight + 0.5); 212} 213 214#define FOR_EACH_POPULATED_COLUMN(C, XT) \ 215 for (int C = next_populated_column (0, XT); \ 216 C < (XT)->vars[COL_VAR].n_values; \ 217 C = next_populated_column (C + 1, XT)) 218static int 219next_populated_column (int c, const struct crosstabulation *xt) 220{ 221 int n_columns = xt->vars[COL_VAR].n_values; 222 for (; c < n_columns; c++) 223 if (xt->col_tot[c]) 224 break; 225 return c; 226} 227 228#define FOR_EACH_POPULATED_ROW(R, XT) \ 229 for (int R = next_populated_row (0, XT); R < (XT)->vars[ROW_VAR].n_values; \ 230 R = next_populated_row (R + 1, XT)) 231static int 232next_populated_row (int r, const struct crosstabulation *xt) 233{ 234 int n_rows = xt->vars[ROW_VAR].n_values; 235 for (; r < n_rows; r++) 236 if (xt->row_tot[r]) 237 break; 238 return r; 239} 240 241/* Parses and executes the CROSSTABS procedure. */ 242int 243cmd_crosstabs (struct lexer *lexer, struct dataset *ds) 244{ 245 struct var_range *range, *next_range; 246 struct crosstabs_proc proc; 247 struct casegrouper *grouper; 248 struct casereader *input, *group; 249 struct cmd_crosstabs cmd; 250 struct crosstabulation *xt; 251 int result; 252 bool ok; 253 int i; 254 255 proc.dict = dataset_dict (ds); 256 proc.bad_warn = true; 257 proc.variables = NULL; 258 proc.n_variables = 0; 259 hmap_init (&proc.var_ranges); 260 proc.pivots = NULL; 261 proc.n_pivots = 0; 262 proc.descending = false; 263 proc.weight_format = *dict_get_weight_format (dataset_dict (ds)); 264 265 if (!parse_crosstabs (lexer, ds, &cmd, &proc)) 266 { 267 result = CMD_FAILURE; 268 goto exit; 269 } 270 271 proc.mode = proc.n_variables ? INTEGER : GENERAL; 272 proc.barchart = cmd.sbc_barchart > 0; 273 274 proc.descending = cmd.val == CRS_DVALUE; 275 276 proc.round_case_weights = cmd.sbc_count && cmd.roundwhat == CRS_CASE; 277 proc.round_cells = cmd.sbc_count && cmd.roundwhat == CRS_CELL; 278 proc.round_down = cmd.roundhow == CRS_TRUNCATE; 279 280 /* CELLS. */ 281 if (!cmd.sbc_cells) 282 proc.cells = 1u << CRS_CL_COUNT; 283 else if (cmd.a_cells[CRS_CL_ALL]) 284 proc.cells = UINT_MAX; 285 else 286 { 287 proc.cells = 0; 288 for (i = 0; i < CRS_CL_count; i++) 289 if (cmd.a_cells[i]) 290 proc.cells |= 1u << i; 291 if (proc.cells == 0) 292 proc.cells = ((1u << CRS_CL_COUNT) 293 | (1u << CRS_CL_ROW) 294 | (1u << CRS_CL_COLUMN) 295 | (1u << CRS_CL_TOTAL)); 296 } 297 proc.cells &= ((1u << CRS_CL_count) - 1); 298 proc.cells &= ~((1u << CRS_CL_NONE) | (1u << CRS_CL_ALL)); 299 proc.n_cells = 0; 300 for (i = 0; i < CRS_CL_count; i++) 301 if (proc.cells & (1u << i)) 302 proc.a_cells[proc.n_cells++] = i; 303 304 /* STATISTICS. */ 305 if (cmd.a_statistics[CRS_ST_ALL]) 306 proc.statistics = UINT_MAX; 307 else if (cmd.sbc_statistics) 308 { 309 int i; 310 311 proc.statistics = 0; 312 for (i = 0; i < CRS_ST_count; i++) 313 if (cmd.a_statistics[i]) 314 proc.statistics |= 1u << i; 315 if (proc.statistics == 0) 316 proc.statistics |= 1u << CRS_ST_CHISQ; 317 } 318 else 319 proc.statistics = 0; 320 321 /* MISSING. */ 322 proc.exclude = (cmd.miss == CRS_TABLE ? MV_ANY 323 : cmd.miss == CRS_INCLUDE ? MV_SYSTEM 324 : MV_NEVER); 325 if (proc.mode == GENERAL && proc.exclude == MV_NEVER) 326 { 327 msg (SE, _("Missing mode %s not allowed in general mode. " 328 "Assuming %s."), "REPORT", "MISSING=TABLE"); 329 proc.exclude = MV_ANY; 330 } 331 332 /* PIVOT. */ 333 proc.pivot = cmd.pivot == CRS_PIVOT; 334 335 input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds), 336 NULL, NULL); 337 grouper = casegrouper_create_splits (input, dataset_dict (ds)); 338 while (casegrouper_get_next_group (grouper, &group)) 339 { 340 struct ccase *c; 341 342 /* Output SPLIT FILE variables. */ 343 c = casereader_peek (group, 0); 344 if (c != NULL) 345 { 346 output_split_file_values (ds, c); 347 case_unref (c); 348 } 349 350 /* Initialize hash tables. */ 351 for (xt = &proc.pivots[0]; xt < &proc.pivots[proc.n_pivots]; xt++) 352 hmap_init (&xt->data); 353 354 /* Tabulate. */ 355 for (; (c = casereader_read (group)) != NULL; case_unref (c)) 356 for (xt = &proc.pivots[0]; xt < &proc.pivots[proc.n_pivots]; xt++) 357 { 358 double weight = dict_get_case_weight (dataset_dict (ds), c, 359 &proc.bad_warn); 360 if (cmd.roundwhat == CRS_CASE) 361 { 362 weight = round_weight (&proc, weight); 363 if (weight == 0.) 364 continue; 365 } 366 if (should_tabulate_case (xt, c, proc.exclude)) 367 { 368 if (proc.mode == GENERAL) 369 tabulate_general_case (xt, c, weight); 370 else 371 tabulate_integer_case (xt, c, weight); 372 } 373 else 374 xt->missing += weight; 375 } 376 casereader_destroy (group); 377 378 /* Output. */ 379 postcalc (&proc); 380 } 381 ok = casegrouper_destroy (grouper); 382 ok = proc_commit (ds) && ok; 383 384 result = ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; 385 386exit: 387 free (proc.variables); 388 HMAP_FOR_EACH_SAFE (range, next_range, struct var_range, hmap_node, 389 &proc.var_ranges) 390 { 391 hmap_delete (&proc.var_ranges, &range->hmap_node); 392 free (range); 393 } 394 for (xt = &proc.pivots[0]; xt < &proc.pivots[proc.n_pivots]; xt++) 395 { 396 free (xt->vars); 397 free (xt->const_vars); 398 free (xt->const_indexes); 399 } 400 free (proc.pivots); 401 402 return result; 403} 404 405/* Parses the TABLES subcommand. */ 406static int 407crs_custom_tables (struct lexer *lexer, struct dataset *ds, 408 struct cmd_crosstabs *cmd UNUSED, void *proc_) 409{ 410 struct crosstabs_proc *proc = proc_; 411 struct const_var_set *var_set; 412 int n_by; 413 const struct variable ***by = NULL; 414 int *by_iter; 415 size_t *by_nvar = NULL; 416 size_t nx = 1; 417 bool ok = false; 418 int i; 419 420 /* Ensure that this is a TABLES subcommand. */ 421 if (!lex_match_id (lexer, "TABLES") 422 && (lex_token (lexer) != T_ID || 423 dict_lookup_var (dataset_dict (ds), lex_tokcstr (lexer)) == NULL) 424 && lex_token (lexer) != T_ALL) 425 return 2; 426 lex_match (lexer, T_EQUALS); 427 428 if (proc->variables != NULL) 429 var_set = const_var_set_create_from_array (proc->variables, 430 proc->n_variables); 431 else 432 var_set = const_var_set_create_from_dict (dataset_dict (ds)); 433 assert (var_set != NULL); 434 435 for (n_by = 0; ;) 436 { 437 by = xnrealloc (by, n_by + 1, sizeof *by); 438 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar); 439 if (!parse_const_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by], 440 PV_NO_DUPLICATE | PV_NO_SCRATCH)) 441 goto done; 442 if (xalloc_oversized (nx, by_nvar[n_by])) 443 { 444 msg (SE, _("Too many cross-tabulation variables or dimensions.")); 445 goto done; 446 } 447 nx *= by_nvar[n_by]; 448 n_by++; 449 450 if (!lex_match (lexer, T_BY)) 451 { 452 if (n_by < 2) 453 goto done; 454 else 455 break; 456 } 457 } 458 459 by_iter = xcalloc (n_by, sizeof *by_iter); 460 proc->pivots = xnrealloc (proc->pivots, 461 proc->n_pivots + nx, sizeof *proc->pivots); 462 for (i = 0; i < nx; i++) 463 { 464 struct crosstabulation *xt = &proc->pivots[proc->n_pivots++]; 465 int j; 466 467 xt->proc = proc; 468 xt->weight_format = proc->weight_format; 469 xt->missing = 0.; 470 xt->n_vars = n_by; 471 xt->vars = xcalloc (n_by, sizeof *xt->vars); 472 xt->n_consts = 0; 473 xt->const_vars = NULL; 474 xt->const_indexes = NULL; 475 476 for (j = 0; j < n_by; j++) 477 xt->vars[j].var = by[j][by_iter[j]]; 478 479 for (j = n_by - 1; j >= 0; j--) 480 { 481 if (++by_iter[j] < by_nvar[j]) 482 break; 483 by_iter[j] = 0; 484 } 485 } 486 free (by_iter); 487 ok = true; 488 489done: 490 /* All return paths lead here. */ 491 for (i = 0; i < n_by; i++) 492 free (by[i]); 493 free (by); 494 free (by_nvar); 495 496 const_var_set_destroy (var_set); 497 498 return ok; 499} 500 501/* Parses the VARIABLES subcommand. */ 502static int 503crs_custom_variables (struct lexer *lexer, struct dataset *ds, 504 struct cmd_crosstabs *cmd UNUSED, void *proc_) 505{ 506 struct crosstabs_proc *proc = proc_; 507 if (proc->n_pivots) 508 { 509 msg (SE, _("%s must be specified before %s."), "VARIABLES", "TABLES"); 510 return 0; 511 } 512 513 lex_match (lexer, T_EQUALS); 514 515 for (;;) 516 { 517 size_t orig_nv = proc->n_variables; 518 size_t i; 519 520 long min, max; 521 522 if (!parse_variables_const (lexer, dataset_dict (ds), 523 &proc->variables, &proc->n_variables, 524 (PV_APPEND | PV_NUMERIC 525 | PV_NO_DUPLICATE | PV_NO_SCRATCH))) 526 return 0; 527 528 if (!lex_force_match (lexer, T_LPAREN)) 529 goto lossage; 530 531 if (!lex_force_int (lexer)) 532 goto lossage; 533 min = lex_integer (lexer); 534 lex_get (lexer); 535 536 lex_match (lexer, T_COMMA); 537 538 if (!lex_force_int (lexer)) 539 goto lossage; 540 max = lex_integer (lexer); 541 if (max < min) 542 { 543 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."), 544 max, min); 545 goto lossage; 546 } 547 lex_get (lexer); 548 549 if (!lex_force_match (lexer, T_RPAREN)) 550 goto lossage; 551 552 for (i = orig_nv; i < proc->n_variables; i++) 553 { 554 const struct variable *var = proc->variables[i]; 555 struct var_range *vr = xmalloc (sizeof *vr); 556 557 vr->var = var; 558 vr->min = min; 559 vr->max = max; 560 vr->count = max - min + 1; 561 hmap_insert (&proc->var_ranges, &vr->hmap_node, 562 hash_pointer (var, 0)); 563 } 564 565 if (lex_token (lexer) == T_SLASH) 566 break; 567 } 568 569 return 1; 570 571 lossage: 572 free (proc->variables); 573 proc->variables = NULL; 574 proc->n_variables = 0; 575 return 0; 576} 577 578/* Data file processing. */ 579 580const struct var_range * 581get_var_range (const struct crosstabs_proc *proc, const struct variable *var) 582{ 583 if (!hmap_is_empty (&proc->var_ranges)) 584 { 585 const struct var_range *range; 586 587 HMAP_FOR_EACH_IN_BUCKET (range, struct var_range, hmap_node, 588 hash_pointer (var, 0), &proc->var_ranges) 589 if (range->var == var) 590 return range; 591 } 592 593 return NULL; 594} 595 596static bool 597should_tabulate_case (const struct crosstabulation *xt, const struct ccase *c, 598 enum mv_class exclude) 599{ 600 int j; 601 for (j = 0; j < xt->n_vars; j++) 602 { 603 const struct variable *var = xt->vars[j].var; 604 const struct var_range *range = get_var_range (xt->proc, var); 605 606 if (var_is_value_missing (var, case_data (c, var), exclude)) 607 return false; 608 609 if (range != NULL) 610 { 611 double num = case_num (c, var); 612 if (num < range->min || num >= range->max + 1.) 613 return false; 614 } 615 } 616 return true; 617} 618 619static void 620tabulate_integer_case (struct crosstabulation *xt, const struct ccase *c, 621 double weight) 622{ 623 struct freq *te; 624 size_t hash; 625 int j; 626 627 hash = 0; 628 for (j = 0; j < xt->n_vars; j++) 629 { 630 /* Throw away fractional parts of values. */ 631 hash = hash_int (case_num (c, xt->vars[j].var), hash); 632 } 633 634 HMAP_FOR_EACH_WITH_HASH (te, struct freq, node, hash, &xt->data) 635 { 636 for (j = 0; j < xt->n_vars; j++) 637 if ((int) case_num (c, xt->vars[j].var) != (int) te->values[j].f) 638 goto no_match; 639 640 /* Found an existing entry. */ 641 te->count += weight; 642 return; 643 644 no_match: ; 645 } 646 647 /* No existing entry. Create a new one. */ 648 te = xmalloc (table_entry_size (xt->n_vars)); 649 te->count = weight; 650 for (j = 0; j < xt->n_vars; j++) 651 te->values[j].f = (int) case_num (c, xt->vars[j].var); 652 hmap_insert (&xt->data, &te->node, hash); 653} 654 655static void 656tabulate_general_case (struct crosstabulation *xt, const struct ccase *c, 657 double weight) 658{ 659 struct freq *te; 660 size_t hash; 661 int j; 662 663 hash = 0; 664 for (j = 0; j < xt->n_vars; j++) 665 { 666 const struct variable *var = xt->vars[j].var; 667 hash = value_hash (case_data (c, var), var_get_width (var), hash); 668 } 669 670 HMAP_FOR_EACH_WITH_HASH (te, struct freq, node, hash, &xt->data) 671 { 672 for (j = 0; j < xt->n_vars; j++) 673 { 674 const struct variable *var = xt->vars[j].var; 675 if (!value_equal (case_data (c, var), &te->values[j], 676 var_get_width (var))) 677 goto no_match; 678 } 679 680 /* Found an existing entry. */ 681 te->count += weight; 682 return; 683 684 no_match: ; 685 } 686 687 /* No existing entry. Create a new one. */ 688 te = xmalloc (table_entry_size (xt->n_vars)); 689 te->count = weight; 690 for (j = 0; j < xt->n_vars; j++) 691 { 692 const struct variable *var = xt->vars[j].var; 693 value_clone (&te->values[j], case_data (c, var), var_get_width (var)); 694 } 695 hmap_insert (&xt->data, &te->node, hash); 696} 697 698/* Post-data reading calculations. */ 699 700static int compare_table_entry_vars_3way (const struct freq *a, 701 const struct freq *b, 702 const struct crosstabulation *xt, 703 int idx0, int idx1); 704static int compare_table_entry_3way (const void *ap_, const void *bp_, 705 const void *xt_); 706static int compare_table_entry_3way_inv (const void *ap_, const void *bp_, 707 const void *xt_); 708 709static void enum_var_values (const struct crosstabulation *, int var_idx, 710 bool descending); 711static void free_var_values (const struct crosstabulation *, int var_idx); 712static void output_crosstabulation (struct crosstabs_proc *, 713 struct crosstabulation *); 714static void make_crosstabulation_subset (struct crosstabulation *xt, 715 size_t row0, size_t row1, 716 struct crosstabulation *subset); 717static void make_summary_table (struct crosstabs_proc *); 718static bool find_crosstab (struct crosstabulation *, size_t *row0p, 719 size_t *row1p); 720 721static void 722postcalc (struct crosstabs_proc *proc) 723{ 724 725 /* Round hash table entries, if requested 726 727 If this causes any of the cell counts to fall to zero, delete those 728 cells. */ 729 if (proc->round_cells) 730 for (struct crosstabulation *xt = proc->pivots; 731 xt < &proc->pivots[proc->n_pivots]; xt++) 732 { 733 struct freq *e, *next; 734 HMAP_FOR_EACH_SAFE (e, next, struct freq, node, &xt->data) 735 { 736 e->count = round_weight (proc, e->count); 737 if (e->count == 0.0) 738 { 739 hmap_delete (&xt->data, &e->node); 740 free (e); 741 } 742 } 743 } 744 745 /* Convert hash tables into sorted arrays of entries. */ 746 for (struct crosstabulation *xt = proc->pivots; 747 xt < &proc->pivots[proc->n_pivots]; xt++) 748 { 749 struct freq *e; 750 751 xt->n_entries = hmap_count (&xt->data); 752 xt->entries = xnmalloc (xt->n_entries, sizeof *xt->entries); 753 size_t i = 0; 754 HMAP_FOR_EACH (e, struct freq, node, &xt->data) 755 xt->entries[i++] = e; 756 hmap_destroy (&xt->data); 757 758 sort (xt->entries, xt->n_entries, sizeof *xt->entries, 759 proc->descending ? compare_table_entry_3way_inv : compare_table_entry_3way, 760 xt); 761 762 } 763 764 make_summary_table (proc); 765 766 /* Output each pivot table. */ 767 for (struct crosstabulation *xt = proc->pivots; 768 xt < &proc->pivots[proc->n_pivots]; xt++) 769 { 770 if (proc->pivot || xt->n_vars == 2) 771 output_crosstabulation (proc, xt); 772 else 773 { 774 size_t row0 = 0, row1 = 0; 775 while (find_crosstab (xt, &row0, &row1)) 776 { 777 struct crosstabulation subset; 778 make_crosstabulation_subset (xt, row0, row1, &subset); 779 output_crosstabulation (proc, &subset); 780 free (subset.const_indexes); 781 } 782 } 783 if (proc->barchart) 784 { 785 int n_vars = (xt->n_vars > 2 ? 2 : xt->n_vars); 786 const struct variable **vars = xcalloc (n_vars, sizeof *vars); 787 for (size_t i = 0; i < n_vars; i++) 788 vars[i] = xt->vars[i].var; 789 chart_item_submit (barchart_create (vars, n_vars, _("Count"), 790 false, 791 xt->entries, xt->n_entries)); 792 free (vars); 793 } 794 } 795 796 /* Free output and prepare for next split file. */ 797 for (struct crosstabulation *xt = proc->pivots; 798 xt < &proc->pivots[proc->n_pivots]; xt++) 799 { 800 xt->missing = 0.0; 801 802 /* Free the members that were allocated in this function(and the values 803 owned by the entries. 804 805 The other pointer members are either both allocated and destroyed at a 806 lower level (in output_crosstabulation), or both allocated and 807 destroyed at a higher level (in crs_custom_tables and free_proc, 808 respectively). */ 809 for (size_t i = 0; i < xt->n_vars; i++) 810 { 811 int width = var_get_width (xt->vars[i].var); 812 if (value_needs_init (width)) 813 { 814 size_t j; 815 816 for (j = 0; j < xt->n_entries; j++) 817 value_destroy (&xt->entries[j]->values[i], width); 818 } 819 } 820 821 for (size_t i = 0; i < xt->n_entries; i++) 822 free (xt->entries[i]); 823 free (xt->entries); 824 } 825} 826 827static void 828make_crosstabulation_subset (struct crosstabulation *xt, size_t row0, 829 size_t row1, struct crosstabulation *subset) 830{ 831 *subset = *xt; 832 if (xt->n_vars > 2) 833 { 834 assert (xt->n_consts == 0); 835 subset->n_vars = 2; 836 subset->vars = xt->vars; 837 838 subset->n_consts = xt->n_vars - 2; 839 subset->const_vars = xt->vars + 2; 840 subset->const_indexes = xcalloc (subset->n_consts, 841 sizeof *subset->const_indexes); 842 for (size_t i = 0; i < subset->n_consts; i++) 843 { 844 const union value *value = &xt->entries[row0]->values[2 + i]; 845 846 for (size_t j = 0; j < xt->vars[2 + i].n_values; j++) 847 if (value_equal (&xt->vars[2 + i].values[j], value, 848 var_get_width (xt->vars[2 + i].var))) 849 { 850 subset->const_indexes[i] = j; 851 goto found; 852 } 853 NOT_REACHED (); 854 found: ; 855 } 856 } 857 subset->entries = &xt->entries[row0]; 858 subset->n_entries = row1 - row0; 859} 860 861static int 862compare_table_entry_var_3way (const struct freq *a, 863 const struct freq *b, 864 const struct crosstabulation *xt, 865 int idx) 866{ 867 return value_compare_3way (&a->values[idx], &b->values[idx], 868 var_get_width (xt->vars[idx].var)); 869} 870 871static int 872compare_table_entry_vars_3way (const struct freq *a, 873 const struct freq *b, 874 const struct crosstabulation *xt, 875 int idx0, int idx1) 876{ 877 int i; 878 879 for (i = idx1 - 1; i >= idx0; i--) 880 { 881 int cmp = compare_table_entry_var_3way (a, b, xt, i); 882 if (cmp != 0) 883 return cmp; 884 } 885 return 0; 886} 887 888/* Compare the struct freq at *AP to the one at *BP and 889 return a strcmp()-type result. */ 890static int 891compare_table_entry_3way (const void *ap_, const void *bp_, const void *xt_) 892{ 893 const struct freq *const *ap = ap_; 894 const struct freq *const *bp = bp_; 895 const struct freq *a = *ap; 896 const struct freq *b = *bp; 897 const struct crosstabulation *xt = xt_; 898 int cmp; 899 900 cmp = compare_table_entry_vars_3way (a, b, xt, 2, xt->n_vars); 901 if (cmp != 0) 902 return cmp; 903 904 cmp = compare_table_entry_var_3way (a, b, xt, ROW_VAR); 905 if (cmp != 0) 906 return cmp; 907 908 return compare_table_entry_var_3way (a, b, xt, COL_VAR); 909} 910 911/* Inverted version of compare_table_entry_3way */ 912static int 913compare_table_entry_3way_inv (const void *ap_, const void *bp_, const void *xt_) 914{ 915 return -compare_table_entry_3way (ap_, bp_, xt_); 916} 917 918/* Output a table summarizing the cases processed. */ 919static void 920make_summary_table (struct crosstabs_proc *proc) 921{ 922 struct pivot_table *table = pivot_table_create (N_("Summary")); 923 pivot_table_set_weight_var (table, dict_get_weight (proc->dict)); 924 925 pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"), 926 N_("N"), PIVOT_RC_COUNT, 927 N_("Percent"), PIVOT_RC_PERCENT); 928 929 struct pivot_dimension *cases = pivot_dimension_create ( 930 table, PIVOT_AXIS_COLUMN, N_("Cases"), 931 N_("Valid"), N_("Missing"), N_("Total")); 932 cases->root->show_label = true; 933 934 struct pivot_dimension *tables = pivot_dimension_create ( 935 table, PIVOT_AXIS_ROW, N_("Crosstabulation")); 936 for (struct crosstabulation *xt = &proc->pivots[0]; 937 xt < &proc->pivots[proc->n_pivots]; xt++) 938 { 939 struct string name = DS_EMPTY_INITIALIZER; 940 for (size_t i = 0; i < xt->n_vars; i++) 941 { 942 if (i > 0) 943 ds_put_cstr (&name, " × "); 944 ds_put_cstr (&name, var_to_string (xt->vars[i].var)); 945 } 946 947 int row = pivot_category_create_leaf ( 948 tables->root, 949 pivot_value_new_user_text_nocopy (ds_steal_cstr (&name))); 950 951 double valid = 0.; 952 for (size_t i = 0; i < xt->n_entries; i++) 953 valid += xt->entries[i]->count; 954 955 double n[3]; 956 n[0] = valid; 957 n[1] = xt->missing; 958 n[2] = n[0] + n[1]; 959 for (int i = 0; i < 3; i++) 960 { 961 pivot_table_put3 (table, 0, i, row, pivot_value_new_number (n[i])); 962 pivot_table_put3 (table, 1, i, row, 963 pivot_value_new_number (n[i] / n[2] * 100.0)); 964 } 965 } 966 967 pivot_table_submit (table); 968} 969 970/* Output. */ 971 972static struct pivot_table *create_crosstab_table ( 973 struct crosstabs_proc *, struct crosstabulation *, 974 size_t crs_leaves[CRS_CL_count]); 975static struct pivot_table *create_chisq_table (struct crosstabulation *); 976static struct pivot_table *create_sym_table (struct crosstabulation *); 977static struct pivot_table *create_risk_table ( 978 struct crosstabulation *, struct pivot_dimension **risk_statistics); 979static struct pivot_table *create_direct_table (struct crosstabulation *); 980static void display_crosstabulation (struct crosstabs_proc *, 981 struct crosstabulation *, 982 struct pivot_table *, 983 size_t crs_leaves[CRS_CL_count]); 984static void display_chisq (struct crosstabulation *, struct pivot_table *); 985static void display_symmetric (struct crosstabs_proc *, 986 struct crosstabulation *, struct pivot_table *); 987static void display_risk (struct crosstabulation *, struct pivot_table *, 988 struct pivot_dimension *risk_statistics); 989static void display_directional (struct crosstabs_proc *, 990 struct crosstabulation *, 991 struct pivot_table *); 992static void delete_missing (struct crosstabulation *); 993static void build_matrix (struct crosstabulation *); 994 995/* Output pivot table XT in the context of PROC. */ 996static void 997output_crosstabulation (struct crosstabs_proc *proc, struct crosstabulation *xt) 998{ 999 for (size_t i = 0; i < xt->n_vars; i++) 1000 enum_var_values (xt, i, proc->descending); 1001 1002 if (xt->vars[COL_VAR].n_values == 0) 1003 { 1004 struct string vars; 1005 int i; 1006 1007 ds_init_cstr (&vars, var_to_string (xt->vars[0].var)); 1008 for (i = 1; i < xt->n_vars; i++) 1009 ds_put_format (&vars, " × %s", var_to_string (xt->vars[i].var)); 1010 1011 /* TRANSLATORS: The %s here describes a crosstabulation. It takes the 1012 form "var1 * var2 * var3 * ...". */ 1013 msg (SW, _("Crosstabulation %s contained no non-missing cases."), 1014 ds_cstr (&vars)); 1015 1016 ds_destroy (&vars); 1017 for (size_t i = 0; i < xt->n_vars; i++) 1018 free_var_values (xt, i); 1019 return; 1020 } 1021 1022 size_t crs_leaves[CRS_CL_count]; 1023 struct pivot_table *table = (proc->cells 1024 ? create_crosstab_table (proc, xt, crs_leaves) 1025 : NULL); 1026 struct pivot_table *chisq = (proc->statistics & (1u << CRS_ST_CHISQ) 1027 ? create_chisq_table (xt) 1028 : NULL); 1029 struct pivot_table *sym 1030 = (proc->statistics & ((1u << CRS_ST_PHI) | (1u << CRS_ST_CC) 1031 | (1u << CRS_ST_BTAU) | (1u << CRS_ST_CTAU) 1032 | (1u << CRS_ST_GAMMA) | (1u << CRS_ST_CORR) 1033 | (1u << CRS_ST_KAPPA)) 1034 ? create_sym_table (xt) 1035 : NULL); 1036 struct pivot_dimension *risk_statistics = NULL; 1037 struct pivot_table *risk = (proc->statistics & (1u << CRS_ST_RISK) 1038 ? create_risk_table (xt, &risk_statistics) 1039 : NULL); 1040 struct pivot_table *direct 1041 = (proc->statistics & ((1u << CRS_ST_LAMBDA) | (1u << CRS_ST_UC) 1042 | (1u << CRS_ST_D) | (1u << CRS_ST_ETA)) 1043 ? create_direct_table (xt) 1044 : NULL); 1045 1046 size_t row0 = 0; 1047 size_t row1 = 0; 1048 while (find_crosstab (xt, &row0, &row1)) 1049 { 1050 struct crosstabulation x; 1051 1052 make_crosstabulation_subset (xt, row0, row1, &x); 1053 1054 size_t n_rows = x.vars[ROW_VAR].n_values; 1055 size_t n_cols = x.vars[COL_VAR].n_values; 1056 if (size_overflow_p (xtimes (xtimes (n_rows, n_cols), sizeof (double)))) 1057 xalloc_die (); 1058 x.row_tot = xmalloc (n_rows * sizeof *x.row_tot); 1059 x.col_tot = xmalloc (n_cols * sizeof *x.col_tot); 1060 x.mat = xmalloc (n_rows * n_cols * sizeof *x.mat); 1061 1062 build_matrix (&x); 1063 1064 /* Find the first variable that differs from the last subtable. */ 1065 if (table) 1066 display_crosstabulation (proc, &x, table, crs_leaves); 1067 1068 if (proc->exclude == MV_NEVER) 1069 delete_missing (&x); 1070 1071 if (chisq) 1072 display_chisq (&x, chisq); 1073 1074 if (sym) 1075 display_symmetric (proc, &x, sym); 1076 if (risk) 1077 display_risk (&x, risk, risk_statistics); 1078 if (direct) 1079 display_directional (proc, &x, direct); 1080 1081 free (x.mat); 1082 free (x.row_tot); 1083 free (x.col_tot); 1084 free (x.const_indexes); 1085 } 1086 1087 if (table) 1088 pivot_table_submit (table); 1089 1090 if (chisq) 1091 pivot_table_submit (chisq); 1092 1093 if (sym) 1094 pivot_table_submit (sym); 1095 1096 if (risk) 1097 { 1098 if (!pivot_table_is_empty (risk)) 1099 pivot_table_submit (risk); 1100 else 1101 pivot_table_unref (risk); 1102 } 1103 1104 if (direct) 1105 pivot_table_submit (direct); 1106 1107 for (size_t i = 0; i < xt->n_vars; i++) 1108 free_var_values (xt, i); 1109} 1110 1111static void 1112build_matrix (struct crosstabulation *x) 1113{ 1114 const int col_var_width = var_get_width (x->vars[COL_VAR].var); 1115 const int row_var_width = var_get_width (x->vars[ROW_VAR].var); 1116 size_t n_rows = x->vars[ROW_VAR].n_values; 1117 size_t n_cols = x->vars[COL_VAR].n_values; 1118 int col, row; 1119 double *mp; 1120 struct freq **p; 1121 1122 mp = x->mat; 1123 col = row = 0; 1124 for (p = x->entries; p < &x->entries[x->n_entries]; p++) 1125 { 1126 const struct freq *te = *p; 1127 1128 while (!value_equal (&x->vars[ROW_VAR].values[row], 1129 &te->values[ROW_VAR], row_var_width)) 1130 { 1131 for (; col < n_cols; col++) 1132 *mp++ = 0.0; 1133 col = 0; 1134 row++; 1135 } 1136 1137 while (!value_equal (&x->vars[COL_VAR].values[col], 1138 &te->values[COL_VAR], col_var_width)) 1139 { 1140 *mp++ = 0.0; 1141 col++; 1142 } 1143 1144 *mp++ = te->count; 1145 if (++col >= n_cols) 1146 { 1147 col = 0; 1148 row++; 1149 } 1150 } 1151 while (mp < &x->mat[n_cols * n_rows]) 1152 *mp++ = 0.0; 1153 assert (mp == &x->mat[n_cols * n_rows]); 1154 1155 /* Column totals, row totals, ns_rows. */ 1156 mp = x->mat; 1157 for (col = 0; col < n_cols; col++) 1158 x->col_tot[col] = 0.0; 1159 for (row = 0; row < n_rows; row++) 1160 x->row_tot[row] = 0.0; 1161 x->ns_rows = 0; 1162 for (row = 0; row < n_rows; row++) 1163 { 1164 bool row_is_empty = true; 1165 for (col = 0; col < n_cols; col++) 1166 { 1167 if (*mp != 0.0) 1168 { 1169 row_is_empty = false; 1170 x->col_tot[col] += *mp; 1171 x->row_tot[row] += *mp; 1172 } 1173 mp++; 1174 } 1175 if (!row_is_empty) 1176 x->ns_rows++; 1177 } 1178 assert (mp == &x->mat[n_cols * n_rows]); 1179 1180 /* ns_cols. */ 1181 x->ns_cols = 0; 1182 for (col = 0; col < n_cols; col++) 1183 for (row = 0; row < n_rows; row++) 1184 if (x->mat[col + row * n_cols] != 0.0) 1185 { 1186 x->ns_cols++; 1187 break; 1188 } 1189 1190 /* Grand total. */ 1191 x->total = 0.0; 1192 for (col = 0; col < n_cols; col++) 1193 x->total += x->col_tot[col]; 1194} 1195 1196static void 1197add_var_dimension (struct pivot_table *table, const struct xtab_var *var, 1198 enum pivot_axis_type axis_type, bool total) 1199{ 1200 struct pivot_dimension *d = pivot_dimension_create__ ( 1201 table, axis_type, pivot_value_new_variable (var->var)); 1202 1203 struct pivot_footnote *missing_footnote = pivot_table_create_footnote ( 1204 table, pivot_value_new_text (N_("Missing value"))); 1205 1206 struct pivot_category *group = pivot_category_create_group__ ( 1207 d->root, pivot_value_new_variable (var->var)); 1208 for (size_t j = 0; j < var->n_values; j++) 1209 { 1210 struct pivot_value *value = pivot_value_new_var_value ( 1211 var->var, &var->values[j]); 1212 if (var_is_value_missing (var->var, &var->values[j], MV_ANY)) 1213 pivot_value_add_footnote (value, missing_footnote); 1214 pivot_category_create_leaf (group, value); 1215 } 1216 1217 if (total) 1218 pivot_category_create_leaf (d->root, pivot_value_new_text (N_("Total"))); 1219} 1220 1221static struct pivot_table * 1222create_crosstab_table (struct crosstabs_proc *proc, struct crosstabulation *xt, 1223 size_t crs_leaves[CRS_CL_count]) 1224{ 1225 /* Title. */ 1226 struct string title = DS_EMPTY_INITIALIZER; 1227 for (size_t i = 0; i < xt->n_vars; i++) 1228 { 1229 if (i) 1230 ds_put_cstr (&title, " × "); 1231 ds_put_cstr (&title, var_to_string (xt->vars[i].var)); 1232 } 1233 for (size_t i = 0; i < xt->n_consts; i++) 1234 { 1235 const struct variable *var = xt->const_vars[i].var; 1236 const union value *value = &xt->entries[0]->values[2 + i]; 1237 char *s; 1238 1239 ds_put_format (&title, ", %s=", var_to_string (var)); 1240 1241 /* Insert the formatted value of VAR without any leading spaces. */ 1242 s = data_out (value, var_get_encoding (var), var_get_print_format (var)); 1243 ds_put_cstr (&title, s + strspn (s, " ")); 1244 free (s); 1245 } 1246 struct pivot_table *table = pivot_table_create__ ( 1247 pivot_value_new_user_text_nocopy (ds_steal_cstr (&title)), 1248 "Crosstabulation"); 1249 pivot_table_set_weight_format (table, &proc->weight_format); 1250 table->omit_empty = true; 1251 1252 struct pivot_dimension *statistics = pivot_dimension_create ( 1253 table, PIVOT_AXIS_ROW, N_("Statistics")); 1254 1255 struct statistic 1256 { 1257 const char *label; 1258 const char *rc; 1259 }; 1260 static const struct statistic stats[CRS_CL_count] = 1261 { 1262 [CRS_CL_COUNT] = { N_("Count"), PIVOT_RC_COUNT }, 1263 [CRS_CL_ROW] = { N_("Row %"), PIVOT_RC_PERCENT }, 1264 [CRS_CL_COLUMN] = { N_("Column %"), PIVOT_RC_PERCENT }, 1265 [CRS_CL_TOTAL] = { N_("Total %"), PIVOT_RC_PERCENT }, 1266 [CRS_CL_EXPECTED] = { N_("Expected"), PIVOT_RC_OTHER }, 1267 [CRS_CL_RESIDUAL] = { N_("Residual"), PIVOT_RC_RESIDUAL }, 1268 [CRS_CL_SRESIDUAL] = { N_("Std. Residual"), PIVOT_RC_RESIDUAL }, 1269 [CRS_CL_ASRESIDUAL] = { N_("Adjusted Residual"), PIVOT_RC_RESIDUAL }, 1270 }; 1271 for (size_t i = 0; i < CRS_CL_count; i++) 1272 if (proc->cells & (1u << i) && stats[i].label) 1273 crs_leaves[i] = pivot_category_create_leaf_rc ( 1274 statistics->root, pivot_value_new_text (stats[i].label), 1275 stats[i].rc); 1276 1277 for (size_t i = 0; i < xt->n_vars; i++) 1278 add_var_dimension (table, &xt->vars[i], 1279 i == COL_VAR ? PIVOT_AXIS_COLUMN : PIVOT_AXIS_ROW, 1280 true); 1281 1282 return table; 1283} 1284 1285static struct pivot_table * 1286create_chisq_table (struct crosstabulation *xt) 1287{ 1288 struct pivot_table *chisq = pivot_table_create (N_("Chi-Square Tests")); 1289 pivot_table_set_weight_format (chisq, &xt->weight_format); 1290 chisq->omit_empty = true; 1291 1292 pivot_dimension_create ( 1293 chisq, PIVOT_AXIS_ROW, N_("Statistics"), 1294 N_("Pearson Chi-Square"), 1295 N_("Likelihood Ratio"), 1296 N_("Fisher's Exact Test"), 1297 N_("Continuity Correction"), 1298 N_("Linear-by-Linear Association"), 1299 N_("N of Valid Cases"), PIVOT_RC_COUNT); 1300 1301 pivot_dimension_create ( 1302 chisq, PIVOT_AXIS_COLUMN, N_("Statistics"), 1303 N_("Value"), PIVOT_RC_OTHER, 1304 N_("df"), PIVOT_RC_COUNT, 1305 N_("Asymptotic Sig. (2-tailed)"), PIVOT_RC_SIGNIFICANCE, 1306 N_("Exact Sig. (2-tailed)"), PIVOT_RC_SIGNIFICANCE, 1307 N_("Exact Sig. (1-tailed)"), PIVOT_RC_SIGNIFICANCE); 1308 1309 for (size_t i = 2; i < xt->n_vars; i++) 1310 add_var_dimension (chisq, &xt->vars[i], PIVOT_AXIS_ROW, false); 1311 1312 return chisq; 1313} 1314 1315/* Symmetric measures. */ 1316static struct pivot_table * 1317create_sym_table (struct crosstabulation *xt) 1318{ 1319 struct pivot_table *sym = pivot_table_create (N_("Symmetric Measures")); 1320 pivot_table_set_weight_format (sym, &xt->weight_format); 1321 sym->omit_empty = true; 1322 1323 pivot_dimension_create ( 1324 sym, PIVOT_AXIS_COLUMN, N_("Values"), 1325 N_("Value"), PIVOT_RC_OTHER, 1326 N_("Asymp. Std. Error"), PIVOT_RC_OTHER, 1327 N_("Approx. T"), PIVOT_RC_OTHER, 1328 N_("Approx. Sig."), PIVOT_RC_SIGNIFICANCE); 1329 1330 struct pivot_dimension *statistics = pivot_dimension_create ( 1331 sym, PIVOT_AXIS_ROW, N_("Statistics")); 1332 pivot_category_create_group ( 1333 statistics->root, N_("Nominal by Nominal"), 1334 N_("Phi"), N_("Cramer's V"), N_("Contingency Coefficient")); 1335 pivot_category_create_group ( 1336 statistics->root, N_("Ordinal by Ordinal"), 1337 N_("Kendall's tau-b"), N_("Kendall's tau-c"), 1338 N_("Gamma"), N_("Spearman Correlation")); 1339 pivot_category_create_group ( 1340 statistics->root, N_("Interval by Interval"), 1341 N_("Pearson's R")); 1342 pivot_category_create_group ( 1343 statistics->root, N_("Measure of Agreement"), 1344 N_("Kappa")); 1345 pivot_category_create_leaves (statistics->root, N_("N of Valid Cases"), 1346 PIVOT_RC_COUNT); 1347 1348 for (size_t i = 2; i < xt->n_vars; i++) 1349 add_var_dimension (sym, &xt->vars[i], PIVOT_AXIS_ROW, false); 1350 1351 return sym; 1352} 1353 1354/* Risk estimate. */ 1355static struct pivot_table * 1356create_risk_table (struct crosstabulation *xt, 1357 struct pivot_dimension **risk_statistics) 1358{ 1359 struct pivot_table *risk = pivot_table_create (N_("Risk Estimate")); 1360 pivot_table_set_weight_format (risk, &xt->weight_format); 1361 risk->omit_empty = true; 1362 1363 struct pivot_dimension *values = pivot_dimension_create ( 1364 risk, PIVOT_AXIS_COLUMN, N_("Values"), 1365 N_("Value"), PIVOT_RC_OTHER); 1366 pivot_category_create_group ( 1367 /* xgettext:no-c-format */ 1368 values->root, N_("95% Confidence Interval"), 1369 N_("Lower"), PIVOT_RC_OTHER, 1370 N_("Upper"), PIVOT_RC_OTHER); 1371 1372 *risk_statistics = pivot_dimension_create ( 1373 risk, PIVOT_AXIS_ROW, N_("Statistics")); 1374 1375 for (size_t i = 2; i < xt->n_vars; i++) 1376 add_var_dimension (risk, &xt->vars[i], PIVOT_AXIS_ROW, false); 1377 1378 return risk; 1379} 1380 1381static void 1382create_direct_stat (struct pivot_category *parent, 1383 const struct crosstabulation *xt, 1384 const char *name, bool symmetric) 1385{ 1386 struct pivot_category *group = pivot_category_create_group ( 1387 parent, name); 1388 if (symmetric) 1389 pivot_category_create_leaf (group, pivot_value_new_text (N_("Symmetric"))); 1390 1391 char *row_label = xasprintf (_("%s Dependent"), 1392 var_to_string (xt->vars[ROW_VAR].var)); 1393 pivot_category_create_leaf (group, pivot_value_new_user_text_nocopy ( 1394 row_label)); 1395 1396 char *col_label = xasprintf (_("%s Dependent"), 1397 var_to_string (xt->vars[COL_VAR].var)); 1398 pivot_category_create_leaf (group, pivot_value_new_user_text_nocopy ( 1399 col_label)); 1400} 1401 1402/* Directional measures. */ 1403static struct pivot_table * 1404create_direct_table (struct crosstabulation *xt) 1405{ 1406 struct pivot_table *direct = pivot_table_create (N_("Directional Measures")); 1407 pivot_table_set_weight_format (direct, &xt->weight_format); 1408 direct->omit_empty = true; 1409 1410 pivot_dimension_create ( 1411 direct, PIVOT_AXIS_COLUMN, N_("Values"), 1412 N_("Value"), PIVOT_RC_OTHER, 1413 N_("Asymp. Std. Error"), PIVOT_RC_OTHER, 1414 N_("Approx. T"), PIVOT_RC_OTHER, 1415 N_("Approx. Sig."), PIVOT_RC_SIGNIFICANCE); 1416 1417 struct pivot_dimension *statistics = pivot_dimension_create ( 1418 direct, PIVOT_AXIS_ROW, N_("Statistics")); 1419 struct pivot_category *nn = pivot_category_create_group ( 1420 statistics->root, N_("Nominal by Nominal")); 1421 create_direct_stat (nn, xt, N_("Lambda"), true); 1422 create_direct_stat (nn, xt, N_("Goodman and Kruskal tau"), false); 1423 create_direct_stat (nn, xt, N_("Uncertainty Coefficient"), true); 1424 struct pivot_category *oo = pivot_category_create_group ( 1425 statistics->root, N_("Ordinal by Ordinal")); 1426 create_direct_stat (oo, xt, N_("Somers' d"), true); 1427 struct pivot_category *ni = pivot_category_create_group ( 1428 statistics->root, N_("Nominal by Interval")); 1429 create_direct_stat (ni, xt, N_("Eta"), false); 1430 1431 for (size_t i = 2; i < xt->n_vars; i++) 1432 add_var_dimension (direct, &xt->vars[i], PIVOT_AXIS_ROW, false); 1433 1434 return direct; 1435} 1436 1437/* Delete missing rows and columns for statistical analysis when 1438 /MISSING=REPORT. */ 1439static void 1440delete_missing (struct crosstabulation *xt) 1441{ 1442 size_t n_rows = xt->vars[ROW_VAR].n_values; 1443 size_t n_cols = xt->vars[COL_VAR].n_values; 1444 int r, c; 1445 1446 for (r = 0; r < n_rows; r++) 1447 if (var_is_num_missing (xt->vars[ROW_VAR].var, 1448 xt->vars[ROW_VAR].values[r].f, MV_USER)) 1449 { 1450 for (c = 0; c < n_cols; c++) 1451 xt->mat[c + r * n_cols] = 0.; 1452 xt->ns_rows--; 1453 } 1454 1455 1456 for (c = 0; c < n_cols; c++) 1457 if (var_is_num_missing (xt->vars[COL_VAR].var, 1458 xt->vars[COL_VAR].values[c].f, MV_USER)) 1459 { 1460 for (r = 0; r < n_rows; r++) 1461 xt->mat[c + r * n_cols] = 0.; 1462 xt->ns_cols--; 1463 } 1464} 1465 1466static bool 1467find_crosstab (struct crosstabulation *xt, size_t *row0p, size_t *row1p) 1468{ 1469 size_t row0 = *row1p; 1470 size_t row1; 1471 1472 if (row0 >= xt->n_entries) 1473 return false; 1474 1475 for (row1 = row0 + 1; row1 < xt->n_entries; row1++) 1476 { 1477 struct freq *a = xt->entries[row0]; 1478 struct freq *b = xt->entries[row1]; 1479 if (compare_table_entry_vars_3way (a, b, xt, 2, xt->n_vars) != 0) 1480 break; 1481 } 1482 *row0p = row0; 1483 *row1p = row1; 1484 return true; 1485} 1486 1487/* Compares `union value's A_ and B_ and returns a strcmp()-like 1488 result. WIDTH_ points to an int which is either 0 for a 1489 numeric value or a string width for a string value. */ 1490static int 1491compare_value_3way (const void *a_, const void *b_, const void *width_) 1492{ 1493 const union value *a = a_; 1494 const union value *b = b_; 1495 const int *width = width_; 1496 1497 return value_compare_3way (a, b, *width); 1498} 1499 1500/* Inverted version of the above */ 1501static int 1502compare_value_3way_inv (const void *a_, const void *b_, const void *width_) 1503{ 1504 return -compare_value_3way (a_, b_, width_); 1505} 1506 1507 1508/* Given an array of ENTRY_CNT table_entry structures starting at 1509 ENTRIES, creates a sorted list of the values that the variable 1510 with index VAR_IDX takes on. Stores the array of the values in 1511 XT->values and the number of values in XT->n_values. */ 1512static void 1513enum_var_values (const struct crosstabulation *xt, int var_idx, 1514 bool descending) 1515{ 1516 struct xtab_var *xv = &xt->vars[var_idx]; 1517 const struct var_range *range = get_var_range (xt->proc, xv->var); 1518 1519 if (range) 1520 { 1521 xv->values = xnmalloc (range->count, sizeof *xv->values); 1522 xv->n_values = range->count; 1523 for (size_t i = 0; i < range->count; i++) 1524 xv->values[i].f = range->min + i; 1525 } 1526 else 1527 { 1528 int width = var_get_width (xv->var); 1529 struct hmapx_node *node; 1530 const union value *iter; 1531 struct hmapx set; 1532 1533 hmapx_init (&set); 1534 for (size_t i = 0; i < xt->n_entries; i++) 1535 { 1536 const struct freq *te = xt->entries[i]; 1537 const union value *value = &te->values[var_idx]; 1538 size_t hash = value_hash (value, width, 0); 1539 1540 HMAPX_FOR_EACH_WITH_HASH (iter, node, hash, &set) 1541 if (value_equal (iter, value, width)) 1542 goto next_entry; 1543 1544 hmapx_insert (&set, (union value *) value, hash); 1545 1546 next_entry: ; 1547 } 1548 1549 xv->n_values = hmapx_count (&set); 1550 xv->values = xnmalloc (xv->n_values, sizeof *xv->values); 1551 size_t i = 0; 1552 HMAPX_FOR_EACH (iter, node, &set) 1553 xv->values[i++] = *iter; 1554 hmapx_destroy (&set); 1555 1556 sort (xv->values, xv->n_values, sizeof *xv->values, 1557 descending ? compare_value_3way_inv : compare_value_3way, 1558 &width); 1559 } 1560} 1561 1562static void 1563free_var_values (const struct crosstabulation *xt, int var_idx) 1564{ 1565 struct xtab_var *xv = &xt->vars[var_idx]; 1566 free (xv->values); 1567 xv->values = NULL; 1568 xv->n_values = 0; 1569} 1570 1571/* Displays the crosstabulation table. */ 1572static void 1573display_crosstabulation (struct crosstabs_proc *proc, 1574 struct crosstabulation *xt, struct pivot_table *table, 1575 size_t crs_leaves[CRS_CL_count]) 1576{ 1577 size_t n_rows = xt->vars[ROW_VAR].n_values; 1578 size_t n_cols = xt->vars[COL_VAR].n_values; 1579 1580 size_t *indexes = xnmalloc (table->n_dimensions, sizeof *indexes); 1581 assert (xt->n_vars == 2); 1582 for (size_t i = 0; i < xt->n_consts; i++) 1583 indexes[i + 3] = xt->const_indexes[i]; 1584 1585 /* Put in the actual cells. */ 1586 double *mp = xt->mat; 1587 for (size_t r = 0; r < n_rows; r++) 1588 { 1589 if (!xt->row_tot[r] && proc->mode != INTEGER) 1590 continue; 1591 1592 indexes[ROW_VAR + 1] = r; 1593 for (size_t c = 0; c < n_cols; c++) 1594 { 1595 if (!xt->col_tot[c] && proc->mode != INTEGER) 1596 continue; 1597 1598 indexes[COL_VAR + 1] = c; 1599 1600 double expected_value = xt->row_tot[r] * xt->col_tot[c] / xt->total; 1601 double residual = *mp - expected_value; 1602 double sresidual = residual / sqrt (expected_value); 1603 double asresidual = (sresidual 1604 * (1. - xt->row_tot[r] / xt->total) 1605 * (1. - xt->col_tot[c] / xt->total)); 1606 double entries[] = { 1607 [CRS_CL_COUNT] = *mp, 1608 [CRS_CL_ROW] = *mp / xt->row_tot[r] * 100., 1609 [CRS_CL_COLUMN] = *mp / xt->col_tot[c] * 100., 1610 [CRS_CL_TOTAL] = *mp / xt->total * 100., 1611 [CRS_CL_EXPECTED] = expected_value, 1612 [CRS_CL_RESIDUAL] = residual, 1613 [CRS_CL_SRESIDUAL] = sresidual, 1614 [CRS_CL_ASRESIDUAL] = asresidual, 1615 }; 1616 for (size_t i = 0; i < proc->n_cells; i++) 1617 { 1618 int cell = proc->a_cells[i]; 1619 indexes[0] = crs_leaves[cell]; 1620 pivot_table_put (table, indexes, table->n_dimensions, 1621 pivot_value_new_number (entries[cell])); 1622 } 1623 1624 mp++; 1625 } 1626 } 1627 1628 /* Row totals. */ 1629 for (size_t r = 0; r < n_rows; r++) 1630 { 1631 if (!xt->row_tot[r] && proc->mode != INTEGER) 1632 continue; 1633 1634 double expected_value = xt->row_tot[r] / xt->total; 1635 double entries[] = { 1636 [CRS_CL_COUNT] = xt->row_tot[r], 1637 [CRS_CL_ROW] = 100.0, 1638 [CRS_CL_COLUMN] = expected_value * 100., 1639 [CRS_CL_TOTAL] = expected_value * 100., 1640 [CRS_CL_EXPECTED] = expected_value, 1641 [CRS_CL_RESIDUAL] = SYSMIS, 1642 [CRS_CL_SRESIDUAL] = SYSMIS, 1643 [CRS_CL_ASRESIDUAL] = SYSMIS, 1644 }; 1645 for (size_t i = 0; i < proc->n_cells; i++) 1646 { 1647 int cell = proc->a_cells[i]; 1648 double entry = entries[cell]; 1649 if (entry != SYSMIS) 1650 { 1651 indexes[ROW_VAR + 1] = r; 1652 indexes[COL_VAR + 1] = n_cols; 1653 indexes[0] = crs_leaves[cell]; 1654 pivot_table_put (table, indexes, table->n_dimensions, 1655 pivot_value_new_number (entry)); 1656 } 1657 } 1658 } 1659 1660 for (size_t c = 0; c <= n_cols; c++) 1661 { 1662 if (c < n_cols && !xt->col_tot[c] && proc->mode != INTEGER) 1663 continue; 1664 1665 double ct = c < n_cols ? xt->col_tot[c] : xt->total; 1666 double expected_value = ct / xt->total; 1667 double entries[] = { 1668 [CRS_CL_COUNT] = ct, 1669 [CRS_CL_ROW] = expected_value * 100.0, 1670 [CRS_CL_COLUMN] = 100.0, 1671 [CRS_CL_TOTAL] = expected_value * 100., 1672 [CRS_CL_EXPECTED] = expected_value, 1673 [CRS_CL_RESIDUAL] = SYSMIS, 1674 [CRS_CL_SRESIDUAL] = SYSMIS, 1675 [CRS_CL_ASRESIDUAL] = SYSMIS, 1676 }; 1677 for (size_t i = 0; i < proc->n_cells; i++) 1678 { 1679 int cell = proc->a_cells[i]; 1680 double entry = entries[cell]; 1681 if (entry != SYSMIS) 1682 { 1683 indexes[ROW_VAR + 1] = n_rows; 1684 indexes[COL_VAR + 1] = c; 1685 indexes[0] = crs_leaves[cell]; 1686 pivot_table_put (table, indexes, table->n_dimensions, 1687 pivot_value_new_number (entry)); 1688 } 1689 } 1690 } 1691 1692 free (indexes); 1693} 1694 1695static void calc_r (struct crosstabulation *, 1696 double *XT, double *Y, double *, double *, double *); 1697static void calc_chisq (struct crosstabulation *, 1698 double[N_CHISQ], int[N_CHISQ], double *, double *); 1699 1700/* Display chi-square statistics. */ 1701static void 1702display_chisq (struct crosstabulation *xt, struct pivot_table *chisq) 1703{ 1704 double chisq_v[N_CHISQ]; 1705 double fisher1, fisher2; 1706 int df[N_CHISQ]; 1707 calc_chisq (xt, chisq_v, df, &fisher1, &fisher2); 1708 1709 size_t *indexes = xnmalloc (chisq->n_dimensions, sizeof *indexes); 1710 assert (xt->n_vars == 2); 1711 for (size_t i = 0; i < xt->n_consts; i++) 1712 indexes[i + 2] = xt->const_indexes[i]; 1713 for (int i = 0; i < N_CHISQ; i++) 1714 { 1715 indexes[0] = i; 1716 1717 double entries[5] = { SYSMIS, SYSMIS, SYSMIS, SYSMIS, SYSMIS }; 1718 if (i == 2) 1719 { 1720 entries[3] = fisher2; 1721 entries[4] = fisher1; 1722 } 1723 else if (chisq_v[i] != SYSMIS) 1724 { 1725 entries[0] = chisq_v[i]; 1726 entries[1] = df[i]; 1727 entries[2] = gsl_cdf_chisq_Q (chisq_v[i], df[i]); 1728 } 1729 1730 for (size_t j = 0; j < sizeof entries / sizeof *entries; j++) 1731 if (entries[j] != SYSMIS) 1732 { 1733 indexes[1] = j; 1734 pivot_table_put (chisq, indexes, chisq->n_dimensions, 1735 pivot_value_new_number (entries[j])); 1736 } 1737 } 1738 1739 indexes[0] = 5; 1740 indexes[1] = 0; 1741 pivot_table_put (chisq, indexes, chisq->n_dimensions, 1742 pivot_value_new_number (xt->total)); 1743 1744 free (indexes); 1745} 1746 1747static int calc_symmetric (struct crosstabs_proc *, struct crosstabulation *, 1748 double[N_SYMMETRIC], double[N_SYMMETRIC], 1749 double[N_SYMMETRIC], 1750 double[3], double[3], double[3]); 1751 1752/* Display symmetric measures. */ 1753static void 1754display_symmetric (struct crosstabs_proc *proc, struct crosstabulation *xt, 1755 struct pivot_table *sym) 1756{ 1757 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC]; 1758 double somers_d_v[3], somers_d_ase[3], somers_d_t[3]; 1759 1760 if (!calc_symmetric (proc, xt, sym_v, sym_ase, sym_t, 1761 somers_d_v, somers_d_ase, somers_d_t)) 1762 return; 1763 1764 size_t *indexes = xnmalloc (sym->n_dimensions, sizeof *indexes); 1765 assert (xt->n_vars == 2); 1766 for (size_t i = 0; i < xt->n_consts; i++) 1767 indexes[i + 2] = xt->const_indexes[i]; 1768 1769 for (int i = 0; i < N_SYMMETRIC; i++) 1770 { 1771 if (sym_v[i] == SYSMIS) 1772 continue; 1773 1774 indexes[1] = i; 1775 1776 double entries[] = { sym_v[i], sym_ase[i], sym_t[i] }; 1777 for (size_t j = 0; j < sizeof entries / sizeof *entries; j++) 1778 if (entries[j] != SYSMIS) 1779 { 1780 indexes[0] = j; 1781 pivot_table_put (sym, indexes, sym->n_dimensions, 1782 pivot_value_new_number (entries[j])); 1783 } 1784 } 1785 1786 indexes[1] = N_SYMMETRIC; 1787 indexes[0] = 0; 1788 struct pivot_value *total = pivot_value_new_number (xt->total); 1789 pivot_value_set_rc (sym, total, PIVOT_RC_COUNT); 1790 pivot_table_put (sym, indexes, sym->n_dimensions, total); 1791 1792 free (indexes); 1793} 1794 1795static bool calc_risk (struct crosstabulation *, 1796 double[], double[], double[], union value *, 1797 double *); 1798 1799/* Display risk estimate. */ 1800static void 1801display_risk (struct crosstabulation *xt, struct pivot_table *risk, 1802 struct pivot_dimension *risk_statistics) 1803{ 1804 double risk_v[3], lower[3], upper[3], n_valid; 1805 union value c[2]; 1806 if (!calc_risk (xt, risk_v, upper, lower, c, &n_valid)) 1807 return; 1808 1809 size_t *indexes = xnmalloc (risk->n_dimensions, sizeof *indexes); 1810 assert (xt->n_vars == 2); 1811 for (size_t i = 0; i < xt->n_consts; i++) 1812 indexes[i + 2] = xt->const_indexes[i]; 1813 1814 for (int i = 0; i < 3; i++) 1815 { 1816 const struct variable *cv = xt->vars[COL_VAR].var; 1817 const struct variable *rv = xt->vars[ROW_VAR].var; 1818 1819 if (risk_v[i] == SYSMIS) 1820 continue; 1821 1822 struct string label = DS_EMPTY_INITIALIZER; 1823 switch (i) 1824 { 1825 case 0: 1826 ds_put_format (&label, _("Odds Ratio for %s"), var_to_string (rv)); 1827 ds_put_cstr (&label, " ("); 1828 var_append_value_name (rv, &c[0], &label); 1829 ds_put_cstr (&label, " / "); 1830 var_append_value_name (rv, &c[1], &label); 1831 ds_put_cstr (&label, ")"); 1832 break; 1833 case 1: 1834 case 2: 1835 ds_put_format (&label, _("For cohort %s = "), var_to_string (cv)); 1836 var_append_value_name (cv, &xt->vars[ROW_VAR].values[i - 1], &label); 1837 break; 1838 } 1839 1840 indexes[1] = pivot_category_create_leaf ( 1841 risk_statistics->root, 1842 pivot_value_new_user_text_nocopy (ds_steal_cstr (&label))); 1843 1844 double entries[] = { risk_v[i], lower[i], upper[i] }; 1845 for (size_t j = 0; j < sizeof entries / sizeof *entries; j++) 1846 { 1847 indexes[0] = j; 1848 pivot_table_put (risk, indexes, risk->n_dimensions, 1849 pivot_value_new_number (entries[i])); 1850 } 1851 } 1852 indexes[1] = pivot_category_create_leaf ( 1853 risk_statistics->root, 1854 pivot_value_new_text (N_("N of Valid Cases"))); 1855 indexes[0] = 0; 1856 pivot_table_put (risk, indexes, risk->n_dimensions, 1857 pivot_value_new_number (n_valid)); 1858 free (indexes); 1859} 1860 1861static int calc_directional (struct crosstabs_proc *, struct crosstabulation *, 1862 double[N_DIRECTIONAL], double[N_DIRECTIONAL], 1863 double[N_DIRECTIONAL], double[N_DIRECTIONAL]); 1864 1865/* Display directional measures. */ 1866static void 1867display_directional (struct crosstabs_proc *proc, 1868 struct crosstabulation *xt, struct pivot_table *direct) 1869{ 1870 double direct_v[N_DIRECTIONAL]; 1871 double direct_ase[N_DIRECTIONAL]; 1872 double direct_t[N_DIRECTIONAL]; 1873 double sig[N_DIRECTIONAL]; 1874 if (!calc_directional (proc, xt, direct_v, direct_ase, direct_t, sig)) 1875 return; 1876 1877 size_t *indexes = xnmalloc (direct->n_dimensions, sizeof *indexes); 1878 assert (xt->n_vars == 2); 1879 for (size_t i = 0; i < xt->n_consts; i++) 1880 indexes[i + 2] = xt->const_indexes[i]; 1881 1882 for (int i = 0; i < N_DIRECTIONAL; i++) 1883 { 1884 if (direct_v[i] == SYSMIS) 1885 continue; 1886 1887 indexes[1] = i; 1888 1889 double entries[] = { 1890 direct_v[i], direct_ase[i], direct_t[i], sig[i], 1891 }; 1892 for (size_t j = 0; j < sizeof entries / sizeof *entries; j++) 1893 if (entries[j] != SYSMIS) 1894 { 1895 indexes[0] = j; 1896 pivot_table_put (direct, indexes, direct->n_dimensions, 1897 pivot_value_new_number (entries[j])); 1898 } 1899 } 1900 1901 free (indexes); 1902} 1903 1904/* Statistical calculations. */ 1905 1906/* Returns the value of the logarithm of gamma (factorial) function for an integer 1907 argument XT. */ 1908static double 1909log_gamma_int (double xt) 1910{ 1911 double r = 0; 1912 int i; 1913 1914 for (i = 2; i < xt; i++) 1915 r += log(i); 1916 1917 return r; 1918} 1919 1920/* Calculate P_r as specified in _SPSS Statistical Algorithms_, 1921 Appendix 5. */ 1922static inline double 1923Pr (int a, int b, int c, int d) 1924{ 1925 return exp (log_gamma_int (a + b + 1.) - log_gamma_int (a + 1.) 1926 + log_gamma_int (c + d + 1.) - log_gamma_int (b + 1.) 1927 + log_gamma_int (a + c + 1.) - log_gamma_int (c + 1.) 1928 + log_gamma_int (b + d + 1.) - log_gamma_int (d + 1.) 1929 - log_gamma_int (a + b + c + d + 1.)); 1930} 1931 1932/* Swap the contents of A and B. */ 1933static inline void 1934swap (int *a, int *b) 1935{ 1936 int t = *a; 1937 *a = *b; 1938 *b = t; 1939} 1940 1941/* Calculate significance for Fisher's exact test as specified in 1942 _SPSS Statistical Algorithms_, Appendix 5. */ 1943static void 1944calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2) 1945{ 1946 int xt; 1947 double pn1; 1948 1949 if (MIN (c, d) < MIN (a, b)) 1950 swap (&a, &c), swap (&b, &d); 1951 if (MIN (b, d) < MIN (a, c)) 1952 swap (&a, &b), swap (&c, &d); 1953 if (b * c < a * d) 1954 { 1955 if (b < c) 1956 swap (&a, &b), swap (&c, &d); 1957 else 1958 swap (&a, &c), swap (&b, &d); 1959 } 1960 1961 pn1 = Pr (a, b, c, d); 1962 *fisher1 = pn1; 1963 for (xt = 1; xt <= a; xt++) 1964 { 1965 *fisher1 += Pr (a - xt, b + xt, c + xt, d - xt); 1966 } 1967 1968 *fisher2 = *fisher1; 1969 1970 for (xt = 1; xt <= b; xt++) 1971 { 1972 double p = Pr (a + xt, b - xt, c - xt, d + xt); 1973 if (p < pn1) 1974 *fisher2 += p; 1975 } 1976} 1977 1978/* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS 1979 columns with values COLS and N_ROWS rows with values ROWS. Values 1980 in the matrix sum to xt->total. */ 1981static void 1982calc_chisq (struct crosstabulation *xt, 1983 double chisq[N_CHISQ], int df[N_CHISQ], 1984 double *fisher1, double *fisher2) 1985{ 1986 chisq[0] = chisq[1] = 0.; 1987 chisq[2] = chisq[3] = chisq[4] = SYSMIS; 1988 *fisher1 = *fisher2 = SYSMIS; 1989 1990 df[0] = df[1] = (xt->ns_cols - 1) * (xt->ns_rows - 1); 1991 1992 if (xt->ns_rows <= 1 || xt->ns_cols <= 1) 1993 { 1994 chisq[0] = chisq[1] = SYSMIS; 1995 return; 1996 } 1997 1998 size_t n_cols = xt->vars[COL_VAR].n_values; 1999 FOR_EACH_POPULATED_ROW (r, xt) 2000 FOR_EACH_POPULATED_COLUMN (c, xt) 2001 { 2002 const double expected = xt->row_tot[r] * xt->col_tot[c] / xt->total; 2003 const double freq = xt->mat[n_cols * r + c]; 2004 const double residual = freq - expected; 2005 2006 chisq[0] += residual * residual / expected; 2007 if (freq) 2008 chisq[1] += freq * log (expected / freq); 2009 } 2010 2011 if (chisq[0] == 0.) 2012 chisq[0] = SYSMIS; 2013 2014 if (chisq[1] != 0.) 2015 chisq[1] *= -2.; 2016 else 2017 chisq[1] = SYSMIS; 2018 2019 /* Calculate Yates and Fisher exact test. */ 2020 if (xt->ns_cols == 2 && xt->ns_rows == 2) 2021 { 2022 double f11, f12, f21, f22; 2023 2024 { 2025 int nz_cols[2]; 2026 2027 int j = 0; 2028 FOR_EACH_POPULATED_COLUMN (c, xt) 2029 { 2030 nz_cols[j++] = c; 2031 if (j == 2) 2032 break; 2033 } 2034 assert (j == 2); 2035 2036 f11 = xt->mat[nz_cols[0]]; 2037 f12 = xt->mat[nz_cols[1]]; 2038 f21 = xt->mat[nz_cols[0] + n_cols]; 2039 f22 = xt->mat[nz_cols[1] + n_cols]; 2040 } 2041 2042 /* Yates. */ 2043 { 2044 const double xt_ = fabs (f11 * f22 - f12 * f21) - 0.5 * xt->total; 2045 2046 if (xt_ > 0.) 2047 chisq[3] = (xt->total * pow2 (xt_) 2048 / (f11 + f12) / (f21 + f22) 2049 / (f11 + f21) / (f12 + f22)); 2050 else 2051 chisq[3] = 0.; 2052 2053 df[3] = 1.; 2054 } 2055 2056 /* Fisher. */ 2057 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2); 2058 } 2059 2060 /* Calculate Mantel-Haenszel. */ 2061 if (var_is_numeric (xt->vars[ROW_VAR].var) 2062 && var_is_numeric (xt->vars[COL_VAR].var)) 2063 { 2064 double r, ase_0, ase_1; 2065 calc_r (xt, (double *) xt->vars[ROW_VAR].values, 2066 (double *) xt->vars[COL_VAR].values, 2067 &r, &ase_0, &ase_1); 2068 2069 chisq[4] = (xt->total - 1.) * r * r; 2070 df[4] = 1; 2071 } 2072} 2073 2074/* Calculate the value of Pearson's r. r is stored into R, its T value into 2075 T, and standard error into ERROR. The row and column values must be 2076 passed in XT and Y. */ 2077static void 2078calc_r (struct crosstabulation *xt, 2079 double *XT, double *Y, double *r, double *t, double *error) 2080{ 2081 size_t n_rows = xt->vars[ROW_VAR].n_values; 2082 size_t n_cols = xt->vars[COL_VAR].n_values; 2083 double SX, SY, S, T; 2084 double Xbar, Ybar; 2085 double sum_XYf, sum_X2Y2f; 2086 double sum_Xr, sum_X2r; 2087 double sum_Yc, sum_Y2c; 2088 int i, j; 2089 2090 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++) 2091 for (j = 0; j < n_cols; j++) 2092 { 2093 double fij = xt->mat[j + i * n_cols]; 2094 double product = XT[i] * Y[j]; 2095 double temp = fij * product; 2096 sum_XYf += temp; 2097 sum_X2Y2f += temp * product; 2098 } 2099 2100 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++) 2101 { 2102 sum_Xr += XT[i] * xt->row_tot[i]; 2103 sum_X2r += pow2 (XT[i]) * xt->row_tot[i]; 2104 } 2105 Xbar = sum_Xr / xt->total; 2106 2107 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++) 2108 { 2109 sum_Yc += Y[i] * xt->col_tot[i]; 2110 sum_Y2c += Y[i] * Y[i] * xt->col_tot[i]; 2111 } 2112 Ybar = sum_Yc / xt->total; 2113 2114 S = sum_XYf - sum_Xr * sum_Yc / xt->total; 2115 SX = sum_X2r - pow2 (sum_Xr) / xt->total; 2116 SY = sum_Y2c - pow2 (sum_Yc) / xt->total; 2117 T = sqrt (SX * SY); 2118 *r = S / T; 2119 *t = *r / sqrt (1 - pow2 (*r)) * sqrt (xt->total - 2); 2120 2121 { 2122 double s, c, y, t; 2123 2124 for (s = c = 0., i = 0; i < n_rows; i++) 2125 for (j = 0; j < n_cols; j++) 2126 { 2127 double Xresid, Yresid; 2128 double temp; 2129 2130 Xresid = XT[i] - Xbar; 2131 Yresid = Y[j] - Ybar; 2132 temp = (T * Xresid * Yresid 2133 - ((S / (2. * T)) 2134 * (Xresid * Xresid * SY + Yresid * Yresid * SX))); 2135 y = xt->mat[j + i * n_cols] * temp * temp - c; 2136 t = s + y; 2137 c = (t - s) - y; 2138 s = t; 2139 } 2140 *error = sqrt (s) / (T * T); 2141 } 2142} 2143 2144/* Calculate symmetric statistics and their asymptotic standard 2145 errors. Returns 0 if none could be calculated. */ 2146static int 2147calc_symmetric (struct crosstabs_proc *proc, struct crosstabulation *xt, 2148 double v[N_SYMMETRIC], double ase[N_SYMMETRIC], 2149 double t[N_SYMMETRIC], 2150 double somers_d_v[3], double somers_d_ase[3], 2151 double somers_d_t[3]) 2152{ 2153 size_t n_rows = xt->vars[ROW_VAR].n_values; 2154 size_t n_cols = xt->vars[COL_VAR].n_values; 2155 int q, i; 2156 2157 q = MIN (xt->ns_rows, xt->ns_cols); 2158 if (q <= 1) 2159 return 0; 2160 2161 for (i = 0; i < N_SYMMETRIC; i++) 2162 v[i] = ase[i] = t[i] = SYSMIS; 2163 2164 /* Phi, Cramer's V, contingency coefficient. */ 2165 if (proc->statistics & ((1u << CRS_ST_PHI) | (1u << CRS_ST_CC))) 2166 { 2167 double Xp = 0.; /* Pearson chi-square. */ 2168 2169 FOR_EACH_POPULATED_ROW (r, xt) 2170 FOR_EACH_POPULATED_COLUMN (c, xt) 2171 { 2172 double expected = xt->row_tot[r] * xt->col_tot[c] / xt->total; 2173 double freq = xt->mat[n_cols * r + c]; 2174 double residual = freq - expected; 2175 2176 Xp += residual * residual / expected; 2177 } 2178 2179 if (proc->statistics & (1u << CRS_ST_PHI)) 2180 { 2181 v[0] = sqrt (Xp / xt->total); 2182 v[1] = sqrt (Xp / (xt->total * (q - 1))); 2183 } 2184 if (proc->statistics & (1u << CRS_ST_CC)) 2185 v[2] = sqrt (Xp / (Xp + xt->total)); 2186 } 2187 2188 if (proc->statistics & ((1u << CRS_ST_BTAU) | (1u << CRS_ST_CTAU) 2189 | (1u << CRS_ST_GAMMA) | (1u << CRS_ST_D))) 2190 { 2191 double *cum; 2192 double Dr, Dc; 2193 double P, Q; 2194 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum; 2195 double btau_var; 2196 int r, c; 2197 2198 Dr = Dc = pow2 (xt->total); 2199 for (r = 0; r < n_rows; r++) 2200 Dr -= pow2 (xt->row_tot[r]); 2201 for (c = 0; c < n_cols; c++) 2202 Dc -= pow2 (xt->col_tot[c]); 2203 2204 cum = xnmalloc (n_cols * n_rows, sizeof *cum); 2205 for (c = 0; c < n_cols; c++) 2206 { 2207 double ct = 0.; 2208 2209 for (r = 0; r < n_rows; r++) 2210 cum[c + r * n_cols] = ct += xt->mat[c + r * n_cols]; 2211 } 2212 2213 /* P and Q. */ 2214 { 2215 int i, j; 2216 double Cij, Dij; 2217 2218 P = Q = 0.; 2219 for (i = 0; i < n_rows; i++) 2220 { 2221 Cij = Dij = 0.; 2222 2223 for (j = 1; j < n_cols; j++) 2224 Cij += xt->col_tot[j] - cum[j + i * n_cols]; 2225 2226 if (i > 0) 2227 for (j = 1; j < n_cols; j++) 2228 Dij += cum[j + (i - 1) * n_cols]; 2229 2230 for (j = 0;;) 2231 { 2232 double fij = xt->mat[j + i * n_cols]; 2233 P += fij * Cij; 2234 Q += fij * Dij; 2235 2236 if (++j == n_cols) 2237 break; 2238 assert (j < n_cols); 2239 2240 Cij -= xt->col_tot[j] - cum[j + i * n_cols]; 2241 Dij += xt->col_tot[j - 1] - cum[j - 1 + i * n_cols]; 2242 2243 if (i > 0) 2244 { 2245 Cij += cum[j - 1 + (i - 1) * n_cols]; 2246 Dij -= cum[j + (i - 1) * n_cols]; 2247 } 2248 } 2249 } 2250 } 2251 2252 if (proc->statistics & (1u << CRS_ST_BTAU)) 2253 v[3] = (P - Q) / sqrt (Dr * Dc); 2254 if (proc->statistics & (1u << CRS_ST_CTAU)) 2255 v[4] = (q * (P - Q)) / (pow2 (xt->total) * (q - 1)); 2256 if (proc->statistics & (1u << CRS_ST_GAMMA)) 2257 v[5] = (P - Q) / (P + Q); 2258 2259 /* ASE for tau-b, tau-c, gamma. Calculations could be 2260 eliminated here, at expense of memory. */ 2261 { 2262 int i, j; 2263 double Cij, Dij; 2264 2265 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.; 2266 for (i = 0; i < n_rows; i++) 2267 { 2268 Cij = Dij = 0.; 2269 2270 for (j = 1; j < n_cols; j++) 2271 Cij += xt->col_tot[j] - cum[j + i * n_cols]; 2272 2273 if (i > 0) 2274 for (j = 1; j < n_cols; j++) 2275 Dij += cum[j + (i - 1) * n_cols]; 2276 2277 for (j = 0;;) 2278 { 2279 double fij = xt->mat[j + i * n_cols]; 2280 2281 if (proc->statistics & (1u << CRS_ST_BTAU)) 2282 { 2283 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij) 2284 + v[3] * (xt->row_tot[i] * Dc 2285 + xt->col_tot[j] * Dr)); 2286 btau_cum += fij * temp * temp; 2287 } 2288 2289 { 2290 const double temp = Cij - Dij; 2291 ctau_cum += fij * temp * temp; 2292 } 2293 2294 if (proc->statistics & (1u << CRS_ST_GAMMA)) 2295 { 2296 const double temp = Q * Cij - P * Dij; 2297 gamma_cum += fij * temp * temp; 2298 } 2299 2300 if (proc->statistics & (1u << CRS_ST_D)) 2301 { 2302 d_yx_cum += fij * pow2 (Dr * (Cij - Dij) 2303 - (P - Q) * (xt->total - xt->row_tot[i])); 2304 d_xy_cum += fij * pow2 (Dc * (Dij - Cij) 2305 - (Q - P) * (xt->total - xt->col_tot[j])); 2306 } 2307 2308 if (++j == n_cols) 2309 break; 2310 assert (j < n_cols); 2311 2312 Cij -= xt->col_tot[j] - cum[j + i * n_cols]; 2313 Dij += xt->col_tot[j - 1] - cum[j - 1 + i * n_cols]; 2314 2315 if (i > 0) 2316 { 2317 Cij += cum[j - 1 + (i - 1) * n_cols]; 2318 Dij -= cum[j + (i - 1) * n_cols]; 2319 } 2320 } 2321 } 2322 } 2323 2324 btau_var = ((btau_cum 2325 - (xt->total * pow2 (xt->total * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc)))) 2326 / pow2 (Dr * Dc)); 2327 if (proc->statistics & (1u << CRS_ST_BTAU)) 2328 { 2329 ase[3] = sqrt (btau_var); 2330 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / xt->total) 2331 / (Dr * Dc))); 2332 } 2333 if (proc->statistics & (1u << CRS_ST_CTAU)) 2334 { 2335 ase[4] = ((2 * q / ((q - 1) * pow2 (xt->total))) 2336 * sqrt (ctau_cum - (P - Q) * (P - Q) / xt->total)); 2337 t[4] = v[4] / ase[4]; 2338 } 2339 if (proc->statistics & (1u << CRS_ST_GAMMA)) 2340 { 2341 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum)); 2342 t[5] = v[5] / (2. / (P + Q) 2343 * sqrt (ctau_cum - (P - Q) * (P - Q) / xt->total)); 2344 } 2345 if (proc->statistics & (1u << CRS_ST_D)) 2346 { 2347 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr)); 2348 somers_d_ase[0] = SYSMIS; 2349 somers_d_t[0] = (somers_d_v[0] 2350 / (4 / (Dc + Dr) 2351 * sqrt (ctau_cum - pow2 (P - Q) / xt->total))); 2352 somers_d_v[1] = (P - Q) / Dc; 2353 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum); 2354 somers_d_t[1] = (somers_d_v[1] 2355 / (2. / Dc 2356 * sqrt (ctau_cum - pow2 (P - Q) / xt->total))); 2357 somers_d_v[2] = (P - Q) / Dr; 2358 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum); 2359 somers_d_t[2] = (somers_d_v[2] 2360 / (2. / Dr 2361 * sqrt (ctau_cum - pow2 (P - Q) / xt->total))); 2362 } 2363 2364 free (cum); 2365 } 2366 2367 /* Spearman correlation, Pearson's r. */ 2368 if (proc->statistics & (1u << CRS_ST_CORR)) 2369 { 2370 double *R = xmalloc (sizeof *R * n_rows); 2371 double *C = xmalloc (sizeof *C * n_cols); 2372 2373 { 2374 double y, t, c = 0., s = 0.; 2375 int i = 0; 2376 2377 for (;;) 2378 { 2379 R[i] = s + (xt->row_tot[i] + 1.) / 2.; 2380 y = xt->row_tot[i] - c; 2381 t = s + y; 2382 c = (t - s) - y; 2383 s = t; 2384 if (++i == n_rows) 2385 break; 2386 assert (i < n_rows); 2387 } 2388 } 2389 2390 { 2391 double y, t, c = 0., s = 0.; 2392 int j = 0; 2393 2394 for (;;) 2395 { 2396 C[j] = s + (xt->col_tot[j] + 1.) / 2; 2397 y = xt->col_tot[j] - c; 2398 t = s + y; 2399 c = (t - s) - y; 2400 s = t; 2401 if (++j == n_cols) 2402 break; 2403 assert (j < n_cols); 2404 } 2405 } 2406 2407 calc_r (xt, R, C, &v[6], &t[6], &ase[6]); 2408 2409 free (R); 2410 free (C); 2411 2412 calc_r (xt, (double *) xt->vars[ROW_VAR].values, 2413 (double *) xt->vars[COL_VAR].values, 2414 &v[7], &t[7], &ase[7]); 2415 } 2416 2417 /* Cohen's kappa. */ 2418 if (proc->statistics & (1u << CRS_ST_KAPPA) && xt->ns_rows == xt->ns_cols) 2419 { 2420 double ase_under_h0; 2421 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci; 2422 int i, j; 2423 2424 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0; 2425 i < xt->ns_rows; i++, j++) 2426 { 2427 double prod, sum; 2428 2429 while (xt->col_tot[j] == 0.) 2430 j++; 2431 2432 prod = xt->row_tot[i] * xt->col_tot[j]; 2433 sum = xt->row_tot[i] + xt->col_tot[j]; 2434 2435 sum_fii += xt->mat[j + i * n_cols]; 2436 sum_rici += prod; 2437 sum_fiiri_ci += xt->mat[j + i * n_cols] * sum; 2438 sum_riciri_ci += prod * sum; 2439 } 2440 for (sum_fijri_ci2 = 0., i = 0; i < xt->ns_rows; i++) 2441 for (j = 0; j < xt->ns_cols; j++) 2442 { 2443 double sum = xt->row_tot[i] + xt->col_tot[j]; 2444 sum_fijri_ci2 += xt->mat[j + i * n_cols] * sum * sum; 2445 } 2446 2447 v[8] = (xt->total * sum_fii - sum_rici) / (pow2 (xt->total) - sum_rici); 2448 2449 ase_under_h0 = sqrt ((pow2 (xt->total) * sum_rici 2450 + sum_rici * sum_rici 2451 - xt->total * sum_riciri_ci) 2452 / (xt->total * (pow2 (xt->total) - sum_rici) * (pow2 (xt->total) - sum_rici))); 2453 2454 ase[8] = sqrt (xt->total * (((sum_fii * (xt->total - sum_fii)) 2455 / pow2 (pow2 (xt->total) - sum_rici)) 2456 + ((2. * (xt->total - sum_fii) 2457 * (2. * sum_fii * sum_rici 2458 - xt->total * sum_fiiri_ci)) 2459 / pow3 (pow2 (xt->total) - sum_rici)) 2460 + (pow2 (xt->total - sum_fii) 2461 * (xt->total * sum_fijri_ci2 - 4. 2462 * sum_rici * sum_rici) 2463 / pow4 (pow2 (xt->total) - sum_rici)))); 2464 2465 t[8] = v[8] / ase_under_h0; 2466 } 2467 2468 return 1; 2469} 2470 2471/* Calculate risk estimate. */ 2472static bool 2473calc_risk (struct crosstabulation *xt, 2474 double *value, double *upper, double *lower, union value *c, 2475 double *n_valid) 2476{ 2477 size_t n_cols = xt->vars[COL_VAR].n_values; 2478 double f11, f12, f21, f22; 2479 double v; 2480 2481 for (int i = 0; i < 3; i++) 2482 value[i] = upper[i] = lower[i] = SYSMIS; 2483 2484 if (xt->ns_rows != 2 || xt->ns_cols != 2) 2485 return false; 2486 2487 { 2488 /* Find populated columns. */ 2489 int nz_cols[2]; 2490 int n = 0; 2491 FOR_EACH_POPULATED_COLUMN (c, xt) 2492 nz_cols[n++] = c; 2493 assert (n == 2); 2494 2495 /* Find populated rows. */ 2496 int nz_rows[2]; 2497 n = 0; 2498 FOR_EACH_POPULATED_ROW (r, xt) 2499 nz_rows[n++] = r; 2500 assert (n == 2); 2501 2502 f11 = xt->mat[nz_cols[0] + n_cols * nz_rows[0]]; 2503 f12 = xt->mat[nz_cols[1] + n_cols * nz_rows[0]]; 2504 f21 = xt->mat[nz_cols[0] + n_cols * nz_rows[1]]; 2505 f22 = xt->mat[nz_cols[1] + n_cols * nz_rows[1]]; 2506 *n_valid = f11 + f12 + f21 + f22; 2507 2508 c[0] = xt->vars[COL_VAR].values[nz_cols[0]]; 2509 c[1] = xt->vars[COL_VAR].values[nz_cols[1]]; 2510 } 2511 2512 value[0] = (f11 * f22) / (f12 * f21); 2513 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22); 2514 lower[0] = value[0] * exp (-1.960 * v); 2515 upper[0] = value[0] * exp (1.960 * v); 2516 2517 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12)); 2518 v = sqrt ((f12 / (f11 * (f11 + f12))) 2519 + (f22 / (f21 * (f21 + f22)))); 2520 lower[1] = value[1] * exp (-1.960 * v); 2521 upper[1] = value[1] * exp (1.960 * v); 2522 2523 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12)); 2524 v = sqrt ((f11 / (f12 * (f11 + f12))) 2525 + (f21 / (f22 * (f21 + f22)))); 2526 lower[2] = value[2] * exp (-1.960 * v); 2527 upper[2] = value[2] * exp (1.960 * v); 2528 2529 return true; 2530} 2531 2532/* Calculate directional measures. */ 2533static int 2534calc_directional (struct crosstabs_proc *proc, struct crosstabulation *xt, 2535 double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL], 2536 double t[N_DIRECTIONAL], double sig[N_DIRECTIONAL]) 2537{ 2538 size_t n_rows = xt->vars[ROW_VAR].n_values; 2539 size_t n_cols = xt->vars[COL_VAR].n_values; 2540 for (int i = 0; i < N_DIRECTIONAL; i++) 2541 v[i] = ase[i] = t[i] = sig[i] = SYSMIS; 2542 2543 /* Lambda. */ 2544 if (proc->statistics & (1u << CRS_ST_LAMBDA)) 2545 { 2546 /* Find maximum for each row and their sum. */ 2547 double *fim = xnmalloc (n_rows, sizeof *fim); 2548 int *fim_index = xnmalloc (n_rows, sizeof *fim_index); 2549 double sum_fim = 0.0; 2550 for (int i = 0; i < n_rows; i++) 2551 { 2552 double max = xt->mat[i * n_cols]; 2553 int index = 0; 2554 2555 for (int j = 1; j < n_cols; j++) 2556 if (xt->mat[j + i * n_cols] > max) 2557 { 2558 max = xt->mat[j + i * n_cols]; 2559 index = j; 2560 } 2561 2562 fim[i] = max; 2563 sum_fim += max; 2564 fim_index[i] = index; 2565 } 2566 2567 /* Find maximum for each column. */ 2568 double *fmj = xnmalloc (n_cols, sizeof *fmj); 2569 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index); 2570 double sum_fmj = 0.0; 2571 for (int j = 0; j < n_cols; j++) 2572 { 2573 double max = xt->mat[j]; 2574 int index = 0; 2575 2576 for (int i = 1; i < n_rows; i++) 2577 if (xt->mat[j + i * n_cols] > max) 2578 { 2579 max = xt->mat[j + i * n_cols]; 2580 index = i; 2581 } 2582 2583 fmj[j] = max; 2584 sum_fmj += max; 2585 fmj_index[j] = index; 2586 } 2587 2588 /* Find maximum row total. */ 2589 double rm = xt->row_tot[0]; 2590 int rm_index = 0; 2591 for (int i = 1; i < n_rows; i++) 2592 if (xt->row_tot[i] > rm) 2593 { 2594 rm = xt->row_tot[i]; 2595 rm_index = i; 2596 } 2597 2598 /* Find maximum column total. */ 2599 double cm = xt->col_tot[0]; 2600 int cm_index = 0; 2601 for (int j = 1; j < n_cols; j++) 2602 if (xt->col_tot[j] > cm) 2603 { 2604 cm = xt->col_tot[j]; 2605 cm_index = j; 2606 } 2607 2608 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * xt->total - rm - cm); 2609 v[1] = (sum_fmj - rm) / (xt->total - rm); 2610 v[2] = (sum_fim - cm) / (xt->total - cm); 2611 2612 /* ASE1 for Y given XT. */ 2613 { 2614 double accum = 0.0; 2615 for (int i = 0; i < n_rows; i++) 2616 if (cm_index == fim_index[i]) 2617 accum += fim[i]; 2618 ase[2] = sqrt ((xt->total - sum_fim) * (sum_fim + cm - 2. * accum) 2619 / pow3 (xt->total - cm)); 2620 } 2621 2622 /* ASE0 for Y given XT. */ 2623 { 2624 double accum = 0.0; 2625 for (int i = 0; i < n_rows; i++) 2626 if (cm_index != fim_index[i]) 2627 accum += (xt->mat[i * n_cols + fim_index[i]] 2628 + xt->mat[i * n_cols + cm_index]); 2629 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / xt->total) / (xt->total - cm)); 2630 } 2631 2632 /* ASE1 for XT given Y. */ 2633 { 2634 double accum = 0.0; 2635 for (int j = 0; j < n_cols; j++) 2636 if (rm_index == fmj_index[j]) 2637 accum += fmj[j]; 2638 ase[1] = sqrt ((xt->total - sum_fmj) * (sum_fmj + rm - 2. * accum) 2639 / pow3 (xt->total - rm)); 2640 } 2641 2642 /* ASE0 for XT given Y. */ 2643 { 2644 double accum = 0.0; 2645 for (int j = 0; j < n_cols; j++) 2646 if (rm_index != fmj_index[j]) 2647 accum += (xt->mat[j + n_cols * fmj_index[j]] 2648 + xt->mat[j + n_cols * rm_index]); 2649 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / xt->total) / (xt->total - rm)); 2650 } 2651 2652 /* Symmetric ASE0 and ASE1. */ 2653 { 2654 double accum0 = 0.0; 2655 double accum1 = 0.0; 2656 for (int i = 0; i < n_rows; i++) 2657 for (int j = 0; j < n_cols; j++) 2658 { 2659 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j); 2660 int temp1 = (i == rm_index) + (j == cm_index); 2661 accum0 += xt->mat[j + i * n_cols] * pow2 (temp0 - temp1); 2662 accum1 += (xt->mat[j + i * n_cols] 2663 * pow2 (temp0 + (v[0] - 1.) * temp1)); 2664 } 2665 ase[0] = sqrt (accum1 - 4. * xt->total * v[0] * v[0]) / (2. * xt->total - rm - cm); 2666 t[0] = v[0] / (sqrt (accum0 - pow2 (sum_fim + sum_fmj - cm - rm) / xt->total) 2667 / (2. * xt->total - rm - cm)); 2668 } 2669 2670 for (int i = 0; i < 3; i++) 2671 sig[i] = 2 * gsl_cdf_ugaussian_Q (t[i]); 2672 2673 free (fim); 2674 free (fim_index); 2675 free (fmj); 2676 free (fmj_index); 2677 2678 /* Tau. */ 2679 { 2680 double sum_fij2_ri = 0.0; 2681 double sum_fij2_ci = 0.0; 2682 FOR_EACH_POPULATED_ROW (i, xt) 2683 FOR_EACH_POPULATED_COLUMN (j, xt) 2684 { 2685 double temp = pow2 (xt->mat[j + i * n_cols]); 2686 sum_fij2_ri += temp / xt->row_tot[i]; 2687 sum_fij2_ci += temp / xt->col_tot[j]; 2688 } 2689 2690 double sum_ri2 = 0.0; 2691 for (int i = 0; i < n_rows; i++) 2692 sum_ri2 += pow2 (xt->row_tot[i]); 2693 2694 double sum_cj2 = 0.0; 2695 for (int j = 0; j < n_cols; j++) 2696 sum_cj2 += pow2 (xt->col_tot[j]); 2697 2698 v[3] = (xt->total * sum_fij2_ci - sum_ri2) / (pow2 (xt->total) - sum_ri2); 2699 v[4] = (xt->total * sum_fij2_ri - sum_cj2) / (pow2 (xt->total) - sum_cj2); 2700 } 2701 } 2702 2703 if (proc->statistics & (1u << CRS_ST_UC)) 2704 { 2705 double UX = 0.0; 2706 FOR_EACH_POPULATED_ROW (i, xt) 2707 UX -= xt->row_tot[i] / xt->total * log (xt->row_tot[i] / xt->total); 2708 2709 double UY = 0.0; 2710 FOR_EACH_POPULATED_COLUMN (j, xt) 2711 UY -= xt->col_tot[j] / xt->total * log (xt->col_tot[j] / xt->total); 2712 2713 double UXY = 0.0; 2714 double P = 0.0; 2715 for (int i = 0; i < n_rows; i++) 2716 for (int j = 0; j < n_cols; j++) 2717 { 2718 double entry = xt->mat[j + i * n_cols]; 2719 2720 if (entry <= 0.) 2721 continue; 2722 2723 P += entry * pow2 (log (xt->col_tot[j] * xt->row_tot[i] / (xt->total * entry))); 2724 UXY -= entry / xt->total * log (entry / xt->total); 2725 } 2726 2727 double ase1_yx = 0.0; 2728 double ase1_xy = 0.0; 2729 double ase1_sym = 0.0; 2730 for (int i = 0; i < n_rows; i++) 2731 for (int j = 0; j < n_cols; j++) 2732 { 2733 double entry = xt->mat[j + i * n_cols]; 2734 2735 if (entry <= 0.) 2736 continue; 2737 2738 ase1_yx += entry * pow2 (UY * log (entry / xt->row_tot[i]) 2739 + (UX - UXY) * log (xt->col_tot[j] / xt->total)); 2740 ase1_xy += entry * pow2 (UX * log (entry / xt->col_tot[j]) 2741 + (UY - UXY) * log (xt->row_tot[i] / xt->total)); 2742 ase1_sym += entry * pow2 ((UXY 2743 * log (xt->row_tot[i] * xt->col_tot[j] / pow2 (xt->total))) 2744 - (UX + UY) * log (entry / xt->total)); 2745 } 2746 2747 v[5] = 2. * ((UX + UY - UXY) / (UX + UY)); 2748 ase[5] = (2. / (xt->total * pow2 (UX + UY))) * sqrt (ase1_sym); 2749 t[5] = SYSMIS; 2750 2751 v[6] = (UX + UY - UXY) / UX; 2752 ase[6] = sqrt (ase1_xy) / (xt->total * UX * UX); 2753 t[6] = v[6] / (sqrt (P - xt->total * pow2 (UX + UY - UXY)) / (xt->total * UX)); 2754 2755 v[7] = (UX + UY - UXY) / UY; 2756 ase[7] = sqrt (ase1_yx) / (xt->total * UY * UY); 2757 t[7] = v[7] / (sqrt (P - xt->total * pow2 (UX + UY - UXY)) / (xt->total * UY)); 2758 } 2759 2760 /* Somers' D. */ 2761 if (proc->statistics & (1u << CRS_ST_D)) 2762 { 2763 double v_dummy[N_SYMMETRIC]; 2764 double ase_dummy[N_SYMMETRIC]; 2765 double t_dummy[N_SYMMETRIC]; 2766 double somers_d_v[3]; 2767 double somers_d_ase[3]; 2768 double somers_d_t[3]; 2769 2770 if (calc_symmetric (proc, xt, v_dummy, ase_dummy, t_dummy, 2771 somers_d_v, somers_d_ase, somers_d_t)) 2772 { 2773 for (int i = 0; i < 3; i++) 2774 { 2775 v[8 + i] = somers_d_v[i]; 2776 ase[8 + i] = somers_d_ase[i]; 2777 t[8 + i] = somers_d_t[i]; 2778 sig[8 + i] = 2 * gsl_cdf_ugaussian_Q (fabs (somers_d_t[i])); 2779 } 2780 } 2781 } 2782 2783 /* Eta. */ 2784 if (proc->statistics & (1u << CRS_ST_ETA)) 2785 { 2786 /* X dependent. */ 2787 double sum_Xr = 0.0; 2788 double sum_X2r = 0.0; 2789 for (int i = 0; i < n_rows; i++) 2790 { 2791 sum_Xr += xt->vars[ROW_VAR].values[i].f * xt->row_tot[i]; 2792 sum_X2r += pow2 (xt->vars[ROW_VAR].values[i].f) * xt->row_tot[i]; 2793 } 2794 double SX = sum_X2r - pow2 (sum_Xr) / xt->total; 2795 2796 double SXW = 0.0; 2797 FOR_EACH_POPULATED_COLUMN (j, xt) 2798 { 2799 double cum = 0.0; 2800 2801 for (int i = 0; i < n_rows; i++) 2802 { 2803 SXW += (pow2 (xt->vars[ROW_VAR].values[i].f) 2804 * xt->mat[j + i * n_cols]); 2805 cum += (xt->vars[ROW_VAR].values[i].f 2806 * xt->mat[j + i * n_cols]); 2807 } 2808 2809 SXW -= cum * cum / xt->col_tot[j]; 2810 } 2811 v[11] = sqrt (1. - SXW / SX); 2812 2813 /* Y dependent. */ 2814 double sum_Yc = 0.0; 2815 double sum_Y2c = 0.0; 2816 for (int i = 0; i < n_cols; i++) 2817 { 2818 sum_Yc += xt->vars[COL_VAR].values[i].f * xt->col_tot[i]; 2819 sum_Y2c += pow2 (xt->vars[COL_VAR].values[i].f) * xt->col_tot[i]; 2820 } 2821 double SY = sum_Y2c - pow2 (sum_Yc) / xt->total; 2822 2823 double SYW = 0.0; 2824 FOR_EACH_POPULATED_ROW (i, xt) 2825 { 2826 double cum = 0.0; 2827 for (int j = 0; j < n_cols; j++) 2828 { 2829 SYW += (pow2 (xt->vars[COL_VAR].values[j].f) 2830 * xt->mat[j + i * n_cols]); 2831 cum += (xt->vars[COL_VAR].values[j].f 2832 * xt->mat[j + i * n_cols]); 2833 } 2834 2835 SYW -= cum * cum / xt->row_tot[i]; 2836 } 2837 v[12] = sqrt (1. - SYW / SY); 2838 } 2839 2840 return 1; 2841} 2842 2843/* 2844 Local Variables: 2845 mode: c 2846 End: 2847*/ 2848