1/* PSPP - a program for statistical analysis.
2   Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2013, 2014, 2016 Free Software Foundation, Inc.
3
4   This program is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation, either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
16
17/* FIXME:
18
19   - How to calculate significance of some symmetric and directional measures?
20   - How to calculate ASE for symmetric Somers ' d?
21   - How to calculate ASE for Goodman and Kruskal's tau?
22   - How to calculate approx. T of symmetric uncertainty coefficient?
23
24*/
25
26#include <config.h>
27
28#include <ctype.h>
29#include <float.h>
30#include <gsl/gsl_cdf.h>
31#include <stdlib.h>
32#include <stdio.h>
33
34#include "data/case.h"
35#include "data/casegrouper.h"
36#include "data/casereader.h"
37#include "data/data-out.h"
38#include "data/dataset.h"
39#include "data/dictionary.h"
40#include "data/format.h"
41#include "data/value-labels.h"
42#include "data/variable.h"
43#include "language/command.h"
44#include "language/stats/freq.h"
45#include "language/dictionary/split-file.h"
46#include "language/lexer/lexer.h"
47#include "language/lexer/variable-parser.h"
48#include "libpspp/array.h"
49#include "libpspp/assertion.h"
50#include "libpspp/compiler.h"
51#include "libpspp/hash-functions.h"
52#include "libpspp/hmap.h"
53#include "libpspp/hmapx.h"
54#include "libpspp/message.h"
55#include "libpspp/misc.h"
56#include "libpspp/pool.h"
57#include "libpspp/str.h"
58#include "output/pivot-table.h"
59#include "output/chart-item.h"
60#include "output/charts/barchart.h"
61
62#include "gl/minmax.h"
63#include "gl/xalloc.h"
64#include "gl/xsize.h"
65
66#include "gettext.h"
67#define _(msgid) gettext (msgid)
68#define N_(msgid) msgid
69
70/* (headers) */
71
72/* (specification)
73   crosstabs (crs_):
74     *^tables=custom;
75     +variables=custom;
76     missing=miss:!table/include/report;
77     count=roundwhat:asis/case/!cell,
78           roundhow:!round/truncate;
79     +write[wr_]=none,cells,all;
80     +format=val:!avalue/dvalue,
81	     indx:!noindex/index,
82	     tabl:!tables/notables,
83	     box:!box/nobox,
84	     pivot:!pivot/nopivot;
85     +barchart=;
86     +cells[cl_]=count,expected,row,column,total,residual,sresidual,
87		 asresidual,all,none;
88     +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
89		      kappa,eta,corr,all.
90*/
91/* (declarations) */
92/* (functions) */
93
94/* Number of chi-square statistics. */
95#define N_CHISQ 5
96
97/* Number of symmetric statistics. */
98#define N_SYMMETRIC 9
99
100/* Number of directional statistics. */
101#define N_DIRECTIONAL 13
102
103
104/* Indexes into the 'vars' member of struct crosstabulation and
105   struct crosstab member. */
106enum
107  {
108    ROW_VAR = 0,                /* Row variable. */
109    COL_VAR = 1                 /* Column variable. */
110    /* Higher indexes cause multiple tables to be output. */
111  };
112
113struct xtab_var
114  {
115    const struct variable *var;
116    union value *values;
117    size_t n_values;
118  };
119
120/* A crosstabulation of 2 or more variables. */
121struct crosstabulation
122  {
123    struct crosstabs_proc *proc;
124    struct fmt_spec weight_format; /* Format for weight variable. */
125    double missing;             /* Weight of missing cases. */
126
127    /* Variables (2 or more). */
128    int n_vars;
129    struct xtab_var *vars;
130
131    /* Constants (0 or more). */
132    int n_consts;
133    struct xtab_var *const_vars;
134    size_t *const_indexes;
135
136    /* Data. */
137    struct hmap data;
138    struct freq **entries;
139    size_t n_entries;
140
141    /* Number of statistically interesting columns/rows
142       (columns/rows with data in them). */
143    int ns_cols, ns_rows;
144
145    /* Matrix contents. */
146    double *mat;		/* Matrix proper. */
147    double *row_tot;		/* Row totals. */
148    double *col_tot;		/* Column totals. */
149    double total;		/* Grand total. */
150  };
151
152/* Integer mode variable info. */
153struct var_range
154  {
155    struct hmap_node hmap_node; /* In struct crosstabs_proc var_ranges map. */
156    const struct variable *var; /* The variable. */
157    int min;			/* Minimum value. */
158    int max;			/* Maximum value + 1. */
159    int count;			/* max - min. */
160  };
161
162struct crosstabs_proc
163  {
164    const struct dictionary *dict;
165    enum { INTEGER, GENERAL } mode;
166    enum mv_class exclude;
167    bool pivot;
168    bool barchart;
169    bool bad_warn;
170    struct fmt_spec weight_format;
171
172    /* Variables specifies on VARIABLES. */
173    const struct variable **variables;
174    size_t n_variables;
175    struct hmap var_ranges;
176
177    /* TABLES. */
178    struct crosstabulation *pivots;
179    int n_pivots;
180
181    /* CELLS. */
182    int n_cells;		/* Number of cells requested. */
183    unsigned int cells;         /* Bit k is 1 if cell k is requested. */
184    int a_cells[CRS_CL_count];  /* 0...n_cells-1 are the requested cells. */
185
186    /* Rounding of cells. */
187    bool round_case_weights;    /* Round case weights? */
188    bool round_cells;           /* If !round_case_weights, round cells? */
189    bool round_down;            /* Round down? (otherwise to nearest) */
190
191    /* STATISTICS. */
192    unsigned int statistics;    /* Bit k is 1 if statistic k is requested. */
193
194    bool descending;            /* True if descending sort order is requested. */
195  };
196
197const struct var_range *get_var_range (const struct crosstabs_proc *,
198                                       const struct variable *);
199
200static bool should_tabulate_case (const struct crosstabulation *,
201                                  const struct ccase *, enum mv_class exclude);
202static void tabulate_general_case (struct crosstabulation *, const struct ccase *,
203                                   double weight);
204static void tabulate_integer_case (struct crosstabulation *, const struct ccase *,
205                                   double weight);
206static void postcalc (struct crosstabs_proc *);
207
208static double
209round_weight (const struct crosstabs_proc *proc, double weight)
210{
211  return proc->round_down ? floor (weight) : floor (weight + 0.5);
212}
213
214#define FOR_EACH_POPULATED_COLUMN(C, XT) \
215  for (int C = next_populated_column (0, XT); \
216       C < (XT)->vars[COL_VAR].n_values;      \
217       C = next_populated_column (C + 1, XT))
218static int
219next_populated_column (int c, const struct crosstabulation *xt)
220{
221  int n_columns = xt->vars[COL_VAR].n_values;
222  for (; c < n_columns; c++)
223    if (xt->col_tot[c])
224      break;
225  return c;
226}
227
228#define FOR_EACH_POPULATED_ROW(R, XT) \
229  for (int R = next_populated_row (0, XT); R < (XT)->vars[ROW_VAR].n_values; \
230       R = next_populated_row (R + 1, XT))
231static int
232next_populated_row (int r, const struct crosstabulation *xt)
233{
234  int n_rows = xt->vars[ROW_VAR].n_values;
235  for (; r < n_rows; r++)
236    if (xt->row_tot[r])
237      break;
238  return r;
239}
240
241/* Parses and executes the CROSSTABS procedure. */
242int
243cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
244{
245  struct var_range *range, *next_range;
246  struct crosstabs_proc proc;
247  struct casegrouper *grouper;
248  struct casereader *input, *group;
249  struct cmd_crosstabs cmd;
250  struct crosstabulation *xt;
251  int result;
252  bool ok;
253  int i;
254
255  proc.dict = dataset_dict (ds);
256  proc.bad_warn = true;
257  proc.variables = NULL;
258  proc.n_variables = 0;
259  hmap_init (&proc.var_ranges);
260  proc.pivots = NULL;
261  proc.n_pivots = 0;
262  proc.descending = false;
263  proc.weight_format = *dict_get_weight_format (dataset_dict (ds));
264
265  if (!parse_crosstabs (lexer, ds, &cmd, &proc))
266    {
267      result = CMD_FAILURE;
268      goto exit;
269    }
270
271  proc.mode = proc.n_variables ? INTEGER : GENERAL;
272  proc.barchart = cmd.sbc_barchart > 0;
273
274  proc.descending = cmd.val == CRS_DVALUE;
275
276  proc.round_case_weights = cmd.sbc_count && cmd.roundwhat == CRS_CASE;
277  proc.round_cells = cmd.sbc_count && cmd.roundwhat == CRS_CELL;
278  proc.round_down = cmd.roundhow == CRS_TRUNCATE;
279
280  /* CELLS. */
281  if (!cmd.sbc_cells)
282    proc.cells = 1u << CRS_CL_COUNT;
283  else if (cmd.a_cells[CRS_CL_ALL])
284    proc.cells = UINT_MAX;
285  else
286    {
287      proc.cells = 0;
288      for (i = 0; i < CRS_CL_count; i++)
289	if (cmd.a_cells[i])
290	  proc.cells |= 1u << i;
291      if (proc.cells == 0)
292        proc.cells = ((1u << CRS_CL_COUNT)
293                       | (1u << CRS_CL_ROW)
294                       | (1u << CRS_CL_COLUMN)
295                       | (1u << CRS_CL_TOTAL));
296    }
297  proc.cells &= ((1u << CRS_CL_count) - 1);
298  proc.cells &= ~((1u << CRS_CL_NONE) | (1u << CRS_CL_ALL));
299  proc.n_cells = 0;
300  for (i = 0; i < CRS_CL_count; i++)
301    if (proc.cells & (1u << i))
302      proc.a_cells[proc.n_cells++] = i;
303
304  /* STATISTICS. */
305  if (cmd.a_statistics[CRS_ST_ALL])
306    proc.statistics = UINT_MAX;
307  else if (cmd.sbc_statistics)
308    {
309      int i;
310
311      proc.statistics = 0;
312      for (i = 0; i < CRS_ST_count; i++)
313	if (cmd.a_statistics[i])
314	  proc.statistics |= 1u << i;
315      if (proc.statistics == 0)
316        proc.statistics |= 1u << CRS_ST_CHISQ;
317    }
318  else
319    proc.statistics = 0;
320
321  /* MISSING. */
322  proc.exclude = (cmd.miss == CRS_TABLE ? MV_ANY
323                   : cmd.miss == CRS_INCLUDE ? MV_SYSTEM
324                   : MV_NEVER);
325  if (proc.mode == GENERAL && proc.exclude == MV_NEVER)
326    {
327      msg (SE, _("Missing mode %s not allowed in general mode.  "
328		 "Assuming %s."), "REPORT", "MISSING=TABLE");
329      proc.exclude = MV_ANY;
330    }
331
332  /* PIVOT. */
333  proc.pivot = cmd.pivot == CRS_PIVOT;
334
335  input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
336                                           NULL, NULL);
337  grouper = casegrouper_create_splits (input, dataset_dict (ds));
338  while (casegrouper_get_next_group (grouper, &group))
339    {
340      struct ccase *c;
341
342      /* Output SPLIT FILE variables. */
343      c = casereader_peek (group, 0);
344      if (c != NULL)
345        {
346          output_split_file_values (ds, c);
347          case_unref (c);
348        }
349
350      /* Initialize hash tables. */
351      for (xt = &proc.pivots[0]; xt < &proc.pivots[proc.n_pivots]; xt++)
352        hmap_init (&xt->data);
353
354      /* Tabulate. */
355      for (; (c = casereader_read (group)) != NULL; case_unref (c))
356        for (xt = &proc.pivots[0]; xt < &proc.pivots[proc.n_pivots]; xt++)
357          {
358            double weight = dict_get_case_weight (dataset_dict (ds), c,
359                                                  &proc.bad_warn);
360            if (cmd.roundwhat == CRS_CASE)
361              {
362                weight = round_weight (&proc, weight);
363                if (weight == 0.)
364                  continue;
365              }
366            if (should_tabulate_case (xt, c, proc.exclude))
367              {
368                if (proc.mode == GENERAL)
369                  tabulate_general_case (xt, c, weight);
370                else
371                  tabulate_integer_case (xt, c, weight);
372              }
373            else
374              xt->missing += weight;
375          }
376      casereader_destroy (group);
377
378      /* Output. */
379      postcalc (&proc);
380    }
381  ok = casegrouper_destroy (grouper);
382  ok = proc_commit (ds) && ok;
383
384  result = ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
385
386exit:
387  free (proc.variables);
388  HMAP_FOR_EACH_SAFE (range, next_range, struct var_range, hmap_node,
389                      &proc.var_ranges)
390    {
391      hmap_delete (&proc.var_ranges, &range->hmap_node);
392      free (range);
393    }
394  for (xt = &proc.pivots[0]; xt < &proc.pivots[proc.n_pivots]; xt++)
395    {
396      free (xt->vars);
397      free (xt->const_vars);
398      free (xt->const_indexes);
399    }
400  free (proc.pivots);
401
402  return result;
403}
404
405/* Parses the TABLES subcommand. */
406static int
407crs_custom_tables (struct lexer *lexer, struct dataset *ds,
408                   struct cmd_crosstabs *cmd UNUSED, void *proc_)
409{
410  struct crosstabs_proc *proc = proc_;
411  struct const_var_set *var_set;
412  int n_by;
413  const struct variable ***by = NULL;
414  int *by_iter;
415  size_t *by_nvar = NULL;
416  size_t nx = 1;
417  bool ok = false;
418  int i;
419
420  /* Ensure that this is a TABLES subcommand. */
421  if (!lex_match_id (lexer, "TABLES")
422      && (lex_token (lexer) != T_ID ||
423	  dict_lookup_var (dataset_dict (ds), lex_tokcstr (lexer)) == NULL)
424      && lex_token (lexer) != T_ALL)
425    return 2;
426  lex_match (lexer, T_EQUALS);
427
428  if (proc->variables != NULL)
429    var_set = const_var_set_create_from_array (proc->variables,
430                                               proc->n_variables);
431  else
432    var_set = const_var_set_create_from_dict (dataset_dict (ds));
433  assert (var_set != NULL);
434
435  for (n_by = 0; ;)
436    {
437      by = xnrealloc (by, n_by + 1, sizeof *by);
438      by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
439      if (!parse_const_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by],
440                                     PV_NO_DUPLICATE | PV_NO_SCRATCH))
441	goto done;
442      if (xalloc_oversized (nx, by_nvar[n_by]))
443        {
444          msg (SE, _("Too many cross-tabulation variables or dimensions."));
445          goto done;
446        }
447      nx *= by_nvar[n_by];
448      n_by++;
449
450      if (!lex_match (lexer, T_BY))
451	{
452	  if (n_by < 2)
453	    goto done;
454	  else
455	    break;
456	}
457    }
458
459  by_iter = xcalloc (n_by, sizeof *by_iter);
460  proc->pivots = xnrealloc (proc->pivots,
461                            proc->n_pivots + nx, sizeof *proc->pivots);
462  for (i = 0; i < nx; i++)
463    {
464      struct crosstabulation *xt = &proc->pivots[proc->n_pivots++];
465      int j;
466
467      xt->proc = proc;
468      xt->weight_format = proc->weight_format;
469      xt->missing = 0.;
470      xt->n_vars = n_by;
471      xt->vars = xcalloc (n_by, sizeof *xt->vars);
472      xt->n_consts = 0;
473      xt->const_vars = NULL;
474      xt->const_indexes = NULL;
475
476      for (j = 0; j < n_by; j++)
477        xt->vars[j].var = by[j][by_iter[j]];
478
479      for (j = n_by - 1; j >= 0; j--)
480        {
481          if (++by_iter[j] < by_nvar[j])
482            break;
483          by_iter[j] = 0;
484        }
485    }
486  free (by_iter);
487  ok = true;
488
489done:
490  /* All return paths lead here. */
491  for (i = 0; i < n_by; i++)
492    free (by[i]);
493  free (by);
494  free (by_nvar);
495
496  const_var_set_destroy (var_set);
497
498  return ok;
499}
500
501/* Parses the VARIABLES subcommand. */
502static int
503crs_custom_variables (struct lexer *lexer, struct dataset *ds,
504                      struct cmd_crosstabs *cmd UNUSED, void *proc_)
505{
506  struct crosstabs_proc *proc = proc_;
507  if (proc->n_pivots)
508    {
509      msg (SE, _("%s must be specified before %s."), "VARIABLES", "TABLES");
510      return 0;
511    }
512
513  lex_match (lexer, T_EQUALS);
514
515  for (;;)
516    {
517      size_t orig_nv = proc->n_variables;
518      size_t i;
519
520      long min, max;
521
522      if (!parse_variables_const (lexer, dataset_dict (ds),
523                                  &proc->variables, &proc->n_variables,
524                                  (PV_APPEND | PV_NUMERIC
525                                   | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
526	return 0;
527
528      if (!lex_force_match (lexer, T_LPAREN))
529	  goto lossage;
530
531      if (!lex_force_int (lexer))
532	goto lossage;
533      min = lex_integer (lexer);
534      lex_get (lexer);
535
536      lex_match (lexer, T_COMMA);
537
538      if (!lex_force_int (lexer))
539	goto lossage;
540      max = lex_integer (lexer);
541      if (max < min)
542	{
543	  msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
544	       max, min);
545	  goto lossage;
546	}
547      lex_get (lexer);
548
549      if (!lex_force_match (lexer, T_RPAREN))
550        goto lossage;
551
552      for (i = orig_nv; i < proc->n_variables; i++)
553        {
554          const struct variable *var = proc->variables[i];
555          struct var_range *vr = xmalloc (sizeof *vr);
556
557          vr->var = var;
558          vr->min = min;
559	  vr->max = max;
560	  vr->count = max - min + 1;
561          hmap_insert (&proc->var_ranges, &vr->hmap_node,
562                       hash_pointer (var, 0));
563	}
564
565      if (lex_token (lexer) == T_SLASH)
566	break;
567    }
568
569  return 1;
570
571 lossage:
572  free (proc->variables);
573  proc->variables = NULL;
574  proc->n_variables = 0;
575  return 0;
576}
577
578/* Data file processing. */
579
580const struct var_range *
581get_var_range (const struct crosstabs_proc *proc, const struct variable *var)
582{
583  if (!hmap_is_empty (&proc->var_ranges))
584    {
585      const struct var_range *range;
586
587      HMAP_FOR_EACH_IN_BUCKET (range, struct var_range, hmap_node,
588                               hash_pointer (var, 0), &proc->var_ranges)
589        if (range->var == var)
590          return range;
591    }
592
593  return NULL;
594}
595
596static bool
597should_tabulate_case (const struct crosstabulation *xt, const struct ccase *c,
598                      enum mv_class exclude)
599{
600  int j;
601  for (j = 0; j < xt->n_vars; j++)
602    {
603      const struct variable *var = xt->vars[j].var;
604      const struct var_range *range = get_var_range (xt->proc, var);
605
606      if (var_is_value_missing (var, case_data (c, var), exclude))
607        return false;
608
609      if (range != NULL)
610        {
611          double num = case_num (c, var);
612          if (num < range->min || num >= range->max + 1.)
613            return false;
614        }
615    }
616  return true;
617}
618
619static void
620tabulate_integer_case (struct crosstabulation *xt, const struct ccase *c,
621                       double weight)
622{
623  struct freq *te;
624  size_t hash;
625  int j;
626
627  hash = 0;
628  for (j = 0; j < xt->n_vars; j++)
629    {
630      /* Throw away fractional parts of values. */
631      hash = hash_int (case_num (c, xt->vars[j].var), hash);
632    }
633
634  HMAP_FOR_EACH_WITH_HASH (te, struct freq, node, hash, &xt->data)
635    {
636      for (j = 0; j < xt->n_vars; j++)
637        if ((int) case_num (c, xt->vars[j].var) != (int) te->values[j].f)
638          goto no_match;
639
640      /* Found an existing entry. */
641      te->count += weight;
642      return;
643
644    no_match: ;
645    }
646
647  /* No existing entry.  Create a new one. */
648  te = xmalloc (table_entry_size (xt->n_vars));
649  te->count = weight;
650  for (j = 0; j < xt->n_vars; j++)
651    te->values[j].f = (int) case_num (c, xt->vars[j].var);
652  hmap_insert (&xt->data, &te->node, hash);
653}
654
655static void
656tabulate_general_case (struct crosstabulation *xt, const struct ccase *c,
657                       double weight)
658{
659  struct freq *te;
660  size_t hash;
661  int j;
662
663  hash = 0;
664  for (j = 0; j < xt->n_vars; j++)
665    {
666      const struct variable *var = xt->vars[j].var;
667      hash = value_hash (case_data (c, var), var_get_width (var), hash);
668    }
669
670  HMAP_FOR_EACH_WITH_HASH (te, struct freq, node, hash, &xt->data)
671    {
672      for (j = 0; j < xt->n_vars; j++)
673        {
674          const struct variable *var = xt->vars[j].var;
675          if (!value_equal (case_data (c, var), &te->values[j],
676                            var_get_width (var)))
677            goto no_match;
678        }
679
680      /* Found an existing entry. */
681      te->count += weight;
682      return;
683
684    no_match: ;
685    }
686
687  /* No existing entry.  Create a new one. */
688  te = xmalloc (table_entry_size (xt->n_vars));
689  te->count = weight;
690  for (j = 0; j < xt->n_vars; j++)
691    {
692      const struct variable *var = xt->vars[j].var;
693      value_clone (&te->values[j], case_data (c, var), var_get_width (var));
694    }
695  hmap_insert (&xt->data, &te->node, hash);
696}
697
698/* Post-data reading calculations. */
699
700static int compare_table_entry_vars_3way (const struct freq *a,
701                                          const struct freq *b,
702                                          const struct crosstabulation *xt,
703                                          int idx0, int idx1);
704static int compare_table_entry_3way (const void *ap_, const void *bp_,
705                                     const void *xt_);
706static int compare_table_entry_3way_inv (const void *ap_, const void *bp_,
707                                     const void *xt_);
708
709static void enum_var_values (const struct crosstabulation *, int var_idx,
710                             bool descending);
711static void free_var_values (const struct crosstabulation *, int var_idx);
712static void output_crosstabulation (struct crosstabs_proc *,
713                                struct crosstabulation *);
714static void make_crosstabulation_subset (struct crosstabulation *xt,
715                                     size_t row0, size_t row1,
716                                     struct crosstabulation *subset);
717static void make_summary_table (struct crosstabs_proc *);
718static bool find_crosstab (struct crosstabulation *, size_t *row0p,
719                           size_t *row1p);
720
721static void
722postcalc (struct crosstabs_proc *proc)
723{
724
725  /* Round hash table entries, if requested
726
727     If this causes any of the cell counts to fall to zero, delete those
728     cells. */
729  if (proc->round_cells)
730    for (struct crosstabulation *xt = proc->pivots;
731         xt < &proc->pivots[proc->n_pivots]; xt++)
732      {
733        struct freq *e, *next;
734        HMAP_FOR_EACH_SAFE (e, next, struct freq, node, &xt->data)
735          {
736            e->count = round_weight (proc, e->count);
737            if (e->count == 0.0)
738              {
739                hmap_delete (&xt->data, &e->node);
740                free (e);
741              }
742          }
743      }
744
745  /* Convert hash tables into sorted arrays of entries. */
746  for (struct crosstabulation *xt = proc->pivots;
747       xt < &proc->pivots[proc->n_pivots]; xt++)
748    {
749      struct freq *e;
750
751      xt->n_entries = hmap_count (&xt->data);
752      xt->entries = xnmalloc (xt->n_entries, sizeof *xt->entries);
753      size_t i = 0;
754      HMAP_FOR_EACH (e, struct freq, node, &xt->data)
755        xt->entries[i++] = e;
756      hmap_destroy (&xt->data);
757
758      sort (xt->entries, xt->n_entries, sizeof *xt->entries,
759            proc->descending ? compare_table_entry_3way_inv : compare_table_entry_3way,
760	    xt);
761
762    }
763
764  make_summary_table (proc);
765
766  /* Output each pivot table. */
767  for (struct crosstabulation *xt = proc->pivots;
768       xt < &proc->pivots[proc->n_pivots]; xt++)
769    {
770      if (proc->pivot || xt->n_vars == 2)
771        output_crosstabulation (proc, xt);
772      else
773        {
774          size_t row0 = 0, row1 = 0;
775          while (find_crosstab (xt, &row0, &row1))
776            {
777              struct crosstabulation subset;
778              make_crosstabulation_subset (xt, row0, row1, &subset);
779              output_crosstabulation (proc, &subset);
780              free (subset.const_indexes);
781            }
782        }
783      if (proc->barchart)
784        {
785          int n_vars = (xt->n_vars > 2 ? 2 : xt->n_vars);
786          const struct variable **vars = xcalloc (n_vars, sizeof *vars);
787          for (size_t i = 0; i < n_vars; i++)
788            vars[i] = xt->vars[i].var;
789          chart_item_submit (barchart_create (vars, n_vars, _("Count"),
790                                              false,
791                                              xt->entries, xt->n_entries));
792          free (vars);
793        }
794    }
795
796  /* Free output and prepare for next split file. */
797  for (struct crosstabulation *xt = proc->pivots;
798       xt < &proc->pivots[proc->n_pivots]; xt++)
799    {
800      xt->missing = 0.0;
801
802      /* Free the members that were allocated in this function(and the values
803         owned by the entries.
804
805         The other pointer members are either both allocated and destroyed at a
806         lower level (in output_crosstabulation), or both allocated and
807         destroyed at a higher level (in crs_custom_tables and free_proc,
808         respectively). */
809      for (size_t i = 0; i < xt->n_vars; i++)
810        {
811          int width = var_get_width (xt->vars[i].var);
812          if (value_needs_init (width))
813            {
814              size_t j;
815
816              for (j = 0; j < xt->n_entries; j++)
817                value_destroy (&xt->entries[j]->values[i], width);
818            }
819        }
820
821      for (size_t i = 0; i < xt->n_entries; i++)
822        free (xt->entries[i]);
823      free (xt->entries);
824    }
825}
826
827static void
828make_crosstabulation_subset (struct crosstabulation *xt, size_t row0,
829                             size_t row1, struct crosstabulation *subset)
830{
831  *subset = *xt;
832  if (xt->n_vars > 2)
833    {
834      assert (xt->n_consts == 0);
835      subset->n_vars = 2;
836      subset->vars = xt->vars;
837
838      subset->n_consts = xt->n_vars - 2;
839      subset->const_vars = xt->vars + 2;
840      subset->const_indexes = xcalloc (subset->n_consts,
841                                       sizeof *subset->const_indexes);
842      for (size_t i = 0; i < subset->n_consts; i++)
843        {
844          const union value *value = &xt->entries[row0]->values[2 + i];
845
846          for (size_t j = 0; j < xt->vars[2 + i].n_values; j++)
847            if (value_equal (&xt->vars[2 + i].values[j], value,
848                             var_get_width (xt->vars[2 + i].var)))
849              {
850                subset->const_indexes[i] = j;
851                goto found;
852              }
853          NOT_REACHED ();
854        found: ;
855        }
856    }
857  subset->entries = &xt->entries[row0];
858  subset->n_entries = row1 - row0;
859}
860
861static int
862compare_table_entry_var_3way (const struct freq *a,
863                              const struct freq *b,
864                              const struct crosstabulation *xt,
865                              int idx)
866{
867  return value_compare_3way (&a->values[idx], &b->values[idx],
868                             var_get_width (xt->vars[idx].var));
869}
870
871static int
872compare_table_entry_vars_3way (const struct freq *a,
873                               const struct freq *b,
874                               const struct crosstabulation *xt,
875                               int idx0, int idx1)
876{
877  int i;
878
879  for (i = idx1 - 1; i >= idx0; i--)
880    {
881      int cmp = compare_table_entry_var_3way (a, b, xt, i);
882      if (cmp != 0)
883        return cmp;
884    }
885  return 0;
886}
887
888/* Compare the struct freq at *AP to the one at *BP and
889   return a strcmp()-type result. */
890static int
891compare_table_entry_3way (const void *ap_, const void *bp_, const void *xt_)
892{
893  const struct freq *const *ap = ap_;
894  const struct freq *const *bp = bp_;
895  const struct freq *a = *ap;
896  const struct freq *b = *bp;
897  const struct crosstabulation *xt = xt_;
898  int cmp;
899
900  cmp = compare_table_entry_vars_3way (a, b, xt, 2, xt->n_vars);
901  if (cmp != 0)
902    return cmp;
903
904  cmp = compare_table_entry_var_3way (a, b, xt, ROW_VAR);
905  if (cmp != 0)
906    return cmp;
907
908  return compare_table_entry_var_3way (a, b, xt, COL_VAR);
909}
910
911/* Inverted version of compare_table_entry_3way */
912static int
913compare_table_entry_3way_inv (const void *ap_, const void *bp_, const void *xt_)
914{
915  return -compare_table_entry_3way (ap_, bp_, xt_);
916}
917
918/* Output a table summarizing the cases processed. */
919static void
920make_summary_table (struct crosstabs_proc *proc)
921{
922  struct pivot_table *table = pivot_table_create (N_("Summary"));
923  pivot_table_set_weight_var (table, dict_get_weight (proc->dict));
924
925  pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"),
926                          N_("N"), PIVOT_RC_COUNT,
927                          N_("Percent"), PIVOT_RC_PERCENT);
928
929  struct pivot_dimension *cases = pivot_dimension_create (
930    table, PIVOT_AXIS_COLUMN, N_("Cases"),
931    N_("Valid"), N_("Missing"), N_("Total"));
932  cases->root->show_label = true;
933
934  struct pivot_dimension *tables = pivot_dimension_create (
935    table, PIVOT_AXIS_ROW, N_("Crosstabulation"));
936  for (struct crosstabulation *xt = &proc->pivots[0];
937       xt < &proc->pivots[proc->n_pivots]; xt++)
938    {
939      struct string name = DS_EMPTY_INITIALIZER;
940      for (size_t i = 0; i < xt->n_vars; i++)
941        {
942          if (i > 0)
943            ds_put_cstr (&name, " × ");
944          ds_put_cstr (&name, var_to_string (xt->vars[i].var));
945        }
946
947      int row = pivot_category_create_leaf (
948        tables->root,
949        pivot_value_new_user_text_nocopy (ds_steal_cstr (&name)));
950
951      double valid = 0.;
952      for (size_t i = 0; i < xt->n_entries; i++)
953        valid += xt->entries[i]->count;
954
955      double n[3];
956      n[0] = valid;
957      n[1] = xt->missing;
958      n[2] = n[0] + n[1];
959      for (int i = 0; i < 3; i++)
960        {
961          pivot_table_put3 (table, 0, i, row, pivot_value_new_number (n[i]));
962          pivot_table_put3 (table, 1, i, row,
963                            pivot_value_new_number (n[i] / n[2] * 100.0));
964        }
965    }
966
967  pivot_table_submit (table);
968}
969
970/* Output. */
971
972static struct pivot_table *create_crosstab_table (
973  struct crosstabs_proc *, struct crosstabulation *,
974  size_t crs_leaves[CRS_CL_count]);
975static struct pivot_table *create_chisq_table (struct crosstabulation *);
976static struct pivot_table *create_sym_table (struct crosstabulation *);
977static struct pivot_table *create_risk_table (
978  struct crosstabulation *, struct pivot_dimension **risk_statistics);
979static struct pivot_table *create_direct_table (struct crosstabulation *);
980static void display_crosstabulation (struct crosstabs_proc *,
981                                     struct crosstabulation *,
982                                     struct pivot_table *,
983                                     size_t crs_leaves[CRS_CL_count]);
984static void display_chisq (struct crosstabulation *, struct pivot_table *);
985static void display_symmetric (struct crosstabs_proc *,
986                               struct crosstabulation *, struct pivot_table *);
987static void display_risk (struct crosstabulation *, struct pivot_table *,
988                          struct pivot_dimension *risk_statistics);
989static void display_directional (struct crosstabs_proc *,
990                                 struct crosstabulation *,
991                                 struct pivot_table *);
992static void delete_missing (struct crosstabulation *);
993static void build_matrix (struct crosstabulation *);
994
995/* Output pivot table XT in the context of PROC. */
996static void
997output_crosstabulation (struct crosstabs_proc *proc, struct crosstabulation *xt)
998{
999  for (size_t i = 0; i < xt->n_vars; i++)
1000    enum_var_values (xt, i, proc->descending);
1001
1002  if (xt->vars[COL_VAR].n_values == 0)
1003    {
1004      struct string vars;
1005      int i;
1006
1007      ds_init_cstr (&vars, var_to_string (xt->vars[0].var));
1008      for (i = 1; i < xt->n_vars; i++)
1009        ds_put_format (&vars, " × %s", var_to_string (xt->vars[i].var));
1010
1011      /* TRANSLATORS: The %s here describes a crosstabulation.  It takes the
1012         form "var1 * var2 * var3 * ...".  */
1013      msg (SW, _("Crosstabulation %s contained no non-missing cases."),
1014           ds_cstr (&vars));
1015
1016      ds_destroy (&vars);
1017      for (size_t i = 0; i < xt->n_vars; i++)
1018        free_var_values (xt, i);
1019      return;
1020    }
1021
1022  size_t crs_leaves[CRS_CL_count];
1023  struct pivot_table *table = (proc->cells
1024                               ? create_crosstab_table (proc, xt, crs_leaves)
1025                               : NULL);
1026  struct pivot_table *chisq = (proc->statistics & (1u << CRS_ST_CHISQ)
1027                               ? create_chisq_table (xt)
1028                               : NULL);
1029  struct pivot_table *sym
1030    = (proc->statistics & ((1u << CRS_ST_PHI) | (1u << CRS_ST_CC)
1031                           | (1u << CRS_ST_BTAU) | (1u << CRS_ST_CTAU)
1032                           | (1u << CRS_ST_GAMMA) | (1u << CRS_ST_CORR)
1033                           | (1u << CRS_ST_KAPPA))
1034       ? create_sym_table (xt)
1035       : NULL);
1036  struct pivot_dimension *risk_statistics = NULL;
1037  struct pivot_table *risk = (proc->statistics & (1u << CRS_ST_RISK)
1038                              ? create_risk_table (xt, &risk_statistics)
1039                              : NULL);
1040  struct pivot_table *direct
1041    = (proc->statistics & ((1u << CRS_ST_LAMBDA) | (1u << CRS_ST_UC)
1042                           | (1u << CRS_ST_D) | (1u << CRS_ST_ETA))
1043       ? create_direct_table (xt)
1044       : NULL);
1045
1046  size_t row0 = 0;
1047  size_t row1 = 0;
1048  while (find_crosstab (xt, &row0, &row1))
1049    {
1050      struct crosstabulation x;
1051
1052      make_crosstabulation_subset (xt, row0, row1, &x);
1053
1054      size_t n_rows = x.vars[ROW_VAR].n_values;
1055      size_t n_cols = x.vars[COL_VAR].n_values;
1056      if (size_overflow_p (xtimes (xtimes (n_rows, n_cols), sizeof (double))))
1057        xalloc_die ();
1058      x.row_tot = xmalloc (n_rows * sizeof *x.row_tot);
1059      x.col_tot = xmalloc (n_cols * sizeof *x.col_tot);
1060      x.mat = xmalloc (n_rows * n_cols * sizeof *x.mat);
1061
1062      build_matrix (&x);
1063
1064      /* Find the first variable that differs from the last subtable. */
1065      if (table)
1066        display_crosstabulation (proc, &x, table, crs_leaves);
1067
1068      if (proc->exclude == MV_NEVER)
1069	delete_missing (&x);
1070
1071      if (chisq)
1072        display_chisq (&x, chisq);
1073
1074      if (sym)
1075        display_symmetric (proc, &x, sym);
1076      if (risk)
1077        display_risk (&x, risk, risk_statistics);
1078      if (direct)
1079        display_directional (proc, &x, direct);
1080
1081      free (x.mat);
1082      free (x.row_tot);
1083      free (x.col_tot);
1084      free (x.const_indexes);
1085    }
1086
1087  if (table)
1088    pivot_table_submit (table);
1089
1090  if (chisq)
1091    pivot_table_submit (chisq);
1092
1093  if (sym)
1094    pivot_table_submit (sym);
1095
1096  if (risk)
1097    {
1098      if (!pivot_table_is_empty (risk))
1099        pivot_table_submit (risk);
1100      else
1101        pivot_table_unref (risk);
1102    }
1103
1104  if (direct)
1105    pivot_table_submit (direct);
1106
1107  for (size_t i = 0; i < xt->n_vars; i++)
1108    free_var_values (xt, i);
1109}
1110
1111static void
1112build_matrix (struct crosstabulation *x)
1113{
1114  const int col_var_width = var_get_width (x->vars[COL_VAR].var);
1115  const int row_var_width = var_get_width (x->vars[ROW_VAR].var);
1116  size_t n_rows = x->vars[ROW_VAR].n_values;
1117  size_t n_cols = x->vars[COL_VAR].n_values;
1118  int col, row;
1119  double *mp;
1120  struct freq **p;
1121
1122  mp = x->mat;
1123  col = row = 0;
1124  for (p = x->entries; p < &x->entries[x->n_entries]; p++)
1125    {
1126      const struct freq *te = *p;
1127
1128      while (!value_equal (&x->vars[ROW_VAR].values[row],
1129                           &te->values[ROW_VAR], row_var_width))
1130        {
1131          for (; col < n_cols; col++)
1132            *mp++ = 0.0;
1133          col = 0;
1134          row++;
1135        }
1136
1137      while (!value_equal (&x->vars[COL_VAR].values[col],
1138                           &te->values[COL_VAR], col_var_width))
1139        {
1140          *mp++ = 0.0;
1141          col++;
1142        }
1143
1144      *mp++ = te->count;
1145      if (++col >= n_cols)
1146        {
1147          col = 0;
1148          row++;
1149        }
1150    }
1151  while (mp < &x->mat[n_cols * n_rows])
1152    *mp++ = 0.0;
1153  assert (mp == &x->mat[n_cols * n_rows]);
1154
1155  /* Column totals, row totals, ns_rows. */
1156  mp = x->mat;
1157  for (col = 0; col < n_cols; col++)
1158    x->col_tot[col] = 0.0;
1159  for (row = 0; row < n_rows; row++)
1160    x->row_tot[row] = 0.0;
1161  x->ns_rows = 0;
1162  for (row = 0; row < n_rows; row++)
1163    {
1164      bool row_is_empty = true;
1165      for (col = 0; col < n_cols; col++)
1166        {
1167          if (*mp != 0.0)
1168            {
1169              row_is_empty = false;
1170              x->col_tot[col] += *mp;
1171              x->row_tot[row] += *mp;
1172            }
1173          mp++;
1174        }
1175      if (!row_is_empty)
1176        x->ns_rows++;
1177    }
1178  assert (mp == &x->mat[n_cols * n_rows]);
1179
1180  /* ns_cols. */
1181  x->ns_cols = 0;
1182  for (col = 0; col < n_cols; col++)
1183    for (row = 0; row < n_rows; row++)
1184      if (x->mat[col + row * n_cols] != 0.0)
1185        {
1186          x->ns_cols++;
1187          break;
1188        }
1189
1190  /* Grand total. */
1191  x->total = 0.0;
1192  for (col = 0; col < n_cols; col++)
1193    x->total += x->col_tot[col];
1194}
1195
1196static void
1197add_var_dimension (struct pivot_table *table, const struct xtab_var *var,
1198                   enum pivot_axis_type axis_type, bool total)
1199{
1200  struct pivot_dimension *d = pivot_dimension_create__ (
1201    table, axis_type, pivot_value_new_variable (var->var));
1202
1203  struct pivot_footnote *missing_footnote = pivot_table_create_footnote (
1204    table, pivot_value_new_text (N_("Missing value")));
1205
1206  struct pivot_category *group = pivot_category_create_group__ (
1207    d->root, pivot_value_new_variable (var->var));
1208  for (size_t j = 0; j < var->n_values; j++)
1209    {
1210      struct pivot_value *value = pivot_value_new_var_value (
1211        var->var, &var->values[j]);
1212      if (var_is_value_missing (var->var, &var->values[j], MV_ANY))
1213        pivot_value_add_footnote (value, missing_footnote);
1214      pivot_category_create_leaf (group, value);
1215    }
1216
1217  if (total)
1218    pivot_category_create_leaf (d->root, pivot_value_new_text (N_("Total")));
1219}
1220
1221static struct pivot_table *
1222create_crosstab_table (struct crosstabs_proc *proc, struct crosstabulation *xt,
1223                       size_t crs_leaves[CRS_CL_count])
1224{
1225  /* Title. */
1226  struct string title = DS_EMPTY_INITIALIZER;
1227  for (size_t i = 0; i < xt->n_vars; i++)
1228    {
1229      if (i)
1230        ds_put_cstr (&title, " × ");
1231      ds_put_cstr (&title, var_to_string (xt->vars[i].var));
1232    }
1233  for (size_t i = 0; i < xt->n_consts; i++)
1234    {
1235      const struct variable *var = xt->const_vars[i].var;
1236      const union value *value = &xt->entries[0]->values[2 + i];
1237      char *s;
1238
1239      ds_put_format (&title, ", %s=", var_to_string (var));
1240
1241      /* Insert the formatted value of VAR without any leading spaces. */
1242      s = data_out (value, var_get_encoding (var), var_get_print_format (var));
1243      ds_put_cstr (&title, s + strspn (s, " "));
1244      free (s);
1245    }
1246  struct pivot_table *table = pivot_table_create__ (
1247    pivot_value_new_user_text_nocopy (ds_steal_cstr (&title)),
1248    "Crosstabulation");
1249  pivot_table_set_weight_format (table, &proc->weight_format);
1250  table->omit_empty = true;
1251
1252  struct pivot_dimension *statistics = pivot_dimension_create (
1253    table, PIVOT_AXIS_ROW, N_("Statistics"));
1254
1255  struct statistic
1256    {
1257      const char *label;
1258      const char *rc;
1259    };
1260  static const struct statistic stats[CRS_CL_count] =
1261    {
1262      [CRS_CL_COUNT] = { N_("Count"), PIVOT_RC_COUNT },
1263      [CRS_CL_ROW] = { N_("Row %"), PIVOT_RC_PERCENT },
1264      [CRS_CL_COLUMN] = { N_("Column %"), PIVOT_RC_PERCENT },
1265      [CRS_CL_TOTAL] = { N_("Total %"), PIVOT_RC_PERCENT },
1266      [CRS_CL_EXPECTED] = { N_("Expected"), PIVOT_RC_OTHER },
1267      [CRS_CL_RESIDUAL] = { N_("Residual"), PIVOT_RC_RESIDUAL },
1268      [CRS_CL_SRESIDUAL] = { N_("Std. Residual"), PIVOT_RC_RESIDUAL },
1269      [CRS_CL_ASRESIDUAL] = { N_("Adjusted Residual"), PIVOT_RC_RESIDUAL },
1270    };
1271  for (size_t i = 0; i < CRS_CL_count; i++)
1272    if (proc->cells & (1u << i) && stats[i].label)
1273        crs_leaves[i] = pivot_category_create_leaf_rc (
1274          statistics->root, pivot_value_new_text (stats[i].label),
1275          stats[i].rc);
1276
1277  for (size_t i = 0; i < xt->n_vars; i++)
1278    add_var_dimension (table, &xt->vars[i],
1279                       i == COL_VAR ? PIVOT_AXIS_COLUMN : PIVOT_AXIS_ROW,
1280                       true);
1281
1282  return table;
1283}
1284
1285static struct pivot_table *
1286create_chisq_table (struct crosstabulation *xt)
1287{
1288  struct pivot_table *chisq = pivot_table_create (N_("Chi-Square Tests"));
1289  pivot_table_set_weight_format (chisq, &xt->weight_format);
1290  chisq->omit_empty = true;
1291
1292  pivot_dimension_create (
1293    chisq, PIVOT_AXIS_ROW, N_("Statistics"),
1294    N_("Pearson Chi-Square"),
1295    N_("Likelihood Ratio"),
1296    N_("Fisher's Exact Test"),
1297    N_("Continuity Correction"),
1298    N_("Linear-by-Linear Association"),
1299    N_("N of Valid Cases"), PIVOT_RC_COUNT);
1300
1301  pivot_dimension_create (
1302    chisq, PIVOT_AXIS_COLUMN, N_("Statistics"),
1303    N_("Value"), PIVOT_RC_OTHER,
1304    N_("df"), PIVOT_RC_COUNT,
1305    N_("Asymptotic Sig. (2-tailed)"), PIVOT_RC_SIGNIFICANCE,
1306    N_("Exact Sig. (2-tailed)"), PIVOT_RC_SIGNIFICANCE,
1307    N_("Exact Sig. (1-tailed)"), PIVOT_RC_SIGNIFICANCE);
1308
1309  for (size_t i = 2; i < xt->n_vars; i++)
1310    add_var_dimension (chisq, &xt->vars[i], PIVOT_AXIS_ROW, false);
1311
1312  return chisq;
1313}
1314
1315/* Symmetric measures. */
1316static struct pivot_table *
1317create_sym_table (struct crosstabulation *xt)
1318{
1319  struct pivot_table *sym = pivot_table_create (N_("Symmetric Measures"));
1320  pivot_table_set_weight_format (sym, &xt->weight_format);
1321  sym->omit_empty = true;
1322
1323  pivot_dimension_create (
1324    sym, PIVOT_AXIS_COLUMN, N_("Values"),
1325    N_("Value"), PIVOT_RC_OTHER,
1326    N_("Asymp. Std. Error"), PIVOT_RC_OTHER,
1327    N_("Approx. T"), PIVOT_RC_OTHER,
1328    N_("Approx. Sig."), PIVOT_RC_SIGNIFICANCE);
1329
1330  struct pivot_dimension *statistics = pivot_dimension_create (
1331    sym, PIVOT_AXIS_ROW, N_("Statistics"));
1332  pivot_category_create_group (
1333    statistics->root, N_("Nominal by Nominal"),
1334    N_("Phi"), N_("Cramer's V"), N_("Contingency Coefficient"));
1335  pivot_category_create_group (
1336    statistics->root, N_("Ordinal by Ordinal"),
1337    N_("Kendall's tau-b"), N_("Kendall's tau-c"),
1338    N_("Gamma"), N_("Spearman Correlation"));
1339  pivot_category_create_group (
1340    statistics->root, N_("Interval by Interval"),
1341    N_("Pearson's R"));
1342  pivot_category_create_group (
1343    statistics->root, N_("Measure of Agreement"),
1344    N_("Kappa"));
1345  pivot_category_create_leaves (statistics->root, N_("N of Valid Cases"),
1346                                PIVOT_RC_COUNT);
1347
1348  for (size_t i = 2; i < xt->n_vars; i++)
1349    add_var_dimension (sym, &xt->vars[i], PIVOT_AXIS_ROW, false);
1350
1351  return sym;
1352}
1353
1354/* Risk estimate. */
1355static struct pivot_table *
1356create_risk_table (struct crosstabulation *xt,
1357                   struct pivot_dimension **risk_statistics)
1358{
1359  struct pivot_table *risk = pivot_table_create (N_("Risk Estimate"));
1360  pivot_table_set_weight_format (risk, &xt->weight_format);
1361  risk->omit_empty = true;
1362
1363  struct pivot_dimension *values = pivot_dimension_create (
1364    risk, PIVOT_AXIS_COLUMN, N_("Values"),
1365    N_("Value"), PIVOT_RC_OTHER);
1366  pivot_category_create_group (
1367  /* xgettext:no-c-format */
1368    values->root, N_("95% Confidence Interval"),
1369    N_("Lower"), PIVOT_RC_OTHER,
1370    N_("Upper"), PIVOT_RC_OTHER);
1371
1372  *risk_statistics = pivot_dimension_create (
1373    risk, PIVOT_AXIS_ROW, N_("Statistics"));
1374
1375  for (size_t i = 2; i < xt->n_vars; i++)
1376    add_var_dimension (risk, &xt->vars[i], PIVOT_AXIS_ROW, false);
1377
1378  return risk;
1379}
1380
1381static void
1382create_direct_stat (struct pivot_category *parent,
1383                    const struct crosstabulation *xt,
1384                    const char *name, bool symmetric)
1385{
1386  struct pivot_category *group = pivot_category_create_group (
1387    parent, name);
1388  if (symmetric)
1389    pivot_category_create_leaf (group, pivot_value_new_text (N_("Symmetric")));
1390
1391  char *row_label = xasprintf (_("%s Dependent"),
1392                               var_to_string (xt->vars[ROW_VAR].var));
1393  pivot_category_create_leaf (group, pivot_value_new_user_text_nocopy (
1394                                row_label));
1395
1396  char *col_label = xasprintf (_("%s Dependent"),
1397                               var_to_string (xt->vars[COL_VAR].var));
1398  pivot_category_create_leaf (group, pivot_value_new_user_text_nocopy (
1399                                col_label));
1400}
1401
1402/* Directional measures. */
1403static struct pivot_table *
1404create_direct_table (struct crosstabulation *xt)
1405{
1406  struct pivot_table *direct = pivot_table_create (N_("Directional Measures"));
1407  pivot_table_set_weight_format (direct, &xt->weight_format);
1408  direct->omit_empty = true;
1409
1410  pivot_dimension_create (
1411    direct, PIVOT_AXIS_COLUMN, N_("Values"),
1412    N_("Value"), PIVOT_RC_OTHER,
1413    N_("Asymp. Std. Error"), PIVOT_RC_OTHER,
1414    N_("Approx. T"), PIVOT_RC_OTHER,
1415    N_("Approx. Sig."), PIVOT_RC_SIGNIFICANCE);
1416
1417  struct pivot_dimension *statistics = pivot_dimension_create (
1418    direct, PIVOT_AXIS_ROW, N_("Statistics"));
1419  struct pivot_category *nn = pivot_category_create_group (
1420    statistics->root, N_("Nominal by Nominal"));
1421  create_direct_stat (nn, xt, N_("Lambda"), true);
1422  create_direct_stat (nn, xt, N_("Goodman and Kruskal tau"), false);
1423  create_direct_stat (nn, xt, N_("Uncertainty Coefficient"), true);
1424  struct pivot_category *oo = pivot_category_create_group (
1425    statistics->root, N_("Ordinal by Ordinal"));
1426  create_direct_stat (oo, xt, N_("Somers' d"), true);
1427  struct pivot_category *ni = pivot_category_create_group (
1428    statistics->root, N_("Nominal by Interval"));
1429  create_direct_stat (ni, xt, N_("Eta"), false);
1430
1431  for (size_t i = 2; i < xt->n_vars; i++)
1432    add_var_dimension (direct, &xt->vars[i], PIVOT_AXIS_ROW, false);
1433
1434  return direct;
1435}
1436
1437/* Delete missing rows and columns for statistical analysis when
1438   /MISSING=REPORT. */
1439static void
1440delete_missing (struct crosstabulation *xt)
1441{
1442  size_t n_rows = xt->vars[ROW_VAR].n_values;
1443  size_t n_cols = xt->vars[COL_VAR].n_values;
1444  int r, c;
1445
1446  for (r = 0; r < n_rows; r++)
1447    if (var_is_num_missing (xt->vars[ROW_VAR].var,
1448                            xt->vars[ROW_VAR].values[r].f, MV_USER))
1449      {
1450        for (c = 0; c < n_cols; c++)
1451          xt->mat[c + r * n_cols] = 0.;
1452        xt->ns_rows--;
1453      }
1454
1455
1456  for (c = 0; c < n_cols; c++)
1457    if (var_is_num_missing (xt->vars[COL_VAR].var,
1458                            xt->vars[COL_VAR].values[c].f, MV_USER))
1459      {
1460        for (r = 0; r < n_rows; r++)
1461          xt->mat[c + r * n_cols] = 0.;
1462        xt->ns_cols--;
1463      }
1464}
1465
1466static bool
1467find_crosstab (struct crosstabulation *xt, size_t *row0p, size_t *row1p)
1468{
1469  size_t row0 = *row1p;
1470  size_t row1;
1471
1472  if (row0 >= xt->n_entries)
1473    return false;
1474
1475  for (row1 = row0 + 1; row1 < xt->n_entries; row1++)
1476    {
1477      struct freq *a = xt->entries[row0];
1478      struct freq *b = xt->entries[row1];
1479      if (compare_table_entry_vars_3way (a, b, xt, 2, xt->n_vars) != 0)
1480        break;
1481    }
1482  *row0p = row0;
1483  *row1p = row1;
1484  return true;
1485}
1486
1487/* Compares `union value's A_ and B_ and returns a strcmp()-like
1488   result.  WIDTH_ points to an int which is either 0 for a
1489   numeric value or a string width for a string value. */
1490static int
1491compare_value_3way (const void *a_, const void *b_, const void *width_)
1492{
1493  const union value *a = a_;
1494  const union value *b = b_;
1495  const int *width = width_;
1496
1497  return value_compare_3way (a, b, *width);
1498}
1499
1500/* Inverted version of the above */
1501static int
1502compare_value_3way_inv (const void *a_, const void *b_, const void *width_)
1503{
1504  return -compare_value_3way (a_, b_, width_);
1505}
1506
1507
1508/* Given an array of ENTRY_CNT table_entry structures starting at
1509   ENTRIES, creates a sorted list of the values that the variable
1510   with index VAR_IDX takes on.  Stores the array of the values in
1511   XT->values and the number of values in XT->n_values. */
1512static void
1513enum_var_values (const struct crosstabulation *xt, int var_idx,
1514                 bool descending)
1515{
1516  struct xtab_var *xv = &xt->vars[var_idx];
1517  const struct var_range *range = get_var_range (xt->proc, xv->var);
1518
1519  if (range)
1520    {
1521      xv->values = xnmalloc (range->count, sizeof *xv->values);
1522      xv->n_values = range->count;
1523      for (size_t i = 0; i < range->count; i++)
1524        xv->values[i].f = range->min + i;
1525    }
1526  else
1527    {
1528      int width = var_get_width (xv->var);
1529      struct hmapx_node *node;
1530      const union value *iter;
1531      struct hmapx set;
1532
1533      hmapx_init (&set);
1534      for (size_t i = 0; i < xt->n_entries; i++)
1535        {
1536          const struct freq *te = xt->entries[i];
1537          const union value *value = &te->values[var_idx];
1538          size_t hash = value_hash (value, width, 0);
1539
1540          HMAPX_FOR_EACH_WITH_HASH (iter, node, hash, &set)
1541            if (value_equal (iter, value, width))
1542              goto next_entry;
1543
1544          hmapx_insert (&set, (union value *) value, hash);
1545
1546        next_entry: ;
1547        }
1548
1549      xv->n_values = hmapx_count (&set);
1550      xv->values = xnmalloc (xv->n_values, sizeof *xv->values);
1551      size_t i = 0;
1552      HMAPX_FOR_EACH (iter, node, &set)
1553        xv->values[i++] = *iter;
1554      hmapx_destroy (&set);
1555
1556      sort (xv->values, xv->n_values, sizeof *xv->values,
1557	    descending ? compare_value_3way_inv : compare_value_3way,
1558	    &width);
1559    }
1560}
1561
1562static void
1563free_var_values (const struct crosstabulation *xt, int var_idx)
1564{
1565  struct xtab_var *xv = &xt->vars[var_idx];
1566  free (xv->values);
1567  xv->values = NULL;
1568  xv->n_values = 0;
1569}
1570
1571/* Displays the crosstabulation table. */
1572static void
1573display_crosstabulation (struct crosstabs_proc *proc,
1574                         struct crosstabulation *xt, struct pivot_table *table,
1575                         size_t crs_leaves[CRS_CL_count])
1576{
1577  size_t n_rows = xt->vars[ROW_VAR].n_values;
1578  size_t n_cols = xt->vars[COL_VAR].n_values;
1579
1580  size_t *indexes = xnmalloc (table->n_dimensions, sizeof *indexes);
1581  assert (xt->n_vars == 2);
1582  for (size_t i = 0; i < xt->n_consts; i++)
1583    indexes[i + 3] = xt->const_indexes[i];
1584
1585  /* Put in the actual cells. */
1586  double *mp = xt->mat;
1587  for (size_t r = 0; r < n_rows; r++)
1588    {
1589      if (!xt->row_tot[r] && proc->mode != INTEGER)
1590        continue;
1591
1592      indexes[ROW_VAR + 1] = r;
1593      for (size_t c = 0; c < n_cols; c++)
1594        {
1595          if (!xt->col_tot[c] && proc->mode != INTEGER)
1596            continue;
1597
1598          indexes[COL_VAR + 1] = c;
1599
1600          double expected_value = xt->row_tot[r] * xt->col_tot[c] / xt->total;
1601          double residual = *mp - expected_value;
1602          double sresidual = residual / sqrt (expected_value);
1603          double asresidual = (sresidual
1604                               * (1. - xt->row_tot[r] / xt->total)
1605                               * (1. - xt->col_tot[c] / xt->total));
1606          double entries[] = {
1607            [CRS_CL_COUNT] = *mp,
1608            [CRS_CL_ROW] = *mp / xt->row_tot[r] * 100.,
1609            [CRS_CL_COLUMN] = *mp / xt->col_tot[c] * 100.,
1610            [CRS_CL_TOTAL] = *mp / xt->total * 100.,
1611            [CRS_CL_EXPECTED] = expected_value,
1612            [CRS_CL_RESIDUAL] = residual,
1613            [CRS_CL_SRESIDUAL] = sresidual,
1614            [CRS_CL_ASRESIDUAL] = asresidual,
1615          };
1616          for (size_t i = 0; i < proc->n_cells; i++)
1617            {
1618              int cell = proc->a_cells[i];
1619              indexes[0] = crs_leaves[cell];
1620              pivot_table_put (table, indexes, table->n_dimensions,
1621                               pivot_value_new_number (entries[cell]));
1622            }
1623
1624          mp++;
1625        }
1626    }
1627
1628  /* Row totals. */
1629  for (size_t r = 0; r < n_rows; r++)
1630    {
1631      if (!xt->row_tot[r] && proc->mode != INTEGER)
1632        continue;
1633
1634      double expected_value = xt->row_tot[r] / xt->total;
1635      double entries[] = {
1636        [CRS_CL_COUNT] = xt->row_tot[r],
1637        [CRS_CL_ROW] = 100.0,
1638        [CRS_CL_COLUMN] = expected_value * 100.,
1639        [CRS_CL_TOTAL] = expected_value * 100.,
1640        [CRS_CL_EXPECTED] = expected_value,
1641        [CRS_CL_RESIDUAL] = SYSMIS,
1642        [CRS_CL_SRESIDUAL] = SYSMIS,
1643        [CRS_CL_ASRESIDUAL] = SYSMIS,
1644      };
1645      for (size_t i = 0; i < proc->n_cells; i++)
1646        {
1647          int cell = proc->a_cells[i];
1648          double entry = entries[cell];
1649          if (entry != SYSMIS)
1650            {
1651              indexes[ROW_VAR + 1] = r;
1652              indexes[COL_VAR + 1] = n_cols;
1653              indexes[0] = crs_leaves[cell];
1654              pivot_table_put (table, indexes, table->n_dimensions,
1655                               pivot_value_new_number (entry));
1656            }
1657        }
1658    }
1659
1660  for (size_t c = 0; c <= n_cols; c++)
1661    {
1662      if (c < n_cols && !xt->col_tot[c] && proc->mode != INTEGER)
1663        continue;
1664
1665      double ct = c < n_cols ? xt->col_tot[c] : xt->total;
1666      double expected_value = ct / xt->total;
1667      double entries[] = {
1668        [CRS_CL_COUNT] = ct,
1669        [CRS_CL_ROW] = expected_value * 100.0,
1670        [CRS_CL_COLUMN] = 100.0,
1671        [CRS_CL_TOTAL] = expected_value * 100.,
1672        [CRS_CL_EXPECTED] = expected_value,
1673        [CRS_CL_RESIDUAL] = SYSMIS,
1674        [CRS_CL_SRESIDUAL] = SYSMIS,
1675        [CRS_CL_ASRESIDUAL] = SYSMIS,
1676      };
1677      for (size_t i = 0; i < proc->n_cells; i++)
1678        {
1679          int cell = proc->a_cells[i];
1680          double entry = entries[cell];
1681          if (entry != SYSMIS)
1682            {
1683              indexes[ROW_VAR + 1] = n_rows;
1684              indexes[COL_VAR + 1] = c;
1685              indexes[0] = crs_leaves[cell];
1686              pivot_table_put (table, indexes, table->n_dimensions,
1687                               pivot_value_new_number (entry));
1688            }
1689        }
1690    }
1691
1692  free (indexes);
1693}
1694
1695static void calc_r (struct crosstabulation *,
1696                    double *XT, double *Y, double *, double *, double *);
1697static void calc_chisq (struct crosstabulation *,
1698                        double[N_CHISQ], int[N_CHISQ], double *, double *);
1699
1700/* Display chi-square statistics. */
1701static void
1702display_chisq (struct crosstabulation *xt, struct pivot_table *chisq)
1703{
1704  double chisq_v[N_CHISQ];
1705  double fisher1, fisher2;
1706  int df[N_CHISQ];
1707  calc_chisq (xt, chisq_v, df, &fisher1, &fisher2);
1708
1709  size_t *indexes = xnmalloc (chisq->n_dimensions, sizeof *indexes);
1710  assert (xt->n_vars == 2);
1711  for (size_t i = 0; i < xt->n_consts; i++)
1712    indexes[i + 2] = xt->const_indexes[i];
1713  for (int i = 0; i < N_CHISQ; i++)
1714    {
1715      indexes[0] = i;
1716
1717      double entries[5] = { SYSMIS, SYSMIS, SYSMIS, SYSMIS, SYSMIS };
1718      if (i == 2)
1719        {
1720          entries[3] = fisher2;
1721          entries[4] = fisher1;
1722        }
1723      else if (chisq_v[i] != SYSMIS)
1724        {
1725          entries[0] = chisq_v[i];
1726          entries[1] = df[i];
1727          entries[2] = gsl_cdf_chisq_Q (chisq_v[i], df[i]);
1728        }
1729
1730      for (size_t j = 0; j < sizeof entries / sizeof *entries; j++)
1731        if (entries[j] != SYSMIS)
1732          {
1733            indexes[1] = j;
1734            pivot_table_put (chisq, indexes, chisq->n_dimensions,
1735                             pivot_value_new_number (entries[j]));
1736        }
1737    }
1738
1739  indexes[0] = 5;
1740  indexes[1] = 0;
1741  pivot_table_put (chisq, indexes, chisq->n_dimensions,
1742                   pivot_value_new_number (xt->total));
1743
1744  free (indexes);
1745}
1746
1747static int calc_symmetric (struct crosstabs_proc *, struct crosstabulation *,
1748                           double[N_SYMMETRIC], double[N_SYMMETRIC],
1749			   double[N_SYMMETRIC],
1750                           double[3], double[3], double[3]);
1751
1752/* Display symmetric measures. */
1753static void
1754display_symmetric (struct crosstabs_proc *proc, struct crosstabulation *xt,
1755                   struct pivot_table *sym)
1756{
1757  double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
1758  double somers_d_v[3], somers_d_ase[3], somers_d_t[3];
1759
1760  if (!calc_symmetric (proc, xt, sym_v, sym_ase, sym_t,
1761                       somers_d_v, somers_d_ase, somers_d_t))
1762    return;
1763
1764  size_t *indexes = xnmalloc (sym->n_dimensions, sizeof *indexes);
1765  assert (xt->n_vars == 2);
1766  for (size_t i = 0; i < xt->n_consts; i++)
1767    indexes[i + 2] = xt->const_indexes[i];
1768
1769  for (int i = 0; i < N_SYMMETRIC; i++)
1770    {
1771      if (sym_v[i] == SYSMIS)
1772	continue;
1773
1774      indexes[1] = i;
1775
1776      double entries[] = { sym_v[i], sym_ase[i], sym_t[i] };
1777      for (size_t j = 0; j < sizeof entries / sizeof *entries; j++)
1778        if (entries[j] != SYSMIS)
1779          {
1780            indexes[0] = j;
1781            pivot_table_put (sym, indexes, sym->n_dimensions,
1782                             pivot_value_new_number (entries[j]));
1783          }
1784    }
1785
1786  indexes[1] = N_SYMMETRIC;
1787  indexes[0] = 0;
1788  struct pivot_value *total = pivot_value_new_number (xt->total);
1789  pivot_value_set_rc (sym, total, PIVOT_RC_COUNT);
1790  pivot_table_put (sym, indexes, sym->n_dimensions, total);
1791
1792  free (indexes);
1793}
1794
1795static bool calc_risk (struct crosstabulation *,
1796                       double[], double[], double[], union value *,
1797                       double *);
1798
1799/* Display risk estimate. */
1800static void
1801display_risk (struct crosstabulation *xt, struct pivot_table *risk,
1802              struct pivot_dimension *risk_statistics)
1803{
1804  double risk_v[3], lower[3], upper[3], n_valid;
1805  union value c[2];
1806  if (!calc_risk (xt, risk_v, upper, lower, c, &n_valid))
1807    return;
1808
1809  size_t *indexes = xnmalloc (risk->n_dimensions, sizeof *indexes);
1810  assert (xt->n_vars == 2);
1811  for (size_t i = 0; i < xt->n_consts; i++)
1812    indexes[i + 2] = xt->const_indexes[i];
1813
1814  for (int i = 0; i < 3; i++)
1815    {
1816      const struct variable *cv = xt->vars[COL_VAR].var;
1817      const struct variable *rv = xt->vars[ROW_VAR].var;
1818
1819      if (risk_v[i] == SYSMIS)
1820	continue;
1821
1822      struct string label = DS_EMPTY_INITIALIZER;
1823      switch (i)
1824	{
1825	case 0:
1826          ds_put_format (&label, _("Odds Ratio for %s"), var_to_string (rv));
1827          ds_put_cstr (&label, " (");
1828          var_append_value_name (rv, &c[0], &label);
1829          ds_put_cstr (&label, " / ");
1830          var_append_value_name (rv, &c[1], &label);
1831          ds_put_cstr (&label, ")");
1832	  break;
1833	case 1:
1834	case 2:
1835          ds_put_format (&label, _("For cohort %s = "), var_to_string (cv));
1836          var_append_value_name (cv, &xt->vars[ROW_VAR].values[i - 1], &label);
1837	  break;
1838	}
1839
1840      indexes[1] = pivot_category_create_leaf (
1841        risk_statistics->root,
1842        pivot_value_new_user_text_nocopy (ds_steal_cstr (&label)));
1843
1844      double entries[] = { risk_v[i], lower[i], upper[i] };
1845      for (size_t j = 0; j < sizeof entries / sizeof *entries; j++)
1846        {
1847          indexes[0] = j;
1848          pivot_table_put (risk, indexes, risk->n_dimensions,
1849                           pivot_value_new_number (entries[i]));
1850        }
1851    }
1852  indexes[1] = pivot_category_create_leaf (
1853    risk_statistics->root,
1854    pivot_value_new_text (N_("N of Valid Cases")));
1855  indexes[0] = 0;
1856  pivot_table_put (risk, indexes, risk->n_dimensions,
1857                   pivot_value_new_number (n_valid));
1858  free (indexes);
1859}
1860
1861static int calc_directional (struct crosstabs_proc *, struct crosstabulation *,
1862                             double[N_DIRECTIONAL], double[N_DIRECTIONAL],
1863			     double[N_DIRECTIONAL], double[N_DIRECTIONAL]);
1864
1865/* Display directional measures. */
1866static void
1867display_directional (struct crosstabs_proc *proc,
1868                     struct crosstabulation *xt, struct pivot_table *direct)
1869{
1870  double direct_v[N_DIRECTIONAL];
1871  double direct_ase[N_DIRECTIONAL];
1872  double direct_t[N_DIRECTIONAL];
1873  double sig[N_DIRECTIONAL];
1874  if (!calc_directional (proc, xt, direct_v, direct_ase, direct_t, sig))
1875    return;
1876
1877  size_t *indexes = xnmalloc (direct->n_dimensions, sizeof *indexes);
1878  assert (xt->n_vars == 2);
1879  for (size_t i = 0; i < xt->n_consts; i++)
1880    indexes[i + 2] = xt->const_indexes[i];
1881
1882  for (int i = 0; i < N_DIRECTIONAL; i++)
1883    {
1884      if (direct_v[i] == SYSMIS)
1885	continue;
1886
1887      indexes[1] = i;
1888
1889      double entries[] = {
1890        direct_v[i], direct_ase[i], direct_t[i], sig[i],
1891      };
1892      for (size_t j = 0; j < sizeof entries / sizeof *entries; j++)
1893        if (entries[j] != SYSMIS)
1894          {
1895            indexes[0] = j;
1896            pivot_table_put (direct, indexes, direct->n_dimensions,
1897                             pivot_value_new_number (entries[j]));
1898          }
1899    }
1900
1901  free (indexes);
1902}
1903
1904/* Statistical calculations. */
1905
1906/* Returns the value of the logarithm of gamma (factorial) function for an integer
1907   argument XT. */
1908static double
1909log_gamma_int (double xt)
1910{
1911  double r = 0;
1912  int i;
1913
1914  for (i = 2; i < xt; i++)
1915    r += log(i);
1916
1917  return r;
1918}
1919
1920/* Calculate P_r as specified in _SPSS Statistical Algorithms_,
1921   Appendix 5. */
1922static inline double
1923Pr (int a, int b, int c, int d)
1924{
1925  return exp (log_gamma_int (a + b + 1.) -  log_gamma_int (a + 1.)
1926	    + log_gamma_int (c + d + 1.) - log_gamma_int (b + 1.)
1927	    + log_gamma_int (a + c + 1.) - log_gamma_int (c + 1.)
1928	    + log_gamma_int (b + d + 1.) - log_gamma_int (d + 1.)
1929	    - log_gamma_int (a + b + c + d + 1.));
1930}
1931
1932/* Swap the contents of A and B. */
1933static inline void
1934swap (int *a, int *b)
1935{
1936  int t = *a;
1937  *a = *b;
1938  *b = t;
1939}
1940
1941/* Calculate significance for Fisher's exact test as specified in
1942   _SPSS Statistical Algorithms_, Appendix 5. */
1943static void
1944calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
1945{
1946  int xt;
1947  double pn1;
1948
1949  if (MIN (c, d) < MIN (a, b))
1950    swap (&a, &c), swap (&b, &d);
1951  if (MIN (b, d) < MIN (a, c))
1952    swap (&a, &b), swap (&c, &d);
1953  if (b * c < a * d)
1954    {
1955      if (b < c)
1956	swap (&a, &b), swap (&c, &d);
1957      else
1958	swap (&a, &c), swap (&b, &d);
1959    }
1960
1961  pn1 = Pr (a, b, c, d);
1962  *fisher1 = pn1;
1963  for (xt = 1; xt <= a; xt++)
1964    {
1965      *fisher1 += Pr (a - xt, b + xt, c + xt, d - xt);
1966    }
1967
1968  *fisher2 = *fisher1;
1969
1970  for (xt = 1; xt <= b; xt++)
1971    {
1972      double p = Pr (a + xt, b - xt, c - xt, d + xt);
1973      if (p < pn1)
1974	*fisher2 += p;
1975    }
1976}
1977
1978/* Calculates chi-squares into CHISQ.  MAT is a matrix with N_COLS
1979   columns with values COLS and N_ROWS rows with values ROWS.  Values
1980   in the matrix sum to xt->total. */
1981static void
1982calc_chisq (struct crosstabulation *xt,
1983            double chisq[N_CHISQ], int df[N_CHISQ],
1984	    double *fisher1, double *fisher2)
1985{
1986  chisq[0] = chisq[1] = 0.;
1987  chisq[2] = chisq[3] = chisq[4] = SYSMIS;
1988  *fisher1 = *fisher2 = SYSMIS;
1989
1990  df[0] = df[1] = (xt->ns_cols - 1) * (xt->ns_rows - 1);
1991
1992  if (xt->ns_rows <= 1 || xt->ns_cols <= 1)
1993    {
1994      chisq[0] = chisq[1] = SYSMIS;
1995      return;
1996    }
1997
1998  size_t n_cols = xt->vars[COL_VAR].n_values;
1999  FOR_EACH_POPULATED_ROW (r, xt)
2000    FOR_EACH_POPULATED_COLUMN (c, xt)
2001      {
2002        const double expected = xt->row_tot[r] * xt->col_tot[c] / xt->total;
2003        const double freq = xt->mat[n_cols * r + c];
2004        const double residual = freq - expected;
2005
2006        chisq[0] += residual * residual / expected;
2007        if (freq)
2008          chisq[1] += freq * log (expected / freq);
2009      }
2010
2011  if (chisq[0] == 0.)
2012    chisq[0] = SYSMIS;
2013
2014  if (chisq[1] != 0.)
2015    chisq[1] *= -2.;
2016  else
2017    chisq[1] = SYSMIS;
2018
2019  /* Calculate Yates and Fisher exact test. */
2020  if (xt->ns_cols == 2 && xt->ns_rows == 2)
2021    {
2022      double f11, f12, f21, f22;
2023
2024      {
2025	int nz_cols[2];
2026
2027        int j = 0;
2028        FOR_EACH_POPULATED_COLUMN (c, xt)
2029          {
2030            nz_cols[j++] = c;
2031            if (j == 2)
2032              break;
2033          }
2034	assert (j == 2);
2035
2036	f11 = xt->mat[nz_cols[0]];
2037	f12 = xt->mat[nz_cols[1]];
2038	f21 = xt->mat[nz_cols[0] + n_cols];
2039	f22 = xt->mat[nz_cols[1] + n_cols];
2040      }
2041
2042      /* Yates. */
2043      {
2044	const double xt_ = fabs (f11 * f22 - f12 * f21) - 0.5 * xt->total;
2045
2046	if (xt_ > 0.)
2047	  chisq[3] = (xt->total * pow2 (xt_)
2048		      / (f11 + f12) / (f21 + f22)
2049		      / (f11 + f21) / (f12 + f22));
2050	else
2051	  chisq[3] = 0.;
2052
2053	df[3] = 1.;
2054      }
2055
2056      /* Fisher. */
2057      calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2058    }
2059
2060  /* Calculate Mantel-Haenszel. */
2061  if (var_is_numeric (xt->vars[ROW_VAR].var)
2062      && var_is_numeric (xt->vars[COL_VAR].var))
2063    {
2064      double r, ase_0, ase_1;
2065      calc_r (xt, (double *) xt->vars[ROW_VAR].values,
2066              (double *) xt->vars[COL_VAR].values,
2067              &r, &ase_0, &ase_1);
2068
2069      chisq[4] = (xt->total - 1.) * r * r;
2070      df[4] = 1;
2071    }
2072}
2073
2074/* Calculate the value of Pearson's r.  r is stored into R, its T value into
2075   T, and standard error into ERROR.  The row and column values must be
2076   passed in XT and Y. */
2077static void
2078calc_r (struct crosstabulation *xt,
2079        double *XT, double *Y, double *r, double *t, double *error)
2080{
2081  size_t n_rows = xt->vars[ROW_VAR].n_values;
2082  size_t n_cols = xt->vars[COL_VAR].n_values;
2083  double SX, SY, S, T;
2084  double Xbar, Ybar;
2085  double sum_XYf, sum_X2Y2f;
2086  double sum_Xr, sum_X2r;
2087  double sum_Yc, sum_Y2c;
2088  int i, j;
2089
2090  for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2091    for (j = 0; j < n_cols; j++)
2092      {
2093	double fij = xt->mat[j + i * n_cols];
2094	double product = XT[i] * Y[j];
2095	double temp = fij * product;
2096	sum_XYf += temp;
2097	sum_X2Y2f += temp * product;
2098      }
2099
2100  for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2101    {
2102      sum_Xr += XT[i] * xt->row_tot[i];
2103      sum_X2r += pow2 (XT[i]) * xt->row_tot[i];
2104    }
2105  Xbar = sum_Xr / xt->total;
2106
2107  for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2108    {
2109      sum_Yc += Y[i] * xt->col_tot[i];
2110      sum_Y2c += Y[i] * Y[i] * xt->col_tot[i];
2111    }
2112  Ybar = sum_Yc / xt->total;
2113
2114  S = sum_XYf - sum_Xr * sum_Yc / xt->total;
2115  SX = sum_X2r - pow2 (sum_Xr) / xt->total;
2116  SY = sum_Y2c - pow2 (sum_Yc) / xt->total;
2117  T = sqrt (SX * SY);
2118  *r = S / T;
2119  *t = *r / sqrt (1 - pow2 (*r)) * sqrt (xt->total - 2);
2120
2121  {
2122    double s, c, y, t;
2123
2124    for (s = c = 0., i = 0; i < n_rows; i++)
2125      for (j = 0; j < n_cols; j++)
2126	{
2127	  double Xresid, Yresid;
2128	  double temp;
2129
2130	  Xresid = XT[i] - Xbar;
2131	  Yresid = Y[j] - Ybar;
2132	  temp = (T * Xresid * Yresid
2133		  - ((S / (2. * T))
2134		     * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2135	  y = xt->mat[j + i * n_cols] * temp * temp - c;
2136	  t = s + y;
2137	  c = (t - s) - y;
2138	  s = t;
2139	}
2140    *error = sqrt (s) / (T * T);
2141  }
2142}
2143
2144/* Calculate symmetric statistics and their asymptotic standard
2145   errors.  Returns 0 if none could be calculated. */
2146static int
2147calc_symmetric (struct crosstabs_proc *proc, struct crosstabulation *xt,
2148                double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2149		double t[N_SYMMETRIC],
2150                double somers_d_v[3], double somers_d_ase[3],
2151                double somers_d_t[3])
2152{
2153  size_t n_rows = xt->vars[ROW_VAR].n_values;
2154  size_t n_cols = xt->vars[COL_VAR].n_values;
2155  int q, i;
2156
2157  q = MIN (xt->ns_rows, xt->ns_cols);
2158  if (q <= 1)
2159    return 0;
2160
2161  for (i = 0; i < N_SYMMETRIC; i++)
2162    v[i] = ase[i] = t[i] = SYSMIS;
2163
2164  /* Phi, Cramer's V, contingency coefficient. */
2165  if (proc->statistics & ((1u << CRS_ST_PHI) | (1u << CRS_ST_CC)))
2166    {
2167      double Xp = 0.;	/* Pearson chi-square. */
2168
2169      FOR_EACH_POPULATED_ROW (r, xt)
2170        FOR_EACH_POPULATED_COLUMN (c, xt)
2171          {
2172            double expected = xt->row_tot[r] * xt->col_tot[c] / xt->total;
2173            double freq = xt->mat[n_cols * r + c];
2174            double residual = freq - expected;
2175
2176            Xp += residual * residual / expected;
2177          }
2178
2179      if (proc->statistics & (1u << CRS_ST_PHI))
2180	{
2181	  v[0] = sqrt (Xp / xt->total);
2182	  v[1] = sqrt (Xp / (xt->total * (q - 1)));
2183	}
2184      if (proc->statistics & (1u << CRS_ST_CC))
2185	v[2] = sqrt (Xp / (Xp + xt->total));
2186    }
2187
2188  if (proc->statistics & ((1u << CRS_ST_BTAU) | (1u << CRS_ST_CTAU)
2189                          | (1u << CRS_ST_GAMMA) | (1u << CRS_ST_D)))
2190    {
2191      double *cum;
2192      double Dr, Dc;
2193      double P, Q;
2194      double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2195      double btau_var;
2196      int r, c;
2197
2198      Dr = Dc = pow2 (xt->total);
2199      for (r = 0; r < n_rows; r++)
2200        Dr -= pow2 (xt->row_tot[r]);
2201      for (c = 0; c < n_cols; c++)
2202        Dc -= pow2 (xt->col_tot[c]);
2203
2204      cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2205      for (c = 0; c < n_cols; c++)
2206        {
2207          double ct = 0.;
2208
2209          for (r = 0; r < n_rows; r++)
2210            cum[c + r * n_cols] = ct += xt->mat[c + r * n_cols];
2211        }
2212
2213      /* P and Q. */
2214      {
2215	int i, j;
2216	double Cij, Dij;
2217
2218	P = Q = 0.;
2219	for (i = 0; i < n_rows; i++)
2220	  {
2221	    Cij = Dij = 0.;
2222
2223	    for (j = 1; j < n_cols; j++)
2224	      Cij += xt->col_tot[j] - cum[j + i * n_cols];
2225
2226	    if (i > 0)
2227	      for (j = 1; j < n_cols; j++)
2228		Dij += cum[j + (i - 1) * n_cols];
2229
2230	    for (j = 0;;)
2231	      {
2232		double fij = xt->mat[j + i * n_cols];
2233		P += fij * Cij;
2234		Q += fij * Dij;
2235
2236		if (++j == n_cols)
2237		  break;
2238		assert (j < n_cols);
2239
2240		Cij -= xt->col_tot[j] - cum[j + i * n_cols];
2241		Dij += xt->col_tot[j - 1] - cum[j - 1 + i * n_cols];
2242
2243		if (i > 0)
2244		  {
2245		    Cij += cum[j - 1 + (i - 1) * n_cols];
2246		    Dij -= cum[j + (i - 1) * n_cols];
2247		  }
2248	      }
2249	  }
2250      }
2251
2252      if (proc->statistics & (1u << CRS_ST_BTAU))
2253	v[3] = (P - Q) / sqrt (Dr * Dc);
2254      if (proc->statistics & (1u << CRS_ST_CTAU))
2255	v[4] = (q * (P - Q)) / (pow2 (xt->total) * (q - 1));
2256      if (proc->statistics & (1u << CRS_ST_GAMMA))
2257	v[5] = (P - Q) / (P + Q);
2258
2259      /* ASE for tau-b, tau-c, gamma.  Calculations could be
2260	 eliminated here, at expense of memory.  */
2261      {
2262	int i, j;
2263	double Cij, Dij;
2264
2265	btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2266	for (i = 0; i < n_rows; i++)
2267	  {
2268	    Cij = Dij = 0.;
2269
2270	    for (j = 1; j < n_cols; j++)
2271	      Cij += xt->col_tot[j] - cum[j + i * n_cols];
2272
2273	    if (i > 0)
2274	      for (j = 1; j < n_cols; j++)
2275		Dij += cum[j + (i - 1) * n_cols];
2276
2277	    for (j = 0;;)
2278	      {
2279		double fij = xt->mat[j + i * n_cols];
2280
2281		if (proc->statistics & (1u << CRS_ST_BTAU))
2282		  {
2283		    const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2284					 + v[3] * (xt->row_tot[i] * Dc
2285						   + xt->col_tot[j] * Dr));
2286		    btau_cum += fij * temp * temp;
2287		  }
2288
2289		{
2290		  const double temp = Cij - Dij;
2291		  ctau_cum += fij * temp * temp;
2292		}
2293
2294		if (proc->statistics & (1u << CRS_ST_GAMMA))
2295		  {
2296		    const double temp = Q * Cij - P * Dij;
2297		    gamma_cum += fij * temp * temp;
2298		  }
2299
2300		if (proc->statistics & (1u << CRS_ST_D))
2301		  {
2302		    d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2303                                            - (P - Q) * (xt->total - xt->row_tot[i]));
2304		    d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2305                                            - (Q - P) * (xt->total - xt->col_tot[j]));
2306		  }
2307
2308		if (++j == n_cols)
2309		  break;
2310		assert (j < n_cols);
2311
2312		Cij -= xt->col_tot[j] - cum[j + i * n_cols];
2313		Dij += xt->col_tot[j - 1] - cum[j - 1 + i * n_cols];
2314
2315		if (i > 0)
2316		  {
2317		    Cij += cum[j - 1 + (i - 1) * n_cols];
2318		    Dij -= cum[j + (i - 1) * n_cols];
2319		  }
2320	      }
2321	  }
2322      }
2323
2324      btau_var = ((btau_cum
2325		   - (xt->total * pow2 (xt->total * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2326		  / pow2 (Dr * Dc));
2327      if (proc->statistics & (1u << CRS_ST_BTAU))
2328	{
2329	  ase[3] = sqrt (btau_var);
2330	  t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / xt->total)
2331				   / (Dr * Dc)));
2332	}
2333      if (proc->statistics & (1u << CRS_ST_CTAU))
2334	{
2335	  ase[4] = ((2 * q / ((q - 1) * pow2 (xt->total)))
2336		    * sqrt (ctau_cum - (P - Q) * (P - Q) / xt->total));
2337	  t[4] = v[4] / ase[4];
2338	}
2339      if (proc->statistics & (1u << CRS_ST_GAMMA))
2340	{
2341	  ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2342	  t[5] = v[5] / (2. / (P + Q)
2343			 * sqrt (ctau_cum - (P - Q) * (P - Q) / xt->total));
2344	}
2345      if (proc->statistics & (1u << CRS_ST_D))
2346	{
2347	  somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2348	  somers_d_ase[0] = SYSMIS;
2349	  somers_d_t[0] = (somers_d_v[0]
2350			   / (4 / (Dc + Dr)
2351			      * sqrt (ctau_cum - pow2 (P - Q) / xt->total)));
2352	  somers_d_v[1] = (P - Q) / Dc;
2353	  somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2354	  somers_d_t[1] = (somers_d_v[1]
2355			   / (2. / Dc
2356			      * sqrt (ctau_cum - pow2 (P - Q) / xt->total)));
2357	  somers_d_v[2] = (P - Q) / Dr;
2358	  somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2359	  somers_d_t[2] = (somers_d_v[2]
2360			   / (2. / Dr
2361			      * sqrt (ctau_cum - pow2 (P - Q) / xt->total)));
2362	}
2363
2364      free (cum);
2365    }
2366
2367  /* Spearman correlation, Pearson's r. */
2368  if (proc->statistics & (1u << CRS_ST_CORR))
2369    {
2370      double *R = xmalloc (sizeof *R * n_rows);
2371      double *C = xmalloc (sizeof *C * n_cols);
2372
2373      {
2374	double y, t, c = 0., s = 0.;
2375	int i = 0;
2376
2377	for (;;)
2378	  {
2379	    R[i] = s + (xt->row_tot[i] + 1.) / 2.;
2380	    y = xt->row_tot[i] - c;
2381	    t = s + y;
2382	    c = (t - s) - y;
2383	    s = t;
2384	    if (++i == n_rows)
2385	      break;
2386	    assert (i < n_rows);
2387	  }
2388      }
2389
2390      {
2391	double y, t, c = 0., s = 0.;
2392	int j = 0;
2393
2394	for (;;)
2395	  {
2396	    C[j] = s + (xt->col_tot[j] + 1.) / 2;
2397	    y = xt->col_tot[j] - c;
2398	    t = s + y;
2399	    c = (t - s) - y;
2400	    s = t;
2401	    if (++j == n_cols)
2402	      break;
2403	    assert (j < n_cols);
2404	  }
2405      }
2406
2407      calc_r (xt, R, C, &v[6], &t[6], &ase[6]);
2408
2409      free (R);
2410      free (C);
2411
2412      calc_r (xt, (double *) xt->vars[ROW_VAR].values,
2413              (double *) xt->vars[COL_VAR].values,
2414              &v[7], &t[7], &ase[7]);
2415    }
2416
2417  /* Cohen's kappa. */
2418  if (proc->statistics & (1u << CRS_ST_KAPPA) && xt->ns_rows == xt->ns_cols)
2419    {
2420      double ase_under_h0;
2421      double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2422      int i, j;
2423
2424      for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2425	   i < xt->ns_rows; i++, j++)
2426	{
2427	  double prod, sum;
2428
2429	  while (xt->col_tot[j] == 0.)
2430	    j++;
2431
2432	  prod = xt->row_tot[i] * xt->col_tot[j];
2433	  sum = xt->row_tot[i] + xt->col_tot[j];
2434
2435	  sum_fii += xt->mat[j + i * n_cols];
2436	  sum_rici += prod;
2437	  sum_fiiri_ci += xt->mat[j + i * n_cols] * sum;
2438	  sum_riciri_ci += prod * sum;
2439	}
2440      for (sum_fijri_ci2 = 0., i = 0; i < xt->ns_rows; i++)
2441	for (j = 0; j < xt->ns_cols; j++)
2442	  {
2443	    double sum = xt->row_tot[i] + xt->col_tot[j];
2444	    sum_fijri_ci2 += xt->mat[j + i * n_cols] * sum * sum;
2445	  }
2446
2447      v[8] = (xt->total * sum_fii - sum_rici) / (pow2 (xt->total) - sum_rici);
2448
2449      ase_under_h0 = sqrt ((pow2 (xt->total) * sum_rici
2450			    + sum_rici * sum_rici
2451			    - xt->total * sum_riciri_ci)
2452			   / (xt->total * (pow2 (xt->total) - sum_rici) * (pow2 (xt->total) - sum_rici)));
2453
2454      ase[8] = sqrt (xt->total * (((sum_fii * (xt->total - sum_fii))
2455				/ pow2 (pow2 (xt->total) - sum_rici))
2456			       + ((2. * (xt->total - sum_fii)
2457				   * (2. * sum_fii * sum_rici
2458				      - xt->total * sum_fiiri_ci))
2459				  / pow3 (pow2 (xt->total) - sum_rici))
2460			       + (pow2 (xt->total - sum_fii)
2461				  * (xt->total * sum_fijri_ci2 - 4.
2462				     * sum_rici * sum_rici)
2463				  / pow4 (pow2 (xt->total) - sum_rici))));
2464
2465      t[8] = v[8] / ase_under_h0;
2466    }
2467
2468  return 1;
2469}
2470
2471/* Calculate risk estimate. */
2472static bool
2473calc_risk (struct crosstabulation *xt,
2474           double *value, double *upper, double *lower, union value *c,
2475           double *n_valid)
2476{
2477  size_t n_cols = xt->vars[COL_VAR].n_values;
2478  double f11, f12, f21, f22;
2479  double v;
2480
2481  for (int i = 0; i < 3; i++)
2482    value[i] = upper[i] = lower[i] = SYSMIS;
2483
2484  if (xt->ns_rows != 2 || xt->ns_cols != 2)
2485    return false;
2486
2487  {
2488    /* Find populated columns. */
2489    int nz_cols[2];
2490    int n = 0;
2491    FOR_EACH_POPULATED_COLUMN (c, xt)
2492      nz_cols[n++] = c;
2493    assert (n == 2);
2494
2495    /* Find populated rows. */
2496    int nz_rows[2];
2497    n = 0;
2498    FOR_EACH_POPULATED_ROW (r, xt)
2499      nz_rows[n++] = r;
2500    assert (n == 2);
2501
2502    f11 = xt->mat[nz_cols[0] + n_cols * nz_rows[0]];
2503    f12 = xt->mat[nz_cols[1] + n_cols * nz_rows[0]];
2504    f21 = xt->mat[nz_cols[0] + n_cols * nz_rows[1]];
2505    f22 = xt->mat[nz_cols[1] + n_cols * nz_rows[1]];
2506    *n_valid = f11 + f12 + f21 + f22;
2507
2508    c[0] = xt->vars[COL_VAR].values[nz_cols[0]];
2509    c[1] = xt->vars[COL_VAR].values[nz_cols[1]];
2510  }
2511
2512  value[0] = (f11 * f22) / (f12 * f21);
2513  v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2514  lower[0] = value[0] * exp (-1.960 * v);
2515  upper[0] = value[0] * exp (1.960 * v);
2516
2517  value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2518  v = sqrt ((f12 / (f11 * (f11 + f12)))
2519	    + (f22 / (f21 * (f21 + f22))));
2520  lower[1] = value[1] * exp (-1.960 * v);
2521  upper[1] = value[1] * exp (1.960 * v);
2522
2523  value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2524  v = sqrt ((f11 / (f12 * (f11 + f12)))
2525	    + (f21 / (f22 * (f21 + f22))));
2526  lower[2] = value[2] * exp (-1.960 * v);
2527  upper[2] = value[2] * exp (1.960 * v);
2528
2529  return true;
2530}
2531
2532/* Calculate directional measures. */
2533static int
2534calc_directional (struct crosstabs_proc *proc, struct crosstabulation *xt,
2535                  double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2536		  double t[N_DIRECTIONAL], double sig[N_DIRECTIONAL])
2537{
2538  size_t n_rows = xt->vars[ROW_VAR].n_values;
2539  size_t n_cols = xt->vars[COL_VAR].n_values;
2540  for (int i = 0; i < N_DIRECTIONAL; i++)
2541    v[i] = ase[i] = t[i] = sig[i] = SYSMIS;
2542
2543  /* Lambda. */
2544  if (proc->statistics & (1u << CRS_ST_LAMBDA))
2545    {
2546      /* Find maximum for each row and their sum. */
2547      double *fim = xnmalloc (n_rows, sizeof *fim);
2548      int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2549      double sum_fim = 0.0;
2550      for (int i = 0; i < n_rows; i++)
2551	{
2552	  double max = xt->mat[i * n_cols];
2553	  int index = 0;
2554
2555	  for (int j = 1; j < n_cols; j++)
2556	    if (xt->mat[j + i * n_cols] > max)
2557	      {
2558		max = xt->mat[j + i * n_cols];
2559		index = j;
2560	      }
2561
2562          fim[i] = max;
2563	  sum_fim += max;
2564	  fim_index[i] = index;
2565	}
2566
2567      /* Find maximum for each column. */
2568      double *fmj = xnmalloc (n_cols, sizeof *fmj);
2569      int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2570      double sum_fmj = 0.0;
2571      for (int j = 0; j < n_cols; j++)
2572	{
2573	  double max = xt->mat[j];
2574	  int index = 0;
2575
2576	  for (int i = 1; i < n_rows; i++)
2577	    if (xt->mat[j + i * n_cols] > max)
2578	      {
2579		max = xt->mat[j + i * n_cols];
2580		index = i;
2581	      }
2582
2583          fmj[j] = max;
2584	  sum_fmj += max;
2585	  fmj_index[j] = index;
2586	}
2587
2588      /* Find maximum row total. */
2589      double rm = xt->row_tot[0];
2590      int rm_index = 0;
2591      for (int i = 1; i < n_rows; i++)
2592	if (xt->row_tot[i] > rm)
2593	  {
2594	    rm = xt->row_tot[i];
2595	    rm_index = i;
2596	  }
2597
2598      /* Find maximum column total. */
2599      double cm = xt->col_tot[0];
2600      int cm_index = 0;
2601      for (int j = 1; j < n_cols; j++)
2602	if (xt->col_tot[j] > cm)
2603	  {
2604	    cm = xt->col_tot[j];
2605	    cm_index = j;
2606	  }
2607
2608      v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * xt->total - rm - cm);
2609      v[1] = (sum_fmj - rm) / (xt->total - rm);
2610      v[2] = (sum_fim - cm) / (xt->total - cm);
2611
2612      /* ASE1 for Y given XT. */
2613      {
2614        double accum = 0.0;
2615	for (int i = 0; i < n_rows; i++)
2616          if (cm_index == fim_index[i])
2617            accum += fim[i];
2618        ase[2] = sqrt ((xt->total - sum_fim) * (sum_fim + cm - 2. * accum)
2619                       / pow3 (xt->total - cm));
2620      }
2621
2622      /* ASE0 for Y given XT. */
2623      {
2624	double accum = 0.0;
2625	for (int i = 0; i < n_rows; i++)
2626	  if (cm_index != fim_index[i])
2627	    accum += (xt->mat[i * n_cols + fim_index[i]]
2628		      + xt->mat[i * n_cols + cm_index]);
2629	t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / xt->total) / (xt->total - cm));
2630      }
2631
2632      /* ASE1 for XT given Y. */
2633      {
2634        double accum = 0.0;
2635	for (int j = 0; j < n_cols; j++)
2636          if (rm_index == fmj_index[j])
2637            accum += fmj[j];
2638        ase[1] = sqrt ((xt->total - sum_fmj) * (sum_fmj + rm - 2. * accum)
2639                       / pow3 (xt->total - rm));
2640      }
2641
2642      /* ASE0 for XT given Y. */
2643      {
2644	double accum = 0.0;
2645	for (int j = 0; j < n_cols; j++)
2646	  if (rm_index != fmj_index[j])
2647	    accum += (xt->mat[j + n_cols * fmj_index[j]]
2648		      + xt->mat[j + n_cols * rm_index]);
2649	t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / xt->total) / (xt->total - rm));
2650      }
2651
2652      /* Symmetric ASE0 and ASE1. */
2653      {
2654	double accum0 = 0.0;
2655	double accum1 = 0.0;
2656	for (int i = 0; i < n_rows; i++)
2657	  for (int j = 0; j < n_cols; j++)
2658	    {
2659	      int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
2660	      int temp1 = (i == rm_index) + (j == cm_index);
2661	      accum0 += xt->mat[j + i * n_cols] * pow2 (temp0 - temp1);
2662	      accum1 += (xt->mat[j + i * n_cols]
2663			 * pow2 (temp0 + (v[0] - 1.) * temp1));
2664	    }
2665	ase[0] = sqrt (accum1 - 4. * xt->total * v[0] * v[0]) / (2. * xt->total - rm - cm);
2666	t[0] = v[0] / (sqrt (accum0 - pow2 (sum_fim + sum_fmj - cm - rm) / xt->total)
2667		       / (2. * xt->total - rm - cm));
2668      }
2669
2670      for (int i = 0; i < 3; i++)
2671        sig[i] = 2 * gsl_cdf_ugaussian_Q (t[i]);
2672
2673      free (fim);
2674      free (fim_index);
2675      free (fmj);
2676      free (fmj_index);
2677
2678      /* Tau. */
2679      {
2680	double sum_fij2_ri = 0.0;
2681        double sum_fij2_ci = 0.0;
2682        FOR_EACH_POPULATED_ROW (i, xt)
2683          FOR_EACH_POPULATED_COLUMN (j, xt)
2684	    {
2685	      double temp = pow2 (xt->mat[j + i * n_cols]);
2686	      sum_fij2_ri += temp / xt->row_tot[i];
2687	      sum_fij2_ci += temp / xt->col_tot[j];
2688	    }
2689
2690	double sum_ri2 = 0.0;
2691	for (int i = 0; i < n_rows; i++)
2692	  sum_ri2 += pow2 (xt->row_tot[i]);
2693
2694        double sum_cj2 = 0.0;
2695	for (int j = 0; j < n_cols; j++)
2696	  sum_cj2 += pow2 (xt->col_tot[j]);
2697
2698	v[3] = (xt->total * sum_fij2_ci - sum_ri2) / (pow2 (xt->total) - sum_ri2);
2699	v[4] = (xt->total * sum_fij2_ri - sum_cj2) / (pow2 (xt->total) - sum_cj2);
2700      }
2701    }
2702
2703  if (proc->statistics & (1u << CRS_ST_UC))
2704    {
2705      double UX = 0.0;
2706      FOR_EACH_POPULATED_ROW (i, xt)
2707        UX -= xt->row_tot[i] / xt->total * log (xt->row_tot[i] / xt->total);
2708
2709      double UY = 0.0;
2710      FOR_EACH_POPULATED_COLUMN (j, xt)
2711        UY -= xt->col_tot[j] / xt->total * log (xt->col_tot[j] / xt->total);
2712
2713      double UXY = 0.0;
2714      double P = 0.0;
2715      for (int i = 0; i < n_rows; i++)
2716	for (int j = 0; j < n_cols; j++)
2717	  {
2718	    double entry = xt->mat[j + i * n_cols];
2719
2720	    if (entry <= 0.)
2721	      continue;
2722
2723	    P += entry * pow2 (log (xt->col_tot[j] * xt->row_tot[i] / (xt->total * entry)));
2724	    UXY -= entry / xt->total * log (entry / xt->total);
2725	  }
2726
2727      double ase1_yx = 0.0;
2728      double ase1_xy = 0.0;
2729      double ase1_sym = 0.0;
2730      for (int i = 0; i < n_rows; i++)
2731	for (int j = 0; j < n_cols; j++)
2732	  {
2733	    double entry = xt->mat[j + i * n_cols];
2734
2735	    if (entry <= 0.)
2736	      continue;
2737
2738	    ase1_yx += entry * pow2 (UY * log (entry / xt->row_tot[i])
2739				    + (UX - UXY) * log (xt->col_tot[j] / xt->total));
2740	    ase1_xy += entry * pow2 (UX * log (entry / xt->col_tot[j])
2741				    + (UY - UXY) * log (xt->row_tot[i] / xt->total));
2742	    ase1_sym += entry * pow2 ((UXY
2743				      * log (xt->row_tot[i] * xt->col_tot[j] / pow2 (xt->total)))
2744				     - (UX + UY) * log (entry / xt->total));
2745	  }
2746
2747      v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
2748      ase[5] = (2. / (xt->total * pow2 (UX + UY))) * sqrt (ase1_sym);
2749      t[5] = SYSMIS;
2750
2751      v[6] = (UX + UY - UXY) / UX;
2752      ase[6] = sqrt (ase1_xy) / (xt->total * UX * UX);
2753      t[6] = v[6] / (sqrt (P - xt->total * pow2 (UX + UY - UXY)) / (xt->total * UX));
2754
2755      v[7] = (UX + UY - UXY) / UY;
2756      ase[7] = sqrt (ase1_yx) / (xt->total * UY * UY);
2757      t[7] = v[7] / (sqrt (P - xt->total * pow2 (UX + UY - UXY)) / (xt->total * UY));
2758    }
2759
2760  /* Somers' D. */
2761  if (proc->statistics & (1u << CRS_ST_D))
2762    {
2763      double v_dummy[N_SYMMETRIC];
2764      double ase_dummy[N_SYMMETRIC];
2765      double t_dummy[N_SYMMETRIC];
2766      double somers_d_v[3];
2767      double somers_d_ase[3];
2768      double somers_d_t[3];
2769
2770      if (calc_symmetric (proc, xt, v_dummy, ase_dummy, t_dummy,
2771                          somers_d_v, somers_d_ase, somers_d_t))
2772        {
2773          for (int i = 0; i < 3; i++)
2774            {
2775              v[8 + i] = somers_d_v[i];
2776              ase[8 + i] = somers_d_ase[i];
2777              t[8 + i] = somers_d_t[i];
2778              sig[8 + i] = 2 * gsl_cdf_ugaussian_Q (fabs (somers_d_t[i]));
2779            }
2780        }
2781    }
2782
2783  /* Eta. */
2784  if (proc->statistics & (1u << CRS_ST_ETA))
2785    {
2786      /* X dependent. */
2787      double sum_Xr = 0.0;
2788      double sum_X2r = 0.0;
2789      for (int i = 0; i < n_rows; i++)
2790        {
2791          sum_Xr += xt->vars[ROW_VAR].values[i].f * xt->row_tot[i];
2792          sum_X2r += pow2 (xt->vars[ROW_VAR].values[i].f) * xt->row_tot[i];
2793        }
2794      double SX = sum_X2r - pow2 (sum_Xr) / xt->total;
2795
2796      double SXW = 0.0;
2797      FOR_EACH_POPULATED_COLUMN (j, xt)
2798        {
2799          double cum = 0.0;
2800
2801          for (int i = 0; i < n_rows; i++)
2802            {
2803              SXW += (pow2 (xt->vars[ROW_VAR].values[i].f)
2804                      * xt->mat[j + i * n_cols]);
2805              cum += (xt->vars[ROW_VAR].values[i].f
2806                      * xt->mat[j + i * n_cols]);
2807            }
2808
2809          SXW -= cum * cum / xt->col_tot[j];
2810        }
2811      v[11] = sqrt (1. - SXW / SX);
2812
2813      /* Y dependent. */
2814      double sum_Yc = 0.0;
2815      double sum_Y2c = 0.0;
2816      for (int i = 0; i < n_cols; i++)
2817        {
2818          sum_Yc += xt->vars[COL_VAR].values[i].f * xt->col_tot[i];
2819          sum_Y2c += pow2 (xt->vars[COL_VAR].values[i].f) * xt->col_tot[i];
2820        }
2821      double SY = sum_Y2c - pow2 (sum_Yc) / xt->total;
2822
2823      double SYW = 0.0;
2824      FOR_EACH_POPULATED_ROW (i, xt)
2825        {
2826          double cum = 0.0;
2827          for (int j = 0; j < n_cols; j++)
2828            {
2829              SYW += (pow2 (xt->vars[COL_VAR].values[j].f)
2830                      * xt->mat[j + i * n_cols]);
2831              cum += (xt->vars[COL_VAR].values[j].f
2832                      * xt->mat[j + i * n_cols]);
2833            }
2834
2835          SYW -= cum * cum / xt->row_tot[i];
2836        }
2837      v[12] = sqrt (1. - SYW / SY);
2838    }
2839
2840  return 1;
2841}
2842
2843/*
2844   Local Variables:
2845   mode: c
2846   End:
2847*/
2848