1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2006, 2009, 2010, 2011, 2014 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17 #include <config.h>
18
19 #include "language/stats/binomial.h"
20
21 #include <float.h>
22 #include <gsl/gsl_cdf.h>
23 #include <gsl/gsl_randist.h>
24
25 #include "data/case.h"
26 #include "data/casereader.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/format.h"
30 #include "data/value-labels.h"
31 #include "data/value.h"
32 #include "data/variable.h"
33 #include "language/stats/freq.h"
34 #include "libpspp/assertion.h"
35 #include "libpspp/compiler.h"
36 #include "libpspp/message.h"
37 #include "libpspp/misc.h"
38 #include "output/pivot-table.h"
39
40 #include "gl/xalloc.h"
41 #include "gl/minmax.h"
42
43 #include "gettext.h"
44 #define N_(msgid) msgid
45 #define _(msgid) gettext (msgid)
46
47 static double calculate_binomial_internal (double n1, double n2,
48 double p);
49
50
51 static void
swap(double * i1,double * i2)52 swap (double *i1, double *i2)
53 {
54 double temp = *i1;
55 *i1 = *i2;
56 *i2 = temp;
57 }
58
59 static double
calculate_binomial(double n1,double n2,double p)60 calculate_binomial (double n1, double n2, double p)
61 {
62 const double n = n1 + n2;
63 const bool test_reversed = (n1 / n > p) ;
64 if (test_reversed)
65 {
66 p = 1 - p ;
67 swap (&n1, &n2);
68 }
69
70 return calculate_binomial_internal (n1, n2, p);
71 }
72
73 static double
calculate_binomial_internal(double n1,double n2,double p)74 calculate_binomial_internal (double n1, double n2, double p)
75 {
76 /* SPSS Statistical Algorithms has completely different and WRONG
77 advice here. */
78
79 double sig1tailed = gsl_cdf_binomial_P (n1, p, n1 + n2);
80
81 if (p == 0.5)
82 return sig1tailed > 0.5 ? 1.0 :sig1tailed * 2.0;
83
84 return sig1tailed ;
85 }
86
87 static bool
do_binomial(const struct dictionary * dict,struct casereader * input,const struct one_sample_test * ost,struct freq * cat1,struct freq * cat2,enum mv_class exclude)88 do_binomial (const struct dictionary *dict,
89 struct casereader *input,
90 const struct one_sample_test *ost,
91 struct freq *cat1,
92 struct freq *cat2,
93 enum mv_class exclude
94 )
95 {
96 const struct binomial_test *bst = UP_CAST (ost, const struct binomial_test, parent);
97 bool warn = true;
98
99 struct ccase *c;
100
101 for (; (c = casereader_read (input)) != NULL; case_unref (c))
102 {
103 int v;
104 double w = dict_get_case_weight (dict, c, &warn);
105
106 for (v = 0 ; v < ost->n_vars ; ++v)
107 {
108 const struct variable *var = ost->vars[v];
109 double value = case_num (c, var);
110
111 if (var_is_num_missing (var, value, exclude))
112 continue;
113
114 if (bst->cutpoint != SYSMIS)
115 {
116 if (cat1[v].values[0].f >= value)
117 cat1[v].count += w;
118 else
119 cat2[v].count += w;
120 }
121 else
122 {
123 if (SYSMIS == cat1[v].values[0].f)
124 {
125 cat1[v].values[0].f = value;
126 cat1[v].count = w;
127 }
128 else if (cat1[v].values[0].f == value)
129 cat1[v].count += w;
130 else if (SYSMIS == cat2[v].values[0].f)
131 {
132 cat2[v].values[0].f = value;
133 cat2[v].count = w;
134 }
135 else if (cat2[v].values[0].f == value)
136 cat2[v].count += w;
137 else if (bst->category1 == SYSMIS)
138 msg (ME, _("Variable %s is not dichotomous"), var_get_name (var));
139 }
140 }
141 }
142 return casereader_destroy (input);
143 }
144
145
146
147 void
binomial_execute(const struct dataset * ds,struct casereader * input,enum mv_class exclude,const struct npar_test * test,bool exact UNUSED,double timer UNUSED)148 binomial_execute (const struct dataset *ds,
149 struct casereader *input,
150 enum mv_class exclude,
151 const struct npar_test *test,
152 bool exact UNUSED,
153 double timer UNUSED)
154 {
155 const struct dictionary *dict = dataset_dict (ds);
156 const struct one_sample_test *ost = UP_CAST (test, const struct one_sample_test, parent);
157 const struct binomial_test *bst = UP_CAST (ost, const struct binomial_test, parent);
158
159 struct freq *cat[2];
160 int i;
161
162 assert ((bst->category1 == SYSMIS) == (bst->category2 == SYSMIS) || bst->cutpoint != SYSMIS);
163
164 for (i = 0; i < 2; i++)
165 {
166 double value;
167 if (i == 0)
168 value = bst->cutpoint != SYSMIS ? bst->cutpoint : bst->category1;
169 else
170 value = bst->category2;
171
172 cat[i] = xnmalloc (ost->n_vars, sizeof *cat[i]);
173 for (size_t v = 0; v < ost->n_vars; v++)
174 {
175 cat[i][v].values[0].f = value;
176 cat[i][v].count = 0;
177 }
178 }
179
180 if (do_binomial (dataset_dict (ds), input, ost, cat[0], cat[1], exclude))
181 {
182 struct pivot_table *table = pivot_table_create (N_("Binomial Test"));
183 pivot_table_set_weight_var (table, dict_get_weight (dict));
184
185 pivot_dimension_create (
186 table, PIVOT_AXIS_COLUMN, N_("Statistics"),
187 N_("Category"),
188 N_("N"), PIVOT_RC_COUNT,
189 N_("Observed Prop."), PIVOT_RC_OTHER,
190 N_("Test Prop."), PIVOT_RC_OTHER,
191 (bst->p == 0.5
192 ? N_("Exact Sig. (2-tailed)")
193 : N_("Exact Sig. (1-tailed)")), PIVOT_RC_SIGNIFICANCE);
194
195 pivot_dimension_create (table, PIVOT_AXIS_ROW, N_("Groups"),
196 N_("Group 1"), N_("Group 2"), N_("Total"));
197
198 struct pivot_dimension *variables = pivot_dimension_create (
199 table, PIVOT_AXIS_ROW, N_("Variables"));
200
201 for (size_t v = 0; v < ost->n_vars; ++v)
202 {
203 const struct variable *var = ost->vars[v];
204
205 int var_idx = pivot_category_create_leaf (
206 variables->root, pivot_value_new_variable (var));
207
208 /* Category. */
209 if (bst->cutpoint != SYSMIS)
210 pivot_table_put3 (
211 table, 0, 0, var_idx,
212 pivot_value_new_user_text_nocopy (
213 xasprintf ("<= %.*g", DBL_DIG + 1, bst->cutpoint)));
214 else
215 for (int i = 0; i < 2; i++)
216 pivot_table_put3 (
217 table, 0, i, var_idx,
218 pivot_value_new_var_value (var, cat[i][v].values));
219
220 double n_total = cat[0][v].count + cat[1][v].count;
221 double sig = calculate_binomial (cat[0][v].count, cat[1][v].count,
222 bst->p);
223 struct entry
224 {
225 int stat_idx;
226 int group_idx;
227 double x;
228 }
229 entries[] = {
230 /* N. */
231 { 1, 0, cat[0][v].count },
232 { 1, 1, cat[1][v].count },
233 { 1, 2, n_total },
234 /* Observed Prop. */
235 { 2, 0, cat[0][v].count / n_total },
236 { 2, 1, cat[1][v].count / n_total },
237 { 2, 2, 1.0 },
238 /* Test Prop. */
239 { 3, 0, bst->p },
240 /* Significance. */
241 { 4, 0, sig }
242 };
243 for (size_t i = 0; i < sizeof entries / sizeof *entries; i++)
244 {
245 const struct entry *e = &entries[i];
246 pivot_table_put3 (table, e->stat_idx, e->group_idx,
247 var_idx, pivot_value_new_number (e->x));
248 }
249 }
250
251 pivot_table_submit (table);
252 }
253
254 for (i = 0; i < 2; i++)
255 free (cat[i]);
256 }
257