1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2009, 2010, 2011, 2013, 2015, 2016 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17 #include <config.h>
18
19 #include <math.h>
20
21 #include "data/casegrouper.h"
22 #include "data/casereader.h"
23 #include "data/dataset.h"
24 #include "data/dictionary.h"
25 #include "data/format.h"
26 #include "data/missing-values.h"
27 #include "language/command.h"
28 #include "language/lexer/lexer.h"
29 #include "language/lexer/variable-parser.h"
30 #include "libpspp/message.h"
31 #include "libpspp/misc.h"
32 #include "libpspp/str.h"
33 #include "math/moments.h"
34 #include "output/pivot-table.h"
35 #include "output/text-item.h"
36
37 #include "gettext.h"
38 #define _(msgid) gettext (msgid)
39 #define N_(msgid) msgid
40
41 struct cronbach
42 {
43 const struct variable **items;
44 size_t n_items;
45 double alpha;
46 double sum_of_variances;
47 double variance_of_sums;
48 int totals_idx; /* Casereader index into the totals */
49
50 struct moments1 **m ; /* Moments of the items */
51 struct moments1 *total ; /* Moments of the totals */
52 };
53
54 #if 0
55 static void
56 dump_cronbach (const struct cronbach *s)
57 {
58 int i;
59 printf ("N items %d\n", s->n_items);
60 for (i = 0 ; i < s->n_items; ++i)
61 {
62 printf ("%s\n", var_get_name (s->items[i]));
63 }
64
65 printf ("Totals idx %d\n", s->totals_idx);
66
67 printf ("scale variance %g\n", s->variance_of_sums);
68 printf ("alpha %g\n", s->alpha);
69 putchar ('\n');
70 }
71 #endif
72
73 enum model
74 {
75 MODEL_ALPHA,
76 MODEL_SPLIT
77 };
78
79
80 enum summary_opts
81 {
82 SUMMARY_TOTAL = 0x0001,
83 };
84
85
86 struct reliability
87 {
88 const struct variable **variables;
89 size_t n_variables;
90 enum mv_class exclude;
91
92 struct cronbach *sc;
93 int n_sc;
94
95 int total_start;
96
97 struct string scale_name;
98
99 enum model model;
100 int split_point;
101
102
103 enum summary_opts summary;
104
105 const struct variable *wv;
106 };
107
108
109 static bool run_reliability (struct dataset *ds, const struct reliability *reliability);
110
111 static void
reliability_destroy(struct reliability * rel)112 reliability_destroy (struct reliability *rel)
113 {
114 int j;
115 ds_destroy (&rel->scale_name);
116 if (rel->sc)
117 for (j = 0; j < rel->n_sc ; ++j)
118 {
119 int x;
120 free (rel->sc[j].items);
121 moments1_destroy (rel->sc[j].total);
122 if (rel->sc[j].m)
123 for (x = 0; x < rel->sc[j].n_items; ++x)
124 free (rel->sc[j].m[x]);
125 free (rel->sc[j].m);
126 }
127
128 free (rel->sc);
129 free (rel->variables);
130 }
131
132 int
cmd_reliability(struct lexer * lexer,struct dataset * ds)133 cmd_reliability (struct lexer *lexer, struct dataset *ds)
134 {
135 const struct dictionary *dict = dataset_dict (ds);
136
137 struct reliability reliability;
138 reliability.n_variables = 0;
139 reliability.variables = NULL;
140 reliability.model = MODEL_ALPHA;
141 reliability.exclude = MV_ANY;
142 reliability.summary = 0;
143 reliability.n_sc = 0;
144 reliability.sc = NULL;
145 reliability.wv = dict_get_weight (dict);
146 reliability.total_start = 0;
147 ds_init_empty (&reliability.scale_name);
148
149
150 lex_match (lexer, T_SLASH);
151
152 if (!lex_force_match_id (lexer, "VARIABLES"))
153 {
154 goto error;
155 }
156
157 lex_match (lexer, T_EQUALS);
158
159 if (!parse_variables_const (lexer, dict, &reliability.variables, &reliability.n_variables,
160 PV_NO_DUPLICATE | PV_NUMERIC))
161 goto error;
162
163 if (reliability.n_variables < 2)
164 msg (MW, _("Reliability on a single variable is not useful."));
165
166
167 {
168 int i;
169 struct cronbach *c;
170 /* Create a default Scale */
171
172 reliability.n_sc = 1;
173 reliability.sc = xzalloc (sizeof (struct cronbach) * reliability.n_sc);
174
175 ds_assign_cstr (&reliability.scale_name, "ANY");
176
177 c = &reliability.sc[0];
178 c->n_items = reliability.n_variables;
179 c->items = xzalloc (sizeof (struct variable*) * c->n_items);
180
181 for (i = 0 ; i < c->n_items ; ++i)
182 c->items[i] = reliability.variables[i];
183 }
184
185
186
187 while (lex_token (lexer) != T_ENDCMD)
188 {
189 lex_match (lexer, T_SLASH);
190
191 if (lex_match_id (lexer, "SCALE"))
192 {
193 struct const_var_set *vs;
194 if (! lex_force_match (lexer, T_LPAREN))
195 goto error;
196
197 if (! lex_force_string (lexer))
198 goto error;
199
200 ds_assign_substring (&reliability.scale_name, lex_tokss (lexer));
201
202 lex_get (lexer);
203
204 if (! lex_force_match (lexer, T_RPAREN))
205 goto error;
206
207 lex_match (lexer, T_EQUALS);
208
209 vs = const_var_set_create_from_array (reliability.variables, reliability.n_variables);
210
211 free (reliability.sc->items);
212 if (!parse_const_var_set_vars (lexer, vs, &reliability.sc->items, &reliability.sc->n_items, 0))
213 {
214 const_var_set_destroy (vs);
215 goto error;
216 }
217
218 const_var_set_destroy (vs);
219 }
220 else if (lex_match_id (lexer, "MODEL"))
221 {
222 lex_match (lexer, T_EQUALS);
223 if (lex_match_id (lexer, "ALPHA"))
224 {
225 reliability.model = MODEL_ALPHA;
226 }
227 else if (lex_match_id (lexer, "SPLIT"))
228 {
229 reliability.model = MODEL_SPLIT;
230 reliability.split_point = -1;
231
232 if (lex_match (lexer, T_LPAREN)
233 && lex_force_num (lexer))
234 {
235 reliability.split_point = lex_number (lexer);
236 lex_get (lexer);
237 if (! lex_force_match (lexer, T_RPAREN))
238 goto error;
239 }
240 }
241 else
242 goto error;
243 }
244 else if (lex_match_id (lexer, "SUMMARY"))
245 {
246 lex_match (lexer, T_EQUALS);
247 if (lex_match_id (lexer, "TOTAL"))
248 {
249 reliability.summary |= SUMMARY_TOTAL;
250 }
251 else if (lex_match (lexer, T_ALL))
252 {
253 reliability.summary = 0xFFFF;
254 }
255 else
256 goto error;
257 }
258 else if (lex_match_id (lexer, "MISSING"))
259 {
260 lex_match (lexer, T_EQUALS);
261 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
262 {
263 if (lex_match_id (lexer, "INCLUDE"))
264 {
265 reliability.exclude = MV_SYSTEM;
266 }
267 else if (lex_match_id (lexer, "EXCLUDE"))
268 {
269 reliability.exclude = MV_ANY;
270 }
271 else
272 {
273 lex_error (lexer, NULL);
274 goto error;
275 }
276 }
277 }
278 else if (lex_match_id (lexer, "STATISTICS"))
279 {
280 lex_match (lexer, T_EQUALS);
281 msg (SW, _("The STATISTICS subcommand is not yet implemented. "
282 "No statistics will be produced."));
283 while (lex_match (lexer, T_ID))
284 continue;
285 }
286 else
287 {
288 lex_error (lexer, NULL);
289 goto error;
290 }
291 }
292
293 if (reliability.model == MODEL_SPLIT)
294 {
295 int i;
296 const struct cronbach *s;
297
298 if (reliability.split_point >= reliability.n_variables)
299 {
300 msg (ME, _("The split point must be less than the number of variables"));
301 goto error;
302 }
303
304 reliability.n_sc += 2 ;
305 reliability.sc = xrealloc (reliability.sc, sizeof (struct cronbach) * reliability.n_sc);
306
307 s = &reliability.sc[0];
308
309 reliability.sc[1].n_items =
310 (reliability.split_point == -1) ? s->n_items / 2 : reliability.split_point;
311
312 reliability.sc[2].n_items = s->n_items - reliability.sc[1].n_items;
313 reliability.sc[1].items = xzalloc (sizeof (struct variable *)
314 * reliability.sc[1].n_items);
315
316 reliability.sc[2].items = xzalloc (sizeof (struct variable *) *
317 reliability.sc[2].n_items);
318
319 for (i = 0; i < reliability.sc[1].n_items ; ++i)
320 reliability.sc[1].items[i] = s->items[i];
321
322 while (i < s->n_items)
323 {
324 reliability.sc[2].items[i - reliability.sc[1].n_items] = s->items[i];
325 i++;
326 }
327 }
328
329 if (reliability.summary & SUMMARY_TOTAL)
330 {
331 int i;
332 const int base_sc = reliability.n_sc;
333
334 reliability.total_start = base_sc;
335
336 reliability.n_sc += reliability.sc[0].n_items ;
337 reliability.sc = xrealloc (reliability.sc, sizeof (struct cronbach) * reliability.n_sc);
338
339
340 for (i = 0 ; i < reliability.sc[0].n_items; ++i)
341 {
342 int v_src;
343 int v_dest = 0;
344 struct cronbach *s = &reliability.sc[i + base_sc];
345
346 s->n_items = reliability.sc[0].n_items - 1;
347 s->items = xzalloc (sizeof (struct variable *) * s->n_items);
348 for (v_src = 0 ; v_src < reliability.sc[0].n_items ; ++v_src)
349 {
350 if (v_src != i)
351 s->items[v_dest++] = reliability.sc[0].items[v_src];
352 }
353 }
354 }
355
356
357 if (! run_reliability (ds, &reliability))
358 goto error;
359
360 reliability_destroy (&reliability);
361 return CMD_SUCCESS;
362
363 error:
364 reliability_destroy (&reliability);
365 return CMD_FAILURE;
366 }
367
368
369 static void
370 do_reliability (struct casereader *group, struct dataset *ds,
371 const struct reliability *rel);
372
373
374 static void reliability_summary_total (const struct reliability *rel);
375
376 static void reliability_statistics (const struct reliability *rel);
377
378
379 static bool
run_reliability(struct dataset * ds,const struct reliability * reliability)380 run_reliability (struct dataset *ds, const struct reliability *reliability)
381 {
382 struct dictionary *dict = dataset_dict (ds);
383 bool ok;
384 struct casereader *group;
385
386 struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), dict);
387 int si;
388
389 for (si = 0 ; si < reliability->n_sc; ++si)
390 {
391 struct cronbach *s = &reliability->sc[si];
392 int i;
393
394 s->m = xzalloc (sizeof *s->m * s->n_items);
395 s->total = moments1_create (MOMENT_VARIANCE);
396
397 for (i = 0 ; i < s->n_items ; ++i)
398 s->m[i] = moments1_create (MOMENT_VARIANCE);
399 }
400
401
402 while (casegrouper_get_next_group (grouper, &group))
403 {
404 do_reliability (group, ds, reliability);
405
406 reliability_statistics (reliability);
407
408 if (reliability->summary & SUMMARY_TOTAL)
409 reliability_summary_total (reliability);
410 }
411
412 ok = casegrouper_destroy (grouper);
413 ok = proc_commit (ds) && ok;
414
415 return ok;
416 }
417
418
419
420
421
422 /* Return the sum of all the item variables in S */
423 static double
append_sum(const struct ccase * c,casenumber n UNUSED,void * aux)424 append_sum (const struct ccase *c, casenumber n UNUSED, void *aux)
425 {
426 double sum = 0;
427 const struct cronbach *s = aux;
428
429 int v;
430 for (v = 0 ; v < s->n_items; ++v)
431 {
432 sum += case_data (c, s->items[v])->f;
433 }
434
435 return sum;
436 };
437
438 static void
439 case_processing_summary (casenumber n_valid, casenumber n_missing,
440 const struct dictionary *dict);
441
442
443 static double
alpha(int k,double sum_of_variances,double variance_of_sums)444 alpha (int k, double sum_of_variances, double variance_of_sums)
445 {
446 return k / (k - 1.0) * (1 - sum_of_variances / variance_of_sums);
447 }
448
449 static void
do_reliability(struct casereader * input,struct dataset * ds,const struct reliability * rel)450 do_reliability (struct casereader *input, struct dataset *ds,
451 const struct reliability *rel)
452 {
453 int i;
454 int si;
455 struct ccase *c;
456 casenumber n_missing ;
457 casenumber n_valid = 0;
458
459
460 for (si = 0 ; si < rel->n_sc; ++si)
461 {
462 struct cronbach *s = &rel->sc[si];
463
464 moments1_clear (s->total);
465
466 for (i = 0 ; i < s->n_items ; ++i)
467 moments1_clear (s->m[i]);
468 }
469
470 input = casereader_create_filter_missing (input,
471 rel->variables,
472 rel->n_variables,
473 rel->exclude,
474 &n_missing,
475 NULL);
476
477 for (si = 0 ; si < rel->n_sc; ++si)
478 {
479 struct cronbach *s = &rel->sc[si];
480
481
482 s->totals_idx = caseproto_get_n_widths (casereader_get_proto (input));
483 input =
484 casereader_create_append_numeric (input, append_sum,
485 s, NULL);
486 }
487
488 for (; (c = casereader_read (input)) != NULL; case_unref (c))
489 {
490 double weight = 1.0;
491 n_valid ++;
492
493 for (si = 0; si < rel->n_sc; ++si)
494 {
495 struct cronbach *s = &rel->sc[si];
496
497 for (i = 0 ; i < s->n_items ; ++i)
498 moments1_add (s->m[i], case_data (c, s->items[i])->f, weight);
499
500 moments1_add (s->total, case_data_idx (c, s->totals_idx)->f, weight);
501 }
502 }
503 casereader_destroy (input);
504
505 for (si = 0; si < rel->n_sc; ++si)
506 {
507 struct cronbach *s = &rel->sc[si];
508
509 s->sum_of_variances = 0;
510 for (i = 0 ; i < s->n_items ; ++i)
511 {
512 double weight, mean, variance;
513 moments1_calculate (s->m[i], &weight, &mean, &variance, NULL, NULL);
514
515 s->sum_of_variances += variance;
516 }
517
518 moments1_calculate (s->total, NULL, NULL, &s->variance_of_sums,
519 NULL, NULL);
520
521 s->alpha =
522 alpha (s->n_items, s->sum_of_variances, s->variance_of_sums);
523 }
524
525 text_item_submit (text_item_create_format (TEXT_ITEM_TITLE, _("Scale: %s"),
526 ds_cstr (&rel->scale_name)));
527
528 case_processing_summary (n_valid, n_missing, dataset_dict (ds));
529 }
530
531
532
533
534
535 static void
case_processing_summary(casenumber n_valid,casenumber n_missing,const struct dictionary * dict)536 case_processing_summary (casenumber n_valid, casenumber n_missing,
537 const struct dictionary *dict)
538 {
539 struct pivot_table *table = pivot_table_create (
540 N_("Case Processing Summary"));
541 pivot_table_set_weight_var (table, dict_get_weight (dict));
542
543 pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"),
544 N_("N"), PIVOT_RC_COUNT,
545 N_("Percent"), PIVOT_RC_PERCENT);
546
547 struct pivot_dimension *cases = pivot_dimension_create (
548 table, PIVOT_AXIS_ROW, N_("Cases"), N_("Valid"), N_("Excluded"),
549 N_("Total"));
550 cases->root->show_label = true;
551
552 casenumber total = n_missing + n_valid;
553
554 struct entry
555 {
556 int stat_idx;
557 int case_idx;
558 double x;
559 }
560 entries[] = {
561 { 0, 0, n_valid },
562 { 0, 1, n_missing },
563 { 0, 2, total },
564 { 1, 0, 100.0 * n_valid / total },
565 { 1, 1, 100.0 * n_missing / total },
566 { 1, 2, 100.0 }
567 };
568
569 for (size_t i = 0; i < sizeof entries / sizeof *entries; i++)
570 {
571 const struct entry *e = &entries[i];
572 pivot_table_put2 (table, e->stat_idx, e->case_idx,
573 pivot_value_new_number (e->x));
574 }
575
576 pivot_table_submit (table);
577 }
578
579 static void
reliability_summary_total(const struct reliability * rel)580 reliability_summary_total (const struct reliability *rel)
581 {
582 struct pivot_table *table = pivot_table_create (N_("Item-Total Statistics"));
583
584 pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"),
585 N_("Scale Mean if Item Deleted"),
586 N_("Scale Variance if Item Deleted"),
587 N_("Corrected Item-Total Correlation"),
588 N_("Cronbach's Alpha if Item Deleted"));
589
590 struct pivot_dimension *variables = pivot_dimension_create (
591 table, PIVOT_AXIS_ROW, N_("Variables"));
592
593 for (size_t i = 0 ; i < rel->sc[0].n_items; ++i)
594 {
595 const struct cronbach *s = &rel->sc[rel->total_start + i];
596
597 int var_idx = pivot_category_create_leaf (
598 variables->root, pivot_value_new_variable (rel->sc[0].items[i]));
599
600 double mean;
601 moments1_calculate (s->total, NULL, &mean, NULL, NULL, NULL);
602
603 double var;
604 moments1_calculate (rel->sc[0].m[i], NULL, NULL, &var, NULL, NULL);
605 double cov
606 = (rel->sc[0].variance_of_sums + var - s->variance_of_sums) / 2.0;
607
608 double entries[] = {
609 mean,
610 s->variance_of_sums,
611 (cov - var) / sqrt (var * s->variance_of_sums),
612 s->alpha,
613 };
614 for (size_t i = 0; i < sizeof entries / sizeof *entries; i++)
615 pivot_table_put2 (table, i, var_idx,
616 pivot_value_new_number (entries[i]));
617 }
618
619 pivot_table_submit (table);
620 }
621
622
623 static void
reliability_statistics(const struct reliability * rel)624 reliability_statistics (const struct reliability *rel)
625 {
626 struct pivot_table *table = pivot_table_create (
627 N_("Reliability Statistics"));
628 pivot_table_set_weight_var (table, rel->wv);
629
630 if (rel->model == MODEL_ALPHA)
631 {
632 pivot_dimension_create (table, PIVOT_AXIS_COLUMN,
633 N_("Statistics"),
634 N_("Cronbach's Alpha"), PIVOT_RC_OTHER,
635 N_("N of Items"), PIVOT_RC_COUNT);
636
637 const struct cronbach *s = &rel->sc[0];
638 pivot_table_put1 (table, 0, pivot_value_new_number (s->alpha));
639 pivot_table_put1 (table, 1, pivot_value_new_number (s->n_items));
640 }
641 else
642 {
643 struct pivot_dimension *statistics = pivot_dimension_create (
644 table, PIVOT_AXIS_ROW, N_("Statistics"));
645 struct pivot_category *alpha = pivot_category_create_group (
646 statistics->root, N_("Cronbach's Alpha"));
647 pivot_category_create_group (alpha, N_("Part 1"),
648 N_("Value"), PIVOT_RC_OTHER,
649 N_("N of Items"), PIVOT_RC_COUNT);
650 pivot_category_create_group (alpha, N_("Part 2"),
651 N_("Value"), PIVOT_RC_OTHER,
652 N_("N of Items"), PIVOT_RC_COUNT);
653 pivot_category_create_leaves (alpha,
654 N_("Total N of Items"), PIVOT_RC_COUNT);
655 pivot_category_create_leaves (statistics->root,
656 N_("Correlation Between Forms"),
657 PIVOT_RC_OTHER);
658 pivot_category_create_group (statistics->root,
659 N_("Spearman-Brown Coefficient"),
660 N_("Equal Length"), PIVOT_RC_OTHER,
661 N_("Unequal Length"), PIVOT_RC_OTHER);
662 pivot_category_create_leaves (statistics->root,
663 N_("Guttman Split-Half Coefficient"),
664 PIVOT_RC_OTHER);
665
666 /* R is the correlation between the two parts */
667 double r0 = rel->sc[0].variance_of_sums -
668 rel->sc[1].variance_of_sums -
669 rel->sc[2].variance_of_sums ;
670 double r1 = (r0 / sqrt (rel->sc[1].variance_of_sums)
671 / sqrt (rel->sc[2].variance_of_sums)
672 / 2.0);
673
674 /* Guttman Split Half Coefficient */
675 double g = 2 * r0 / rel->sc[0].variance_of_sums;
676
677 double tmp = (1.0 - r1*r1) * rel->sc[1].n_items * rel->sc[2].n_items /
678 pow2 (rel->sc[0].n_items);
679
680 double entries[] = {
681 rel->sc[1].alpha,
682 rel->sc[1].n_items,
683 rel->sc[2].alpha,
684 rel->sc[2].n_items,
685 rel->sc[1].n_items + rel->sc[2].n_items,
686 r1,
687 2 * r1 / (1.0 + r1),
688 (sqrt (pow4 (r1) + 4 * pow2 (r1) * tmp) - pow2 (r1)) / (2 * tmp),
689 g,
690 };
691 for (size_t i = 0; i < sizeof entries / sizeof *entries; i++)
692 pivot_table_put1 (table, i, pivot_value_new_number (entries[i]));
693 }
694
695 pivot_table_submit (table);
696 }
697