1 /*
2 * analysis-tools.c:
3 *
4 * Authors:
5 * Jukka-Pekka Iivonen <jiivonen@hutcs.cs.hut.fi>
6 * Andreas J. Guelzow <aguelzow@taliesin.ca>
7 *
8 * (C) Copyright 2000, 2001 by Jukka-Pekka Iivonen <jiivonen@hutcs.cs.hut.fi>
9 * (C) Copyright 2002, 2004 by Andreas J. Guelzow <aguelzow@taliesin.ca>
10 *
11 * Modified 2001 to use range_* functions of mathfunc.h
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, see <https://www.gnu.org/licenses/>.
25 */
26
27 #include <gnumeric-config.h>
28 #include <glib/gi18n-lib.h>
29 #include <gnumeric.h>
30 #include <tools/analysis-tools.h>
31
32 #include <mathfunc.h>
33 #include <func.h>
34 #include <expr.h>
35 #include <position.h>
36 #include <tools/tools.h>
37 #include <value.h>
38 #include <cell.h>
39 #include <sheet.h>
40 #include <ranges.h>
41 #include <parse-util.h>
42 #include <style.h>
43 #include <regression.h>
44 #include <sheet-style.h>
45 #include <workbook.h>
46 #include <collect.h>
47 #include <gnm-format.h>
48 #include <sheet-object-cell-comment.h>
49 #include <workbook-control.h>
50 #include <command-context.h>
51 #include <sheet-object-graph.h>
52 #include <graph.h>
53 #include <goffice/goffice.h>
54
55 #include <string.h>
56 #include <stdlib.h>
57 #include <math.h>
58
59
60 const GnmExpr *
make_cellref(int dx,int dy)61 make_cellref (int dx, int dy)
62 {
63 GnmCellRef r;
64 r.sheet = NULL;
65 r.col = dx;
66 r.col_relative = TRUE;
67 r.row = dy;
68 r.row_relative = TRUE;
69 return gnm_expr_new_cellref (&r);
70 }
71
72 const GnmExpr *
make_rangeref(int dx0,int dy0,int dx1,int dy1)73 make_rangeref (int dx0, int dy0, int dx1, int dy1)
74 {
75 GnmCellRef a, b;
76 GnmValue *val;
77
78 a.sheet = NULL;
79 a.col = dx0;
80 a.col_relative = TRUE;
81 a.row = dy0;
82 a.row_relative = TRUE;
83 b.sheet = NULL;
84 b.col = dx1;
85 b.col_relative = TRUE;
86 b.row = dy1;
87 b.row_relative = TRUE;
88
89 val = value_new_cellrange_unsafe (&a, &b);
90 return gnm_expr_new_constant (val);
91 }
92
93
94 typedef struct {
95 char *format;
96 GPtrArray *data_lists;
97 gboolean read_label;
98 gboolean ignore_non_num;
99 guint length;
100 Sheet *sheet;
101 } data_list_specs_t;
102
103 /*
104 * cb_adjust_areas:
105 * @data:
106 * @user_data:
107 *
108 */
109 static void
cb_adjust_areas(gpointer data,G_GNUC_UNUSED gpointer user_data)110 cb_adjust_areas (gpointer data, G_GNUC_UNUSED gpointer user_data)
111 {
112 GnmValue *range = (GnmValue *)data;
113
114 if (range == NULL || !VALUE_IS_CELLRANGE (range)) {
115 return;
116 }
117
118 range->v_range.cell.a.col_relative = 0;
119 range->v_range.cell.a.row_relative = 0;
120 range->v_range.cell.b.col_relative = 0;
121 range->v_range.cell.b.row_relative = 0;
122 }
123
124 /*
125 * analysis_tools_remove_label:
126 *
127 */
128 static void
analysis_tools_remove_label(GnmValue * val,gboolean labels,group_by_t group_by)129 analysis_tools_remove_label (GnmValue *val,
130 gboolean labels, group_by_t group_by)
131 {
132 if (labels) {
133 switch (group_by) {
134 case GROUPED_BY_ROW:
135 val->v_range.cell.a.col++;
136 break;
137 case GROUPED_BY_COL:
138 case GROUPED_BY_BIN:
139 case GROUPED_BY_AREA:
140 default:
141 val->v_range.cell.a.row++;
142 break;
143 }
144 }
145 }
146
147
148
149 /*
150 * analysis_tools_write_label:
151 * @val: range to extract label from
152 * @dao: data_analysis_output_t, where to write to
153 * @info: analysis_tools_data_generic_t info
154 * @x: output col number
155 * @y: output row number
156 * @i: default col/row number
157 *
158 */
159
160 void
analysis_tools_write_label(GnmValue * val,data_analysis_output_t * dao,analysis_tools_data_generic_t * info,int x,int y,int i)161 analysis_tools_write_label (GnmValue *val, data_analysis_output_t *dao,
162 analysis_tools_data_generic_t *info,
163 int x, int y, int i)
164 {
165 char const *format = NULL;
166
167 if (info->labels) {
168 GnmValue *label = value_dup (val);
169
170 label->v_range.cell.b = label->v_range.cell.a;
171 dao_set_cell_expr (dao, x, y, gnm_expr_new_constant (label));
172 analysis_tools_remove_label (val, info->labels, info->group_by);
173 } else {
174 switch (info->group_by) {
175 case GROUPED_BY_ROW:
176 format = _("Row %i");
177 break;
178 case GROUPED_BY_COL:
179 format = _("Column %i");
180 break;
181 case GROUPED_BY_BIN:
182 format = _("Bin %i");
183 break;
184 case GROUPED_BY_AREA:
185 default:
186 format = _("Area %i");
187 break;
188 }
189
190 dao_set_cell_printf (dao, x, y, format, i);
191 }
192 }
193
194 /*
195 * analysis_tools_write_label:
196 * @val: range to extract label from
197 * @dao: data_analysis_output_t, where to write to
198 * @labels: analysis_tools_data_generic_t infowhether the
199 * @val contains label info
200 * @group_by: grouping info
201 * @x: output col number
202 * @y: output row number
203 * @i: default col/row number
204 *
205 */
206
207 static void
analysis_tools_write_a_label(GnmValue * val,data_analysis_output_t * dao,gboolean labels,group_by_t group_by,int x,int y)208 analysis_tools_write_a_label (GnmValue *val, data_analysis_output_t *dao,
209 gboolean labels, group_by_t group_by,
210 int x, int y)
211 {
212 if (labels) {
213 GnmValue *label = value_dup (val);
214
215 label->v_range.cell.b = label->v_range.cell.a;
216 dao_set_cell_expr (dao, x, y, gnm_expr_new_constant (label));
217 analysis_tools_remove_label (val, labels, group_by);
218 } else {
219 char const *str = ((group_by == GROUPED_BY_ROW) ? "row" : "col");
220 char const *label = ((group_by == GROUPED_BY_ROW) ? _("Row") : _("Column"));
221
222 GnmFunc *fd_concatenate;
223 GnmFunc *fd_cell;
224
225 fd_concatenate = gnm_func_lookup_or_add_placeholder ("CONCATENATE");
226 gnm_func_inc_usage (fd_concatenate);
227 fd_cell = gnm_func_lookup_or_add_placeholder ("CELL");
228 gnm_func_inc_usage (fd_cell);
229
230 dao_set_cell_expr (dao, x, y, gnm_expr_new_funcall3
231 (fd_concatenate, gnm_expr_new_constant (value_new_string (label)),
232 gnm_expr_new_constant (value_new_string (" ")),
233 gnm_expr_new_funcall2 (fd_cell,
234 gnm_expr_new_constant (value_new_string (str)),
235 gnm_expr_new_constant (value_dup (val)))));
236
237 gnm_func_dec_usage (fd_concatenate);
238 gnm_func_dec_usage (fd_cell);
239 }
240 }
241
242 /*
243 * analysis_tools_write_label_ftest:
244 * @val: range to extract label from
245 * @dao: data_analysis_output_t, where to write to
246 * @info: analysis_tools_data_generic_t info
247 * @x: output col number
248 * @y: output row number
249 * @i: default col/row number
250 *
251 */
252
253 void
analysis_tools_write_label_ftest(GnmValue * val,data_analysis_output_t * dao,int x,int y,gboolean labels,int i)254 analysis_tools_write_label_ftest (GnmValue *val, data_analysis_output_t *dao,
255 int x, int y, gboolean labels, int i)
256 {
257 cb_adjust_areas (val, NULL);
258
259 if (labels) {
260 GnmValue *label = value_dup (val);
261
262 label->v_range.cell.b = label->v_range.cell.a;
263 dao_set_cell_expr (dao, x, y, gnm_expr_new_constant (label));
264
265 if ((val->v_range.cell.b.col - val->v_range.cell.a.col) <
266 (val->v_range.cell.b.row - val->v_range.cell.a.row))
267 val->v_range.cell.a.row++;
268 else
269 val->v_range.cell.a.col++;
270 } else {
271 dao_set_cell_printf (dao, x, y, _("Variable %i"), i);
272 }
273 }
274
275 /*
276 * cb_cut_into_cols:
277 * @data:
278 * @user_data:
279 *
280 */
281 static void
cb_cut_into_cols(gpointer data,gpointer user_data)282 cb_cut_into_cols (gpointer data, gpointer user_data)
283 {
284 GnmValue *range = (GnmValue *)data;
285 GnmValue *col_value;
286 GSList **list_of_units = (GSList **) user_data;
287 gint col;
288
289 if (range == NULL) {
290 return;
291 }
292 if (!VALUE_IS_CELLRANGE (range) ||
293 (range->v_range.cell.b.sheet != NULL &&
294 range->v_range.cell.b.sheet != range->v_range.cell.a.sheet)) {
295 value_release (range);
296 return;
297 }
298
299 cb_adjust_areas (data, NULL);
300
301 if (range->v_range.cell.a.col == range->v_range.cell.b.col) {
302 *list_of_units = g_slist_prepend (*list_of_units, range);
303 return;
304 }
305
306 for (col = range->v_range.cell.a.col; col <= range->v_range.cell.b.col; col++) {
307 col_value = value_dup (range);
308 col_value->v_range.cell.a.col = col;
309 col_value->v_range.cell.b.col = col;
310 *list_of_units = g_slist_prepend (*list_of_units, col_value);
311 }
312 value_release (range);
313 return;
314 }
315
316 /*
317 * cb_cut_into_rows:
318 * @data:
319 * @user_data:
320 *
321 */
322 static void
cb_cut_into_rows(gpointer data,gpointer user_data)323 cb_cut_into_rows (gpointer data, gpointer user_data)
324 {
325 GnmValue *range = (GnmValue *)data;
326 GnmValue *row_value;
327 GSList **list_of_units = (GSList **) user_data;
328 gint row;
329
330 if (range == NULL) {
331 return;
332 }
333 if (!VALUE_IS_CELLRANGE (range) ||
334 (range->v_range.cell.b.sheet != NULL &&
335 range->v_range.cell.b.sheet != range->v_range.cell.a.sheet)) {
336 value_release (range);
337 return;
338 }
339
340 cb_adjust_areas (data, NULL);
341
342 if (range->v_range.cell.a.row == range->v_range.cell.b.row) {
343 *list_of_units = g_slist_prepend (*list_of_units, range);
344 return;
345 }
346
347 for (row = range->v_range.cell.a.row; row <= range->v_range.cell.b.row; row++) {
348 row_value = value_dup (range);
349 row_value->v_range.cell.a.row = row;
350 row_value->v_range.cell.b.row = row;
351 *list_of_units = g_slist_prepend (*list_of_units, row_value);
352 }
353 value_release (range);
354 return;
355 }
356
357
358 /**
359 * prepare_input_range:
360 * @input_range: (inout) (element-type GnmRange) (transfer full):
361 * @group_by:
362 */
363 void
prepare_input_range(GSList ** input_range,group_by_t group_by)364 prepare_input_range (GSList **input_range, group_by_t group_by)
365 {
366 GSList *input_by_units = NULL;
367
368 switch (group_by) {
369 case GROUPED_BY_ROW:
370 g_slist_foreach (*input_range, cb_cut_into_rows, &input_by_units);
371 g_slist_free (*input_range);
372 *input_range = g_slist_reverse (input_by_units);
373 return;
374 case GROUPED_BY_COL:
375 g_slist_foreach (*input_range, cb_cut_into_cols, &input_by_units);
376 g_slist_free (*input_range);
377 *input_range = g_slist_reverse (input_by_units);
378 return;
379 case GROUPED_BY_AREA:
380 default:
381 g_slist_foreach (*input_range, cb_adjust_areas, NULL);
382 return;
383 }
384 }
385
386 typedef struct {
387 gboolean init;
388 gint size;
389 gboolean hom;
390 } homogeneity_check_t;
391
392
393 /*
394 * cb_check_hom:
395 * @data:
396 * @user_data:
397 *
398 */
399 static void
cb_check_hom(gpointer data,gpointer user_data)400 cb_check_hom (gpointer data, gpointer user_data)
401 {
402 GnmValue *range = (GnmValue *)data;
403 homogeneity_check_t *state = (homogeneity_check_t *) user_data;
404 gint this_size;
405
406 if (!VALUE_IS_CELLRANGE (range)) {
407 state->hom = FALSE;
408 return;
409 }
410
411 this_size = (range->v_range.cell.b.col - range->v_range.cell.a.col + 1) *
412 (range->v_range.cell.b.row - range->v_range.cell.a.row + 1);
413
414 if (state->init) {
415 if (state->size != this_size)
416 state->hom = FALSE;
417 } else {
418 state->init = TRUE;
419 state->size = this_size;
420 }
421 return;
422 }
423
424 /*
425 * gnm_check_input_range_list_homogeneity:
426 * @input_range:
427 *
428 * Check that all columns have the same size
429 *
430 */
431 static gboolean
gnm_check_input_range_list_homogeneity(GSList * input_range)432 gnm_check_input_range_list_homogeneity (GSList *input_range)
433 {
434 homogeneity_check_t state = { FALSE, 0, TRUE };
435
436 g_slist_foreach (input_range, cb_check_hom, &state);
437
438 return state.hom;
439 }
440
441
442 /***** Some general routines ***********************************************/
443
444 /*
445 * Set a column of text from a string like "/first/second/third" or "|foo|bar|baz".
446 */
447 void
set_cell_text_col(data_analysis_output_t * dao,int col,int row,const char * text)448 set_cell_text_col (data_analysis_output_t *dao, int col, int row, const char *text)
449 {
450 gboolean leave = FALSE;
451 char *copy, *orig_copy;
452 char sep = *text;
453 if (sep == 0) return;
454
455 copy = orig_copy = g_strdup (text + 1);
456 while (!leave) {
457 char *p = copy;
458 while (*copy && *copy != sep)
459 copy++;
460 if (*copy)
461 *copy++ = 0;
462 else
463 leave = TRUE;
464 dao_set_cell_value (dao, col, row++, value_new_string (p));
465 }
466 g_free (orig_copy);
467 }
468
469
470 /*
471 * Set a row of text from a string like "/first/second/third" or "|foo|bar|baz".
472 */
473 void
set_cell_text_row(data_analysis_output_t * dao,int col,int row,const char * text)474 set_cell_text_row (data_analysis_output_t *dao, int col, int row, const char *text)
475 {
476 gboolean leave = 0;
477 char *copy, *orig_copy;
478 char sep = *text;
479 if (sep == 0) return;
480
481 copy = orig_copy = g_strdup (text + 1);
482 while (!leave) {
483 char *p = copy;
484 while (*copy && *copy != sep)
485 copy++;
486 if (*copy)
487 *copy++ = 0;
488 else
489 leave = TRUE;
490 dao_set_cell_value (dao, col++, row, value_new_string (p));
491 }
492 g_free (orig_copy);
493 }
494
495 gboolean
analysis_tool_generic_clean(gpointer specs)496 analysis_tool_generic_clean (gpointer specs)
497 {
498 analysis_tools_data_generic_t *info = specs;
499
500 range_list_destroy (info->input);
501 info->input = NULL;
502 return FALSE;
503 }
504
505 gboolean
analysis_tool_generic_b_clean(gpointer specs)506 analysis_tool_generic_b_clean (gpointer specs)
507 {
508 analysis_tools_data_generic_b_t *info = specs;
509
510 value_release (info->range_1);
511 info->range_1 = NULL;
512 value_release (info->range_2);
513 info->range_2 = NULL;
514 return FALSE;
515 }
516
517
518
/*
 * analysis_tool_calc_length:
 * @info: generic tool data supplying input, group_by and labels
 *
 * Determines the largest data set length among the input ranges: the cell
 * count for area grouping, the row count for column grouping and the
 * column count otherwise.  The result starts at 1, so it is never smaller
 * than that before the label adjustment; one is subtracted when the
 * ranges carry labels.
 *
 * Returns: the computed length.
 */
int analysis_tool_calc_length (analysis_tools_data_generic_t *info)
{
	int longest = 1;
	GSList *l;

	for (l = info->input; l != NULL; l = l->next) {
		GnmValue *range = l->data;
		int len;

		switch (info->group_by) {
		case GROUPED_BY_AREA:
			len = (range->v_range.cell.b.row - range->v_range.cell.a.row + 1) *
				(range->v_range.cell.b.col - range->v_range.cell.a.col + 1);
			break;
		case GROUPED_BY_COL:
			len = range->v_range.cell.b.row - range->v_range.cell.a.row + 1;
			break;
		default:
			len = range->v_range.cell.b.col - range->v_range.cell.a.col + 1;
			break;
		}

		if (len > longest)
			longest = len;
	}

	return info->labels ? longest - 1 : longest;
}
542
543 /**
544 * analysis_tool_get_function:
545 * @name: name of function
546 * @dao:
547 *
548 * Returns: (transfer full): the function named @name or a placeholder.
549 * The usage count of the function is incremented.
550 */
551 GnmFunc *
analysis_tool_get_function(char const * name,data_analysis_output_t * dao)552 analysis_tool_get_function (char const *name,
553 data_analysis_output_t *dao)
554 {
555 GnmFunc *fd;
556
557 fd = gnm_func_lookup_or_add_placeholder (name);
558 gnm_func_inc_usage (fd);
559 return fd;
560 }
561
562
563
564 /************* Correlation Tool *******************************************
565 *
566 * The correlation tool calculates the correlation coefficient of two
567 * data sets. The two data sets can be grouped by rows or by columns.
568 * The results are given in a table which can be printed out in a new
569 * sheet, in a new workbook, or simply into an existing sheet.
570 *
571 **/
572
573 gboolean
analysis_tool_table(data_analysis_output_t * dao,analysis_tools_data_generic_t * info,gchar const * title,gchar const * functionname,gboolean full_table)574 analysis_tool_table (data_analysis_output_t *dao,
575 analysis_tools_data_generic_t *info,
576 gchar const *title, gchar const *functionname,
577 gboolean full_table)
578 {
579 GSList *inputdata, *inputexpr = NULL;
580 GnmFunc *fd = NULL;
581
582 guint col, row;
583
584 dao_set_italic (dao, 0, 0, 0, 0);
585 dao_set_cell_printf (dao, 0, 0, "%s", title);
586
587 fd = gnm_func_lookup_or_add_placeholder (functionname);
588 gnm_func_inc_usage (fd);
589
590 for (col = 1, inputdata = info->input; inputdata != NULL;
591 inputdata = inputdata->next, col++) {
592 GnmValue *val = NULL;
593
594 val = value_dup (inputdata->data);
595
596 /* Label */
597 dao_set_italic (dao, col, 0, col, 0);
598 analysis_tools_write_label (val, dao, info,
599 col, 0, col);
600
601 inputexpr = g_slist_prepend (inputexpr,
602 (gpointer) gnm_expr_new_constant (val));
603 }
604 inputexpr = g_slist_reverse (inputexpr);
605
606 for (row = 1, inputdata = info->input; inputdata != NULL;
607 inputdata = inputdata->next, row++) {
608 GnmValue *val = value_dup (inputdata->data);
609 GSList *colexprlist;
610
611 /* Label */
612 dao_set_italic (dao, 0, row, 0, row);
613 analysis_tools_write_label (val, dao, info,
614 0, row, row);
615
616 for (col = 1, colexprlist = inputexpr; colexprlist != NULL;
617 colexprlist = colexprlist->next, col++) {
618 GnmExpr const *colexpr = colexprlist->data;
619
620 if ((!full_table) && (col < row))
621 continue;
622
623 dao_set_cell_expr
624 (dao, row, col,
625 gnm_expr_new_funcall2
626 (fd,
627 gnm_expr_new_constant (value_dup (val)),
628 gnm_expr_copy (colexpr)));
629 }
630
631 value_release (val);
632 }
633
634 g_slist_free_full (inputexpr, (GDestroyNotify)gnm_expr_free);
635 if (fd) gnm_func_dec_usage (fd);
636
637 dao_redraw_respan (dao);
638 return FALSE;
639 }
640
641 static gboolean
analysis_tool_correlation_engine_run(data_analysis_output_t * dao,analysis_tools_data_generic_t * info)642 analysis_tool_correlation_engine_run (data_analysis_output_t *dao,
643 analysis_tools_data_generic_t *info)
644 {
645 return analysis_tool_table (dao, info, _("Correlations"),
646 "CORREL", FALSE);
647 }
648
649 gboolean
analysis_tool_correlation_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)650 analysis_tool_correlation_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
651 analysis_tool_engine_t selector, gpointer result)
652 {
653 analysis_tools_data_generic_t *info = specs;
654
655 switch (selector) {
656 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
657 return (dao_command_descriptor (dao, _("Correlation (%s)"), result)
658 == NULL);
659 case TOOL_ENGINE_UPDATE_DAO:
660 prepare_input_range (&info->input, info->group_by);
661 if (!gnm_check_input_range_list_homogeneity (info->input)) {
662 info->err = info->group_by + 1;
663 return TRUE;
664 }
665 dao_adjust (dao, 1 + g_slist_length (info->input),
666 1 + g_slist_length (info->input));
667 return FALSE;
668 case TOOL_ENGINE_CLEAN_UP:
669 return analysis_tool_generic_clean (specs);
670 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
671 return FALSE;
672 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
673 dao_prepare_output (NULL, dao, _("Correlation"));
674 return FALSE;
675 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
676 return dao_format_output (dao, _("Correlation"));
677 case TOOL_ENGINE_PERFORM_CALC:
678 default:
679 return analysis_tool_correlation_engine_run (dao, specs);
680 }
681 return TRUE; /* We shouldn't get here */
682 }
683
684
685
686
687 /************* Covariance Tool ********************************************
688 *
689 * The covariance tool calculates the covariance of two data sets.
690 * The two data sets can be grouped by rows or by columns. The
691 * results are given in a table which can be printed out in a new
692 * sheet, in a new workbook, or simply into an existing sheet.
693 *
694 **/
695
696 static gboolean
analysis_tool_covariance_engine_run(data_analysis_output_t * dao,analysis_tools_data_generic_t * info)697 analysis_tool_covariance_engine_run (data_analysis_output_t *dao,
698 analysis_tools_data_generic_t *info)
699 {
700 return analysis_tool_table (dao, info, _("Covariances"),
701 "COVAR", FALSE);
702 }
703
704 gboolean
analysis_tool_covariance_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)705 analysis_tool_covariance_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
706 analysis_tool_engine_t selector, gpointer result)
707 {
708 analysis_tools_data_generic_t *info = specs;
709
710 switch (selector) {
711 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
712 return (dao_command_descriptor (dao, _("Covariance (%s)"), result)
713 == NULL);
714 case TOOL_ENGINE_UPDATE_DAO:
715 prepare_input_range (&info->input, info->group_by);
716 if (!gnm_check_input_range_list_homogeneity (info->input)) {
717 info->err = info->group_by + 1;
718 return TRUE;
719 }
720 dao_adjust (dao, 1 + g_slist_length (info->input),
721 1 + g_slist_length (info->input));
722 return FALSE;
723 case TOOL_ENGINE_CLEAN_UP:
724 return analysis_tool_generic_clean (specs);
725 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
726 return FALSE;
727 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
728 dao_prepare_output (NULL, dao, _("Covariance"));
729 return FALSE;
730 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
731 return dao_format_output (dao, _("Covariance"));
732 case TOOL_ENGINE_PERFORM_CALC:
733 default:
734 return analysis_tool_covariance_engine_run (dao, specs);
735 }
736 return TRUE; /* We shouldn't get here */
737 }
738
739
740
741
742 /************* Descriptive Statistics Tool *******************************
743 *
744 * Descriptive Statistics Tool calculates some useful statistical
745 * information such as the mean, standard deviation, sample variance,
746 * skewness, kurtosis, and standard error about the given variables.
747 * The results are given in a table which can be printed out in a new
748 * sheet, in a new workbook, or simply into an existing sheet.
749 *
750 **/
751
752 typedef struct {
753 gnm_float mean;
754 gint error_mean;
755 gnm_float var;
756 gint error_var;
757 gint len;
758 } desc_stats_t;
759
760 static void
summary_statistics(data_analysis_output_t * dao,analysis_tools_data_descriptive_t * info)761 summary_statistics (data_analysis_output_t *dao,
762 analysis_tools_data_descriptive_t *info)
763 {
764 guint col;
765 GSList *data = info->base.input;
766 GnmFunc *fd_mean;
767 GnmFunc *fd_median;
768 GnmFunc *fd_mode;
769 GnmFunc *fd_stdev;
770 GnmFunc *fd_var;
771 GnmFunc *fd_kurt;
772 GnmFunc *fd_skew;
773 GnmFunc *fd_min;
774 GnmFunc *fd_max;
775 GnmFunc *fd_sum;
776 GnmFunc *fd_count;
777 GnmFunc *fd_sqrt;
778
779 fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
780 gnm_func_inc_usage (fd_mean);
781 fd_median = gnm_func_lookup_or_add_placeholder (info->use_ssmedian ? "SSMEDIAN" : "MEDIAN");
782 gnm_func_inc_usage (fd_median);
783 fd_mode = gnm_func_lookup_or_add_placeholder ("MODE");
784 gnm_func_inc_usage (fd_mode);
785 fd_stdev = gnm_func_lookup_or_add_placeholder ("STDEV");
786 gnm_func_inc_usage (fd_stdev);
787 fd_var = gnm_func_lookup_or_add_placeholder ("VAR");
788 gnm_func_inc_usage (fd_var);
789 fd_kurt = gnm_func_lookup_or_add_placeholder ("KURT");
790 gnm_func_inc_usage (fd_kurt);
791 fd_skew = gnm_func_lookup_or_add_placeholder ("SKEW");
792 gnm_func_inc_usage (fd_skew);
793 fd_min = gnm_func_lookup_or_add_placeholder ("MIN");
794 gnm_func_inc_usage (fd_min);
795 fd_max = gnm_func_lookup_or_add_placeholder ("MAX");
796 gnm_func_inc_usage (fd_max);
797 fd_sum = gnm_func_lookup_or_add_placeholder ("SUM");
798 gnm_func_inc_usage (fd_sum);
799 fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
800 gnm_func_inc_usage (fd_count);
801 fd_sqrt = gnm_func_lookup_or_add_placeholder ("SQRT");
802 gnm_func_inc_usage (fd_sqrt);
803
804 dao_set_cell (dao, 0, 0, NULL);
805
806 dao_set_italic (dao, 0, 1, 0, 13);
807 /*
808 * Note to translators: in the following string and others like it,
809 * the "/" is a separator character that can be changed to anything
810 * if the translation needs the slash; just use, say, "|" instead.
811 *
812 * The items are bundled like this to increase translation context.
813 */
814 set_cell_text_col (dao, 0, 1, _("/Mean"
815 "/Standard Error"
816 "/Median"
817 "/Mode"
818 "/Standard Deviation"
819 "/Sample Variance"
820 "/Kurtosis"
821 "/Skewness"
822 "/Range"
823 "/Minimum"
824 "/Maximum"
825 "/Sum"
826 "/Count"));
827
828 for (col = 0; data != NULL; data = data->next, col++) {
829 GnmExpr const *expr;
830 GnmExpr const *expr_min;
831 GnmExpr const *expr_max;
832 GnmExpr const *expr_var;
833 GnmExpr const *expr_count;
834 GnmValue *val_org = value_dup (data->data);
835
836 dao_set_italic (dao, col + 1, 0, col+1, 0);
837 /* Note that analysis_tools_write_label may modify val_org */
838 analysis_tools_write_label (val_org, dao, &info->base,
839 col + 1, 0, col + 1);
840
841 /* Mean */
842 expr = gnm_expr_new_funcall1
843 (fd_mean,
844 gnm_expr_new_constant (value_dup (val_org)));
845 dao_set_cell_expr (dao, col + 1, 1, expr);
846
847 /* Standard Deviation */
848 expr = gnm_expr_new_funcall1
849 (fd_stdev,
850 gnm_expr_new_constant (value_dup (val_org)));
851 dao_set_cell_expr (dao, col + 1, 5, expr);
852
853 /* Sample Variance */
854 expr_var = gnm_expr_new_funcall1
855 (fd_var,
856 gnm_expr_new_constant (value_dup (val_org)));
857 dao_set_cell_expr (dao, col + 1, 6, gnm_expr_copy (expr_var));
858
859 /* Median */
860 expr = gnm_expr_new_funcall1
861 (fd_median,
862 gnm_expr_new_constant (value_dup (val_org)));
863 dao_set_cell_expr (dao, col + 1, 3, expr);
864
865 /* Mode */
866 expr = gnm_expr_new_funcall1
867 (fd_mode,
868 gnm_expr_new_constant (value_dup (val_org)));
869 dao_set_cell_expr (dao, col + 1, 4, expr);
870
871 /* Kurtosis */
872 expr = gnm_expr_new_funcall1
873 (fd_kurt,
874 gnm_expr_new_constant (value_dup (val_org)));
875 dao_set_cell_expr (dao, col + 1, 7, expr);
876
877 /* Skewness */
878 expr = gnm_expr_new_funcall1
879 (fd_skew,
880 gnm_expr_new_constant (value_dup (val_org)));
881 dao_set_cell_expr (dao, col + 1, 8, expr);
882
883 /* Minimum */
884 expr_min = gnm_expr_new_funcall1
885 (fd_min,
886 gnm_expr_new_constant (value_dup (val_org)));
887 dao_set_cell_expr (dao, col + 1, 10, gnm_expr_copy (expr_min));
888
889 /* Maximum */
890 expr_max = gnm_expr_new_funcall1
891 (fd_max,
892 gnm_expr_new_constant (value_dup (val_org)));
893 dao_set_cell_expr (dao, col + 1, 11, gnm_expr_copy (expr_max));
894
895 /* Range */
896 expr = gnm_expr_new_binary (expr_max, GNM_EXPR_OP_SUB, expr_min);
897 dao_set_cell_expr (dao, col + 1, 9, expr);
898
899 /* Sum */
900 expr = gnm_expr_new_funcall1
901 (fd_sum,
902 gnm_expr_new_constant (value_dup (val_org)));
903 dao_set_cell_expr (dao, col + 1, 12, expr);
904
905 /* Count */
906 expr_count = gnm_expr_new_funcall1
907 (fd_count,
908 gnm_expr_new_constant (val_org));
909 dao_set_cell_expr (dao, col + 1, 13, gnm_expr_copy (expr_count));
910
911 /* Standard Error */
912 expr = gnm_expr_new_funcall1
913 (fd_sqrt,
914 gnm_expr_new_binary (expr_var,
915 GNM_EXPR_OP_DIV,
916 expr_count));
917 dao_set_cell_expr (dao, col + 1, 2, expr);
918 }
919
920 gnm_func_dec_usage (fd_mean);
921 gnm_func_dec_usage (fd_median);
922 gnm_func_dec_usage (fd_mode);
923 gnm_func_dec_usage (fd_stdev);
924 gnm_func_dec_usage (fd_var);
925 gnm_func_dec_usage (fd_kurt);
926 gnm_func_dec_usage (fd_skew);
927 gnm_func_dec_usage (fd_min);
928 gnm_func_dec_usage (fd_max);
929 gnm_func_dec_usage (fd_sum);
930 gnm_func_dec_usage (fd_count);
931 gnm_func_dec_usage (fd_sqrt);
932 }
933
934 static void
confidence_level(data_analysis_output_t * dao,analysis_tools_data_descriptive_t * info)935 confidence_level (data_analysis_output_t *dao,
936 analysis_tools_data_descriptive_t *info)
937 {
938 guint col;
939 char *buffer;
940 char *format;
941 GSList *data = info->base.input;
942 GnmFunc *fd_mean;
943 GnmFunc *fd_var;
944 GnmFunc *fd_count;
945 GnmFunc *fd_tinv;
946 GnmFunc *fd_sqrt;
947
948 format = g_strdup_printf (_("/%%%s%%%% CI for the Mean from"
949 "/to"), GNM_FORMAT_g);
950 buffer = g_strdup_printf (format, info->c_level * 100);
951 g_free (format);
952 dao_set_italic (dao, 0, 1, 0, 2);
953 set_cell_text_col (dao, 0, 1, buffer);
954 g_free (buffer);
955
956 dao_set_cell (dao, 0, 0, NULL);
957
958 fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
959 gnm_func_inc_usage (fd_mean);
960 fd_var = gnm_func_lookup_or_add_placeholder ("VAR");
961 gnm_func_inc_usage (fd_var);
962 fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
963 gnm_func_inc_usage (fd_count);
964 fd_tinv = gnm_func_lookup_or_add_placeholder ("TINV");
965 gnm_func_inc_usage (fd_tinv);
966 fd_sqrt = gnm_func_lookup_or_add_placeholder ("SQRT");
967 gnm_func_inc_usage (fd_sqrt);
968
969
970 for (col = 0; data != NULL; data = data->next, col++) {
971 GnmExpr const *expr;
972 GnmExpr const *expr_mean;
973 GnmExpr const *expr_var;
974 GnmExpr const *expr_count;
975 GnmValue *val_org = value_dup (data->data);
976
977 dao_set_italic (dao, col+1, 0, col+1, 0);
978 /* Note that analysis_tools_write_label may modify val_org */
979 analysis_tools_write_label (val_org, dao, &info->base, col + 1, 0, col + 1);
980
981 expr_mean = gnm_expr_new_funcall1
982 (fd_mean,
983 gnm_expr_new_constant (value_dup (val_org)));
984
985 expr_var = gnm_expr_new_funcall1
986 (fd_var,
987 gnm_expr_new_constant (value_dup (val_org)));
988
989 expr_count = gnm_expr_new_funcall1
990 (fd_count,
991 gnm_expr_new_constant (val_org));
992
993 expr = gnm_expr_new_binary
994 (gnm_expr_new_funcall2
995 (fd_tinv,
996 gnm_expr_new_constant (value_new_float (1 - info->c_level)),
997 gnm_expr_new_binary
998 (gnm_expr_copy (expr_count),
999 GNM_EXPR_OP_SUB,
1000 gnm_expr_new_constant (value_new_int (1)))),
1001 GNM_EXPR_OP_MULT,
1002 gnm_expr_new_funcall1
1003 (fd_sqrt,
1004 gnm_expr_new_binary (expr_var,
1005 GNM_EXPR_OP_DIV,
1006 expr_count)));
1007
1008 dao_set_cell_expr (dao, col + 1, 1,
1009 gnm_expr_new_binary
1010 (gnm_expr_copy (expr_mean),
1011 GNM_EXPR_OP_SUB,
1012 gnm_expr_copy (expr)));
1013 dao_set_cell_expr (dao, col + 1, 2,
1014 gnm_expr_new_binary (expr_mean,
1015 GNM_EXPR_OP_ADD,
1016 expr));
1017 }
1018
1019 gnm_func_dec_usage (fd_mean);
1020 gnm_func_dec_usage (fd_var);
1021 gnm_func_dec_usage (fd_count);
1022 gnm_func_dec_usage (fd_tinv);
1023 gnm_func_dec_usage (fd_sqrt);
1024 }
1025
1026 static void
kth_smallest_largest(data_analysis_output_t * dao,analysis_tools_data_descriptive_t * info,char const * func,char const * label,int k)1027 kth_smallest_largest (data_analysis_output_t *dao,
1028 analysis_tools_data_descriptive_t *info,
1029 char const* func, char const* label, int k)
1030 {
1031 guint col;
1032 GSList *data = info->base.input;
1033 GnmFunc *fd = gnm_func_lookup_or_add_placeholder (func);
1034 gnm_func_inc_usage (fd);
1035
1036 dao_set_italic (dao, 0, 1, 0, 1);
1037 dao_set_cell_printf (dao, 0, 1, label, k);
1038
1039 dao_set_cell (dao, 0, 0, NULL);
1040
1041 for (col = 0; data != NULL; data = data->next, col++) {
1042 GnmExpr const *expr = NULL;
1043 GnmValue *val = value_dup (data->data);
1044
1045 dao_set_italic (dao, col + 1, 0, col + 1, 0);
1046 analysis_tools_write_label (val, dao, &info->base,
1047 col + 1, 0, col + 1);
1048
1049 expr = gnm_expr_new_funcall2
1050 (fd,
1051 gnm_expr_new_constant (val),
1052 gnm_expr_new_constant (value_new_int (k)));
1053
1054 dao_set_cell_expr (dao, col + 1, 1, expr);
1055 }
1056
1057 gnm_func_dec_usage (fd);
1058 }
1059
1060 /* Descriptive Statistics
1061 */
1062 static gboolean
analysis_tool_descriptive_engine_run(data_analysis_output_t * dao,analysis_tools_data_descriptive_t * info)1063 analysis_tool_descriptive_engine_run (data_analysis_output_t *dao,
1064 analysis_tools_data_descriptive_t *info)
1065 {
1066 if (info->summary_statistics) {
1067 summary_statistics (dao, info);
1068 dao->offset_row += 16;
1069 if (dao->rows <= dao->offset_row)
1070 goto finish_descriptive_tool;
1071 }
1072 if (info->confidence_level) {
1073 confidence_level (dao, info);
1074 dao->offset_row += 4;
1075 if (dao->rows <= dao->offset_row)
1076 goto finish_descriptive_tool;
1077 }
1078 if (info->kth_largest) {
1079 kth_smallest_largest (dao, info, "LARGE", _("Largest (%d)"),
1080 info->k_largest);
1081 dao->offset_row += 4;
1082 if (dao->rows <= dao->offset_row)
1083 goto finish_descriptive_tool;
1084 }
1085 if (info->kth_smallest)
1086 kth_smallest_largest (dao, info, "SMALL", _("Smallest (%d)"),
1087 info->k_smallest);
1088
1089 finish_descriptive_tool:
1090
1091 dao_redraw_respan (dao);
1092 return 0;
1093 }
1094
1095 gboolean
analysis_tool_descriptive_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)1096 analysis_tool_descriptive_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
1097 analysis_tool_engine_t selector, gpointer result)
1098 {
1099 analysis_tools_data_descriptive_t *info = specs;
1100
1101 switch (selector) {
1102 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
1103 return (dao_command_descriptor (dao, _("Descriptive Statistics (%s)"), result)
1104 == NULL);
1105 case TOOL_ENGINE_UPDATE_DAO:
1106 prepare_input_range (&info->base.input, info->base.group_by);
1107 dao_adjust (dao, 1 + g_slist_length (info->base.input),
1108 (info->summary_statistics ? 16 : 0) +
1109 (info->confidence_level ? 4 : 0) +
1110 (info->kth_largest ? 4 : 0) +
1111 (info->kth_smallest ? 4 : 0 ) - 1);
1112 return FALSE;
1113 case TOOL_ENGINE_CLEAN_UP:
1114 return analysis_tool_generic_clean (specs);
1115 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
1116 return FALSE;
1117 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
1118 dao_prepare_output (NULL, dao, _("Descriptive Statistics"));
1119 return FALSE;
1120 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
1121 return dao_format_output (dao, _("Descriptive Statistics"));
1122 case TOOL_ENGINE_PERFORM_CALC:
1123 default:
1124 return analysis_tool_descriptive_engine_run (dao, specs);
1125 }
1126 return TRUE; /* We shouldn't get here */
1127 }
1128
1129
1130
/************* Sampling Tool *********************************************
 *
 * The sampling tool takes a sample from a given data set.  The sample
 * can be a random sample, where a given number of data points is
 * selected randomly from the data set, or a periodic sample, where,
 * for example, every fourth data element is selected for the sample.
 * The results are given in a table which can be printed out in a new
 * sheet, in a new workbook, or simply into an existing sheet.
 *
 **/
1142
1143
/*
 * Write the output of the Sampling tool.
 *
 * For every range in info->base.input a group of info->number adjacent
 * output columns is produced.  Row 0 of each column holds a heading,
 * rows 1 .. info->size hold the sample formulas:
 *
 *  - periodic sampling: INDEX (range, y, x) formulas that step through
 *    the range with stride info->period;
 *  - random sampling: RANDDISCRETE (range) formulas.
 *
 * Always returns FALSE.
 */
static gboolean
analysis_tool_sampling_engine_run (data_analysis_output_t *dao,
				   analysis_tools_data_sampling_t *info)
{
	GSList *l;
	gint col = 0;		/* first output column of the current group */
	guint ct;
	GnmFunc *fd_index = NULL;
	GnmFunc *fd_randdiscrete = NULL;
	gint source;		/* 1-based number of the current input range */

	/* INDEX is needed for label headings and for periodic samples,
	 * RANDDISCRETE only for random samples. */
	if (info->base.labels || info->periodic) {
		fd_index = gnm_func_lookup_or_add_placeholder ("INDEX");
		gnm_func_inc_usage (fd_index);
	}
	if (!info->periodic) {
		fd_randdiscrete = gnm_func_lookup_or_add_placeholder ("RANDDISCRETE");
		gnm_func_inc_usage (fd_randdiscrete);
	}

	for (l = info->base.input, source = 1; l; l = l->next, source++) {
		GnmValue *val = value_dup ((GnmValue *)l->data);
		GnmValue *val_c = NULL;
		GnmExpr const *expr_title = NULL;
		GnmExpr const *expr_input = NULL;
		char const *format = NULL;
		/* 1-based position of the first element to sample; an
		 * info->offset of 0 means "start at the period".  Unused
		 * (0) for random sampling. */
		guint offset = info->periodic ? ((info->offset == 0) ? info->period : info->offset): 0;
		GnmEvalPos ep;

		eval_pos_init_sheet (&ep, val->v_range.cell.a.sheet);

		/* Heading row of this group of columns. */
		dao_set_italic (dao, col, 0, col + info->number - 1, 0);

		if (info->base.labels) {
			/* Keep an unmodified copy of the range for the
			 * heading, then exclude the label from the data:
			 * either by moving the start of the range or, for
			 * areas, by skipping one more element. */
			val_c = value_dup (val);
			switch (info->base.group_by) {
			case GROUPED_BY_ROW:
				val->v_range.cell.a.col++;
				break;
			case GROUPED_BY_COL:
				val->v_range.cell.a.row++;
				break;
			default:
				offset++;
				break;
			}
			/* INDEX (full range) as heading; presumably this
			 * evaluates to the first cell, i.e. the label. */
			expr_title = gnm_expr_new_funcall1 (fd_index,
							    gnm_expr_new_constant (val_c));
			for (ct = 0; ct < info->number; ct++)
				dao_set_cell_expr (dao, col+ct, 0, gnm_expr_copy (expr_title));
			gnm_expr_free (expr_title);
		} else {
			/* No labels: generic "Row n" / "Column n" /
			 * "Area n" heading. */
			switch (info->base.group_by) {
			case GROUPED_BY_ROW:
				format = _("Row %d");
				break;
			case GROUPED_BY_COL:
				format = _("Column %d");
				break;
			default:
				format = _("Area %d");
				break;
			}
			for (ct = 0; ct < info->number; ct++)
				dao_set_cell_printf (dao, col+ct, 0, format, source);
		}

		/* Template for the data range; only copies of it are put
		 * into cells, the template itself is freed below. */
		expr_input = gnm_expr_new_constant (value_dup (val));


		if (info->periodic) {
			guint i;
			gint height = value_area_get_height (val, &ep);
			gint width = value_area_get_width (val, &ep);
			GnmExpr const *expr_period;

			for (i=0; i < info->size; i++, offset += info->period) {
				gint x_offset;
				gint y_offset;

				/* Convert the linear 1-based position into
				 * 1-based (row, column) indices, walking the
				 * range row by row if info->row_major is set
				 * and column by column otherwise. */
				if (info->row_major) {
					y_offset = (offset - 1)/width + 1;
					x_offset = offset - (y_offset - 1) * width;
				} else {
					x_offset = (offset - 1)/height + 1;
					y_offset = offset - (x_offset - 1) * height;
				}

				expr_period = gnm_expr_new_funcall3
					(fd_index, gnm_expr_copy (expr_input),
					 gnm_expr_new_constant (value_new_int (y_offset)),
					 gnm_expr_new_constant (value_new_int (x_offset)));

				/* Columns 0, 2, 4, ... of the group use the
				 * requested traversal order. */
				for (ct = 0; ct < info->number; ct += 2)
					dao_set_cell_expr (dao, col + ct, i + 1,
							   gnm_expr_copy (expr_period));
				gnm_expr_free (expr_period);

				if (info->number > 1) {
					/* Columns 1, 3, 5, ... use the
					 * opposite traversal order (note the
					 * negated row_major test). */
					if (!info->row_major) {
						y_offset = (offset - 1)/width + 1;
						x_offset = offset - (y_offset - 1) * width;
					} else {
						x_offset = (offset - 1)/height + 1;
						y_offset = offset - (x_offset - 1) * height;
					}

					expr_period = gnm_expr_new_funcall3
						(fd_index, gnm_expr_copy (expr_input),
						 gnm_expr_new_constant (value_new_int (y_offset)),
						 gnm_expr_new_constant (value_new_int (x_offset)));

					for (ct = 1; ct < info->number; ct += 2)
						dao_set_cell_expr (dao, col + ct, i + 1,
								   gnm_expr_copy (expr_period));
					gnm_expr_free (expr_period);

				}
			}
			col += info->number;
		} else {
			GnmExpr const *expr_random;
			guint i;

			/* Every sample cell gets its own copy of
			 * RANDDISCRETE (range). */
			expr_random = gnm_expr_new_funcall1 (fd_randdiscrete,
							     gnm_expr_copy (expr_input));

			/* col is advanced here, once per sample column. */
			for (ct = 0; ct < info->number; ct++, col++)
				for (i=0; i < info->size; i++)
					dao_set_cell_expr (dao, col, i + 1,
							   gnm_expr_copy (expr_random));
			gnm_expr_free (expr_random);
		}

		value_release (val);
		gnm_expr_free (expr_input);

	}

	if (fd_index != NULL)
		gnm_func_dec_usage (fd_index);
	if (fd_randdiscrete != NULL)
		gnm_func_dec_usage (fd_randdiscrete);

	dao_redraw_respan (dao);

	return FALSE;
}
1292
1293 gboolean
analysis_tool_sampling_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)1294 analysis_tool_sampling_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
1295 analysis_tool_engine_t selector, gpointer result)
1296 {
1297 analysis_tools_data_sampling_t *info = specs;
1298
1299 switch (selector) {
1300 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
1301 return (dao_command_descriptor (dao, _("Sampling (%s)"), result)
1302 == NULL);
1303 case TOOL_ENGINE_UPDATE_DAO:
1304 {
1305 GSList *l;
1306
1307 prepare_input_range (&info->base.input, info->base.group_by);
1308
1309 if (info->periodic) {
1310 info->size = 1;
1311 for (l = info->base.input; l; l = l->next) {
1312 GnmEvalPos ep;
1313 GnmValue *val = ((GnmValue *)l->data);
1314 gint size;
1315 guint usize;
1316 eval_pos_init_sheet (&ep, val->v_range.cell.a.sheet);
1317 size = (value_area_get_width (val, &ep) *
1318 value_area_get_height (val, &ep));
1319 usize = (size > 0) ? size : 1;
1320
1321 if (info->offset == 0)
1322 usize = usize/info->period;
1323 else
1324 usize = (usize - info->offset)/info->period + 1;
1325 if (usize > info->size)
1326 info->size = usize;
1327 }
1328 }
1329
1330 dao_adjust (dao, info->number * g_slist_length (info->base.input),
1331 1 + info->size);
1332 return FALSE;
1333 }
1334 case TOOL_ENGINE_CLEAN_UP:
1335 return analysis_tool_generic_clean (specs);
1336 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
1337 return FALSE;
1338 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
1339 dao_prepare_output (NULL, dao, _("Sample"));
1340 return FALSE;
1341 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
1342 return dao_format_output (dao, _("Sample"));
1343 case TOOL_ENGINE_PERFORM_CALC:
1344 default:
1345 return analysis_tool_sampling_engine_run (dao, specs);
1346 }
1347 return TRUE; /* We shouldn't get here */
1348 }
1349
1350
1351
1352 /************* z-Test: Two Sample for Means ******************************
1353 *
1354 * The results are given in a table which can be printed out in a new
1355 * sheet, in a new workbook, or simply into an existing sheet.
1356 *
1357 **/
1358
1359
/*
 * Write the result table of the "z-Test: Two Sample for Means" tool.
 *
 * Layout: column 0 holds the row captions, column 1 the values for
 * info->base.range_1 and all test results, column 2 the values for
 * info->base.range_2.  Rows: 0 heading, 1 mean, 2 known variance,
 * 3 observations, 4 hypothesized mean difference, 5 observed mean
 * difference, 6 z, 7 one-tail P, 8 one-tail critical z, 9 two-tail P,
 * 10 two-tail critical z.
 *
 * The result cells are formulas which refer to earlier rows through
 * relative references built with make_cellref (dx, dy).  Expressions
 * passed to dao_set_cell_expr () and to the gnm_expr_new_* constructors
 * are never freed here afterwards, so those calls presumably take
 * ownership of their arguments.
 *
 * Always returns FALSE.
 */
static gboolean
analysis_tool_ztest_engine_run (data_analysis_output_t *dao,
				analysis_tools_data_ttests_t *info)
{
	GnmValue *val_1;
	GnmValue *val_2;
	GnmFunc *fd_count;
	GnmFunc *fd_mean;
	GnmFunc *fd_normsdist;
	GnmFunc *fd_normsinv;
	GnmFunc *fd_abs;
	GnmFunc *fd_sqrt;
	GnmExpr const *expr_1;
	GnmExpr const *expr_2;
	GnmExpr const *expr_mean_1;
	GnmExpr const *expr_mean_2;
	GnmExpr const *expr_count_1;
	GnmExpr const *expr_count_2;

	/* Caption column and heading row are set in italics. */
	dao_set_italic (dao, 0, 0, 0, 11);
	dao_set_italic (dao, 0, 0, 2, 0);

	dao_set_cell (dao, 0, 0, "");
	set_cell_text_col (dao, 0, 1, _("/Mean"
					"/Known Variance"
					"/Observations"
					"/Hypothesized Mean Difference"
					"/Observed Mean Difference"
					"/z"
					"/P (Z<=z) one-tail"
					"/z Critical one-tail"
					"/P (Z<=z) two-tail"
					"/z Critical two-tail"));

	fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
	gnm_func_inc_usage (fd_mean);
	fd_normsdist = gnm_func_lookup_or_add_placeholder ("NORMSDIST");
	gnm_func_inc_usage (fd_normsdist);
	fd_abs = gnm_func_lookup_or_add_placeholder ("ABS");
	gnm_func_inc_usage (fd_abs);
	fd_sqrt = gnm_func_lookup_or_add_placeholder ("SQRT");
	gnm_func_inc_usage (fd_sqrt);
	fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
	gnm_func_inc_usage (fd_count);
	fd_normsinv = gnm_func_lookup_or_add_placeholder ("NORMSINV");
	gnm_func_inc_usage (fd_normsinv);

	val_1 = value_dup (info->base.range_1);
	expr_1 = gnm_expr_new_constant (value_dup (val_1));

	val_2 = value_dup (info->base.range_2);
	expr_2 = gnm_expr_new_constant (value_dup (val_2));

	/* Labels */
	analysis_tools_write_label_ftest (val_1, dao, 1, 0,
					  info->base.labels, 1);
	analysis_tools_write_label_ftest (val_2, dao, 2, 0,
					  info->base.labels, 2);


	/* Mean */
	expr_mean_1 = gnm_expr_new_funcall1 (fd_mean, gnm_expr_copy (expr_1));
	dao_set_cell_expr (dao, 1, 1, expr_mean_1);
	/* Only a copy goes into the cell; expr_mean_2 itself is needed
	 * again for the observed mean difference. */
	expr_mean_2 = gnm_expr_new_funcall1 (fd_mean, gnm_expr_copy (expr_2));
	dao_set_cell_expr (dao, 2, 1, gnm_expr_copy (expr_mean_2));

	/* Known Variance */
	dao_set_cell_float (dao, 1, 2, info->var1);
	dao_set_cell_float (dao, 2, 2, info->var2);

	/* Observations */
	/* expr_1 and expr_2 are used up here (no copy is made). */
	expr_count_1 = gnm_expr_new_funcall1 (fd_count, expr_1);
	dao_set_cell_expr (dao, 1, 3, expr_count_1);
	expr_count_2 = gnm_expr_new_funcall1 (fd_count, expr_2);
	dao_set_cell_expr (dao, 2, 3, gnm_expr_copy (expr_count_2));

	/* Hypothesized Mean Difference */
	dao_set_cell_float (dao, 1, 4, info->mean_diff);

	/* Observed Mean Difference */
	/* If the column-2 mean cell is visible, refer to it instead of
	 * repeating the AVERAGE expression. */
	if (dao_cell_is_visible (dao, 2, 1)) {
		gnm_expr_free (expr_mean_2);
		expr_mean_2 = make_cellref (1, -4);
	}

	{
		/* mean 1 (four rows up) - mean 2 */
		dao_set_cell_expr (dao, 1, 5,
				   gnm_expr_new_binary
				   (make_cellref (0, -4),
				    GNM_EXPR_OP_SUB,
				    expr_mean_2));
	}

	/* z */
	{
		/* Relative to row 6: variance is four rows up, the
		 * observation count three rows up.  expr_count_1 shadows
		 * the outer variable of the same name. */
		GnmExpr const *expr_var_1 = make_cellref (0, -4);
		GnmExpr const *expr_var_2 = NULL;
		GnmExpr const *expr_count_1 = make_cellref (0, -3);
		GnmExpr const *expr_a = NULL;
		GnmExpr const *expr_b = NULL;
		GnmExpr const *expr_count_2_adj = NULL;

		/* Second variance: cell reference if visible, otherwise
		 * the constant itself. */
		if (dao_cell_is_visible (dao, 2, 2)) {
			expr_var_2 = make_cellref (1, -4);
		} else {
			expr_var_2 = gnm_expr_new_constant
			(value_new_float (info->var2));
		}

		/* Second count: cell reference if visible, otherwise the
		 * COUNT expression, which is used up here. */
		if (dao_cell_is_visible (dao, 2, 3)) {
			gnm_expr_free (expr_count_2);
			expr_count_2_adj = make_cellref (1, -3);
		} else
			expr_count_2_adj = expr_count_2;

		expr_a = gnm_expr_new_binary (expr_var_1, GNM_EXPR_OP_DIV,
					      expr_count_1);
		expr_b = gnm_expr_new_binary (expr_var_2, GNM_EXPR_OP_DIV,
					      expr_count_2_adj);

		/* (observed - hypothesized) / SQRT (var1/n1 + var2/n2) */
		dao_set_cell_expr (dao, 1, 6,
				   gnm_expr_new_binary
				   (gnm_expr_new_binary
				    (make_cellref (0, -1),
				     GNM_EXPR_OP_SUB,
				     make_cellref (0, -2)),
				    GNM_EXPR_OP_DIV,
				    gnm_expr_new_funcall1
				    (fd_sqrt,
				     gnm_expr_new_binary
				     (expr_a,
				      GNM_EXPR_OP_ADD,
				      expr_b))));
	}

	/* P (Z<=z) one-tail */
	/* FIXME: 1- looks like a bad idea.  */
	/* 1 - NORMSDIST (ABS (z)) */
	dao_set_cell_expr
		(dao, 1, 7,
		 gnm_expr_new_binary
		 (gnm_expr_new_constant (value_new_int (1)),
		  GNM_EXPR_OP_SUB,
		  gnm_expr_new_funcall1
		  (fd_normsdist,
		   gnm_expr_new_funcall1
		   (fd_abs,
		    make_cellref (0, -1)))));


	/* Critical Z, one right tail */
	/* -NORMSINV (alpha) */
	dao_set_cell_expr
		(dao, 1, 8,
		 gnm_expr_new_unary
		 (GNM_EXPR_OP_UNARY_NEG,
		  gnm_expr_new_funcall1
		  (fd_normsinv,
		   gnm_expr_new_constant
		   (value_new_float (info->base.alpha)))));

	/* P (Z<=z) two-tail */
	/* 2 * NORMSDIST (-ABS (z)); z is three rows up */
	dao_set_cell_expr
		(dao, 1, 9,
		 gnm_expr_new_binary
		 (gnm_expr_new_constant (value_new_int (2)),
		  GNM_EXPR_OP_MULT,
		  gnm_expr_new_funcall1
		  (fd_normsdist,
		   gnm_expr_new_unary
		   (GNM_EXPR_OP_UNARY_NEG,
		    gnm_expr_new_funcall1
		    (fd_abs,
		     make_cellref (0, -3))))));

	/* Critical Z, two tails */
	/* -NORMSINV (alpha / 2) */
	dao_set_cell_expr
		(dao, 1, 10,
		 gnm_expr_new_unary
		 (GNM_EXPR_OP_UNARY_NEG,
		  gnm_expr_new_funcall1
		  (fd_normsinv,
		   gnm_expr_new_binary
		   (gnm_expr_new_constant
		    (value_new_float (info->base.alpha)),
		    GNM_EXPR_OP_DIV,
		    gnm_expr_new_constant (value_new_int (2))))));

	gnm_func_dec_usage (fd_mean);
	gnm_func_dec_usage (fd_normsdist);
	gnm_func_dec_usage (fd_abs);
	gnm_func_dec_usage (fd_sqrt);
	gnm_func_dec_usage (fd_count);
	gnm_func_dec_usage (fd_normsinv);

	/* And finish up */

	value_release (val_1);
	value_release (val_2);

	dao_redraw_respan (dao);

	return FALSE;
}
1562
1563
1564 gboolean
analysis_tool_ztest_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)1565 analysis_tool_ztest_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
1566 analysis_tool_engine_t selector, gpointer result)
1567 {
1568 switch (selector) {
1569 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
1570 return (dao_command_descriptor (dao, _("z-Test (%s)"), result)
1571 == NULL);
1572 case TOOL_ENGINE_UPDATE_DAO:
1573 dao_adjust (dao, 3, 11);
1574 return FALSE;
1575 case TOOL_ENGINE_CLEAN_UP:
1576 return analysis_tool_generic_b_clean (specs);
1577 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
1578 return FALSE;
1579 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
1580 dao_prepare_output (NULL, dao, _("z-Test"));
1581 return FALSE;
1582 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
1583 return dao_format_output (dao, _("z-Test"));
1584 case TOOL_ENGINE_PERFORM_CALC:
1585 default:
1586 return analysis_tool_ztest_engine_run (dao, specs);
1587 }
1588 return TRUE; /* We shouldn't get here */
1589 }
1590
1591
/************* t-Test Tools ********************************************
 *
 * The t-Test tool set consists of three kinds of tests that compare
 * the means of two variables: Student's t-test for paired samples,
 * Student's t-test for two samples assuming equal variances, and the
 * same test assuming unequal variances.  The results are given in a
 * table which can be printed out in a new sheet, in a new workbook,
 * or simply into an existing sheet.
 *
 **/
1602
/* t-Test: Paired Two Sample for Means.
 *
 * Writes the result table.  Layout: column 0 holds the row captions,
 * column 1 the values for info->base.range_1 and all test results,
 * column 2 the values for info->base.range_2.  Rows: 0 heading,
 * 1 mean, 2 variance, 3 observations, 4 Pearson correlation,
 * 5 hypothesized mean difference, 6 observed mean difference,
 * 7 variance of the differences, 8 df, 9 t, 10 one-tail P,
 * 11 one-tail critical t, 12 two-tail P, 13 two-tail critical t.
 *
 * Rows 6 to 8 are array formulas over the two input ranges; the rows
 * below refer to earlier rows through relative references built with
 * make_cellref (dx, dy).
 *
 * Always returns FALSE.
 */
static gboolean
analysis_tool_ttest_paired_engine_run (data_analysis_output_t *dao,
				       analysis_tools_data_ttests_t *info)
{
	GnmValue *val_1;
	GnmValue *val_2;

	GnmFunc *fd_count;
	GnmFunc *fd_mean;
	GnmFunc *fd_var;
	GnmFunc *fd_tdist;
	GnmFunc *fd_abs;
	GnmFunc *fd_tinv;
	GnmFunc *fd_correl;
	GnmFunc *fd_isodd;
	GnmFunc *fd_isnumber;
	GnmFunc *fd_if;
	GnmFunc *fd_sum;

	GnmExpr const *expr_1;
	GnmExpr const *expr_2;
	GnmExpr const *expr_diff;
	GnmExpr const *expr_ifisnumber;
	GnmExpr const *expr_ifisoddifisnumber;

	/* Caption column and heading row are set in italics. */
	dao_set_italic (dao, 0, 0, 0, 13);
	dao_set_italic (dao, 0, 0, 2, 0);

	dao_set_cell (dao, 0, 0, "");
	set_cell_text_col (dao, 0, 1, _("/Mean"
					"/Variance"
					"/Observations"
					"/Pearson Correlation"
					"/Hypothesized Mean Difference"
					"/Observed Mean Difference"
					"/Variance of the Differences"
					"/df"
					"/t Stat"
					"/P (T<=t) one-tail"
					"/t Critical one-tail"
					"/P (T<=t) two-tail"
					"/t Critical two-tail"));

	fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
	gnm_func_inc_usage (fd_mean);
	fd_var = gnm_func_lookup_or_add_placeholder ("VAR");
	gnm_func_inc_usage (fd_var);
	fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
	gnm_func_inc_usage (fd_count);
	fd_correl = gnm_func_lookup_or_add_placeholder ("CORREL");
	gnm_func_inc_usage (fd_correl);
	fd_tinv = gnm_func_lookup_or_add_placeholder ("TINV");
	gnm_func_inc_usage (fd_tinv);
	fd_tdist = gnm_func_lookup_or_add_placeholder ("TDIST");
	gnm_func_inc_usage (fd_tdist);
	fd_abs = gnm_func_lookup_or_add_placeholder ("ABS");
	gnm_func_inc_usage (fd_abs);
	fd_isodd = gnm_func_lookup_or_add_placeholder ("ISODD");
	gnm_func_inc_usage (fd_isodd);
	fd_isnumber = gnm_func_lookup_or_add_placeholder ("ISNUMBER");
	gnm_func_inc_usage (fd_isnumber);
	fd_if = gnm_func_lookup_or_add_placeholder ("IF");
	gnm_func_inc_usage (fd_if);
	fd_sum = gnm_func_lookup_or_add_placeholder ("SUM");
	gnm_func_inc_usage (fd_sum);

	val_1 = value_dup (info->base.range_1);
	val_2 = value_dup (info->base.range_2);

	/* Labels */
	analysis_tools_write_label_ftest (val_1, dao, 1, 0,
					  info->base.labels, 1);
	analysis_tools_write_label_ftest (val_2, dao, 2, 0,
					  info->base.labels, 2);

	/* Mean */

	/* expr_1 and expr_2 serve as templates: cells get copies until
	 * the originals are used up in expr_diff below. */
	expr_1 = gnm_expr_new_constant (value_dup (val_1));
	dao_set_cell_expr (dao, 1, 1,
			   gnm_expr_new_funcall1 (fd_mean,
						  gnm_expr_copy (expr_1)));

	expr_2 = gnm_expr_new_constant (value_dup (val_2));
	dao_set_cell_expr (dao, 2, 1,
			   gnm_expr_new_funcall1 (fd_mean,
						  gnm_expr_copy (expr_2)));

	/* Variance */
	dao_set_cell_expr (dao, 1, 2,
			   gnm_expr_new_funcall1 (fd_var,
						  gnm_expr_copy (expr_1)));
	dao_set_cell_expr (dao, 2, 2,
			   gnm_expr_new_funcall1 (fd_var,
						  gnm_expr_copy (expr_2)));

	/* Observations */
	dao_set_cell_expr (dao, 1, 3,
			   gnm_expr_new_funcall1 (fd_count,
						  gnm_expr_copy (expr_1)));
	dao_set_cell_expr (dao, 2, 3,
			   gnm_expr_new_funcall1 (fd_count,
						  gnm_expr_copy (expr_2)));

	/* Pearson Correlation */
	dao_set_cell_expr (dao, 1, 4,
			   gnm_expr_new_funcall2 (fd_correl,
						  gnm_expr_copy (expr_1),
						  gnm_expr_copy (expr_2)));

	/* Hypothesized Mean Difference */
	dao_set_cell_float (dao, 1, 5, info->mean_diff);

	/* Some useful expressions for the next field */

	/* area1 - area2; expr_1 and expr_2 become part of expr_diff and
	 * are only copied from here on. */
	expr_diff = gnm_expr_new_binary (expr_1, GNM_EXPR_OP_SUB, expr_2);

	/* IF (ISNUMBER (area1), 1, 0) * IF (ISNUMBER (area2), 1, 0) */
	expr_ifisnumber = gnm_expr_new_binary (gnm_expr_new_funcall3 (
						       fd_if,
						       gnm_expr_new_funcall1 (
							       fd_isnumber,
							       gnm_expr_copy (expr_1)),
						       gnm_expr_new_constant (value_new_int (1)),
						       gnm_expr_new_constant (value_new_int (0))),
					       GNM_EXPR_OP_MULT,
					       gnm_expr_new_funcall3 (
						       fd_if,
						       gnm_expr_new_funcall1 (
							       fd_isnumber,
							       gnm_expr_copy (expr_2)),
						       gnm_expr_new_constant (value_new_int (1)),
						       gnm_expr_new_constant (value_new_int (0)))
		);
	/* IF (ISODD (expr_ifisnumber), area1-area2, "NA")*/
	expr_ifisoddifisnumber = gnm_expr_new_funcall3 (fd_if,
							gnm_expr_new_funcall1 (fd_isodd,
									       gnm_expr_copy (expr_ifisnumber)),
							expr_diff,
							gnm_expr_new_constant (value_new_string ("NA")));

	/* Observed Mean Difference */
	dao_set_cell_array_expr (dao, 1, 6,
				 gnm_expr_new_funcall1 (fd_mean,
							gnm_expr_copy (expr_ifisoddifisnumber)));

	/* Variance of the Differences */
	/* Last use of expr_ifisoddifisnumber: no copy. */
	dao_set_cell_array_expr (dao, 1, 7,
				 gnm_expr_new_funcall1 (fd_var,
							expr_ifisoddifisnumber));

	/* df */
	/* SUM (expr_ifisnumber) - 1; last use of expr_ifisnumber. */
	dao_set_cell_array_expr (dao, 1, 8,
				 gnm_expr_new_binary
				 (gnm_expr_new_funcall1 (
					 fd_sum,
					 expr_ifisnumber),
				  GNM_EXPR_OP_SUB,
				  gnm_expr_new_constant (value_new_int (1))));

	/* t */
	/* E24 = (E21-E20)/(E22/(E23+1))^0.5 */
	/* i.e. (observed - hypothesized mean difference) /
	 *      (variance of the differences / (df + 1)) ^ 0.5 */
	{
		GnmExpr const *expr_num;
		GnmExpr const *expr_denom;

		expr_num = gnm_expr_new_binary (make_cellref (0, -3),
						GNM_EXPR_OP_SUB,
						make_cellref (0,-4));

		expr_denom = gnm_expr_new_binary
			(gnm_expr_new_binary
			 (make_cellref (0, -2),
			  GNM_EXPR_OP_DIV,
			  gnm_expr_new_binary
			  (make_cellref (0, -1),
			   GNM_EXPR_OP_ADD,
			   gnm_expr_new_constant
			   (value_new_int (1)))),
			 GNM_EXPR_OP_EXP,
			 gnm_expr_new_constant
			 (value_new_float (0.5)));

		dao_set_cell_expr (dao, 1, 9,
				   gnm_expr_new_binary
				   (expr_num, GNM_EXPR_OP_DIV, expr_denom));
	}

	/* P (T<=t) one-tail */
	/* TDIST (ABS (t), df, 1) */
	dao_set_cell_expr
		(dao, 1, 10,
		 gnm_expr_new_funcall3
		 (fd_tdist,
		  gnm_expr_new_funcall1
		  (fd_abs,
		   make_cellref (0, -1)),
		  make_cellref (0, -2),
		  gnm_expr_new_constant (value_new_int (1))));

	/* t Critical one-tail */
	/* TINV (2 * alpha, df) */
	dao_set_cell_expr
		(dao, 1, 11,
		 gnm_expr_new_funcall2
		 (fd_tinv,
		  gnm_expr_new_binary
		  (gnm_expr_new_constant (value_new_int (2)),
		   GNM_EXPR_OP_MULT,
		   gnm_expr_new_constant
		   (value_new_float (info->base.alpha))),
		  make_cellref (0, -3)));

	/* P (T<=t) two-tail */
	/* TDIST (ABS (t), df, 2) */
	dao_set_cell_expr
		(dao, 1, 12,
		 gnm_expr_new_funcall3
		 (fd_tdist,
		  gnm_expr_new_funcall1 (fd_abs, make_cellref (0, -3)),
		  make_cellref (0, -4),
		  gnm_expr_new_constant (value_new_int (2))));

	/* t Critical two-tail */
	/* TINV (alpha, df) */
	dao_set_cell_expr
		(dao, 1, 13,
		 gnm_expr_new_funcall2
		 (fd_tinv,
		  gnm_expr_new_constant
		  (value_new_float (info->base.alpha)),
		  make_cellref (0, -5)));

	/* And finish up */

	value_release (val_1);
	value_release (val_2);

	gnm_func_dec_usage (fd_count);
	gnm_func_dec_usage (fd_correl);
	gnm_func_dec_usage (fd_mean);
	gnm_func_dec_usage (fd_var);
	gnm_func_dec_usage (fd_tinv);
	gnm_func_dec_usage (fd_tdist);
	gnm_func_dec_usage (fd_abs);
	gnm_func_dec_usage (fd_isodd);
	gnm_func_dec_usage (fd_isnumber);
	gnm_func_dec_usage (fd_if);
	gnm_func_dec_usage (fd_sum);

	dao_redraw_respan (dao);

	return FALSE;
}
1854
1855 gboolean
analysis_tool_ttest_paired_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)1856 analysis_tool_ttest_paired_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
1857 analysis_tool_engine_t selector,
1858 gpointer result)
1859 {
1860 switch (selector) {
1861 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
1862 return (dao_command_descriptor (dao, _("t-Test, paired (%s)"), result)
1863 == NULL);
1864 case TOOL_ENGINE_UPDATE_DAO:
1865 dao_adjust (dao, 3, 14);
1866 return FALSE;
1867 case TOOL_ENGINE_CLEAN_UP:
1868 return analysis_tool_generic_b_clean (specs);
1869 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
1870 return FALSE;
1871 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
1872 dao_prepare_output (NULL, dao, _("t-Test"));
1873 return FALSE;
1874 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
1875 return dao_format_output (dao, _("t-Test"));
1876 case TOOL_ENGINE_PERFORM_CALC:
1877 default:
1878 return analysis_tool_ttest_paired_engine_run (dao, specs);
1879 }
1880 return TRUE; /* We shouldn't get here */
1881 }
1882
1883
1884
1885
/* t-Test: Two-Sample Assuming Equal Variances.
 *
 * Writes the result table.  Layout: column 0 holds the row captions,
 * column 1 the values for info->base.range_1 and all test results,
 * column 2 the values for info->base.range_2.  Rows: 0 heading,
 * 1 mean, 2 variance, 3 observations, 4 pooled variance,
 * 5 hypothesized mean difference, 6 observed mean difference, 7 df,
 * 8 t, 9 one-tail P, 10 one-tail critical t, 11 two-tail P,
 * 12 two-tail critical t.
 *
 * Result cells refer to earlier rows through relative references built
 * with make_cellref (dx, dy).  Wherever a column-2 cell is needed, the
 * code uses a reference to it if dao_cell_is_visible () says so and
 * the underlying expression otherwise.
 *
 * Always returns FALSE.
 */
static gboolean
analysis_tool_ttest_eqvar_engine_run (data_analysis_output_t *dao,
				      analysis_tools_data_ttests_t *info)
{
	GnmValue *val_1;
	GnmValue *val_2;
	GnmFunc *fd_count;
	GnmFunc *fd_mean;
	GnmFunc *fd_var;
	GnmFunc *fd_tdist;
	GnmFunc *fd_abs;
	GnmFunc *fd_tinv;
	GnmExpr const *expr_1;
	GnmExpr const *expr_2;
	GnmExpr const *expr_mean_1;
	GnmExpr const *expr_mean_2;
	GnmExpr const *expr_var_1;
	GnmExpr const *expr_var_2;
	GnmExpr const *expr_count_1;
	GnmExpr const *expr_count_2;

	/* Caption column and heading row are set in italics. */
	dao_set_italic (dao, 0, 0, 0, 12);
	dao_set_italic (dao, 0, 0, 2, 0);

	dao_set_cell (dao, 0, 0, "");
	set_cell_text_col (dao, 0, 1, _("/Mean"
					"/Variance"
					"/Observations"
					"/Pooled Variance"
					"/Hypothesized Mean Difference"
					"/Observed Mean Difference"
					"/df"
					"/t Stat"
					"/P (T<=t) one-tail"
					"/t Critical one-tail"
					"/P (T<=t) two-tail"
					"/t Critical two-tail"));


	val_1 = value_dup (info->base.range_1);
	val_2 = value_dup (info->base.range_2);

	fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
	gnm_func_inc_usage (fd_mean);
	fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
	gnm_func_inc_usage (fd_count);
	fd_var = gnm_func_lookup_or_add_placeholder ("VAR");
	gnm_func_inc_usage (fd_var);
	fd_tdist = gnm_func_lookup_or_add_placeholder ("TDIST");
	gnm_func_inc_usage (fd_tdist);
	fd_abs = gnm_func_lookup_or_add_placeholder ("ABS");
	gnm_func_inc_usage (fd_abs);
	fd_tinv = gnm_func_lookup_or_add_placeholder ("TINV");
	gnm_func_inc_usage (fd_tinv);

	/* Labels */
	analysis_tools_write_label_ftest (val_1, dao, 1, 0,
					  info->base.labels, 1);
	analysis_tools_write_label_ftest (val_2, dao, 2, 0,
					  info->base.labels, 2);


	/* Mean */
	/* For column 2 only copies go into the cells; the originals
	 * (expr_mean_2, expr_var_2, expr_count_2) are needed again
	 * further down. */
	expr_1 = gnm_expr_new_constant (value_dup (val_1));
	expr_mean_1 = gnm_expr_new_funcall1 (fd_mean,
					     gnm_expr_copy (expr_1));
	dao_set_cell_expr (dao, 1, 1, expr_mean_1);
	expr_2 = gnm_expr_new_constant (value_dup (val_2));
	expr_mean_2 = gnm_expr_new_funcall1 (fd_mean,
					     gnm_expr_copy (expr_2));
	dao_set_cell_expr (dao, 2, 1, gnm_expr_copy (expr_mean_2));

	/* Variance */
	expr_var_1 = gnm_expr_new_funcall1 (fd_var, gnm_expr_copy (expr_1));
	dao_set_cell_expr (dao, 1, 2, expr_var_1);
	expr_var_2 = gnm_expr_new_funcall1 (fd_var, gnm_expr_copy (expr_2));
	dao_set_cell_expr (dao, 2, 2, gnm_expr_copy (expr_var_2));

	/* Observations */
	/* expr_1 and expr_2 are used up here (no copy is made). */
	expr_count_1 = gnm_expr_new_funcall1 (fd_count, expr_1);
	dao_set_cell_expr (dao, 1, 3, expr_count_1);
	expr_count_2 = gnm_expr_new_funcall1 (fd_count, expr_2);
	dao_set_cell_expr (dao, 2, 3, gnm_expr_copy (expr_count_2));

	/* Pooled Variance */
	/* ((n1 - 1) * var1 + (n2 - 1) * var2) / ((n1 - 1) + (n2 - 1)) */
	{
		/* expr_var_1 and expr_count_1 shadow the outer variables
		 * of the same name; here they are references to rows 2
		 * and 3 of column 1. */
		GnmExpr const *expr_var_2_adj = NULL;
		GnmExpr const *expr_count_2_adj = NULL;
		GnmExpr const *expr_var_1 = make_cellref (0, -2);
		GnmExpr const *expr_count_1 = make_cellref (0, -1);
		GnmExpr const *expr_one = gnm_expr_new_constant
			(value_new_int (1));
		GnmExpr const *expr_count_1_minus_1;
		GnmExpr const *expr_count_2_minus_1;

		/* Last use of expr_var_2: either freed or used up. */
		if (dao_cell_is_visible (dao, 2, 2)) {
			gnm_expr_free (expr_var_2);
			expr_var_2_adj = make_cellref (1, -2);
		} else
			expr_var_2_adj = expr_var_2;

		/* expr_count_2 is still needed for df and t, hence only
		 * a copy is used here. */
		if (dao_cell_is_visible (dao, 2, 3)) {
			expr_count_2_adj = make_cellref (1, -1);
		} else
			expr_count_2_adj = gnm_expr_copy (expr_count_2);

		expr_count_1_minus_1 = gnm_expr_new_binary
			(expr_count_1,
			 GNM_EXPR_OP_SUB,
			 gnm_expr_copy (expr_one));
		expr_count_2_minus_1 = gnm_expr_new_binary
			(expr_count_2_adj, GNM_EXPR_OP_SUB, expr_one);

		/* The (n - 1) terms appear twice: copies in the
		 * numerator, the originals in the denominator. */
		dao_set_cell_expr (dao, 1, 4,
				   gnm_expr_new_binary
				   (gnm_expr_new_binary
				    (gnm_expr_new_binary
				     (gnm_expr_copy (expr_count_1_minus_1),
				      GNM_EXPR_OP_MULT,
				      expr_var_1),
				     GNM_EXPR_OP_ADD,
				     gnm_expr_new_binary
				     (gnm_expr_copy (expr_count_2_minus_1),
				      GNM_EXPR_OP_MULT,
				      expr_var_2_adj)),
				    GNM_EXPR_OP_DIV,
				    gnm_expr_new_binary
				    (expr_count_1_minus_1,
				     GNM_EXPR_OP_ADD,
				     expr_count_2_minus_1)));

	}

	/* Hypothesized Mean Difference */
	dao_set_cell_float (dao, 1, 5, info->mean_diff);

	/* Observed Mean Difference */
	/* mean 1 (five rows up) - mean 2 */
	if (dao_cell_is_visible (dao, 2,1)) {
		gnm_expr_free (expr_mean_2);
		expr_mean_2 = make_cellref (1, -5);
	}
	dao_set_cell_expr (dao, 1, 6,
			   gnm_expr_new_binary
			   (make_cellref (0, -5),
			    GNM_EXPR_OP_SUB,
			    expr_mean_2));

	/* df */
	/* n1 + n2 - 2 */
	{
		GnmExpr const *expr_count_1 = make_cellref (0, -4);
		GnmExpr const *expr_count_2_adj;
		GnmExpr const *expr_two = gnm_expr_new_constant
			(value_new_int (2));

		/* Again only a copy: expr_count_2 is needed once more
		 * for t. */
		if (dao_cell_is_visible (dao, 2,3)) {
			expr_count_2_adj = make_cellref (1, -4);
		} else
			expr_count_2_adj = gnm_expr_copy (expr_count_2);

		dao_set_cell_expr (dao, 1, 7,
				   gnm_expr_new_binary
				   (gnm_expr_new_binary
				    (expr_count_1,
				     GNM_EXPR_OP_ADD,
				     expr_count_2_adj),
				    GNM_EXPR_OP_SUB,
				    expr_two));
	}

	/* t */
	/* (observed - hypothesized mean difference) /
	 * (pooled variance / n1 + pooled variance / n2) ^ 0.5 */
	{
		GnmExpr const *expr_var = make_cellref (0, -4);
		GnmExpr const *expr_count_1 = make_cellref (0, -5);
		GnmExpr const *expr_a;
		GnmExpr const *expr_b;
		GnmExpr const *expr_count_2_adj;

		/* Last use of expr_count_2: either freed or used up. */
		if (dao_cell_is_visible (dao, 2,3)) {
			gnm_expr_free (expr_count_2);
			expr_count_2_adj = make_cellref (1, -5);
		} else
			expr_count_2_adj = expr_count_2;

		expr_a = gnm_expr_new_binary (gnm_expr_copy (expr_var),
					      GNM_EXPR_OP_DIV,
					      expr_count_1);
		expr_b = gnm_expr_new_binary (expr_var,
					      GNM_EXPR_OP_DIV,
					      expr_count_2_adj);

		dao_set_cell_expr (dao, 1, 8,
				   gnm_expr_new_binary
				   (gnm_expr_new_binary
				    (make_cellref (0, -2),
				     GNM_EXPR_OP_SUB,
				     make_cellref (0, -3)),
				    GNM_EXPR_OP_DIV,
				    gnm_expr_new_binary
				    (gnm_expr_new_binary
				     (expr_a,
				      GNM_EXPR_OP_ADD,
				      expr_b),
				     GNM_EXPR_OP_EXP,
				     gnm_expr_new_constant
				     (value_new_float (0.5)))));

	}

	/* P (T<=t) one-tail */
	/* TDIST (ABS (t), df, 1) */
	dao_set_cell_expr
		(dao, 1, 9,
		 gnm_expr_new_funcall3
		 (fd_tdist,
		  gnm_expr_new_funcall1
		  (fd_abs,
		   make_cellref (0, -1)),
		  make_cellref (0, -2),
		  gnm_expr_new_constant (value_new_int (1))));

	/* t Critical one-tail */
	/* TINV (2 * alpha, df) */
	dao_set_cell_expr
		(dao, 1, 10,
		 gnm_expr_new_funcall2
		 (fd_tinv,
		  gnm_expr_new_binary
		  (gnm_expr_new_constant (value_new_int (2)),
		   GNM_EXPR_OP_MULT,
		   gnm_expr_new_constant
		   (value_new_float (info->base.alpha))),
		  make_cellref (0, -3)));

	/* P (T<=t) two-tail */
	/* TDIST (ABS (t), df, 2) */
	dao_set_cell_expr
		(dao, 1, 11,
		 gnm_expr_new_funcall3
		 (fd_tdist,
		  gnm_expr_new_funcall1
		  (fd_abs,
		   make_cellref (0, -3)),
		  make_cellref (0, -4),
		  gnm_expr_new_constant (value_new_int (2))));

	/* t Critical two-tail */
	/* TINV (alpha, df) */
	dao_set_cell_expr
		(dao, 1, 12,
		 gnm_expr_new_funcall2
		 (fd_tinv,
		  gnm_expr_new_constant
		  (value_new_float (info->base.alpha)),
		  make_cellref (0, -5)));

	/* And finish up */

	value_release (val_1);
	value_release (val_2);

	gnm_func_dec_usage (fd_mean);
	gnm_func_dec_usage (fd_var);
	gnm_func_dec_usage (fd_count);
	gnm_func_dec_usage (fd_tdist);
	gnm_func_dec_usage (fd_abs);
	gnm_func_dec_usage (fd_tinv);

	dao_redraw_respan (dao);

	return FALSE;
}
2155
2156 gboolean
analysis_tool_ttest_eqvar_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)2157 analysis_tool_ttest_eqvar_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
2158 analysis_tool_engine_t selector, gpointer result)
2159 {
2160 switch (selector) {
2161 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
2162 return (dao_command_descriptor (dao, _("t-Test (%s)"), result)
2163 == NULL);
2164 case TOOL_ENGINE_UPDATE_DAO:
2165 dao_adjust (dao, 3, 13);
2166 return FALSE;
2167 case TOOL_ENGINE_CLEAN_UP:
2168 return analysis_tool_generic_b_clean (specs);
2169 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
2170 return FALSE;
2171 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
2172 dao_prepare_output (NULL, dao, _("t-Test"));
2173 return FALSE;
2174 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
2175 return dao_format_output (dao, _("t-Test"));
2176 case TOOL_ENGINE_PERFORM_CALC:
2177 default:
2178 return analysis_tool_ttest_eqvar_engine_run (dao, specs);
2179 }
2180 return TRUE; /* We shouldn't get here */
2181 }
2182
2183 /* t-Test: Two-Sample Assuming Unequal Variances.
2184 */
2185 static gboolean
analysis_tool_ttest_neqvar_engine_run(data_analysis_output_t * dao,analysis_tools_data_ttests_t * info)2186 analysis_tool_ttest_neqvar_engine_run (data_analysis_output_t *dao,
2187 analysis_tools_data_ttests_t *info)
2188 {
2189 GnmValue *val_1;
2190 GnmValue *val_2;
2191 GnmFunc *fd_count;
2192 GnmFunc *fd_mean;
2193 GnmFunc *fd_var;
2194 GnmFunc *fd_tdist;
2195 GnmFunc *fd_abs;
2196 GnmFunc *fd_tinv;
2197 GnmExpr const *expr_1;
2198 GnmExpr const *expr_2;
2199 GnmExpr const *expr_mean_1;
2200 GnmExpr const *expr_mean_2;
2201 GnmExpr const *expr_var_1;
2202 GnmExpr const *expr_var_2;
2203 GnmExpr const *expr_count_1;
2204 GnmExpr const *expr_count_2;
2205
2206 dao_set_italic (dao, 0, 0, 0, 11);
2207 dao_set_italic (dao, 0, 0, 2, 0);
2208
2209 dao_set_cell (dao, 0, 0, "");
2210 set_cell_text_col (dao, 0, 1, _("/Mean"
2211 "/Variance"
2212 "/Observations"
2213 "/Hypothesized Mean Difference"
2214 "/Observed Mean Difference"
2215 "/df"
2216 "/t Stat"
2217 "/P (T<=t) one-tail"
2218 "/t Critical one-tail"
2219 "/P (T<=t) two-tail"
2220 "/t Critical two-tail"));
2221
2222
2223 val_1 = value_dup (info->base.range_1);
2224 val_2 = value_dup (info->base.range_2);
2225
2226 fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
2227 gnm_func_inc_usage (fd_mean);
2228 fd_var = gnm_func_lookup_or_add_placeholder ("VAR");
2229 gnm_func_inc_usage (fd_var);
2230 fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
2231 gnm_func_inc_usage (fd_count);
2232 fd_tdist = gnm_func_lookup_or_add_placeholder ("TDIST");
2233 gnm_func_inc_usage (fd_tdist);
2234 fd_abs = gnm_func_lookup_or_add_placeholder ("ABS");
2235 gnm_func_inc_usage (fd_abs);
2236 fd_tinv = gnm_func_lookup_or_add_placeholder ("TINV");
2237 gnm_func_inc_usage (fd_tinv);
2238
2239 /* Labels */
2240 analysis_tools_write_label_ftest (val_1, dao, 1, 0,
2241 info->base.labels, 1);
2242 analysis_tools_write_label_ftest (val_2, dao, 2, 0,
2243 info->base.labels, 2);
2244
2245
2246 /* Mean */
2247 expr_1 = gnm_expr_new_constant (value_dup (val_1));
2248 expr_mean_1 = gnm_expr_new_funcall1 (fd_mean, gnm_expr_copy (expr_1));
2249 dao_set_cell_expr (dao, 1, 1, expr_mean_1);
2250 expr_2 = gnm_expr_new_constant (value_dup (val_2));
2251 expr_mean_2 = gnm_expr_new_funcall1 (fd_mean, gnm_expr_copy (expr_2));
2252 dao_set_cell_expr (dao, 2, 1, gnm_expr_copy (expr_mean_2));
2253
2254 /* Variance */
2255 expr_var_1 = gnm_expr_new_funcall1 (fd_var, gnm_expr_copy (expr_1));
2256 dao_set_cell_expr (dao, 1, 2, expr_var_1);
2257 expr_var_2 = gnm_expr_new_funcall1 (fd_var, gnm_expr_copy (expr_2));
2258 dao_set_cell_expr (dao, 2, 2, gnm_expr_copy (expr_var_2));
2259
2260 /* Observations */
2261 expr_count_1 = gnm_expr_new_funcall1 (fd_count, expr_1);
2262 dao_set_cell_expr (dao, 1, 3, expr_count_1);
2263 expr_count_2 = gnm_expr_new_funcall1 (fd_count, expr_2);
2264 dao_set_cell_expr (dao, 2, 3, gnm_expr_copy (expr_count_2));
2265
2266 /* Hypothesized Mean Difference */
2267 dao_set_cell_float (dao, 1, 4, info->mean_diff);
2268
2269 /* Observed Mean Difference */
2270 if (dao_cell_is_visible (dao, 2,1)) {
2271 gnm_expr_free (expr_mean_2);
2272 expr_mean_2 = make_cellref (1, -4);
2273 }
2274 dao_set_cell_expr (dao, 1, 5,
2275 gnm_expr_new_binary
2276 (make_cellref (0, -4),
2277 GNM_EXPR_OP_SUB,
2278 expr_mean_2));
2279
2280 /* df */
2281
2282 {
2283 GnmExpr const *expr_var_1 = make_cellref (0, -4);
2284 GnmExpr const *expr_count_1 = make_cellref (0, -3);
2285 GnmExpr const *expr_a;
2286 GnmExpr const *expr_b;
2287 GnmExpr const *expr_var_2_adj;
2288 GnmExpr const *expr_count_2_adj;
2289 GnmExpr const *expr_two = gnm_expr_new_constant
2290 (value_new_int (2));
2291 GnmExpr const *expr_one = gnm_expr_new_constant
2292 (value_new_int (1));
2293
2294 if (dao_cell_is_visible (dao, 2,2)) {
2295 expr_var_2_adj = make_cellref (1, -4);
2296 } else
2297 expr_var_2_adj = gnm_expr_copy (expr_var_2);
2298
2299 if (dao_cell_is_visible (dao, 2,3)) {
2300 expr_count_2_adj = make_cellref (1, -3);
2301 } else
2302 expr_count_2_adj = gnm_expr_copy (expr_count_2);
2303
2304 expr_a = gnm_expr_new_binary (expr_var_1,
2305 GNM_EXPR_OP_DIV,
2306 gnm_expr_copy (expr_count_1));
2307 expr_b = gnm_expr_new_binary (expr_var_2_adj,
2308 GNM_EXPR_OP_DIV,
2309 gnm_expr_copy (expr_count_2_adj));
2310
2311 dao_set_cell_expr (dao, 1, 6,
2312 gnm_expr_new_binary (
2313 gnm_expr_new_binary
2314 (gnm_expr_new_binary
2315 (gnm_expr_copy (expr_a),
2316 GNM_EXPR_OP_ADD,
2317 gnm_expr_copy (expr_b)),
2318 GNM_EXPR_OP_EXP,
2319 gnm_expr_copy (expr_two)),
2320 GNM_EXPR_OP_DIV,
2321 gnm_expr_new_binary
2322 (gnm_expr_new_binary
2323 (gnm_expr_new_binary
2324 (expr_a,
2325 GNM_EXPR_OP_EXP,
2326 gnm_expr_copy (expr_two)),
2327 GNM_EXPR_OP_DIV,
2328 gnm_expr_new_binary
2329 (expr_count_1,
2330 GNM_EXPR_OP_SUB,
2331 gnm_expr_copy (expr_one))),
2332 GNM_EXPR_OP_ADD,
2333 gnm_expr_new_binary
2334 (gnm_expr_new_binary
2335 (expr_b,
2336 GNM_EXPR_OP_EXP,
2337 expr_two),
2338 GNM_EXPR_OP_DIV,
2339 gnm_expr_new_binary
2340 (expr_count_2_adj,
2341 GNM_EXPR_OP_SUB,
2342 expr_one)))));
2343 }
2344
2345 /* t */
2346
2347 {
2348 GnmExpr const *expr_var_1 = make_cellref (0, -5);
2349 GnmExpr const *expr_count_1 = make_cellref (0, -4);
2350 GnmExpr const *expr_a;
2351 GnmExpr const *expr_b;
2352 GnmExpr const *expr_var_2_adj;
2353 GnmExpr const *expr_count_2_adj;
2354
2355 if (dao_cell_is_visible (dao, 2,2)) {
2356 gnm_expr_free (expr_var_2);
2357 expr_var_2_adj = make_cellref (1, -5);
2358 } else
2359 expr_var_2_adj = expr_var_2;
2360 if (dao_cell_is_visible (dao, 2,3)) {
2361 gnm_expr_free (expr_count_2);
2362 expr_count_2_adj = make_cellref (1, -4);
2363 } else
2364 expr_count_2_adj = expr_count_2;
2365
2366 expr_a = gnm_expr_new_binary (expr_var_1, GNM_EXPR_OP_DIV,
2367 expr_count_1);
2368 expr_b = gnm_expr_new_binary (expr_var_2_adj, GNM_EXPR_OP_DIV,
2369 expr_count_2_adj);
2370
2371 dao_set_cell_expr (dao, 1, 7,
2372 gnm_expr_new_binary
2373 (gnm_expr_new_binary
2374 (make_cellref (0, -2),
2375 GNM_EXPR_OP_SUB,
2376 make_cellref (0, -3)),
2377 GNM_EXPR_OP_DIV,
2378 gnm_expr_new_binary
2379 (gnm_expr_new_binary
2380 (expr_a,
2381 GNM_EXPR_OP_ADD,
2382 expr_b),
2383 GNM_EXPR_OP_EXP,
2384 gnm_expr_new_constant
2385 (value_new_float (0.5)))));
2386
2387 }
2388
2389 /* P (T<=t) one-tail */
2390 /* I9: =tdist(abs(Sheet1!I8),Sheet1!I7,1) */
2391 dao_set_cell_expr
2392 (dao, 1, 8,
2393 gnm_expr_new_funcall3
2394 (fd_tdist,
2395 gnm_expr_new_funcall1 (fd_abs,
2396 make_cellref (0, -1)),
2397 make_cellref (0, -2),
2398 gnm_expr_new_constant (value_new_int (1))));
2399
2400 /* t Critical one-tail */
2401 /* H10 = tinv(2*alpha,Sheet1!H7) */
2402 dao_set_cell_expr
2403 (dao, 1, 9,
2404 gnm_expr_new_funcall2
2405 (fd_tinv,
2406 gnm_expr_new_binary
2407 (gnm_expr_new_constant (value_new_int (2)),
2408 GNM_EXPR_OP_MULT,
2409 gnm_expr_new_constant
2410 (value_new_float (info->base.alpha))),
2411 make_cellref (0, -3)));
2412
2413 /* P (T<=t) two-tail */
2414 /* I11: =tdist(abs(Sheet1!I8),Sheet1!I7,1) */
2415 dao_set_cell_expr
2416 (dao, 1, 10,
2417 gnm_expr_new_funcall3
2418 (fd_tdist,
2419 gnm_expr_new_funcall1 (fd_abs,
2420 make_cellref (0, -3)),
2421 make_cellref (0, -4),
2422 gnm_expr_new_constant (value_new_int (2))));
2423
2424 /* t Critical two-tail */
2425 dao_set_cell_expr
2426 (dao, 1, 11,
2427 gnm_expr_new_funcall2
2428 (fd_tinv,
2429 gnm_expr_new_constant
2430 (value_new_float (info->base.alpha)),
2431 make_cellref (0, -5)));
2432
2433 /* And finish up */
2434
2435 gnm_func_dec_usage (fd_mean);
2436 gnm_func_dec_usage (fd_var);
2437 gnm_func_dec_usage (fd_count);
2438 gnm_func_dec_usage (fd_tdist);
2439 gnm_func_dec_usage (fd_abs);
2440 gnm_func_dec_usage (fd_tinv);
2441
2442 value_release (val_1);
2443 value_release (val_2);
2444
2445 dao_redraw_respan (dao);
2446 return FALSE;
2447 }
2448
2449 gboolean
analysis_tool_ttest_neqvar_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)2450 analysis_tool_ttest_neqvar_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
2451 analysis_tool_engine_t selector, gpointer result)
2452 {
2453 switch (selector) {
2454 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
2455 return (dao_command_descriptor (dao, _("t-Test (%s)"), result)
2456 == NULL);
2457 case TOOL_ENGINE_UPDATE_DAO:
2458 dao_adjust (dao, 3, 12);
2459 return FALSE;
2460 case TOOL_ENGINE_CLEAN_UP:
2461 return analysis_tool_generic_b_clean (specs);
2462 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
2463 return FALSE;
2464 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
2465 dao_prepare_output (NULL, dao, _("t-Test"));
2466 return FALSE;
2467 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
2468 return dao_format_output (dao, _("t-Test"));
2469 case TOOL_ENGINE_PERFORM_CALC:
2470 default:
2471 return analysis_tool_ttest_neqvar_engine_run (dao, specs);
2472 }
2473 return TRUE; /* We shouldn't get here */
2474 }
2475
2476
2477 /************* F-Test Tool *********************************************
2478 *
2479 * The results are given in a table which can be printed out in a new
2480 * sheet, in a new workbook, or simply into an existing sheet.
2481 *
2482 **/
2483
2484
2485 /* F-Test: Two-Sample for Variances
2486 */
2487 static gboolean
analysis_tool_ftest_engine_run(data_analysis_output_t * dao,analysis_tools_data_generic_b_t * info)2488 analysis_tool_ftest_engine_run (data_analysis_output_t *dao,
2489 analysis_tools_data_generic_b_t *info)
2490 {
2491 GnmValue *val_1 = value_dup (info->range_1);
2492 GnmValue *val_2 = value_dup (info->range_2);
2493 GnmExpr const *expr;
2494 GnmExpr const *expr_var_denum;
2495 GnmExpr const *expr_count_denum;
2496 GnmExpr const *expr_df_denum = NULL;
2497
2498 GnmFunc *fd_finv;
2499
2500 fd_finv = gnm_func_lookup_or_add_placeholder ("FINV");
2501 gnm_func_inc_usage (fd_finv);
2502
2503 dao_set_italic (dao, 0, 0, 0, 11);
2504 dao_set_cell (dao, 0, 0, _("F-Test"));
2505 set_cell_text_col (dao, 0, 1, _("/Mean"
2506 "/Variance"
2507 "/Observations"
2508 "/df"
2509 "/F"
2510 "/P (F<=f) right-tail"
2511 "/F Critical right-tail"
2512 "/P (f<=F) left-tail"
2513 "/F Critical left-tail"
2514 "/P two-tail"
2515 "/F Critical two-tail"));
2516
2517 /* Label */
2518 dao_set_italic (dao, 0, 0, 2, 0);
2519 analysis_tools_write_label_ftest (val_1, dao, 1, 0, info->labels, 1);
2520 analysis_tools_write_label_ftest (val_2, dao, 2, 0, info->labels, 2);
2521
2522 /* Mean */
2523 {
2524 GnmFunc *fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
2525 gnm_func_inc_usage (fd_mean);
2526
2527 dao_set_cell_expr
2528 (dao, 1, 1,
2529 gnm_expr_new_funcall1
2530 (fd_mean,
2531 gnm_expr_new_constant (value_dup (val_1))));
2532
2533 dao_set_cell_expr
2534 (dao, 2, 1,
2535 gnm_expr_new_funcall1
2536 (fd_mean,
2537 gnm_expr_new_constant (value_dup (val_2))));
2538
2539 gnm_func_dec_usage (fd_mean);
2540 }
2541
2542 /* Variance */
2543 {
2544 GnmFunc *fd_var = gnm_func_lookup_or_add_placeholder ("VAR");
2545 gnm_func_inc_usage (fd_var);
2546
2547 dao_set_cell_expr
2548 (dao, 1, 2,
2549 gnm_expr_new_funcall1
2550 (fd_var,
2551 gnm_expr_new_constant (value_dup (val_1))));
2552
2553 expr_var_denum = gnm_expr_new_funcall1
2554 (fd_var,
2555 gnm_expr_new_constant (value_dup (val_2)));
2556 dao_set_cell_expr (dao, 2, 2, gnm_expr_copy (expr_var_denum));
2557
2558 gnm_func_dec_usage (fd_var);
2559 }
2560
2561 /* Count */
2562 {
2563 GnmFunc *fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
2564 gnm_func_inc_usage (fd_count);
2565
2566 dao_set_cell_expr
2567 (dao, 1, 3,
2568 gnm_expr_new_funcall1
2569 (fd_count,
2570 gnm_expr_new_constant (value_dup (val_1))));
2571
2572 expr_count_denum = gnm_expr_new_funcall1
2573 (fd_count,
2574 gnm_expr_new_constant (value_dup (val_2)));
2575 dao_set_cell_expr (dao, 2, 3, gnm_expr_copy (expr_count_denum));
2576
2577 gnm_func_dec_usage (fd_count);
2578 }
2579
2580 /* df */
2581 {
2582 expr = gnm_expr_new_binary
2583 (make_cellref (0, -1),
2584 GNM_EXPR_OP_SUB,
2585 gnm_expr_new_constant (value_new_int (1)));
2586 dao_set_cell_expr (dao, 1, 4, gnm_expr_copy (expr));
2587 dao_set_cell_expr (dao, 2, 4, expr);
2588 }
2589
2590 /* F value */
2591 if (dao_cell_is_visible (dao, 2, 2)) {
2592 expr = gnm_expr_new_binary
2593 (make_cellref (0, -3),
2594 GNM_EXPR_OP_DIV,
2595 make_cellref (1, -3));
2596 gnm_expr_free (expr_var_denum);
2597 } else {
2598 expr = gnm_expr_new_binary
2599 (make_cellref (0, -3),
2600 GNM_EXPR_OP_DIV,
2601 expr_var_denum);
2602 }
2603 dao_set_cell_expr (dao, 1, 5, expr);
2604
2605 /* P right-tail */
2606 {
2607 GnmFunc *fd_fdist = gnm_func_lookup_or_add_placeholder ("FDIST");
2608 const GnmExpr *arg3;
2609
2610 gnm_func_inc_usage (fd_fdist);
2611
2612 if (dao_cell_is_visible (dao, 2, 2)) {
2613 arg3 = make_cellref (1, -2);
2614 gnm_expr_free (expr_count_denum);
2615 } else {
2616 expr_df_denum = gnm_expr_new_binary
2617 (expr_count_denum,
2618 GNM_EXPR_OP_SUB,
2619 gnm_expr_new_constant (value_new_int (1)));
2620 arg3 = gnm_expr_copy (expr_df_denum);
2621 }
2622
2623 dao_set_cell_expr
2624 (dao, 1, 6,
2625 gnm_expr_new_funcall3
2626 (fd_fdist,
2627 make_cellref (0, -1),
2628 make_cellref (0, -2),
2629 arg3));
2630
2631 gnm_func_dec_usage (fd_fdist);
2632 }
2633
2634 /* F critical right-tail */
2635 {
2636 const GnmExpr *arg3;
2637
2638 if (expr_df_denum == NULL) {
2639 arg3 = make_cellref (1, -3);
2640 } else {
2641 arg3 = gnm_expr_copy (expr_df_denum);
2642 }
2643
2644 dao_set_cell_expr
2645 (dao, 1, 7,
2646 gnm_expr_new_funcall3
2647 (fd_finv,
2648 gnm_expr_new_constant (value_new_float (info->alpha)),
2649 make_cellref (0, -3),
2650 arg3));
2651 }
2652
2653 /* P left-tail */
2654 dao_set_cell_expr (dao, 1, 8,
2655 gnm_expr_new_binary
2656 (gnm_expr_new_constant (value_new_int (1)),
2657 GNM_EXPR_OP_SUB,
2658 make_cellref (0, -2)));
2659
2660 /* F critical left-tail */
2661 {
2662 const GnmExpr *arg3;
2663
2664 if (expr_df_denum == NULL) {
2665 arg3 = make_cellref (1, -5);
2666 } else {
2667 arg3 = gnm_expr_copy (expr_df_denum);
2668 }
2669
2670 dao_set_cell_expr
2671 (dao, 1, 9,
2672 gnm_expr_new_funcall3
2673 (fd_finv,
2674 gnm_expr_new_constant
2675 (value_new_float (1. - info->alpha)),
2676 make_cellref (0, -5),
2677 arg3));
2678 }
2679
2680 /* P two-tail */
2681 {
2682 GnmFunc *fd_min = gnm_func_lookup_or_add_placeholder ("MIN");
2683
2684 gnm_func_inc_usage (fd_min);
2685
2686 dao_set_cell_expr
2687 (dao, 1, 10,
2688 gnm_expr_new_binary
2689 (gnm_expr_new_constant (value_new_int (2)),
2690 GNM_EXPR_OP_MULT,
2691 gnm_expr_new_funcall2
2692 (fd_min,
2693 make_cellref (0, -4),
2694 make_cellref (0, -2))));
2695 gnm_func_dec_usage (fd_min);
2696 }
2697
2698 /* F critical two-tail (left) */
2699 {
2700 const GnmExpr *arg3;
2701
2702 if (expr_df_denum == NULL) {
2703 arg3 = make_cellref (1, -7);
2704 } else {
2705 arg3 = expr_df_denum;
2706 }
2707
2708 dao_set_cell_expr
2709 (dao, 1, 11,
2710 gnm_expr_new_funcall3
2711 (fd_finv,
2712 gnm_expr_new_constant
2713 (value_new_float (1 - info->alpha / 2.)),
2714 make_cellref (0, -7),
2715 arg3));
2716 }
2717
2718 /* F critical two-tail (right) */
2719 dao_set_cell_expr
2720 (dao, 2, 11,
2721 gnm_expr_new_funcall3
2722 (fd_finv,
2723 gnm_expr_new_constant
2724 (value_new_float (info->alpha / 2.)),
2725 make_cellref (-1, -7),
2726 make_cellref (0, -7)));
2727
2728 value_release (val_1);
2729 value_release (val_2);
2730
2731 gnm_func_dec_usage (fd_finv);
2732
2733 dao_redraw_respan (dao);
2734 return FALSE;
2735 }
2736
2737 gboolean
analysis_tool_ftest_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)2738 analysis_tool_ftest_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
2739 analysis_tool_engine_t selector, gpointer result)
2740 {
2741 switch (selector) {
2742 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
2743 return (dao_command_descriptor (dao, _("F-Test (%s)"), result)
2744 == NULL);
2745 case TOOL_ENGINE_UPDATE_DAO:
2746 dao_adjust (dao, 3, 12);
2747 return FALSE;
2748 case TOOL_ENGINE_CLEAN_UP:
2749 return analysis_tool_generic_b_clean (specs);
2750 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
2751 return FALSE;
2752 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
2753 dao_prepare_output (NULL, dao, _("F-Test"));
2754 return FALSE;
2755 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
2756 return dao_format_output (dao, _("F-Test"));
2757 case TOOL_ENGINE_PERFORM_CALC:
2758 default:
2759 return analysis_tool_ftest_engine_run (dao, specs);
2760 }
2761 return TRUE; /* We shouldn't get here */
2762 }
2763
2764
2765
2766 /************* Regression Tool *********************************************
2767 *
2768 * The results are given in a table which can be printed out in a new
2769 * sheet, in a new workbook, or simply into an existing sheet.
2770 *
 * Excel Bug 1: (Andrew) I believe that the following is a bug in Excel: When
 * calculating the F-statistic in the no-intercept case, it will use xdim as
 * the numerator df and (n - xdim) as the denominator df, which is as it should
 * be. However, in the regression it will then calculate the significance of the
 * F-statistic using (n - #slope parameters - 1) as the denominator df, which
 * makes sense when you are calculating an intercept, but in this case you are not
 * and the df should be just (n - #slope parameters). Excel is inconsistent,
 * in that it does not use the same df to calculate the significance that it
 * does to calculate the F-stat itself. Inference on regressions
 * without intercepts doesn't really work anyway (because of the way the
 * statistics work, not the code), so this is not a terribly big deal, and
 * those who would actually use the significance of F are not likely to be
 * using interceptless regressions anyway. So while it is easy to mimic Excel
 * in this respect, currently we do not and choose what at least for now seems
 * to be more correct.
2786 *
2787 * Excel Bug 2: (Andrew) Also in the no-intercept case: Excel has some weird way of
2788 * calculating the adjusted R^2 value that makes absolutely no sense to me, so
2789 * I couldn't mimic it if I tried. Again, what statistical opinion I have found
2790 * suggests that if you're running interceptless regressions, you won't know what
2791 * to do with an adjusted R^2 anyway.
2792 *
2793 **/
2794
2795 static gint
calculate_xdim(GnmValue * input,group_by_t group_by)2796 calculate_xdim (GnmValue *input, group_by_t group_by)
2797 {
2798 GnmRange r;
2799
2800 g_return_val_if_fail (input != NULL, 0);
2801
2802 if (NULL == range_init_value (&r, input))
2803 return 0;
2804
2805 if (group_by == GROUPED_BY_ROW)
2806 return range_height (&r);
2807
2808 return range_width (&r);
2809 }
2810
2811 static gint
calculate_n_obs(GnmValue * input,group_by_t group_by)2812 calculate_n_obs (GnmValue *input, group_by_t group_by)
2813 {
2814 GnmRange r;
2815
2816 g_return_val_if_fail (input != NULL, 0);
2817
2818 if (NULL == range_init_value (&r, input))
2819 return 0;
2820
2821 if (group_by == GROUPED_BY_ROW)
2822 return range_width (&r);
2823
2824 return range_height (&r);
2825 }
2826
2827
2828 static gboolean
analysis_tool_regression_engine_run(data_analysis_output_t * dao,analysis_tools_data_regression_t * info)2829 analysis_tool_regression_engine_run (data_analysis_output_t *dao,
2830 analysis_tools_data_regression_t *info)
2831 {
2832 gint xdim = calculate_xdim (info->base.range_1, info->group_by);
2833 gint i;
2834
2835 GnmValue *val_1 = value_dup (info->base.range_1);
2836 GnmValue *val_2 = value_dup (info->base.range_2);
2837 GnmValue *val_1_cp = NULL;
2838 GnmValue *val_2_cp = NULL;
2839
2840 GnmExpr const *expr_x;
2841 GnmExpr const *expr_y;
2842 GnmExpr const *expr_linest;
2843 GnmExpr const *expr_intercept;
2844 GnmExpr const *expr_ms;
2845 GnmExpr const *expr_sum;
2846 GnmExpr const *expr_tstat;
2847 GnmExpr const *expr_pvalue;
2848 GnmExpr const *expr_n;
2849 GnmExpr const *expr_df;
2850 GnmExpr const *expr_lower;
2851 GnmExpr const *expr_upper;
2852 GnmExpr const *expr_confidence;
2853
2854 GnmFunc *fd_linest = analysis_tool_get_function ("LINEST", dao);
2855 GnmFunc *fd_index = analysis_tool_get_function ("INDEX", dao);
2856 GnmFunc *fd_fdist = analysis_tool_get_function ("FDIST", dao);
2857 GnmFunc *fd_sum = analysis_tool_get_function ("SUM", dao);
2858 GnmFunc *fd_sqrt = analysis_tool_get_function ("SQRT", dao);
2859 GnmFunc *fd_tdist = analysis_tool_get_function ("TDIST", dao);
2860 GnmFunc *fd_abs = analysis_tool_get_function ("ABS", dao);
2861 GnmFunc *fd_tinv = analysis_tool_get_function ("TINV", dao);
2862 GnmFunc *fd_transpose = analysis_tool_get_function ("TRANSPOSE", dao);
2863 GnmFunc *fd_concatenate = NULL;
2864 GnmFunc *fd_cell = NULL;
2865 GnmFunc *fd_offset = NULL;
2866 GnmFunc *fd_sumproduct = NULL;
2867 GnmFunc *fd_leverage = NULL;
2868
2869 char const *str = ((info->group_by == GROUPED_BY_ROW) ? "row" : "col");
2870 char const *label = ((info->group_by == GROUPED_BY_ROW) ? _("Row")
2871 : _("Column"));
2872
2873 if (!info->base.labels) {
2874 fd_concatenate = analysis_tool_get_function ("CONCATENATE",
2875 dao);
2876 fd_cell = analysis_tool_get_function ("CELL", dao);
2877 fd_offset = analysis_tool_get_function ("OFFSET", dao);
2878 }
2879 if (info->residual) {
2880 fd_sumproduct = analysis_tool_get_function ("SUMPRODUCT", dao);
2881 fd_leverage = analysis_tool_get_function ("LEVERAGE", dao);
2882 }
2883
2884 cb_adjust_areas (val_1, NULL);
2885 cb_adjust_areas (val_2, NULL);
2886
2887 dao_set_italic (dao, 0, 0, 0, 16 + xdim);
2888 set_cell_text_col (dao, 0, 0, _("/SUMMARY OUTPUT"
2889 "/"
2890 "/Regression Statistics"
2891 "/Multiple R"
2892 "/R^2"
2893 "/Standard Error"
2894 "/Adjusted R^2"
2895 "/Observations"
2896 "/"
2897 "/ANOVA"
2898 "/"
2899 "/Regression"
2900 "/Residual"
2901 "/Total"
2902 "/"
2903 "/"
2904 "/Intercept"));
2905 dao_set_merge (dao, 0, 0, 1, 0);
2906 dao_set_italic (dao, 2, 0, 3, 0);
2907 dao_set_cell (dao, 2, 0, _("Response Variable"));
2908 dao_set_merge (dao, 0, 2, 1, 2);
2909
2910 if (info->base.labels) {
2911
2912 dao_set_cell_expr (dao, 3, 0,
2913 gnm_expr_new_funcall1 (fd_index, gnm_expr_new_constant (value_dup (val_2))));
2914
2915 val_1_cp = value_dup (val_1);
2916 val_2_cp = value_dup (val_2);
2917 if (info->group_by == GROUPED_BY_ROW) {
2918 val_1->v_range.cell.a.col++;
2919 val_2->v_range.cell.a.col++;
2920 val_1_cp->v_range.cell.b.col = val_1_cp->v_range.cell.a.col;
2921 dao_set_array_expr (dao, 0, 17, 1, xdim, gnm_expr_new_constant
2922 (value_dup (val_1_cp)));
2923 } else {
2924 val_1->v_range.cell.a.row++;
2925 val_2->v_range.cell.a.row++;
2926 val_1_cp->v_range.cell.b.row = val_1_cp->v_range.cell.a.row;
2927 dao_set_array_expr (dao, 0, 17, 1, xdim, gnm_expr_new_funcall1
2928 (fd_transpose,
2929 gnm_expr_new_constant (value_dup (val_1_cp))));
2930 }
2931 } else {
2932 dao_set_cell_expr (dao, 3, 0, gnm_expr_new_funcall3
2933 (fd_concatenate, gnm_expr_new_constant (value_new_string (label)),
2934 gnm_expr_new_constant (value_new_string (" ")),
2935 gnm_expr_new_funcall2 (fd_cell,
2936 gnm_expr_new_constant (value_new_string (str)),
2937 gnm_expr_new_constant (value_dup (val_2)))));
2938 }
2939
2940 dao_set_italic (dao, 1, 10, 5, 10);
2941 set_cell_text_row (dao, 1, 10, _("/df"
2942 "/SS"
2943 "/MS"
2944 "/F"
2945 "/Significance of F"));
2946
2947 dao_set_italic (dao, 1, 15, 6, 15);
2948 set_cell_text_row (dao, 1, 15, _("/Coefficients"
2949 "/Standard Error"
2950 "/t-Statistics"
2951 "/p-Value"));
2952
2953 /* xgettext: this is an Excel-style number format. Use "..." quotes and do not translate the 0% */
2954 dao_set_format (dao, 5, 15, 5, 15, _("\"Lower\" 0%"));
2955 /* xgettext: this is an Excel-style number format. Use "..." quotes and do not translate the 0% */
2956 dao_set_format (dao, 6, 15, 6, 15, _("\"Upper\" 0%"));
2957 dao_set_align (dao, 5, 15, 5, 15, GNM_HALIGN_LEFT, GNM_VALIGN_TOP);
2958 dao_set_align (dao, 6, 15, 6, 15, GNM_HALIGN_RIGHT, GNM_VALIGN_TOP);
2959
2960 dao_set_cell_float (dao, 5, 15, 1.0 - info->base.alpha);
2961 dao_set_cell_expr (dao, 6, 15, make_cellref (-1, 0));
2962 expr_confidence = dao_get_cellref (dao, 5, 15);
2963
2964 dao_set_cell_comment (dao, 4, 15,
2965 _("Probability of observing a t-statistic\n"
2966 "whose absolute value is at least as large\n"
2967 "as the absolute value of the actually\n"
2968 "observed t-statistic, assuming the null\n"
2969 "hypothesis is in fact true."));
2970 if (!info->intercept)
2971 dao_set_cell_comment (dao, 0, 4,
2972 _("This value is not the square of R\n"
2973 "but the uncentered version of the\n"
2974 "coefficient of determination; that\n"
2975 "is, the proportion of the sum of\n"
2976 "squares explained by the model."));
2977
2978 expr_x = gnm_expr_new_constant (value_dup (val_1));
2979 expr_y = gnm_expr_new_constant (value_dup (val_2));
2980
2981 expr_intercept = gnm_expr_new_constant (value_new_bool (info->intercept));
2982
2983 expr_linest = gnm_expr_new_funcall4 (fd_linest,
2984 expr_y,
2985 expr_x,
2986 expr_intercept,
2987 gnm_expr_new_constant (value_new_bool (TRUE)));
2988
2989
2990 /* Multiple R */
2991 if (info->intercept) {
2992 if (dao_cell_is_visible (dao, 1, 4))
2993 dao_set_cell_expr (dao, 1, 3, gnm_expr_new_funcall1 (fd_sqrt, make_cellref (0, 1)));
2994 else
2995 dao_set_cell_expr (dao, 1, 3,
2996 gnm_expr_new_funcall1 (fd_sqrt, gnm_expr_new_funcall3
2997 (fd_index,
2998 gnm_expr_copy (expr_linest),
2999 gnm_expr_new_constant (value_new_int (3)),
3000 gnm_expr_new_constant (value_new_int (1)))));
3001 } else
3002 dao_set_cell_expr (dao, 1, 3,
3003 gnm_expr_new_funcall1 (fd_sqrt, gnm_expr_new_funcall3
3004 (fd_index,
3005 gnm_expr_new_funcall4
3006 (fd_linest,
3007 gnm_expr_new_constant (value_dup (val_2)),
3008 gnm_expr_new_constant (value_dup (val_1)),
3009 gnm_expr_new_constant (value_new_bool (TRUE)),
3010 gnm_expr_new_constant (value_new_bool (TRUE))),
3011 gnm_expr_new_constant (value_new_int (3)),
3012 gnm_expr_new_constant (value_new_int (1)))));
3013
3014
3015 /* R Square */
3016 dao_set_cell_array_expr (dao, 1, 4,
3017 gnm_expr_new_funcall3 (fd_index,
3018 gnm_expr_copy (expr_linest),
3019 gnm_expr_new_constant (value_new_int (3)),
3020 gnm_expr_new_constant (value_new_int (1))));
3021
3022 /* Standard Error */
3023 dao_set_cell_array_expr (dao, 1, 5,
3024 gnm_expr_new_funcall3 (fd_index,
3025 gnm_expr_copy (expr_linest),
3026 gnm_expr_new_constant (value_new_int (3)),
3027 gnm_expr_new_constant (value_new_int (2))));
3028
3029 /* Adjusted R Square */
3030 if (dao_cell_is_visible (dao, 1, 7))
3031 expr_n = make_cellref (0, 1);
3032 else
3033 expr_n = gnm_expr_new_funcall3 (fd_sum,
3034 gnm_expr_new_constant (value_new_int (xdim)),
3035 gnm_expr_new_funcall3 (fd_index,
3036 gnm_expr_copy (expr_linest),
3037 gnm_expr_new_constant (value_new_int (4)),
3038 gnm_expr_new_constant (value_new_int (2))),
3039 gnm_expr_new_constant (value_new_int (1)));
3040
3041 dao_set_cell_expr (dao, 1, 6, gnm_expr_new_binary
3042 (gnm_expr_new_constant (value_new_int (1)),
3043 GNM_EXPR_OP_SUB,
3044 gnm_expr_new_binary
3045 (gnm_expr_new_binary
3046 (gnm_expr_new_binary
3047 (gnm_expr_copy (expr_n),
3048 GNM_EXPR_OP_SUB,
3049 gnm_expr_new_constant (value_new_int (1))),
3050 GNM_EXPR_OP_DIV,
3051 gnm_expr_new_binary
3052 (expr_n,
3053 GNM_EXPR_OP_SUB,
3054 gnm_expr_new_constant (value_new_int (xdim + (info->intercept?1:0))))),
3055 GNM_EXPR_OP_MULT,
3056 gnm_expr_new_binary
3057 (gnm_expr_new_constant (value_new_int (1)),
3058 GNM_EXPR_OP_SUB,
3059 make_cellref (0, -2)))));
3060
3061 /* Observations */
3062
3063 if (dao_cell_is_visible (dao, 1, 13))
3064 dao_set_cell_expr (dao, 1, 7,
3065 gnm_expr_new_funcall2 (fd_sum,
3066 make_cellref (0, 6),
3067 gnm_expr_new_constant (value_new_int (info->intercept?1:0))));
3068 else if (dao_cell_is_visible (dao, 1, 12))
3069 dao_set_cell_expr (dao, 1, 7,
3070 gnm_expr_new_funcall3 (fd_sum,
3071 make_cellref (0, 4),
3072 make_cellref (0, 5),
3073 gnm_expr_new_constant (value_new_int (info->intercept?1:0))));
3074 else
3075 dao_set_cell_expr (dao, 1, 7,
3076 gnm_expr_new_funcall3 (fd_sum,
3077 gnm_expr_new_constant (value_new_int (xdim)),
3078 gnm_expr_new_funcall3 (fd_index,
3079 gnm_expr_copy (expr_linest),
3080 gnm_expr_new_constant (value_new_int (4)),
3081 gnm_expr_new_constant (value_new_int (2))),
3082 gnm_expr_new_constant (value_new_int (info->intercept?1:0))));
3083
3084
3085
3086 /* Regression / df */
3087
3088 dao_set_cell_int (dao, 1, 11, xdim);
3089
3090 /* Residual / df */
3091 dao_set_cell_array_expr (dao, 1, 12,
3092 gnm_expr_new_funcall3 (fd_index,
3093 gnm_expr_copy (expr_linest),
3094 gnm_expr_new_constant (value_new_int (4)),
3095 gnm_expr_new_constant (value_new_int (2))));
3096
3097
3098 /* Total / df */
3099 expr_sum = gnm_expr_new_binary (make_cellref (0, -2),
3100 GNM_EXPR_OP_ADD,
3101 make_cellref (0, -1));
3102 dao_set_cell_expr (dao, 1, 13, gnm_expr_copy (expr_sum));
3103
3104 /* Regression / SS */
3105 dao_set_cell_array_expr (dao, 2, 11,
3106 gnm_expr_new_funcall3 (fd_index,
3107 gnm_expr_copy (expr_linest),
3108 gnm_expr_new_constant (value_new_int (5)),
3109 gnm_expr_new_constant (value_new_int (1))));
3110
3111 /* Residual / SS */
3112 dao_set_cell_array_expr (dao, 2, 12,
3113 gnm_expr_new_funcall3 (fd_index,
3114 gnm_expr_copy (expr_linest),
3115 gnm_expr_new_constant (value_new_int (5)),
3116 gnm_expr_new_constant (value_new_int (2))));
3117
3118
3119 /* Total / SS */
3120 dao_set_cell_expr (dao, 2, 13, expr_sum);
3121
3122
3123 /* Regression / MS */
3124 expr_ms = gnm_expr_new_binary (make_cellref (-1, 0),
3125 GNM_EXPR_OP_DIV,
3126 make_cellref (-2, 0));
3127 dao_set_cell_expr (dao, 3, 11, gnm_expr_copy (expr_ms));
3128
3129 /* Residual / MS */
3130 dao_set_cell_expr (dao, 3, 12, expr_ms);
3131
3132
3133 /* F */
3134 dao_set_cell_array_expr (dao, 4, 11,
3135 gnm_expr_new_funcall3 (fd_index,
3136 gnm_expr_copy (expr_linest),
3137 gnm_expr_new_constant (value_new_int (4)),
3138 gnm_expr_new_constant (value_new_int (1))));
3139
3140 /* Significance of F */
3141
3142 if (dao_cell_is_visible (dao, 1, 12))
3143 dao_set_cell_expr (dao, 5, 11, gnm_expr_new_funcall3 (fd_fdist,
3144 make_cellref (-1, 0),
3145 make_cellref (-4, 0),
3146 make_cellref (-4, 1)));
3147 else
3148 dao_set_cell_expr (dao, 5, 11, gnm_expr_new_funcall3 (fd_fdist,
3149 make_cellref (-1, 0),
3150 make_cellref (-4, 0),
3151 gnm_expr_new_funcall3
3152 (fd_index,
3153 gnm_expr_copy (expr_linest),
3154 gnm_expr_new_constant (value_new_int (4)),
3155 gnm_expr_new_constant (value_new_int (2)))));
3156
3157
3158 /* Intercept */
3159
3160
3161 expr_tstat = gnm_expr_new_binary (make_cellref (-2, 0),
3162 GNM_EXPR_OP_DIV,
3163 make_cellref (-1, 0));
3164 expr_df = dao_get_cellref (dao, 1, 12);
3165 expr_pvalue = gnm_expr_new_funcall3 (fd_tdist, gnm_expr_new_funcall1 (fd_abs, make_cellref (-1, 0)),
3166 gnm_expr_copy (expr_df),
3167 gnm_expr_new_constant (value_new_int (2)));
3168 expr_lower = gnm_expr_new_binary (make_cellref (-4, 0),
3169 GNM_EXPR_OP_SUB,
3170 gnm_expr_new_binary (make_cellref (-3, 0),
3171 GNM_EXPR_OP_MULT,
3172 gnm_expr_new_funcall2
3173 (fd_tinv,
3174 gnm_expr_new_binary
3175 (gnm_expr_new_constant (value_new_float (1.0)),
3176 GNM_EXPR_OP_SUB,
3177 gnm_expr_copy (expr_confidence)),
3178 gnm_expr_copy (expr_df))));
3179 expr_upper = gnm_expr_new_binary (make_cellref (-5, 0),
3180 GNM_EXPR_OP_ADD,
3181 gnm_expr_new_binary (make_cellref (-4, 0),
3182 GNM_EXPR_OP_MULT,
3183 gnm_expr_new_funcall2
3184 (fd_tinv,
3185 gnm_expr_new_binary
3186 (gnm_expr_new_constant (value_new_float (1.0)),
3187 GNM_EXPR_OP_SUB,
3188 expr_confidence),
3189 expr_df)));
3190
3191
3192 /* Intercept */
3193
3194 if (!info->intercept) {
3195 dao_set_cell_int (dao, 1, 16, 0);
3196 for (i = 2; i <= 6; i++)
3197 dao_set_cell_na (dao, i, 16);
3198 } else {
3199 dao_set_cell_array_expr (dao, 1, 16,
3200 gnm_expr_new_funcall3
3201 (fd_index,
3202 gnm_expr_copy (expr_linest),
3203 gnm_expr_new_constant (value_new_int (1)),
3204 gnm_expr_new_constant (value_new_int (xdim+1))));
3205 dao_set_cell_array_expr (dao, 2, 16,
3206 gnm_expr_new_funcall3
3207 (fd_index,
3208 gnm_expr_copy (expr_linest),
3209 gnm_expr_new_constant (value_new_int (2)),
3210 gnm_expr_new_constant (value_new_int (xdim+1))));
3211 dao_set_cell_expr (dao, 3, 16, gnm_expr_copy (expr_tstat));
3212 dao_set_cell_expr (dao, 4, 16, gnm_expr_copy (expr_pvalue));
3213 dao_set_cell_expr (dao, 5, 16, gnm_expr_copy (expr_lower));
3214 dao_set_cell_expr (dao, 6, 16, gnm_expr_copy (expr_upper));
3215 }
3216
3217 /* Coefficients */
3218
3219 dao->offset_row += 17;
3220
3221 for (i = 0; i < xdim; i++) {
3222 if (!info->base.labels) {
3223 GnmExpr const *expr_offset;
3224
3225 if (info->group_by == GROUPED_BY_ROW)
3226 expr_offset = gnm_expr_new_funcall3
3227 (fd_offset, gnm_expr_new_constant (value_dup (val_1)),
3228 gnm_expr_new_constant (value_new_int (i)),
3229 gnm_expr_new_constant (value_new_int (0)));
3230 else
3231 expr_offset = gnm_expr_new_funcall3
3232 (fd_offset, gnm_expr_new_constant (value_dup (val_1)),
3233 gnm_expr_new_constant (value_new_int (0)),
3234 gnm_expr_new_constant (value_new_int (i)));
3235
3236 dao_set_cell_expr (dao, 0, i, gnm_expr_new_funcall3
3237 (fd_concatenate, gnm_expr_new_constant (value_new_string (label)),
3238 gnm_expr_new_constant (value_new_string (" ")),
3239 gnm_expr_new_funcall2
3240 (fd_cell,
3241 gnm_expr_new_constant (value_new_string (str)),
3242 expr_offset)));
3243 }
3244
3245 dao_set_cell_array_expr (dao, 1, i,
3246 gnm_expr_new_funcall3
3247 (fd_index,
3248 gnm_expr_copy (expr_linest),
3249 gnm_expr_new_constant (value_new_int (1)),
3250 gnm_expr_new_constant (value_new_int (xdim - i))));
3251 dao_set_cell_array_expr (dao, 2, i,
3252 gnm_expr_new_funcall3
3253 (fd_index,
3254 gnm_expr_copy (expr_linest),
3255 gnm_expr_new_constant (value_new_int (2)),
3256 gnm_expr_new_constant (value_new_int (xdim - i))));
3257 dao_set_cell_expr (dao, 3, i, gnm_expr_copy (expr_tstat));
3258 dao_set_cell_expr (dao, 4, i, gnm_expr_copy (expr_pvalue));
3259 dao_set_cell_expr (dao, 5, i, gnm_expr_copy (expr_lower));
3260 dao_set_cell_expr (dao, 6, i, gnm_expr_copy (expr_upper));
3261 }
3262
3263
3264 gnm_expr_free (expr_linest);
3265 gnm_expr_free (expr_tstat);
3266 gnm_expr_free (expr_pvalue);
3267 gnm_expr_free (expr_lower);
3268 gnm_expr_free (expr_upper);
3269
3270 value_release (val_1_cp);
3271 value_release (val_2_cp);
3272
3273 if (info->residual) {
3274 gint n_obs = calculate_n_obs (val_1, info->group_by);
3275 GnmExpr const *expr_diff;
3276 GnmExpr const *expr_prediction;
3277
3278 dao->offset_row += xdim + 1;
3279 dao_set_italic (dao, 0, 0, xdim + 7, 0);
3280 dao_set_cell (dao, 0, 0, _("Constant"));
3281 dao_set_array_expr (dao, 1, 0, xdim, 1,
3282 gnm_expr_new_funcall1
3283 (fd_transpose,
3284 make_rangeref (-1, - xdim - 1, -1, -2)));
3285 set_cell_text_row (dao, xdim + 1, 0, _("/Prediction"
3286 "/"
3287 "/Residual"
3288 "/Leverages"
3289 "/Internally studentized"
3290 "/Externally studentized"
3291 "/p-Value"));
3292 dao_set_cell_expr (dao, xdim + 2, 0, make_cellref (1 - xdim, - 18 - xdim));
3293 if (info->group_by == GROUPED_BY_ROW) {
3294 dao_set_array_expr (dao, 1, 1, xdim, n_obs,
3295 gnm_expr_new_funcall1
3296 (fd_transpose,
3297 gnm_expr_new_constant (val_1)));
3298 dao_set_array_expr (dao, xdim + 2, 1, 1, n_obs,
3299 gnm_expr_new_funcall1
3300 (fd_transpose,
3301 gnm_expr_new_constant (val_2)));
3302 } else {
3303 dao_set_array_expr (dao, 1, 1, xdim, n_obs,
3304 gnm_expr_new_constant (val_1));
3305 dao_set_array_expr (dao, xdim + 2, 1, 1, n_obs,
3306 gnm_expr_new_constant (val_2));
3307 }
3308
3309 expr_prediction = gnm_expr_new_funcall2 (fd_sumproduct,
3310 dao_get_rangeref (dao, 1, - 2 - xdim, 1, - 2),
3311 gnm_expr_new_funcall1
3312 (fd_transpose, make_rangeref
3313 (-1 - xdim, 0, -1, 0)));
3314 expr_diff = gnm_expr_new_binary (make_cellref (-1, 0), GNM_EXPR_OP_SUB, make_cellref (-2, 0));
3315
3316 for (i = 0; i < n_obs; i++) {
3317 dao_set_cell_expr (dao, xdim + 1, i + 1, gnm_expr_copy (expr_prediction));
3318 dao_set_cell_expr (dao, xdim + 3, i + 1, gnm_expr_copy (expr_diff));
3319 dao_set_cell_expr (dao, 0, i + 1, gnm_expr_new_constant (value_new_int (1)));
3320 }
3321 gnm_expr_free (expr_diff);
3322 gnm_expr_free (expr_prediction);
3323
3324 if (dao_cell_is_visible (dao, xdim + 4, n_obs)) {
3325 GnmExpr const *expr_X = dao_get_rangeref (dao, info->intercept ? 0 : 1, 1, xdim, n_obs);
3326 GnmExpr const *expr_diagonal =
3327 gnm_expr_new_funcall1
3328 (fd_leverage, expr_X);
3329 GnmExpr const *expr_var =
3330 dao_get_cellref (dao, 3, - 6 - xdim);
3331 GnmExpr const *expr_int_stud =
3332 gnm_expr_new_binary
3333 (make_cellref (-2, 0),
3334 GNM_EXPR_OP_DIV,
3335 gnm_expr_new_funcall1
3336 (fd_sqrt,
3337 gnm_expr_new_binary
3338 (expr_var,
3339 GNM_EXPR_OP_MULT,
3340 gnm_expr_new_binary
3341 (gnm_expr_new_constant (value_new_int (1)),
3342 GNM_EXPR_OP_SUB,
3343 make_cellref (-1, 0)))));
3344 GnmExpr const *expr_ext_stud;
3345 GnmExpr const *expr_p_val_res;
3346
3347 expr_var = gnm_expr_new_binary
3348 (gnm_expr_new_binary
3349 (dao_get_cellref (dao, 2, - 6 - xdim),
3350 GNM_EXPR_OP_SUB,
3351 gnm_expr_new_binary
3352 (make_cellref (-3, 0),
3353 GNM_EXPR_OP_EXP,
3354 gnm_expr_new_constant (value_new_int (2)))),
3355 GNM_EXPR_OP_DIV,
3356 gnm_expr_new_binary
3357 (dao_get_cellref (dao, 1, - 6 - xdim),
3358 GNM_EXPR_OP_SUB,
3359 gnm_expr_new_constant (value_new_int (1))));
3360 expr_ext_stud = gnm_expr_new_binary
3361 (make_cellref (-3, 0),
3362 GNM_EXPR_OP_DIV,
3363 gnm_expr_new_funcall1
3364 (fd_sqrt,
3365 gnm_expr_new_binary
3366 (expr_var,
3367 GNM_EXPR_OP_MULT,
3368 gnm_expr_new_binary
3369 (gnm_expr_new_constant (value_new_int (1)),
3370 GNM_EXPR_OP_SUB,
3371 make_cellref (-2, 0)))));
3372 expr_p_val_res = gnm_expr_new_funcall3
3373 (fd_tdist,
3374 gnm_expr_new_funcall1
3375 (fd_abs,
3376 make_cellref (-1, 0)),
3377 gnm_expr_new_binary
3378 (dao_get_cellref (dao, 1, - 6 - xdim),
3379 GNM_EXPR_OP_SUB,
3380 gnm_expr_new_constant (value_new_int (1))),
3381 gnm_expr_new_constant (value_new_int (2)));
3382
3383 dao_set_array_expr (dao, xdim + 4, 1, 1, n_obs, expr_diagonal);
3384 dao_set_format (dao, xdim + 5, 1, xdim + 6, n_obs, "0.0000");
3385 dao_set_percent (dao, xdim + 7, 1, xdim + 7, n_obs);
3386 for (i = 0; i < n_obs; i++){
3387 dao_set_cell_expr (dao, xdim + 5, i + 1, gnm_expr_copy (expr_int_stud));
3388 dao_set_cell_expr (dao, xdim + 6, i + 1, gnm_expr_copy (expr_ext_stud));
3389 dao_set_cell_expr (dao, xdim + 7, i + 1, gnm_expr_copy (expr_p_val_res));
3390 }
3391 gnm_expr_free (expr_int_stud);
3392 gnm_expr_free (expr_ext_stud);
3393 gnm_expr_free (expr_p_val_res);
3394 }
3395 } else {
3396 value_release (val_1);
3397 value_release (val_2);
3398 }
3399
3400 gnm_func_dec_usage (fd_linest);
3401 gnm_func_dec_usage (fd_index);
3402 gnm_func_dec_usage (fd_fdist);
3403 gnm_func_dec_usage (fd_sum);
3404 gnm_func_dec_usage (fd_sqrt);
3405 gnm_func_dec_usage (fd_tdist);
3406 gnm_func_dec_usage (fd_abs);
3407 gnm_func_dec_usage (fd_tinv);
3408 gnm_func_dec_usage (fd_transpose);
3409 if (fd_concatenate != NULL)
3410 gnm_func_dec_usage (fd_concatenate);
3411 if (fd_cell != NULL)
3412 gnm_func_dec_usage (fd_cell);
3413 if (fd_offset != NULL)
3414 gnm_func_dec_usage (fd_offset);
3415 if (fd_sumproduct != NULL)
3416 gnm_func_dec_usage (fd_sumproduct);
3417 if (fd_leverage != NULL)
3418 gnm_func_dec_usage (fd_leverage);
3419
3420 dao_redraw_respan (dao);
3421
3422 return FALSE;
3423 }
3424
3425 static gboolean
analysis_tool_regression_simple_engine_run(data_analysis_output_t * dao,analysis_tools_data_regression_t * info)3426 analysis_tool_regression_simple_engine_run (data_analysis_output_t *dao,
3427 analysis_tools_data_regression_t *info)
3428 {
3429 GnmFunc *fd_linest = analysis_tool_get_function ("LINEST", dao);
3430 GnmFunc *fd_index = analysis_tool_get_function ("INDEX", dao);
3431 GnmFunc *fd_fdist = analysis_tool_get_function ("FDIST", dao);
3432 GnmFunc *fd_rows = analysis_tool_get_function ("ROWS", dao);
3433 GnmFunc *fd_columns = analysis_tool_get_function ("COLUMNS", dao);
3434
3435 GSList *inputdata;
3436 guint row;
3437
3438 GnmValue *val_dep = value_dup (info->base.range_2);
3439 GnmExpr const *expr_intercept
3440 = gnm_expr_new_constant (value_new_bool (info->intercept));
3441 GnmExpr const *expr_observ;
3442 GnmExpr const *expr_val_dep;
3443
3444 dao_set_italic (dao, 0, 0, 4, 0);
3445 dao_set_italic (dao, 0, 2, 5, 2);
3446 set_cell_text_row (dao, 0, 0, info->multiple_y ?
3447 _("/SUMMARY OUTPUT"
3448 "/"
3449 "/Independent Variable"
3450 "/"
3451 "/Observations") :
3452 _("/SUMMARY OUTPUT"
3453 "/"
3454 "/Response Variable"
3455 "/"
3456 "/Observations"));
3457 set_cell_text_row (dao, 0, 2, info->multiple_y ?
3458 _("/Response Variable"
3459 "/R^2"
3460 "/Slope"
3461 "/Intercept"
3462 "/F"
3463 "/Significance of F") :
3464 _("/Independent Variable"
3465 "/R^2"
3466 "/Slope"
3467 "/Intercept"
3468 "/F"
3469 "/Significance of F"));
3470 analysis_tools_write_a_label (val_dep, dao,
3471 info->base.labels, info->group_by,
3472 3, 0);
3473
3474 expr_val_dep = gnm_expr_new_constant (val_dep);
3475 dao_set_cell_expr (dao, 5, 0, gnm_expr_new_binary (gnm_expr_new_funcall1 (fd_rows, gnm_expr_copy (expr_val_dep)),
3476 GNM_EXPR_OP_MULT,
3477 gnm_expr_new_funcall1 (fd_columns, gnm_expr_copy (expr_val_dep))));
3478 expr_observ = dao_get_cellref (dao, 5, 0);
3479
3480 for (row = 3, inputdata = info->indep_vars; inputdata != NULL;
3481 inputdata = inputdata->next, row++) {
3482 GnmValue *val_indep = value_dup (inputdata->data);
3483 GnmExpr const *expr_linest;
3484
3485 dao_set_italic (dao, 0, row, 0, row);
3486 analysis_tools_write_a_label (val_indep, dao,
3487 info->base.labels, info->group_by,
3488 0, row);
3489 expr_linest = info->multiple_y ?
3490 gnm_expr_new_funcall4 (fd_linest,
3491 gnm_expr_new_constant (val_indep),
3492 gnm_expr_copy (expr_val_dep),
3493 gnm_expr_copy (expr_intercept),
3494 gnm_expr_new_constant (value_new_bool (TRUE))) :
3495 gnm_expr_new_funcall4 (fd_linest,
3496 gnm_expr_copy (expr_val_dep),
3497 gnm_expr_new_constant (val_indep),
3498 gnm_expr_copy (expr_intercept),
3499 gnm_expr_new_constant (value_new_bool (TRUE)));
3500 dao_set_cell_array_expr (dao, 1, row,
3501 gnm_expr_new_funcall3 (fd_index,
3502 gnm_expr_copy (expr_linest),
3503 gnm_expr_new_constant (value_new_int (3)),
3504 gnm_expr_new_constant (value_new_int (1))));
3505 dao_set_cell_array_expr (dao, 4, row,
3506 gnm_expr_new_funcall3 (fd_index,
3507 gnm_expr_copy (expr_linest),
3508 gnm_expr_new_constant (value_new_int (4)),
3509 gnm_expr_new_constant (value_new_int (1))));
3510 dao_set_array_expr (dao, 2, row, 2, 1, expr_linest);
3511
3512 dao_set_cell_expr (dao, 5, row, gnm_expr_new_funcall3
3513 (fd_fdist,
3514 make_cellref (-1, 0),
3515 gnm_expr_new_constant (value_new_int (1)),
3516 gnm_expr_new_binary (gnm_expr_copy (expr_observ),
3517 GNM_EXPR_OP_SUB,
3518 gnm_expr_new_constant (value_new_int (2)))));
3519
3520 }
3521
3522 gnm_expr_free (expr_intercept);
3523 gnm_expr_free (expr_observ);
3524 gnm_expr_free (expr_val_dep);
3525
3526 gnm_func_dec_usage (fd_fdist);
3527 gnm_func_dec_usage (fd_linest);
3528 gnm_func_dec_usage (fd_index);
3529 gnm_func_dec_usage (fd_rows);
3530 gnm_func_dec_usage (fd_columns);
3531
3532 dao_redraw_respan (dao);
3533
3534 return FALSE;
3535 }
3536
3537 gboolean
analysis_tool_regression_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)3538 analysis_tool_regression_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
3539 analysis_tool_engine_t selector, gpointer result)
3540 {
3541 analysis_tools_data_regression_t *info = specs;
3542
3543 switch (selector) {
3544 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
3545 return (dao_command_descriptor (dao, _("Regression (%s)"), result)
3546 == NULL);
3547 case TOOL_ENGINE_UPDATE_DAO:
3548 {
3549 gint xdim = calculate_xdim (info->base.range_1, info->group_by);
3550 gint cols, rows;
3551
3552 if (info->multiple_regression) {
3553 cols = 7;
3554 rows = 17 + xdim;
3555 info->indep_vars = NULL;
3556 if (info->residual) {
3557 gint residual_cols = xdim + 4;
3558 GnmValue *val = info->base.range_1;
3559
3560 rows += 2 + calculate_n_obs (val, info->group_by);
3561 residual_cols += 4;
3562 if (cols < residual_cols)
3563 cols = residual_cols;
3564 }
3565 } else {
3566 info->indep_vars = g_slist_prepend (NULL, info->base.range_1);
3567 info->base.range_1 = NULL;
3568 prepare_input_range (&info->indep_vars, info->group_by);
3569 cols = 6;
3570 rows = 3 + xdim;
3571 }
3572 dao_adjust (dao, cols, rows);
3573 return FALSE;
3574 }
3575 case TOOL_ENGINE_CLEAN_UP:
3576 range_list_destroy (info->indep_vars);
3577 info->indep_vars = NULL;
3578 return analysis_tool_generic_b_clean (specs);
3579
3580 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
3581 return FALSE;
3582 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
3583 dao_prepare_output (NULL, dao, _("Regression"));
3584 return FALSE;
3585 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
3586 return dao_format_output (dao, _("Regression"));
3587 case TOOL_ENGINE_PERFORM_CALC:
3588 default:
3589 if (info->multiple_regression)
3590 return analysis_tool_regression_engine_run (dao, specs);
3591 else
3592 return analysis_tool_regression_simple_engine_run (dao, specs);
3593 }
3594 return TRUE; /* We shouldn't get here */
3595 }
3596
3597
3598
/************* Moving Average Tool *****************************************
 *
 * The moving average tool calculates moving averages of a given data
 * set.  The results are given in a table which can be printed out in
 * a new sheet, in a new workbook, or simply into an existing sheet.
 *
 **/
3606
3607 static GnmExpr const *
analysis_tool_moving_average_funcall5(GnmFunc * fd,GnmExpr const * ex,int y,int x,int dy,int dx)3608 analysis_tool_moving_average_funcall5 (GnmFunc *fd, GnmExpr const *ex, int y, int x, int dy, int dx)
3609 {
3610 GnmExprList *list;
3611 list = gnm_expr_list_prepend (NULL, gnm_expr_new_constant (value_new_int (dx)));
3612 list = gnm_expr_list_prepend (list, gnm_expr_new_constant (value_new_int (dy)));
3613 list = gnm_expr_list_prepend (list, gnm_expr_new_constant (value_new_int (x)));
3614 list = gnm_expr_list_prepend (list, gnm_expr_new_constant (value_new_int (y)));
3615 list = gnm_expr_list_prepend (list, gnm_expr_copy (ex));
3616
3617 return gnm_expr_new_funcall (fd, list);
3618 }
3619
3620 static GnmExpr const *
analysis_tool_moving_average_weighted_av(GnmFunc * fd_sum,GnmFunc * fd_in,GnmExpr const * ex,int y,int x,int dy,int dx,int * w)3621 analysis_tool_moving_average_weighted_av (GnmFunc *fd_sum, GnmFunc *fd_in, GnmExpr const *ex,
3622 int y, int x, int dy, int dx, int *w)
3623 {
3624 GnmExprList *list = NULL;
3625
3626 while (*w != 0) {
3627 list = gnm_expr_list_prepend
3628 (list, gnm_expr_new_binary
3629 (gnm_expr_new_constant (value_new_int (*w)),
3630 GNM_EXPR_OP_MULT,
3631 gnm_expr_new_funcall3 (fd_in, gnm_expr_copy (ex),
3632 gnm_expr_new_constant (value_new_int (y)),
3633 gnm_expr_new_constant (value_new_int (x)))));
3634 w++;
3635 x += dx;
3636 y += dy;
3637 }
3638
3639 return gnm_expr_new_funcall (fd_sum, list);
3640 }
3641
3642 static gboolean
analysis_tool_moving_average_engine_run(data_analysis_output_t * dao,analysis_tools_data_moving_average_t * info)3643 analysis_tool_moving_average_engine_run (data_analysis_output_t *dao,
3644 analysis_tools_data_moving_average_t *info)
3645 {
3646 GnmFunc *fd_index = NULL;
3647 GnmFunc *fd_average;
3648 GnmFunc *fd_offset;
3649 GnmFunc *fd_sqrt = NULL;
3650 GnmFunc *fd_sumxmy2 = NULL;
3651 GnmFunc *fd_sum = NULL;
3652 GSList *l;
3653 gint col = 0;
3654 gint source;
3655 SheetObject *so = NULL;
3656 GogPlot *plot = NULL;
3657
3658 if (info->base.labels || info->ma_type == moving_average_type_wma
3659 || info->ma_type== moving_average_type_spencer_ma) {
3660 fd_index = gnm_func_lookup_or_add_placeholder ("INDEX");
3661 gnm_func_inc_usage (fd_index);
3662 }
3663 if (info->std_error_flag) {
3664 fd_sqrt = gnm_func_lookup_or_add_placeholder ("SQRT");
3665 gnm_func_inc_usage (fd_sqrt);
3666 fd_sumxmy2 = gnm_func_lookup_or_add_placeholder ("SUMXMY2");
3667 gnm_func_inc_usage (fd_sumxmy2);
3668 }
3669 if (moving_average_type_wma == info->ma_type || moving_average_type_spencer_ma == info->ma_type) {
3670 fd_sum = gnm_func_lookup_or_add_placeholder ("SUM");
3671 gnm_func_inc_usage (fd_sum);
3672 }
3673 fd_average = gnm_func_lookup_or_add_placeholder ("AVERAGE");
3674 gnm_func_inc_usage (fd_average);
3675 fd_offset = gnm_func_lookup_or_add_placeholder ("OFFSET");
3676 gnm_func_inc_usage (fd_offset);
3677
3678 if (info->show_graph) {
3679 GogGraph *graph;
3680 GogChart *chart;
3681
3682 graph = g_object_new (GOG_TYPE_GRAPH, NULL);
3683 chart = GOG_CHART (gog_object_add_by_name (GOG_OBJECT (graph), "Chart", NULL));
3684 plot = gog_plot_new_by_name ("GogLinePlot");
3685 gog_object_add_by_name (GOG_OBJECT (chart), "Plot", GOG_OBJECT (plot));
3686 so = sheet_object_graph_new (graph);
3687 g_object_unref (graph);
3688 }
3689
3690 for (l = info->base.input, source = 1; l; l = l->next, col++, source++) {
3691 GnmValue *val = value_dup ((GnmValue *)l->data);
3692 GnmValue *val_c = NULL;
3693 GnmExpr const *expr_title = NULL;
3694 GnmExpr const *expr_input = NULL;
3695 char const *format = NULL;
3696 gint height;
3697 gint x = 0;
3698 gint y = 0;
3699 gint *mover;
3700 guint *delta_mover;
3701 guint delta_x = 1;
3702 guint delta_y = 1;
3703 gint row, base;
3704 Sheet *sheet;
3705 GnmEvalPos ep;
3706
3707 eval_pos_init_sheet (&ep, val->v_range.cell.a.sheet);
3708
3709 if (info->base.labels) {
3710 val_c = value_dup (val);
3711 switch (info->base.group_by) {
3712 case GROUPED_BY_ROW:
3713 val->v_range.cell.a.col++;
3714 break;
3715 default:
3716 val->v_range.cell.a.row++;
3717 break;
3718 }
3719 expr_title = gnm_expr_new_funcall1 (fd_index,
3720 gnm_expr_new_constant (val_c));
3721
3722 dao_set_italic (dao, col, 0, col, 0);
3723 dao_set_cell_expr (dao, col, 0, expr_title);
3724 } else {
3725 switch (info->base.group_by) {
3726 case GROUPED_BY_ROW:
3727 format = _("Row %d");
3728 break;
3729 default:
3730 format = _("Column %d");
3731 break;
3732 }
3733 dao_set_cell_printf (dao, col, 0, format, source);
3734 }
3735
3736 switch (info->base.group_by) {
3737 case GROUPED_BY_ROW:
3738 height = value_area_get_width (val, &ep);
3739 mover = &x;
3740 delta_mover = &delta_x;
3741 break;
3742 default:
3743 height = value_area_get_height (val, &ep);
3744 mover = &y;
3745 delta_mover = &delta_y;
3746 break;
3747 }
3748
3749 sheet = val->v_range.cell.a.sheet;
3750 expr_input = gnm_expr_new_constant (val);
3751
3752 if (plot != NULL) {
3753 GogSeries *series;
3754
3755 series = gog_plot_new_series (plot);
3756 gog_series_set_dim (series, 1,
3757 gnm_go_data_vector_new_expr (sheet,
3758 gnm_expr_top_new (gnm_expr_copy (expr_input))),
3759 NULL);
3760
3761 series = gog_plot_new_series (plot);
3762 gog_series_set_dim (series, 1,
3763 dao_go_data_vector (dao, col, 1, col, height),
3764 NULL);
3765 }
3766
3767 switch (info->ma_type) {
3768 case moving_average_type_central_sma:
3769 {
3770 GnmExpr const *expr_offset_last = NULL;
3771 GnmExpr const *expr_offset = NULL;
3772 *delta_mover = info->interval;
3773 (*mover) = 1 - info->interval + info->offset;
3774 for (row = 1; row <= height; row++, (*mover)++) {
3775 expr_offset_last = expr_offset;
3776 expr_offset = NULL;
3777 if ((*mover >= 0) && (*mover < height - info->interval + 1)) {
3778 expr_offset = gnm_expr_new_funcall1
3779 (fd_average, analysis_tool_moving_average_funcall5
3780 (fd_offset,expr_input, y, x, delta_y, delta_x));
3781
3782 if (expr_offset_last == NULL)
3783 dao_set_cell_na (dao, col, row);
3784 else
3785 dao_set_cell_expr (dao, col, row,
3786 gnm_expr_new_funcall2 (fd_average, expr_offset_last,
3787 gnm_expr_copy (expr_offset)));
3788 } else {
3789 if (expr_offset_last != NULL) {
3790 gnm_expr_free (expr_offset_last);
3791 expr_offset_last = NULL;
3792 }
3793 dao_set_cell_na (dao, col, row);
3794 }
3795 }
3796 base = info->interval - info->offset;
3797 }
3798 break;
3799 case moving_average_type_cma:
3800 for (row = 1; row <= height; row++) {
3801 GnmExpr const *expr_offset;
3802
3803 *delta_mover = row;
3804
3805 expr_offset = analysis_tool_moving_average_funcall5
3806 (fd_offset, expr_input, y, x, delta_y, delta_x);
3807
3808 dao_set_cell_expr (dao, col, row,
3809 gnm_expr_new_funcall1 (fd_average, expr_offset));
3810 }
3811 base = 0;
3812 break;
3813 case moving_average_type_wma:
3814 {
3815 GnmExpr const *expr_divisor = gnm_expr_new_constant
3816 (value_new_int((info->interval * (info->interval + 1))/2));
3817 int *w = g_new (int, (info->interval + 1));
3818 int i;
3819
3820 for (i = 0; i < info->interval; i++)
3821 w[i] = i+1;
3822 w[info->interval] = 0;
3823
3824 delta_x = 0;
3825 delta_y= 0;
3826 (*delta_mover) = 1;
3827 (*mover) = 1 - info->interval;
3828 for (row = 1; row <= height; row++, (*mover)++) {
3829 if ((*mover >= 0) && (*mover < height - info->interval + 1)) {
3830 GnmExpr const *expr_sum;
3831
3832 expr_sum = analysis_tool_moving_average_weighted_av
3833 (fd_sum, fd_index, expr_input, y+1, x+1, delta_y, delta_x, w);
3834
3835 dao_set_cell_expr (dao, col, row,
3836 gnm_expr_new_binary
3837 (expr_sum,
3838 GNM_EXPR_OP_DIV,
3839 gnm_expr_copy (expr_divisor)));
3840 } else
3841 dao_set_cell_na (dao, col, row);
3842 }
3843 g_free (w);
3844 gnm_expr_free (expr_divisor);
3845 base = info->interval - 1;
3846 delta_x = 1;
3847 delta_y= 1;
3848 }
3849 break;
3850 case moving_average_type_spencer_ma:
3851 {
3852 GnmExpr const *expr_divisor = gnm_expr_new_constant
3853 (value_new_int(-3-6-5+3+21+45+67+74+67+46+21+3-5-6-3));
3854 int w[] = {-3, -6, -5, 3, 21, 45, 67, 74, 67, 46, 21, 3, -5, -6, -3, 0};
3855
3856 delta_x = 0;
3857 delta_y= 0;
3858 (*delta_mover) = 1;
3859 (*mover) = 1 - info->interval + info->offset;
3860 for (row = 1; row <= height; row++, (*mover)++) {
3861 if ((*mover >= 0) && (*mover < height - info->interval + 1)) {
3862 GnmExpr const *expr_sum;
3863
3864 expr_sum = analysis_tool_moving_average_weighted_av
3865 (fd_sum, fd_index, expr_input, y+1, x+1, delta_y, delta_x, w);
3866
3867 dao_set_cell_expr (dao, col, row,
3868 gnm_expr_new_binary
3869 (expr_sum,
3870 GNM_EXPR_OP_DIV,
3871 gnm_expr_copy (expr_divisor)));
3872 } else
3873 dao_set_cell_na (dao, col, row);
3874 }
3875 gnm_expr_free (expr_divisor);
3876 base = info->interval - info->offset - 1;
3877 delta_x = 1;
3878 delta_y= 1;
3879 }
3880 break;
3881 default:
3882 (*delta_mover) = info->interval;
3883 (*mover) = 1 - info->interval + info->offset;
3884 for (row = 1; row <= height; row++, (*mover)++) {
3885 if ((*mover >= 0) && (*mover < height - info->interval + 1)) {
3886 GnmExpr const *expr_offset;
3887
3888 expr_offset = analysis_tool_moving_average_funcall5
3889 (fd_offset, expr_input, y, x, delta_y, delta_x);
3890 dao_set_cell_expr (dao, col, row,
3891 gnm_expr_new_funcall1 (fd_average, expr_offset));
3892 } else
3893 dao_set_cell_na (dao, col, row);
3894 }
3895 base = info->interval - info->offset - 1;
3896 break;
3897 }
3898
3899 if (info->std_error_flag) {
3900 col++;
3901 dao_set_italic (dao, col, 0, col, 0);
3902 dao_set_cell (dao, col, 0, _("Standard Error"));
3903
3904 (*mover) = base;
3905 for (row = 1; row <= height; row++) {
3906 if (row > base && row <= height - info->offset && (row - base - info->df) > 0) {
3907 GnmExpr const *expr_offset;
3908
3909 if (info->base.group_by == GROUPED_BY_ROW)
3910 delta_x = row - base;
3911 else
3912 delta_y = row - base;
3913
3914 expr_offset = analysis_tool_moving_average_funcall5
3915 (fd_offset, expr_input, y, x, delta_y, delta_x);
3916 dao_set_cell_expr (dao, col, row,
3917 gnm_expr_new_funcall1
3918 (fd_sqrt,
3919 gnm_expr_new_binary
3920 (gnm_expr_new_funcall2
3921 (fd_sumxmy2,
3922 expr_offset,
3923 make_rangeref (-1, - row + base + 1, -1, 0)),
3924 GNM_EXPR_OP_DIV,
3925 gnm_expr_new_constant (value_new_int
3926 (row - base - info->df)))));
3927 } else
3928 dao_set_cell_na (dao, col, row);
3929 }
3930 }
3931
3932 gnm_expr_free (expr_input);
3933 }
3934
3935 if (so != NULL)
3936 dao_set_sheet_object (dao, 0, 1, so);
3937
3938 if (fd_index != NULL)
3939 gnm_func_dec_usage (fd_index);
3940 if (fd_sqrt != NULL)
3941 gnm_func_dec_usage (fd_sqrt);
3942 if (fd_sumxmy2 != NULL)
3943 gnm_func_dec_usage (fd_sumxmy2);
3944 if (fd_sum != NULL)
3945 gnm_func_dec_usage (fd_sum);
3946 gnm_func_dec_usage (fd_average);
3947 gnm_func_dec_usage (fd_offset);
3948
3949 dao_redraw_respan (dao);
3950
3951 return FALSE;
3952 }
3953
3954
3955 gboolean
analysis_tool_moving_average_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)3956 analysis_tool_moving_average_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
3957 analysis_tool_engine_t selector, gpointer result)
3958 {
3959 analysis_tools_data_moving_average_t *info = specs;
3960
3961 switch (selector) {
3962 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
3963 return (dao_command_descriptor (dao, _("Moving Average (%s)"), result)
3964 == NULL);
3965 case TOOL_ENGINE_UPDATE_DAO:
3966 prepare_input_range (&info->base.input, info->base.group_by);
3967 dao_adjust (dao, (info->std_error_flag ? 2 : 1) *
3968 g_slist_length (info->base.input),
3969 1 + analysis_tool_calc_length (specs));
3970 return FALSE;
3971 case TOOL_ENGINE_CLEAN_UP:
3972 return analysis_tool_generic_clean (specs);
3973 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
3974 return FALSE;
3975 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
3976 dao_prepare_output (NULL, dao, _("Moving Average"));
3977 return FALSE;
3978 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
3979 return dao_format_output (dao, _("Moving Average"));
3980 case TOOL_ENGINE_PERFORM_CALC:
3981 default:
3982 return analysis_tool_moving_average_engine_run (dao, specs);
3983 }
3984 return TRUE; /* We shouldn't get here */
3985 }
3986
3987
3988 /************* Rank and Percentile Tool ************************************
3989 *
3990 * The results are given in a table which can be printed out in a new
3991 * sheet, in a new workbook, or simply into an existing sheet.
3992 *
3993 **/
3994
3995 static gboolean
analysis_tool_ranking_engine_run(data_analysis_output_t * dao,analysis_tools_data_ranking_t * info)3996 analysis_tool_ranking_engine_run (data_analysis_output_t *dao,
3997 analysis_tools_data_ranking_t *info)
3998 {
3999 GSList *data = info->base.input;
4000 int col = 0;
4001
4002 GnmFunc *fd_large;
4003 GnmFunc *fd_row;
4004 GnmFunc *fd_rank;
4005 GnmFunc *fd_match;
4006 GnmFunc *fd_percentrank;
4007
4008 fd_large = gnm_func_lookup_or_add_placeholder ("LARGE");
4009 gnm_func_inc_usage (fd_large);
4010 fd_row = gnm_func_lookup_or_add_placeholder ("ROW");
4011 gnm_func_inc_usage (fd_row);
4012 fd_rank = gnm_func_lookup_or_add_placeholder ("RANK");
4013 gnm_func_inc_usage (fd_rank);
4014 fd_match = gnm_func_lookup_or_add_placeholder ("MATCH");
4015 gnm_func_inc_usage (fd_match);
4016 fd_percentrank = gnm_func_lookup_or_add_placeholder ("PERCENTRANK");
4017 gnm_func_inc_usage (fd_percentrank);
4018
4019 dao_set_merge (dao, 0, 0, 1, 0);
4020 dao_set_italic (dao, 0, 0, 0, 0);
4021 dao_set_cell (dao, 0, 0, _("Ranks & Percentiles"));
4022
4023 for (; data; data = data->next, col++) {
4024 GnmValue *val_org = value_dup (data->data);
4025 GnmExpr const *expr_large;
4026 GnmExpr const *expr_rank;
4027 GnmExpr const *expr_position;
4028 GnmExpr const *expr_percentile;
4029 int rows, i;
4030
4031 dao_set_italic (dao, 0, 1, 3, 1);
4032 dao_set_cell (dao, 0, 1, _("Point"));
4033 dao_set_cell (dao, 2, 1, _("Rank"));
4034 dao_set_cell (dao, 3, 1, _("Percentile Rank"));
4035 analysis_tools_write_label (val_org, dao, &info->base, 1, 1, col + 1);
4036
4037 rows = (val_org->v_range.cell.b.row - val_org->v_range.cell.a.row + 1) *
4038 (val_org->v_range.cell.b.col - val_org->v_range.cell.a.col + 1);
4039
4040 expr_large = gnm_expr_new_funcall2
4041 (fd_large, gnm_expr_new_constant (value_dup (val_org)),
4042 gnm_expr_new_binary (gnm_expr_new_binary
4043 (gnm_expr_new_funcall (fd_row, NULL),
4044 GNM_EXPR_OP_SUB,
4045 gnm_expr_new_funcall1
4046 (fd_row, dao_get_cellref (dao, 1, 2))),
4047 GNM_EXPR_OP_ADD,
4048 gnm_expr_new_constant (value_new_int (1))));
4049 dao_set_array_expr (dao, 1, 2, 1, rows, gnm_expr_copy (expr_large));
4050
4051 /* If there are ties the following will only give us the first occurrence... */
4052 expr_position = gnm_expr_new_funcall3 (fd_match, expr_large,
4053 gnm_expr_new_constant (value_dup (val_org)),
4054 gnm_expr_new_constant (value_new_int (0)));
4055
4056 dao_set_array_expr (dao, 0, 2, 1, rows, expr_position);
4057
4058 expr_rank = gnm_expr_new_funcall2 (fd_rank,
4059 make_cellref (-1,0),
4060 gnm_expr_new_constant (value_dup (val_org)));
4061 if (info->av_ties) {
4062 GnmExpr const *expr_rank_lower;
4063 GnmExpr const *expr_rows_p_one;
4064 GnmExpr const *expr_rows;
4065 GnmFunc *fd_count;
4066 fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
4067 gnm_func_inc_usage (fd_count);
4068
4069 expr_rows = gnm_expr_new_funcall1
4070 (fd_count, gnm_expr_new_constant (value_dup (val_org)));
4071 expr_rows_p_one = gnm_expr_new_binary
4072 (expr_rows,
4073 GNM_EXPR_OP_ADD,
4074 gnm_expr_new_constant (value_new_int (1)));
4075 expr_rank_lower = gnm_expr_new_funcall3
4076 (fd_rank,
4077 make_cellref (-1,0),
4078 gnm_expr_new_constant (value_dup (val_org)),
4079 gnm_expr_new_constant (value_new_int (1)));
4080 expr_rank = gnm_expr_new_binary
4081 (gnm_expr_new_binary
4082 (gnm_expr_new_binary (expr_rank, GNM_EXPR_OP_SUB, expr_rank_lower),
4083 GNM_EXPR_OP_ADD, expr_rows_p_one),
4084 GNM_EXPR_OP_DIV,
4085 gnm_expr_new_constant (value_new_int (2)));
4086
4087 gnm_func_dec_usage (fd_count);
4088 }
4089 expr_percentile = gnm_expr_new_funcall3 (fd_percentrank,
4090 gnm_expr_new_constant (value_dup (val_org)),
4091 make_cellref (-2,0),
4092 gnm_expr_new_constant (value_new_int (10)));
4093
4094 dao_set_percent (dao, 3, 2, 3, 1 + rows);
4095 for (i = 2; i < rows + 2; i++) {
4096 dao_set_cell_expr ( dao, 2, i, gnm_expr_copy (expr_rank));
4097 dao_set_cell_expr ( dao, 3, i, gnm_expr_copy (expr_percentile));
4098 }
4099
4100
4101 dao->offset_col += 4;
4102 value_release (val_org);
4103 gnm_expr_free (expr_rank);
4104 gnm_expr_free (expr_percentile);
4105 }
4106
4107 gnm_func_dec_usage (fd_large);
4108 gnm_func_dec_usage (fd_row);
4109 gnm_func_dec_usage (fd_rank);
4110 gnm_func_dec_usage (fd_match);
4111 gnm_func_dec_usage (fd_percentrank);
4112
4113 dao_redraw_respan (dao);
4114
4115 return FALSE;
4116 }
4117
4118 gboolean
analysis_tool_ranking_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)4119 analysis_tool_ranking_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
4120 analysis_tool_engine_t selector, gpointer result)
4121 {
4122 analysis_tools_data_ranking_t *info = specs;
4123
4124 switch (selector) {
4125 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
4126 return (dao_command_descriptor (dao, _("Ranks (%s)"), result)
4127 == NULL);
4128 case TOOL_ENGINE_UPDATE_DAO:
4129 prepare_input_range (&info->base.input, info->base.group_by);
4130 dao_adjust (dao, 4 * g_slist_length (info->base.input),
4131 2 + analysis_tool_calc_length (specs));
4132 return FALSE;
4133 case TOOL_ENGINE_CLEAN_UP:
4134 return analysis_tool_generic_clean (specs);
4135 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
4136 return FALSE;
4137 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
4138 dao_prepare_output (NULL, dao, _("Ranks"));
4139 return FALSE;
4140 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
4141 return dao_format_output (dao, _("Ranks"));
4142 case TOOL_ENGINE_PERFORM_CALC:
4143 default:
4144 return analysis_tool_ranking_engine_run (dao, specs);
4145 }
4146 return TRUE; /* We shouldn't get here */
4147 }
4148
4149
4150
4151
4152 /************* Anova: Single Factor Tool **********************************
4153 *
4154 * The results are given in a table which can be printed out in a new
4155 * sheet, in a new workbook, or simply into an existing sheet.
4156 *
4157 **/
4158
4159 static gboolean
analysis_tool_anova_single_engine_run(data_analysis_output_t * dao,gpointer specs)4160 analysis_tool_anova_single_engine_run (data_analysis_output_t *dao, gpointer specs)
4161 {
4162 analysis_tools_data_anova_single_t *info = specs;
4163 GSList *inputdata = info->base.input;
4164 GnmFunc *fd_sum;
4165 GnmFunc *fd_count;
4166 GnmFunc *fd_mean;
4167 GnmFunc *fd_var;
4168 GnmFunc *fd_devsq;
4169
4170 guint index;
4171
4172 dao_set_italic (dao, 0, 0, 0, 2);
4173 dao_set_cell (dao, 0, 0, _("Anova: Single Factor"));
4174 dao_set_cell (dao, 0, 2, _("SUMMARY"));
4175
4176 dao_set_italic (dao, 0, 3, 4, 3);
4177 set_cell_text_row (dao, 0, 3, _("/Groups"
4178 "/Count"
4179 "/Sum"
4180 "/Average"
4181 "/Variance"));
4182
4183 fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
4184 gnm_func_inc_usage (fd_mean);
4185 fd_var = gnm_func_lookup_or_add_placeholder ("VAR");
4186 gnm_func_inc_usage (fd_var);
4187 fd_sum = gnm_func_lookup_or_add_placeholder ("SUM");
4188 gnm_func_inc_usage (fd_sum);
4189 fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
4190 gnm_func_inc_usage (fd_count);
4191 fd_devsq = gnm_func_lookup_or_add_placeholder ("DEVSQ");
4192 gnm_func_inc_usage (fd_devsq);
4193
4194 dao->offset_row += 4;
4195 if (dao->rows <= dao->offset_row)
4196 goto finish_anova_single_factor_tool;
4197
4198 /* SUMMARY */
4199
4200 for (index = 0; inputdata != NULL;
4201 inputdata = inputdata->next, index++) {
4202 GnmValue *val_org = value_dup (inputdata->data);
4203
4204 /* Label */
4205 dao_set_italic (dao, 0, index, 0, index);
4206 analysis_tools_write_label (val_org, dao, &info->base,
4207 0, index, index + 1);
4208
4209 /* Count */
4210 dao_set_cell_expr
4211 (dao, 1, index,
4212 gnm_expr_new_funcall1
4213 (fd_count,
4214 gnm_expr_new_constant (value_dup (val_org))));
4215
4216 /* Sum */
4217 dao_set_cell_expr
4218 (dao, 2, index,
4219 gnm_expr_new_funcall1
4220 (fd_sum,
4221 gnm_expr_new_constant (value_dup (val_org))));
4222
4223 /* Average */
4224 dao_set_cell_expr
4225 (dao, 3, index,
4226 gnm_expr_new_funcall1
4227 (fd_mean,
4228 gnm_expr_new_constant (value_dup (val_org))));
4229
4230 /* Variance */
4231 dao_set_cell_expr
4232 (dao, 4, index,
4233 gnm_expr_new_funcall1
4234 (fd_var,
4235 gnm_expr_new_constant (val_org)));
4236
4237 }
4238
4239 dao->offset_row += index + 2;
4240 if (dao->rows <= dao->offset_row)
4241 goto finish_anova_single_factor_tool;
4242
4243
4244 dao_set_italic (dao, 0, 0, 0, 4);
4245 set_cell_text_col (dao, 0, 0, _("/ANOVA"
4246 "/Source of Variation"
4247 "/Between Groups"
4248 "/Within Groups"
4249 "/Total"));
4250 dao_set_italic (dao, 1, 1, 6, 1);
4251 set_cell_text_row (dao, 1, 1, _("/SS"
4252 "/df"
4253 "/MS"
4254 "/F"
4255 "/P-value"
4256 "/F critical"));
4257
4258 /* ANOVA */
4259 {
4260 GnmExprList *sum_wdof_args = NULL;
4261 GnmExprList *sum_tdof_args = NULL;
4262 GnmExprList *arg_ss_total = NULL;
4263 GnmExprList *arg_ss_within = NULL;
4264
4265 GnmExpr const *expr_wdof = NULL;
4266 GnmExpr const *expr_ss_total = NULL;
4267 GnmExpr const *expr_ss_within = NULL;
4268
4269 for (inputdata = info->base.input; inputdata != NULL;
4270 inputdata = inputdata->next) {
4271 GnmValue *val_org = value_dup (inputdata->data);
4272 GnmExpr const *expr_one;
4273 GnmExpr const *expr_count_one;
4274
4275 analysis_tools_remove_label (val_org,
4276 info->base.labels,
4277 info->base.group_by);
4278 expr_one = gnm_expr_new_constant (value_dup (val_org));
4279
4280 arg_ss_total = gnm_expr_list_append
4281 (arg_ss_total,
4282 gnm_expr_new_constant (val_org));
4283
4284 arg_ss_within = gnm_expr_list_append
4285 (arg_ss_within,
4286 gnm_expr_new_funcall1
4287 (fd_devsq, gnm_expr_copy (expr_one)));
4288
4289 expr_count_one =
4290 gnm_expr_new_funcall1 (fd_count, expr_one);
4291
4292 sum_wdof_args = gnm_expr_list_append
4293 (sum_wdof_args,
4294 gnm_expr_new_binary(
4295 gnm_expr_copy (expr_count_one),
4296 GNM_EXPR_OP_SUB,
4297 gnm_expr_new_constant
4298 (value_new_int (1))));
4299 sum_tdof_args = gnm_expr_list_append
4300 (sum_tdof_args,
4301 expr_count_one);
4302 }
4303
4304 expr_ss_total = gnm_expr_new_funcall
4305 (fd_devsq, arg_ss_total);
4306 expr_ss_within = gnm_expr_new_funcall
4307 (fd_sum, arg_ss_within);
4308
4309 {
4310 /* SS between groups */
4311 GnmExpr const *expr_ss_between;
4312
4313 if (dao_cell_is_visible (dao, 1,4)) {
4314 expr_ss_between = gnm_expr_new_binary
4315 (make_cellref (0, 2),
4316 GNM_EXPR_OP_SUB,
4317 make_cellref (0, 1));
4318
4319 } else {
4320 expr_ss_between = gnm_expr_new_binary
4321 (gnm_expr_copy (expr_ss_total),
4322 GNM_EXPR_OP_SUB,
4323 gnm_expr_copy (expr_ss_within));
4324 }
4325 dao_set_cell_expr (dao, 1, 2, expr_ss_between);
4326 }
4327 {
4328 /* SS within groups */
4329 dao_set_cell_expr (dao, 1, 3, gnm_expr_copy (expr_ss_within));
4330 }
4331 {
4332 /* SS total groups */
4333 dao_set_cell_expr (dao, 1, 4, expr_ss_total);
4334 }
4335 {
4336 /* Between groups degrees of freedom */
4337 dao_set_cell_int (dao, 2, 2,
4338 g_slist_length (info->base.input) - 1);
4339 }
4340 {
4341 /* Within groups degrees of freedom */
4342 expr_wdof = gnm_expr_new_funcall (fd_sum, sum_wdof_args);
4343 dao_set_cell_expr (dao, 2, 3, gnm_expr_copy (expr_wdof));
4344 }
4345 {
4346 /* Total degrees of freedom */
4347 GnmExpr const *expr_tdof =
4348 gnm_expr_new_binary
4349 (gnm_expr_new_funcall (fd_sum, sum_tdof_args),
4350 GNM_EXPR_OP_SUB,
4351 gnm_expr_new_constant (value_new_int (1)));
4352 dao_set_cell_expr (dao, 2, 4, expr_tdof);
4353 }
4354 {
4355 /* MS values */
4356 GnmExpr const *expr_ms =
4357 gnm_expr_new_binary
4358 (make_cellref (-2, 0),
4359 GNM_EXPR_OP_DIV,
4360 make_cellref (-1, 0));
4361 dao_set_cell_expr (dao, 3, 2, gnm_expr_copy (expr_ms));
4362 dao_set_cell_expr (dao, 3, 3, expr_ms);
4363 }
4364 {
4365 /* Observed F */
4366 GnmExpr const *expr_denom;
4367 GnmExpr const *expr_f;
4368
4369 if (dao_cell_is_visible (dao, 3, 3)) {
4370 expr_denom = make_cellref (-1, 1);
4371 gnm_expr_free (expr_ss_within);
4372 } else {
4373 expr_denom = gnm_expr_new_binary
4374 (expr_ss_within,
4375 GNM_EXPR_OP_DIV,
4376 gnm_expr_copy (expr_wdof));
4377 }
4378
4379 expr_f = gnm_expr_new_binary
4380 (make_cellref (-1, 0),
4381 GNM_EXPR_OP_DIV,
4382 expr_denom);
4383 dao_set_cell_expr(dao, 4, 2, expr_f);
4384 }
4385 {
4386 /* P value */
4387 GnmFunc *fd_fdist;
4388 const GnmExpr *arg1;
4389 const GnmExpr *arg2;
4390 const GnmExpr *arg3;
4391
4392 arg1 = make_cellref (-1, 0);
4393 arg2 = make_cellref (-3, 0);
4394
4395 if (dao_cell_is_visible (dao, 2, 3)) {
4396 arg3 = make_cellref (-3, 1);
4397 } else {
4398 arg3 = gnm_expr_copy (expr_wdof);
4399 }
4400
4401 fd_fdist = gnm_func_lookup_or_add_placeholder ("FDIST");
4402 gnm_func_inc_usage (fd_fdist);
4403
4404 dao_set_cell_expr
4405 (dao, 5, 2,
4406 gnm_expr_new_funcall3
4407 (fd_fdist,
4408 arg1, arg2, arg3));
4409 if (fd_fdist)
4410 gnm_func_dec_usage (fd_fdist);
4411 }
4412 {
4413 /* Critical F*/
4414 GnmFunc *fd_finv;
4415 const GnmExpr *arg3;
4416
4417 if (dao_cell_is_visible (dao, 2, 3)) {
4418 arg3 = make_cellref (-4, 1);
4419 gnm_expr_free (expr_wdof);
4420 } else
4421 arg3 = expr_wdof;
4422
4423 fd_finv = gnm_func_lookup_or_add_placeholder ("FINV");
4424 gnm_func_inc_usage (fd_finv);
4425
4426 dao_set_cell_expr
4427 (dao, 6, 2,
4428 gnm_expr_new_funcall3
4429 (fd_finv,
4430 gnm_expr_new_constant
4431 (value_new_float (info->alpha)),
4432 make_cellref (-4, 0),
4433 arg3));
4434 gnm_func_dec_usage (fd_finv);
4435 }
4436 }
4437
4438 finish_anova_single_factor_tool:
4439
4440 gnm_func_dec_usage (fd_mean);
4441 gnm_func_dec_usage (fd_var);
4442 gnm_func_dec_usage (fd_sum);
4443 gnm_func_dec_usage (fd_count);
4444 gnm_func_dec_usage (fd_devsq);
4445
4446 dao->offset_row = 0;
4447 dao->offset_col = 0;
4448
4449 dao_redraw_respan (dao);
4450 return FALSE;
4451 }
4452
4453
4454
4455 gboolean
analysis_tool_anova_single_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)4456 analysis_tool_anova_single_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
4457 analysis_tool_engine_t selector, gpointer result)
4458 {
4459 analysis_tools_data_anova_single_t *info = specs;
4460
4461 switch (selector) {
4462 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
4463 return (dao_command_descriptor (dao, _("Single Factor ANOVA (%s)"), result)
4464 == NULL);
4465 case TOOL_ENGINE_UPDATE_DAO:
4466 prepare_input_range (&info->base.input, info->base.group_by);
4467 dao_adjust (dao, 7, 11 + g_slist_length (info->base.input));
4468 return FALSE;
4469 case TOOL_ENGINE_CLEAN_UP:
4470 return analysis_tool_generic_clean (specs);
4471 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
4472 return FALSE;
4473 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
4474 dao_prepare_output (NULL, dao, _("Anova"));
4475 return FALSE;
4476 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
4477 return dao_format_output (dao, _("Single Factor ANOVA"));
4478 case TOOL_ENGINE_PERFORM_CALC:
4479 default:
4480 return analysis_tool_anova_single_engine_run (dao, specs);
4481 }
4482 return TRUE; /* We shouldn't get here */
4483 }
4484
4485
/************* Fourier Analysis Tool **************************************
 *
 * This tool performs a fast Fourier transform, calculating the Fourier
 * transform as defined in Weaver: Theory of Discrete and Continuous
 * Fourier Analysis.
 *
 **/
4493
4494
4495 static gboolean
analysis_tool_fourier_engine_run(data_analysis_output_t * dao,analysis_tools_data_fourier_t * info)4496 analysis_tool_fourier_engine_run (data_analysis_output_t *dao,
4497 analysis_tools_data_fourier_t *info)
4498 {
4499 GSList *data = info->base.input;
4500 int col = 0;
4501
4502 GnmFunc *fd_fourier;
4503
4504 fd_fourier = gnm_func_lookup_or_add_placeholder ("FOURIER");
4505 gnm_func_inc_usage (fd_fourier);
4506
4507 dao_set_merge (dao, 0, 0, 1, 0);
4508 dao_set_italic (dao, 0, 0, 0, 0);
4509 dao_set_cell (dao, 0, 0, info->inverse ? _("Inverse Fourier Transform")
4510 : _("Fourier Transform"));
4511
4512 for (; data; data = data->next, col++) {
4513 GnmValue *val_org = value_dup (data->data);
4514 GnmExpr const *expr_fourier;
4515 int rows, n;
4516
4517 dao_set_italic (dao, 0, 1, 1, 2);
4518 set_cell_text_row (dao, 0, 2, _("/Real"
4519 "/Imaginary"));
4520 dao_set_merge (dao, 0, 1, 1, 1);
4521 analysis_tools_write_label (val_org, dao, &info->base, 0, 1, col + 1);
4522
4523 n = (val_org->v_range.cell.b.row - val_org->v_range.cell.a.row + 1) *
4524 (val_org->v_range.cell.b.col - val_org->v_range.cell.a.col + 1);
4525 rows = 1;
4526 while (rows < n)
4527 rows *= 2;
4528
4529 expr_fourier = gnm_expr_new_funcall3
4530 (fd_fourier,
4531 gnm_expr_new_constant (val_org),
4532 gnm_expr_new_constant (value_new_bool (info->inverse)),
4533 gnm_expr_new_constant (value_new_bool (TRUE)));
4534
4535 dao_set_array_expr (dao, 0, 3, 2, rows, expr_fourier);
4536
4537 dao->offset_col += 2;
4538 }
4539
4540 gnm_func_dec_usage (fd_fourier);
4541
4542 dao_redraw_respan (dao);
4543
4544 return FALSE;
4545 }
4546
4547 static int
analysis_tool_fourier_calc_length(analysis_tools_data_fourier_t * info)4548 analysis_tool_fourier_calc_length (analysis_tools_data_fourier_t *info)
4549 {
4550 int m = 1, n = analysis_tool_calc_length (&info->base);
4551
4552 while (m < n)
4553 m *= 2;
4554 return m;
4555 }
4556
4557
4558 gboolean
analysis_tool_fourier_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)4559 analysis_tool_fourier_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
4560 analysis_tool_engine_t selector, gpointer result)
4561 {
4562 analysis_tools_data_fourier_t *info = specs;
4563
4564 switch (selector) {
4565 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
4566 return (dao_command_descriptor (dao, _("Fourier Series (%s)"), result)
4567 == NULL);
4568 case TOOL_ENGINE_UPDATE_DAO:
4569 prepare_input_range (&info->base.input, info->base.group_by);
4570 dao_adjust (dao, 2 * g_slist_length (info->base.input),
4571 3 + analysis_tool_fourier_calc_length (specs));
4572 return FALSE;
4573 case TOOL_ENGINE_CLEAN_UP:
4574 return analysis_tool_generic_clean (specs);
4575 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
4576 return FALSE;
4577 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
4578 dao_prepare_output (NULL, dao, _("Fourier Series"));
4579 return FALSE;
4580 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
4581 return dao_format_output (dao, _("Fourier Series"));
4582 case TOOL_ENGINE_PERFORM_CALC:
4583 default:
4584 return analysis_tool_fourier_engine_run (dao, specs);
4585 }
4586 return TRUE; /* We shouldn't get here */
4587 }
4588