1 /*
2  * analysis-tools.c:
3  *
4  * Authors:
5  *   Jukka-Pekka Iivonen <jiivonen@hutcs.cs.hut.fi>
6  *   Andreas J. Guelzow  <aguelzow@taliesin.ca>
7  *
8  * (C) Copyright 2000, 2001 by Jukka-Pekka Iivonen <jiivonen@hutcs.cs.hut.fi>
9  * (C) Copyright 2002, 2004 by Andreas J. Guelzow  <aguelzow@taliesin.ca>
10  *
11  * Modified 2001 to use range_* functions of mathfunc.h
12  *
13  * This program is free software; you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation; either version 2 of the License, or
16  * (at your option) any later version.
17  *
18  * This program is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with this program; if not, see <https://www.gnu.org/licenses/>.
25  */
26 
27 #include <gnumeric-config.h>
28 #include <glib/gi18n-lib.h>
29 #include <gnumeric.h>
30 #include <tools/analysis-tools.h>
31 
32 #include <mathfunc.h>
33 #include <func.h>
34 #include <expr.h>
35 #include <position.h>
36 #include <tools/tools.h>
37 #include <value.h>
38 #include <cell.h>
39 #include <sheet.h>
40 #include <ranges.h>
41 #include <parse-util.h>
42 #include <style.h>
43 #include <regression.h>
44 #include <sheet-style.h>
45 #include <workbook.h>
46 #include <collect.h>
47 #include <gnm-format.h>
48 #include <sheet-object-cell-comment.h>
49 #include <workbook-control.h>
50 #include <command-context.h>
51 #include <sheet-object-graph.h>
52 #include <graph.h>
53 #include <goffice/goffice.h>
54 
55 #include <string.h>
56 #include <stdlib.h>
57 #include <math.h>
58 
59 
60 const GnmExpr *
make_cellref(int dx,int dy)61 make_cellref (int dx, int dy)
62 {
63 	GnmCellRef r;
64 	r.sheet = NULL;
65 	r.col = dx;
66 	r.col_relative = TRUE;
67 	r.row = dy;
68 	r.row_relative = TRUE;
69 	return gnm_expr_new_cellref (&r);
70 }
71 
72 const GnmExpr *
make_rangeref(int dx0,int dy0,int dx1,int dy1)73 make_rangeref (int dx0, int dy0, int dx1, int dy1)
74 {
75 	GnmCellRef a, b;
76 	GnmValue *val;
77 
78 	a.sheet = NULL;
79 	a.col = dx0;
80 	a.col_relative = TRUE;
81 	a.row = dy0;
82 	a.row_relative = TRUE;
83 	b.sheet = NULL;
84 	b.col = dx1;
85 	b.col_relative = TRUE;
86 	b.row = dy1;
87 	b.row_relative = TRUE;
88 
89 	val = value_new_cellrange_unsafe (&a, &b);
90 	return gnm_expr_new_constant (val);
91 }
92 
93 
94 typedef struct {
95 	char *format;
96 	GPtrArray *data_lists;
97 	gboolean read_label;
98 	gboolean ignore_non_num;
99 	guint length;
100 	Sheet *sheet;
101 } data_list_specs_t;
102 
103 /*
104  *  cb_adjust_areas:
105  *  @data:
106  *  @user_data:
107  *
108  */
109 static void
cb_adjust_areas(gpointer data,G_GNUC_UNUSED gpointer user_data)110 cb_adjust_areas (gpointer data, G_GNUC_UNUSED gpointer user_data)
111 {
112 	GnmValue *range = (GnmValue *)data;
113 
114 	if (range == NULL || !VALUE_IS_CELLRANGE (range)) {
115 		return;
116 	}
117 
118 	range->v_range.cell.a.col_relative = 0;
119 	range->v_range.cell.a.row_relative = 0;
120 	range->v_range.cell.b.col_relative = 0;
121 	range->v_range.cell.b.row_relative = 0;
122 }
123 
124 /*
125  *  analysis_tools_remove_label:
126  *
127  */
128 static void
analysis_tools_remove_label(GnmValue * val,gboolean labels,group_by_t group_by)129 analysis_tools_remove_label (GnmValue *val,
130 			     gboolean labels, group_by_t group_by)
131 {
132 	if (labels) {
133 		switch (group_by) {
134 		case GROUPED_BY_ROW:
135 			val->v_range.cell.a.col++;
136 			break;
137 		case GROUPED_BY_COL:
138 		case GROUPED_BY_BIN:
139 		case GROUPED_BY_AREA:
140 		default:
141 			val->v_range.cell.a.row++;
142 			break;
143 		}
144 	}
145 }
146 
147 
148 
149 /*
150  *  analysis_tools_write_label:
151  *  @val: range to extract label from
152  *  @dao: data_analysis_output_t, where to write to
153  *  @info: analysis_tools_data_generic_t info
154  *  @x: output col number
155  *  @y: output row number
156  *  @i: default col/row number
157  *
158  */
159 
160 void
analysis_tools_write_label(GnmValue * val,data_analysis_output_t * dao,analysis_tools_data_generic_t * info,int x,int y,int i)161 analysis_tools_write_label (GnmValue *val, data_analysis_output_t *dao,
162 			    analysis_tools_data_generic_t *info,
163 			    int x, int y, int i)
164 {
165 	char const *format = NULL;
166 
167 	if (info->labels) {
168 		GnmValue *label = value_dup (val);
169 
170 		label->v_range.cell.b = label->v_range.cell.a;
171 		dao_set_cell_expr (dao, x, y, gnm_expr_new_constant (label));
172 		analysis_tools_remove_label (val, info->labels, info->group_by);
173 	} else {
174 		switch (info->group_by) {
175 		case GROUPED_BY_ROW:
176 			format = _("Row %i");
177 			break;
178 		case GROUPED_BY_COL:
179 			format = _("Column %i");
180 			break;
181 		case GROUPED_BY_BIN:
182 			format = _("Bin %i");
183 			break;
184 		case GROUPED_BY_AREA:
185 		default:
186 			format = _("Area %i");
187 			break;
188 		}
189 
190 		dao_set_cell_printf (dao, x, y, format, i);
191 	}
192 }
193 
194 /*
195  *  analysis_tools_write_label:
196  *  @val: range to extract label from
197  *  @dao: data_analysis_output_t, where to write to
198  *  @labels: analysis_tools_data_generic_t infowhether the
199  *           @val contains label info
200  *  @group_by: grouping info
201  *  @x: output col number
202  *  @y: output row number
203  *  @i: default col/row number
204  *
205  */
206 
207 static void
analysis_tools_write_a_label(GnmValue * val,data_analysis_output_t * dao,gboolean labels,group_by_t group_by,int x,int y)208 analysis_tools_write_a_label (GnmValue *val, data_analysis_output_t *dao,
209 			      gboolean   labels, group_by_t group_by,
210 			      int x, int y)
211 {
212 	if (labels) {
213 		GnmValue *label = value_dup (val);
214 
215 		label->v_range.cell.b = label->v_range.cell.a;
216 		dao_set_cell_expr (dao, x, y, gnm_expr_new_constant (label));
217 		analysis_tools_remove_label (val, labels, group_by);
218 	} else {
219 		char const *str = ((group_by == GROUPED_BY_ROW) ? "row" : "col");
220 		char const *label = ((group_by == GROUPED_BY_ROW) ? _("Row") : _("Column"));
221 
222 		GnmFunc *fd_concatenate;
223 		GnmFunc *fd_cell;
224 
225 		fd_concatenate = gnm_func_lookup_or_add_placeholder ("CONCATENATE");
226 		gnm_func_inc_usage (fd_concatenate);
227 		fd_cell = gnm_func_lookup_or_add_placeholder ("CELL");
228 		gnm_func_inc_usage (fd_cell);
229 
230 		dao_set_cell_expr (dao, x, y, gnm_expr_new_funcall3
231 				   (fd_concatenate, gnm_expr_new_constant (value_new_string (label)),
232 				    gnm_expr_new_constant (value_new_string (" ")),
233 				    gnm_expr_new_funcall2 (fd_cell,
234 							   gnm_expr_new_constant (value_new_string (str)),
235 							   gnm_expr_new_constant (value_dup (val)))));
236 
237 		gnm_func_dec_usage (fd_concatenate);
238 		gnm_func_dec_usage (fd_cell);
239 	}
240 }
241 
242 /*
243  *  analysis_tools_write_label_ftest:
244  *  @val: range to extract label from
245  *  @dao: data_analysis_output_t, where to write to
246  *  @info: analysis_tools_data_generic_t info
247  *  @x: output col number
248  *  @y: output row number
249  *  @i: default col/row number
250  *
251  */
252 
253 void
analysis_tools_write_label_ftest(GnmValue * val,data_analysis_output_t * dao,int x,int y,gboolean labels,int i)254 analysis_tools_write_label_ftest (GnmValue *val, data_analysis_output_t *dao,
255 				  int x, int y, gboolean labels, int i)
256 {
257 	cb_adjust_areas (val, NULL);
258 
259 	if (labels) {
260 		GnmValue *label = value_dup (val);
261 
262 		label->v_range.cell.b = label->v_range.cell.a;
263 		dao_set_cell_expr (dao, x, y, gnm_expr_new_constant (label));
264 
265 		if ((val->v_range.cell.b.col - val->v_range.cell.a.col) <
266 		    (val->v_range.cell.b.row - val->v_range.cell.a.row))
267 			val->v_range.cell.a.row++;
268 		else
269 			val->v_range.cell.a.col++;
270 	} else {
271 		dao_set_cell_printf (dao, x, y,  _("Variable %i"), i);
272 	}
273 }
274 
275 /*
276  *  cb_cut_into_cols:
277  *  @data:
278  *  @user_data:
279  *
280  */
281 static void
cb_cut_into_cols(gpointer data,gpointer user_data)282 cb_cut_into_cols (gpointer data, gpointer user_data)
283 {
284 	GnmValue *range = (GnmValue *)data;
285 	GnmValue *col_value;
286 	GSList **list_of_units = (GSList **) user_data;
287 	gint col;
288 
289 	if (range == NULL) {
290 		return;
291 	}
292 	if (!VALUE_IS_CELLRANGE (range) ||
293 	    (range->v_range.cell.b.sheet != NULL &&
294 	     range->v_range.cell.b.sheet != range->v_range.cell.a.sheet)) {
295 		value_release (range);
296 		return;
297 	}
298 
299 	cb_adjust_areas (data, NULL);
300 
301 	if (range->v_range.cell.a.col == range->v_range.cell.b.col) {
302 		*list_of_units = g_slist_prepend (*list_of_units, range);
303 		return;
304 	}
305 
306 	for (col = range->v_range.cell.a.col; col <= range->v_range.cell.b.col; col++) {
307 		col_value = value_dup (range);
308 		col_value->v_range.cell.a.col = col;
309 		col_value->v_range.cell.b.col = col;
310 		*list_of_units = g_slist_prepend (*list_of_units, col_value);
311 	}
312 	value_release (range);
313 	return;
314 }
315 
316 /*
317  *  cb_cut_into_rows:
318  *  @data:
319  *  @user_data:
320  *
321  */
322 static void
cb_cut_into_rows(gpointer data,gpointer user_data)323 cb_cut_into_rows (gpointer data, gpointer user_data)
324 {
325 	GnmValue *range = (GnmValue *)data;
326 	GnmValue *row_value;
327 	GSList **list_of_units = (GSList **) user_data;
328 	gint row;
329 
330 	if (range == NULL) {
331 		return;
332 	}
333 	if (!VALUE_IS_CELLRANGE (range) ||
334 	    (range->v_range.cell.b.sheet != NULL &&
335 	     range->v_range.cell.b.sheet != range->v_range.cell.a.sheet)) {
336 		value_release (range);
337 		return;
338 	}
339 
340 	cb_adjust_areas (data, NULL);
341 
342 	if (range->v_range.cell.a.row == range->v_range.cell.b.row) {
343 		*list_of_units = g_slist_prepend (*list_of_units, range);
344 		return;
345 	}
346 
347 	for (row = range->v_range.cell.a.row; row <= range->v_range.cell.b.row; row++) {
348 		row_value = value_dup (range);
349 		row_value->v_range.cell.a.row = row;
350 		row_value->v_range.cell.b.row = row;
351 		*list_of_units = g_slist_prepend (*list_of_units, row_value);
352 	}
353 	value_release (range);
354 	return;
355 }
356 
357 
358 /**
359  *  prepare_input_range:
360  *  @input_range: (inout) (element-type GnmRange) (transfer full):
361  *  @group_by:
362  */
363 void
prepare_input_range(GSList ** input_range,group_by_t group_by)364 prepare_input_range (GSList **input_range, group_by_t group_by)
365 {
366 	GSList *input_by_units = NULL;
367 
368 	switch (group_by) {
369 	case GROUPED_BY_ROW:
370 		g_slist_foreach (*input_range, cb_cut_into_rows, &input_by_units);
371 		g_slist_free (*input_range);
372 		*input_range = g_slist_reverse (input_by_units);
373 		return;
374 	case GROUPED_BY_COL:
375 		g_slist_foreach (*input_range, cb_cut_into_cols, &input_by_units);
376 		g_slist_free (*input_range);
377 		*input_range = g_slist_reverse (input_by_units);
378 		return;
379 	case GROUPED_BY_AREA:
380 	default:
381 		g_slist_foreach (*input_range, cb_adjust_areas, NULL);
382 		return;
383 	}
384 }
385 
386 typedef struct {
387 	gboolean init;
388 	gint size;
389 	gboolean hom;
390 } homogeneity_check_t;
391 
392 
393 /*
394  *  cb_check_hom:
395  *  @data:
396  *  @user_data:
397  *
398  */
399 static void
cb_check_hom(gpointer data,gpointer user_data)400 cb_check_hom (gpointer data, gpointer user_data)
401 {
402 	GnmValue *range = (GnmValue *)data;
403 	homogeneity_check_t *state = (homogeneity_check_t *) user_data;
404 	gint this_size;
405 
406 	if (!VALUE_IS_CELLRANGE (range)) {
407 		state->hom = FALSE;
408 		return;
409 	}
410 
411 	this_size = (range->v_range.cell.b.col - range->v_range.cell.a.col + 1) *
412 		(range->v_range.cell.b.row - range->v_range.cell.a.row + 1);
413 
414 	if (state->init) {
415 		if (state->size != this_size)
416 			state->hom = FALSE;
417 	} else {
418 		state->init = TRUE;
419 		state->size = this_size;
420 	}
421 	return;
422 }
423 
424 /*
425  *  gnm_check_input_range_list_homogeneity:
426  *  @input_range:
427  *
428  *  Check that all columns have the same size
429  *
430  */
431 static gboolean
gnm_check_input_range_list_homogeneity(GSList * input_range)432 gnm_check_input_range_list_homogeneity (GSList *input_range)
433 {
434 	homogeneity_check_t state = { FALSE, 0, TRUE };
435 
436 	g_slist_foreach (input_range, cb_check_hom, &state);
437 
438 	return state.hom;
439 }
440 
441 
442 /***** Some general routines ***********************************************/
443 
444 /*
445  * Set a column of text from a string like "/first/second/third" or "|foo|bar|baz".
446  */
447 void
set_cell_text_col(data_analysis_output_t * dao,int col,int row,const char * text)448 set_cell_text_col (data_analysis_output_t *dao, int col, int row, const char *text)
449 {
450 	gboolean leave = FALSE;
451 	char *copy, *orig_copy;
452 	char sep = *text;
453 	if (sep == 0) return;
454 
455 	copy = orig_copy = g_strdup (text + 1);
456 	while (!leave) {
457 		char *p = copy;
458 		while (*copy && *copy != sep)
459 			copy++;
460 		if (*copy)
461 			*copy++ = 0;
462 		else
463 			leave = TRUE;
464 		dao_set_cell_value (dao, col, row++, value_new_string (p));
465 	}
466 	g_free (orig_copy);
467 }
468 
469 
470 /*
471  * Set a row of text from a string like "/first/second/third" or "|foo|bar|baz".
472  */
473 void
set_cell_text_row(data_analysis_output_t * dao,int col,int row,const char * text)474 set_cell_text_row (data_analysis_output_t *dao, int col, int row, const char *text)
475 {
476 	gboolean leave = 0;
477 	char *copy, *orig_copy;
478 	char sep = *text;
479 	if (sep == 0) return;
480 
481 	copy = orig_copy = g_strdup (text + 1);
482 	while (!leave) {
483 		char *p = copy;
484 		while (*copy && *copy != sep)
485 			copy++;
486 		if (*copy)
487 			*copy++ = 0;
488 		else
489 			leave = TRUE;
490 		dao_set_cell_value (dao, col++, row, value_new_string (p));
491 	}
492 	g_free (orig_copy);
493 }
494 
495 gboolean
analysis_tool_generic_clean(gpointer specs)496 analysis_tool_generic_clean (gpointer specs)
497 {
498 	analysis_tools_data_generic_t *info = specs;
499 
500 	range_list_destroy (info->input);
501 	info->input = NULL;
502 	return FALSE;
503 }
504 
505 gboolean
analysis_tool_generic_b_clean(gpointer specs)506 analysis_tool_generic_b_clean (gpointer specs)
507 {
508 	analysis_tools_data_generic_b_t *info = specs;
509 
510 	value_release (info->range_1);
511 	info->range_1 = NULL;
512 	value_release (info->range_2);
513 	info->range_2 = NULL;
514 	return FALSE;
515 }
516 
517 
518 
/*
 *  analysis_tool_calc_length:
 *  @info: tool info holding the input ranges, grouping and label flag
 *
 *  Returns the length of the longest input range, but at least 1,
 *  reduced by one when info->labels is set.  The length of a range is
 *  its cell count for area grouping, its row count for column grouping
 *  and its column count otherwise.
 */
int
analysis_tool_calc_length (analysis_tools_data_generic_t *info)
{
	int longest = 1;
	GSList *l;

	for (l = info->input; l != NULL; l = l->next) {
		GnmValue *range = l->data;
		int n_rows = range->v_range.cell.b.row - range->v_range.cell.a.row + 1;
		int n_cols = range->v_range.cell.b.col - range->v_range.cell.a.col + 1;
		int len;

		if (info->group_by == GROUPED_BY_AREA)
			len = n_rows * n_cols;
		else if (info->group_by == GROUPED_BY_COL)
			len = n_rows;
		else
			len = n_cols;

		if (len > longest)
			longest = len;
	}

	return info->labels ? longest - 1 : longest;
}
542 
543 /**
544  * analysis_tool_get_function:
545  * @name: name of function
546  * @dao:
547  *
548  * Returns: (transfer full): the function named @name or a placeholder.
549  * The usage count of the function is incremented.
550  */
551 GnmFunc *
analysis_tool_get_function(char const * name,data_analysis_output_t * dao)552 analysis_tool_get_function (char const *name,
553 			    data_analysis_output_t *dao)
554 {
555 	GnmFunc *fd;
556 
557 	fd = gnm_func_lookup_or_add_placeholder (name);
558 	gnm_func_inc_usage (fd);
559 	return fd;
560 }
561 
562 
563 
564 /************* Correlation Tool *******************************************
565  *
566  * The correlation tool calculates the correlation coefficient of two
567  * data sets.  The two data sets can be grouped by rows or by columns.
568  * The results are given in a table which can be printed out in a new
569  * sheet, in a new workbook, or simply into an existing sheet.
570  *
571  **/
572 
573 gboolean
analysis_tool_table(data_analysis_output_t * dao,analysis_tools_data_generic_t * info,gchar const * title,gchar const * functionname,gboolean full_table)574 analysis_tool_table (data_analysis_output_t *dao,
575 		     analysis_tools_data_generic_t *info,
576 		     gchar const *title, gchar const *functionname,
577 		     gboolean full_table)
578 {
579 	GSList *inputdata, *inputexpr = NULL;
580 	GnmFunc *fd = NULL;
581 
582 	guint col, row;
583 
584 	dao_set_italic (dao, 0, 0, 0, 0);
585 	dao_set_cell_printf (dao, 0, 0, "%s", title);
586 
587 	fd = gnm_func_lookup_or_add_placeholder (functionname);
588 	gnm_func_inc_usage (fd);
589 
590 	for (col = 1, inputdata = info->input; inputdata != NULL;
591 	     inputdata = inputdata->next, col++) {
592 		GnmValue *val = NULL;
593 
594 		val = value_dup (inputdata->data);
595 
596 		/* Label */
597 		dao_set_italic (dao, col, 0, col, 0);
598 		analysis_tools_write_label (val, dao, info,
599 					    col, 0, col);
600 
601 		inputexpr = g_slist_prepend (inputexpr,
602 					     (gpointer) gnm_expr_new_constant (val));
603 	}
604 	inputexpr = g_slist_reverse (inputexpr);
605 
606 	for (row = 1, inputdata = info->input; inputdata != NULL;
607 	     inputdata = inputdata->next, row++) {
608 		GnmValue *val = value_dup (inputdata->data);
609 		GSList *colexprlist;
610 
611 		/* Label */
612 		dao_set_italic (dao, 0, row, 0, row);
613 		analysis_tools_write_label (val, dao, info,
614 					    0, row, row);
615 
616 		for (col = 1, colexprlist = inputexpr; colexprlist != NULL;
617 		     colexprlist = colexprlist->next, col++) {
618 			GnmExpr const *colexpr = colexprlist->data;
619 
620 			if ((!full_table) && (col < row))
621 				continue;
622 
623 			dao_set_cell_expr
624 				(dao, row, col,
625 				 gnm_expr_new_funcall2
626 				 (fd,
627 				  gnm_expr_new_constant (value_dup (val)),
628 				  gnm_expr_copy (colexpr)));
629 		}
630 
631 		value_release (val);
632 	}
633 
634 	g_slist_free_full (inputexpr, (GDestroyNotify)gnm_expr_free);
635 	if (fd) gnm_func_dec_usage (fd);
636 
637 	dao_redraw_respan (dao);
638 	return FALSE;
639 }
640 
641 static gboolean
analysis_tool_correlation_engine_run(data_analysis_output_t * dao,analysis_tools_data_generic_t * info)642 analysis_tool_correlation_engine_run (data_analysis_output_t *dao,
643 				      analysis_tools_data_generic_t *info)
644 {
645 	return analysis_tool_table (dao, info, _("Correlations"),
646 				    "CORREL", FALSE);
647 }
648 
649 gboolean
analysis_tool_correlation_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)650 analysis_tool_correlation_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
651 				   analysis_tool_engine_t selector, gpointer result)
652 {
653 	analysis_tools_data_generic_t *info = specs;
654 
655 	switch (selector) {
656 	case TOOL_ENGINE_UPDATE_DESCRIPTOR:
657 		return (dao_command_descriptor (dao, _("Correlation (%s)"), result)
658 			== NULL);
659 	case TOOL_ENGINE_UPDATE_DAO:
660 		prepare_input_range (&info->input, info->group_by);
661 		if (!gnm_check_input_range_list_homogeneity (info->input)) {
662 			info->err = info->group_by + 1;
663 			return TRUE;
664 		}
665 		dao_adjust (dao, 1 + g_slist_length (info->input),
666 			    1 + g_slist_length (info->input));
667 		return FALSE;
668 	case TOOL_ENGINE_CLEAN_UP:
669 		return analysis_tool_generic_clean (specs);
670 	case TOOL_ENGINE_LAST_VALIDITY_CHECK:
671 		return FALSE;
672 	case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
673 		dao_prepare_output (NULL, dao, _("Correlation"));
674 		return FALSE;
675 	case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
676 		return dao_format_output (dao, _("Correlation"));
677 	case TOOL_ENGINE_PERFORM_CALC:
678 	default:
679 		return analysis_tool_correlation_engine_run (dao, specs);
680 	}
681 	return TRUE;  /* We shouldn't get here */
682 }
683 
684 
685 
686 
687 /************* Covariance Tool ********************************************
688  *
689  * The covariance tool calculates the covariance of two data sets.
690  * The two data sets can be grouped by rows or by columns.  The
691  * results are given in a table which can be printed out in a new
692  * sheet, in a new workbook, or simply into an existing sheet.
693  *
694  **/
695 
696 static gboolean
analysis_tool_covariance_engine_run(data_analysis_output_t * dao,analysis_tools_data_generic_t * info)697 analysis_tool_covariance_engine_run (data_analysis_output_t *dao,
698 				      analysis_tools_data_generic_t *info)
699 {
700 	return analysis_tool_table (dao, info, _("Covariances"),
701 				    "COVAR", FALSE);
702 }
703 
704 gboolean
analysis_tool_covariance_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)705 analysis_tool_covariance_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
706 				   analysis_tool_engine_t selector, gpointer result)
707 {
708 	analysis_tools_data_generic_t *info = specs;
709 
710 	switch (selector) {
711 	case TOOL_ENGINE_UPDATE_DESCRIPTOR:
712 		return (dao_command_descriptor (dao, _("Covariance (%s)"), result)
713 			== NULL);
714 	case TOOL_ENGINE_UPDATE_DAO:
715 		prepare_input_range (&info->input, info->group_by);
716 		if (!gnm_check_input_range_list_homogeneity (info->input)) {
717 			info->err = info->group_by + 1;
718 			return TRUE;
719 		}
720 		dao_adjust (dao, 1 + g_slist_length (info->input),
721 			    1 + g_slist_length (info->input));
722 		return FALSE;
723 	case TOOL_ENGINE_CLEAN_UP:
724 		return analysis_tool_generic_clean (specs);
725 	case TOOL_ENGINE_LAST_VALIDITY_CHECK:
726 		return FALSE;
727 	case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
728 		dao_prepare_output (NULL, dao, _("Covariance"));
729 		return FALSE;
730 	case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
731 		return dao_format_output (dao, _("Covariance"));
732 	case TOOL_ENGINE_PERFORM_CALC:
733 	default:
734 		return analysis_tool_covariance_engine_run (dao, specs);
735 	}
736 	return TRUE;  /* We shouldn't get here */
737 }
738 
739 
740 
741 
742 /************* Descriptive Statistics Tool *******************************
743  *
744  * Descriptive Statistics Tool calculates some useful statistical
745  * information such as the mean, standard deviation, sample variance,
746  * skewness, kurtosis, and standard error about the given variables.
747  * The results are given in a table which can be printed out in a new
748  * sheet, in a new workbook, or simply into an existing sheet.
749  *
750  **/
751 
752 typedef struct {
753 	gnm_float mean;
754 	gint       error_mean;
755 	gnm_float var;
756 	gint       error_var;
757 	gint      len;
758 } desc_stats_t;
759 
760 static void
summary_statistics(data_analysis_output_t * dao,analysis_tools_data_descriptive_t * info)761 summary_statistics (data_analysis_output_t *dao,
762 		    analysis_tools_data_descriptive_t *info)
763 {
764 	guint     col;
765 	GSList *data = info->base.input;
766 	GnmFunc *fd_mean;
767 	GnmFunc *fd_median;
768 	GnmFunc *fd_mode;
769 	GnmFunc *fd_stdev;
770 	GnmFunc *fd_var;
771 	GnmFunc *fd_kurt;
772 	GnmFunc *fd_skew;
773 	GnmFunc *fd_min;
774 	GnmFunc *fd_max;
775 	GnmFunc *fd_sum;
776 	GnmFunc *fd_count;
777 	GnmFunc *fd_sqrt;
778 
779 	fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
780 	gnm_func_inc_usage (fd_mean);
781 	fd_median = gnm_func_lookup_or_add_placeholder (info->use_ssmedian ? "SSMEDIAN" : "MEDIAN");
782 	gnm_func_inc_usage (fd_median);
783 	fd_mode = gnm_func_lookup_or_add_placeholder ("MODE");
784 	gnm_func_inc_usage (fd_mode);
785 	fd_stdev = gnm_func_lookup_or_add_placeholder ("STDEV");
786 	gnm_func_inc_usage (fd_stdev);
787 	fd_var = gnm_func_lookup_or_add_placeholder ("VAR");
788 	gnm_func_inc_usage (fd_var);
789 	fd_kurt = gnm_func_lookup_or_add_placeholder ("KURT");
790 	gnm_func_inc_usage (fd_kurt);
791 	fd_skew = gnm_func_lookup_or_add_placeholder ("SKEW");
792 	gnm_func_inc_usage (fd_skew);
793 	fd_min = gnm_func_lookup_or_add_placeholder ("MIN");
794 	gnm_func_inc_usage (fd_min);
795 	fd_max = gnm_func_lookup_or_add_placeholder ("MAX");
796 	gnm_func_inc_usage (fd_max);
797 	fd_sum = gnm_func_lookup_or_add_placeholder ("SUM");
798 	gnm_func_inc_usage (fd_sum);
799 	fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
800 	gnm_func_inc_usage (fd_count);
801 	fd_sqrt = gnm_func_lookup_or_add_placeholder ("SQRT");
802 	gnm_func_inc_usage (fd_sqrt);
803 
804         dao_set_cell (dao, 0, 0, NULL);
805 
806 	dao_set_italic (dao, 0, 1, 0, 13);
807 	/*
808 	 * Note to translators: in the following string and others like it,
809 	 * the "/" is a separator character that can be changed to anything
810 	 * if the translation needs the slash; just use, say, "|" instead.
811 	 *
812 	 * The items are bundled like this to increase translation context.
813 	 */
814         set_cell_text_col (dao, 0, 1, _("/Mean"
815 					"/Standard Error"
816 					"/Median"
817 					"/Mode"
818 					"/Standard Deviation"
819 					"/Sample Variance"
820 					"/Kurtosis"
821 					"/Skewness"
822 					"/Range"
823 					"/Minimum"
824 					"/Maximum"
825 					"/Sum"
826 					"/Count"));
827 
828 	for (col = 0; data != NULL; data = data->next, col++) {
829 		GnmExpr const *expr;
830 		GnmExpr const *expr_min;
831 		GnmExpr const *expr_max;
832 		GnmExpr const *expr_var;
833 		GnmExpr const *expr_count;
834 		GnmValue *val_org = value_dup (data->data);
835 
836 		dao_set_italic (dao, col + 1, 0, col+1, 0);
837 		/* Note that analysis_tools_write_label may modify val_org */
838 		analysis_tools_write_label (val_org, dao, &info->base,
839 					    col + 1, 0, col + 1);
840 
841 	        /* Mean */
842 		expr = gnm_expr_new_funcall1
843 			(fd_mean,
844 			 gnm_expr_new_constant (value_dup (val_org)));
845 		dao_set_cell_expr (dao, col + 1, 1, expr);
846 
847 		/* Standard Deviation */
848 		expr = gnm_expr_new_funcall1
849 			(fd_stdev,
850 			 gnm_expr_new_constant (value_dup (val_org)));
851 		dao_set_cell_expr (dao, col + 1, 5, expr);
852 
853 		/* Sample Variance */
854 		expr_var = gnm_expr_new_funcall1
855 			(fd_var,
856 			 gnm_expr_new_constant (value_dup (val_org)));
857 		dao_set_cell_expr (dao, col + 1, 6, gnm_expr_copy (expr_var));
858 
859 		/* Median */
860 		expr = gnm_expr_new_funcall1
861 			(fd_median,
862 			 gnm_expr_new_constant (value_dup (val_org)));
863 		dao_set_cell_expr (dao, col + 1, 3, expr);
864 
865 		/* Mode */
866 		expr = gnm_expr_new_funcall1
867 			(fd_mode,
868 			 gnm_expr_new_constant (value_dup (val_org)));
869 		dao_set_cell_expr (dao, col + 1, 4, expr);
870 
871 		/* Kurtosis */
872 		expr = gnm_expr_new_funcall1
873 			(fd_kurt,
874 			 gnm_expr_new_constant (value_dup (val_org)));
875 		dao_set_cell_expr (dao, col + 1, 7, expr);
876 
877 		/* Skewness */
878 		expr = gnm_expr_new_funcall1
879 			(fd_skew,
880 			 gnm_expr_new_constant (value_dup (val_org)));
881 		dao_set_cell_expr (dao, col + 1, 8, expr);
882 
883 		/* Minimum */
884 		expr_min = gnm_expr_new_funcall1
885 			(fd_min,
886 			 gnm_expr_new_constant (value_dup (val_org)));
887 		dao_set_cell_expr (dao, col + 1, 10, gnm_expr_copy (expr_min));
888 
889 		/* Maximum */
890 		expr_max = gnm_expr_new_funcall1
891 			(fd_max,
892 			 gnm_expr_new_constant (value_dup (val_org)));
893 		dao_set_cell_expr (dao, col + 1, 11, gnm_expr_copy (expr_max));
894 
895 		/* Range */
896 		expr = gnm_expr_new_binary (expr_max, GNM_EXPR_OP_SUB, expr_min);
897 		dao_set_cell_expr (dao, col + 1, 9, expr);
898 
899 		/* Sum */
900 		expr = gnm_expr_new_funcall1
901 			(fd_sum,
902 			 gnm_expr_new_constant (value_dup (val_org)));
903 		dao_set_cell_expr (dao, col + 1, 12, expr);
904 
905 		/* Count */
906 		expr_count = gnm_expr_new_funcall1
907 			(fd_count,
908 			 gnm_expr_new_constant (val_org));
909 		dao_set_cell_expr (dao, col + 1, 13, gnm_expr_copy (expr_count));
910 
911 		/* Standard Error */
912 		expr = gnm_expr_new_funcall1
913 			(fd_sqrt,
914 			 gnm_expr_new_binary (expr_var,
915 					      GNM_EXPR_OP_DIV,
916 					      expr_count));
917 		dao_set_cell_expr (dao, col + 1, 2, expr);
918 	}
919 
920 	gnm_func_dec_usage (fd_mean);
921 	gnm_func_dec_usage (fd_median);
922 	gnm_func_dec_usage (fd_mode);
923 	gnm_func_dec_usage (fd_stdev);
924 	gnm_func_dec_usage (fd_var);
925 	gnm_func_dec_usage (fd_kurt);
926 	gnm_func_dec_usage (fd_skew);
927 	gnm_func_dec_usage (fd_min);
928 	gnm_func_dec_usage (fd_max);
929 	gnm_func_dec_usage (fd_sum);
930 	gnm_func_dec_usage (fd_count);
931 	gnm_func_dec_usage (fd_sqrt);
932 }
933 
934 static void
confidence_level(data_analysis_output_t * dao,analysis_tools_data_descriptive_t * info)935 confidence_level (data_analysis_output_t *dao,
936 		  analysis_tools_data_descriptive_t *info)
937 {
938         guint col;
939 	char *buffer;
940 	char *format;
941 	GSList *data = info->base.input;
942 	GnmFunc *fd_mean;
943 	GnmFunc *fd_var;
944 	GnmFunc *fd_count;
945 	GnmFunc *fd_tinv;
946 	GnmFunc *fd_sqrt;
947 
948 	format = g_strdup_printf (_("/%%%s%%%% CI for the Mean from"
949 				    "/to"), GNM_FORMAT_g);
950 	buffer = g_strdup_printf (format, info->c_level * 100);
951 	g_free (format);
952 	dao_set_italic (dao, 0, 1, 0, 2);
953 	set_cell_text_col (dao, 0, 1, buffer);
954         g_free (buffer);
955 
956         dao_set_cell (dao, 0, 0, NULL);
957 
958 	fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
959 	gnm_func_inc_usage (fd_mean);
960 	fd_var = gnm_func_lookup_or_add_placeholder ("VAR");
961 	gnm_func_inc_usage (fd_var);
962 	fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
963 	gnm_func_inc_usage (fd_count);
964 	fd_tinv = gnm_func_lookup_or_add_placeholder ("TINV");
965 	gnm_func_inc_usage (fd_tinv);
966 	fd_sqrt = gnm_func_lookup_or_add_placeholder ("SQRT");
967 	gnm_func_inc_usage (fd_sqrt);
968 
969 
970 	for (col = 0; data != NULL; data = data->next, col++) {
971 		GnmExpr const *expr;
972 		GnmExpr const *expr_mean;
973 		GnmExpr const *expr_var;
974 		GnmExpr const *expr_count;
975 		GnmValue *val_org = value_dup (data->data);
976 
977 		dao_set_italic (dao, col+1, 0, col+1, 0);
978 		/* Note that analysis_tools_write_label may modify val_org */
979 		analysis_tools_write_label (val_org, dao, &info->base, col + 1, 0, col + 1);
980 
981 		expr_mean = gnm_expr_new_funcall1
982 			(fd_mean,
983 			 gnm_expr_new_constant (value_dup (val_org)));
984 
985 		expr_var = gnm_expr_new_funcall1
986 			(fd_var,
987 			 gnm_expr_new_constant (value_dup (val_org)));
988 
989 		expr_count = gnm_expr_new_funcall1
990 			(fd_count,
991 			 gnm_expr_new_constant (val_org));
992 
993 		expr = gnm_expr_new_binary
994 			(gnm_expr_new_funcall2
995 			 (fd_tinv,
996 			  gnm_expr_new_constant (value_new_float (1 - info->c_level)),
997 			  gnm_expr_new_binary
998 			  (gnm_expr_copy (expr_count),
999 			   GNM_EXPR_OP_SUB,
1000 			   gnm_expr_new_constant (value_new_int (1)))),
1001 			 GNM_EXPR_OP_MULT,
1002 			 gnm_expr_new_funcall1
1003 			 (fd_sqrt,
1004 			  gnm_expr_new_binary (expr_var,
1005 					       GNM_EXPR_OP_DIV,
1006 					       expr_count)));
1007 
1008 		dao_set_cell_expr (dao, col + 1, 1,
1009 				   gnm_expr_new_binary
1010 				   (gnm_expr_copy (expr_mean),
1011 				    GNM_EXPR_OP_SUB,
1012 				    gnm_expr_copy (expr)));
1013 		dao_set_cell_expr (dao, col + 1, 2,
1014 				   gnm_expr_new_binary (expr_mean,
1015 							GNM_EXPR_OP_ADD,
1016 							expr));
1017 	}
1018 
1019 	gnm_func_dec_usage (fd_mean);
1020 	gnm_func_dec_usage (fd_var);
1021 	gnm_func_dec_usage (fd_count);
1022 	gnm_func_dec_usage (fd_tinv);
1023 	gnm_func_dec_usage (fd_sqrt);
1024 }
1025 
1026 static void
kth_smallest_largest(data_analysis_output_t * dao,analysis_tools_data_descriptive_t * info,char const * func,char const * label,int k)1027 kth_smallest_largest (data_analysis_output_t *dao,
1028 		      analysis_tools_data_descriptive_t *info,
1029 		      char const* func, char const* label, int k)
1030 {
1031         guint col;
1032 	GSList *data = info->base.input;
1033 	GnmFunc *fd = gnm_func_lookup_or_add_placeholder (func);
1034 	gnm_func_inc_usage (fd);
1035 
1036 	dao_set_italic (dao, 0, 1, 0, 1);
1037         dao_set_cell_printf (dao, 0, 1, label, k);
1038 
1039         dao_set_cell (dao, 0, 0, NULL);
1040 
1041 	for (col = 0; data != NULL; data = data->next, col++) {
1042 		GnmExpr const *expr = NULL;
1043 		GnmValue *val = value_dup (data->data);
1044 
1045 		dao_set_italic (dao, col + 1, 0, col + 1, 0);
1046 		analysis_tools_write_label (val, dao, &info->base,
1047 					    col + 1, 0, col + 1);
1048 
1049 		expr = gnm_expr_new_funcall2
1050 			(fd,
1051 			 gnm_expr_new_constant (val),
1052 			 gnm_expr_new_constant (value_new_int (k)));
1053 
1054 		dao_set_cell_expr (dao, col + 1, 1, expr);
1055 	}
1056 
1057 	gnm_func_dec_usage (fd);
1058 }
1059 
1060 /* Descriptive Statistics
1061  */
1062 static gboolean
analysis_tool_descriptive_engine_run(data_analysis_output_t * dao,analysis_tools_data_descriptive_t * info)1063 analysis_tool_descriptive_engine_run (data_analysis_output_t *dao,
1064 				      analysis_tools_data_descriptive_t *info)
1065 {
1066         if (info->summary_statistics) {
1067                 summary_statistics (dao, info);
1068 		dao->offset_row += 16;
1069 		if (dao->rows <= dao->offset_row)
1070 			goto finish_descriptive_tool;
1071 	}
1072         if (info->confidence_level) {
1073                 confidence_level (dao, info);
1074 		dao->offset_row += 4;
1075 		if (dao->rows <= dao->offset_row)
1076 			goto finish_descriptive_tool;
1077 	}
1078         if (info->kth_largest) {
1079 		kth_smallest_largest (dao, info, "LARGE", _("Largest (%d)"),
1080 				      info->k_largest);
1081 		dao->offset_row += 4;
1082 		if (dao->rows <= dao->offset_row)
1083 			goto finish_descriptive_tool;
1084 	}
1085         if (info->kth_smallest)
1086                 kth_smallest_largest (dao, info, "SMALL", _("Smallest (%d)"),
1087 				      info->k_smallest);
1088 
1089  finish_descriptive_tool:
1090 
1091 	dao_redraw_respan (dao);
1092 	return 0;
1093 }
1094 
1095 gboolean
analysis_tool_descriptive_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)1096 analysis_tool_descriptive_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
1097 				   analysis_tool_engine_t selector, gpointer result)
1098 {
1099 	analysis_tools_data_descriptive_t *info = specs;
1100 
1101 	switch (selector) {
1102 	case TOOL_ENGINE_UPDATE_DESCRIPTOR:
1103 		return (dao_command_descriptor (dao, _("Descriptive Statistics (%s)"), result)
1104 			== NULL);
1105 	case TOOL_ENGINE_UPDATE_DAO:
1106 		prepare_input_range (&info->base.input, info->base.group_by);
1107 		dao_adjust (dao, 1 + g_slist_length (info->base.input),
1108 			    (info->summary_statistics ? 16 : 0) +
1109 			    (info->confidence_level ? 4 : 0) +
1110 			    (info->kth_largest ? 4 : 0) +
1111 			    (info->kth_smallest ? 4 : 0 ) - 1);
1112 		return FALSE;
1113 	case TOOL_ENGINE_CLEAN_UP:
1114 		return analysis_tool_generic_clean (specs);
1115 	case TOOL_ENGINE_LAST_VALIDITY_CHECK:
1116 		return FALSE;
1117 	case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
1118 		dao_prepare_output (NULL, dao, _("Descriptive Statistics"));
1119 		return FALSE;
1120 	case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
1121 		return dao_format_output (dao, _("Descriptive Statistics"));
1122 	case TOOL_ENGINE_PERFORM_CALC:
1123 	default:
1124 		return analysis_tool_descriptive_engine_run (dao, specs);
1125 	}
1126 	return TRUE;  /* We shouldn't get here */
1127 }
1128 
1129 
1130 
1131 /************* Sampling Tool *********************************************
1132  *
1133  * Sampling tool takes a sample from a given data set.  Sample can be
1134  * a random sample where a given number of data points are selected
1135  * randomly from the data set.  The sample can also be a periodic
1136  * sample where, for example, every fourth data element is selected to
1137  * the sample.  The results are given in a table which can be printed
1138  * out in a new sheet, in a new workbook, or simply into an existing
1139  * sheet.
1140  *
1141  **/
1142 
1143 
1144 static gboolean
analysis_tool_sampling_engine_run(data_analysis_output_t * dao,analysis_tools_data_sampling_t * info)1145 analysis_tool_sampling_engine_run (data_analysis_output_t *dao,
1146 					 analysis_tools_data_sampling_t *info)
1147 {
1148 	GSList *l;
1149 	gint col = 0;
1150 	guint ct;
1151 	GnmFunc *fd_index = NULL;
1152 	GnmFunc *fd_randdiscrete = NULL;
1153 	gint source;
1154 
1155 	if (info->base.labels || info->periodic) {
1156 		fd_index = gnm_func_lookup_or_add_placeholder ("INDEX");
1157 		gnm_func_inc_usage (fd_index);
1158 	}
1159 	if (!info->periodic) {
1160 		fd_randdiscrete = gnm_func_lookup_or_add_placeholder ("RANDDISCRETE");
1161 		gnm_func_inc_usage (fd_randdiscrete);
1162 	}
1163 
1164 	for (l = info->base.input, source = 1; l; l = l->next, source++) {
1165 		GnmValue *val = value_dup ((GnmValue *)l->data);
1166 		GnmValue *val_c = NULL;
1167 		GnmExpr const *expr_title = NULL;
1168 		GnmExpr const *expr_input = NULL;
1169 		char const *format = NULL;
1170 		guint offset = info->periodic ? ((info->offset == 0) ? info->period : info->offset): 0;
1171 		GnmEvalPos ep;
1172 
1173 		eval_pos_init_sheet (&ep, val->v_range.cell.a.sheet);
1174 
1175 		dao_set_italic (dao, col, 0, col + info->number - 1, 0);
1176 
1177 		if (info->base.labels) {
1178 			val_c = value_dup (val);
1179 			switch (info->base.group_by) {
1180 			case GROUPED_BY_ROW:
1181 				val->v_range.cell.a.col++;
1182 				break;
1183 			case GROUPED_BY_COL:
1184 				val->v_range.cell.a.row++;
1185 				break;
1186 			default:
1187 				offset++;
1188 				break;
1189 			}
1190 			expr_title = gnm_expr_new_funcall1 (fd_index,
1191 							    gnm_expr_new_constant (val_c));
1192 			for (ct = 0; ct < info->number; ct++)
1193 				dao_set_cell_expr (dao, col+ct, 0, gnm_expr_copy (expr_title));
1194 			gnm_expr_free (expr_title);
1195 		} else {
1196 			switch (info->base.group_by) {
1197 			case GROUPED_BY_ROW:
1198 				format = _("Row %d");
1199 				break;
1200 			case GROUPED_BY_COL:
1201 				format = _("Column %d");
1202 				break;
1203 			default:
1204 				format = _("Area %d");
1205 				break;
1206 			}
1207 			for (ct = 0; ct < info->number; ct++)
1208 				dao_set_cell_printf (dao, col+ct, 0, format, source);
1209 		}
1210 
1211 		expr_input = gnm_expr_new_constant (value_dup (val));
1212 
1213 
1214 		if (info->periodic) {
1215 			guint i;
1216 			gint height = value_area_get_height (val, &ep);
1217 			gint width = value_area_get_width (val, &ep);
1218 			GnmExpr const *expr_period;
1219 
1220 			for (i=0; i < info->size; i++, offset += info->period) {
1221 				gint x_offset;
1222 				gint y_offset;
1223 
1224 				if (info->row_major) {
1225 					y_offset = (offset - 1)/width + 1;
1226 					x_offset = offset - (y_offset - 1) * width;
1227 				} else {
1228 					x_offset = (offset - 1)/height + 1;
1229 					y_offset = offset - (x_offset - 1) * height;
1230 				}
1231 
1232 				expr_period = gnm_expr_new_funcall3
1233 					(fd_index, gnm_expr_copy (expr_input),
1234 					 gnm_expr_new_constant (value_new_int (y_offset)),
1235 					 gnm_expr_new_constant (value_new_int (x_offset)));
1236 
1237 				for (ct = 0; ct < info->number; ct += 2)
1238 					dao_set_cell_expr (dao, col + ct, i + 1,
1239 							   gnm_expr_copy (expr_period));
1240 				gnm_expr_free (expr_period);
1241 
1242 				if (info->number > 1) {
1243 					if (!info->row_major) {
1244 						y_offset = (offset - 1)/width + 1;
1245 						x_offset = offset - (y_offset - 1) * width;
1246 					} else {
1247 						x_offset = (offset - 1)/height + 1;
1248 						y_offset = offset - (x_offset - 1) * height;
1249 					}
1250 
1251 					expr_period = gnm_expr_new_funcall3
1252 						(fd_index, gnm_expr_copy (expr_input),
1253 						 gnm_expr_new_constant (value_new_int (y_offset)),
1254 						 gnm_expr_new_constant (value_new_int (x_offset)));
1255 
1256 					for (ct = 1; ct < info->number; ct += 2)
1257 						dao_set_cell_expr (dao, col + ct, i + 1,
1258 								   gnm_expr_copy (expr_period));
1259 					gnm_expr_free (expr_period);
1260 
1261 				}
1262 			}
1263 			col += info->number;
1264 		} else {
1265 			GnmExpr const *expr_random;
1266 			guint i;
1267 
1268 			expr_random = gnm_expr_new_funcall1 (fd_randdiscrete,
1269 							     gnm_expr_copy (expr_input));
1270 
1271 			for (ct = 0; ct < info->number; ct++, col++)
1272 				for (i=0; i < info->size; i++)
1273 					dao_set_cell_expr (dao, col, i + 1,
1274 							   gnm_expr_copy (expr_random));
1275 			gnm_expr_free (expr_random);
1276 		}
1277 
1278 		value_release (val);
1279 		gnm_expr_free (expr_input);
1280 
1281 	}
1282 
1283 	if (fd_index != NULL)
1284 		gnm_func_dec_usage (fd_index);
1285 	if (fd_randdiscrete != NULL)
1286 		gnm_func_dec_usage (fd_randdiscrete);
1287 
1288 	dao_redraw_respan (dao);
1289 
1290 	return FALSE;
1291 }
1292 
1293 gboolean
analysis_tool_sampling_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)1294 analysis_tool_sampling_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
1295 			       analysis_tool_engine_t selector, gpointer result)
1296 {
1297 	analysis_tools_data_sampling_t *info = specs;
1298 
1299 	switch (selector) {
1300 	case TOOL_ENGINE_UPDATE_DESCRIPTOR:
1301 		return (dao_command_descriptor (dao, _("Sampling (%s)"), result)
1302 			== NULL);
1303 	case TOOL_ENGINE_UPDATE_DAO:
1304 	{
1305 		GSList *l;
1306 
1307 		prepare_input_range (&info->base.input, info->base.group_by);
1308 
1309 		if (info->periodic) {
1310 			info->size = 1;
1311 			for (l = info->base.input; l; l = l->next) {
1312 				GnmEvalPos ep;
1313 				GnmValue *val = ((GnmValue *)l->data);
1314 				gint size;
1315 				guint usize;
1316 				eval_pos_init_sheet (&ep, val->v_range.cell.a.sheet);
1317 				size = (value_area_get_width (val, &ep) *
1318 					     value_area_get_height (val, &ep));
1319 				usize = (size > 0) ? size : 1;
1320 
1321 				if (info->offset == 0)
1322 					usize = usize/info->period;
1323 				else
1324 					usize = (usize - info->offset)/info->period + 1;
1325 				if (usize > info->size)
1326 					info->size = usize;
1327 			}
1328 		}
1329 
1330 		dao_adjust (dao, info->number * g_slist_length (info->base.input),
1331 			    1 + info->size);
1332 		return FALSE;
1333 	}
1334 	case TOOL_ENGINE_CLEAN_UP:
1335 		return analysis_tool_generic_clean (specs);
1336 	case TOOL_ENGINE_LAST_VALIDITY_CHECK:
1337 		return FALSE;
1338 	case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
1339 		dao_prepare_output (NULL, dao, _("Sample"));
1340 		return FALSE;
1341 	case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
1342 		return dao_format_output (dao, _("Sample"));
1343 	case TOOL_ENGINE_PERFORM_CALC:
1344 	default:
1345 		return analysis_tool_sampling_engine_run (dao, specs);
1346 	}
1347 	return TRUE;  /* We shouldn't get here */
1348 }
1349 
1350 
1351 
1352 /************* z-Test: Two Sample for Means ******************************
1353  *
1354  * The results are given in a table which can be printed out in a new
1355  * sheet, in a new workbook, or simply into an existing sheet.
1356  *
1357  **/
1358 
1359 
1360 static gboolean
analysis_tool_ztest_engine_run(data_analysis_output_t * dao,analysis_tools_data_ttests_t * info)1361 analysis_tool_ztest_engine_run (data_analysis_output_t *dao,
1362 				analysis_tools_data_ttests_t *info)
1363 {
1364 	GnmValue *val_1;
1365 	GnmValue *val_2;
1366 	GnmFunc *fd_count;
1367 	GnmFunc *fd_mean;
1368 	GnmFunc *fd_normsdist;
1369 	GnmFunc *fd_normsinv;
1370 	GnmFunc *fd_abs;
1371 	GnmFunc *fd_sqrt;
1372 	GnmExpr const *expr_1;
1373 	GnmExpr const *expr_2;
1374 	GnmExpr const *expr_mean_1;
1375 	GnmExpr const *expr_mean_2;
1376 	GnmExpr const *expr_count_1;
1377 	GnmExpr const *expr_count_2;
1378 
1379 	dao_set_italic (dao, 0, 0, 0, 11);
1380 	dao_set_italic (dao, 0, 0, 2, 0);
1381 
1382         dao_set_cell (dao, 0, 0, "");
1383         set_cell_text_col (dao, 0, 1, _("/Mean"
1384 					"/Known Variance"
1385 					"/Observations"
1386 					"/Hypothesized Mean Difference"
1387 					"/Observed Mean Difference"
1388 					"/z"
1389 					"/P (Z<=z) one-tail"
1390 					"/z Critical one-tail"
1391 					"/P (Z<=z) two-tail"
1392 					"/z Critical two-tail"));
1393 
1394 	fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
1395 	gnm_func_inc_usage (fd_mean);
1396 	fd_normsdist = gnm_func_lookup_or_add_placeholder ("NORMSDIST");
1397 	gnm_func_inc_usage (fd_normsdist);
1398 	fd_abs = gnm_func_lookup_or_add_placeholder ("ABS");
1399 	gnm_func_inc_usage (fd_abs);
1400 	fd_sqrt = gnm_func_lookup_or_add_placeholder ("SQRT");
1401 	gnm_func_inc_usage (fd_sqrt);
1402 	fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
1403 	gnm_func_inc_usage (fd_count);
1404 	fd_normsinv = gnm_func_lookup_or_add_placeholder ("NORMSINV");
1405 	gnm_func_inc_usage (fd_normsinv);
1406 
1407 	val_1 = value_dup (info->base.range_1);
1408 	expr_1 = gnm_expr_new_constant (value_dup (val_1));
1409 
1410 	val_2 = value_dup (info->base.range_2);
1411 	expr_2 = gnm_expr_new_constant (value_dup (val_2));
1412 
1413 	/* Labels */
1414 	analysis_tools_write_label_ftest (val_1, dao, 1, 0,
1415 					  info->base.labels, 1);
1416 	analysis_tools_write_label_ftest (val_2, dao, 2, 0,
1417 					  info->base.labels, 2);
1418 
1419 
1420 	/* Mean */
1421 	expr_mean_1 = gnm_expr_new_funcall1 (fd_mean, gnm_expr_copy (expr_1));
1422 	dao_set_cell_expr (dao, 1, 1, expr_mean_1);
1423 	expr_mean_2 = gnm_expr_new_funcall1 (fd_mean, gnm_expr_copy (expr_2));
1424 	dao_set_cell_expr (dao, 2, 1, gnm_expr_copy (expr_mean_2));
1425 
1426 	/* Known Variance */
1427 	dao_set_cell_float (dao, 1, 2, info->var1);
1428 	dao_set_cell_float (dao, 2, 2, info->var2);
1429 
1430 	/* Observations */
1431 	expr_count_1 = gnm_expr_new_funcall1 (fd_count, expr_1);
1432 	dao_set_cell_expr (dao, 1, 3, expr_count_1);
1433 	expr_count_2 = gnm_expr_new_funcall1 (fd_count, expr_2);
1434 	dao_set_cell_expr (dao, 2, 3, gnm_expr_copy (expr_count_2));
1435 
1436 	/* Hypothesized Mean Difference */
1437 	dao_set_cell_float (dao, 1, 4, info->mean_diff);
1438 
1439 	/* Observed Mean Difference */
1440 	if (dao_cell_is_visible (dao, 2, 1)) {
1441 		gnm_expr_free (expr_mean_2);
1442 		expr_mean_2 = make_cellref (1, -4);
1443 	}
1444 
1445 	{
1446 		dao_set_cell_expr (dao, 1, 5,
1447 				   gnm_expr_new_binary
1448 				   (make_cellref (0, -4),
1449 				    GNM_EXPR_OP_SUB,
1450 				    expr_mean_2));
1451 	}
1452 
1453 	/* z */
1454 	{
1455 		GnmExpr const *expr_var_1 = make_cellref (0, -4);
1456 		GnmExpr const *expr_var_2 = NULL;
1457 		GnmExpr const *expr_count_1 = make_cellref (0, -3);
1458 		GnmExpr const *expr_a = NULL;
1459 		GnmExpr const *expr_b = NULL;
1460 		GnmExpr const *expr_count_2_adj = NULL;
1461 
1462 		if (dao_cell_is_visible (dao, 2, 2)) {
1463 			expr_var_2 = make_cellref (1, -4);
1464 		} else {
1465 			expr_var_2 = gnm_expr_new_constant
1466 			(value_new_float (info->var2));
1467 		}
1468 
1469 		if (dao_cell_is_visible (dao, 2, 3)) {
1470 			gnm_expr_free (expr_count_2);
1471 			expr_count_2_adj = make_cellref (1, -3);
1472 		} else
1473 			expr_count_2_adj = expr_count_2;
1474 
1475 		expr_a = gnm_expr_new_binary (expr_var_1, GNM_EXPR_OP_DIV,
1476 					      expr_count_1);
1477 		expr_b = gnm_expr_new_binary (expr_var_2, GNM_EXPR_OP_DIV,
1478 					      expr_count_2_adj);
1479 
1480 		dao_set_cell_expr (dao, 1, 6,
1481 				   gnm_expr_new_binary
1482 				   (gnm_expr_new_binary
1483 				    (make_cellref (0, -1),
1484 				     GNM_EXPR_OP_SUB,
1485 				     make_cellref (0, -2)),
1486 				    GNM_EXPR_OP_DIV,
1487 				    gnm_expr_new_funcall1
1488 				    (fd_sqrt,
1489 				     gnm_expr_new_binary
1490 				     (expr_a,
1491 				      GNM_EXPR_OP_ADD,
1492 				      expr_b))));
1493 	}
1494 
1495 	/* P (Z<=z) one-tail */
1496 	/* FIXME: 1- looks like a bad idea.  */
1497 	dao_set_cell_expr
1498 		(dao, 1, 7,
1499 		 gnm_expr_new_binary
1500 		 (gnm_expr_new_constant (value_new_int (1)),
1501 		  GNM_EXPR_OP_SUB,
1502 		  gnm_expr_new_funcall1
1503 		  (fd_normsdist,
1504 		   gnm_expr_new_funcall1
1505 		   (fd_abs,
1506 		    make_cellref (0, -1)))));
1507 
1508 
1509 	/* Critical Z, one right tail */
1510 	dao_set_cell_expr
1511 		(dao, 1, 8,
1512 		 gnm_expr_new_unary
1513 		 (GNM_EXPR_OP_UNARY_NEG,
1514 		  gnm_expr_new_funcall1
1515 		  (fd_normsinv,
1516 		   gnm_expr_new_constant
1517 		   (value_new_float (info->base.alpha)))));
1518 
1519 	/* P (T<=t) two-tail */
1520 	dao_set_cell_expr
1521 		(dao, 1, 9,
1522 		 gnm_expr_new_binary
1523 		 (gnm_expr_new_constant (value_new_int (2)),
1524 		  GNM_EXPR_OP_MULT,
1525 		  gnm_expr_new_funcall1
1526 		  (fd_normsdist,
1527 		   gnm_expr_new_unary
1528 		   (GNM_EXPR_OP_UNARY_NEG,
1529 		    gnm_expr_new_funcall1
1530 		    (fd_abs,
1531 		     make_cellref (0, -3))))));
1532 
1533 	/* Critical Z, two tails */
1534 	dao_set_cell_expr
1535 		(dao, 1, 10,
1536 		 gnm_expr_new_unary
1537 		 (GNM_EXPR_OP_UNARY_NEG,
1538 		  gnm_expr_new_funcall1
1539 		  (fd_normsinv,
1540 		   gnm_expr_new_binary
1541 		   (gnm_expr_new_constant
1542 		    (value_new_float (info->base.alpha)),
1543 		    GNM_EXPR_OP_DIV,
1544 		    gnm_expr_new_constant (value_new_int (2))))));
1545 
1546 	gnm_func_dec_usage (fd_mean);
1547 	gnm_func_dec_usage (fd_normsdist);
1548 	gnm_func_dec_usage (fd_abs);
1549 	gnm_func_dec_usage (fd_sqrt);
1550 	gnm_func_dec_usage (fd_count);
1551 	gnm_func_dec_usage (fd_normsinv);
1552 
1553 	/* And finish up */
1554 
1555 	value_release (val_1);
1556 	value_release (val_2);
1557 
1558 	dao_redraw_respan (dao);
1559 
1560         return FALSE;
1561 }
1562 
1563 
1564 gboolean
analysis_tool_ztest_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)1565 analysis_tool_ztest_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
1566 			       analysis_tool_engine_t selector, gpointer result)
1567 {
1568 	switch (selector) {
1569 	case TOOL_ENGINE_UPDATE_DESCRIPTOR:
1570 		return (dao_command_descriptor (dao, _("z-Test (%s)"), result)
1571 			== NULL);
1572 	case TOOL_ENGINE_UPDATE_DAO:
1573 		dao_adjust (dao, 3, 11);
1574 		return FALSE;
1575 	case TOOL_ENGINE_CLEAN_UP:
1576 		return analysis_tool_generic_b_clean (specs);
1577 	case TOOL_ENGINE_LAST_VALIDITY_CHECK:
1578 		return FALSE;
1579 	case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
1580 		dao_prepare_output (NULL, dao, _("z-Test"));
1581 		return FALSE;
1582 	case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
1583 		return dao_format_output (dao, _("z-Test"));
1584 	case TOOL_ENGINE_PERFORM_CALC:
1585 	default:
1586 		return analysis_tool_ztest_engine_run (dao, specs);
1587 	}
1588 	return TRUE;  /* We shouldn't get here */
1589 }
1590 
1591 
1592 /************* t-Test Tools ********************************************
1593  *
1594  * The t-Test tool set consists of three kinds of tests to test the
1595  * mean of two variables.  The tests are: Student's t-test for paired
1596  * sample, Student's t-test for two samples assuming equal variance
1597  * and the same test assuming unequal variance.  The results are given
1598  * in a table which can be printed out in a new sheet, in a new
1599  * workbook, or simply into an existing sheet.
1600  *
1601  **/
1602 
1603 /* t-Test: Paired Two Sample for Means.
1604  */
1605 static gboolean
analysis_tool_ttest_paired_engine_run(data_analysis_output_t * dao,analysis_tools_data_ttests_t * info)1606 analysis_tool_ttest_paired_engine_run (data_analysis_output_t *dao,
1607 				       analysis_tools_data_ttests_t *info)
1608 {
1609 	GnmValue *val_1;
1610 	GnmValue *val_2;
1611 
1612 	GnmFunc *fd_count;
1613 	GnmFunc *fd_mean;
1614 	GnmFunc *fd_var;
1615 	GnmFunc *fd_tdist;
1616 	GnmFunc *fd_abs;
1617 	GnmFunc *fd_tinv;
1618 	GnmFunc *fd_correl;
1619 	GnmFunc *fd_isodd;
1620 	GnmFunc *fd_isnumber;
1621 	GnmFunc *fd_if;
1622 	GnmFunc *fd_sum;
1623 
1624 	GnmExpr const *expr_1;
1625 	GnmExpr const *expr_2;
1626 	GnmExpr const *expr_diff;
1627 	GnmExpr const *expr_ifisnumber;
1628 	GnmExpr const *expr_ifisoddifisnumber;
1629 
1630 	dao_set_italic (dao, 0, 0, 0, 13);
1631 	dao_set_italic (dao, 0, 0, 2, 0);
1632 
1633         dao_set_cell (dao, 0, 0, "");
1634         set_cell_text_col (dao, 0, 1, _("/Mean"
1635 					"/Variance"
1636 					"/Observations"
1637 					"/Pearson Correlation"
1638 					"/Hypothesized Mean Difference"
1639 					"/Observed Mean Difference"
1640 					"/Variance of the Differences"
1641 					"/df"
1642 					"/t Stat"
1643 					"/P (T<=t) one-tail"
1644 					"/t Critical one-tail"
1645 					"/P (T<=t) two-tail"
1646 					"/t Critical two-tail"));
1647 
1648 	fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
1649 	gnm_func_inc_usage (fd_mean);
1650 	fd_var = gnm_func_lookup_or_add_placeholder ("VAR");
1651 	gnm_func_inc_usage (fd_var);
1652 	fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
1653 	gnm_func_inc_usage (fd_count);
1654 	fd_correl = gnm_func_lookup_or_add_placeholder ("CORREL");
1655 	gnm_func_inc_usage (fd_correl);
1656 	fd_tinv = gnm_func_lookup_or_add_placeholder ("TINV");
1657 	gnm_func_inc_usage (fd_tinv);
1658 	fd_tdist = gnm_func_lookup_or_add_placeholder ("TDIST");
1659 	gnm_func_inc_usage (fd_tdist);
1660 	fd_abs = gnm_func_lookup_or_add_placeholder ("ABS");
1661 	gnm_func_inc_usage (fd_abs);
1662 	fd_isodd = gnm_func_lookup_or_add_placeholder ("ISODD");
1663 	gnm_func_inc_usage (fd_isodd);
1664 	fd_isnumber = gnm_func_lookup_or_add_placeholder ("ISNUMBER");
1665 	gnm_func_inc_usage (fd_isnumber);
1666 	fd_if = gnm_func_lookup_or_add_placeholder ("IF");
1667 	gnm_func_inc_usage (fd_if);
1668 	fd_sum = gnm_func_lookup_or_add_placeholder ("SUM");
1669 	gnm_func_inc_usage (fd_sum);
1670 
1671 	val_1 = value_dup (info->base.range_1);
1672 	val_2 = value_dup (info->base.range_2);
1673 
1674 	/* Labels */
1675 	analysis_tools_write_label_ftest (val_1, dao, 1, 0,
1676 					  info->base.labels, 1);
1677 	analysis_tools_write_label_ftest (val_2, dao, 2, 0,
1678 					  info->base.labels, 2);
1679 
1680 	/* Mean */
1681 
1682 	expr_1 = gnm_expr_new_constant (value_dup (val_1));
1683 	dao_set_cell_expr (dao, 1, 1,
1684 			   gnm_expr_new_funcall1 (fd_mean,
1685 						  gnm_expr_copy (expr_1)));
1686 
1687 	expr_2 = gnm_expr_new_constant (value_dup (val_2));
1688 	dao_set_cell_expr (dao, 2, 1,
1689 			   gnm_expr_new_funcall1 (fd_mean,
1690 						  gnm_expr_copy (expr_2)));
1691 
1692 	/* Variance */
1693 	dao_set_cell_expr (dao, 1, 2,
1694 			   gnm_expr_new_funcall1 (fd_var,
1695 						  gnm_expr_copy (expr_1)));
1696 	dao_set_cell_expr (dao, 2, 2,
1697 			   gnm_expr_new_funcall1 (fd_var,
1698 						  gnm_expr_copy (expr_2)));
1699 
1700 	/* Observations */
1701 	dao_set_cell_expr (dao, 1, 3,
1702 			   gnm_expr_new_funcall1 (fd_count,
1703 						  gnm_expr_copy (expr_1)));
1704 	dao_set_cell_expr (dao, 2, 3,
1705 			   gnm_expr_new_funcall1 (fd_count,
1706 						  gnm_expr_copy (expr_2)));
1707 
1708 	/* Pearson Correlation */
1709 	dao_set_cell_expr (dao, 1, 4,
1710 			   gnm_expr_new_funcall2 (fd_correl,
1711 						  gnm_expr_copy (expr_1),
1712 						  gnm_expr_copy (expr_2)));
1713 
1714 	/* Hypothesized Mean Difference */
1715 	dao_set_cell_float (dao, 1, 5, info->mean_diff);
1716 
1717 	/* Some useful expressions for the next field */
1718 
1719 	expr_diff = gnm_expr_new_binary (expr_1, GNM_EXPR_OP_SUB, expr_2);
1720 
1721 	/* IF (ISNUMBER (area1), 1, 0) * IF (ISNUMBER (area2), 1, 0)  */
1722 	expr_ifisnumber = gnm_expr_new_binary (gnm_expr_new_funcall3 (
1723 						       fd_if,
1724 						       gnm_expr_new_funcall1 (
1725 							       fd_isnumber,
1726 							       gnm_expr_copy (expr_1)),
1727 						       gnm_expr_new_constant (value_new_int (1)),
1728 						       gnm_expr_new_constant (value_new_int (0))),
1729 					       GNM_EXPR_OP_MULT,
1730 					       gnm_expr_new_funcall3 (
1731 						       fd_if,
1732 						       gnm_expr_new_funcall1 (
1733 							       fd_isnumber,
1734 							       gnm_expr_copy (expr_2)),
1735 						       gnm_expr_new_constant (value_new_int (1)),
1736 						       gnm_expr_new_constant (value_new_int (0)))
1737 		);
1738 	/* IF (ISODD (expr_ifisnumber), area1-area2, "NA")*/
1739 	expr_ifisoddifisnumber = gnm_expr_new_funcall3 (fd_if,
1740 							gnm_expr_new_funcall1 (fd_isodd,
1741 									       gnm_expr_copy (expr_ifisnumber)),
1742 							expr_diff,
1743 							gnm_expr_new_constant (value_new_string ("NA")));
1744 
1745 	/* Observed Mean Difference */
1746 	dao_set_cell_array_expr (dao, 1, 6,
1747 				 gnm_expr_new_funcall1 (fd_mean,
1748 							gnm_expr_copy (expr_ifisoddifisnumber)));
1749 
1750 	/* Variance of the Differences */
1751 	dao_set_cell_array_expr (dao, 1, 7,
1752 				 gnm_expr_new_funcall1 (fd_var,
1753 							expr_ifisoddifisnumber));
1754 
1755 	/* df */
1756 	dao_set_cell_array_expr (dao, 1, 8,
1757 				 gnm_expr_new_binary
1758 				 (gnm_expr_new_funcall1 (
1759 					 fd_sum,
1760 					 expr_ifisnumber),
1761 				  GNM_EXPR_OP_SUB,
1762 				  gnm_expr_new_constant (value_new_int (1))));
1763 
1764 	/* t */
1765 	/* E24 = (E21-E20)/(E22/(E23+1))^0.5 */
1766 	{
1767 		GnmExpr const *expr_num;
1768 		GnmExpr const *expr_denom;
1769 
1770 		expr_num = gnm_expr_new_binary (make_cellref (0, -3),
1771 						GNM_EXPR_OP_SUB,
1772 						make_cellref (0,-4));
1773 
1774 		expr_denom = gnm_expr_new_binary
1775 			(gnm_expr_new_binary
1776 			 (make_cellref (0, -2),
1777 			  GNM_EXPR_OP_DIV,
1778 			  gnm_expr_new_binary
1779 			  (make_cellref (0, -1),
1780 			   GNM_EXPR_OP_ADD,
1781 			   gnm_expr_new_constant
1782 			   (value_new_int (1)))),
1783 			 GNM_EXPR_OP_EXP,
1784 			 gnm_expr_new_constant
1785 			 (value_new_float (0.5)));
1786 
1787 		dao_set_cell_expr (dao, 1, 9,
1788 				   gnm_expr_new_binary
1789 				   (expr_num, GNM_EXPR_OP_DIV, expr_denom));
1790 	}
1791 
1792 	/* P (T<=t) one-tail */
1793 	dao_set_cell_expr
1794 		(dao, 1, 10,
1795 		 gnm_expr_new_funcall3
1796 		 (fd_tdist,
1797 		  gnm_expr_new_funcall1
1798 		  (fd_abs,
1799 		   make_cellref (0, -1)),
1800 		  make_cellref (0, -2),
1801 		  gnm_expr_new_constant (value_new_int (1))));
1802 
1803 	/* t Critical one-tail */
1804 	dao_set_cell_expr
1805 		(dao, 1, 11,
1806 		 gnm_expr_new_funcall2
1807 		 (fd_tinv,
1808 		  gnm_expr_new_binary
1809 		  (gnm_expr_new_constant (value_new_int (2)),
1810 		   GNM_EXPR_OP_MULT,
1811 		   gnm_expr_new_constant
1812 		   (value_new_float (info->base.alpha))),
1813 		  make_cellref (0, -3)));
1814 
1815 	/* P (T<=t) two-tail */
1816 	dao_set_cell_expr
1817 		(dao, 1, 12,
1818 		 gnm_expr_new_funcall3
1819 		 (fd_tdist,
1820 		  gnm_expr_new_funcall1 (fd_abs, make_cellref (0, -3)),
1821 		  make_cellref (0, -4),
1822 		  gnm_expr_new_constant (value_new_int (2))));
1823 
1824 	/* t Critical two-tail */
1825 	dao_set_cell_expr
1826 		(dao, 1, 13,
1827 		 gnm_expr_new_funcall2
1828 		 (fd_tinv,
1829 		  gnm_expr_new_constant
1830 		  (value_new_float (info->base.alpha)),
1831 		  make_cellref (0, -5)));
1832 
1833 	/* And finish up */
1834 
1835 	value_release (val_1);
1836 	value_release (val_2);
1837 
1838 	gnm_func_dec_usage (fd_count);
1839 	gnm_func_dec_usage (fd_correl);
1840 	gnm_func_dec_usage (fd_mean);
1841 	gnm_func_dec_usage (fd_var);
1842 	gnm_func_dec_usage (fd_tinv);
1843 	gnm_func_dec_usage (fd_tdist);
1844 	gnm_func_dec_usage (fd_abs);
1845 	gnm_func_dec_usage (fd_isodd);
1846 	gnm_func_dec_usage (fd_isnumber);
1847 	gnm_func_dec_usage (fd_if);
1848 	gnm_func_dec_usage (fd_sum);
1849 
1850 	dao_redraw_respan (dao);
1851 
1852 	return FALSE;
1853 }
1854 
1855 gboolean
analysis_tool_ttest_paired_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)1856 analysis_tool_ttest_paired_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
1857 				   analysis_tool_engine_t selector,
1858 				   gpointer result)
1859 {
1860 	switch (selector) {
1861 	case TOOL_ENGINE_UPDATE_DESCRIPTOR:
1862 		return (dao_command_descriptor (dao, _("t-Test, paired (%s)"), result)
1863 			== NULL);
1864 	case TOOL_ENGINE_UPDATE_DAO:
1865 		dao_adjust (dao, 3, 14);
1866 		return FALSE;
1867 	case TOOL_ENGINE_CLEAN_UP:
1868 		return analysis_tool_generic_b_clean (specs);
1869 	case TOOL_ENGINE_LAST_VALIDITY_CHECK:
1870 		return FALSE;
1871 	case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
1872 		dao_prepare_output (NULL, dao, _("t-Test"));
1873 		return FALSE;
1874 	case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
1875 		return dao_format_output (dao, _("t-Test"));
1876 	case TOOL_ENGINE_PERFORM_CALC:
1877 	default:
1878 		return analysis_tool_ttest_paired_engine_run (dao, specs);
1879 	}
1880 	return TRUE;  /* We shouldn't get here */
1881 }
1882 
1883 
1884 
1885 
1886 /* t-Test: Two-Sample Assuming Equal Variances.
1887  */
1888 static gboolean
analysis_tool_ttest_eqvar_engine_run(data_analysis_output_t * dao,analysis_tools_data_ttests_t * info)1889 analysis_tool_ttest_eqvar_engine_run (data_analysis_output_t *dao,
1890 				      analysis_tools_data_ttests_t *info)
1891 {
1892 	GnmValue *val_1;
1893 	GnmValue *val_2;
1894 	GnmFunc *fd_count;
1895 	GnmFunc *fd_mean;
1896 	GnmFunc *fd_var;
1897 	GnmFunc *fd_tdist;
1898 	GnmFunc *fd_abs;
1899 	GnmFunc *fd_tinv;
1900 	GnmExpr const *expr_1;
1901 	GnmExpr const *expr_2;
1902 	GnmExpr const *expr_mean_1;
1903 	GnmExpr const *expr_mean_2;
1904 	GnmExpr const *expr_var_1;
1905 	GnmExpr const *expr_var_2;
1906 	GnmExpr const *expr_count_1;
1907 	GnmExpr const *expr_count_2;
1908 
1909 	dao_set_italic (dao, 0, 0, 0, 12);
1910 	dao_set_italic (dao, 0, 0, 2, 0);
1911 
1912         dao_set_cell (dao, 0, 0, "");
1913 	set_cell_text_col (dao, 0, 1, _("/Mean"
1914 					"/Variance"
1915 					"/Observations"
1916 					"/Pooled Variance"
1917 					"/Hypothesized Mean Difference"
1918 					"/Observed Mean Difference"
1919 					"/df"
1920 					"/t Stat"
1921 					"/P (T<=t) one-tail"
1922 					"/t Critical one-tail"
1923 					"/P (T<=t) two-tail"
1924 					"/t Critical two-tail"));
1925 
1926 
1927 	val_1 = value_dup (info->base.range_1);
1928 	val_2 = value_dup (info->base.range_2);
1929 
1930 	fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
1931 	gnm_func_inc_usage (fd_mean);
1932 	fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
1933 	gnm_func_inc_usage (fd_count);
1934 	fd_var = gnm_func_lookup_or_add_placeholder ("VAR");
1935 	gnm_func_inc_usage (fd_var);
1936 	fd_tdist = gnm_func_lookup_or_add_placeholder ("TDIST");
1937 	gnm_func_inc_usage (fd_tdist);
1938 	fd_abs = gnm_func_lookup_or_add_placeholder ("ABS");
1939 	gnm_func_inc_usage (fd_abs);
1940 	fd_tinv = gnm_func_lookup_or_add_placeholder ("TINV");
1941 	gnm_func_inc_usage (fd_tinv);
1942 
1943 	/* Labels */
1944 	analysis_tools_write_label_ftest (val_1, dao, 1, 0,
1945 					  info->base.labels, 1);
1946 	analysis_tools_write_label_ftest (val_2, dao, 2, 0,
1947 					  info->base.labels, 2);
1948 
1949 
1950 	/* Mean */
1951 	expr_1 = gnm_expr_new_constant (value_dup (val_1));
1952 	expr_mean_1 = gnm_expr_new_funcall1 (fd_mean,
1953 					     gnm_expr_copy (expr_1));
1954 	dao_set_cell_expr (dao, 1, 1, expr_mean_1);
1955 	expr_2 = gnm_expr_new_constant (value_dup (val_2));
1956 	expr_mean_2 = gnm_expr_new_funcall1 (fd_mean,
1957 					     gnm_expr_copy (expr_2));
1958 	dao_set_cell_expr (dao, 2, 1, gnm_expr_copy (expr_mean_2));
1959 
1960 	/* Variance */
1961 	expr_var_1 = gnm_expr_new_funcall1 (fd_var, gnm_expr_copy (expr_1));
1962 	dao_set_cell_expr (dao, 1, 2, expr_var_1);
1963 	expr_var_2 = gnm_expr_new_funcall1 (fd_var, gnm_expr_copy (expr_2));
1964 	dao_set_cell_expr (dao, 2, 2, gnm_expr_copy (expr_var_2));
1965 
1966 	/* Observations */
1967 	expr_count_1 = gnm_expr_new_funcall1 (fd_count, expr_1);
1968 	dao_set_cell_expr (dao, 1, 3, expr_count_1);
1969 	expr_count_2 = gnm_expr_new_funcall1 (fd_count, expr_2);
1970 	dao_set_cell_expr (dao, 2, 3, gnm_expr_copy (expr_count_2));
1971 
1972         /* Pooled Variance */
1973 	{
1974 		GnmExpr const *expr_var_2_adj = NULL;
1975 		GnmExpr const *expr_count_2_adj = NULL;
1976 		GnmExpr const *expr_var_1 = make_cellref (0, -2);
1977 		GnmExpr const *expr_count_1 = make_cellref (0, -1);
1978 		GnmExpr const *expr_one = gnm_expr_new_constant
1979 			(value_new_int (1));
1980 		GnmExpr const *expr_count_1_minus_1;
1981 		GnmExpr const *expr_count_2_minus_1;
1982 
1983 		if (dao_cell_is_visible (dao, 2, 2)) {
1984 			gnm_expr_free (expr_var_2);
1985 			expr_var_2_adj = make_cellref (1, -2);
1986 		} else
1987 			expr_var_2_adj = expr_var_2;
1988 
1989 		if (dao_cell_is_visible (dao, 2, 3)) {
1990 			expr_count_2_adj = make_cellref (1, -1);
1991 		} else
1992 			expr_count_2_adj = gnm_expr_copy (expr_count_2);
1993 
1994 		expr_count_1_minus_1 = gnm_expr_new_binary
1995 			(expr_count_1,
1996 			 GNM_EXPR_OP_SUB,
1997 			 gnm_expr_copy (expr_one));
1998 		expr_count_2_minus_1 = gnm_expr_new_binary
1999 			(expr_count_2_adj, GNM_EXPR_OP_SUB, expr_one);
2000 
2001 		dao_set_cell_expr (dao, 1, 4,
2002 				   gnm_expr_new_binary
2003 				   (gnm_expr_new_binary
2004 				    (gnm_expr_new_binary
2005 				     (gnm_expr_copy (expr_count_1_minus_1),
2006 				      GNM_EXPR_OP_MULT,
2007 				      expr_var_1),
2008 				     GNM_EXPR_OP_ADD,
2009 				     gnm_expr_new_binary
2010 				     (gnm_expr_copy (expr_count_2_minus_1),
2011 				      GNM_EXPR_OP_MULT,
2012 				      expr_var_2_adj)),
2013 				    GNM_EXPR_OP_DIV,
2014 				    gnm_expr_new_binary
2015 				    (expr_count_1_minus_1,
2016 				     GNM_EXPR_OP_ADD,
2017 				     expr_count_2_minus_1)));
2018 
2019 	}
2020 
2021 	/* Hypothesized Mean Difference */
2022 	dao_set_cell_float (dao, 1, 5, info->mean_diff);
2023 
2024 	/* Observed Mean Difference */
2025 	if (dao_cell_is_visible (dao, 2,1)) {
2026 		gnm_expr_free (expr_mean_2);
2027 		expr_mean_2 = make_cellref (1, -5);
2028 	}
2029 	dao_set_cell_expr (dao, 1, 6,
2030 			   gnm_expr_new_binary
2031 			   (make_cellref (0, -5),
2032 			    GNM_EXPR_OP_SUB,
2033 			    expr_mean_2));
2034 
2035 	/* df */
2036 	{
2037 		GnmExpr const *expr_count_1 = make_cellref (0, -4);
2038 		GnmExpr const *expr_count_2_adj;
2039 		GnmExpr const *expr_two = gnm_expr_new_constant
2040 			(value_new_int (2));
2041 
2042 		if (dao_cell_is_visible (dao, 2,3)) {
2043 			expr_count_2_adj = make_cellref (1, -4);
2044 		} else
2045 			expr_count_2_adj = gnm_expr_copy (expr_count_2);
2046 
2047 		dao_set_cell_expr (dao, 1, 7,
2048 				   gnm_expr_new_binary
2049 				   (gnm_expr_new_binary
2050 				    (expr_count_1,
2051 				     GNM_EXPR_OP_ADD,
2052 				     expr_count_2_adj),
2053 				    GNM_EXPR_OP_SUB,
2054 				    expr_two));
2055 	}
2056 
2057 	/* t */
2058 	{
2059 		GnmExpr const *expr_var = make_cellref (0, -4);
2060 		GnmExpr const *expr_count_1 = make_cellref (0, -5);
2061 		GnmExpr const *expr_a;
2062 		GnmExpr const *expr_b;
2063 		GnmExpr const *expr_count_2_adj;
2064 
2065 		if (dao_cell_is_visible (dao, 2,3)) {
2066 			gnm_expr_free (expr_count_2);
2067 			expr_count_2_adj = make_cellref (1, -5);
2068 		} else
2069 			expr_count_2_adj = expr_count_2;
2070 
2071 		expr_a = gnm_expr_new_binary (gnm_expr_copy (expr_var),
2072 					      GNM_EXPR_OP_DIV,
2073 					      expr_count_1);
2074 		expr_b = gnm_expr_new_binary (expr_var,
2075 					      GNM_EXPR_OP_DIV,
2076 					      expr_count_2_adj);
2077 
2078 		dao_set_cell_expr (dao, 1, 8,
2079 				   gnm_expr_new_binary
2080 				   (gnm_expr_new_binary
2081 				    (make_cellref (0, -2),
2082 				     GNM_EXPR_OP_SUB,
2083 				     make_cellref (0, -3)),
2084 				    GNM_EXPR_OP_DIV,
2085 				    gnm_expr_new_binary
2086 					     (gnm_expr_new_binary
2087 					      (expr_a,
2088 					       GNM_EXPR_OP_ADD,
2089 					       expr_b),
2090 					      GNM_EXPR_OP_EXP,
2091 					      gnm_expr_new_constant
2092 					      (value_new_float (0.5)))));
2093 
2094 	}
2095 
2096 	/* P (T<=t) one-tail */
2097 	dao_set_cell_expr
2098 		(dao, 1, 9,
2099 		 gnm_expr_new_funcall3
2100 		 (fd_tdist,
2101 		  gnm_expr_new_funcall1
2102 		  (fd_abs,
2103 		   make_cellref (0, -1)),
2104 		  make_cellref (0, -2),
2105 		  gnm_expr_new_constant (value_new_int (1))));
2106 
2107 	/* t Critical one-tail */
2108 	dao_set_cell_expr
2109 		(dao, 1, 10,
2110 		 gnm_expr_new_funcall2
2111 		 (fd_tinv,
2112 		  gnm_expr_new_binary
2113 		  (gnm_expr_new_constant (value_new_int (2)),
2114 		   GNM_EXPR_OP_MULT,
2115 		   gnm_expr_new_constant
2116 		   (value_new_float (info->base.alpha))),
2117 		  make_cellref (0, -3)));
2118 
2119 	/* P (T<=t) two-tail */
2120 	dao_set_cell_expr
2121 		(dao, 1, 11,
2122 		 gnm_expr_new_funcall3
2123 		 (fd_tdist,
2124 		  gnm_expr_new_funcall1
2125 		  (fd_abs,
2126 		   make_cellref (0, -3)),
2127 		  make_cellref (0, -4),
2128 		  gnm_expr_new_constant (value_new_int (2))));
2129 
2130 	/* t Critical two-tail */
2131 	dao_set_cell_expr
2132 		(dao, 1, 12,
2133 		 gnm_expr_new_funcall2
2134 		 (fd_tinv,
2135 		  gnm_expr_new_constant
2136 		  (value_new_float (info->base.alpha)),
2137 		  make_cellref (0, -5)));
2138 
2139 	/* And finish up */
2140 
2141 	value_release (val_1);
2142 	value_release (val_2);
2143 
2144 	gnm_func_dec_usage (fd_mean);
2145 	gnm_func_dec_usage (fd_var);
2146 	gnm_func_dec_usage (fd_count);
2147 	gnm_func_dec_usage (fd_tdist);
2148 	gnm_func_dec_usage (fd_abs);
2149 	gnm_func_dec_usage (fd_tinv);
2150 
2151 	dao_redraw_respan (dao);
2152 
2153 	return FALSE;
2154 }
2155 
2156 gboolean
analysis_tool_ttest_eqvar_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)2157 analysis_tool_ttest_eqvar_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
2158 				  analysis_tool_engine_t selector, gpointer result)
2159 {
2160 	switch (selector) {
2161 	case TOOL_ENGINE_UPDATE_DESCRIPTOR:
2162 		return (dao_command_descriptor (dao, _("t-Test (%s)"), result)
2163 			== NULL);
2164 	case TOOL_ENGINE_UPDATE_DAO:
2165 		dao_adjust (dao, 3, 13);
2166 		return FALSE;
2167 	case TOOL_ENGINE_CLEAN_UP:
2168 		return analysis_tool_generic_b_clean (specs);
2169 	case TOOL_ENGINE_LAST_VALIDITY_CHECK:
2170 		return FALSE;
2171 	case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
2172 		dao_prepare_output (NULL, dao, _("t-Test"));
2173 		return FALSE;
2174 	case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
2175 		return dao_format_output (dao, _("t-Test"));
2176 	case TOOL_ENGINE_PERFORM_CALC:
2177 	default:
2178 		return analysis_tool_ttest_eqvar_engine_run (dao, specs);
2179 	}
2180 	return TRUE;  /* We shouldn't get here */
2181 }
2182 
2183 /* t-Test: Two-Sample Assuming Unequal Variances.
2184  */
2185 static gboolean
analysis_tool_ttest_neqvar_engine_run(data_analysis_output_t * dao,analysis_tools_data_ttests_t * info)2186 analysis_tool_ttest_neqvar_engine_run (data_analysis_output_t *dao,
2187 				analysis_tools_data_ttests_t *info)
2188 {
2189 	GnmValue *val_1;
2190 	GnmValue *val_2;
2191 	GnmFunc *fd_count;
2192 	GnmFunc *fd_mean;
2193 	GnmFunc *fd_var;
2194 	GnmFunc *fd_tdist;
2195 	GnmFunc *fd_abs;
2196 	GnmFunc *fd_tinv;
2197 	GnmExpr const *expr_1;
2198 	GnmExpr const *expr_2;
2199 	GnmExpr const *expr_mean_1;
2200 	GnmExpr const *expr_mean_2;
2201 	GnmExpr const *expr_var_1;
2202 	GnmExpr const *expr_var_2;
2203 	GnmExpr const *expr_count_1;
2204 	GnmExpr const *expr_count_2;
2205 
2206 	dao_set_italic (dao, 0, 0, 0, 11);
2207 	dao_set_italic (dao, 0, 0, 2, 0);
2208 
2209         dao_set_cell (dao, 0, 0, "");
2210         set_cell_text_col (dao, 0, 1, _("/Mean"
2211 					"/Variance"
2212 					"/Observations"
2213 					"/Hypothesized Mean Difference"
2214 					"/Observed Mean Difference"
2215 					"/df"
2216 					"/t Stat"
2217 					"/P (T<=t) one-tail"
2218 					"/t Critical one-tail"
2219 					"/P (T<=t) two-tail"
2220 					"/t Critical two-tail"));
2221 
2222 
2223 	val_1 = value_dup (info->base.range_1);
2224 	val_2 = value_dup (info->base.range_2);
2225 
2226 	fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
2227 	gnm_func_inc_usage (fd_mean);
2228 	fd_var = gnm_func_lookup_or_add_placeholder ("VAR");
2229 	gnm_func_inc_usage (fd_var);
2230 	fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
2231 	gnm_func_inc_usage (fd_count);
2232 	fd_tdist = gnm_func_lookup_or_add_placeholder ("TDIST");
2233 	gnm_func_inc_usage (fd_tdist);
2234 	fd_abs = gnm_func_lookup_or_add_placeholder ("ABS");
2235 	gnm_func_inc_usage (fd_abs);
2236 	fd_tinv = gnm_func_lookup_or_add_placeholder ("TINV");
2237 	gnm_func_inc_usage (fd_tinv);
2238 
2239 	/* Labels */
2240 	analysis_tools_write_label_ftest (val_1, dao, 1, 0,
2241 					  info->base.labels, 1);
2242 	analysis_tools_write_label_ftest (val_2, dao, 2, 0,
2243 					  info->base.labels, 2);
2244 
2245 
2246 	/* Mean */
2247 	expr_1 = gnm_expr_new_constant (value_dup (val_1));
2248 	expr_mean_1 = gnm_expr_new_funcall1 (fd_mean, gnm_expr_copy (expr_1));
2249 	dao_set_cell_expr (dao, 1, 1, expr_mean_1);
2250 	expr_2 = gnm_expr_new_constant (value_dup (val_2));
2251 	expr_mean_2 = gnm_expr_new_funcall1 (fd_mean, gnm_expr_copy (expr_2));
2252 	dao_set_cell_expr (dao, 2, 1, gnm_expr_copy (expr_mean_2));
2253 
2254 	/* Variance */
2255 	expr_var_1 = gnm_expr_new_funcall1 (fd_var, gnm_expr_copy (expr_1));
2256 	dao_set_cell_expr (dao, 1, 2, expr_var_1);
2257 	expr_var_2 = gnm_expr_new_funcall1 (fd_var, gnm_expr_copy (expr_2));
2258 	dao_set_cell_expr (dao, 2, 2, gnm_expr_copy (expr_var_2));
2259 
2260 	/* Observations */
2261 	expr_count_1 = gnm_expr_new_funcall1 (fd_count, expr_1);
2262 	dao_set_cell_expr (dao, 1, 3, expr_count_1);
2263 	expr_count_2 = gnm_expr_new_funcall1 (fd_count, expr_2);
2264 	dao_set_cell_expr (dao, 2, 3, gnm_expr_copy (expr_count_2));
2265 
2266 	/* Hypothesized Mean Difference */
2267 	dao_set_cell_float (dao, 1, 4, info->mean_diff);
2268 
2269 	/* Observed Mean Difference */
2270 	if (dao_cell_is_visible (dao, 2,1)) {
2271 		gnm_expr_free (expr_mean_2);
2272 		expr_mean_2 = make_cellref (1, -4);
2273 	}
2274 	dao_set_cell_expr (dao, 1, 5,
2275 			   gnm_expr_new_binary
2276 			   (make_cellref (0, -4),
2277 			    GNM_EXPR_OP_SUB,
2278 			    expr_mean_2));
2279 
2280 	/* df */
2281 
2282 	{
2283 		GnmExpr const *expr_var_1 = make_cellref (0, -4);
2284 		GnmExpr const *expr_count_1 = make_cellref (0, -3);
2285 		GnmExpr const *expr_a;
2286 		GnmExpr const *expr_b;
2287 		GnmExpr const *expr_var_2_adj;
2288 		GnmExpr const *expr_count_2_adj;
2289 		GnmExpr const *expr_two = gnm_expr_new_constant
2290 			(value_new_int (2));
2291 		GnmExpr const *expr_one = gnm_expr_new_constant
2292 			(value_new_int (1));
2293 
2294 		if (dao_cell_is_visible (dao, 2,2)) {
2295 			expr_var_2_adj = make_cellref (1, -4);
2296 		} else
2297 			expr_var_2_adj = gnm_expr_copy (expr_var_2);
2298 
2299 		if (dao_cell_is_visible (dao, 2,3)) {
2300 			expr_count_2_adj = make_cellref (1, -3);
2301 		} else
2302 			expr_count_2_adj = gnm_expr_copy (expr_count_2);
2303 
2304 		expr_a = gnm_expr_new_binary (expr_var_1,
2305 					      GNM_EXPR_OP_DIV,
2306 					      gnm_expr_copy (expr_count_1));
2307 		expr_b = gnm_expr_new_binary (expr_var_2_adj,
2308 					      GNM_EXPR_OP_DIV,
2309 					      gnm_expr_copy (expr_count_2_adj));
2310 
2311 		dao_set_cell_expr (dao, 1, 6,
2312 				   gnm_expr_new_binary (
2313 					   gnm_expr_new_binary
2314 					   (gnm_expr_new_binary
2315 					    (gnm_expr_copy (expr_a),
2316 					     GNM_EXPR_OP_ADD,
2317 					     gnm_expr_copy (expr_b)),
2318 					    GNM_EXPR_OP_EXP,
2319 					    gnm_expr_copy (expr_two)),
2320 					   GNM_EXPR_OP_DIV,
2321 					   gnm_expr_new_binary
2322 					   (gnm_expr_new_binary
2323 					    (gnm_expr_new_binary
2324 					     (expr_a,
2325 					      GNM_EXPR_OP_EXP,
2326 					      gnm_expr_copy (expr_two)),
2327 					     GNM_EXPR_OP_DIV,
2328 					     gnm_expr_new_binary
2329 					     (expr_count_1,
2330 					      GNM_EXPR_OP_SUB,
2331 					      gnm_expr_copy (expr_one))),
2332 					    GNM_EXPR_OP_ADD,
2333 					    gnm_expr_new_binary
2334 					    (gnm_expr_new_binary
2335 					     (expr_b,
2336 					      GNM_EXPR_OP_EXP,
2337 					      expr_two),
2338 					     GNM_EXPR_OP_DIV,
2339 					     gnm_expr_new_binary
2340 					     (expr_count_2_adj,
2341 					      GNM_EXPR_OP_SUB,
2342 					      expr_one)))));
2343 	}
2344 
2345 	/* t */
2346 
2347 	{
2348 		GnmExpr const *expr_var_1 = make_cellref (0, -5);
2349 		GnmExpr const *expr_count_1 = make_cellref (0, -4);
2350 		GnmExpr const *expr_a;
2351 		GnmExpr const *expr_b;
2352 		GnmExpr const *expr_var_2_adj;
2353 		GnmExpr const *expr_count_2_adj;
2354 
2355 		if (dao_cell_is_visible (dao, 2,2)) {
2356 			gnm_expr_free (expr_var_2);
2357 			expr_var_2_adj = make_cellref (1, -5);
2358 		} else
2359 			expr_var_2_adj = expr_var_2;
2360 		if (dao_cell_is_visible (dao, 2,3)) {
2361 			gnm_expr_free (expr_count_2);
2362 			expr_count_2_adj = make_cellref (1, -4);
2363 		} else
2364 			expr_count_2_adj = expr_count_2;
2365 
2366 		expr_a = gnm_expr_new_binary (expr_var_1, GNM_EXPR_OP_DIV,
2367 					      expr_count_1);
2368 		expr_b = gnm_expr_new_binary (expr_var_2_adj, GNM_EXPR_OP_DIV,
2369 					      expr_count_2_adj);
2370 
2371 		dao_set_cell_expr (dao, 1, 7,
2372 				   gnm_expr_new_binary
2373 				   (gnm_expr_new_binary
2374 				    (make_cellref (0, -2),
2375 				     GNM_EXPR_OP_SUB,
2376 				     make_cellref (0, -3)),
2377 				    GNM_EXPR_OP_DIV,
2378 				    gnm_expr_new_binary
2379 					     (gnm_expr_new_binary
2380 					      (expr_a,
2381 					       GNM_EXPR_OP_ADD,
2382 					       expr_b),
2383 					      GNM_EXPR_OP_EXP,
2384 					      gnm_expr_new_constant
2385 					      (value_new_float (0.5)))));
2386 
2387 	}
2388 
2389 	/* P (T<=t) one-tail */
2390 	/* I9: =tdist(abs(Sheet1!I8),Sheet1!I7,1) */
2391 	dao_set_cell_expr
2392 		(dao, 1, 8,
2393 		 gnm_expr_new_funcall3
2394 		 (fd_tdist,
2395 		  gnm_expr_new_funcall1 (fd_abs,
2396 					 make_cellref (0, -1)),
2397 		  make_cellref (0, -2),
2398 		  gnm_expr_new_constant (value_new_int (1))));
2399 
2400 	/* t Critical one-tail */
2401         /* H10 = tinv(2*alpha,Sheet1!H7) */
2402 	dao_set_cell_expr
2403 		(dao, 1, 9,
2404 		 gnm_expr_new_funcall2
2405 		 (fd_tinv,
2406 		  gnm_expr_new_binary
2407 		  (gnm_expr_new_constant (value_new_int (2)),
2408 		   GNM_EXPR_OP_MULT,
2409 		   gnm_expr_new_constant
2410 		   (value_new_float (info->base.alpha))),
2411 		  make_cellref (0, -3)));
2412 
2413 	/* P (T<=t) two-tail */
2414 	/* I11: =tdist(abs(Sheet1!I8),Sheet1!I7,1) */
2415 	dao_set_cell_expr
2416 		(dao, 1, 10,
2417 		 gnm_expr_new_funcall3
2418 		 (fd_tdist,
2419 		  gnm_expr_new_funcall1 (fd_abs,
2420 					 make_cellref (0, -3)),
2421 		  make_cellref (0, -4),
2422 		  gnm_expr_new_constant (value_new_int (2))));
2423 
2424 	/* t Critical two-tail */
2425 	dao_set_cell_expr
2426 		(dao, 1, 11,
2427 		 gnm_expr_new_funcall2
2428 		 (fd_tinv,
2429 		  gnm_expr_new_constant
2430 		  (value_new_float (info->base.alpha)),
2431 		  make_cellref (0, -5)));
2432 
2433 	/* And finish up */
2434 
2435 	gnm_func_dec_usage (fd_mean);
2436 	gnm_func_dec_usage (fd_var);
2437 	gnm_func_dec_usage (fd_count);
2438 	gnm_func_dec_usage (fd_tdist);
2439 	gnm_func_dec_usage (fd_abs);
2440 	gnm_func_dec_usage (fd_tinv);
2441 
2442 	value_release (val_1);
2443 	value_release (val_2);
2444 
2445 	dao_redraw_respan (dao);
2446 	return FALSE;
2447 }
2448 
2449 gboolean
analysis_tool_ttest_neqvar_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)2450 analysis_tool_ttest_neqvar_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
2451 				  analysis_tool_engine_t selector, gpointer result)
2452 {
2453 	switch (selector) {
2454 	case TOOL_ENGINE_UPDATE_DESCRIPTOR:
2455 		return (dao_command_descriptor (dao, _("t-Test (%s)"), result)
2456 			== NULL);
2457 	case TOOL_ENGINE_UPDATE_DAO:
2458 		dao_adjust (dao, 3, 12);
2459 		return FALSE;
2460 	case TOOL_ENGINE_CLEAN_UP:
2461 		return analysis_tool_generic_b_clean (specs);
2462 	case TOOL_ENGINE_LAST_VALIDITY_CHECK:
2463 		return FALSE;
2464 	case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
2465 		dao_prepare_output (NULL, dao, _("t-Test"));
2466 		return FALSE;
2467 	case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
2468 		return dao_format_output (dao, _("t-Test"));
2469 	case TOOL_ENGINE_PERFORM_CALC:
2470 	default:
2471 		return analysis_tool_ttest_neqvar_engine_run (dao, specs);
2472 	}
2473 	return TRUE;  /* We shouldn't get here */
2474 }
2475 
2476 
2477 /************* F-Test Tool *********************************************
2478  *
2479  * The results are given in a table which can be printed out in a new
2480  * sheet, in a new workbook, or simply into an existing sheet.
2481  *
2482  **/
2483 
2484 
2485 /* F-Test: Two-Sample for Variances
2486  */
2487 static gboolean
analysis_tool_ftest_engine_run(data_analysis_output_t * dao,analysis_tools_data_generic_b_t * info)2488 analysis_tool_ftest_engine_run (data_analysis_output_t *dao,
2489 				analysis_tools_data_generic_b_t *info)
2490 {
2491 	GnmValue *val_1 = value_dup (info->range_1);
2492 	GnmValue *val_2 = value_dup (info->range_2);
2493 	GnmExpr const *expr;
2494 	GnmExpr const *expr_var_denum;
2495 	GnmExpr const *expr_count_denum;
2496 	GnmExpr const *expr_df_denum = NULL;
2497 
2498 	GnmFunc *fd_finv;
2499 
2500 	fd_finv = gnm_func_lookup_or_add_placeholder ("FINV");
2501 	gnm_func_inc_usage (fd_finv);
2502 
2503 	dao_set_italic (dao, 0, 0, 0, 11);
2504 	dao_set_cell (dao, 0, 0, _("F-Test"));
2505 	set_cell_text_col (dao, 0, 1, _("/Mean"
2506 					"/Variance"
2507 					"/Observations"
2508 					"/df"
2509 					"/F"
2510 					"/P (F<=f) right-tail"
2511 					"/F Critical right-tail"
2512 					"/P (f<=F) left-tail"
2513 					"/F Critical left-tail"
2514 					"/P two-tail"
2515 					"/F Critical two-tail"));
2516 
2517 	/* Label */
2518 	dao_set_italic (dao, 0, 0, 2, 0);
2519 	analysis_tools_write_label_ftest (val_1, dao, 1, 0, info->labels, 1);
2520 	analysis_tools_write_label_ftest (val_2, dao, 2, 0, info->labels, 2);
2521 
2522 	/* Mean */
2523 	{
2524 		GnmFunc *fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
2525 		gnm_func_inc_usage (fd_mean);
2526 
2527 		dao_set_cell_expr
2528 			(dao, 1, 1,
2529 			 gnm_expr_new_funcall1
2530 			 (fd_mean,
2531 			  gnm_expr_new_constant (value_dup (val_1))));
2532 
2533 		dao_set_cell_expr
2534 			(dao, 2, 1,
2535 			 gnm_expr_new_funcall1
2536 			 (fd_mean,
2537 			  gnm_expr_new_constant (value_dup (val_2))));
2538 
2539 		gnm_func_dec_usage (fd_mean);
2540 	}
2541 
2542 	/* Variance */
2543 	{
2544 		GnmFunc *fd_var = gnm_func_lookup_or_add_placeholder ("VAR");
2545 		gnm_func_inc_usage (fd_var);
2546 
2547 		dao_set_cell_expr
2548 			(dao, 1, 2,
2549 			 gnm_expr_new_funcall1
2550 			 (fd_var,
2551 			  gnm_expr_new_constant (value_dup (val_1))));
2552 
2553 		expr_var_denum = gnm_expr_new_funcall1
2554 			(fd_var,
2555 			 gnm_expr_new_constant (value_dup (val_2)));
2556 		dao_set_cell_expr (dao, 2, 2, gnm_expr_copy (expr_var_denum));
2557 
2558 		gnm_func_dec_usage (fd_var);
2559 	}
2560 
2561         /* Count */
2562 	{
2563 		GnmFunc *fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
2564 		gnm_func_inc_usage (fd_count);
2565 
2566 		dao_set_cell_expr
2567 			(dao, 1, 3,
2568 			 gnm_expr_new_funcall1
2569 			 (fd_count,
2570 			  gnm_expr_new_constant (value_dup (val_1))));
2571 
2572 		expr_count_denum = gnm_expr_new_funcall1
2573 			(fd_count,
2574 			 gnm_expr_new_constant (value_dup (val_2)));
2575 		dao_set_cell_expr (dao, 2, 3, gnm_expr_copy (expr_count_denum));
2576 
2577 		gnm_func_dec_usage (fd_count);
2578 	}
2579 
2580 	/* df */
2581 	{
2582 		expr = gnm_expr_new_binary
2583 			(make_cellref (0, -1),
2584 			 GNM_EXPR_OP_SUB,
2585 			 gnm_expr_new_constant (value_new_int (1)));
2586 		dao_set_cell_expr (dao, 1, 4, gnm_expr_copy (expr));
2587 		dao_set_cell_expr (dao, 2, 4, expr);
2588 	}
2589 
2590 	/* F value */
2591 	if (dao_cell_is_visible (dao, 2, 2)) {
2592 		expr = gnm_expr_new_binary
2593 			(make_cellref (0, -3),
2594 			 GNM_EXPR_OP_DIV,
2595 			 make_cellref (1, -3));
2596 		gnm_expr_free (expr_var_denum);
2597 	} else {
2598 		expr = gnm_expr_new_binary
2599 			(make_cellref (0, -3),
2600 			 GNM_EXPR_OP_DIV,
2601 			 expr_var_denum);
2602 	}
2603 	dao_set_cell_expr (dao, 1, 5, expr);
2604 
2605 	/* P right-tail */
2606 	{
2607 		GnmFunc *fd_fdist = gnm_func_lookup_or_add_placeholder ("FDIST");
2608 		const GnmExpr *arg3;
2609 
2610 		gnm_func_inc_usage (fd_fdist);
2611 
2612 		if (dao_cell_is_visible (dao, 2, 2)) {
2613 			arg3 = make_cellref (1, -2);
2614 			gnm_expr_free (expr_count_denum);
2615 		} else {
2616 			expr_df_denum = gnm_expr_new_binary
2617 				(expr_count_denum,
2618 				 GNM_EXPR_OP_SUB,
2619 				 gnm_expr_new_constant (value_new_int (1)));
2620 			arg3 = gnm_expr_copy (expr_df_denum);
2621 		}
2622 
2623 		dao_set_cell_expr
2624 			(dao, 1, 6,
2625 			 gnm_expr_new_funcall3
2626 			 (fd_fdist,
2627 			  make_cellref (0, -1),
2628 			  make_cellref (0, -2),
2629 			  arg3));
2630 
2631 		gnm_func_dec_usage (fd_fdist);
2632 	}
2633 
2634 	/* F critical right-tail */
2635 	{
2636 		const GnmExpr *arg3;
2637 
2638 		if (expr_df_denum == NULL) {
2639 			arg3 = make_cellref (1, -3);
2640 		} else {
2641 			arg3 = gnm_expr_copy (expr_df_denum);
2642 		}
2643 
2644 		dao_set_cell_expr
2645 			(dao, 1, 7,
2646 			 gnm_expr_new_funcall3
2647 			 (fd_finv,
2648 			  gnm_expr_new_constant (value_new_float (info->alpha)),
2649 			  make_cellref (0, -3),
2650 			  arg3));
2651 	}
2652 
2653 	/* P left-tail */
2654 	dao_set_cell_expr (dao, 1, 8,
2655 			   gnm_expr_new_binary
2656 			   (gnm_expr_new_constant (value_new_int (1)),
2657 			    GNM_EXPR_OP_SUB,
2658 			    make_cellref (0, -2)));
2659 
2660 	/* F critical left-tail */
2661 	{
2662 		const GnmExpr *arg3;
2663 
2664 		if (expr_df_denum == NULL) {
2665 			arg3 = make_cellref (1, -5);
2666 		} else {
2667 			arg3 = gnm_expr_copy (expr_df_denum);
2668 		}
2669 
2670 		dao_set_cell_expr
2671 			(dao, 1, 9,
2672 			 gnm_expr_new_funcall3
2673 			 (fd_finv,
2674 			  gnm_expr_new_constant
2675 			  (value_new_float (1. - info->alpha)),
2676 			  make_cellref (0, -5),
2677 			  arg3));
2678 	}
2679 
2680 	/* P two-tail */
2681 	{
2682 		GnmFunc *fd_min = gnm_func_lookup_or_add_placeholder ("MIN");
2683 
2684 		gnm_func_inc_usage (fd_min);
2685 
2686 		dao_set_cell_expr
2687 			(dao, 1, 10,
2688 			 gnm_expr_new_binary
2689 			 (gnm_expr_new_constant (value_new_int (2)),
2690 			  GNM_EXPR_OP_MULT,
2691 			  gnm_expr_new_funcall2
2692 			  (fd_min,
2693 			   make_cellref (0, -4),
2694 			   make_cellref (0, -2))));
2695 		gnm_func_dec_usage (fd_min);
2696 	}
2697 
2698 	/* F critical two-tail (left) */
2699 	{
2700 		const GnmExpr *arg3;
2701 
2702 		if (expr_df_denum == NULL) {
2703 			arg3 = make_cellref (1, -7);
2704 		} else {
2705 			arg3 = expr_df_denum;
2706 		}
2707 
2708 		dao_set_cell_expr
2709 			(dao, 1, 11,
2710 			 gnm_expr_new_funcall3
2711 			 (fd_finv,
2712 			  gnm_expr_new_constant
2713 			  (value_new_float (1 - info->alpha / 2.)),
2714 			  make_cellref (0, -7),
2715 			  arg3));
2716 	}
2717 
2718 	/* F critical two-tail (right) */
2719 	dao_set_cell_expr
2720 		(dao, 2, 11,
2721 		 gnm_expr_new_funcall3
2722 		 (fd_finv,
2723 		  gnm_expr_new_constant
2724 		  (value_new_float (info->alpha / 2.)),
2725 		  make_cellref (-1, -7),
2726 		  make_cellref (0, -7)));
2727 
2728 	value_release (val_1);
2729 	value_release (val_2);
2730 
2731 	gnm_func_dec_usage (fd_finv);
2732 
2733 	dao_redraw_respan (dao);
2734 	return FALSE;
2735 }
2736 
2737 gboolean
analysis_tool_ftest_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)2738 analysis_tool_ftest_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
2739 			    analysis_tool_engine_t selector, gpointer result)
2740 {
2741 	switch (selector) {
2742 	case TOOL_ENGINE_UPDATE_DESCRIPTOR:
2743 		return (dao_command_descriptor (dao, _("F-Test (%s)"), result)
2744 			== NULL);
2745 	case TOOL_ENGINE_UPDATE_DAO:
2746 		dao_adjust (dao, 3, 12);
2747 		return FALSE;
2748 	case TOOL_ENGINE_CLEAN_UP:
2749 		return analysis_tool_generic_b_clean (specs);
2750 	case TOOL_ENGINE_LAST_VALIDITY_CHECK:
2751 		return FALSE;
2752 	case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
2753 		dao_prepare_output (NULL, dao, _("F-Test"));
2754 		return FALSE;
2755 	case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
2756 		return dao_format_output (dao, _("F-Test"));
2757 	case TOOL_ENGINE_PERFORM_CALC:
2758 	default:
2759 		return analysis_tool_ftest_engine_run (dao, specs);
2760 	}
2761 	return TRUE;  /* We shouldn't get here */
2762 }
2763 
2764 
2765 
2766 /************* Regression Tool *********************************************
2767  *
2768  * The results are given in a table which can be printed out in a new
2769  * sheet, in a new workbook, or simply into an existing sheet.
2770  *
2771  * Excel Bug 1: (Andrew) I believe that the following is a bug in Excel: When
2772  * calculating the  F-statistic in the no-intercept case, it will use xdim as
2773  * the numerator df and (n - xdim) as the denominator df, which is as it should
2774  * be. However, in the regression it will then calculate the significance of the
2775  * F-statistic using (n - #slope parameters - 1) as the denominator df, which
2776  * makes sense when you are calculating an intercept, but in this case you are not
2777  * and the df should be just (n - #slope parameters). Excel is inconsistent,
2778  * in that it does not use the same df to calculate the significance that it
 * does to calculate the F-stat itself. Inference on regressions
 * without intercepts doesn't really work anyway (because of the way the
 * statistics work, not the code), so this is not a terribly big deal, and
 * those who would actually use the significance of F are not likely to be
 * using interceptless regressions anyway. So while it is easy to mimic Excel
 * in this respect, currently we do not and choose what at least for now seems
 * to be more correct.
2786  *
2787  * Excel Bug 2: (Andrew) Also in the no-intercept case: Excel has some weird way of
2788  * calculating the adjusted R^2 value that makes absolutely no sense to me, so
2789  * I couldn't mimic it if I tried. Again, what statistical opinion I have found
2790  * suggests that if you're running interceptless regressions, you won't know what
2791  * to do with an adjusted R^2 anyway.
2792  *
2793  **/
2794 
2795 static gint
calculate_xdim(GnmValue * input,group_by_t group_by)2796 calculate_xdim (GnmValue *input, group_by_t  group_by)
2797 {
2798 		GnmRange r;
2799 
2800 		g_return_val_if_fail (input != NULL, 0);
2801 
2802 		if (NULL == range_init_value (&r, input))
2803 			return 0;
2804 
2805 		if (group_by == GROUPED_BY_ROW)
2806 			return range_height (&r);
2807 
2808 		return range_width (&r);
2809 }
2810 
2811 static gint
calculate_n_obs(GnmValue * input,group_by_t group_by)2812 calculate_n_obs (GnmValue *input, group_by_t  group_by)
2813 {
2814 		GnmRange r;
2815 
2816 		g_return_val_if_fail (input != NULL, 0);
2817 
2818 		if (NULL == range_init_value (&r, input))
2819 			return 0;
2820 
2821 		if (group_by == GROUPED_BY_ROW)
2822 			return range_width (&r);
2823 
2824 		return range_height (&r);
2825 }
2826 
2827 
2828 static gboolean
analysis_tool_regression_engine_run(data_analysis_output_t * dao,analysis_tools_data_regression_t * info)2829 analysis_tool_regression_engine_run (data_analysis_output_t *dao,
2830 				     analysis_tools_data_regression_t *info)
2831 {
2832 	gint xdim = calculate_xdim (info->base.range_1, info->group_by);
2833 	gint i;
2834 
2835 	GnmValue *val_1 = value_dup (info->base.range_1);
2836 	GnmValue *val_2 = value_dup (info->base.range_2);
2837 	GnmValue *val_1_cp = NULL;
2838 	GnmValue *val_2_cp = NULL;
2839 
2840 	GnmExpr const *expr_x;
2841 	GnmExpr const *expr_y;
2842 	GnmExpr const *expr_linest;
2843 	GnmExpr const *expr_intercept;
2844 	GnmExpr const *expr_ms;
2845 	GnmExpr const *expr_sum;
2846 	GnmExpr const *expr_tstat;
2847 	GnmExpr const *expr_pvalue;
2848 	GnmExpr const *expr_n;
2849 	GnmExpr const *expr_df;
2850 	GnmExpr const *expr_lower;
2851 	GnmExpr const *expr_upper;
2852 	GnmExpr const *expr_confidence;
2853 
2854 	GnmFunc *fd_linest    = analysis_tool_get_function ("LINEST", dao);
2855 	GnmFunc *fd_index     = analysis_tool_get_function ("INDEX", dao);
2856 	GnmFunc *fd_fdist     = analysis_tool_get_function ("FDIST", dao);
2857 	GnmFunc *fd_sum       = analysis_tool_get_function ("SUM", dao);
2858 	GnmFunc *fd_sqrt      = analysis_tool_get_function ("SQRT", dao);
2859 	GnmFunc *fd_tdist     = analysis_tool_get_function ("TDIST", dao);
2860 	GnmFunc *fd_abs       = analysis_tool_get_function ("ABS", dao);
2861 	GnmFunc *fd_tinv      = analysis_tool_get_function ("TINV", dao);
2862 	GnmFunc *fd_transpose = analysis_tool_get_function ("TRANSPOSE", dao);
2863 	GnmFunc *fd_concatenate = NULL;
2864 	GnmFunc *fd_cell = NULL;
2865 	GnmFunc *fd_offset = NULL;
2866 	GnmFunc *fd_sumproduct = NULL;
2867 	GnmFunc *fd_leverage = NULL;
2868 
2869 	char const *str = ((info->group_by == GROUPED_BY_ROW) ? "row" : "col");
2870 	char const *label = ((info->group_by == GROUPED_BY_ROW) ? _("Row")
2871 			     : _("Column"));
2872 
2873 	if (!info->base.labels) {
2874 		fd_concatenate = analysis_tool_get_function ("CONCATENATE",
2875 							     dao);
2876 		fd_cell        = analysis_tool_get_function ("CELL", dao);
2877 		fd_offset      = analysis_tool_get_function ("OFFSET", dao);
2878 	}
2879 	if (info->residual) {
2880 		fd_sumproduct  = analysis_tool_get_function ("SUMPRODUCT", dao);
2881 		fd_leverage = analysis_tool_get_function ("LEVERAGE", dao);
2882 	}
2883 
2884 	cb_adjust_areas (val_1, NULL);
2885 	cb_adjust_areas (val_2, NULL);
2886 
2887 	dao_set_italic (dao, 0, 0, 0, 16 + xdim);
2888         set_cell_text_col (dao, 0, 0, _("/SUMMARY OUTPUT"
2889 					"/"
2890 					"/Regression Statistics"
2891 					"/Multiple R"
2892 					"/R^2"
2893 					"/Standard Error"
2894 					"/Adjusted R^2"
2895 					"/Observations"
2896 					"/"
2897 					"/ANOVA"
2898 					"/"
2899 					"/Regression"
2900 					"/Residual"
2901 					"/Total"
2902 					"/"
2903 					"/"
2904 					"/Intercept"));
2905 	dao_set_merge (dao, 0, 0, 1, 0);
2906 	dao_set_italic (dao, 2, 0, 3, 0);
2907 	dao_set_cell (dao, 2, 0, _("Response Variable"));
2908 	dao_set_merge (dao, 0, 2, 1, 2);
2909 
2910 	if (info->base.labels) {
2911 
2912 		dao_set_cell_expr (dao, 3, 0,
2913 				   gnm_expr_new_funcall1 (fd_index, gnm_expr_new_constant (value_dup (val_2))));
2914 
2915 		val_1_cp =  value_dup (val_1);
2916 		val_2_cp =  value_dup (val_2);
2917 		if (info->group_by == GROUPED_BY_ROW) {
2918 			val_1->v_range.cell.a.col++;
2919 			val_2->v_range.cell.a.col++;
2920 			val_1_cp->v_range.cell.b.col = val_1_cp->v_range.cell.a.col;
2921 			dao_set_array_expr (dao, 0, 17, 1, xdim, gnm_expr_new_constant
2922 					    (value_dup (val_1_cp)));
2923 		} else {
2924 			val_1->v_range.cell.a.row++;
2925 			val_2->v_range.cell.a.row++;
2926 			val_1_cp->v_range.cell.b.row = val_1_cp->v_range.cell.a.row;
2927 			dao_set_array_expr (dao, 0, 17, 1, xdim, gnm_expr_new_funcall1
2928 					    (fd_transpose,
2929 					     gnm_expr_new_constant (value_dup (val_1_cp))));
2930 		}
2931 	} else {
2932 		dao_set_cell_expr (dao, 3, 0, gnm_expr_new_funcall3
2933 				   (fd_concatenate, gnm_expr_new_constant (value_new_string (label)),
2934 				    gnm_expr_new_constant (value_new_string (" ")),
2935 				    gnm_expr_new_funcall2 (fd_cell,
2936 							   gnm_expr_new_constant (value_new_string (str)),
2937 							   gnm_expr_new_constant (value_dup (val_2)))));
2938 	}
2939 
2940 	dao_set_italic (dao, 1, 10, 5, 10);
2941         set_cell_text_row (dao, 1, 10, _("/df"
2942 					 "/SS"
2943 					 "/MS"
2944 					 "/F"
2945 					 "/Significance of F"));
2946 
2947 	dao_set_italic (dao, 1, 15, 6, 15);
2948 	set_cell_text_row (dao, 1, 15, _("/Coefficients"
2949 					 "/Standard Error"
2950 					 "/t-Statistics"
2951 					 "/p-Value"));
2952 
2953 	/* xgettext: this is an Excel-style number format.  Use "..." quotes and do not translate the 0% */
2954 	dao_set_format  (dao, 5, 15, 5, 15, _("\"Lower\" 0%"));
2955 	/* xgettext: this is an Excel-style number format.  Use "..." quotes and do not translate the 0% */
2956 	dao_set_format  (dao, 6, 15, 6, 15, _("\"Upper\" 0%"));
2957 	dao_set_align (dao, 5, 15, 5, 15, GNM_HALIGN_LEFT, GNM_VALIGN_TOP);
2958 	dao_set_align (dao, 6, 15, 6, 15, GNM_HALIGN_RIGHT, GNM_VALIGN_TOP);
2959 
2960 	dao_set_cell_float (dao, 5, 15, 1.0 - info->base.alpha);
2961 	dao_set_cell_expr (dao, 6, 15, make_cellref (-1, 0));
2962 	expr_confidence = dao_get_cellref (dao, 5, 15);
2963 
2964 	dao_set_cell_comment (dao, 4, 15,
2965 			      _("Probability of observing a t-statistic\n"
2966 				"whose absolute value is at least as large\n"
2967 				"as the absolute value of the actually\n"
2968 				"observed t-statistic, assuming the null\n"
2969 				"hypothesis is in fact true."));
2970 	if (!info->intercept)
2971 		dao_set_cell_comment (dao, 0, 4,
2972 			      _("This value is not the square of R\n"
2973 				"but the uncentered version of the\n"
2974 				"coefficient of determination; that\n"
2975 				"is, the proportion of the sum of\n"
2976 				"squares explained by the model."));
2977 
2978 	expr_x = gnm_expr_new_constant (value_dup (val_1));
2979 	expr_y = gnm_expr_new_constant (value_dup (val_2));
2980 
2981 	expr_intercept = gnm_expr_new_constant (value_new_bool (info->intercept));
2982 
2983 	expr_linest = gnm_expr_new_funcall4 (fd_linest,
2984 					     expr_y,
2985 					     expr_x,
2986 					     expr_intercept,
2987 					     gnm_expr_new_constant (value_new_bool (TRUE)));
2988 
2989 
2990 	/* Multiple R */
2991 	if (info->intercept) {
2992 		if (dao_cell_is_visible (dao, 1, 4))
2993 			dao_set_cell_expr (dao, 1, 3, gnm_expr_new_funcall1 (fd_sqrt, make_cellref (0, 1)));
2994 		else
2995 			dao_set_cell_expr (dao, 1, 3,
2996 					   gnm_expr_new_funcall1 (fd_sqrt, gnm_expr_new_funcall3
2997 								  (fd_index,
2998 								   gnm_expr_copy (expr_linest),
2999 								   gnm_expr_new_constant (value_new_int (3)),
3000 								   gnm_expr_new_constant (value_new_int (1)))));
3001 	} else
3002 			dao_set_cell_expr (dao, 1, 3,
3003 					   gnm_expr_new_funcall1 (fd_sqrt, gnm_expr_new_funcall3
3004 								  (fd_index,
3005 								   gnm_expr_new_funcall4
3006 								   (fd_linest,
3007 								    gnm_expr_new_constant (value_dup (val_2)),
3008 								    gnm_expr_new_constant (value_dup (val_1)),
3009 								    gnm_expr_new_constant (value_new_bool (TRUE)),
3010 								    gnm_expr_new_constant (value_new_bool (TRUE))),
3011 								   gnm_expr_new_constant (value_new_int (3)),
3012 								   gnm_expr_new_constant (value_new_int (1)))));
3013 
3014 
3015 	/* R Square */
3016 	dao_set_cell_array_expr (dao, 1, 4,
3017 				 gnm_expr_new_funcall3 (fd_index,
3018 							gnm_expr_copy (expr_linest),
3019 							gnm_expr_new_constant (value_new_int (3)),
3020 							gnm_expr_new_constant (value_new_int (1))));
3021 
3022 	/* Standard Error */
3023 	dao_set_cell_array_expr (dao, 1, 5,
3024 				 gnm_expr_new_funcall3 (fd_index,
3025 							gnm_expr_copy (expr_linest),
3026 							gnm_expr_new_constant (value_new_int (3)),
3027 							gnm_expr_new_constant (value_new_int (2))));
3028 
3029 	/* Adjusted R Square */
3030 	if (dao_cell_is_visible (dao, 1, 7))
3031 		expr_n = make_cellref (0, 1);
3032 	else
3033 		expr_n = gnm_expr_new_funcall3 (fd_sum,
3034 						gnm_expr_new_constant (value_new_int (xdim)),
3035 						gnm_expr_new_funcall3 (fd_index,
3036 								       gnm_expr_copy (expr_linest),
3037 								       gnm_expr_new_constant (value_new_int (4)),
3038 								       gnm_expr_new_constant (value_new_int (2))),
3039 						gnm_expr_new_constant (value_new_int (1)));
3040 
3041 	dao_set_cell_expr (dao, 1, 6, gnm_expr_new_binary
3042 			   (gnm_expr_new_constant (value_new_int (1)),
3043 			    GNM_EXPR_OP_SUB,
3044 			    gnm_expr_new_binary
3045 			    (gnm_expr_new_binary
3046 			     (gnm_expr_new_binary
3047 			      (gnm_expr_copy (expr_n),
3048 			       GNM_EXPR_OP_SUB,
3049 			       gnm_expr_new_constant (value_new_int (1))),
3050 			      GNM_EXPR_OP_DIV,
3051 			      gnm_expr_new_binary
3052 			      (expr_n,
3053 			       GNM_EXPR_OP_SUB,
3054 			       gnm_expr_new_constant (value_new_int (xdim + (info->intercept?1:0))))),
3055 			     GNM_EXPR_OP_MULT,
3056 			     gnm_expr_new_binary
3057 			     (gnm_expr_new_constant (value_new_int (1)),
3058 			      GNM_EXPR_OP_SUB,
3059 			      make_cellref (0, -2)))));
3060 
3061 	/* Observations */
3062 
3063 	if (dao_cell_is_visible (dao, 1, 13))
3064 		dao_set_cell_expr (dao, 1, 7,
3065 				   gnm_expr_new_funcall2 (fd_sum,
3066 							  make_cellref (0, 6),
3067 							  gnm_expr_new_constant (value_new_int (info->intercept?1:0))));
3068 	else if (dao_cell_is_visible (dao, 1, 12))
3069 		dao_set_cell_expr (dao, 1, 7,
3070 				   gnm_expr_new_funcall3 (fd_sum,
3071 							  make_cellref (0, 4),
3072 							  make_cellref (0, 5),
3073 							  gnm_expr_new_constant (value_new_int (info->intercept?1:0))));
3074 	else
3075 		dao_set_cell_expr (dao, 1, 7,
3076 				   gnm_expr_new_funcall3 (fd_sum,
3077 							  gnm_expr_new_constant (value_new_int (xdim)),
3078 							  gnm_expr_new_funcall3 (fd_index,
3079 										 gnm_expr_copy (expr_linest),
3080 										 gnm_expr_new_constant (value_new_int (4)),
3081 										 gnm_expr_new_constant (value_new_int (2))),
3082 							  gnm_expr_new_constant (value_new_int (info->intercept?1:0))));
3083 
3084 
3085 
3086 	/* Regression / df */
3087 
3088 	dao_set_cell_int (dao, 1, 11, xdim);
3089 
3090 	/* Residual / df */
3091 	dao_set_cell_array_expr (dao, 1, 12,
3092 				 gnm_expr_new_funcall3 (fd_index,
3093 							gnm_expr_copy (expr_linest),
3094 							gnm_expr_new_constant (value_new_int (4)),
3095 							gnm_expr_new_constant (value_new_int (2))));
3096 
3097 
3098 	/* Total / df */
3099 	expr_sum = gnm_expr_new_binary (make_cellref (0, -2),
3100 				       GNM_EXPR_OP_ADD,
3101 				       make_cellref (0, -1));
3102 	dao_set_cell_expr (dao, 1, 13, gnm_expr_copy (expr_sum));
3103 
3104 	/* Regression / SS */
3105 	dao_set_cell_array_expr (dao, 2, 11,
3106 				 gnm_expr_new_funcall3 (fd_index,
3107 							gnm_expr_copy (expr_linest),
3108 							gnm_expr_new_constant (value_new_int (5)),
3109 							gnm_expr_new_constant (value_new_int (1))));
3110 
3111 	/* Residual / SS */
3112 	dao_set_cell_array_expr (dao, 2, 12,
3113 				 gnm_expr_new_funcall3 (fd_index,
3114 							gnm_expr_copy (expr_linest),
3115 							gnm_expr_new_constant (value_new_int (5)),
3116 							gnm_expr_new_constant (value_new_int (2))));
3117 
3118 
3119 	/* Total / SS */
3120 	dao_set_cell_expr (dao, 2, 13, expr_sum);
3121 
3122 
3123 	/* Regression / MS */
3124 	expr_ms = gnm_expr_new_binary (make_cellref (-1, 0),
3125 				       GNM_EXPR_OP_DIV,
3126 				       make_cellref (-2, 0));
3127 	dao_set_cell_expr (dao, 3, 11, gnm_expr_copy (expr_ms));
3128 
3129 	/* Residual / MS */
3130 	dao_set_cell_expr (dao, 3, 12, expr_ms);
3131 
3132 
3133 	/* F */
3134 	dao_set_cell_array_expr (dao, 4, 11,
3135 				 gnm_expr_new_funcall3 (fd_index,
3136 							gnm_expr_copy (expr_linest),
3137 							gnm_expr_new_constant (value_new_int (4)),
3138 							gnm_expr_new_constant (value_new_int (1))));
3139 
3140 	/* Significance of F */
3141 
3142 	if (dao_cell_is_visible (dao, 1, 12))
3143 		dao_set_cell_expr (dao, 5, 11, gnm_expr_new_funcall3 (fd_fdist,
3144 								      make_cellref (-1, 0),
3145 								      make_cellref (-4, 0),
3146 								      make_cellref (-4, 1)));
3147 	else
3148 		dao_set_cell_expr (dao, 5, 11, gnm_expr_new_funcall3 (fd_fdist,
3149 								      make_cellref (-1, 0),
3150 								      make_cellref (-4, 0),
3151 								      gnm_expr_new_funcall3
3152 								      (fd_index,
3153 								       gnm_expr_copy (expr_linest),
3154 								       gnm_expr_new_constant (value_new_int (4)),
3155 								       gnm_expr_new_constant (value_new_int (2)))));
3156 
3157 
3158 	/* Intercept */
3159 
3160 
3161 	expr_tstat = gnm_expr_new_binary (make_cellref (-2, 0),
3162 				       GNM_EXPR_OP_DIV,
3163 				       make_cellref (-1, 0));
3164 	expr_df = dao_get_cellref (dao, 1, 12);
3165 	expr_pvalue = gnm_expr_new_funcall3 (fd_tdist, gnm_expr_new_funcall1 (fd_abs, make_cellref (-1, 0)),
3166 					     gnm_expr_copy (expr_df),
3167 					     gnm_expr_new_constant (value_new_int (2)));
3168 	expr_lower = gnm_expr_new_binary (make_cellref (-4, 0),
3169 				      GNM_EXPR_OP_SUB,
3170 				      gnm_expr_new_binary (make_cellref (-3, 0),
3171 							   GNM_EXPR_OP_MULT,
3172 							   gnm_expr_new_funcall2
3173 							   (fd_tinv,
3174 							    gnm_expr_new_binary
3175 							    (gnm_expr_new_constant (value_new_float (1.0)),
3176 							     GNM_EXPR_OP_SUB,
3177 							     gnm_expr_copy (expr_confidence)),
3178 							    gnm_expr_copy (expr_df))));
3179 	expr_upper = gnm_expr_new_binary (make_cellref (-5, 0),
3180 				      GNM_EXPR_OP_ADD,
3181 				      gnm_expr_new_binary (make_cellref (-4, 0),
3182 							   GNM_EXPR_OP_MULT,
3183 							   gnm_expr_new_funcall2
3184 							   (fd_tinv,
3185 							    gnm_expr_new_binary
3186 							    (gnm_expr_new_constant (value_new_float (1.0)),
3187 							     GNM_EXPR_OP_SUB,
3188 							     expr_confidence),
3189 							    expr_df)));
3190 
3191 
3192 	/* Intercept */
3193 
3194 	if (!info->intercept) {
3195 		dao_set_cell_int (dao, 1, 16, 0);
3196 		for (i = 2; i <= 6; i++)
3197 			dao_set_cell_na (dao, i, 16);
3198 	} else {
3199 		dao_set_cell_array_expr (dao, 1, 16,
3200 					 gnm_expr_new_funcall3
3201 					 (fd_index,
3202 					  gnm_expr_copy (expr_linest),
3203 					  gnm_expr_new_constant (value_new_int (1)),
3204 					  gnm_expr_new_constant (value_new_int (xdim+1))));
3205 		dao_set_cell_array_expr (dao, 2, 16,
3206 					 gnm_expr_new_funcall3
3207 					 (fd_index,
3208 					  gnm_expr_copy (expr_linest),
3209 					  gnm_expr_new_constant (value_new_int (2)),
3210 					  gnm_expr_new_constant (value_new_int (xdim+1))));
3211 		dao_set_cell_expr (dao, 3, 16, gnm_expr_copy (expr_tstat));
3212 		dao_set_cell_expr (dao, 4, 16, gnm_expr_copy (expr_pvalue));
3213 		dao_set_cell_expr (dao, 5, 16, gnm_expr_copy (expr_lower));
3214 		dao_set_cell_expr (dao, 6, 16, gnm_expr_copy (expr_upper));
3215 	}
3216 
3217 	/* Coefficients */
3218 
3219 	dao->offset_row += 17;
3220 
3221 	for (i = 0; i < xdim; i++) {
3222 		if (!info->base.labels) {
3223 			GnmExpr const *expr_offset;
3224 
3225 			if (info->group_by == GROUPED_BY_ROW)
3226 				expr_offset = gnm_expr_new_funcall3
3227 					(fd_offset, gnm_expr_new_constant (value_dup (val_1)),
3228 					 gnm_expr_new_constant (value_new_int (i)),
3229 					 gnm_expr_new_constant (value_new_int (0)));
3230 			else
3231 				expr_offset = gnm_expr_new_funcall3
3232 					(fd_offset, gnm_expr_new_constant (value_dup (val_1)),
3233 					 gnm_expr_new_constant (value_new_int (0)),
3234 					 gnm_expr_new_constant (value_new_int (i)));
3235 
3236 			dao_set_cell_expr (dao, 0, i, gnm_expr_new_funcall3
3237 					   (fd_concatenate, gnm_expr_new_constant (value_new_string (label)),
3238 					    gnm_expr_new_constant (value_new_string (" ")),
3239 					    gnm_expr_new_funcall2
3240 					    (fd_cell,
3241 					     gnm_expr_new_constant (value_new_string (str)),
3242 					     expr_offset)));
3243 		}
3244 
3245 		dao_set_cell_array_expr (dao, 1, i,
3246 					 gnm_expr_new_funcall3
3247 					 (fd_index,
3248 					  gnm_expr_copy (expr_linest),
3249 					  gnm_expr_new_constant (value_new_int (1)),
3250 					  gnm_expr_new_constant (value_new_int (xdim - i))));
3251 		dao_set_cell_array_expr (dao, 2, i,
3252 					 gnm_expr_new_funcall3
3253 					 (fd_index,
3254 					  gnm_expr_copy (expr_linest),
3255 					  gnm_expr_new_constant (value_new_int (2)),
3256 					  gnm_expr_new_constant (value_new_int (xdim - i))));
3257 		dao_set_cell_expr (dao, 3, i, gnm_expr_copy (expr_tstat));
3258 		dao_set_cell_expr (dao, 4, i, gnm_expr_copy (expr_pvalue));
3259 		dao_set_cell_expr (dao, 5, i, gnm_expr_copy (expr_lower));
3260 		dao_set_cell_expr (dao, 6, i, gnm_expr_copy (expr_upper));
3261 	}
3262 
3263 
3264 	gnm_expr_free (expr_linest);
3265 	gnm_expr_free (expr_tstat);
3266 	gnm_expr_free (expr_pvalue);
3267 	gnm_expr_free (expr_lower);
3268 	gnm_expr_free (expr_upper);
3269 
3270 	value_release (val_1_cp);
3271 	value_release (val_2_cp);
3272 
3273 	if (info->residual) {
3274 		gint n_obs = calculate_n_obs (val_1, info->group_by);
3275 		GnmExpr const *expr_diff;
3276 		GnmExpr const *expr_prediction;
3277 
3278 		dao->offset_row += xdim + 1;
3279 		dao_set_italic (dao, 0, 0, xdim + 7, 0);
3280 		dao_set_cell (dao, 0, 0, _("Constant"));
3281 		dao_set_array_expr (dao, 1, 0, xdim, 1,
3282 				    gnm_expr_new_funcall1
3283 				    (fd_transpose,
3284 				     make_rangeref (-1, - xdim - 1, -1, -2)));
3285 		set_cell_text_row (dao, xdim + 1, 0, _("/Prediction"
3286 						       "/"
3287 						       "/Residual"
3288 						       "/Leverages"
3289 						       "/Internally studentized"
3290 						       "/Externally studentized"
3291 						       "/p-Value"));
3292 		dao_set_cell_expr (dao, xdim + 2, 0, make_cellref (1 - xdim, - 18 - xdim));
3293 		if (info->group_by == GROUPED_BY_ROW) {
3294 			dao_set_array_expr (dao, 1, 1, xdim, n_obs,
3295 					    gnm_expr_new_funcall1
3296 					    (fd_transpose,
3297 					     gnm_expr_new_constant (val_1)));
3298 			dao_set_array_expr (dao, xdim + 2, 1, 1, n_obs,
3299 					    gnm_expr_new_funcall1
3300 					    (fd_transpose,
3301 					     gnm_expr_new_constant (val_2)));
3302 		} else {
3303 			dao_set_array_expr (dao, 1, 1, xdim, n_obs,
3304 					    gnm_expr_new_constant (val_1));
3305 			dao_set_array_expr (dao, xdim + 2, 1, 1, n_obs,
3306 					    gnm_expr_new_constant (val_2));
3307 		}
3308 
3309 		expr_prediction =  gnm_expr_new_funcall2 (fd_sumproduct,
3310 							  dao_get_rangeref (dao, 1, - 2 - xdim, 1, - 2),
3311 							  gnm_expr_new_funcall1
3312 							  (fd_transpose, make_rangeref
3313 							   (-1 - xdim, 0, -1, 0)));
3314 		expr_diff = gnm_expr_new_binary (make_cellref (-1, 0), GNM_EXPR_OP_SUB, make_cellref (-2, 0));
3315 
3316 		for (i = 0; i < n_obs; i++) {
3317 			dao_set_cell_expr (dao, xdim + 1, i + 1, gnm_expr_copy (expr_prediction));
3318 			dao_set_cell_expr (dao, xdim + 3, i + 1, gnm_expr_copy (expr_diff));
3319 			dao_set_cell_expr (dao, 0, i + 1, gnm_expr_new_constant (value_new_int (1)));
3320 		}
3321 		gnm_expr_free (expr_diff);
3322 		gnm_expr_free (expr_prediction);
3323 
3324 		if (dao_cell_is_visible (dao, xdim + 4, n_obs)) {
3325 			GnmExpr const *expr_X = dao_get_rangeref (dao, info->intercept ? 0 : 1, 1, xdim, n_obs);
3326 			GnmExpr const *expr_diagonal =
3327 				gnm_expr_new_funcall1
3328 				(fd_leverage, expr_X);
3329 			GnmExpr const *expr_var =
3330 				dao_get_cellref (dao, 3, - 6 - xdim);
3331 			GnmExpr const *expr_int_stud =
3332 				gnm_expr_new_binary
3333 				(make_cellref (-2, 0),
3334 				 GNM_EXPR_OP_DIV,
3335 				 gnm_expr_new_funcall1
3336 				 (fd_sqrt,
3337 				  gnm_expr_new_binary
3338 				  (expr_var,
3339 				   GNM_EXPR_OP_MULT,
3340 				   gnm_expr_new_binary
3341 				   (gnm_expr_new_constant (value_new_int (1)),
3342 				    GNM_EXPR_OP_SUB,
3343 				    make_cellref (-1, 0)))));
3344 			GnmExpr const *expr_ext_stud;
3345 			GnmExpr const *expr_p_val_res;
3346 
3347 			expr_var = gnm_expr_new_binary
3348 				(gnm_expr_new_binary
3349 				 (dao_get_cellref (dao, 2, - 6 - xdim),
3350 				  GNM_EXPR_OP_SUB,
3351 				  gnm_expr_new_binary
3352 				  (make_cellref (-3, 0),
3353 				   GNM_EXPR_OP_EXP,
3354 				   gnm_expr_new_constant (value_new_int (2)))),
3355 				 GNM_EXPR_OP_DIV,
3356 				 gnm_expr_new_binary
3357 				 (dao_get_cellref (dao, 1, - 6 - xdim),
3358 				  GNM_EXPR_OP_SUB,
3359 				  gnm_expr_new_constant (value_new_int (1))));
3360 			expr_ext_stud = gnm_expr_new_binary
3361 				(make_cellref (-3, 0),
3362 				 GNM_EXPR_OP_DIV,
3363 				 gnm_expr_new_funcall1
3364 				 (fd_sqrt,
3365 				  gnm_expr_new_binary
3366 				  (expr_var,
3367 				   GNM_EXPR_OP_MULT,
3368 				   gnm_expr_new_binary
3369 				   (gnm_expr_new_constant (value_new_int (1)),
3370 				    GNM_EXPR_OP_SUB,
3371 				    make_cellref (-2, 0)))));
3372 			expr_p_val_res = gnm_expr_new_funcall3
3373 				(fd_tdist,
3374 				 gnm_expr_new_funcall1
3375 				 (fd_abs,
3376 				  make_cellref (-1, 0)),
3377 				 gnm_expr_new_binary
3378 				 (dao_get_cellref (dao, 1, - 6 - xdim),
3379 				  GNM_EXPR_OP_SUB,
3380 				  gnm_expr_new_constant (value_new_int (1))),
3381 				 gnm_expr_new_constant (value_new_int (2)));
3382 
3383 			dao_set_array_expr (dao, xdim + 4, 1, 1, n_obs, expr_diagonal);
3384 			dao_set_format (dao, xdim + 5, 1, xdim + 6, n_obs, "0.0000");
3385 			dao_set_percent (dao, xdim + 7, 1, xdim + 7, n_obs);
3386 			for (i = 0; i < n_obs; i++){
3387 				dao_set_cell_expr (dao, xdim + 5, i + 1, gnm_expr_copy (expr_int_stud));
3388 				dao_set_cell_expr (dao, xdim + 6, i + 1, gnm_expr_copy (expr_ext_stud));
3389 				dao_set_cell_expr (dao, xdim + 7, i + 1, gnm_expr_copy (expr_p_val_res));
3390 			}
3391 			gnm_expr_free (expr_int_stud);
3392 			gnm_expr_free (expr_ext_stud);
3393 			gnm_expr_free (expr_p_val_res);
3394 		}
3395 	} else {
3396 		value_release (val_1);
3397 		value_release (val_2);
3398 	}
3399 
3400 	gnm_func_dec_usage (fd_linest);
3401 	gnm_func_dec_usage (fd_index);
3402 	gnm_func_dec_usage (fd_fdist);
3403 	gnm_func_dec_usage (fd_sum);
3404 	gnm_func_dec_usage (fd_sqrt);
3405 	gnm_func_dec_usage (fd_tdist);
3406 	gnm_func_dec_usage (fd_abs);
3407 	gnm_func_dec_usage (fd_tinv);
3408 	gnm_func_dec_usage (fd_transpose);
3409 	if (fd_concatenate != NULL)
3410 		gnm_func_dec_usage (fd_concatenate);
3411 	if (fd_cell != NULL)
3412 		gnm_func_dec_usage (fd_cell);
3413 	if (fd_offset != NULL)
3414 		gnm_func_dec_usage (fd_offset);
3415 	if (fd_sumproduct != NULL)
3416 		gnm_func_dec_usage (fd_sumproduct);
3417 	if (fd_leverage != NULL)
3418 		gnm_func_dec_usage (fd_leverage);
3419 
3420 	dao_redraw_respan (dao);
3421 
3422 	return FALSE;
3423 }
3424 
3425 static gboolean
analysis_tool_regression_simple_engine_run(data_analysis_output_t * dao,analysis_tools_data_regression_t * info)3426 analysis_tool_regression_simple_engine_run (data_analysis_output_t *dao,
3427 				     analysis_tools_data_regression_t *info)
3428 {
3429 	GnmFunc *fd_linest  = analysis_tool_get_function ("LINEST", dao);
3430 	GnmFunc *fd_index   = analysis_tool_get_function ("INDEX", dao);
3431 	GnmFunc *fd_fdist   = analysis_tool_get_function ("FDIST", dao);
3432 	GnmFunc *fd_rows    = analysis_tool_get_function ("ROWS", dao);
3433 	GnmFunc *fd_columns = analysis_tool_get_function ("COLUMNS", dao);
3434 
3435 	GSList *inputdata;
3436 	guint row;
3437 
3438 	GnmValue *val_dep = value_dup (info->base.range_2);
3439 	GnmExpr const *expr_intercept
3440 		= gnm_expr_new_constant (value_new_bool (info->intercept));
3441 	GnmExpr const *expr_observ;
3442 	GnmExpr const *expr_val_dep;
3443 
3444 	dao_set_italic (dao, 0, 0, 4, 0);
3445 	dao_set_italic (dao, 0, 2, 5, 2);
3446         set_cell_text_row (dao, 0, 0, info->multiple_y ?
3447 			   _("/SUMMARY OUTPUT"
3448 			     "/"
3449 			     "/Independent Variable"
3450 			     "/"
3451 			     "/Observations") :
3452 			   _("/SUMMARY OUTPUT"
3453 			     "/"
3454 			     "/Response Variable"
3455 			     "/"
3456 			     "/Observations"));
3457         set_cell_text_row (dao, 0, 2, info->multiple_y ?
3458 			   _("/Response Variable"
3459 			     "/R^2"
3460 			     "/Slope"
3461 			     "/Intercept"
3462 			     "/F"
3463 			     "/Significance of F") :
3464 			   _("/Independent Variable"
3465 			     "/R^2"
3466 			     "/Slope"
3467 			     "/Intercept"
3468 			     "/F"
3469 			     "/Significance of F"));
3470 	analysis_tools_write_a_label (val_dep, dao,
3471 				      info->base.labels, info->group_by,
3472 				      3, 0);
3473 
3474 	expr_val_dep = gnm_expr_new_constant (val_dep);
3475 	dao_set_cell_expr (dao, 5, 0, gnm_expr_new_binary (gnm_expr_new_funcall1 (fd_rows, gnm_expr_copy (expr_val_dep)),
3476 							   GNM_EXPR_OP_MULT,
3477 							   gnm_expr_new_funcall1 (fd_columns, gnm_expr_copy (expr_val_dep))));
3478 	expr_observ = dao_get_cellref (dao, 5, 0);
3479 
3480 	for (row = 3, inputdata = info->indep_vars; inputdata != NULL;
3481 	     inputdata = inputdata->next, row++) {
3482 		GnmValue *val_indep = value_dup (inputdata->data);
3483 		GnmExpr const *expr_linest;
3484 
3485 		dao_set_italic (dao, 0, row, 0, row);
3486 		analysis_tools_write_a_label (val_indep, dao,
3487 					      info->base.labels, info->group_by,
3488 					      0, row);
3489 		expr_linest = info->multiple_y ?
3490 			gnm_expr_new_funcall4 (fd_linest,
3491 					       gnm_expr_new_constant (val_indep),
3492 					       gnm_expr_copy (expr_val_dep),
3493 					       gnm_expr_copy (expr_intercept),
3494 					       gnm_expr_new_constant (value_new_bool (TRUE))) :
3495 			gnm_expr_new_funcall4 (fd_linest,
3496 					       gnm_expr_copy (expr_val_dep),
3497 					       gnm_expr_new_constant (val_indep),
3498 					       gnm_expr_copy (expr_intercept),
3499 					       gnm_expr_new_constant (value_new_bool (TRUE)));
3500 		dao_set_cell_array_expr (dao, 1, row,
3501 				 gnm_expr_new_funcall3 (fd_index,
3502 							gnm_expr_copy (expr_linest),
3503 							gnm_expr_new_constant (value_new_int (3)),
3504 							gnm_expr_new_constant (value_new_int (1))));
3505 		dao_set_cell_array_expr (dao, 4, row,
3506 				 gnm_expr_new_funcall3 (fd_index,
3507 							gnm_expr_copy (expr_linest),
3508 							gnm_expr_new_constant (value_new_int (4)),
3509 							gnm_expr_new_constant (value_new_int (1))));
3510 		dao_set_array_expr (dao, 2, row, 2, 1, expr_linest);
3511 
3512 		dao_set_cell_expr (dao, 5, row, gnm_expr_new_funcall3
3513 				   (fd_fdist,
3514 				    make_cellref (-1, 0),
3515 				    gnm_expr_new_constant (value_new_int (1)),
3516 				    gnm_expr_new_binary (gnm_expr_copy (expr_observ),
3517 							 GNM_EXPR_OP_SUB,
3518 							 gnm_expr_new_constant (value_new_int (2)))));
3519 
3520 	}
3521 
3522 	gnm_expr_free (expr_intercept);
3523 	gnm_expr_free (expr_observ);
3524 	gnm_expr_free (expr_val_dep);
3525 
3526 	gnm_func_dec_usage (fd_fdist);
3527 	gnm_func_dec_usage (fd_linest);
3528 	gnm_func_dec_usage (fd_index);
3529 	gnm_func_dec_usage (fd_rows);
3530 	gnm_func_dec_usage (fd_columns);
3531 
3532 	dao_redraw_respan (dao);
3533 
3534 	return FALSE;
3535 }
3536 
3537 gboolean
analysis_tool_regression_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)3538 analysis_tool_regression_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
3539 			    analysis_tool_engine_t selector, gpointer result)
3540 {
3541 	analysis_tools_data_regression_t *info = specs;
3542 
3543 	switch (selector) {
3544 	case TOOL_ENGINE_UPDATE_DESCRIPTOR:
3545 		return (dao_command_descriptor (dao, _("Regression (%s)"), result)
3546 			== NULL);
3547 	case TOOL_ENGINE_UPDATE_DAO:
3548 	{
3549 		gint xdim = calculate_xdim (info->base.range_1, info->group_by);
3550 		gint cols, rows;
3551 
3552 		if (info->multiple_regression) {
3553 			cols = 7;
3554 			rows = 17 + xdim;
3555 			info->indep_vars = NULL;
3556 			if (info->residual) {
3557 				gint residual_cols = xdim + 4;
3558 				GnmValue *val = info->base.range_1;
3559 
3560 				rows += 2 + calculate_n_obs (val, info->group_by);
3561 				residual_cols += 4;
3562 				if (cols < residual_cols)
3563 					cols = residual_cols;
3564 			}
3565 		} else {
3566 			info->indep_vars = g_slist_prepend (NULL, info->base.range_1);
3567 			info->base.range_1 = NULL;
3568 			prepare_input_range (&info->indep_vars, info->group_by);
3569 			cols = 6;
3570 			rows = 3 + xdim;
3571 		}
3572 		dao_adjust (dao, cols, rows);
3573 		return FALSE;
3574 	}
3575 	case TOOL_ENGINE_CLEAN_UP:
3576 		range_list_destroy (info->indep_vars);
3577 		info->indep_vars = NULL;
3578 		return analysis_tool_generic_b_clean (specs);
3579 
3580 	case TOOL_ENGINE_LAST_VALIDITY_CHECK:
3581 		return FALSE;
3582 	case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
3583 		dao_prepare_output (NULL, dao, _("Regression"));
3584 		return FALSE;
3585 	case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
3586 		return dao_format_output (dao, _("Regression"));
3587 	case TOOL_ENGINE_PERFORM_CALC:
3588 	default:
3589 		if (info->multiple_regression)
3590 			return analysis_tool_regression_engine_run (dao, specs);
3591 		else
3592 			return analysis_tool_regression_simple_engine_run (dao, specs);
3593 	}
3594 	return TRUE;  /* We shouldn't get here */
3595 }
3596 
3597 
3598 
3599 /************* Moving Average Tool *****************************************
3600  *
 * The moving average tool calculates moving averages of a given data
 * set.  The results are given in a table which can be printed out in
 * a new sheet, in a new workbook, or simply into an existing sheet.
3604  *
3605  **/
3606 
3607 static GnmExpr const *
analysis_tool_moving_average_funcall5(GnmFunc * fd,GnmExpr const * ex,int y,int x,int dy,int dx)3608 analysis_tool_moving_average_funcall5 (GnmFunc *fd, GnmExpr const *ex, int y, int x, int dy, int dx)
3609 {
3610 	GnmExprList *list;
3611 	list = gnm_expr_list_prepend (NULL, gnm_expr_new_constant (value_new_int (dx)));
3612 	list = gnm_expr_list_prepend (list, gnm_expr_new_constant (value_new_int (dy)));
3613 	list = gnm_expr_list_prepend (list, gnm_expr_new_constant (value_new_int (x)));
3614 	list = gnm_expr_list_prepend (list, gnm_expr_new_constant (value_new_int (y)));
3615 	list = gnm_expr_list_prepend (list, gnm_expr_copy (ex));
3616 
3617 	return gnm_expr_new_funcall (fd, list);
3618 }
3619 
3620 static GnmExpr const *
analysis_tool_moving_average_weighted_av(GnmFunc * fd_sum,GnmFunc * fd_in,GnmExpr const * ex,int y,int x,int dy,int dx,int * w)3621 analysis_tool_moving_average_weighted_av (GnmFunc *fd_sum, GnmFunc *fd_in, GnmExpr const *ex,
3622 					  int y, int x, int dy, int dx, int *w)
3623 {
3624 	GnmExprList *list = NULL;
3625 
3626 	while (*w != 0) {
3627 		list = gnm_expr_list_prepend
3628 			(list, gnm_expr_new_binary
3629 			 (gnm_expr_new_constant (value_new_int (*w)),
3630 			  GNM_EXPR_OP_MULT,
3631 			  gnm_expr_new_funcall3 (fd_in, gnm_expr_copy (ex),
3632 						 gnm_expr_new_constant (value_new_int (y)),
3633 						 gnm_expr_new_constant (value_new_int (x)))));
3634 		w++;
3635 		x += dx;
3636 		y += dy;
3637 	}
3638 
3639 	return gnm_expr_new_funcall (fd_sum, list);
3640 }
3641 
3642 static gboolean
analysis_tool_moving_average_engine_run(data_analysis_output_t * dao,analysis_tools_data_moving_average_t * info)3643 analysis_tool_moving_average_engine_run (data_analysis_output_t *dao,
3644 					 analysis_tools_data_moving_average_t *info)
3645 {
3646 	GnmFunc *fd_index = NULL;
3647 	GnmFunc *fd_average;
3648 	GnmFunc *fd_offset;
3649 	GnmFunc *fd_sqrt = NULL;
3650 	GnmFunc *fd_sumxmy2 = NULL;
3651 	GnmFunc *fd_sum = NULL;
3652 	GSList *l;
3653 	gint col = 0;
3654 	gint source;
3655 	SheetObject *so = NULL;
3656 	GogPlot	     *plot = NULL;
3657 
3658 	if (info->base.labels || info->ma_type == moving_average_type_wma
3659 	    || info->ma_type== moving_average_type_spencer_ma) {
3660 		fd_index = gnm_func_lookup_or_add_placeholder ("INDEX");
3661 		gnm_func_inc_usage (fd_index);
3662 	}
3663 	if (info->std_error_flag) {
3664 		fd_sqrt = gnm_func_lookup_or_add_placeholder ("SQRT");
3665 		gnm_func_inc_usage (fd_sqrt);
3666 		fd_sumxmy2 = gnm_func_lookup_or_add_placeholder ("SUMXMY2");
3667 		gnm_func_inc_usage (fd_sumxmy2);
3668 	}
3669 	if (moving_average_type_wma == info->ma_type || moving_average_type_spencer_ma == info->ma_type) {
3670 		fd_sum = gnm_func_lookup_or_add_placeholder ("SUM");
3671 		gnm_func_inc_usage (fd_sum);
3672 	}
3673 	fd_average = gnm_func_lookup_or_add_placeholder ("AVERAGE");
3674 	gnm_func_inc_usage (fd_average);
3675 	fd_offset = gnm_func_lookup_or_add_placeholder ("OFFSET");
3676 	gnm_func_inc_usage (fd_offset);
3677 
3678 	if (info->show_graph) {
3679 		GogGraph     *graph;
3680 		GogChart     *chart;
3681 
3682 		graph = g_object_new (GOG_TYPE_GRAPH, NULL);
3683 		chart = GOG_CHART (gog_object_add_by_name (GOG_OBJECT (graph), "Chart", NULL));
3684 		plot = gog_plot_new_by_name ("GogLinePlot");
3685 		gog_object_add_by_name (GOG_OBJECT (chart), "Plot", GOG_OBJECT (plot));
3686 		so = sheet_object_graph_new (graph);
3687 		g_object_unref (graph);
3688 	}
3689 
3690 	for (l = info->base.input, source = 1; l; l = l->next, col++, source++) {
3691 		GnmValue *val = value_dup ((GnmValue *)l->data);
3692 		GnmValue *val_c = NULL;
3693 		GnmExpr const *expr_title = NULL;
3694 		GnmExpr const *expr_input = NULL;
3695 		char const *format = NULL;
3696 		gint height;
3697 		gint  x = 0;
3698 		gint  y = 0;
3699 		gint  *mover;
3700 		guint *delta_mover;
3701 		guint delta_x = 1;
3702 		guint delta_y = 1;
3703 		gint row, base;
3704 		Sheet *sheet;
3705 		GnmEvalPos ep;
3706 
3707 		eval_pos_init_sheet (&ep, val->v_range.cell.a.sheet);
3708 
3709 		if (info->base.labels) {
3710 			val_c = value_dup (val);
3711 			switch (info->base.group_by) {
3712 			case GROUPED_BY_ROW:
3713 				val->v_range.cell.a.col++;
3714 				break;
3715 			default:
3716 				val->v_range.cell.a.row++;
3717 				break;
3718 			}
3719 			expr_title = gnm_expr_new_funcall1 (fd_index,
3720 							    gnm_expr_new_constant (val_c));
3721 
3722 			dao_set_italic (dao, col, 0, col, 0);
3723 			dao_set_cell_expr (dao, col, 0, expr_title);
3724 		} else {
3725 			switch (info->base.group_by) {
3726 			case GROUPED_BY_ROW:
3727 				format = _("Row %d");
3728 				break;
3729 			default:
3730 				format = _("Column %d");
3731 				break;
3732 			}
3733 			dao_set_cell_printf (dao, col, 0, format, source);
3734 		}
3735 
3736 		switch (info->base.group_by) {
3737 		case GROUPED_BY_ROW:
3738 			height = value_area_get_width (val, &ep);
3739 			mover = &x;
3740 			delta_mover = &delta_x;
3741 			break;
3742 		default:
3743 			height = value_area_get_height (val, &ep);
3744 			mover = &y;
3745 			delta_mover = &delta_y;
3746 			break;
3747 		}
3748 
3749 		sheet = val->v_range.cell.a.sheet;
3750 		expr_input = gnm_expr_new_constant (val);
3751 
3752 		if  (plot != NULL) {
3753 			GogSeries    *series;
3754 
3755 			series = gog_plot_new_series (plot);
3756 			gog_series_set_dim (series, 1,
3757 					    gnm_go_data_vector_new_expr (sheet,
3758 									 gnm_expr_top_new (gnm_expr_copy (expr_input))),
3759 					    NULL);
3760 
3761 			series = gog_plot_new_series (plot);
3762 			gog_series_set_dim (series, 1,
3763 					    dao_go_data_vector (dao, col, 1, col, height),
3764 					    NULL);
3765 		}
3766 
3767 		switch (info->ma_type) {
3768 		case moving_average_type_central_sma:
3769 		{
3770 			GnmExpr const *expr_offset_last = NULL;
3771 			GnmExpr const *expr_offset = NULL;
3772 			*delta_mover = info->interval;
3773 			(*mover) = 1 - info->interval + info->offset;
3774 			for (row = 1; row <= height; row++, (*mover)++) {
3775 				expr_offset_last = expr_offset;
3776 				expr_offset = NULL;
3777 				if ((*mover >= 0) && (*mover < height - info->interval + 1)) {
3778 					expr_offset = gnm_expr_new_funcall1
3779 						(fd_average, analysis_tool_moving_average_funcall5
3780 						 (fd_offset,expr_input, y, x, delta_y, delta_x));
3781 
3782 					if (expr_offset_last == NULL)
3783 						dao_set_cell_na (dao, col, row);
3784 					else
3785 						dao_set_cell_expr (dao, col, row,
3786 								   gnm_expr_new_funcall2 (fd_average, expr_offset_last,
3787 											  gnm_expr_copy (expr_offset)));
3788 				} else {
3789 					if (expr_offset_last != NULL) {
3790 						gnm_expr_free (expr_offset_last);
3791 						expr_offset_last = NULL;
3792 					}
3793 					dao_set_cell_na (dao, col, row);
3794 				}
3795 			}
3796 			base = info->interval - info->offset;
3797 		}
3798 		break;
3799 		case moving_average_type_cma:
3800 			for (row = 1; row <= height; row++) {
3801 				GnmExpr const *expr_offset;
3802 
3803 				*delta_mover = row;
3804 
3805 				expr_offset = analysis_tool_moving_average_funcall5
3806 					 (fd_offset, expr_input, y, x, delta_y, delta_x);
3807 
3808 				dao_set_cell_expr (dao, col, row,
3809 						   gnm_expr_new_funcall1 (fd_average, expr_offset));
3810 			}
3811 			base = 0;
3812 			break;
3813 		case moving_average_type_wma:
3814 		{
3815 			GnmExpr const *expr_divisor = gnm_expr_new_constant
3816 				(value_new_int((info->interval * (info->interval + 1))/2));
3817 			int *w = g_new (int, (info->interval + 1));
3818 			int i;
3819 
3820 			for (i = 0; i < info->interval; i++)
3821 				w[i] = i+1;
3822 			w[info->interval] = 0;
3823 
3824 			delta_x = 0;
3825 			delta_y= 0;
3826 			(*delta_mover) = 1;
3827 			(*mover) = 1 - info->interval;
3828 			for (row = 1; row <= height; row++, (*mover)++) {
3829 				if ((*mover >= 0) && (*mover < height - info->interval + 1)) {
3830 					GnmExpr const *expr_sum;
3831 
3832 					expr_sum = analysis_tool_moving_average_weighted_av
3833 						(fd_sum, fd_index, expr_input, y+1, x+1, delta_y, delta_x, w);
3834 
3835 					dao_set_cell_expr (dao, col, row,
3836 							   gnm_expr_new_binary
3837 							   (expr_sum,
3838 							    GNM_EXPR_OP_DIV,
3839 							    gnm_expr_copy (expr_divisor)));
3840 				} else
3841 					dao_set_cell_na (dao, col, row);
3842 			}
3843 			g_free (w);
3844 			gnm_expr_free (expr_divisor);
3845 			base =  info->interval - 1;
3846 			delta_x = 1;
3847 			delta_y= 1;
3848 		}
3849 		break;
3850 		case moving_average_type_spencer_ma:
3851 		{
3852 			GnmExpr const *expr_divisor = gnm_expr_new_constant
3853 				(value_new_int(-3-6-5+3+21+45+67+74+67+46+21+3-5-6-3));
3854 			int w[] = {-3, -6, -5, 3, 21, 45, 67, 74, 67, 46, 21, 3, -5, -6, -3, 0};
3855 
3856 			delta_x = 0;
3857 			delta_y= 0;
3858 			(*delta_mover) = 1;
3859 			(*mover) = 1 - info->interval + info->offset;
3860 			for (row = 1; row <= height; row++, (*mover)++) {
3861 				if ((*mover >= 0) && (*mover < height - info->interval + 1)) {
3862 					GnmExpr const *expr_sum;
3863 
3864 					expr_sum = analysis_tool_moving_average_weighted_av
3865 						(fd_sum, fd_index, expr_input, y+1, x+1, delta_y, delta_x, w);
3866 
3867 					dao_set_cell_expr (dao, col, row,
3868 							   gnm_expr_new_binary
3869 							   (expr_sum,
3870 							    GNM_EXPR_OP_DIV,
3871 							    gnm_expr_copy (expr_divisor)));
3872 				} else
3873 					dao_set_cell_na (dao, col, row);
3874 			}
3875 			gnm_expr_free (expr_divisor);
3876 			base =  info->interval - info->offset - 1;
3877 			delta_x = 1;
3878 			delta_y= 1;
3879 		}
3880 		break;
3881 		default:
3882 			(*delta_mover) = info->interval;
3883 			(*mover) = 1 - info->interval + info->offset;
3884 			for (row = 1; row <= height; row++, (*mover)++) {
3885 				if ((*mover >= 0) && (*mover < height - info->interval + 1)) {
3886 					GnmExpr const *expr_offset;
3887 
3888 					expr_offset = analysis_tool_moving_average_funcall5
3889 						(fd_offset, expr_input, y, x, delta_y, delta_x);
3890 					dao_set_cell_expr (dao, col, row,
3891 							   gnm_expr_new_funcall1 (fd_average, expr_offset));
3892 				} else
3893 					dao_set_cell_na (dao, col, row);
3894 			}
3895 			base =  info->interval - info->offset - 1;
3896 			break;
3897 		}
3898 
3899 		if (info->std_error_flag) {
3900 			col++;
3901 			dao_set_italic (dao, col, 0, col, 0);
3902 			dao_set_cell (dao, col, 0, _("Standard Error"));
3903 
3904 			(*mover) = base;
3905 			for (row = 1; row <= height; row++) {
3906 				if (row > base && row <= height - info->offset && (row - base - info->df) > 0) {
3907 					GnmExpr const *expr_offset;
3908 
3909 					if (info->base.group_by == GROUPED_BY_ROW)
3910 						delta_x = row - base;
3911 					else
3912 						delta_y = row - base;
3913 
3914 					expr_offset = analysis_tool_moving_average_funcall5
3915 						(fd_offset, expr_input, y, x, delta_y, delta_x);
3916 					dao_set_cell_expr (dao, col, row,
3917 							   gnm_expr_new_funcall1
3918 							   (fd_sqrt,
3919 							    gnm_expr_new_binary
3920 							    (gnm_expr_new_funcall2
3921 							     (fd_sumxmy2,
3922 							      expr_offset,
3923 							      make_rangeref (-1, - row + base + 1, -1, 0)),
3924 							     GNM_EXPR_OP_DIV,
3925 							     gnm_expr_new_constant (value_new_int
3926 										    (row - base - info->df)))));
3927 				} else
3928 					dao_set_cell_na (dao, col, row);
3929 			}
3930 		}
3931 
3932 		gnm_expr_free (expr_input);
3933 	}
3934 
3935 	if (so != NULL)
3936 		dao_set_sheet_object (dao, 0, 1, so);
3937 
3938 	if (fd_index != NULL)
3939 		gnm_func_dec_usage (fd_index);
3940 	if (fd_sqrt != NULL)
3941 		gnm_func_dec_usage (fd_sqrt);
3942 	if (fd_sumxmy2 != NULL)
3943 		gnm_func_dec_usage (fd_sumxmy2);
3944 	if (fd_sum != NULL)
3945 		gnm_func_dec_usage (fd_sum);
3946 	gnm_func_dec_usage (fd_average);
3947 	gnm_func_dec_usage (fd_offset);
3948 
3949 	dao_redraw_respan (dao);
3950 
3951 	return FALSE;
3952 }
3953 
3954 
3955 gboolean
analysis_tool_moving_average_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)3956 analysis_tool_moving_average_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
3957 			      analysis_tool_engine_t selector, gpointer result)
3958 {
3959 	analysis_tools_data_moving_average_t *info = specs;
3960 
3961 	switch (selector) {
3962 	case TOOL_ENGINE_UPDATE_DESCRIPTOR:
3963 		return (dao_command_descriptor (dao, _("Moving Average (%s)"), result)
3964 			== NULL);
3965 	case TOOL_ENGINE_UPDATE_DAO:
3966 		prepare_input_range (&info->base.input, info->base.group_by);
3967 		dao_adjust (dao, (info->std_error_flag ? 2 : 1) *
3968 			    g_slist_length (info->base.input),
3969 			    1 + analysis_tool_calc_length (specs));
3970 		return FALSE;
3971 	case TOOL_ENGINE_CLEAN_UP:
3972 		return analysis_tool_generic_clean (specs);
3973 	case TOOL_ENGINE_LAST_VALIDITY_CHECK:
3974 		return FALSE;
3975 	case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
3976 		dao_prepare_output (NULL, dao, _("Moving Average"));
3977 		return FALSE;
3978 	case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
3979 		return dao_format_output (dao, _("Moving Average"));
3980 	case TOOL_ENGINE_PERFORM_CALC:
3981 	default:
3982 		return analysis_tool_moving_average_engine_run (dao, specs);
3983 	}
3984 	return TRUE;  /* We shouldn't get here */
3985 }
3986 
3987 
3988 /************* Rank and Percentile Tool ************************************
3989  *
3990  * The results are given in a table which can be printed out in a new
3991  * sheet, in a new workbook, or simply into an existing sheet.
3992  *
3993  **/
3994 
3995 static gboolean
analysis_tool_ranking_engine_run(data_analysis_output_t * dao,analysis_tools_data_ranking_t * info)3996 analysis_tool_ranking_engine_run (data_analysis_output_t *dao,
3997 				      analysis_tools_data_ranking_t *info)
3998 {
3999 	GSList *data = info->base.input;
4000 	int col = 0;
4001 
4002 	GnmFunc *fd_large;
4003 	GnmFunc *fd_row;
4004 	GnmFunc *fd_rank;
4005 	GnmFunc *fd_match;
4006 	GnmFunc *fd_percentrank;
4007 
4008 	fd_large = gnm_func_lookup_or_add_placeholder ("LARGE");
4009 	gnm_func_inc_usage (fd_large);
4010 	fd_row = gnm_func_lookup_or_add_placeholder ("ROW");
4011 	gnm_func_inc_usage (fd_row);
4012 	fd_rank = gnm_func_lookup_or_add_placeholder ("RANK");
4013 	gnm_func_inc_usage (fd_rank);
4014 	fd_match = gnm_func_lookup_or_add_placeholder ("MATCH");
4015 	gnm_func_inc_usage (fd_match);
4016 	fd_percentrank = gnm_func_lookup_or_add_placeholder ("PERCENTRANK");
4017 	gnm_func_inc_usage (fd_percentrank);
4018 
4019 	dao_set_merge (dao, 0, 0, 1, 0);
4020 	dao_set_italic (dao, 0, 0, 0, 0);
4021 	dao_set_cell (dao, 0, 0, _("Ranks & Percentiles"));
4022 
4023 	for (; data; data = data->next, col++) {
4024 		GnmValue *val_org = value_dup (data->data);
4025 		GnmExpr const *expr_large;
4026 		GnmExpr const *expr_rank;
4027 		GnmExpr const *expr_position;
4028 		GnmExpr const *expr_percentile;
4029 		int rows, i;
4030 
4031 		dao_set_italic (dao, 0, 1, 3, 1);
4032 		dao_set_cell (dao, 0, 1, _("Point"));
4033 		dao_set_cell (dao, 2, 1, _("Rank"));
4034 		dao_set_cell (dao, 3, 1, _("Percentile Rank"));
4035 		analysis_tools_write_label (val_org, dao, &info->base, 1, 1, col + 1);
4036 
4037 		rows = (val_org->v_range.cell.b.row - val_org->v_range.cell.a.row + 1) *
4038 			(val_org->v_range.cell.b.col - val_org->v_range.cell.a.col + 1);
4039 
4040 		expr_large = gnm_expr_new_funcall2
4041 			(fd_large, gnm_expr_new_constant (value_dup (val_org)),
4042 			 gnm_expr_new_binary (gnm_expr_new_binary
4043 					      (gnm_expr_new_funcall (fd_row, NULL),
4044 					       GNM_EXPR_OP_SUB,
4045 					       gnm_expr_new_funcall1
4046 					       (fd_row, dao_get_cellref (dao, 1, 2))),
4047 					      GNM_EXPR_OP_ADD,
4048 					      gnm_expr_new_constant (value_new_int (1))));
4049 		dao_set_array_expr (dao, 1, 2, 1, rows, gnm_expr_copy (expr_large));
4050 
4051 		/* If there are ties the following will only give us the first occurrence... */
4052 		expr_position = gnm_expr_new_funcall3 (fd_match, expr_large,
4053 						       gnm_expr_new_constant (value_dup (val_org)),
4054 						       gnm_expr_new_constant (value_new_int (0)));
4055 
4056 		dao_set_array_expr (dao, 0, 2, 1, rows, expr_position);
4057 
4058 		expr_rank = gnm_expr_new_funcall2 (fd_rank,
4059 						   make_cellref (-1,0),
4060 						   gnm_expr_new_constant (value_dup (val_org)));
4061 		if (info->av_ties) {
4062 			GnmExpr const *expr_rank_lower;
4063 			GnmExpr const *expr_rows_p_one;
4064 			GnmExpr const *expr_rows;
4065 			GnmFunc *fd_count;
4066 			fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
4067 			gnm_func_inc_usage (fd_count);
4068 
4069 			expr_rows = gnm_expr_new_funcall1
4070 				(fd_count, gnm_expr_new_constant (value_dup (val_org)));
4071 			expr_rows_p_one = gnm_expr_new_binary
4072 				(expr_rows,
4073 				 GNM_EXPR_OP_ADD,
4074 				 gnm_expr_new_constant (value_new_int (1)));
4075 			expr_rank_lower = gnm_expr_new_funcall3
4076 				(fd_rank,
4077 				 make_cellref (-1,0),
4078 				 gnm_expr_new_constant (value_dup (val_org)),
4079 				 gnm_expr_new_constant (value_new_int (1)));
4080 			expr_rank = gnm_expr_new_binary
4081 				(gnm_expr_new_binary
4082 				 (gnm_expr_new_binary (expr_rank, GNM_EXPR_OP_SUB, expr_rank_lower),
4083 				  GNM_EXPR_OP_ADD, expr_rows_p_one),
4084 				 GNM_EXPR_OP_DIV,
4085 				 gnm_expr_new_constant (value_new_int (2)));
4086 
4087 			gnm_func_dec_usage (fd_count);
4088 		}
4089 		expr_percentile = gnm_expr_new_funcall3 (fd_percentrank,
4090 							 gnm_expr_new_constant (value_dup (val_org)),
4091 							 make_cellref (-2,0),
4092 							 gnm_expr_new_constant (value_new_int (10)));
4093 
4094 		dao_set_percent (dao, 3, 2, 3, 1 + rows);
4095 		for (i = 2; i < rows + 2; i++) {
4096 			dao_set_cell_expr ( dao, 2, i, gnm_expr_copy (expr_rank));
4097 			dao_set_cell_expr ( dao, 3, i, gnm_expr_copy (expr_percentile));
4098 		}
4099 
4100 
4101 		dao->offset_col += 4;
4102 		value_release (val_org);
4103 		gnm_expr_free (expr_rank);
4104 		gnm_expr_free (expr_percentile);
4105 	}
4106 
4107 	gnm_func_dec_usage (fd_large);
4108 	gnm_func_dec_usage (fd_row);
4109 	gnm_func_dec_usage (fd_rank);
4110 	gnm_func_dec_usage (fd_match);
4111 	gnm_func_dec_usage (fd_percentrank);
4112 
4113 	dao_redraw_respan (dao);
4114 
4115 	return FALSE;
4116 }
4117 
4118 gboolean
analysis_tool_ranking_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)4119 analysis_tool_ranking_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
4120 			      analysis_tool_engine_t selector, gpointer result)
4121 {
4122 	analysis_tools_data_ranking_t *info = specs;
4123 
4124 	switch (selector) {
4125 	case TOOL_ENGINE_UPDATE_DESCRIPTOR:
4126 		return (dao_command_descriptor (dao, _("Ranks (%s)"), result)
4127 			== NULL);
4128 	case TOOL_ENGINE_UPDATE_DAO:
4129 		prepare_input_range (&info->base.input, info->base.group_by);
4130 		dao_adjust (dao, 4 * g_slist_length (info->base.input),
4131 			    2 + analysis_tool_calc_length (specs));
4132 		return FALSE;
4133 	case TOOL_ENGINE_CLEAN_UP:
4134 		return analysis_tool_generic_clean (specs);
4135 	case TOOL_ENGINE_LAST_VALIDITY_CHECK:
4136 		return FALSE;
4137 	case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
4138 		dao_prepare_output (NULL, dao, _("Ranks"));
4139 		return FALSE;
4140 	case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
4141 		return dao_format_output (dao, _("Ranks"));
4142 	case TOOL_ENGINE_PERFORM_CALC:
4143 	default:
4144 		return analysis_tool_ranking_engine_run (dao, specs);
4145 	}
4146 	return TRUE;  /* We shouldn't get here */
4147 }
4148 
4149 
4150 
4151 
4152 /************* Anova: Single Factor Tool **********************************
4153  *
4154  * The results are given in a table which can be printed out in a new
4155  * sheet, in a new workbook, or simply into an existing sheet.
4156  *
4157  **/
4158 
4159 static gboolean
analysis_tool_anova_single_engine_run(data_analysis_output_t * dao,gpointer specs)4160 analysis_tool_anova_single_engine_run (data_analysis_output_t *dao, gpointer specs)
4161 {
4162 	analysis_tools_data_anova_single_t *info = specs;
4163 	GSList *inputdata = info->base.input;
4164 	GnmFunc *fd_sum;
4165 	GnmFunc *fd_count;
4166 	GnmFunc *fd_mean;
4167 	GnmFunc *fd_var;
4168 	GnmFunc *fd_devsq;
4169 
4170 	guint index;
4171 
4172 	dao_set_italic (dao, 0, 0, 0, 2);
4173 	dao_set_cell (dao, 0, 0, _("Anova: Single Factor"));
4174 	dao_set_cell (dao, 0, 2, _("SUMMARY"));
4175 
4176 	dao_set_italic (dao, 0, 3, 4, 3);
4177 	set_cell_text_row (dao, 0, 3, _("/Groups"
4178 					"/Count"
4179 					"/Sum"
4180 					"/Average"
4181 					"/Variance"));
4182 
4183 	fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
4184 	gnm_func_inc_usage (fd_mean);
4185 	fd_var = gnm_func_lookup_or_add_placeholder ("VAR");
4186 	gnm_func_inc_usage (fd_var);
4187 	fd_sum = gnm_func_lookup_or_add_placeholder ("SUM");
4188 	gnm_func_inc_usage (fd_sum);
4189 	fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
4190 	gnm_func_inc_usage (fd_count);
4191 	fd_devsq = gnm_func_lookup_or_add_placeholder ("DEVSQ");
4192 	gnm_func_inc_usage (fd_devsq);
4193 
4194 	dao->offset_row += 4;
4195 	if (dao->rows <= dao->offset_row)
4196 		goto finish_anova_single_factor_tool;
4197 
4198 	/* SUMMARY */
4199 
4200 	for (index = 0; inputdata != NULL;
4201 	     inputdata = inputdata->next, index++) {
4202 		GnmValue *val_org = value_dup (inputdata->data);
4203 
4204 		/* Label */
4205 		dao_set_italic (dao, 0, index, 0, index);
4206 		analysis_tools_write_label (val_org, dao, &info->base,
4207 					    0, index, index + 1);
4208 
4209 		/* Count */
4210 		dao_set_cell_expr
4211 			(dao, 1, index,
4212 			 gnm_expr_new_funcall1
4213 			 (fd_count,
4214 			  gnm_expr_new_constant (value_dup (val_org))));
4215 
4216 		/* Sum */
4217 		dao_set_cell_expr
4218 			(dao, 2, index,
4219 			 gnm_expr_new_funcall1
4220 			 (fd_sum,
4221 			  gnm_expr_new_constant (value_dup (val_org))));
4222 
4223 		/* Average */
4224 		dao_set_cell_expr
4225 			(dao, 3, index,
4226 			 gnm_expr_new_funcall1
4227 			 (fd_mean,
4228 			  gnm_expr_new_constant (value_dup (val_org))));
4229 
4230 		/* Variance */
4231 		dao_set_cell_expr
4232 			(dao, 4, index,
4233 			 gnm_expr_new_funcall1
4234 			 (fd_var,
4235 			  gnm_expr_new_constant (val_org)));
4236 
4237 	}
4238 
4239 	dao->offset_row += index + 2;
4240 	if (dao->rows <= dao->offset_row)
4241 		goto finish_anova_single_factor_tool;
4242 
4243 
4244 	dao_set_italic (dao, 0, 0, 0, 4);
4245 	set_cell_text_col (dao, 0, 0, _("/ANOVA"
4246 					"/Source of Variation"
4247 					"/Between Groups"
4248 					"/Within Groups"
4249 					"/Total"));
4250 	dao_set_italic (dao, 1, 1, 6, 1);
4251 	set_cell_text_row (dao, 1, 1, _("/SS"
4252 					"/df"
4253 					"/MS"
4254 					"/F"
4255 					"/P-value"
4256 					"/F critical"));
4257 
4258 	/* ANOVA */
4259 	{
4260 		GnmExprList *sum_wdof_args = NULL;
4261 		GnmExprList *sum_tdof_args = NULL;
4262 		GnmExprList *arg_ss_total = NULL;
4263 		GnmExprList *arg_ss_within = NULL;
4264 
4265 		GnmExpr const *expr_wdof = NULL;
4266 		GnmExpr const *expr_ss_total = NULL;
4267 		GnmExpr const *expr_ss_within = NULL;
4268 
4269 		for (inputdata = info->base.input; inputdata != NULL;
4270 		     inputdata = inputdata->next) {
4271 			GnmValue *val_org = value_dup (inputdata->data);
4272 			GnmExpr const *expr_one;
4273 			GnmExpr const *expr_count_one;
4274 
4275 			analysis_tools_remove_label (val_org,
4276 						     info->base.labels,
4277 						     info->base.group_by);
4278 			expr_one = gnm_expr_new_constant (value_dup (val_org));
4279 
4280 			arg_ss_total =  gnm_expr_list_append
4281 				(arg_ss_total,
4282 				 gnm_expr_new_constant (val_org));
4283 
4284 			arg_ss_within = gnm_expr_list_append
4285 				(arg_ss_within,
4286 				 gnm_expr_new_funcall1
4287 				 (fd_devsq, gnm_expr_copy (expr_one)));
4288 
4289 			expr_count_one =
4290 				gnm_expr_new_funcall1 (fd_count, expr_one);
4291 
4292 			sum_wdof_args = gnm_expr_list_append
4293 				(sum_wdof_args,
4294 				 gnm_expr_new_binary(
4295 					 gnm_expr_copy (expr_count_one),
4296 					 GNM_EXPR_OP_SUB,
4297 					 gnm_expr_new_constant
4298 					 (value_new_int (1))));
4299 			sum_tdof_args = gnm_expr_list_append
4300 				(sum_tdof_args,
4301 				 expr_count_one);
4302 		}
4303 
4304 		expr_ss_total = gnm_expr_new_funcall
4305 			(fd_devsq, arg_ss_total);
4306 		expr_ss_within = gnm_expr_new_funcall
4307 			(fd_sum, arg_ss_within);
4308 
4309 		{
4310 			/* SS between groups */
4311 			GnmExpr const *expr_ss_between;
4312 
4313 			if (dao_cell_is_visible (dao, 1,4)) {
4314 				expr_ss_between = gnm_expr_new_binary
4315 					(make_cellref (0, 2),
4316 					 GNM_EXPR_OP_SUB,
4317 					 make_cellref (0, 1));
4318 
4319 			} else {
4320 				expr_ss_between = gnm_expr_new_binary
4321 					(gnm_expr_copy (expr_ss_total),
4322 					 GNM_EXPR_OP_SUB,
4323 					 gnm_expr_copy (expr_ss_within));
4324 			}
4325 			dao_set_cell_expr (dao, 1, 2, expr_ss_between);
4326 		}
4327 		{
4328 			/* SS within groups */
4329 			dao_set_cell_expr (dao, 1, 3, gnm_expr_copy (expr_ss_within));
4330 		}
4331 		{
4332 			/* SS total groups */
4333 			dao_set_cell_expr (dao, 1, 4, expr_ss_total);
4334 		}
4335 		{
4336 			/* Between groups degrees of freedom */
4337 			dao_set_cell_int (dao, 2, 2,
4338 					  g_slist_length (info->base.input) - 1);
4339 		}
4340 		{
4341 			/* Within groups degrees of freedom */
4342 			expr_wdof = gnm_expr_new_funcall (fd_sum, sum_wdof_args);
4343 			dao_set_cell_expr (dao, 2, 3, gnm_expr_copy (expr_wdof));
4344 		}
4345 		{
4346 			/* Total degrees of freedom */
4347 			GnmExpr const *expr_tdof =
4348 				gnm_expr_new_binary
4349 				(gnm_expr_new_funcall (fd_sum, sum_tdof_args),
4350 				 GNM_EXPR_OP_SUB,
4351 				 gnm_expr_new_constant (value_new_int (1)));
4352 			dao_set_cell_expr (dao, 2, 4, expr_tdof);
4353 		}
4354 		{
4355 			/* MS values */
4356 			GnmExpr const *expr_ms =
4357 				gnm_expr_new_binary
4358 				(make_cellref (-2, 0),
4359 				 GNM_EXPR_OP_DIV,
4360 				 make_cellref (-1, 0));
4361 			dao_set_cell_expr (dao, 3, 2, gnm_expr_copy (expr_ms));
4362 			dao_set_cell_expr (dao, 3, 3, expr_ms);
4363 		}
4364 		{
4365 			/* Observed F */
4366 			GnmExpr const *expr_denom;
4367 			GnmExpr const *expr_f;
4368 
4369 			if (dao_cell_is_visible (dao, 3, 3)) {
4370 				expr_denom = make_cellref (-1, 1);
4371 				gnm_expr_free (expr_ss_within);
4372 			} else {
4373 				expr_denom = gnm_expr_new_binary
4374 					(expr_ss_within,
4375 					 GNM_EXPR_OP_DIV,
4376 					 gnm_expr_copy (expr_wdof));
4377 			}
4378 
4379 			expr_f = gnm_expr_new_binary
4380 				(make_cellref (-1, 0),
4381 				 GNM_EXPR_OP_DIV,
4382 				 expr_denom);
4383 			dao_set_cell_expr(dao, 4, 2, expr_f);
4384 		}
4385 		{
4386 			/* P value */
4387 			GnmFunc *fd_fdist;
4388 			const GnmExpr *arg1;
4389 			const GnmExpr *arg2;
4390 			const GnmExpr *arg3;
4391 
4392 			arg1 = make_cellref (-1, 0);
4393 			arg2 = make_cellref (-3, 0);
4394 
4395 			if (dao_cell_is_visible (dao, 2, 3)) {
4396 				arg3 = make_cellref (-3, 1);
4397 			} else {
4398 				arg3 = gnm_expr_copy (expr_wdof);
4399 			}
4400 
4401 			fd_fdist = gnm_func_lookup_or_add_placeholder ("FDIST");
4402 			gnm_func_inc_usage (fd_fdist);
4403 
4404 			dao_set_cell_expr
4405 				(dao, 5, 2,
4406 				 gnm_expr_new_funcall3
4407 				 (fd_fdist,
4408 				  arg1, arg2, arg3));
4409 			if (fd_fdist)
4410 				gnm_func_dec_usage (fd_fdist);
4411 		}
4412 		{
4413 			/* Critical F*/
4414 			GnmFunc *fd_finv;
4415 			const GnmExpr *arg3;
4416 
4417 			if (dao_cell_is_visible (dao, 2, 3)) {
4418 				arg3 = make_cellref (-4, 1);
4419 				gnm_expr_free (expr_wdof);
4420 			} else
4421 				arg3 = expr_wdof;
4422 
4423 			fd_finv = gnm_func_lookup_or_add_placeholder ("FINV");
4424 			gnm_func_inc_usage (fd_finv);
4425 
4426 			dao_set_cell_expr
4427 				(dao, 6, 2,
4428 				 gnm_expr_new_funcall3
4429 				 (fd_finv,
4430 				  gnm_expr_new_constant
4431 				  (value_new_float (info->alpha)),
4432 				  make_cellref (-4, 0),
4433 				  arg3));
4434 			gnm_func_dec_usage (fd_finv);
4435 		}
4436 	}
4437 
4438 finish_anova_single_factor_tool:
4439 
4440 	gnm_func_dec_usage (fd_mean);
4441 	gnm_func_dec_usage (fd_var);
4442 	gnm_func_dec_usage (fd_sum);
4443 	gnm_func_dec_usage (fd_count);
4444 	gnm_func_dec_usage (fd_devsq);
4445 
4446 	dao->offset_row = 0;
4447 	dao->offset_col = 0;
4448 
4449 	dao_redraw_respan (dao);
4450         return FALSE;
4451 }
4452 
4453 
4454 
4455 gboolean
analysis_tool_anova_single_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)4456 analysis_tool_anova_single_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
4457 				   analysis_tool_engine_t selector, gpointer result)
4458 {
4459 	analysis_tools_data_anova_single_t *info = specs;
4460 
4461 	switch (selector) {
4462 	case TOOL_ENGINE_UPDATE_DESCRIPTOR:
4463 		return (dao_command_descriptor (dao, _("Single Factor ANOVA (%s)"), result)
4464 			== NULL);
4465 	case TOOL_ENGINE_UPDATE_DAO:
4466 		prepare_input_range (&info->base.input, info->base.group_by);
4467 		dao_adjust (dao, 7, 11 + g_slist_length (info->base.input));
4468 		return FALSE;
4469 	case TOOL_ENGINE_CLEAN_UP:
4470 		return analysis_tool_generic_clean (specs);
4471 	case TOOL_ENGINE_LAST_VALIDITY_CHECK:
4472 		return FALSE;
4473 	case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
4474 		dao_prepare_output (NULL, dao, _("Anova"));
4475 		return FALSE;
4476 	case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
4477 		return dao_format_output (dao, _("Single Factor ANOVA"));
4478 	case TOOL_ENGINE_PERFORM_CALC:
4479 	default:
4480 		return analysis_tool_anova_single_engine_run (dao, specs);
4481 	}
4482 	return TRUE;  /* We shouldn't get here */
4483 }
4484 
4485 
/************* Fourier Analysis Tool **************************************
 *
 * This tool performs a fast Fourier transform, calculating the Fourier
 * transform as defined in Weaver: Theory of Discrete and Continuous
 * Fourier Analysis
 *
 **/
4493 
4494 
4495 static gboolean
analysis_tool_fourier_engine_run(data_analysis_output_t * dao,analysis_tools_data_fourier_t * info)4496 analysis_tool_fourier_engine_run (data_analysis_output_t *dao,
4497 				  analysis_tools_data_fourier_t *info)
4498 {
4499 	GSList *data = info->base.input;
4500 	int col = 0;
4501 
4502 	GnmFunc *fd_fourier;
4503 
4504 	fd_fourier = gnm_func_lookup_or_add_placeholder ("FOURIER");
4505 	gnm_func_inc_usage (fd_fourier);
4506 
4507 	dao_set_merge (dao, 0, 0, 1, 0);
4508 	dao_set_italic (dao, 0, 0, 0, 0);
4509 	dao_set_cell (dao, 0, 0, info->inverse ? _("Inverse Fourier Transform")
4510 		      : _("Fourier Transform"));
4511 
4512 	for (; data; data = data->next, col++) {
4513 		GnmValue *val_org = value_dup (data->data);
4514 		GnmExpr const *expr_fourier;
4515 		int rows, n;
4516 
4517 		dao_set_italic (dao, 0, 1, 1, 2);
4518 		set_cell_text_row (dao, 0, 2, _("/Real"
4519 						"/Imaginary"));
4520 		dao_set_merge (dao, 0, 1, 1, 1);
4521 		analysis_tools_write_label (val_org, dao, &info->base, 0, 1, col + 1);
4522 
4523 		n = (val_org->v_range.cell.b.row - val_org->v_range.cell.a.row + 1) *
4524 			(val_org->v_range.cell.b.col - val_org->v_range.cell.a.col + 1);
4525 		rows = 1;
4526 		while (rows < n)
4527 			rows *= 2;
4528 
4529 		expr_fourier = gnm_expr_new_funcall3
4530 			(fd_fourier,
4531 			 gnm_expr_new_constant (val_org),
4532 			 gnm_expr_new_constant (value_new_bool (info->inverse)),
4533 			 gnm_expr_new_constant (value_new_bool (TRUE)));
4534 
4535 		dao_set_array_expr (dao, 0, 3, 2, rows, expr_fourier);
4536 
4537 		dao->offset_col += 2;
4538 	}
4539 
4540 	gnm_func_dec_usage (fd_fourier);
4541 
4542 	dao_redraw_respan (dao);
4543 
4544 	return FALSE;
4545 }
4546 
4547 static int
analysis_tool_fourier_calc_length(analysis_tools_data_fourier_t * info)4548 analysis_tool_fourier_calc_length (analysis_tools_data_fourier_t *info)
4549 {
4550 	int m = 1, n = analysis_tool_calc_length (&info->base);
4551 
4552 	while (m < n)
4553 		m *= 2;
4554 	return m;
4555 }
4556 
4557 
4558 gboolean
analysis_tool_fourier_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)4559 analysis_tool_fourier_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
4560 			      analysis_tool_engine_t selector, gpointer result)
4561 {
4562 	analysis_tools_data_fourier_t *info = specs;
4563 
4564 	switch (selector) {
4565 	case TOOL_ENGINE_UPDATE_DESCRIPTOR:
4566 		return (dao_command_descriptor (dao, _("Fourier Series (%s)"), result)
4567 			== NULL);
4568 	case TOOL_ENGINE_UPDATE_DAO:
4569 		prepare_input_range (&info->base.input, info->base.group_by);
4570 		dao_adjust (dao, 2 * g_slist_length (info->base.input),
4571 			    3 + analysis_tool_fourier_calc_length (specs));
4572 		return FALSE;
4573 	case TOOL_ENGINE_CLEAN_UP:
4574 		return analysis_tool_generic_clean (specs);
4575 	case TOOL_ENGINE_LAST_VALIDITY_CHECK:
4576 		return FALSE;
4577 	case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
4578 		dao_prepare_output (NULL, dao, _("Fourier Series"));
4579 		return FALSE;
4580 	case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
4581 		return dao_format_output (dao, _("Fourier Series"));
4582 	case TOOL_ENGINE_PERFORM_CALC:
4583 	default:
4584 		return analysis_tool_fourier_engine_run (dao, specs);
4585 	}
4586 	return TRUE;  /* We shouldn't get here */
4587 }
4588