/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3  * analysis-histogram.c:
4  *
5   * This is a complete reimplementation of the histogram tool in 2008
6  *
7  * Author:
8  *   Andreas J. Guelzow  <aguelzow@pyrshep.ca>
9  *
10  * (C) Copyright 2008 by Andreas J. Guelzow  <aguelzow@pyrshep.ca>
11  *
12  *
13  * This program is free software; you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation; either version 2 of the License, or
16  * (at your option) any later version.
17  *
18  * This program is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with this program; if not, see <https://www.gnu.org/licenses/>.
25  */
26 
27 #include <gnumeric-config.h>
28 #include <glib/gi18n-lib.h>
29 #include <gnumeric.h>
30 #include <tools/analysis-histogram.h>
31 #include <tools/analysis-tools.h>
32 #include <value.h>
33 #include <ranges.h>
34 #include <expr.h>
35 #include <func.h>
36 #include <numbers.h>
37 #include <sheet-object-graph.h>
38 #include <goffice/goffice.h>
39 #include <sheet.h>
40 
41 static GnmExpr const *
make_hist_expr(analysis_tools_data_histogram_t * info,int col,GnmValue * val,gboolean fromminf,gboolean topinf,data_analysis_output_t * dao)42 make_hist_expr (analysis_tools_data_histogram_t *info,
43 		int col, GnmValue *val,
44 		gboolean fromminf, gboolean topinf,
45 		data_analysis_output_t *dao)
46 {
47 	GnmExpr const *expr;
48 	GnmExpr const *expr_data;
49 	GnmExpr const *expr_if_to, *expr_if_from;
50 	GnmExprOp from, to;
51 	GnmFunc *fd_if = gnm_func_lookup_or_add_placeholder ("IF");
52 	GnmFunc *fd_sum = gnm_func_lookup_or_add_placeholder ("SUM");
53 	GnmFunc *fd_count = info->percentage ?
54 		gnm_func_lookup_or_add_placeholder (info->only_numbers ? "COUNT" : "COUNTA") : NULL;
55 	GnmFunc *fd_isnumber = gnm_func_lookup_or_add_placeholder (info->only_numbers ? "ISNUMBER" : "ISBLANK");
56 	gint to_col = (info->cumulative) ? 0 : 1;
57 
58 	if (info->bin_type & bintype_no_inf_upper) {
59 		from = GNM_EXPR_OP_LT;
60 		to = GNM_EXPR_OP_GTE;
61 	} else {
62 		from = GNM_EXPR_OP_LTE;
63 		to = GNM_EXPR_OP_GT;
64 	}
65 
66 	expr_data = gnm_expr_new_constant (value_dup (val));
67 	if (topinf)
68 		expr_if_to = gnm_expr_new_constant (value_new_int (1));
69 	else
70 		expr_if_to = gnm_expr_new_funcall3
71 			(fd_if,
72 			 gnm_expr_new_binary
73 			 (gnm_expr_copy (expr_data),
74 			  to, make_cellref (- (col-to_col), 0)),
75 			 gnm_expr_new_constant (value_new_int (0)),
76 			 gnm_expr_new_constant (value_new_int (1)));
77 
78 	if (info->cumulative)
79 		expr = expr_if_to;
80 	else {
81 		GnmExpr const *one = gnm_expr_new_constant (value_new_int (1));
82 		if (fromminf)
83 			expr_if_from = one;
84 		else
85 			expr_if_from = gnm_expr_new_funcall3
86 				(fd_if,
87 				 gnm_expr_new_binary
88 				 (gnm_expr_copy (expr_data),
89 				  from, make_cellref (- col, 0)),
90 				 gnm_expr_new_constant (value_new_int (0)),
91 				 one);
92 		expr = gnm_expr_new_binary (expr_if_from,
93 					      GNM_EXPR_OP_MULT,
94 					      expr_if_to);
95 	}
96 
97 	if (info->only_numbers)
98 		expr = gnm_expr_new_binary (expr,
99 					    GNM_EXPR_OP_MULT,
100 					    gnm_expr_new_funcall3
101 					    (fd_if,gnm_expr_new_funcall1
102 					     (fd_isnumber, gnm_expr_copy (expr_data)),
103 					     gnm_expr_new_constant (value_new_int (1)),
104 					     gnm_expr_new_constant (value_new_int (0))));
105 	else
106 		expr = gnm_expr_new_binary (expr,
107 					    GNM_EXPR_OP_MULT,
108 					    gnm_expr_new_funcall3
109 					    (fd_if,gnm_expr_new_funcall1
110 					     (fd_isnumber, gnm_expr_copy (expr_data)),
111 					     gnm_expr_new_constant (value_new_int (0)),
112 					     gnm_expr_new_constant (value_new_int (1))));
113 
114 
115 	expr = gnm_expr_new_funcall1 (fd_sum, expr);
116 
117 	if (info->percentage)
118 		expr = gnm_expr_new_binary (expr,
119 					    GNM_EXPR_OP_DIV,
120 					    gnm_expr_new_funcall1
121 					    (fd_count,
122 					     expr_data));
123 	else
124 		gnm_expr_free (expr_data);
125 
126 	return expr;
127 }
128 
129 static gboolean
analysis_tool_histogram_engine_run(data_analysis_output_t * dao,analysis_tools_data_histogram_t * info)130 analysis_tool_histogram_engine_run (data_analysis_output_t *dao,
131 				    analysis_tools_data_histogram_t *info)
132 {
133 	GnmRange range;
134 	gint i, i_limit, i_start, i_end, col;
135 	GSList *l;
136 	gint to_col = (info->cumulative) ? 0 : 1;
137 
138 	GnmExpr const *expr_bin = NULL;
139 
140 	GnmFunc *fd_small;
141 	GnmFunc *fd_index = NULL;
142 
143 	char const *format;
144 
145 	fd_small = gnm_func_lookup_or_add_placeholder ("SMALL");
146 	gnm_func_inc_usage (fd_small);
147 
148 	if (info->base.labels) {
149 		fd_index = gnm_func_lookup_or_add_placeholder ("INDEX");
150 		gnm_func_inc_usage (fd_index);
151 	}
152 
153 
154 	/* General Info */
155 
156 	dao_set_italic (dao, 0, 0, 0, 0);
157 	dao_set_cell (dao, 0, 0, _("Histogram"));
158 
159 	/* Setting up the bins */
160 
161 	if (info->predetermined) {
162 		range_init_value (&range, info->bin);
163 		i_limit = range_height (&range) * range_width (&range);
164 	} else {
165 		i_limit = info->n;
166 	}
167 
168 	i_end = i_limit;
169 	if (info->bin_type & bintype_p_inf_lower)
170 		i_end++;
171 	if (info->bin_type & bintype_m_inf_lower)
172 		i_end++;
173 	dao_set_format  (dao, to_col, 1, to_col, 1, "\"\";\"\"");
174 	format = (info->bin_type & bintype_no_inf_upper) ?
175 		/* translator note: only translate the */
176 		/* "to below" and "up to" exclusive of */
177 		/* the quotation marks: */
178 		_("\"to below\" * General") : _("\"up to\" * General");
179 	dao_set_format  (dao, to_col, 2, to_col, i_end, format);
180 
181 	if (info->bin_type & bintype_m_inf_lower) {
182 		dao_set_cell_value (dao, to_col, 1, value_new_float (-GNM_MAX));
183 		i_start = 2;
184 	} else
185 		i_start = 1;
186 
187 	if (info->predetermined) {
188 		expr_bin = gnm_expr_new_constant (info->bin);
189 		for (i = 0; i < i_limit; i++)
190 			dao_set_cell_expr (dao, to_col, i_start + i,
191 					   gnm_expr_new_funcall2 (fd_small,
192 								  gnm_expr_copy (expr_bin),
193 								  gnm_expr_new_constant
194 								  (value_new_int (i + 1))));
195 	} else {
196 		GnmValue *val = value_dup (info->base.input->data);
197 		GnmExpr const *expr_min;
198 		GnmExpr const *expr_max;
199 
200 		if (info->base.labels)
201 			switch (info->base.group_by) {
202 			case GROUPED_BY_ROW:
203 				val->v_range.cell.a.col++;
204 				break;
205 			default:
206 				val->v_range.cell.a.row++;
207 				break;
208 			}
209 
210 		if (info->min_given)
211 			dao_set_cell_float (dao, to_col, i_start, info->min);
212 		else {
213 			GnmFunc *fd_min;
214 
215 			fd_min = gnm_func_lookup_or_add_placeholder ("MIN");
216 			gnm_func_inc_usage (fd_min);
217 			dao_set_cell_expr (dao, to_col, i_start,
218 					   gnm_expr_new_funcall1
219 					   (fd_min,
220 					    gnm_expr_new_constant (value_dup (val))));
221 			gnm_func_dec_usage (fd_min);
222 		}
223 
224 		if (info->max_given)
225 			dao_set_cell_float (dao, to_col, i_start + i_limit - 1, info->max);
226 		else {
227 			GnmFunc *fd_max;
228 
229 			fd_max = gnm_func_lookup_or_add_placeholder ("MAX");
230 			gnm_func_inc_usage (fd_max);
231 			dao_set_cell_expr (dao, to_col, i_start + i_limit - 1,
232 					   gnm_expr_new_funcall1
233 					   (fd_max,
234 					    gnm_expr_new_constant (value_dup (val))));
235 			gnm_func_dec_usage (fd_max);
236 		}
237 
238 		value_release (val);
239 
240 		expr_min = dao_get_cellref (dao, to_col, i_start);
241 		expr_max = dao_get_cellref (dao, to_col, i_start + i_limit - 1);
242 
243 		for (i = 1; i < i_limit - 1; i++)
244 			dao_set_cell_expr (dao, to_col, i_start + i,
245 					   gnm_expr_new_binary (gnm_expr_copy (expr_min),
246 								GNM_EXPR_OP_ADD,
247 								gnm_expr_new_binary
248 								(gnm_expr_new_constant (value_new_int (i)),
249 								 GNM_EXPR_OP_MULT,
250 								 gnm_expr_new_binary
251 								 (gnm_expr_new_binary
252 								  (gnm_expr_copy (expr_max),
253 								   GNM_EXPR_OP_SUB,
254 								   gnm_expr_copy (expr_min)),
255 								  GNM_EXPR_OP_DIV,
256 								  gnm_expr_new_constant (value_new_int (info->n - 1))))));
257 
258 		gnm_expr_free (expr_min);
259 		gnm_expr_free (expr_max);
260 	}
261 
262 	if (info->bin_type & bintype_p_inf_lower) {
263 		dao_set_format  (dao, to_col, i_end, to_col, i_end,
264 		/* translator note: only translate the */
265 		/* "to" and "\xe2\x88\x9e" exclusive of */
266 		/* the quotation marks: */
267 				 _("\"to\" * \"\xe2\x88\x9e\""));
268 		dao_set_cell_value (dao, to_col, i_end, value_new_float (GNM_MAX));
269 	}
270 
271 	/* format the lower end of the bins */
272 
273 	if (!info->cumulative) {
274 		GnmExpr const *expr_cr = make_cellref (1,-1);
275 
276 		format = (info->bin_type & bintype_no_inf_upper) ?
277 		/* translator note: only translate the */
278 		/* "from" and "above" exclusive of */
279 		/* the quotation marks: */
280 			_("\"from\" * General") : _("\"above\" * General");
281 		dao_set_format  (dao, 0, 2, 0, i_end, format);
282 		if (info->bin_type & bintype_m_inf_lower)
283 			dao_set_format  (dao, 0, 2, 0, 2,
284 		/* translator note: only translate the */
285 		/* "from" and "\xe2\x88\x92\xe2\x88\x9e" exclusive of */
286 		/* the quotation marks: */
287 					 _("\"from\" * \"\xe2\x88\x92\xe2\x88\x9e\";"
288 					   "\"from\" * \"\xe2\x88\x92\xe2\x88\x9e\""));
289 		for (i = 2; i <= i_end; i++)
290 			dao_set_cell_expr (dao, 0, i, gnm_expr_copy (expr_cr));
291 
292 		gnm_expr_free (expr_cr);
293 	}
294 
295 	/* insert formulas for histogram values */
296 
297 	for (l = info->base.input, col = to_col + 1; l; col++, l = l->next) {
298 		GnmValue *val = l->data;
299 		GnmValue *val_c = NULL;
300 
301 		dao_set_italic (dao, col, 1, col, 1);
302 		if (info->base.labels) {
303 			val_c = value_dup (val);
304 			switch (info->base.group_by) {
305 			case GROUPED_BY_ROW:
306 				val->v_range.cell.a.col++;
307 				break;
308 			default:
309 				val->v_range.cell.a.row++;
310 				break;
311 			}
312 			dao_set_cell_expr (dao, col, 1,
313 					   gnm_expr_new_funcall1 (fd_index,
314 								  gnm_expr_new_constant (val_c)));
315 		} else {
316 			char const *format;
317 
318 			switch (info->base.group_by) {
319 			case GROUPED_BY_ROW:
320 				format = _("Row %d");
321 				break;
322 			case GROUPED_BY_COL:
323 				format = _("Column %d");
324 				break;
325 			default:
326 				format = _("Area %d");
327 				break;
328 			}
329 			dao_set_cell_printf (dao, col, 1, format, col - to_col);
330 		}
331 
332 		if (info->percentage)
333 			dao_set_format (dao, col, 2, col, i_end, "0.0%");
334 
335 		for (i = 2; i <= i_end; i++) {
336 			gboolean fromminf = (i == 2) &&
337 				(info->bin_type & bintype_m_inf_lower);
338 			gboolean topinf = (i == i_end) &&
339 				(info->bin_type & bintype_p_inf_lower);
340 			dao_set_cell_array_expr
341 				(dao, col, i,
342 				 make_hist_expr (info, col, val,
343 						 fromminf, topinf, dao));
344 		}
345 	}
346 
347 
348 	if (expr_bin != NULL)
349 		gnm_expr_free (expr_bin);
350 
351 	gnm_func_dec_usage (fd_small);
352 	if (fd_index != NULL)
353 		gnm_func_dec_usage (fd_index);
354 
355 	/* Create Chart if requested */
356 	if (info->chart != NO_CHART) {
357 		SheetObject *so;
358 		GogGraph     *graph;
359 		GogChart     *chart;
360 		GogPlot	     *plot;
361 		GogSeries    *series;
362 		gint limits_start, limits_end, values_start, values_end;
363 		GOData *limits;
364 		GOData *values;
365 		int ct;
366 
367 		graph = g_object_new (GOG_TYPE_GRAPH, NULL);
368 		chart = GOG_CHART (gog_object_add_by_name (
369 						   GOG_OBJECT (graph), "Chart", NULL));
370 
371 		if (info->chart == HISTOGRAM_CHART) {
372 			plot = gog_plot_new_by_name ("GogHistogramPlot");
373 			limits_start =  i_start;
374 			limits_end =  i_start + i_limit - 1;
375 			values_start = i_start + 1;
376 			values_end = i_start + i_limit - 1;
377 		} else {
378 			plot = gog_plot_new_by_name ("GogBarColPlot");
379 			limits_start =  2;
380 			limits_end =  i_end;
381 			values_start = 2;
382 			values_end = i_end;
383 			if (info->chart == BAR_CHART)
384 				go_object_toggle (plot, "horizontal");
385 		}
386 
387 		gog_object_add_by_name (GOG_OBJECT (chart),
388 					"Plot", GOG_OBJECT (plot));
389 
390 		limits = dao_go_data_vector (dao, to_col, limits_start,
391 					     to_col, limits_end);
392 
393 		for (ct = 1; ct < (col - to_col); ct ++) {
394 			g_object_ref (limits);
395 			values = dao_go_data_vector (dao, to_col + ct, values_start,
396 						     to_col + ct, values_end);
397 
398 			series = gog_plot_new_series (plot);
399 			gog_series_set_dim (series, 0, limits, NULL);
400 			gog_series_set_dim (series, 1, values, NULL);
401 		}
402 		g_object_unref (limits);
403 
404 		if (info->chart == HISTOGRAM_CHART) {
405 			GogObject *axis;
406 			GogObject *label;
407 			GnmExprTop const *label_string;
408 			GOData *data;
409 		        axis = gog_object_get_child_by_name (GOG_OBJECT (chart), "X-Axis");
410 			go_object_set_property (G_OBJECT (axis), "assigned-format-string-XL",
411 						"X-Axis Format", "0.0EE0",
412 						NULL, NULL);
413 			axis = gog_object_get_child_by_name (GOG_OBJECT (chart), "Y-Axis");
414 			label_string = gnm_expr_top_new_constant (value_new_string (_("Frequency Density")));
415 			data = gnm_go_data_scalar_new_expr (dao->sheet, label_string);
416 			label = gog_object_add_by_name (axis, "Label", NULL);
417 			gog_dataset_set_dim (GOG_DATASET (label), 0, data, NULL);
418 		} else if (info->chart == COLUMN_CHART) {
419 			GogObject *axis;
420 			GogObject *label;
421 			GnmExprTop const *label_string;
422 			GOData *data;
423 		        axis = gog_object_get_child_by_name (GOG_OBJECT (chart), "X-Axis");
424 			go_object_set_property (G_OBJECT (axis), "assigned-format-string-XL",
425 						"X-Axis Format", "0.0EE0",
426 						NULL, NULL);
427 			axis = gog_object_get_child_by_name (GOG_OBJECT (chart), "Y-Axis");
428 			label_string = gnm_expr_top_new_constant (value_new_string (_("Frequency")));
429 			data = gnm_go_data_scalar_new_expr (dao->sheet, label_string);
430 			label = gog_object_add_by_name (axis, "Label", NULL);
431 			gog_dataset_set_dim (GOG_DATASET (label), 0, data, NULL);
432 		} else if (info->chart == BAR_CHART) {
433 			GogObject *axis;
434 			GogObject *label;
435 			GnmExprTop const *label_string;
436 			GOData *data;
437 		        axis = gog_object_get_child_by_name (GOG_OBJECT (chart), "Y-Axis");
438 			go_object_set_property (G_OBJECT (axis), "assigned-format-string-XL",
439 						"X-Axis Format", "0.0EE0",
440 						NULL, NULL);
441 			axis = gog_object_get_child_by_name (GOG_OBJECT (chart), "X-Axis");
442 			label_string = gnm_expr_top_new_constant (value_new_string (_("Frequency")));
443 			data = gnm_go_data_scalar_new_expr (dao->sheet, label_string);
444 			label = gog_object_add_by_name (axis, "Label", NULL);
445 			gog_dataset_set_dim (GOG_DATASET (label), 0, data, NULL);
446 		}
447 
448 		so = sheet_object_graph_new (graph);
449 		g_object_unref (graph);
450 
451 		dao_set_sheet_object (dao, 0, 1, so);
452 	}
453 
454 	dao_redraw_respan (dao);
455 
456 	return FALSE;
457 }
458 
459 
460 static gint
calc_length(GnmValue * bin)461 calc_length (GnmValue   *bin)
462 {
463 	g_return_val_if_fail (bin != NULL, 0);
464 	g_return_val_if_fail (VALUE_IS_CELLRANGE (bin), 0);
465 
466 	return ((bin->v_range.cell.b.col - bin->v_range.cell.a.col + 1) *
467 		(bin->v_range.cell.b.row - bin->v_range.cell.a.row + 1));
468 }
469 
470 gboolean
analysis_tool_histogram_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)471 analysis_tool_histogram_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
472 			      analysis_tool_engine_t selector, gpointer result)
473 {
474 	analysis_tools_data_histogram_t *info = specs;
475 
476 	switch (selector) {
477 	case TOOL_ENGINE_UPDATE_DESCRIPTOR:
478 		return (dao_command_descriptor (dao, _("Histogram (%s)"), result)
479 			== NULL);
480 	case TOOL_ENGINE_UPDATE_DAO:
481 	{
482 		int i, j;
483 
484 		prepare_input_range (&info->base.input, info->base.group_by);
485 
486 		i = 1 + ((info->predetermined) ? calc_length (info->bin) : info->n);
487 		if (info->bin_type & bintype_p_inf_lower)
488 			i++;
489 		if (info->bin_type & bintype_m_inf_lower)
490 			i++;
491 
492 		j = g_slist_length (info->base.input) + ((info->cumulative) ? 1 : 2);
493 
494 		dao_adjust (dao, j, i);
495 
496 		return FALSE;
497 	}
498 	case TOOL_ENGINE_CLEAN_UP:
499 		return analysis_tool_generic_clean (specs);
500 	case TOOL_ENGINE_LAST_VALIDITY_CHECK:
501 		return FALSE;
502 	case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
503 		dao_prepare_output (NULL, dao, _("Histogram"));
504 		return FALSE;
505 	case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
506 		return dao_format_output (dao, _("Histogram"));
507 	case TOOL_ENGINE_PERFORM_CALC:
508 	default:
509 		return analysis_tool_histogram_engine_run (dao, specs);
510 	}
511 	return TRUE;
512 }
513 
514 
515 
516 
517