/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3 * analysis-histogram.c:
4 *
5 * This is a complete reimplementation of the histogram tool in 2008
6 *
7 * Author:
8 * Andreas J. Guelzow <aguelzow@pyrshep.ca>
9 *
10 * (C) Copyright 2008 by Andreas J. Guelzow <aguelzow@pyrshep.ca>
11 *
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, see <https://www.gnu.org/licenses/>.
25 */
26
27 #include <gnumeric-config.h>
28 #include <glib/gi18n-lib.h>
29 #include <gnumeric.h>
30 #include <tools/analysis-histogram.h>
31 #include <tools/analysis-tools.h>
32 #include <value.h>
33 #include <ranges.h>
34 #include <expr.h>
35 #include <func.h>
36 #include <numbers.h>
37 #include <sheet-object-graph.h>
38 #include <goffice/goffice.h>
39 #include <sheet.h>
40
41 static GnmExpr const *
make_hist_expr(analysis_tools_data_histogram_t * info,int col,GnmValue * val,gboolean fromminf,gboolean topinf,data_analysis_output_t * dao)42 make_hist_expr (analysis_tools_data_histogram_t *info,
43 int col, GnmValue *val,
44 gboolean fromminf, gboolean topinf,
45 data_analysis_output_t *dao)
46 {
47 GnmExpr const *expr;
48 GnmExpr const *expr_data;
49 GnmExpr const *expr_if_to, *expr_if_from;
50 GnmExprOp from, to;
51 GnmFunc *fd_if = gnm_func_lookup_or_add_placeholder ("IF");
52 GnmFunc *fd_sum = gnm_func_lookup_or_add_placeholder ("SUM");
53 GnmFunc *fd_count = info->percentage ?
54 gnm_func_lookup_or_add_placeholder (info->only_numbers ? "COUNT" : "COUNTA") : NULL;
55 GnmFunc *fd_isnumber = gnm_func_lookup_or_add_placeholder (info->only_numbers ? "ISNUMBER" : "ISBLANK");
56 gint to_col = (info->cumulative) ? 0 : 1;
57
58 if (info->bin_type & bintype_no_inf_upper) {
59 from = GNM_EXPR_OP_LT;
60 to = GNM_EXPR_OP_GTE;
61 } else {
62 from = GNM_EXPR_OP_LTE;
63 to = GNM_EXPR_OP_GT;
64 }
65
66 expr_data = gnm_expr_new_constant (value_dup (val));
67 if (topinf)
68 expr_if_to = gnm_expr_new_constant (value_new_int (1));
69 else
70 expr_if_to = gnm_expr_new_funcall3
71 (fd_if,
72 gnm_expr_new_binary
73 (gnm_expr_copy (expr_data),
74 to, make_cellref (- (col-to_col), 0)),
75 gnm_expr_new_constant (value_new_int (0)),
76 gnm_expr_new_constant (value_new_int (1)));
77
78 if (info->cumulative)
79 expr = expr_if_to;
80 else {
81 GnmExpr const *one = gnm_expr_new_constant (value_new_int (1));
82 if (fromminf)
83 expr_if_from = one;
84 else
85 expr_if_from = gnm_expr_new_funcall3
86 (fd_if,
87 gnm_expr_new_binary
88 (gnm_expr_copy (expr_data),
89 from, make_cellref (- col, 0)),
90 gnm_expr_new_constant (value_new_int (0)),
91 one);
92 expr = gnm_expr_new_binary (expr_if_from,
93 GNM_EXPR_OP_MULT,
94 expr_if_to);
95 }
96
97 if (info->only_numbers)
98 expr = gnm_expr_new_binary (expr,
99 GNM_EXPR_OP_MULT,
100 gnm_expr_new_funcall3
101 (fd_if,gnm_expr_new_funcall1
102 (fd_isnumber, gnm_expr_copy (expr_data)),
103 gnm_expr_new_constant (value_new_int (1)),
104 gnm_expr_new_constant (value_new_int (0))));
105 else
106 expr = gnm_expr_new_binary (expr,
107 GNM_EXPR_OP_MULT,
108 gnm_expr_new_funcall3
109 (fd_if,gnm_expr_new_funcall1
110 (fd_isnumber, gnm_expr_copy (expr_data)),
111 gnm_expr_new_constant (value_new_int (0)),
112 gnm_expr_new_constant (value_new_int (1))));
113
114
115 expr = gnm_expr_new_funcall1 (fd_sum, expr);
116
117 if (info->percentage)
118 expr = gnm_expr_new_binary (expr,
119 GNM_EXPR_OP_DIV,
120 gnm_expr_new_funcall1
121 (fd_count,
122 expr_data));
123 else
124 gnm_expr_free (expr_data);
125
126 return expr;
127 }
128
129 static gboolean
analysis_tool_histogram_engine_run(data_analysis_output_t * dao,analysis_tools_data_histogram_t * info)130 analysis_tool_histogram_engine_run (data_analysis_output_t *dao,
131 analysis_tools_data_histogram_t *info)
132 {
133 GnmRange range;
134 gint i, i_limit, i_start, i_end, col;
135 GSList *l;
136 gint to_col = (info->cumulative) ? 0 : 1;
137
138 GnmExpr const *expr_bin = NULL;
139
140 GnmFunc *fd_small;
141 GnmFunc *fd_index = NULL;
142
143 char const *format;
144
145 fd_small = gnm_func_lookup_or_add_placeholder ("SMALL");
146 gnm_func_inc_usage (fd_small);
147
148 if (info->base.labels) {
149 fd_index = gnm_func_lookup_or_add_placeholder ("INDEX");
150 gnm_func_inc_usage (fd_index);
151 }
152
153
154 /* General Info */
155
156 dao_set_italic (dao, 0, 0, 0, 0);
157 dao_set_cell (dao, 0, 0, _("Histogram"));
158
159 /* Setting up the bins */
160
161 if (info->predetermined) {
162 range_init_value (&range, info->bin);
163 i_limit = range_height (&range) * range_width (&range);
164 } else {
165 i_limit = info->n;
166 }
167
168 i_end = i_limit;
169 if (info->bin_type & bintype_p_inf_lower)
170 i_end++;
171 if (info->bin_type & bintype_m_inf_lower)
172 i_end++;
173 dao_set_format (dao, to_col, 1, to_col, 1, "\"\";\"\"");
174 format = (info->bin_type & bintype_no_inf_upper) ?
175 /* translator note: only translate the */
176 /* "to below" and "up to" exclusive of */
177 /* the quotation marks: */
178 _("\"to below\" * General") : _("\"up to\" * General");
179 dao_set_format (dao, to_col, 2, to_col, i_end, format);
180
181 if (info->bin_type & bintype_m_inf_lower) {
182 dao_set_cell_value (dao, to_col, 1, value_new_float (-GNM_MAX));
183 i_start = 2;
184 } else
185 i_start = 1;
186
187 if (info->predetermined) {
188 expr_bin = gnm_expr_new_constant (info->bin);
189 for (i = 0; i < i_limit; i++)
190 dao_set_cell_expr (dao, to_col, i_start + i,
191 gnm_expr_new_funcall2 (fd_small,
192 gnm_expr_copy (expr_bin),
193 gnm_expr_new_constant
194 (value_new_int (i + 1))));
195 } else {
196 GnmValue *val = value_dup (info->base.input->data);
197 GnmExpr const *expr_min;
198 GnmExpr const *expr_max;
199
200 if (info->base.labels)
201 switch (info->base.group_by) {
202 case GROUPED_BY_ROW:
203 val->v_range.cell.a.col++;
204 break;
205 default:
206 val->v_range.cell.a.row++;
207 break;
208 }
209
210 if (info->min_given)
211 dao_set_cell_float (dao, to_col, i_start, info->min);
212 else {
213 GnmFunc *fd_min;
214
215 fd_min = gnm_func_lookup_or_add_placeholder ("MIN");
216 gnm_func_inc_usage (fd_min);
217 dao_set_cell_expr (dao, to_col, i_start,
218 gnm_expr_new_funcall1
219 (fd_min,
220 gnm_expr_new_constant (value_dup (val))));
221 gnm_func_dec_usage (fd_min);
222 }
223
224 if (info->max_given)
225 dao_set_cell_float (dao, to_col, i_start + i_limit - 1, info->max);
226 else {
227 GnmFunc *fd_max;
228
229 fd_max = gnm_func_lookup_or_add_placeholder ("MAX");
230 gnm_func_inc_usage (fd_max);
231 dao_set_cell_expr (dao, to_col, i_start + i_limit - 1,
232 gnm_expr_new_funcall1
233 (fd_max,
234 gnm_expr_new_constant (value_dup (val))));
235 gnm_func_dec_usage (fd_max);
236 }
237
238 value_release (val);
239
240 expr_min = dao_get_cellref (dao, to_col, i_start);
241 expr_max = dao_get_cellref (dao, to_col, i_start + i_limit - 1);
242
243 for (i = 1; i < i_limit - 1; i++)
244 dao_set_cell_expr (dao, to_col, i_start + i,
245 gnm_expr_new_binary (gnm_expr_copy (expr_min),
246 GNM_EXPR_OP_ADD,
247 gnm_expr_new_binary
248 (gnm_expr_new_constant (value_new_int (i)),
249 GNM_EXPR_OP_MULT,
250 gnm_expr_new_binary
251 (gnm_expr_new_binary
252 (gnm_expr_copy (expr_max),
253 GNM_EXPR_OP_SUB,
254 gnm_expr_copy (expr_min)),
255 GNM_EXPR_OP_DIV,
256 gnm_expr_new_constant (value_new_int (info->n - 1))))));
257
258 gnm_expr_free (expr_min);
259 gnm_expr_free (expr_max);
260 }
261
262 if (info->bin_type & bintype_p_inf_lower) {
263 dao_set_format (dao, to_col, i_end, to_col, i_end,
264 /* translator note: only translate the */
265 /* "to" and "\xe2\x88\x9e" exclusive of */
266 /* the quotation marks: */
267 _("\"to\" * \"\xe2\x88\x9e\""));
268 dao_set_cell_value (dao, to_col, i_end, value_new_float (GNM_MAX));
269 }
270
271 /* format the lower end of the bins */
272
273 if (!info->cumulative) {
274 GnmExpr const *expr_cr = make_cellref (1,-1);
275
276 format = (info->bin_type & bintype_no_inf_upper) ?
277 /* translator note: only translate the */
278 /* "from" and "above" exclusive of */
279 /* the quotation marks: */
280 _("\"from\" * General") : _("\"above\" * General");
281 dao_set_format (dao, 0, 2, 0, i_end, format);
282 if (info->bin_type & bintype_m_inf_lower)
283 dao_set_format (dao, 0, 2, 0, 2,
284 /* translator note: only translate the */
285 /* "from" and "\xe2\x88\x92\xe2\x88\x9e" exclusive of */
286 /* the quotation marks: */
287 _("\"from\" * \"\xe2\x88\x92\xe2\x88\x9e\";"
288 "\"from\" * \"\xe2\x88\x92\xe2\x88\x9e\""));
289 for (i = 2; i <= i_end; i++)
290 dao_set_cell_expr (dao, 0, i, gnm_expr_copy (expr_cr));
291
292 gnm_expr_free (expr_cr);
293 }
294
295 /* insert formulas for histogram values */
296
297 for (l = info->base.input, col = to_col + 1; l; col++, l = l->next) {
298 GnmValue *val = l->data;
299 GnmValue *val_c = NULL;
300
301 dao_set_italic (dao, col, 1, col, 1);
302 if (info->base.labels) {
303 val_c = value_dup (val);
304 switch (info->base.group_by) {
305 case GROUPED_BY_ROW:
306 val->v_range.cell.a.col++;
307 break;
308 default:
309 val->v_range.cell.a.row++;
310 break;
311 }
312 dao_set_cell_expr (dao, col, 1,
313 gnm_expr_new_funcall1 (fd_index,
314 gnm_expr_new_constant (val_c)));
315 } else {
316 char const *format;
317
318 switch (info->base.group_by) {
319 case GROUPED_BY_ROW:
320 format = _("Row %d");
321 break;
322 case GROUPED_BY_COL:
323 format = _("Column %d");
324 break;
325 default:
326 format = _("Area %d");
327 break;
328 }
329 dao_set_cell_printf (dao, col, 1, format, col - to_col);
330 }
331
332 if (info->percentage)
333 dao_set_format (dao, col, 2, col, i_end, "0.0%");
334
335 for (i = 2; i <= i_end; i++) {
336 gboolean fromminf = (i == 2) &&
337 (info->bin_type & bintype_m_inf_lower);
338 gboolean topinf = (i == i_end) &&
339 (info->bin_type & bintype_p_inf_lower);
340 dao_set_cell_array_expr
341 (dao, col, i,
342 make_hist_expr (info, col, val,
343 fromminf, topinf, dao));
344 }
345 }
346
347
348 if (expr_bin != NULL)
349 gnm_expr_free (expr_bin);
350
351 gnm_func_dec_usage (fd_small);
352 if (fd_index != NULL)
353 gnm_func_dec_usage (fd_index);
354
355 /* Create Chart if requested */
356 if (info->chart != NO_CHART) {
357 SheetObject *so;
358 GogGraph *graph;
359 GogChart *chart;
360 GogPlot *plot;
361 GogSeries *series;
362 gint limits_start, limits_end, values_start, values_end;
363 GOData *limits;
364 GOData *values;
365 int ct;
366
367 graph = g_object_new (GOG_TYPE_GRAPH, NULL);
368 chart = GOG_CHART (gog_object_add_by_name (
369 GOG_OBJECT (graph), "Chart", NULL));
370
371 if (info->chart == HISTOGRAM_CHART) {
372 plot = gog_plot_new_by_name ("GogHistogramPlot");
373 limits_start = i_start;
374 limits_end = i_start + i_limit - 1;
375 values_start = i_start + 1;
376 values_end = i_start + i_limit - 1;
377 } else {
378 plot = gog_plot_new_by_name ("GogBarColPlot");
379 limits_start = 2;
380 limits_end = i_end;
381 values_start = 2;
382 values_end = i_end;
383 if (info->chart == BAR_CHART)
384 go_object_toggle (plot, "horizontal");
385 }
386
387 gog_object_add_by_name (GOG_OBJECT (chart),
388 "Plot", GOG_OBJECT (plot));
389
390 limits = dao_go_data_vector (dao, to_col, limits_start,
391 to_col, limits_end);
392
393 for (ct = 1; ct < (col - to_col); ct ++) {
394 g_object_ref (limits);
395 values = dao_go_data_vector (dao, to_col + ct, values_start,
396 to_col + ct, values_end);
397
398 series = gog_plot_new_series (plot);
399 gog_series_set_dim (series, 0, limits, NULL);
400 gog_series_set_dim (series, 1, values, NULL);
401 }
402 g_object_unref (limits);
403
404 if (info->chart == HISTOGRAM_CHART) {
405 GogObject *axis;
406 GogObject *label;
407 GnmExprTop const *label_string;
408 GOData *data;
409 axis = gog_object_get_child_by_name (GOG_OBJECT (chart), "X-Axis");
410 go_object_set_property (G_OBJECT (axis), "assigned-format-string-XL",
411 "X-Axis Format", "0.0EE0",
412 NULL, NULL);
413 axis = gog_object_get_child_by_name (GOG_OBJECT (chart), "Y-Axis");
414 label_string = gnm_expr_top_new_constant (value_new_string (_("Frequency Density")));
415 data = gnm_go_data_scalar_new_expr (dao->sheet, label_string);
416 label = gog_object_add_by_name (axis, "Label", NULL);
417 gog_dataset_set_dim (GOG_DATASET (label), 0, data, NULL);
418 } else if (info->chart == COLUMN_CHART) {
419 GogObject *axis;
420 GogObject *label;
421 GnmExprTop const *label_string;
422 GOData *data;
423 axis = gog_object_get_child_by_name (GOG_OBJECT (chart), "X-Axis");
424 go_object_set_property (G_OBJECT (axis), "assigned-format-string-XL",
425 "X-Axis Format", "0.0EE0",
426 NULL, NULL);
427 axis = gog_object_get_child_by_name (GOG_OBJECT (chart), "Y-Axis");
428 label_string = gnm_expr_top_new_constant (value_new_string (_("Frequency")));
429 data = gnm_go_data_scalar_new_expr (dao->sheet, label_string);
430 label = gog_object_add_by_name (axis, "Label", NULL);
431 gog_dataset_set_dim (GOG_DATASET (label), 0, data, NULL);
432 } else if (info->chart == BAR_CHART) {
433 GogObject *axis;
434 GogObject *label;
435 GnmExprTop const *label_string;
436 GOData *data;
437 axis = gog_object_get_child_by_name (GOG_OBJECT (chart), "Y-Axis");
438 go_object_set_property (G_OBJECT (axis), "assigned-format-string-XL",
439 "X-Axis Format", "0.0EE0",
440 NULL, NULL);
441 axis = gog_object_get_child_by_name (GOG_OBJECT (chart), "X-Axis");
442 label_string = gnm_expr_top_new_constant (value_new_string (_("Frequency")));
443 data = gnm_go_data_scalar_new_expr (dao->sheet, label_string);
444 label = gog_object_add_by_name (axis, "Label", NULL);
445 gog_dataset_set_dim (GOG_DATASET (label), 0, data, NULL);
446 }
447
448 so = sheet_object_graph_new (graph);
449 g_object_unref (graph);
450
451 dao_set_sheet_object (dao, 0, 1, so);
452 }
453
454 dao_redraw_respan (dao);
455
456 return FALSE;
457 }
458
459
460 static gint
calc_length(GnmValue * bin)461 calc_length (GnmValue *bin)
462 {
463 g_return_val_if_fail (bin != NULL, 0);
464 g_return_val_if_fail (VALUE_IS_CELLRANGE (bin), 0);
465
466 return ((bin->v_range.cell.b.col - bin->v_range.cell.a.col + 1) *
467 (bin->v_range.cell.b.row - bin->v_range.cell.a.row + 1));
468 }
469
470 gboolean
analysis_tool_histogram_engine(G_GNUC_UNUSED GOCmdContext * gcc,data_analysis_output_t * dao,gpointer specs,analysis_tool_engine_t selector,gpointer result)471 analysis_tool_histogram_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
472 analysis_tool_engine_t selector, gpointer result)
473 {
474 analysis_tools_data_histogram_t *info = specs;
475
476 switch (selector) {
477 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
478 return (dao_command_descriptor (dao, _("Histogram (%s)"), result)
479 == NULL);
480 case TOOL_ENGINE_UPDATE_DAO:
481 {
482 int i, j;
483
484 prepare_input_range (&info->base.input, info->base.group_by);
485
486 i = 1 + ((info->predetermined) ? calc_length (info->bin) : info->n);
487 if (info->bin_type & bintype_p_inf_lower)
488 i++;
489 if (info->bin_type & bintype_m_inf_lower)
490 i++;
491
492 j = g_slist_length (info->base.input) + ((info->cumulative) ? 1 : 2);
493
494 dao_adjust (dao, j, i);
495
496 return FALSE;
497 }
498 case TOOL_ENGINE_CLEAN_UP:
499 return analysis_tool_generic_clean (specs);
500 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
501 return FALSE;
502 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
503 dao_prepare_output (NULL, dao, _("Histogram"));
504 return FALSE;
505 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
506 return dao_format_output (dao, _("Histogram"));
507 case TOOL_ENGINE_PERFORM_CALC:
508 default:
509 return analysis_tool_histogram_engine_run (dao, specs);
510 }
511 return TRUE;
512 }
513
514
515
516
517