1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2015 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17 #include <config.h>
18
19 #include "output/charts/barchart.h"
20 #include "output/charts/piechart.h"
21
22 #include <stdlib.h>
23
24 #include "libpspp/cast.h"
25 #include "libpspp/str.h"
26 #include "libpspp/array.h"
27 #include "output/chart-item-provider.h"
28
29 #include "gl/xalloc.h"
30 #include "data/variable.h"
31 #include "data/settings.h"
32 #include "language/stats/freq.h"
33
34
35 static int
compare_category_3way(const void * a_,const void * b_,const void * bc_)36 compare_category_3way (const void *a_, const void *b_, const void *bc_)
37 {
38 const struct category *const*a = a_;
39 const struct category *const*b = b_;
40 const struct barchart *bc = bc_;
41
42 return value_compare_3way (&(*a)->val, &(*b)->val, var_get_width (bc->var[1]));
43 }
44
45
46 static int
compare_category_by_index_3way(const void * a_,const void * b_,const void * unused UNUSED)47 compare_category_by_index_3way (const void *a_, const void *b_,
48 const void *unused UNUSED)
49 {
50 const struct category *const*a = a_;
51 const struct category *const*b = b_;
52
53 if ( (*a)->idx < (*b)->idx)
54 return -1;
55
56 return ((*a)->idx > (*b)->idx);
57 }
58
59 static unsigned int
hash_freq_2level_ptr(const void * a_,const void * bc_)60 hash_freq_2level_ptr (const void *a_, const void *bc_)
61 {
62 const struct freq *const *ap = a_;
63 const struct barchart *bc = bc_;
64
65 size_t hash = value_hash (&(*ap)->values[0], bc->widths[0], 0);
66
67 if (bc->n_vars > 1)
68 hash = value_hash (&(*ap)->values[1], bc->widths[1], hash);
69
70 return hash;
71 }
72
73
74 static int
compare_freq_2level_ptr_3way(const void * a_,const void * b_,const void * bc_)75 compare_freq_2level_ptr_3way (const void *a_, const void *b_, const void *bc_)
76 {
77 const struct freq *const *ap = a_;
78 const struct freq *const *bp = b_;
79 const struct barchart *bc = bc_;
80
81 const int level0 = value_compare_3way (&(*ap)->values[0], &(*bp)->values[0], bc->widths[0]);
82
83 if (level0 == 0 && bc->n_vars > 1)
84 return value_compare_3way (&(*ap)->values[1], &(*bp)->values[1], bc->widths[1]);
85
86 return level0;
87 }
88
89 /* Print out a textual representation of a barchart.
90 This is intended only for testing, and not as a means
91 of visualising the data.
92 */
93 static void
barchart_dump(const struct barchart * bc,FILE * fp)94 barchart_dump (const struct barchart *bc, FILE *fp)
95 {
96 fprintf (fp, "Graphic: Barchart\n");
97 fprintf (fp, "Percentage: %d\n", bc->percent);
98 fprintf (fp, "Total Categories: %d\n", bc->n_nzcats);
99 fprintf (fp, "Primary Categories: %d\n", bc->n_pcats);
100 fprintf (fp, "Largest Category: %g\n", bc->largest);
101 fprintf (fp, "Total Count: %g\n", bc->total_count);
102
103 fprintf (fp, "Y Label: \"%s\"\n", bc->ylabel);
104
105 fprintf (fp, "Categorical Variables:\n");
106 for (int i = 0; i < bc->n_vars; ++i)
107 {
108 fprintf (fp, " Var: \"%s\"\n", var_get_name (bc->var[i]));
109 }
110
111 fprintf (fp, "Categories:\n");
112 struct category *cat;
113 struct category **cats = XCALLOC (hmap_count (&bc->primaries), struct category *);
114 int i = 0;
115 HMAP_FOR_EACH (cat, struct category, node, &bc->primaries)
116 {
117 cats[i++] = cat;
118 }
119 /* HMAP_FOR_EACH is not guaranteed to iterate in any particular order. So
120 we must sort here before we output the results. */
121 sort (cats, i, sizeof (struct category *), compare_category_by_index_3way, bc);
122 for (i = 0; i < hmap_count (&bc->primaries); ++i)
123 {
124 const struct category *c = cats[i];
125 fprintf (fp, " %d \"%s\"\n", c->idx, ds_cstr (&c->label));
126 }
127 free (cats);
128
129 if (bc->ss)
130 {
131 fprintf (fp, "Sub-categories:\n");
132 for (int i = 0; i < bc->n_nzcats / bc->n_pcats; ++i)
133 {
134 const struct category *cat = bc->ss[i];
135 fprintf (fp, " %d \"%s\"\n", cat->idx, ds_cstr(&cat->label));
136 }
137 }
138
139 fprintf (fp, "All Categories:\n");
140 for (int i = 0; i < bc->n_nzcats; ++i)
141 {
142 const struct freq *frq = bc->cats[i];
143 fprintf (fp, "Count: %g; ", frq->count);
144
145 struct string s = DS_EMPTY_INITIALIZER;
146 var_append_value_name (bc->var[0], &frq->values[0], &s);
147
148 fprintf (fp, "Cat: \"%s\"", ds_cstr (&s));
149 ds_clear (&s);
150
151 if (bc->ss)
152 {
153 var_append_value_name (bc->var[1], &frq->values[1], &s);
154 fprintf (fp, ", \"%s\"", ds_cstr (&s));
155 }
156 ds_destroy (&s);
157 fputc ('\n', fp);
158 }
159
160 fputc ('\n', fp);
161 }
162
163
164 /* Creates and returns a chart that will render a barchart with
165 the given TITLE and the N_CATS described in CATS.
166
167 VAR is an array containing the categorical variables, and N_VAR
168 the number of them. N_VAR must be exactly 1 or 2.
169
170 CATS are the counts of the values of those variables. N_CATS is the
171 number of distinct values.
172 */
173 struct chart_item *
barchart_create(const struct variable ** var,int n_vars,const char * ylabel,bool percent,struct freq * const * cats,int n_cats)174 barchart_create (const struct variable **var, int n_vars,
175 const char *ylabel, bool percent,
176 struct freq *const *cats, int n_cats)
177 {
178 struct barchart *bar;
179 int i;
180
181 const int pidx = 0;
182 const int sidx = 1;
183
184
185 int width = var_get_width (var[pidx]);
186
187 assert (n_vars >= 1 && n_vars <= 2);
188
189 bar = xzalloc (sizeof *bar);
190 bar->percent = percent;
191 bar->var = var;
192 bar->n_vars = n_vars;
193 bar->n_nzcats = n_cats;
194 chart_item_init (&bar->chart_item, &barchart_class, var_to_string (var[pidx]));
195
196 bar->largest = -1;
197 bar->ylabel = strdup (ylabel);
198
199 {
200 int idx = 0;
201 hmap_init (&bar->primaries);
202
203 /*
204 Iterate the categories and create a hash table of the primary categories.
205 We need to do this to find out how many there are and to cache the labels.
206 */
207 for (i = 0; i < n_cats; i++)
208 {
209 const struct freq *src = cats[i];
210 size_t hash = value_hash (&src->values[pidx], width, 0);
211
212 struct category *foo;
213 int flag = 0;
214 HMAP_FOR_EACH_WITH_HASH (foo, struct category, node, hash, &bar->primaries)
215 {
216 if (value_equal (&foo->val, &src->values[pidx], width))
217 {
218 flag = 1;
219 break;
220 }
221 }
222
223 if (!flag)
224 {
225 struct category *s = xzalloc (sizeof *s);
226 s->idx = idx++;
227 s->width = var_get_width (var[pidx]);
228 value_init (&s->val, s->width);
229 value_copy (&s->val, &src->values[pidx], s->width);
230 ds_init_empty (&s->label);
231 var_append_value_name (var[pidx], &s->val, &s->label);
232
233 hmap_insert (&bar->primaries, &s->node, hash);
234 }
235 }
236
237 bar->n_pcats = hmap_count (&bar->primaries);
238 }
239
240 if (n_vars > 1)
241 {
242 hmap_init (&bar->secondaries);
243 int idx = 0;
244 /* Iterate the categories, and create a hash table of secondary categories */
245 for (i = 0; i < n_cats; i++)
246 {
247 struct freq *src = cats[i];
248
249 struct category *foo;
250 int flag = 0;
251 size_t hash = value_hash (&src->values[sidx], var_get_width (var[sidx]), 0);
252 HMAP_FOR_EACH_WITH_HASH (foo, struct category, node, hash, &bar->secondaries)
253 {
254 if (value_equal (&foo->val, &src->values[sidx], var_get_width (var[sidx])))
255 {
256 flag = 1;
257 break;
258 }
259 }
260
261 if (!flag)
262 {
263 struct category *s = xzalloc (sizeof *s);
264 s->idx = idx++;
265 s->width = var_get_width (var[sidx]);
266 value_init (&s->val, s->width);
267 value_copy (&s->val, &src->values[sidx], var_get_width (var[sidx]));
268 ds_init_empty (&s->label);
269 var_append_value_name (var[sidx], &s->val, &s->label);
270
271 hmap_insert (&bar->secondaries, &s->node, hash);
272 bar->ss = xrealloc (bar->ss, idx * sizeof *bar->ss);
273 bar->ss[idx - 1] = s;
274 }
275 }
276
277 int n_category = hmap_count (&bar->secondaries);
278
279 sort (bar->ss, n_category, sizeof *bar->ss,
280 compare_category_3way, bar);
281 }
282
283
284 /* Deep copy. Not necessary for cmd line, but essential for the GUI,
285 since an expose callback will access these structs which may not
286 exist.
287 */
288 bar->cats = xcalloc (n_cats, sizeof *bar->cats);
289
290 bar->widths[0] = var_get_width (bar->var[0]);
291 if (n_vars > 1)
292 bar->widths[1] = var_get_width (bar->var[1]);
293
294 {
295 struct hmap level2table;
296 hmap_init (&level2table);
297 int x = 0;
298
299 for (i = 0; i < n_cats; i++)
300 {
301 struct freq *c = cats[i];
302
303 struct freq *foo;
304 bool flag = false;
305 size_t hash = hash_freq_2level_ptr (&c, bar);
306 HMAP_FOR_EACH_WITH_HASH (foo, struct freq, node, hash, &level2table)
307 {
308 if (0 == compare_freq_2level_ptr_3way (&foo, &c, bar))
309 {
310 foo->count += c->count;
311 bar->total_count += c->count;
312
313 if (foo->count > bar->largest)
314 bar->largest = foo->count;
315
316 flag = true;
317 break;
318 }
319 }
320
321 if (!flag)
322 {
323 struct freq *aggregated_freq = freq_clone (c, n_vars, bar->widths);
324 hmap_insert (&level2table, &aggregated_freq->node, hash);
325
326 if (c->count > bar->largest)
327 bar->largest = aggregated_freq->count;
328
329 bar->total_count += c->count;
330 bar->cats[x++] = aggregated_freq;
331 }
332 }
333
334 bar->n_nzcats = hmap_count (&level2table);
335 hmap_destroy (&level2table);
336 }
337
338 sort (bar->cats, bar->n_nzcats, sizeof *bar->cats,
339 compare_freq_2level_ptr_3way, bar);
340
341 if (settings_get_testing_mode ())
342 barchart_dump (bar, stdout);
343
344 return &bar->chart_item;
345 }
346
347 static void
destroy_cat_map(struct hmap * m)348 destroy_cat_map (struct hmap *m)
349 {
350 struct category *foo = NULL;
351 struct category *next = NULL;
352 HMAP_FOR_EACH_SAFE (foo, next, struct category, node, m)
353 {
354 value_destroy (&foo->val, foo->width);
355
356 ds_destroy (&foo->label);
357 free (foo);
358 }
359
360 hmap_destroy (m);
361 }
362
363 static void
barchart_destroy(struct chart_item * chart_item)364 barchart_destroy (struct chart_item *chart_item)
365 {
366 struct barchart *bar = to_barchart (chart_item);
367
368 int i;
369
370 destroy_cat_map (&bar->primaries);
371 if (bar->ss)
372 {
373 destroy_cat_map (&bar->secondaries);
374 }
375
376 for (i = 0; i < bar->n_nzcats; i++)
377 {
378 freq_destroy (bar->cats[i], bar->n_vars, bar->widths);
379 }
380
381 free (bar->cats);
382 free (bar->ylabel);
383 free (bar->ss);
384 free (bar);
385 }
386
387 const struct chart_item_class barchart_class =
388 {
389 barchart_destroy
390 };
391