xref: /qemu/util/qdist.c (revision bf8d4924)
1 /*
2  * qdist.c - QEMU helpers for handling frequency distributions of data.
3  *
4  * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
5  *
6  * License: GNU GPL, version 2 or later.
7  *   See the COPYING file in the top-level directory.
8  */
9 #include "qemu/osdep.h"
10 #include "qemu/qdist.h"
11 
12 #include <math.h>
13 #ifndef NAN
14 #define NAN (0.0 / 0.0)
15 #endif
16 
17 void qdist_init(struct qdist *dist)
18 {
19     dist->entries = g_malloc(sizeof(*dist->entries));
20     dist->size = 1;
21     dist->n = 0;
22 }
23 
24 void qdist_destroy(struct qdist *dist)
25 {
26     g_free(dist->entries);
27 }
28 
29 static inline int qdist_cmp_double(double a, double b)
30 {
31     if (a > b) {
32         return 1;
33     } else if (a < b) {
34         return -1;
35     }
36     return 0;
37 }
38 
39 static int qdist_cmp(const void *ap, const void *bp)
40 {
41     const struct qdist_entry *a = ap;
42     const struct qdist_entry *b = bp;
43 
44     return qdist_cmp_double(a->x, b->x);
45 }
46 
47 void qdist_add(struct qdist *dist, double x, long count)
48 {
49     struct qdist_entry *entry = NULL;
50 
51     if (dist->n) {
52         struct qdist_entry e;
53 
54         e.x = x;
55         entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp);
56     }
57 
58     if (entry) {
59         entry->count += count;
60         return;
61     }
62 
63     if (unlikely(dist->n == dist->size)) {
64         dist->size *= 2;
65         dist->entries = g_realloc(dist->entries,
66                                   sizeof(*dist->entries) * (dist->size));
67     }
68     dist->n++;
69     entry = &dist->entries[dist->n - 1];
70     entry->x = x;
71     entry->count = count;
72     qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp);
73 }
74 
75 void qdist_inc(struct qdist *dist, double x)
76 {
77     qdist_add(dist, x, 1);
78 }
79 
80 /*
81  * Unicode for block elements. See:
82  *   https://en.wikipedia.org/wiki/Block_Elements
83  */
84 static const gunichar qdist_blocks[] = {
85     0x2581,
86     0x2582,
87     0x2583,
88     0x2584,
89     0x2585,
90     0x2586,
91     0x2587,
92     0x2588
93 };
94 
95 #define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks)
96 
97 /*
98  * Print a distribution into a string.
99  *
100  * This function assumes that appropriate binning has been done on the input;
101  * see qdist_bin__internal() and qdist_pr_plain().
102  *
103  * Callers must free the returned string with g_free().
104  */
105 static char *qdist_pr_internal(const struct qdist *dist)
106 {
107     double min, max;
108     GString *s = g_string_new("");
109     size_t i;
110 
111     /* if only one entry, its printout will be either full or empty */
112     if (dist->n == 1) {
113         if (dist->entries[0].count) {
114             g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]);
115         } else {
116             g_string_append_c(s, ' ');
117         }
118         goto out;
119     }
120 
121     /* get min and max counts */
122     min = dist->entries[0].count;
123     max = min;
124     for (i = 0; i < dist->n; i++) {
125         struct qdist_entry *e = &dist->entries[i];
126 
127         if (e->count < min) {
128             min = e->count;
129         }
130         if (e->count > max) {
131             max = e->count;
132         }
133     }
134 
135     for (i = 0; i < dist->n; i++) {
136         struct qdist_entry *e = &dist->entries[i];
137         int index;
138 
139         /* make an exception with 0; instead of using block[0], print a space */
140         if (e->count) {
141             /* divide first to avoid loss of precision when e->count == max */
142             index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1);
143             g_string_append_unichar(s, qdist_blocks[index]);
144         } else {
145             g_string_append_c(s, ' ');
146         }
147     }
148  out:
149     return g_string_free(s, FALSE);
150 }
151 
152 /*
153  * Bin the distribution in @from into @n bins of consecutive, non-overlapping
154  * intervals, copying the result to @to.
155  *
156  * This function is internal to qdist: only this file and test code should
157  * ever call it.
158  *
159  * Note: calling this function on an already-binned qdist is a bug.
160  *
161  * If @n == 0 or @from->n == 1, use @from->n.
162  */
163 void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n)
164 {
165     double xmin, xmax;
166     double step;
167     size_t i, j;
168 
169     qdist_init(to);
170 
171     if (from->n == 0) {
172         return;
173     }
174     if (n == 0 || from->n == 1) {
175         n = from->n;
176     }
177 
178     /* set equally-sized bins between @from's left and right */
179     xmin = qdist_xmin(from);
180     xmax = qdist_xmax(from);
181     step = (xmax - xmin) / n;
182 
183     if (n == from->n) {
184         /* if @from's entries are equally spaced, no need to re-bin */
185         for (i = 0; i < from->n; i++) {
186             if (from->entries[i].x != xmin + i * step) {
187                 goto rebin;
188             }
189         }
190         /* they're equally spaced, so copy the dist and bail out */
191         to->entries = g_new(struct qdist_entry, from->n);
192         to->n = from->n;
193         memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n);
194         return;
195     }
196 
197  rebin:
198     j = 0;
199     for (i = 0; i < n; i++) {
200         double x;
201         double left, right;
202 
203         left = xmin + i * step;
204         right = xmin + (i + 1) * step;
205 
206         /* Add x, even if it might not get any counts later */
207         x = left;
208         qdist_add(to, x, 0);
209 
210         /*
211          * To avoid double-counting we capture [left, right) ranges, except for
212          * the righmost bin, which captures a [left, right] range.
213          */
214         while (j < from->n && (from->entries[j].x < right || i == n - 1)) {
215             struct qdist_entry *o = &from->entries[j];
216 
217             qdist_add(to, x, o->count);
218             j++;
219         }
220     }
221 }
222 
223 /*
224  * Print @dist into a string, after re-binning it into @n bins of consecutive,
225  * non-overlapping intervals.
226  *
227  * If @n == 0, use @orig->n.
228  *
229  * Callers must free the returned string with g_free().
230  */
231 char *qdist_pr_plain(const struct qdist *dist, size_t n)
232 {
233     struct qdist binned;
234     char *ret;
235 
236     if (dist->n == 0) {
237         return NULL;
238     }
239     qdist_bin__internal(&binned, dist, n);
240     ret = qdist_pr_internal(&binned);
241     qdist_destroy(&binned);
242     return ret;
243 }
244 
245 static char *qdist_pr_label(const struct qdist *dist, size_t n_bins,
246                             uint32_t opt, bool is_left)
247 {
248     const char *percent;
249     const char *lparen;
250     const char *rparen;
251     GString *s;
252     double x1, x2, step;
253     double x;
254     double n;
255     int dec;
256 
257     s = g_string_new("");
258     if (!(opt & QDIST_PR_LABELS)) {
259         goto out;
260     }
261 
262     dec = opt & QDIST_PR_NODECIMAL ? 0 : 1;
263     percent = opt & QDIST_PR_PERCENT ? "%" : "";
264 
265     n = n_bins ? n_bins : dist->n;
266     x = is_left ? qdist_xmin(dist) : qdist_xmax(dist);
267     step = (qdist_xmax(dist) - qdist_xmin(dist)) / n;
268 
269     if (opt & QDIST_PR_100X) {
270         x *= 100.0;
271         step *= 100.0;
272     }
273     if (opt & QDIST_PR_NOBINRANGE) {
274         lparen = rparen = "";
275         x1 = x;
276         x2 = x; /* unnecessary, but a dumb compiler might not figure it out */
277     } else {
278         lparen = "[";
279         rparen = is_left ? ")" : "]";
280         if (is_left) {
281             x1 = x;
282             x2 = x + step;
283         } else {
284             x1 = x - step;
285             x2 = x;
286         }
287     }
288     g_string_append_printf(s, "%s%.*f", lparen, dec, x1);
289     if (!(opt & QDIST_PR_NOBINRANGE)) {
290         g_string_append_printf(s, ",%.*f%s", dec, x2, rparen);
291     }
292     g_string_append(s, percent);
293  out:
294     return g_string_free(s, FALSE);
295 }
296 
297 /*
298  * Print the distribution's histogram into a string.
299  *
300  * See also: qdist_pr_plain().
301  *
302  * Callers must free the returned string with g_free().
303  */
304 char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt)
305 {
306     const char *border = opt & QDIST_PR_BORDER ? "|" : "";
307     char *llabel, *rlabel;
308     char *hgram;
309     GString *s;
310 
311     if (dist->n == 0) {
312         return NULL;
313     }
314 
315     s = g_string_new("");
316 
317     llabel = qdist_pr_label(dist, n_bins, opt, true);
318     rlabel = qdist_pr_label(dist, n_bins, opt, false);
319     hgram = qdist_pr_plain(dist, n_bins);
320     g_string_append_printf(s, "%s%s%s%s%s",
321                            llabel, border, hgram, border, rlabel);
322     g_free(llabel);
323     g_free(rlabel);
324     g_free(hgram);
325 
326     return g_string_free(s, FALSE);
327 }
328 
329 static inline double qdist_x(const struct qdist *dist, int index)
330 {
331     if (dist->n == 0) {
332         return NAN;
333     }
334     return dist->entries[index].x;
335 }
336 
337 double qdist_xmin(const struct qdist *dist)
338 {
339     return qdist_x(dist, 0);
340 }
341 
342 double qdist_xmax(const struct qdist *dist)
343 {
344     return qdist_x(dist, dist->n - 1);
345 }
346 
347 size_t qdist_unique_entries(const struct qdist *dist)
348 {
349     return dist->n;
350 }
351 
352 unsigned long qdist_sample_count(const struct qdist *dist)
353 {
354     unsigned long count = 0;
355     size_t i;
356 
357     for (i = 0; i < dist->n; i++) {
358         struct qdist_entry *e = &dist->entries[i];
359 
360         count += e->count;
361     }
362     return count;
363 }
364 
365 static double qdist_pairwise_avg(const struct qdist *dist, size_t index,
366                                  size_t n, unsigned long count)
367 {
368     /* amortize the recursion by using a base case > 2 */
369     if (n <= 8) {
370         size_t i;
371         double ret = 0;
372 
373         for (i = 0; i < n; i++) {
374             struct qdist_entry *e = &dist->entries[index + i];
375 
376             ret += e->x * e->count / count;
377         }
378         return ret;
379     } else {
380         size_t n2 = n / 2;
381 
382         return qdist_pairwise_avg(dist, index, n2, count) +
383                qdist_pairwise_avg(dist, index + n2, n - n2, count);
384     }
385 }
386 
387 double qdist_avg(const struct qdist *dist)
388 {
389     unsigned long count;
390 
391     count = qdist_sample_count(dist);
392     if (!count) {
393         return NAN;
394     }
395     return qdist_pairwise_avg(dist, 0, dist->n, count);
396 }
397