xref: /qemu/util/qdist.c (revision 52ea63de)
1 /*
2  * qdist.c - QEMU helpers for handling frequency distributions of data.
3  *
4  * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
5  *
6  * License: GNU GPL, version 2 or later.
7  *   See the COPYING file in the top-level directory.
8  */
9 #include "qemu/qdist.h"
10 
11 #include <math.h>
12 #ifndef NAN
13 #define NAN (0.0 / 0.0)
14 #endif
15 
16 void qdist_init(struct qdist *dist)
17 {
18     dist->entries = g_malloc(sizeof(*dist->entries));
19     dist->size = 1;
20     dist->n = 0;
21 }
22 
23 void qdist_destroy(struct qdist *dist)
24 {
25     g_free(dist->entries);
26 }
27 
28 static inline int qdist_cmp_double(double a, double b)
29 {
30     if (a > b) {
31         return 1;
32     } else if (a < b) {
33         return -1;
34     }
35     return 0;
36 }
37 
38 static int qdist_cmp(const void *ap, const void *bp)
39 {
40     const struct qdist_entry *a = ap;
41     const struct qdist_entry *b = bp;
42 
43     return qdist_cmp_double(a->x, b->x);
44 }
45 
46 void qdist_add(struct qdist *dist, double x, long count)
47 {
48     struct qdist_entry *entry = NULL;
49 
50     if (dist->n) {
51         struct qdist_entry e;
52 
53         e.x = x;
54         entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp);
55     }
56 
57     if (entry) {
58         entry->count += count;
59         return;
60     }
61 
62     if (unlikely(dist->n == dist->size)) {
63         dist->size *= 2;
64         dist->entries = g_realloc(dist->entries,
65                                   sizeof(*dist->entries) * (dist->size));
66     }
67     dist->n++;
68     entry = &dist->entries[dist->n - 1];
69     entry->x = x;
70     entry->count = count;
71     qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp);
72 }
73 
74 void qdist_inc(struct qdist *dist, double x)
75 {
76     qdist_add(dist, x, 1);
77 }
78 
79 /*
80  * Unicode for block elements. See:
81  *   https://en.wikipedia.org/wiki/Block_Elements
82  */
83 static const gunichar qdist_blocks[] = {
84     0x2581,
85     0x2582,
86     0x2583,
87     0x2584,
88     0x2585,
89     0x2586,
90     0x2587,
91     0x2588
92 };
93 
94 #define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks)
95 
96 /*
97  * Print a distribution into a string.
98  *
99  * This function assumes that appropriate binning has been done on the input;
100  * see qdist_bin__internal() and qdist_pr_plain().
101  *
102  * Callers must free the returned string with g_free().
103  */
104 static char *qdist_pr_internal(const struct qdist *dist)
105 {
106     double min, max;
107     GString *s = g_string_new("");
108     size_t i;
109 
110     /* if only one entry, its printout will be either full or empty */
111     if (dist->n == 1) {
112         if (dist->entries[0].count) {
113             g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]);
114         } else {
115             g_string_append_c(s, ' ');
116         }
117         goto out;
118     }
119 
120     /* get min and max counts */
121     min = dist->entries[0].count;
122     max = min;
123     for (i = 0; i < dist->n; i++) {
124         struct qdist_entry *e = &dist->entries[i];
125 
126         if (e->count < min) {
127             min = e->count;
128         }
129         if (e->count > max) {
130             max = e->count;
131         }
132     }
133 
134     for (i = 0; i < dist->n; i++) {
135         struct qdist_entry *e = &dist->entries[i];
136         int index;
137 
138         /* make an exception with 0; instead of using block[0], print a space */
139         if (e->count) {
140             /* divide first to avoid loss of precision when e->count == max */
141             index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1);
142             g_string_append_unichar(s, qdist_blocks[index]);
143         } else {
144             g_string_append_c(s, ' ');
145         }
146     }
147  out:
148     return g_string_free(s, FALSE);
149 }
150 
151 /*
152  * Bin the distribution in @from into @n bins of consecutive, non-overlapping
153  * intervals, copying the result to @to.
154  *
155  * This function is internal to qdist: only this file and test code should
156  * ever call it.
157  *
158  * Note: calling this function on an already-binned qdist is a bug.
159  *
160  * If @n == 0 or @from->n == 1, use @from->n.
161  */
162 void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n)
163 {
164     double xmin, xmax;
165     double step;
166     size_t i, j;
167 
168     qdist_init(to);
169 
170     if (from->n == 0) {
171         return;
172     }
173     if (n == 0 || from->n == 1) {
174         n = from->n;
175     }
176 
177     /* set equally-sized bins between @from's left and right */
178     xmin = qdist_xmin(from);
179     xmax = qdist_xmax(from);
180     step = (xmax - xmin) / n;
181 
182     if (n == from->n) {
183         /* if @from's entries are equally spaced, no need to re-bin */
184         for (i = 0; i < from->n; i++) {
185             if (from->entries[i].x != xmin + i * step) {
186                 goto rebin;
187             }
188         }
189         /* they're equally spaced, so copy the dist and bail out */
190         to->entries = g_new(struct qdist_entry, from->n);
191         to->n = from->n;
192         memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n);
193         return;
194     }
195 
196  rebin:
197     j = 0;
198     for (i = 0; i < n; i++) {
199         double x;
200         double left, right;
201 
202         left = xmin + i * step;
203         right = xmin + (i + 1) * step;
204 
205         /* Add x, even if it might not get any counts later */
206         x = left;
207         qdist_add(to, x, 0);
208 
209         /*
210          * To avoid double-counting we capture [left, right) ranges, except for
211          * the righmost bin, which captures a [left, right] range.
212          */
213         while (j < from->n && (from->entries[j].x < right || i == n - 1)) {
214             struct qdist_entry *o = &from->entries[j];
215 
216             qdist_add(to, x, o->count);
217             j++;
218         }
219     }
220 }
221 
222 /*
223  * Print @dist into a string, after re-binning it into @n bins of consecutive,
224  * non-overlapping intervals.
225  *
226  * If @n == 0, use @orig->n.
227  *
228  * Callers must free the returned string with g_free().
229  */
230 char *qdist_pr_plain(const struct qdist *dist, size_t n)
231 {
232     struct qdist binned;
233     char *ret;
234 
235     if (dist->n == 0) {
236         return NULL;
237     }
238     qdist_bin__internal(&binned, dist, n);
239     ret = qdist_pr_internal(&binned);
240     qdist_destroy(&binned);
241     return ret;
242 }
243 
244 static char *qdist_pr_label(const struct qdist *dist, size_t n_bins,
245                             uint32_t opt, bool is_left)
246 {
247     const char *percent;
248     const char *lparen;
249     const char *rparen;
250     GString *s;
251     double x1, x2, step;
252     double x;
253     double n;
254     int dec;
255 
256     s = g_string_new("");
257     if (!(opt & QDIST_PR_LABELS)) {
258         goto out;
259     }
260 
261     dec = opt & QDIST_PR_NODECIMAL ? 0 : 1;
262     percent = opt & QDIST_PR_PERCENT ? "%" : "";
263 
264     n = n_bins ? n_bins : dist->n;
265     x = is_left ? qdist_xmin(dist) : qdist_xmax(dist);
266     step = (qdist_xmax(dist) - qdist_xmin(dist)) / n;
267 
268     if (opt & QDIST_PR_100X) {
269         x *= 100.0;
270         step *= 100.0;
271     }
272     if (opt & QDIST_PR_NOBINRANGE) {
273         lparen = rparen = "";
274         x1 = x;
275         x2 = x; /* unnecessary, but a dumb compiler might not figure it out */
276     } else {
277         lparen = "[";
278         rparen = is_left ? ")" : "]";
279         if (is_left) {
280             x1 = x;
281             x2 = x + step;
282         } else {
283             x1 = x - step;
284             x2 = x;
285         }
286     }
287     g_string_append_printf(s, "%s%.*f", lparen, dec, x1);
288     if (!(opt & QDIST_PR_NOBINRANGE)) {
289         g_string_append_printf(s, ",%.*f%s", dec, x2, rparen);
290     }
291     g_string_append(s, percent);
292  out:
293     return g_string_free(s, FALSE);
294 }
295 
296 /*
297  * Print the distribution's histogram into a string.
298  *
299  * See also: qdist_pr_plain().
300  *
301  * Callers must free the returned string with g_free().
302  */
303 char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt)
304 {
305     const char *border = opt & QDIST_PR_BORDER ? "|" : "";
306     char *llabel, *rlabel;
307     char *hgram;
308     GString *s;
309 
310     if (dist->n == 0) {
311         return NULL;
312     }
313 
314     s = g_string_new("");
315 
316     llabel = qdist_pr_label(dist, n_bins, opt, true);
317     rlabel = qdist_pr_label(dist, n_bins, opt, false);
318     hgram = qdist_pr_plain(dist, n_bins);
319     g_string_append_printf(s, "%s%s%s%s%s",
320                            llabel, border, hgram, border, rlabel);
321     g_free(llabel);
322     g_free(rlabel);
323     g_free(hgram);
324 
325     return g_string_free(s, FALSE);
326 }
327 
328 static inline double qdist_x(const struct qdist *dist, int index)
329 {
330     if (dist->n == 0) {
331         return NAN;
332     }
333     return dist->entries[index].x;
334 }
335 
336 double qdist_xmin(const struct qdist *dist)
337 {
338     return qdist_x(dist, 0);
339 }
340 
341 double qdist_xmax(const struct qdist *dist)
342 {
343     return qdist_x(dist, dist->n - 1);
344 }
345 
346 size_t qdist_unique_entries(const struct qdist *dist)
347 {
348     return dist->n;
349 }
350 
351 unsigned long qdist_sample_count(const struct qdist *dist)
352 {
353     unsigned long count = 0;
354     size_t i;
355 
356     for (i = 0; i < dist->n; i++) {
357         struct qdist_entry *e = &dist->entries[i];
358 
359         count += e->count;
360     }
361     return count;
362 }
363 
364 static double qdist_pairwise_avg(const struct qdist *dist, size_t index,
365                                  size_t n, unsigned long count)
366 {
367     /* amortize the recursion by using a base case > 2 */
368     if (n <= 8) {
369         size_t i;
370         double ret = 0;
371 
372         for (i = 0; i < n; i++) {
373             struct qdist_entry *e = &dist->entries[index + i];
374 
375             ret += e->x * e->count / count;
376         }
377         return ret;
378     } else {
379         size_t n2 = n / 2;
380 
381         return qdist_pairwise_avg(dist, index, n2, count) +
382                qdist_pairwise_avg(dist, index + n2, n - n2, count);
383     }
384 }
385 
386 double qdist_avg(const struct qdist *dist)
387 {
388     unsigned long count;
389 
390     count = qdist_sample_count(dist);
391     if (!count) {
392         return NAN;
393     }
394     return qdist_pairwise_avg(dist, 0, dist->n, count);
395 }
396