1 /*
2  * qdist.c - QEMU helpers for handling frequency distributions of data.
3  *
4  * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
5  *
6  * License: GNU GPL, version 2 or later.
7  *   See the COPYING file in the top-level directory.
8  */
9 #include "qemu/osdep.h"
10 #include "qemu/qdist.h"
11 
12 #include <math.h>
13 #ifndef NAN
14 #define NAN (0.0 / 0.0)
15 #endif
16 
17 #define QDIST_EMPTY_STR "(empty)"
18 
qdist_init(struct qdist * dist)19 void qdist_init(struct qdist *dist)
20 {
21     dist->entries = g_new(struct qdist_entry, 1);
22     dist->size = 1;
23     dist->n = 0;
24 }
25 
qdist_destroy(struct qdist * dist)26 void qdist_destroy(struct qdist *dist)
27 {
28     g_free(dist->entries);
29 }
30 
qdist_cmp_double(double a,double b)31 static inline int qdist_cmp_double(double a, double b)
32 {
33     if (a > b) {
34         return 1;
35     } else if (a < b) {
36         return -1;
37     }
38     return 0;
39 }
40 
qdist_cmp(const void * ap,const void * bp)41 static int qdist_cmp(const void *ap, const void *bp)
42 {
43     const struct qdist_entry *a = ap;
44     const struct qdist_entry *b = bp;
45 
46     return qdist_cmp_double(a->x, b->x);
47 }
48 
qdist_add(struct qdist * dist,double x,long count)49 void qdist_add(struct qdist *dist, double x, long count)
50 {
51     struct qdist_entry *entry = NULL;
52 
53     if (dist->n) {
54         struct qdist_entry e;
55 
56         e.x = x;
57         entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp);
58     }
59 
60     if (entry) {
61         entry->count += count;
62         return;
63     }
64 
65     if (unlikely(dist->n == dist->size)) {
66         dist->size *= 2;
67         dist->entries = g_renew(struct qdist_entry, dist->entries, dist->size);
68     }
69     dist->n++;
70     entry = &dist->entries[dist->n - 1];
71     entry->x = x;
72     entry->count = count;
73     qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp);
74 }
75 
qdist_inc(struct qdist * dist,double x)76 void qdist_inc(struct qdist *dist, double x)
77 {
78     qdist_add(dist, x, 1);
79 }
80 
81 /*
82  * Unicode for block elements. See:
83  *   https://en.wikipedia.org/wiki/Block_Elements
84  */
85 static const gunichar qdist_blocks[] = {
86     0x2581,
87     0x2582,
88     0x2583,
89     0x2584,
90     0x2585,
91     0x2586,
92     0x2587,
93     0x2588
94 };
95 
96 #define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks)
97 
98 /*
99  * Print a distribution into a string.
100  *
101  * This function assumes that appropriate binning has been done on the input;
102  * see qdist_bin__internal() and qdist_pr_plain().
103  *
104  * Callers must free the returned string with g_free().
105  */
qdist_pr_internal(const struct qdist * dist)106 static char *qdist_pr_internal(const struct qdist *dist)
107 {
108     double min, max;
109     GString *s = g_string_new("");
110     size_t i;
111 
112     /* if only one entry, its printout will be either full or empty */
113     if (dist->n == 1) {
114         if (dist->entries[0].count) {
115             g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]);
116         } else {
117             g_string_append_c(s, ' ');
118         }
119         goto out;
120     }
121 
122     /* get min and max counts */
123     min = dist->entries[0].count;
124     max = min;
125     for (i = 0; i < dist->n; i++) {
126         struct qdist_entry *e = &dist->entries[i];
127 
128         if (e->count < min) {
129             min = e->count;
130         }
131         if (e->count > max) {
132             max = e->count;
133         }
134     }
135 
136     for (i = 0; i < dist->n; i++) {
137         struct qdist_entry *e = &dist->entries[i];
138         int index;
139 
140         /* make an exception with 0; instead of using block[0], print a space */
141         if (e->count) {
142             /* divide first to avoid loss of precision when e->count == max */
143             index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1);
144             g_string_append_unichar(s, qdist_blocks[index]);
145         } else {
146             g_string_append_c(s, ' ');
147         }
148     }
149  out:
150     return g_string_free(s, FALSE);
151 }
152 
153 /*
154  * Bin the distribution in @from into @n bins of consecutive, non-overlapping
155  * intervals, copying the result to @to.
156  *
157  * This function is internal to qdist: only this file and test code should
158  * ever call it.
159  *
160  * Note: calling this function on an already-binned qdist is a bug.
161  *
162  * If @n == 0 or @from->n == 1, use @from->n.
163  */
qdist_bin__internal(struct qdist * to,const struct qdist * from,size_t n)164 void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n)
165 {
166     double xmin, xmax;
167     double step;
168     size_t i, j;
169 
170     qdist_init(to);
171 
172     if (from->n == 0) {
173         return;
174     }
175     if (n == 0 || from->n == 1) {
176         n = from->n;
177     }
178 
179     /* set equally-sized bins between @from's left and right */
180     xmin = qdist_xmin(from);
181     xmax = qdist_xmax(from);
182     step = (xmax - xmin) / n;
183 
184     if (n == from->n) {
185         /* if @from's entries are equally spaced, no need to re-bin */
186         for (i = 0; i < from->n; i++) {
187             if (from->entries[i].x != xmin + i * step) {
188                 goto rebin;
189             }
190         }
191         /* they're equally spaced, so copy the dist and bail out */
192         to->entries = g_renew(struct qdist_entry, to->entries, n);
193         to->n = from->n;
194         memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n);
195         return;
196     }
197 
198  rebin:
199     j = 0;
200     for (i = 0; i < n; i++) {
201         double x;
202         double left, right;
203 
204         left = xmin + i * step;
205         right = xmin + (i + 1) * step;
206 
207         /* Add x, even if it might not get any counts later */
208         x = left;
209         qdist_add(to, x, 0);
210 
211         /*
212          * To avoid double-counting we capture [left, right) ranges, except for
213          * the righmost bin, which captures a [left, right] range.
214          */
215         while (j < from->n && (from->entries[j].x < right || i == n - 1)) {
216             struct qdist_entry *o = &from->entries[j];
217 
218             qdist_add(to, x, o->count);
219             j++;
220         }
221     }
222 }
223 
224 /*
225  * Print @dist into a string, after re-binning it into @n bins of consecutive,
226  * non-overlapping intervals.
227  *
228  * If @n == 0, use @orig->n.
229  *
230  * Callers must free the returned string with g_free().
231  */
qdist_pr_plain(const struct qdist * dist,size_t n)232 char *qdist_pr_plain(const struct qdist *dist, size_t n)
233 {
234     struct qdist binned;
235     char *ret;
236 
237     if (dist->n == 0) {
238         return g_strdup(QDIST_EMPTY_STR);
239     }
240     qdist_bin__internal(&binned, dist, n);
241     ret = qdist_pr_internal(&binned);
242     qdist_destroy(&binned);
243     return ret;
244 }
245 
qdist_pr_label(const struct qdist * dist,size_t n_bins,uint32_t opt,bool is_left)246 static char *qdist_pr_label(const struct qdist *dist, size_t n_bins,
247                             uint32_t opt, bool is_left)
248 {
249     const char *percent;
250     const char *lparen;
251     const char *rparen;
252     GString *s;
253     double x1, x2, step;
254     double x;
255     double n;
256     int dec;
257 
258     s = g_string_new("");
259     if (!(opt & QDIST_PR_LABELS)) {
260         goto out;
261     }
262 
263     dec = opt & QDIST_PR_NODECIMAL ? 0 : 1;
264     percent = opt & QDIST_PR_PERCENT ? "%" : "";
265 
266     n = n_bins ? n_bins : dist->n;
267     x = is_left ? qdist_xmin(dist) : qdist_xmax(dist);
268     step = (qdist_xmax(dist) - qdist_xmin(dist)) / n;
269 
270     if (opt & QDIST_PR_100X) {
271         x *= 100.0;
272         step *= 100.0;
273     }
274     if (opt & QDIST_PR_NOBINRANGE) {
275         lparen = rparen = "";
276         x1 = x;
277         x2 = x; /* unnecessary, but a dumb compiler might not figure it out */
278     } else {
279         lparen = "[";
280         rparen = is_left ? ")" : "]";
281         if (is_left) {
282             x1 = x;
283             x2 = x + step;
284         } else {
285             x1 = x - step;
286             x2 = x;
287         }
288     }
289     g_string_append_printf(s, "%s%.*f", lparen, dec, x1);
290     if (!(opt & QDIST_PR_NOBINRANGE)) {
291         g_string_append_printf(s, ",%.*f%s", dec, x2, rparen);
292     }
293     g_string_append(s, percent);
294  out:
295     return g_string_free(s, FALSE);
296 }
297 
298 /*
299  * Print the distribution's histogram into a string.
300  *
301  * See also: qdist_pr_plain().
302  *
303  * Callers must free the returned string with g_free().
304  */
qdist_pr(const struct qdist * dist,size_t n_bins,uint32_t opt)305 char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt)
306 {
307     const char *border = opt & QDIST_PR_BORDER ? "|" : "";
308     char *llabel, *rlabel;
309     char *hgram;
310     GString *s;
311 
312     if (dist->n == 0) {
313         return g_strdup(QDIST_EMPTY_STR);
314     }
315 
316     s = g_string_new("");
317 
318     llabel = qdist_pr_label(dist, n_bins, opt, true);
319     rlabel = qdist_pr_label(dist, n_bins, opt, false);
320     hgram = qdist_pr_plain(dist, n_bins);
321     g_string_append_printf(s, "%s%s%s%s%s",
322                            llabel, border, hgram, border, rlabel);
323     g_free(llabel);
324     g_free(rlabel);
325     g_free(hgram);
326 
327     return g_string_free(s, FALSE);
328 }
329 
qdist_x(const struct qdist * dist,int index)330 static inline double qdist_x(const struct qdist *dist, int index)
331 {
332     if (dist->n == 0) {
333         return NAN;
334     }
335     return dist->entries[index].x;
336 }
337 
qdist_xmin(const struct qdist * dist)338 double qdist_xmin(const struct qdist *dist)
339 {
340     return qdist_x(dist, 0);
341 }
342 
qdist_xmax(const struct qdist * dist)343 double qdist_xmax(const struct qdist *dist)
344 {
345     return qdist_x(dist, dist->n - 1);
346 }
347 
qdist_unique_entries(const struct qdist * dist)348 size_t qdist_unique_entries(const struct qdist *dist)
349 {
350     return dist->n;
351 }
352 
qdist_sample_count(const struct qdist * dist)353 unsigned long qdist_sample_count(const struct qdist *dist)
354 {
355     unsigned long count = 0;
356     size_t i;
357 
358     for (i = 0; i < dist->n; i++) {
359         struct qdist_entry *e = &dist->entries[i];
360 
361         count += e->count;
362     }
363     return count;
364 }
365 
qdist_pairwise_avg(const struct qdist * dist,size_t index,size_t n,unsigned long count)366 static double qdist_pairwise_avg(const struct qdist *dist, size_t index,
367                                  size_t n, unsigned long count)
368 {
369     /* amortize the recursion by using a base case > 2 */
370     if (n <= 8) {
371         size_t i;
372         double ret = 0;
373 
374         for (i = 0; i < n; i++) {
375             struct qdist_entry *e = &dist->entries[index + i];
376 
377             ret += e->x * e->count / count;
378         }
379         return ret;
380     } else {
381         size_t n2 = n / 2;
382 
383         return qdist_pairwise_avg(dist, index, n2, count) +
384                qdist_pairwise_avg(dist, index + n2, n - n2, count);
385     }
386 }
387 
qdist_avg(const struct qdist * dist)388 double qdist_avg(const struct qdist *dist)
389 {
390     unsigned long count;
391 
392     count = qdist_sample_count(dist);
393     if (!count) {
394         return NAN;
395     }
396     return qdist_pairwise_avg(dist, 0, dist->n, count);
397 }
398