1 /* 2 * qdist.c - QEMU helpers for handling frequency distributions of data. 3 * 4 * Copyright (C) 2016, Emilio G. Cota <cota@braap.org> 5 * 6 * License: GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 */ 9 #include "qemu/qdist.h" 10 11 #include <math.h> 12 #ifndef NAN 13 #define NAN (0.0 / 0.0) 14 #endif 15 16 void qdist_init(struct qdist *dist) 17 { 18 dist->entries = g_malloc(sizeof(*dist->entries)); 19 dist->size = 1; 20 dist->n = 0; 21 } 22 23 void qdist_destroy(struct qdist *dist) 24 { 25 g_free(dist->entries); 26 } 27 28 static inline int qdist_cmp_double(double a, double b) 29 { 30 if (a > b) { 31 return 1; 32 } else if (a < b) { 33 return -1; 34 } 35 return 0; 36 } 37 38 static int qdist_cmp(const void *ap, const void *bp) 39 { 40 const struct qdist_entry *a = ap; 41 const struct qdist_entry *b = bp; 42 43 return qdist_cmp_double(a->x, b->x); 44 } 45 46 void qdist_add(struct qdist *dist, double x, long count) 47 { 48 struct qdist_entry *entry = NULL; 49 50 if (dist->n) { 51 struct qdist_entry e; 52 53 e.x = x; 54 entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp); 55 } 56 57 if (entry) { 58 entry->count += count; 59 return; 60 } 61 62 if (unlikely(dist->n == dist->size)) { 63 dist->size *= 2; 64 dist->entries = g_realloc(dist->entries, 65 sizeof(*dist->entries) * (dist->size)); 66 } 67 dist->n++; 68 entry = &dist->entries[dist->n - 1]; 69 entry->x = x; 70 entry->count = count; 71 qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp); 72 } 73 74 void qdist_inc(struct qdist *dist, double x) 75 { 76 qdist_add(dist, x, 1); 77 } 78 79 /* 80 * Unicode for block elements. See: 81 * https://en.wikipedia.org/wiki/Block_Elements 82 */ 83 static const gunichar qdist_blocks[] = { 84 0x2581, 85 0x2582, 86 0x2583, 87 0x2584, 88 0x2585, 89 0x2586, 90 0x2587, 91 0x2588 92 }; 93 94 #define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks) 95 96 /* 97 * Print a distribution into a string. 98 * 99 * This function assumes that appropriate binning has been done on the input; 100 * see qdist_bin__internal() and qdist_pr_plain(). 101 * 102 * Callers must free the returned string with g_free(). 103 */ 104 static char *qdist_pr_internal(const struct qdist *dist) 105 { 106 double min, max; 107 GString *s = g_string_new(""); 108 size_t i; 109 110 /* if only one entry, its printout will be either full or empty */ 111 if (dist->n == 1) { 112 if (dist->entries[0].count) { 113 g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]); 114 } else { 115 g_string_append_c(s, ' '); 116 } 117 goto out; 118 } 119 120 /* get min and max counts */ 121 min = dist->entries[0].count; 122 max = min; 123 for (i = 0; i < dist->n; i++) { 124 struct qdist_entry *e = &dist->entries[i]; 125 126 if (e->count < min) { 127 min = e->count; 128 } 129 if (e->count > max) { 130 max = e->count; 131 } 132 } 133 134 for (i = 0; i < dist->n; i++) { 135 struct qdist_entry *e = &dist->entries[i]; 136 int index; 137 138 /* make an exception with 0; instead of using block[0], print a space */ 139 if (e->count) { 140 /* divide first to avoid loss of precision when e->count == max */ 141 index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1); 142 g_string_append_unichar(s, qdist_blocks[index]); 143 } else { 144 g_string_append_c(s, ' '); 145 } 146 } 147 out: 148 return g_string_free(s, FALSE); 149 } 150 151 /* 152 * Bin the distribution in @from into @n bins of consecutive, non-overlapping 153 * intervals, copying the result to @to. 154 * 155 * This function is internal to qdist: only this file and test code should 156 * ever call it. 157 * 158 * Note: calling this function on an already-binned qdist is a bug. 159 * 160 * If @n == 0 or @from->n == 1, use @from->n. 161 */ 162 void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n) 163 { 164 double xmin, xmax; 165 double step; 166 size_t i, j; 167 168 qdist_init(to); 169 170 if (from->n == 0) { 171 return; 172 } 173 if (n == 0 || from->n == 1) { 174 n = from->n; 175 } 176 177 /* set equally-sized bins between @from's left and right */ 178 xmin = qdist_xmin(from); 179 xmax = qdist_xmax(from); 180 step = (xmax - xmin) / n; 181 182 if (n == from->n) { 183 /* if @from's entries are equally spaced, no need to re-bin */ 184 for (i = 0; i < from->n; i++) { 185 if (from->entries[i].x != xmin + i * step) { 186 goto rebin; 187 } 188 } 189 /* they're equally spaced, so copy the dist and bail out */ 190 to->entries = g_new(struct qdist_entry, from->n); 191 to->n = from->n; 192 memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n); 193 return; 194 } 195 196 rebin: 197 j = 0; 198 for (i = 0; i < n; i++) { 199 double x; 200 double left, right; 201 202 left = xmin + i * step; 203 right = xmin + (i + 1) * step; 204 205 /* Add x, even if it might not get any counts later */ 206 x = left; 207 qdist_add(to, x, 0); 208 209 /* 210 * To avoid double-counting we capture [left, right) ranges, except for 211 * the righmost bin, which captures a [left, right] range. 212 */ 213 while (j < from->n && (from->entries[j].x < right || i == n - 1)) { 214 struct qdist_entry *o = &from->entries[j]; 215 216 qdist_add(to, x, o->count); 217 j++; 218 } 219 } 220 } 221 222 /* 223 * Print @dist into a string, after re-binning it into @n bins of consecutive, 224 * non-overlapping intervals. 225 * 226 * If @n == 0, use @orig->n. 227 * 228 * Callers must free the returned string with g_free(). 229 */ 230 char *qdist_pr_plain(const struct qdist *dist, size_t n) 231 { 232 struct qdist binned; 233 char *ret; 234 235 if (dist->n == 0) { 236 return NULL; 237 } 238 qdist_bin__internal(&binned, dist, n); 239 ret = qdist_pr_internal(&binned); 240 qdist_destroy(&binned); 241 return ret; 242 } 243 244 static char *qdist_pr_label(const struct qdist *dist, size_t n_bins, 245 uint32_t opt, bool is_left) 246 { 247 const char *percent; 248 const char *lparen; 249 const char *rparen; 250 GString *s; 251 double x1, x2, step; 252 double x; 253 double n; 254 int dec; 255 256 s = g_string_new(""); 257 if (!(opt & QDIST_PR_LABELS)) { 258 goto out; 259 } 260 261 dec = opt & QDIST_PR_NODECIMAL ? 0 : 1; 262 percent = opt & QDIST_PR_PERCENT ? "%" : ""; 263 264 n = n_bins ? n_bins : dist->n; 265 x = is_left ? qdist_xmin(dist) : qdist_xmax(dist); 266 step = (qdist_xmax(dist) - qdist_xmin(dist)) / n; 267 268 if (opt & QDIST_PR_100X) { 269 x *= 100.0; 270 step *= 100.0; 271 } 272 if (opt & QDIST_PR_NOBINRANGE) { 273 lparen = rparen = ""; 274 x1 = x; 275 x2 = x; /* unnecessary, but a dumb compiler might not figure it out */ 276 } else { 277 lparen = "["; 278 rparen = is_left ? ")" : "]"; 279 if (is_left) { 280 x1 = x; 281 x2 = x + step; 282 } else { 283 x1 = x - step; 284 x2 = x; 285 } 286 } 287 g_string_append_printf(s, "%s%.*f", lparen, dec, x1); 288 if (!(opt & QDIST_PR_NOBINRANGE)) { 289 g_string_append_printf(s, ",%.*f%s", dec, x2, rparen); 290 } 291 g_string_append(s, percent); 292 out: 293 return g_string_free(s, FALSE); 294 } 295 296 /* 297 * Print the distribution's histogram into a string. 298 * 299 * See also: qdist_pr_plain(). 300 * 301 * Callers must free the returned string with g_free(). 302 */ 303 char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt) 304 { 305 const char *border = opt & QDIST_PR_BORDER ? "|" : ""; 306 char *llabel, *rlabel; 307 char *hgram; 308 GString *s; 309 310 if (dist->n == 0) { 311 return NULL; 312 } 313 314 s = g_string_new(""); 315 316 llabel = qdist_pr_label(dist, n_bins, opt, true); 317 rlabel = qdist_pr_label(dist, n_bins, opt, false); 318 hgram = qdist_pr_plain(dist, n_bins); 319 g_string_append_printf(s, "%s%s%s%s%s", 320 llabel, border, hgram, border, rlabel); 321 g_free(llabel); 322 g_free(rlabel); 323 g_free(hgram); 324 325 return g_string_free(s, FALSE); 326 } 327 328 static inline double qdist_x(const struct qdist *dist, int index) 329 { 330 if (dist->n == 0) { 331 return NAN; 332 } 333 return dist->entries[index].x; 334 } 335 336 double qdist_xmin(const struct qdist *dist) 337 { 338 return qdist_x(dist, 0); 339 } 340 341 double qdist_xmax(const struct qdist *dist) 342 { 343 return qdist_x(dist, dist->n - 1); 344 } 345 346 size_t qdist_unique_entries(const struct qdist *dist) 347 { 348 return dist->n; 349 } 350 351 unsigned long qdist_sample_count(const struct qdist *dist) 352 { 353 unsigned long count = 0; 354 size_t i; 355 356 for (i = 0; i < dist->n; i++) { 357 struct qdist_entry *e = &dist->entries[i]; 358 359 count += e->count; 360 } 361 return count; 362 } 363 364 static double qdist_pairwise_avg(const struct qdist *dist, size_t index, 365 size_t n, unsigned long count) 366 { 367 /* amortize the recursion by using a base case > 2 */ 368 if (n <= 8) { 369 size_t i; 370 double ret = 0; 371 372 for (i = 0; i < n; i++) { 373 struct qdist_entry *e = &dist->entries[index + i]; 374 375 ret += e->x * e->count / count; 376 } 377 return ret; 378 } else { 379 size_t n2 = n / 2; 380 381 return qdist_pairwise_avg(dist, index, n2, count) + 382 qdist_pairwise_avg(dist, index + n2, n - n2, count); 383 } 384 } 385 386 double qdist_avg(const struct qdist *dist) 387 { 388 unsigned long count; 389 390 count = qdist_sample_count(dist); 391 if (!count) { 392 return NAN; 393 } 394 return qdist_pairwise_avg(dist, 0, dist->n, count); 395 } 396