qemu
1/*
2* qdist.c - QEMU helpers for handling frequency distributions of data.
3*
4* Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
5*
6* License: GNU GPL, version 2 or later.
7* See the COPYING file in the top-level directory.
8*/
9#include "qemu/osdep.h"10#include "qemu/qdist.h"11
12#include <math.h>13#ifndef NAN14#define NAN (0.0 / 0.0)15#endif16
17#define QDIST_EMPTY_STR "(empty)"18
19void qdist_init(struct qdist *dist)20{
21dist->entries = g_new(struct qdist_entry, 1);22dist->size = 1;23dist->n = 0;24}
25
26void qdist_destroy(struct qdist *dist)27{
28g_free(dist->entries);29}
30
31static inline int qdist_cmp_double(double a, double b)32{
33if (a > b) {34return 1;35} else if (a < b) {36return -1;37}38return 0;39}
40
41static int qdist_cmp(const void *ap, const void *bp)42{
43const struct qdist_entry *a = ap;44const struct qdist_entry *b = bp;45
46return qdist_cmp_double(a->x, b->x);47}
48
49void qdist_add(struct qdist *dist, double x, long count)50{
51struct qdist_entry *entry = NULL;52
53if (dist->n) {54struct qdist_entry e;55
56e.x = x;57entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp);58}59
60if (entry) {61entry->count += count;62return;63}64
65if (unlikely(dist->n == dist->size)) {66dist->size *= 2;67dist->entries = g_renew(struct qdist_entry, dist->entries, dist->size);68}69dist->n++;70entry = &dist->entries[dist->n - 1];71entry->x = x;72entry->count = count;73qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp);74}
75
76void qdist_inc(struct qdist *dist, double x)77{
78qdist_add(dist, x, 1);79}
80
81/*
82* Unicode for block elements. See:
83* https://en.wikipedia.org/wiki/Block_Elements
84*/
85static const gunichar qdist_blocks[] = {860x2581,870x2582,880x2583,890x2584,900x2585,910x2586,920x2587,930x258894};95
96#define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks)97
98/*
99* Print a distribution into a string.
100*
101* This function assumes that appropriate binning has been done on the input;
102* see qdist_bin__internal() and qdist_pr_plain().
103*
104* Callers must free the returned string with g_free().
105*/
106static char *qdist_pr_internal(const struct qdist *dist)107{
108double min, max;109GString *s = g_string_new("");110size_t i;111
112/* if only one entry, its printout will be either full or empty */113if (dist->n == 1) {114if (dist->entries[0].count) {115g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]);116} else {117g_string_append_c(s, ' ');118}119goto out;120}121
122/* get min and max counts */123min = dist->entries[0].count;124max = min;125for (i = 0; i < dist->n; i++) {126struct qdist_entry *e = &dist->entries[i];127
128if (e->count < min) {129min = e->count;130}131if (e->count > max) {132max = e->count;133}134}135
136for (i = 0; i < dist->n; i++) {137struct qdist_entry *e = &dist->entries[i];138int index;139
140/* make an exception with 0; instead of using block[0], print a space */141if (e->count) {142/* divide first to avoid loss of precision when e->count == max */143index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1);144g_string_append_unichar(s, qdist_blocks[index]);145} else {146g_string_append_c(s, ' ');147}148}149out:150return g_string_free(s, FALSE);151}
152
153/*
154* Bin the distribution in @from into @n bins of consecutive, non-overlapping
155* intervals, copying the result to @to.
156*
157* This function is internal to qdist: only this file and test code should
158* ever call it.
159*
160* Note: calling this function on an already-binned qdist is a bug.
161*
162* If @n == 0 or @from->n == 1, use @from->n.
163*/
164void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n)165{
166double xmin, xmax;167double step;168size_t i, j;169
170qdist_init(to);171
172if (from->n == 0) {173return;174}175if (n == 0 || from->n == 1) {176n = from->n;177}178
179/* set equally-sized bins between @from's left and right */180xmin = qdist_xmin(from);181xmax = qdist_xmax(from);182step = (xmax - xmin) / n;183
184if (n == from->n) {185/* if @from's entries are equally spaced, no need to re-bin */186for (i = 0; i < from->n; i++) {187if (from->entries[i].x != xmin + i * step) {188goto rebin;189}190}191/* they're equally spaced, so copy the dist and bail out */192to->entries = g_renew(struct qdist_entry, to->entries, n);193to->n = from->n;194memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n);195return;196}197
198rebin:199j = 0;200for (i = 0; i < n; i++) {201double x;202double left, right;203
204left = xmin + i * step;205right = xmin + (i + 1) * step;206
207/* Add x, even if it might not get any counts later */208x = left;209qdist_add(to, x, 0);210
211/*212* To avoid double-counting we capture [left, right) ranges, except for
213* the rightmost bin, which captures a [left, right] range.
214*/
215while (j < from->n && (from->entries[j].x < right || i == n - 1)) {216struct qdist_entry *o = &from->entries[j];217
218qdist_add(to, x, o->count);219j++;220}221}222}
223
224/*
225* Print @dist into a string, after re-binning it into @n bins of consecutive,
226* non-overlapping intervals.
227*
228* If @n == 0, use @orig->n.
229*
230* Callers must free the returned string with g_free().
231*/
232char *qdist_pr_plain(const struct qdist *dist, size_t n)233{
234struct qdist binned;235char *ret;236
237if (dist->n == 0) {238return g_strdup(QDIST_EMPTY_STR);239}240qdist_bin__internal(&binned, dist, n);241ret = qdist_pr_internal(&binned);242qdist_destroy(&binned);243return ret;244}
245
246static char *qdist_pr_label(const struct qdist *dist, size_t n_bins,247uint32_t opt, bool is_left)248{
249const char *percent;250const char *lparen;251const char *rparen;252GString *s;253double x1, x2, step;254double x;255double n;256int dec;257
258s = g_string_new("");259if (!(opt & QDIST_PR_LABELS)) {260goto out;261}262
263dec = opt & QDIST_PR_NODECIMAL ? 0 : 1;264percent = opt & QDIST_PR_PERCENT ? "%" : "";265
266n = n_bins ? n_bins : dist->n;267x = is_left ? qdist_xmin(dist) : qdist_xmax(dist);268step = (qdist_xmax(dist) - qdist_xmin(dist)) / n;269
270if (opt & QDIST_PR_100X) {271x *= 100.0;272step *= 100.0;273}274if (opt & QDIST_PR_NOBINRANGE) {275lparen = rparen = "";276x1 = x;277x2 = x; /* unnecessary, but a dumb compiler might not figure it out */278} else {279lparen = "[";280rparen = is_left ? ")" : "]";281if (is_left) {282x1 = x;283x2 = x + step;284} else {285x1 = x - step;286x2 = x;287}288}289g_string_append_printf(s, "%s%.*f", lparen, dec, x1);290if (!(opt & QDIST_PR_NOBINRANGE)) {291g_string_append_printf(s, ",%.*f%s", dec, x2, rparen);292}293g_string_append(s, percent);294out:295return g_string_free(s, FALSE);296}
297
298/*
299* Print the distribution's histogram into a string.
300*
301* See also: qdist_pr_plain().
302*
303* Callers must free the returned string with g_free().
304*/
305char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt)306{
307const char *border = opt & QDIST_PR_BORDER ? "|" : "";308char *llabel, *rlabel;309char *hgram;310GString *s;311
312if (dist->n == 0) {313return g_strdup(QDIST_EMPTY_STR);314}315
316s = g_string_new("");317
318llabel = qdist_pr_label(dist, n_bins, opt, true);319rlabel = qdist_pr_label(dist, n_bins, opt, false);320hgram = qdist_pr_plain(dist, n_bins);321g_string_append_printf(s, "%s%s%s%s%s",322llabel, border, hgram, border, rlabel);323g_free(llabel);324g_free(rlabel);325g_free(hgram);326
327return g_string_free(s, FALSE);328}
329
330static inline double qdist_x(const struct qdist *dist, int index)331{
332if (dist->n == 0) {333return NAN;334}335return dist->entries[index].x;336}
337
338double qdist_xmin(const struct qdist *dist)339{
340return qdist_x(dist, 0);341}
342
343double qdist_xmax(const struct qdist *dist)344{
345return qdist_x(dist, dist->n - 1);346}
347
348size_t qdist_unique_entries(const struct qdist *dist)349{
350return dist->n;351}
352
353unsigned long qdist_sample_count(const struct qdist *dist)354{
355unsigned long count = 0;356size_t i;357
358for (i = 0; i < dist->n; i++) {359struct qdist_entry *e = &dist->entries[i];360
361count += e->count;362}363return count;364}
365
366static double qdist_pairwise_avg(const struct qdist *dist, size_t index,367size_t n, unsigned long count)368{
369/* amortize the recursion by using a base case > 2 */370if (n <= 8) {371size_t i;372double ret = 0;373
374for (i = 0; i < n; i++) {375struct qdist_entry *e = &dist->entries[index + i];376
377ret += e->x * e->count / count;378}379return ret;380} else {381size_t n2 = n / 2;382
383return qdist_pairwise_avg(dist, index, n2, count) +384qdist_pairwise_avg(dist, index + n2, n - n2, count);385}386}
387
388double qdist_avg(const struct qdist *dist)389{
390unsigned long count;391
392count = qdist_sample_count(dist);393if (!count) {394return NAN;395}396return qdist_pairwise_avg(dist, 0, dist->n, count);397}
398