qemu

Форк
0
/
qdist.c 
397 строк · 9.2 Кб
1
/*
2
 * qdist.c - QEMU helpers for handling frequency distributions of data.
3
 *
4
 * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
5
 *
6
 * License: GNU GPL, version 2 or later.
7
 *   See the COPYING file in the top-level directory.
8
 */
9
#include "qemu/osdep.h"
10
#include "qemu/qdist.h"
11

12
#include <math.h>
13
#ifndef NAN
14
#define NAN (0.0 / 0.0)
15
#endif
16

17
#define QDIST_EMPTY_STR "(empty)"
18

19
void qdist_init(struct qdist *dist)
20
{
21
    dist->entries = g_new(struct qdist_entry, 1);
22
    dist->size = 1;
23
    dist->n = 0;
24
}
25

26
void qdist_destroy(struct qdist *dist)
27
{
28
    g_free(dist->entries);
29
}
30

31
static inline int qdist_cmp_double(double a, double b)
32
{
33
    if (a > b) {
34
        return 1;
35
    } else if (a < b) {
36
        return -1;
37
    }
38
    return 0;
39
}
40

41
static int qdist_cmp(const void *ap, const void *bp)
42
{
43
    const struct qdist_entry *a = ap;
44
    const struct qdist_entry *b = bp;
45

46
    return qdist_cmp_double(a->x, b->x);
47
}
48

49
void qdist_add(struct qdist *dist, double x, long count)
50
{
51
    struct qdist_entry *entry = NULL;
52

53
    if (dist->n) {
54
        struct qdist_entry e;
55

56
        e.x = x;
57
        entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp);
58
    }
59

60
    if (entry) {
61
        entry->count += count;
62
        return;
63
    }
64

65
    if (unlikely(dist->n == dist->size)) {
66
        dist->size *= 2;
67
        dist->entries = g_renew(struct qdist_entry, dist->entries, dist->size);
68
    }
69
    dist->n++;
70
    entry = &dist->entries[dist->n - 1];
71
    entry->x = x;
72
    entry->count = count;
73
    qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp);
74
}
75

76
void qdist_inc(struct qdist *dist, double x)
77
{
78
    qdist_add(dist, x, 1);
79
}
80

81
/*
82
 * Unicode for block elements. See:
83
 *   https://en.wikipedia.org/wiki/Block_Elements
84
 */
85
static const gunichar qdist_blocks[] = {
86
    0x2581,
87
    0x2582,
88
    0x2583,
89
    0x2584,
90
    0x2585,
91
    0x2586,
92
    0x2587,
93
    0x2588
94
};
95

96
#define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks)
97

98
/*
99
 * Print a distribution into a string.
100
 *
101
 * This function assumes that appropriate binning has been done on the input;
102
 * see qdist_bin__internal() and qdist_pr_plain().
103
 *
104
 * Callers must free the returned string with g_free().
105
 */
106
static char *qdist_pr_internal(const struct qdist *dist)
107
{
108
    double min, max;
109
    GString *s = g_string_new("");
110
    size_t i;
111

112
    /* if only one entry, its printout will be either full or empty */
113
    if (dist->n == 1) {
114
        if (dist->entries[0].count) {
115
            g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]);
116
        } else {
117
            g_string_append_c(s, ' ');
118
        }
119
        goto out;
120
    }
121

122
    /* get min and max counts */
123
    min = dist->entries[0].count;
124
    max = min;
125
    for (i = 0; i < dist->n; i++) {
126
        struct qdist_entry *e = &dist->entries[i];
127

128
        if (e->count < min) {
129
            min = e->count;
130
        }
131
        if (e->count > max) {
132
            max = e->count;
133
        }
134
    }
135

136
    for (i = 0; i < dist->n; i++) {
137
        struct qdist_entry *e = &dist->entries[i];
138
        int index;
139

140
        /* make an exception with 0; instead of using block[0], print a space */
141
        if (e->count) {
142
            /* divide first to avoid loss of precision when e->count == max */
143
            index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1);
144
            g_string_append_unichar(s, qdist_blocks[index]);
145
        } else {
146
            g_string_append_c(s, ' ');
147
        }
148
    }
149
 out:
150
    return g_string_free(s, FALSE);
151
}
152

153
/*
154
 * Bin the distribution in @from into @n bins of consecutive, non-overlapping
155
 * intervals, copying the result to @to.
156
 *
157
 * This function is internal to qdist: only this file and test code should
158
 * ever call it.
159
 *
160
 * Note: calling this function on an already-binned qdist is a bug.
161
 *
162
 * If @n == 0 or @from->n == 1, use @from->n.
163
 */
164
void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n)
165
{
166
    double xmin, xmax;
167
    double step;
168
    size_t i, j;
169

170
    qdist_init(to);
171

172
    if (from->n == 0) {
173
        return;
174
    }
175
    if (n == 0 || from->n == 1) {
176
        n = from->n;
177
    }
178

179
    /* set equally-sized bins between @from's left and right */
180
    xmin = qdist_xmin(from);
181
    xmax = qdist_xmax(from);
182
    step = (xmax - xmin) / n;
183

184
    if (n == from->n) {
185
        /* if @from's entries are equally spaced, no need to re-bin */
186
        for (i = 0; i < from->n; i++) {
187
            if (from->entries[i].x != xmin + i * step) {
188
                goto rebin;
189
            }
190
        }
191
        /* they're equally spaced, so copy the dist and bail out */
192
        to->entries = g_renew(struct qdist_entry, to->entries, n);
193
        to->n = from->n;
194
        memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n);
195
        return;
196
    }
197

198
 rebin:
199
    j = 0;
200
    for (i = 0; i < n; i++) {
201
        double x;
202
        double left, right;
203

204
        left = xmin + i * step;
205
        right = xmin + (i + 1) * step;
206

207
        /* Add x, even if it might not get any counts later */
208
        x = left;
209
        qdist_add(to, x, 0);
210

211
        /*
212
         * To avoid double-counting we capture [left, right) ranges, except for
213
         * the rightmost bin, which captures a [left, right] range.
214
         */
215
        while (j < from->n && (from->entries[j].x < right || i == n - 1)) {
216
            struct qdist_entry *o = &from->entries[j];
217

218
            qdist_add(to, x, o->count);
219
            j++;
220
        }
221
    }
222
}
223

224
/*
225
 * Print @dist into a string, after re-binning it into @n bins of consecutive,
226
 * non-overlapping intervals.
227
 *
228
 * If @n == 0, use @orig->n.
229
 *
230
 * Callers must free the returned string with g_free().
231
 */
232
char *qdist_pr_plain(const struct qdist *dist, size_t n)
233
{
234
    struct qdist binned;
235
    char *ret;
236

237
    if (dist->n == 0) {
238
        return g_strdup(QDIST_EMPTY_STR);
239
    }
240
    qdist_bin__internal(&binned, dist, n);
241
    ret = qdist_pr_internal(&binned);
242
    qdist_destroy(&binned);
243
    return ret;
244
}
245

246
static char *qdist_pr_label(const struct qdist *dist, size_t n_bins,
247
                            uint32_t opt, bool is_left)
248
{
249
    const char *percent;
250
    const char *lparen;
251
    const char *rparen;
252
    GString *s;
253
    double x1, x2, step;
254
    double x;
255
    double n;
256
    int dec;
257

258
    s = g_string_new("");
259
    if (!(opt & QDIST_PR_LABELS)) {
260
        goto out;
261
    }
262

263
    dec = opt & QDIST_PR_NODECIMAL ? 0 : 1;
264
    percent = opt & QDIST_PR_PERCENT ? "%" : "";
265

266
    n = n_bins ? n_bins : dist->n;
267
    x = is_left ? qdist_xmin(dist) : qdist_xmax(dist);
268
    step = (qdist_xmax(dist) - qdist_xmin(dist)) / n;
269

270
    if (opt & QDIST_PR_100X) {
271
        x *= 100.0;
272
        step *= 100.0;
273
    }
274
    if (opt & QDIST_PR_NOBINRANGE) {
275
        lparen = rparen = "";
276
        x1 = x;
277
        x2 = x; /* unnecessary, but a dumb compiler might not figure it out */
278
    } else {
279
        lparen = "[";
280
        rparen = is_left ? ")" : "]";
281
        if (is_left) {
282
            x1 = x;
283
            x2 = x + step;
284
        } else {
285
            x1 = x - step;
286
            x2 = x;
287
        }
288
    }
289
    g_string_append_printf(s, "%s%.*f", lparen, dec, x1);
290
    if (!(opt & QDIST_PR_NOBINRANGE)) {
291
        g_string_append_printf(s, ",%.*f%s", dec, x2, rparen);
292
    }
293
    g_string_append(s, percent);
294
 out:
295
    return g_string_free(s, FALSE);
296
}
297

298
/*
299
 * Print the distribution's histogram into a string.
300
 *
301
 * See also: qdist_pr_plain().
302
 *
303
 * Callers must free the returned string with g_free().
304
 */
305
char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt)
306
{
307
    const char *border = opt & QDIST_PR_BORDER ? "|" : "";
308
    char *llabel, *rlabel;
309
    char *hgram;
310
    GString *s;
311

312
    if (dist->n == 0) {
313
        return g_strdup(QDIST_EMPTY_STR);
314
    }
315

316
    s = g_string_new("");
317

318
    llabel = qdist_pr_label(dist, n_bins, opt, true);
319
    rlabel = qdist_pr_label(dist, n_bins, opt, false);
320
    hgram = qdist_pr_plain(dist, n_bins);
321
    g_string_append_printf(s, "%s%s%s%s%s",
322
                           llabel, border, hgram, border, rlabel);
323
    g_free(llabel);
324
    g_free(rlabel);
325
    g_free(hgram);
326

327
    return g_string_free(s, FALSE);
328
}
329

330
static inline double qdist_x(const struct qdist *dist, int index)
331
{
332
    if (dist->n == 0) {
333
        return NAN;
334
    }
335
    return dist->entries[index].x;
336
}
337

338
double qdist_xmin(const struct qdist *dist)
339
{
340
    return qdist_x(dist, 0);
341
}
342

343
double qdist_xmax(const struct qdist *dist)
344
{
345
    return qdist_x(dist, dist->n - 1);
346
}
347

348
size_t qdist_unique_entries(const struct qdist *dist)
349
{
350
    return dist->n;
351
}
352

353
unsigned long qdist_sample_count(const struct qdist *dist)
354
{
355
    unsigned long count = 0;
356
    size_t i;
357

358
    for (i = 0; i < dist->n; i++) {
359
        struct qdist_entry *e = &dist->entries[i];
360

361
        count += e->count;
362
    }
363
    return count;
364
}
365

366
static double qdist_pairwise_avg(const struct qdist *dist, size_t index,
367
                                 size_t n, unsigned long count)
368
{
369
    /* amortize the recursion by using a base case > 2 */
370
    if (n <= 8) {
371
        size_t i;
372
        double ret = 0;
373

374
        for (i = 0; i < n; i++) {
375
            struct qdist_entry *e = &dist->entries[index + i];
376

377
            ret += e->x * e->count / count;
378
        }
379
        return ret;
380
    } else {
381
        size_t n2 = n / 2;
382

383
        return qdist_pairwise_avg(dist, index, n2, count) +
384
               qdist_pairwise_avg(dist, index + n2, n - n2, count);
385
    }
386
}
387

388
double qdist_avg(const struct qdist *dist)
389
{
390
    unsigned long count;
391

392
    count = qdist_sample_count(dist);
393
    if (!count) {
394
        return NAN;
395
    }
396
    return qdist_pairwise_avg(dist, 0, dist->n, count);
397
}
398

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.