glusterfs

Форк
0
366 строк · 7.1 Кб
1
/*
2
  Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
3
  This file is part of GlusterFS.
4

5
  This file is licensed to you under your choice of the GNU Lesser
6
  General Public License, version 3 or any later version (LGPLv3 or
7
  later), or the GNU General Public License, version 2 (GPLv2), in all
8
  cases as published by the Free Software Foundation.
9
*/
10

11
#include <stdio.h>
12
#include <string.h>
13

14
#include "glusterfs/common-utils.h"
15
#include "glusterfs/trie.h"
16

17
#define DISTANCE_EDIT 1
18
#define DISTANCE_INS 1
19
#define DISTANCE_DEL 1
20

21
struct trienode {
22
    char id;
23
    char eow;
24
    int depth;
25
    void *data;
26
    struct trie *trie;
27
    struct trienode *parent;
28
    struct trienode *subnodes[255];
29
};
30

31
struct trie {
32
    struct trienode root;
33
    int nodecnt;
34
    size_t len;
35
};
36

37
trie_t *
38
trie_new(void)
39
{
40
    trie_t *trie = NULL;
41

42
    trie = GF_CALLOC(1, sizeof(*trie), gf_common_mt_trie_trie);
43
    if (!trie)
44
        return NULL;
45

46
    trie->root.trie = trie;
47

48
    return trie;
49
}
50

51
static trienode_t *
52
trie_subnode(trienode_t *node, int id)
53
{
54
    trienode_t *subnode = NULL;
55

56
    subnode = node->subnodes[id];
57
    if (!subnode) {
58
        subnode = GF_CALLOC(1, sizeof(*subnode), gf_common_mt_trie_node);
59
        if (!subnode)
60
            return NULL;
61

62
        subnode->id = id;
63
        subnode->depth = node->depth + 1;
64
        node->subnodes[id] = subnode;
65
        subnode->parent = node;
66
        subnode->trie = node->trie;
67
        node->trie->nodecnt++;
68
    }
69

70
    return subnode;
71
}
72

73
int
74
trie_add(trie_t *trie, const char *dword)
75
{
76
    trienode_t *node = NULL;
77
    int i = 0;
78
    char id = 0;
79
    trienode_t *subnode = NULL;
80

81
    node = &trie->root;
82

83
    for (i = 0; i < strlen(dword); i++) {
84
        id = dword[i];
85

86
        subnode = trie_subnode(node, id);
87
        if (!subnode)
88
            return -1;
89
        node = subnode;
90
    }
91

92
    node->eow = 1;
93

94
    return 0;
95
}
96

97
static void
98
trienode_free(trienode_t *node)
99
{
100
    trienode_t *trav = NULL;
101
    int i = 0;
102

103
    for (i = 0; i < 255; i++) {
104
        trav = node->subnodes[i];
105

106
        if (trav)
107
            trienode_free(trav);
108
    }
109

110
    GF_FREE(node->data);
111
    GF_FREE(node);
112
}
113

114
void
115
trie_destroy(trie_t *trie)
116
{
117
    trienode_free((trienode_t *)trie);
118
}
119

120
void
121
trie_destroy_bynode(trienode_t *node)
122
{
123
    trie_destroy(node->trie);
124
}
125

126
static int
127
trienode_walk(trienode_t *node, int (*fn)(trienode_t *node, void *data),
128
              void *data, int eowonly)
129
{
130
    trienode_t *trav = NULL;
131
    int i = 0;
132
    int cret = 0;
133
    int ret = 0;
134

135
    if (!eowonly || node->eow)
136
        ret = fn(node, data);
137

138
    if (ret)
139
        goto out;
140

141
    for (i = 0; i < 255; i++) {
142
        trav = node->subnodes[i];
143
        if (!trav)
144
            continue;
145

146
        cret = trienode_walk(trav, fn, data, eowonly);
147
        if (cret < 0) {
148
            ret = cret;
149
            goto out;
150
        }
151
        ret += cret;
152
    }
153

154
out:
155
    return ret;
156
}
157

158
static int
159
trie_walk(trie_t *trie, int (*fn)(trienode_t *node, void *data), void *data,
160
          int eowonly)
161
{
162
    return trienode_walk(&trie->root, fn, data, eowonly);
163
}
164

165
static void
166
print_node(trienode_t *node, char **buf)
167
{
168
    if (!node->parent)
169
        return;
170

171
    if (node->parent) {
172
        print_node(node->parent, buf);
173
        *(*buf)++ = node->id;
174
    }
175
}
176

177
int
178
trienode_get_word(trienode_t *node, char **bufp)
179
{
180
    char *buf = NULL;
181

182
    buf = GF_CALLOC(1, node->depth + 1, gf_common_mt_trie_buf);
183
    if (!buf)
184
        return -1;
185
    *bufp = buf;
186

187
    print_node(node, &buf);
188

189
    return 0;
190
}
191

192
static int
193
calc_dist(trienode_t *node, void *data)
194
{
195
    const char *word = NULL;
196
    int i = 0;
197
    int *row = NULL;
198
    int *uprow = NULL;
199
    int distu = 0;
200
    int distl = 0;
201
    int distul = 0;
202

203
    word = data;
204

205
    node->data = GF_CALLOC(node->trie->len, sizeof(int),
206
                           gf_common_mt_trie_data);
207
    if (!node->data)
208
        return -1;
209
    row = node->data;
210

211
    if (!node->parent) {
212
        for (i = 0; i < node->trie->len; i++)
213
            row[i] = i + 1;
214

215
        return 0;
216
    }
217

218
    uprow = node->parent->data;
219

220
    distu = node->depth;          /* up node */
221
    distul = node->parent->depth; /* up-left node */
222

223
    for (i = 0; i < node->trie->len; i++) {
224
        distl = uprow[i]; /* left node */
225

226
        if (word[i] == node->id)
227
            row[i] = distul;
228
        else
229
            row[i] = min((distul + DISTANCE_EDIT),
230
                         min((distu + DISTANCE_DEL), (distl + DISTANCE_INS)));
231

232
        distu = row[i];
233
        distul = distl;
234
    }
235

236
    return 0;
237
}
238

239
int
240
trienode_get_dist(trienode_t *node)
241
{
242
    int *row = NULL;
243

244
    row = node->data;
245

246
    return row[node->trie->len - 1];
247
}
248

249
struct trienodevec_w {
250
    struct trienodevec *vec;
251
    const char *word;
252
};
253

254
static void
255
trienodevec_clear(struct trienodevec *nodevec)
256
{
257
    memset(nodevec->nodes, 0, sizeof(*nodevec->nodes) * nodevec->cnt);
258
}
259

260
static int
261
collect_closest(trienode_t *node, void *data)
262
{
263
    struct trienodevec_w *nodevec_w = NULL;
264
    struct trienodevec *nodevec = NULL;
265
    int dist = 0;
266
    int i = 0;
267

268
    nodevec_w = data;
269
    nodevec = nodevec_w->vec;
270

271
    if (calc_dist(node, (void *)nodevec_w->word))
272
        return -1;
273

274
    if (!node->eow || !nodevec->cnt)
275
        return 0;
276

277
    dist = trienode_get_dist(node);
278

279
    /*
280
     * I thought that when descending further after some dictionary word dw,
281
     * if we see that child's distance is bigger than it was for dw, then we
282
     * can prune this branch, as it can contain only worse nodes.
283
     *
284
     * This conjecture fails, see eg:
285
     *
286
     * d("AB", "B") = 1;
287
     * d("AB", "BA") = 2;
288
     * d("AB", "BAB") = 1;
289
     *
290
     * -- if both "B" and "BAB" are in dict., then pruning at "BA" * would
291
     * miss "BAB".
292
     *
293
     * (example courtesy of Richard Bann <richardbann at gmail.com>)
294

295
    if (node->parent->eow && dist > trienode_get_dist (node->parent))
296
            return 1;
297

298
     */
299

300
    if (nodevec->nodes[0] && dist < trienode_get_dist(nodevec->nodes[0])) {
301
        /* improving over the findings so far */
302
        trienodevec_clear(nodevec);
303
        nodevec->nodes[0] = node;
304
    } else if (!nodevec->nodes[0] ||
305
               dist == trienode_get_dist(nodevec->nodes[0])) {
306
        /* as good as the best so far, add if there is free space */
307
        for (i = 0; i < nodevec->cnt; i++) {
308
            if (!nodevec->nodes[i]) {
309
                nodevec->nodes[i] = node;
310
                break;
311
            }
312
        }
313
    }
314

315
    return 0;
316
}
317

318
int
319
trie_measure(trie_t *trie, const char *word, trienode_t **nodes, int nodecnt)
320
{
321
    struct trienodevec nodevec = {
322
        0,
323
    };
324

325
    nodevec.nodes = nodes;
326
    nodevec.cnt = nodecnt;
327

328
    return trie_measure_vec(trie, word, &nodevec);
329
}
330

331
int
332
trie_measure_vec(trie_t *trie, const char *word, struct trienodevec *nodevec)
333
{
334
    struct trienodevec_w nodevec_w = {
335
        0,
336
    };
337
    int ret = 0;
338

339
    trie->len = strlen(word);
340

341
    trienodevec_clear(nodevec);
342
    nodevec_w.vec = nodevec;
343
    nodevec_w.word = word;
344

345
    ret = trie_walk(trie, collect_closest, &nodevec_w, 0);
346
    if (ret > 0)
347
        ret = 0;
348

349
    return ret;
350
}
351

352
static int
353
trienode_reset(trienode_t *node, void *data)
354
{
355
    GF_FREE(node->data);
356

357
    return 0;
358
}
359

360
void
361
trie_reset_search(trie_t *trie)
362
{
363
    trie->len = 0;
364

365
    trie_walk(trie, trienode_reset, NULL, 0);
366
}
367

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.