glusterfs
366 строк · 7.1 Кб
1/*
2Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
3This file is part of GlusterFS.
4
5This file is licensed to you under your choice of the GNU Lesser
6General Public License, version 3 or any later version (LGPLv3 or
7later), or the GNU General Public License, version 2 (GPLv2), in all
8cases as published by the Free Software Foundation.
9*/
10
11#include <stdio.h>12#include <string.h>13
14#include "glusterfs/common-utils.h"15#include "glusterfs/trie.h"16
17#define DISTANCE_EDIT 118#define DISTANCE_INS 119#define DISTANCE_DEL 120
21struct trienode {22char id;23char eow;24int depth;25void *data;26struct trie *trie;27struct trienode *parent;28struct trienode *subnodes[255];29};30
31struct trie {32struct trienode root;33int nodecnt;34size_t len;35};36
37trie_t *38trie_new(void)39{
40trie_t *trie = NULL;41
42trie = GF_CALLOC(1, sizeof(*trie), gf_common_mt_trie_trie);43if (!trie)44return NULL;45
46trie->root.trie = trie;47
48return trie;49}
50
51static trienode_t *52trie_subnode(trienode_t *node, int id)53{
54trienode_t *subnode = NULL;55
56subnode = node->subnodes[id];57if (!subnode) {58subnode = GF_CALLOC(1, sizeof(*subnode), gf_common_mt_trie_node);59if (!subnode)60return NULL;61
62subnode->id = id;63subnode->depth = node->depth + 1;64node->subnodes[id] = subnode;65subnode->parent = node;66subnode->trie = node->trie;67node->trie->nodecnt++;68}69
70return subnode;71}
72
73int
74trie_add(trie_t *trie, const char *dword)75{
76trienode_t *node = NULL;77int i = 0;78char id = 0;79trienode_t *subnode = NULL;80
81node = &trie->root;82
83for (i = 0; i < strlen(dword); i++) {84id = dword[i];85
86subnode = trie_subnode(node, id);87if (!subnode)88return -1;89node = subnode;90}91
92node->eow = 1;93
94return 0;95}
96
97static void98trienode_free(trienode_t *node)99{
100trienode_t *trav = NULL;101int i = 0;102
103for (i = 0; i < 255; i++) {104trav = node->subnodes[i];105
106if (trav)107trienode_free(trav);108}109
110GF_FREE(node->data);111GF_FREE(node);112}
113
114void
115trie_destroy(trie_t *trie)116{
117trienode_free((trienode_t *)trie);118}
119
120void
121trie_destroy_bynode(trienode_t *node)122{
123trie_destroy(node->trie);124}
125
126static int127trienode_walk(trienode_t *node, int (*fn)(trienode_t *node, void *data),128void *data, int eowonly)129{
130trienode_t *trav = NULL;131int i = 0;132int cret = 0;133int ret = 0;134
135if (!eowonly || node->eow)136ret = fn(node, data);137
138if (ret)139goto out;140
141for (i = 0; i < 255; i++) {142trav = node->subnodes[i];143if (!trav)144continue;145
146cret = trienode_walk(trav, fn, data, eowonly);147if (cret < 0) {148ret = cret;149goto out;150}151ret += cret;152}153
154out:155return ret;156}
157
158static int159trie_walk(trie_t *trie, int (*fn)(trienode_t *node, void *data), void *data,160int eowonly)161{
162return trienode_walk(&trie->root, fn, data, eowonly);163}
164
165static void166print_node(trienode_t *node, char **buf)167{
168if (!node->parent)169return;170
171if (node->parent) {172print_node(node->parent, buf);173*(*buf)++ = node->id;174}175}
176
177int
178trienode_get_word(trienode_t *node, char **bufp)179{
180char *buf = NULL;181
182buf = GF_CALLOC(1, node->depth + 1, gf_common_mt_trie_buf);183if (!buf)184return -1;185*bufp = buf;186
187print_node(node, &buf);188
189return 0;190}
191
192static int193calc_dist(trienode_t *node, void *data)194{
195const char *word = NULL;196int i = 0;197int *row = NULL;198int *uprow = NULL;199int distu = 0;200int distl = 0;201int distul = 0;202
203word = data;204
205node->data = GF_CALLOC(node->trie->len, sizeof(int),206gf_common_mt_trie_data);207if (!node->data)208return -1;209row = node->data;210
211if (!node->parent) {212for (i = 0; i < node->trie->len; i++)213row[i] = i + 1;214
215return 0;216}217
218uprow = node->parent->data;219
220distu = node->depth; /* up node */221distul = node->parent->depth; /* up-left node */222
223for (i = 0; i < node->trie->len; i++) {224distl = uprow[i]; /* left node */225
226if (word[i] == node->id)227row[i] = distul;228else229row[i] = min((distul + DISTANCE_EDIT),230min((distu + DISTANCE_DEL), (distl + DISTANCE_INS)));231
232distu = row[i];233distul = distl;234}235
236return 0;237}
238
239int
240trienode_get_dist(trienode_t *node)241{
242int *row = NULL;243
244row = node->data;245
246return row[node->trie->len - 1];247}
248
249struct trienodevec_w {250struct trienodevec *vec;251const char *word;252};253
254static void255trienodevec_clear(struct trienodevec *nodevec)256{
257memset(nodevec->nodes, 0, sizeof(*nodevec->nodes) * nodevec->cnt);258}
259
260static int261collect_closest(trienode_t *node, void *data)262{
263struct trienodevec_w *nodevec_w = NULL;264struct trienodevec *nodevec = NULL;265int dist = 0;266int i = 0;267
268nodevec_w = data;269nodevec = nodevec_w->vec;270
271if (calc_dist(node, (void *)nodevec_w->word))272return -1;273
274if (!node->eow || !nodevec->cnt)275return 0;276
277dist = trienode_get_dist(node);278
279/*280* I thought that when descending further after some dictionary word dw,
281* if we see that child's distance is bigger than it was for dw, then we
282* can prune this branch, as it can contain only worse nodes.
283*
284* This conjecture fails, see eg:
285*
286* d("AB", "B") = 1;
287* d("AB", "BA") = 2;
288* d("AB", "BAB") = 1;
289*
290* -- if both "B" and "BAB" are in dict., then pruning at "BA" * would
291* miss "BAB".
292*
293* (example courtesy of Richard Bann <richardbann at gmail.com>)
294
295if (node->parent->eow && dist > trienode_get_dist (node->parent))
296return 1;
297
298*/
299
300if (nodevec->nodes[0] && dist < trienode_get_dist(nodevec->nodes[0])) {301/* improving over the findings so far */302trienodevec_clear(nodevec);303nodevec->nodes[0] = node;304} else if (!nodevec->nodes[0] ||305dist == trienode_get_dist(nodevec->nodes[0])) {306/* as good as the best so far, add if there is free space */307for (i = 0; i < nodevec->cnt; i++) {308if (!nodevec->nodes[i]) {309nodevec->nodes[i] = node;310break;311}312}313}314
315return 0;316}
317
318int
319trie_measure(trie_t *trie, const char *word, trienode_t **nodes, int nodecnt)320{
321struct trienodevec nodevec = {3220,323};324
325nodevec.nodes = nodes;326nodevec.cnt = nodecnt;327
328return trie_measure_vec(trie, word, &nodevec);329}
330
331int
332trie_measure_vec(trie_t *trie, const char *word, struct trienodevec *nodevec)333{
334struct trienodevec_w nodevec_w = {3350,336};337int ret = 0;338
339trie->len = strlen(word);340
341trienodevec_clear(nodevec);342nodevec_w.vec = nodevec;343nodevec_w.word = word;344
345ret = trie_walk(trie, collect_closest, &nodevec_w, 0);346if (ret > 0)347ret = 0;348
349return ret;350}
351
352static int353trienode_reset(trienode_t *node, void *data)354{
355GF_FREE(node->data);356
357return 0;358}
359
360void
361trie_reset_search(trie_t *trie)362{
363trie->len = 0;364
365trie_walk(trie, trienode_reset, NULL, 0);366}
367