35
/*
 * Locale name handed to setlocale(LC_CTYPE, ...) at the start of main().
 *
 * NOTE(review): two alternative definitions are visible here; presumably
 * they are selected by a preprocessor conditional that is not visible in
 * this view -- confirm before touching either of them.
 */
static const char *charset = "Russian_Russia.20866";
37
static const char *charset = "ru_RU.koi8r";
40
/*
 * Help text of the utility.
 *
 * It is used as an fprintf() format string in main() and contains exactly
 * one "%s" conversion (in the "Usage:" line) which receives argv[0].
 * Any literal percent sign added here later must therefore be doubled.
 *
 * NOTE(review): the getopt() option string in main() also accepts "r",
 * which is not described in the part of the text visible here -- some
 * lines of the last option group may be missing from this view; confirm.
 */
static const char *usage =
41
"Lexical database holding utility.\n\n"
43
"This utility is designed for constructing, managing, testing and querying\n"
44
"lexical database providing pronunciation info for Russian words.\n\n"
46
"Usage:\t%s [options] <db_path>\n\n"
48
"When filling and updating the database, new records are read\n"
49
"from the standard input. When extracting data from the database\n"
50
"or testing operation, the result is printed to the standard\n"
51
"output. This behaviour can be changed by the \"-f\" switch.\n\n"
53
"All command line options described below are arranged\n"
54
"into several groups by its functionality.\n\n"
56
"The first group consists of options specifying an action to be done.\n"
57
"These options are mutually exclusive. We can do only one action\n"
58
"per invocation. If no action is specified, the program reads\n"
59
"its standard input (or a file specified by \"-f\" option)\n"
60
"and stores its content in the database. Here are the other actions:\n\n"
62
"-h -- Print this help (the only option not requiring the database path)\n"
63
"-l -- List database content\n"
64
"-t <dictionary_file> -- Test the database against specified dictionary\n"
65
"-c -- Clean the database (get rid of redundant records)\n"
66
"-s <key> -- Search specified key\n"
67
"-b <key> -- Retrieve basic forms (if any) for specified word\n"
68
"-d <key> -- Delete record for specified key\n"
69
"-D -- Discard the dataset\n\n"
71
"The next group of options is responsible for choosing the dataset.\n"
72
"These options are mutually exclusive and affect deletion, insertion\n"
73
"and listing operations. For listing and deletion the dataset must be\n"
74
"specified explicitly. If no one of these options is mentioned when\n"
75
"inserting new data, an appropriate dataset will be chosen according\n"
76
"to the input data. Only lexical data can be inserted in such a manner.\n"
77
"For rules target dataset must be specified explicitly.\n\n"
79
"-M -- Implicit Dictionary\n"
80
"-X -- Explicit dictionary\n"
81
"-G -- General rules\n"
82
"-L -- Lexical classification rules\n"
83
"-P -- Prefix detection rules\n"
84
"-C -- Correction rules\n\n"
86
"The next group contains options devoted to search mode specification.\n"
87
" These options affect search and test operation. By default (no options)\n"
88
"full search will be performed, otherwise only those stages specified\n"
89
"explicitly will be included in the search process.\n\n"
91
"-x -- Search in the explicit dictionary\n"
92
"-m -- Try to treat the word as an implicit form\n"
93
"-g -- Try to apply general rules\n\n"
95
"The last group contains several options affecting program behaviour\n"
98
"-f <file> -- Use specified file instead of standard input or output\n"
100
"-q -- Be more quiet than usual (don't print search results as well\n"
101
" as warnings about duplicate records)\n"
102
"-v -- Be more verbose than usual (print final statistical information)\n\n";
105
/*
 * Check a lexbase record (key, value) against the prefix detection rules
 * held in db->prefixes.
 *
 * For every prefix rule whose pattern matches the key and whose match ends
 * before the end of the key, the matched part is replaced by the rule's
 * replacement (if any), the resulting word is looked up in the
 * RULEXDB_LEXBASE dataset, and the function either recurses on the
 * rewritten pair or compares a reconstructed value with the given one.
 *
 * The visible return statement yields 1 when the reconstructed string
 * equals value and -1 otherwise; the cleaning pass in main() removes the
 * record when the result is greater than zero.
 *
 * NOTE(review): this view of the function is incomplete -- the
 * declarations of i, k and match, the braces, the else branches and the
 * final return are not visible, so the exact nesting below is an
 * assumption to be confirmed against the full source.
 */
static int detect_implicit(RULEXDB *db, char *key, char *value)
108
/* s: the key with its prefix rewritten; t: the value being reconstructed. */
char s[RULEXDB_BUFSIZE], t[RULEXDB_BUFSIZE];
111
/* Try every loaded prefix rule in order. */
for (i = 0; i < db->prefixes.nrules; i++)
112
/* regexec() returns 0 on a match; the match must not cover the whole key.
   NOTE(review): rm_eo is signed while strlen() returns size_t, so this
   comparison is performed in unsigned arithmetic. */
if ((!regexec(db->prefixes.pattern[i], key, 1, &match, 0)) &&
114
(match.rm_eo < strlen(key)))
116
/* Build the lookup word: optional replacement followed by the rest of the key.
   NOTE(review): strcpy/strcat are unbounded; s must be large enough for
   replacement plus key remainder -- not verifiable from here. */
if (db->prefixes.replacement[i])
117
(void)strcpy(s, db->prefixes.replacement[i]);
120
(void)strcat(s, key + match.rm_eo);
121
/* The retrieved value is written at an offset inside t so that the prefix
   part can be filled in in front of it; k is presumably the length of the
   replacement copied into s -- its assignment is not visible, confirm. */
if (rulexdb_retrieve_item(db, s, t + match.rm_eo - k, RULEXDB_LEXBASE))
123
/* Rewrite the expected value the same way and check the rewritten pair. */
if (db->prefixes.replacement[i])
124
(void)strcpy(t, db->prefixes.replacement[i]);
126
(void)strcat(t, value + match.rm_eo);
127
k = detect_implicit(db, s, t);
132
/* Put the original prefix of the key in front of the retrieved value.
   strncpy() does not add a terminator here; termination relies on the
   data already stored behind the prefix in t. */
(void)strncpy(t, key, match.rm_eo);
133
/* 1: the record can be derived from another one; -1: it cannot. */
return strcmp(t, value) ? -1 : 1;
140
/*
 * Program entry point.
 *
 * Sets the LC_CTYPE locale, parses the command line with getopt() and then
 * performs one action on the lexical database: listing, basic form lookup,
 * search, test against a dictionary, cleaning, deletion/discarding, or
 * insertion of records read from the input.
 *
 * The visible return statements yield EXIT_FAILURE when ret is non-zero
 * and EXIT_SUCCESS otherwise.
 *
 * NOTE(review): this view of the function is incomplete -- braces, the
 * switch/case labels, several assignments and the declaration of db are
 * not visible. The comments below describe only what the visible
 * statements show; statements about the surrounding control structure are
 * marked as assumptions.
 */
int main(int argc, char *argv[])
144
/* Buffers for one database record. */
char key[RULEXDB_BUFSIZE], value[RULEXDB_BUFSIZE];
145
/* s, d, t: working pointers, also tested as "action already given" markers
   during option parsing; line: input line buffer for fgets(). */
char *s = NULL, *d = NULL, *t = NULL, line[256];
146
char *db_path = NULL, *srcf = NULL;
148
int ret, n, k, i = 0;
149
/* Counters reported in the final statistics. */
int invalid = 0, duplicate = 0;
150
int verbose = 0, quiet = 0, rules_data = 0;
151
int replace_mode = 0, dataset = RULEXDB_DEFAULT, search_mode = 0;
153
/* Select the character classification locale; on failure explain how the
   locale can be generated. */
if (!setlocale(LC_CTYPE, charset))
155
fprintf(stderr, "Cannot set %s locale.\n", charset);
156
fprintf(stderr, "Probably you have to generate it by command:\n");
157
fprintf(stderr, "localedef -f KOI8-R -i ru_RU ru_RU.KOI8-R\n");
164
/* Print the help text; argv[0] fills its single %s. */
(void)fprintf(stderr, usage, argv[0]);
168
/* Option parsing. Options followed by ':' in the string take an argument. */
while((n = getopt(argc,argv,"f:b:s:d:t:CDGLMPXgmxclrqvh")) != -1)
172
/* NOTE(review): the case labels are not visible. The pattern below appears
   to be: a dataset selector first checks whether a dataset was already
   chosen (dataset != RULEXDB_DEFAULT) and then assigns its dataset, while a
   search mode selector ORs its stage flag into search_mode -- confirm. */
if (dataset != RULEXDB_DEFAULT)
174
dataset = RULEXDB_EXCEPTION;
177
search_mode |= RULEXDB_EXCEPTIONS;
180
if (dataset != RULEXDB_DEFAULT)
182
dataset = RULEXDB_LEXBASE;
185
search_mode |= RULEXDB_FORMS;
188
if (dataset != RULEXDB_DEFAULT)
191
dataset = RULEXDB_RULE;
194
search_mode |= RULEXDB_RULES;
197
if (dataset != RULEXDB_DEFAULT)
200
dataset = RULEXDB_LEXCLASS;
203
if (dataset != RULEXDB_DEFAULT)
206
dataset = RULEXDB_PREFIX;
209
if (dataset != RULEXDB_DEFAULT)
212
dataset = RULEXDB_CORRECTOR;
215
/* An option argument equal to "-" is treated specially; presumably this is
   the "-f" file name and "-" keeps the standard stream -- confirm. */
if (strcmp(optarg, "-"))
219
/* Conflict checks: a non-NULL d, s or t means an earlier option already
   recorded an action argument, so ret is set to CS_CONFLICT. */
if (d || s || t) ret = CS_CONFLICT;
223
if (d || s || t) ret = CS_CONFLICT;
228
if (d || s || t) ret = CS_CONFLICT;
232
if (d || s || t) ret = CS_CONFLICT;
236
if (d || s || t) ret = CS_CONFLICT;
240
if (d || s || t) ret = CS_CONFLICT;
244
if (d || s || t) ret = CS_CONFLICT;
251
/* Conflict checks involving the verbose and quiet flags; the assignments
   that set these flags are not visible here. */
if (verbose) ret = CS_CONFLICT;
255
if (quiet) ret = CS_CONFLICT;
259
(void)fprintf(stderr, usage, argv[0]);
262
(void)fprintf(stderr, usage, argv[0]);
265
/* The first non-option argument is taken as the database path. */
if (optind && argv[optind])
267
db_path = argv[optind];
273
(void)fprintf(stderr,
274
"Ambiguous options in command line\n");
277
(void)fprintf(stderr, "DB file must be specified\n");
286
/* Redirect the standard output to the file named by srcf. */
if (!freopen(srcf, "w", stdout))
288
(void)fprintf(stderr, "Cannot open %s for output\n", srcf);
291
/* Open the database in RULEXDB_SEARCH mode for the querying actions. */
db = rulexdb_open(db_path, RULEXDB_SEARCH);
300
/* Apparently the listing action: the dataset has to be given explicitly. */
if (dataset == RULEXDB_DEFAULT)
302
(void)fprintf(stderr, "Dataset must be specified explicitly\n");
311
(void)fprintf(stderr, "Listing %s ruleset\n",
312
rulexdb_dataset_name(dataset));
313
/* Rules are requested by number starting from 1 until the fetch returns
   NULL; n ends up holding the number of rules printed. */
for (n = 0; (s = rulexdb_fetch_rule(db, dataset, n + 1)); n++)
314
(void)printf("%s\n", s);
319
(void)fprintf(stderr, "Listing %s dictionary\n",
320
rulexdb_dataset_name(dataset));
321
/* The explicit dictionary is traversed through RULEXDB_EXCEPTION_RAW. */
if (RULEXDB_EXCEPTION == dataset)
322
dataset = RULEXDB_EXCEPTION_RAW;
323
/* Sequential traversal: DB_FIRST positions at the start, DB_NEXT advances;
   the loop stops at the first result other than RULEXDB_SUCCESS. */
for (ret = rulexdb_seq(db, key, value, dataset, DB_FIRST);
324
ret == RULEXDB_SUCCESS;
325
ret = rulexdb_seq(db, key, value, dataset, DB_NEXT))
327
(void)printf("%s %s\n", key, value);
331
/* The traversal result is examined afterwards; the branch bodies are not
   visible, so which outcome leads to the message below cannot be told. */
if (ret == RULEXDB_SPECIAL)
334
(void)fprintf(stderr, "Database corruption\n");
337
/* Summary with singular/plural wording. */
(void)fprintf(stderr, "%d record", n);
339
(void)fputs(" has", stderr);
340
else (void)fputs("s have", stderr);
341
(void)fputs(" been extracted from the database\n", stderr);
347
/* Basic form retrieval for the word in t. ret stays RULEXDB_SPECIAL unless
   it is switched to RULEXDB_SUCCESS below. The value returned by
   rulexdb_lexbase() replaces k, and the loop runs while k is positive
   after the increment. */
ret = RULEXDB_SPECIAL;
348
for (k = 1; k > 0; k++)
350
k = rulexdb_lexbase(db, t, key, k);
353
ret = RULEXDB_SUCCESS;
355
/* Printed as number, tab, basic form. */
(void)printf("%d\t%s\n", k, key);
369
/* Fetch rule number k from the chosen ruleset and copy it into value.
   The conditions guarding these statements are not visible. */
d = rulexdb_fetch_rule(db, dataset, k);
372
(void)strcpy(value, d);
373
ret = RULEXDB_SUCCESS;
375
else ret = RULEXDB_SPECIAL;
377
else ret = RULEXDB_EINVKEY;
381
/* Lower-case the search key in place, then search.
   NOTE(review): s[k] is a plain char; under an 8-bit locale such as KOI8-R
   a negative value may reach tolower(), which is undefined behaviour --
   the argument should be cast to unsigned char. */
for (k = 0; s[k]; k++)
383
s[k] = tolower(s[k]);
384
ret = rulexdb_search(db, s, value, search_mode);
388
/* Search result handling (the switch header is not visible). */
case RULEXDB_SPECIAL:
389
if (rules_data) quiet = 1;
390
case RULEXDB_SUCCESS:
392
(void)printf("%s\n", value);
394
case RULEXDB_EINVKEY:
395
(void)fprintf(stderr, "Invalid key: %s\n", s);
398
(void)fprintf(stderr, "DB search error\n");
406
/* Test action: the dictionary file named by t becomes the standard input. */
if (!freopen(t, "r", stdin))
408
(void)fprintf(stderr, "Cannot open %s for input\n", t);
412
(void)fputs("Testing the database\n", stderr);
413
/* n is the 1-based input line number, i counts mismatches (see the
   summary below). */
for (n = 1, i = 0; fgets(line, 256, stdin); n++)
415
if (strlen(line) > RULEXDB_MAX_RECORD_SIZE)
418
(void)fprintf(stderr,
419
"%s:%i: warning: Record too long. Ignored.\n",
421
else (void)fprintf(stderr,
422
"Ignored too long record: %s\n",
427
/* Lower-case the whole line in place.
   NOTE(review): same plain-char concern for isupper()/tolower() as above. */
for (k = 0; line[k]; k++)
428
if (isupper(line[k]))
429
line[k] = tolower(line[k]);
430
/* The first space-delimited token is the key.
   NOTE(review): strtok() returns NULL when the line holds no token (for
   example only spaces); unless a guard exists in a line not visible here,
   strlen(s) would then dereference NULL. */
s = strtok(line, " ");
431
if (strlen(s) > RULEXDB_MAX_KEY_SIZE)
434
(void)fprintf(stderr,
435
"%s:%i: warning: Too long key. Ignored.\n",
437
else (void)fprintf(stderr, "Ignored too long key: %s\n", s);
441
/* Keep the key, then take the expected value as the next token.
   NOTE(review): s may become NULL here when the line has no second token;
   it is later passed to strcmp() and printf("%s") -- confirm a guard. */
(void)strcpy(key, s);
442
s = strtok(NULL, " \n");
443
ret = rulexdb_search(db, key, value, search_mode);
446
/* Both result codes lead to the comparison of the expected value with the
   one found. */
case RULEXDB_SUCCESS:
447
case RULEXDB_SPECIAL:
448
if (strcmp(s, value))
450
/* A mismatching record goes to the standard output, the warning to stderr. */
(void)printf("%s %s\n", key, s);
453
(void)fprintf(stderr,
454
"%s:%i: warning: Mismatch found.\n",
458
case RULEXDB_EINVKEY:
460
(void)fprintf(stderr,
461
"%s:%i: warning: Illegal symbols in key. Ignored.\n",
463
else (void)fprintf(stderr,
464
"Ignored key containing illegal symbols: %s\n",
468
case RULEXDB_EINVREC:
470
(void)fprintf(stderr,
471
"%s:%i: warning: Invalid record. Ignored.\n",
473
else (void)fprintf(stderr,
474
"Ignored invalid record: %s %s\n",
480
(void)fprintf(stderr,
481
"%s:%i: error: data retrieving error\n",
483
else (void)fprintf(stderr, "Data retrieving error\n");
491
/* Test summary; n is decremented first because the loop counter is one
   past the number of lines read. */
(void)fprintf(stderr, "%i word", --n);
493
(void)fputs(" has", stderr);
494
else (void)fputs("s have", stderr);
495
(void)fputs(" been checked\n", stderr);
498
(void)fprintf(stderr, "%d invalid record", invalid);
499
if (invalid == 1) (void)fputs(" was", stderr);
500
else (void)fputs("s were", stderr);
501
(void)fputs(" ignored\n", stderr);
505
(void)fprintf(stderr, "%d mismatch", i);
506
if (i != 1) (void)fputs("es", stderr);
507
(void)fputs(" found\n", stderr);
509
else (void)fputs("No mismatches found\n", stderr);
515
/* Exit status of the querying actions. */
if (ret) return EXIT_FAILURE;
516
else return EXIT_SUCCESS;
523
(void)fprintf(stderr, "Invalid dataset specification\n");
526
/* Cleaning action: the database is opened in RULEXDB_UPDATE mode. */
db = rulexdb_open(db_path, RULEXDB_UPDATE);
533
(void)fputs("Cleaning the database\n", stderr);
535
/* During cleaning the search uses the forms and rules stages only. */
search_mode = RULEXDB_FORMS | RULEXDB_RULES;
536
/* Pass over the RULEXDB_LEXBASE dataset. The prefix ruleset is loaded only
   when that dataset was requested explicitly, which is also the only case
   where detect_implicit() is consulted. */
if ((dataset == RULEXDB_DEFAULT) || (dataset == RULEXDB_LEXBASE))
538
if (dataset == RULEXDB_LEXBASE)
539
(void)rulexdb_load_ruleset(db, RULEXDB_PREFIX);
540
for (ret = rulexdb_seq(db, key, value, RULEXDB_LEXBASE, DB_FIRST);
541
ret == RULEXDB_SUCCESS;
542
ret = rulexdb_seq(db, key, value, RULEXDB_LEXBASE, DB_NEXT))
543
/* Records whose key rulexdb_classify() accepts are removed; the removal
   call reports success with a zero result. */
if (rulexdb_classify(db, key) == RULEXDB_SUCCESS)
545
if (!rulexdb_remove_this_item(db, RULEXDB_LEXBASE))
548
else if (dataset == RULEXDB_LEXBASE)
550
if ((detect_implicit(db, key, value) > 0) &&
551
!rulexdb_remove_this_item(db, RULEXDB_LEXBASE))
555
/* Pass over the RULEXDB_EXCEPTION dataset: a record is removed when the
   search yields the same value as the one stored.
   NOTE(review): the result of rulexdb_search() is ignored, so line may
   hold stale content from a previous iteration when the search fails --
   confirm that this cannot cause a false match. */
if ((dataset == RULEXDB_DEFAULT) || (dataset == RULEXDB_EXCEPTION))
556
for (ret = rulexdb_seq(db, key, value, RULEXDB_EXCEPTION, DB_FIRST);
557
ret == RULEXDB_SUCCESS;
558
ret = rulexdb_seq(db, key, value, RULEXDB_EXCEPTION, DB_NEXT))
560
(void)rulexdb_search(db, key, line, search_mode);
561
if (!strcmp(line, value))
562
if (!rulexdb_remove_this_item(db, RULEXDB_EXCEPTION))
568
/* Cleaning summary, assembled from fragments. */
(void)fprintf(stderr, "%i redundant record", n);
569
else (void)fputs("No redundant record", stderr);
571
(void)fputs(" has", stderr);
572
else (void)fputs("s have", stderr);
573
(void)fputs(" been ", stderr);
575
(void)fputs("removed from the database", stderr);
576
else (void)fputs("encountered", stderr);
577
(void)fputc('\n', stderr);
585
/* Apparently the deletion/discarding actions: the dataset has to be given
   explicitly. */
if (dataset == RULEXDB_DEFAULT)
587
(void)fprintf(stderr, "Dataset must be specified explicitly\n");
590
db = rulexdb_open(db_path, RULEXDB_UPDATE);
601
(void)fprintf(stderr, "Discarding %s ruleset\n",
602
rulexdb_dataset_name(dataset));
603
/* k receives the result of the discard call and is later printed as the
   number of removed records. */
k = rulexdb_discard_ruleset(db, dataset);
610
/* Removal of a single rule by its number k. */
ret = rulexdb_remove_rule(db, dataset, k);
611
else ret = RULEXDB_EINVKEY;
617
(void)fprintf(stderr, "Discarding %s dictionary\n",
618
rulexdb_dataset_name(dataset));
619
k = rulexdb_discard_dictionary(db, dataset);
624
/* Lower-case the key to delete in place.
   NOTE(review): same plain-char concern for tolower() as above. */
for (k = 0; d[k]; k++)
626
d[k] = tolower(d[k]);
627
ret = rulexdb_remove_item(db, d, dataset);
631
ret = RULEXDB_SUCCESS;
634
(void)fprintf(stderr, "%d record", k);
636
(void)fputs(" has", stderr);
637
else (void)fputs("s have", stderr);
638
(void)fputs(" been removed from the database\n", stderr);
641
if (ret == RULEXDB_EINVKEY)
642
(void)fprintf(stderr, "Invalid key: %s\n", d);
644
/* Exit status of the deletion/discarding actions. */
if (ret) return EXIT_FAILURE;
645
else return EXIT_SUCCESS;
651
/* Insertion: the file named by srcf becomes the standard input. */
if (!freopen(srcf, "r", stdin))
653
(void)fprintf(stderr, "Cannot open %s for input\n", srcf);
656
/* The database is opened in RULEXDB_CREATE mode for insertion. */
db = rulexdb_open(db_path, RULEXDB_CREATE);
670
/* When rules are replaced, the existing ruleset is discarded first. */
(void)fprintf(stderr, "Replacing %s ruleset\n",
671
rulexdb_dataset_name(dataset));
672
(void)rulexdb_discard_ruleset(db, dataset);
675
(void)fprintf(stderr, "Adding rules to %s ruleset\n",
676
rulexdb_dataset_name(dataset));
678
/* n is the 1-based input line number. */
for (n = 1; fgets(line, 256, stdin); n++)
680
if (strlen(line) > RULEXDB_MAX_RECORD_SIZE)
683
(void)fprintf(stderr,
684
"%s:%d: warning: Record too long. Ignored.\n",
686
else (void)fprintf(stderr, "Ignored too long record: %s\n", line);
690
/* Lower-case the line in place.
   NOTE(review): same plain-char concern for isupper()/tolower() as above. */
for (k = 0; line[k]; k++)
691
if (isupper(line[k]))
692
line[k] = tolower(line[k]);
695
/* Lexical record: the first space-delimited token is the key, the rest of
   the line up to the newline is the value.
   NOTE(review): strtok() may return NULL for a line without tokens; unless
   guarded by a line not visible here, strlen(s) would dereference NULL. */
s = strtok(line, " ");
696
if (strlen(s) > RULEXDB_MAX_KEY_SIZE)
699
(void)fprintf(stderr,
700
"%s:%d: warning: Too long key. Ignored.\n",
702
else (void)fprintf(stderr, "Ignored too long key: %s\n", s);
706
t = strtok(NULL, "\n");
707
ret = rulexdb_subscribe_item(db, s, t,
708
dataset, replace_mode);
710
/* Rule record: the whole line without its newline is stored as one rule. */
else ret = rulexdb_subscribe_rule(db, strtok(line, "\n"), dataset, 0);
713
/* Result handling of the store operation (the switch header is not
   visible). */
case RULEXDB_SUCCESS:
716
case RULEXDB_SPECIAL:
717
/* In replace mode this result still counts as a stored record. */
if (replace_mode) i++;
718
if (!(quiet || rules_data))
722
(void)fprintf(stderr,
723
"%s:%d: warning: Duplicate entry. ",
726
(void)fprintf(stderr, "Replaced.\n");
727
else (void)fprintf(stderr, "Ignored.\n");
732
(void)fputs("Replaced", stderr);
733
else (void)fputs("Ignored", stderr);
734
(void)fprintf(stderr, " duplicate entry: %s %s\n", s, t);
739
case RULEXDB_EINVKEY:
741
(void)fprintf(stderr,
742
"%s:%d: warning: Illegal symbols in key. Ignored.\n",
744
else (void)fprintf(stderr,
745
"Ignored key containing illegal symbols: %s\n",
749
case RULEXDB_EINVREC:
751
(void)fprintf(stderr,
752
"%s:%d: warning: Invalid record. Ignored.\n",
756
(void)fprintf(stderr, "Ignored invalid record: %s", s);
758
(void)fprintf(stderr, " %s", t);
759
(void)fputc('\n', stderr);
766
(void)fprintf(stderr,
767
"%s:%d: error: data storing error\n",
769
else (void)fprintf(stderr, "Data storing error\n");
779
/* Final statistics of the insertion. */
(void)fprintf(stderr, "Total: %d record", n);
780
if (n != 1) (void)fputc('s', stderr);
781
(void)fprintf(stderr, " processed.\n");
783
(void)fprintf(stderr, "Invalid records: %d\n", invalid);
785
(void)fprintf(stderr, "Duplicates: %d\n", duplicate);
786
(void)fprintf(stderr, "%d record", i);
788
(void)fputs(" has", stderr);
789
else (void)fputs("s have", stderr);
790
(void)fputs(" been put into the database\n", stderr);