/*
 * txtquery io
 * Teodor Sigaev <teodor@stack.net>
 * contrib/ltree/ltxtquery_io.c
 *
 * Project: PolarDB-for-PostgreSQL
 */
6#include "postgres.h"
7
8#include <ctype.h>
9
10#include "crc32.h"
11#include "ltree.h"
12#include "miscadmin.h"
13
/*
 * Function-manager declarations for the two SQL-callable entry points
 * defined at the bottom of this file.
 */
PG_FUNCTION_INFO_V1(ltxtq_in);
PG_FUNCTION_INFO_V1(ltxtq_out);
16
17
/*
 * Tokenizer states, stored in QPRS_STATE.state and switched on by
 * gettoken_query().
 */
#define WAITOPERAND		1		/* expecting '!', '(' or the start of an operand */
#define INOPERAND		2		/* inside an operand or its trailing modifiers */
#define WAITOPERATOR	3		/* expecting '&', '|', ')' or end of input */
22
23/*
24* node of query tree, also used
25* for storing polish notation in parser
26*/
27typedef struct NODE
28{
29int32 type;
30int32 val;
31int16 distance;
32int16 length;
33uint16 flag;
34struct NODE *next;
35} NODE;
36
37typedef struct
38{
39char *buf;
40int32 state;
41int32 count;
42/* reverse polish notation in list (for temporary usage) */
43NODE *str;
44/* number in str */
45int32 num;
46
47/* user-friendly operand */
48int32 lenop;
49int32 sumlen;
50char *op;
51char *curop;
52} QPRS_STATE;
53
54/*
55* get token from query string
56*/
57static int32
58gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint16 *flag)
59{
60int charlen;
61
62for (;;)
63{
64charlen = pg_mblen(state->buf);
65
66switch (state->state)
67{
68case WAITOPERAND:
69if (charlen == 1 && t_iseq(state->buf, '!'))
70{
71(state->buf)++;
72*val = (int32) '!';
73return OPR;
74}
75else if (charlen == 1 && t_iseq(state->buf, '('))
76{
77state->count++;
78(state->buf)++;
79return OPEN;
80}
81else if (ISALNUM(state->buf))
82{
83state->state = INOPERAND;
84*strval = state->buf;
85*lenval = charlen;
86*flag = 0;
87}
88else if (!t_isspace(state->buf))
89ereport(ERROR,
90(errcode(ERRCODE_SYNTAX_ERROR),
91errmsg("operand syntax error")));
92break;
93case INOPERAND:
94if (ISALNUM(state->buf))
95{
96if (*flag)
97ereport(ERROR,
98(errcode(ERRCODE_SYNTAX_ERROR),
99errmsg("modifiers syntax error")));
100*lenval += charlen;
101}
102else if (charlen == 1 && t_iseq(state->buf, '%'))
103*flag |= LVAR_SUBLEXEME;
104else if (charlen == 1 && t_iseq(state->buf, '@'))
105*flag |= LVAR_INCASE;
106else if (charlen == 1 && t_iseq(state->buf, '*'))
107*flag |= LVAR_ANYEND;
108else
109{
110state->state = WAITOPERATOR;
111return VAL;
112}
113break;
114case WAITOPERATOR:
115if (charlen == 1 && (t_iseq(state->buf, '&') || t_iseq(state->buf, '|')))
116{
117state->state = WAITOPERAND;
118*val = (int32) *(state->buf);
119(state->buf)++;
120return OPR;
121}
122else if (charlen == 1 && t_iseq(state->buf, ')'))
123{
124(state->buf)++;
125state->count--;
126return (state->count < 0) ? ERR : CLOSE;
127}
128else if (*(state->buf) == '\0')
129return (state->count) ? ERR : END;
130else if (charlen == 1 && !t_iseq(state->buf, ' '))
131return ERR;
132break;
133default:
134return ERR;
135break;
136}
137
138state->buf += charlen;
139}
140}
141
142/*
143* push new one in polish notation reverse view
144*/
145static void
146pushquery(QPRS_STATE *state, int32 type, int32 val, int32 distance, int32 lenval, uint16 flag)
147{
148NODE *tmp = (NODE *) palloc(sizeof(NODE));
149
150tmp->type = type;
151tmp->val = val;
152tmp->flag = flag;
153if (distance > 0xffff)
154ereport(ERROR,
155(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
156errmsg("value is too big")));
157if (lenval > 0xff)
158ereport(ERROR,
159(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
160errmsg("operand is too long")));
161tmp->distance = distance;
162tmp->length = lenval;
163tmp->next = state->str;
164state->str = tmp;
165state->num++;
166}
167
168/*
169* This function is used for query_txt parsing
170*/
171static void
172pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag)
173{
174if (lenval > 0xffff)
175ereport(ERROR,
176(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
177errmsg("word is too long")));
178
179pushquery(state, type, ltree_crc32_sz(strval, lenval),
180state->curop - state->op, lenval, flag);
181
182while (state->curop - state->op + lenval + 1 >= state->lenop)
183{
184int32 tmp = state->curop - state->op;
185
186state->lenop *= 2;
187state->op = (char *) repalloc((void *) state->op, state->lenop);
188state->curop = state->op + tmp;
189}
190memcpy((void *) state->curop, (void *) strval, lenval);
191state->curop += lenval;
192*(state->curop) = '\0';
193state->curop++;
194state->sumlen += lenval + 1;
195return;
196}
197
198#define STACKDEPTH 32
199/*
200* make polish notation of query
201*/
202static int32
203makepol(QPRS_STATE *state)
204{
205int32 val = 0,
206type;
207int32 lenval = 0;
208char *strval = NULL;
209int32 stack[STACKDEPTH];
210int32 lenstack = 0;
211uint16 flag = 0;
212
213/* since this function recurses, it could be driven to stack overflow */
214check_stack_depth();
215
216while ((type = gettoken_query(state, &val, &lenval, &strval, &flag)) != END)
217{
218switch (type)
219{
220case VAL:
221pushval_asis(state, VAL, strval, lenval, flag);
222while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
223stack[lenstack - 1] == (int32) '!'))
224{
225lenstack--;
226pushquery(state, OPR, stack[lenstack], 0, 0, 0);
227}
228break;
229case OPR:
230if (lenstack && val == (int32) '|')
231pushquery(state, OPR, val, 0, 0, 0);
232else
233{
234if (lenstack == STACKDEPTH)
235/* internal error */
236elog(ERROR, "stack too short");
237stack[lenstack] = val;
238lenstack++;
239}
240break;
241case OPEN:
242if (makepol(state) == ERR)
243return ERR;
244while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
245stack[lenstack - 1] == (int32) '!'))
246{
247lenstack--;
248pushquery(state, OPR, stack[lenstack], 0, 0, 0);
249}
250break;
251case CLOSE:
252while (lenstack)
253{
254lenstack--;
255pushquery(state, OPR, stack[lenstack], 0, 0, 0);
256};
257return END;
258break;
259case ERR:
260default:
261ereport(ERROR,
262(errcode(ERRCODE_SYNTAX_ERROR),
263errmsg("syntax error")));
264
265return ERR;
266
267}
268}
269while (lenstack)
270{
271lenstack--;
272pushquery(state, OPR, stack[lenstack], 0, 0, 0);
273};
274return END;
275}
276
277static void
278findoprnd(ITEM *ptr, int32 *pos)
279{
280/* since this function recurses, it could be driven to stack overflow. */
281check_stack_depth();
282
283if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
284{
285ptr[*pos].left = 0;
286(*pos)++;
287}
288else if (ptr[*pos].val == (int32) '!')
289{
290ptr[*pos].left = 1;
291(*pos)++;
292findoprnd(ptr, pos);
293}
294else
295{
296ITEM *curitem = &ptr[*pos];
297int32 tmp = *pos;
298
299(*pos)++;
300findoprnd(ptr, pos);
301curitem->left = *pos - tmp;
302findoprnd(ptr, pos);
303}
304}
305
306
307/*
308* input
309*/
310static ltxtquery *
311queryin(char *buf)
312{
313QPRS_STATE state;
314int32 i;
315ltxtquery *query;
316int32 commonlen;
317ITEM *ptr;
318NODE *tmp;
319int32 pos = 0;
320
321#ifdef BS_DEBUG
322char pbuf[16384],
323*cur;
324#endif
325
326/* init state */
327state.buf = buf;
328state.state = WAITOPERAND;
329state.count = 0;
330state.num = 0;
331state.str = NULL;
332
333/* init list of operand */
334state.sumlen = 0;
335state.lenop = 64;
336state.curop = state.op = (char *) palloc(state.lenop);
337*(state.curop) = '\0';
338
339/* parse query & make polish notation (postfix, but in reverse order) */
340makepol(&state);
341if (!state.num)
342ereport(ERROR,
343(errcode(ERRCODE_SYNTAX_ERROR),
344errmsg("syntax error"),
345errdetail("Empty query.")));
346
347if (LTXTQUERY_TOO_BIG(state.num, state.sumlen))
348ereport(ERROR,
349(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
350errmsg("ltxtquery is too large")));
351commonlen = COMPUTESIZE(state.num, state.sumlen);
352
353query = (ltxtquery *) palloc0(commonlen);
354SET_VARSIZE(query, commonlen);
355query->size = state.num;
356ptr = GETQUERY(query);
357
358/* set item in polish notation */
359for (i = 0; i < state.num; i++)
360{
361ptr[i].type = state.str->type;
362ptr[i].val = state.str->val;
363ptr[i].distance = state.str->distance;
364ptr[i].length = state.str->length;
365ptr[i].flag = state.str->flag;
366tmp = state.str->next;
367pfree(state.str);
368state.str = tmp;
369}
370
371/* set user friendly-operand view */
372memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
373pfree(state.op);
374
375/* set left operand's position for every operator */
376pos = 0;
377findoprnd(ptr, &pos);
378
379return query;
380}
381
382/*
383* in without morphology
384*/
385Datum
386ltxtq_in(PG_FUNCTION_ARGS)
387{
388PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0)));
389}
390
391/*
392* out function
393*/
394typedef struct
395{
396ITEM *curpol;
397char *buf;
398char *cur;
399char *op;
400int32 buflen;
401} INFIX;
402
/*
 * Make sure (inf)->buf has room for "addsize" more bytes plus a terminator
 * after the current write position, doubling the buffer as often as needed
 * and re-pointing (inf)->cur into the reallocated buffer.
 *
 * Wrapped in do { } while (0) so that the macro behaves as a single
 * statement, including inside an unbraced if/else.
 */
#define RESIZEBUF(inf, addsize) \
	do { \
		while (((inf)->cur - (inf)->buf) + (addsize) + 1 >= (inf)->buflen) \
		{ \
			int32		len = (inf)->cur - (inf)->buf; \
			(inf)->buflen *= 2; \
			(inf)->buf = (char *) repalloc((void *) (inf)->buf, (inf)->buflen); \
			(inf)->cur = (inf)->buf + len; \
		} \
	} while (0)
411
412/*
413* recursive walk on tree and print it in
414* infix (human-readable) view
415*/
416static void
417infix(INFIX *in, bool first)
418{
419/* since this function recurses, it could be driven to stack overflow. */
420check_stack_depth();
421
422if (in->curpol->type == VAL)
423{
424char *op = in->op + in->curpol->distance;
425
426RESIZEBUF(in, in->curpol->length * 2 + 5);
427while (*op)
428{
429*(in->cur) = *op;
430op++;
431in->cur++;
432}
433if (in->curpol->flag & LVAR_SUBLEXEME)
434{
435*(in->cur) = '%';
436in->cur++;
437}
438if (in->curpol->flag & LVAR_INCASE)
439{
440*(in->cur) = '@';
441in->cur++;
442}
443if (in->curpol->flag & LVAR_ANYEND)
444{
445*(in->cur) = '*';
446in->cur++;
447}
448*(in->cur) = '\0';
449in->curpol++;
450}
451else if (in->curpol->val == (int32) '!')
452{
453bool isopr = false;
454
455RESIZEBUF(in, 1);
456*(in->cur) = '!';
457in->cur++;
458*(in->cur) = '\0';
459in->curpol++;
460if (in->curpol->type == OPR)
461{
462isopr = true;
463RESIZEBUF(in, 2);
464sprintf(in->cur, "( ");
465in->cur = strchr(in->cur, '\0');
466}
467infix(in, isopr);
468if (isopr)
469{
470RESIZEBUF(in, 2);
471sprintf(in->cur, " )");
472in->cur = strchr(in->cur, '\0');
473}
474}
475else
476{
477int32 op = in->curpol->val;
478INFIX nrm;
479
480in->curpol++;
481if (op == (int32) '|' && !first)
482{
483RESIZEBUF(in, 2);
484sprintf(in->cur, "( ");
485in->cur = strchr(in->cur, '\0');
486}
487
488nrm.curpol = in->curpol;
489nrm.op = in->op;
490nrm.buflen = 16;
491nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
492
493/* get right operand */
494infix(&nrm, false);
495
496/* get & print left operand */
497in->curpol = nrm.curpol;
498infix(in, false);
499
500/* print operator & right operand */
501RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
502sprintf(in->cur, " %c %s", op, nrm.buf);
503in->cur = strchr(in->cur, '\0');
504pfree(nrm.buf);
505
506if (op == (int32) '|' && !first)
507{
508RESIZEBUF(in, 2);
509sprintf(in->cur, " )");
510in->cur = strchr(in->cur, '\0');
511}
512}
513}
514
515Datum
516ltxtq_out(PG_FUNCTION_ARGS)
517{
518ltxtquery *query = PG_GETARG_LTXTQUERY_P(0);
519INFIX nrm;
520
521if (query->size == 0)
522ereport(ERROR,
523(errcode(ERRCODE_SYNTAX_ERROR),
524errmsg("syntax error"),
525errdetail("Empty query.")));
526
527nrm.curpol = GETQUERY(query);
528nrm.buflen = 32;
529nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
530*(nrm.cur) = '\0';
531nrm.op = GETOPERAND(query);
532infix(&nrm, true);
533
534PG_FREE_IF_COPY(query, 0);
535PG_RETURN_POINTER(nrm.buf);
536}
537