PolarDB-for-PostgreSQL
614 строк · 14.1 Кб
1/*
2* contrib/btree_gist/btree_utils_var.c
3*/
4#include "postgres.h"5
6#include "btree_gist.h"7
8#include <math.h>9#include <limits.h>10#include <float.h>11
12#include "btree_utils_var.h"13#include "utils/pg_locale.h"14#include "utils/builtins.h"15#include "utils/rel.h"16
17/* used for key sorting */
18typedef struct19{
20int i;21GBT_VARKEY *t;22} Vsrt;23
24typedef struct25{
26const gbtree_vinfo *tinfo;27Oid collation;28FmgrInfo *flinfo;29} gbt_vsrt_arg;30
31
32PG_FUNCTION_INFO_V1(gbt_var_decompress);33PG_FUNCTION_INFO_V1(gbt_var_fetch);34
35
36Datum
37gbt_var_decompress(PG_FUNCTION_ARGS)38{
39GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);40GBT_VARKEY *key = (GBT_VARKEY *) PG_DETOAST_DATUM(entry->key);41
42if (key != (GBT_VARKEY *) DatumGetPointer(entry->key))43{44GISTENTRY *retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));45
46gistentryinit(*retval, PointerGetDatum(key),47entry->rel, entry->page,48entry->offset, false);49
50PG_RETURN_POINTER(retval);51}52
53PG_RETURN_POINTER(entry);54}
55
56/* Returns a better readable representation of variable key ( sets pointer ) */
57GBT_VARKEY_R
58gbt_var_key_readable(const GBT_VARKEY *k)59{
60GBT_VARKEY_R r;61
62r.lower = (bytea *) &(((char *) k)[VARHDRSZ]);63if (VARSIZE(k) > (VARHDRSZ + (VARSIZE(r.lower))))64r.upper = (bytea *) &(((char *) k)[VARHDRSZ + INTALIGN(VARSIZE(r.lower))]);65else66r.upper = r.lower;67return r;68}
69
70
71/*
72* Create a leaf-entry to store in the index, from a single Datum.
73*/
74static GBT_VARKEY *75gbt_var_key_from_datum(const struct varlena *u)76{
77int32 lowersize = VARSIZE(u);78GBT_VARKEY *r;79
80r = (GBT_VARKEY *) palloc(lowersize + VARHDRSZ);81memcpy(VARDATA(r), u, lowersize);82SET_VARSIZE(r, lowersize + VARHDRSZ);83
84return r;85}
86
87/*
88* Create an entry to store in the index, from lower and upper bound.
89*/
90GBT_VARKEY *91gbt_var_key_copy(const GBT_VARKEY_R *u)92{
93int32 lowersize = VARSIZE(u->lower);94int32 uppersize = VARSIZE(u->upper);95GBT_VARKEY *r;96
97r = (GBT_VARKEY *) palloc0(INTALIGN(lowersize) + uppersize + VARHDRSZ);98memcpy(VARDATA(r), u->lower, lowersize);99memcpy(VARDATA(r) + INTALIGN(lowersize), u->upper, uppersize);100SET_VARSIZE(r, INTALIGN(lowersize) + uppersize + VARHDRSZ);101
102return r;103}
104
105
106static GBT_VARKEY *107gbt_var_leaf2node(GBT_VARKEY *leaf, const gbtree_vinfo *tinfo, FmgrInfo *flinfo)108{
109GBT_VARKEY *out = leaf;110
111if (tinfo->f_l2n)112out = tinfo->f_l2n(leaf, flinfo);113
114return out;115}
116
117
118/*
119* returns the common prefix length of a node key
120*/
121static int32122gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo)123{
124GBT_VARKEY_R r = gbt_var_key_readable(node);125int32 i = 0;126int32 l = 0;127int32 t1len = VARSIZE(r.lower) - VARHDRSZ;128int32 t2len = VARSIZE(r.upper) - VARHDRSZ;129int32 ml = Min(t1len, t2len);130char *p1 = VARDATA(r.lower);131char *p2 = VARDATA(r.upper);132
133if (ml == 0)134return 0;135
136while (i < ml)137{138if (tinfo->eml > 1 && l == 0)139{140if ((l = pg_mblen(p1)) != pg_mblen(p2))141{142return i;143}144}145if (*p1 != *p2)146{147if (tinfo->eml > 1)148{149return (i - l + 1);150}151else152{153return i;154}155}156
157p1++;158p2++;159l--;160i++;161}162return ml; /* lower == upper */163}
164
165
166/*
167* returns true, if query matches prefix ( common prefix )
168*/
169static bool170gbt_bytea_pf_match(const bytea *pf, const bytea *query, const gbtree_vinfo *tinfo)171{
172bool out = false;173int32 qlen = VARSIZE(query) - VARHDRSZ;174int32 nlen = VARSIZE(pf) - VARHDRSZ;175
176if (nlen <= qlen)177{178char *q = VARDATA(query);179char *n = VARDATA(pf);180
181out = (memcmp(q, n, nlen) == 0);182}183
184return out;185}
186
187
188/*
189* returns true, if query matches node using common prefix
190*/
191static bool192gbt_var_node_pf_match(const GBT_VARKEY_R *node, const bytea *query, const gbtree_vinfo *tinfo)193{
194return (tinfo->trnc && (195gbt_bytea_pf_match(node->lower, query, tinfo) ||196gbt_bytea_pf_match(node->upper, query, tinfo)197));198}
199
200
201/*
202* truncates / compresses the node key
203* cpf_length .. common prefix length
204*/
205static GBT_VARKEY *206gbt_var_node_truncate(const GBT_VARKEY *node, int32 cpf_length, const gbtree_vinfo *tinfo)207{
208GBT_VARKEY *out = NULL;209GBT_VARKEY_R r = gbt_var_key_readable(node);210int32 len1 = VARSIZE(r.lower) - VARHDRSZ;211int32 len2 = VARSIZE(r.upper) - VARHDRSZ;212int32 si;213char *out2;214
215len1 = Min(len1, (cpf_length + 1));216len2 = Min(len2, (cpf_length + 1));217
218si = 2 * VARHDRSZ + INTALIGN(len1 + VARHDRSZ) + len2;219out = (GBT_VARKEY *) palloc0(si);220SET_VARSIZE(out, si);221
222memcpy(VARDATA(out), r.lower, len1 + VARHDRSZ);223SET_VARSIZE(VARDATA(out), len1 + VARHDRSZ);224
225out2 = VARDATA(out) + INTALIGN(len1 + VARHDRSZ);226memcpy(out2, r.upper, len2 + VARHDRSZ);227SET_VARSIZE(out2, len2 + VARHDRSZ);228
229return out;230}
231
232
233
234void
235gbt_var_bin_union(Datum *u, GBT_VARKEY *e, Oid collation,236const gbtree_vinfo *tinfo, FmgrInfo *flinfo)237{
238GBT_VARKEY_R eo = gbt_var_key_readable(e);239GBT_VARKEY_R nr;240
241if (eo.lower == eo.upper) /* leaf */242{243GBT_VARKEY *tmp;244
245tmp = gbt_var_leaf2node(e, tinfo, flinfo);246if (tmp != e)247eo = gbt_var_key_readable(tmp);248}249
250if (DatumGetPointer(*u))251{252GBT_VARKEY_R ro = gbt_var_key_readable((GBT_VARKEY *) DatumGetPointer(*u));253bool update = false;254
255nr.lower = ro.lower;256nr.upper = ro.upper;257
258if (tinfo->f_cmp(ro.lower, eo.lower, collation, flinfo) > 0)259{260nr.lower = eo.lower;261update = true;262}263
264if (tinfo->f_cmp(ro.upper, eo.upper, collation, flinfo) < 0)265{266nr.upper = eo.upper;267update = true;268}269
270if (update)271*u = PointerGetDatum(gbt_var_key_copy(&nr));272}273else274{275nr.lower = eo.lower;276nr.upper = eo.upper;277*u = PointerGetDatum(gbt_var_key_copy(&nr));278}279}
280
281
282GISTENTRY *283gbt_var_compress(GISTENTRY *entry, const gbtree_vinfo *tinfo)284{
285GISTENTRY *retval;286
287if (entry->leafkey)288{289struct varlena *leaf = PG_DETOAST_DATUM(entry->key);290GBT_VARKEY *r;291
292r = gbt_var_key_from_datum(leaf);293
294retval = palloc(sizeof(GISTENTRY));295gistentryinit(*retval, PointerGetDatum(r),296entry->rel, entry->page,297entry->offset, true);298}299else300retval = entry;301
302return retval;303}
304
305
306Datum
307gbt_var_fetch(PG_FUNCTION_ARGS)308{
309GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);310GBT_VARKEY *key = (GBT_VARKEY *) PG_DETOAST_DATUM(entry->key);311GBT_VARKEY_R r = gbt_var_key_readable(key);312GISTENTRY *retval;313
314retval = palloc(sizeof(GISTENTRY));315gistentryinit(*retval, PointerGetDatum(r.lower),316entry->rel, entry->page,317entry->offset, true);318
319PG_RETURN_POINTER(retval);320}
321
322
323GBT_VARKEY *324gbt_var_union(const GistEntryVector *entryvec, int32 *size, Oid collation,325const gbtree_vinfo *tinfo, FmgrInfo *flinfo)326{
327int i = 0,328numranges = entryvec->n;329GBT_VARKEY *cur;330Datum out;331GBT_VARKEY_R rk;332
333*size = sizeof(GBT_VARKEY);334
335cur = (GBT_VARKEY *) DatumGetPointer(entryvec->vector[0].key);336rk = gbt_var_key_readable(cur);337out = PointerGetDatum(gbt_var_key_copy(&rk));338
339for (i = 1; i < numranges; i++)340{341cur = (GBT_VARKEY *) DatumGetPointer(entryvec->vector[i].key);342gbt_var_bin_union(&out, cur, collation, tinfo, flinfo);343}344
345
346/* Truncate (=compress) key */347if (tinfo->trnc)348{349int32 plen;350GBT_VARKEY *trc = NULL;351
352plen = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(out), tinfo);353trc = gbt_var_node_truncate((GBT_VARKEY *) DatumGetPointer(out), plen + 1, tinfo);354
355out = PointerGetDatum(trc);356}357
358return ((GBT_VARKEY *) DatumGetPointer(out));359}
360
361
362bool
363gbt_var_same(Datum d1, Datum d2, Oid collation,364const gbtree_vinfo *tinfo, FmgrInfo *flinfo)365{
366GBT_VARKEY *t1 = (GBT_VARKEY *) DatumGetPointer(d1);367GBT_VARKEY *t2 = (GBT_VARKEY *) DatumGetPointer(d2);368GBT_VARKEY_R r1,369r2;370
371r1 = gbt_var_key_readable(t1);372r2 = gbt_var_key_readable(t2);373
374return (tinfo->f_cmp(r1.lower, r2.lower, collation, flinfo) == 0 &&375tinfo->f_cmp(r1.upper, r2.upper, collation, flinfo) == 0);376}
377
378
379float *380gbt_var_penalty(float *res, const GISTENTRY *o, const GISTENTRY *n,381Oid collation, const gbtree_vinfo *tinfo, FmgrInfo *flinfo)382{
383GBT_VARKEY *orge = (GBT_VARKEY *) DatumGetPointer(o->key);384GBT_VARKEY *newe = (GBT_VARKEY *) DatumGetPointer(n->key);385GBT_VARKEY_R ok,386nk;387
388*res = 0.0;389
390nk = gbt_var_key_readable(newe);391if (nk.lower == nk.upper) /* leaf */392{393GBT_VARKEY *tmp;394
395tmp = gbt_var_leaf2node(newe, tinfo, flinfo);396if (tmp != newe)397nk = gbt_var_key_readable(tmp);398}399ok = gbt_var_key_readable(orge);400
401if ((VARSIZE(ok.lower) - VARHDRSZ) == 0 && (VARSIZE(ok.upper) - VARHDRSZ) == 0)402*res = 0.0;403else if (!((tinfo->f_cmp(nk.lower, ok.lower, collation, flinfo) >= 0 ||404gbt_bytea_pf_match(ok.lower, nk.lower, tinfo)) &&405(tinfo->f_cmp(nk.upper, ok.upper, collation, flinfo) <= 0 ||406gbt_bytea_pf_match(ok.upper, nk.upper, tinfo))))407{408Datum d = PointerGetDatum(0);409double dres;410int32 ol,411ul;412
413gbt_var_bin_union(&d, orge, collation, tinfo, flinfo);414ol = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(d), tinfo);415gbt_var_bin_union(&d, newe, collation, tinfo, flinfo);416ul = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(d), tinfo);417
418if (ul < ol)419{420dres = (ol - ul); /* reduction of common prefix len */421}422else423{424GBT_VARKEY_R uk = gbt_var_key_readable((GBT_VARKEY *) DatumGetPointer(d));425unsigned char tmp[4];426
427tmp[0] = (unsigned char) (((VARSIZE(ok.lower) - VARHDRSZ) <= ul) ? 0 : (VARDATA(ok.lower)[ul]));428tmp[1] = (unsigned char) (((VARSIZE(uk.lower) - VARHDRSZ) <= ul) ? 0 : (VARDATA(uk.lower)[ul]));429tmp[2] = (unsigned char) (((VARSIZE(ok.upper) - VARHDRSZ) <= ul) ? 0 : (VARDATA(ok.upper)[ul]));430tmp[3] = (unsigned char) (((VARSIZE(uk.upper) - VARHDRSZ) <= ul) ? 0 : (VARDATA(uk.upper)[ul]));431dres = Abs(tmp[0] - tmp[1]) + Abs(tmp[3] - tmp[2]);432dres /= 256.0;433}434
435*res += FLT_MIN;436*res += (float) (dres / ((double) (ol + 1)));437*res *= (FLT_MAX / (o->rel->rd_att->natts + 1));438}439
440return res;441}
442
443
444static int445gbt_vsrt_cmp(const void *a, const void *b, void *arg)446{
447GBT_VARKEY_R ar = gbt_var_key_readable(((const Vsrt *) a)->t);448GBT_VARKEY_R br = gbt_var_key_readable(((const Vsrt *) b)->t);449const gbt_vsrt_arg *varg = (const gbt_vsrt_arg *) arg;450int res;451
452res = varg->tinfo->f_cmp(ar.lower, br.lower, varg->collation, varg->flinfo);453if (res == 0)454return varg->tinfo->f_cmp(ar.upper, br.upper, varg->collation, varg->flinfo);455
456return res;457}
458
459GIST_SPLITVEC *460gbt_var_picksplit(const GistEntryVector *entryvec, GIST_SPLITVEC *v,461Oid collation, const gbtree_vinfo *tinfo, FmgrInfo *flinfo)462{
463OffsetNumber i,464maxoff = entryvec->n - 1;465Vsrt *arr;466int svcntr = 0,467nbytes;468char *cur;469GBT_VARKEY **sv = NULL;470gbt_vsrt_arg varg;471
472arr = (Vsrt *) palloc((maxoff + 1) * sizeof(Vsrt));473nbytes = (maxoff + 2) * sizeof(OffsetNumber);474v->spl_left = (OffsetNumber *) palloc(nbytes);475v->spl_right = (OffsetNumber *) palloc(nbytes);476v->spl_ldatum = PointerGetDatum(0);477v->spl_rdatum = PointerGetDatum(0);478v->spl_nleft = 0;479v->spl_nright = 0;480
481sv = palloc(sizeof(bytea *) * (maxoff + 1));482
483/* Sort entries */484
485for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))486{487GBT_VARKEY_R ro;488
489cur = (char *) DatumGetPointer(entryvec->vector[i].key);490ro = gbt_var_key_readable((GBT_VARKEY *) cur);491if (ro.lower == ro.upper) /* leaf */492{493sv[svcntr] = gbt_var_leaf2node((GBT_VARKEY *) cur, tinfo, flinfo);494arr[i].t = sv[svcntr];495if (sv[svcntr] != (GBT_VARKEY *) cur)496svcntr++;497}498else499arr[i].t = (GBT_VARKEY *) cur;500arr[i].i = i;501}502
503/* sort */504varg.tinfo = tinfo;505varg.collation = collation;506varg.flinfo = flinfo;507qsort_arg((void *) &arr[FirstOffsetNumber],508maxoff - FirstOffsetNumber + 1,509sizeof(Vsrt),510gbt_vsrt_cmp,511(void *) &varg);512
513/* We do simply create two parts */514
515for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))516{517if (i <= (maxoff - FirstOffsetNumber + 1) / 2)518{519gbt_var_bin_union(&v->spl_ldatum, arr[i].t, collation, tinfo, flinfo);520v->spl_left[v->spl_nleft] = arr[i].i;521v->spl_nleft++;522}523else524{525gbt_var_bin_union(&v->spl_rdatum, arr[i].t, collation, tinfo, flinfo);526v->spl_right[v->spl_nright] = arr[i].i;527v->spl_nright++;528}529}530
531/* Truncate (=compress) key */532if (tinfo->trnc)533{534int32 ll = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(v->spl_ldatum), tinfo);535int32 lr = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(v->spl_rdatum), tinfo);536GBT_VARKEY *dl;537GBT_VARKEY *dr;538
539ll = Max(ll, lr);540ll++;541
542dl = gbt_var_node_truncate((GBT_VARKEY *) DatumGetPointer(v->spl_ldatum), ll, tinfo);543dr = gbt_var_node_truncate((GBT_VARKEY *) DatumGetPointer(v->spl_rdatum), ll, tinfo);544v->spl_ldatum = PointerGetDatum(dl);545v->spl_rdatum = PointerGetDatum(dr);546}547
548return v;549}
550
551
552/*
553* The GiST consistent method
554*/
555bool
556gbt_var_consistent(GBT_VARKEY_R *key,557const void *query,558StrategyNumber strategy,559Oid collation,560bool is_leaf,561const gbtree_vinfo *tinfo,562FmgrInfo *flinfo)563{
564bool retval = false;565
566switch (strategy)567{568case BTLessEqualStrategyNumber:569if (is_leaf)570retval = tinfo->f_ge(query, key->lower, collation, flinfo);571else572retval = tinfo->f_cmp(query, key->lower, collation, flinfo) >= 0573|| gbt_var_node_pf_match(key, query, tinfo);574break;575case BTLessStrategyNumber:576if (is_leaf)577retval = tinfo->f_gt(query, key->lower, collation, flinfo);578else579retval = tinfo->f_cmp(query, key->lower, collation, flinfo) >= 0580|| gbt_var_node_pf_match(key, query, tinfo);581break;582case BTEqualStrategyNumber:583if (is_leaf)584retval = tinfo->f_eq(query, key->lower, collation, flinfo);585else586retval =587(tinfo->f_cmp(key->lower, query, collation, flinfo) <= 0 &&588tinfo->f_cmp(query, key->upper, collation, flinfo) <= 0) ||589gbt_var_node_pf_match(key, query, tinfo);590break;591case BTGreaterStrategyNumber:592if (is_leaf)593retval = tinfo->f_lt(query, key->upper, collation, flinfo);594else595retval = tinfo->f_cmp(query, key->upper, collation, flinfo) <= 0596|| gbt_var_node_pf_match(key, query, tinfo);597break;598case BTGreaterEqualStrategyNumber:599if (is_leaf)600retval = tinfo->f_le(query, key->upper, collation, flinfo);601else602retval = tinfo->f_cmp(query, key->upper, collation, flinfo) <= 0603|| gbt_var_node_pf_match(key, query, tinfo);604break;605case BtreeGistNotEqualStrategyNumber:606retval = !(tinfo->f_eq(query, key->lower, collation, flinfo) &&607tinfo->f_eq(query, key->upper, collation, flinfo));608break;609default:610retval = false;611}612
613return retval;614}
615