PolarDB-for-PostgreSQL
604 строки · 14.2 Кб
/*
 * contrib/intarray/_int_gist.c
 */
4#include "postgres.h"5
6#include <limits.h>7
8#include "access/gist.h"9#include "access/stratnum.h"10
11#include "_int.h"12
/*
 * Fetch the key stored at position "pos" of the entry vector "vec",
 * viewed as an ArrayType pointer.
 */
#define GETENTRY(vec, pos) \
	((ArrayType *) DatumGetPointer((vec)->vector[(pos)].key))

/*
 * Limit on the maximum sparseness of compressed keys.
 *
 * Half of the maximum allocatable array size is the upper safe bound for
 * this limit.  Choosing a lower bound would give stronger guarantees that
 * pathological data cannot eat excessive CPU and memory, but at the price
 * of breaking indexes that may currently work (after a fashion).
 */
#define MAXNUMELTS \
	(Min((MaxAllocSize / sizeof(Datum)), \
		 ((MaxAllocSize - ARR_OVERHEAD_NONULLS(1)) / sizeof(int))) / 2)
/* or: #define MAXNUMELTS 1000000 */
25
/*
 * GiST support methods defined in this file
 */
PG_FUNCTION_INFO_V1(g_int_consistent);
PG_FUNCTION_INFO_V1(g_int_compress);
PG_FUNCTION_INFO_V1(g_int_decompress);
PG_FUNCTION_INFO_V1(g_int_penalty);
PG_FUNCTION_INFO_V1(g_int_picksplit);
PG_FUNCTION_INFO_V1(g_int_union);
PG_FUNCTION_INFO_V1(g_int_same);

37
38/*
39** The GiST Consistent method for _intments
40** Should return false if for all data items x below entry,
41** the predicate x op query == false, where op is the oper
42** corresponding to strategy in the pg_amop table.
43*/
44Datum
45g_int_consistent(PG_FUNCTION_ARGS)46{
47GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);48ArrayType *query = PG_GETARG_ARRAYTYPE_P_COPY(1);49StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);50
51/* Oid subtype = PG_GETARG_OID(3); */52bool *recheck = (bool *) PG_GETARG_POINTER(4);53bool retval;54
55/* this is exact except for RTSameStrategyNumber */56*recheck = (strategy == RTSameStrategyNumber);57
58if (strategy == BooleanSearchStrategy)59{60retval = execconsistent((QUERYTYPE *) query,61(ArrayType *) DatumGetPointer(entry->key),62GIST_LEAF(entry));63
64pfree(query);65PG_RETURN_BOOL(retval);66}67
68/* sort query for fast search, key is already sorted */69CHECKARRVALID(query);70PREPAREARR(query);71
72switch (strategy)73{74case RTOverlapStrategyNumber:75retval = inner_int_overlap((ArrayType *) DatumGetPointer(entry->key),76query);77break;78case RTSameStrategyNumber:79if (GIST_LEAF(entry))80DirectFunctionCall3(g_int_same,81entry->key,82PointerGetDatum(query),83PointerGetDatum(&retval));84else85retval = inner_int_contains((ArrayType *) DatumGetPointer(entry->key),86query);87break;88case RTContainsStrategyNumber:89case RTOldContainsStrategyNumber:90retval = inner_int_contains((ArrayType *) DatumGetPointer(entry->key),91query);92break;93case RTContainedByStrategyNumber:94case RTOldContainedByStrategyNumber:95if (GIST_LEAF(entry))96retval = inner_int_contains(query,97(ArrayType *) DatumGetPointer(entry->key));98else99{100/*101* Unfortunately, because empty arrays could be anywhere in
102* the index, we must search the whole tree.
103*/
104retval = true;105}106break;107default:108retval = false;109}110pfree(query);111PG_RETURN_BOOL(retval);112}
113
114Datum
115g_int_union(PG_FUNCTION_ARGS)116{
117GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);118int *size = (int *) PG_GETARG_POINTER(1);119int32 i,120*ptr;121ArrayType *res;122int totlen = 0;123
124for (i = 0; i < entryvec->n; i++)125{126ArrayType *ent = GETENTRY(entryvec, i);127
128CHECKARRVALID(ent);129totlen += ARRNELEMS(ent);130}131
132res = new_intArrayType(totlen);133ptr = ARRPTR(res);134
135for (i = 0; i < entryvec->n; i++)136{137ArrayType *ent = GETENTRY(entryvec, i);138int nel;139
140nel = ARRNELEMS(ent);141memcpy(ptr, ARRPTR(ent), nel * sizeof(int32));142ptr += nel;143}144
145QSORT(res, 1);146res = _int_unique(res);147*size = VARSIZE(res);148PG_RETURN_POINTER(res);149}
150
151/*
152** GiST Compress and Decompress methods
153*/
154Datum
155g_int_compress(PG_FUNCTION_ARGS)156{
157GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);158GISTENTRY *retval;159ArrayType *r;160int len,161lenr;162int *dr;163int i,164j,165cand;166int64 min;167
168if (entry->leafkey)169{170r = DatumGetArrayTypePCopy(entry->key);171CHECKARRVALID(r);172PREPAREARR(r);173
174if (ARRNELEMS(r) >= 2 * MAXNUMRANGE)175elog(NOTICE, "input array is too big (%d maximum allowed, %d current), use gist__intbig_ops opclass instead",1762 * MAXNUMRANGE - 1, ARRNELEMS(r));177
178retval = palloc(sizeof(GISTENTRY));179gistentryinit(*retval, PointerGetDatum(r),180entry->rel, entry->page, entry->offset, false);181
182PG_RETURN_POINTER(retval);183}184
185/*186* leaf entries never compress one more time, only when entry->leafkey
187* ==true, so now we work only with internal keys
188*/
189
190r = DatumGetArrayTypeP(entry->key);191CHECKARRVALID(r);192if (ARRISEMPTY(r))193{194if (r != (ArrayType *) DatumGetPointer(entry->key))195pfree(r);196PG_RETURN_POINTER(entry);197}198
199if ((len = ARRNELEMS(r)) >= 2 * MAXNUMRANGE)200{ /* compress */201if (r == (ArrayType *) DatumGetPointer(entry->key))202r = DatumGetArrayTypePCopy(entry->key);203r = resize_intArrayType(r, 2 * (len));204
205dr = ARRPTR(r);206
207/*208* "len" at this point is the number of ranges we will construct.
209* "lenr" is the number of ranges we must eventually remove by
210* merging, we must be careful to remove no more than this number.
211*/
212lenr = len - MAXNUMRANGE;213
214/*215* Initially assume we can merge consecutive ints into a range. but we
216* must count every value removed and stop when lenr runs out
217*/
218for (j = i = len - 1; i > 0 && lenr > 0; i--, j--)219{220int r_end = dr[i];221int r_start = r_end;222while (i > 0 && lenr > 0 && dr[i-1] == r_start - 1)223--r_start, --i, --lenr;224dr[2*j] = r_start;225dr[2*j+1] = r_end;226}227/* just copy the rest, if any, as trivial ranges */228for (; i >= 0; i--, j--)229dr[2*j] = dr[2*j + 1] = dr[i];230
231if (++j)232{233/*234* shunt everything down to start at the right place
235*/
236memmove((void *) &dr[0], (void *) &dr[2*j], 2*(len - j) * sizeof(int32));237}238/*239* make "len" be number of array elements, not ranges
240*/
241len = 2*(len - j);242cand = 1;243while (len > MAXNUMRANGE * 2)244{245min = PG_INT64_MAX;246for (i = 2; i < len; i += 2)247if (min > ((int64)dr[i] - (int64)dr[i - 1]))248{249min = ((int64)dr[i] - (int64)dr[i - 1]);250cand = i;251}252memmove((void *) &dr[cand - 1], (void *) &dr[cand + 1], (len - cand - 1) * sizeof(int32));253len -= 2;254}255/*256* check sparseness of result
257*/
258lenr = internal_size(dr, len);259if (lenr < 0 || lenr > MAXNUMELTS)260ereport(ERROR,261(errmsg("data is too sparse, recreate index using gist__intbig_ops opclass instead")));262
263r = resize_intArrayType(r, len);264retval = palloc(sizeof(GISTENTRY));265gistentryinit(*retval, PointerGetDatum(r),266entry->rel, entry->page, entry->offset, false);267PG_RETURN_POINTER(retval);268}269else270PG_RETURN_POINTER(entry);271}
272
273Datum
274g_int_decompress(PG_FUNCTION_ARGS)275{
276GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);277GISTENTRY *retval;278ArrayType *r;279int *dr,280lenr;281ArrayType *in;282int lenin;283int *din;284int i,285j;286
287in = DatumGetArrayTypeP(entry->key);288
289CHECKARRVALID(in);290if (ARRISEMPTY(in))291{292if (in != (ArrayType *) DatumGetPointer(entry->key))293{294retval = palloc(sizeof(GISTENTRY));295gistentryinit(*retval, PointerGetDatum(in),296entry->rel, entry->page, entry->offset, false);297PG_RETURN_POINTER(retval);298}299
300PG_RETURN_POINTER(entry);301}302
303lenin = ARRNELEMS(in);304
305if (lenin < 2 * MAXNUMRANGE)306{ /* not compressed value */307if (in != (ArrayType *) DatumGetPointer(entry->key))308{309retval = palloc(sizeof(GISTENTRY));310gistentryinit(*retval, PointerGetDatum(in),311entry->rel, entry->page, entry->offset, false);312
313PG_RETURN_POINTER(retval);314}315PG_RETURN_POINTER(entry);316}317
318din = ARRPTR(in);319lenr = internal_size(din, lenin);320if (lenr < 0 || lenr > MAXNUMELTS)321ereport(ERROR,322(errmsg("compressed array is too big, recreate index using gist__intbig_ops opclass instead")));323
324r = new_intArrayType(lenr);325dr = ARRPTR(r);326
327for (i = 0; i < lenin; i += 2)328for (j = din[i]; j <= din[i + 1]; j++)329if ((!i) || *(dr - 1) != j)330*dr++ = j;331
332if (in != (ArrayType *) DatumGetPointer(entry->key))333pfree(in);334retval = palloc(sizeof(GISTENTRY));335gistentryinit(*retval, PointerGetDatum(r),336entry->rel, entry->page, entry->offset, false);337
338PG_RETURN_POINTER(retval);339}
340
341/*
342** The GiST Penalty method for _intments
343*/
344Datum
345g_int_penalty(PG_FUNCTION_ARGS)346{
347GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);348GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);349float *result = (float *) PG_GETARG_POINTER(2);350ArrayType *ud;351float tmp1,352tmp2;353
354ud = inner_int_union((ArrayType *) DatumGetPointer(origentry->key),355(ArrayType *) DatumGetPointer(newentry->key));356rt__int_size(ud, &tmp1);357rt__int_size((ArrayType *) DatumGetPointer(origentry->key), &tmp2);358*result = tmp1 - tmp2;359pfree(ud);360
361PG_RETURN_POINTER(result);362}
363
364
365
366Datum
367g_int_same(PG_FUNCTION_ARGS)368{
369ArrayType *a = PG_GETARG_ARRAYTYPE_P(0);370ArrayType *b = PG_GETARG_ARRAYTYPE_P(1);371bool *result = (bool *) PG_GETARG_POINTER(2);372int32 n = ARRNELEMS(a);373int32 *da,374*db;375
376CHECKARRVALID(a);377CHECKARRVALID(b);378
379if (n != ARRNELEMS(b))380{381*result = false;382PG_RETURN_POINTER(result);383}384*result = true;385da = ARRPTR(a);386db = ARRPTR(b);387while (n--)388{389if (*da++ != *db++)390{391*result = false;392break;393}394}395
396PG_RETURN_POINTER(result);397}
398
/*****************************************************************
** Common GiST Method
*****************************************************************/
402
403typedef struct404{
405OffsetNumber pos;406float cost;407} SPLITCOST;408
409static int410comparecost(const void *a, const void *b)411{
412if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost)413return 0;414else415return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1;416}
417
418/*
419** The GiST PickSplit method for _intments
420** We use Guttman's poly time split algorithm
421*/
422Datum
423g_int_picksplit(PG_FUNCTION_ARGS)424{
425GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);426GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);427OffsetNumber i,428j;429ArrayType *datum_alpha,430*datum_beta;431ArrayType *datum_l,432*datum_r;433ArrayType *union_d,434*union_dl,435*union_dr;436ArrayType *inter_d;437bool firsttime;438float size_alpha,439size_beta,440size_union,441size_inter;442float size_waste,443waste;444float size_l,445size_r;446int nbytes;447OffsetNumber seed_1 = 0,448seed_2 = 0;449OffsetNumber *left,450*right;451OffsetNumber maxoff;452SPLITCOST *costvector;453
454#ifdef GIST_DEBUG455elog(DEBUG3, "--------picksplit %d", entryvec->n);456#endif457
458maxoff = entryvec->n - 2;459nbytes = (maxoff + 2) * sizeof(OffsetNumber);460v->spl_left = (OffsetNumber *) palloc(nbytes);461v->spl_right = (OffsetNumber *) palloc(nbytes);462
463firsttime = true;464waste = 0.0;465for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i))466{467datum_alpha = GETENTRY(entryvec, i);468for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j))469{470datum_beta = GETENTRY(entryvec, j);471
472/* compute the wasted space by unioning these guys */473/* size_waste = size_union - size_inter; */474union_d = inner_int_union(datum_alpha, datum_beta);475rt__int_size(union_d, &size_union);476inter_d = inner_int_inter(datum_alpha, datum_beta);477rt__int_size(inter_d, &size_inter);478size_waste = size_union - size_inter;479
480pfree(union_d);481pfree(inter_d);482
483/*484* are these a more promising split that what we've already seen?
485*/
486
487if (size_waste > waste || firsttime)488{489waste = size_waste;490seed_1 = i;491seed_2 = j;492firsttime = false;493}494}495}496
497left = v->spl_left;498v->spl_nleft = 0;499right = v->spl_right;500v->spl_nright = 0;501if (seed_1 == 0 || seed_2 == 0)502{503seed_1 = 1;504seed_2 = 2;505}506
507datum_alpha = GETENTRY(entryvec, seed_1);508datum_l = copy_intArrayType(datum_alpha);509rt__int_size(datum_l, &size_l);510datum_beta = GETENTRY(entryvec, seed_2);511datum_r = copy_intArrayType(datum_beta);512rt__int_size(datum_r, &size_r);513
514maxoff = OffsetNumberNext(maxoff);515
516/*517* sort entries
518*/
519costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);520for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))521{522costvector[i - 1].pos = i;523datum_alpha = GETENTRY(entryvec, i);524union_d = inner_int_union(datum_l, datum_alpha);525rt__int_size(union_d, &size_alpha);526pfree(union_d);527union_d = inner_int_union(datum_r, datum_alpha);528rt__int_size(union_d, &size_beta);529pfree(union_d);530costvector[i - 1].cost = Abs((size_alpha - size_l) - (size_beta - size_r));531}532qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);533
534/*535* Now split up the regions between the two seeds. An important property
536* of this split algorithm is that the split vector v has the indices of
537* items to be split in order in its left and right vectors. We exploit
538* this property by doing a merge in the code that actually splits the
539* page.
540*
541* For efficiency, we also place the new index tuple in this loop. This is
542* handled at the very end, when we have placed all the existing tuples
543* and i == maxoff + 1.
544*/
545
546
547for (j = 0; j < maxoff; j++)548{549i = costvector[j].pos;550
551/*552* If we've already decided where to place this item, just put it on
553* the right list. Otherwise, we need to figure out which page needs
554* the least enlargement in order to store the item.
555*/
556
557if (i == seed_1)558{559*left++ = i;560v->spl_nleft++;561continue;562}563else if (i == seed_2)564{565*right++ = i;566v->spl_nright++;567continue;568}569
570/* okay, which page needs least enlargement? */571datum_alpha = GETENTRY(entryvec, i);572union_dl = inner_int_union(datum_l, datum_alpha);573union_dr = inner_int_union(datum_r, datum_alpha);574rt__int_size(union_dl, &size_alpha);575rt__int_size(union_dr, &size_beta);576
577/* pick which page to add it to */578if (size_alpha - size_l < size_beta - size_r + WISH_F(v->spl_nleft, v->spl_nright, 0.01))579{580pfree(datum_l);581pfree(union_dr);582datum_l = union_dl;583size_l = size_alpha;584*left++ = i;585v->spl_nleft++;586}587else588{589pfree(datum_r);590pfree(union_dl);591datum_r = union_dr;592size_r = size_beta;593*right++ = i;594v->spl_nright++;595}596}597pfree(costvector);598*right = *left = FirstOffsetNumber;599
600v->spl_ldatum = PointerGetDatum(datum_l);601v->spl_rdatum = PointerGetDatum(datum_r);602
603PG_RETURN_POINTER(v);604}
605