PolarDB-for-PostgreSQL

_int_gist.c
604 строки · 14.2 Кб
Перенос по словам
1
/*
2
 * contrib/intarray/_int_gist.c
3
 */
4
#include "postgres.h"
5

6
#include <limits.h>
7

8
#include "access/gist.h"
9
#include "access/stratnum.h"
10

11
#include "_int.h"
12

13
#define GETENTRY(vec,pos) ((ArrayType *) DatumGetPointer((vec)->vector[(pos)].key))
14

15
/*
16
 * Control the maximum sparseness of compressed keys.
17
 *
18
 * The upper safe bound for this limit is half the maximum allocatable array
19
 * size. A lower bound would give more guarantees that pathological data
20
 * wouldn't eat excessive CPU and memory, but at the expense of breaking
21
 * possibly working (after a fashion) indexes.
22
 */
23
#define MAXNUMELTS (Min((MaxAllocSize / sizeof(Datum)),((MaxAllocSize - ARR_OVERHEAD_NONULLS(1)) / sizeof(int)))/2)
24
/* or: #define MAXNUMELTS 1000000 */
25

26
/*
27
** GiST support methods
28
*/
29
PG_FUNCTION_INFO_V1(g_int_consistent);
30
PG_FUNCTION_INFO_V1(g_int_compress);
31
PG_FUNCTION_INFO_V1(g_int_decompress);
32
PG_FUNCTION_INFO_V1(g_int_penalty);
33
PG_FUNCTION_INFO_V1(g_int_picksplit);
34
PG_FUNCTION_INFO_V1(g_int_union);
35
PG_FUNCTION_INFO_V1(g_int_same);
36

37

38
/*
39
** The GiST Consistent method for _intments
40
** Should return false if for all data items x below entry,
41
** the predicate x op query == false, where op is the oper
42
** corresponding to strategy in the pg_amop table.
43
*/
44
Datum
45
g_int_consistent(PG_FUNCTION_ARGS)
46
{
47
	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
48
	ArrayType  *query = PG_GETARG_ARRAYTYPE_P_COPY(1);
49
	StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
50

51
	/* Oid		subtype = PG_GETARG_OID(3); */
52
	bool	   *recheck = (bool *) PG_GETARG_POINTER(4);
53
	bool		retval;
54

55
	/* this is exact except for RTSameStrategyNumber */
56
	*recheck = (strategy == RTSameStrategyNumber);
57

58
	if (strategy == BooleanSearchStrategy)
59
	{
60
		retval = execconsistent((QUERYTYPE *) query,
61
								(ArrayType *) DatumGetPointer(entry->key),
62
								GIST_LEAF(entry));
63

64
		pfree(query);
65
		PG_RETURN_BOOL(retval);
66
	}
67

68
	/* sort query for fast search, key is already sorted */
69
	CHECKARRVALID(query);
70
	PREPAREARR(query);
71

72
	switch (strategy)
73
	{
74
		case RTOverlapStrategyNumber:
75
			retval = inner_int_overlap((ArrayType *) DatumGetPointer(entry->key),
76
									   query);
77
			break;
78
		case RTSameStrategyNumber:
79
			if (GIST_LEAF(entry))
80
				DirectFunctionCall3(g_int_same,
81
									entry->key,
82
									PointerGetDatum(query),
83
									PointerGetDatum(&retval));
84
			else
85
				retval = inner_int_contains((ArrayType *) DatumGetPointer(entry->key),
86
											query);
87
			break;
88
		case RTContainsStrategyNumber:
89
		case RTOldContainsStrategyNumber:
90
			retval = inner_int_contains((ArrayType *) DatumGetPointer(entry->key),
91
										query);
92
			break;
93
		case RTContainedByStrategyNumber:
94
		case RTOldContainedByStrategyNumber:
95
			if (GIST_LEAF(entry))
96
				retval = inner_int_contains(query,
97
											(ArrayType *) DatumGetPointer(entry->key));
98
			else
99
			{
100
				/*
101
				 * Unfortunately, because empty arrays could be anywhere in
102
				 * the index, we must search the whole tree.
103
				 */
104
				retval = true;
105
			}
106
			break;
107
		default:
108
			retval = false;
109
	}
110
	pfree(query);
111
	PG_RETURN_BOOL(retval);
112
}
113

114
Datum
115
g_int_union(PG_FUNCTION_ARGS)
116
{
117
	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
118
	int		   *size = (int *) PG_GETARG_POINTER(1);
119
	int32		i,
120
			   *ptr;
121
	ArrayType  *res;
122
	int			totlen = 0;
123

124
	for (i = 0; i < entryvec->n; i++)
125
	{
126
		ArrayType  *ent = GETENTRY(entryvec, i);
127

128
		CHECKARRVALID(ent);
129
		totlen += ARRNELEMS(ent);
130
	}
131

132
	res = new_intArrayType(totlen);
133
	ptr = ARRPTR(res);
134

135
	for (i = 0; i < entryvec->n; i++)
136
	{
137
		ArrayType  *ent = GETENTRY(entryvec, i);
138
		int			nel;
139

140
		nel = ARRNELEMS(ent);
141
		memcpy(ptr, ARRPTR(ent), nel * sizeof(int32));
142
		ptr += nel;
143
	}
144

145
	QSORT(res, 1);
146
	res = _int_unique(res);
147
	*size = VARSIZE(res);
148
	PG_RETURN_POINTER(res);
149
}
150

151
/*
152
** GiST Compress and Decompress methods
153
*/
154
Datum
155
g_int_compress(PG_FUNCTION_ARGS)
156
{
157
	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
158
	GISTENTRY  *retval;
159
	ArrayType  *r;
160
	int			len,
161
				lenr;
162
	int		   *dr;
163
	int			i,
164
				j,
165
				cand;
166
	int64		min;
167

168
	if (entry->leafkey)
169
	{
170
		r = DatumGetArrayTypePCopy(entry->key);
171
		CHECKARRVALID(r);
172
		PREPAREARR(r);
173

174
		if (ARRNELEMS(r) >= 2 * MAXNUMRANGE)
175
			elog(NOTICE, "input array is too big (%d maximum allowed, %d current), use gist__intbig_ops opclass instead",
176
				 2 * MAXNUMRANGE - 1, ARRNELEMS(r));
177

178
		retval = palloc(sizeof(GISTENTRY));
179
		gistentryinit(*retval, PointerGetDatum(r),
180
					  entry->rel, entry->page, entry->offset, false);
181

182
		PG_RETURN_POINTER(retval);
183
	}
184

185
	/*
186
	 * leaf entries never compress one more time, only when entry->leafkey
187
	 * ==true, so now we work only with internal keys
188
	 */
189

190
	r = DatumGetArrayTypeP(entry->key);
191
	CHECKARRVALID(r);
192
	if (ARRISEMPTY(r))
193
	{
194
		if (r != (ArrayType *) DatumGetPointer(entry->key))
195
			pfree(r);
196
		PG_RETURN_POINTER(entry);
197
	}
198

199
	if ((len = ARRNELEMS(r)) >= 2 * MAXNUMRANGE)
200
	{							/* compress */
201
		if (r == (ArrayType *) DatumGetPointer(entry->key))
202
			r = DatumGetArrayTypePCopy(entry->key);
203
		r = resize_intArrayType(r, 2 * (len));
204

205
		dr = ARRPTR(r);
206

207
		/*
208
		 * "len" at this point is the number of ranges we will construct.
209
		 * "lenr" is the number of ranges we must eventually remove by
210
		 * merging, we must be careful to remove no more than this number.
211
		 */
212
		lenr = len - MAXNUMRANGE;
213

214
		/*
215
		 * Initially assume we can merge consecutive ints into a range. but we
216
		 * must count every value removed and stop when lenr runs out
217
		 */
218
		for (j = i = len - 1; i > 0 && lenr > 0; i--, j--)
219
		{
220
			int		r_end = dr[i];
221
			int		r_start = r_end;
222
			while (i > 0 && lenr > 0 && dr[i-1] == r_start - 1)
223
				--r_start, --i, --lenr;
224
			dr[2*j] = r_start;
225
			dr[2*j+1] = r_end;
226
		}
227
		/* just copy the rest, if any, as trivial ranges */
228
		for (; i >= 0; i--, j--)
229
			dr[2*j] = dr[2*j + 1] = dr[i];
230

231
		if (++j)
232
		{
233
			/*
234
			 * shunt everything down to start at the right place
235
			 */
236
			memmove((void *) &dr[0], (void *) &dr[2*j], 2*(len - j) * sizeof(int32));
237
		}
238
		/*
239
		 * make "len" be number of array elements, not ranges
240
		 */
241
		len = 2*(len - j);
242
		cand = 1;
243
		while (len > MAXNUMRANGE * 2)
244
		{
245
			min = PG_INT64_MAX;
246
			for (i = 2; i < len; i += 2)
247
				if (min > ((int64)dr[i] - (int64)dr[i - 1]))
248
				{
249
					min = ((int64)dr[i] - (int64)dr[i - 1]);
250
					cand = i;
251
				}
252
			memmove((void *) &dr[cand - 1], (void *) &dr[cand + 1], (len - cand - 1) * sizeof(int32));
253
			len -= 2;
254
		}
255
		/*
256
		 * check sparseness of result
257
		 */
258
		lenr = internal_size(dr, len);
259
		if (lenr < 0 || lenr > MAXNUMELTS)
260
			ereport(ERROR,
261
					(errmsg("data is too sparse, recreate index using gist__intbig_ops opclass instead")));
262

263
		r = resize_intArrayType(r, len);
264
		retval = palloc(sizeof(GISTENTRY));
265
		gistentryinit(*retval, PointerGetDatum(r),
266
					  entry->rel, entry->page, entry->offset, false);
267
		PG_RETURN_POINTER(retval);
268
	}
269
	else
270
		PG_RETURN_POINTER(entry);
271
}
272

273
Datum
274
g_int_decompress(PG_FUNCTION_ARGS)
275
{
276
	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
277
	GISTENTRY  *retval;
278
	ArrayType  *r;
279
	int		   *dr,
280
				lenr;
281
	ArrayType  *in;
282
	int			lenin;
283
	int		   *din;
284
	int			i,
285
				j;
286

287
	in = DatumGetArrayTypeP(entry->key);
288

289
	CHECKARRVALID(in);
290
	if (ARRISEMPTY(in))
291
	{
292
		if (in != (ArrayType *) DatumGetPointer(entry->key))
293
		{
294
			retval = palloc(sizeof(GISTENTRY));
295
			gistentryinit(*retval, PointerGetDatum(in),
296
						  entry->rel, entry->page, entry->offset, false);
297
			PG_RETURN_POINTER(retval);
298
		}
299

300
		PG_RETURN_POINTER(entry);
301
	}
302

303
	lenin = ARRNELEMS(in);
304

305
	if (lenin < 2 * MAXNUMRANGE)
306
	{							/* not compressed value */
307
		if (in != (ArrayType *) DatumGetPointer(entry->key))
308
		{
309
			retval = palloc(sizeof(GISTENTRY));
310
			gistentryinit(*retval, PointerGetDatum(in),
311
						  entry->rel, entry->page, entry->offset, false);
312

313
			PG_RETURN_POINTER(retval);
314
		}
315
		PG_RETURN_POINTER(entry);
316
	}
317

318
	din = ARRPTR(in);
319
	lenr = internal_size(din, lenin);
320
	if (lenr < 0 || lenr > MAXNUMELTS)
321
		ereport(ERROR,
322
				(errmsg("compressed array is too big, recreate index using gist__intbig_ops opclass instead")));
323

324
	r = new_intArrayType(lenr);
325
	dr = ARRPTR(r);
326

327
	for (i = 0; i < lenin; i += 2)
328
		for (j = din[i]; j <= din[i + 1]; j++)
329
			if ((!i) || *(dr - 1) != j)
330
				*dr++ = j;
331

332
	if (in != (ArrayType *) DatumGetPointer(entry->key))
333
		pfree(in);
334
	retval = palloc(sizeof(GISTENTRY));
335
	gistentryinit(*retval, PointerGetDatum(r),
336
				  entry->rel, entry->page, entry->offset, false);
337

338
	PG_RETURN_POINTER(retval);
339
}
340

341
/*
342
** The GiST Penalty method for _intments
343
*/
344
Datum
345
g_int_penalty(PG_FUNCTION_ARGS)
346
{
347
	GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
348
	GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
349
	float	   *result = (float *) PG_GETARG_POINTER(2);
350
	ArrayType  *ud;
351
	float		tmp1,
352
				tmp2;
353

354
	ud = inner_int_union((ArrayType *) DatumGetPointer(origentry->key),
355
						 (ArrayType *) DatumGetPointer(newentry->key));
356
	rt__int_size(ud, &tmp1);
357
	rt__int_size((ArrayType *) DatumGetPointer(origentry->key), &tmp2);
358
	*result = tmp1 - tmp2;
359
	pfree(ud);
360

361
	PG_RETURN_POINTER(result);
362
}
363

364

365

366
Datum
367
g_int_same(PG_FUNCTION_ARGS)
368
{
369
	ArrayType  *a = PG_GETARG_ARRAYTYPE_P(0);
370
	ArrayType  *b = PG_GETARG_ARRAYTYPE_P(1);
371
	bool	   *result = (bool *) PG_GETARG_POINTER(2);
372
	int32		n = ARRNELEMS(a);
373
	int32	   *da,
374
			   *db;
375

376
	CHECKARRVALID(a);
377
	CHECKARRVALID(b);
378

379
	if (n != ARRNELEMS(b))
380
	{
381
		*result = false;
382
		PG_RETURN_POINTER(result);
383
	}
384
	*result = true;
385
	da = ARRPTR(a);
386
	db = ARRPTR(b);
387
	while (n--)
388
	{
389
		if (*da++ != *db++)
390
		{
391
			*result = false;
392
			break;
393
		}
394
	}
395

396
	PG_RETURN_POINTER(result);
397
}
398

399
/*****************************************************************
400
** Common GiST Method
401
*****************************************************************/
402

403
typedef struct
404
{
405
	OffsetNumber pos;
406
	float		cost;
407
} SPLITCOST;
408

409
static int
410
comparecost(const void *a, const void *b)
411
{
412
	if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost)
413
		return 0;
414
	else
415
		return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1;
416
}
417

418
/*
419
** The GiST PickSplit method for _intments
420
** We use Guttman's poly time split algorithm
421
*/
422
Datum
423
g_int_picksplit(PG_FUNCTION_ARGS)
424
{
425
	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
426
	GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
427
	OffsetNumber i,
428
				j;
429
	ArrayType  *datum_alpha,
430
			   *datum_beta;
431
	ArrayType  *datum_l,
432
			   *datum_r;
433
	ArrayType  *union_d,
434
			   *union_dl,
435
			   *union_dr;
436
	ArrayType  *inter_d;
437
	bool		firsttime;
438
	float		size_alpha,
439
				size_beta,
440
				size_union,
441
				size_inter;
442
	float		size_waste,
443
				waste;
444
	float		size_l,
445
				size_r;
446
	int			nbytes;
447
	OffsetNumber seed_1 = 0,
448
				seed_2 = 0;
449
	OffsetNumber *left,
450
			   *right;
451
	OffsetNumber maxoff;
452
	SPLITCOST  *costvector;
453

454
#ifdef GIST_DEBUG
455
	elog(DEBUG3, "--------picksplit %d", entryvec->n);
456
#endif
457

458
	maxoff = entryvec->n - 2;
459
	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
460
	v->spl_left = (OffsetNumber *) palloc(nbytes);
461
	v->spl_right = (OffsetNumber *) palloc(nbytes);
462

463
	firsttime = true;
464
	waste = 0.0;
465
	for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i))
466
	{
467
		datum_alpha = GETENTRY(entryvec, i);
468
		for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j))
469
		{
470
			datum_beta = GETENTRY(entryvec, j);
471

472
			/* compute the wasted space by unioning these guys */
473
			/* size_waste = size_union - size_inter; */
474
			union_d = inner_int_union(datum_alpha, datum_beta);
475
			rt__int_size(union_d, &size_union);
476
			inter_d = inner_int_inter(datum_alpha, datum_beta);
477
			rt__int_size(inter_d, &size_inter);
478
			size_waste = size_union - size_inter;
479

480
			pfree(union_d);
481
			pfree(inter_d);
482

483
			/*
484
			 * are these a more promising split that what we've already seen?
485
			 */
486

487
			if (size_waste > waste || firsttime)
488
			{
489
				waste = size_waste;
490
				seed_1 = i;
491
				seed_2 = j;
492
				firsttime = false;
493
			}
494
		}
495
	}
496

497
	left = v->spl_left;
498
	v->spl_nleft = 0;
499
	right = v->spl_right;
500
	v->spl_nright = 0;
501
	if (seed_1 == 0 || seed_2 == 0)
502
	{
503
		seed_1 = 1;
504
		seed_2 = 2;
505
	}
506

507
	datum_alpha = GETENTRY(entryvec, seed_1);
508
	datum_l = copy_intArrayType(datum_alpha);
509
	rt__int_size(datum_l, &size_l);
510
	datum_beta = GETENTRY(entryvec, seed_2);
511
	datum_r = copy_intArrayType(datum_beta);
512
	rt__int_size(datum_r, &size_r);
513

514
	maxoff = OffsetNumberNext(maxoff);
515

516
	/*
517
	 * sort entries
518
	 */
519
	costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
520
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
521
	{
522
		costvector[i - 1].pos = i;
523
		datum_alpha = GETENTRY(entryvec, i);
524
		union_d = inner_int_union(datum_l, datum_alpha);
525
		rt__int_size(union_d, &size_alpha);
526
		pfree(union_d);
527
		union_d = inner_int_union(datum_r, datum_alpha);
528
		rt__int_size(union_d, &size_beta);
529
		pfree(union_d);
530
		costvector[i - 1].cost = Abs((size_alpha - size_l) - (size_beta - size_r));
531
	}
532
	qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
533

534
	/*
535
	 * Now split up the regions between the two seeds.  An important property
536
	 * of this split algorithm is that the split vector v has the indices of
537
	 * items to be split in order in its left and right vectors.  We exploit
538
	 * this property by doing a merge in the code that actually splits the
539
	 * page.
540
	 *
541
	 * For efficiency, we also place the new index tuple in this loop. This is
542
	 * handled at the very end, when we have placed all the existing tuples
543
	 * and i == maxoff + 1.
544
	 */
545

546

547
	for (j = 0; j < maxoff; j++)
548
	{
549
		i = costvector[j].pos;
550

551
		/*
552
		 * If we've already decided where to place this item, just put it on
553
		 * the right list.  Otherwise, we need to figure out which page needs
554
		 * the least enlargement in order to store the item.
555
		 */
556

557
		if (i == seed_1)
558
		{
559
			*left++ = i;
560
			v->spl_nleft++;
561
			continue;
562
		}
563
		else if (i == seed_2)
564
		{
565
			*right++ = i;
566
			v->spl_nright++;
567
			continue;
568
		}
569

570
		/* okay, which page needs least enlargement? */
571
		datum_alpha = GETENTRY(entryvec, i);
572
		union_dl = inner_int_union(datum_l, datum_alpha);
573
		union_dr = inner_int_union(datum_r, datum_alpha);
574
		rt__int_size(union_dl, &size_alpha);
575
		rt__int_size(union_dr, &size_beta);
576

577
		/* pick which page to add it to */
578
		if (size_alpha - size_l < size_beta - size_r + WISH_F(v->spl_nleft, v->spl_nright, 0.01))
579
		{
580
			pfree(datum_l);
581
			pfree(union_dr);
582
			datum_l = union_dl;
583
			size_l = size_alpha;
584
			*left++ = i;
585
			v->spl_nleft++;
586
		}
587
		else
588
		{
589
			pfree(datum_r);
590
			pfree(union_dl);
591
			datum_r = union_dr;
592
			size_r = size_beta;
593
			*right++ = i;
594
			v->spl_nright++;
595
		}
596
	}
597
	pfree(costvector);
598
	*right = *left = FirstOffsetNumber;
599

600
	v->spl_ldatum = PointerGetDatum(datum_l);
601
	v->spl_rdatum = PointerGetDatum(datum_r);
602

603
	PG_RETURN_POINTER(v);
604
}
605
PolarDB-for-PostgreSQL

Использование cookies