PolarDB-for-PostgreSQL
477 строк · 12.1 Кб
/*-------------------------------------------------------------------------
 *
 * heapfuncs.c
 *	  Functions to investigate heap pages
 *
 * We check the input to these functions for corrupt pointers etc. that
 * might cause crashes, but at the same time we try to print out as much
 * information as possible, even if it's nonsense. That's because if a
 * page is corrupt, we don't know why and how exactly it is corrupt, so we
 * let the user judge it.
 *
 * These functions are restricted to superusers for fear of introducing
 * security holes if the input checking isn't as water-tight as it should be.
 * You'd need to be superuser to obtain a raw page image anyway, so
 * there's hardly any use case for using these without superuser rights.
 *
 * Copyright (c) 2007-2018, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  contrib/pageinspect/heapfuncs.c
 *
 *-------------------------------------------------------------------------
 */
25
26#include "postgres.h"27
28#include "pageinspect.h"29
30#include "access/htup_details.h"31#include "funcapi.h"32#include "catalog/pg_type.h"33#include "miscadmin.h"34#include "utils/array.h"35#include "utils/builtins.h"36#include "utils/rel.h"37
38
39/*
40* bits_to_text
41*
42* Converts a bits8-array of 'len' bits to a human-readable
43* c-string representation.
44*/
45static char *46bits_to_text(bits8 *bits, int len)47{
48int i;49char *str;50
51str = palloc(len + 1);52
53for (i = 0; i < len; i++)54str[i] = (bits[(i / 8)] & (1 << (i % 8))) ? '1' : '0';55
56str[i] = '\0';57
58return str;59}
60
61
62/*
63* text_to_bits
64*
65* Converts a c-string representation of bits into a bits8-array. This is
66* the reverse operation of previous routine.
67*/
68static bits8 *69text_to_bits(char *str, int len)70{
71bits8 *bits;72int off = 0;73char byte = 0;74
75bits = palloc(len + 1);76
77while (off < len)78{79if (off % 8 == 0)80byte = 0;81
82if ((str[off] == '0') || (str[off] == '1'))83byte = byte | ((str[off] - '0') << off % 8);84else85ereport(ERROR,86(errcode(ERRCODE_DATA_CORRUPTED),87errmsg("illegal character '%c' in t_bits string", str[off])));88
89if (off % 8 == 7)90bits[off / 8] = byte;91
92off++;93}94
95return bits;96}
97
98/*
99* heap_page_items
100*
101* Allows inspection of line pointers and tuple headers of a heap page.
102*/
103PG_FUNCTION_INFO_V1(heap_page_items);104
105typedef struct heap_page_items_state106{
107TupleDesc tupd;108Page page;109uint16 offset;110} heap_page_items_state;111
112Datum
113heap_page_items(PG_FUNCTION_ARGS)114{
115bytea *raw_page = PG_GETARG_BYTEA_P(0);116heap_page_items_state *inter_call_data = NULL;117FuncCallContext *fctx;118int raw_page_size;119
120if (!superuser())121ereport(ERROR,122(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),123(errmsg("must be superuser to use raw page functions"))));124
125raw_page_size = VARSIZE(raw_page) - VARHDRSZ;126
127if (SRF_IS_FIRSTCALL())128{129TupleDesc tupdesc;130MemoryContext mctx;131
132if (raw_page_size < SizeOfPageHeaderData)133ereport(ERROR,134(errcode(ERRCODE_INVALID_PARAMETER_VALUE),135errmsg("input page too small (%d bytes)", raw_page_size)));136
137fctx = SRF_FIRSTCALL_INIT();138mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);139
140inter_call_data = palloc(sizeof(heap_page_items_state));141
142/* Build a tuple descriptor for our result type */143if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)144elog(ERROR, "return type must be a row type");145
146inter_call_data->tupd = tupdesc;147
148inter_call_data->offset = FirstOffsetNumber;149inter_call_data->page = VARDATA(raw_page);150
151fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page);152fctx->user_fctx = inter_call_data;153
154MemoryContextSwitchTo(mctx);155}156
157fctx = SRF_PERCALL_SETUP();158inter_call_data = fctx->user_fctx;159
160if (fctx->call_cntr < fctx->max_calls)161{162Page page = inter_call_data->page;163HeapTuple resultTuple;164Datum result;165ItemId id;166Datum values[14];167bool nulls[14];168uint16 lp_offset;169uint16 lp_flags;170uint16 lp_len;171
172memset(nulls, 0, sizeof(nulls));173
174/* Extract information from the line pointer */175
176id = PageGetItemId(page, inter_call_data->offset);177
178lp_offset = ItemIdGetOffset(id);179lp_flags = ItemIdGetFlags(id);180lp_len = ItemIdGetLength(id);181
182values[0] = UInt16GetDatum(inter_call_data->offset);183values[1] = UInt16GetDatum(lp_offset);184values[2] = UInt16GetDatum(lp_flags);185values[3] = UInt16GetDatum(lp_len);186
187/*188* We do just enough validity checking to make sure we don't reference
189* data outside the page passed to us. The page could be corrupt in
190* many other ways, but at least we won't crash.
191*/
192if (ItemIdHasStorage(id) &&193lp_len >= MinHeapTupleSize &&194lp_offset == MAXALIGN(lp_offset) &&195lp_offset + lp_len <= raw_page_size)196{197HeapTupleHeader tuphdr;198bytea *tuple_data_bytea;199int tuple_data_len;200
201/* Extract information from the tuple header */202
203tuphdr = (HeapTupleHeader) PageGetItem(page, id);204
205values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr));206values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr));207/* shared with xvac */208values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr));209values[7] = PointerGetDatum(&tuphdr->t_ctid);210values[8] = UInt32GetDatum(tuphdr->t_infomask2);211values[9] = UInt32GetDatum(tuphdr->t_infomask);212values[10] = UInt8GetDatum(tuphdr->t_hoff);213
214/* Copy raw tuple data into bytea attribute */215tuple_data_len = lp_len - tuphdr->t_hoff;216tuple_data_bytea = (bytea *) palloc(tuple_data_len + VARHDRSZ);217SET_VARSIZE(tuple_data_bytea, tuple_data_len + VARHDRSZ);218memcpy(VARDATA(tuple_data_bytea), (char *) tuphdr + tuphdr->t_hoff,219tuple_data_len);220values[13] = PointerGetDatum(tuple_data_bytea);221
222/*223* We already checked that the item is completely within the raw
224* page passed to us, with the length given in the line pointer.
225* Let's check that t_hoff doesn't point over lp_len, before using
226* it to access t_bits and oid.
227*/
228if (tuphdr->t_hoff >= SizeofHeapTupleHeader &&229tuphdr->t_hoff <= lp_len &&230tuphdr->t_hoff == MAXALIGN(tuphdr->t_hoff))231{232if (tuphdr->t_infomask & HEAP_HASNULL)233{234int bits_len;235
236bits_len =237BITMAPLEN(HeapTupleHeaderGetNatts(tuphdr)) * BITS_PER_BYTE;238values[11] = CStringGetTextDatum(239bits_to_text(tuphdr->t_bits, bits_len));240}241else242nulls[11] = true;243
244if (tuphdr->t_infomask & HEAP_HASOID)245values[12] = HeapTupleHeaderGetOid(tuphdr);246else247nulls[12] = true;248}249else250{251nulls[11] = true;252nulls[12] = true;253}254}255else256{257/*258* The line pointer is not used, or it's invalid. Set the rest of
259* the fields to NULL
260*/
261int i;262
263for (i = 4; i <= 13; i++)264nulls[i] = true;265}266
267/* Build and return the result tuple. */268resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls);269result = HeapTupleGetDatum(resultTuple);270
271inter_call_data->offset++;272
273SRF_RETURN_NEXT(fctx, result);274}275else276SRF_RETURN_DONE(fctx);277}
278
279/*
280* tuple_data_split_internal
281*
282* Split raw tuple data taken directly from a page into an array of bytea
283* elements. This routine does a lookup on NULL values and creates array
284* elements accordingly. This is a reimplementation of nocachegetattr()
285* in heaptuple.c simplified for educational purposes.
286*/
287static Datum288tuple_data_split_internal(Oid relid, char *tupdata,289uint16 tupdata_len, uint16 t_infomask,290uint16 t_infomask2, bits8 *t_bits,291bool do_detoast)292{
293ArrayBuildState *raw_attrs;294int nattrs;295int i;296int off = 0;297Relation rel;298TupleDesc tupdesc;299
300/* Get tuple descriptor from relation OID */301rel = relation_open(relid, AccessShareLock);302tupdesc = RelationGetDescr(rel);303
304raw_attrs = initArrayResult(BYTEAOID, CurrentMemoryContext, false);305nattrs = tupdesc->natts;306
307if (nattrs < (t_infomask2 & HEAP_NATTS_MASK))308ereport(ERROR,309(errcode(ERRCODE_DATA_CORRUPTED),310errmsg("number of attributes in tuple header is greater than number of attributes in tuple descriptor")));311
312for (i = 0; i < nattrs; i++)313{314Form_pg_attribute attr;315bool is_null;316bytea *attr_data = NULL;317
318attr = TupleDescAttr(tupdesc, i);319
320/*321* Tuple header can specify less attributes than tuple descriptor as
322* ALTER TABLE ADD COLUMN without DEFAULT keyword does not actually
323* change tuples in pages, so attributes with numbers greater than
324* (t_infomask2 & HEAP_NATTS_MASK) should be treated as NULL.
325*/
326if (i >= (t_infomask2 & HEAP_NATTS_MASK))327is_null = true;328else329is_null = (t_infomask & HEAP_HASNULL) && att_isnull(i, t_bits);330
331if (!is_null)332{333int len;334
335if (attr->attlen == -1)336{337off = att_align_pointer(off, attr->attalign, -1,338tupdata + off);339
340/*341* As VARSIZE_ANY throws an exception if it can't properly
342* detect the type of external storage in macros VARTAG_SIZE,
343* this check is repeated to have a nicer error handling.
344*/
345if (VARATT_IS_EXTERNAL(tupdata + off) &&346!VARATT_IS_EXTERNAL_ONDISK(tupdata + off) &&347!VARATT_IS_EXTERNAL_INDIRECT(tupdata + off))348ereport(ERROR,349(errcode(ERRCODE_DATA_CORRUPTED),350errmsg("first byte of varlena attribute is incorrect for attribute %d", i)));351
352len = VARSIZE_ANY(tupdata + off);353}354else355{356off = att_align_nominal(off, attr->attalign);357len = attr->attlen;358}359
360if (tupdata_len < off + len)361ereport(ERROR,362(errcode(ERRCODE_DATA_CORRUPTED),363errmsg("unexpected end of tuple data")));364
365if (attr->attlen == -1 && do_detoast)366attr_data = DatumGetByteaPCopy(tupdata + off);367else368{369attr_data = (bytea *) palloc(len + VARHDRSZ);370SET_VARSIZE(attr_data, len + VARHDRSZ);371memcpy(VARDATA(attr_data), tupdata + off, len);372}373
374off = att_addlength_pointer(off, attr->attlen,375tupdata + off);376}377
378raw_attrs = accumArrayResult(raw_attrs, PointerGetDatum(attr_data),379is_null, BYTEAOID, CurrentMemoryContext);380if (attr_data)381pfree(attr_data);382}383
384if (tupdata_len != off)385ereport(ERROR,386(errcode(ERRCODE_DATA_CORRUPTED),387errmsg("end of tuple reached without looking at all its data")));388
389relation_close(rel, AccessShareLock);390
391return makeArrayResult(raw_attrs, CurrentMemoryContext);392}
393
394/*
395* tuple_data_split
396*
397* Split raw tuple data taken directly from page into distinct elements
398* taking into account null values.
399*/
400PG_FUNCTION_INFO_V1(tuple_data_split);401
402Datum
403tuple_data_split(PG_FUNCTION_ARGS)404{
405Oid relid;406bytea *raw_data;407uint16 t_infomask;408uint16 t_infomask2;409char *t_bits_str;410bool do_detoast = false;411bits8 *t_bits = NULL;412Datum res;413
414relid = PG_GETARG_OID(0);415raw_data = PG_ARGISNULL(1) ? NULL : PG_GETARG_BYTEA_P(1);416t_infomask = PG_GETARG_INT16(2);417t_infomask2 = PG_GETARG_INT16(3);418t_bits_str = PG_ARGISNULL(4) ? NULL :419text_to_cstring(PG_GETARG_TEXT_PP(4));420
421if (PG_NARGS() >= 6)422do_detoast = PG_GETARG_BOOL(5);423
424if (!superuser())425ereport(ERROR,426(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),427errmsg("must be superuser to use raw page functions")));428
429if (!raw_data)430PG_RETURN_NULL();431
432/*433* Convert t_bits string back to the bits8 array as represented in the
434* tuple header.
435*/
436if (t_infomask & HEAP_HASNULL)437{438int bits_str_len;439int bits_len;440
441bits_len = BITMAPLEN(t_infomask2 & HEAP_NATTS_MASK) * BITS_PER_BYTE;442if (!t_bits_str)443ereport(ERROR,444(errcode(ERRCODE_DATA_CORRUPTED),445errmsg("argument of t_bits is null, but it is expected to be null and %d character long",446bits_len)));447
448bits_str_len = strlen(t_bits_str);449if (bits_len != bits_str_len)450ereport(ERROR,451(errcode(ERRCODE_DATA_CORRUPTED),452errmsg("unexpected length of t_bits %u, expected %d",453bits_str_len, bits_len)));454
455/* do the conversion */456t_bits = text_to_bits(t_bits_str, bits_str_len);457}458else459{460if (t_bits_str)461ereport(ERROR,462(errcode(ERRCODE_DATA_CORRUPTED),463errmsg("t_bits string is expected to be NULL, but instead it is %zu bytes length",464strlen(t_bits_str))));465}466
467/* Split tuple data */468res = tuple_data_split_internal(relid, (char *) raw_data + VARHDRSZ,469VARSIZE(raw_data) - VARHDRSZ,470t_infomask, t_infomask2, t_bits,471do_detoast);472
473if (t_bits)474pfree(t_bits);475
476PG_RETURN_ARRAYTYPE_P(res);477}
478