Skip to content

Commit 22f2a98

Browse files
committed
Redesign the caching done by get_cached_rowtype().
Previously, get_cached_rowtype() cached a pointer to a reference-counted tuple descriptor from the typcache, relying on the ExprContextCallback mechanism to release the tupdesc refcount when the expression tree using the tupdesc was destroyed. This worked fine when it was designed, but the introduction of within-DO-block COMMITs broke it. The refcount is logged in a transaction-lifespan resource owner, but plpgsql won't destroy simple expressions made within the DO block (before its first commit) until the DO block is exited. That results in a warning about a leaked tupdesc refcount when the COMMIT destroys the original resource owner, and then an error about the active resource owner not holding a matching refcount when the expression is destroyed. To fix, get rid of the need to have a shutdown callback at all, by instead caching a pointer to the relevant typcache entry. Those survive for the life of the backend, so we needn't worry about the pointer becoming stale. (For registered RECORD types, we can still cache a pointer to the tupdesc, knowing that it won't change for the life of the backend.) This mechanism has been in use in plpgsql and expandedrecord.c since commit 4b93f57, and seems to work well. This change requires modifying the ExprEvalStep structs used by the relevant expression step types, which is slightly worrisome for back-patching. However, there seems no good reason for extensions to be familiar with the details of these particular sub-structs. Per report from Rohit Bhogate. Back-patch to v11 where within-DO-block COMMITs became a thing. Discussion: https://postgr.es/m/CAAV6ZkQRCVBh8qAY+SZiHnz+U+FqAGBBDaDTjF2yiKa2nJSLKg@mail.gmail.com
1 parent c8da16b commit 22f2a98

File tree

5 files changed

+177
-91
lines changed

5 files changed

+177
-91
lines changed

src/backend/executor/execExpr.c

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1129,7 +1129,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
11291129
scratch.opcode = EEOP_FIELDSELECT;
11301130
scratch.d.fieldselect.fieldnum = fselect->fieldnum;
11311131
scratch.d.fieldselect.resulttype = fselect->resulttype;
1132-
scratch.d.fieldselect.argdesc = NULL;
1132+
scratch.d.fieldselect.rowcache.cacheptr = NULL;
11331133

11341134
ExprEvalPushStep(state, &scratch);
11351135
break;
@@ -1139,7 +1139,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
11391139
{
11401140
FieldStore *fstore = (FieldStore *) node;
11411141
TupleDesc tupDesc;
1142-
TupleDesc *descp;
1142+
ExprEvalRowtypeCache *rowcachep;
11431143
Datum *values;
11441144
bool *nulls;
11451145
int ncolumns;
@@ -1155,17 +1155,17 @@ ExecInitExprRec(Expr *node, ExprState *state,
11551155
values = (Datum *) palloc(sizeof(Datum) * ncolumns);
11561156
nulls = (bool *) palloc(sizeof(bool) * ncolumns);
11571157

1158-
/* create workspace for runtime tupdesc cache */
1159-
descp = (TupleDesc *) palloc(sizeof(TupleDesc));
1160-
*descp = NULL;
1158+
/* create shared composite-type-lookup cache struct */
1159+
rowcachep = palloc(sizeof(ExprEvalRowtypeCache));
1160+
rowcachep->cacheptr = NULL;
11611161

11621162
/* emit code to evaluate the composite input value */
11631163
ExecInitExprRec(fstore->arg, state, resv, resnull);
11641164

11651165
/* next, deform the input tuple into our workspace */
11661166
scratch.opcode = EEOP_FIELDSTORE_DEFORM;
11671167
scratch.d.fieldstore.fstore = fstore;
1168-
scratch.d.fieldstore.argdesc = descp;
1168+
scratch.d.fieldstore.rowcache = rowcachep;
11691169
scratch.d.fieldstore.values = values;
11701170
scratch.d.fieldstore.nulls = nulls;
11711171
scratch.d.fieldstore.ncolumns = ncolumns;
@@ -1222,7 +1222,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
12221222
/* finally, form result tuple */
12231223
scratch.opcode = EEOP_FIELDSTORE_FORM;
12241224
scratch.d.fieldstore.fstore = fstore;
1225-
scratch.d.fieldstore.argdesc = descp;
1225+
scratch.d.fieldstore.rowcache = rowcachep;
12261226
scratch.d.fieldstore.values = values;
12271227
scratch.d.fieldstore.nulls = nulls;
12281228
scratch.d.fieldstore.ncolumns = ncolumns;
@@ -1368,17 +1368,24 @@ ExecInitExprRec(Expr *node, ExprState *state,
13681368
case T_ConvertRowtypeExpr:
13691369
{
13701370
ConvertRowtypeExpr *convert = (ConvertRowtypeExpr *) node;
1371+
ExprEvalRowtypeCache *rowcachep;
1372+
1373+
/* cache structs must be out-of-line for space reasons */
1374+
rowcachep = palloc(2 * sizeof(ExprEvalRowtypeCache));
1375+
rowcachep[0].cacheptr = NULL;
1376+
rowcachep[1].cacheptr = NULL;
13711377

13721378
/* evaluate argument into step's result area */
13731379
ExecInitExprRec(convert->arg, state, resv, resnull);
13741380

13751381
/* and push conversion step */
13761382
scratch.opcode = EEOP_CONVERT_ROWTYPE;
1377-
scratch.d.convert_rowtype.convert = convert;
1378-
scratch.d.convert_rowtype.indesc = NULL;
1379-
scratch.d.convert_rowtype.outdesc = NULL;
1383+
scratch.d.convert_rowtype.inputtype =
1384+
exprType((Node *) convert->arg);
1385+
scratch.d.convert_rowtype.outputtype = convert->resulttype;
1386+
scratch.d.convert_rowtype.incache = &rowcachep[0];
1387+
scratch.d.convert_rowtype.outcache = &rowcachep[1];
13801388
scratch.d.convert_rowtype.map = NULL;
1381-
scratch.d.convert_rowtype.initialized = false;
13821389

13831390
ExprEvalPushStep(state, &scratch);
13841391
break;
@@ -2013,7 +2020,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
20132020
(int) ntest->nulltesttype);
20142021
}
20152022
/* initialize cache in case it's a row test */
2016-
scratch.d.nulltest_row.argdesc = NULL;
2023+
scratch.d.nulltest_row.rowcache.cacheptr = NULL;
20172024

20182025
/* first evaluate argument into result variable */
20192026
ExecInitExprRec(ntest->arg, state,

src/backend/executor/execExprInterp.c

Lines changed: 89 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,8 @@ static void ExecInitInterpreter(void);
144144
/* support functions */
145145
static void CheckVarSlotCompatibility(TupleTableSlot *slot, int attnum, Oid vartype);
146146
static TupleDesc get_cached_rowtype(Oid type_id, int32 typmod,
147-
TupleDesc *cache_field, ExprContext *econtext);
148-
static void ShutdownTupleDescRef(Datum arg);
147+
ExprEvalRowtypeCache *rowcache,
148+
bool *changed);
149149
static void ExecEvalRowNullInt(ExprState *state, ExprEvalStep *op,
150150
ExprContext *econtext, bool checkisnull);
151151

@@ -1903,56 +1903,78 @@ CheckVarSlotCompatibility(TupleTableSlot *slot, int attnum, Oid vartype)
19031903
* get_cached_rowtype: utility function to lookup a rowtype tupdesc
19041904
*
19051905
* type_id, typmod: identity of the rowtype
1906-
* cache_field: where to cache the TupleDesc pointer in expression state node
1907-
* (field must be initialized to NULL)
1908-
* econtext: expression context we are executing in
1906+
* rowcache: space for caching identity info
1907+
* (rowcache->cacheptr must be initialized to NULL)
1908+
* changed: if not NULL, *changed is set to true on any update
19091909
*
1910-
* NOTE: because the shutdown callback will be called during plan rescan,
1911-
* must be prepared to re-do this during any node execution; cannot call
1912-
* just once during expression initialization.
1910+
* The returned TupleDesc is not guaranteed pinned; caller must pin it
1911+
* to use it across any operation that might incur cache invalidation.
1912+
* (The TupleDesc is always refcounted, so just use IncrTupleDescRefCount.)
1913+
*
1914+
* NOTE: because composite types can change contents, we must be prepared
1915+
* to re-do this during any node execution; cannot call just once during
1916+
* expression initialization.
19131917
*/
19141918
static TupleDesc
19151919
get_cached_rowtype(Oid type_id, int32 typmod,
1916-
TupleDesc *cache_field, ExprContext *econtext)
1920+
ExprEvalRowtypeCache *rowcache,
1921+
bool *changed)
19171922
{
1918-
TupleDesc tupDesc = *cache_field;
1919-
1920-
/* Do lookup if no cached value or if requested type changed */
1921-
if (tupDesc == NULL ||
1922-
type_id != tupDesc->tdtypeid ||
1923-
typmod != tupDesc->tdtypmod)
1923+
if (type_id != RECORDOID)
19241924
{
1925-
tupDesc = lookup_rowtype_tupdesc(type_id, typmod);
1925+
/*
1926+
* It's a named composite type, so use the regular typcache. Do a
1927+
* lookup first time through, or if the composite type changed. Note:
1928+
* "tupdesc_id == 0" may look redundant, but it protects against the
1929+
* admittedly-theoretical possibility that type_id was RECORDOID the
1930+
* last time through, so that the cacheptr isn't TypeCacheEntry *.
1931+
*/
1932+
TypeCacheEntry *typentry = (TypeCacheEntry *) rowcache->cacheptr;
19261933

1927-
if (*cache_field)
1934+
if (unlikely(typentry == NULL ||
1935+
rowcache->tupdesc_id == 0 ||
1936+
typentry->tupDesc_identifier != rowcache->tupdesc_id))
19281937
{
1929-
/* Release old tupdesc; but callback is already registered */
1930-
ReleaseTupleDesc(*cache_field);
1938+
typentry = lookup_type_cache(type_id, TYPECACHE_TUPDESC);
1939+
if (typentry->tupDesc == NULL)
1940+
ereport(ERROR,
1941+
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
1942+
errmsg("type %s is not composite",
1943+
format_type_be(type_id))));
1944+
rowcache->cacheptr = (void *) typentry;
1945+
rowcache->tupdesc_id = typentry->tupDesc_identifier;
1946+
if (changed)
1947+
*changed = true;
19311948
}
1932-
else
1949+
return typentry->tupDesc;
1950+
}
1951+
else
1952+
{
1953+
/*
1954+
* A RECORD type, once registered, doesn't change for the life of the
1955+
* backend. So we don't need a typcache entry as such, which is good
1956+
* because there isn't one. It's possible that the caller is asking
1957+
* about a different type than before, though.
1958+
*/
1959+
TupleDesc tupDesc = (TupleDesc) rowcache->cacheptr;
1960+
1961+
if (unlikely(tupDesc == NULL ||
1962+
rowcache->tupdesc_id != 0 ||
1963+
type_id != tupDesc->tdtypeid ||
1964+
typmod != tupDesc->tdtypmod))
19331965
{
1934-
/* Need to register shutdown callback to release tupdesc */
1935-
RegisterExprContextCallback(econtext,
1936-
ShutdownTupleDescRef,
1937-
PointerGetDatum(cache_field));
1966+
tupDesc = lookup_rowtype_tupdesc(type_id, typmod);
1967+
/* Drop pin acquired by lookup_rowtype_tupdesc */
1968+
ReleaseTupleDesc(tupDesc);
1969+
rowcache->cacheptr = (void *) tupDesc;
1970+
rowcache->tupdesc_id = 0; /* not a valid value for non-RECORD */
1971+
if (changed)
1972+
*changed = true;
19381973
}
1939-
*cache_field = tupDesc;
1974+
return tupDesc;
19401975
}
1941-
return tupDesc;
19421976
}
19431977

1944-
/*
1945-
* Callback function to release a tupdesc refcount at econtext shutdown
1946-
*/
1947-
static void
1948-
ShutdownTupleDescRef(Datum arg)
1949-
{
1950-
TupleDesc *cache_field = (TupleDesc *) DatumGetPointer(arg);
1951-
1952-
if (*cache_field)
1953-
ReleaseTupleDesc(*cache_field);
1954-
*cache_field = NULL;
1955-
}
19561978

19571979
/*
19581980
* Fast-path functions, for very simple expressions
@@ -2473,8 +2495,7 @@ ExecEvalRowNullInt(ExprState *state, ExprEvalStep *op,
24732495

24742496
/* Lookup tupdesc if first time through or if type changes */
24752497
tupDesc = get_cached_rowtype(tupType, tupTypmod,
2476-
&op->d.nulltest_row.argdesc,
2477-
econtext);
2498+
&op->d.nulltest_row.rowcache, NULL);
24782499

24792500
/*
24802501
* heap_attisnull needs a HeapTuple not a bare HeapTupleHeader.
@@ -2910,8 +2931,7 @@ ExecEvalFieldSelect(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
29102931

29112932
/* Lookup tupdesc if first time through or if type changes */
29122933
tupDesc = get_cached_rowtype(tupType, tupTypmod,
2913-
&op->d.fieldselect.argdesc,
2914-
econtext);
2934+
&op->d.fieldselect.rowcache, NULL);
29152935

29162936
/*
29172937
* Find field's attr record. Note we don't support system columns
@@ -2969,9 +2989,9 @@ ExecEvalFieldStoreDeForm(ExprState *state, ExprEvalStep *op, ExprContext *econte
29692989
{
29702990
TupleDesc tupDesc;
29712991

2972-
/* Lookup tupdesc if first time through or after rescan */
2992+
/* Lookup tupdesc if first time through or if type changes */
29732993
tupDesc = get_cached_rowtype(op->d.fieldstore.fstore->resulttype, -1,
2974-
op->d.fieldstore.argdesc, econtext);
2994+
op->d.fieldstore.rowcache, NULL);
29752995

29762996
/* Check that current tupdesc doesn't have more fields than we allocated */
29772997
if (unlikely(tupDesc->natts > op->d.fieldstore.ncolumns))
@@ -3013,10 +3033,14 @@ ExecEvalFieldStoreDeForm(ExprState *state, ExprEvalStep *op, ExprContext *econte
30133033
void
30143034
ExecEvalFieldStoreForm(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
30153035
{
3036+
TupleDesc tupDesc;
30163037
HeapTuple tuple;
30173038

3018-
/* argdesc should already be valid from the DeForm step */
3019-
tuple = heap_form_tuple(*op->d.fieldstore.argdesc,
3039+
/* Lookup tupdesc (should be valid already) */
3040+
tupDesc = get_cached_rowtype(op->d.fieldstore.fstore->resulttype, -1,
3041+
op->d.fieldstore.rowcache, NULL);
3042+
3043+
tuple = heap_form_tuple(tupDesc,
30203044
op->d.fieldstore.values,
30213045
op->d.fieldstore.nulls);
30223046

@@ -3227,13 +3251,13 @@ ExecEvalArrayRefAssign(ExprState *state, ExprEvalStep *op)
32273251
void
32283252
ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
32293253
{
3230-
ConvertRowtypeExpr *convert = op->d.convert_rowtype.convert;
32313254
HeapTuple result;
32323255
Datum tupDatum;
32333256
HeapTupleHeader tuple;
32343257
HeapTupleData tmptup;
32353258
TupleDesc indesc,
32363259
outdesc;
3260+
bool changed = false;
32373261

32383262
/* NULL in -> NULL out */
32393263
if (*op->resnull)
@@ -3242,24 +3266,19 @@ ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op, ExprContext *econtext
32423266
tupDatum = *op->resvalue;
32433267
tuple = DatumGetHeapTupleHeader(tupDatum);
32443268

3245-
/* Lookup tupdescs if first time through or after rescan */
3246-
if (op->d.convert_rowtype.indesc == NULL)
3247-
{
3248-
get_cached_rowtype(exprType((Node *) convert->arg), -1,
3249-
&op->d.convert_rowtype.indesc,
3250-
econtext);
3251-
op->d.convert_rowtype.initialized = false;
3252-
}
3253-
if (op->d.convert_rowtype.outdesc == NULL)
3254-
{
3255-
get_cached_rowtype(convert->resulttype, -1,
3256-
&op->d.convert_rowtype.outdesc,
3257-
econtext);
3258-
op->d.convert_rowtype.initialized = false;
3259-
}
3260-
3261-
indesc = op->d.convert_rowtype.indesc;
3262-
outdesc = op->d.convert_rowtype.outdesc;
3269+
/*
3270+
* Lookup tupdescs if first time through or if type changes. We'd better
3271+
* pin them since type conversion functions could do catalog lookups and
3272+
* hence cause cache invalidation.
3273+
*/
3274+
indesc = get_cached_rowtype(op->d.convert_rowtype.inputtype, -1,
3275+
op->d.convert_rowtype.incache,
3276+
&changed);
3277+
IncrTupleDescRefCount(indesc);
3278+
outdesc = get_cached_rowtype(op->d.convert_rowtype.outputtype, -1,
3279+
op->d.convert_rowtype.outcache,
3280+
&changed);
3281+
IncrTupleDescRefCount(outdesc);
32633282

32643283
/*
32653284
* We used to be able to assert that incoming tuples are marked with
@@ -3270,8 +3289,8 @@ ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op, ExprContext *econtext
32703289
Assert(HeapTupleHeaderGetTypeId(tuple) == indesc->tdtypeid ||
32713290
HeapTupleHeaderGetTypeId(tuple) == RECORDOID);
32723291

3273-
/* if first time through, initialize conversion map */
3274-
if (!op->d.convert_rowtype.initialized)
3292+
/* if first time through, or after change, initialize conversion map */
3293+
if (changed)
32753294
{
32763295
MemoryContext old_cxt;
32773296

@@ -3282,7 +3301,6 @@ ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op, ExprContext *econtext
32823301
op->d.convert_rowtype.map =
32833302
convert_tuples_by_name(indesc, outdesc,
32843303
gettext_noop("could not convert row type"));
3285-
op->d.convert_rowtype.initialized = true;
32863304

32873305
MemoryContextSwitchTo(old_cxt);
32883306
}
@@ -3312,6 +3330,9 @@ ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op, ExprContext *econtext
33123330
*/
33133331
*op->resvalue = heap_copy_tuple_as_datum(&tmptup, outdesc);
33143332
}
3333+
3334+
DecrTupleDescRefCount(indesc);
3335+
DecrTupleDescRefCount(outdesc);
33153336
}
33163337

33173338
/*

0 commit comments

Comments
 (0)