Skip to content

Commit c2db458

Browse files
committed
Redesign the caching done by get_cached_rowtype().
Previously, get_cached_rowtype() cached a pointer to a reference-counted tuple descriptor from the typcache, relying on the ExprContextCallback mechanism to release the tupdesc refcount when the expression tree using the tupdesc was destroyed. This worked fine when it was designed, but the introduction of within-DO-block COMMITs broke it. The refcount is logged in a transaction-lifespan resource owner, but plpgsql won't destroy simple expressions made within the DO block (before its first commit) until the DO block is exited. That results in a warning about a leaked tupdesc refcount when the COMMIT destroys the original resource owner, and then an error about the active resource owner not holding a matching refcount when the expression is destroyed. To fix, get rid of the need to have a shutdown callback at all, by instead caching a pointer to the relevant typcache entry. Those survive for the life of the backend, so we needn't worry about the pointer becoming stale. (For registered RECORD types, we can still cache a pointer to the tupdesc, knowing that it won't change for the life of the backend.) This mechanism has been in use in plpgsql and expandedrecord.c since commit 4b93f57, and seems to work well. This change requires modifying the ExprEvalStep structs used by the relevant expression step types, which is slightly worrisome for back-patching. However, there seems no good reason for extensions to be familiar with the details of these particular sub-structs. Per report from Rohit Bhogate. Back-patch to v11 where within-DO-block COMMITs became a thing. Discussion: https://postgr.es/m/CAAV6ZkQRCVBh8qAY+SZiHnz+U+FqAGBBDaDTjF2yiKa2nJSLKg@mail.gmail.com
1 parent 34f581c commit c2db458

File tree

5 files changed

+180
-94
lines changed

5 files changed

+180
-94
lines changed

src/backend/executor/execExpr.c

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1375,7 +1375,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
13751375
scratch.opcode = EEOP_FIELDSELECT;
13761376
scratch.d.fieldselect.fieldnum = fselect->fieldnum;
13771377
scratch.d.fieldselect.resulttype = fselect->resulttype;
1378-
scratch.d.fieldselect.argdesc = NULL;
1378+
scratch.d.fieldselect.rowcache.cacheptr = NULL;
13791379

13801380
ExprEvalPushStep(state, &scratch);
13811381
break;
@@ -1385,7 +1385,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
13851385
{
13861386
FieldStore *fstore = (FieldStore *) node;
13871387
TupleDesc tupDesc;
1388-
TupleDesc *descp;
1388+
ExprEvalRowtypeCache *rowcachep;
13891389
Datum *values;
13901390
bool *nulls;
13911391
int ncolumns;
@@ -1401,17 +1401,17 @@ ExecInitExprRec(Expr *node, ExprState *state,
14011401
values = (Datum *) palloc(sizeof(Datum) * ncolumns);
14021402
nulls = (bool *) palloc(sizeof(bool) * ncolumns);
14031403

1404-
/* create workspace for runtime tupdesc cache */
1405-
descp = (TupleDesc *) palloc(sizeof(TupleDesc));
1406-
*descp = NULL;
1404+
/* create shared composite-type-lookup cache struct */
1405+
rowcachep = palloc(sizeof(ExprEvalRowtypeCache));
1406+
rowcachep->cacheptr = NULL;
14071407

14081408
/* emit code to evaluate the composite input value */
14091409
ExecInitExprRec(fstore->arg, state, resv, resnull);
14101410

14111411
/* next, deform the input tuple into our workspace */
14121412
scratch.opcode = EEOP_FIELDSTORE_DEFORM;
14131413
scratch.d.fieldstore.fstore = fstore;
1414-
scratch.d.fieldstore.argdesc = descp;
1414+
scratch.d.fieldstore.rowcache = rowcachep;
14151415
scratch.d.fieldstore.values = values;
14161416
scratch.d.fieldstore.nulls = nulls;
14171417
scratch.d.fieldstore.ncolumns = ncolumns;
@@ -1469,7 +1469,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
14691469
/* finally, form result tuple */
14701470
scratch.opcode = EEOP_FIELDSTORE_FORM;
14711471
scratch.d.fieldstore.fstore = fstore;
1472-
scratch.d.fieldstore.argdesc = descp;
1472+
scratch.d.fieldstore.rowcache = rowcachep;
14731473
scratch.d.fieldstore.values = values;
14741474
scratch.d.fieldstore.nulls = nulls;
14751475
scratch.d.fieldstore.ncolumns = ncolumns;
@@ -1615,17 +1615,24 @@ ExecInitExprRec(Expr *node, ExprState *state,
16151615
case T_ConvertRowtypeExpr:
16161616
{
16171617
ConvertRowtypeExpr *convert = (ConvertRowtypeExpr *) node;
1618+
ExprEvalRowtypeCache *rowcachep;
1619+
1620+
/* cache structs must be out-of-line for space reasons */
1621+
rowcachep = palloc(2 * sizeof(ExprEvalRowtypeCache));
1622+
rowcachep[0].cacheptr = NULL;
1623+
rowcachep[1].cacheptr = NULL;
16181624

16191625
/* evaluate argument into step's result area */
16201626
ExecInitExprRec(convert->arg, state, resv, resnull);
16211627

16221628
/* and push conversion step */
16231629
scratch.opcode = EEOP_CONVERT_ROWTYPE;
1624-
scratch.d.convert_rowtype.convert = convert;
1625-
scratch.d.convert_rowtype.indesc = NULL;
1626-
scratch.d.convert_rowtype.outdesc = NULL;
1630+
scratch.d.convert_rowtype.inputtype =
1631+
exprType((Node *) convert->arg);
1632+
scratch.d.convert_rowtype.outputtype = convert->resulttype;
1633+
scratch.d.convert_rowtype.incache = &rowcachep[0];
1634+
scratch.d.convert_rowtype.outcache = &rowcachep[1];
16271635
scratch.d.convert_rowtype.map = NULL;
1628-
scratch.d.convert_rowtype.initialized = false;
16291636

16301637
ExprEvalPushStep(state, &scratch);
16311638
break;
@@ -2250,7 +2257,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
22502257
(int) ntest->nulltesttype);
22512258
}
22522259
/* initialize cache in case it's a row test */
2253-
scratch.d.nulltest_row.argdesc = NULL;
2260+
scratch.d.nulltest_row.rowcache.cacheptr = NULL;
22542261

22552262
/* first evaluate argument into result variable */
22562263
ExecInitExprRec(ntest->arg, state,

src/backend/executor/execExprInterp.c

Lines changed: 92 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,8 @@ static void ExecInitInterpreter(void);
145145
static void CheckVarSlotCompatibility(TupleTableSlot *slot, int attnum, Oid vartype);
146146
static void CheckOpSlotCompatibility(ExprEvalStep *op, TupleTableSlot *slot);
147147
static TupleDesc get_cached_rowtype(Oid type_id, int32 typmod,
148-
TupleDesc *cache_field, ExprContext *econtext);
149-
static void ShutdownTupleDescRef(Datum arg);
148+
ExprEvalRowtypeCache *rowcache,
149+
bool *changed);
150150
static void ExecEvalRowNullInt(ExprState *state, ExprEvalStep *op,
151151
ExprContext *econtext, bool checkisnull);
152152

@@ -1959,56 +1959,78 @@ CheckOpSlotCompatibility(ExprEvalStep *op, TupleTableSlot *slot)
19591959
* get_cached_rowtype: utility function to lookup a rowtype tupdesc
19601960
*
19611961
* type_id, typmod: identity of the rowtype
1962-
* cache_field: where to cache the TupleDesc pointer in expression state node
1963-
* (field must be initialized to NULL)
1964-
* econtext: expression context we are executing in
1962+
* rowcache: space for caching identity info
1963+
* (rowcache->cacheptr must be initialized to NULL)
1964+
* changed: if not NULL, *changed is set to true on any update
19651965
*
1966-
* NOTE: because the shutdown callback will be called during plan rescan,
1967-
* must be prepared to re-do this during any node execution; cannot call
1968-
* just once during expression initialization.
1966+
* The returned TupleDesc is not guaranteed pinned; caller must pin it
1967+
* to use it across any operation that might incur cache invalidation.
1968+
* (The TupleDesc is always refcounted, so just use IncrTupleDescRefCount.)
1969+
*
1970+
* NOTE: because composite types can change contents, we must be prepared
1971+
* to re-do this during any node execution; cannot call just once during
1972+
* expression initialization.
19691973
*/
19701974
static TupleDesc
19711975
get_cached_rowtype(Oid type_id, int32 typmod,
1972-
TupleDesc *cache_field, ExprContext *econtext)
1976+
ExprEvalRowtypeCache *rowcache,
1977+
bool *changed)
19731978
{
1974-
TupleDesc tupDesc = *cache_field;
1975-
1976-
/* Do lookup if no cached value or if requested type changed */
1977-
if (tupDesc == NULL ||
1978-
type_id != tupDesc->tdtypeid ||
1979-
typmod != tupDesc->tdtypmod)
1979+
if (type_id != RECORDOID)
19801980
{
1981-
tupDesc = lookup_rowtype_tupdesc(type_id, typmod);
1981+
/*
1982+
* It's a named composite type, so use the regular typcache. Do a
1983+
* lookup first time through, or if the composite type changed. Note:
1984+
* "tupdesc_id == 0" may look redundant, but it protects against the
1985+
* admittedly-theoretical possibility that type_id was RECORDOID the
1986+
* last time through, so that the cacheptr isn't TypeCacheEntry *.
1987+
*/
1988+
TypeCacheEntry *typentry = (TypeCacheEntry *) rowcache->cacheptr;
19821989

1983-
if (*cache_field)
1990+
if (unlikely(typentry == NULL ||
1991+
rowcache->tupdesc_id == 0 ||
1992+
typentry->tupDesc_identifier != rowcache->tupdesc_id))
19841993
{
1985-
/* Release old tupdesc; but callback is already registered */
1986-
ReleaseTupleDesc(*cache_field);
1987-
}
1988-
else
1989-
{
1990-
/* Need to register shutdown callback to release tupdesc */
1991-
RegisterExprContextCallback(econtext,
1992-
ShutdownTupleDescRef,
1993-
PointerGetDatum(cache_field));
1994-
}
1995-
*cache_field = tupDesc;
1994+
typentry = lookup_type_cache(type_id, TYPECACHE_TUPDESC);
1995+
if (typentry->tupDesc == NULL)
1996+
ereport(ERROR,
1997+
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
1998+
errmsg("type %s is not composite",
1999+
format_type_be(type_id))));
2000+
rowcache->cacheptr = (void *) typentry;
2001+
rowcache->tupdesc_id = typentry->tupDesc_identifier;
2002+
if (changed)
2003+
*changed = true;
2004+
}
2005+
return typentry->tupDesc;
2006+
}
2007+
else
2008+
{
2009+
/*
2010+
* A RECORD type, once registered, doesn't change for the life of the
2011+
* backend. So we don't need a typcache entry as such, which is good
2012+
* because there isn't one. It's possible that the caller is asking
2013+
* about a different type than before, though.
2014+
*/
2015+
TupleDesc tupDesc = (TupleDesc) rowcache->cacheptr;
2016+
2017+
if (unlikely(tupDesc == NULL ||
2018+
rowcache->tupdesc_id != 0 ||
2019+
type_id != tupDesc->tdtypeid ||
2020+
typmod != tupDesc->tdtypmod))
2021+
{
2022+
tupDesc = lookup_rowtype_tupdesc(type_id, typmod);
2023+
/* Drop pin acquired by lookup_rowtype_tupdesc */
2024+
ReleaseTupleDesc(tupDesc);
2025+
rowcache->cacheptr = (void *) tupDesc;
2026+
rowcache->tupdesc_id = 0; /* not a valid value for non-RECORD */
2027+
if (changed)
2028+
*changed = true;
2029+
}
2030+
return tupDesc;
19962031
}
1997-
return tupDesc;
19982032
}
19992033

2000-
/*
2001-
* Callback function to release a tupdesc refcount at econtext shutdown
2002-
*/
2003-
static void
2004-
ShutdownTupleDescRef(Datum arg)
2005-
{
2006-
TupleDesc *cache_field = (TupleDesc *) DatumGetPointer(arg);
2007-
2008-
if (*cache_field)
2009-
ReleaseTupleDesc(*cache_field);
2010-
*cache_field = NULL;
2011-
}
20122034

20132035
/*
20142036
* Fast-path functions, for very simple expressions
@@ -2600,8 +2622,7 @@ ExecEvalRowNullInt(ExprState *state, ExprEvalStep *op,
26002622

26012623
/* Lookup tupdesc if first time through or if type changes */
26022624
tupDesc = get_cached_rowtype(tupType, tupTypmod,
2603-
&op->d.nulltest_row.argdesc,
2604-
econtext);
2625+
&op->d.nulltest_row.rowcache, NULL);
26052626

26062627
/*
26072628
* heap_attisnull needs a HeapTuple not a bare HeapTupleHeader.
@@ -3034,8 +3055,7 @@ ExecEvalFieldSelect(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
30343055

30353056
/* Lookup tupdesc if first time through or if type changes */
30363057
tupDesc = get_cached_rowtype(tupType, tupTypmod,
3037-
&op->d.fieldselect.argdesc,
3038-
econtext);
3058+
&op->d.fieldselect.rowcache, NULL);
30393059

30403060
/*
30413061
* Find field's attr record. Note we don't support system columns
@@ -3093,9 +3113,9 @@ ExecEvalFieldStoreDeForm(ExprState *state, ExprEvalStep *op, ExprContext *econte
30933113
{
30943114
TupleDesc tupDesc;
30953115

3096-
/* Lookup tupdesc if first time through or after rescan */
3116+
/* Lookup tupdesc if first time through or if type changes */
30973117
tupDesc = get_cached_rowtype(op->d.fieldstore.fstore->resulttype, -1,
3098-
op->d.fieldstore.argdesc, econtext);
3118+
op->d.fieldstore.rowcache, NULL);
30993119

31003120
/* Check that current tupdesc doesn't have more fields than we allocated */
31013121
if (unlikely(tupDesc->natts > op->d.fieldstore.ncolumns))
@@ -3137,10 +3157,14 @@ ExecEvalFieldStoreDeForm(ExprState *state, ExprEvalStep *op, ExprContext *econte
31373157
void
31383158
ExecEvalFieldStoreForm(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
31393159
{
3160+
TupleDesc tupDesc;
31403161
HeapTuple tuple;
31413162

3142-
/* argdesc should already be valid from the DeForm step */
3143-
tuple = heap_form_tuple(*op->d.fieldstore.argdesc,
3163+
/* Lookup tupdesc (should be valid already) */
3164+
tupDesc = get_cached_rowtype(op->d.fieldstore.fstore->resulttype, -1,
3165+
op->d.fieldstore.rowcache, NULL);
3166+
3167+
tuple = heap_form_tuple(tupDesc,
31443168
op->d.fieldstore.values,
31453169
op->d.fieldstore.nulls);
31463170

@@ -3157,13 +3181,13 @@ ExecEvalFieldStoreForm(ExprState *state, ExprEvalStep *op, ExprContext *econtext
31573181
void
31583182
ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
31593183
{
3160-
ConvertRowtypeExpr *convert = op->d.convert_rowtype.convert;
31613184
HeapTuple result;
31623185
Datum tupDatum;
31633186
HeapTupleHeader tuple;
31643187
HeapTupleData tmptup;
31653188
TupleDesc indesc,
31663189
outdesc;
3190+
bool changed = false;
31673191

31683192
/* NULL in -> NULL out */
31693193
if (*op->resnull)
@@ -3172,24 +3196,19 @@ ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op, ExprContext *econtext
31723196
tupDatum = *op->resvalue;
31733197
tuple = DatumGetHeapTupleHeader(tupDatum);
31743198

3175-
/* Lookup tupdescs if first time through or after rescan */
3176-
if (op->d.convert_rowtype.indesc == NULL)
3177-
{
3178-
get_cached_rowtype(exprType((Node *) convert->arg), -1,
3179-
&op->d.convert_rowtype.indesc,
3180-
econtext);
3181-
op->d.convert_rowtype.initialized = false;
3182-
}
3183-
if (op->d.convert_rowtype.outdesc == NULL)
3184-
{
3185-
get_cached_rowtype(convert->resulttype, -1,
3186-
&op->d.convert_rowtype.outdesc,
3187-
econtext);
3188-
op->d.convert_rowtype.initialized = false;
3189-
}
3190-
3191-
indesc = op->d.convert_rowtype.indesc;
3192-
outdesc = op->d.convert_rowtype.outdesc;
3199+
/*
3200+
* Lookup tupdescs if first time through or if type changes. We'd better
3201+
* pin them since type conversion functions could do catalog lookups and
3202+
* hence cause cache invalidation.
3203+
*/
3204+
indesc = get_cached_rowtype(op->d.convert_rowtype.inputtype, -1,
3205+
op->d.convert_rowtype.incache,
3206+
&changed);
3207+
IncrTupleDescRefCount(indesc);
3208+
outdesc = get_cached_rowtype(op->d.convert_rowtype.outputtype, -1,
3209+
op->d.convert_rowtype.outcache,
3210+
&changed);
3211+
IncrTupleDescRefCount(outdesc);
31933212

31943213
/*
31953214
* We used to be able to assert that incoming tuples are marked with
@@ -3200,8 +3219,8 @@ ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op, ExprContext *econtext
32003219
Assert(HeapTupleHeaderGetTypeId(tuple) == indesc->tdtypeid ||
32013220
HeapTupleHeaderGetTypeId(tuple) == RECORDOID);
32023221

3203-
/* if first time through, initialize conversion map */
3204-
if (!op->d.convert_rowtype.initialized)
3222+
/* if first time through, or after change, initialize conversion map */
3223+
if (changed)
32053224
{
32063225
MemoryContext old_cxt;
32073226

@@ -3210,7 +3229,6 @@ ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op, ExprContext *econtext
32103229

32113230
/* prepare map from old to new attribute numbers */
32123231
op->d.convert_rowtype.map = convert_tuples_by_name(indesc, outdesc);
3213-
op->d.convert_rowtype.initialized = true;
32143232

32153233
MemoryContextSwitchTo(old_cxt);
32163234
}
@@ -3240,6 +3258,9 @@ ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op, ExprContext *econtext
32403258
*/
32413259
*op->resvalue = heap_copy_tuple_as_datum(&tmptup, outdesc);
32423260
}
3261+
3262+
DecrTupleDescRefCount(indesc);
3263+
DecrTupleDescRefCount(outdesc);
32433264
}
32443265

32453266
/*

0 commit comments

Comments
 (0)