Commit 0ea9d40

For inplace update, send nontransactional invalidations.
The inplace update survives ROLLBACK.  The inval didn't, so another
backend's DDL could then update the row without incorporating the
inplace update.  In the test this fixes, a mix of CREATE INDEX and ALTER
TABLE resulted in a table with an index, yet relhasindex=f.  That is a
source of index corruption.  Back-patch to v12 (all supported versions).
The back branch versions don't change WAL, because those branches just
added end-of-recovery SIResetAll().  All branches change the ABI of
extern function PrepareToInvalidateCacheTuple().  No PGXN extension
calls that, and there's no apparent use case in extensions.

Reviewed by Nitin Motiani and (in earlier versions) Andres Freund.

Discussion: https://postgr.es/m/20240523000548.58.nmisch@google.com
1 parent 67f30c7 commit 0ea9d40

11 files changed: +289, −117 lines

src/backend/access/heap/heapam.c

Lines changed: 36 additions & 7 deletions
@@ -6111,6 +6111,24 @@ heap_inplace_update_and_unlock(Relation relation,
 	if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
 		elog(ERROR, "wrong tuple length");
 
+	/*
+	 * Construct shared cache inval if necessary.  Note that because we only
+	 * pass the new version of the tuple, this mustn't be used for any
+	 * operations that could change catcache lookup keys.  But we aren't
+	 * bothering with index updates either, so that's true a fortiori.
+	 */
+	CacheInvalidateHeapTupleInplace(relation, tuple, NULL);
+
+	/*
+	 * Unlink relcache init files as needed.  If unlinking, acquire
+	 * RelCacheInitLock until after associated invalidations.  By doing this
+	 * in advance, if we checkpoint and then crash between inplace
+	 * XLogInsert() and inval, we don't rely on StartupXLOG() ->
+	 * RelationCacheInitFileRemove().  That uses elevel==LOG, so replay would
+	 * neglect to PANIC on EIO.
+	 */
+	PreInplace_Inval();
+
 	/* NO EREPORT(ERROR) from here till changes are logged */
 	START_CRIT_SECTION();
 
@@ -6154,17 +6172,28 @@ heap_inplace_update_and_unlock(Relation relation,
 		PageSetLSN(BufferGetPage(buffer), recptr);
 	}
 
+	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+
+	/*
+	 * Send invalidations to shared queue.  SearchSysCacheLocked1() assumes we
+	 * do this before UnlockTuple().
+	 *
+	 * If we're mutating a tuple visible only to this transaction, there's an
+	 * equivalent transactional inval from the action that created the tuple,
+	 * and this inval is superfluous.
+	 */
+	AtInplace_Inval();
+
 	END_CRIT_SECTION();
+	UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
 
-	heap_inplace_unlock(relation, oldtup, buffer);
+	AcceptInvalidationMessages();	/* local processing of just-sent inval */
 
 	/*
-	 * Send out shared cache inval if necessary.  Note that because we only
-	 * pass the new version of the tuple, this mustn't be used for any
-	 * operations that could change catcache lookup keys.  But we aren't
-	 * bothering with index updates either, so that's true a fortiori.
-	 *
-	 * XXX ROLLBACK discards the invalidation.  See test inplace-inval.spec.
+	 * Queue a transactional inval.  The immediate invalidation we just sent
+	 * is the only one known to be necessary.  To reduce risk from the
+	 * transition to immediate invalidation, continue sending a transactional
+	 * invalidation like we've long done.  Third-party code might rely on it.
 	 */
 	if (!IsBootstrapProcessingMode())
 		CacheInvalidateHeapTuple(relation, tuple, NULL);
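
Taken together, the two hunks above reorder the tail of heap_inplace_update_and_unlock() so that invals are built before the critical section and reach the shared queue before the tuple lock is released. A condensed sketch of the post-patch flow (paraphrasing the diff; the WAL-logging body is elided):

	/* build catcache/relcache invals before entering the critical section */
	CacheInvalidateHeapTupleInplace(relation, tuple, NULL);
	PreInplace_Inval();			/* unlink relcache init files early */

	START_CRIT_SECTION();
	/* ... overwrite tuple data, MarkBufferDirty(), XLogInsert() ... */
	AtInplace_Inval();			/* invals hit the shared queue pre-unlock */
	END_CRIT_SECTION();
	UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);

	AcceptInvalidationMessages();	/* process our own inval locally */
	if (!IsBootstrapProcessingMode())
		CacheInvalidateHeapTuple(relation, tuple, NULL);	/* legacy transactional inval */

Because AtInplace_Inval() runs inside the critical section, any failure to deliver the already-built invals escalates to PANIC instead of leaving other backends with stale caches.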

src/backend/access/transam/xact.c

Lines changed: 18 additions & 8 deletions
@@ -1249,14 +1249,24 @@ RecordTransactionCommit(void)
 
 		/*
 		 * Transactions without an assigned xid can contain invalidation
-		 * messages (e.g. explicit relcache invalidations or catcache
-		 * invalidations for inplace updates); standbys need to process those.
-		 * We can't emit a commit record without an xid, and we don't want to
-		 * force assigning an xid, because that'd be problematic for e.g.
-		 * vacuum.  Hence we emit a bespoke record for the invalidations.  We
-		 * don't want to use that in case a commit record is emitted, so they
-		 * happen synchronously with commits (besides not wanting to emit more
-		 * WAL records).
+		 * messages.  While inplace updates do this, this is not known to be
+		 * necessary; see comment at inplace CacheInvalidateHeapTuple().
+		 * Extensions might still rely on this capability, and standbys may
+		 * need to process those invals.  We can't emit a commit record
+		 * without an xid, and we don't want to force assigning an xid,
+		 * because that'd be problematic for e.g. vacuum.  Hence we emit a
+		 * bespoke record for the invalidations.  We don't want to use that in
+		 * case a commit record is emitted, so they happen synchronously with
+		 * commits (besides not wanting to emit more WAL records).
+		 *
+		 * XXX Every known use of this capability is a defect.  Since an XID
+		 * isn't controlling visibility of the change that prompted invals,
+		 * other sessions need the inval even if this transaction aborts.
+		 *
+		 * ON COMMIT DELETE ROWS does a nontransactional index_build(), which
+		 * queues a relcache inval, including in transactions without an xid
+		 * that had read the (empty) table.  Standbys don't need any ON COMMIT
+		 * DELETE ROWS invals, but we've not done the work to withhold them.
 		 */
 		if (nmsgs != 0)
 		{
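
For context, the branch this comment guards is the one that emits the bespoke invalidation record. In current sources it looks roughly like the following sketch (paraphrased from the surrounding xact.c code, not part of this diff; LogStandbyInvalidations() lives in standby.c):

	if (nmsgs != 0)
	{
		/* xid-less commit: emit a standalone XLOG_INVALIDATIONS record */
		LogStandbyInvalidations(nmsgs, invalMessages,
								RelcacheInitFileInval);
		wrote_xlog = true;	/* not strictly necessary */
	}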

src/backend/catalog/index.c

Lines changed: 9 additions & 2 deletions
@@ -2882,12 +2882,19 @@ index_update_stats(Relation rel,
 	if (dirty)
 	{
 		systable_inplace_update_finish(state, tuple);
-		/* the above sends a cache inval message */
+		/* the above sends transactional and immediate cache inval messages */
 	}
 	else
 	{
 		systable_inplace_update_cancel(state);
-		/* no need to change tuple, but force relcache inval anyway */
+
+		/*
+		 * While we didn't change relhasindex, CREATE INDEX needs a
+		 * transactional inval for when the new index's catalog rows become
+		 * visible.  Other CREATE INDEX and REINDEX code happens to also queue
+		 * this inval, but keep this in case rare callers rely on this part of
+		 * our API contract.
+		 */
 		CacheInvalidateRelcacheByTuple(tuple);
 	}

src/backend/replication/logical/decode.c

Lines changed: 11 additions & 15 deletions
@@ -460,23 +460,19 @@ DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 
 			/*
 			 * Inplace updates are only ever performed on catalog tuples and
-			 * can, per definition, not change tuple visibility.  Since we
-			 * don't decode catalog tuples, we're not interested in the
-			 * record's contents.
+			 * can, per definition, not change tuple visibility.  Inplace
+			 * updates don't affect storage or interpretation of table rows,
+			 * so they don't affect logicalrep_write_tuple() outcomes.  Hence,
+			 * we don't process invalidations from the original operation.  If
+			 * inplace updates did affect those things, invalidations wouldn't
+			 * make it work, since there are no snapshot-specific versions of
+			 * inplace-updated values.  Since we also don't decode catalog
+			 * tuples, we're not interested in the record's contents.
 			 *
-			 * In-place updates can be used either by XID-bearing transactions
-			 * (e.g. in CREATE INDEX CONCURRENTLY) or by XID-less
-			 * transactions (e.g. VACUUM).  In the former case, the commit
-			 * record will include cache invalidations, so we mark the
-			 * transaction as catalog modifying here.  Currently that's
-			 * redundant because the commit will do that as well, but once we
-			 * support decoding in-progress relations, this will be important.
+			 * WAL contains likely-unnecessary commit-time invals from the
+			 * CacheInvalidateHeapTuple() call in heap_inplace_update().
+			 * Excess invalidation is safe.
 			 */
-			if (!TransactionIdIsValid(xid))
-				break;
-
-			SnapBuildProcessChange(builder, xid, buf->origptr);
-			ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr);
 			break;
 
 		case XLOG_HEAP_CONFIRM:

src/backend/utils/cache/catcache.c

Lines changed: 4 additions & 3 deletions
@@ -2129,7 +2129,8 @@ void
 PrepareToInvalidateCacheTuple(Relation relation,
 							  HeapTuple tuple,
 							  HeapTuple newtuple,
-							  void (*function) (int, uint32, Oid))
+							  void (*function) (int, uint32, Oid, void *),
+							  void *context)
 {
 	slist_iter	iter;
 	Oid			reloid;
@@ -2170,7 +2171,7 @@ PrepareToInvalidateCacheTuple(Relation relation,
 		hashvalue = CatalogCacheComputeTupleHashValue(ccp, ccp->cc_nkeys, tuple);
 		dbid = ccp->cc_relisshared ? (Oid) 0 : MyDatabaseId;
 
-		(*function) (ccp->id, hashvalue, dbid);
+		(*function) (ccp->id, hashvalue, dbid, context);
 
 		if (newtuple)
 		{
@@ -2179,7 +2180,7 @@ PrepareToInvalidateCacheTuple(Relation relation,
 			newhashvalue = CatalogCacheComputeTupleHashValue(ccp, ccp->cc_nkeys, newtuple);
 
 			if (newhashvalue != hashvalue)
-				(*function) (ccp->id, newhashvalue, dbid);
+				(*function) (ccp->id, newhashvalue, dbid, context);
 		}
 	}
 }
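
The extra void *context argument lets callers thread state into the per-catcache callback without globals. A caller-side sketch of the new shape (the callback and struct names here are hypothetical, for illustration only; the real callbacks live in inval.c):

	/* Hypothetical state threaded through PrepareToInvalidateCacheTuple(). */
	typedef struct MyInvalContext
	{
		int			ncalls;		/* number of catcache invals prepared */
	} MyInvalContext;

	/* Matches the new callback type: void (*) (int, uint32, Oid, void *) */
	static void
	my_register_catcache_inval(int cacheId, uint32 hashValue, Oid dbId,
							   void *context)
	{
		MyInvalContext *ctx = (MyInvalContext *) context;

		ctx->ncalls++;
		/* ... queue (cacheId, hashValue, dbId) for invalidation ... */
	}

	/* The same context pointer is handed back on every invocation. */
	MyInvalContext ctx = {0};

	PrepareToInvalidateCacheTuple(relation, tuple, NULL,
								  my_register_catcache_inval, &ctx);

This is the ABI change the commit message flags: any out-of-tree caller of PrepareToInvalidateCacheTuple() must add the context parameter to its callback.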
