Commit 95c5acb
For inplace update, send nontransactional invalidations.
The inplace update survives ROLLBACK.  The inval didn't, so another backend's DDL could then update the row without incorporating the inplace update.  In the test this fixes, a mix of CREATE INDEX and ALTER TABLE resulted in a table with an index, yet relhasindex=f.  That is a source of index corruption.

Back-patch to v12 (all supported versions).  The back branch versions don't change WAL, because those branches just added end-of-recovery SIResetAll().  All branches change the ABI of extern function PrepareToInvalidateCacheTuple().  No PGXN extension calls that, and there's no apparent use case in extensions.

Reviewed by Nitin Motiani and (in earlier versions) Andres Freund.

Discussion: https://postgr.es/m/20240523000548.58.nmisch@google.com
1 parent a4668c9 commit 95c5acb

13 files changed: +320 -137 lines changed

src/backend/access/heap/heapam.c (+36 -7)

@@ -6351,6 +6351,24 @@ heap_inplace_update_and_unlock(Relation relation,
 	if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
 		elog(ERROR, "wrong tuple length");
 
+	/*
+	 * Construct shared cache inval if necessary.  Note that because we only
+	 * pass the new version of the tuple, this mustn't be used for any
+	 * operations that could change catcache lookup keys.  But we aren't
+	 * bothering with index updates either, so that's true a fortiori.
+	 */
+	CacheInvalidateHeapTupleInplace(relation, tuple, NULL);
+
+	/*
+	 * Unlink relcache init files as needed.  If unlinking, acquire
+	 * RelCacheInitLock until after associated invalidations.  By doing this
+	 * in advance, if we checkpoint and then crash between inplace
+	 * XLogInsert() and inval, we don't rely on StartupXLOG() ->
+	 * RelationCacheInitFileRemove().  That uses elevel==LOG, so replay would
+	 * neglect to PANIC on EIO.
+	 */
+	PreInplace_Inval();
+
 	/* NO EREPORT(ERROR) from here till changes are logged */
 	START_CRIT_SECTION();

@@ -6394,17 +6412,28 @@ heap_inplace_update_and_unlock(Relation relation,
 		PageSetLSN(BufferGetPage(buffer), recptr);
 	}
 
+	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+
+	/*
+	 * Send invalidations to shared queue.  SearchSysCacheLocked1() assumes we
+	 * do this before UnlockTuple().
+	 *
+	 * If we're mutating a tuple visible only to this transaction, there's an
+	 * equivalent transactional inval from the action that created the tuple,
+	 * and this inval is superfluous.
+	 */
+	AtInplace_Inval();
+
 	END_CRIT_SECTION();
+	UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
 
-	heap_inplace_unlock(relation, oldtup, buffer);
+	AcceptInvalidationMessages();	/* local processing of just-sent inval */
 
 	/*
-	 * Send out shared cache inval if necessary.  Note that because we only
-	 * pass the new version of the tuple, this mustn't be used for any
-	 * operations that could change catcache lookup keys.  But we aren't
-	 * bothering with index updates either, so that's true a fortiori.
-	 *
-	 * XXX ROLLBACK discards the invalidation.  See test inplace-inval.spec.
+	 * Queue a transactional inval.  The immediate invalidation we just sent
+	 * is the only one known to be necessary.  To reduce risk from the
+	 * transition to immediate invalidation, continue sending a transactional
+	 * invalidation like we've long done.  Third-party code might rely on it.
 	 */
 	if (!IsBootstrapProcessingMode())
 		CacheInvalidateHeapTuple(relation, tuple, NULL);
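
Condensed, the ordering this gives heap_inplace_update_and_unlock() is sketched below; this is a paraphrase of the function as patched, with the tuple overwrite and WAL details elided, not the verbatim body:

	/* Sketch: invals are prepared before, and sent inside, the critical section. */
	CacheInvalidateHeapTupleInplace(relation, tuple, NULL);	/* build the inval */
	PreInplace_Inval();		/* unlink relcache init files under RelCacheInitLock */

	START_CRIT_SECTION();
	/* ... overwrite the tuple in place, MarkBufferDirty(), XLogInsert() ... */
	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
	AtInplace_Inval();		/* to the shared queue; survives a later ROLLBACK */
	END_CRIT_SECTION();
	UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);

	AcceptInvalidationMessages();	/* local processing of just-sent inval */
	if (!IsBootstrapProcessingMode())
		CacheInvalidateHeapTuple(relation, tuple, NULL);	/* legacy transactional inval */

Sending AtInplace_Inval() inside the critical section means that once the change is WAL-logged, any failure to deliver the inval escalates to PANIC instead of leaving other backends with stale caches.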

src/backend/access/transam/xact.c (+18 -8)

@@ -1358,14 +1358,24 @@ RecordTransactionCommit(void)
 
 		/*
 		 * Transactions without an assigned xid can contain invalidation
-		 * messages (e.g. explicit relcache invalidations or catcache
-		 * invalidations for inplace updates); standbys need to process those.
-		 * We can't emit a commit record without an xid, and we don't want to
-		 * force assigning an xid, because that'd be problematic for e.g.
-		 * vacuum.  Hence we emit a bespoke record for the invalidations.  We
-		 * don't want to use that in case a commit record is emitted, so they
-		 * happen synchronously with commits (besides not wanting to emit more
-		 * WAL records).
+		 * messages.  While inplace updates do this, this is not known to be
+		 * necessary; see comment at inplace CacheInvalidateHeapTuple().
+		 * Extensions might still rely on this capability, and standbys may
+		 * need to process those invals.  We can't emit a commit record
+		 * without an xid, and we don't want to force assigning an xid,
+		 * because that'd be problematic for e.g. vacuum.  Hence we emit a
+		 * bespoke record for the invalidations.  We don't want to use that in
+		 * case a commit record is emitted, so they happen synchronously with
+		 * commits (besides not wanting to emit more WAL records).
+		 *
+		 * XXX Every known use of this capability is a defect.  Since an XID
+		 * isn't controlling visibility of the change that prompted invals,
+		 * other sessions need the inval even if this transaction aborts.
+		 *
+		 * ON COMMIT DELETE ROWS does a nontransactional index_build(), which
+		 * queues a relcache inval, including in transactions without an xid
+		 * that had read the (empty) table.  Standbys don't need any ON COMMIT
+		 * DELETE ROWS invals, but we've not done the work to withhold them.
 		 */
 		if (nmsgs != 0)
 		{
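
For context, the branch this comment sits in emits the bespoke record roughly as follows (a paraphrase, assuming, per src/backend/storage/ipc/standby.c, that LogStandbyInvalidationMessages() is the helper that writes the XLOG_XACT_INVALIDATIONS record):

	/* Paraphrase of the xid-less path in RecordTransactionCommit(). */
	if (nmsgs != 0)
	{
		LogStandbyInvalidationMessages(nmsgs, invalMessages,
									   RelcacheInitFileInval);
	}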

src/backend/catalog/index.c (+9 -2)

@@ -2889,12 +2889,19 @@ index_update_stats(Relation rel,
 	if (dirty)
 	{
 		systable_inplace_update_finish(state, tuple);
-		/* the above sends a cache inval message */
+		/* the above sends transactional and immediate cache inval messages */
 	}
 	else
 	{
 		systable_inplace_update_cancel(state);
-		/* no need to change tuple, but force relcache inval anyway */
+
+		/*
+		 * While we didn't change relhasindex, CREATE INDEX needs a
+		 * transactional inval for when the new index's catalog rows become
+		 * visible.  Other CREATE INDEX and REINDEX code happens to also queue
+		 * this inval, but keep this in case rare callers rely on this part of
+		 * our API contract.
+		 */
 		CacheInvalidateRelcacheByTuple(tuple);
 	}
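
For reference, index_update_stats() reaches this code through the systable_inplace_update_*() protocol; a rough sketch of that usage pattern follows (argument lists abbreviated and approximate, per src/backend/access/index/genam.c):

	HeapTuple	tuple;
	void	   *state;

	/* Lock the pg_class row and get a mutable copy of it. */
	systable_inplace_update_begin(pg_class, ClassOidIndexId, true, NULL,
								  1, key, &tuple, &state);
	if (dirty)
		systable_inplace_update_finish(state, tuple);	/* write + send invals */
	else
		systable_inplace_update_cancel(state);	/* release locks, write nothing */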

src/backend/commands/event_trigger.c (-5)

@@ -975,11 +975,6 @@ EventTriggerOnLogin(void)
 				 * this instead of regular updates serves two purposes. First,
 				 * that avoids possible waiting on the row-level lock. Second,
 				 * that avoids dealing with TOAST.
-				 *
-				 * Changes made by inplace update may be lost due to
-				 * concurrent normal updates; see inplace-inval.spec. However,
-				 * we are OK with that. The subsequent connections will still
-				 * have a chance to set "dathasloginevt" to false.
 				 */
 				systable_inplace_update_finish(state, tuple);
 			}

src/backend/replication/logical/decode.c (+11 -15)

@@ -508,23 +508,19 @@ heap_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 
 			/*
 			 * Inplace updates are only ever performed on catalog tuples and
-			 * can, per definition, not change tuple visibility.  Since we
-			 * don't decode catalog tuples, we're not interested in the
-			 * record's contents.
+			 * can, per definition, not change tuple visibility.  Inplace
+			 * updates don't affect storage or interpretation of table rows,
+			 * so they don't affect logicalrep_write_tuple() outcomes.  Hence,
+			 * we don't process invalidations from the original operation.  If
+			 * inplace updates did affect those things, invalidations wouldn't
+			 * make it work, since there are no snapshot-specific versions of
+			 * inplace-updated values.  Since we also don't decode catalog
+			 * tuples, we're not interested in the record's contents.
 			 *
-			 * In-place updates can be used either by XID-bearing transactions
-			 * (e.g. in CREATE INDEX CONCURRENTLY) or by XID-less
-			 * transactions (e.g. VACUUM).  In the former case, the commit
-			 * record will include cache invalidations, so we mark the
-			 * transaction as catalog modifying here.  Currently that's
-			 * redundant because the commit will do that as well, but once we
-			 * support decoding in-progress relations, this will be important.
+			 * WAL contains likely-unnecessary commit-time invals from the
+			 * CacheInvalidateHeapTuple() call in heap_inplace_update().
+			 * Excess invalidation is safe.
 			 */
-			if (!TransactionIdIsValid(xid))
-				break;
-
-			(void) SnapBuildProcessChange(builder, xid, buf->origptr);
-			ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr);
 			break;
 
 		case XLOG_HEAP_CONFIRM:
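
After this change, the whole case arm reduces to the comment plus a break; condensed (the case label is XLOG_HEAP_INPLACE in heap_decode()):

		case XLOG_HEAP_INPLACE:
			/* catalog-only; no snapshot or catalog-change bookkeeping needed */
			break;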

src/backend/utils/cache/catcache.c (+4 -3)

@@ -2288,7 +2288,8 @@ void
 PrepareToInvalidateCacheTuple(Relation relation,
 							  HeapTuple tuple,
 							  HeapTuple newtuple,
-							  void (*function) (int, uint32, Oid))
+							  void (*function) (int, uint32, Oid, void *),
+							  void *context)
 {
 	slist_iter	iter;
 	Oid			reloid;

@@ -2329,7 +2330,7 @@ PrepareToInvalidateCacheTuple(Relation relation,
 		hashvalue = CatalogCacheComputeTupleHashValue(ccp, ccp->cc_nkeys, tuple);
 		dbid = ccp->cc_relisshared ? (Oid) 0 : MyDatabaseId;
 
-		(*function) (ccp->id, hashvalue, dbid);
+		(*function) (ccp->id, hashvalue, dbid, context);
 
 		if (newtuple)
 		{

@@ -2338,7 +2339,7 @@ PrepareToInvalidateCacheTuple(Relation relation,
 			newhashvalue = CatalogCacheComputeTupleHashValue(ccp, ccp->cc_nkeys, newtuple);
 
 			if (newhashvalue != hashvalue)
-				(*function) (ccp->id, newhashvalue, dbid);
+				(*function) (ccp->id, newhashvalue, dbid, context);
 		}
 	}
 }
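
The ABI change is mechanical: inval-building callbacks gain a pass-through context argument. A minimal sketch of a conforming callback follows; CollectInval and InvalBuildState are hypothetical names for illustration, not identifiers from this commit:

	typedef struct InvalBuildState
	{
		int			nmsgs;		/* stand-in for a real message buffer */
	} InvalBuildState;

	/* Matches the new pointer type: void (*) (int, uint32, Oid, void *). */
	static void
	CollectInval(int cacheId, uint32 hashValue, Oid dbId, void *context)
	{
		InvalBuildState *buildstate = (InvalBuildState *) context;

		buildstate->nmsgs++;	/* a real callback would register the inval here */
	}

	/* Caller threads its state through PrepareToInvalidateCacheTuple(): */
	PrepareToInvalidateCacheTuple(relation, tuple, newtuple,
								  CollectInval, &buildstate);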
