Skip to content

Commit 0e2f3d7

Browse files
committed
Fix MVCC bug with prepared xact with subxacts on standby
We did not recover the subtransaction IDs of prepared transactions when starting a hot standby from a shutdown checkpoint. As a result, such subtransactions were considered as aborted, rather than in-progress. That would lead to hint bits being set incorrectly, and the subtransactions suddenly becoming visible to old snapshots when the prepared transaction was committed. To fix, update pg_subtrans with prepared transactions's subxids when starting hot standby from a shutdown checkpoint. The snapshots taken from that state need to be marked as "suboverflowed", so that we also check the pg_subtrans. Backport to all supported versions. Discussion: https://www.postgresql.org/message-id/6b852e98-2d49-4ca1-9e95-db419a2696e0@iki.fi
1 parent 8b6a772 commit 0e2f3d7

File tree

7 files changed

+85
-17
lines changed

7 files changed

+85
-17
lines changed

src/backend/access/transam/twophase.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2013,9 +2013,8 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
20132013
* This is never called at the end of recovery - we use
20142014
* RecoverPreparedTransactions() at that point.
20152015
*
2016-
* The lack of calls to SubTransSetParent() calls here is by design;
2017-
* those calls are made by RecoverPreparedTransactions() at the end of recovery
2018-
* for those xacts that need this.
2016+
* This updates pg_subtrans, so that any subtransactions will be correctly
2017+
* seen as in-progress in snapshots taken during recovery.
20192018
*/
20202019
void
20212020
StandbyRecoverPreparedTransactions(void)
@@ -2035,7 +2034,7 @@ StandbyRecoverPreparedTransactions(void)
20352034

20362035
buf = ProcessTwoPhaseBuffer(xid,
20372036
gxact->prepare_start_lsn,
2038-
gxact->ondisk, false, false);
2037+
gxact->ondisk, true, false);
20392038
if (buf != NULL)
20402039
pfree(buf);
20412040
}

src/backend/access/transam/xlog.c

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5273,6 +5273,9 @@ StartupXLOG(void)
52735273
RunningTransactionsData running;
52745274
TransactionId latestCompletedXid;
52755275

5276+
/* Update pg_subtrans entries for any prepared transactions */
5277+
StandbyRecoverPreparedTransactions();
5278+
52765279
/*
52775280
* Construct a RunningTransactions snapshot representing a
52785281
* shut down server, with only prepared transactions still
@@ -5281,7 +5284,7 @@ StartupXLOG(void)
52815284
*/
52825285
running.xcnt = nxids;
52835286
running.subxcnt = 0;
5284-
running.subxid_overflow = false;
5287+
running.subxid_status = SUBXIDS_IN_SUBTRANS;
52855288
running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
52865289
running.oldestRunningXid = oldestActiveXID;
52875290
latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
@@ -5291,8 +5294,6 @@ StartupXLOG(void)
52915294
running.xids = xids;
52925295

52935296
ProcArrayApplyRecoveryInfo(&running);
5294-
5295-
StandbyRecoverPreparedTransactions();
52965297
}
52975298
}
52985299

@@ -7647,6 +7648,9 @@ xlog_redo(XLogReaderState *record)
76477648

76487649
oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
76497650

7651+
/* Update pg_subtrans entries for any prepared transactions */
7652+
StandbyRecoverPreparedTransactions();
7653+
76507654
/*
76517655
* Construct a RunningTransactions snapshot representing a shut
76527656
* down server, with only prepared transactions still alive. We're
@@ -7655,7 +7659,7 @@ xlog_redo(XLogReaderState *record)
76557659
*/
76567660
running.xcnt = nxids;
76577661
running.subxcnt = 0;
7658-
running.subxid_overflow = false;
7662+
running.subxid_status = SUBXIDS_IN_SUBTRANS;
76597663
running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
76607664
running.oldestRunningXid = oldestActiveXID;
76617665
latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
@@ -7665,8 +7669,6 @@ xlog_redo(XLogReaderState *record)
76657669
running.xids = xids;
76667670

76677671
ProcArrayApplyRecoveryInfo(&running);
7668-
7669-
StandbyRecoverPreparedTransactions();
76707672
}
76717673

76727674
/* ControlFile->checkPointCopy always tracks the latest ckpt XID */

src/backend/storage/ipc/procarray.c

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1099,7 +1099,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
10991099
* If the snapshot isn't overflowed or if its empty we can reset our
11001100
* pending state and use this snapshot instead.
11011101
*/
1102-
if (!running->subxid_overflow || running->xcnt == 0)
1102+
if (running->subxid_status != SUBXIDS_MISSING || running->xcnt == 0)
11031103
{
11041104
/*
11051105
* If we have already collected known assigned xids, we need to
@@ -1251,7 +1251,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
12511251
* missing, so conservatively assume the last one is latestObservedXid.
12521252
* ----------
12531253
*/
1254-
if (running->subxid_overflow)
1254+
if (running->subxid_status == SUBXIDS_MISSING)
12551255
{
12561256
standbyState = STANDBY_SNAPSHOT_PENDING;
12571257

@@ -1263,6 +1263,18 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
12631263
standbyState = STANDBY_SNAPSHOT_READY;
12641264

12651265
standbySnapshotPendingXmin = InvalidTransactionId;
1266+
1267+
/*
1268+
* If the 'xids' array didn't include all subtransactions, we have to
1269+
* mark any snapshots taken as overflowed.
1270+
*/
1271+
if (running->subxid_status == SUBXIDS_IN_SUBTRANS)
1272+
procArray->lastOverflowedXid = latestObservedXid;
1273+
else
1274+
{
1275+
Assert(running->subxid_status == SUBXIDS_IN_ARRAY);
1276+
procArray->lastOverflowedXid = InvalidTransactionId;
1277+
}
12661278
}
12671279

12681280
/*
@@ -2897,7 +2909,7 @@ GetRunningTransactionData(void)
28972909

28982910
CurrentRunningXacts->xcnt = count - subcount;
28992911
CurrentRunningXacts->subxcnt = subcount;
2900-
CurrentRunningXacts->subxid_overflow = suboverflowed;
2912+
CurrentRunningXacts->subxid_status = suboverflowed ? SUBXIDS_IN_SUBTRANS : SUBXIDS_IN_ARRAY;
29012913
CurrentRunningXacts->nextXid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
29022914
CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
29032915
CurrentRunningXacts->latestCompletedXid = latestCompletedXid;

src/backend/storage/ipc/standby.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1130,7 +1130,7 @@ standby_redo(XLogReaderState *record)
11301130

11311131
running.xcnt = xlrec->xcnt;
11321132
running.subxcnt = xlrec->subxcnt;
1133-
running.subxid_overflow = xlrec->subxid_overflow;
1133+
running.subxid_status = xlrec->subxid_overflow ? SUBXIDS_MISSING : SUBXIDS_IN_ARRAY;
11341134
running.nextXid = xlrec->nextXid;
11351135
running.latestCompletedXid = xlrec->latestCompletedXid;
11361136
running.oldestRunningXid = xlrec->oldestRunningXid;
@@ -1286,7 +1286,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
12861286

12871287
xlrec.xcnt = CurrRunningXacts->xcnt;
12881288
xlrec.subxcnt = CurrRunningXacts->subxcnt;
1289-
xlrec.subxid_overflow = CurrRunningXacts->subxid_overflow;
1289+
xlrec.subxid_overflow = (CurrRunningXacts->subxid_status != SUBXIDS_IN_ARRAY);
12901290
xlrec.nextXid = CurrRunningXacts->nextXid;
12911291
xlrec.oldestRunningXid = CurrRunningXacts->oldestRunningXid;
12921292
xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid;
@@ -1303,7 +1303,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
13031303

13041304
recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS);
13051305

1306-
if (CurrRunningXacts->subxid_overflow)
1306+
if (xlrec.subxid_overflow)
13071307
elog(trace_recovery(DEBUG2),
13081308
"snapshot of %u running transactions overflowed (lsn %X/%X oldest xid %u latest complete %u next xid %u)",
13091309
CurrRunningXacts->xcnt,

src/include/storage/standby.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,19 @@ extern void StandbyReleaseOldLocks(TransactionId oldxid);
7474
* almost immediately see the data we need to begin executing queries.
7575
*/
7676

77+
typedef enum
78+
{
79+
SUBXIDS_IN_ARRAY, /* xids array includes all running subxids */
80+
SUBXIDS_MISSING, /* snapshot overflowed, subxids are missing */
81+
SUBXIDS_IN_SUBTRANS, /* subxids are not included in 'xids', but
82+
* pg_subtrans is fully up-to-date */
83+
} subxids_array_status;
84+
7785
typedef struct RunningTransactionsData
7886
{
7987
int xcnt; /* # of xact ids in xids[] */
8088
int subxcnt; /* # of subxact ids in xids[] */
81-
bool subxid_overflow; /* snapshot overflowed, subxids missing */
89+
subxids_array_status subxid_status;
8290
TransactionId nextXid; /* xid from ShmemVariableCache->nextXid */
8391
TransactionId oldestRunningXid; /* *not* oldestXmin */
8492
TransactionId latestCompletedXid; /* so we can set xmax */

src/test/recovery/t/009_twophase.pl

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,52 @@ sub configure_and_reload
308308

309309
$cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_12'");
310310

311+
###############################################################################
312+
# Check visibility of prepared transactions in standby after a restart while
313+
# primary is down.
314+
###############################################################################
315+
316+
$cur_primary->psql(
317+
'postgres', "
318+
CREATE TABLE t_009_tbl_standby_mvcc (id int, msg text);
319+
BEGIN;
320+
INSERT INTO t_009_tbl_standby_mvcc VALUES (1, 'issued to ${cur_primary_name}');
321+
SAVEPOINT s1;
322+
INSERT INTO t_009_tbl_standby_mvcc VALUES (2, 'issued to ${cur_primary_name}');
323+
PREPARE TRANSACTION 'xact_009_standby_mvcc';
324+
");
325+
$cur_primary->stop;
326+
$cur_standby->restart;
327+
328+
# Acquire a snapshot in standby, before we commit the prepared transaction
329+
my $standby_session = $cur_standby->background_psql('postgres', on_error_die => 1);
330+
$standby_session->query_safe("BEGIN ISOLATION LEVEL REPEATABLE READ");
331+
$psql_out = $standby_session->query_safe(
332+
"SELECT count(*) FROM t_009_tbl_standby_mvcc");
333+
is($psql_out, '0',
334+
"Prepared transaction not visible in standby before commit");
335+
336+
# Commit the transaction in primary
337+
$cur_primary->start;
338+
$cur_primary->psql('postgres', "
339+
SET synchronous_commit='remote_apply'; -- To ensure the standby is caught up
340+
COMMIT PREPARED 'xact_009_standby_mvcc';
341+
");
342+
343+
# Still not visible to the old snapshot
344+
$psql_out = $standby_session->query_safe(
345+
"SELECT count(*) FROM t_009_tbl_standby_mvcc");
346+
is($psql_out, '0',
347+
"Committed prepared transaction not visible to old snapshot in standby");
348+
349+
# Is visible to a new snapshot
350+
$standby_session->query_safe("COMMIT");
351+
$psql_out = $standby_session->query_safe(
352+
"SELECT count(*) FROM t_009_tbl_standby_mvcc");
353+
is($psql_out, '2',
354+
"Committed prepared transaction is visible to new snapshot in standby");
355+
$standby_session->quit;
356+
311357
###############################################################################
312358
# Check for a lock conflict between prepared transaction with DDL inside and
313359
# replay of XLOG_STANDBY_LOCK wal record.

src/tools/pgindent/typedefs.list

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3698,6 +3698,7 @@ string
36983698
substitute_actual_parameters_context
36993699
substitute_actual_srf_parameters_context
37003700
substitute_phv_relids_context
3701+
subxids_array_status
37013702
symbol
37023703
tablespaceinfo
37033704
teSection

0 commit comments

Comments
 (0)