Skip to content

Commit e9c8747

Browse files
committed
Fix MVCC bug with prepared xact with subxacts on standby
We did not recover the subtransaction IDs of prepared transactions when starting a hot standby from a shutdown checkpoint. As a result, such subtransactions were considered as aborted, rather than in-progress. That would lead to hint bits being set incorrectly, and the subtransactions suddenly becoming visible to old snapshots when the prepared transaction was committed. To fix, update pg_subtrans with prepared transactions' subxids when starting hot standby from a shutdown checkpoint. The snapshots taken from that state need to be marked as "suboverflowed", so that we also check the pg_subtrans. Backport to all supported versions. Discussion: https://www.postgresql.org/message-id/6b852e98-2d49-4ca1-9e95-db419a2696e0@iki.fi
1 parent 071e19a commit e9c8747

File tree

7 files changed

+85
-18
lines changed

7 files changed

+85
-18
lines changed

src/backend/access/transam/twophase.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1997,9 +1997,8 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
19971997
* This is never called at the end of recovery - we use
19981998
* RecoverPreparedTransactions() at that point.
19991999
*
2000-
* The lack of calls to SubTransSetParent() calls here is by design;
2001-
* those calls are made by RecoverPreparedTransactions() at the end of recovery
2002-
* for those xacts that need this.
2000+
* This updates pg_subtrans, so that any subtransactions will be correctly
2001+
* seen as in-progress in snapshots taken during recovery.
20032002
*/
20042003
void
20052004
StandbyRecoverPreparedTransactions(void)
@@ -2019,7 +2018,7 @@ StandbyRecoverPreparedTransactions(void)
20192018

20202019
buf = ProcessTwoPhaseBuffer(xid,
20212020
gxact->prepare_start_lsn,
2022-
gxact->ondisk, false, false);
2021+
gxact->ondisk, true, false);
20232022
if (buf != NULL)
20242023
pfree(buf);
20252024
}

src/backend/access/transam/xlog.c

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7134,6 +7134,9 @@ StartupXLOG(void)
71347134
RunningTransactionsData running;
71357135
TransactionId latestCompletedXid;
71367136

7137+
/* Update pg_subtrans entries for any prepared transactions */
7138+
StandbyRecoverPreparedTransactions();
7139+
71377140
/*
71387141
* Construct a RunningTransactions snapshot representing a
71397142
* shut down server, with only prepared transactions still
@@ -7142,7 +7145,7 @@ StartupXLOG(void)
71427145
*/
71437146
running.xcnt = nxids;
71447147
running.subxcnt = 0;
7145-
running.subxid_overflow = false;
7148+
running.subxid_status = SUBXIDS_IN_SUBTRANS;
71467149
running.nextXid = XidFromFullTransactionId(checkPoint.nextFullXid);
71477150
running.oldestRunningXid = oldestActiveXID;
71487151
latestCompletedXid = XidFromFullTransactionId(checkPoint.nextFullXid);
@@ -7152,8 +7155,6 @@ StartupXLOG(void)
71527155
running.xids = xids;
71537156

71547157
ProcArrayApplyRecoveryInfo(&running);
7155-
7156-
StandbyRecoverPreparedTransactions();
71577158
}
71587159
}
71597160

@@ -10217,6 +10218,9 @@ xlog_redo(XLogReaderState *record)
1021710218

1021810219
oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
1021910220

10221+
/* Update pg_subtrans entries for any prepared transactions */
10222+
StandbyRecoverPreparedTransactions();
10223+
1022010224
/*
1022110225
* Construct a RunningTransactions snapshot representing a shut
1022210226
* down server, with only prepared transactions still alive. We're
@@ -10225,7 +10229,7 @@ xlog_redo(XLogReaderState *record)
1022510229
*/
1022610230
running.xcnt = nxids;
1022710231
running.subxcnt = 0;
10228-
running.subxid_overflow = false;
10232+
running.subxid_status = SUBXIDS_IN_SUBTRANS;
1022910233
running.nextXid = XidFromFullTransactionId(checkPoint.nextFullXid);
1023010234
running.oldestRunningXid = oldestActiveXID;
1023110235
latestCompletedXid = XidFromFullTransactionId(checkPoint.nextFullXid);
@@ -10235,8 +10239,6 @@ xlog_redo(XLogReaderState *record)
1023510239
running.xids = xids;
1023610240

1023710241
ProcArrayApplyRecoveryInfo(&running);
10238-
10239-
StandbyRecoverPreparedTransactions();
1024010242
}
1024110243

1024210244
/* ControlFile->checkPointCopy always tracks the latest ckpt XID */

src/backend/storage/ipc/procarray.c

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -734,7 +734,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
734734
* If the snapshot isn't overflowed or if it's empty we can reset our
735735
* pending state and use this snapshot instead.
736736
*/
737-
if (!running->subxid_overflow || running->xcnt == 0)
737+
if (running->subxid_status != SUBXIDS_MISSING || running->xcnt == 0)
738738
{
739739
/*
740740
* If we have already collected known assigned xids, we need to
@@ -886,7 +886,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
886886
* missing, so conservatively assume the last one is latestObservedXid.
887887
* ----------
888888
*/
889-
if (running->subxid_overflow)
889+
if (running->subxid_status == SUBXIDS_MISSING)
890890
{
891891
standbyState = STANDBY_SNAPSHOT_PENDING;
892892

@@ -898,6 +898,18 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
898898
standbyState = STANDBY_SNAPSHOT_READY;
899899

900900
standbySnapshotPendingXmin = InvalidTransactionId;
901+
902+
/*
903+
* If the 'xids' array didn't include all subtransactions, we have to
904+
* mark any snapshots taken as overflowed.
905+
*/
906+
if (running->subxid_status == SUBXIDS_IN_SUBTRANS)
907+
procArray->lastOverflowedXid = latestObservedXid;
908+
else
909+
{
910+
Assert(running->subxid_status == SUBXIDS_IN_ARRAY);
911+
procArray->lastOverflowedXid = InvalidTransactionId;
912+
}
901913
}
902914

903915
/*
@@ -2129,7 +2141,7 @@ GetRunningTransactionData(void)
21292141

21302142
CurrentRunningXacts->xcnt = count - subcount;
21312143
CurrentRunningXacts->subxcnt = subcount;
2132-
CurrentRunningXacts->subxid_overflow = suboverflowed;
2144+
CurrentRunningXacts->subxid_status = suboverflowed ? SUBXIDS_IN_SUBTRANS : SUBXIDS_IN_ARRAY;
21332145
CurrentRunningXacts->nextXid = XidFromFullTransactionId(ShmemVariableCache->nextFullXid);
21342146
CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
21352147
CurrentRunningXacts->latestCompletedXid = latestCompletedXid;

src/backend/storage/ipc/standby.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -937,7 +937,7 @@ standby_redo(XLogReaderState *record)
937937

938938
running.xcnt = xlrec->xcnt;
939939
running.subxcnt = xlrec->subxcnt;
940-
running.subxid_overflow = xlrec->subxid_overflow;
940+
running.subxid_status = xlrec->subxid_overflow ? SUBXIDS_MISSING : SUBXIDS_IN_ARRAY;
941941
running.nextXid = xlrec->nextXid;
942942
running.latestCompletedXid = xlrec->latestCompletedXid;
943943
running.oldestRunningXid = xlrec->oldestRunningXid;
@@ -1093,7 +1093,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
10931093

10941094
xlrec.xcnt = CurrRunningXacts->xcnt;
10951095
xlrec.subxcnt = CurrRunningXacts->subxcnt;
1096-
xlrec.subxid_overflow = CurrRunningXacts->subxid_overflow;
1096+
xlrec.subxid_overflow = (CurrRunningXacts->subxid_status != SUBXIDS_IN_ARRAY);
10971097
xlrec.nextXid = CurrRunningXacts->nextXid;
10981098
xlrec.oldestRunningXid = CurrRunningXacts->oldestRunningXid;
10991099
xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid;
@@ -1110,7 +1110,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
11101110

11111111
recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS);
11121112

1113-
if (CurrRunningXacts->subxid_overflow)
1113+
if (xlrec.subxid_overflow)
11141114
elog(trace_recovery(DEBUG2),
11151115
"snapshot of %u running transactions overflowed (lsn %X/%X oldest xid %u latest complete %u next xid %u)",
11161116
CurrRunningXacts->xcnt,

src/include/storage/standby.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,19 @@ extern void StandbyReleaseOldLocks(TransactionId oldxid);
6767
* almost immediately see the data we need to begin executing queries.
6868
*/
6969

70+
typedef enum
71+
{
72+
SUBXIDS_IN_ARRAY, /* xids array includes all running subxids */
73+
SUBXIDS_MISSING, /* snapshot overflowed, subxids are missing */
74+
SUBXIDS_IN_SUBTRANS, /* subxids are not included in 'xids', but
75+
* pg_subtrans is fully up-to-date */
76+
} subxids_array_status;
77+
7078
typedef struct RunningTransactionsData
7179
{
7280
int xcnt; /* # of xact ids in xids[] */
7381
int subxcnt; /* # of subxact ids in xids[] */
74-
bool subxid_overflow; /* snapshot overflowed, subxids missing */
82+
subxids_array_status subxid_status;
7583
TransactionId nextXid; /* xid from ShmemVariableCache->nextFullXid */
7684
TransactionId oldestRunningXid; /* *not* oldestXmin */
7785
TransactionId latestCompletedXid; /* so we can set xmax */

src/test/recovery/t/009_twophase.pl

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
use PostgresNode;
66
use TestLib;
7-
use Test::More tests => 24;
7+
use Test::More tests => 27;
88

99
my $psql_out = '';
1010
my $psql_rc = '';
@@ -305,6 +305,51 @@ sub configure_and_reload
305305

306306
$cur_master->psql('postgres', "COMMIT PREPARED 'xact_009_12'");
307307

308+
###############################################################################
309+
# Check visibility of prepared transactions in standby after a restart while
310+
# primary is down.
311+
###############################################################################
312+
313+
$cur_master->psql(
314+
'postgres', "
315+
CREATE TABLE t_009_tbl_standby_mvcc (id int, msg text);
316+
BEGIN;
317+
INSERT INTO t_009_tbl_standby_mvcc VALUES (1, 'issued to ${cur_master_name}');
318+
SAVEPOINT s1;
319+
INSERT INTO t_009_tbl_standby_mvcc VALUES (2, 'issued to ${cur_master_name}');
320+
PREPARE TRANSACTION 'xact_009_standby_mvcc';
321+
");
322+
$cur_master->stop;
323+
$cur_standby->restart;
324+
325+
# Acquire a snapshot in standby, before we commit the prepared transaction
326+
my $standby_session = $cur_standby->background_psql('postgres', on_error_die => 1);
327+
$standby_session->query_safe("BEGIN ISOLATION LEVEL REPEATABLE READ");
328+
$psql_out = $standby_session->query_safe(
329+
"SELECT count(*) FROM t_009_tbl_standby_mvcc");
330+
is($psql_out, '0',
331+
"Prepared transaction not visible in standby before commit");
332+
333+
# Commit the transaction in primary
334+
$cur_master->start;
335+
$cur_master->psql('postgres', "
336+
SET synchronous_commit='remote_apply'; -- To ensure the standby is caught up
337+
COMMIT PREPARED 'xact_009_standby_mvcc'
338+
");
339+
340+
# Still not visible to the old snapshot
341+
$psql_out = $standby_session->query_safe(
342+
"SELECT count(*) FROM t_009_tbl_standby_mvcc");
343+
is($psql_out, '0',
344+
"Committed prepared transaction not visible to old snapshot in standby");
345+
346+
# Is visible to a new snapshot
347+
$standby_session->query_safe("COMMIT");
348+
$psql_out = $standby_session->query_safe(
349+
"SELECT count(*) FROM t_009_tbl_standby_mvcc");
350+
is($psql_out, '2',
351+
"Committed prepared transaction is visible to new snapshot in standby");
352+
308353
###############################################################################
309354
# Check for a lock conflict between prepared transaction with DDL inside and
310355
# replay of XLOG_STANDBY_LOCK wal record.

src/tools/pgindent/typedefs.list

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3373,6 +3373,7 @@ string
33733373
substitute_actual_parameters_context
33743374
substitute_actual_srf_parameters_context
33753375
substitute_phv_relids_context
3376+
subxids_array_status
33763377
svtype
33773378
symbol
33783379
tablespaceinfo

0 commit comments

Comments
 (0)