Skip to content

Commit cbfbda7

Browse files
committed
Fix MVCC bug with prepared xact with subxacts on standby
We did not recover the subtransaction IDs of prepared transactions when starting a hot standby from a shutdown checkpoint. As a result, such subtransactions were considered as aborted, rather than in-progress. That would lead to hint bits being set incorrectly, and the subtransactions suddenly becoming visible to old snapshots when the prepared transaction was committed. To fix, update pg_subtrans with prepared transactions' subxids when starting hot standby from a shutdown checkpoint. The snapshots taken from that state need to be marked as "suboverflowed", so that we also check pg_subtrans. Backport to all supported versions. Discussion: https://www.postgresql.org/message-id/6b852e98-2d49-4ca1-9e95-db419a2696e0@iki.fi
1 parent ecbf6ac commit cbfbda7

File tree

7 files changed

+85
-17
lines changed

7 files changed

+85
-17
lines changed

src/backend/access/transam/twophase.c

+3-4
Original file line numberDiff line numberDiff line change
@@ -2035,9 +2035,8 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
20352035
* This is never called at the end of recovery - we use
20362036
* RecoverPreparedTransactions() at that point.
20372037
*
2038-
* The lack of calls to SubTransSetParent() calls here is by design;
2039-
* those calls are made by RecoverPreparedTransactions() at the end of recovery
2040-
* for those xacts that need this.
2038+
* This updates pg_subtrans, so that any subtransactions will be correctly
2039+
* seen as in-progress in snapshots taken during recovery.
20412040
*/
20422041
void
20432042
StandbyRecoverPreparedTransactions(void)
@@ -2057,7 +2056,7 @@ StandbyRecoverPreparedTransactions(void)
20572056

20582057
buf = ProcessTwoPhaseBuffer(xid,
20592058
gxact->prepare_start_lsn,
2060-
gxact->ondisk, false, false);
2059+
gxact->ondisk, true, false);
20612060
if (buf != NULL)
20622061
pfree(buf);
20632062
}

src/backend/access/transam/xlog.c

+8-6
Original file line numberDiff line numberDiff line change
@@ -5777,6 +5777,9 @@ StartupXLOG(void)
57775777
RunningTransactionsData running;
57785778
TransactionId latestCompletedXid;
57795779

5780+
/* Update pg_subtrans entries for any prepared transactions */
5781+
StandbyRecoverPreparedTransactions();
5782+
57805783
/*
57815784
* Construct a RunningTransactions snapshot representing a
57825785
* shut down server, with only prepared transactions still
@@ -5785,7 +5788,7 @@ StartupXLOG(void)
57855788
*/
57865789
running.xcnt = nxids;
57875790
running.subxcnt = 0;
5788-
running.subxid_overflow = false;
5791+
running.subxid_status = SUBXIDS_IN_SUBTRANS;
57895792
running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
57905793
running.oldestRunningXid = oldestActiveXID;
57915794
latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
@@ -5795,8 +5798,6 @@ StartupXLOG(void)
57955798
running.xids = xids;
57965799

57975800
ProcArrayApplyRecoveryInfo(&running);
5798-
5799-
StandbyRecoverPreparedTransactions();
58005801
}
58015802
}
58025803

@@ -8244,6 +8245,9 @@ xlog_redo(XLogReaderState *record)
82448245

82458246
oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
82468247

8248+
/* Update pg_subtrans entries for any prepared transactions */
8249+
StandbyRecoverPreparedTransactions();
8250+
82478251
/*
82488252
* Construct a RunningTransactions snapshot representing a shut
82498253
* down server, with only prepared transactions still alive. We're
@@ -8252,7 +8256,7 @@ xlog_redo(XLogReaderState *record)
82528256
*/
82538257
running.xcnt = nxids;
82548258
running.subxcnt = 0;
8255-
running.subxid_overflow = false;
8259+
running.subxid_status = SUBXIDS_IN_SUBTRANS;
82568260
running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
82578261
running.oldestRunningXid = oldestActiveXID;
82588262
latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
@@ -8262,8 +8266,6 @@ xlog_redo(XLogReaderState *record)
82628266
running.xids = xids;
82638267

82648268
ProcArrayApplyRecoveryInfo(&running);
8265-
8266-
StandbyRecoverPreparedTransactions();
82678269
}
82688270

82698271
/* ControlFile->checkPointCopy always tracks the latest ckpt XID */

src/backend/storage/ipc/procarray.c

+15-3
Original file line numberDiff line numberDiff line change
@@ -1106,7 +1106,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
11061106
* If the snapshot isn't overflowed or if it's empty we can reset our
11071107
* pending state and use this snapshot instead.
11081108
*/
1109-
if (!running->subxid_overflow || running->xcnt == 0)
1109+
if (running->subxid_status != SUBXIDS_MISSING || running->xcnt == 0)
11101110
{
11111111
/*
11121112
* If we have already collected known assigned xids, we need to
@@ -1258,7 +1258,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
12581258
* missing, so conservatively assume the last one is latestObservedXid.
12591259
* ----------
12601260
*/
1261-
if (running->subxid_overflow)
1261+
if (running->subxid_status == SUBXIDS_MISSING)
12621262
{
12631263
standbyState = STANDBY_SNAPSHOT_PENDING;
12641264

@@ -1270,6 +1270,18 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
12701270
standbyState = STANDBY_SNAPSHOT_READY;
12711271

12721272
standbySnapshotPendingXmin = InvalidTransactionId;
1273+
1274+
/*
1275+
* If the 'xids' array didn't include all subtransactions, we have to
1276+
* mark any snapshots taken as overflowed.
1277+
*/
1278+
if (running->subxid_status == SUBXIDS_IN_SUBTRANS)
1279+
procArray->lastOverflowedXid = latestObservedXid;
1280+
else
1281+
{
1282+
Assert(running->subxid_status == SUBXIDS_IN_ARRAY);
1283+
procArray->lastOverflowedXid = InvalidTransactionId;
1284+
}
12731285
}
12741286

12751287
/*
@@ -2833,7 +2845,7 @@ GetRunningTransactionData(void)
28332845

28342846
CurrentRunningXacts->xcnt = count - subcount;
28352847
CurrentRunningXacts->subxcnt = subcount;
2836-
CurrentRunningXacts->subxid_overflow = suboverflowed;
2848+
CurrentRunningXacts->subxid_status = suboverflowed ? SUBXIDS_IN_SUBTRANS : SUBXIDS_IN_ARRAY;
28372849
CurrentRunningXacts->nextXid = XidFromFullTransactionId(TransamVariables->nextXid);
28382850
CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
28392851
CurrentRunningXacts->oldestDatabaseRunningXid = oldestDatabaseRunningXid;

src/backend/storage/ipc/standby.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -1184,7 +1184,7 @@ standby_redo(XLogReaderState *record)
11841184

11851185
running.xcnt = xlrec->xcnt;
11861186
running.subxcnt = xlrec->subxcnt;
1187-
running.subxid_overflow = xlrec->subxid_overflow;
1187+
running.subxid_status = xlrec->subxid_overflow ? SUBXIDS_MISSING : SUBXIDS_IN_ARRAY;
11881188
running.nextXid = xlrec->nextXid;
11891189
running.latestCompletedXid = xlrec->latestCompletedXid;
11901190
running.oldestRunningXid = xlrec->oldestRunningXid;
@@ -1349,7 +1349,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
13491349

13501350
xlrec.xcnt = CurrRunningXacts->xcnt;
13511351
xlrec.subxcnt = CurrRunningXacts->subxcnt;
1352-
xlrec.subxid_overflow = CurrRunningXacts->subxid_overflow;
1352+
xlrec.subxid_overflow = (CurrRunningXacts->subxid_status != SUBXIDS_IN_ARRAY);
13531353
xlrec.nextXid = CurrRunningXacts->nextXid;
13541354
xlrec.oldestRunningXid = CurrRunningXacts->oldestRunningXid;
13551355
xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid;
@@ -1366,7 +1366,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
13661366

13671367
recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS);
13681368

1369-
if (CurrRunningXacts->subxid_overflow)
1369+
if (xlrec.subxid_overflow)
13701370
elog(DEBUG2,
13711371
"snapshot of %d running transactions overflowed (lsn %X/%X oldest xid %u latest complete %u next xid %u)",
13721372
CurrRunningXacts->xcnt,

src/include/storage/standby.h

+9-1
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,19 @@ extern void StandbyReleaseOldLocks(TransactionId oldxid);
7575
* almost immediately see the data we need to begin executing queries.
7676
*/
7777

78+
typedef enum
79+
{
80+
SUBXIDS_IN_ARRAY, /* xids array includes all running subxids */
81+
SUBXIDS_MISSING, /* snapshot overflowed, subxids are missing */
82+
SUBXIDS_IN_SUBTRANS, /* subxids are not included in 'xids', but
83+
* pg_subtrans is fully up-to-date */
84+
} subxids_array_status;
85+
7886
typedef struct RunningTransactionsData
7987
{
8088
int xcnt; /* # of xact ids in xids[] */
8189
int subxcnt; /* # of subxact ids in xids[] */
82-
bool subxid_overflow; /* snapshot overflowed, subxids missing */
90+
subxids_array_status subxid_status;
8391
TransactionId nextXid; /* xid from TransamVariables->nextXid */
8492
TransactionId oldestRunningXid; /* *not* oldestXmin */
8593
TransactionId oldestDatabaseRunningXid; /* same as above, but within the

src/test/recovery/t/009_twophase.pl

+46
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,52 @@ sub configure_and_reload
312312

313313
$cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_12'");
314314

315+
###############################################################################
316+
# Check visibility of prepared transactions in standby after a restart while
317+
# primary is down.
318+
###############################################################################
319+
320+
$cur_primary->psql(
321+
'postgres', "
322+
CREATE TABLE t_009_tbl_standby_mvcc (id int, msg text);
323+
BEGIN;
324+
INSERT INTO t_009_tbl_standby_mvcc VALUES (1, 'issued to ${cur_primary_name}');
325+
SAVEPOINT s1;
326+
INSERT INTO t_009_tbl_standby_mvcc VALUES (2, 'issued to ${cur_primary_name}');
327+
PREPARE TRANSACTION 'xact_009_standby_mvcc';
328+
");
329+
$cur_primary->stop;
330+
$cur_standby->restart;
331+
332+
# Acquire a snapshot in standby, before we commit the prepared transaction
333+
my $standby_session = $cur_standby->background_psql('postgres', on_error_die => 1);
334+
$standby_session->query_safe("BEGIN ISOLATION LEVEL REPEATABLE READ");
335+
$psql_out = $standby_session->query_safe(
336+
"SELECT count(*) FROM t_009_tbl_standby_mvcc");
337+
is($psql_out, '0',
338+
"Prepared transaction not visible in standby before commit");
339+
340+
# Commit the transaction in primary
341+
$cur_primary->start;
342+
$cur_primary->psql('postgres', "
343+
SET synchronous_commit='remote_apply'; -- To ensure the standby is caught up
344+
COMMIT PREPARED 'xact_009_standby_mvcc';
345+
");
346+
347+
# Still not visible to the old snapshot
348+
$psql_out = $standby_session->query_safe(
349+
"SELECT count(*) FROM t_009_tbl_standby_mvcc");
350+
is($psql_out, '0',
351+
"Committed prepared transaction not visible to old snapshot in standby");
352+
353+
# Is visible to a new snapshot
354+
$standby_session->query_safe("COMMIT");
355+
$psql_out = $standby_session->query_safe(
356+
"SELECT count(*) FROM t_009_tbl_standby_mvcc");
357+
is($psql_out, '2',
358+
"Committed prepared transaction is visible to new snapshot in standby");
359+
$standby_session->quit;
360+
315361
###############################################################################
316362
# Check for a lock conflict between prepared transaction with DDL inside and
317363
# replay of XLOG_STANDBY_LOCK wal record.

src/tools/pgindent/typedefs.list

+1
Original file line numberDiff line numberDiff line change
@@ -3931,6 +3931,7 @@ string
39313931
substitute_actual_parameters_context
39323932
substitute_actual_srf_parameters_context
39333933
substitute_phv_relids_context
3934+
subxids_array_status
39343935
symbol
39353936
tablespaceinfo
39363937
td_entry

0 commit comments

Comments
 (0)