Skip to content

Commit 10520f4

Browse files
committed
Rethink the delay-checkpoint-end mechanism in the back-branches.
The back-patch of commit bbace56 had the unfortunate effect of changing the layout of PGPROC in the back-branches, which could break extensions. This happened because it changed the delayChkpt from type bool to type int. So, change it back, and add a new bool delayChkptEnd field instead. The new field should fall within what used to be padding space within the struct, and so hopefully won't cause any extensions to break. Per report from Markus Wanner and discussion with Tom Lane and others. Patch originally by me, somewhat revised by Markus Wanner per a suggestion from Michael Paquier. A very similar patch was developed by Kyotaro Horiguchi, but I failed to see the email in which that was posted before writing one of my own. Discussion: http://postgr.es/m/CA+Tgmoao-kUD9c5nG5sub3F7tbo39+cdr8jKaOVEs_1aBWcJ3Q@mail.gmail.com Discussion: http://postgr.es/m/20220406.164521.17171257901083417.horikyota.ntt@gmail.com
1 parent df6bbe7 commit 10520f4

File tree

10 files changed

+109
-80
lines changed

10 files changed

+109
-80
lines changed

src/backend/access/transam/multixact.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3075,8 +3075,8 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
30753075
* crash/basebackup, even though the state of the data directory would
30763076
* require it.
30773077
*/
3078-
Assert((MyProc->delayChkpt & DELAY_CHKPT_START) == 0);
3079-
MyProc->delayChkpt |= DELAY_CHKPT_START;
3078+
Assert(!MyProc->delayChkpt);
3079+
MyProc->delayChkpt = true;
30803080

30813081
/* WAL log truncation */
30823082
WriteMTruncateXlogRec(newOldestMultiDB,
@@ -3102,7 +3102,7 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
31023102
/* Then offsets */
31033103
PerformOffsetsTruncation(oldestMulti, newOldestMulti);
31043104

3105-
MyProc->delayChkpt &= ~DELAY_CHKPT_START;
3105+
MyProc->delayChkpt = false;
31063106

31073107
END_CRIT_SECTION();
31083108
LWLockRelease(MultiXactTruncationLock);

src/backend/access/transam/twophase.c

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -474,8 +474,9 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid,
474474
}
475475
proc->xid = xid;
476476
Assert(proc->xmin == InvalidTransactionId);
477-
proc->delayChkpt = 0;
477+
proc->delayChkpt = false;
478478
proc->statusFlags = 0;
479+
proc->delayChkptEnd = false;
479480
proc->pid = 0;
480481
proc->databaseId = databaseid;
481482
proc->roleId = owner;
@@ -1165,8 +1166,8 @@ EndPrepare(GlobalTransaction gxact)
11651166

11661167
START_CRIT_SECTION();
11671168

1168-
Assert((MyProc->delayChkpt & DELAY_CHKPT_START) == 0);
1169-
MyProc->delayChkpt |= DELAY_CHKPT_START;
1169+
Assert(!MyProc->delayChkpt);
1170+
MyProc->delayChkpt = true;
11701171

11711172
XLogBeginInsert();
11721173
for (record = records.head; record != NULL; record = record->next)
@@ -1209,7 +1210,7 @@ EndPrepare(GlobalTransaction gxact)
12091210
* checkpoint starting after this will certainly see the gxact as a
12101211
* candidate for fsyncing.
12111212
*/
1212-
MyProc->delayChkpt &= ~DELAY_CHKPT_START;
1213+
MyProc->delayChkpt = false;
12131214

12141215
/*
12151216
* Remember that we have this GlobalTransaction entry locked for us. If
@@ -2276,8 +2277,8 @@ RecordTransactionCommitPrepared(TransactionId xid,
22762277
START_CRIT_SECTION();
22772278

22782279
/* See notes in RecordTransactionCommit */
2279-
Assert((MyProc->delayChkpt & DELAY_CHKPT_START) == 0);
2280-
MyProc->delayChkpt |= DELAY_CHKPT_START;
2280+
Assert(!MyProc->delayChkpt);
2281+
MyProc->delayChkpt = true;
22812282

22822283
/*
22832284
* Emit the XLOG commit record. Note that we mark 2PC commits as
@@ -2325,7 +2326,7 @@ RecordTransactionCommitPrepared(TransactionId xid,
23252326
TransactionIdCommitTree(xid, nchildren, children);
23262327

23272328
/* Checkpoint can proceed now */
2328-
MyProc->delayChkpt &= ~DELAY_CHKPT_START;
2329+
MyProc->delayChkpt = false;
23292330

23302331
END_CRIT_SECTION();
23312332

src/backend/access/transam/xact.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1335,9 +1335,9 @@ RecordTransactionCommit(void)
13351335
* This makes checkpoint's determination of which xacts are delayChkpt
13361336
* a bit fuzzy, but it doesn't matter.
13371337
*/
1338-
Assert((MyProc->delayChkpt & DELAY_CHKPT_START) == 0);
1338+
Assert(!MyProc->delayChkpt);
13391339
START_CRIT_SECTION();
1340-
MyProc->delayChkpt |= DELAY_CHKPT_START;
1340+
MyProc->delayChkpt = true;
13411341

13421342
SetCurrentTransactionStopTimestamp();
13431343

@@ -1438,7 +1438,7 @@ RecordTransactionCommit(void)
14381438
*/
14391439
if (markXidCommitted)
14401440
{
1441-
MyProc->delayChkpt &= ~DELAY_CHKPT_START;
1441+
MyProc->delayChkpt = false;
14421442
END_CRIT_SECTION();
14431443
}
14441444

src/backend/access/transam/xlog.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9228,27 +9228,25 @@ CreateCheckPoint(int flags)
92289228
* and we will correctly flush the update below. So we cannot miss any
92299229
* xacts we need to wait for.
92309230
*/
9231-
vxids = GetVirtualXIDsDelayingChkpt(&nvxids, DELAY_CHKPT_START);
9231+
vxids = GetVirtualXIDsDelayingChkpt(&nvxids);
92329232
if (nvxids > 0)
92339233
{
92349234
do
92359235
{
92369236
pg_usleep(10000L); /* wait for 10 msec */
9237-
} while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
9238-
DELAY_CHKPT_START));
9237+
} while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids));
92399238
}
92409239
pfree(vxids);
92419240

92429241
CheckPointGuts(checkPoint.redo, flags);
92439242

9244-
vxids = GetVirtualXIDsDelayingChkpt(&nvxids, DELAY_CHKPT_COMPLETE);
9243+
vxids = GetVirtualXIDsDelayingChkptEnd(&nvxids);
92459244
if (nvxids > 0)
92469245
{
92479246
do
92489247
{
92499248
pg_usleep(10000L); /* wait for 10 msec */
9250-
} while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
9251-
DELAY_CHKPT_COMPLETE));
9249+
} while (HaveVirtualXIDsDelayingChkptEnd(vxids, nvxids));
92529250
}
92539251
pfree(vxids);
92549252

src/backend/access/transam/xloginsert.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -925,7 +925,7 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
925925
/*
926926
* Ensure no checkpoint can change our view of RedoRecPtr.
927927
*/
928-
Assert((MyProc->delayChkpt & DELAY_CHKPT_START) != 0);
928+
Assert(MyProc->delayChkpt);
929929

930930
/*
931931
* Update RedoRecPtr so that we can make the right decision

src/backend/catalog/storage.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -338,8 +338,8 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
338338
* the blocks to not exist on disk at all, but not for them to have the
339339
* wrong contents.
340340
*/
341-
Assert((MyProc->delayChkpt & DELAY_CHKPT_COMPLETE) == 0);
342-
MyProc->delayChkpt |= DELAY_CHKPT_COMPLETE;
341+
Assert(!MyProc->delayChkptEnd);
342+
MyProc->delayChkptEnd = true;
343343

344344
/*
345345
* We WAL-log the truncation before actually truncating, which means
@@ -387,7 +387,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
387387
smgrtruncate(rel->rd_smgr, forks, nforks, blocks);
388388

389389
/* We've done all the critical work, so checkpoints are OK now. */
390-
MyProc->delayChkpt &= ~DELAY_CHKPT_COMPLETE;
390+
MyProc->delayChkptEnd = false;
391391

392392
/*
393393
* Update upper-level FSM pages to account for the truncation. This is

src/backend/storage/buffer/bufmgr.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3946,8 +3946,8 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
39463946
* essential that CreateCheckpoint waits for virtual transactions
39473947
* rather than full transactionids.
39483948
*/
3949-
Assert((MyProc->delayChkpt & DELAY_CHKPT_START) == 0);
3950-
MyProc->delayChkpt |= DELAY_CHKPT_START;
3949+
Assert(!MyProc->delayChkpt);
3950+
MyProc->delayChkpt = true;
39513951
delayChkpt = true;
39523952
lsn = XLogSaveBufferForHint(buffer, buffer_std);
39533953
}
@@ -3981,7 +3981,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
39813981
UnlockBufHdr(bufHdr, buf_state);
39823982

39833983
if (delayChkpt)
3984-
MyProc->delayChkpt &= ~DELAY_CHKPT_START;
3984+
MyProc->delayChkpt = false;
39853985

39863986
if (dirtied)
39873987
{

src/backend/storage/ipc/procarray.c

Lines changed: 75 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,11 @@ static void DisplayXidCache(void);
330330
#define xc_slow_answer_inc() ((void) 0)
331331
#endif /* XIDCACHE_DEBUG */
332332

333+
static VirtualTransactionId *GetVirtualXIDsDelayingChkptGuts(int *nvxids,
334+
int type);
335+
static bool HaveVirtualXIDsDelayingChkptGuts(VirtualTransactionId *vxids,
336+
int nvxids, int type);
337+
333338
/* Primitives for KnownAssignedXids array handling for standby */
334339
static void KnownAssignedXidsCompress(bool force);
335340
static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
@@ -690,8 +695,9 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
690695
proc->lxid = InvalidLocalTransactionId;
691696
proc->xmin = InvalidTransactionId;
692697

693-
/* be sure this is cleared in abort */
694-
proc->delayChkpt = 0;
698+
/* be sure these are cleared in abort */
699+
proc->delayChkpt = false;
700+
proc->delayChkptEnd = false;
695701

696702
proc->recoveryConflictPending = false;
697703

@@ -732,8 +738,9 @@ ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid)
732738
proc->lxid = InvalidLocalTransactionId;
733739
proc->xmin = InvalidTransactionId;
734740

735-
/* be sure this is cleared in abort */
736-
proc->delayChkpt = 0;
741+
/* be sure these are cleared in abort */
742+
proc->delayChkpt = false;
743+
proc->delayChkptEnd = false;
737744

738745
proc->recoveryConflictPending = false;
739746

@@ -3045,26 +3052,28 @@ GetOldestSafeDecodingTransactionId(bool catalogOnly)
30453052
}
30463053

30473054
/*
3048-
* GetVirtualXIDsDelayingChkpt -- Get the VXIDs of transactions that are
3049-
* delaying checkpoint because they have critical actions in progress.
3055+
* GetVirtualXIDsDelayingChkptGuts -- Get the VXIDs of transactions that are
3056+
* delaying the start or end of a checkpoint because they have critical
3057+
* actions in progress.
30503058
*
30513059
* Constructs an array of VXIDs of transactions that are currently in commit
3052-
* critical sections, as shown by having specified delayChkpt bits set in their
3053-
* PGPROC.
3060+
* critical sections, as shown by having delayChkpt or delayChkptEnd set in
3061+
* their PGPROC.
30543062
*
30553063
* Returns a palloc'd array that should be freed by the caller.
30563064
* *nvxids is the number of valid entries.
30573065
*
3058-
* Note that because backends set or clear delayChkpt without holding any lock,
3059-
* the result is somewhat indeterminate, but we don't really care. Even in
3060-
* a multiprocessor with delayed writes to shared memory, it should be certain
3061-
* that setting of delayChkpt will propagate to shared memory when the backend
3062-
* takes a lock, so we cannot fail to see a virtual xact as delayChkpt if
3063-
* it's already inserted its commit record. Whether it takes a little while
3064-
* for clearing of delayChkpt to propagate is unimportant for correctness.
3066+
* Note that because backends set or clear delayChkpt and delayChkptEnd
3067+
* without holding any lock, the result is somewhat indeterminate, but we
3068+
* don't really care. Even in a multiprocessor with delayed writes to
3069+
* shared memory, it should be certain that setting of delayChkpt will
3070+
* propagate to shared memory when the backend takes a lock, so we cannot
3071+
* fail to see a virtual xact as delayChkpt if it's already inserted its
3072+
* commit record. Whether it takes a little while for clearing of
3073+
* delayChkpt to propagate is unimportant for correctness.
30653074
*/
3066-
VirtualTransactionId *
3067-
GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
3075+
static VirtualTransactionId *
3076+
GetVirtualXIDsDelayingChkptGuts(int *nvxids, int type)
30683077
{
30693078
VirtualTransactionId *vxids;
30703079
ProcArrayStruct *arrayP = procArray;
@@ -3084,7 +3093,8 @@ GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
30843093
int pgprocno = arrayP->pgprocnos[index];
30853094
PGPROC *proc = &allProcs[pgprocno];
30863095

3087-
if ((proc->delayChkpt & type) != 0)
3096+
if (((type & DELAY_CHKPT_START) && proc->delayChkpt) ||
3097+
((type & DELAY_CHKPT_COMPLETE) && proc->delayChkptEnd))
30883098
{
30893099
VirtualTransactionId vxid;
30903100

@@ -3100,6 +3110,26 @@ GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
31003110
return vxids;
31013111
}
31023112

3113+
/*
3114+
* GetVirtualXIDsDelayingChkpt - Get the VXIDs of transactions that are
3115+
* delaying the start of a checkpoint.
3116+
*/
3117+
VirtualTransactionId *
3118+
GetVirtualXIDsDelayingChkpt(int *nvxids)
3119+
{
3120+
return GetVirtualXIDsDelayingChkptGuts(nvxids, DELAY_CHKPT_START);
3121+
}
3122+
3123+
/*
3124+
* GetVirtualXIDsDelayingChkptEnd - Get the VXIDs of transactions that are
3125+
* delaying the end of a checkpoint.
3126+
*/
3127+
VirtualTransactionId *
3128+
GetVirtualXIDsDelayingChkptEnd(int *nvxids)
3129+
{
3130+
return GetVirtualXIDsDelayingChkptGuts(nvxids, DELAY_CHKPT_COMPLETE);
3131+
}
3132+
31033133
/*
31043134
* HaveVirtualXIDsDelayingChkpt -- Are any of the specified VXIDs delaying?
31053135
*
@@ -3109,8 +3139,9 @@ GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
31093139
* Note: this is O(N^2) in the number of vxacts that are/were delaying, but
31103140
* those numbers should be small enough for it not to be a problem.
31113141
*/
3112-
bool
3113-
HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
3142+
static bool
3143+
HaveVirtualXIDsDelayingChkptGuts(VirtualTransactionId *vxids, int nvxids,
3144+
int type)
31143145
{
31153146
bool result = false;
31163147
ProcArrayStruct *arrayP = procArray;
@@ -3128,7 +3159,8 @@ HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
31283159

31293160
GET_VXID_FROM_PGPROC(vxid, *proc);
31303161

3131-
if ((proc->delayChkpt & type) != 0 &&
3162+
if ((((type & DELAY_CHKPT_START) && proc->delayChkpt) ||
3163+
((type & DELAY_CHKPT_COMPLETE) && proc->delayChkptEnd)) &&
31323164
VirtualTransactionIdIsValid(vxid))
31333165
{
31343166
int i;
@@ -3151,6 +3183,28 @@ HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
31513183
return result;
31523184
}
31533185

3186+
/*
3187+
* HaveVirtualXIDsDelayingChkpt -- Are any of the specified VXIDs delaying
3188+
* the start of a checkpoint?
3189+
*/
3190+
bool
3191+
HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids)
3192+
{
3193+
return HaveVirtualXIDsDelayingChkptGuts(vxids, nvxids,
3194+
DELAY_CHKPT_START);
3195+
}
3196+
3197+
/*
3198+
* HaveVirtualXIDsDelayingChkptEnd -- Are any of the specified VXIDs delaying
3199+
* the end of a checkpoint?
3200+
*/
3201+
bool
3202+
HaveVirtualXIDsDelayingChkptEnd(VirtualTransactionId *vxids, int nvxids)
3203+
{
3204+
return HaveVirtualXIDsDelayingChkptGuts(vxids, nvxids,
3205+
DELAY_CHKPT_COMPLETE);
3206+
}
3207+
31543208
/*
31553209
* BackendPidGetProc -- get a backend's PGPROC given its PID
31563210
*

src/include/storage/proc.h

Lines changed: 4 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -87,36 +87,8 @@ struct XidCache
8787
#define INVALID_PGPROCNO PG_INT32_MAX
8888

8989
/*
90-
* Flags for PGPROC.delayChkpt
91-
*
92-
* These flags can be used to delay the start or completion of a checkpoint
93-
* for short periods. A flag is in effect if the corresponding bit is set in
94-
* the PGPROC of any backend.
95-
*
96-
* For our purposes here, a checkpoint has three phases: (1) determine the
97-
* location to which the redo pointer will be moved, (2) write all the
98-
* data durably to disk, and (3) WAL-log the checkpoint.
99-
*
100-
* Setting DELAY_CHKPT_START prevents the system from moving from phase 1
101-
* to phase 2. This is useful when we are performing a WAL-logged modification
102-
* of data that will be flushed to disk in phase 2. By setting this flag
103-
* before writing WAL and clearing it after we've both written WAL and
104-
* performed the corresponding modification, we ensure that if the WAL record
105-
* is inserted prior to the new redo point, the corresponding data changes will
106-
* also be flushed to disk before the checkpoint can complete. (In the
107-
* extremely common case where the data being modified is in shared buffers
108-
* and we acquire an exclusive content lock on the relevant buffers before
109-
* writing WAL, this mechanism is not needed, because phase 2 will block
110-
* until we release the content lock and then flush the modified data to
111-
* disk.)
112-
*
113-
* Setting DELAY_CHKPT_COMPLETE prevents the system from moving from phase 2
114-
* to phase 3. This is useful if we are performing a WAL-logged operation that
115-
* might invalidate buffers, such as relation truncation. In this case, we need
116-
* to ensure that any buffers which were invalidated and thus not flushed by
117-
* the checkpoint are actaully destroyed on disk. Replay can cope with a file
118-
* or block that doesn't exist, but not with a block that has the wrong
119-
* contents.
90+
* Flags used only for type of internal functions
91+
* GetVirtualXIDsDelayingChkptGuts and HaveVirtualXIDsDelayingChkptGuts.
12092
*/
12193
#define DELAY_CHKPT_START (1<<0)
12294
#define DELAY_CHKPT_COMPLETE (1<<1)
@@ -226,11 +198,12 @@ struct PGPROC
226198
pg_atomic_uint64 waitStart; /* time at which wait for lock acquisition
227199
* started */
228200

229-
int delayChkpt; /* for DELAY_CHKPT_* flags */
201+
bool delayChkpt; /* true if this proc delays checkpoint start */
230202

231203
uint8 statusFlags; /* this backend's status flags, see PROC_*
232204
* above. mirrored in
233205
* ProcGlobal->statusFlags[pgxactoff] */
206+
bool delayChkptEnd; /* true if this proc delays checkpoint end */
234207

235208
/*
236209
* Info to allow us to wait for synchronous replication, if needed.

0 commit comments

Comments
 (0)