Skip to content

Commit 00f690a

Browse files
committed
Revert "Get rid of the dedicated latch for signaling the startup process".
Revert ac22929, as well as the followup fix 113d359. Because it broke the assumption that the startup process waiting for the recovery conflict on buffer pin should be waken up only by buffer unpin or the timeout enabled in ResolveRecoveryConflictWithBufferPin(). It caused, for example, SIGHUP signal handler or walreceiver process to wake that startup process up unnecessarily frequently. Additionally, add the comments about why that dedicated latch that the reverted patch tried to get rid of should not be removed. Thanks to Kyotaro Horiguchi for the discussion. Author: Fujii Masao Discussion: https://postgr.es/m/d8c0c608-021b-3c73-fffd-3240829ee986@oss.nttdata.com
1 parent 88e014c commit 00f690a

File tree

3 files changed

+52
-21
lines changed

3 files changed

+52
-21
lines changed

src/backend/access/transam/xlog.c

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -681,8 +681,18 @@ typedef struct XLogCtlData
681681
* recoveryWakeupLatch is used to wake up the startup process to continue
682682
* WAL replay, if it is waiting for WAL to arrive or failover trigger file
683683
* to appear.
684+
*
685+
* Note that the startup process also uses another latch, its procLatch,
686+
* to wait for recovery conflict. If we get rid of recoveryWakeupLatch for
687+
* signaling the startup process in favor of using its procLatch, which
688+
* comports better with possible generic signal handlers using that latch.
689+
* But we should not do that because the startup process doesn't assume
690+
* that it's waken up by walreceiver process or SIGHUP signal handler
691+
* while it's waiting for recovery conflict. The separate latches,
692+
* recoveryWakeupLatch and procLatch, should be used for inter-process
693+
* communication for WAL replay and recovery conflict, respectively.
684694
*/
685-
Latch *recoveryWakeupLatch;
695+
Latch recoveryWakeupLatch;
686696

687697
/*
688698
* During recovery, we keep a copy of the latest checkpoint record here.
@@ -5186,6 +5196,7 @@ XLOGShmemInit(void)
51865196
SpinLockInit(&XLogCtl->Insert.insertpos_lck);
51875197
SpinLockInit(&XLogCtl->info_lck);
51885198
SpinLockInit(&XLogCtl->ulsn_lck);
5199+
InitSharedLatch(&XLogCtl->recoveryWakeupLatch);
51895200
}
51905201

51915202
/*
@@ -6121,7 +6132,7 @@ recoveryApplyDelay(XLogReaderState *record)
61216132

61226133
while (true)
61236134
{
6124-
ResetLatch(MyLatch);
6135+
ResetLatch(&XLogCtl->recoveryWakeupLatch);
61256136

61266137
/* might change the trigger file's location */
61276138
HandleStartupProcInterrupts();
@@ -6140,7 +6151,7 @@ recoveryApplyDelay(XLogReaderState *record)
61406151

61416152
elog(DEBUG2, "recovery apply delay %ld milliseconds", msecs);
61426153

6143-
(void) WaitLatch(MyLatch,
6154+
(void) WaitLatch(&XLogCtl->recoveryWakeupLatch,
61446155
WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
61456156
msecs,
61466157
WAIT_EVENT_RECOVERY_APPLY_DELAY);
@@ -6469,11 +6480,11 @@ StartupXLOG(void)
64696480
}
64706481

64716482
/*
6472-
* Advertise our latch that other processes can use to wake us up
6473-
* if we're going to sleep during recovery.
6483+
* Take ownership of the wakeup latch if we're going to sleep during
6484+
* recovery.
64746485
*/
64756486
if (ArchiveRecoveryRequested)
6476-
XLogCtl->recoveryWakeupLatch = &MyProc->procLatch;
6487+
OwnLatch(&XLogCtl->recoveryWakeupLatch);
64776488

64786489
/* Set up XLOG reader facility */
64796490
MemSet(&private, 0, sizeof(XLogPageReadPrivate));
@@ -7484,11 +7495,11 @@ StartupXLOG(void)
74847495
ResetUnloggedRelations(UNLOGGED_RELATION_INIT);
74857496

74867497
/*
7487-
* We don't need the latch anymore. It's not strictly necessary to reset
7488-
* it to NULL, but let's do it for the sake of tidiness.
7498+
* We don't need the latch anymore. It's not strictly necessary to disown
7499+
* it, but let's do it for the sake of tidiness.
74897500
*/
74907501
if (ArchiveRecoveryRequested)
7491-
XLogCtl->recoveryWakeupLatch = NULL;
7502+
DisownLatch(&XLogCtl->recoveryWakeupLatch);
74927503

74937504
/*
74947505
* We are now done reading the xlog from stream. Turn off streaming
@@ -12300,12 +12311,12 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1230012311
wait_time = wal_retrieve_retry_interval -
1230112312
TimestampDifferenceMilliseconds(last_fail_time, now);
1230212313

12303-
(void) WaitLatch(MyLatch,
12314+
(void) WaitLatch(&XLogCtl->recoveryWakeupLatch,
1230412315
WL_LATCH_SET | WL_TIMEOUT |
1230512316
WL_EXIT_ON_PM_DEATH,
1230612317
wait_time,
1230712318
WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL);
12308-
ResetLatch(MyLatch);
12319+
ResetLatch(&XLogCtl->recoveryWakeupLatch);
1230912320
now = GetCurrentTimestamp();
1231012321

1231112322
/* Handle interrupt signals of startup process */
@@ -12559,11 +12570,11 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1255912570
* to react to a trigger file promptly and to check if the
1256012571
* WAL receiver is still active.
1256112572
*/
12562-
(void) WaitLatch(MyLatch,
12573+
(void) WaitLatch(&XLogCtl->recoveryWakeupLatch,
1256312574
WL_LATCH_SET | WL_TIMEOUT |
1256412575
WL_EXIT_ON_PM_DEATH,
1256512576
5000L, WAIT_EVENT_RECOVERY_WAL_STREAM);
12566-
ResetLatch(MyLatch);
12577+
ResetLatch(&XLogCtl->recoveryWakeupLatch);
1256712578
break;
1256812579
}
1256912580

@@ -12735,8 +12746,7 @@ CheckPromoteSignal(void)
1273512746
void
1273612747
WakeupRecovery(void)
1273712748
{
12738-
if (XLogCtl->recoveryWakeupLatch)
12739-
SetLatch(XLogCtl->recoveryWakeupLatch);
12749+
SetLatch(&XLogCtl->recoveryWakeupLatch);
1274012750
}
1274112751

1274212752
/*

src/backend/postmaster/startup.c

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
/*
3838
* Flags set by interrupt handlers for later service in the redo loop.
3939
*/
40+
static volatile sig_atomic_t got_SIGHUP = false;
4041
static volatile sig_atomic_t shutdown_requested = false;
4142
static volatile sig_atomic_t promote_signaled = false;
4243

@@ -48,6 +49,7 @@ static volatile sig_atomic_t in_restore_command = false;
4849

4950
/* Signal handlers */
5051
static void StartupProcTriggerHandler(SIGNAL_ARGS);
52+
static void StartupProcSigHupHandler(SIGNAL_ARGS);
5153

5254

5355
/* --------------------------------
@@ -62,7 +64,19 @@ StartupProcTriggerHandler(SIGNAL_ARGS)
6264
int save_errno = errno;
6365

6466
promote_signaled = true;
65-
SetLatch(MyLatch);
67+
WakeupRecovery();
68+
69+
errno = save_errno;
70+
}
71+
72+
/* SIGHUP: set flag to re-read config file at next convenient time */
73+
static void
74+
StartupProcSigHupHandler(SIGNAL_ARGS)
75+
{
76+
int save_errno = errno;
77+
78+
got_SIGHUP = true;
79+
WakeupRecovery();
6680

6781
errno = save_errno;
6882
}
@@ -77,7 +91,7 @@ StartupProcShutdownHandler(SIGNAL_ARGS)
7791
proc_exit(1);
7892
else
7993
shutdown_requested = true;
80-
SetLatch(MyLatch);
94+
WakeupRecovery();
8195

8296
errno = save_errno;
8397
}
@@ -123,9 +137,9 @@ HandleStartupProcInterrupts(void)
123137
/*
124138
* Process any requests or signals received recently.
125139
*/
126-
if (ConfigReloadPending)
140+
if (got_SIGHUP)
127141
{
128-
ConfigReloadPending = false;
142+
got_SIGHUP = false;
129143
StartupRereadConfig();
130144
}
131145

@@ -158,7 +172,7 @@ StartupProcessMain(void)
158172
/*
159173
* Properly accept or ignore signals the postmaster might send us.
160174
*/
161-
pqsignal(SIGHUP, SignalHandlerForConfigReload); /* reload config file */
175+
pqsignal(SIGHUP, StartupProcSigHupHandler); /* reload config file */
162176
pqsignal(SIGINT, SIG_IGN); /* ignore query cancel */
163177
pqsignal(SIGTERM, StartupProcShutdownHandler); /* request shutdown */
164178
/* SIGQUIT handler was already set up by InitPostmasterChild */

src/backend/storage/ipc/standby.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -519,7 +519,14 @@ ResolveRecoveryConflictWithBufferPin(void)
519519
enable_timeouts(timeouts, 2);
520520
}
521521

522-
/* Wait to be signaled by UnpinBuffer() */
522+
/*
523+
* Wait to be signaled by UnpinBuffer().
524+
*
525+
* We assume that only UnpinBuffer() and the timeout requests established
526+
* above can wake us up here. WakeupRecovery() called by walreceiver or
527+
* SIGHUP signal handler, etc cannot do that because it uses the different
528+
* latch from that ProcWaitForSignal() waits on.
529+
*/
523530
ProcWaitForSignal(PG_WAIT_BUFFER_PIN);
524531

525532
/*

0 commit comments

Comments
 (0)