Skip to content

Commit 368ffde

Browse files
committed
Fix waiting in RegisterSyncRequest().
If we run out of space in the checkpointer sync request queue (which is hopefully rare on real systems, but common with a very small buffer pool), we wait for it to drain. While waiting, we should report that as a wait event so that users know what is going on, and also handle postmaster death, since otherwise the loop might never terminate if the checkpointer has exited.

Back-patch to 12. Although the problem exists in earlier releases too, the code is structured differently before 12, so I haven't gone any further for now, in the absence of field complaints.

Reported-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/20220226213942.nb7uvb2pamyu26dj%40alap3.anarazel.de
1 parent af8a8eb commit 368ffde

File tree

4 files changed

+14
-3
lines changed

4 files changed

+14
-3
lines changed

doc/src/sgml/monitoring.sgml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1483,7 +1483,7 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
14831483
<entry>Waiting for confirmation from remote server during synchronous replication.</entry>
14841484
</row>
14851485
<row>
1486-
<entry morerows="2"><literal>Timeout</literal></entry>
1486+
<entry morerows="3"><literal>Timeout</literal></entry>
14871487
<entry><literal>BaseBackupThrottle</literal></entry>
14881488
<entry>Waiting during base backup when throttling activity.</entry>
14891489
</row>
@@ -1495,6 +1495,11 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
14951495
<entry><literal>RecoveryApplyDelay</literal></entry>
14961496
<entry>Waiting to apply WAL at recovery because it is delayed.</entry>
14971497
</row>
1498+
<row>
1499+
<entry><literal>RegisterSyncRequest</literal></entry>
1500+
<entry>Waiting while sending synchronization requests to the
1501+
checkpointer, because the request queue is full.</entry>
1502+
</row>
14981503
<row>
14991504
<entry morerows="67"><literal>IO</literal></entry>
15001505
<entry><literal>BufFileRead</literal></entry>

src/backend/postmaster/pgstat.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3889,6 +3889,9 @@ pgstat_get_wait_timeout(WaitEventTimeout w)
38893889
case WAIT_EVENT_RECOVERY_APPLY_DELAY:
38903890
event_name = "RecoveryApplyDelay";
38913891
break;
3892+
case WAIT_EVENT_REGISTER_SYNC_REQUEST:
3893+
event_name = "RegisterSyncRequest";
3894+
break;
38923895
/* no default case, so that compiler will warn */
38933896
}
38943897

src/backend/storage/sync/sync.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "postmaster/bgwriter.h"
2828
#include "storage/bufmgr.h"
2929
#include "storage/ipc.h"
30+
#include "storage/latch.h"
3031
#include "storage/md.h"
3132
#include "utils/hsearch.h"
3233
#include "utils/memutils.h"
@@ -569,7 +570,8 @@ RegisterSyncRequest(const FileTag *ftag, SyncRequestType type,
569570
if (ret || (!ret && !retryOnError))
570571
break;
571572

572-
pg_usleep(10000L);
573+
WaitLatch(NULL, WL_EXIT_ON_PM_DEATH | WL_TIMEOUT, 10,
574+
WAIT_EVENT_REGISTER_SYNC_REQUEST);
573575
}
574576

575577
return ret;

src/include/pgstat.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -867,7 +867,8 @@ typedef enum
867867
{
868868
WAIT_EVENT_BASE_BACKUP_THROTTLE = PG_WAIT_TIMEOUT,
869869
WAIT_EVENT_PG_SLEEP,
870-
WAIT_EVENT_RECOVERY_APPLY_DELAY
870+
WAIT_EVENT_RECOVERY_APPLY_DELAY,
871+
WAIT_EVENT_REGISTER_SYNC_REQUEST
871872
} WaitEventTimeout;
872873

873874
/* ----------

0 commit comments

Comments
 (0)