Skip to content

Commit d8735b8

Browse files
committed
Fix issues in pg_stat_wal.
1) Previously there were both pgstat_send_wal() and pgstat_report_wal() in order to send WAL activity to the stats collector. With the former being used by wal writer, the latter by most other processes. They were a bit redundant and so this commit merges them into pgstat_send_wal() to simplify the code. 2) Previously WAL global statistics counters were calculated and then compared with zero-filled buffer in order to determine whether any WAL activity has happened since the last submission. These calculation and comparison were not cheap. This was regularly exercised even in read-only workloads. This commit fixes the issue by making some WAL activity counters directly be checked to determine if there's WAL activity stats to send. 3) Previously pgstat_report_stat() did not check if there's WAL activity stats to send as part of the "Don't expend a clock check if nothing to do" check at the top. It's probably rare to have pending WAL stats without also passing one of the other conditions, but for safely this commit changes pgstat_report_stats() so that it checks also some WAL activity counters at the top. This commit also adds the comments about the design of WAL stats. Reported-by: Andres Freund Author: Masahiro Ikeda Reviewed-by: Kyotaro Horiguchi, Atsushi Torikoshi, Andres Freund, Fujii Masao Discussion: https://postgr.es/m/20210324232224.vrfiij2rxxwqqjjb@alap3.anarazel.de
1 parent 694da19 commit d8735b8

File tree

4 files changed

+82
-58
lines changed

4 files changed

+82
-58
lines changed

src/backend/postmaster/checkpointer.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -505,7 +505,7 @@ CheckpointerMain(void)
505505
pgstat_send_bgwriter();
506506

507507
/* Send WAL statistics to the stats collector. */
508-
pgstat_report_wal();
508+
pgstat_send_wal(true);
509509

510510
/*
511511
* If any checkpoint flags have been set, redo the loop to handle the
@@ -583,7 +583,7 @@ HandleCheckpointerInterrupts(void)
583583
BgWriterStats.m_requested_checkpoints++;
584584
ShutdownXLOG(0, 0);
585585
pgstat_send_bgwriter();
586-
pgstat_report_wal();
586+
pgstat_send_wal(true);
587587

588588
/* Normal exit from the checkpointer is here */
589589
proc_exit(0); /* done */

src/backend/postmaster/pgstat.c

+67-54
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,8 @@ PgStat_MsgWal WalStats;
133133

134134
/*
135135
* WAL usage counters saved from pgWALUsage at the previous call to
136-
* pgstat_report_wal(). This is used to calculate how much WAL usage
137-
* happens between pgstat_report_wal() calls, by substracting
136+
* pgstat_send_wal(). This is used to calculate how much WAL usage
137+
* happens between pgstat_send_wal() calls, by substracting
138138
* the previous counters from the current ones.
139139
*/
140140
static WalUsage prevWalUsage;
@@ -852,9 +852,19 @@ pgstat_report_stat(bool disconnect)
852852
TabStatusArray *tsa;
853853
int i;
854854

855-
/* Don't expend a clock check if nothing to do */
855+
/*
856+
* Don't expend a clock check if nothing to do.
857+
*
858+
* To determine whether any WAL activity has occurred since last time, not
859+
* only the number of generated WAL records but also the numbers of WAL
860+
* writes and syncs need to be checked. Because even transaction that
861+
* generates no WAL records can write or sync WAL data when flushing the
862+
* data pages.
863+
*/
856864
if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) &&
857865
pgStatXactCommit == 0 && pgStatXactRollback == 0 &&
866+
pgWalUsage.wal_records == prevWalUsage.wal_records &&
867+
WalStats.m_wal_write == 0 && WalStats.m_wal_sync == 0 &&
858868
!have_function_stats && !disconnect)
859869
return;
860870

@@ -948,7 +958,7 @@ pgstat_report_stat(bool disconnect)
948958
pgstat_send_funcstats();
949959

950960
/* Send WAL statistics */
951-
pgstat_report_wal();
961+
pgstat_send_wal(true);
952962

953963
/* Finally send SLRU statistics */
954964
pgstat_send_slru();
@@ -2918,7 +2928,7 @@ void
29182928
pgstat_initialize(void)
29192929
{
29202930
/*
2921-
* Initialize prevWalUsage with pgWalUsage so that pgstat_report_wal() can
2931+
* Initialize prevWalUsage with pgWalUsage so that pgstat_send_wal() can
29222932
* calculate how much pgWalUsage counters are increased by substracting
29232933
* prevWalUsage from pgWalUsage.
29242934
*/
@@ -3030,83 +3040,88 @@ pgstat_send_bgwriter(void)
30303040
MemSet(&BgWriterStats, 0, sizeof(BgWriterStats));
30313041
}
30323042

3033-
/* ----------
3034-
* pgstat_report_wal() -
3035-
*
3036-
* Calculate how much WAL usage counters are increased and send
3037-
* WAL statistics to the collector.
3038-
*
3039-
* Must be called by processes that generate WAL.
3040-
* ----------
3041-
*/
3042-
void
3043-
pgstat_report_wal(void)
3044-
{
3045-
WalUsage walusage;
3046-
3047-
/*
3048-
* Calculate how much WAL usage counters are increased by substracting the
3049-
* previous counters from the current ones. Fill the results in WAL stats
3050-
* message.
3051-
*/
3052-
MemSet(&walusage, 0, sizeof(WalUsage));
3053-
WalUsageAccumDiff(&walusage, &pgWalUsage, &prevWalUsage);
3054-
3055-
WalStats.m_wal_records = walusage.wal_records;
3056-
WalStats.m_wal_fpi = walusage.wal_fpi;
3057-
WalStats.m_wal_bytes = walusage.wal_bytes;
3058-
3059-
/*
3060-
* Send WAL stats message to the collector.
3061-
*/
3062-
if (!pgstat_send_wal(true))
3063-
return;
3064-
3065-
/*
3066-
* Save the current counters for the subsequent calculation of WAL usage.
3067-
*/
3068-
prevWalUsage = pgWalUsage;
3069-
}
3070-
30713043
/* ----------
30723044
* pgstat_send_wal() -
30733045
*
30743046
* Send WAL statistics to the collector.
30753047
*
30763048
* If 'force' is not set, WAL stats message is only sent if enough time has
30773049
* passed since last one was sent to reach PGSTAT_STAT_INTERVAL.
3078-
*
3079-
* Return true if the message is sent, and false otherwise.
30803050
* ----------
30813051
*/
3082-
bool
3052+
void
30833053
pgstat_send_wal(bool force)
30843054
{
3085-
/* We assume this initializes to zeroes */
3086-
static const PgStat_MsgWal all_zeroes;
30873055
static TimestampTz sendTime = 0;
30883056

30893057
/*
30903058
* This function can be called even if nothing at all has happened. In
30913059
* this case, avoid sending a completely empty message to the stats
30923060
* collector.
3061+
*
3062+
* Check wal_records counter to determine whether any WAL activity has
3063+
* happened since last time. Note that other WalUsage counters don't need
3064+
* to be checked because they are incremented always together with
3065+
* wal_records counter.
3066+
*
3067+
* m_wal_buffers_full also doesn't need to be checked because it's
3068+
* incremented only when at least one WAL record is generated (i.e.,
3069+
* wal_records counter is incremented). But for safely, we assert that
3070+
* m_wal_buffers_full is always zero when no WAL record is generated
3071+
*
3072+
* This function can be called by a process like walwriter that normally
3073+
* generates no WAL records. To determine whether any WAL activity has
3074+
* happened at that process since the last time, the numbers of WAL writes
3075+
* and syncs are also checked.
30933076
*/
3094-
if (memcmp(&WalStats, &all_zeroes, sizeof(PgStat_MsgWal)) == 0)
3095-
return false;
3077+
if (pgWalUsage.wal_records == prevWalUsage.wal_records &&
3078+
WalStats.m_wal_write == 0 && WalStats.m_wal_sync == 0)
3079+
{
3080+
Assert(WalStats.m_wal_buffers_full == 0);
3081+
return;
3082+
}
30963083

30973084
if (!force)
30983085
{
30993086
TimestampTz now = GetCurrentTimestamp();
31003087

31013088
/*
31023089
* Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL
3103-
* msec since we last sent one.
3090+
* msec since we last sent one to avoid overloading the stats
3091+
* collector.
31043092
*/
31053093
if (!TimestampDifferenceExceeds(sendTime, now, PGSTAT_STAT_INTERVAL))
3106-
return false;
3094+
return;
31073095
sendTime = now;
31083096
}
31093097

3098+
/*
3099+
* Set the counters related to generated WAL data if the counters were
3100+
* updated.
3101+
*/
3102+
if (pgWalUsage.wal_records != prevWalUsage.wal_records)
3103+
{
3104+
WalUsage walusage;
3105+
3106+
/*
3107+
* Calculate how much WAL usage counters were increased by
3108+
* substracting the previous counters from the current ones. Fill the
3109+
* results in WAL stats message.
3110+
*/
3111+
MemSet(&walusage, 0, sizeof(WalUsage));
3112+
WalUsageAccumDiff(&walusage, &pgWalUsage, &prevWalUsage);
3113+
3114+
WalStats.m_wal_records = walusage.wal_records;
3115+
WalStats.m_wal_fpi = walusage.wal_fpi;
3116+
WalStats.m_wal_bytes = walusage.wal_bytes;
3117+
3118+
/*
3119+
* Save the current counters for the subsequent calculation of WAL
3120+
* usage.
3121+
*/
3122+
prevWalUsage = pgWalUsage;
3123+
}
3124+
31103125
/*
31113126
* Prepare and send the message
31123127
*/
@@ -3117,8 +3132,6 @@ pgstat_send_wal(bool force)
31173132
* Clear out the statistics buffer, so it can be re-used.
31183133
*/
31193134
MemSet(&WalStats, 0, sizeof(WalStats));
3120-
3121-
return true;
31223135
}
31233136

31243137
/* ----------

src/include/executor/instrument.h

+12
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@
1616
#include "portability/instr_time.h"
1717

1818

19+
/*
20+
* BufferUsage and WalUsage counters keep being incremented infinitely,
21+
* i.e., must never be reset to zero, so that we can calculate how much
22+
* the counters are incremented in an arbitrary period.
23+
*/
1924
typedef struct BufferUsage
2025
{
2126
int64 shared_blks_hit; /* # of shared buffer hits */
@@ -32,6 +37,13 @@ typedef struct BufferUsage
3237
instr_time blk_write_time; /* time spent writing */
3338
} BufferUsage;
3439

40+
/*
41+
* WalUsage tracks only WAL activity like WAL records generation that
42+
* can be measured per query and is displayed by EXPLAIN command,
43+
* pg_stat_statements extension, etc. It does not track other WAL activity
44+
* like WAL writes that it's not worth measuring per query. That's tracked
45+
* by WAL global statistics counters in WalStats, instead.
46+
*/
3547
typedef struct WalUsage
3648
{
3749
int64 wal_records; /* # of WAL records produced */

src/include/pgstat.h

+1-2
Original file line numberDiff line numberDiff line change
@@ -1091,8 +1091,7 @@ extern void pgstat_twophase_postabort(TransactionId xid, uint16 info,
10911091

10921092
extern void pgstat_send_archiver(const char *xlog, bool failed);
10931093
extern void pgstat_send_bgwriter(void);
1094-
extern void pgstat_report_wal(void);
1095-
extern bool pgstat_send_wal(bool force);
1094+
extern void pgstat_send_wal(bool force);
10961095

10971096
/* ----------
10981097
* Support functions for the SQL-callable functions to

0 commit comments

Comments
 (0)