Skip to content

Commit ff99918

Browse files
committed
Track total amounts of times spent writing and syncing WAL data to disk.
This commit adds new GUC track_wal_io_timing. When this is enabled, the total amounts of time XLogWrite writes and issue_xlog_fsync syncs WAL data to disk are counted in pg_stat_wal. This information would be useful to check how much WAL write and sync affect the performance. Enabling track_wal_io_timing will make the server query the operating system for the current time every time WAL is written or synced, which may cause significant overhead on some platforms. To avoid such additional overhead in the server with track_io_timing enabled, this commit introduces track_wal_io_timing as a separate parameter from track_io_timing. Note that WAL write and sync activity by walreceiver has not been tracked yet. This commit makes the server also track the numbers of times XLogWrite writes and issue_xlog_fsync syncs WAL data to disk, in pg_stat_wal, regardless of the setting of track_wal_io_timing. This counters can be used to calculate the WAL write and sync time per request, for example. Bump PGSTAT_FILE_FORMAT_ID. Bump catalog version. Author: Masahiro Ikeda Reviewed-By: Japin Li, Hayato Kuroda, Masahiko Sawada, David Johnston, Fujii Masao Discussion: https://postgr.es/m/0509ad67b585a5b86a83d445dfa75392@oss.nttdata.com
1 parent 9d2d457 commit ff99918

File tree

16 files changed

+285
-29
lines changed

16 files changed

+285
-29
lines changed

doc/src/sgml/config.sgml

+22-1
Original file line numberDiff line numberDiff line change
@@ -7450,7 +7450,7 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
74507450
<listitem>
74517451
<para>
74527452
Enables timing of database I/O calls. This parameter is off by
7453-
default, because it will repeatedly query the operating system for
7453+
default, as it will repeatedly query the operating system for
74547454
the current time, which may cause significant overhead on some
74557455
platforms. You can use the <xref linkend="pgtesttiming"/> tool to
74567456
measure the overhead of timing on your system.
@@ -7464,6 +7464,27 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
74647464
</listitem>
74657465
</varlistentry>
74667466

7467+
<varlistentry id="guc-track-wal-io-timing" xreflabel="track_wal_io_timing">
7468+
<term><varname>track_wal_io_timing</varname> (<type>boolean</type>)
7469+
<indexterm>
7470+
<primary><varname>track_wal_io_timing</varname> configuration parameter</primary>
7471+
</indexterm>
7472+
</term>
7473+
<listitem>
7474+
<para>
7475+
Enables timing of WAL I/O calls. This parameter is off by default,
7476+
as it will repeatedly query the operating system for the current time,
7477+
which may cause significant overhead on some platforms.
7478+
You can use the <application>pg_test_timing</application> tool to
7479+
measure the overhead of timing on your system.
7480+
I/O timing information is
7481+
displayed in <link linkend="monitoring-pg-stat-wal-view">
7482+
<structname>pg_stat_wal</structname></link>. Only superusers can
7483+
change this setting.
7484+
</para>
7485+
</listitem>
7486+
</varlistentry>
7487+
74677488
<varlistentry id="guc-track-functions" xreflabel="track_functions">
74687489
<term><varname>track_functions</varname> (<type>enum</type>)
74697490
<indexterm>

doc/src/sgml/monitoring.sgml

+62
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,11 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
185185
of block read and write times.
186186
</para>
187187

188+
<para>
189+
The parameter <xref linkend="guc-track-wal-io-timing"/> enables monitoring
190+
of WAL write times.
191+
</para>
192+
188193
<para>
189194
Normally these parameters are set in <filename>postgresql.conf</filename> so
190195
that they apply to all server processes, but it is possible to turn
@@ -3477,6 +3482,63 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
34773482
</para></entry>
34783483
</row>
34793484

3485+
<row>
3486+
<entry role="catalog_table_entry"><para role="column_definition">
3487+
<structfield>wal_write</structfield> <type>bigint</type>
3488+
</para>
3489+
<para>
3490+
Number of times WAL buffers were written out to disk via
3491+
<function>XLogWrite</function> request.
3492+
See <xref linkend="wal-configuration"/> for more information about
3493+
the internal WAL function <function>XLogWrite</function>.
3494+
</para></entry>
3495+
</row>
3496+
3497+
<row>
3498+
<entry role="catalog_table_entry"><para role="column_definition">
3499+
<structfield>wal_sync</structfield> <type>bigint</type>
3500+
</para>
3501+
<para>
3502+
Number of times WAL files were synced to disk via
3503+
<function>issue_xlog_fsync</function> request
3504+
(if <xref linkend="guc-fsync"/> is <literal>on</literal> and
3505+
<xref linkend="guc-wal-sync-method"/> is either
3506+
<literal>fdatasync</literal>, <literal>fsync</literal> or
3507+
<literal>fsync_writethrough</literal>, otherwise zero).
3508+
See <xref linkend="wal-configuration"/> for more information about
3509+
the internal WAL function <function>issue_xlog_fsync</function>.
3510+
</para></entry>
3511+
</row>
3512+
3513+
<row>
3514+
<entry role="catalog_table_entry"><para role="column_definition">
3515+
<structfield>wal_write_time</structfield> <type>double precision</type>
3516+
</para>
3517+
<para>
3518+
Total amount of time spent writing WAL buffers to disk via
3519+
<function>XLogWrite</function> request, in milliseconds
3520+
(if <xref linkend="guc-track-wal-io-timing"/> is enabled,
3521+
otherwise zero). This includes the sync time when
3522+
<varname>wal_sync_method</varname> is either
3523+
<literal>open_datasync</literal> or <literal>open_sync</literal>.
3524+
</para></entry>
3525+
</row>
3526+
3527+
<row>
3528+
<entry role="catalog_table_entry"><para role="column_definition">
3529+
<structfield>wal_sync_time</structfield> <type>double precision</type>
3530+
</para>
3531+
<para>
3532+
Total amount of time spent syncing WAL files to disk via
3533+
<function>issue_xlog_fsync</function> request, in milliseconds
3534+
(if <varname>track_wal_io_timing</varname> is enabled,
3535+
<varname>fsync</varname> is <literal>on</literal>, and
3536+
<varname>wal_sync_method</varname> is either
3537+
<literal>fdatasync</literal>, <literal>fsync</literal> or
3538+
<literal>fsync_writethrough</literal>, otherwise zero).
3539+
</para></entry>
3540+
</row>
3541+
34803542
<row>
34813543
<entry role="catalog_table_entry"><para role="column_definition">
34823544
<structfield>stats_reset</structfield> <type>timestamp with time zone</type>

doc/src/sgml/wal.sgml

+29
Original file line numberDiff line numberDiff line change
@@ -767,6 +767,35 @@
767767
<acronym>WAL</acronym> call being logged to the server log. This
768768
option might be replaced by a more general mechanism in the future.
769769
</para>
770+
771+
<para>
772+
There are two internal functions to write WAL data to disk:
773+
<function>XLogWrite</function> and <function>issue_xlog_fsync</function>.
774+
When <xref linkend="guc-track-wal-io-timing"/> is enabled, the total
775+
amounts of time <function>XLogWrite</function> writes and
776+
<function>issue_xlog_fsync</function> syncs WAL data to disk are counted as
777+
<literal>wal_write_time</literal> and <literal>wal_sync_time</literal> in
778+
<xref linkend="pg-stat-wal-view"/>, respectively.
779+
<function>XLogWrite</function> is normally called by
780+
<function>XLogInsertRecord</function> (when there is no space for the new
781+
record in WAL buffers), <function>XLogFlush</function> and the WAL writer,
782+
to write WAL buffers to disk and call <function>issue_xlog_fsync</function>.
783+
<function>issue_xlog_fsync</function> is normally called by
784+
<function>XLogWrite</function> to sync WAL files to disk.
785+
If <varname>wal_sync_method</varname> is either
786+
<literal>open_datasync</literal> or <literal>open_sync</literal>,
787+
a write operation in <function>XLogWrite</function> guarantees to sync written
788+
WAL data to disk and <function>issue_xlog_fsync</function> does nothing.
789+
If <varname>wal_sync_method</varname> is either <literal>fdatasync</literal>,
790+
<literal>fsync</literal>, or <literal>fsync_writethrough</literal>,
791+
the write operation moves WAL buffers to kernel cache and
792+
<function>issue_xlog_fsync</function> syncs them to disk. Regardless
793+
of the setting of <varname>track_wal_io_timing</varname>, the numbers
794+
of times <function>XLogWrite</function> writes and
795+
<function>issue_xlog_fsync</function> syncs WAL data to disk are also
796+
counted as <literal>wal_write</literal> and <literal>wal_sync</literal>
797+
in <structname>pg_stat_wal</structname>, respectively.
798+
</para>
770799
</sect1>
771800

772801
<sect1 id="wal-internals">

src/backend/access/transam/xlog.c

+53-1
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ int CommitDelay = 0; /* precommit delay in microseconds */
110110
int CommitSiblings = 5; /* # concurrent xacts needed to sleep */
111111
int wal_retrieve_retry_interval = 5000;
112112
int max_slot_wal_keep_size_mb = -1;
113+
bool track_wal_io_timing = false;
113114

114115
#ifdef WAL_DEBUG
115116
bool XLOG_DEBUG = false;
@@ -2533,6 +2534,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
25332534
Size nbytes;
25342535
Size nleft;
25352536
int written;
2537+
instr_time start;
25362538

25372539
/* OK to write the page(s) */
25382540
from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
@@ -2541,9 +2543,30 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
25412543
do
25422544
{
25432545
errno = 0;
2546+
2547+
/* Measure I/O timing to write WAL data */
2548+
if (track_wal_io_timing)
2549+
INSTR_TIME_SET_CURRENT(start);
2550+
25442551
pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
25452552
written = pg_pwrite(openLogFile, from, nleft, startoffset);
25462553
pgstat_report_wait_end();
2554+
2555+
/*
2556+
* Increment the I/O timing and the number of times WAL data
2557+
* were written out to disk.
2558+
*/
2559+
if (track_wal_io_timing)
2560+
{
2561+
instr_time duration;
2562+
2563+
INSTR_TIME_SET_CURRENT(duration);
2564+
INSTR_TIME_SUBTRACT(duration, start);
2565+
WalStats.m_wal_write_time += INSTR_TIME_GET_MICROSEC(duration);
2566+
}
2567+
2568+
WalStats.m_wal_write++;
2569+
25472570
if (written <= 0)
25482571
{
25492572
char xlogfname[MAXFNAMELEN];
@@ -10524,6 +10547,20 @@ void
1052410547
issue_xlog_fsync(int fd, XLogSegNo segno)
1052510548
{
1052610549
char *msg = NULL;
10550+
instr_time start;
10551+
10552+
/*
10553+
* Quick exit if fsync is disabled or write() has already synced the WAL
10554+
* file.
10555+
*/
10556+
if (!enableFsync ||
10557+
sync_method == SYNC_METHOD_OPEN ||
10558+
sync_method == SYNC_METHOD_OPEN_DSYNC)
10559+
return;
10560+
10561+
/* Measure I/O timing to sync the WAL file */
10562+
if (track_wal_io_timing)
10563+
INSTR_TIME_SET_CURRENT(start);
1052710564

1052810565
pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC);
1052910566
switch (sync_method)
@@ -10546,7 +10583,8 @@ issue_xlog_fsync(int fd, XLogSegNo segno)
1054610583
#endif
1054710584
case SYNC_METHOD_OPEN:
1054810585
case SYNC_METHOD_OPEN_DSYNC:
10549-
/* write synced it already */
10586+
/* not reachable */
10587+
Assert(false);
1055010588
break;
1055110589
default:
1055210590
elog(PANIC, "unrecognized wal_sync_method: %d", sync_method);
@@ -10568,6 +10606,20 @@ issue_xlog_fsync(int fd, XLogSegNo segno)
1056810606
}
1056910607

1057010608
pgstat_report_wait_end();
10609+
10610+
/*
10611+
* Increment the I/O timing and the number of times WAL files were synced.
10612+
*/
10613+
if (track_wal_io_timing)
10614+
{
10615+
instr_time duration;
10616+
10617+
INSTR_TIME_SET_CURRENT(duration);
10618+
INSTR_TIME_SUBTRACT(duration, start);
10619+
WalStats.m_wal_sync_time += INSTR_TIME_GET_MICROSEC(duration);
10620+
}
10621+
10622+
WalStats.m_wal_sync++;
1057110623
}
1057210624

1057310625
/*

src/backend/catalog/system_views.sql

+4
Original file line numberDiff line numberDiff line change
@@ -1004,6 +1004,10 @@ CREATE VIEW pg_stat_wal AS
10041004
w.wal_fpi,
10051005
w.wal_bytes,
10061006
w.wal_buffers_full,
1007+
w.wal_write,
1008+
w.wal_sync,
1009+
w.wal_write_time,
1010+
w.wal_sync_time,
10071011
w.stats_reset
10081012
FROM pg_stat_get_wal() w;
10091013

src/backend/postmaster/checkpointer.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -505,7 +505,7 @@ CheckpointerMain(void)
505505
pgstat_send_bgwriter();
506506

507507
/* Send WAL statistics to the stats collector. */
508-
pgstat_send_wal();
508+
pgstat_report_wal();
509509

510510
/*
511511
* If any checkpoint flags have been set, redo the loop to handle the

0 commit comments

Comments
 (0)