Skip to content

Commit 6c349d8

Browse files
committed
Re-add GUC track_wal_io_timing
This commit is a rework of 2421e9a, about which Andres Freund has raised some concerns as it is valuable to have both track_io_timing and track_wal_io_timing in some cases, as the WAL write and fsync paths can be a major bottleneck for some workloads. Hence, it can be relevant to not calculate the WAL timings in environments where pg_test_timing performs poorly while capturing some IO data under track_io_timing for the non-WAL IO paths. The opposite can be also true: it should be possible to disable the non-WAL timings and enable the WAL timings (the previous GUC setups allowed this possibility). track_wal_io_timing is added back in this commit, controlling if WAL timings should be calculated in pg_stat_io for the read, fsync and write paths, as done previously with pg_stat_wal. pg_stat_wal previously tracked only the sync and write parts (now removed), read stats is new data tracked in pg_stat_io, all three are aggregated if track_wal_io_timing is enabled. The read part matters during recovery or if a XLogReader is used. Extra note: more control over if the types of timings calculated in pg_stat_io could be done with a GUC that lists pairs of (IOObject,IOOp). Reported-by: Andres Freund <andres@anarazel.de> Author: Bertrand Drouvot <bertranddrouvot.pg@gmail.com> Co-authored-by: Michael Paquier <michael@paquier.xyz> Discussion: https://postgr.es/m/3opf2wh2oljco6ldyqf7ukabw3jijnnhno6fjb4mlu6civ5h24@fcwmhsgmlmzu
1 parent a5cbdeb commit 6c349d8

File tree

15 files changed

+96
-32
lines changed

15 files changed

+96
-32
lines changed

doc/src/sgml/config.sgml

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8378,9 +8378,11 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
83788378
displayed in <link linkend="monitoring-pg-stat-database-view">
83798379
<structname>pg_stat_database</structname></link>,
83808380
<link linkend="monitoring-pg-stat-io-view">
8381-
<structname>pg_stat_io</structname></link>, in the output of the
8381+
<structname>pg_stat_io</structname></link> (if <varname>object</varname>
8382+
is not <literal>wal</literal>), in the output of the
83828383
<link linkend="pg-stat-get-backend-io">
8383-
<function>pg_stat_get_backend_io()</function></link> function, in the
8384+
<function>pg_stat_get_backend_io()</function></link> function (if
8385+
<varname>object</varname> is not <literal>wal</literal>), in the
83848386
output of <xref linkend="sql-explain"/> when the <literal>BUFFERS</literal>
83858387
option is used, in the output of <xref linkend="sql-vacuum"/> when
83868388
the <literal>VERBOSE</literal> option is used, by autovacuum
@@ -8393,6 +8395,32 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
83938395
</listitem>
83948396
</varlistentry>
83958397

8398+
<varlistentry id="guc-track-wal-io-timing" xreflabel="track_wal_io_timing">
8399+
<term><varname>track_wal_io_timing</varname> (<type>boolean</type>)
8400+
<indexterm>
8401+
<primary><varname>track_wal_io_timing</varname> configuration parameter</primary>
8402+
</indexterm>
8403+
</term>
8404+
<listitem>
8405+
<para>
8406+
Enables timing of WAL I/O calls. This parameter is off by default,
8407+
as it will repeatedly query the operating system for the current time,
8408+
which may cause significant overhead on some platforms.
8409+
You can use the <application>pg_test_timing</application> tool to
8410+
measure the overhead of timing on your system.
8411+
I/O timing information is displayed in
8412+
<link linkend="monitoring-pg-stat-io-view">
8413+
<structname>pg_stat_io</structname></link> for the
8414+
<varname>object</varname> <literal>wal</literal> and in the output of
8415+
the <link linkend="pg-stat-get-backend-io">
8416+
<function>pg_stat_get_backend_io()</function></link> function for the
8417+
<varname>object</varname> <literal>wal</literal>.
8418+
Only superusers and users with the appropriate <literal>SET</literal>
8419+
privilege can change this setting.
8420+
</para>
8421+
</listitem>
8422+
</varlistentry>
8423+
83968424
<varlistentry id="guc-track-functions" xreflabel="track_functions">
83978425
<term><varname>track_functions</varname> (<type>enum</type>)
83988426
<indexterm>

doc/src/sgml/monitoring.sgml

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,11 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
183183
of block read, write, extend, and fsync times.
184184
</para>
185185

186+
<para>
187+
The parameter <xref linkend="guc-track-wal-io-timing"/> enables monitoring
188+
of WAL read, write and fsync times.
189+
</para>
190+
186191
<para>
187192
Normally these parameters are set in <filename>postgresql.conf</filename> so
188193
that they apply to all server processes, but it is possible to turn
@@ -2723,7 +2728,11 @@ description | Waiting for a newly initialized WAL file to reach durable storage
27232728
</para>
27242729
<para>
27252730
Time spent in read operations in milliseconds (if
2726-
<xref linkend="guc-track-io-timing"/> is enabled, otherwise zero)
2731+
<xref linkend="guc-track-io-timing"/> is enabled and
2732+
<varname>object</varname> is not <literal>wal</literal>,
2733+
or if <xref linkend="guc-track-wal-io-timing"/> is enabled
2734+
and <varname>object</varname> is <literal>wal</literal>,
2735+
otherwise zero)
27272736
</para>
27282737
</entry>
27292738
</row>
@@ -2757,7 +2766,11 @@ description | Waiting for a newly initialized WAL file to reach durable storage
27572766
</para>
27582767
<para>
27592768
Time spent in write operations in milliseconds (if
2760-
<xref linkend="guc-track-io-timing"/> is enabled, otherwise zero)
2769+
<xref linkend="guc-track-io-timing"/> is enabled and
2770+
<varname>object</varname> is not <literal>wal</literal>,
2771+
or if <xref linkend="guc-track-wal-io-timing"/> is enabled
2772+
and <varname>object</varname> is <literal>wal</literal>,
2773+
otherwise zero)
27612774
</para>
27622775
</entry>
27632776
</row>
@@ -2816,8 +2829,12 @@ description | Waiting for a newly initialized WAL file to reach durable storage
28162829
<structfield>extend_time</structfield> <type>double precision</type>
28172830
</para>
28182831
<para>
2819-
Time spent in extend operations in milliseconds (if
2820-
<xref linkend="guc-track-io-timing"/> is enabled, otherwise zero)
2832+
Time spent in extend operations in milliseconds. (if
2833+
<xref linkend="guc-track-io-timing"/> is enabled and
2834+
<varname>object</varname> is not <literal>wal</literal>,
2835+
or if <xref linkend="guc-track-wal-io-timing"/> is enabled
2836+
and <varname>object</varname> is <literal>wal</literal>,
2837+
otherwise zero)
28212838
</para>
28222839
</entry>
28232840
</row>
@@ -2887,7 +2904,11 @@ description | Waiting for a newly initialized WAL file to reach durable storage
28872904
</para>
28882905
<para>
28892906
Time spent in fsync operations in milliseconds (if
2890-
<xref linkend="guc-track-io-timing"/> is enabled, otherwise zero)
2907+
<xref linkend="guc-track-io-timing"/> is enabled and
2908+
<varname>object</varname> is not <literal>wal</literal>,
2909+
or if <xref linkend="guc-track-wal-io-timing"/> is enabled
2910+
and <varname>object</varname> is <literal>wal</literal>,
2911+
otherwise zero)
28912912
</para>
28922913
</entry>
28932914
</row>

doc/src/sgml/wal.sgml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -808,7 +808,7 @@
808808
<para>
809809
There are two internal functions to write WAL data to disk:
810810
<function>XLogWrite</function> and <function>issue_xlog_fsync</function>.
811-
When <xref linkend="guc-track-io-timing"/> is enabled, the total
811+
When <xref linkend="guc-track-wal-io-timing"/> is enabled, the total
812812
amounts of time <function>XLogWrite</function> writes and
813813
<function>issue_xlog_fsync</function> syncs WAL data to disk are counted as
814814
<varname>write_time</varname> and <varname>fsync_time</varname> in
@@ -828,7 +828,7 @@
828828
<literal>fsync</literal>, or <literal>fsync_writethrough</literal>,
829829
the write operation moves WAL buffers to kernel cache and
830830
<function>issue_xlog_fsync</function> syncs them to disk. Regardless
831-
of the setting of <varname>track_io_timing</varname>, the number
831+
of the setting of <varname>track_wal_io_timing</varname>, the number
832832
of times <function>XLogWrite</function> writes and
833833
<function>issue_xlog_fsync</function> syncs WAL data to disk are also
834834
counted as <varname>writes</varname> and <varname>fsyncs</varname>

src/backend/access/transam/xlog.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ int CommitSiblings = 5; /* # concurrent xacts needed to sleep */
134134
int wal_retrieve_retry_interval = 5000;
135135
int max_slot_wal_keep_size_mb = -1;
136136
int wal_decode_buffer_size = 512 * 1024;
137+
bool track_wal_io_timing = false;
137138

138139
#ifdef WAL_DEBUG
139140
bool XLOG_DEBUG = false;
@@ -2437,7 +2438,7 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
24372438
/*
24382439
* Measure I/O timing to write WAL data, for pg_stat_io.
24392440
*/
2440-
start = pgstat_prepare_io_time();
2441+
start = pgstat_prepare_io_time(track_wal_io_timing);
24412442

24422443
pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
24432444
written = pg_pwrite(openLogFile, from, nleft, startoffset);
@@ -3248,7 +3249,7 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
32483249
errmsg("could not create file \"%s\": %m", tmppath)));
32493250

32503251
/* Measure I/O timing when initializing segment */
3251-
io_start = pgstat_prepare_io_time();
3252+
io_start = pgstat_prepare_io_time(track_wal_io_timing);
32523253

32533254
pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
32543255
save_errno = 0;
@@ -3310,7 +3311,7 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
33103311
}
33113312

33123313
/* Measure I/O timing when flushing segment */
3313-
io_start = pgstat_prepare_io_time();
3314+
io_start = pgstat_prepare_io_time(track_wal_io_timing);
33143315

33153316
pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC);
33163317
if (pg_fsync(fd) != 0)
@@ -8744,7 +8745,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
87448745
/*
87458746
* Measure I/O timing to sync the WAL file for pg_stat_io.
87468747
*/
8747-
start = pgstat_prepare_io_time();
8748+
start = pgstat_prepare_io_time(track_wal_io_timing);
87488749

87498750
pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC);
87508751
switch (wal_sync_method)

src/backend/access/transam/xlogreader.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1557,7 +1557,7 @@ WALRead(XLogReaderState *state,
15571557

15581558
#ifndef FRONTEND
15591559
/* Measure I/O timing when reading segment */
1560-
io_start = pgstat_prepare_io_time();
1560+
io_start = pgstat_prepare_io_time(track_wal_io_timing);
15611561

15621562
pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
15631563
#endif

src/backend/access/transam/xlogrecovery.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3401,7 +3401,7 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
34013401
readOff = targetPageOff;
34023402

34033403
/* Measure I/O timing when reading segment */
3404-
io_start = pgstat_prepare_io_time();
3404+
io_start = pgstat_prepare_io_time(track_wal_io_timing);
34053405

34063406
pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
34073407
r = pg_pread(readFile, readBuf, XLOG_BLCKSZ, (off_t) readOff);

src/backend/storage/buffer/bufmgr.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1509,7 +1509,7 @@ WaitReadBuffers(ReadBuffersOperation *operation)
15091509
io_pages[io_buffers_len++] = BufferGetBlock(buffers[i]);
15101510
}
15111511

1512-
io_start = pgstat_prepare_io_time();
1512+
io_start = pgstat_prepare_io_time(track_io_timing);
15131513
smgrreadv(operation->smgr, forknum, io_first_block, io_pages, io_buffers_len);
15141514
pgstat_count_io_op_time(io_object, io_context, IOOP_READ, io_start,
15151515
1, io_buffers_len * BLCKSZ);
@@ -2402,7 +2402,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr,
24022402
}
24032403
}
24042404

2405-
io_start = pgstat_prepare_io_time();
2405+
io_start = pgstat_prepare_io_time(track_io_timing);
24062406

24072407
/*
24082408
* Note: if smgrzeroextend fails, we will end up with buffers that are
@@ -3857,7 +3857,7 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
38573857
*/
38583858
bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
38593859

3860-
io_start = pgstat_prepare_io_time();
3860+
io_start = pgstat_prepare_io_time(track_io_timing);
38613861

38623862
/*
38633863
* bufToWrite is either the shared buffer or a copy, as appropriate.
@@ -4459,7 +4459,7 @@ FlushRelationBuffers(Relation rel)
44594459

44604460
PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
44614461

4462-
io_start = pgstat_prepare_io_time();
4462+
io_start = pgstat_prepare_io_time(track_io_timing);
44634463

44644464
smgrwrite(srel,
44654465
BufTagGetForkNum(&bufHdr->tag),
@@ -5905,7 +5905,7 @@ IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_context)
59055905
sort_pending_writebacks(wb_context->pending_writebacks,
59065906
wb_context->nr_pending);
59075907

5908-
io_start = pgstat_prepare_io_time();
5908+
io_start = pgstat_prepare_io_time(track_io_timing);
59095909

59105910
/*
59115911
* Coalesce neighbouring writes, but nothing else. For that we iterate

src/backend/storage/buffer/localbuf.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ GetLocalVictimBuffer(void)
244244

245245
PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
246246

247-
io_start = pgstat_prepare_io_time();
247+
io_start = pgstat_prepare_io_time(track_io_timing);
248248

249249
/* And write... */
250250
smgrwrite(oreln,
@@ -414,7 +414,7 @@ ExtendBufferedRelLocal(BufferManagerRelation bmr,
414414
}
415415
}
416416

417-
io_start = pgstat_prepare_io_time();
417+
io_start = pgstat_prepare_io_time(track_io_timing);
418418

419419
/* actually extend relation */
420420
smgrzeroextend(bmr.smgr, fork, first_block, extend_by, false);

src/backend/storage/smgr/md.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1391,7 +1391,7 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
13911391
ereport(DEBUG1,
13921392
(errmsg_internal("could not forward fsync request because request queue is full")));
13931393

1394-
io_start = pgstat_prepare_io_time();
1394+
io_start = pgstat_prepare_io_time(track_io_timing);
13951395

13961396
if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0)
13971397
ereport(data_sync_elevel(ERROR),
@@ -1790,7 +1790,7 @@ mdsyncfiletag(const FileTag *ftag, char *path)
17901790
need_to_close = true;
17911791
}
17921792

1793-
io_start = pgstat_prepare_io_time();
1793+
io_start = pgstat_prepare_io_time(track_io_timing);
17941794

17951795
/* Sync the file. */
17961796
result = FileSync(file, WAIT_EVENT_DATA_FILE_SYNC);

src/backend/utils/activity/pgstat_backend.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
#include "postgres.h"
2626

27+
#include "access/xlog.h"
2728
#include "storage/bufmgr.h"
2829
#include "utils/memutils.h"
2930
#include "utils/pgstat_internal.h"
@@ -43,7 +44,7 @@ void
4344
pgstat_count_backend_io_op_time(IOObject io_object, IOContext io_context,
4445
IOOp io_op, instr_time io_time)
4546
{
46-
Assert(track_io_timing);
47+
Assert(track_io_timing || track_wal_io_timing);
4748

4849
if (!pgstat_tracks_backend_bktype(MyBackendType))
4950
return;

src/backend/utils/activity/pgstat_io.c

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -83,20 +83,22 @@ pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op,
8383
}
8484

8585
/*
86-
* Initialize the internal timing for an IO operation.
86+
* Initialize the internal timing for an IO operation, depending on an
87+
* IO timing GUC.
8788
*/
8889
instr_time
89-
pgstat_prepare_io_time(void)
90+
pgstat_prepare_io_time(bool track_io_guc)
9091
{
9192
instr_time io_start;
9293

93-
if (track_io_timing)
94+
if (track_io_guc)
9495
INSTR_TIME_SET_CURRENT(io_start);
9596
else
9697
{
9798
/*
98-
* There is no need to set io_start when an IO timing GUC is disabled,
99-
* still initialize it to zero to avoid compiler warnings.
99+
* There is no need to set io_start when an IO timing GUC is disabled.
100+
* Initialize it to zero to avoid compiler warnings and to let
101+
* pgstat_count_io_op_time() know that timings should be ignored.
100102
*/
101103
INSTR_TIME_SET_ZERO(io_start);
102104
}
@@ -119,7 +121,7 @@ void
119121
pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op,
120122
instr_time start_time, uint32 cnt, uint64 bytes)
121123
{
122-
if (track_io_timing)
124+
if (!INSTR_TIME_IS_ZERO(start_time))
123125
{
124126
instr_time io_time;
125127

src/backend/utils/misc/guc_tables.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1501,6 +1501,15 @@ struct config_bool ConfigureNamesBool[] =
15011501
false,
15021502
NULL, NULL, NULL
15031503
},
1504+
{
1505+
{"track_wal_io_timing", PGC_SUSET, STATS_CUMULATIVE,
1506+
gettext_noop("Collects timing statistics for WAL I/O activity."),
1507+
NULL
1508+
},
1509+
&track_wal_io_timing,
1510+
false,
1511+
NULL, NULL, NULL
1512+
},
15041513

15051514
{
15061515
{"update_process_title", PGC_SUSET, PROCESS_TITLE,

src/backend/utils/misc/postgresql.conf.sample

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -641,6 +641,7 @@
641641
#track_counts = on
642642
#track_cost_delay_timing = off
643643
#track_io_timing = off
644+
#track_wal_io_timing = off
644645
#track_functions = none # none, pl, all
645646
#stats_fetch_consistency = cache # cache, none, snapshot
646647

src/include/access/xlog.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ extern PGDLLIMPORT char *wal_consistency_checking_string;
5454
extern PGDLLIMPORT bool log_checkpoints;
5555
extern PGDLLIMPORT int CommitDelay;
5656
extern PGDLLIMPORT int CommitSiblings;
57+
extern PGDLLIMPORT bool track_wal_io_timing;
5758
extern PGDLLIMPORT int wal_decode_buffer_size;
5859

5960
extern PGDLLIMPORT int CheckPointSegments;

src/include/pgstat.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -564,7 +564,7 @@ extern bool pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io,
564564
BackendType bktype);
565565
extern void pgstat_count_io_op(IOObject io_object, IOContext io_context,
566566
IOOp io_op, uint32 cnt, uint64 bytes);
567-
extern instr_time pgstat_prepare_io_time(void);
567+
extern instr_time pgstat_prepare_io_time(bool track_io_guc);
568568
extern void pgstat_count_io_op_time(IOObject io_object, IOContext io_context,
569569
IOOp io_op, instr_time start_time,
570570
uint32 cnt, uint64 bytes);

0 commit comments

Comments
 (0)