Skip to content

Commit ac8d53d

Browse files
committed
Track IO times in pg_stat_io
a9c70b4 and 8aaa04b32S added counting of IO operations to a new view, pg_stat_io. Now, add IO timing for reads, writes, extends, and fsyncs to pg_stat_io as well. This combines the tracking for pgBufferUsage with the tracking for pg_stat_io into a new function pgstat_count_io_op_time(). This should make it a bit easier to avoid the somewhat costly instr_time conversion done for pgBufferUsage. Author: Melanie Plageman <melanieplageman@gmail.com> Reviewed-by: Andres Freund <andres@anarazel.de> Reviewed-by: Bertrand Drouvot <bertranddrouvot.pg@gmail.com> Discussion: https://postgr.es/m/flat/CAAKRu_ay5iKmnbXZ3DsauViF3eMxu4m1oNnJXqV_HyqYeg55Ww%40mail.gmail.com
1 parent 1c453cf commit ac8d53d

File tree

11 files changed

+275
-107
lines changed

11 files changed

+275
-107
lines changed

doc/src/sgml/monitoring.sgml

+59
Original file line numberDiff line numberDiff line change
@@ -3814,6 +3814,18 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
38143814
</entry>
38153815
</row>
38163816

3817+
<row>
3818+
<entry role="catalog_table_entry">
3819+
<para role="column_definition">
3820+
<structfield>read_time</structfield> <type>double precision</type>
3821+
</para>
3822+
<para>
3823+
Time spent in read operations in milliseconds (if
3824+
<xref linkend="guc-track-io-timing"/> is enabled, otherwise zero)
3825+
</para>
3826+
</entry>
3827+
</row>
3828+
38173829
<row>
38183830
<entry role="catalog_table_entry">
38193831
<para role="column_definition">
@@ -3826,6 +3838,18 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
38263838
</entry>
38273839
</row>
38283840

3841+
<row>
3842+
<entry role="catalog_table_entry">
3843+
<para role="column_definition">
3844+
<structfield>write_time</structfield> <type>double precision</type>
3845+
</para>
3846+
<para>
3847+
Time spent in write operations in milliseconds (if
3848+
<xref linkend="guc-track-io-timing"/> is enabled, otherwise zero)
3849+
</para>
3850+
</entry>
3851+
</row>
3852+
38293853
<row>
38303854
<entry role="catalog_table_entry">
38313855
<para role="column_definition">
@@ -3838,6 +3862,18 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
38383862
</entry>
38393863
</row>
38403864

3865+
<row>
3866+
<entry role="catalog_table_entry">
3867+
<para role="column_definition">
3868+
<structfield>extend_time</structfield> <type>double precision</type>
3869+
</para>
3870+
<para>
3871+
Time spent in extend operations in milliseconds (if
3872+
<xref linkend="guc-track-io-timing"/> is enabled, otherwise zero)
3873+
</para>
3874+
</entry>
3875+
</row>
3876+
38413877
<row>
38423878
<entry role="catalog_table_entry">
38433879
<para role="column_definition">
@@ -3913,6 +3949,18 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
39133949
</entry>
39143950
</row>
39153951

3952+
<row>
3953+
<entry role="catalog_table_entry">
3954+
<para role="column_definition">
3955+
<structfield>fsync_time</structfield> <type>double precision</type>
3956+
</para>
3957+
<para>
3958+
Time spent in fsync operations in milliseconds (if
3959+
<xref linkend="guc-track-io-timing"/> is enabled, otherwise zero)
3960+
</para>
3961+
</entry>
3962+
</row>
3963+
39163964
<row>
39173965
<entry role="catalog_table_entry">
39183966
<para role="column_definition">
@@ -3978,6 +4026,17 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
39784026
</itemizedlist>
39794027
</para>
39804028

4029+
<note>
4030+
<para>
4031+
Columns tracking I/O time will only be non-zero when
4032+
<xref linkend="guc-track-io-timing"/> is enabled. The user should be
4033+
careful when referencing these columns in combination with their
4034+
corresponding IO operations in case <varname>track_io_timing</varname>
4035+
was not enabled for the entire time since the last stats reset.
4036+
</para>
4037+
</note>
4038+
4039+
39814040

39824041
</sect2>
39834042

src/backend/catalog/system_views.sql

+4
Original file line numberDiff line numberDiff line change
@@ -1125,13 +1125,17 @@ SELECT
11251125
b.io_object,
11261126
b.io_context,
11271127
b.reads,
1128+
b.read_time,
11281129
b.writes,
1130+
b.write_time,
11291131
b.extends,
1132+
b.extend_time,
11301133
b.op_bytes,
11311134
b.hits,
11321135
b.evictions,
11331136
b.reuses,
11341137
b.fsyncs,
1138+
b.fsync_time,
11351139
b.stats_reset
11361140
FROM pg_stat_get_io() b;
11371141

src/backend/storage/buffer/bufmgr.c

+19-47
Original file line numberDiff line numberDiff line change
@@ -1112,23 +1112,12 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
11121112
MemSet((char *) bufBlock, 0, BLCKSZ);
11131113
else
11141114
{
1115-
instr_time io_start,
1116-
io_time;
1117-
1118-
if (track_io_timing)
1119-
INSTR_TIME_SET_CURRENT(io_start);
1115+
instr_time io_start = pgstat_prepare_io_time();
11201116

11211117
smgrread(smgr, forkNum, blockNum, bufBlock);
11221118

1123-
if (track_io_timing)
1124-
{
1125-
INSTR_TIME_SET_CURRENT(io_time);
1126-
INSTR_TIME_SUBTRACT(io_time, io_start);
1127-
pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
1128-
INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time);
1129-
}
1130-
1131-
pgstat_count_io_op(io_object, io_context, IOOP_READ);
1119+
pgstat_count_io_op_time(io_object, io_context,
1120+
IOOP_READ, io_start, 1);
11321121

11331122
/* check for garbage data */
11341123
if (!PageIsVerifiedExtended((Page) bufBlock, blockNum,
@@ -1837,6 +1826,7 @@ ExtendBufferedRelShared(ExtendBufferedWhat eb,
18371826
{
18381827
BlockNumber first_block;
18391828
IOContext io_context = IOContextForStrategy(strategy);
1829+
instr_time io_start;
18401830

18411831
LimitAdditionalPins(&extend_by);
18421832

@@ -2044,6 +2034,8 @@ ExtendBufferedRelShared(ExtendBufferedWhat eb,
20442034
}
20452035
}
20462036

2037+
io_start = pgstat_prepare_io_time();
2038+
20472039
/*
20482040
* Note: if smgzerorextend fails, we will end up with buffers that are
20492041
* allocated but not marked BM_VALID. The next relation extension will
@@ -2066,6 +2058,9 @@ ExtendBufferedRelShared(ExtendBufferedWhat eb,
20662058
if (!(flags & EB_SKIP_EXTENSION_LOCK))
20672059
UnlockRelationForExtension(eb.rel, ExclusiveLock);
20682060

2061+
pgstat_count_io_op_time(IOOBJECT_RELATION, io_context, IOOP_EXTEND,
2062+
io_start, extend_by);
2063+
20692064
/* Set BM_VALID, terminate IO, and wake up any waiters */
20702065
for (int i = 0; i < extend_by; i++)
20712066
{
@@ -2089,8 +2084,6 @@ ExtendBufferedRelShared(ExtendBufferedWhat eb,
20892084
}
20902085

20912086
pgBufferUsage.shared_blks_written += extend_by;
2092-
pgstat_count_io_op_n(IOOBJECT_RELATION, io_context, IOOP_EXTEND,
2093-
extend_by);
20942087

20952088
*extended_by = extend_by;
20962089

@@ -3344,8 +3337,7 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
33443337
{
33453338
XLogRecPtr recptr;
33463339
ErrorContextCallback errcallback;
3347-
instr_time io_start,
3348-
io_time;
3340+
instr_time io_start;
33493341
Block bufBlock;
33503342
char *bufToWrite;
33513343
uint32 buf_state;
@@ -3420,10 +3412,7 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
34203412
*/
34213413
bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
34223414

3423-
if (track_io_timing)
3424-
INSTR_TIME_SET_CURRENT(io_start);
3425-
else
3426-
INSTR_TIME_SET_ZERO(io_start);
3415+
io_start = pgstat_prepare_io_time();
34273416

34283417
/*
34293418
* bufToWrite is either the shared buffer or a copy, as appropriate.
@@ -3452,15 +3441,8 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
34523441
* When a strategy is not in use, the write can only be a "regular" write
34533442
* of a dirty shared buffer (IOCONTEXT_NORMAL IOOP_WRITE).
34543443
*/
3455-
pgstat_count_io_op(IOOBJECT_RELATION, io_context, IOOP_WRITE);
3456-
3457-
if (track_io_timing)
3458-
{
3459-
INSTR_TIME_SET_CURRENT(io_time);
3460-
INSTR_TIME_SUBTRACT(io_time, io_start);
3461-
pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
3462-
INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
3463-
}
3444+
pgstat_count_io_op_time(IOOBJECT_RELATION, io_context,
3445+
IOOP_WRITE, io_start, 1);
34643446

34653447
pgBufferUsage.shared_blks_written++;
34663448

@@ -4062,14 +4044,13 @@ FlushRelationBuffers(Relation rel)
40624044
{
40634045
int i;
40644046
BufferDesc *bufHdr;
4065-
instr_time io_start,
4066-
io_time;
40674047

40684048
if (RelationUsesLocalBuffers(rel))
40694049
{
40704050
for (i = 0; i < NLocBuffer; i++)
40714051
{
40724052
uint32 buf_state;
4053+
instr_time io_start;
40734054

40744055
bufHdr = GetLocalBufferDescriptor(i);
40754056
if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
@@ -4089,30 +4070,21 @@ FlushRelationBuffers(Relation rel)
40894070

40904071
PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
40914072

4092-
if (track_io_timing)
4093-
INSTR_TIME_SET_CURRENT(io_start);
4094-
else
4095-
INSTR_TIME_SET_ZERO(io_start);
4073+
io_start = pgstat_prepare_io_time();
40964074

40974075
smgrwrite(RelationGetSmgr(rel),
40984076
BufTagGetForkNum(&bufHdr->tag),
40994077
bufHdr->tag.blockNum,
41004078
localpage,
41014079
false);
41024080

4081+
pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION,
4082+
IOCONTEXT_NORMAL, IOOP_WRITE,
4083+
io_start, 1);
4084+
41034085
buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
41044086
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
41054087

4106-
pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE);
4107-
4108-
if (track_io_timing)
4109-
{
4110-
INSTR_TIME_SET_CURRENT(io_time);
4111-
INSTR_TIME_SUBTRACT(io_time, io_start);
4112-
pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
4113-
INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
4114-
}
4115-
41164088
pgBufferUsage.local_blks_written++;
41174089

41184090
/* Pop the error context stack */

src/backend/storage/buffer/localbuf.c

+12-19
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,6 @@ GetLocalVictimBuffer(void)
176176
int trycounter;
177177
uint32 buf_state;
178178
BufferDesc *bufHdr;
179-
instr_time io_start,
180-
io_time;
181179

182180
ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
183181

@@ -233,6 +231,7 @@ GetLocalVictimBuffer(void)
233231
*/
234232
if (buf_state & BM_DIRTY)
235233
{
234+
instr_time io_start;
236235
SMgrRelation oreln;
237236
Page localpage = (char *) LocalBufHdrGetBlock(bufHdr);
238237

@@ -241,10 +240,7 @@ GetLocalVictimBuffer(void)
241240

242241
PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
243242

244-
if (track_io_timing)
245-
INSTR_TIME_SET_CURRENT(io_start);
246-
else
247-
INSTR_TIME_SET_ZERO(io_start);
243+
io_start = pgstat_prepare_io_time();
248244

249245
/* And write... */
250246
smgrwrite(oreln,
@@ -253,21 +249,14 @@ GetLocalVictimBuffer(void)
253249
localpage,
254250
false);
255251

252+
/* Temporary table I/O does not use Buffer Access Strategies */
253+
pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL,
254+
IOOP_WRITE, io_start, 1);
255+
256256
/* Mark not-dirty now in case we error out below */
257257
buf_state &= ~BM_DIRTY;
258258
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
259259

260-
/* Temporary table I/O does not use Buffer Access Strategies */
261-
pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE);
262-
263-
if (track_io_timing)
264-
{
265-
INSTR_TIME_SET_CURRENT(io_time);
266-
INSTR_TIME_SUBTRACT(io_time, io_start);
267-
pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
268-
INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
269-
}
270-
271260
pgBufferUsage.local_blks_written++;
272261
}
273262

@@ -325,6 +314,7 @@ ExtendBufferedRelLocal(ExtendBufferedWhat eb,
325314
uint32 *extended_by)
326315
{
327316
BlockNumber first_block;
317+
instr_time io_start;
328318

329319
/* Initialize local buffers if first request in this session */
330320
if (LocalBufHash == NULL)
@@ -415,9 +405,14 @@ ExtendBufferedRelLocal(ExtendBufferedWhat eb,
415405
}
416406
}
417407

408+
io_start = pgstat_prepare_io_time();
409+
418410
/* actually extend relation */
419411
smgrzeroextend(eb.smgr, fork, first_block, extend_by, false);
420412

413+
pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_EXTEND,
414+
io_start, extend_by);
415+
421416
for (int i = 0; i < extend_by; i++)
422417
{
423418
Buffer buf = buffers[i];
@@ -434,8 +429,6 @@ ExtendBufferedRelLocal(ExtendBufferedWhat eb,
434429
*extended_by = extend_by;
435430

436431
pgBufferUsage.temp_blks_written += extend_by;
437-
pgstat_count_io_op_n(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_EXTEND,
438-
extend_by);
439432

440433
return first_block;
441434
}

0 commit comments

Comments
 (0)