Skip to content

Commit f30d62c

Browse files
committed
pgstat: Track more detailed relation IO statistics
Commit 28e626b introduced the infrastructure for tracking more detailed IO statistics. This commit adds the actual collection of the new IO statistics for relations and temporary relations. See the aforementioned commit for goals and high-level design. The changes in this commit are fairly straightforward. The bulk of the change is passing sufficient information to the call sites of pgstat_count_io_op(). A somewhat unsightly detail is that it currently is hard to find a better place to count fsyncs than in md.c, whereas the other pgstat_count_io_op() calls are in bufmgr.c/localbuf.c. As the number of fsyncs is tied to md.c implementation details, it's not obvious there is a better answer. Author: Melanie Plageman <melanieplageman@gmail.com> Reviewed-by: Andres Freund <andres@anarazel.de> Discussion: https://postgr.es/m/20200124195226.lth52iydq2n2uilq@alap3.anarazel.de
1 parent 40d0b2d commit f30d62c

File tree

6 files changed

+184
-36
lines changed

6 files changed

+184
-36
lines changed

src/backend/storage/buffer/bufmgr.c

Lines changed: 94 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -472,8 +472,9 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr,
472472
ForkNumber forkNum,
473473
BlockNumber blockNum,
474474
BufferAccessStrategy strategy,
475-
bool *foundPtr);
476-
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln);
475+
bool *foundPtr, IOContext *io_context);
476+
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln,
477+
IOObject io_object, IOContext io_context);
477478
static void FindAndDropRelationBuffers(RelFileLocator rlocator,
478479
ForkNumber forkNum,
479480
BlockNumber nForkBlock,
@@ -814,6 +815,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
814815
BufferDesc *bufHdr;
815816
Block bufBlock;
816817
bool found;
818+
IOContext io_context;
819+
IOObject io_object;
817820
bool isExtend;
818821
bool isLocalBuf = SmgrIsTemp(smgr);
819822

@@ -846,7 +849,14 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
846849

847850
if (isLocalBuf)
848851
{
849-
bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
852+
/*
853+
* LocalBufferAlloc() will set the io_context to IOCONTEXT_NORMAL. We
854+
* do not use a BufferAccessStrategy for I/O of temporary tables.
855+
* However, in some cases, the "strategy" may not be NULL, so we can't
856+
* rely on IOContextForStrategy() to set the right IOContext for us.
857+
* This may happen in cases like CREATE TEMPORARY TABLE AS...
858+
*/
859+
bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found, &io_context);
850860
if (found)
851861
pgBufferUsage.local_blks_hit++;
852862
else if (isExtend)
@@ -862,7 +872,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
862872
* not currently in memory.
863873
*/
864874
bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
865-
strategy, &found);
875+
strategy, &found, &io_context);
866876
if (found)
867877
pgBufferUsage.shared_blks_hit++;
868878
else if (isExtend)
@@ -977,7 +987,16 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
977987
*/
978988
Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID)); /* spinlock not needed */
979989

980-
bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
990+
if (isLocalBuf)
991+
{
992+
bufBlock = LocalBufHdrGetBlock(bufHdr);
993+
io_object = IOOBJECT_TEMP_RELATION;
994+
}
995+
else
996+
{
997+
bufBlock = BufHdrGetBlock(bufHdr);
998+
io_object = IOOBJECT_RELATION;
999+
}
9811000

9821001
if (isExtend)
9831002
{
@@ -986,6 +1005,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
9861005
/* don't set checksum for all-zero page */
9871006
smgrextend(smgr, forkNum, blockNum, (char *) bufBlock, false);
9881007

1008+
pgstat_count_io_op(io_object, io_context, IOOP_EXTEND);
1009+
9891010
/*
9901011
* NB: we're *not* doing a ScheduleBufferTagForWriteback here;
9911012
* although we're essentially performing a write. At least on linux
@@ -1013,6 +1034,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
10131034

10141035
smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
10151036

1037+
pgstat_count_io_op(io_object, io_context, IOOP_READ);
1038+
10161039
if (track_io_timing)
10171040
{
10181041
INSTR_TIME_SET_CURRENT(io_time);
@@ -1106,14 +1129,19 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
11061129
* *foundPtr is actually redundant with the buffer's BM_VALID flag, but
11071130
* we keep it for simplicity in ReadBuffer.
11081131
*
1132+
* io_context is passed as an output parameter to avoid calling
1133+
* IOContextForStrategy() when there is a shared buffers hit and no IO
1134+
* statistics need be captured.
1135+
*
11091136
* No locks are held either at entry or exit.
11101137
*/
11111138
static BufferDesc *
11121139
BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
11131140
BlockNumber blockNum,
11141141
BufferAccessStrategy strategy,
1115-
bool *foundPtr)
1142+
bool *foundPtr, IOContext *io_context)
11161143
{
1144+
bool from_ring;
11171145
BufferTag newTag; /* identity of requested block */
11181146
uint32 newHash; /* hash value for newTag */
11191147
LWLock *newPartitionLock; /* buffer partition lock for it */
@@ -1165,8 +1193,11 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
11651193
{
11661194
/*
11671195
* If we get here, previous attempts to read the buffer must
1168-
* have failed ... but we shall bravely try again.
1196+
* have failed ... but we shall bravely try again. Set
1197+
* io_context since we will in fact need to count an IO
1198+
* Operation.
11691199
*/
1200+
*io_context = IOContextForStrategy(strategy);
11701201
*foundPtr = false;
11711202
}
11721203
}
@@ -1180,6 +1211,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
11801211
*/
11811212
LWLockRelease(newPartitionLock);
11821213

1214+
*io_context = IOContextForStrategy(strategy);
1215+
11831216
/* Loop here in case we have to try another victim buffer */
11841217
for (;;)
11851218
{
@@ -1193,7 +1226,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
11931226
* Select a victim buffer. The buffer is returned with its header
11941227
* spinlock still held!
11951228
*/
1196-
buf = StrategyGetBuffer(strategy, &buf_state);
1229+
buf = StrategyGetBuffer(strategy, &buf_state, &from_ring);
11971230

11981231
Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0);
11991232

@@ -1247,7 +1280,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
12471280
UnlockBufHdr(buf, buf_state);
12481281

12491282
if (XLogNeedsFlush(lsn) &&
1250-
StrategyRejectBuffer(strategy, buf))
1283+
StrategyRejectBuffer(strategy, buf, from_ring))
12511284
{
12521285
/* Drop lock/pin and loop around for another buffer */
12531286
LWLockRelease(BufferDescriptorGetContentLock(buf));
@@ -1262,7 +1295,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
12621295
smgr->smgr_rlocator.locator.dbOid,
12631296
smgr->smgr_rlocator.locator.relNumber);
12641297

1265-
FlushBuffer(buf, NULL);
1298+
FlushBuffer(buf, NULL, IOOBJECT_RELATION, *io_context);
12661299
LWLockRelease(BufferDescriptorGetContentLock(buf));
12671300

12681301
ScheduleBufferTagForWriteback(&BackendWritebackContext,
@@ -1443,6 +1476,28 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
14431476

14441477
LWLockRelease(newPartitionLock);
14451478

1479+
if (oldFlags & BM_VALID)
1480+
{
1481+
/*
1482+
* When a BufferAccessStrategy is in use, blocks evicted from shared
1483+
* buffers are counted as IOOP_EVICT in the corresponding context
1484+
* (e.g. IOCONTEXT_BULKWRITE). Shared buffers are evicted by a
1485+
* strategy in two cases: 1) while initially claiming buffers for the
1486+
* strategy ring 2) to replace an existing strategy ring buffer
1487+
* because it is pinned or in use and cannot be reused.
1488+
*
1489+
* Blocks evicted from buffers already in the strategy ring are
1490+
* counted as IOOP_REUSE in the corresponding strategy context.
1491+
*
1492+
* At this point, we can accurately count evictions and reuses,
1493+
* because we have successfully claimed the valid buffer. Previously,
1494+
* we may have been forced to release the buffer due to concurrent
1495+
* pinners or erroring out.
1496+
*/
1497+
pgstat_count_io_op(IOOBJECT_RELATION, *io_context,
1498+
from_ring ? IOOP_REUSE : IOOP_EVICT);
1499+
}
1500+
14461501
/*
14471502
* Buffer contents are currently invalid. Try to obtain the right to
14481503
* start I/O. If StartBufferIO returns false, then someone else managed
@@ -2563,7 +2618,7 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
25632618
PinBuffer_Locked(bufHdr);
25642619
LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
25652620

2566-
FlushBuffer(bufHdr, NULL);
2621+
FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
25672622

25682623
LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
25692624

@@ -2813,7 +2868,8 @@ BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum,
28132868
* as the second parameter. If not, pass NULL.
28142869
*/
28152870
static void
2816-
FlushBuffer(BufferDesc *buf, SMgrRelation reln)
2871+
FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
2872+
IOContext io_context)
28172873
{
28182874
XLogRecPtr recptr;
28192875
ErrorContextCallback errcallback;
@@ -2907,6 +2963,26 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln)
29072963
bufToWrite,
29082964
false);
29092965

2966+
/*
2967+
* When a strategy is in use, only flushes of dirty buffers already in the
2968+
* strategy ring are counted as strategy writes (IOCONTEXT
2969+
* [BULKREAD|BULKWRITE|VACUUM] IOOP_WRITE) for the purpose of IO
2970+
* statistics tracking.
2971+
*
2972+
* If a shared buffer initially added to the ring must be flushed before
2973+
* being used, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE.
2974+
*
2975+
* If a shared buffer which was added to the ring later because the
2976+
* current strategy buffer is pinned or in use or because all strategy
2977+
* buffers were dirty and rejected (for BAS_BULKREAD operations only)
2978+
* requires flushing, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE
2979+
* (from_ring will be false).
2980+
*
2981+
* When a strategy is not in use, the write can only be a "regular" write
2982+
* of a dirty shared buffer (IOCONTEXT_NORMAL IOOP_WRITE).
2983+
*/
2984+
pgstat_count_io_op(IOOBJECT_RELATION, io_context, IOOP_WRITE);
2985+
29102986
if (track_io_timing)
29112987
{
29122988
INSTR_TIME_SET_CURRENT(io_time);
@@ -3549,6 +3625,8 @@ FlushRelationBuffers(Relation rel)
35493625
buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
35503626
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
35513627

3628+
pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE);
3629+
35523630
/* Pop the error context stack */
35533631
error_context_stack = errcallback.previous;
35543632
}
@@ -3581,7 +3659,7 @@ FlushRelationBuffers(Relation rel)
35813659
{
35823660
PinBuffer_Locked(bufHdr);
35833661
LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
3584-
FlushBuffer(bufHdr, RelationGetSmgr(rel));
3662+
FlushBuffer(bufHdr, RelationGetSmgr(rel), IOOBJECT_RELATION, IOCONTEXT_NORMAL);
35853663
LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
35863664
UnpinBuffer(bufHdr);
35873665
}
@@ -3679,7 +3757,7 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
36793757
{
36803758
PinBuffer_Locked(bufHdr);
36813759
LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
3682-
FlushBuffer(bufHdr, srelent->srel);
3760+
FlushBuffer(bufHdr, srelent->srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
36833761
LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
36843762
UnpinBuffer(bufHdr);
36853763
}
@@ -3889,7 +3967,7 @@ FlushDatabaseBuffers(Oid dbid)
38893967
{
38903968
PinBuffer_Locked(bufHdr);
38913969
LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
3892-
FlushBuffer(bufHdr, NULL);
3970+
FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
38933971
LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
38943972
UnpinBuffer(bufHdr);
38953973
}
@@ -3916,7 +3994,7 @@ FlushOneBuffer(Buffer buffer)
39163994

39173995
Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
39183996

3919-
FlushBuffer(bufHdr, NULL);
3997+
FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
39203998
}
39213999

39224000
/*

0 commit comments

Comments
 (0)