@@ -472,8 +472,9 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr,
                                ForkNumber forkNum,
                                BlockNumber blockNum,
                                BufferAccessStrategy strategy,
-                               bool *foundPtr);
-static void FlushBuffer(BufferDesc *buf, SMgrRelation reln);
+                               bool *foundPtr, IOContext *io_context);
+static void FlushBuffer(BufferDesc *buf, SMgrRelation reln,
+                        IOObject io_object, IOContext io_context);
 static void FindAndDropRelationBuffers(RelFileLocator rlocator,
                                        ForkNumber forkNum,
                                        BlockNumber nForkBlock,
@@ -814,6 +815,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
     BufferDesc *bufHdr;
     Block       bufBlock;
     bool        found;
+    IOContext   io_context;
+    IOObject    io_object;
     bool        isExtend;
     bool        isLocalBuf = SmgrIsTemp(smgr);
 
@@ -846,7 +849,14 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 
     if (isLocalBuf)
     {
-        bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
+        /*
+         * LocalBufferAlloc() will set the io_context to IOCONTEXT_NORMAL. We
+         * do not use a BufferAccessStrategy for I/O of temporary tables.
+         * However, in some cases, the "strategy" may not be NULL, so we can't
+         * rely on IOContextForStrategy() to set the right IOContext for us.
+         * This may happen in cases like CREATE TEMPORARY TABLE AS...
+         */
+        bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found, &io_context);
         if (found)
             pgBufferUsage.local_blks_hit++;
         else if (isExtend)
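Aside: the new comment above spells out that temporary-table I/O is always attributed to IOCONTEXT_NORMAL, even though the caller may hold a non-NULL strategy (as in CREATE TEMPORARY TABLE AS..., where the bulk insertion uses a strategy but the target is a temp table). A minimal standalone sketch of that attribution rule, with toy enums standing in for the real ones; this is illustrative C, not PostgreSQL code:

    #include <stdbool.h>
    #include <stdio.h>

    /* Toy stand-ins for the real IOContext values. */
    typedef enum {IOCONTEXT_NORMAL, IOCONTEXT_BULKWRITE} IOContext;

    /*
     * Temp-table (local buffer) I/O ignores the strategy entirely; only
     * shared-buffer I/O derives its context from the strategy in use.
     */
    static IOContext
    choose_io_context(bool is_local_buf, bool have_strategy)
    {
        if (is_local_buf)
            return IOCONTEXT_NORMAL;    /* strategy, if any, is ignored */
        return have_strategy ? IOCONTEXT_BULKWRITE : IOCONTEXT_NORMAL;
    }

    int
    main(void)
    {
        /* A temp table accessed with a strategy still counts as NORMAL. */
        printf("%d %d\n",
               choose_io_context(true, true),    /* 0: NORMAL */
               choose_io_context(false, true));  /* 1: BULKWRITE */
        return 0;
    }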
@@ -862,7 +872,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
          * not currently in memory.
          */
         bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
-                             strategy, &found);
+                             strategy, &found, &io_context);
         if (found)
             pgBufferUsage.shared_blks_hit++;
         else if (isExtend)
@@ -977,7 +987,16 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
      */
     Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID)); /* spinlock not needed */
 
-    bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
+    if (isLocalBuf)
+    {
+        bufBlock = LocalBufHdrGetBlock(bufHdr);
+        io_object = IOOBJECT_TEMP_RELATION;
+    }
+    else
+    {
+        bufBlock = BufHdrGetBlock(bufHdr);
+        io_object = IOOBJECT_RELATION;
+    }
 
     if (isExtend)
     {
@@ -986,6 +1005,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
         /* don't set checksum for all-zero page */
         smgrextend(smgr, forkNum, blockNum, (char *) bufBlock, false);
 
+        pgstat_count_io_op(io_object, io_context, IOOP_EXTEND);
+
         /*
          * NB: we're *not* doing a ScheduleBufferTagForWriteback here;
          * although we're essentially performing a write. At least on linux
@@ -1013,6 +1034,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 
             smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
 
+            pgstat_count_io_op(io_object, io_context, IOOP_READ);
+
             if (track_io_timing)
             {
                 INSTR_TIME_SET_CURRENT(io_time);
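Aside: the pgstat_count_io_op() calls added above (IOOP_EXTEND for extends, IOOP_READ for reads) record each operation against an (IOObject, IOContext, IOOp) triple. A self-contained sketch of how such counters could be laid out follows; the *_NUM sentinels and the flat array are assumptions for illustration, not the backend's actual statistics machinery:

    #include <stdio.h>

    /* Stand-ins for the enums passed to pgstat_count_io_op() above. */
    typedef enum {IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOBJECT_NUM} IOObject;
    typedef enum {IOCONTEXT_NORMAL, IOCONTEXT_BULKREAD, IOCONTEXT_BULKWRITE,
                  IOCONTEXT_VACUUM, IOCONTEXT_NUM} IOContext;
    typedef enum {IOOP_READ, IOOP_WRITE, IOOP_EXTEND, IOOP_EVICT, IOOP_REUSE,
                  IOOP_NUM} IOOp;

    /* One counter per (object, context, op) combination. */
    static long io_counts[IOOBJECT_NUM][IOCONTEXT_NUM][IOOP_NUM];

    static void
    count_io_op(IOObject obj, IOContext ctx, IOOp op)
    {
        io_counts[obj][ctx][op]++;
    }

    int
    main(void)
    {
        /* Mirrors the calls above: one extend, then one read. */
        count_io_op(IOOBJECT_RELATION, IOCONTEXT_NORMAL, IOOP_EXTEND);
        count_io_op(IOOBJECT_RELATION, IOCONTEXT_NORMAL, IOOP_READ);
        printf("reads=%ld extends=%ld\n",
               io_counts[IOOBJECT_RELATION][IOCONTEXT_NORMAL][IOOP_READ],
               io_counts[IOOBJECT_RELATION][IOCONTEXT_NORMAL][IOOP_EXTEND]);
        return 0;
    }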
@@ -1106,14 +1129,19 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
  * *foundPtr is actually redundant with the buffer's BM_VALID flag, but
  * we keep it for simplicity in ReadBuffer.
  *
+ * io_context is passed as an output parameter to avoid calling
+ * IOContextForStrategy() when there is a shared buffers hit and no IO
+ * statistics need be captured.
+ *
  * No locks are held either at entry or exit.
  */
 static BufferDesc *
 BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
             BlockNumber blockNum,
             BufferAccessStrategy strategy,
-            bool *foundPtr)
+            bool *foundPtr, IOContext *io_context)
 {
+    bool        from_ring;
     BufferTag   newTag;         /* identity of requested block */
     uint32      newHash;        /* hash value for newTag */
     LWLock     *newPartitionLock;   /* buffer partition lock for it */
@@ -1165,8 +1193,11 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
         {
             /*
              * If we get here, previous attempts to read the buffer must
-             * have failed ... but we shall bravely try again.
+             * have failed ... but we shall bravely try again. Set
+             * io_context since we will in fact need to count an IO
+             * Operation.
              */
+            *io_context = IOContextForStrategy(strategy);
             *foundPtr = false;
         }
     }
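Aside: io_context is an output parameter precisely so that IOContextForStrategy() runs only on paths like the one above, where an IO operation will actually be counted, and never on a plain shared-buffer hit. A plausible shape for that strategy-to-context mapping is sketched below; the real function is not part of this diff, so treat the mapping and the BAS_* stand-ins as assumptions:

    #include <stddef.h>
    #include <stdio.h>

    /* Assumed stand-ins; only the IOCONTEXT_* names appear in this diff. */
    typedef enum {BAS_NORMAL, BAS_BULKREAD, BAS_BULKWRITE, BAS_VACUUM} StrategyType;
    typedef enum {IOCONTEXT_NORMAL, IOCONTEXT_BULKREAD, IOCONTEXT_BULKWRITE,
                  IOCONTEXT_VACUUM} IOContext;

    /* Sketch: no ring (NULL) and the default strategy both map to NORMAL. */
    static IOContext
    io_context_for_strategy(const StrategyType *strategy)
    {
        if (strategy == NULL)
            return IOCONTEXT_NORMAL;
        switch (*strategy)
        {
            case BAS_BULKREAD:
                return IOCONTEXT_BULKREAD;
            case BAS_BULKWRITE:
                return IOCONTEXT_BULKWRITE;
            case BAS_VACUUM:
                return IOCONTEXT_VACUUM;
            default:
                return IOCONTEXT_NORMAL;
        }
    }

    int
    main(void)
    {
        StrategyType vac = BAS_VACUUM;

        printf("%d %d\n",
               io_context_for_strategy(NULL),   /* 0: NORMAL */
               io_context_for_strategy(&vac));  /* 3: VACUUM */
        return 0;
    }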
@@ -1180,6 +1211,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
      */
     LWLockRelease(newPartitionLock);
 
+    *io_context = IOContextForStrategy(strategy);
+
     /* Loop here in case we have to try another victim buffer */
     for (;;)
     {
@@ -1193,7 +1226,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
          * Select a victim buffer.  The buffer is returned with its header
          * spinlock still held!
          */
-        buf = StrategyGetBuffer(strategy, &buf_state);
+        buf = StrategyGetBuffer(strategy, &buf_state, &from_ring);
 
         Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0);
 
@@ -1247,7 +1280,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
                 UnlockBufHdr(buf, buf_state);
 
                 if (XLogNeedsFlush(lsn) &&
-                    StrategyRejectBuffer(strategy, buf))
+                    StrategyRejectBuffer(strategy, buf, from_ring))
                 {
                     /* Drop lock/pin and loop around for another buffer */
                     LWLockRelease(BufferDescriptorGetContentLock(buf));
@@ -1262,7 +1295,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
                                           smgr->smgr_rlocator.locator.dbOid,
                                           smgr->smgr_rlocator.locator.relNumber);
 
-            FlushBuffer(buf, NULL);
+            FlushBuffer(buf, NULL, IOOBJECT_RELATION, *io_context);
             LWLockRelease(BufferDescriptorGetContentLock(buf));
 
             ScheduleBufferTagForWriteback(&BackendWritebackContext,
@@ -1443,6 +1476,28 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 
     LWLockRelease(newPartitionLock);
 
+    if (oldFlags & BM_VALID)
+    {
+        /*
+         * When a BufferAccessStrategy is in use, blocks evicted from shared
+         * buffers are counted as IOOP_EVICT in the corresponding context
+         * (e.g. IOCONTEXT_BULKWRITE). Shared buffers are evicted by a
+         * strategy in two cases: 1) while initially claiming buffers for the
+         * strategy ring 2) to replace an existing strategy ring buffer
+         * because it is pinned or in use and cannot be reused.
+         *
+         * Blocks evicted from buffers already in the strategy ring are
+         * counted as IOOP_REUSE in the corresponding strategy context.
+         *
+         * At this point, we can accurately count evictions and reuses,
+         * because we have successfully claimed the valid buffer. Previously,
+         * we may have been forced to release the buffer due to concurrent
+         * pinners or erroring out.
+         */
+        pgstat_count_io_op(IOOBJECT_RELATION, *io_context,
+                           from_ring ? IOOP_REUSE : IOOP_EVICT);
+    }
+
     /*
      * Buffer contents are currently invalid.  Try to obtain the right to
      * start I/O.  If StartBufferIO returns false, then someone else managed
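Aside: the new comment distinguishes first-time claims of shared buffers for the ring (counted as IOOP_EVICT) from replacement of blocks in buffers the ring already owns (IOOP_REUSE). A toy simulation of that bookkeeping follows; the fixed-size ring and round-robin slot choice are simplifications for illustration, not the real strategy logic:

    #include <stdbool.h>
    #include <stdio.h>

    #define NRING 3    /* toy ring size */

    int
    main(void)
    {
        bool ring_populated[NRING] = {false, false, false};
        long evictions = 0;
        long reuses = 0;

        for (int access = 0; access < 10; access++)
        {
            int slot = access % NRING;              /* next ring slot */
            bool from_ring = ring_populated[slot];  /* valid ring member? */

            if (from_ring)
                reuses++;       /* replacing a block in a ring-owned buffer */
            else
            {
                evictions++;    /* claiming a shared buffer for the ring */
                ring_populated[slot] = true;
            }
        }
        printf("evictions=%ld reuses=%ld\n", evictions, reuses);   /* 3, 7 */
        return 0;
    }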
@@ -2563,7 +2618,7 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
     PinBuffer_Locked(bufHdr);
     LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
 
-    FlushBuffer(bufHdr, NULL);
+    FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
 
     LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
 
@@ -2813,7 +2868,8 @@ BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum,
  * as the second parameter.  If not, pass NULL.
  */
 static void
-FlushBuffer(BufferDesc *buf, SMgrRelation reln)
+FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
+            IOContext io_context)
 {
     XLogRecPtr  recptr;
     ErrorContextCallback errcallback;
@@ -2907,6 +2963,26 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln)
               bufToWrite,
               false);
 
+    /*
+     * When a strategy is in use, only flushes of dirty buffers already in the
+     * strategy ring are counted as strategy writes (IOCONTEXT
+     * [BULKREAD|BULKWRITE|VACUUM] IOOP_WRITE) for the purpose of IO
+     * statistics tracking.
+     *
+     * If a shared buffer initially added to the ring must be flushed before
+     * being used, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE.
+     *
+     * If a shared buffer which was added to the ring later because the
+     * current strategy buffer is pinned or in use or because all strategy
+     * buffers were dirty and rejected (for BAS_BULKREAD operations only)
+     * requires flushing, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE
+     * (from_ring will be false).
+     *
+     * When a strategy is not in use, the write can only be a "regular" write
+     * of a dirty shared buffer (IOCONTEXT_NORMAL IOOP_WRITE).
+     */
+    pgstat_count_io_op(IOOBJECT_RELATION, io_context, IOOP_WRITE);
+
     if (track_io_timing)
     {
         INSTR_TIME_SET_CURRENT(io_time);
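Aside: the comment above states the write-attribution rule: only a flush of a dirty buffer that was already a ring member counts in the strategy's context; flushes of buffers being newly claimed for the ring, and all writes outside any strategy, are IOCONTEXT_NORMAL writes. A standalone sketch of that rule as stated; in the diff the context is actually chosen by FlushBuffer's callers, so this classifier is illustrative only:

    #include <stdbool.h>
    #include <stdio.h>

    typedef enum {IOCONTEXT_NORMAL, IOCONTEXT_BULKREAD, IOCONTEXT_BULKWRITE,
                  IOCONTEXT_VACUUM} IOContext;

    /* Strategy write iff the dirty buffer was already in the ring. */
    static IOContext
    write_context(bool strategy_in_use, bool from_ring, IOContext strategy_ctx)
    {
        if (strategy_in_use && from_ring)
            return strategy_ctx;     /* strategy write */
        return IOCONTEXT_NORMAL;     /* "regular" write of a dirty buffer */
    }

    int
    main(void)
    {
        printf("%d\n", write_context(true, true, IOCONTEXT_VACUUM));    /* 3 */
        printf("%d\n", write_context(true, false, IOCONTEXT_VACUUM));   /* 0 */
        printf("%d\n", write_context(false, false, IOCONTEXT_NORMAL));  /* 0 */
        return 0;
    }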
@@ -3549,6 +3625,8 @@ FlushRelationBuffers(Relation rel)
                 buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
                 pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
 
+                pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE);
+
                 /* Pop the error context stack */
                 error_context_stack = errcallback.previous;
             }
@@ -3581,7 +3659,7 @@ FlushRelationBuffers(Relation rel)
         {
             PinBuffer_Locked(bufHdr);
             LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
-            FlushBuffer(bufHdr, RelationGetSmgr(rel));
+            FlushBuffer(bufHdr, RelationGetSmgr(rel), IOOBJECT_RELATION, IOCONTEXT_NORMAL);
             LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
             UnpinBuffer(bufHdr);
         }
@@ -3679,7 +3757,7 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
         {
             PinBuffer_Locked(bufHdr);
             LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
-            FlushBuffer(bufHdr, srelent->srel);
+            FlushBuffer(bufHdr, srelent->srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
             LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
             UnpinBuffer(bufHdr);
         }
@@ -3889,7 +3967,7 @@ FlushDatabaseBuffers(Oid dbid)
         {
             PinBuffer_Locked(bufHdr);
             LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
-            FlushBuffer(bufHdr, NULL);
+            FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
             LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
             UnpinBuffer(bufHdr);
         }
@@ -3916,7 +3994,7 @@ FlushOneBuffer(Buffer buffer)
 
     Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
 
-    FlushBuffer(bufHdr, NULL);
+    FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
 }
 
 /*