Skip to content

Commit df3b181

Browse files
author
Amit Kapila
committed
Add infrastructure to track WAL usage.
This allows gathering the WAL generation statistics for each statement execution. The three statistics that we collect are the number of WAL records, the number of full page writes and the amount of WAL bytes generated. This helps users who have write-intensive workloads to see the impact of I/O due to WAL. This further enables us to see approximately what percentage of overall WAL is due to full page writes. In the future, we can extend this functionality to allow us to compute the exact amount of WAL data due to full page writes. This patch in itself is just the infrastructure to compute WAL usage data. The upcoming patches will expose this data via explain, auto_explain, pg_stat_statements and verbose (auto)vacuum output. Author: Kirill Bychik, Julien Rouhaud Reviewed-by: Dilip Kumar, Fujii Masao and Amit Kapila Discussion: https://postgr.es/m/CAB-hujrP8ZfUkvL5OYETipQwA=e3n7oqHFU=4ZLxWS_Cza3kQQ@mail.gmail.com
1 parent 0588ee6 commit df3b181

File tree

10 files changed

+182
-32
lines changed

10 files changed

+182
-32
lines changed

src/backend/access/heap/vacuumlazy.c

+28-9
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@
139139
#define PARALLEL_VACUUM_KEY_DEAD_TUPLES 2
140140
#define PARALLEL_VACUUM_KEY_QUERY_TEXT 3
141141
#define PARALLEL_VACUUM_KEY_BUFFER_USAGE 4
142+
#define PARALLEL_VACUUM_KEY_WAL_USAGE 5
142143

143144
/*
144145
* Macro to check if we are in a parallel vacuum. If true, we are in the
@@ -275,6 +276,9 @@ typedef struct LVParallelState
275276
/* Points to buffer usage area in DSM */
276277
BufferUsage *buffer_usage;
277278

279+
/* Points to WAL usage area in DSM */
280+
WalUsage *wal_usage;
281+
278282
/*
279283
* The number of indexes that support parallel index bulk-deletion and
280284
* parallel index cleanup respectively.
@@ -2143,8 +2147,8 @@ lazy_parallel_vacuum_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
21432147
vacrelstats->dead_tuples, nindexes, vacrelstats);
21442148

21452149
/*
2146-
* Next, accumulate buffer usage. (This must wait for the workers to
2147-
* finish, or we might get incomplete data.)
2150+
* Next, accumulate buffer and WAL usage. (This must wait for the workers
2151+
* to finish, or we might get incomplete data.)
21482152
*/
21492153
if (nworkers > 0)
21502154
{
@@ -2154,7 +2158,7 @@ lazy_parallel_vacuum_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
21542158
WaitForParallelWorkersToFinish(lps->pcxt);
21552159

21562160
for (i = 0; i < lps->pcxt->nworkers_launched; i++)
2157-
InstrAccumParallelQuery(&lps->buffer_usage[i]);
2161+
InstrAccumParallelQuery(&lps->buffer_usage[i], &lps->wal_usage[i]);
21582162
}
21592163

21602164
/*
@@ -3171,6 +3175,7 @@ begin_parallel_vacuum(Oid relid, Relation *Irel, LVRelStats *vacrelstats,
31713175
LVShared *shared;
31723176
LVDeadTuples *dead_tuples;
31733177
BufferUsage *buffer_usage;
3178+
WalUsage *wal_usage;
31743179
bool *can_parallel_vacuum;
31753180
long maxtuples;
31763181
char *sharedquery;
@@ -3255,15 +3260,19 @@ begin_parallel_vacuum(Oid relid, Relation *Irel, LVRelStats *vacrelstats,
32553260
shm_toc_estimate_keys(&pcxt->estimator, 1);
32563261

32573262
/*
3258-
* Estimate space for BufferUsage -- PARALLEL_VACUUM_KEY_BUFFER_USAGE.
3263+
* Estimate space for BufferUsage and WalUsage --
3264+
* PARALLEL_VACUUM_KEY_BUFFER_USAGE and PARALLEL_VACUUM_KEY_WAL_USAGE.
32593265
*
32603266
* If there are no extensions loaded that care, we could skip this. We
3261-
* have no way of knowing whether anyone's looking at pgBufferUsage, so do
3262-
* it unconditionally.
3267+
* have no way of knowing whether anyone's looking at pgBufferUsage or
3268+
* pgWalUsage, so do it unconditionally.
32633269
*/
32643270
shm_toc_estimate_chunk(&pcxt->estimator,
32653271
mul_size(sizeof(BufferUsage), pcxt->nworkers));
32663272
shm_toc_estimate_keys(&pcxt->estimator, 1);
3273+
shm_toc_estimate_chunk(&pcxt->estimator,
3274+
mul_size(sizeof(WalUsage), pcxt->nworkers));
3275+
shm_toc_estimate_keys(&pcxt->estimator, 1);
32673276

32683277
/* Finally, estimate PARALLEL_VACUUM_KEY_QUERY_TEXT space */
32693278
querylen = strlen(debug_query_string);
@@ -3299,11 +3308,18 @@ begin_parallel_vacuum(Oid relid, Relation *Irel, LVRelStats *vacrelstats,
32993308
shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_DEAD_TUPLES, dead_tuples);
33003309
vacrelstats->dead_tuples = dead_tuples;
33013310

3302-
/* Allocate space for each worker's BufferUsage; no need to initialize */
3311+
/*
3312+
* Allocate space for each worker's BufferUsage and WalUsage; no need to
3313+
* initialize
3314+
*/
33033315
buffer_usage = shm_toc_allocate(pcxt->toc,
33043316
mul_size(sizeof(BufferUsage), pcxt->nworkers));
33053317
shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, buffer_usage);
33063318
lps->buffer_usage = buffer_usage;
3319+
wal_usage = shm_toc_allocate(pcxt->toc,
3320+
mul_size(sizeof(WalUsage), pcxt->nworkers));
3321+
shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_WAL_USAGE, wal_usage);
3322+
lps->wal_usage = wal_usage;
33073323

33083324
/* Store query string for workers */
33093325
sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
@@ -3435,6 +3451,7 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
34353451
LVShared *lvshared;
34363452
LVDeadTuples *dead_tuples;
34373453
BufferUsage *buffer_usage;
3454+
WalUsage *wal_usage;
34383455
int nindexes;
34393456
char *sharedquery;
34403457
IndexBulkDeleteResult **stats;
@@ -3511,9 +3528,11 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
35113528
parallel_vacuum_index(indrels, stats, lvshared, dead_tuples, nindexes,
35123529
&vacrelstats);
35133530

3514-
/* Report buffer usage during parallel execution */
3531+
/* Report buffer/WAL usage during parallel execution */
35153532
buffer_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, false);
3516-
InstrEndParallelQuery(&buffer_usage[ParallelWorkerNumber]);
3533+
wal_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_WAL_USAGE, false);
3534+
InstrEndParallelQuery(&buffer_usage[ParallelWorkerNumber],
3535+
&wal_usage[ParallelWorkerNumber]);
35173536

35183537
/* Pop the error context stack */
35193538
error_context_stack = errcallback.previous;

src/backend/access/nbtree/nbtsort.c

+40
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
#include "access/xloginsert.h"
6868
#include "catalog/index.h"
6969
#include "commands/progress.h"
70+
#include "executor/instrument.h"
7071
#include "miscadmin.h"
7172
#include "pgstat.h"
7273
#include "storage/smgr.h"
@@ -81,6 +82,7 @@
8182
#define PARALLEL_KEY_TUPLESORT UINT64CONST(0xA000000000000002)
8283
#define PARALLEL_KEY_TUPLESORT_SPOOL2 UINT64CONST(0xA000000000000003)
8384
#define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xA000000000000004)
85+
#define PARALLEL_KEY_WAL_USAGE UINT64CONST(0xA000000000000005)
8486

8587
/*
8688
* DISABLE_LEADER_PARTICIPATION disables the leader's participation in
@@ -203,6 +205,7 @@ typedef struct BTLeader
203205
Sharedsort *sharedsort;
204206
Sharedsort *sharedsort2;
205207
Snapshot snapshot;
208+
WalUsage *walusage;
206209
} BTLeader;
207210

208211
/*
@@ -1476,6 +1479,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
14761479
Sharedsort *sharedsort2;
14771480
BTSpool *btspool = buildstate->spool;
14781481
BTLeader *btleader = (BTLeader *) palloc0(sizeof(BTLeader));
1482+
WalUsage *walusage;
14791483
bool leaderparticipates = true;
14801484
char *sharedquery;
14811485
int querylen;
@@ -1528,6 +1532,18 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
15281532
shm_toc_estimate_keys(&pcxt->estimator, 3);
15291533
}
15301534

1535+
/*
1536+
* Estimate space for WalUsage -- PARALLEL_KEY_WAL_USAGE
1537+
*
1538+
* WalUsage during execution of maintenance command can be used by an
1539+
* extension that reports the WAL usage, such as pg_stat_statements. We
1540+
* have no way of knowing whether anyone's looking at pgWalUsage, so do it
1541+
* unconditionally.
1542+
*/
1543+
shm_toc_estimate_chunk(&pcxt->estimator,
1544+
mul_size(sizeof(WalUsage), pcxt->nworkers));
1545+
shm_toc_estimate_keys(&pcxt->estimator, 1);
1546+
15311547
/* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
15321548
querylen = strlen(debug_query_string);
15331549
shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
@@ -1599,6 +1615,11 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
15991615
memcpy(sharedquery, debug_query_string, querylen + 1);
16001616
shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery);
16011617

1618+
/* Allocate space for each worker's WalUsage; no need to initialize */
1619+
walusage = shm_toc_allocate(pcxt->toc,
1620+
mul_size(sizeof(WalUsage), pcxt->nworkers));
1621+
shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage);
1622+
16021623
/* Launch workers, saving status for leader/caller */
16031624
LaunchParallelWorkers(pcxt);
16041625
btleader->pcxt = pcxt;
@@ -1609,6 +1630,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
16091630
btleader->sharedsort = sharedsort;
16101631
btleader->sharedsort2 = sharedsort2;
16111632
btleader->snapshot = snapshot;
1633+
btleader->walusage = walusage;
16121634

16131635
/* If no workers were successfully launched, back out (do serial build) */
16141636
if (pcxt->nworkers_launched == 0)
@@ -1637,8 +1659,18 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
16371659
static void
16381660
_bt_end_parallel(BTLeader *btleader)
16391661
{
1662+
int i;
1663+
16401664
/* Shutdown worker processes */
16411665
WaitForParallelWorkersToFinish(btleader->pcxt);
1666+
1667+
/*
1668+
* Next, accumulate WAL usage. (This must wait for the workers to finish,
1669+
* or we might get incomplete data.)
1670+
*/
1671+
for (i = 0; i < btleader->pcxt->nworkers_launched; i++)
1672+
InstrAccumParallelQuery(NULL, &btleader->walusage[i]);
1673+
16421674
/* Free last reference to MVCC snapshot, if one was used */
16431675
if (IsMVCCSnapshot(btleader->snapshot))
16441676
UnregisterSnapshot(btleader->snapshot);
@@ -1769,6 +1801,7 @@ _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc)
17691801
Relation indexRel;
17701802
LOCKMODE heapLockmode;
17711803
LOCKMODE indexLockmode;
1804+
WalUsage *walusage;
17721805
int sortmem;
17731806

17741807
#ifdef BTREE_BUILD_STATS
@@ -1830,11 +1863,18 @@ _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc)
18301863
tuplesort_attach_shared(sharedsort2, seg);
18311864
}
18321865

1866+
/* Prepare to track WAL usage during parallel execution */
1867+
InstrStartParallelQuery();
1868+
18331869
/* Perform sorting of spool, and possibly a spool2 */
18341870
sortmem = maintenance_work_mem / btshared->scantuplesortstates;
18351871
_bt_parallel_scan_and_sort(btspool, btspool2, btshared, sharedsort,
18361872
sharedsort2, sortmem, false);
18371873

1874+
/* Report WAL usage during parallel execution */
1875+
walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false);
1876+
InstrEndParallelQuery(NULL, &walusage[ParallelWorkerNumber]);
1877+
18381878
#ifdef BTREE_BUILD_STATS
18391879
if (log_btree_build_stats)
18401880
{

src/backend/access/transam/xlog.c

+11-1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include "commands/progress.h"
4444
#include "commands/tablespace.h"
4545
#include "common/controldata_utils.h"
46+
#include "executor/instrument.h"
4647
#include "miscadmin.h"
4748
#include "pg_trace.h"
4849
#include "pgstat.h"
@@ -996,7 +997,8 @@ static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt);
996997
XLogRecPtr
997998
XLogInsertRecord(XLogRecData *rdata,
998999
XLogRecPtr fpw_lsn,
999-
uint8 flags)
1000+
uint8 flags,
1001+
int num_fpw)
10001002
{
10011003
XLogCtlInsert *Insert = &XLogCtl->Insert;
10021004
pg_crc32c rdata_crc;
@@ -1252,6 +1254,14 @@ XLogInsertRecord(XLogRecData *rdata,
12521254
ProcLastRecPtr = StartPos;
12531255
XactLastRecEnd = EndPos;
12541256

1257+
/* Report WAL traffic to the instrumentation. */
1258+
if (inserted)
1259+
{
1260+
pgWalUsage.wal_bytes += rechdr->xl_tot_len;
1261+
pgWalUsage.wal_records++;
1262+
pgWalUsage.wal_num_fpw += num_fpw;
1263+
}
1264+
12551265
return EndPos;
12561266
}
12571267

src/backend/access/transam/xloginsert.c

+9-4
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "access/xloginsert.h"
2626
#include "catalog/pg_control.h"
2727
#include "common/pg_lzcompress.h"
28+
#include "executor/instrument.h"
2829
#include "miscadmin.h"
2930
#include "pg_trace.h"
3031
#include "replication/origin.h"
@@ -108,7 +109,7 @@ static MemoryContext xloginsert_cxt;
108109

109110
static XLogRecData *XLogRecordAssemble(RmgrId rmid, uint8 info,
110111
XLogRecPtr RedoRecPtr, bool doPageWrites,
111-
XLogRecPtr *fpw_lsn);
112+
XLogRecPtr *fpw_lsn, int *num_fpw);
112113
static bool XLogCompressBackupBlock(char *page, uint16 hole_offset,
113114
uint16 hole_length, char *dest, uint16 *dlen);
114115

@@ -448,6 +449,7 @@ XLogInsert(RmgrId rmid, uint8 info)
448449
bool doPageWrites;
449450
XLogRecPtr fpw_lsn;
450451
XLogRecData *rdt;
452+
int num_fpw = 0;
451453

452454
/*
453455
* Get values needed to decide whether to do full-page writes. Since
@@ -457,9 +459,9 @@ XLogInsert(RmgrId rmid, uint8 info)
457459
GetFullPageWriteInfo(&RedoRecPtr, &doPageWrites);
458460

459461
rdt = XLogRecordAssemble(rmid, info, RedoRecPtr, doPageWrites,
460-
&fpw_lsn);
462+
&fpw_lsn, &num_fpw);
461463

462-
EndPos = XLogInsertRecord(rdt, fpw_lsn, curinsert_flags);
464+
EndPos = XLogInsertRecord(rdt, fpw_lsn, curinsert_flags, num_fpw);
463465
} while (EndPos == InvalidXLogRecPtr);
464466

465467
XLogResetInsertion();
@@ -482,7 +484,7 @@ XLogInsert(RmgrId rmid, uint8 info)
482484
static XLogRecData *
483485
XLogRecordAssemble(RmgrId rmid, uint8 info,
484486
XLogRecPtr RedoRecPtr, bool doPageWrites,
485-
XLogRecPtr *fpw_lsn)
487+
XLogRecPtr *fpw_lsn, int *num_fpw)
486488
{
487489
XLogRecData *rdt;
488490
uint32 total_len = 0;
@@ -635,6 +637,9 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
635637
*/
636638
bkpb.fork_flags |= BKPBLOCK_HAS_IMAGE;
637639

640+
/* Report a full page image constructed for the WAL record */
641+
*num_fpw += 1;
642+
638643
/*
639644
* Construct XLogRecData entries for the page content.
640645
*/

0 commit comments

Comments
 (0)