Skip to content

Commit 755a4c1

Browse files
committed
bufmgr/smgr: Don't cross segment boundaries in StartReadBuffers()
With real AIO it doesn't make sense to cross segment boundaries with one IO. Add smgrmaxcombine() to allow upper layers to query which buffers can be merged. We could continue to cross segment boundaries when not using AIO, but it doesn't really make sense, because md.c will never be able to perform the read across the segment boundary in one system call. Which means we'll mark more buffers as undergoing IO than really makes sense - if another backend desires to read the same blocks, it'll be blocked longer than necessary. So it seems better to just never cross the boundary. Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi> Reviewed-by: Noah Misch <noah@leadboat.com> Discussion: https://postgr.es/m/1f6b50a7-38ef-4d87-8246-786d39f46ab9@iki.fi
1 parent 488f826 commit 755a4c1

File tree

5 files changed

+65
-0
lines changed

5 files changed

+65
-0
lines changed

src/backend/storage/buffer/bufmgr.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,6 +1259,7 @@ StartReadBuffersImpl(ReadBuffersOperation *operation,
12591259
{
12601260
int actual_nblocks = *nblocks;
12611261
int io_buffers_len = 0;
1262+
int maxcombine = 0;
12621263

12631264
Assert(*nblocks > 0);
12641265
Assert(*nblocks <= MAX_IO_COMBINE_LIMIT);
@@ -1290,6 +1291,23 @@ StartReadBuffersImpl(ReadBuffersOperation *operation,
12901291
{
12911292
/* Extend the readable range to cover this block. */
12921293
io_buffers_len++;
1294+
1295+
/*
1296+
* Check how many blocks we can cover with the same IO. The smgr
1297+
* implementation might e.g. be limited due to a segment boundary.
1298+
*/
1299+
if (i == 0 && actual_nblocks > 1)
1300+
{
1301+
maxcombine = smgrmaxcombine(operation->smgr,
1302+
operation->forknum,
1303+
blockNum);
1304+
if (unlikely(maxcombine < actual_nblocks))
1305+
{
1306+
elog(DEBUG2, "limiting nblocks at %u from %u to %u",
1307+
blockNum, actual_nblocks, maxcombine);
1308+
actual_nblocks = maxcombine;
1309+
}
1310+
}
12931311
}
12941312
}
12951313
*nblocks = actual_nblocks;

src/backend/storage/smgr/md.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -803,6 +803,21 @@ buffers_to_iovec(struct iovec *iov, void **buffers, int nblocks)
803803
return iovcnt;
804804
}
805805

806+
/*
807+
* mdmaxcombine() -- Return the maximum number of total blocks that can be
808+
* combined with an IO starting at blocknum.
809+
*/
810+
uint32
811+
mdmaxcombine(SMgrRelation reln, ForkNumber forknum,
812+
BlockNumber blocknum)
813+
{
814+
BlockNumber segoff;
815+
816+
segoff = blocknum % ((BlockNumber) RELSEG_SIZE);
817+
818+
return RELSEG_SIZE - segoff;
819+
}
820+
806821
/*
807822
* mdreadv() -- Read the specified blocks from a relation.
808823
*/
@@ -833,6 +848,9 @@ mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
833848
RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
834849
nblocks_this_segment = Min(nblocks_this_segment, lengthof(iov));
835850

851+
if (nblocks_this_segment != nblocks)
852+
elog(ERROR, "read crosses segment boundary");
853+
836854
iovcnt = buffers_to_iovec(iov, buffers, nblocks_this_segment);
837855
size_this_segment = nblocks_this_segment * BLCKSZ;
838856
transferred_this_segment = 0;
@@ -956,6 +974,9 @@ mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
956974
RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
957975
nblocks_this_segment = Min(nblocks_this_segment, lengthof(iov));
958976

977+
if (nblocks_this_segment != nblocks)
978+
elog(ERROR, "write crosses segment boundary");
979+
959980
iovcnt = buffers_to_iovec(iov, (void **) buffers, nblocks_this_segment);
960981
size_this_segment = nblocks_this_segment * BLCKSZ;
961982
transferred_this_segment = 0;

src/backend/storage/smgr/smgr.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ typedef struct f_smgr
8888
BlockNumber blocknum, int nblocks, bool skipFsync);
8989
bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
9090
BlockNumber blocknum, int nblocks);
91+
uint32 (*smgr_maxcombine) (SMgrRelation reln, ForkNumber forknum,
92+
BlockNumber blocknum);
9193
void (*smgr_readv) (SMgrRelation reln, ForkNumber forknum,
9294
BlockNumber blocknum,
9395
void **buffers, BlockNumber nblocks);
@@ -117,6 +119,7 @@ static const f_smgr smgrsw[] = {
117119
.smgr_extend = mdextend,
118120
.smgr_zeroextend = mdzeroextend,
119121
.smgr_prefetch = mdprefetch,
122+
.smgr_maxcombine = mdmaxcombine,
120123
.smgr_readv = mdreadv,
121124
.smgr_writev = mdwritev,
122125
.smgr_writeback = mdwriteback,
@@ -588,13 +591,29 @@ smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
588591
return smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum, nblocks);
589592
}
590593

594+
/*
595+
* smgrmaxcombine() - Return the maximum number of total blocks that can be
596+
* combined with an IO starting at blocknum.
597+
*
598+
* The returned value includes the IO for blocknum itself.
599+
*/
600+
uint32
601+
smgrmaxcombine(SMgrRelation reln, ForkNumber forknum,
602+
BlockNumber blocknum)
603+
{
604+
return smgrsw[reln->smgr_which].smgr_maxcombine(reln, forknum, blocknum);
605+
}
606+
591607
/*
592608
* smgrreadv() -- read a particular block range from a relation into the
593609
* supplied buffers.
594610
*
595611
* This routine is called from the buffer manager in order to
596612
* instantiate pages in the shared buffer cache. All storage managers
597613
* return pages in the format that POSTGRES expects.
614+
*
615+
* If more than one block is intended to be read, callers need to use
616+
* smgrmaxcombine() to check how many blocks can be combined into one IO.
598617
*/
599618
void
600619
smgrreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
@@ -626,6 +645,9 @@ smgrreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
626645
* skipFsync indicates that the caller will make other provisions to
627646
* fsync the relation, so we needn't bother. Temporary relations also
628647
* do not require fsync.
648+
*
649+
* If more than one block is intended to be read, callers need to use
650+
* smgrmaxcombine() to check how many blocks can be combined into one IO.
629651
*/
630652
void
631653
smgrwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,

src/include/storage/md.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ extern void mdzeroextend(SMgrRelation reln, ForkNumber forknum,
3232
BlockNumber blocknum, int nblocks, bool skipFsync);
3333
extern bool mdprefetch(SMgrRelation reln, ForkNumber forknum,
3434
BlockNumber blocknum, int nblocks);
35+
extern uint32 mdmaxcombine(SMgrRelation reln, ForkNumber forknum,
36+
BlockNumber blocknum);
3537
extern void mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
3638
void **buffers, BlockNumber nblocks);
3739
extern void mdwritev(SMgrRelation reln, ForkNumber forknum,

src/include/storage/smgr.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ extern void smgrzeroextend(SMgrRelation reln, ForkNumber forknum,
9292
BlockNumber blocknum, int nblocks, bool skipFsync);
9393
extern bool smgrprefetch(SMgrRelation reln, ForkNumber forknum,
9494
BlockNumber blocknum, int nblocks);
95+
extern uint32 smgrmaxcombine(SMgrRelation reln, ForkNumber forknum,
96+
BlockNumber blocknum);
9597
extern void smgrreadv(SMgrRelation reln, ForkNumber forknum,
9698
BlockNumber blocknum,
9799
void **buffers, BlockNumber nblocks);

0 commit comments

Comments
 (0)