Skip to content

Commit ed12700

Browse files
committed
Align buffer descriptors to cache line boundaries.
Benchmarks have shown that aligning the buffer descriptor array to cache lines is important for scalability, especially on bigger, multi-socket machines. Currently the array sometimes already happens to be aligned by happenstance, depending on how large previous shared memory allocations were. That can lead to wildly varying performance results after minor configuration changes. In addition to aligning the start of the descriptor array, also force the size of individual descriptors to be a common cache line size (64 bytes). That happens to already be the case on 64-bit platforms, but this way we can change struct BufferDesc more easily. As the alignment primarily matters in highly concurrent workloads, which these days probably all run on 64-bit systems, and the space wastage of element alignment would be a bit more noticeable on 32-bit systems, we don't force the stride to be cache-line sized on 32-bit platforms for now. If somebody does actual performance testing, we can reevaluate that decision by changing the definition of BUFFERDESC_PADDED_SIZE. Discussion: 20140202151319.GD32123@awork2.anarazel.de Per discussion with Bruce Momjian, Tom Lane, Robert Haas, and Peter Geoghegan.
1 parent 7142bfb commit ed12700

File tree

7 files changed

+93
-54
lines changed

7 files changed

+93
-54
lines changed

contrib/pg_buffercache/pg_buffercache_pages.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,6 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
7373
if (SRF_IS_FIRSTCALL())
7474
{
7575
int i;
76-
volatile BufferDesc *bufHdr;
7776

7877
funcctx = SRF_FIRSTCALL_INIT();
7978

@@ -146,8 +145,11 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
146145
* Scan though all the buffers, saving the relevant fields in the
147146
* fctx->record structure.
148147
*/
149-
for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++)
148+
for (i = 0; i < NBuffers; i++)
150149
{
150+
volatile BufferDesc *bufHdr;
151+
152+
bufHdr = GetBufferDescriptor(i);
151153
/* Lock each buffer header before inspecting. */
152154
LockBufHdr(bufHdr);
153155

src/backend/storage/buffer/buf_init.c

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
#include "storage/buf_internals.h"
1919

2020

21-
BufferDesc *BufferDescriptors;
21+
BufferDescPadded *BufferDescriptors;
2222
char *BufferBlocks;
2323

2424

@@ -67,9 +67,11 @@ InitBufferPool(void)
6767
bool foundBufs,
6868
foundDescs;
6969

70-
BufferDescriptors = (BufferDesc *)
70+
/* Align descriptors to a cacheline boundary. */
71+
BufferDescriptors = (BufferDescPadded *) CACHELINEALIGN(
7172
ShmemInitStruct("Buffer Descriptors",
72-
NBuffers * sizeof(BufferDesc), &foundDescs);
73+
NBuffers * sizeof(BufferDescPadded) + PG_CACHE_LINE_SIZE,
74+
&foundDescs));
7375

7476
BufferBlocks = (char *)
7577
ShmemInitStruct("Buffer Blocks",
@@ -83,16 +85,15 @@ InitBufferPool(void)
8385
}
8486
else
8587
{
86-
BufferDesc *buf;
8788
int i;
8889

89-
buf = BufferDescriptors;
90-
9190
/*
9291
* Initialize all the buffer headers.
9392
*/
94-
for (i = 0; i < NBuffers; buf++, i++)
93+
for (i = 0; i < NBuffers; i++)
9594
{
95+
BufferDesc *buf = GetBufferDescriptor(i);
96+
9697
CLEAR_BUFFERTAG(buf->tag);
9798
buf->flags = 0;
9899
buf->usage_count = 0;
@@ -114,7 +115,7 @@ InitBufferPool(void)
114115
}
115116

116117
/* Correct last entry of linked list */
117-
BufferDescriptors[NBuffers - 1].freeNext = FREENEXT_END_OF_LIST;
118+
GetBufferDescriptor(NBuffers - 1)->freeNext = FREENEXT_END_OF_LIST;
118119
}
119120

120121
/* Init other shared buffer-management stuff */
@@ -133,7 +134,9 @@ BufferShmemSize(void)
133134
Size size = 0;
134135

135136
/* size of buffer descriptors */
136-
size = add_size(size, mul_size(NBuffers, sizeof(BufferDesc)));
137+
size = add_size(size, mul_size(NBuffers, sizeof(BufferDescPadded)));
138+
/* to allow aligning buffer descriptors */
139+
size = add_size(size, PG_CACHE_LINE_SIZE);
137140

138141
/* size of data pages */
139142
size = add_size(size, mul_size(NBuffers, BLCKSZ));

src/backend/storage/buffer/bufmgr.c

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -898,7 +898,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
898898
* buffer pool, and check to see if the correct data has been loaded
899899
* into the buffer.
900900
*/
901-
buf = &BufferDescriptors[buf_id];
901+
buf = GetBufferDescriptor(buf_id);
902902

903903
valid = PinBuffer(buf, strategy);
904904

@@ -1105,7 +1105,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
11051105

11061106
/* remaining code should match code at top of routine */
11071107

1108-
buf = &BufferDescriptors[buf_id];
1108+
buf = GetBufferDescriptor(buf_id);
11091109

11101110
valid = PinBuffer(buf, strategy);
11111111

@@ -1328,7 +1328,7 @@ MarkBufferDirty(Buffer buffer)
13281328
return;
13291329
}
13301330

1331-
bufHdr = &BufferDescriptors[buffer - 1];
1331+
bufHdr = GetBufferDescriptor(buffer - 1);
13321332

13331333
Assert(BufferIsPinned(buffer));
13341334
/* unfortunately we can't check if the lock is held exclusively */
@@ -1380,7 +1380,7 @@ ReleaseAndReadBuffer(Buffer buffer,
13801380
Assert(BufferIsPinned(buffer));
13811381
if (BufferIsLocal(buffer))
13821382
{
1383-
bufHdr = &LocalBufferDescriptors[-buffer - 1];
1383+
bufHdr = GetLocalBufferDescriptor(-buffer - 1);
13841384
if (bufHdr->tag.blockNum == blockNum &&
13851385
RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) &&
13861386
bufHdr->tag.forkNum == forkNum)
@@ -1390,7 +1390,7 @@ ReleaseAndReadBuffer(Buffer buffer,
13901390
}
13911391
else
13921392
{
1393-
bufHdr = &BufferDescriptors[buffer - 1];
1393+
bufHdr = GetBufferDescriptor(buffer - 1);
13941394
/* we have pin, so it's ok to examine tag without spinlock */
13951395
if (bufHdr->tag.blockNum == blockNum &&
13961396
RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) &&
@@ -1609,7 +1609,7 @@ BufferSync(int flags)
16091609
num_to_write = 0;
16101610
for (buf_id = 0; buf_id < NBuffers; buf_id++)
16111611
{
1612-
volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
1612+
volatile BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
16131613

16141614
/*
16151615
* Header spinlock is enough to examine BM_DIRTY, see comment in
@@ -1644,7 +1644,7 @@ BufferSync(int flags)
16441644
num_written = 0;
16451645
while (num_to_scan-- > 0)
16461646
{
1647-
volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
1647+
volatile BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
16481648

16491649
/*
16501650
* We don't need to acquire the lock here, because we're only looking
@@ -2016,7 +2016,7 @@ BgBufferSync(void)
20162016
static int
20172017
SyncOneBuffer(int buf_id, bool skip_recently_used)
20182018
{
2019-
volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
2019+
volatile BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
20202020
int result = 0;
20212021

20222022
ReservePrivateRefCountEntry();
@@ -2196,13 +2196,13 @@ PrintBufferLeakWarning(Buffer buffer)
21962196
Assert(BufferIsValid(buffer));
21972197
if (BufferIsLocal(buffer))
21982198
{
2199-
buf = &LocalBufferDescriptors[-buffer - 1];
2199+
buf = GetLocalBufferDescriptor(-buffer - 1);
22002200
loccount = LocalRefCount[-buffer - 1];
22012201
backend = MyBackendId;
22022202
}
22032203
else
22042204
{
2205-
buf = &BufferDescriptors[buffer - 1];
2205+
buf = GetBufferDescriptor(buffer - 1);
22062206
loccount = GetPrivateRefCount(buffer);
22072207
backend = InvalidBackendId;
22082208
}
@@ -2265,9 +2265,9 @@ BufferGetBlockNumber(Buffer buffer)
22652265
Assert(BufferIsPinned(buffer));
22662266

22672267
if (BufferIsLocal(buffer))
2268-
bufHdr = &(LocalBufferDescriptors[-buffer - 1]);
2268+
bufHdr = GetLocalBufferDescriptor(-buffer - 1);
22692269
else
2270-
bufHdr = &BufferDescriptors[buffer - 1];
2270+
bufHdr = GetBufferDescriptor(buffer - 1);
22712271

22722272
/* pinned, so OK to read tag without spinlock */
22732273
return bufHdr->tag.blockNum;
@@ -2288,9 +2288,9 @@ BufferGetTag(Buffer buffer, RelFileNode *rnode, ForkNumber *forknum,
22882288
Assert(BufferIsPinned(buffer));
22892289

22902290
if (BufferIsLocal(buffer))
2291-
bufHdr = &(LocalBufferDescriptors[-buffer - 1]);
2291+
bufHdr = GetLocalBufferDescriptor(-buffer - 1);
22922292
else
2293-
bufHdr = &BufferDescriptors[buffer - 1];
2293+
bufHdr = GetBufferDescriptor(buffer - 1);
22942294

22952295
/* pinned, so OK to read tag without spinlock */
22962296
*rnode = bufHdr->tag.rnode;
@@ -2473,7 +2473,7 @@ BufferIsPermanent(Buffer buffer)
24732473
* changing an aligned 2-byte BufFlags value is atomic, so we'll read the
24742474
* old value or the new value, but not random garbage.
24752475
*/
2476-
bufHdr = &BufferDescriptors[buffer - 1];
2476+
bufHdr = GetBufferDescriptor(buffer - 1);
24772477
return (bufHdr->flags & BM_PERMANENT) != 0;
24782478
}
24792479

@@ -2486,7 +2486,7 @@ BufferIsPermanent(Buffer buffer)
24862486
XLogRecPtr
24872487
BufferGetLSNAtomic(Buffer buffer)
24882488
{
2489-
volatile BufferDesc *bufHdr = &BufferDescriptors[buffer - 1];
2489+
volatile BufferDesc *bufHdr = GetBufferDescriptor(buffer - 1);
24902490
char *page = BufferGetPage(buffer);
24912491
XLogRecPtr lsn;
24922492

@@ -2549,7 +2549,7 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum,
25492549

25502550
for (i = 0; i < NBuffers; i++)
25512551
{
2552-
volatile BufferDesc *bufHdr = &BufferDescriptors[i];
2552+
volatile BufferDesc *bufHdr = GetBufferDescriptor(i);
25532553

25542554
/*
25552555
* We can make this a tad faster by prechecking the buffer tag before
@@ -2639,7 +2639,7 @@ DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
26392639
for (i = 0; i < NBuffers; i++)
26402640
{
26412641
RelFileNode *rnode = NULL;
2642-
volatile BufferDesc *bufHdr = &BufferDescriptors[i];
2642+
volatile BufferDesc *bufHdr = GetBufferDescriptor(i);
26432643

26442644
/*
26452645
* As in DropRelFileNodeBuffers, an unlocked precheck should be safe
@@ -2703,7 +2703,7 @@ DropDatabaseBuffers(Oid dbid)
27032703

27042704
for (i = 0; i < NBuffers; i++)
27052705
{
2706-
volatile BufferDesc *bufHdr = &BufferDescriptors[i];
2706+
volatile BufferDesc *bufHdr = GetBufferDescriptor(i);
27072707

27082708
/*
27092709
* As in DropRelFileNodeBuffers, an unlocked precheck should be safe
@@ -2732,10 +2732,11 @@ void
27322732
PrintBufferDescs(void)
27332733
{
27342734
int i;
2735-
volatile BufferDesc *buf = BufferDescriptors;
27362735

2737-
for (i = 0; i < NBuffers; ++i, ++buf)
2736+
for (i = 0; i < NBuffers; ++i)
27382737
{
2738+
volatile BufferDesc *buf = GetBufferDescriptor(i);
2739+
27392740
/* theoretically we should lock the bufhdr here */
27402741
elog(LOG,
27412742
"[%02d] (freeNext=%d, rel=%s, "
@@ -2753,10 +2754,11 @@ void
27532754
PrintPinnedBufs(void)
27542755
{
27552756
int i;
2756-
volatile BufferDesc *buf = BufferDescriptors;
27572757

2758-
for (i = 0; i < NBuffers; ++i, ++buf)
2758+
for (i = 0; i < NBuffers; ++i)
27592759
{
2760+
volatile BufferDesc *buf = GetBufferDescriptor(i);
2761+
27602762
if (GetPrivateRefCount(i + 1) > 0)
27612763
{
27622764
/* theoretically we should lock the bufhdr here */
@@ -2804,7 +2806,7 @@ FlushRelationBuffers(Relation rel)
28042806
{
28052807
for (i = 0; i < NLocBuffer; i++)
28062808
{
2807-
bufHdr = &LocalBufferDescriptors[i];
2809+
bufHdr = GetLocalBufferDescriptor(i);
28082810
if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
28092811
(bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
28102812
{
@@ -2842,7 +2844,7 @@ FlushRelationBuffers(Relation rel)
28422844

28432845
for (i = 0; i < NBuffers; i++)
28442846
{
2845-
bufHdr = &BufferDescriptors[i];
2847+
bufHdr = GetBufferDescriptor(i);
28462848

28472849
/*
28482850
* As in DropRelFileNodeBuffers, an unlocked precheck should be safe
@@ -2894,7 +2896,7 @@ FlushDatabaseBuffers(Oid dbid)
28942896

28952897
for (i = 0; i < NBuffers; i++)
28962898
{
2897-
bufHdr = &BufferDescriptors[i];
2899+
bufHdr = GetBufferDescriptor(i);
28982900

28992901
/*
29002902
* As in DropRelFileNodeBuffers, an unlocked precheck should be safe
@@ -2938,7 +2940,7 @@ ReleaseBuffer(Buffer buffer)
29382940
return;
29392941
}
29402942

2941-
UnpinBuffer(&BufferDescriptors[buffer - 1], true);
2943+
UnpinBuffer(GetBufferDescriptor(buffer - 1), true);
29422944
}
29432945

29442946
/*
@@ -3007,7 +3009,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
30073009
return;
30083010
}
30093011

3010-
bufHdr = &BufferDescriptors[buffer - 1];
3012+
bufHdr = GetBufferDescriptor(buffer - 1);
30113013

30123014
Assert(GetPrivateRefCount(buffer) > 0);
30133015
/* here, either share or exclusive lock is OK */
@@ -3161,7 +3163,7 @@ LockBuffer(Buffer buffer, int mode)
31613163
if (BufferIsLocal(buffer))
31623164
return; /* local buffers need no lock */
31633165

3164-
buf = &(BufferDescriptors[buffer - 1]);
3166+
buf = GetBufferDescriptor(buffer - 1);
31653167

31663168
if (mode == BUFFER_LOCK_UNLOCK)
31673169
LWLockRelease(buf->content_lock);
@@ -3187,7 +3189,7 @@ ConditionalLockBuffer(Buffer buffer)
31873189
if (BufferIsLocal(buffer))
31883190
return true; /* act as though we got it */
31893191

3190-
buf = &(BufferDescriptors[buffer - 1]);
3192+
buf = GetBufferDescriptor(buffer - 1);
31913193

31923194
return LWLockConditionalAcquire(buf->content_lock, LW_EXCLUSIVE);
31933195
}
@@ -3231,7 +3233,7 @@ LockBufferForCleanup(Buffer buffer)
32313233
elog(ERROR, "incorrect local pin count: %d",
32323234
GetPrivateRefCount(buffer));
32333235

3234-
bufHdr = &BufferDescriptors[buffer - 1];
3236+
bufHdr = GetBufferDescriptor(buffer - 1);
32353237

32363238
for (;;)
32373239
{
@@ -3332,7 +3334,7 @@ ConditionalLockBufferForCleanup(Buffer buffer)
33323334
if (!ConditionalLockBuffer(buffer))
33333335
return false;
33343336

3335-
bufHdr = &BufferDescriptors[buffer - 1];
3337+
bufHdr = GetBufferDescriptor(buffer - 1);
33363338
LockBufHdr(bufHdr);
33373339
Assert(bufHdr->refcount > 0);
33383340
if (bufHdr->refcount == 1)

src/backend/storage/buffer/freelist.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy)
259259
break;
260260
}
261261

262-
buf = &BufferDescriptors[StrategyControl->firstFreeBuffer];
262+
buf = GetBufferDescriptor(StrategyControl->firstFreeBuffer);
263263
Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);
264264

265265
/* Unconditionally remove buffer from freelist */
@@ -296,7 +296,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy)
296296
for (;;)
297297
{
298298

299-
buf = &BufferDescriptors[ClockSweepTick()];
299+
buf = GetBufferDescriptor(ClockSweepTick());
300300

301301
/*
302302
* If the buffer is pinned or has a nonzero usage_count, we cannot use
@@ -614,7 +614,7 @@ GetBufferFromRing(BufferAccessStrategy strategy)
614614
* higher usage_count indicates someone else has touched the buffer, so we
615615
* shouldn't re-use it.
616616
*/
617-
buf = &BufferDescriptors[bufnum - 1];
617+
buf = GetBufferDescriptor(bufnum - 1);
618618
LockBufHdr(buf);
619619
if (buf->refcount == 0 && buf->usage_count <= 1)
620620
{

0 commit comments

Comments
 (0)