Skip to content

Commit 5af055e

Browse files
committed
Avoid using potentially-under-aligned page buffers.
There's a project policy against using plain "char buf[BLCKSZ]" local or static variables as page buffers; preferred style is to palloc or malloc each buffer to ensure it is MAXALIGN'd. However, that policy's been ignored in an increasing number of places. We've apparently got away with it so far, probably because (a) relatively few people use platforms on which misalignment causes core dumps and/or (b) the variables chance to be sufficiently aligned anyway. But this is not something to rely on. Moreover, even if we don't get a core dump, we might be paying a lot of cycles for misaligned accesses. To fix, invent new union types PGAlignedBlock and PGAlignedXLogBlock that the compiler must allocate with sufficient alignment, and use those in place of plain char arrays. I used these types even for variables where there's no risk of a misaligned access, since ensuring proper alignment should make kernel data transfers faster. I also changed some places where we had been palloc'ing short-lived buffers, for coding style uniformity and to save palloc/pfree overhead. Since this seems to be a live portability hazard (despite the lack of field reports), back-patch to all supported versions. Patch by me; thanks to Michael Paquier for review. Discussion: https://postgr.es/m/1535618100.1286.3.camel@credativ.de
1 parent 65f3940 commit 5af055e

File tree

13 files changed

+79
-90
lines changed

13 files changed

+79
-90
lines changed

src/backend/access/gin/ginentrypage.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -557,15 +557,15 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogR
557557
/* these must be static so they can be returned to caller */
558558
static XLogRecData rdata[2];
559559
static ginxlogSplit data;
560-
static char tupstore[2 * BLCKSZ];
560+
static PGAlignedBlock tupstore[2];
561561

562562
*prdata = rdata;
563563
data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
564564
InvalidOffsetNumber : GinGetDownlink(btree->entry);
565565
data.updateBlkno = entryPreparePage(btree, lpage, off);
566566

567567
maxoff = PageGetMaxOffsetNumber(lpage);
568-
ptr = tupstore;
568+
ptr = tupstore[0].data;
569569

570570
for (i = FirstOffsetNumber; i <= maxoff; i++)
571571
{
@@ -595,7 +595,7 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogR
595595
GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
596596
GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
597597

598-
ptr = tupstore;
598+
ptr = tupstore[0].data;
599599
maxoff++;
600600
lsize = 0;
601601

@@ -643,7 +643,7 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogR
643643
rdata[0].next = &rdata[1];
644644

645645
rdata[1].buffer = InvalidBuffer;
646-
rdata[1].data = tupstore;
646+
rdata[1].data = tupstore[0].data;
647647
rdata[1].len = MAXALIGN(totalsize);
648648
rdata[1].next = NULL;
649649

src/backend/access/gin/ginfast.c

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,18 +52,15 @@ writeListPage(Relation index, Buffer buffer,
5252
size = 0;
5353
OffsetNumber l,
5454
off;
55-
char *workspace;
55+
PGAlignedBlock workspace;
5656
char *ptr;
5757

58-
/* workspace could be a local array; we use palloc for alignment */
59-
workspace = palloc(BLCKSZ);
60-
6158
START_CRIT_SECTION();
6259

6360
GinInitBuffer(buffer, GIN_LIST);
6461

6562
off = FirstOffsetNumber;
66-
ptr = workspace;
63+
ptr = workspace.data;
6764

6865
for (i = 0; i < ntuples; i++)
6966
{
@@ -120,7 +117,7 @@ writeListPage(Relation index, Buffer buffer,
120117
rdata[0].next = rdata + 1;
121118

122119
rdata[1].buffer = InvalidBuffer;
123-
rdata[1].data = workspace;
120+
rdata[1].data = workspace.data;
124121
rdata[1].len = size;
125122
rdata[1].next = NULL;
126123

@@ -135,8 +132,6 @@ writeListPage(Relation index, Buffer buffer,
135132

136133
END_CRIT_SECTION();
137134

138-
pfree(workspace);
139-
140135
return freesize;
141136
}
142137

src/backend/access/hash/hashpage.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -710,7 +710,7 @@ static bool
710710
_hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
711711
{
712712
BlockNumber lastblock;
713-
char zerobuf[BLCKSZ];
713+
PGAlignedBlock zerobuf;
714714

715715
lastblock = firstblock + nblocks - 1;
716716

@@ -721,10 +721,10 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
721721
if (lastblock < firstblock || lastblock == InvalidBlockNumber)
722722
return false;
723723

724-
MemSet(zerobuf, 0, sizeof(zerobuf));
724+
MemSet(zerobuf.data, 0, sizeof(zerobuf));
725725

726726
RelationOpenSmgr(rel);
727-
smgrextend(rel->rd_smgr, MAIN_FORKNUM, lastblock, zerobuf, false);
727+
smgrextend(rel->rd_smgr, MAIN_FORKNUM, lastblock, zerobuf.data, false);
728728

729729
return true;
730730
}

src/backend/access/heap/heapam.c

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2258,7 +2258,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
22582258
HeapTuple *heaptuples;
22592259
int i;
22602260
int ndone;
2261-
char *scratch = NULL;
2261+
PGAlignedBlock scratch;
22622262
Page page;
22632263
bool needwal;
22642264
Size saveFreeSpace;
@@ -2273,14 +2273,6 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
22732273
heaptuples[i] = heap_prepare_insert(relation, tuples[i],
22742274
xid, cid, options);
22752275

2276-
/*
2277-
* Allocate some memory to use for constructing the WAL record. Using
2278-
* palloc() within a critical section is not safe, so we allocate this
2279-
* beforehand.
2280-
*/
2281-
if (needwal)
2282-
scratch = palloc(BLCKSZ);
2283-
22842276
/*
22852277
* We're about to do the actual inserts -- but check for conflict first,
22862278
* to minimize the possibility of having to roll back work we've just
@@ -2365,7 +2357,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
23652357
uint8 info = XLOG_HEAP2_MULTI_INSERT;
23662358
char *tupledata;
23672359
int totaldatalen;
2368-
char *scratchptr = scratch;
2360+
char *scratchptr = scratch.data;
23692361
bool init;
23702362

23712363
/*
@@ -2425,10 +2417,10 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
24252417
scratchptr += datalen;
24262418
}
24272419
totaldatalen = scratchptr - tupledata;
2428-
Assert((scratchptr - scratch) < BLCKSZ);
2420+
Assert((scratchptr - scratch.data) < BLCKSZ);
24292421

24302422
rdata[0].data = (char *) xlrec;
2431-
rdata[0].len = tupledata - scratch;
2423+
rdata[0].len = tupledata - scratch.data;
24322424
rdata[0].buffer = InvalidBuffer;
24332425
rdata[0].next = &rdata[1];
24342426

src/backend/access/heap/visibilitymap.c

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -610,10 +610,9 @@ static void
610610
vm_extend(Relation rel, BlockNumber vm_nblocks)
611611
{
612612
BlockNumber vm_nblocks_now;
613-
Page pg;
613+
PGAlignedBlock pg;
614614

615-
pg = (Page) palloc(BLCKSZ);
616-
PageInit(pg, BLCKSZ, 0);
615+
PageInit((Page) pg.data, BLCKSZ, 0);
617616

618617
/*
619618
* We use the relation extension lock to lock out other backends trying to
@@ -644,10 +643,10 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
644643
/* Now extend the file */
645644
while (vm_nblocks_now < vm_nblocks)
646645
{
647-
PageSetChecksumInplace(pg, vm_nblocks_now);
646+
PageSetChecksumInplace((Page) pg.data, vm_nblocks_now);
648647

649648
smgrextend(rel->rd_smgr, VISIBILITYMAP_FORKNUM, vm_nblocks_now,
650-
(char *) pg, false);
649+
pg.data, false);
651650
vm_nblocks_now++;
652651
}
653652

@@ -664,6 +663,4 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
664663
rel->rd_smgr->smgr_vm_nblocks = vm_nblocks_now;
665664

666665
UnlockRelationForExtension(rel, ExclusiveLock);
667-
668-
pfree(pg);
669666
}

src/backend/access/transam/xlog.c

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2306,8 +2306,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
23062306
{
23072307
char path[MAXPGPATH];
23082308
char tmppath[MAXPGPATH];
2309-
char zbuffer_raw[XLOG_BLCKSZ + MAXIMUM_ALIGNOF];
2310-
char *zbuffer;
2309+
PGAlignedXLogBlock zbuffer;
23112310
XLogSegNo installed_segno;
23122311
int max_advance;
23132312
int fd;
@@ -2361,16 +2360,12 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
23612360
* fsync below) that all the indirect blocks are down on disk. Therefore,
23622361
* fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the
23632362
* log file.
2364-
*
2365-
* Note: ensure the buffer is reasonably well-aligned; this may save a few
2366-
* cycles transferring data to the kernel.
23672363
*/
2368-
zbuffer = (char *) MAXALIGN(zbuffer_raw);
2369-
memset(zbuffer, 0, XLOG_BLCKSZ);
2364+
memset(zbuffer.data, 0, XLOG_BLCKSZ);
23702365
for (nbytes = 0; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
23712366
{
23722367
errno = 0;
2373-
if ((int) write(fd, zbuffer, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
2368+
if ((int) write(fd, zbuffer.data, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
23742369
{
23752370
int save_errno = errno;
23762371

@@ -2461,7 +2456,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno)
24612456
{
24622457
char path[MAXPGPATH];
24632458
char tmppath[MAXPGPATH];
2464-
char buffer[XLOG_BLCKSZ];
2459+
PGAlignedXLogBlock buffer;
24652460
int srcfd;
24662461
int fd;
24672462
int nbytes;
@@ -2497,7 +2492,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno)
24972492
for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(buffer))
24982493
{
24992494
errno = 0;
2500-
if ((int) read(srcfd, buffer, sizeof(buffer)) != (int) sizeof(buffer))
2495+
if ((int) read(srcfd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer))
25012496
{
25022497
if (errno != 0)
25032498
ereport(ERROR,
@@ -2508,7 +2503,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno)
25082503
(errmsg("not enough data in file \"%s\"", path)));
25092504
}
25102505
errno = 0;
2511-
if ((int) write(fd, buffer, sizeof(buffer)) != (int) sizeof(buffer))
2506+
if ((int) write(fd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer))
25122507
{
25132508
int save_errno = errno;
25142509

@@ -8017,7 +8012,7 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
80178012
*/
80188013
if (XLogCheckBuffer(rdata, false, &lsn, &bkpb))
80198014
{
8020-
char copied_buffer[BLCKSZ];
8015+
PGAlignedBlock copied_buffer;
80218016
char *origdata = (char *) BufferGetBlock(buffer);
80228017

80238018
/*
@@ -8028,8 +8023,8 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
80288023
* With buffer_std set to false, XLogCheckBuffer() sets hole_length and
80298024
* hole_offset to 0; so the following code is safe for either case.
80308025
*/
8031-
memcpy(copied_buffer, origdata, bkpb.hole_offset);
8032-
memcpy(copied_buffer + bkpb.hole_offset,
8026+
memcpy(copied_buffer.data, origdata, bkpb.hole_offset);
8027+
memcpy(copied_buffer.data + bkpb.hole_offset,
80338028
origdata + bkpb.hole_offset + bkpb.hole_length,
80348029
BLCKSZ - bkpb.hole_offset - bkpb.hole_length);
80358030

@@ -8044,7 +8039,7 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
80448039
/*
80458040
* Save copy of the buffer.
80468041
*/
8047-
rdata[1].data = copied_buffer;
8042+
rdata[1].data = copied_buffer.data;
80488043
rdata[1].len = BLCKSZ - bkpb.hole_length;
80498044
rdata[1].buffer = InvalidBuffer;
80508045
rdata[1].next = NULL;

src/backend/commands/tablecmds.c

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8983,21 +8983,14 @@ static void
89838983
copy_relation_data(SMgrRelation src, SMgrRelation dst,
89848984
ForkNumber forkNum, char relpersistence)
89858985
{
8986-
char *buf;
8986+
PGAlignedBlock buf;
89878987
Page page;
89888988
bool use_wal;
89898989
bool copying_initfork;
89908990
BlockNumber nblocks;
89918991
BlockNumber blkno;
89928992

8993-
/*
8994-
* palloc the buffer so that it's MAXALIGN'd. If it were just a local
8995-
* char[] array, the compiler might align it on any byte boundary, which
8996-
* can seriously hurt transfer speed to and from the kernel; not to
8997-
* mention possibly making log_newpage's accesses to the page header fail.
8998-
*/
8999-
buf = (char *) palloc(BLCKSZ);
9000-
page = (Page) buf;
8993+
page = (Page) buf.data;
90018994

90028995
/*
90038996
* The init fork for an unlogged relation in many respects has to be
@@ -9021,7 +9014,7 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
90219014
/* If we got a cancel signal during the copy of the data, quit */
90229015
CHECK_FOR_INTERRUPTS();
90239016

9024-
smgrread(src, forkNum, blkno, buf);
9017+
smgrread(src, forkNum, blkno, buf.data);
90259018

90269019
if (!PageIsVerified(page, blkno))
90279020
ereport(ERROR,
@@ -9043,11 +9036,9 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
90439036
* rel, because there's no need for smgr to schedule an fsync for this
90449037
* write; we'll do it ourselves below.
90459038
*/
9046-
smgrextend(dst, forkNum, blkno, buf, true);
9039+
smgrextend(dst, forkNum, blkno, buf.data, true);
90479040
}
90489041

9049-
pfree(buf);
9050-
90519042
/*
90529043
* If the rel is WAL-logged, must fsync before commit. We use heap_sync
90539044
* to ensure that the toast table gets fsync'd too. (For a temp or

src/backend/replication/walsender.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -381,16 +381,16 @@ SendTimeLineHistory(TimeLineHistoryCmd *cmd)
381381
bytesleft = histfilelen;
382382
while (bytesleft > 0)
383383
{
384-
char rbuf[BLCKSZ];
384+
PGAlignedBlock rbuf;
385385
int nread;
386386

387-
nread = read(fd, rbuf, sizeof(rbuf));
387+
nread = read(fd, rbuf.data, sizeof(rbuf));
388388
if (nread <= 0)
389389
ereport(ERROR,
390390
(errcode_for_file_access(),
391391
errmsg("could not read file \"%s\": %m",
392392
path)));
393-
pq_sendbytes(&buf, rbuf, nread);
393+
pq_sendbytes(&buf, rbuf.data, nread);
394394
bytesleft -= nread;
395395
}
396396
CloseTransientFile(fd);

src/backend/storage/file/buffile.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ struct BufFile
8686
off_t curOffset; /* offset part of current pos */
8787
int pos; /* next read/write position in buffer */
8888
int nbytes; /* total # of valid bytes in buffer */
89-
char buffer[BLCKSZ];
89+
PGAlignedBlock buffer;
9090
};
9191

9292
static BufFile *makeBufFile(File firstfile);
@@ -254,7 +254,7 @@ BufFileLoadBuffer(BufFile *file)
254254
/*
255255
* Read whatever we can get, up to a full bufferload.
256256
*/
257-
file->nbytes = FileRead(thisfile, file->buffer, sizeof(file->buffer));
257+
file->nbytes = FileRead(thisfile, file->buffer.data, sizeof(file->buffer));
258258
if (file->nbytes < 0)
259259
file->nbytes = 0;
260260
file->offsets[file->curFile] += file->nbytes;
@@ -317,7 +317,7 @@ BufFileDumpBuffer(BufFile *file)
317317
return; /* seek failed, give up */
318318
file->offsets[file->curFile] = file->curOffset;
319319
}
320-
bytestowrite = FileWrite(thisfile, file->buffer + wpos, bytestowrite);
320+
bytestowrite = FileWrite(thisfile, file->buffer.data + wpos, bytestowrite);
321321
if (bytestowrite <= 0)
322322
return; /* failed to write */
323323
file->offsets[file->curFile] += bytestowrite;
@@ -385,7 +385,7 @@ BufFileRead(BufFile *file, void *ptr, size_t size)
385385
nthistime = size;
386386
Assert(nthistime > 0);
387387

388-
memcpy(ptr, file->buffer + file->pos, nthistime);
388+
memcpy(ptr, file->buffer.data + file->pos, nthistime);
389389

390390
file->pos += nthistime;
391391
ptr = (void *) ((char *) ptr + nthistime);
@@ -432,7 +432,7 @@ BufFileWrite(BufFile *file, void *ptr, size_t size)
432432
nthistime = size;
433433
Assert(nthistime > 0);
434434

435-
memcpy(file->buffer + file->pos, ptr, nthistime);
435+
memcpy(file->buffer.data + file->pos, ptr, nthistime);
436436

437437
file->dirty = true;
438438
file->pos += nthistime;

0 commit comments

Comments
 (0)