Skip to content

Commit 083d9ce

Browse files
committed
Avoid using potentially-under-aligned page buffers.
There's a project policy against using plain "char buf[BLCKSZ]" local or static variables as page buffers; preferred style is to palloc or malloc each buffer to ensure it is MAXALIGN'd. However, that policy's been ignored in an increasing number of places. We've apparently got away with it so far, probably because (a) relatively few people use platforms on which misalignment causes core dumps and/or (b) the variables chance to be sufficiently aligned anyway. But this is not something to rely on. Moreover, even if we don't get a core dump, we might be paying a lot of cycles for misaligned accesses. To fix, invent new union types PGAlignedBlock and PGAlignedXLogBlock that the compiler must allocate with sufficient alignment, and use those in place of plain char arrays. I used these types even for variables where there's no risk of a misaligned access, since ensuring proper alignment should make kernel data transfers faster. I also changed some places where we had been palloc'ing short-lived buffers, for coding style uniformity and to save palloc/pfree overhead. Since this seems to be a live portability hazard (despite the lack of field reports), back-patch to all supported versions. Patch by me; thanks to Michael Paquier for review. Discussion: https://postgr.es/m/1535618100.1286.3.camel@credativ.de
1 parent 20cd888 commit 083d9ce

File tree

14 files changed

+82
-93
lines changed

14 files changed

+82
-93
lines changed

contrib/pg_prewarm/pg_prewarm.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ typedef enum
3737
PREWARM_BUFFER
3838
} PrewarmType;
3939

40-
static char blockbuffer[BLCKSZ];
40+
static PGAlignedBlock blockbuffer;
4141

4242
/*
4343
* pg_prewarm(regclass, mode text, fork text,
@@ -179,7 +179,7 @@ pg_prewarm(PG_FUNCTION_ARGS)
179179
for (block = first_block; block <= last_block; ++block)
180180
{
181181
CHECK_FOR_INTERRUPTS();
182-
smgrread(rel->rd_smgr, forkNumber, block, blockbuffer);
182+
smgrread(rel->rd_smgr, forkNumber, block, blockbuffer.data);
183183
++blocks_done;
184184
}
185185
}

src/backend/access/gin/ginentrypage.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -633,7 +633,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
633633

634634
/* these must be static so they can be returned to caller */
635635
static ginxlogSplitEntry data;
636-
static char tupstore[2 * BLCKSZ];
636+
static PGAlignedBlock tupstore[2];
637637

638638
entryPreparePage(btree, lpage, off, insertData, updateblkno);
639639

@@ -642,7 +642,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
642642
* one after another in a temporary workspace.
643643
*/
644644
maxoff = PageGetMaxOffsetNumber(lpage);
645-
ptr = tupstore;
645+
ptr = tupstore[0].data;
646646
for (i = FirstOffsetNumber; i <= maxoff; i++)
647647
{
648648
if (i == off)
@@ -667,7 +667,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
667667
ptr += size;
668668
totalsize += size + sizeof(ItemIdData);
669669
}
670-
tupstoresize = ptr - tupstore;
670+
tupstoresize = ptr - tupstore[0].data;
671671

672672
/*
673673
* Initialize the left and right pages, and copy all the tuples back to
@@ -676,7 +676,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
676676
GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
677677
GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
678678

679-
ptr = tupstore;
679+
ptr = tupstore[0].data;
680680
maxoff++;
681681
lsize = 0;
682682

@@ -715,7 +715,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
715715
rdata[0].next = &rdata[1];
716716

717717
rdata[1].buffer = InvalidBuffer;
718-
rdata[1].data = tupstore;
718+
rdata[1].data = tupstore[0].data;
719719
rdata[1].len = tupstoresize;
720720
rdata[1].next = NULL;
721721

src/backend/access/gin/ginfast.c

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,18 +52,15 @@ writeListPage(Relation index, Buffer buffer,
5252
size = 0;
5353
OffsetNumber l,
5454
off;
55-
char *workspace;
55+
PGAlignedBlock workspace;
5656
char *ptr;
5757

58-
/* workspace could be a local array; we use palloc for alignment */
59-
workspace = palloc(BLCKSZ);
60-
6158
START_CRIT_SECTION();
6259

6360
GinInitBuffer(buffer, GIN_LIST);
6461

6562
off = FirstOffsetNumber;
66-
ptr = workspace;
63+
ptr = workspace.data;
6764

6865
for (i = 0; i < ntuples; i++)
6966
{
@@ -120,7 +117,7 @@ writeListPage(Relation index, Buffer buffer,
120117
rdata[0].next = rdata + 1;
121118

122119
rdata[1].buffer = InvalidBuffer;
123-
rdata[1].data = workspace;
120+
rdata[1].data = workspace.data;
124121
rdata[1].len = size;
125122
rdata[1].next = NULL;
126123

@@ -135,8 +132,6 @@ writeListPage(Relation index, Buffer buffer,
135132

136133
END_CRIT_SECTION();
137134

138-
pfree(workspace);
139-
140135
return freesize;
141136
}
142137

src/backend/access/hash/hashpage.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -710,7 +710,7 @@ static bool
710710
_hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
711711
{
712712
BlockNumber lastblock;
713-
char zerobuf[BLCKSZ];
713+
PGAlignedBlock zerobuf;
714714

715715
lastblock = firstblock + nblocks - 1;
716716

@@ -721,10 +721,10 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
721721
if (lastblock < firstblock || lastblock == InvalidBlockNumber)
722722
return false;
723723

724-
MemSet(zerobuf, 0, sizeof(zerobuf));
724+
MemSet(zerobuf.data, 0, sizeof(zerobuf));
725725

726726
RelationOpenSmgr(rel);
727-
smgrextend(rel->rd_smgr, MAIN_FORKNUM, lastblock, zerobuf, false);
727+
smgrextend(rel->rd_smgr, MAIN_FORKNUM, lastblock, zerobuf.data, false);
728728

729729
return true;
730730
}

src/backend/access/heap/heapam.c

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2318,7 +2318,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
23182318
HeapTuple *heaptuples;
23192319
int i;
23202320
int ndone;
2321-
char *scratch = NULL;
2321+
PGAlignedBlock scratch;
23222322
Page page;
23232323
bool needwal;
23242324
Size saveFreeSpace;
@@ -2335,14 +2335,6 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
23352335
heaptuples[i] = heap_prepare_insert(relation, tuples[i],
23362336
xid, cid, options);
23372337

2338-
/*
2339-
* Allocate some memory to use for constructing the WAL record. Using
2340-
* palloc() within a critical section is not safe, so we allocate this
2341-
* beforehand.
2342-
*/
2343-
if (needwal)
2344-
scratch = palloc(BLCKSZ);
2345-
23462338
/*
23472339
* We're about to do the actual inserts -- but check for conflict first,
23482340
* to minimize the possibility of having to roll back work we've just
@@ -2427,7 +2419,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
24272419
uint8 info = XLOG_HEAP2_MULTI_INSERT;
24282420
char *tupledata;
24292421
int totaldatalen;
2430-
char *scratchptr = scratch;
2422+
char *scratchptr = scratch.data;
24312423
bool init;
24322424

24332425
/*
@@ -2494,10 +2486,10 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
24942486
log_heap_new_cid(relation, heaptup);
24952487
}
24962488
totaldatalen = scratchptr - tupledata;
2497-
Assert((scratchptr - scratch) < BLCKSZ);
2489+
Assert((scratchptr - scratch.data) < BLCKSZ);
24982490

24992491
rdata[0].data = (char *) xlrec;
2500-
rdata[0].len = tupledata - scratch;
2492+
rdata[0].len = tupledata - scratch.data;
25012493
rdata[0].buffer = InvalidBuffer;
25022494
rdata[0].next = &rdata[1];
25032495

src/backend/access/heap/visibilitymap.c

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -610,10 +610,9 @@ static void
610610
vm_extend(Relation rel, BlockNumber vm_nblocks)
611611
{
612612
BlockNumber vm_nblocks_now;
613-
Page pg;
613+
PGAlignedBlock pg;
614614

615-
pg = (Page) palloc(BLCKSZ);
616-
PageInit(pg, BLCKSZ, 0);
615+
PageInit((Page) pg.data, BLCKSZ, 0);
617616

618617
/*
619618
* We use the relation extension lock to lock out other backends trying to
@@ -644,10 +643,10 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
644643
/* Now extend the file */
645644
while (vm_nblocks_now < vm_nblocks)
646645
{
647-
PageSetChecksumInplace(pg, vm_nblocks_now);
646+
PageSetChecksumInplace((Page) pg.data, vm_nblocks_now);
648647

649648
smgrextend(rel->rd_smgr, VISIBILITYMAP_FORKNUM, vm_nblocks_now,
650-
(char *) pg, false);
649+
pg.data, false);
651650
vm_nblocks_now++;
652651
}
653652

@@ -664,6 +663,4 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
664663
rel->rd_smgr->smgr_vm_nblocks = vm_nblocks_now;
665664

666665
UnlockRelationForExtension(rel, ExclusiveLock);
667-
668-
pfree(pg);
669666
}

src/backend/access/transam/xlog.c

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3173,8 +3173,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
31733173
{
31743174
char path[MAXPGPATH];
31753175
char tmppath[MAXPGPATH];
3176-
char zbuffer_raw[XLOG_BLCKSZ + MAXIMUM_ALIGNOF];
3177-
char *zbuffer;
3176+
PGAlignedXLogBlock zbuffer;
31783177
XLogSegNo installed_segno;
31793178
int max_advance;
31803179
int fd;
@@ -3228,16 +3227,12 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
32283227
* fsync below) that all the indirect blocks are down on disk. Therefore,
32293228
* fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the
32303229
* log file.
3231-
*
3232-
* Note: ensure the buffer is reasonably well-aligned; this may save a few
3233-
* cycles transferring data to the kernel.
32343230
*/
3235-
zbuffer = (char *) MAXALIGN(zbuffer_raw);
3236-
memset(zbuffer, 0, XLOG_BLCKSZ);
3231+
memset(zbuffer.data, 0, XLOG_BLCKSZ);
32373232
for (nbytes = 0; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
32383233
{
32393234
errno = 0;
3240-
if ((int) write(fd, zbuffer, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
3235+
if ((int) write(fd, zbuffer.data, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
32413236
{
32423237
int save_errno = errno;
32433238

@@ -3328,7 +3323,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno)
33283323
{
33293324
char path[MAXPGPATH];
33303325
char tmppath[MAXPGPATH];
3331-
char buffer[XLOG_BLCKSZ];
3326+
PGAlignedXLogBlock buffer;
33323327
int srcfd;
33333328
int fd;
33343329
int nbytes;
@@ -3364,7 +3359,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno)
33643359
for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(buffer))
33653360
{
33663361
errno = 0;
3367-
if ((int) read(srcfd, buffer, sizeof(buffer)) != (int) sizeof(buffer))
3362+
if ((int) read(srcfd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer))
33683363
{
33693364
if (errno != 0)
33703365
ereport(ERROR,
@@ -3375,7 +3370,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno)
33753370
(errmsg("not enough data in file \"%s\"", path)));
33763371
}
33773372
errno = 0;
3378-
if ((int) write(fd, buffer, sizeof(buffer)) != (int) sizeof(buffer))
3373+
if ((int) write(fd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer))
33793374
{
33803375
int save_errno = errno;
33813376

@@ -9200,7 +9195,7 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
92009195
*/
92019196
if (XLogCheckBuffer(rdata, false, &lsn, &bkpb))
92029197
{
9203-
char copied_buffer[BLCKSZ];
9198+
PGAlignedBlock copied_buffer;
92049199
char *origdata = (char *) BufferGetBlock(buffer);
92059200

92069201
/*
@@ -9212,8 +9207,8 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
92129207
* and hole_offset to 0; so the following code is safe for either
92139208
* case.
92149209
*/
9215-
memcpy(copied_buffer, origdata, bkpb.hole_offset);
9216-
memcpy(copied_buffer + bkpb.hole_offset,
9210+
memcpy(copied_buffer.data, origdata, bkpb.hole_offset);
9211+
memcpy(copied_buffer.data + bkpb.hole_offset,
92179212
origdata + bkpb.hole_offset + bkpb.hole_length,
92189213
BLCKSZ - bkpb.hole_offset - bkpb.hole_length);
92199214

@@ -9228,7 +9223,7 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
92289223
/*
92299224
* Save copy of the buffer.
92309225
*/
9231-
rdata[1].data = copied_buffer;
9226+
rdata[1].data = copied_buffer.data;
92329227
rdata[1].len = BLCKSZ - bkpb.hole_length;
92339228
rdata[1].buffer = InvalidBuffer;
92349229
rdata[1].next = NULL;

src/backend/commands/tablecmds.c

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9486,21 +9486,14 @@ static void
94869486
copy_relation_data(SMgrRelation src, SMgrRelation dst,
94879487
ForkNumber forkNum, char relpersistence)
94889488
{
9489-
char *buf;
9489+
PGAlignedBlock buf;
94909490
Page page;
94919491
bool use_wal;
94929492
bool copying_initfork;
94939493
BlockNumber nblocks;
94949494
BlockNumber blkno;
94959495

9496-
/*
9497-
* palloc the buffer so that it's MAXALIGN'd. If it were just a local
9498-
* char[] array, the compiler might align it on any byte boundary, which
9499-
* can seriously hurt transfer speed to and from the kernel; not to
9500-
* mention possibly making log_newpage's accesses to the page header fail.
9501-
*/
9502-
buf = (char *) palloc(BLCKSZ);
9503-
page = (Page) buf;
9496+
page = (Page) buf.data;
95049497

95059498
/*
95069499
* The init fork for an unlogged relation in many respects has to be
@@ -9524,7 +9517,7 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
95249517
/* If we got a cancel signal during the copy of the data, quit */
95259518
CHECK_FOR_INTERRUPTS();
95269519

9527-
smgrread(src, forkNum, blkno, buf);
9520+
smgrread(src, forkNum, blkno, buf.data);
95289521

95299522
if (!PageIsVerified(page, blkno))
95309523
ereport(ERROR,
@@ -9550,11 +9543,9 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
95509543
* rel, because there's no need for smgr to schedule an fsync for this
95519544
* write; we'll do it ourselves below.
95529545
*/
9553-
smgrextend(dst, forkNum, blkno, buf, true);
9546+
smgrextend(dst, forkNum, blkno, buf.data, true);
95549547
}
95559548

9556-
pfree(buf);
9557-
95589549
/*
95599550
* If the rel is WAL-logged, must fsync before commit. We use heap_sync
95609551
* to ensure that the toast table gets fsync'd too. (For a temp or

src/backend/replication/walsender.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -494,16 +494,16 @@ SendTimeLineHistory(TimeLineHistoryCmd *cmd)
494494
bytesleft = histfilelen;
495495
while (bytesleft > 0)
496496
{
497-
char rbuf[BLCKSZ];
497+
PGAlignedBlock rbuf;
498498
int nread;
499499

500-
nread = read(fd, rbuf, sizeof(rbuf));
500+
nread = read(fd, rbuf.data, sizeof(rbuf));
501501
if (nread <= 0)
502502
ereport(ERROR,
503503
(errcode_for_file_access(),
504504
errmsg("could not read file \"%s\": %m",
505505
path)));
506-
pq_sendbytes(&buf, rbuf, nread);
506+
pq_sendbytes(&buf, rbuf.data, nread);
507507
bytesleft -= nread;
508508
}
509509
CloseTransientFile(fd);

src/backend/storage/file/buffile.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ struct BufFile
8686
off_t curOffset; /* offset part of current pos */
8787
int pos; /* next read/write position in buffer */
8888
int nbytes; /* total # of valid bytes in buffer */
89-
char buffer[BLCKSZ];
89+
PGAlignedBlock buffer;
9090
};
9191

9292
static BufFile *makeBufFile(File firstfile);
@@ -254,7 +254,7 @@ BufFileLoadBuffer(BufFile *file)
254254
/*
255255
* Read whatever we can get, up to a full bufferload.
256256
*/
257-
file->nbytes = FileRead(thisfile, file->buffer, sizeof(file->buffer));
257+
file->nbytes = FileRead(thisfile, file->buffer.data, sizeof(file->buffer));
258258
if (file->nbytes < 0)
259259
file->nbytes = 0;
260260
file->offsets[file->curFile] += file->nbytes;
@@ -317,7 +317,7 @@ BufFileDumpBuffer(BufFile *file)
317317
return; /* seek failed, give up */
318318
file->offsets[file->curFile] = file->curOffset;
319319
}
320-
bytestowrite = FileWrite(thisfile, file->buffer + wpos, bytestowrite);
320+
bytestowrite = FileWrite(thisfile, file->buffer.data + wpos, bytestowrite);
321321
if (bytestowrite <= 0)
322322
return; /* failed to write */
323323
file->offsets[file->curFile] += bytestowrite;
@@ -385,7 +385,7 @@ BufFileRead(BufFile *file, void *ptr, size_t size)
385385
nthistime = size;
386386
Assert(nthistime > 0);
387387

388-
memcpy(ptr, file->buffer + file->pos, nthistime);
388+
memcpy(ptr, file->buffer.data + file->pos, nthistime);
389389

390390
file->pos += nthistime;
391391
ptr = (void *) ((char *) ptr + nthistime);
@@ -432,7 +432,7 @@ BufFileWrite(BufFile *file, void *ptr, size_t size)
432432
nthistime = size;
433433
Assert(nthistime > 0);
434434

435-
memcpy(file->buffer + file->pos, ptr, nthistime);
435+
memcpy(file->buffer.data + file->pos, ptr, nthistime);
436436

437437
file->dirty = true;
438438
file->pos += nthistime;

0 commit comments

Comments
 (0)