Skip to content

Commit 10b9af3

Browse files
committed
Avoid using potentially-under-aligned page buffers.
There's a project policy against using plain "char buf[BLCKSZ]" local or static variables as page buffers; preferred style is to palloc or malloc each buffer to ensure it is MAXALIGN'd. However, that policy's been ignored in an increasing number of places. We've apparently got away with it so far, probably because (a) relatively few people use platforms on which misalignment causes core dumps and/or (b) the variables chance to be sufficiently aligned anyway. But this is not something to rely on. Moreover, even if we don't get a core dump, we might be paying a lot of cycles for misaligned accesses. To fix, invent new union types PGAlignedBlock and PGAlignedXLogBlock that the compiler must allocate with sufficient alignment, and use those in place of plain char arrays. I used these types even for variables where there's no risk of a misaligned access, since ensuring proper alignment should make kernel data transfers faster. I also changed some places where we had been palloc'ing short-lived buffers, for coding style uniformity and to save palloc/pfree overhead. Since this seems to be a live portability hazard (despite the lack of field reports), back-patch to all supported versions. Patch by me; thanks to Michael Paquier for review. Discussion: https://postgr.es/m/1535618100.1286.3.camel@credativ.de
1 parent 1664c8b commit 10b9af3

File tree

21 files changed

+129
-153
lines changed

21 files changed

+129
-153
lines changed

contrib/bloom/blinsert.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ typedef struct
3636
int64 indtuples; /* total number of tuples indexed */
3737
MemoryContext tmpCtx; /* temporary memory context reset after each
3838
* tuple */
39-
char data[BLCKSZ]; /* cached page */
39+
PGAlignedBlock data; /* cached page */
4040
int count; /* number of tuples in cached page */
4141
} BloomBuildState;
4242

@@ -52,7 +52,7 @@ flushCachedPage(Relation index, BloomBuildState *buildstate)
5252

5353
state = GenericXLogStart(index);
5454
page = GenericXLogRegisterBuffer(state, buffer, GENERIC_XLOG_FULL_IMAGE);
55-
memcpy(page, buildstate->data, BLCKSZ);
55+
memcpy(page, buildstate->data.data, BLCKSZ);
5656
GenericXLogFinish(state);
5757
UnlockReleaseBuffer(buffer);
5858
}
@@ -63,8 +63,8 @@ flushCachedPage(Relation index, BloomBuildState *buildstate)
6363
static void
6464
initCachedPage(BloomBuildState *buildstate)
6565
{
66-
memset(buildstate->data, 0, BLCKSZ);
67-
BloomInitPage(buildstate->data, 0);
66+
memset(buildstate->data.data, 0, BLCKSZ);
67+
BloomInitPage(buildstate->data.data, 0);
6868
buildstate->count = 0;
6969
}
7070

@@ -84,7 +84,7 @@ bloomBuildCallback(Relation index, HeapTuple htup, Datum *values,
8484
itup = BloomFormTuple(&buildstate->blstate, &htup->t_self, values, isnull);
8585

8686
/* Try to add next item to cached page */
87-
if (BloomPageAddItem(&buildstate->blstate, buildstate->data, itup))
87+
if (BloomPageAddItem(&buildstate->blstate, buildstate->data.data, itup))
8888
{
8989
/* Next item was added successfully */
9090
buildstate->count++;
@@ -98,7 +98,7 @@ bloomBuildCallback(Relation index, HeapTuple htup, Datum *values,
9898

9999
initCachedPage(buildstate);
100100

101-
if (!BloomPageAddItem(&buildstate->blstate, buildstate->data, itup))
101+
if (!BloomPageAddItem(&buildstate->blstate, buildstate->data.data, itup))
102102
{
103103
/* We shouldn't be here since we're inserting to the empty page */
104104
elog(ERROR, "could not add new bloom tuple to empty page");

contrib/pg_prewarm/pg_prewarm.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ typedef enum
3737
PREWARM_BUFFER
3838
} PrewarmType;
3939

40-
static char blockbuffer[BLCKSZ];
40+
static PGAlignedBlock blockbuffer;
4141

4242
/*
4343
* pg_prewarm(regclass, mode text, fork text,
@@ -179,7 +179,7 @@ pg_prewarm(PG_FUNCTION_ARGS)
179179
for (block = first_block; block <= last_block; ++block)
180180
{
181181
CHECK_FOR_INTERRUPTS();
182-
smgrread(rel->rd_smgr, forkNumber, block, blockbuffer);
182+
smgrread(rel->rd_smgr, forkNumber, block, blockbuffer.data);
183183
++blocks_done;
184184
}
185185
}

src/backend/access/gin/ginentrypage.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -616,7 +616,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
616616
Page lpage = PageGetTempPageCopy(BufferGetPage(origbuf));
617617
Page rpage = PageGetTempPageCopy(BufferGetPage(origbuf));
618618
Size pageSize = PageGetPageSize(lpage);
619-
char tupstore[2 * BLCKSZ];
619+
PGAlignedBlock tupstore[2]; /* could need 2 pages' worth of tuples */
620620

621621
entryPreparePage(btree, lpage, off, insertData, updateblkno);
622622

@@ -625,7 +625,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
625625
* one after another in a temporary workspace.
626626
*/
627627
maxoff = PageGetMaxOffsetNumber(lpage);
628-
ptr = tupstore;
628+
ptr = tupstore[0].data;
629629
for (i = FirstOffsetNumber; i <= maxoff; i++)
630630
{
631631
if (i == off)
@@ -658,7 +658,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
658658
GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
659659
GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
660660

661-
ptr = tupstore;
661+
ptr = tupstore[0].data;
662662
maxoff++;
663663
lsize = 0;
664664

src/backend/access/gin/ginfast.c

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -63,18 +63,15 @@ writeListPage(Relation index, Buffer buffer,
6363
size = 0;
6464
OffsetNumber l,
6565
off;
66-
char *workspace;
66+
PGAlignedBlock workspace;
6767
char *ptr;
6868

69-
/* workspace could be a local array; we use palloc for alignment */
70-
workspace = palloc(BLCKSZ);
71-
7269
START_CRIT_SECTION();
7370

7471
GinInitBuffer(buffer, GIN_LIST);
7572

7673
off = FirstOffsetNumber;
77-
ptr = workspace;
74+
ptr = workspace.data;
7875

7976
for (i = 0; i < ntuples; i++)
8077
{
@@ -126,7 +123,7 @@ writeListPage(Relation index, Buffer buffer,
126123
XLogRegisterData((char *) &data, sizeof(ginxlogInsertListPage));
127124

128125
XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
129-
XLogRegisterBufData(0, workspace, size);
126+
XLogRegisterBufData(0, workspace.data, size);
130127

131128
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE);
132129
PageSetLSN(page, recptr);
@@ -139,8 +136,6 @@ writeListPage(Relation index, Buffer buffer,
139136

140137
END_CRIT_SECTION();
141138

142-
pfree(workspace);
143-
144139
return freesize;
145140
}
146141

src/backend/access/hash/hashpage.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -998,7 +998,7 @@ static bool
998998
_hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
999999
{
10001000
BlockNumber lastblock;
1001-
char zerobuf[BLCKSZ];
1001+
PGAlignedBlock zerobuf;
10021002
Page page;
10031003
HashPageOpaque ovflopaque;
10041004

@@ -1011,7 +1011,7 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
10111011
if (lastblock < firstblock || lastblock == InvalidBlockNumber)
10121012
return false;
10131013

1014-
page = (Page) zerobuf;
1014+
page = (Page) zerobuf.data;
10151015

10161016
/*
10171017
* Initialize the page. Just zeroing the page won't work; see
@@ -1032,11 +1032,11 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
10321032
log_newpage(&rel->rd_node,
10331033
MAIN_FORKNUM,
10341034
lastblock,
1035-
zerobuf,
1035+
zerobuf.data,
10361036
true);
10371037

10381038
RelationOpenSmgr(rel);
1039-
smgrextend(rel->rd_smgr, MAIN_FORKNUM, lastblock, zerobuf, false);
1039+
smgrextend(rel->rd_smgr, MAIN_FORKNUM, lastblock, zerobuf.data, false);
10401040

10411041
return true;
10421042
}

src/backend/access/heap/heapam.c

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2662,7 +2662,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
26622662
HeapTuple *heaptuples;
26632663
int i;
26642664
int ndone;
2665-
char *scratch = NULL;
2665+
PGAlignedBlock scratch;
26662666
Page page;
26672667
bool needwal;
26682668
Size saveFreeSpace;
@@ -2679,14 +2679,6 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
26792679
heaptuples[i] = heap_prepare_insert(relation, tuples[i],
26802680
xid, cid, options);
26812681

2682-
/*
2683-
* Allocate some memory to use for constructing the WAL record. Using
2684-
* palloc() within a critical section is not safe, so we allocate this
2685-
* beforehand.
2686-
*/
2687-
if (needwal)
2688-
scratch = palloc(BLCKSZ);
2689-
26902682
/*
26912683
* We're about to do the actual inserts -- but check for conflict first,
26922684
* to minimize the possibility of having to roll back work we've just
@@ -2779,7 +2771,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
27792771
uint8 info = XLOG_HEAP2_MULTI_INSERT;
27802772
char *tupledata;
27812773
int totaldatalen;
2782-
char *scratchptr = scratch;
2774+
char *scratchptr = scratch.data;
27832775
bool init;
27842776
int bufflags = 0;
27852777

@@ -2838,7 +2830,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
28382830
scratchptr += datalen;
28392831
}
28402832
totaldatalen = scratchptr - tupledata;
2841-
Assert((scratchptr - scratch) < BLCKSZ);
2833+
Assert((scratchptr - scratch.data) < BLCKSZ);
28422834

28432835
if (need_tuple_data)
28442836
xlrec->flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
@@ -2865,7 +2857,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
28652857
bufflags |= REGBUF_KEEP_DATA;
28662858

28672859
XLogBeginInsert();
2868-
XLogRegisterData((char *) xlrec, tupledata - scratch);
2860+
XLogRegisterData((char *) xlrec, tupledata - scratch.data);
28692861
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
28702862

28712863
XLogRegisterBufData(0, tupledata, totaldatalen);

src/backend/access/heap/visibilitymap.c

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -645,10 +645,9 @@ static void
645645
vm_extend(Relation rel, BlockNumber vm_nblocks)
646646
{
647647
BlockNumber vm_nblocks_now;
648-
Page pg;
648+
PGAlignedBlock pg;
649649

650-
pg = (Page) palloc(BLCKSZ);
651-
PageInit(pg, BLCKSZ, 0);
650+
PageInit((Page) pg.data, BLCKSZ, 0);
652651

653652
/*
654653
* We use the relation extension lock to lock out other backends trying to
@@ -679,10 +678,10 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
679678
/* Now extend the file */
680679
while (vm_nblocks_now < vm_nblocks)
681680
{
682-
PageSetChecksumInplace(pg, vm_nblocks_now);
681+
PageSetChecksumInplace((Page) pg.data, vm_nblocks_now);
683682

684683
smgrextend(rel->rd_smgr, VISIBILITYMAP_FORKNUM, vm_nblocks_now,
685-
(char *) pg, false);
684+
pg.data, false);
686685
vm_nblocks_now++;
687686
}
688687

@@ -699,6 +698,4 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
699698
rel->rd_smgr->smgr_vm_nblocks = vm_nblocks_now;
700699

701700
UnlockRelationForExtension(rel, ExclusiveLock);
702-
703-
pfree(pg);
704701
}

src/backend/access/transam/generic_xlog.c

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -61,14 +61,11 @@ typedef struct
6161
/* State of generic xlog record construction */
6262
struct GenericXLogState
6363
{
64-
/*
65-
* page's images. Should be first in this struct to have MAXALIGN'ed
66-
* images addresses, because some code working with pages directly aligns
67-
* addresses, not offsets from beginning of page
68-
*/
69-
char images[MAX_GENERIC_XLOG_PAGES * BLCKSZ];
64+
/* Info about each page, see above */
7065
PageData pages[MAX_GENERIC_XLOG_PAGES];
7166
bool isLogged;
67+
/* Page images (properly aligned) */
68+
PGAlignedBlock images[MAX_GENERIC_XLOG_PAGES];
7269
};
7370

7471
static void writeFragment(PageData *pageData, OffsetNumber offset,
@@ -251,12 +248,12 @@ computeDelta(PageData *pageData, Page curpage, Page targetpage)
251248
#ifdef WAL_DEBUG
252249
if (XLOG_DEBUG)
253250
{
254-
char tmp[BLCKSZ];
251+
PGAlignedBlock tmp;
255252

256-
memcpy(tmp, curpage, BLCKSZ);
257-
applyPageRedo(tmp, pageData->delta, pageData->deltaLen);
258-
if (memcmp(tmp, targetpage, targetLower) != 0 ||
259-
memcmp(tmp + targetUpper, targetpage + targetUpper,
253+
memcpy(tmp.data, curpage, BLCKSZ);
254+
applyPageRedo(tmp.data, pageData->delta, pageData->deltaLen);
255+
if (memcmp(tmp.data, targetpage, targetLower) != 0 ||
256+
memcmp(tmp.data + targetUpper, targetpage + targetUpper,
260257
BLCKSZ - targetUpper) != 0)
261258
elog(ERROR, "result of generic xlog apply does not match");
262259
}
@@ -277,7 +274,7 @@ GenericXLogStart(Relation relation)
277274

278275
for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
279276
{
280-
state->pages[i].image = state->images + BLCKSZ * i;
277+
state->pages[i].image = state->images[i].data;
281278
state->pages[i].buffer = InvalidBuffer;
282279
}
283280

src/backend/access/transam/xlog.c

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3182,8 +3182,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
31823182
{
31833183
char path[MAXPGPATH];
31843184
char tmppath[MAXPGPATH];
3185-
char zbuffer_raw[XLOG_BLCKSZ + MAXIMUM_ALIGNOF];
3186-
char *zbuffer;
3185+
PGAlignedXLogBlock zbuffer;
31873186
XLogSegNo installed_segno;
31883187
XLogSegNo max_segno;
31893188
int fd;
@@ -3237,17 +3236,13 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
32373236
* fsync below) that all the indirect blocks are down on disk. Therefore,
32383237
* fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the
32393238
* log file.
3240-
*
3241-
* Note: ensure the buffer is reasonably well-aligned; this may save a few
3242-
* cycles transferring data to the kernel.
32433239
*/
3244-
zbuffer = (char *) MAXALIGN(zbuffer_raw);
3245-
memset(zbuffer, 0, XLOG_BLCKSZ);
3240+
memset(zbuffer.data, 0, XLOG_BLCKSZ);
32463241
for (nbytes = 0; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
32473242
{
32483243
errno = 0;
32493244
pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
3250-
if ((int) write(fd, zbuffer, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
3245+
if ((int) write(fd, zbuffer.data, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
32513246
{
32523247
int save_errno = errno;
32533248

@@ -3355,7 +3350,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
33553350
{
33563351
char path[MAXPGPATH];
33573352
char tmppath[MAXPGPATH];
3358-
char buffer[XLOG_BLCKSZ];
3353+
PGAlignedXLogBlock buffer;
33593354
int srcfd;
33603355
int fd;
33613356
int nbytes;
@@ -3399,15 +3394,15 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
33993394
* zeros.
34003395
*/
34013396
if (nread < sizeof(buffer))
3402-
memset(buffer, 0, sizeof(buffer));
3397+
memset(buffer.data, 0, sizeof(buffer));
34033398

34043399
if (nread > 0)
34053400
{
34063401
if (nread > sizeof(buffer))
34073402
nread = sizeof(buffer);
34083403
errno = 0;
34093404
pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_READ);
3410-
if (read(srcfd, buffer, nread) != nread)
3405+
if (read(srcfd, buffer.data, nread) != nread)
34113406
{
34123407
if (errno != 0)
34133408
ereport(ERROR,
@@ -3423,7 +3418,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
34233418
}
34243419
errno = 0;
34253420
pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_WRITE);
3426-
if ((int) write(fd, buffer, sizeof(buffer)) != (int) sizeof(buffer))
3421+
if ((int) write(fd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer))
34273422
{
34283423
int save_errno = errno;
34293424

@@ -6410,8 +6405,11 @@ StartupXLOG(void)
64106405
xlogreader->system_identifier = ControlFile->system_identifier;
64116406

64126407
/*
6413-
* Allocate pages dedicated to WAL consistency checks, those had better be
6414-
* aligned.
6408+
* Allocate two page buffers dedicated to WAL consistency checks. We do
6409+
* it this way, rather than just making static arrays, for two reasons:
6410+
* (1) no need to waste the storage in most instantiations of the backend;
6411+
* (2) a static char array isn't guaranteed to have any particular
6412+
* alignment, whereas palloc() will provide MAXALIGN'd storage.
64156413
*/
64166414
replay_image_masked = (char *) palloc(BLCKSZ);
64176415
master_image_masked = (char *) palloc(BLCKSZ);

0 commit comments

Comments
 (0)