Skip to content

Commit 20ba5ca

Browse files
committed
Move WAL continuation record information to WAL page header.
The continuation record only contained one field, xl_rem_len, so it makes things simpler to just include it in the WAL page header. This wastes four bytes on pages that don't begin with a continuation from the previous page, plus four bytes on every page, because of padding. The motivation for this is to make it easier to calculate how much space a WAL record needs. Before this patch, it depended on how many page boundaries the record crosses. The motivation for that, in turn, is to separate the allocation of space in the WAL from the copying of the record data to the allocated space. Keeping the calculation of the required space simple helps to keep the critical section that allocates space in the WAL short. But that's not included in this patch yet. Bump WAL version number again, as this is an incompatible change.
1 parent dfda6eb commit 20ba5ca

File tree

2 files changed

+28
-38
lines changed

2 files changed

+28
-38
lines changed

src/backend/access/transam/xlog.c

+14-17
Original file line numberDiff line numberDiff line change
@@ -696,7 +696,6 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
696696
{
697697
XLogCtlInsert *Insert = &XLogCtl->Insert;
698698
XLogRecord *record;
699-
XLogContRecord *contrecord;
700699
XLogRecPtr RecPtr;
701700
XLogRecPtr WriteRqst;
702701
uint32 freespace;
@@ -1085,9 +1084,7 @@ begin:;
10851084
curridx = Insert->curridx;
10861085
/* Insert cont-record header */
10871086
Insert->currpage->xlp_info |= XLP_FIRST_IS_CONTRECORD;
1088-
contrecord = (XLogContRecord *) Insert->currpos;
1089-
contrecord->xl_rem_len = write_len;
1090-
Insert->currpos += SizeOfXLogContRecord;
1087+
Insert->currpage->xlp_rem_len = write_len;
10911088
freespace = INSERT_FREESPACE(Insert);
10921089
}
10931090

@@ -3941,7 +3938,8 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
39413938
if (total_len > len)
39423939
{
39433940
/* Need to reassemble record */
3944-
XLogContRecord *contrecord;
3941+
char *contrecord;
3942+
XLogPageHeader pageHeader;
39453943
XLogRecPtr pagelsn;
39463944
uint32 gotlen = len;
39473945

@@ -3969,39 +3967,38 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
39693967
readOff)));
39703968
goto next_record_is_invalid;
39713969
}
3972-
pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
3973-
contrecord = (XLogContRecord *) ((char *) readBuf + pageHeaderSize);
3974-
if (contrecord->xl_rem_len == 0 ||
3975-
total_len != (contrecord->xl_rem_len + gotlen))
3970+
pageHeader = (XLogPageHeader) readBuf;
3971+
pageHeaderSize = XLogPageHeaderSize(pageHeader);
3972+
contrecord = (char *) readBuf + pageHeaderSize;
3973+
if (pageHeader->xlp_rem_len == 0 ||
3974+
total_len != (pageHeader->xlp_rem_len + gotlen))
39763975
{
39773976
char fname[MAXFNAMELEN];
39783977
XLogFileName(fname, curFileTLI, readSegNo);
39793978
ereport(emode_for_corrupt_record(emode, *RecPtr),
39803979
(errmsg("invalid contrecord length %u in log segment %s, offset %u",
3981-
contrecord->xl_rem_len,
3980+
pageHeader->xlp_rem_len,
39823981
XLogFileNameP(curFileTLI, readSegNo),
39833982
readOff)));
39843983
goto next_record_is_invalid;
39853984
}
3986-
len = XLOG_BLCKSZ - pageHeaderSize - SizeOfXLogContRecord;
3987-
if (contrecord->xl_rem_len > len)
3985+
len = XLOG_BLCKSZ - pageHeaderSize;
3986+
if (pageHeader->xlp_rem_len > len)
39883987
{
3989-
memcpy(buffer, (char *) contrecord + SizeOfXLogContRecord, len);
3988+
memcpy(buffer, (char *) contrecord, len);
39903989
gotlen += len;
39913990
buffer += len;
39923991
continue;
39933992
}
3994-
memcpy(buffer, (char *) contrecord + SizeOfXLogContRecord,
3995-
contrecord->xl_rem_len);
3993+
memcpy(buffer, (char *) contrecord, pageHeader->xlp_rem_len);
39963994
break;
39973995
}
39983996
if (!RecordIsValid(record, *RecPtr, emode))
39993997
goto next_record_is_invalid;
40003998
pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
40013999
XLogSegNoOffsetToRecPtr(
40024000
readSegNo,
4003-
readOff + pageHeaderSize +
4004-
MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len),
4001+
readOff + pageHeaderSize + MAXALIGN(pageHeader->xlp_rem_len),
40054002
EndRecPtr);
40064003
ReadRecPtr = *RecPtr;
40074004
/* needn't worry about XLOG SWITCH, it can't cross page boundaries */

src/include/access/xlog_internal.h

+14-21
Original file line numberDiff line numberDiff line change
@@ -48,37 +48,30 @@ typedef struct BkpBlock
4848
/* ACTUAL BLOCK DATA FOLLOWS AT END OF STRUCT */
4949
} BkpBlock;
5050

51-
/*
52-
* When there is not enough space on current page for whole record, we
53-
* continue on the next page with continuation record. (However, the
54-
* XLogRecord header will never be split across pages; if there's less than
55-
* SizeOfXLogRecord space left at the end of a page, we just waste it.)
56-
*
57-
* Note that xl_rem_len includes backup-block data; that is, it tracks
58-
* xl_tot_len not xl_len in the initial header. Also note that the
59-
* continuation data isn't necessarily aligned.
60-
*/
61-
typedef struct XLogContRecord
62-
{
63-
uint32 xl_rem_len; /* total len of remaining data for record */
64-
65-
/* ACTUAL LOG DATA FOLLOWS AT END OF STRUCT */
66-
67-
} XLogContRecord;
68-
69-
#define SizeOfXLogContRecord sizeof(XLogContRecord)
70-
7151
/*
7252
* Each page of XLOG file has a header like this:
7353
*/
74-
#define XLOG_PAGE_MAGIC 0xD072 /* can be used as WAL version indicator */
54+
#define XLOG_PAGE_MAGIC 0xD073 /* can be used as WAL version indicator */
7555

7656
typedef struct XLogPageHeaderData
7757
{
7858
uint16 xlp_magic; /* magic value for correctness checks */
7959
uint16 xlp_info; /* flag bits, see below */
8060
TimeLineID xlp_tli; /* TimeLineID of first record on page */
8161
XLogRecPtr xlp_pageaddr; /* XLOG address of this page */
62+
63+
/*
64+
* When there is not enough space on current page for whole record, we
65+
* continue on the next page. xlp_rem_len is the number of bytes
66+
* remaining from a previous page. (However, the XLogRecord header will
67+
* never be split across pages; if there's less than SizeOfXLogRecord
68+
* space left at the end of a page, we just waste it.)
69+
*
70+
* Note that xlp_rem_len includes backup-block data; that is, it tracks
71+
* xl_tot_len not xl_len in the initial header. Also note that the
72+
* continuation data isn't necessarily aligned.
73+
*/
74+
uint32 xlp_rem_len; /* total len of remaining data for record */
8275
} XLogPageHeaderData;
8376

8477
#define SizeOfXLogShortPHD MAXALIGN(sizeof(XLogPageHeaderData))

0 commit comments

Comments
 (0)