Skip to content

Commit 1667148

Browse files
committed
Improve representation of 'moved partitions' indicator on deleted tuples.
Previously, when a tuple was moved to a different partition (see f16241b), the block number in the old tuple's t_ctid was set to an invalid value to indicate that fact, but the tuple offset was left untouched. That turned out to trigger a wal_consistency_checking failure, as reported by Peter Geoghegan, because the offset wasn't always overwritten during WAL replay. Heikki observed that we're wasting valuable data by not also putting information in the offset. Thus set the offset to MovedPartitionsOffsetNumber when a tuple indicates it has moved. We continue to set the block number to MovedPartitionsBlockNumber, as that seems more likely to cause problems for code not updated to know about moved tuples. As t_ctid's offset number is now always set, this refinement also fixes the wal_consistency_checking issue. This is technically a minor disk format break, with previously created moved tuples no longer being recognized. But since there has not even been a beta release since f16241b... Reported-By: Peter Geoghegan Author: Heikki Linnakangas, Amul Sul Discussion: https://postgr.es/m/CAH2-Wzm9ty+1BX7-GMNJ=xPRg67oJTVeDNdA9LSyJJtMgRiCMA@mail.gmail.com
1 parent 37a3058 commit 1667148

File tree

2 files changed

+39
-19
lines changed

2 files changed

+39
-19
lines changed

src/include/access/htup_details.h

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -83,13 +83,15 @@
8383
*
8484
* A word about t_ctid: whenever a new tuple is stored on disk, its t_ctid
8585
* is initialized with its own TID (location). If the tuple is ever updated,
86-
* its t_ctid is changed to point to the replacement version of the tuple or
87-
* the block number (ip_blkid) is invalidated if the tuple is moved from one
88-
* partition to another partition relation due to an update of the partition
89-
* key. Thus, a tuple is the latest version of its row iff XMAX is invalid or
86+
* its t_ctid is changed to point to the replacement version of the tuple. Or
87+
* if the tuple is moved from one partition to another, due to an update of
88+
* the partition key, t_ctid is set to a special value to indicate that
89+
* (see ItemPointerSetMovedPartitions). Thus, a tuple is the latest version
90+
* of its row iff XMAX is invalid or
9091
* t_ctid points to itself (in which case, if XMAX is valid, the tuple is
9192
* either locked or deleted). One can follow the chain of t_ctid links
92-
* to find the newest version of the row. Beware however that VACUUM might
93+
* to find the newest version of the row, unless it was moved to a different
94+
* partition. Beware however that VACUUM might
9395
* erase the pointed-to (newer) tuple before erasing the pointing (older)
9496
* tuple. Hence, when following a t_ctid link, it is necessary to check
9597
* to see if the referenced slot is empty or contains an unrelated tuple.
@@ -287,14 +289,6 @@ struct HeapTupleHeaderData
287289
*/
288290
#define HEAP_TUPLE_HAS_MATCH HEAP_ONLY_TUPLE /* tuple has a join match */
289291

290-
/*
291-
* Special value used in t_ctid.ip_posid, to indicate that it holds a
292-
* speculative insertion token rather than a real TID. This must be higher
293-
* than MaxOffsetNumber, so that it can be distinguished from a valid
294-
* offset number in a regular item pointer.
295-
*/
296-
#define SpecTokenOffsetNumber 0xfffe
297-
298292
/*
299293
* HeapTupleHeader accessor macros
300294
*
@@ -447,11 +441,12 @@ do { \
447441
ItemPointerSet(&(tup)->t_ctid, token, SpecTokenOffsetNumber) \
448442
)
449443

450-
#define HeapTupleHeaderSetMovedPartitions(tup) \
451-
ItemPointerSetMovedPartitions(&(tup)->t_ctid)
452-
453444
#define HeapTupleHeaderIndicatesMovedPartitions(tup) \
454-
ItemPointerIndicatesMovedPartitions(&tup->t_ctid)
445+
(ItemPointerGetOffsetNumber(&(tup)->t_ctid) == MovedPartitionsOffsetNumber && \
446+
ItemPointerGetBlockNumberNoCheck(&(tup)->t_ctid) == MovedPartitionsBlockNumber)
447+
448+
#define HeapTupleHeaderSetMovedPartitions(tup) \
449+
ItemPointerSet(&(tup)->t_ctid, MovedPartitionsBlockNumber, MovedPartitionsOffsetNumber)
455450

456451
#define HeapTupleHeaderGetDatumLength(tup) \
457452
VARSIZE(tup)

src/include/storage/itemptr.h

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,28 @@ ItemPointerData;
4848

4949
typedef ItemPointerData *ItemPointer;
5050

51+
/* ----------------
52+
* special values used in heap tuples (t_ctid)
53+
* ----------------
54+
*/
55+
56+
/*
57+
* If a heap tuple holds a speculative insertion token rather than a real
58+
* TID, ip_posid is set to SpecTokenOffsetNumber, and the token is stored in
59+
* ip_blkid. SpecTokenOffsetNumber must be higher than MaxOffsetNumber, so
60+
* that it can be distinguished from a valid offset number in a regular item
61+
* pointer.
62+
*/
63+
#define SpecTokenOffsetNumber 0xfffe
64+
65+
/*
66+
* When a tuple is moved to a different partition by UPDATE, the t_ctid of
67+
* the old tuple version is set to this magic value.
68+
*/
69+
#define MovedPartitionsOffsetNumber 0xfffd
70+
#define MovedPartitionsBlockNumber InvalidBlockNumber
71+
72+
5173
/* ----------------
5274
* support macros
5375
* ----------------
@@ -160,15 +182,18 @@ typedef ItemPointerData *ItemPointer;
160182
* partition.
161183
*/
162184
#define ItemPointerIndicatesMovedPartitions(pointer) \
163-
!BlockNumberIsValid(ItemPointerGetBlockNumberNoCheck(pointer))
185+
( \
186+
ItemPointerGetOffsetNumber(pointer) == MovedPartitionsOffsetNumber && \
187+
ItemPointerGetBlockNumberNoCheck(pointer) == MovedPartitionsBlockNumber \
188+
)
164189

165190
/*
166191
* ItemPointerSetMovedPartitions
167192
* Indicate that the item referenced by the itempointer has moved into a
168193
* different partition.
169194
*/
170195
#define ItemPointerSetMovedPartitions(pointer) \
171-
ItemPointerSetBlockNumber((pointer), InvalidBlockNumber)
196+
ItemPointerSet((pointer), MovedPartitionsBlockNumber, MovedPartitionsOffsetNumber)
172197

173198
/* ----------------
174199
* externs

0 commit comments

Comments
 (0)