Skip to content

Commit d65485b

Browse files
author
Amit Kapila
committed
Fix xmin advancement during fast_forward decoding.
During logical decoding, we advance the catalog_xmin of the logical slot too early in fast_forward mode, resulting in required catalog data being removed by vacuum. This mode is normally used to advance the slot without processing the changes, but we still can't let the slot's xmin advance to an incorrect value. Commit f49a80c fixed a similar issue where the logical slot's catalog_xmin was getting advanced prematurely during non-fast-forward mode. During xl_running_xacts processing, instead of directly advancing the slot's xmin to the oldest running xid in the record, it allowed the xmin to be held back for snapshots that can be used for not-yet-replayed transactions, as those might consider older txns as running too. However, it missed the fact that the same problem can happen during fast_forward mode decoding, as we won't build a base snapshot in that mode, and a future call to get_changes from the same slot can miss seeing the required catalog changes, leading to incorrect results. This commit allows building the base snapshot even in fast_forward mode to prevent the early advancement of xmin. Reported-by: Amit Kapila <amit.kapila16@gmail.com> Author: Zhijie Hou <houzj.fnst@fujitsu.com> Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com> Reviewed-by: shveta malik <shveta.malik@gmail.com> Reviewed-by: Amit Kapila <amit.kapila16@gmail.com> Backpatch-through: 13 Discussion: https://postgr.es/m/CAA4eK1LqWncUOqKijiafe+Ypt1gQAQRjctKLMY953J79xDBgAg@mail.gmail.com Discussion: https://postgr.es/m/OS0PR01MB57163087F86621D44D9A72BF94BB2@OS0PR01MB5716.jpnprd01.prod.outlook.com
1 parent 4164d69 commit d65485b

File tree

3 files changed

+71
-13
lines changed

3 files changed

+71
-13
lines changed

contrib/test_decoding/expected/oldest_xmin.out

+41
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,44 @@ COMMIT
3838
stop
3939
(1 row)
4040

41+
42+
starting permutation: s0_begin s0_getxid s1_begin s1_insert s0_alter s0_commit s0_checkpoint s0_advance_slot s0_advance_slot s1_commit s0_vacuum s0_get_changes
43+
step s0_begin: BEGIN;
44+
step s0_getxid: SELECT pg_current_xact_id() IS NULL;
45+
?column?
46+
--------
47+
f
48+
(1 row)
49+
50+
step s1_begin: BEGIN;
51+
step s1_insert: INSERT INTO harvest VALUES ((1, 2, 3));
52+
step s0_alter: ALTER TYPE basket DROP ATTRIBUTE mangos;
53+
step s0_commit: COMMIT;
54+
step s0_checkpoint: CHECKPOINT;
55+
step s0_advance_slot: SELECT slot_name FROM pg_replication_slot_advance('isolation_slot', pg_current_wal_lsn());
56+
slot_name
57+
--------------
58+
isolation_slot
59+
(1 row)
60+
61+
step s0_advance_slot: SELECT slot_name FROM pg_replication_slot_advance('isolation_slot', pg_current_wal_lsn());
62+
slot_name
63+
--------------
64+
isolation_slot
65+
(1 row)
66+
67+
step s1_commit: COMMIT;
68+
step s0_vacuum: VACUUM pg_attribute;
69+
step s0_get_changes: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
70+
data
71+
------------------------------------------------------
72+
BEGIN
73+
table public.harvest: INSERT: fruits[basket]:'(1,2,3)'
74+
COMMIT
75+
(3 rows)
76+
77+
?column?
78+
--------
79+
stop
80+
(1 row)
81+

contrib/test_decoding/specs/oldest_xmin.spec

+5
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ step "s0_commit" { COMMIT; }
2525
step "s0_checkpoint" { CHECKPOINT; }
2626
step "s0_vacuum" { VACUUM pg_attribute; }
2727
step "s0_get_changes" { SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1'); }
28+
step "s0_advance_slot" { SELECT slot_name FROM pg_replication_slot_advance('isolation_slot', pg_current_wal_lsn()); }
2829

2930
session "s1"
3031
setup { SET synchronous_commit=on; }
@@ -40,3 +41,7 @@ step "s1_commit" { COMMIT; }
4041
# will be removed (xmax set) before T1 commits. That is, interlocking doesn't
4142
# forbid modifying catalog after someone read it (and didn't commit yet).
4243
permutation "s0_begin" "s0_getxid" "s1_begin" "s1_insert" "s0_alter" "s0_commit" "s0_checkpoint" "s0_get_changes" "s0_get_changes" "s1_commit" "s0_vacuum" "s0_get_changes"
44+
45+
# Perform the same testing process as described above, but use advance_slot to
46+
# force xmin advancement during fast-forward decoding.
47+
permutation "s0_begin" "s0_getxid" "s1_begin" "s1_insert" "s0_alter" "s0_commit" "s0_checkpoint" "s0_advance_slot" "s0_advance_slot" "s1_commit" "s0_vacuum" "s0_get_changes"

src/backend/replication/logical/decode.c

+25-13
Original file line numberDiff line numberDiff line change
@@ -362,20 +362,24 @@ DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
362362

363363
/*
364364
* If we don't have snapshot or we are just fast-forwarding, there is no
365-
* point in decoding changes.
365+
* point in decoding data changes. However, it's crucial to build the base
366+
* snapshot during fast-forward mode (as is done in
367+
* SnapBuildProcessChange()) because we require the snapshot's xmin when
368+
* determining the candidate catalog_xmin for the replication slot. See
369+
* SnapBuildProcessRunningXacts().
366370
*/
367-
if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
368-
ctx->fast_forward)
371+
if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
369372
return;
370373

371374
switch (info)
372375
{
373376
case XLOG_HEAP2_MULTI_INSERT:
374-
if (!ctx->fast_forward &&
375-
SnapBuildProcessChange(builder, xid, buf->origptr))
377+
if (SnapBuildProcessChange(builder, xid, buf->origptr) &&
378+
!ctx->fast_forward)
376379
DecodeMultiInsert(ctx, buf);
377380
break;
378381
case XLOG_HEAP2_NEW_CID:
382+
if (!ctx->fast_forward)
379383
{
380384
xl_heap_new_cid *xlrec;
381385

@@ -422,16 +426,20 @@ DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
422426

423427
/*
424428
* If we don't have snapshot or we are just fast-forwarding, there is no
425-
* point in decoding data changes.
429+
* point in decoding data changes. However, it's crucial to build the base
430+
* snapshot during fast-forward mode (as is done in
431+
* SnapBuildProcessChange()) because we require the snapshot's xmin when
432+
* determining the candidate catalog_xmin for the replication slot. See
433+
* SnapBuildProcessRunningXacts().
426434
*/
427-
if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
428-
ctx->fast_forward)
435+
if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
429436
return;
430437

431438
switch (info)
432439
{
433440
case XLOG_HEAP_INSERT:
434-
if (SnapBuildProcessChange(builder, xid, buf->origptr))
441+
if (SnapBuildProcessChange(builder, xid, buf->origptr) &&
442+
!ctx->fast_forward)
435443
DecodeInsert(ctx, buf);
436444
break;
437445

@@ -442,17 +450,20 @@ DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
442450
*/
443451
case XLOG_HEAP_HOT_UPDATE:
444452
case XLOG_HEAP_UPDATE:
445-
if (SnapBuildProcessChange(builder, xid, buf->origptr))
453+
if (SnapBuildProcessChange(builder, xid, buf->origptr) &&
454+
!ctx->fast_forward)
446455
DecodeUpdate(ctx, buf);
447456
break;
448457

449458
case XLOG_HEAP_DELETE:
450-
if (SnapBuildProcessChange(builder, xid, buf->origptr))
459+
if (SnapBuildProcessChange(builder, xid, buf->origptr) &&
460+
!ctx->fast_forward)
451461
DecodeDelete(ctx, buf);
452462
break;
453463

454464
case XLOG_HEAP_TRUNCATE:
455-
if (SnapBuildProcessChange(builder, xid, buf->origptr))
465+
if (SnapBuildProcessChange(builder, xid, buf->origptr) &&
466+
!ctx->fast_forward)
456467
DecodeTruncate(ctx, buf);
457468
break;
458469

@@ -480,7 +491,8 @@ DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
480491
break;
481492

482493
case XLOG_HEAP_CONFIRM:
483-
if (SnapBuildProcessChange(builder, xid, buf->origptr))
494+
if (SnapBuildProcessChange(builder, xid, buf->origptr) &&
495+
!ctx->fast_forward)
484496
DecodeSpecConfirm(ctx, buf);
485497
break;
486498

0 commit comments

Comments
 (0)