Skip to content

Commit 6ae701b

Browse files
author
Amit Kapila
committed
Track invalidation_reason in pg_replication_slots.
Till now, the reason for replication slot invalidation is not tracked directly in pg_replication_slots. A recent commit 007693f added 'conflict_reason' to show the reasons for slot conflict/invalidation, but only for logical slots. This commit adds a new column 'invalidation_reason' to show invalidation reasons for both physical and logical slots. And, this commit also turns 'conflict_reason' text column to 'conflicting' boolean column (effectively reverting commit 007693f). The 'conflicting' column is true for invalidation reasons 'rows_removed' and 'wal_level_insufficient' because those make the slot conflict with recovery. When 'conflicting' is true, one can now look at the new 'invalidation_reason' column for the reason for the logical slot's conflict with recovery. The new 'invalidation_reason' column will also be useful to track other invalidation reasons in the future commit. Author: Bharath Rupireddy Reviewed-by: Bertrand Drouvot, Amit Kapila, Shveta Malik Discussion: https://www.postgresql.org/message-id/ZfR7HuzFEswakt/a%40ip-10-97-1-34.eu-west-3.compute.internal Discussion: https://www.postgresql.org/message-id/CALj2ACW4aUe-_uFQOjdWCEN-xXoLGhmvRFnL8SNw_TZ5nJe+aw@mail.gmail.com
1 parent b4080fa commit 6ae701b

File tree

13 files changed

+94
-72
lines changed

13 files changed

+94
-72
lines changed

doc/src/sgml/ref/pgupgrade.sgml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -453,8 +453,8 @@ make prefix=/usr/local/pgsql.new install
453453
<para>
454454
All slots on the old cluster must be usable, i.e., there are no slots
455455
whose
456-
<link linkend="view-pg-replication-slots">pg_replication_slots</link>.<structfield>conflict_reason</structfield>
457-
is not <literal>NULL</literal>.
456+
<link linkend="view-pg-replication-slots">pg_replication_slots</link>.<structfield>conflicting</structfield>
457+
is not <literal>true</literal>.
458458
</para>
459459
</listitem>
460460
<listitem>

doc/src/sgml/system-views.sgml

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2525,13 +2525,24 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
25252525

25262526
<row>
25272527
<entry role="catalog_table_entry"><para role="column_definition">
2528-
<structfield>conflict_reason</structfield> <type>text</type>
2528+
<structfield>conflicting</structfield> <type>bool</type>
25292529
</para>
25302530
<para>
2531-
The reason for the logical slot's conflict with recovery. It is always
2532-
NULL for physical slots, as well as for logical slots which are not
2533-
invalidated. The non-NULL values indicate that the slot is marked
2534-
as invalidated. Possible values are:
2531+
True if this logical slot conflicted with recovery (and so is now
2532+
invalidated). When this column is true, check
2533+
<structfield>invalidation_reason</structfield> column for the conflict
2534+
reason. Always NULL for physical slots.
2535+
</para></entry>
2536+
</row>
2537+
2538+
<row>
2539+
<entry role="catalog_table_entry"><para role="column_definition">
2540+
<structfield>invalidation_reason</structfield> <type>text</type>
2541+
</para>
2542+
<para>
2543+
The reason for the slot's invalidation. It is set for both logical and
2544+
physical slots. <literal>NULL</literal> if the slot is not invalidated.
2545+
Possible values are:
25352546
<itemizedlist spacing="compact">
25362547
<listitem>
25372548
<para>
@@ -2542,14 +2553,14 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
25422553
<listitem>
25432554
<para>
25442555
<literal>rows_removed</literal> means that the required rows have
2545-
been removed.
2556+
been removed. It is set only for logical slots.
25462557
</para>
25472558
</listitem>
25482559
<listitem>
25492560
<para>
25502561
<literal>wal_level_insufficient</literal> means that the
25512562
primary doesn't have a <xref linkend="guc-wal-level"/> sufficient to
2552-
perform logical decoding.
2563+
perform logical decoding. It is set only for logical slots.
25532564
</para>
25542565
</listitem>
25552566
</itemizedlist>

src/backend/catalog/system_views.sql

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1023,7 +1023,8 @@ CREATE VIEW pg_replication_slots AS
10231023
L.wal_status,
10241024
L.safe_wal_size,
10251025
L.two_phase,
1026-
L.conflict_reason,
1026+
L.conflicting,
1027+
L.invalidation_reason,
10271028
L.failover,
10281029
L.synced
10291030
FROM pg_get_replication_slots() AS L

src/backend/replication/logical/slotsync.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -663,7 +663,7 @@ synchronize_slots(WalReceiverConn *wrconn)
663663
bool started_tx = false;
664664
const char *query = "SELECT slot_name, plugin, confirmed_flush_lsn,"
665665
" restart_lsn, catalog_xmin, two_phase, failover,"
666-
" database, conflict_reason"
666+
" database, invalidation_reason"
667667
" FROM pg_catalog.pg_replication_slots"
668668
" WHERE failover and NOT temporary";
669669

src/backend/replication/slot.c

Lines changed: 23 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1525,14 +1525,14 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause,
15251525
XLogRecPtr initial_effective_xmin = InvalidXLogRecPtr;
15261526
XLogRecPtr initial_catalog_effective_xmin = InvalidXLogRecPtr;
15271527
XLogRecPtr initial_restart_lsn = InvalidXLogRecPtr;
1528-
ReplicationSlotInvalidationCause conflict_prev PG_USED_FOR_ASSERTS_ONLY = RS_INVAL_NONE;
1528+
ReplicationSlotInvalidationCause invalidation_cause_prev PG_USED_FOR_ASSERTS_ONLY = RS_INVAL_NONE;
15291529

15301530
for (;;)
15311531
{
15321532
XLogRecPtr restart_lsn;
15331533
NameData slotname;
15341534
int active_pid = 0;
1535-
ReplicationSlotInvalidationCause conflict = RS_INVAL_NONE;
1535+
ReplicationSlotInvalidationCause invalidation_cause = RS_INVAL_NONE;
15361536

15371537
Assert(LWLockHeldByMeInMode(ReplicationSlotControlLock, LW_SHARED));
15381538

@@ -1554,17 +1554,14 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause,
15541554

15551555
restart_lsn = s->data.restart_lsn;
15561556

1557-
/*
1558-
* If the slot is already invalid or is a non conflicting slot, we
1559-
* don't need to do anything.
1560-
*/
1557+
/* we do nothing if the slot is already invalid */
15611558
if (s->data.invalidated == RS_INVAL_NONE)
15621559
{
15631560
/*
15641561
* The slot's mutex will be released soon, and it is possible that
15651562
* those values change since the process holding the slot has been
15661563
* terminated (if any), so record them here to ensure that we
1567-
* would report the correct conflict cause.
1564+
* would report the correct invalidation cause.
15681565
*/
15691566
if (!terminated)
15701567
{
@@ -1578,7 +1575,7 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause,
15781575
case RS_INVAL_WAL_REMOVED:
15791576
if (initial_restart_lsn != InvalidXLogRecPtr &&
15801577
initial_restart_lsn < oldestLSN)
1581-
conflict = cause;
1578+
invalidation_cause = cause;
15821579
break;
15831580
case RS_INVAL_HORIZON:
15841581
if (!SlotIsLogical(s))
@@ -1589,30 +1586,30 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause,
15891586
if (TransactionIdIsValid(initial_effective_xmin) &&
15901587
TransactionIdPrecedesOrEquals(initial_effective_xmin,
15911588
snapshotConflictHorizon))
1592-
conflict = cause;
1589+
invalidation_cause = cause;
15931590
else if (TransactionIdIsValid(initial_catalog_effective_xmin) &&
15941591
TransactionIdPrecedesOrEquals(initial_catalog_effective_xmin,
15951592
snapshotConflictHorizon))
1596-
conflict = cause;
1593+
invalidation_cause = cause;
15971594
break;
15981595
case RS_INVAL_WAL_LEVEL:
15991596
if (SlotIsLogical(s))
1600-
conflict = cause;
1597+
invalidation_cause = cause;
16011598
break;
16021599
case RS_INVAL_NONE:
16031600
pg_unreachable();
16041601
}
16051602
}
16061603

16071604
/*
1608-
* The conflict cause recorded previously should not change while the
1609-
* process owning the slot (if any) has been terminated.
1605+
* The invalidation cause recorded previously should not change while
1606+
* the process owning the slot (if any) has been terminated.
16101607
*/
1611-
Assert(!(conflict_prev != RS_INVAL_NONE && terminated &&
1612-
conflict_prev != conflict));
1608+
Assert(!(invalidation_cause_prev != RS_INVAL_NONE && terminated &&
1609+
invalidation_cause_prev != invalidation_cause));
16131610

1614-
/* if there's no conflict, we're done */
1615-
if (conflict == RS_INVAL_NONE)
1611+
/* if there's no invalidation, we're done */
1612+
if (invalidation_cause == RS_INVAL_NONE)
16161613
{
16171614
SpinLockRelease(&s->mutex);
16181615
if (released_lock)
@@ -1632,13 +1629,13 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause,
16321629
{
16331630
MyReplicationSlot = s;
16341631
s->active_pid = MyProcPid;
1635-
s->data.invalidated = conflict;
1632+
s->data.invalidated = invalidation_cause;
16361633

16371634
/*
16381635
* XXX: We should consider not overwriting restart_lsn and instead
16391636
* just rely on .invalidated.
16401637
*/
1641-
if (conflict == RS_INVAL_WAL_REMOVED)
1638+
if (invalidation_cause == RS_INVAL_WAL_REMOVED)
16421639
s->data.restart_lsn = InvalidXLogRecPtr;
16431640

16441641
/* Let caller know */
@@ -1681,7 +1678,7 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause,
16811678
*/
16821679
if (last_signaled_pid != active_pid)
16831680
{
1684-
ReportSlotInvalidation(conflict, true, active_pid,
1681+
ReportSlotInvalidation(invalidation_cause, true, active_pid,
16851682
slotname, restart_lsn,
16861683
oldestLSN, snapshotConflictHorizon);
16871684

@@ -1694,7 +1691,7 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause,
16941691

16951692
last_signaled_pid = active_pid;
16961693
terminated = true;
1697-
conflict_prev = conflict;
1694+
invalidation_cause_prev = invalidation_cause;
16981695
}
16991696

17001697
/* Wait until the slot is released. */
@@ -1727,7 +1724,7 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause,
17271724
ReplicationSlotSave();
17281725
ReplicationSlotRelease();
17291726

1730-
ReportSlotInvalidation(conflict, false, active_pid,
1727+
ReportSlotInvalidation(invalidation_cause, false, active_pid,
17311728
slotname, restart_lsn,
17321729
oldestLSN, snapshotConflictHorizon);
17331730

@@ -2356,21 +2353,21 @@ RestoreSlotFromDisk(const char *name)
23562353
}
23572354

23582355
/*
2359-
* Maps a conflict reason for a replication slot to
2356+
* Maps an invalidation reason for a replication slot to
23602357
* ReplicationSlotInvalidationCause.
23612358
*/
23622359
ReplicationSlotInvalidationCause
2363-
GetSlotInvalidationCause(const char *conflict_reason)
2360+
GetSlotInvalidationCause(const char *invalidation_reason)
23642361
{
23652362
ReplicationSlotInvalidationCause cause;
23662363
ReplicationSlotInvalidationCause result = RS_INVAL_NONE;
23672364
bool found PG_USED_FOR_ASSERTS_ONLY = false;
23682365

2369-
Assert(conflict_reason);
2366+
Assert(invalidation_reason);
23702367

23712368
for (cause = RS_INVAL_NONE; cause <= RS_INVAL_MAX_CAUSES; cause++)
23722369
{
2373-
if (strcmp(SlotInvalidationCauses[cause], conflict_reason) == 0)
2370+
if (strcmp(SlotInvalidationCauses[cause], invalidation_reason) == 0)
23742371
{
23752372
found = true;
23762373
result = cause;

src/backend/replication/slotfuncs.c

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ pg_drop_replication_slot(PG_FUNCTION_ARGS)
239239
Datum
240240
pg_get_replication_slots(PG_FUNCTION_ARGS)
241241
{
242-
#define PG_GET_REPLICATION_SLOTS_COLS 17
242+
#define PG_GET_REPLICATION_SLOTS_COLS 18
243243
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
244244
XLogRecPtr currlsn;
245245
int slotno;
@@ -263,6 +263,7 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
263263
bool nulls[PG_GET_REPLICATION_SLOTS_COLS];
264264
WALAvailability walstate;
265265
int i;
266+
ReplicationSlotInvalidationCause cause;
266267

267268
if (!slot->in_use)
268269
continue;
@@ -409,18 +410,28 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
409410

410411
values[i++] = BoolGetDatum(slot_contents.data.two_phase);
411412

412-
if (slot_contents.data.database == InvalidOid)
413+
cause = slot_contents.data.invalidated;
414+
415+
if (SlotIsPhysical(&slot_contents))
413416
nulls[i++] = true;
414417
else
415418
{
416-
ReplicationSlotInvalidationCause cause = slot_contents.data.invalidated;
417-
418-
if (cause == RS_INVAL_NONE)
419-
nulls[i++] = true;
419+
/*
420+
* rows_removed and wal_level_insufficient are the only two
421+
* reasons for the logical slot's conflict with recovery.
422+
*/
423+
if (cause == RS_INVAL_HORIZON ||
424+
cause == RS_INVAL_WAL_LEVEL)
425+
values[i++] = BoolGetDatum(true);
420426
else
421-
values[i++] = CStringGetTextDatum(SlotInvalidationCauses[cause]);
427+
values[i++] = BoolGetDatum(false);
422428
}
423429

430+
if (cause == RS_INVAL_NONE)
431+
nulls[i++] = true;
432+
else
433+
values[i++] = CStringGetTextDatum(SlotInvalidationCauses[cause]);
434+
424435
values[i++] = BoolGetDatum(slot_contents.data.failover);
425436

426437
values[i++] = BoolGetDatum(slot_contents.data.synced);

src/bin/pg_upgrade/info.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -676,13 +676,13 @@ get_old_cluster_logical_slot_infos(DbInfo *dbinfo, bool live_check)
676676
* removed.
677677
*/
678678
res = executeQueryOrDie(conn, "SELECT slot_name, plugin, two_phase, failover, "
679-
"%s as caught_up, conflict_reason IS NOT NULL as invalid "
679+
"%s as caught_up, invalidation_reason IS NOT NULL as invalid "
680680
"FROM pg_catalog.pg_replication_slots "
681681
"WHERE slot_type = 'logical' AND "
682682
"database = current_database() AND "
683683
"temporary IS FALSE;",
684684
live_check ? "FALSE" :
685-
"(CASE WHEN conflict_reason IS NOT NULL THEN FALSE "
685+
"(CASE WHEN invalidation_reason IS NOT NULL THEN FALSE "
686686
"ELSE (SELECT pg_catalog.binary_upgrade_logical_slot_has_caught_up(slot_name)) "
687687
"END)");
688688

src/include/catalog/catversion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,6 @@
5757
*/
5858

5959
/* yyyymmddN */
60-
#define CATALOG_VERSION_NO 202403221
60+
#define CATALOG_VERSION_NO 202403222
6161

6262
#endif

src/include/catalog/pg_proc.dat

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11133,9 +11133,9 @@
1113311133
proname => 'pg_get_replication_slots', prorows => '10', proisstrict => 'f',
1113411134
proretset => 't', provolatile => 's', prorettype => 'record',
1113511135
proargtypes => '',
11136-
proallargtypes => '{name,name,text,oid,bool,bool,int4,xid,xid,pg_lsn,pg_lsn,text,int8,bool,text,bool,bool}',
11137-
proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
11138-
proargnames => '{slot_name,plugin,slot_type,datoid,temporary,active,active_pid,xmin,catalog_xmin,restart_lsn,confirmed_flush_lsn,wal_status,safe_wal_size,two_phase,conflict_reason,failover,synced}',
11136+
proallargtypes => '{name,name,text,oid,bool,bool,int4,xid,xid,pg_lsn,pg_lsn,text,int8,bool,bool,text,bool,bool}',
11137+
proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
11138+
proargnames => '{slot_name,plugin,slot_type,datoid,temporary,active,active_pid,xmin,catalog_xmin,restart_lsn,confirmed_flush_lsn,wal_status,safe_wal_size,two_phase,conflicting,invalidation_reason,failover,synced}',
1113911139
prosrc => 'pg_get_replication_slots' },
1114011140
{ oid => '3786', descr => 'set up a logical replication slot',
1114111141
proname => 'pg_create_logical_replication_slot', provolatile => 'v',

src/include/replication/slot.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ extern void CheckPointReplicationSlots(bool is_shutdown);
273273
extern void CheckSlotRequirements(void);
274274
extern void CheckSlotPermissions(void);
275275
extern ReplicationSlotInvalidationCause
276-
GetSlotInvalidationCause(const char *conflict_reason);
276+
GetSlotInvalidationCause(const char *invalidation_reason);
277277

278278
extern bool SlotExistsInStandbySlotNames(const char *slot_name);
279279
extern bool StandbySlotsHaveCaughtup(XLogRecPtr wait_for_lsn, int elevel);

0 commit comments

Comments
 (0)