Commit 3f8148c

Revert 019_replslot_limit.pl related debugging aids.
This reverts most of 91c0570, f28bf66, fe0972e, afdeff1. The only thing
left is the retry loop in 019_replslot_limit.pl that avoids spurious
failures by retrying a couple times.

We haven't seen any hard evidence that this is caused by anything but
slow process shutdown. We did not find any cases where walsenders did
not vanish after waiting for longer. Therefore there's no reason for
this debugging code to remain.

Discussion: https://postgr.es/m/20220530190155.47wr3x2prdwyciah@alap3.anarazel.de
Backpatch: 15-
1 parent b9eb0ff commit 3f8148c

6 files changed: +5, −87 lines

src/backend/replication/slot.c

Lines changed: 0 additions & 43 deletions
@@ -177,10 +177,6 @@ ReplicationSlotInitialize(void)
 static void
 ReplicationSlotShmemExit(int code, Datum arg)
 {
-	/* temp debugging aid to analyze 019_replslot_limit failures */
-	elog(DEBUG3, "replication slot exit hook, %s active slot",
-		 MyReplicationSlot != NULL ? "with" : "without");
-
 	/* Make sure active replication slots are released */
 	if (MyReplicationSlot != NULL)
 		ReplicationSlotRelease();
@@ -582,9 +578,6 @@ ReplicationSlotCleanup(void)
 	Assert(MyReplicationSlot == NULL);
 
 restart:
-	/* temp debugging aid to analyze 019_replslot_limit failures */
-	elog(DEBUG3, "temporary replication slot cleanup: begin");
-
 	LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
 	for (i = 0; i < max_replication_slots; i++)
 	{
@@ -593,10 +586,6 @@ ReplicationSlotCleanup(void)
 		if (!s->in_use)
 			continue;
 
-		/* unlocked read of active_pid is ok for debugging purposes */
-		elog(DEBUG3, "temporary replication slot cleanup: %d in use, active_pid: %d",
-			 i, (int) s->active_pid);
-
 		SpinLockAcquire(&s->mutex);
 		if (s->active_pid == MyProcPid)
 		{
@@ -614,8 +603,6 @@ ReplicationSlotCleanup(void)
 	}
 
 	LWLockRelease(ReplicationSlotControlLock);
-
-	elog(DEBUG3, "temporary replication slot cleanup: done");
 }
 
 /*
@@ -657,9 +644,6 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
 	char		path[MAXPGPATH];
 	char		tmppath[MAXPGPATH];
 
-	/* temp debugging aid to analyze 019_replslot_limit failures */
-	elog(DEBUG3, "replication slot drop: %s: begin", NameStr(slot->data.name));
-
 	/*
 	 * If some other backend ran this code concurrently with us, we might try
 	 * to delete a slot with a certain name while someone else was trying to
@@ -710,9 +694,6 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
 						path, tmppath)));
 	}
 
-	elog(DEBUG3, "replication slot drop: %s: removed on-disk",
-		 NameStr(slot->data.name));
-
 	/*
 	 * The slot is definitely gone.  Lock out concurrent scans of the array
 	 * long enough to kill it.  It's OK to clear the active PID here without
@@ -726,22 +707,15 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
 	slot->active_pid = 0;
 	slot->in_use = false;
 	LWLockRelease(ReplicationSlotControlLock);
-
-	elog(DEBUG3, "replication slot drop: %s: marked as not in use", NameStr(slot->data.name));
-
 	ConditionVariableBroadcast(&slot->active_cv);
 
-	elog(DEBUG3, "replication slot drop: %s: notified others", NameStr(slot->data.name));
-
 	/*
 	 * Slot is dead and doesn't prevent resource removal anymore, recompute
 	 * limits.
 	 */
 	ReplicationSlotsComputeRequiredXmin(false);
 	ReplicationSlotsComputeRequiredLSN();
 
-	elog(DEBUG3, "replication slot drop: %s: computed required", NameStr(slot->data.name));
-
 	/*
	 * If removing the directory fails, the worst thing that will happen is
	 * that the user won't be able to create a new slot with the same name
@@ -751,8 +725,6 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
 		ereport(WARNING,
 				(errmsg("could not remove directory \"%s\"", tmppath)));
 
-	elog(DEBUG3, "replication slot drop: %s: removed directory", NameStr(slot->data.name));
-
 	/*
 	 * Drop the statistics entry for the replication slot. Do this while
 	 * holding ReplicationSlotAllocationLock so that we don't drop a
@@ -767,9 +739,6 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
 	 * a slot while we're still cleaning up the detritus of the old one.
 	 */
 	LWLockRelease(ReplicationSlotAllocationLock);
-
-	elog(DEBUG3, "replication slot drop: %s: done",
-		 NameStr(slot->data.name));
 }
 
 /*
@@ -1329,12 +1298,6 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlot *s, XLogRecPtr oldestLSN,
 			(void) kill(active_pid, SIGTERM);
 			last_signaled_pid = active_pid;
 		}
-		else
-		{
-			/* temp debugging aid to analyze 019_replslot_limit failures */
-			elog(DEBUG3, "not signalling process %d during invalidation of slot \"%s\"",
-				 active_pid, NameStr(slotname));
-		}
 
 		/* Wait until the slot is released. */
 		ConditionVariableSleep(&s->active_cv,
@@ -1398,10 +1361,6 @@ InvalidateObsoleteReplicationSlots(XLogSegNo oldestSegno)
 	XLogSegNoOffsetToRecPtr(oldestSegno, 0, wal_segment_size, oldestLSN);
 
 restart:
-	/* temp debugging aid to analyze 019_replslot_limit failures */
-	elog(DEBUG3, "begin invalidating obsolete replication slots older than %X/%X",
-		 LSN_FORMAT_ARGS(oldestLSN));
-
 	LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
 	for (int i = 0; i < max_replication_slots; i++)
 	{
@@ -1427,8 +1386,6 @@ InvalidateObsoleteReplicationSlots(XLogSegNo oldestSegno)
 		ReplicationSlotsComputeRequiredLSN();
 	}
 
-	elog(DEBUG3, "done invalidating obsolete replication slots");
-
 	return invalidated;
 }

src/backend/storage/lmgr/lwlock.c

Lines changed: 0 additions & 7 deletions
@@ -1942,10 +1942,3 @@ LWLockHeldByMeInMode(LWLock *l, LWLockMode mode)
 	}
 	return false;
 }
-
-/* temp debugging aid to analyze 019_replslot_limit failures */
-int
-LWLockHeldCount(void)
-{
-	return num_held_lwlocks;
-}

src/backend/utils/init/postinit.c

Lines changed: 0 additions & 18 deletions
@@ -1231,24 +1231,6 @@ ShutdownPostgres(int code, Datum arg)
 	 * them explicitly.
 	 */
 	LockReleaseAll(USER_LOCKMETHOD, true);
-
-	/*
-	 * temp debugging aid to analyze 019_replslot_limit failures
-	 *
-	 * If an error were thrown outside of a transaction nothing up to now
-	 * would have released lwlocks. We probably will add an
-	 * LWLockReleaseAll(). But for now make it easier to understand such cases
-	 * by warning if any lwlocks are held.
-	 */
-#ifdef USE_ASSERT_CHECKING
-	{
-		int			held_lwlocks = LWLockHeldCount();
-
-		if (held_lwlocks)
-			elog(WARNING, "holding %d lwlocks at the end of ShutdownPostgres()",
-				 held_lwlocks);
-	}
-#endif
 }
 

src/bin/pg_basebackup/pg_basebackup.c

Lines changed: 1 addition & 9 deletions
@@ -694,16 +694,8 @@ StartLogStreamer(char *startpos, uint32 timeline, char *sysidentifier,
 	bgchild = fork();
 	if (bgchild == 0)
 	{
-		int			ret;
-
 		/* in child process */
-		ret = LogStreamerMain(param);
-
-		/* temp debugging aid to analyze 019_replslot_limit failures */
-		if (verbose)
-			pg_log_info("log streamer with pid %d exiting", getpid());
-
-		exit(ret);
+		exit(LogStreamerMain(param));
 	}
 	else if (bgchild < 0)
 		pg_fatal("could not create background process: %m");

src/include/storage/lwlock.h

Lines changed: 0 additions & 1 deletion
@@ -121,7 +121,6 @@ extern void LWLockReleaseClearVar(LWLock *lock, uint64 *valptr, uint64 val);
 extern void LWLockReleaseAll(void);
 extern bool LWLockHeldByMe(LWLock *lock);
 extern bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode);
-extern int	LWLockHeldCount(void);
 
 extern bool LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval);
 extern void LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 value);

src/test/recovery/t/019_replslot_limit.pl

Lines changed: 4 additions & 9 deletions
@@ -316,16 +316,13 @@
 max_wal_size = 2MB
 log_checkpoints = yes
 max_slot_wal_keep_size = 1MB
-
-# temp debugging aid to analyze 019_replslot_limit failures
-log_min_messages=debug3
 ));
 $node_primary3->start;
 $node_primary3->safe_psql('postgres',
 	"SELECT pg_create_physical_replication_slot('rep3')");
 # Take backup
 $backup_name = 'my_backup';
-$node_primary3->backup($backup_name, backup_options => ['--verbose']);
+$node_primary3->backup($backup_name);
 # Create standby
 my $node_standby3 = PostgreSQL::Test::Cluster->new('standby_3');
 $node_standby3->init_from_backup($node_primary3, $backup_name,
@@ -336,11 +333,9 @@
 
 my $senderpid;
 
-# We've seen occasional cases where multiple walsender pids are active. It
-# could be that we're just observing process shutdown being slow. To collect
-# more information, retry a couple times, print a bit of debugging information
-# each iteration. Don't fail the test if retries find just one pid, the
-# buildfarm failures are too noisy.
+# We've seen occasional cases where multiple walsender pids are still active
+# at this point, apparently just due to process shutdown being slow. To avoid
+# spurious failures, retry a couple times.
 my $i = 0;
 while (1)
 {
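
The hunk above ends just as the retained retry loop begins. For context, a
minimal sketch of what such a walsender-pid retry loop can look like follows;
the polling query, the retry bound, and the failure message here are
illustrative assumptions, not the test's verbatim code.

use Time::HiRes qw(usleep);

# Sketch only: assumes the $node_primary3 cluster object set up above.
my $i = 0;
while (1)
{
	# Poll for walsender backends; safe_psql returns one pid per line.
	$senderpid = $node_primary3->safe_psql('postgres',
		"SELECT pid FROM pg_stat_activity WHERE backend_type = 'walsender'");

	# A single pid means any stale walsender has finished shutting down.
	last if $senderpid =~ qr/^[0-9]+$/;

	# Slow process shutdown: wait a little and retry, but bound the number
	# of attempts so a real problem still fails the test (bound is assumed).
	die "walsender pid did not become unique: $senderpid"
	  if $i++ > 100;
	usleep(100_000);
}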
