Commit 4fe0424

Fix postmaster's handling of fork failure for a bgworker process.
This corner case didn't behave nicely at all: the postmaster would (partially) update its state as though the process had started successfully, and be quite confused thereafter. Fix it to act like the worker had crashed, instead.

In passing, refactor so that do_start_bgworker contains all the state-change logic for bgworker launch, rather than just some of it.

Back-patch as far as 9.4. 9.3 contains similar logic, but it's just enough different that I don't feel comfortable applying the patch without more study; and the use of bgworkers in 9.3 was so small that it doesn't seem worth the extra work.

transam/parallel.c is still entirely unprepared for the possibility of bgworker startup failure, but that seems like material for a separate patch.

Discussion: https://postgr.es/m/4905.1492813727@sss.pgh.pa.us
1 parent 4b34624 commit 4fe0424
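
The heart of the fix is that every launch failure, whether out-of-memory in assign_backendlist_entry or a failed fork(), is now recorded exactly as a worker crash would be: rw_crashed_at gets set, so the existing bgw_restart_time machinery delays the retry instead of the postmaster hammering a failing fork in a tight loop. A minimal standalone sketch of that pattern; the struct and helper below are illustrative stand-ins, not PostgreSQL's actual definitions:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

/* Illustrative stand-in for the RegisteredBgWorker bookkeeping fields. */
struct worker
{
    pid_t   pid;            /* 0 while not running */
    time_t  crashed_at;     /* 0 until a crash (or failed launch) */
    int     restart_sec;    /* modeled on bgw_restart_time */
};

/*
 * Attempt one launch.  On any failure, pretend the worker crashed:
 * the caller's retry logic then waits restart_sec before trying again,
 * exactly as it would after a real crash.
 */
static bool
start_worker(struct worker *w)
{
    pid_t   pid = fork();

    if (pid == -1)
    {
        perror("fork");
        w->crashed_at = time(NULL);     /* treat launch failure as a crash */
        return false;
    }
    if (pid == 0)
    {
        /* child: the real code would run the worker's main function here */
        _exit(0);
    }
    w->pid = pid;                       /* parent: launch succeeded */
    return true;
}

int
main(void)
{
    struct worker w = {0, 0, 5};

    if (!start_worker(&w))
        printf("launch failed; retry after %d s\n", w.restart_sec);
    return 0;
}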


src/backend/postmaster/postmaster.c

Lines changed: 77 additions & 33 deletions
@@ -420,6 +420,7 @@ static void TerminateChildren(int signal);
 #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
 
 static int  CountChildren(int target);
+static bool assign_backendlist_entry(RegisteredBgWorker *rw);
 static void maybe_start_bgworker(void);
 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
 static pid_t StartChildProcess(AuxProcType type);
@@ -5531,13 +5532,33 @@ bgworker_forkexec(int shmem_slot)
  * Start a new bgworker.
  * Starting time conditions must have been checked already.
  *
+ * Returns true on success, false on failure.
+ * In either case, update the RegisteredBgWorker's state appropriately.
+ *
  * This code is heavily based on autovacuum.c, q.v.
  */
-static void
+static bool
 do_start_bgworker(RegisteredBgWorker *rw)
 {
     pid_t       worker_pid;
 
+    Assert(rw->rw_pid == 0);
+
+    /*
+     * Allocate and assign the Backend element.  Note we must do this before
+     * forking, so that we can handle out of memory properly.
+     *
+     * Treat failure as though the worker had crashed.  That way, the
+     * postmaster will wait a bit before attempting to start it again; if it
+     * tried again right away, most likely it'd find itself repeating the
+     * out-of-memory or fork failure condition.
+     */
+    if (!assign_backendlist_entry(rw))
+    {
+        rw->rw_crashed_at = GetCurrentTimestamp();
+        return false;
+    }
+
     ereport(DEBUG1,
             (errmsg("starting background worker process \"%s\"",
                     rw->rw_worker.bgw_name)));
@@ -5549,9 +5570,17 @@ do_start_bgworker(RegisteredBgWorker *rw)
 #endif
     {
         case -1:
+            /* in postmaster, fork failed ... */
             ereport(LOG,
                     (errmsg("could not fork worker process: %m")));
-            return;
+            /* undo what assign_backendlist_entry did */
+            ReleasePostmasterChildSlot(rw->rw_child_slot);
+            rw->rw_child_slot = 0;
+            free(rw->rw_backend);
+            rw->rw_backend = NULL;
+            /* mark entry as crashed, so we'll try again later */
+            rw->rw_crashed_at = GetCurrentTimestamp();
+            break;
 
 #ifndef EXEC_BACKEND
         case 0:
@@ -5575,14 +5604,24 @@ do_start_bgworker(RegisteredBgWorker *rw)
             PostmasterContext = NULL;
 
             StartBackgroundWorker();
+
+            exit(1);            /* should not get here */
             break;
 #endif
         default:
+            /* in postmaster, fork successful ... */
             rw->rw_pid = worker_pid;
             rw->rw_backend->pid = rw->rw_pid;
             ReportBackgroundWorkerPID(rw);
-            break;
+            /* add new worker to lists of backends */
+            dlist_push_head(&BackendList, &rw->rw_backend->elem);
+#ifdef EXEC_BACKEND
+            ShmemBackendArrayAdd(rw->rw_backend);
+#endif
+            return true;
     }
+
+    return false;
 }
 
 /*
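
Pieced together, the three hunks above give do_start_bgworker a single all-or-nothing contract: either the worker is fully registered (return true) or every state change is rolled back and the slot is marked crashed (return false). A condensed reading of the resulting function; context lines not shown in this diff (the DEBUG1 ereport, the EXEC_BACKEND fork variant, and the child-side setup) are summarized as comments:

static bool
do_start_bgworker(RegisteredBgWorker *rw)
{
    pid_t       worker_pid;

    Assert(rw->rw_pid == 0);

    /* allocate Backend bookkeeping first, so OOM is handled before forking */
    if (!assign_backendlist_entry(rw))
    {
        rw->rw_crashed_at = GetCurrentTimestamp();
        return false;
    }

    /* (DEBUG1 "starting background worker" ereport here) */

    switch ((worker_pid = fork_process()))   /* bgworker_forkexec() w/ EXEC_BACKEND */
    {
        case -1:
            /* fork failed: undo assign_backendlist_entry, mark as crashed */
            ReleasePostmasterChildSlot(rw->rw_child_slot);
            rw->rw_child_slot = 0;
            free(rw->rw_backend);
            rw->rw_backend = NULL;
            rw->rw_crashed_at = GetCurrentTimestamp();
            break;

        case 0:
            /* child: (per-process setup elided), then run the worker */
            StartBackgroundWorker();
            exit(1);            /* should not get here */

        default:
            /* parent: only now does the worker join the backend lists */
            rw->rw_pid = worker_pid;
            rw->rw_backend->pid = rw->rw_pid;
            ReportBackgroundWorkerPID(rw);
            dlist_push_head(&BackendList, &rw->rw_backend->elem);
            /* (plus ShmemBackendArrayAdd() under EXEC_BACKEND) */
            return true;
    }

    return false;               /* reached only via the fork-failure path */
}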
@@ -5629,6 +5668,8 @@ bgworker_should_start_now(BgWorkerStartTime start_time)
  * Allocate the Backend struct for a connected background worker, but don't
  * add it to the list of backends just yet.
  *
+ * On failure, return false without changing any worker state.
+ *
  * Some info from the Backend is copied into the passed rw.
  */
 static bool
@@ -5647,8 +5688,6 @@ assign_backendlist_entry(RegisteredBgWorker *rw)
         ereport(LOG,
                 (errcode(ERRCODE_INTERNAL_ERROR),
                  errmsg("could not generate random cancel key")));
-
-        rw->rw_crashed_at = GetCurrentTimestamp();
         return false;
     }
 
@@ -5658,14 +5697,6 @@ assign_backendlist_entry(RegisteredBgWorker *rw)
         ereport(LOG,
                 (errcode(ERRCODE_OUT_OF_MEMORY),
                  errmsg("out of memory")));
-
-        /*
-         * The worker didn't really crash, but setting this nonzero makes
-         * postmaster wait a bit before attempting to start it again; if it
-         * tried again right away, most likely it'd find itself under the same
-         * memory pressure.
-         */
-        rw->rw_crashed_at = GetCurrentTimestamp();
         return false;
     }
 
@@ -5687,20 +5718,31 @@ assign_backendlist_entry(RegisteredBgWorker *rw)
  * As a side effect, the bgworker control variables are set or reset whenever
  * there are more workers to start after this one, and whenever the overall
  * system state requires it.
+ *
+ * The reason we start at most one worker per call is to avoid consuming the
+ * postmaster's attention for too long when many such requests are pending.
+ * As long as StartWorkerNeeded is true, ServerLoop will not block and will
+ * call this function again after dealing with any other issues.
  */
 static void
 maybe_start_bgworker(void)
 {
     slist_mutable_iter iter;
     TimestampTz now = 0;
 
+    /*
+     * During crash recovery, we have no need to be called until the state
+     * transition out of recovery.
+     */
     if (FatalError)
     {
         StartWorkerNeeded = false;
         HaveCrashedWorker = false;
-        return;                 /* not yet */
+        return;
     }
 
+    /* Don't need to be called again unless we find a reason for it below */
+    StartWorkerNeeded = false;
     HaveCrashedWorker = false;
 
     slist_foreach_modify(iter, &BackgroundWorkerList)
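
The new header comment depends on a contract with ServerLoop that this diff doesn't show: while StartWorkerNeeded remains set, the postmaster's main loop avoids blocking and calls maybe_start_bgworker again on each pass. A schematic standalone model of that contract; this is not the actual ServerLoop code, and the names other than StartWorkerNeeded and maybe_start_bgworker are invented for the sketch:

#include <stdbool.h>

static bool StartWorkerNeeded = true;   /* set when worker starts are pending */

/* invented stand-ins for the real postmaster routines */
static void wait_for_connection_or_timeout(void) { /* select()/poll() here */ }
static void handle_other_postmaster_work(void)   { }

static void
maybe_start_bgworker(void)
{
    /* starts at most one worker; leaves StartWorkerNeeded = true if more
     * workers remain, or if the one launch attempted here failed */
}

static void
server_loop(void)
{
    for (;;)
    {
        /* Block waiting for events only when nothing is pending;
         * otherwise just poll and come straight back around. */
        if (!StartWorkerNeeded)
            wait_for_connection_or_timeout();

        handle_other_postmaster_work();

        if (StartWorkerNeeded)
            maybe_start_bgworker();
    }
}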
@@ -5709,11 +5751,11 @@ maybe_start_bgworker(void)
 
         rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
 
-        /* already running? */
+        /* ignore if already running */
         if (rw->rw_pid != 0)
             continue;
 
-        /* marked for death? */
+        /* if marked for death, clean up and remove from list */
         if (rw->rw_terminate)
         {
             ForgetBackgroundWorker(&iter);
@@ -5735,48 +5777,50 @@ maybe_start_bgworker(void)
                 continue;
             }
 
+            /* read system time only when needed */
             if (now == 0)
                 now = GetCurrentTimestamp();
 
             if (!TimestampDifferenceExceeds(rw->rw_crashed_at, now,
                                             rw->rw_worker.bgw_restart_time * 1000))
             {
+                /* Set flag to remember that we have workers to start later */
                 HaveCrashedWorker = true;
                 continue;
             }
         }
 
         if (bgworker_should_start_now(rw->rw_worker.bgw_start_time))
         {
-            /* reset crash time before calling assign_backendlist_entry */
+            /* reset crash time before trying to start worker */
            rw->rw_crashed_at = 0;
 
             /*
-             * Allocate and assign the Backend element.  Note we must do this
-             * before forking, so that we can handle out of memory properly.
+             * Try to start the worker.
+             *
+             * On failure, give up processing workers for now, but set
+             * StartWorkerNeeded so we'll come back here on the next iteration
+             * of ServerLoop to try again.  (We don't want to wait, because
+             * there might be additional ready-to-run workers.)  We could set
+             * HaveCrashedWorker as well, since this worker is now marked
+             * crashed, but there's no need because the next run of this
+             * function will do that.
              */
-            if (!assign_backendlist_entry(rw))
+            if (!do_start_bgworker(rw))
+            {
+                StartWorkerNeeded = true;
                 return;
-
-            do_start_bgworker(rw);  /* sets rw->rw_pid */
-
-            dlist_push_head(&BackendList, &rw->rw_backend->elem);
-#ifdef EXEC_BACKEND
-            ShmemBackendArrayAdd(rw->rw_backend);
-#endif
+            }
 
             /*
-             * Have ServerLoop call us again.  Note that there might not
-             * actually *be* another runnable worker, but we don't care all
-             * that much; we will find out the next time we run.
+             * Quit, but have ServerLoop call us again to look for additional
+             * ready-to-run workers.  There might not be any, but we'll find
+             * out the next time we run.
              */
             StartWorkerNeeded = true;
             return;
         }
     }
-
-    /* no runnable worker found */
-    StartWorkerNeeded = false;
 }
 
 /*
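
One more piece worth calling out from the last hunk: a worker whose rw_crashed_at is set stays skipped until bgw_restart_time has elapsed, which is what makes "mark it crashed" a safe response to fork failure. A self-contained model of that gate, using time_t in place of TimestampTz (the real code uses TimestampDifferenceExceeds with a millisecond interval):

#include <stdbool.h>
#include <time.h>

/*
 * Modeled on the check in maybe_start_bgworker:
 *   !TimestampDifferenceExceeds(rw->rw_crashed_at, now,
 *                               rw->rw_worker.bgw_restart_time * 1000)
 * A crashed worker becomes eligible again only after restart_sec seconds.
 */
static bool
restart_due(time_t crashed_at, time_t now, int restart_sec)
{
    return (now - crashed_at) > restart_sec;
}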
