Skip to content

Commit dba1f31

Browse files
committed
Fix postmaster's handling of fork failure for a bgworker process.
This corner case didn't behave nicely at all: the postmaster would (partially) update its state as though the process had started successfully, and be quite confused thereafter. Fix it to act like the worker had crashed, instead. In passing, refactor so that do_start_bgworker contains all the state-change logic for bgworker launch, rather than just some of it. Back-patch as far as 9.4. 9.3 contains similar logic, but it's just enough different that I don't feel comfortable applying the patch without more study; and the use of bgworkers in 9.3 was so small that it doesn't seem worth the extra work. transam/parallel.c is still entirely unprepared for the possibility of bgworker startup failure, but that seems like material for a separate patch. Discussion: https://postgr.es/m/4905.1492813727@sss.pgh.pa.us
1 parent 7be3678 commit dba1f31

File tree

1 file changed

+77
-31
lines changed

1 file changed

+77
-31
lines changed

src/backend/postmaster/postmaster.c

Lines changed: 77 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,7 @@ static void TerminateChildren(int signal);
408408
#define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
409409

410410
static int CountChildren(int target);
411+
static bool assign_backendlist_entry(RegisteredBgWorker *rw);
411412
static void maybe_start_bgworker(void);
412413
static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
413414
static pid_t StartChildProcess(AuxProcType type);
@@ -5465,13 +5466,33 @@ bgworker_forkexec(int shmem_slot)
54655466
* Start a new bgworker.
54665467
* Starting time conditions must have been checked already.
54675468
*
5469+
* Returns true on success, false on failure.
5470+
* In either case, update the RegisteredBgWorker's state appropriately.
5471+
*
54685472
* This code is heavily based on autovacuum.c, q.v.
54695473
*/
5470-
static void
5474+
static bool
54715475
do_start_bgworker(RegisteredBgWorker *rw)
54725476
{
54735477
pid_t worker_pid;
54745478

5479+
Assert(rw->rw_pid == 0);
5480+
5481+
/*
5482+
* Allocate and assign the Backend element. Note we must do this before
5483+
* forking, so that we can handle out of memory properly.
5484+
*
5485+
* Treat failure as though the worker had crashed. That way, the
5486+
* postmaster will wait a bit before attempting to start it again; if it
5487+
* tried again right away, most likely it'd find itself repeating the
5488+
* out-of-memory or fork failure condition.
5489+
*/
5490+
if (!assign_backendlist_entry(rw))
5491+
{
5492+
rw->rw_crashed_at = GetCurrentTimestamp();
5493+
return false;
5494+
}
5495+
54755496
ereport(DEBUG1,
54765497
(errmsg("starting background worker process \"%s\"",
54775498
rw->rw_worker.bgw_name)));
@@ -5483,9 +5504,17 @@ do_start_bgworker(RegisteredBgWorker *rw)
54835504
#endif
54845505
{
54855506
case -1:
5507+
/* in postmaster, fork failed ... */
54865508
ereport(LOG,
54875509
(errmsg("could not fork worker process: %m")));
5488-
return;
5510+
/* undo what assign_backendlist_entry did */
5511+
ReleasePostmasterChildSlot(rw->rw_child_slot);
5512+
rw->rw_child_slot = 0;
5513+
free(rw->rw_backend);
5514+
rw->rw_backend = NULL;
5515+
/* mark entry as crashed, so we'll try again later */
5516+
rw->rw_crashed_at = GetCurrentTimestamp();
5517+
break;
54895518

54905519
#ifndef EXEC_BACKEND
54915520
case 0:
@@ -5499,13 +5528,24 @@ do_start_bgworker(RegisteredBgWorker *rw)
54995528

55005529
MyBgworkerEntry = &rw->rw_worker;
55015530
StartBackgroundWorker();
5531+
5532+
exit(1); /* should not get here */
55025533
break;
55035534
#endif
55045535
default:
5536+
/* in postmaster, fork successful ... */
55055537
rw->rw_pid = worker_pid;
55065538
rw->rw_backend->pid = rw->rw_pid;
55075539
ReportBackgroundWorkerPID(rw);
5540+
/* add new worker to lists of backends */
5541+
dlist_push_head(&BackendList, &rw->rw_backend->elem);
5542+
#ifdef EXEC_BACKEND
5543+
ShmemBackendArrayAdd(rw->rw_backend);
5544+
#endif
5545+
return true;
55085546
}
5547+
5548+
return false;
55095549
}
55105550

55115551
/*
@@ -5552,6 +5592,8 @@ bgworker_should_start_now(BgWorkerStartTime start_time)
55525592
* Allocate the Backend struct for a connected background worker, but don't
55535593
* add it to the list of backends just yet.
55545594
*
5595+
* On failure, return false without changing any worker state.
5596+
*
55555597
* Some info from the Backend is copied into the passed rw.
55565598
*/
55575599
static bool
@@ -5564,14 +5606,6 @@ assign_backendlist_entry(RegisteredBgWorker *rw)
55645606
ereport(LOG,
55655607
(errcode(ERRCODE_OUT_OF_MEMORY),
55665608
errmsg("out of memory")));
5567-
5568-
/*
5569-
* The worker didn't really crash, but setting this nonzero makes
5570-
* postmaster wait a bit before attempting to start it again; if it
5571-
* tried again right away, most likely it'd find itself under the same
5572-
* memory pressure.
5573-
*/
5574-
rw->rw_crashed_at = GetCurrentTimestamp();
55755609
return false;
55765610
}
55775611

@@ -5601,20 +5635,31 @@ assign_backendlist_entry(RegisteredBgWorker *rw)
56015635
* As a side effect, the bgworker control variables are set or reset whenever
56025636
* there are more workers to start after this one, and whenever the overall
56035637
* system state requires it.
5638+
*
5639+
* The reason we start at most one worker per call is to avoid consuming the
5640+
* postmaster's attention for too long when many such requests are pending.
5641+
* As long as StartWorkerNeeded is true, ServerLoop will not block and will
5642+
* call this function again after dealing with any other issues.
56045643
*/
56055644
static void
56065645
maybe_start_bgworker(void)
56075646
{
56085647
slist_mutable_iter iter;
56095648
TimestampTz now = 0;
56105649

5650+
/*
5651+
* During crash recovery, we have no need to be called until the state
5652+
* transition out of recovery.
5653+
*/
56115654
if (FatalError)
56125655
{
56135656
StartWorkerNeeded = false;
56145657
HaveCrashedWorker = false;
5615-
return; /* not yet */
5658+
return;
56165659
}
56175660

5661+
/* Don't need to be called again unless we find a reason for it below */
5662+
StartWorkerNeeded = false;
56185663
HaveCrashedWorker = false;
56195664

56205665
slist_foreach_modify(iter, &BackgroundWorkerList)
@@ -5623,11 +5668,11 @@ maybe_start_bgworker(void)
56235668

56245669
rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
56255670

5626-
/* already running? */
5671+
/* ignore if already running */
56275672
if (rw->rw_pid != 0)
56285673
continue;
56295674

5630-
/* marked for death? */
5675+
/* if marked for death, clean up and remove from list */
56315676
if (rw->rw_terminate)
56325677
{
56335678
ForgetBackgroundWorker(&iter);
@@ -5649,49 +5694,50 @@ maybe_start_bgworker(void)
56495694
continue;
56505695
}
56515696

5697+
/* read system time only when needed */
56525698
if (now == 0)
56535699
now = GetCurrentTimestamp();
56545700

56555701
if (!TimestampDifferenceExceeds(rw->rw_crashed_at, now,
56565702
rw->rw_worker.bgw_restart_time * 1000))
56575703
{
5704+
/* Set flag to remember that we have workers to start later */
56585705
HaveCrashedWorker = true;
56595706
continue;
56605707
}
56615708
}
56625709

56635710
if (bgworker_should_start_now(rw->rw_worker.bgw_start_time))
56645711
{
5665-
/* reset crash time before calling assign_backendlist_entry */
5712+
/* reset crash time before trying to start worker */
56665713
rw->rw_crashed_at = 0;
56675714

56685715
/*
5669-
* Allocate and assign the Backend element. Note we
5670-
* must do this before forking, so that we can handle out of
5671-
* memory properly.
5716+
* Try to start the worker.
5717+
*
5718+
* On failure, give up processing workers for now, but set
5719+
* StartWorkerNeeded so we'll come back here on the next iteration
5720+
* of ServerLoop to try again. (We don't want to wait, because
5721+
* there might be additional ready-to-run workers.) We could set
5722+
* HaveCrashedWorker as well, since this worker is now marked
5723+
* crashed, but there's no need because the next run of this
5724+
* function will do that.
56725725
*/
5673-
if (!assign_backendlist_entry(rw))
5726+
if (!do_start_bgworker(rw))
5727+
{
5728+
StartWorkerNeeded = true;
56745729
return;
5675-
5676-
do_start_bgworker(rw); /* sets rw->rw_pid */
5677-
5678-
dlist_push_head(&BackendList, &rw->rw_backend->elem);
5679-
#ifdef EXEC_BACKEND
5680-
ShmemBackendArrayAdd(rw->rw_backend);
5681-
#endif
5730+
}
56825731

56835732
/*
5684-
* Have ServerLoop call us again. Note that there might not
5685-
* actually *be* another runnable worker, but we don't care all
5686-
* that much; we will find out the next time we run.
5733+
* Quit, but have ServerLoop call us again to look for additional
5734+
* ready-to-run workers. There might not be any, but we'll find
5735+
* out the next time we run.
56875736
*/
56885737
StartWorkerNeeded = true;
56895738
return;
56905739
}
56915740
}
5692-
5693-
/* no runnable worker found */
5694-
StartWorkerNeeded = false;
56955741
}
56965742

56975743
/*

0 commit comments

Comments
 (0)