28
28
#include <time.h>
29
29
#include <sys/types.h>
30
30
#include <sys/stat.h>
31
+ #include <sys/wait.h>
31
32
#include <unistd.h>
32
33
33
34
#ifdef HAVE_SYS_RESOURCE_H
@@ -154,10 +155,10 @@ static int CreateRestrictedProcess(char *cmd, PROCESS_INFORMATION *processInfo,
154
155
155
156
static pgpid_t get_pgpid (void );
156
157
static char * * readfile (const char * path );
157
- static int start_postmaster (void );
158
+ static pgpid_t start_postmaster (void );
158
159
static void read_post_opts (void );
159
160
160
- static PGPing test_postmaster_connection (bool );
161
+ static PGPing test_postmaster_connection (pgpid_t pm_pid , bool do_checkpoint );
161
162
static bool postmaster_is_alive (pid_t pid );
162
163
163
164
#if defined(HAVE_GETRLIMIT ) && defined(RLIMIT_CORE )
@@ -374,36 +375,73 @@ readfile(const char *path)
374
375
* start/test/stop routines
375
376
*/
376
377
377
- static int
378
+ /*
379
+ * Start the postmaster and return its PID.
380
+ *
381
+ * Currently, on Windows what we return is the PID of the shell process
382
+ * that launched the postmaster (and, we trust, is waiting for it to exit).
383
+ * So the PID is usable for "is the postmaster still running" checks,
384
+ * but cannot be compared directly to postmaster.pid.
385
+ *
386
+ * On Windows, we also save aside a handle to the shell process in
387
+ * "postmasterProcess", which the caller should close when done with it.
388
+ */
389
+ static pgpid_t
378
390
start_postmaster (void )
379
391
{
380
392
char cmd [MAXPGPATH ];
381
393
382
394
#ifndef WIN32
395
+ pgpid_t pm_pid ;
396
+
397
+ /* Flush stdio channels just before fork, to avoid double-output problems */
398
+ fflush (stdout );
399
+ fflush (stderr );
400
+
401
+ pm_pid = fork ();
402
+ if (pm_pid < 0 )
403
+ {
404
+ /* fork failed */
405
+ write_stderr (_ ("%s: could not start server: %s\n" ),
406
+ progname , strerror (errno ));
407
+ exit (1 );
408
+ }
409
+ if (pm_pid > 0 )
410
+ {
411
+ /* fork succeeded, in parent */
412
+ return pm_pid ;
413
+ }
414
+
415
+ /* fork succeeded, in child */
383
416
384
417
/*
385
418
* Since there might be quotes to handle here, it is easier simply to pass
386
- * everything to a shell to process them.
387
- *
388
- * XXX it would be better to fork and exec so that we would know the child
389
- * postmaster's PID directly; then test_postmaster_connection could use
390
- * the PID without having to rely on reading it back from the pidfile.
419
+ * everything to a shell to process them. Use exec so that the postmaster
420
+ * has the same PID as the current child process.
391
421
*/
392
422
if (log_file != NULL )
393
- snprintf (cmd , MAXPGPATH , SYSTEMQUOTE " \"%s\" %s%s < \"%s\" >> \"%s\" 2>&1 &" SYSTEMQUOTE ,
423
+ snprintf (cmd , MAXPGPATH , "exec \"%s\" %s%s < \"%s\" >> \"%s\" 2>&1" ,
394
424
exec_path , pgdata_opt , post_opts ,
395
425
DEVNULL , log_file );
396
426
else
397
- snprintf (cmd , MAXPGPATH , SYSTEMQUOTE " \"%s\" %s%s < \"%s\" 2>&1 &" SYSTEMQUOTE ,
427
+ snprintf (cmd , MAXPGPATH , "exec \"%s\" %s%s < \"%s\" 2>&1" ,
398
428
exec_path , pgdata_opt , post_opts , DEVNULL );
399
429
400
- return system (cmd );
430
+ (void ) execl ("/bin/sh" , "/bin/sh" , "-c" , cmd , (char * ) NULL );
431
+
432
+ /* exec failed */
433
+ write_stderr (_ ("%s: could not start server: %s\n" ),
434
+ progname , strerror (errno ));
435
+ exit (1 );
436
+
437
+ return 0 ; /* keep dumb compilers quiet */
438
+
401
439
#else /* WIN32 */
402
440
403
441
/*
404
- * On win32 we don't use system(). So we don't need to use & (which would
405
- * be START /B on win32). However, we still call the shell (CMD.EXE) with
406
- * it to handle redirection etc.
442
+ * As with the Unix case, it's easiest to use the shell (CMD.EXE) to
443
+ * handle redirection etc. Unfortunately CMD.EXE lacks any equivalent of
444
+ * "exec", so we don't get to find out the postmaster's PID immediately.
407
445
*/
408
446
PROCESS_INFORMATION pi ;
409
447
@@ -415,10 +453,15 @@ start_postmaster(void)
415
453
exec_path , pgdata_opt , post_opts , DEVNULL );
416
454
417
455
if (!CreateRestrictedProcess (cmd , & pi , false))
418
- return GetLastError ();
419
- CloseHandle (pi .hProcess );
456
+ {
457
+ write_stderr (_ ("%s: could not start server: error code %lu\n" ),
458
+ progname , (unsigned long ) GetLastError ());
459
+ exit (1 );
460
+ }
461
+ /* Don't close command process handle here; caller must do so */
462
+ postmasterProcess = pi .hProcess ;
420
463
CloseHandle (pi .hThread );
421
- return 0 ;
464
+ return pi . dwProcessId ; /* Shell's PID, not postmaster's! */
422
465
#endif /* WIN32 */
423
466
}
424
467
@@ -427,15 +470,21 @@ start_postmaster(void)
427
470
/*
428
471
* Find the pgport and try a connection
429
472
*
473
+ * On Unix, pm_pid is the PID of the just-launched postmaster. On Windows,
474
+ * it may be the PID of an ancestor shell process, so we can't check the
475
+ * contents of postmaster.pid quite as carefully.
476
+ *
477
+ * On Windows, the static variable postmasterProcess is an implicit argument
478
+ * to this routine; it contains a handle to the postmaster process or an
479
+ * ancestor shell process thereof.
480
+ *
430
481
* Note that the do_checkpoint parameter enables a Windows service control
431
482
* manager checkpoint; it has nothing to do with database checkpoints!
432
483
*/
433
484
static PGPing
434
- test_postmaster_connection (bool do_checkpoint )
485
+ test_postmaster_connection (pgpid_t pm_pid , bool do_checkpoint )
435
486
{
436
487
PGPing ret = PQPING_NO_RESPONSE ;
437
- bool found_stale_pidfile = false;
438
- pgpid_t pm_pid = 0 ;
439
488
char connstr [MAXPGPATH * 2 + 256 ];
440
489
int i ;
441
490
@@ -490,29 +539,27 @@ test_postmaster_connection(bool do_checkpoint)
490
539
optlines [5 ] != NULL )
491
540
{
492
541
/* File is complete enough for us, parse it */
493
- long pmpid ;
542
+ pgpid_t pmpid ;
494
543
time_t pmstart ;
495
544
496
545
/*
497
- * Make sanity checks. If it's for a standalone backend
498
- * (negative PID), or the recorded start time is before
499
- * pg_ctl started, then either we are looking at the wrong
500
- * data directory, or this is a pre-existing pidfile that
501
- * hasn't (yet?) been overwritten by our child postmaster.
502
- * Allow 2 seconds slop for possible cross-process clock
503
- * skew.
546
+ * Make sanity checks. If it's for the wrong PID, or the
547
+ * recorded start time is before pg_ctl started, then
548
+ * either we are looking at the wrong data directory, or
549
+ * this is a pre-existing pidfile that hasn't (yet?) been
550
+ * overwritten by our child postmaster. Allow 2 seconds
551
+ * slop for possible cross-process clock skew.
504
552
*/
505
553
pmpid = atol (optlines [LOCK_FILE_LINE_PID - 1 ]);
506
554
pmstart = atol (optlines [LOCK_FILE_LINE_START_TIME - 1 ]);
507
- if (pmpid <= 0 || pmstart < start_time - 2 )
508
- {
509
- /*
510
- * Set flag to report stale pidfile if it doesn't get
511
- * overwritten before we give up waiting.
512
- */
513
- found_stale_pidfile = true;
514
- }
515
- else
555
+ if (pmstart >= start_time - 2 &&
556
+ #ifndef WIN32
557
+ pmpid == pm_pid
558
+ #else
559
+ /* Windows can only reject standalone-backend PIDs */
560
+ pmpid > 0
561
+ #endif
562
+ )
516
563
{
517
564
/*
518
565
* OK, seems to be a valid pidfile from our child.
@@ -522,9 +569,6 @@ test_postmaster_connection(bool do_checkpoint)
522
569
char * hostaddr ;
523
570
char host_str [MAXPGPATH ];
524
571
525
- found_stale_pidfile = false;
526
- pm_pid = (pgpid_t ) pmpid ;
527
-
528
572
/*
529
573
* Extract port number and host string to use. Prefer
530
574
* using Unix socket if available.
@@ -583,37 +627,23 @@ test_postmaster_connection(bool do_checkpoint)
583
627
}
584
628
585
629
/*
586
- * The postmaster should create postmaster.pid very soon after being
587
- * started. If it's not there after we've waited 5 or more seconds,
588
- * assume startup failed and give up waiting. (Note this covers both
589
- * cases where the pidfile was never created, and where it was created
590
- * and then removed during postmaster exit.) Also, if there *is* a
591
- * file there but it appears stale, issue a suitable warning and give
592
- * up waiting.
630
+ * Check whether the child postmaster process is still alive. This
631
+ * lets us exit early if the postmaster fails during startup.
632
+ *
633
+ * On Windows, we may be checking the postmaster's parent shell, but
634
+ * that's fine for this purpose.
593
635
*/
594
- if ( i >= 5 )
636
+ #ifndef WIN32
595
637
{
596
- struct stat statbuf ;
597
-
598
- if (stat (pid_file , & statbuf ) != 0 )
599
- return PQPING_NO_RESPONSE ;
638
+ int exitstatus ;
600
639
601
- if (found_stale_pidfile )
602
- {
603
- write_stderr (_ ("\n%s: this data directory appears to be running a pre-existing postmaster\n" ),
604
- progname );
640
+ if (waitpid ((pid_t ) pm_pid , & exitstatus , WNOHANG ) == (pid_t ) pm_pid )
605
641
return PQPING_NO_RESPONSE ;
606
- }
607
642
}
608
-
609
- /*
610
- * If we've been able to identify the child postmaster's PID, check
611
- * the process is still alive. This covers cases where the postmaster
612
- * successfully created the pidfile but then crashed without removing
613
- * it.
614
- */
615
- if (pm_pid > 0 && !postmaster_is_alive ((pid_t ) pm_pid ))
643
+ #else
644
+ if (WaitForSingleObject (postmasterProcess , 0 ) == WAIT_OBJECT_0 )
616
645
return PQPING_NO_RESPONSE ;
646
+ #endif
617
647
618
648
/* No response, or startup still in process; wait */
619
649
#if defined(WIN32 )
@@ -776,7 +806,7 @@ static void
776
806
do_start (void )
777
807
{
778
808
pgpid_t old_pid = 0 ;
779
- int exitcode ;
809
+ pgpid_t pm_pid ;
780
810
781
811
if (ctl_command != RESTART_COMMAND )
782
812
{
@@ -816,19 +846,13 @@ do_start(void)
816
846
}
817
847
#endif
818
848
819
- exitcode = start_postmaster ();
820
- if (exitcode != 0 )
821
- {
822
- write_stderr (_ ("%s: could not start server: exit code was %d\n" ),
823
- progname , exitcode );
824
- exit (1 );
825
- }
849
+ pm_pid = start_postmaster ();
826
850
827
851
if (do_wait )
828
852
{
829
853
print_msg (_ ("waiting for server to start..." ));
830
854
831
- switch (test_postmaster_connection (false))
855
+ switch (test_postmaster_connection (pm_pid , false))
832
856
{
833
857
case PQPING_OK :
834
858
print_msg (_ (" done\n" ));
@@ -854,6 +878,12 @@ do_start(void)
854
878
}
855
879
else
856
880
print_msg (_ ("server starting\n" ));
881
+
882
+ #ifdef WIN32
883
+ /* Now we don't need the handle to the shell process anymore */
884
+ CloseHandle (postmasterProcess );
885
+ postmasterProcess = INVALID_HANDLE_VALUE ;
886
+ #endif
857
887
}
858
888
859
889
@@ -1495,7 +1525,7 @@ pgwin32_ServiceMain(DWORD argc, LPTSTR *argv)
1495
1525
if (do_wait )
1496
1526
{
1497
1527
write_eventlog (EVENTLOG_INFORMATION_TYPE , _ ("Waiting for server startup...\n" ));
1498
- if (test_postmaster_connection (true) != PQPING_OK )
1528
+ if (test_postmaster_connection (postmasterPID , true) != PQPING_OK )
1499
1529
{
1500
1530
write_eventlog (EVENTLOG_ERROR_TYPE , _ ("Timed out waiting for server startup\n" ));
1501
1531
pgwin32_SetServiceStatus (SERVICE_STOPPED );
@@ -1516,10 +1546,9 @@ pgwin32_ServiceMain(DWORD argc, LPTSTR *argv)
1516
1546
{
1517
1547
/*
1518
1548
* status.dwCheckPoint can be incremented by
1519
- * test_postmaster_connection(true), so it might not
1520
- * start from 0.
1549
+ * test_postmaster_connection(), so it might not start from 0.
1521
1550
*/
1522
- int maxShutdownCheckPoint = status .dwCheckPoint + 12 ; ;
1551
+ int maxShutdownCheckPoint = status .dwCheckPoint + 12 ;
1523
1552
1524
1553
kill (postmasterPID , SIGINT );
1525
1554
0 commit comments