28
28
#include <time.h>
29
29
#include <sys/types.h>
30
30
#include <sys/stat.h>
31
+ #include <sys/wait.h>
31
32
#include <unistd.h>
32
33
33
34
#ifdef HAVE_SYS_RESOURCE_H
@@ -156,10 +157,10 @@ static int CreateRestrictedProcess(char *cmd, PROCESS_INFORMATION *processInfo,
156
157
static pgpid_t get_pgpid (bool is_status_request );
157
158
static char * * readfile (const char * path );
158
159
static void free_readfile (char * * optlines );
159
- static int start_postmaster (void );
160
+ static pgpid_t start_postmaster (void );
160
161
static void read_post_opts (void );
161
162
162
- static PGPing test_postmaster_connection (bool );
163
+ static PGPing test_postmaster_connection (pgpid_t pm_pid , bool do_checkpoint );
163
164
static bool postmaster_is_alive (pid_t pid );
164
165
165
166
#if defined(HAVE_GETRLIMIT ) && defined(RLIMIT_CORE )
@@ -420,36 +421,73 @@ free_readfile(char **optlines)
420
421
* start/test/stop routines
421
422
*/
422
423
423
- static int
424
+ /*
425
+ * Start the postmaster and return its PID.
426
+ *
427
+ * Currently, on Windows what we return is the PID of the shell process
428
+ * that launched the postmaster (and, we trust, is waiting for it to exit).
429
+ * So the PID is usable for "is the postmaster still running" checks,
430
+ * but cannot be compared directly to postmaster.pid.
431
+ *
432
+ * On Windows, we also save aside a handle to the shell process in
433
+ * "postmasterProcess", which the caller should close when done with it.
434
+ */
435
+ static pgpid_t
424
436
start_postmaster (void )
425
437
{
426
438
char cmd [MAXPGPATH ];
427
439
428
440
#ifndef WIN32
441
+ pgpid_t pm_pid ;
442
+
443
+ /* Flush stdio channels just before fork, to avoid double-output problems */
444
+ fflush (stdout );
445
+ fflush (stderr );
446
+
447
+ pm_pid = fork ();
448
+ if (pm_pid < 0 )
449
+ {
450
+ /* fork failed */
451
+ write_stderr (_ ("%s: could not start server: %s\n" ),
452
+ progname , strerror (errno ));
453
+ exit (1 );
454
+ }
455
+ if (pm_pid > 0 )
456
+ {
457
+ /* fork succeeded, in parent */
458
+ return pm_pid ;
459
+ }
460
+
461
+ /* fork succeeded, in child */
429
462
430
463
/*
431
464
* Since there might be quotes to handle here, it is easier simply to pass
432
- * everything to a shell to process them.
433
- *
434
- * XXX it would be better to fork and exec so that we would know the child
435
- * postmaster's PID directly; then test_postmaster_connection could use
436
- * the PID without having to rely on reading it back from the pidfile.
465
+ * everything to a shell to process them. Use exec so that the postmaster
466
+ * has the same PID as the current child process.
437
467
*/
438
468
if (log_file != NULL )
439
- snprintf (cmd , MAXPGPATH , "\"%s\" %s%s < \"%s\" >> \"%s\" 2>&1 & " ,
469
+ snprintf (cmd , MAXPGPATH , "exec \"%s\" %s%s < \"%s\" >> \"%s\" 2>&1" ,
440
470
exec_path , pgdata_opt , post_opts ,
441
471
DEVNULL , log_file );
442
472
else
443
- snprintf (cmd , MAXPGPATH , "\"%s\" %s%s < \"%s\" 2>&1 & " ,
473
+ snprintf (cmd , MAXPGPATH , "exec \"%s\" %s%s < \"%s\" 2>&1" ,
444
474
exec_path , pgdata_opt , post_opts , DEVNULL );
445
475
446
- return system (cmd );
476
+ (void ) execl ("/bin/sh" , "/bin/sh" , "-c" , cmd , (char * ) NULL );
477
+
478
+ /* exec failed */
479
+ write_stderr (_ ("%s: could not start server: %s\n" ),
480
+ progname , strerror (errno ));
481
+ exit (1 );
482
+
483
+ return 0 ; /* keep dumb compilers quiet */
484
+
447
485
#else /* WIN32 */
448
486
449
487
/*
450
- * On win32 we don't use system(). So we don't need to use & (which would
451
- * be START /B on win32). However, we still call the shell ( CMD.EXE) with
452
- * it to handle redirection etc .
488
+ * As with the Unix case, it's easiest to use the shell (CMD.EXE) to
489
+ * handle redirection etc. Unfortunately CMD.EXE lacks any equivalent of
490
+ * "exec", so we don't get to find out the postmaster's PID immediately .
453
491
*/
454
492
PROCESS_INFORMATION pi ;
455
493
@@ -461,10 +499,15 @@ start_postmaster(void)
461
499
exec_path , pgdata_opt , post_opts , DEVNULL );
462
500
463
501
if (!CreateRestrictedProcess (cmd , & pi , false))
464
- return GetLastError ();
465
- CloseHandle (pi .hProcess );
502
+ {
503
+ write_stderr (_ ("%s: could not start server: error code %lu\n" ),
504
+ progname , (unsigned long ) GetLastError ());
505
+ exit (1 );
506
+ }
507
+ /* Don't close command process handle here; caller must do so */
508
+ postmasterProcess = pi .hProcess ;
466
509
CloseHandle (pi .hThread );
467
- return 0 ;
510
+ return pi . dwProcessId ; /* Shell's PID, not postmaster's! */
468
511
#endif /* WIN32 */
469
512
}
470
513
@@ -473,15 +516,21 @@ start_postmaster(void)
473
516
/*
474
517
* Find the pgport and try a connection
475
518
*
519
+ * On Unix, pm_pid is the PID of the just-launched postmaster. On Windows,
520
+ * it may be the PID of an ancestor shell process, so we can't check the
521
+ * contents of postmaster.pid quite as carefully.
522
+ *
523
+ * On Windows, the static variable postmasterProcess is an implicit argument
524
+ * to this routine; it contains a handle to the postmaster process or an
525
+ * ancestor shell process thereof.
526
+ *
476
527
* Note that the checkpoint parameter enables a Windows service control
477
528
* manager checkpoint, it's got nothing to do with database checkpoints!!
478
529
*/
479
530
static PGPing
480
- test_postmaster_connection (bool do_checkpoint )
531
+ test_postmaster_connection (pgpid_t pm_pid , bool do_checkpoint )
481
532
{
482
533
PGPing ret = PQPING_NO_RESPONSE ;
483
- bool found_stale_pidfile = false;
484
- pgpid_t pm_pid = 0 ;
485
534
char connstr [MAXPGPATH * 2 + 256 ];
486
535
int i ;
487
536
@@ -536,29 +585,27 @@ test_postmaster_connection(bool do_checkpoint)
536
585
optlines [5 ] != NULL )
537
586
{
538
587
/* File is complete enough for us, parse it */
539
- long pmpid ;
588
+ pgpid_t pmpid ;
540
589
time_t pmstart ;
541
590
542
591
/*
543
- * Make sanity checks. If it's for a standalone backend
544
- * (negative PID), or the recorded start time is before
545
- * pg_ctl started, then either we are looking at the wrong
546
- * data directory, or this is a pre-existing pidfile that
547
- * hasn't (yet?) been overwritten by our child postmaster.
548
- * Allow 2 seconds slop for possible cross-process clock
549
- * skew.
592
+ * Make sanity checks. If it's for the wrong PID, or the
593
+ * recorded start time is before pg_ctl started, then
594
+ * either we are looking at the wrong data directory, or
595
+ * this is a pre-existing pidfile that hasn't (yet?) been
596
+ * overwritten by our child postmaster. Allow 2 seconds
597
+ * slop for possible cross-process clock skew.
550
598
*/
551
599
pmpid = atol (optlines [LOCK_FILE_LINE_PID - 1 ]);
552
600
pmstart = atol (optlines [LOCK_FILE_LINE_START_TIME - 1 ]);
553
- if (pmpid <= 0 || pmstart < start_time - 2 )
554
- {
555
- /*
556
- * Set flag to report stale pidfile if it doesn't get
557
- * overwritten before we give up waiting.
558
- */
559
- found_stale_pidfile = true;
560
- }
561
- else
601
+ if (pmstart >= start_time - 2 &&
602
+ #ifndef WIN32
603
+ pmpid == pm_pid
604
+ #else
605
+ /* Windows can only reject standalone-backend PIDs */
606
+ pmpid > 0
607
+ #endif
608
+ )
562
609
{
563
610
/*
564
611
* OK, seems to be a valid pidfile from our child.
@@ -568,9 +615,6 @@ test_postmaster_connection(bool do_checkpoint)
568
615
char * hostaddr ;
569
616
char host_str [MAXPGPATH ];
570
617
571
- found_stale_pidfile = false;
572
- pm_pid = (pgpid_t ) pmpid ;
573
-
574
618
/*
575
619
* Extract port number and host string to use. Prefer
576
620
* using Unix socket if available.
@@ -636,37 +680,23 @@ test_postmaster_connection(bool do_checkpoint)
636
680
}
637
681
638
682
/*
639
- * The postmaster should create postmaster.pid very soon after being
640
- * started. If it's not there after we've waited 5 or more seconds,
641
- * assume startup failed and give up waiting. (Note this covers both
642
- * cases where the pidfile was never created, and where it was created
643
- * and then removed during postmaster exit.) Also, if there *is* a
644
- * file there but it appears stale, issue a suitable warning and give
645
- * up waiting.
683
+ * Check whether the child postmaster process is still alive. This
684
+ * lets us exit early if the postmaster fails during startup.
685
+ *
686
+ * On Windows, we may be checking the postmaster's parent shell, but
687
+ * that's fine for this purpose.
646
688
*/
647
- if ( i >= 5 )
689
+ #ifndef WIN32
648
690
{
649
- struct stat statbuf ;
650
-
651
- if (stat (pid_file , & statbuf ) != 0 )
652
- return PQPING_NO_RESPONSE ;
691
+ int exitstatus ;
653
692
654
- if (found_stale_pidfile )
655
- {
656
- write_stderr (_ ("\n%s: this data directory appears to be running a pre-existing postmaster\n" ),
657
- progname );
693
+ if (waitpid ((pid_t ) pm_pid , & exitstatus , WNOHANG ) == (pid_t ) pm_pid )
658
694
return PQPING_NO_RESPONSE ;
659
- }
660
695
}
661
-
662
- /*
663
- * If we've been able to identify the child postmaster's PID, check
664
- * the process is still alive. This covers cases where the postmaster
665
- * successfully created the pidfile but then crashed without removing
666
- * it.
667
- */
668
- if (pm_pid > 0 && !postmaster_is_alive ((pid_t ) pm_pid ))
696
+ #else
697
+ if (WaitForSingleObject (postmasterProcess , 0 ) == WAIT_OBJECT_0 )
669
698
return PQPING_NO_RESPONSE ;
699
+ #endif
670
700
671
701
/* No response, or startup still in process; wait */
672
702
#if defined(WIN32 )
@@ -832,7 +862,7 @@ static void
832
862
do_start (void )
833
863
{
834
864
pgpid_t old_pid = 0 ;
835
- int exitcode ;
865
+ pgpid_t pm_pid ;
836
866
837
867
if (ctl_command != RESTART_COMMAND )
838
868
{
@@ -872,19 +902,13 @@ do_start(void)
872
902
}
873
903
#endif
874
904
875
- exitcode = start_postmaster ();
876
- if (exitcode != 0 )
877
- {
878
- write_stderr (_ ("%s: could not start server: exit code was %d\n" ),
879
- progname , exitcode );
880
- exit (1 );
881
- }
905
+ pm_pid = start_postmaster ();
882
906
883
907
if (do_wait )
884
908
{
885
909
print_msg (_ ("waiting for server to start..." ));
886
910
887
- switch (test_postmaster_connection (false))
911
+ switch (test_postmaster_connection (pm_pid , false))
888
912
{
889
913
case PQPING_OK :
890
914
print_msg (_ (" done\n" ));
@@ -910,6 +934,12 @@ do_start(void)
910
934
}
911
935
else
912
936
print_msg (_ ("server starting\n" ));
937
+
938
+ #ifdef WIN32
939
+ /* Now we don't need the handle to the shell process anymore */
940
+ CloseHandle (postmasterProcess );
941
+ postmasterProcess = INVALID_HANDLE_VALUE ;
942
+ #endif
913
943
}
914
944
915
945
@@ -1572,7 +1602,7 @@ pgwin32_ServiceMain(DWORD argc, LPTSTR *argv)
1572
1602
if (do_wait )
1573
1603
{
1574
1604
write_eventlog (EVENTLOG_INFORMATION_TYPE , _ ("Waiting for server startup...\n" ));
1575
- if (test_postmaster_connection (true) != PQPING_OK )
1605
+ if (test_postmaster_connection (postmasterPID , true) != PQPING_OK )
1576
1606
{
1577
1607
write_eventlog (EVENTLOG_ERROR_TYPE , _ ("Timed out waiting for server startup\n" ));
1578
1608
pgwin32_SetServiceStatus (SERVICE_STOPPED );
@@ -1593,10 +1623,9 @@ pgwin32_ServiceMain(DWORD argc, LPTSTR *argv)
1593
1623
{
1594
1624
/*
1595
1625
* status.dwCheckPoint can be incremented by
1596
- * test_postmaster_connection(true), so it might not
1597
- * start from 0.
1626
+ * test_postmaster_connection(), so it might not start from 0.
1598
1627
*/
1599
- int maxShutdownCheckPoint = status .dwCheckPoint + 12 ; ;
1628
+ int maxShutdownCheckPoint = status .dwCheckPoint + 12 ;
1600
1629
1601
1630
kill (postmasterPID , SIGINT );
1602
1631
0 commit comments