|
28 | 28 | #include <fcntl.h>
|
29 | 29 | #include <signal.h>
|
30 | 30 | #include <time.h>
|
| 31 | +#include <sys/stat.h> |
31 | 32 | #include <sys/time.h>
|
32 | 33 | #include <sys/wait.h>
|
33 | 34 | #include <unistd.h>
|
|
59 | 60 | #define PGARCH_RESTART_INTERVAL 10 /* How often to attempt to restart a
|
60 | 61 | * failed archiver; in seconds. */
|
61 | 62 |
|
| 63 | +/* |
| 64 | + * Maximum number of retries allowed when attempting to archive a WAL |
| 65 | + * file. |
| 66 | + */ |
62 | 67 | #define NUM_ARCHIVE_RETRIES 3
|
63 | 68 |
|
| 69 | +/* |
| 70 | + * Maximum number of failed attempts to remove an orphan archive |
| 71 | + * status file (including the first) before giving up for now. |
| 72 | + */ |
| 73 | +#define NUM_ORPHAN_CLEANUP_RETRIES 3 |
| 74 | + |
64 | 75 |
|
65 | 76 | /* ----------
|
66 | 77 | * Local data
|
@@ -424,9 +435,13 @@ pgarch_ArchiverCopyLoop(void)
|
424 | 435 | while (pgarch_readyXlog(xlog))
|
425 | 436 | {
|
426 | 437 | int failures = 0;
|
| 438 | + int failures_orphan = 0; |
427 | 439 |
|
428 | 440 | for (;;)
|
429 | 441 | {
|
| 442 | + struct stat stat_buf; |
| 443 | + char pathname[MAXPGPATH]; |
| 444 | + |
430 | 445 | /*
|
431 | 446 | * Do not initiate any more archive commands after receiving
|
432 | 447 | * SIGTERM, nor after the postmaster has died unexpectedly. The
|
@@ -456,6 +471,46 @@ pgarch_ArchiverCopyLoop(void)
|
456 | 471 | return;
|
457 | 472 | }
|
458 | 473 |
|
| 474 | + /* |
| 475 | + * Since archive status files are not removed in a durable manner, |
| 476 | + * a system crash could leave behind .ready files for WAL segments |
| 477 | + * that have already been recycled or removed. In this case, |
| 478 | + * simply remove the orphan status file and move on. unlink() is |
| 479 | + * used here as even on subsequent crashes the same orphan files |
| 480 | + * would get removed, so there is no need to worry about |
| 481 | + * durability. |
| 482 | + */ |
| 483 | + snprintf(pathname, MAXPGPATH, XLOGDIR "/%s", xlog); |
| 484 | + if (stat(pathname, &stat_buf) != 0 && errno == ENOENT) |
| 485 | + { |
| 486 | + char xlogready[MAXPGPATH]; |
| 487 | + |
| 488 | + StatusFilePath(xlogready, xlog, ".ready"); |
| 489 | + if (unlink(xlogready) == 0) |
| 490 | + { |
| 491 | + ereport(WARNING, |
| 492 | + (errmsg("removed orphan archive status file \"%s\"", |
| 493 | + xlogready))); |
| 494 | + |
| 495 | + /* leave loop and move to the next status file */ |
| 496 | + break; |
| 497 | + } |
| 498 | + |
| 499 | + if (++failures_orphan >= NUM_ORPHAN_CLEANUP_RETRIES) |
| 500 | + { |
| 501 | + ereport(WARNING, |
| 502 | + (errmsg("removal of orphan archive status file \"%s\" failed too many times, will try again later", |
| 503 | + xlogready))); |
| 504 | + |
| 505 | + /* give up cleanup of orphan status files */ |
| 506 | + return; |
| 507 | + } |
| 508 | + |
| 509 | + /* wait a bit before retrying */ |
| 510 | + pg_usleep(1000000L); |
| 511 | + continue; |
| 512 | + } |
| 513 | + |
459 | 514 | if (pgarch_archiveXlog(xlog))
|
460 | 515 | {
|
461 | 516 | /* successful */
|
|
0 commit comments