Skip to content

Commit 7a294ad

Browse files
committed
Use checkpoint_timeout in stop_streaming()
1 parent dd83e73 commit 7a294ad

File tree

1 file changed

+77
-28
lines changed

1 file changed

+77
-28
lines changed

backup.c

Lines changed: 77 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,15 @@
2828

2929
static int standby_message_timeout = 10 * 1000; /* 10 sec = default */
3030
static XLogRecPtr stop_backup_lsn = InvalidXLogRecPtr;
31+
32+
/*
33+
* How long we should wait for streaming end in seconds.
34+
* Retreived as checkpoint_timeout + checkpoint_timeout * 0.1
35+
*/
36+
static uint32 stream_stop_timeout = 0;
37+
/* Time in which we started to wait for streaming end */
38+
static time_t stream_stop_begin = 0;
39+
3140
const char *progname = "pg_probackup";
3241

3342
/* list of files contained in backup */
@@ -65,6 +74,7 @@ static void do_backup_database(parray *backup_list);
6574
static void pg_start_backup(const char *label, bool smooth, pgBackup *backup);
6675
static void pg_switch_wal(void);
6776
static void pg_stop_backup(pgBackup *backup);
77+
static int checkpoint_timeout(void);
6878

6979
static void add_pgdata_files(parray *files, const char *root);
7080
static void write_backup_file_list(parray *files, const char *root);
@@ -785,8 +795,12 @@ pg_ptrack_get_and_clear(Oid tablespace_oid, Oid db_oid, Oid rel_oid,
785795
}
786796

787797
/*
788-
* Wait for target 'lsn' to be archived in archive 'wal' directory with
789-
* WAL segment file.
798+
* Wait for target 'lsn'.
799+
*
800+
* If current backup started in archive mode wait for 'lsn' to be archived in
801+
* archive 'wal' directory with WAL segment file.
802+
* If current backup started in stream mode wait for 'lsn' to be streamed in
803+
* 'pg_xlog' directory.
790804
*/
791805
static void
792806
wait_wal_lsn(XLogRecPtr lsn)
@@ -807,45 +821,28 @@ wait_wal_lsn(XLogRecPtr lsn)
807821

808822
if (stream_wal)
809823
{
810-
PGresult *res;
811-
const char *val;
812-
const char *hintmsg;
813-
814824
pgBackupGetPath2(&current, wal_dir, lengthof(wal_dir),
815825
DATABASE_DIR, PG_XLOG_DIR);
816826
join_path_components(wal_segment_full_path, wal_dir, wal_segment);
817827

818-
res = pgut_execute(backup_conn, "show checkpoint_timeout", 0, NULL);
819-
val = PQgetvalue(res, 0, 0);
820-
PQclear(res);
821-
822-
if (!parse_int(val, (int *) &timeout, OPTION_UNIT_S,
823-
&hintmsg))
824-
{
825-
if (hintmsg)
826-
elog(ERROR, "Invalid value of checkout_timeout %s: %s", val,
827-
hintmsg);
828-
else
829-
elog(ERROR, "Invalid value of checkout_timeout %s", val);
830-
}
831-
/* Add 3 seconds to the initial value of checkpoint_timeout */
832-
timeout = timeout + 3;
828+
timeout = (uint32) checkpoint_timeout();
829+
timeout = timeout + timeout * 0.1;
833830
}
834831
else
835832
{
836833
join_path_components(wal_segment_full_path, arclog_path, wal_segment);
837834
timeout = archive_timeout;
838835
}
839836

840-
/* Wait until target LSN is archived */
837+
/* Wait until target LSN is archived or streamed */
841838
while (true)
842839
{
843840
bool file_exists = fileExists(wal_segment_full_path);
844841

845842
if (file_exists)
846843
{
847844
/*
848-
* WAL segment was archived. Check LSN on it.
845+
* A WAL segment found. Check LSN on it.
849846
*/
850847
if ((stream_wal && wal_contains_lsn(wal_dir, lsn, tli)) ||
851848
(!stream_wal && wal_contains_lsn(arclog_path, lsn, tli)))
@@ -996,11 +993,12 @@ pg_stop_backup(pgBackup *backup)
996993
PQclear(res);
997994

998995
if (stream_wal)
999-
{
1000996
/* Wait for the completion of stream */
1001-
elog(INFO, "Wait end of WAL streaming");
1002997
pthread_join(stream_thread, NULL);
1003-
}
998+
/*
999+
* Wait for stop_lsn to be archived or streamed.
1000+
* We wait for stop_lsn in stream mode just in case.
1001+
*/
10041002
wait_wal_lsn(stop_backup_lsn);
10051003

10061004
/* Fill in fields if that is the correct end of backup. */
@@ -1032,6 +1030,33 @@ pg_stop_backup(pgBackup *backup)
10321030
}
10331031
}
10341032

1033+
/*
1034+
* Retreive checkpoint_timeout GUC value in seconds.
1035+
*/
1036+
static int
1037+
checkpoint_timeout(void)
1038+
{
1039+
PGresult *res;
1040+
const char *val;
1041+
const char *hintmsg;
1042+
int val_int;
1043+
1044+
res = pgut_execute(backup_conn, "show checkpoint_timeout", 0, NULL);
1045+
val = PQgetvalue(res, 0, 0);
1046+
PQclear(res);
1047+
1048+
if (!parse_int(val, &val_int, OPTION_UNIT_S, &hintmsg))
1049+
{
1050+
if (hintmsg)
1051+
elog(ERROR, "Invalid value of checkout_timeout %s: %s", val,
1052+
hintmsg);
1053+
else
1054+
elog(ERROR, "Invalid value of checkout_timeout %s", val);
1055+
}
1056+
1057+
return val_int;
1058+
}
1059+
10351060
/*
10361061
* Return true if the path is a existing regular file.
10371062
*/
@@ -1647,8 +1672,28 @@ stop_streaming(XLogRecPtr xlogpos, uint32 timeline, bool segment_finished)
16471672
elog(LOG, _("switched to timeline %u at %X/%X\n"),
16481673
timeline, (uint32) (prevpos >> 32), (uint32) prevpos);
16491674

1650-
if (stop_backup_lsn != InvalidXLogRecPtr && xlogpos > stop_backup_lsn)
1651-
return true;
1675+
if (!XLogRecPtrIsInvalid(stop_backup_lsn))
1676+
{
1677+
if (xlogpos > stop_backup_lsn)
1678+
return true;
1679+
1680+
/* pg_stop_backup() was executed, wait for the completion of stream */
1681+
if (stream_stop_timeout == 0)
1682+
{
1683+
elog(INFO, "Wait for LSN %X/%X to be streamed",
1684+
(uint32) (stop_backup_lsn >> 32), (uint32) stop_backup_lsn);
1685+
1686+
stream_stop_timeout = checkpoint_timeout();
1687+
stream_stop_timeout = stream_stop_timeout + stream_stop_timeout * 0.1;
1688+
1689+
stream_stop_begin = time(NULL);
1690+
}
1691+
1692+
if (time(NULL) - stream_stop_begin > stream_stop_timeout)
1693+
elog(ERROR, "Target LSN %X/%X could not be streamed in %d seconds",
1694+
(uint32) (stop_backup_lsn >> 32), (uint32) stop_backup_lsn,
1695+
stream_stop_timeout);
1696+
}
16521697

16531698
prevtimeline = timeline;
16541699
prevpos = xlogpos;
@@ -1709,6 +1754,10 @@ StreamLog(void *arg)
17091754
*/
17101755
startpos -= startpos % XLOG_SEG_SIZE;
17111756

1757+
/* Initialize timeout */
1758+
stream_stop_timeout = 0;
1759+
stream_stop_begin = 0;
1760+
17121761
/*
17131762
* Start the replication
17141763
*/

0 commit comments

Comments
 (0)