Skip to content

Commit ab3b160

Browse files
committed
send pg_stop_backup() asynchronously, cancel query if no answer in 300 seconds
1 parent 1a9dd34 commit ab3b160

File tree

5 files changed

+90
-9
lines changed

5 files changed

+90
-9
lines changed

backup.c

Lines changed: 56 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -909,6 +909,9 @@ pg_stop_backup(pgBackup *backup)
909909
uint32 xlogid;
910910
uint32 xrecoff;
911911
XLogRecPtr restore_lsn;
912+
bool sent = false;
913+
int pg_stop_backup_timeout = 0;
914+
int is_busy = 1;
912915

913916
/*
914917
* We will use these values if there are no transactions between start_lsn
@@ -999,23 +1002,72 @@ pg_stop_backup(pgBackup *backup)
9991002
pfree(backup_id);
10001003
}
10011004

1005+
/*
1006+
* send pg_stop_backup asynchronously because we could come
1007+
* here from backup_cleanup() after some error caused by
1008+
* postgres archive_command problem and in this case we will
1009+
* wait for pg_stop_backup() forever.
1010+
*/
10021011
if (!exclusive_backup)
10031012
/*
10041013
* Stop the non-exclusive backup. Besides stop_lsn it returns from
10051014
* pg_stop_backup(false) copy of the backup label and tablespace map
10061015
* so they can be written to disk by the caller.
10071016
*/
1008-
res = pgut_execute(backup_conn,
1017+
sent = pgut_send(backup_conn,
10091018
"SELECT *, txid_snapshot_xmax(txid_current_snapshot()),"
10101019
" current_timestamp(0)::timestamp"
10111020
" FROM pg_stop_backup(false)",
1012-
0, NULL);
1021+
0, NULL, WARNING);
10131022
else
1014-
res = pgut_execute(backup_conn,
1023+
sent = pgut_send(backup_conn,
10151024
"SELECT *, txid_snapshot_xmax(txid_current_snapshot()),"
10161025
" current_timestamp(0)::timestamp"
10171026
" FROM pg_stop_backup()",
1018-
0, NULL);
1027+
0, NULL, WARNING);
1028+
1029+
if (!sent)
1030+
elog(WARNING, "Failed to send pg_stop_backup query");
1031+
1032+
1033+
/*
1034+
* Wait for the result of pg_stop_backup(),
1035+
* but no longer than PG_STOP_BACKUP_TIMEOUT seconds
1036+
*/
1037+
elog(INFO, "wait for pg_stop_backup()");
1038+
do
1039+
{
1040+
/*
1041+
* PQisBusy returns 1 if a command is busy, that is, PQgetResult would
1042+
* block waiting for input. A 0 return indicates that PQgetResult can
1043+
* be called with assurance of not blocking
1044+
*/
1045+
is_busy = PQisBusy(backup_conn);
1046+
pg_stop_backup_timeout++;
1047+
sleep(1);
1048+
1049+
if (interrupted)
1050+
{
1051+
pgut_cancel(backup_conn);
1052+
elog(ERROR, "interrupted during waiting for pg_stop_backup");
1053+
}
1054+
1055+
} while (is_busy && pg_stop_backup_timeout < PG_STOP_BACKUP_TIMEOUT);
1056+
1057+
/*
1058+
* If postgres hasn't answered in PG_STOP_BACKUP_TIMEOUT seconds,
1059+
* send an interrupt.
1060+
*/
1061+
if (is_busy)
1062+
{
1063+
pgut_cancel(backup_conn);
1064+
elog(ERROR, "pg_stop_backup doesn't finish in 300 seconds.");
1065+
}
1066+
1067+
res = PQgetResult(backup_conn);
1068+
1069+
if (!res)
1070+
elog(ERROR, "pg_stop backup() failed");
10191071

10201072
backup_in_progress = false;
10211073

data.c

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -570,8 +570,6 @@ restore_data_file(const char *from_root,
570570
if (uncompressed_size != BLCKSZ)
571571
elog(ERROR, "page uncompressed to %ld bytes. != BLCKSZ", uncompressed_size);
572572
}
573-
else
574-
memcpy(page.data, compressed_page.data, BLCKSZ);
575573

576574
/*
577575
* Seek and write the restored page.
@@ -580,9 +578,20 @@ restore_data_file(const char *from_root,
580578
if (fseek(out, blknum * BLCKSZ, SEEK_SET) < 0)
581579
elog(ERROR, "cannot seek block %u of \"%s\": %s",
582580
blknum, to_path, strerror(errno));
583-
if (fwrite(page.data, 1, sizeof(page), out) != sizeof(page))
584-
elog(ERROR, "cannot write block %u of \"%s\": %s",
585-
blknum, file->path, strerror(errno));
581+
582+
if (header.compressed_size < BLCKSZ)
583+
{
584+
if (fwrite(page.data, 1, BLCKSZ, out) != BLCKSZ)
585+
elog(ERROR, "cannot write block %u of \"%s\": %s",
586+
blknum, file->path, strerror(errno));
587+
}
588+
else
589+
{
590+
/* if page wasn't compressed, we've read full block */
591+
if (fwrite(compressed_page.data, 1, BLCKSZ, out) != BLCKSZ)
592+
elog(ERROR, "cannot write block %u of \"%s\": %s",
593+
blknum, file->path, strerror(errno));
594+
}
586595
}
587596

588597
/* update file permission */

pg_probackup.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333

3434
#include "datapagemap.h"
3535

36+
# define PG_STOP_BACKUP_TIMEOUT 300
3637
/*
3738
* Macro needed to parse ptrack.
3839
* NOTE Keep those values synchronised with definitions in ptrack.h

utils/pgut.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,6 +1174,24 @@ pgut_send(PGconn* conn, const char *query, int nParams, const char **params, int
11741174
return true;
11751175
}
11761176

1177+
void
1178+
pgut_cancel(PGconn* conn)
1179+
{
1180+
PGcancel *cancel_conn = PQgetCancel(conn);
1181+
char errbuf[256];
1182+
1183+
if (cancel_conn != NULL)
1184+
{
1185+
if (PQcancel(cancel_conn, errbuf, sizeof(errbuf)))
1186+
elog(WARNING, "Cancel request sent");
1187+
else
1188+
elog(WARNING, "Cancel request failed");
1189+
}
1190+
1191+
if (cancel_conn)
1192+
PQfreeCancel(cancel_conn);
1193+
}
1194+
11771195
int
11781196
pgut_wait(int num, PGconn *connections[], struct timeval *timeout)
11791197
{

utils/pgut.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ extern PGconn *pgut_connect_extended(const char *pghost, const char *pgport,
122122
extern void pgut_disconnect(PGconn *conn);
123123
extern PGresult *pgut_execute(PGconn* conn, const char *query, int nParams, const char **params);
124124
extern bool pgut_send(PGconn* conn, const char *query, int nParams, const char **params, int elevel);
125+
extern void pgut_cancel(PGconn* conn);
125126
extern int pgut_wait(int num, PGconn *connections[], struct timeval *timeout);
126127

127128
extern const char *pgut_get_host(void);

0 commit comments

Comments
 (0)