Skip to content

Commit 3036401

Browse files
committed
Fix minor problems with non-exclusive backup cleanup.
The previous coding imagined that it could call before_shmem_exit() when a non-exclusive backup began and then remove the previously-added handler by calling cancel_before_shmem_exit() when that backup ended. However, this only works provided that nothing else in the system has registered a before_shmem_exit() hook in the interim, because cancel_before_shmem_exit() is documented to remove a callback only if it is the latest callback registered. It also only works if nothing can ERROR out between the time that sessionBackupState is reset and the time that cancel_before_shmem_exit() is called, which doesn't seem to be strictly true. To fix, leave the handler installed for the lifetime of the session, arrange to install it just once, and teach it to quietly do nothing if there isn't a non-exclusive backup in process. This is a bug, but for now I'm not going to back-patch, because the consequences are minor. It's possible to cause a spurious warning to be generated, but that doesn't really matter. It's also possible to trigger an assertion failure, but production builds shouldn't have assertions enabled. Patch by me, reviewed by Kyotaro Horiguchi, Michael Paquier (who preferred a different approach, but got outvoted), Fujii Masao, and Tom Lane, and with comments by various others. Discussion: http://postgr.es/m/CA+TgmobMjnyBfNhGTKQEDbqXYE3_rXWpc4CM63fhyerNCes3mA@mail.gmail.com
1 parent e975c1a commit 3036401

File tree

4 files changed

+35
-33
lines changed

4 files changed

+35
-33
lines changed

src/backend/access/transam/xlog.c

+29-3
Original file line numberDiff line numberDiff line change
@@ -11133,23 +11133,30 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
1113311133
* system out of backup mode, thus making it a lot more safe to call from
1113411134
* an error handler.
1113511135
*
11136+
* The caller can pass 'arg' as 'true' or 'false' to control whether a warning
11137+
* is emitted.
11138+
*
1113611139
* NB: This is only for aborting a non-exclusive backup that doesn't write
1113711140
* backup_label. A backup started with pg_start_backup() needs to be finished
1113811141
* with pg_stop_backup().
11142+
*
11143+
* NB: This gets used as a before_shmem_exit handler, hence the odd-looking
11144+
* signature.
1113911145
*/
1114011146
void
11141-
do_pg_abort_backup(void)
11147+
do_pg_abort_backup(int code, Datum arg)
1114211148
{
11149+
bool emit_warning = DatumGetBool(arg);
11150+
1114311151
/*
1114411152
* Quick exit if session is not keeping around a non-exclusive backup
1114511153
* already started.
1114611154
*/
11147-
if (sessionBackupState == SESSION_BACKUP_NONE)
11155+
if (sessionBackupState != SESSION_BACKUP_NON_EXCLUSIVE)
1114811156
return;
1114911157

1115011158
WALInsertLockAcquireExclusive();
1115111159
Assert(XLogCtl->Insert.nonExclusiveBackups > 0);
11152-
Assert(sessionBackupState == SESSION_BACKUP_NON_EXCLUSIVE);
1115311160
XLogCtl->Insert.nonExclusiveBackups--;
1115411161

1115511162
if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE &&
@@ -11158,6 +11165,25 @@ do_pg_abort_backup(void)
1115811165
XLogCtl->Insert.forcePageWrites = false;
1115911166
}
1116011167
WALInsertLockRelease();
11168+
11169+
if (emit_warning)
11170+
ereport(WARNING,
11171+
(errmsg("aborting backup due to backend exiting before pg_stop_backup was called")));
11172+
}
11173+
11174+
/*
11175+
* Register a handler that will warn about unterminated backups at end of
11176+
* session, unless this has already been done.
11177+
*/
11178+
void
11179+
register_persistent_abort_backup_handler(void)
11180+
{
11181+
static bool already_done = false;
11182+
11183+
if (already_done)
11184+
return;
11185+
before_shmem_exit(do_pg_abort_backup, DatumGetBool(true));
11186+
already_done = true;
1116111187
}
1116211188

1116311189
/*

src/backend/access/transam/xlogfuncs.c

+2-15
Original file line numberDiff line numberDiff line change
@@ -44,18 +44,6 @@
4444
static StringInfo label_file;
4545
static StringInfo tblspc_map_file;
4646

47-
/*
48-
* Called when the backend exits with a running non-exclusive base backup,
49-
* to clean up state.
50-
*/
51-
static void
52-
nonexclusive_base_backup_cleanup(int code, Datum arg)
53-
{
54-
do_pg_abort_backup();
55-
ereport(WARNING,
56-
(errmsg("aborting backup due to backend exiting before pg_stop_backup was called")));
57-
}
58-
5947
/*
6048
* pg_start_backup: set up for taking an on-line backup dump
6149
*
@@ -103,10 +91,10 @@ pg_start_backup(PG_FUNCTION_ARGS)
10391
tblspc_map_file = makeStringInfo();
10492
MemoryContextSwitchTo(oldcontext);
10593

94+
register_persistent_abort_backup_handler();
95+
10696
startpoint = do_pg_start_backup(backupidstr, fast, NULL, label_file,
10797
NULL, tblspc_map_file, false, true);
108-
109-
before_shmem_exit(nonexclusive_base_backup_cleanup, (Datum) 0);
11098
}
11199

112100
PG_RETURN_LSN(startpoint);
@@ -248,7 +236,6 @@ pg_stop_backup_v2(PG_FUNCTION_ARGS)
248236
* and tablespace map so they can be written to disk by the caller.
249237
*/
250238
stoppoint = do_pg_stop_backup(label_file->data, waitforarchive, NULL);
251-
cancel_before_shmem_exit(nonexclusive_base_backup_cleanup, (Datum) 0);
252239

253240
values[1] = CStringGetTextDatum(label_file->data);
254241
values[2] = CStringGetTextDatum(tblspc_map_file->data);

src/backend/replication/basebackup.c

+2-14
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@ static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *sta
6565
bool sizeonly);
6666
static void send_int8_string(StringInfoData *buf, int64 intval);
6767
static void SendBackupHeader(List *tablespaces);
68-
static void base_backup_cleanup(int code, Datum arg);
6968
static void perform_base_backup(basebackup_options *opt);
7069
static void parse_basebackup_options(List *options, basebackup_options *opt);
7170
static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
@@ -216,17 +215,6 @@ static const char *const noChecksumFiles[] = {
216215
NULL,
217216
};
218217

219-
220-
/*
221-
* Called when ERROR or FATAL happens in perform_base_backup() after
222-
* we have started the backup - make sure we end it!
223-
*/
224-
static void
225-
base_backup_cleanup(int code, Datum arg)
226-
{
227-
do_pg_abort_backup();
228-
}
229-
230218
/*
231219
* Actually do a base backup for the specified tablespaces.
232220
*
@@ -265,7 +253,7 @@ perform_base_backup(basebackup_options *opt)
265253
* do_pg_stop_backup() should be inside the error cleanup block!
266254
*/
267255

268-
PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
256+
PG_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(false));
269257
{
270258
ListCell *lc;
271259
tablespaceinfo *ti;
@@ -374,7 +362,7 @@ perform_base_backup(basebackup_options *opt)
374362

375363
endptr = do_pg_stop_backup(labelfile->data, !opt->nowait, &endtli);
376364
}
377-
PG_END_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
365+
PG_END_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(false));
378366

379367

380368
if (opt->includewal)

src/include/access/xlog.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,8 @@ extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast,
349349
bool needtblspcmapfile);
350350
extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive,
351351
TimeLineID *stoptli_p);
352-
extern void do_pg_abort_backup(void);
352+
extern void do_pg_abort_backup(int code, Datum arg);
353+
extern void register_persistent_abort_backup_handler(void);
353354
extern SessionBackupState get_backup_status(void);
354355

355356
/* File path names (all relative to $PGDATA) */

0 commit comments

Comments
 (0)