Skip to content

Commit 25ec228

Browse files
committed
Track the current XID wrap limit (or, more accurately, the oldest unfrozen XID) in checkpoint records. This eliminates the need to recompute the value from scratch during database startup, which is one of the two remaining reasons for the flatfile code to exist. It should also simplify life for hot-standby operation. To avoid bloating the checkpoint records unreasonably, I switched from tracking the oldest database by name to tracking it by OID. This turns out to save cycles in general (everywhere except the warning-generating paths, which we hardly care about) and also helps us handle the case where the oldest database was dropped instead of being vacuumed. The previous code could go a long time without updating the wrap limit in that case, which is bad because it might result in a lot of useless autovacuum activity.
1 parent e1cc641 commit 25ec228

File tree

9 files changed

+198
-77
lines changed

9 files changed

+198
-77
lines changed

src/backend/access/transam/varsup.c

+125-31
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Copyright (c) 2000-2009, PostgreSQL Global Development Group
77
*
88
* IDENTIFICATION
9-
* $PostgreSQL: pgsql/src/backend/access/transam/varsup.c,v 1.84 2009/04/23 00:23:45 tgl Exp $
9+
* $PostgreSQL: pgsql/src/backend/access/transam/varsup.c,v 1.85 2009/08/31 02:23:21 tgl Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -16,11 +16,13 @@
1616
#include "access/clog.h"
1717
#include "access/subtrans.h"
1818
#include "access/transam.h"
19+
#include "commands/dbcommands.h"
1920
#include "miscadmin.h"
2021
#include "postmaster/autovacuum.h"
2122
#include "storage/pmsignal.h"
2223
#include "storage/proc.h"
2324
#include "utils/builtins.h"
25+
#include "utils/syscache.h"
2426

2527

2628
/* Number of OIDs to prefetch (preallocate) per XLOG write */
@@ -31,9 +33,14 @@ VariableCache ShmemVariableCache = NULL;
3133

3234

3335
/*
34-
* Allocate the next XID for my new transaction or subtransaction.
36+
* Allocate the next XID for a new transaction or subtransaction.
3537
*
3638
* The new XID is also stored into MyProc before returning.
39+
*
40+
* Note: when this is called, we are actually already inside a valid
41+
* transaction, since XIDs are now not allocated until the transaction
42+
* does something. So it is safe to do a database lookup if we want to
43+
* issue a warning about XID wrap.
3744
*/
3845
TransactionId
3946
GetNewTransactionId(bool isSubXact)
@@ -72,6 +79,20 @@ GetNewTransactionId(bool isSubXact)
7279
if (TransactionIdFollowsOrEquals(xid, ShmemVariableCache->xidVacLimit) &&
7380
TransactionIdIsValid(ShmemVariableCache->xidVacLimit))
7481
{
82+
/*
83+
* For safety's sake, we release XidGenLock while sending signals,
84+
* warnings, etc. This is not so much because we care about
85+
* preserving concurrency in this situation, as to avoid any
86+
* possibility of deadlock while doing get_database_name().
87+
* First, copy all the shared values we'll need in this path.
88+
*/
89+
TransactionId xidWarnLimit = ShmemVariableCache->xidWarnLimit;
90+
TransactionId xidStopLimit = ShmemVariableCache->xidStopLimit;
91+
TransactionId xidWrapLimit = ShmemVariableCache->xidWrapLimit;
92+
Oid oldest_datoid = ShmemVariableCache->oldestXidDB;
93+
94+
LWLockRelease(XidGenLock);
95+
7596
/*
7697
* To avoid swamping the postmaster with signals, we issue the autovac
7798
* request only once per 64K transaction starts. This still gives
@@ -81,22 +102,50 @@ GetNewTransactionId(bool isSubXact)
81102
SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
82103

83104
if (IsUnderPostmaster &&
84-
TransactionIdFollowsOrEquals(xid, ShmemVariableCache->xidStopLimit))
85-
ereport(ERROR,
86-
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
87-
errmsg("database is not accepting commands to avoid wraparound data loss in database \"%s\"",
88-
NameStr(ShmemVariableCache->limit_datname)),
89-
errhint("Stop the postmaster and use a standalone backend to vacuum database \"%s\".\n"
90-
"You might also need to commit or roll back old prepared transactions.",
91-
NameStr(ShmemVariableCache->limit_datname))));
92-
else if (TransactionIdFollowsOrEquals(xid, ShmemVariableCache->xidWarnLimit))
93-
ereport(WARNING,
94-
(errmsg("database \"%s\" must be vacuumed within %u transactions",
95-
NameStr(ShmemVariableCache->limit_datname),
96-
ShmemVariableCache->xidWrapLimit - xid),
97-
errhint("To avoid a database shutdown, execute a database-wide VACUUM in \"%s\".\n"
98-
"You might also need to commit or roll back old prepared transactions.",
99-
NameStr(ShmemVariableCache->limit_datname))));
105+
TransactionIdFollowsOrEquals(xid, xidStopLimit))
106+
{
107+
char *oldest_datname = get_database_name(oldest_datoid);
108+
109+
/* complain even if that DB has disappeared */
110+
if (oldest_datname)
111+
ereport(ERROR,
112+
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
113+
errmsg("database is not accepting commands to avoid wraparound data loss in database \"%s\"",
114+
oldest_datname),
115+
errhint("Stop the postmaster and use a standalone backend to vacuum that database.\n"
116+
"You might also need to commit or roll back old prepared transactions.")));
117+
else
118+
ereport(ERROR,
119+
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
120+
errmsg("database is not accepting commands to avoid wraparound data loss in database with OID %u",
121+
oldest_datoid),
122+
errhint("Stop the postmaster and use a standalone backend to vacuum that database.\n"
123+
"You might also need to commit or roll back old prepared transactions.")));
124+
}
125+
else if (TransactionIdFollowsOrEquals(xid, xidWarnLimit))
126+
{
127+
char *oldest_datname = get_database_name(oldest_datoid);
128+
129+
/* complain even if that DB has disappeared */
130+
if (oldest_datname)
131+
ereport(WARNING,
132+
(errmsg("database \"%s\" must be vacuumed within %u transactions",
133+
oldest_datname,
134+
xidWrapLimit - xid),
135+
errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n"
136+
"You might also need to commit or roll back old prepared transactions.")));
137+
else
138+
ereport(WARNING,
139+
(errmsg("database with OID %u must be vacuumed within %u transactions",
140+
oldest_datoid,
141+
xidWrapLimit - xid),
142+
errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n"
143+
"You might also need to commit or roll back old prepared transactions.")));
144+
}
145+
146+
/* Re-acquire lock and start over */
147+
LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
148+
xid = ShmemVariableCache->nextXid;
100149
}
101150

102151
/*
@@ -199,11 +248,10 @@ ReadNewTransactionId(void)
199248
/*
200249
* Determine the last safe XID to allocate given the currently oldest
201250
* datfrozenxid (ie, the oldest XID that might exist in any database
202-
* of our cluster).
251+
* of our cluster), and the OID of the (or a) database with that value.
203252
*/
204253
void
205-
SetTransactionIdLimit(TransactionId oldest_datfrozenxid,
206-
Name oldest_datname)
254+
SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
207255
{
208256
TransactionId xidVacLimit;
209257
TransactionId xidWarnLimit;
@@ -275,14 +323,14 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid,
275323
ShmemVariableCache->xidWarnLimit = xidWarnLimit;
276324
ShmemVariableCache->xidStopLimit = xidStopLimit;
277325
ShmemVariableCache->xidWrapLimit = xidWrapLimit;
278-
namecpy(&ShmemVariableCache->limit_datname, oldest_datname);
326+
ShmemVariableCache->oldestXidDB = oldest_datoid;
279327
curXid = ShmemVariableCache->nextXid;
280328
LWLockRelease(XidGenLock);
281329

282330
/* Log the info */
283331
ereport(DEBUG1,
284-
(errmsg("transaction ID wrap limit is %u, limited by database \"%s\"",
285-
xidWrapLimit, NameStr(*oldest_datname))));
332+
(errmsg("transaction ID wrap limit is %u, limited by database with OID %u",
333+
xidWrapLimit, oldest_datoid)));
286334

287335
/*
288336
* If past the autovacuum force point, immediately signal an autovac
@@ -297,13 +345,59 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid,
297345

298346
/* Give an immediate warning if past the wrap warn point */
299347
if (TransactionIdFollowsOrEquals(curXid, xidWarnLimit))
300-
ereport(WARNING,
301-
(errmsg("database \"%s\" must be vacuumed within %u transactions",
302-
NameStr(*oldest_datname),
303-
xidWrapLimit - curXid),
304-
errhint("To avoid a database shutdown, execute a database-wide VACUUM in \"%s\".\n"
305-
"You might also need to commit or roll back old prepared transactions.",
306-
NameStr(*oldest_datname))));
348+
{
349+
char *oldest_datname = get_database_name(oldest_datoid);
350+
351+
/*
352+
* Note: it's possible that get_database_name fails and returns NULL,
353+
* for example because the database just got dropped. We'll still
354+
* warn, even though the warning might now be unnecessary.
355+
*/
356+
if (oldest_datname)
357+
ereport(WARNING,
358+
(errmsg("database \"%s\" must be vacuumed within %u transactions",
359+
oldest_datname,
360+
xidWrapLimit - curXid),
361+
errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n"
362+
"You might also need to commit or roll back old prepared transactions.")));
363+
else
364+
ereport(WARNING,
365+
(errmsg("database with OID %u must be vacuumed within %u transactions",
366+
oldest_datoid,
367+
xidWrapLimit - curXid),
368+
errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n"
369+
"You might also need to commit or roll back old prepared transactions.")));
370+
}
371+
}
372+
373+
374+
/*
375+
* TransactionIdLimitIsValid -- is the shared XID wrap-limit data sane?
376+
*
377+
* We primarily check whether oldestXidDB is valid. The cases we have in
378+
* mind are that that database was dropped, or the field was reset to zero
379+
* by pg_resetxlog. In either case we should force recalculation of the
380+
* wrap limit. In future we might add some more sanity checks here.
381+
*/
382+
bool
383+
TransactionIdLimitIsValid(void)
384+
{
385+
TransactionId oldestXid;
386+
Oid oldestXidDB;
387+
388+
/* Locking is probably not really necessary, but let's be careful */
389+
LWLockAcquire(XidGenLock, LW_SHARED);
390+
oldestXid = ShmemVariableCache->oldestXid;
391+
oldestXidDB = ShmemVariableCache->oldestXidDB;
392+
LWLockRelease(XidGenLock);
393+
394+
if (!TransactionIdIsNormal(oldestXid))
395+
return false; /* shouldn't happen, but just in case */
396+
if (!SearchSysCacheExists(DATABASEOID,
397+
ObjectIdGetDatum(oldestXidDB),
398+
0, 0, 0))
399+
return false; /* could happen, per comment above */
400+
return true;
307401
}
308402

309403

src/backend/access/transam/xlog.c

+25-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.349 2009/08/27 07:15:41 heikki Exp $
10+
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.350 2009/08/31 02:23:22 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -34,6 +34,7 @@
3434
#include "access/xlogutils.h"
3535
#include "catalog/catversion.h"
3636
#include "catalog/pg_control.h"
37+
#include "catalog/pg_database.h"
3738
#include "catalog/pg_type.h"
3839
#include "funcapi.h"
3940
#include "libpq/pqsignal.h"
@@ -4638,12 +4639,16 @@ BootStrapXLOG(void)
46384639
checkPoint.nextOid = FirstBootstrapObjectId;
46394640
checkPoint.nextMulti = FirstMultiXactId;
46404641
checkPoint.nextMultiOffset = 0;
4642+
checkPoint.oldestXid = FirstNormalTransactionId;
4643+
checkPoint.oldestXidDB = TemplateDbOid;
46414644
checkPoint.time = (pg_time_t) time(NULL);
46424645

46434646
ShmemVariableCache->nextXid = checkPoint.nextXid;
46444647
ShmemVariableCache->nextOid = checkPoint.nextOid;
46454648
ShmemVariableCache->oidCount = 0;
46464649
MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
4650+
ShmemVariableCache->oldestXid = checkPoint.oldestXid;
4651+
ShmemVariableCache->oldestXidDB = checkPoint.oldestXidDB;
46474652

46484653
/* Set up the XLOG page header */
46494654
page->xlp_magic = XLOG_PAGE_MAGIC;
@@ -5355,6 +5360,9 @@ StartupXLOG(void)
53555360
ereport(DEBUG1,
53565361
(errmsg("next MultiXactId: %u; next MultiXactOffset: %u",
53575362
checkPoint.nextMulti, checkPoint.nextMultiOffset)));
5363+
ereport(DEBUG1,
5364+
(errmsg("oldest unfrozen transaction ID: %u, in database %u",
5365+
checkPoint.oldestXid, checkPoint.oldestXidDB)));
53585366
if (!TransactionIdIsNormal(checkPoint.nextXid))
53595367
ereport(PANIC,
53605368
(errmsg("invalid next transaction ID")));
@@ -5363,6 +5371,8 @@ StartupXLOG(void)
53635371
ShmemVariableCache->nextOid = checkPoint.nextOid;
53645372
ShmemVariableCache->oidCount = 0;
53655373
MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5374+
ShmemVariableCache->oldestXid = checkPoint.oldestXid;
5375+
ShmemVariableCache->oldestXidDB = checkPoint.oldestXidDB;
53665376

53675377
/*
53685378
* We must replay WAL entries using the same TimeLineID they were created
@@ -6546,6 +6556,8 @@ CreateCheckPoint(int flags)
65466556
*/
65476557
LWLockAcquire(XidGenLock, LW_SHARED);
65486558
checkPoint.nextXid = ShmemVariableCache->nextXid;
6559+
checkPoint.oldestXid = ShmemVariableCache->oldestXid;
6560+
checkPoint.oldestXidDB = ShmemVariableCache->oldestXidDB;
65496561
LWLockRelease(XidGenLock);
65506562

65516563
/* Increase XID epoch if we've wrapped around since last checkpoint */
@@ -6984,6 +6996,8 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
69846996
ShmemVariableCache->oidCount = 0;
69856997
MultiXactSetNextMXact(checkPoint.nextMulti,
69866998
checkPoint.nextMultiOffset);
6999+
ShmemVariableCache->oldestXid = checkPoint.oldestXid;
7000+
ShmemVariableCache->oldestXidDB = checkPoint.oldestXidDB;
69877001

69887002
/* ControlFile->checkPointCopy always tracks the latest ckpt XID */
69897003
ControlFile->checkPointCopy.nextXidEpoch = checkPoint.nextXidEpoch;
@@ -7022,6 +7036,12 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
70227036
}
70237037
MultiXactAdvanceNextMXact(checkPoint.nextMulti,
70247038
checkPoint.nextMultiOffset);
7039+
if (TransactionIdPrecedes(ShmemVariableCache->oldestXid,
7040+
checkPoint.oldestXid))
7041+
{
7042+
ShmemVariableCache->oldestXid = checkPoint.oldestXid;
7043+
ShmemVariableCache->oldestXidDB = checkPoint.oldestXidDB;
7044+
}
70257045

70267046
/* ControlFile->checkPointCopy always tracks the latest ckpt XID */
70277047
ControlFile->checkPointCopy.nextXidEpoch = checkPoint.nextXidEpoch;
@@ -7056,13 +7076,16 @@ xlog_desc(StringInfo buf, uint8 xl_info, char *rec)
70567076
CheckPoint *checkpoint = (CheckPoint *) rec;
70577077

70587078
appendStringInfo(buf, "checkpoint: redo %X/%X; "
7059-
"tli %u; xid %u/%u; oid %u; multi %u; offset %u; %s",
7079+
"tli %u; xid %u/%u; oid %u; multi %u; offset %u; "
7080+
"oldest xid %u in DB %u; %s",
70607081
checkpoint->redo.xlogid, checkpoint->redo.xrecoff,
70617082
checkpoint->ThisTimeLineID,
70627083
checkpoint->nextXidEpoch, checkpoint->nextXid,
70637084
checkpoint->nextOid,
70647085
checkpoint->nextMulti,
70657086
checkpoint->nextMultiOffset,
7087+
checkpoint->oldestXid,
7088+
checkpoint->oldestXidDB,
70667089
(info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online");
70677090
}
70687091
else if (info == XLOG_NOOP)

src/backend/commands/vacuum.c

+9-8
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
*
1414
*
1515
* IDENTIFICATION
16-
* $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.390 2009/08/24 02:18:31 tgl Exp $
16+
* $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.391 2009/08/31 02:23:22 tgl Exp $
1717
*
1818
*-------------------------------------------------------------------------
1919
*/
@@ -895,8 +895,9 @@ vac_update_datfrozenxid(void)
895895
/*
896896
* If we were able to advance datfrozenxid, mark the flat-file copy of
897897
* pg_database for update at commit, and see if we can truncate pg_clog.
898+
* Also force update if the shared XID-wrap-limit info is stale.
898899
*/
899-
if (dirty)
900+
if (dirty || !TransactionIdLimitIsValid())
900901
{
901902
database_file_update_needed();
902903
vac_truncate_clog(newFrozenXid);
@@ -916,7 +917,7 @@ vac_update_datfrozenxid(void)
916917
*
917918
* This routine is shared by full and lazy VACUUM. Note that it's
918919
* only invoked when we've managed to change our DB's datfrozenxid
919-
* entry.
920+
* entry, or we found that the shared XID-wrap-limit info is stale.
920921
*/
921922
static void
922923
vac_truncate_clog(TransactionId frozenXID)
@@ -925,11 +926,11 @@ vac_truncate_clog(TransactionId frozenXID)
925926
Relation relation;
926927
HeapScanDesc scan;
927928
HeapTuple tuple;
928-
NameData oldest_datname;
929+
Oid oldest_datoid;
929930
bool frozenAlreadyWrapped = false;
930931

931-
/* init oldest_datname to sync with my frozenXID */
932-
namestrcpy(&oldest_datname, get_database_name(MyDatabaseId));
932+
/* init oldest_datoid to sync with my frozenXID */
933+
oldest_datoid = MyDatabaseId;
933934

934935
/*
935936
* Scan pg_database to compute the minimum datfrozenxid
@@ -958,7 +959,7 @@ vac_truncate_clog(TransactionId frozenXID)
958959
else if (TransactionIdPrecedes(dbform->datfrozenxid, frozenXID))
959960
{
960961
frozenXID = dbform->datfrozenxid;
961-
namecpy(&oldest_datname, &dbform->datname);
962+
oldest_datoid = HeapTupleGetOid(tuple);
962963
}
963964
}
964965

@@ -987,7 +988,7 @@ vac_truncate_clog(TransactionId frozenXID)
987988
* Update the wrap limit for GetNewTransactionId. Note: this function
988989
* will also signal the postmaster for an(other) autovac cycle if needed.
989990
*/
990-
SetTransactionIdLimit(frozenXID, &oldest_datname);
991+
SetTransactionIdLimit(frozenXID, oldest_datoid);
991992
}
992993

993994

0 commit comments

Comments
 (0)