Skip to content

Commit d0b776b

Browse files
committed
Fix failure to guarantee that a checkpoint will write out pg_clog updates for transaction commits that occurred just before the checkpoint.
This is an EXTREMELY serious bug --- kudos to Satoshi Okada for creating a reproducible test case to prove its existence.
1 parent fbec0d7 commit d0b776b

File tree

3 files changed: 40 additions (+40), 7 deletions (-7)

src/backend/access/transam/xact.c

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.115.2.1 2002/03/15 19:20:43 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.115.2.2 2004/08/11 04:09:12 tgl Exp $
1212
*
1313
* NOTES
1414
* Transaction aborts can now occur two ways:
@@ -557,13 +557,27 @@ RecordTransactionCommit(void)
557557
*/
558558
if (MyXactMadeXLogEntry)
559559
{
560+
bool madeTCentries;
560561
XLogRecPtr recptr;
561562

562563
BufmgrCommit();
563564

564565
START_CRIT_SECTION();
565566

566-
if (MyLastRecPtr.xrecoff != 0)
567+
madeTCentries = (MyLastRecPtr.xrecoff != 0);
568+
569+
/*
570+
* We need to lock out checkpoint start between writing our XLOG
571+
* record and updating pg_clog. Otherwise it is possible for the
572+
* checkpoint to set REDO after the XLOG record but fail to flush the
573+
* pg_clog update to disk, leading to loss of the transaction commit
574+
* if we crash a little later. Slightly klugy fix for problem
575+
* discovered 2004-08-10.
576+
*/
577+
if (madeTCentries)
578+
LWLockAcquire(CheckpointStartLock, LW_SHARED);
579+
580+
if (madeTCentries)
567581
{
568582
/* Need to emit a commit record */
569583
XLogRecData rdata;
@@ -610,9 +624,13 @@ RecordTransactionCommit(void)
610624
XLogFlush(recptr);
611625

612626
/* Mark the transaction committed in clog, if needed */
613-
if (MyLastRecPtr.xrecoff != 0)
627+
if (madeTCentries)
614628
TransactionIdCommit(xid);
615629

630+
/* Unlock checkpoint lock if we acquired it */
631+
if (madeTCentries)
632+
LWLockRelease(CheckpointStartLock);
633+
616634
END_CRIT_SECTION();
617635
}
618636

@@ -712,6 +730,8 @@ RecordTransactionAbort(void)
712730
* nowhere in permanent storage, so no one will ever care if it
713731
* committed.) We do not flush XLOG to disk in any case, since the
714732
* default assumption after a crash would be that we aborted, anyway.
733+
* For the same reason, we don't need to worry about interlocking
734+
* against checkpoint start.
715735
*
716736
* Extra check here is to catch case that we aborted partway through
717737
* RecordTransactionCommit ...

src/backend/access/transam/xlog.c

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.86.2.3 2003/01/21 19:51:42 tgl Exp $
10+
* $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.86.2.4 2004/08/11 04:09:12 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -2945,6 +2945,15 @@ CreateCheckPoint(bool shutdown, bool force)
29452945
checkPoint.ThisStartUpID = ThisStartUpID;
29462946
checkPoint.time = time(NULL);
29472947

2948+
/*
2949+
* We must hold CheckpointStartLock while determining the checkpoint
2950+
* REDO pointer. This ensures that any concurrent transaction commits
2951+
* will be either not yet logged, or logged and recorded in pg_clog.
2952+
* See notes in RecordTransactionCommit().
2953+
*/
2954+
LWLockAcquire(CheckpointStartLock, LW_EXCLUSIVE);
2955+
2956+
/* And we need WALInsertLock too */
29482957
LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
29492958

29502959
/*
@@ -2976,6 +2985,7 @@ CreateCheckPoint(bool shutdown, bool force)
29762985
ControlFile->checkPointCopy.redo.xrecoff)
29772986
{
29782987
LWLockRelease(WALInsertLock);
2988+
LWLockRelease(CheckpointStartLock);
29792989
LWLockRelease(CheckpointLock);
29802990
END_CRIT_SECTION();
29812991
return;
@@ -3035,11 +3045,13 @@ CreateCheckPoint(bool shutdown, bool force)
30353045
#endif
30363046

30373047
/*
3038-
* Now we can release insert lock, allowing other xacts to proceed
3039-
* even while we are flushing disk buffers.
3048+
* Now we can release insert lock and checkpoint start lock, allowing
3049+
* other xacts to proceed even while we are flushing disk buffers.
30403050
*/
30413051
LWLockRelease(WALInsertLock);
30423052

3053+
LWLockRelease(CheckpointStartLock);
3054+
30433055
LWLockAcquire(XidGenLock, LW_SHARED);
30443056
checkPoint.nextXid = ShmemVariableCache->nextXid;
30453057
LWLockRelease(XidGenLock);

src/include/storage/lwlock.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $Id: lwlock.h,v 1.4 2001/11/05 17:46:35 momjian Exp $
10+
* $Id: lwlock.h,v 1.4.2.1 2004/08/11 04:09:14 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -37,6 +37,7 @@ typedef enum LWLockId
3737
WALWriteLock,
3838
ControlFileLock,
3939
CheckpointLock,
40+
CheckpointStartLock,
4041
CLogControlLock,
4142

4243
NumFixedLWLocks, /* must be last except for

0 commit comments

Comments
 (0)