Skip to content

Commit 30f540b

Browse files
committed
Repair very-low-probability race condition between relation extension
and VACUUM: in the interval between adding a new page to the relation and formatting it, it was possible for VACUUM to come along and decide it should format the page too. Though not harmful in itself, this would cause data loss if a third transaction were able to insert tuples into the vacuumed page before the original extender got control back.
1 parent b72e5fa commit 30f540b

File tree

4 files changed

+73
-18
lines changed

4 files changed

+73
-18
lines changed

src/backend/access/heap/hio.c

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.55 2005/04/29 22:28:23 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.56 2005/05/07 21:32:23 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -241,13 +241,6 @@ RelationGetBufferForTuple(Relation relation, Size len,
241241
*/
242242
buffer = ReadBuffer(relation, P_NEW);
243243

244-
/*
245-
* Release the file-extension lock; it's now OK for someone else to
246-
* extend the relation some more.
247-
*/
248-
if (needLock)
249-
UnlockRelationForExtension(relation, ExclusiveLock);
250-
251244
/*
252245
* We can be certain that locking the otherBuffer first is OK, since
253246
* it must have a lower page number.
@@ -256,9 +249,22 @@ RelationGetBufferForTuple(Relation relation, Size len,
256249
LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
257250

258251
/*
259-
* We need to initialize the empty new page.
252+
* Now acquire lock on the new page.
260253
*/
261254
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
255+
256+
/*
257+
* Release the file-extension lock; it's now OK for someone else to
258+
* extend the relation some more. Note that we cannot release this
259+
* lock before we have buffer lock on the new page, or we risk a
260+
* race condition against vacuumlazy.c --- see comments therein.
261+
*/
262+
if (needLock)
263+
UnlockRelationForExtension(relation, ExclusiveLock);
264+
265+
/*
266+
* We need to initialize the empty new page.
267+
*/
262268
pageHeader = (Page) BufferGetPage(buffer);
263269
Assert(PageIsNew((PageHeader) pageHeader));
264270
PageInit(pageHeader, BufferGetPageSize(buffer), 0);

src/backend/access/nbtree/nbtpage.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
*
1010
*
1111
* IDENTIFICATION
12-
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.83 2005/04/29 22:28:24 tgl Exp $
12+
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.84 2005/05/07 21:32:23 tgl Exp $
1313
*
1414
* NOTES
1515
* Postgres btree pages look like ordinary relation pages. The opaque
@@ -491,18 +491,21 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
491491

492492
buf = ReadBuffer(rel, P_NEW);
493493

494+
/* Acquire buffer lock on new page */
495+
LockBuffer(buf, BT_WRITE);
496+
494497
/*
495-
* Release the file-extension lock; it's now OK for someone else
496-
* to extend the relation some more.
498+
* Release the file-extension lock; it's now OK for someone else to
499+
* extend the relation some more. Note that we cannot release this
500+
* lock before we have buffer lock on the new page, or we risk a
501+
* race condition against btvacuumcleanup --- see comments therein.
497502
*/
498503
if (needLock)
499504
UnlockRelationForExtension(rel, ExclusiveLock);
500505

501-
/* Acquire appropriate buffer lock on new page */
502-
LockBuffer(buf, access);
503-
504506
/* Initialize the new page before returning it */
505507
page = BufferGetPage(buf);
508+
Assert(PageIsNew((PageHeader) page));
506509
_bt_pageinit(page, BufferGetPageSize(buf));
507510
}
508511

src/backend/access/nbtree/nbtree.c

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* Portions Copyright (c) 1994, Regents of the University of California
1313
*
1414
* IDENTIFICATION
15-
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.128 2005/05/06 17:24:52 tgl Exp $
15+
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.129 2005/05/07 21:32:23 tgl Exp $
1616
*
1717
*-------------------------------------------------------------------------
1818
*/
@@ -739,11 +739,35 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
739739
BlockNumber pages_deleted = 0;
740740
MemoryContext mycontext;
741741
MemoryContext oldcontext;
742+
bool needLock;
742743

743744
Assert(stats != NULL);
744745

746+
/*
747+
* First find out the number of pages in the index. We must acquire
748+
* the relation-extension lock while doing this to avoid a race
749+
* condition: if someone else is extending the relation, there is
750+
* a window where bufmgr/smgr have created a new all-zero page but
751+
* it hasn't yet been write-locked by _bt_getbuf(). If we manage to
752+
* scan such a page here, we'll improperly assume it can be recycled.
753+
* Taking the lock synchronizes things enough to prevent a problem:
754+
* either num_pages won't include the new page, or _bt_getbuf already
755+
* has write lock on the buffer and it will be fully initialized before
756+
* we can examine it. (See also vacuumlazy.c, which has the same issue.)
757+
*
758+
* We can skip locking for new or temp relations,
759+
* however, since no one else could be accessing them.
760+
*/
761+
needLock = !RELATION_IS_LOCAL(rel);
762+
763+
if (needLock)
764+
LockRelationForExtension(rel, ExclusiveLock);
765+
745766
num_pages = RelationGetNumberOfBlocks(rel);
746767

768+
if (needLock)
769+
UnlockRelationForExtension(rel, ExclusiveLock);
770+
747771
/* No point in remembering more than MaxFSMPages pages */
748772
maxFreePages = MaxFSMPages;
749773
if ((BlockNumber) maxFreePages > num_pages)

src/backend/commands/vacuumlazy.c

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
*
3232
*
3333
* IDENTIFICATION
34-
* $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.52 2005/03/25 22:51:31 tgl Exp $
34+
* $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.53 2005/05/07 21:32:24 tgl Exp $
3535
*
3636
*-------------------------------------------------------------------------
3737
*/
@@ -280,8 +280,30 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
280280

281281
if (PageIsNew(page))
282282
{
283-
/* Not sure we still need to handle this case, but... */
283+
/*
284+
* An all-zeroes page could be left over if a backend extends
285+
* the relation but crashes before initializing the page.
286+
* Reclaim such pages for use.
287+
*
288+
* We have to be careful here because we could be looking at
289+
* a page that someone has just added to the relation and not
290+
* yet been able to initialize (see RelationGetBufferForTuple).
291+
* To interlock against that, release the buffer read lock
292+
* (which we must do anyway) and grab the relation extension
293+
* lock before re-locking in exclusive mode. If the page is
294+
* still uninitialized by then, it must be left over from a
295+
* crashed backend, and we can initialize it.
296+
*
297+
* We don't really need the relation lock when this is a new
298+
* or temp relation, but it's probably not worth the code space
299+
* to check that, since this surely isn't a critical path.
300+
*
301+
* Note: the comparable code in vacuum.c need not do all this
302+
* because it's got exclusive lock on the whole relation.
303+
*/
284304
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
305+
LockRelationForExtension(onerel, ExclusiveLock);
306+
UnlockRelationForExtension(onerel, ExclusiveLock);
285307
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
286308
if (PageIsNew(page))
287309
{

0 commit comments

Comments
 (0)