Commit fcb7c14

Repair subtle VACUUM bug that led to 'HEAP_MOVED_IN was not expected' errors.

VACUUM normally compacts the table back-to-front, and stops as soon as it reaches a page that it has moved some tuples onto. (This logic doesn't yield a completely packed table, but it should come pretty close.) The trouble was in how it detected that it had reached a page with moved-in tuples: it checked whether the current page was the last page of the list of pages that have enough free space to be move-in targets, while other code removes pages from that list once they get full. A kluge was supposed to keep the last list entry from being removed, but it didn't get the job done.

Fixed by keeping a separate variable that holds the largest block number into which a tuple has been moved, so there is no longer any need to protect the last element of the fraged_pages list. Also fix the NOTICE messages to report elapsed user/system CPU time correctly.
1 parent b86ca72 commit fcb7c14
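
To make the shape of the fix concrete, here is a minimal, self-contained sketch of the loop-termination rule it adopts. This is illustrative C, not the vacuum.c code in the diff below: the free_space[] array, the 40-unit tuple size, N_PAGES, and the helper try_move() are all made up for the example. The old coding asked "is this block the tail of the move-target list?", which broke once that tail entry could be dropped from the list; the new coding simply remembers the highest block number any tuple has been moved into and never scans below it.

    #include <stdio.h>

    #define N_PAGES 8

    /* Hypothetical per-page free space; each page's tuples need 40 units. */
    static int free_space[N_PAGES] = {60, 10, 80, 0, 0, 40, 0, 0};

    /* Pretend to move the tuples of page 'src' into 'dest'; report it. */
    static void
    try_move(int src, int dest)
    {
        printf("moving tuples from page %d to page %d\n", src, dest);
        free_space[dest] -= 40;
    }

    int
    main(void)
    {
        int last_move_dest_block = -1;  /* highest block any tuple was moved into */

        /* Scan back-to-front; never descend past a page we moved tuples onto. */
        for (int blkno = N_PAGES - 1; blkno > last_move_dest_block; blkno--)
        {
            for (int target = 0; target < blkno; target++)
            {
                if (free_space[target] >= 40)
                {
                    try_move(blkno, target);
                    if (target > last_move_dest_block)
                        last_move_dest_block = target;
                    break;
                }
            }
        }

        printf("compaction stopped above block %d\n", last_move_dest_block);
        return 0;
    }

Running this prints the three moves and then "compaction stopped above block 2": once block 2 has received tuples, the back-to-front scan never revisits it or anything below it, which is exactly the invariant the 'HEAP_MOVED_IN was not expected' check relies on.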

File tree

1 file changed: +110 −76 lines

src/backend/commands/vacuum.c

Lines changed: 110 additions & 76 deletions
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.133 1999/12/29 10:13:20 momjian Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.134 2000/01/10 04:09:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -95,6 +95,8 @@ static int vc_cmp_blk(const void *left, const void *right);
 static int vc_cmp_offno(const void *left, const void *right);
 static int vc_cmp_vtlinks(const void *left, const void *right);
 static bool vc_enough_space(VPageDescr vpd, Size len);
+static char *vc_show_rusage(struct rusage *ru0);
+
 
 void
 vacuum(char *vacrel, bool verbose, bool analyze, List *va_spec)
@@ -637,12 +639,11 @@ vc_scanheap(VRelStats *vacrelstats, Relation onerel,
     Size        min_tlen = MaxTupleSize;
     Size        max_tlen = 0;
     int32       i;
-    struct rusage ru0,
-                ru1;
     bool        do_shrinking = true;
     VTupleLink  vtlinks = (VTupleLink) palloc(100 * sizeof(VTupleLinkData));
     int         num_vtlinks = 0;
     int         free_vtlinks = 100;
+    struct rusage ru0;
 
     getrusage(RUSAGE_SELF, &ru0);
 
@@ -987,25 +988,21 @@ vc_scanheap(VRelStats *vacrelstats, Relation onerel,
         pfree(vtlinks);
     }
 
-    getrusage(RUSAGE_SELF, &ru1);
-
     elog(MESSAGE_LEVEL, "Pages %u: Changed %u, Reapped %u, Empty %u, New %u; \
 Tup %u: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %u, MaxLen %u; \
-Re-using: Free/Avail. Space %u/%u; EndEmpty/Avail. Pages %u/%u. \
-Elapsed %u/%u sec.",
+Re-using: Free/Avail. Space %u/%u; EndEmpty/Avail. Pages %u/%u. %s",
          nblocks, changed_pages, vacuum_pages->vpl_num_pages, empty_pages,
          new_pages, num_tuples, tups_vacuumed,
          nkeep, vacrelstats->num_vtlinks, ncrash,
          nunused, min_tlen, max_tlen, free_size, usable_free_size,
          empty_end_pages, fraged_pages->vpl_num_pages,
-         ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
-         ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
+         vc_show_rusage(&ru0));
 
 } /* vc_scanheap */
 
 
 /*
- * vc_rpfheap() -- try to repaire relation' fragmentation
+ * vc_rpfheap() -- try to repair relation's fragmentation
  *
  * This routine marks dead tuples as unused and tries re-use dead space
  * by moving tuples (and inserting indices if needed). It constructs
@@ -1016,7 +1013,8 @@ Elapsed %u/%u sec.",
  */
 static void
 vc_rpfheap(VRelStats *vacrelstats, Relation onerel,
-           VPageList vacuum_pages, VPageList fraged_pages, int nindices, Relation *Irel)
+           VPageList vacuum_pages, VPageList fraged_pages,
+           int nindices, Relation *Irel)
 {
     TransactionId myXID;
     CommandId   myCID;
@@ -1040,14 +1038,13 @@ vc_rpfheap(VRelStats *vacrelstats, Relation onerel,
     InsertIndexResult iresult;
     VPageListData Nvpl;
     VPageDescr  cur_page = NULL,
-                last_fraged_page,
                 last_vacuum_page,
                 vpc,
                *vpp;
     int         cur_item = 0;
     IndDesc    *Idesc,
                *idcur;
-    int         last_fraged_block,
+    int         last_move_dest_block = -1,
                 last_vacuum_block,
                 i = 0;
     Size        tuple_len;
@@ -1060,8 +1057,7 @@ vc_rpfheap(VRelStats *vacrelstats, Relation onerel,
     bool        isempty,
                 dowrite,
                 chain_tuple_moved;
-    struct rusage ru0,
-                ru1;
+    struct rusage ru0;
 
     getrusage(RUSAGE_SELF, &ru0);
 
@@ -1078,26 +1074,32 @@ vc_rpfheap(VRelStats *vacrelstats, Relation onerel,
 
     Nvpl.vpl_num_pages = 0;
     num_fraged_pages = fraged_pages->vpl_num_pages;
-    last_fraged_page = fraged_pages->vpl_pagedesc[num_fraged_pages - 1];
-    last_fraged_block = last_fraged_page->vpd_blkno;
     Assert(vacuum_pages->vpl_num_pages > vacuum_pages->vpl_empty_end_pages);
     vacuumed_pages = vacuum_pages->vpl_num_pages - vacuum_pages->vpl_empty_end_pages;
     last_vacuum_page = vacuum_pages->vpl_pagedesc[vacuumed_pages - 1];
     last_vacuum_block = last_vacuum_page->vpd_blkno;
-    Assert(last_vacuum_block >= last_fraged_block);
     cur_buffer = InvalidBuffer;
     num_moved = 0;
 
     vpc = (VPageDescr) palloc(sizeof(VPageDescrData) + MaxOffsetNumber * sizeof(OffsetNumber));
     vpc->vpd_offsets_used = vpc->vpd_offsets_free = 0;
 
+    /*
+     * Scan pages backwards from the last nonempty page, trying to move
+     * tuples down to lower pages. Quit when we reach a page that we
+     * have moved any tuples onto. Note that if a page is still in the
+     * fraged_pages list (list of candidate move-target pages) when we
+     * reach it, we will remove it from the list. This ensures we never
+     * move a tuple up to a higher page number.
+     *
+     * NB: this code depends on the vacuum_pages and fraged_pages lists
+     * being in order, and on fraged_pages being a subset of vacuum_pages.
+     */
     nblocks = vacrelstats->num_pages;
-    for (blkno = nblocks - vacuum_pages->vpl_empty_end_pages - 1;; blkno--)
+    for (blkno = nblocks - vacuum_pages->vpl_empty_end_pages - 1;
+         blkno > last_move_dest_block;
+         blkno--)
     {
-        /* if it's reapped page and it was used by me - quit */
-        if (blkno == last_fraged_block && last_fraged_page->vpd_offsets_used > 0)
-            break;
-
         buf = ReadBuffer(onerel, blkno);
         page = BufferGetPage(buf);
 
@@ -1117,21 +1119,24 @@ vc_rpfheap(VRelStats *vacrelstats, Relation onerel,
             else
                 Assert(isempty);
             --vacuumed_pages;
-            Assert(vacuumed_pages > 0);
-            /* get prev reapped page from vacuum_pages */
-            last_vacuum_page = vacuum_pages->vpl_pagedesc[vacuumed_pages - 1];
-            last_vacuum_block = last_vacuum_page->vpd_blkno;
-            if (blkno == last_fraged_block)     /* this page in
-                                                 * fraged_pages too */
+            if (vacuumed_pages > 0)
+            {
+                /* get prev reapped page from vacuum_pages */
+                last_vacuum_page = vacuum_pages->vpl_pagedesc[vacuumed_pages - 1];
+                last_vacuum_block = last_vacuum_page->vpd_blkno;
+            }
+            else
             {
+                last_vacuum_page = NULL;
+                last_vacuum_block = -1;
+            }
+            if (num_fraged_pages > 0 &&
+                blkno ==
+                fraged_pages->vpl_pagedesc[num_fraged_pages-1]->vpd_blkno)
+            {
+                /* page is in fraged_pages too; remove it */
                 --num_fraged_pages;
-                Assert(num_fraged_pages > 0);
-                Assert(last_fraged_page->vpd_offsets_used == 0);
-                /* get prev reapped page from fraged_pages */
-                last_fraged_page = fraged_pages->vpl_pagedesc[num_fraged_pages - 1];
-                last_fraged_block = last_fraged_page->vpd_blkno;
             }
-            Assert(last_fraged_block <= last_vacuum_block);
             if (isempty)
             {
                 ReleaseBuffer(buf);
@@ -1217,10 +1222,10 @@ vc_rpfheap(VRelStats *vacrelstats, Relation onerel,
             HeapTupleData tp = tuple;
             Size        tlen = tuple_len;
             VTupleMove  vtmove = (VTupleMove)
-                palloc(100 * sizeof(VTupleMoveData));
+            palloc(100 * sizeof(VTupleMoveData));
             int         num_vtmove = 0;
             int         free_vtmove = 100;
-            VPageDescr  to_vpd = fraged_pages->vpl_pagedesc[0];
+            VPageDescr  to_vpd = NULL;
             int         to_item = 0;
             bool        freeCbuf = false;
             int         ti;
@@ -1276,17 +1281,20 @@ vc_rpfheap(VRelStats *vacrelstats, Relation onerel,
             /* first, can chain be moved ? */
             for (;;)
             {
-                if (!vc_enough_space(to_vpd, tlen))
+                if (to_vpd == NULL ||
+                    !vc_enough_space(to_vpd, tlen))
                 {
-                    if (to_vpd != last_fraged_page &&
-                        !vc_enough_space(to_vpd, vacrelstats->min_tlen))
+                    /* if to_vpd no longer has enough free space to be
+                     * useful, remove it from fraged_pages list
+                     */
+                    if (to_vpd != NULL &&
+                        !vc_enough_space(to_vpd, vacrelstats->min_tlen))
                     {
-                        Assert(num_fraged_pages > to_item + 1);
+                        Assert(num_fraged_pages > to_item);
                         memmove(fraged_pages->vpl_pagedesc + to_item,
-                                fraged_pages->vpl_pagedesc + to_item + 1,
-                                sizeof(VPageDescr *) * (num_fraged_pages - to_item - 1));
+                            fraged_pages->vpl_pagedesc + to_item + 1,
+                            sizeof(VPageDescr) * (num_fraged_pages - to_item - 1));
                         num_fraged_pages--;
-                        Assert(last_fraged_page == fraged_pages->vpl_pagedesc[num_fraged_pages - 1]);
                     }
                     for (i = 0; i < num_fraged_pages; i++)
                     {
@@ -1477,6 +1485,8 @@ moving chain: failed to add item with len = %u to page %u",
                 newtup.t_datamcxt = NULL;
                 newtup.t_data = (HeapTupleHeader) PageGetItem(ToPage, newitemid);
                 ItemPointerSet(&(newtup.t_self), vtmove[ti].vpd->vpd_blkno, newoff);
+                if (((int) vtmove[ti].vpd->vpd_blkno) > last_move_dest_block)
+                    last_move_dest_block = vtmove[ti].vpd->vpd_blkno;
 
                 /*
                  * Set t_ctid pointing to itself for last tuple in
@@ -1545,23 +1555,17 @@ moving chain: failed to add item with len = %u to page %u",
             {
                 WriteBuffer(cur_buffer);
                 cur_buffer = InvalidBuffer;
-
                 /*
-                 * If no one tuple can't be added to this page -
-                 * remove page from fraged_pages. - vadim 11/27/96
-                 *
-                 * But we can't remove last page - this is our
-                 * "show-stopper" !!! - vadim 02/25/98
+                 * If previous target page is now too full to add
+                 * *any* tuple to it, remove it from fraged_pages.
                  */
-                if (cur_page != last_fraged_page &&
-                    !vc_enough_space(cur_page, vacrelstats->min_tlen))
+                if (!vc_enough_space(cur_page, vacrelstats->min_tlen))
                 {
-                    Assert(num_fraged_pages > cur_item + 1);
+                    Assert(num_fraged_pages > cur_item);
                     memmove(fraged_pages->vpl_pagedesc + cur_item,
                             fraged_pages->vpl_pagedesc + cur_item + 1,
-                            sizeof(VPageDescr *) * (num_fraged_pages - cur_item - 1));
+                            sizeof(VPageDescr) * (num_fraged_pages - cur_item - 1));
                     num_fraged_pages--;
-                    Assert(last_fraged_page == fraged_pages->vpl_pagedesc[num_fraged_pages - 1]);
                 }
             }
             for (i = 0; i < num_fraged_pages; i++)
@@ -1623,6 +1627,9 @@ failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)",
             cur_page->vpd_offsets_used++;
             num_moved++;
             cur_page->vpd_free = ((PageHeader) ToPage)->pd_upper - ((PageHeader) ToPage)->pd_lower;
+            if (((int) cur_page->vpd_blkno) > last_move_dest_block)
+                last_move_dest_block = cur_page->vpd_blkno;
+
             vpc->vpd_offsets[vpc->vpd_offsets_free++] = offnum;
 
             /* insert index' tuples if needed */
@@ -1789,14 +1796,10 @@ failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)",
     }
     Assert(num_moved == checked_moved);
 
-    getrusage(RUSAGE_SELF, &ru1);
-
-    elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u; Tuple(s) moved: %u. \
-Elapsed %u/%u sec.",
+    elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u; Tuple(s) moved: %u. %s",
          RelationGetRelationName(onerel),
          nblocks, blkno, num_moved,
-         ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
-         ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
+         vc_show_rusage(&ru0));
 
     if (Nvpl.vpl_num_pages > 0)
     {
@@ -1950,14 +1953,17 @@ vc_vacheap(VRelStats *vacrelstats, Relation onerel, VPageList vacuum_pages)
 
 /*
  * vc_vacpage() -- free dead tuples on a page
- *                 and repaire its fragmentation.
+ *                 and repair its fragmentation.
  */
 static void
 vc_vacpage(Page page, VPageDescr vpd)
 {
     ItemId      itemid;
     int         i;
 
+    /* There shouldn't be any tuples moved onto the page yet! */
+    Assert(vpd->vpd_offsets_used == 0);
+
     for (i = 0; i < vpd->vpd_offsets_free; i++)
     {
         itemid = &(((PageHeader) page)->pd_linp[vpd->vpd_offsets[i] - 1]);
@@ -1978,8 +1984,7 @@ vc_scanoneind(Relation indrel, int num_tuples)
     IndexScanDesc iscan;
     int         nitups;
     int         nipages;
-    struct rusage ru0,
-                ru1;
+    struct rusage ru0;
 
     getrusage(RUSAGE_SELF, &ru0);
 
@@ -2000,12 +2005,9 @@ vc_scanoneind(Relation indrel, int num_tuples)
     nipages = RelationGetNumberOfBlocks(indrel);
     vc_updstats(RelationGetRelid(indrel), nipages, nitups, false, NULL);
 
-    getrusage(RUSAGE_SELF, &ru1);
-
-    elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u. Elapsed %u/%u sec.",
+    elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u. %s",
         RelationGetRelationName(indrel), nipages, nitups,
-        ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
-        ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
+        vc_show_rusage(&ru0));
 
     if (nitups != num_tuples)
         elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
@@ -2036,8 +2038,7 @@ vc_vaconeind(VPageList vpl, Relation indrel, int num_tuples, int keep_tuples)
     int         num_index_tuples;
     int         num_pages;
     VPageDescr  vp;
-    struct rusage ru0,
-                ru1;
+    struct rusage ru0;
 
     getrusage(RUSAGE_SELF, &ru0);
 
@@ -2081,13 +2082,10 @@ vc_vaconeind(VPageList vpl, Relation indrel, int num_tuples, int keep_tuples)
     num_pages = RelationGetNumberOfBlocks(indrel);
    vc_updstats(RelationGetRelid(indrel), num_pages, num_index_tuples, false, NULL);
 
-    getrusage(RUSAGE_SELF, &ru1);
-
-    elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u: Deleted %u. Elapsed %u/%u sec.",
+    elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u: Deleted %u. %s",
         RelationGetRelationName(indrel), num_pages,
        num_index_tuples - keep_tuples, tups_vacuumed,
-        ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
-        ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
+        vc_show_rusage(&ru0));
 
     if (num_index_tuples != num_tuples + keep_tuples)
         elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
@@ -2905,3 +2903,39 @@ vc_enough_space(VPageDescr vpd, Size len)
     return false;
 
 } /* vc_enough_space */
+
+
+/*
+ * Compute elapsed time since ru0 usage snapshot, and format into
+ * a displayable string. Result is in a static string, which is
+ * tacky, but no one ever claimed that the Postgres backend is
+ * threadable...
+ */
+static char *
+vc_show_rusage(struct rusage *ru0)
+{
+    static char result[64];
+    struct rusage ru1;
+
+    getrusage(RUSAGE_SELF, &ru1);
+
+    if (ru1.ru_stime.tv_usec < ru0->ru_stime.tv_usec)
+    {
+        ru1.ru_stime.tv_sec--;
+        ru1.ru_stime.tv_usec += 1000000;
+    }
+    if (ru1.ru_utime.tv_usec < ru0->ru_utime.tv_usec)
+    {
+        ru1.ru_utime.tv_sec--;
+        ru1.ru_utime.tv_usec += 1000000;
+    }
+
+    snprintf(result, sizeof(result),
+             "CPU %d.%02ds/%d.%02du sec.",
+             (int) (ru1.ru_stime.tv_sec - ru0->ru_stime.tv_sec),
+             (int) (ru1.ru_stime.tv_usec - ru0->ru_stime.tv_usec) / 10000,
+             (int) (ru1.ru_utime.tv_sec - ru0->ru_utime.tv_sec),
+             (int) (ru1.ru_utime.tv_usec - ru0->ru_utime.tv_usec) / 10000);
+
+    return result;
+}
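
For completeness: the elapsed-time arithmetic in the new vc_show_rusage() is ordinary timeval subtraction with a manual borrow. If the ending tv_usec is smaller than the starting one, a whole second is borrowed so both fields stay non-negative before formatting. Below is a small standalone program exercising the same arithmetic; it is a sketch that mirrors the function above, and show_cpu_since() plus the busy loop in main() are just for demonstration.

    #include <stdio.h>
    #include <sys/time.h>
    #include <sys/resource.h>

    /* Format user/system CPU time consumed since *ru0, in the style of the
     * new VACUUM messages: "CPU s.ss/u.uu sec." (static buffer, not thread-safe). */
    static const char *
    show_cpu_since(const struct rusage *ru0)
    {
        static char result[64];
        struct rusage ru1;

        getrusage(RUSAGE_SELF, &ru1);

        /* Borrow a second if the microsecond field would go negative. */
        if (ru1.ru_stime.tv_usec < ru0->ru_stime.tv_usec)
        {
            ru1.ru_stime.tv_sec--;
            ru1.ru_stime.tv_usec += 1000000;
        }
        if (ru1.ru_utime.tv_usec < ru0->ru_utime.tv_usec)
        {
            ru1.ru_utime.tv_sec--;
            ru1.ru_utime.tv_usec += 1000000;
        }

        snprintf(result, sizeof(result), "CPU %d.%02ds/%d.%02du sec.",
                 (int) (ru1.ru_stime.tv_sec - ru0->ru_stime.tv_sec),
                 (int) (ru1.ru_stime.tv_usec - ru0->ru_stime.tv_usec) / 10000,
                 (int) (ru1.ru_utime.tv_sec - ru0->ru_utime.tv_sec),
                 (int) (ru1.ru_utime.tv_usec - ru0->ru_utime.tv_usec) / 10000);
        return result;
    }

    int
    main(void)
    {
        struct rusage ru0;
        volatile double x = 0;

        getrusage(RUSAGE_SELF, &ru0);

        /* Burn a little CPU so the delta is visible. */
        for (long i = 0; i < 20000000; i++)
            x += i * 0.5;

        printf("%s\n", show_cpu_since(&ru0));
        return 0;
    }

The "%02d" fields print hundredths of a second (usec difference divided by 10000), which is why the NOTICE output reads like "CPU 0.12s/0.34u sec." rather than the old whole-second "Elapsed %u/%u sec." form.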
