Skip to content

Commit bd8e2b3

Browse files
committed
Fix BRIN summarization concurrent with extension
If a process is extending a table concurrently with some BRIN summarization
process, it is possible for the latter to miss pages added by the former
because the number of pages is computed ahead of time.

Fix by determining a fresh relation size after inserting the placeholder
tuple: any process that further extends the table concurrently will update
the placeholder tuple, while previous pages will be processed by the heap
scan.

Reported-by: Tomas Vondra
Reviewed-by: Tom Lane
Author: Álvaro Herrera
Discussion: https://postgr.es/m/083d996a-4a8a-0e13-800a-851dd09ad8cc@2ndquadrant.com
Backpatch-to: 9.5
1 parent 6cf68e2 commit bd8e2b3

File tree

1 file changed

+49
-8
lines changed

1 file changed

+49
-8
lines changed

src/backend/access/brin/brin.c

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -951,7 +951,8 @@ terminate_brin_buildstate(BrinBuildState *state)
951951
}
952952

953953
/*
954-
* Summarize the given page range of the given index.
954+
* On the given BRIN index, summarize the heap page range that corresponds
955+
* to the heap block number given.
955956
*
956957
* This routine can run in parallel with insertions into the heap. To avoid
957958
* missing those values from the summary tuple, we first insert a placeholder
@@ -961,6 +962,12 @@ terminate_brin_buildstate(BrinBuildState *state)
961962
* update of the index value happens in a loop, so that if somebody updates
962963
* the placeholder tuple after we read it, we detect the case and try again.
963964
* This ensures that the concurrently inserted tuples are not lost.
965+
*
966+
* A further corner case is this routine being asked to summarize the partial
967+
* range at the end of the table. heapNumBlocks is the (possibly outdated)
968+
* table size; if we notice that the requested range lies beyond that size,
969+
* we re-compute the table size after inserting the placeholder tuple, to
970+
* avoid missing pages that were appended recently.
964971
*/
965972
static void
966973
summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
@@ -981,6 +988,33 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
981988
state->bs_rmAccess, &phbuf,
982989
heapBlk, phtup, phsz);
983990

991+
/*
992+
* Compute range end. We hold ShareUpdateExclusive lock on table, so it
993+
* cannot shrink concurrently (but it can grow).
994+
*/
995+
Assert(heapBlk % state->bs_pagesPerRange == 0);
996+
if (heapBlk + state->bs_pagesPerRange > heapNumBlks)
997+
{
998+
/*
999+
* If we're asked to scan what we believe to be the final range on the
1000+
* table (i.e. a range that might be partial) we need to recompute our
1001+
* idea of what the latest page is after inserting the placeholder
1002+
* tuple. Anyone that grows the table later will update the
1003+
* placeholder tuple, so it doesn't matter that we won't scan these
1004+
* pages ourselves. Careful: the table might have been extended
1005+
* beyond the current range, so clamp our result.
1006+
*
1007+
* Fortunately, this should occur infrequently.
1008+
*/
1009+
scanNumBlks = Min(RelationGetNumberOfBlocks(heapRel) - heapBlk,
1010+
state->bs_pagesPerRange);
1011+
}
1012+
else
1013+
{
1014+
/* Easy case: range is known to be complete */
1015+
scanNumBlks = state->bs_pagesPerRange;
1016+
}
1017+
9841018
/*
9851019
* Execute the partial heap scan covering the heap blocks in the specified
9861020
* page range, summarizing the heap tuples in it. This scan stops just
@@ -991,8 +1025,6 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
9911025
* by transactions that are still in progress, among other corner cases.
9921026
*/
9931027
state->bs_currRangeStart = heapBlk;
994-
scanNumBlks = heapBlk + state->bs_pagesPerRange <= heapNumBlks ?
995-
state->bs_pagesPerRange : heapNumBlks - heapBlk;
9961028
IndexBuildHeapRangeScan(heapRel, state->bs_irel, indexInfo, false, true,
9971029
heapBlk, scanNumBlks,
9981030
brinbuildCallback, (void *) state);
@@ -1069,25 +1101,34 @@ brinsummarize(Relation index, Relation heapRel, double *numSummarized,
10691101
BrinBuildState *state = NULL;
10701102
IndexInfo *indexInfo = NULL;
10711103
BlockNumber heapNumBlocks;
1072-
BlockNumber heapBlk;
10731104
BlockNumber pagesPerRange;
10741105
Buffer buf;
1106+
BlockNumber startBlk;
10751107

10761108
revmap = brinRevmapInitialize(index, &pagesPerRange, NULL);
10771109

1110+
/* determine range of pages to process: always start from the beginning */
1111+
heapNumBlocks = RelationGetNumberOfBlocks(heapRel);
1112+
startBlk = 0;
1113+
10781114
/*
10791115
* Scan the revmap to find unsummarized items.
10801116
*/
10811117
buf = InvalidBuffer;
1082-
heapNumBlocks = RelationGetNumberOfBlocks(heapRel);
1083-
for (heapBlk = 0; heapBlk < heapNumBlocks; heapBlk += pagesPerRange)
1118+
for (; startBlk < heapNumBlocks; startBlk += pagesPerRange)
10841119
{
10851120
BrinTuple *tup;
10861121
OffsetNumber off;
10871122

1123+
/*
1124+
* Go away now if we think the next range is partial.
1125+
*/
1126+
if (startBlk + pagesPerRange > heapNumBlocks)
1127+
break;
1128+
10881129
CHECK_FOR_INTERRUPTS();
10891130

1090-
tup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, NULL,
1131+
tup = brinGetTupleForHeapBlock(revmap, startBlk, &buf, &off, NULL,
10911132
BUFFER_LOCK_SHARE, NULL);
10921133
if (tup == NULL)
10931134
{
@@ -1100,7 +1141,7 @@ brinsummarize(Relation index, Relation heapRel, double *numSummarized,
11001141
pagesPerRange);
11011142
indexInfo = BuildIndexInfo(index);
11021143
}
1103-
summarize_range(indexInfo, state, heapRel, heapBlk, heapNumBlocks);
1144+
summarize_range(indexInfo, state, heapRel, startBlk, heapNumBlocks);
11041145

11051146
/* and re-initialize state for the next range */
11061147
brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);

0 commit comments

Comments
 (0)