Skip to content

Commit 6c63bcb

Browse files
committed
Minor cleanup of the BRIN parallel build code
Commit b437571 added support for parallel builds for BRIN indexes, using code similar to BTREE parallel builds, and also a new tuplesort variant. This commit simplifies the new code in two ways: * The "spool" grouping tuplesort and the heap/index is not necessary. The heap/index are available as separate arguments, causing confusion. So remove the spool, and use the tuplesort directly. * The new tuplesort variant does not need the heap/index, as it sorts simply by the range block number, without accessing the tuple data. So simplify that too. Initial report and patch by Ranier Vilela, further cleanup by me. Author: Ranier Vilela Discussion: https://postgr.es/m/CAEudQAqD7f2i4iyEaAz-5o-bf6zXVX-AkNUBm-YjUXEemaEh6A%40mail.gmail.com
1 parent 5632d6e commit 6c63bcb

File tree

4 files changed

+39
-90
lines changed

4 files changed

+39
-90
lines changed

src/backend/access/brin/brin.c

Lines changed: 36 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -50,16 +50,6 @@
5050
#define PARALLEL_KEY_WAL_USAGE UINT64CONST(0xB000000000000004)
5151
#define PARALLEL_KEY_BUFFER_USAGE UINT64CONST(0xB000000000000005)
5252

53-
/*
54-
* Status record for spooling/sorting phase.
55-
*/
56-
typedef struct BrinSpool
57-
{
58-
Tuplesortstate *sortstate; /* state data for tuplesort.c */
59-
Relation heap;
60-
Relation index;
61-
} BrinSpool;
62-
6353
/*
6454
* Status for index builds performed in parallel. This is allocated in a
6555
* dynamic shared memory segment.
@@ -183,7 +173,13 @@ typedef struct BrinBuildState
183173
*/
184174
BrinLeader *bs_leader;
185175
int bs_worker_id;
186-
BrinSpool *bs_spool;
176+
177+
/*
178+
* The sortstate is used by workers (including the leader). It has to be
179+
* part of the build state, because that's the only thing passed to the
180+
* build callback etc.
181+
*/
182+
Tuplesortstate *bs_sortstate;
187183
} BrinBuildState;
188184

189185
/*
@@ -231,12 +227,11 @@ static void brin_fill_empty_ranges(BrinBuildState *state,
231227
/* parallel index builds */
232228
static void _brin_begin_parallel(BrinBuildState *buildstate, Relation heap, Relation index,
233229
bool isconcurrent, int request);
234-
static void _brin_end_parallel(BrinLeader *btleader, BrinBuildState *state);
230+
static void _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state);
235231
static Size _brin_parallel_estimate_shared(Relation heap, Snapshot snapshot);
236232
static void _brin_leader_participate_as_worker(BrinBuildState *buildstate,
237233
Relation heap, Relation index);
238234
static void _brin_parallel_scan_and_build(BrinBuildState *buildstate,
239-
BrinSpool *brinspool,
240235
BrinShared *brinshared,
241236
Sharedsort *sharedsort,
242237
Relation heap, Relation index,
@@ -1143,10 +1138,6 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
11431138
state = initialize_brin_buildstate(index, revmap, pagesPerRange,
11441139
RelationGetNumberOfBlocks(heap));
11451140

1146-
state->bs_spool = (BrinSpool *) palloc0(sizeof(BrinSpool));
1147-
state->bs_spool->heap = heap;
1148-
state->bs_spool->index = index;
1149-
11501141
/*
11511142
* Attempt to launch parallel worker scan when required
11521143
*
@@ -1160,11 +1151,13 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
11601151
indexInfo->ii_ParallelWorkers);
11611152

11621153
/*
1163-
* Now scan the relation. No syncscan allowed here because we want the
1164-
* heap blocks in physical order.
1165-
*
11661154
* If parallel build requested and at least one worker process was
1167-
* successfully launched, set up coordination state
1155+
* successfully launched, set up coordination state, wait for workers to
1156+
* complete. Then read all tuples from the shared tuplesort and insert
1157+
* them into the index.
1158+
*
1159+
* In serial mode, simply scan the table and build the index one index
1160+
* tuple at a time.
11681161
*/
11691162
if (state->bs_leader)
11701163
{
@@ -1176,9 +1169,8 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
11761169
state->bs_leader->nparticipanttuplesorts;
11771170
coordinate->sharedsort = state->bs_leader->sharedsort;
11781171

1179-
11801172
/*
1181-
* Begin serial/leader tuplesort.
1173+
* Begin leader tuplesort.
11821174
*
11831175
* In cases where parallelism is involved, the leader receives the
11841176
* same share of maintenance_work_mem as a serial sort (it is
@@ -1199,19 +1191,18 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
11991191
* INDEX operation, regardless of the use of parallelism or any other
12001192
* factor.
12011193
*/
1202-
state->bs_spool->sortstate =
1203-
tuplesort_begin_index_brin(heap, index,
1204-
maintenance_work_mem, coordinate,
1194+
state->bs_sortstate =
1195+
tuplesort_begin_index_brin(maintenance_work_mem, coordinate,
12051196
TUPLESORT_NONE);
12061197

1207-
/*
1208-
* In parallel mode, wait for workers to complete, and then read all
1209-
* tuples from the shared tuplesort and insert them into the index.
1210-
*/
12111198
_brin_end_parallel(state->bs_leader, state);
12121199
}
12131200
else /* no parallel index build */
12141201
{
1202+
/*
1203+
* Now scan the relation. No syncscan allowed here because we want
1204+
* the heap blocks in physical order.
1205+
*/
12151206
reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
12161207
brinbuildCallback, (void *) state, NULL);
12171208

@@ -1671,7 +1662,7 @@ initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap,
16711662
state->bs_dtuple = brin_new_memtuple(state->bs_bdesc);
16721663
state->bs_leader = NULL;
16731664
state->bs_worker_id = 0;
1674-
state->bs_spool = NULL;
1665+
state->bs_sortstate = NULL;
16751666
state->bs_context = CurrentMemoryContext;
16761667
state->bs_emptyTuple = NULL;
16771668
state->bs_emptyTupleLen = 0;
@@ -2002,7 +1993,7 @@ form_and_spill_tuple(BrinBuildState *state)
20021993
state->bs_dtuple, &size);
20031994

20041995
/* write the BRIN tuple to the tuplesort */
2005-
tuplesort_putbrintuple(state->bs_spool->sortstate, tup, size);
1996+
tuplesort_putbrintuple(state->bs_sortstate, tup, size);
20061997

20071998
state->bs_numtuples++;
20081999

@@ -2522,7 +2513,6 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
25222513
Size tuplen;
25232514
BrinShared *brinshared = brinleader->brinshared;
25242515
BlockNumber prevblkno = InvalidBlockNumber;
2525-
BrinSpool *spool;
25262516
MemoryContext rangeCxt,
25272517
oldCxt;
25282518

@@ -2541,8 +2531,7 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
25412531
state->bs_numtuples = brinshared->indtuples;
25422532

25432533
/* do the actual sort in the leader */
2544-
spool = state->bs_spool;
2545-
tuplesort_performsort(spool->sortstate);
2534+
tuplesort_performsort(state->bs_sortstate);
25462535

25472536
/*
25482537
* Initialize BrinMemTuple we'll use to union summaries from workers (in
@@ -2568,7 +2557,7 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
25682557
* That probably gives us an index that is cheaper to scan, thanks to
25692558
* mostly getting data from the same index page as before.
25702559
*/
2571-
while ((btup = tuplesort_getbrintuple(spool->sortstate, &tuplen, true)) != NULL)
2560+
while ((btup = tuplesort_getbrintuple(state->bs_sortstate, &tuplen, true)) != NULL)
25722561
{
25732562
/* Ranges should be multiples of pages_per_range for the index. */
25742563
Assert(btup->bt_blkno % brinshared->pagesPerRange == 0);
@@ -2640,7 +2629,7 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
26402629
prevblkno = btup->bt_blkno;
26412630
}
26422631

2643-
tuplesort_end(spool->sortstate);
2632+
tuplesort_end(state->bs_sortstate);
26442633

26452634
/* Fill the BRIN tuple for the last page range with data. */
26462635
if (prevblkno != InvalidBlockNumber)
@@ -2704,11 +2693,6 @@ _brin_leader_participate_as_worker(BrinBuildState *buildstate, Relation heap, Re
27042693
BrinLeader *brinleader = buildstate->bs_leader;
27052694
int sortmem;
27062695

2707-
/* Allocate memory and initialize private spool */
2708-
buildstate->bs_spool = (BrinSpool *) palloc0(sizeof(BrinSpool));
2709-
buildstate->bs_spool->heap = buildstate->bs_spool->heap;
2710-
buildstate->bs_spool->index = buildstate->bs_spool->index;
2711-
27122696
/*
27132697
* Might as well use reliable figure when doling out maintenance_work_mem
27142698
* (when requested number of workers were not launched, this will be
@@ -2717,27 +2701,25 @@ _brin_leader_participate_as_worker(BrinBuildState *buildstate, Relation heap, Re
27172701
sortmem = maintenance_work_mem / brinleader->nparticipanttuplesorts;
27182702

27192703
/* Perform work common to all participants */
2720-
_brin_parallel_scan_and_build(buildstate, buildstate->bs_spool, brinleader->brinshared,
2704+
_brin_parallel_scan_and_build(buildstate, brinleader->brinshared,
27212705
brinleader->sharedsort, heap, index, sortmem, true);
27222706
}
27232707

27242708
/*
27252709
* Perform a worker's portion of a parallel sort.
27262710
*
2727-
* This generates a tuplesort for passed btspool, and a second tuplesort
2728-
* state if a second btspool is need (i.e. for unique index builds). All
2729-
* other spool fields should already be set when this is called.
2711+
* This generates a tuplesort for the worker portion of the table.
27302712
*
27312713
* sortmem is the amount of working memory to use within each worker,
27322714
* expressed in KBs.
27332715
*
27342716
* When this returns, workers are done, and need only release resources.
27352717
*/
27362718
static void
2737-
_brin_parallel_scan_and_build(BrinBuildState *state, BrinSpool *brinspool,
2719+
_brin_parallel_scan_and_build(BrinBuildState *state,
27382720
BrinShared *brinshared, Sharedsort *sharedsort,
2739-
Relation heap, Relation index, int sortmem,
2740-
bool progress)
2721+
Relation heap, Relation index,
2722+
int sortmem, bool progress)
27412723
{
27422724
SortCoordinate coordinate;
27432725
TableScanDesc scan;
@@ -2751,10 +2733,8 @@ _brin_parallel_scan_and_build(BrinBuildState *state, BrinSpool *brinspool,
27512733
coordinate->sharedsort = sharedsort;
27522734

27532735
/* Begin "partial" tuplesort */
2754-
brinspool->sortstate = tuplesort_begin_index_brin(brinspool->heap,
2755-
brinspool->index,
2756-
sortmem, coordinate,
2757-
TUPLESORT_NONE);
2736+
state->bs_sortstate = tuplesort_begin_index_brin(sortmem, coordinate,
2737+
TUPLESORT_NONE);
27582738

27592739
/* Join parallel scan */
27602740
indexInfo = BuildIndexInfo(index);
@@ -2770,7 +2750,7 @@ _brin_parallel_scan_and_build(BrinBuildState *state, BrinSpool *brinspool,
27702750
form_and_spill_tuple(state);
27712751

27722752
/* sort the BRIN ranges built by this worker */
2773-
tuplesort_performsort(brinspool->sortstate);
2753+
tuplesort_performsort(state->bs_sortstate);
27742754

27752755
state->bs_reltuples += reltuples;
27762756

@@ -2786,7 +2766,7 @@ _brin_parallel_scan_and_build(BrinBuildState *state, BrinSpool *brinspool,
27862766
/* Notify leader */
27872767
ConditionVariableSignal(&brinshared->workersdonecv);
27882768

2789-
tuplesort_end(brinspool->sortstate);
2769+
tuplesort_end(state->bs_sortstate);
27902770
}
27912771

27922772
/*
@@ -2844,11 +2824,6 @@ _brin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
28442824
brinshared->pagesPerRange,
28452825
InvalidBlockNumber);
28462826

2847-
/* Initialize worker's own spool */
2848-
buildstate->bs_spool = (BrinSpool *) palloc0(sizeof(BrinSpool));
2849-
buildstate->bs_spool->heap = heapRel;
2850-
buildstate->bs_spool->index = indexRel;
2851-
28522827
/* Look up shared state private to tuplesort.c */
28532828
sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
28542829
tuplesort_attach_shared(sharedsort, seg);
@@ -2863,8 +2838,7 @@ _brin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
28632838
*/
28642839
sortmem = maintenance_work_mem / brinshared->scantuplesortstates;
28652840

2866-
_brin_parallel_scan_and_build(buildstate, buildstate->bs_spool,
2867-
brinshared, sharedsort,
2841+
_brin_parallel_scan_and_build(buildstate, brinshared, sharedsort,
28682842
heapRel, indexRel, sortmem, false);
28692843

28702844
/* Report WAL/buffer usage during parallel execution */

src/backend/utils/sort/tuplesortvariants.c

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -137,16 +137,6 @@ typedef struct
137137
uint32 max_buckets;
138138
} TuplesortIndexHashArg;
139139

140-
/*
141-
* Data struture pointed by "TuplesortPublic.arg" for the index_brin subcase.
142-
*/
143-
typedef struct
144-
{
145-
TuplesortIndexArg index;
146-
147-
/* XXX do we need something here? */
148-
} TuplesortIndexBrinArg;
149-
150140
/*
151141
* Data struture pointed by "TuplesortPublic.arg" for the Datum case.
152142
* Set by tuplesort_begin_datum and used only by the DatumTuple routines.
@@ -562,20 +552,13 @@ tuplesort_begin_index_gist(Relation heapRel,
562552
}
563553

564554
Tuplesortstate *
565-
tuplesort_begin_index_brin(Relation heapRel,
566-
Relation indexRel,
567-
int workMem,
555+
tuplesort_begin_index_brin(int workMem,
568556
SortCoordinate coordinate,
569557
int sortopt)
570558
{
571559
Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
572560
sortopt);
573561
TuplesortPublic *base = TuplesortstateGetPublic(state);
574-
MemoryContext oldcontext;
575-
TuplesortIndexBrinArg *arg;
576-
577-
oldcontext = MemoryContextSwitchTo(base->maincontext);
578-
arg = (TuplesortIndexBrinArg *) palloc(sizeof(TuplesortIndexBrinArg));
579562

580563
#ifdef TRACE_SORT
581564
if (trace_sort)
@@ -592,12 +575,7 @@ tuplesort_begin_index_brin(Relation heapRel,
592575
base->writetup = writetup_index_brin;
593576
base->readtup = readtup_index_brin;
594577
base->haveDatum1 = true;
595-
base->arg = arg;
596-
597-
arg->index.heapRel = heapRel;
598-
arg->index.indexRel = indexRel;
599-
600-
MemoryContextSwitchTo(oldcontext);
578+
base->arg = NULL;
601579

602580
return state;
603581
}

src/include/utils/tuplesort.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -430,9 +430,7 @@ extern Tuplesortstate *tuplesort_begin_index_gist(Relation heapRel,
430430
Relation indexRel,
431431
int workMem, SortCoordinate coordinate,
432432
int sortopt);
433-
extern Tuplesortstate *tuplesort_begin_index_brin(Relation heapRel,
434-
Relation indexRel,
435-
int workMem, SortCoordinate coordinate,
433+
extern Tuplesortstate *tuplesort_begin_index_brin(int workMem, SortCoordinate coordinate,
436434
int sortopt);
437435
extern Tuplesortstate *tuplesort_begin_datum(Oid datumType,
438436
Oid sortOperator, Oid sortCollation,

src/tools/pgindent/typedefs.list

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,6 @@ BrinRevmap
307307
BrinShared
308308
BrinSortTuple
309309
BrinSpecialSpace
310-
BrinSpool
311310
BrinStatsData
312311
BrinTuple
313312
BrinValues

0 commit comments

Comments
 (0)