Skip to content

Commit b2029ed

Browse files
tvondrapull[bot]
authored andcommitted
Minor cleanup of the BRIN parallel build code
Commit b437571 added support for parallel builds for BRIN indexes, using code similar to BTREE parallel builds, and also a new tuplesort variant. This commit simplifies the new code in two ways: * The "spool" grouping tuplesort and the heap/index is not necessary. The heap/index are available as separate arguments, causing confusion. So remove the spool, and use the tuplesort directly. * The new tuplesort variant does not need the heap/index, as it sorts simply by the range block number, without accessing the tuple data. So simplify that too. Initial report and patch by Ranier Vilela, further cleanup by me. Author: Ranier Vilela Discussion: https://postgr.es/m/CAEudQAqD7f2i4iyEaAz-5o-bf6zXVX-AkNUBm-YjUXEemaEh6A%40mail.gmail.com
1 parent d5759cb commit b2029ed

File tree

4 files changed

+39
-90
lines changed

4 files changed

+39
-90
lines changed

src/backend/access/brin/brin.c

Lines changed: 36 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -50,16 +50,6 @@
5050
#define PARALLEL_KEY_WAL_USAGE UINT64CONST(0xB000000000000004)
5151
#define PARALLEL_KEY_BUFFER_USAGE UINT64CONST(0xB000000000000005)
5252

53-
/*
54-
* Status record for spooling/sorting phase.
55-
*/
56-
typedef struct BrinSpool
57-
{
58-
Tuplesortstate *sortstate; /* state data for tuplesort.c */
59-
Relation heap;
60-
Relation index;
61-
} BrinSpool;
62-
6353
/*
6454
* Status for index builds performed in parallel. This is allocated in a
6555
* dynamic shared memory segment.
@@ -183,7 +173,13 @@ typedef struct BrinBuildState
183173
*/
184174
BrinLeader *bs_leader;
185175
int bs_worker_id;
186-
BrinSpool *bs_spool;
176+
177+
/*
178+
* The sortstate is used by workers (including the leader). It has to be
179+
* part of the build state, because that's the only thing passed to the
180+
* build callback etc.
181+
*/
182+
Tuplesortstate *bs_sortstate;
187183
} BrinBuildState;
188184

189185
/*
@@ -231,12 +227,11 @@ static void brin_fill_empty_ranges(BrinBuildState *state,
231227
/* parallel index builds */
232228
static void _brin_begin_parallel(BrinBuildState *buildstate, Relation heap, Relation index,
233229
bool isconcurrent, int request);
234-
static void _brin_end_parallel(BrinLeader *btleader, BrinBuildState *state);
230+
static void _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state);
235231
static Size _brin_parallel_estimate_shared(Relation heap, Snapshot snapshot);
236232
static void _brin_leader_participate_as_worker(BrinBuildState *buildstate,
237233
Relation heap, Relation index);
238234
static void _brin_parallel_scan_and_build(BrinBuildState *buildstate,
239-
BrinSpool *brinspool,
240235
BrinShared *brinshared,
241236
Sharedsort *sharedsort,
242237
Relation heap, Relation index,
@@ -1143,10 +1138,6 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
11431138
state = initialize_brin_buildstate(index, revmap, pagesPerRange,
11441139
RelationGetNumberOfBlocks(heap));
11451140

1146-
state->bs_spool = (BrinSpool *) palloc0(sizeof(BrinSpool));
1147-
state->bs_spool->heap = heap;
1148-
state->bs_spool->index = index;
1149-
11501141
/*
11511142
* Attempt to launch parallel worker scan when required
11521143
*
@@ -1160,11 +1151,13 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
11601151
indexInfo->ii_ParallelWorkers);
11611152

11621153
/*
1163-
* Now scan the relation. No syncscan allowed here because we want the
1164-
* heap blocks in physical order.
1165-
*
11661154
* If parallel build requested and at least one worker process was
1167-
* successfully launched, set up coordination state
1155+
* successfully launched, set up coordination state, wait for workers to
1156+
* complete. Then read all tuples from the shared tuplesort and insert
1157+
* them into the index.
1158+
*
1159+
* In serial mode, simply scan the table and build the index one index
1160+
* tuple at a time.
11681161
*/
11691162
if (state->bs_leader)
11701163
{
@@ -1176,9 +1169,8 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
11761169
state->bs_leader->nparticipanttuplesorts;
11771170
coordinate->sharedsort = state->bs_leader->sharedsort;
11781171

1179-
11801172
/*
1181-
* Begin serial/leader tuplesort.
1173+
* Begin leader tuplesort.
11821174
*
11831175
* In cases where parallelism is involved, the leader receives the
11841176
* same share of maintenance_work_mem as a serial sort (it is
@@ -1199,19 +1191,18 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
11991191
* INDEX operation, regardless of the use of parallelism or any other
12001192
* factor.
12011193
*/
1202-
state->bs_spool->sortstate =
1203-
tuplesort_begin_index_brin(heap, index,
1204-
maintenance_work_mem, coordinate,
1194+
state->bs_sortstate =
1195+
tuplesort_begin_index_brin(maintenance_work_mem, coordinate,
12051196
TUPLESORT_NONE);
12061197

1207-
/*
1208-
* In parallel mode, wait for workers to complete, and then read all
1209-
* tuples from the shared tuplesort and insert them into the index.
1210-
*/
12111198
_brin_end_parallel(state->bs_leader, state);
12121199
}
12131200
else /* no parallel index build */
12141201
{
1202+
/*
1203+
* Now scan the relation. No syncscan allowed here because we want
1204+
* the heap blocks in physical order.
1205+
*/
12151206
reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
12161207
brinbuildCallback, (void *) state, NULL);
12171208

@@ -1671,7 +1662,7 @@ initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap,
16711662
state->bs_dtuple = brin_new_memtuple(state->bs_bdesc);
16721663
state->bs_leader = NULL;
16731664
state->bs_worker_id = 0;
1674-
state->bs_spool = NULL;
1665+
state->bs_sortstate = NULL;
16751666
state->bs_context = CurrentMemoryContext;
16761667
state->bs_emptyTuple = NULL;
16771668
state->bs_emptyTupleLen = 0;
@@ -2002,7 +1993,7 @@ form_and_spill_tuple(BrinBuildState *state)
20021993
state->bs_dtuple, &size);
20031994

20041995
/* write the BRIN tuple to the tuplesort */
2005-
tuplesort_putbrintuple(state->bs_spool->sortstate, tup, size);
1996+
tuplesort_putbrintuple(state->bs_sortstate, tup, size);
20061997

20071998
state->bs_numtuples++;
20081999

@@ -2522,7 +2513,6 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
25222513
Size tuplen;
25232514
BrinShared *brinshared = brinleader->brinshared;
25242515
BlockNumber prevblkno = InvalidBlockNumber;
2525-
BrinSpool *spool;
25262516
MemoryContext rangeCxt,
25272517
oldCxt;
25282518

@@ -2541,8 +2531,7 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
25412531
state->bs_numtuples = brinshared->indtuples;
25422532

25432533
/* do the actual sort in the leader */
2544-
spool = state->bs_spool;
2545-
tuplesort_performsort(spool->sortstate);
2534+
tuplesort_performsort(state->bs_sortstate);
25462535

25472536
/*
25482537
* Initialize BrinMemTuple we'll use to union summaries from workers (in
@@ -2568,7 +2557,7 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
25682557
* That probably gives us an index that is cheaper to scan, thanks to
25692558
* mostly getting data from the same index page as before.
25702559
*/
2571-
while ((btup = tuplesort_getbrintuple(spool->sortstate, &tuplen, true)) != NULL)
2560+
while ((btup = tuplesort_getbrintuple(state->bs_sortstate, &tuplen, true)) != NULL)
25722561
{
25732562
/* Ranges should be multiples of pages_per_range for the index. */
25742563
Assert(btup->bt_blkno % brinshared->pagesPerRange == 0);
@@ -2640,7 +2629,7 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
26402629
prevblkno = btup->bt_blkno;
26412630
}
26422631

2643-
tuplesort_end(spool->sortstate);
2632+
tuplesort_end(state->bs_sortstate);
26442633

26452634
/* Fill the BRIN tuple for the last page range with data. */
26462635
if (prevblkno != InvalidBlockNumber)
@@ -2704,11 +2693,6 @@ _brin_leader_participate_as_worker(BrinBuildState *buildstate, Relation heap, Re
27042693
BrinLeader *brinleader = buildstate->bs_leader;
27052694
int sortmem;
27062695

2707-
/* Allocate memory and initialize private spool */
2708-
buildstate->bs_spool = (BrinSpool *) palloc0(sizeof(BrinSpool));
2709-
buildstate->bs_spool->heap = buildstate->bs_spool->heap;
2710-
buildstate->bs_spool->index = buildstate->bs_spool->index;
2711-
27122696
/*
27132697
* Might as well use reliable figure when doling out maintenance_work_mem
27142698
* (when requested number of workers were not launched, this will be
@@ -2717,27 +2701,25 @@ _brin_leader_participate_as_worker(BrinBuildState *buildstate, Relation heap, Re
27172701
sortmem = maintenance_work_mem / brinleader->nparticipanttuplesorts;
27182702

27192703
/* Perform work common to all participants */
2720-
_brin_parallel_scan_and_build(buildstate, buildstate->bs_spool, brinleader->brinshared,
2704+
_brin_parallel_scan_and_build(buildstate, brinleader->brinshared,
27212705
brinleader->sharedsort, heap, index, sortmem, true);
27222706
}
27232707

27242708
/*
27252709
* Perform a worker's portion of a parallel sort.
27262710
*
2727-
* This generates a tuplesort for passed btspool, and a second tuplesort
2728-
* state if a second btspool is need (i.e. for unique index builds). All
2729-
* other spool fields should already be set when this is called.
2711+
* This generates a tuplesort for the worker portion of the table.
27302712
*
27312713
* sortmem is the amount of working memory to use within each worker,
27322714
* expressed in KBs.
27332715
*
27342716
* When this returns, workers are done, and need only release resources.
27352717
*/
27362718
static void
2737-
_brin_parallel_scan_and_build(BrinBuildState *state, BrinSpool *brinspool,
2719+
_brin_parallel_scan_and_build(BrinBuildState *state,
27382720
BrinShared *brinshared, Sharedsort *sharedsort,
2739-
Relation heap, Relation index, int sortmem,
2740-
bool progress)
2721+
Relation heap, Relation index,
2722+
int sortmem, bool progress)
27412723
{
27422724
SortCoordinate coordinate;
27432725
TableScanDesc scan;
@@ -2751,10 +2733,8 @@ _brin_parallel_scan_and_build(BrinBuildState *state, BrinSpool *brinspool,
27512733
coordinate->sharedsort = sharedsort;
27522734

27532735
/* Begin "partial" tuplesort */
2754-
brinspool->sortstate = tuplesort_begin_index_brin(brinspool->heap,
2755-
brinspool->index,
2756-
sortmem, coordinate,
2757-
TUPLESORT_NONE);
2736+
state->bs_sortstate = tuplesort_begin_index_brin(sortmem, coordinate,
2737+
TUPLESORT_NONE);
27582738

27592739
/* Join parallel scan */
27602740
indexInfo = BuildIndexInfo(index);
@@ -2770,7 +2750,7 @@ _brin_parallel_scan_and_build(BrinBuildState *state, BrinSpool *brinspool,
27702750
form_and_spill_tuple(state);
27712751

27722752
/* sort the BRIN ranges built by this worker */
2773-
tuplesort_performsort(brinspool->sortstate);
2753+
tuplesort_performsort(state->bs_sortstate);
27742754

27752755
state->bs_reltuples += reltuples;
27762756

@@ -2786,7 +2766,7 @@ _brin_parallel_scan_and_build(BrinBuildState *state, BrinSpool *brinspool,
27862766
/* Notify leader */
27872767
ConditionVariableSignal(&brinshared->workersdonecv);
27882768

2789-
tuplesort_end(brinspool->sortstate);
2769+
tuplesort_end(state->bs_sortstate);
27902770
}
27912771

27922772
/*
@@ -2844,11 +2824,6 @@ _brin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
28442824
brinshared->pagesPerRange,
28452825
InvalidBlockNumber);
28462826

2847-
/* Initialize worker's own spool */
2848-
buildstate->bs_spool = (BrinSpool *) palloc0(sizeof(BrinSpool));
2849-
buildstate->bs_spool->heap = heapRel;
2850-
buildstate->bs_spool->index = indexRel;
2851-
28522827
/* Look up shared state private to tuplesort.c */
28532828
sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
28542829
tuplesort_attach_shared(sharedsort, seg);
@@ -2863,8 +2838,7 @@ _brin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
28632838
*/
28642839
sortmem = maintenance_work_mem / brinshared->scantuplesortstates;
28652840

2866-
_brin_parallel_scan_and_build(buildstate, buildstate->bs_spool,
2867-
brinshared, sharedsort,
2841+
_brin_parallel_scan_and_build(buildstate, brinshared, sharedsort,
28682842
heapRel, indexRel, sortmem, false);
28692843

28702844
/* Report WAL/buffer usage during parallel execution */

src/backend/utils/sort/tuplesortvariants.c

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -137,16 +137,6 @@ typedef struct
137137
uint32 max_buckets;
138138
} TuplesortIndexHashArg;
139139

140-
/*
141-
* Data struture pointed by "TuplesortPublic.arg" for the index_brin subcase.
142-
*/
143-
typedef struct
144-
{
145-
TuplesortIndexArg index;
146-
147-
/* XXX do we need something here? */
148-
} TuplesortIndexBrinArg;
149-
150140
/*
151141
* Data struture pointed by "TuplesortPublic.arg" for the Datum case.
152142
* Set by tuplesort_begin_datum and used only by the DatumTuple routines.
@@ -562,20 +552,13 @@ tuplesort_begin_index_gist(Relation heapRel,
562552
}
563553

564554
Tuplesortstate *
565-
tuplesort_begin_index_brin(Relation heapRel,
566-
Relation indexRel,
567-
int workMem,
555+
tuplesort_begin_index_brin(int workMem,
568556
SortCoordinate coordinate,
569557
int sortopt)
570558
{
571559
Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
572560
sortopt);
573561
TuplesortPublic *base = TuplesortstateGetPublic(state);
574-
MemoryContext oldcontext;
575-
TuplesortIndexBrinArg *arg;
576-
577-
oldcontext = MemoryContextSwitchTo(base->maincontext);
578-
arg = (TuplesortIndexBrinArg *) palloc(sizeof(TuplesortIndexBrinArg));
579562

580563
#ifdef TRACE_SORT
581564
if (trace_sort)
@@ -592,12 +575,7 @@ tuplesort_begin_index_brin(Relation heapRel,
592575
base->writetup = writetup_index_brin;
593576
base->readtup = readtup_index_brin;
594577
base->haveDatum1 = true;
595-
base->arg = arg;
596-
597-
arg->index.heapRel = heapRel;
598-
arg->index.indexRel = indexRel;
599-
600-
MemoryContextSwitchTo(oldcontext);
578+
base->arg = NULL;
601579

602580
return state;
603581
}

src/include/utils/tuplesort.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -430,9 +430,7 @@ extern Tuplesortstate *tuplesort_begin_index_gist(Relation heapRel,
430430
Relation indexRel,
431431
int workMem, SortCoordinate coordinate,
432432
int sortopt);
433-
extern Tuplesortstate *tuplesort_begin_index_brin(Relation heapRel,
434-
Relation indexRel,
435-
int workMem, SortCoordinate coordinate,
433+
extern Tuplesortstate *tuplesort_begin_index_brin(int workMem, SortCoordinate coordinate,
436434
int sortopt);
437435
extern Tuplesortstate *tuplesort_begin_datum(Oid datumType,
438436
Oid sortOperator, Oid sortCollation,

src/tools/pgindent/typedefs.list

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,6 @@ BrinRevmap
307307
BrinShared
308308
BrinSortTuple
309309
BrinSpecialSpace
310-
BrinSpool
311310
BrinStatsData
312311
BrinTuple
313312
BrinValues

0 commit comments

Comments
 (0)