Skip to content

Commit 8492feb

Browse files
committed
Allow parallel CREATE INDEX for GIN indexes
Allow using parallel workers to build a GIN index, similarly to BTREE and BRIN. For large tables this may result in significant speedup when the build is CPU-bound. The work is divided so that each worker builds index entries on a subset of the table, determined by the regular parallel scan used to read the data. Each worker uses a local tuplesort to sort and merge the entries for the same key. The TID lists do not overlap (for a given key), which means the merge sort simply concatenates the two lists. The merged entries are written into a shared tuplesort for the leader. The leader needs to merge the sorted entries again, before writing them into the index. But this way a significant part of the work happens in the workers, and the leader is left with merging fewer large entries, which is more efficient. Most of the parallelism infrastructure is a simplified copy of the code used by BTREE indexes, omitting the parts irrelevant for GIN indexes (e.g. uniqueness checks). Original patch by me, with reviews and substantial improvements by Matthias van de Meent, certainly enough to make him a co-author. Author: Tomas Vondra, Matthias van de Meent Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
1 parent 3f1db99 commit 8492feb

File tree

9 files changed

+1937
-16
lines changed

9 files changed

+1937
-16
lines changed

src/backend/access/gin/gininsert.c

Lines changed: 1635 additions & 14 deletions
Large diffs are not rendered by default.

src/backend/access/gin/ginutil.c

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "access/xloginsert.h"
2121
#include "catalog/pg_collation.h"
2222
#include "catalog/pg_type.h"
23+
#include "commands/progress.h"
2324
#include "commands/vacuum.h"
2425
#include "miscadmin.h"
2526
#include "storage/indexfsm.h"
@@ -55,7 +56,7 @@ ginhandler(PG_FUNCTION_ARGS)
5556
amroutine->amclusterable = false;
5657
amroutine->ampredlocks = true;
5758
amroutine->amcanparallel = false;
58-
amroutine->amcanbuildparallel = false;
59+
amroutine->amcanbuildparallel = true;
5960
amroutine->amcaninclude = false;
6061
amroutine->amusemaintenanceworkmem = true;
6162
amroutine->amsummarizing = false;
@@ -74,7 +75,7 @@ ginhandler(PG_FUNCTION_ARGS)
7475
amroutine->amgettreeheight = NULL;
7576
amroutine->amoptions = ginoptions;
7677
amroutine->amproperty = NULL;
77-
amroutine->ambuildphasename = NULL;
78+
amroutine->ambuildphasename = ginbuildphasename;
7879
amroutine->amvalidate = ginvalidate;
7980
amroutine->amadjustmembers = ginadjustmembers;
8081
amroutine->ambeginscan = ginbeginscan;
@@ -702,3 +703,28 @@ ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build)
702703

703704
END_CRIT_SECTION();
704705
}
706+
707+
/*
708+
* ginbuildphasename() -- Return name of index build phase.
709+
*/
710+
char *
711+
ginbuildphasename(int64 phasenum)
712+
{
713+
switch (phasenum)
714+
{
715+
case PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE:
716+
return "initializing";
717+
case PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN:
718+
return "scanning table";
719+
case PROGRESS_GIN_PHASE_PERFORMSORT_1:
720+
return "sorting tuples (workers)";
721+
case PROGRESS_GIN_PHASE_MERGE_1:
722+
return "merging tuples (workers)";
723+
case PROGRESS_GIN_PHASE_PERFORMSORT_2:
724+
return "sorting tuples";
725+
case PROGRESS_GIN_PHASE_MERGE_2:
726+
return "merging tuples";
727+
default:
728+
return NULL;
729+
}
730+
}

src/backend/access/transam/parallel.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "postgres.h"
1616

1717
#include "access/brin.h"
18+
#include "access/gin.h"
1819
#include "access/nbtree.h"
1920
#include "access/parallel.h"
2021
#include "access/session.h"
@@ -148,6 +149,9 @@ static const struct
148149
{
149150
"_brin_parallel_build_main", _brin_parallel_build_main
150151
},
152+
{
153+
"_gin_parallel_build_main", _gin_parallel_build_main
154+
},
151155
{
152156
"parallel_vacuum_main", parallel_vacuum_main
153157
}

src/backend/utils/sort/tuplesortvariants.c

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,12 @@
2020
#include "postgres.h"
2121

2222
#include "access/brin_tuple.h"
23+
#include "access/gin_tuple.h"
2324
#include "access/hash.h"
2425
#include "access/htup_details.h"
2526
#include "access/nbtree.h"
2627
#include "catalog/index.h"
28+
#include "catalog/pg_collation.h"
2729
#include "executor/executor.h"
2830
#include "pg_trace.h"
2931
#include "utils/datum.h"
@@ -46,6 +48,8 @@ static void removeabbrev_index(Tuplesortstate *state, SortTuple *stups,
4648
int count);
4749
static void removeabbrev_index_brin(Tuplesortstate *state, SortTuple *stups,
4850
int count);
51+
static void removeabbrev_index_gin(Tuplesortstate *state, SortTuple *stups,
52+
int count);
4953
static void removeabbrev_datum(Tuplesortstate *state, SortTuple *stups,
5054
int count);
5155
static int comparetup_heap(const SortTuple *a, const SortTuple *b,
@@ -74,6 +78,8 @@ static int comparetup_index_hash_tiebreak(const SortTuple *a, const SortTuple *b
7478
Tuplesortstate *state);
7579
static int comparetup_index_brin(const SortTuple *a, const SortTuple *b,
7680
Tuplesortstate *state);
81+
static int comparetup_index_gin(const SortTuple *a, const SortTuple *b,
82+
Tuplesortstate *state);
7783
static void writetup_index(Tuplesortstate *state, LogicalTape *tape,
7884
SortTuple *stup);
7985
static void readtup_index(Tuplesortstate *state, SortTuple *stup,
@@ -82,6 +88,10 @@ static void writetup_index_brin(Tuplesortstate *state, LogicalTape *tape,
8288
SortTuple *stup);
8389
static void readtup_index_brin(Tuplesortstate *state, SortTuple *stup,
8490
LogicalTape *tape, unsigned int len);
91+
static void writetup_index_gin(Tuplesortstate *state, LogicalTape *tape,
92+
SortTuple *stup);
93+
static void readtup_index_gin(Tuplesortstate *state, SortTuple *stup,
94+
LogicalTape *tape, unsigned int len);
8595
static int comparetup_datum(const SortTuple *a, const SortTuple *b,
8696
Tuplesortstate *state);
8797
static int comparetup_datum_tiebreak(const SortTuple *a, const SortTuple *b,
@@ -568,6 +578,77 @@ tuplesort_begin_index_brin(int workMem,
568578
return state;
569579
}
570580

581+
Tuplesortstate *
582+
tuplesort_begin_index_gin(Relation heapRel,
583+
Relation indexRel,
584+
int workMem, SortCoordinate coordinate,
585+
int sortopt)
586+
{
587+
Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
588+
sortopt);
589+
TuplesortPublic *base = TuplesortstateGetPublic(state);
590+
MemoryContext oldcontext;
591+
int i;
592+
TupleDesc desc = RelationGetDescr(indexRel);
593+
594+
oldcontext = MemoryContextSwitchTo(base->maincontext);
595+
596+
#ifdef TRACE_SORT
597+
if (trace_sort)
598+
elog(LOG,
599+
"begin index sort: workMem = %d, randomAccess = %c",
600+
workMem,
601+
sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
602+
#endif
603+
604+
/*
605+
* Multi-column GIN indexes expand the row into a separate index entry for
606+
* attribute, and that's what we write into the tuplesort. But we still
607+
* need to initialize sortsupport for all the attributes.
608+
*/
609+
base->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
610+
611+
/* Prepare SortSupport data for each column */
612+
base->sortKeys = (SortSupport) palloc0(base->nKeys *
613+
sizeof(SortSupportData));
614+
615+
for (i = 0; i < base->nKeys; i++)
616+
{
617+
SortSupport sortKey = base->sortKeys + i;
618+
Form_pg_attribute att = TupleDescAttr(desc, i);
619+
TypeCacheEntry *typentry;
620+
621+
sortKey->ssup_cxt = CurrentMemoryContext;
622+
sortKey->ssup_collation = indexRel->rd_indcollation[i];
623+
sortKey->ssup_nulls_first = false;
624+
sortKey->ssup_attno = i + 1;
625+
sortKey->abbreviate = false;
626+
627+
Assert(sortKey->ssup_attno != 0);
628+
629+
if (!OidIsValid(sortKey->ssup_collation))
630+
sortKey->ssup_collation = DEFAULT_COLLATION_OID;
631+
632+
/*
633+
* Look for a ordering for the index key data type, and then the sort
634+
* support function.
635+
*/
636+
typentry = lookup_type_cache(att->atttypid, TYPECACHE_LT_OPR);
637+
PrepareSortSupportFromOrderingOp(typentry->lt_opr, sortKey);
638+
}
639+
640+
base->removeabbrev = removeabbrev_index_gin;
641+
base->comparetup = comparetup_index_gin;
642+
base->writetup = writetup_index_gin;
643+
base->readtup = readtup_index_gin;
644+
base->haveDatum1 = false;
645+
base->arg = NULL;
646+
647+
MemoryContextSwitchTo(oldcontext);
648+
649+
return state;
650+
}
651+
571652
Tuplesortstate *
572653
tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
573654
bool nullsFirstFlag, int workMem,
@@ -803,6 +884,37 @@ tuplesort_putbrintuple(Tuplesortstate *state, BrinTuple *tuple, Size size)
803884
MemoryContextSwitchTo(oldcontext);
804885
}
805886

887+
void
888+
tuplesort_putgintuple(Tuplesortstate *state, GinTuple *tuple, Size size)
889+
{
890+
SortTuple stup;
891+
GinTuple *ctup;
892+
TuplesortPublic *base = TuplesortstateGetPublic(state);
893+
MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext);
894+
Size tuplen;
895+
896+
/* copy the GinTuple into the right memory context */
897+
ctup = palloc(size);
898+
memcpy(ctup, tuple, size);
899+
900+
stup.tuple = ctup;
901+
stup.datum1 = (Datum) 0;
902+
stup.isnull1 = false;
903+
904+
/* GetMemoryChunkSpace is not supported for bump contexts */
905+
if (TupleSortUseBumpTupleCxt(base->sortopt))
906+
tuplen = MAXALIGN(size);
907+
else
908+
tuplen = GetMemoryChunkSpace(ctup);
909+
910+
tuplesort_puttuple_common(state, &stup,
911+
base->sortKeys &&
912+
base->sortKeys->abbrev_converter &&
913+
!stup.isnull1, tuplen);
914+
915+
MemoryContextSwitchTo(oldcontext);
916+
}
917+
806918
/*
807919
* Accept one Datum while collecting input data for sort.
808920
*
@@ -975,6 +1087,29 @@ tuplesort_getbrintuple(Tuplesortstate *state, Size *len, bool forward)
9751087
return &btup->tuple;
9761088
}
9771089

1090+
GinTuple *
1091+
tuplesort_getgintuple(Tuplesortstate *state, Size *len, bool forward)
1092+
{
1093+
TuplesortPublic *base = TuplesortstateGetPublic(state);
1094+
MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
1095+
SortTuple stup;
1096+
GinTuple *tup;
1097+
1098+
if (!tuplesort_gettuple_common(state, forward, &stup))
1099+
stup.tuple = NULL;
1100+
1101+
MemoryContextSwitchTo(oldcontext);
1102+
1103+
if (!stup.tuple)
1104+
return false;
1105+
1106+
tup = (GinTuple *) stup.tuple;
1107+
1108+
*len = tup->tuplen;
1109+
1110+
return tup;
1111+
}
1112+
9781113
/*
9791114
* Fetch the next Datum in either forward or back direction.
9801115
* Returns false if no more datums.
@@ -1763,6 +1898,69 @@ readtup_index_brin(Tuplesortstate *state, SortTuple *stup,
17631898
stup->datum1 = tuple->tuple.bt_blkno;
17641899
}
17651900

1901+
/*
1902+
* Routines specialized for GIN case
1903+
*/
1904+
1905+
static void
1906+
removeabbrev_index_gin(Tuplesortstate *state, SortTuple *stups, int count)
1907+
{
1908+
Assert(false);
1909+
elog(ERROR, "removeabbrev_index_gin not implemented");
1910+
}
1911+
1912+
static int
1913+
comparetup_index_gin(const SortTuple *a, const SortTuple *b,
1914+
Tuplesortstate *state)
1915+
{
1916+
TuplesortPublic *base = TuplesortstateGetPublic(state);
1917+
1918+
Assert(!TuplesortstateGetPublic(state)->haveDatum1);
1919+
1920+
return _gin_compare_tuples((GinTuple *) a->tuple,
1921+
(GinTuple *) b->tuple,
1922+
base->sortKeys);
1923+
}
1924+
1925+
static void
1926+
writetup_index_gin(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
1927+
{
1928+
TuplesortPublic *base = TuplesortstateGetPublic(state);
1929+
GinTuple *tuple = (GinTuple *) stup->tuple;
1930+
unsigned int tuplen = tuple->tuplen;
1931+
1932+
tuplen = tuplen + sizeof(tuplen);
1933+
LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
1934+
LogicalTapeWrite(tape, tuple, tuple->tuplen);
1935+
if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
1936+
LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
1937+
}
1938+
1939+
static void
1940+
readtup_index_gin(Tuplesortstate *state, SortTuple *stup,
1941+
LogicalTape *tape, unsigned int len)
1942+
{
1943+
GinTuple *tuple;
1944+
TuplesortPublic *base = TuplesortstateGetPublic(state);
1945+
unsigned int tuplen = len - sizeof(unsigned int);
1946+
1947+
/*
1948+
* Allocate space for the GIN sort tuple, which already has the proper
1949+
* length included in the header.
1950+
*/
1951+
tuple = (GinTuple *) tuplesort_readtup_alloc(state, tuplen);
1952+
1953+
tuple->tuplen = tuplen;
1954+
1955+
LogicalTapeReadExact(tape, tuple, tuplen);
1956+
if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
1957+
LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
1958+
stup->tuple = (void *) tuple;
1959+
1960+
/* no abbreviations (FIXME maybe use attrnum for this?) */
1961+
stup->datum1 = (Datum) 0;
1962+
}
1963+
17661964
/*
17671965
* Routines specialized for DatumTuple case
17681966
*/

src/include/access/gin.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212

1313
#include "access/xlogreader.h"
1414
#include "lib/stringinfo.h"
15+
#include "nodes/execnodes.h"
16+
#include "storage/shm_toc.h"
1517
#include "storage/block.h"
1618
#include "utils/relcache.h"
1719

@@ -36,6 +38,17 @@
3638
#define GIN_SEARCH_MODE_ALL 2
3739
#define GIN_SEARCH_MODE_EVERYTHING 3 /* for internal use only */
3840

41+
/*
42+
* Constant definition for progress reporting. Phase numbers must match
43+
* ginbuildphasename.
44+
*/
45+
/* PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE is 1 (see progress.h) */
46+
#define PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN 2
47+
#define PROGRESS_GIN_PHASE_PERFORMSORT_1 3
48+
#define PROGRESS_GIN_PHASE_MERGE_1 4
49+
#define PROGRESS_GIN_PHASE_PERFORMSORT_2 5
50+
#define PROGRESS_GIN_PHASE_MERGE_2 6
51+
3952
/*
4053
* GinStatsData represents stats data for planner use
4154
*/
@@ -88,4 +101,6 @@ extern void ginGetStats(Relation index, GinStatsData *stats);
88101
extern void ginUpdateStats(Relation index, const GinStatsData *stats,
89102
bool is_build);
90103

104+
extern void _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc);
105+
91106
#endif /* GIN_H */

src/include/access/gin_private.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ extern Datum *ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
109109
extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
110110
extern Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple,
111111
GinNullCategory *category);
112+
extern char *ginbuildphasename(int64 phasenum);
112113

113114
/* gininsert.c */
114115
extern IndexBuildResult *ginbuild(Relation heap, Relation index,

0 commit comments

Comments
 (0)