Skip to content

Commit 6ed83d5

Browse files
committed
Use bump memory context for tuplesorts
29f6a95 added a bump allocator type for efficient compact allocations. Here we make use of this for non-bounded tuplesorts to store tuples. This is very space efficient when storing narrow tuples due to bump.c not having chunk headers. This means we can fit more tuples in work_mem before spilling to disk, or perform an in-memory sort touching fewer cacheline. Author: David Rowley Reviewed-by: Nathan Bossart Reviewed-by: Matthias van de Meent Reviewed-by: Tomas Vondra Reviewed-by: John Naylor Discussion: https://postgr.es/m/CAApHDvqGSpCU95TmM=Bp=6xjL_nLys4zdZOpfNyWBk97Xrdj2w@mail.gmail.com
1 parent f3ff7bf commit 6ed83d5

File tree

3 files changed

+78
-33
lines changed

3 files changed

+78
-33
lines changed

src/backend/utils/sort/tuplesort.c

Lines changed: 29 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,11 @@ struct Tuplesortstate
191191
* tuples to return? */
192192
bool boundUsed; /* true if we made use of a bounded heap */
193193
int bound; /* if bounded, the maximum number of tuples */
194+
int64 tupleMem; /* memory consumed by individual tuples.
195+
* storing this separately from what we track
196+
* in availMem allows us to subtract the
197+
* memory consumed by all tuples when dumping
198+
* tuples to tape */
194199
int64 availMem; /* remaining memory available, in bytes */
195200
int64 allowedMem; /* total memory allowed, in bytes */
196201
int maxTapes; /* max number of input tapes to merge in each
@@ -764,18 +769,18 @@ tuplesort_begin_batch(Tuplesortstate *state)
764769
* in the parent context, not this context, because there is no need to
765770
* free memtuples early. For bounded sorts, tuples may be pfreed in any
766771
* order, so we use a regular aset.c context so that it can make use of
767-
* free'd memory. When the sort is not bounded, we make use of a
768-
* generation.c context as this keeps allocations more compact with less
769-
* wastage. Allocations are also slightly more CPU efficient.
770-
*/
771-
if (state->base.sortopt & TUPLESORT_ALLOWBOUNDED)
772+
* free'd memory. When the sort is not bounded, we make use of a bump.c
773+
* context as this keeps allocations more compact with less wastage.
774+
* Allocations are also slightly more CPU efficient.
775+
*/
776+
if (TupleSortUseBumpTupleCxt(state->base.sortopt))
777+
state->base.tuplecontext = BumpContextCreate(state->base.sortcontext,
778+
"Caller tuples",
779+
ALLOCSET_DEFAULT_SIZES);
780+
else
772781
state->base.tuplecontext = AllocSetContextCreate(state->base.sortcontext,
773782
"Caller tuples",
774783
ALLOCSET_DEFAULT_SIZES);
775-
else
776-
state->base.tuplecontext = GenerationContextCreate(state->base.sortcontext,
777-
"Caller tuples",
778-
ALLOCSET_DEFAULT_SIZES);
779784

780785

781786
state->status = TSS_INITIAL;
@@ -1181,15 +1186,16 @@ grow_memtuples(Tuplesortstate *state)
11811186
* Shared code for tuple and datum cases.
11821187
*/
11831188
void
1184-
tuplesort_puttuple_common(Tuplesortstate *state, SortTuple *tuple, bool useAbbrev)
1189+
tuplesort_puttuple_common(Tuplesortstate *state, SortTuple *tuple,
1190+
bool useAbbrev, Size tuplen)
11851191
{
11861192
MemoryContext oldcontext = MemoryContextSwitchTo(state->base.sortcontext);
11871193

11881194
Assert(!LEADER(state));
11891195

1190-
/* Count the size of the out-of-line data */
1191-
if (tuple->tuple != NULL)
1192-
USEMEM(state, GetMemoryChunkSpace(tuple->tuple));
1196+
/* account for the memory used for this tuple */
1197+
USEMEM(state, tuplen);
1198+
state->tupleMem += tuplen;
11931199

11941200
if (!useAbbrev)
11951201
{
@@ -2397,26 +2403,26 @@ dumptuples(Tuplesortstate *state, bool alltuples)
23972403
SortTuple *stup = &state->memtuples[i];
23982404

23992405
WRITETUP(state, state->destTape, stup);
2400-
2401-
/*
2402-
* Account for freeing the tuple, but no need to do the actual pfree
2403-
* since the tuplecontext is being reset after the loop.
2404-
*/
2405-
if (stup->tuple != NULL)
2406-
FREEMEM(state, GetMemoryChunkSpace(stup->tuple));
24072406
}
24082407

24092408
state->memtupcount = 0;
24102409

24112410
/*
24122411
* Reset tuple memory. We've freed all of the tuples that we previously
24132412
* allocated. It's important to avoid fragmentation when there is a stark
2414-
* change in the sizes of incoming tuples. Fragmentation due to
2415-
* AllocSetFree's bucketing by size class might be particularly bad if
2416-
* this step wasn't taken.
2413+
* change in the sizes of incoming tuples. In bounded sorts,
2414+
* fragmentation due to AllocSetFree's bucketing by size class might be
2415+
* particularly bad if this step wasn't taken.
24172416
*/
24182417
MemoryContextReset(state->base.tuplecontext);
24192418

2419+
/*
2420+
* Now update the memory accounting to subtract the memory used by the
2421+
* tuple.
2422+
*/
2423+
FREEMEM(state, state->tupleMem);
2424+
state->tupleMem = 0;
2425+
24202426
markrunend(state->destTape);
24212427

24222428
#ifdef TRACE_SORT

src/backend/utils/sort/tuplesortvariants.c

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -674,6 +674,7 @@ tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot)
674674
SortTuple stup;
675675
MinimalTuple tuple;
676676
HeapTupleData htup;
677+
Size tuplen;
677678

678679
/* copy the tuple into sort storage */
679680
tuple = ExecCopySlotMinimalTuple(slot);
@@ -686,9 +687,15 @@ tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot)
686687
tupDesc,
687688
&stup.isnull1);
688689

690+
/* GetMemoryChunkSpace is not supported for bump contexts */
691+
if (TupleSortUseBumpTupleCxt(base->sortopt))
692+
tuplen = MAXALIGN(tuple->t_len);
693+
else
694+
tuplen = GetMemoryChunkSpace(tuple);
695+
689696
tuplesort_puttuple_common(state, &stup,
690697
base->sortKeys->abbrev_converter &&
691-
!stup.isnull1);
698+
!stup.isnull1, tuplen);
692699

693700
MemoryContextSwitchTo(oldcontext);
694701
}
@@ -705,6 +712,7 @@ tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
705712
TuplesortPublic *base = TuplesortstateGetPublic(state);
706713
MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext);
707714
TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
715+
Size tuplen;
708716

709717
/* copy the tuple into sort storage */
710718
tup = heap_copytuple(tup);
@@ -722,10 +730,16 @@ tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
722730
&stup.isnull1);
723731
}
724732

733+
/* GetMemoryChunkSpace is not supported for bump contexts */
734+
if (TupleSortUseBumpTupleCxt(base->sortopt))
735+
tuplen = MAXALIGN(HEAPTUPLESIZE + tup->t_len);
736+
else
737+
tuplen = GetMemoryChunkSpace(tup);
738+
725739
tuplesort_puttuple_common(state, &stup,
726740
base->haveDatum1 &&
727741
base->sortKeys->abbrev_converter &&
728-
!stup.isnull1);
742+
!stup.isnull1, tuplen);
729743

730744
MemoryContextSwitchTo(oldcontext);
731745
}
@@ -743,6 +757,7 @@ tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel,
743757
IndexTuple tuple;
744758
TuplesortPublic *base = TuplesortstateGetPublic(state);
745759
TuplesortIndexArg *arg = (TuplesortIndexArg *) base->arg;
760+
Size tuplen;
746761

747762
stup.tuple = index_form_tuple_context(RelationGetDescr(rel), values,
748763
isnull, base->tuplecontext);
@@ -754,10 +769,16 @@ tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel,
754769
RelationGetDescr(arg->indexRel),
755770
&stup.isnull1);
756771

772+
/* GetMemoryChunkSpace is not supported for bump contexts */
773+
if (TupleSortUseBumpTupleCxt(base->sortopt))
774+
tuplen = MAXALIGN(tuple->t_info & INDEX_SIZE_MASK);
775+
else
776+
tuplen = GetMemoryChunkSpace(tuple);
777+
757778
tuplesort_puttuple_common(state, &stup,
758779
base->sortKeys &&
759780
base->sortKeys->abbrev_converter &&
760-
!stup.isnull1);
781+
!stup.isnull1, tuplen);
761782
}
762783

763784
/*
@@ -770,6 +791,7 @@ tuplesort_putbrintuple(Tuplesortstate *state, BrinTuple *tuple, Size size)
770791
BrinSortTuple *bstup;
771792
TuplesortPublic *base = TuplesortstateGetPublic(state);
772793
MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext);
794+
Size tuplen;
773795

774796
/* allocate space for the whole BRIN sort tuple */
775797
bstup = palloc(BRINSORTTUPLE_SIZE(size));
@@ -781,10 +803,16 @@ tuplesort_putbrintuple(Tuplesortstate *state, BrinTuple *tuple, Size size)
781803
stup.datum1 = tuple->bt_blkno;
782804
stup.isnull1 = false;
783805

806+
/* GetMemoryChunkSpace is not supported for bump contexts */
807+
if (TupleSortUseBumpTupleCxt(base->sortopt))
808+
tuplen = MAXALIGN(BRINSORTTUPLE_SIZE(size));
809+
else
810+
tuplen = GetMemoryChunkSpace(bstup);
811+
784812
tuplesort_puttuple_common(state, &stup,
785813
base->sortKeys &&
786814
base->sortKeys->abbrev_converter &&
787-
!stup.isnull1);
815+
!stup.isnull1, tuplen);
788816

789817
MemoryContextSwitchTo(oldcontext);
790818
}
@@ -833,7 +861,7 @@ tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
833861

834862
tuplesort_puttuple_common(state, &stup,
835863
base->tuples &&
836-
base->sortKeys->abbrev_converter && !isNull);
864+
base->sortKeys->abbrev_converter && !isNull, 0);
837865

838866
MemoryContextSwitchTo(oldcontext);
839867
}

src/include/utils/tuplesort.h

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,15 @@ typedef enum
9898
/* specifies if the tuplesort is able to support bounded sorts */
9999
#define TUPLESORT_ALLOWBOUNDED (1 << 1)
100100

101+
/*
102+
* For bounded sort, tuples get pfree'd when they fall outside of the bound.
103+
* When bounded sorts are not required, we can use a bump context for tuple
104+
* allocation as there's no risk that pfree will ever be called for a tuple.
105+
* Define a macro to make it easier for code to figure out if we're using a
106+
* bump allocator.
107+
*/
108+
#define TupleSortUseBumpTupleCxt(opt) (((opt) & TUPLESORT_ALLOWBOUNDED) == 0)
109+
101110
typedef struct TuplesortInstrumentation
102111
{
103112
TuplesortMethod sortMethod; /* sort algorithm used */
@@ -109,10 +118,11 @@ typedef struct TuplesortInstrumentation
109118
* The objects we actually sort are SortTuple structs. These contain
110119
* a pointer to the tuple proper (might be a MinimalTuple or IndexTuple),
111120
* which is a separate palloc chunk --- we assume it is just one chunk and
112-
* can be freed by a simple pfree() (except during merge, when we use a
113-
* simple slab allocator). SortTuples also contain the tuple's first key
114-
* column in Datum/nullflag format, and a source/input tape number that
115-
* tracks which tape each heap element/slot belongs to during merging.
121+
* can be freed by a simple pfree() (except during merge, where we use a
122+
* simple slab allocator, and during a non-bounded sort where we use a bump
123+
* allocator). SortTuples also contain the tuple's first key column in
124+
* Datum/nullflag format, and a source/input tape number that tracks which
125+
* tape each heap element/slot belongs to during merging.
116126
*
117127
* Storing the first key column lets us save heap_getattr or index_getattr
118128
* calls during tuple comparisons. We could extract and save all the key
@@ -367,7 +377,8 @@ extern Tuplesortstate *tuplesort_begin_common(int workMem,
367377
extern void tuplesort_set_bound(Tuplesortstate *state, int64 bound);
368378
extern bool tuplesort_used_bound(Tuplesortstate *state);
369379
extern void tuplesort_puttuple_common(Tuplesortstate *state,
370-
SortTuple *tuple, bool useAbbrev);
380+
SortTuple *tuple, bool useAbbrev,
381+
Size tuplen);
371382
extern void tuplesort_performsort(Tuplesortstate *state);
372383
extern bool tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
373384
SortTuple *stup);

0 commit comments

Comments
 (0)