@@ -406,6 +406,7 @@ static void build_hash_tables(AggState *aggstate);
 static void build_hash_table(AggState *aggstate, int setno, long nbuckets);
 static void hashagg_recompile_expressions(AggState *aggstate, bool minslot,
                                           bool nullcheck);
+static void hash_create_memory(AggState *aggstate);
 static long hash_choose_num_buckets(double hashentrysize,
                                     long ngroups, Size memory);
 static int  hash_choose_num_partitions(double input_groups,
@@ -1509,7 +1510,7 @@ build_hash_table(AggState *aggstate, int setno, long nbuckets)
 {
     AggStatePerHash perhash = &aggstate->perhash[setno];
     MemoryContext metacxt = aggstate->hash_metacxt;
-    MemoryContext hashcxt = aggstate->hashcontext->ecxt_per_tuple_memory;
+    MemoryContext tablecxt = aggstate->hash_tablecxt;
     MemoryContext tmpcxt = aggstate->tmpcontext->ecxt_per_tuple_memory;
     Size        additionalsize;
@@ -1535,7 +1536,7 @@ build_hash_table(AggState *aggstate, int setno, long nbuckets)
                                         nbuckets,
                                         additionalsize,
                                         metacxt,
-                                        hashcxt,
+                                        tablecxt,
                                         tmpcxt,
                                         DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit));
 }
@@ -1706,15 +1707,19 @@ hash_agg_entry_size(int numTrans, Size tupleWidth, Size transitionSpace)
                                      tupleWidth);
     Size        pergroupSize = numTrans * sizeof(AggStatePerGroupData);
 
-    tupleChunkSize = CHUNKHDRSZ + tupleSize;
-
-    if (pergroupSize > 0)
-        pergroupChunkSize = CHUNKHDRSZ + pergroupSize;
-    else
-        pergroupChunkSize = 0;
+    /*
+     * Entries use the Bump allocator, so the chunk sizes are the same as the
+     * requested sizes.
+     */
+    tupleChunkSize = MAXALIGN(tupleSize);
+    pergroupChunkSize = pergroupSize;
 
+    /*
+     * Transition values use AllocSet, which has a chunk header and also uses
+     * power-of-two allocations.
+     */
     if (transitionSpace > 0)
-        transitionChunkSize = CHUNKHDRSZ + transitionSpace;
+        transitionChunkSize = CHUNKHDRSZ + pg_nextpower2_size_t(transitionSpace);
     else
         transitionChunkSize = 0;
 
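To make the new size arithmetic concrete, here is a minimal standalone sketch of the two estimates; MAXALIGN, CHUNKHDRSZ, and nextpower2() are local stand-ins for the PostgreSQL definitions, with typical values assumed (8-byte alignment, 16-byte AllocSet chunk header):

#include <stdio.h>
#include <stddef.h>

/* Stand-ins for the PostgreSQL definitions; the values are typical but
 * platform-dependent. */
#define MAXALIGN(len) (((size_t) (len) + 7) & ~(size_t) 7)
#define CHUNKHDRSZ 16

/* Smallest power of two >= num; stand-in for pg_nextpower2_size_t(). */
static size_t
nextpower2(size_t num)
{
    size_t      result = 1;

    while (result < num)
        result *= 2;
    return result;
}

int
main(void)
{
    size_t      tupleSize = 45;         /* hypothetical grouping-key tuple */
    size_t      transitionSpace = 100;  /* hypothetical byref state size */

    /* Bump allocation: no chunk header, only alignment padding. */
    printf("tuple chunk: %zu\n", MAXALIGN(tupleSize));  /* 48 */

    /* AllocSet allocation: chunk header plus power-of-two rounding. */
    printf("transition chunk: %zu\n",
           CHUNKHDRSZ + nextpower2(transitionSpace));   /* 16 + 128 = 144 */
    return 0;
}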
@@ -1864,8 +1869,11 @@ hash_agg_check_limits(AggState *aggstate)
     uint64      ngroups = aggstate->hash_ngroups_current;
     Size        meta_mem = MemoryContextMemAllocated(aggstate->hash_metacxt,
                                                      true);
-    Size        hashkey_mem = MemoryContextMemAllocated(aggstate->hashcontext->ecxt_per_tuple_memory,
-                                                        true);
+    Size        entry_mem = MemoryContextMemAllocated(aggstate->hash_tablecxt,
+                                                      true);
+    Size        tval_mem = MemoryContextMemAllocated(aggstate->hashcontext->ecxt_per_tuple_memory,
+                                                     true);
+    Size        total_mem = meta_mem + entry_mem + tval_mem;
     bool        do_spill = false;
 
 #ifdef USE_INJECTION_POINTS
@@ -1884,7 +1892,7 @@ hash_agg_check_limits(AggState *aggstate)
      * can be sure to make progress even in edge cases.
      */
     if (aggstate->hash_ngroups_current > 0 &&
-        (meta_mem + hashkey_mem > aggstate->hash_mem_limit ||
+        (total_mem > aggstate->hash_mem_limit ||
          ngroups > aggstate->hash_ngroups_limit))
     {
         do_spill = true;
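The decision itself is a sum-and-compare across the three contexts; a toy model of the check above, with hypothetical numbers, looks like this:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Toy model of the spill check: spill once the combined memory of the
 * metadata, entry, and transition-value contexts exceeds the limit, or
 * once the group count does, but always admit at least one group so that
 * progress is guaranteed. All numbers below are hypothetical.
 */
static bool
should_spill(size_t meta_mem, size_t entry_mem, size_t tval_mem,
             size_t mem_limit, uint64_t ngroups, uint64_t ngroups_limit)
{
    if (ngroups == 0)
        return false;
    return (meta_mem + entry_mem + tval_mem > mem_limit ||
            ngroups > ngroups_limit);
}

int
main(void)
{
    /* 1 MB metadata + 3 MB entries + 512 kB transition values against a
     * 4 MB limit: 4.5 MB > 4 MB, so this would trigger a spill. */
    printf("%s\n", should_spill(1 << 20, 3 << 20, 1 << 19,
                                4 << 20, 50000, UINT64_MAX)
           ? "spill" : "fit");
    return 0;
}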
@@ -1939,6 +1947,7 @@ static void
 hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
 {
     Size        meta_mem;
+    Size        entry_mem;
     Size        hashkey_mem;
     Size        buffer_mem;
     Size        total_mem;
@@ -1950,7 +1959,10 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
     /* memory for the hash table itself */
     meta_mem = MemoryContextMemAllocated(aggstate->hash_metacxt, true);
 
-    /* memory for the group keys and transition states */
+    /* memory for hash entries */
+    entry_mem = MemoryContextMemAllocated(aggstate->hash_tablecxt, true);
+
+    /* memory for byref transition states */
     hashkey_mem = MemoryContextMemAllocated(aggstate->hashcontext->ecxt_per_tuple_memory, true);
 
     /* memory for read/write tape buffers, if spilled */
@@ -1959,7 +1971,7 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
         buffer_mem += HASHAGG_READ_BUFFER_SIZE;
 
     /* update peak mem */
-    total_mem = meta_mem + hashkey_mem + buffer_mem;
+    total_mem = meta_mem + entry_mem + hashkey_mem + buffer_mem;
     if (total_mem > aggstate->hash_mem_peak)
         aggstate->hash_mem_peak = total_mem;
 
@@ -1981,6 +1993,64 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
     }
 }
 
+/*
+ * Create memory contexts used for hash aggregation.
+ */
+static void
+hash_create_memory(AggState *aggstate)
+{
+    Size        maxBlockSize = ALLOCSET_DEFAULT_MAXSIZE;
+
+    /*
+     * The hashcontext's per-tuple memory will be used for byref transition
+     * values and returned by AggCheckCallContext().
+     */
+    aggstate->hashcontext = CreateWorkExprContext(aggstate->ss.ps.state);
+
+    /*
+     * The meta context will be used for the bucket array of
+     * TupleHashEntryData (or arrays, in the case of grouping sets). As the
+     * hash table grows, the bucket array will double in size and the old one
+     * will be freed, so an AllocSet is appropriate. For large bucket arrays,
+     * the large allocation path will be used, so it's not worth worrying
+     * about wasting space due to power-of-two allocations.
+     */
+    aggstate->hash_metacxt = AllocSetContextCreate(aggstate->ss.ps.state->es_query_cxt,
+                                                   "HashAgg meta context",
+                                                   ALLOCSET_DEFAULT_SIZES);
+
+    /*
+     * The hash entries themselves, which include the grouping key
+     * (firstTuple) and pergroup data, are stored in the table context. The
+     * bump allocator can be used because the entries are not freed until the
+     * entire hash table is reset. The bump allocator is faster for
+     * allocations and avoids wasting space on the chunk header or
+     * power-of-two allocations.
+     *
+     * Like CreateWorkExprContext(), use smaller sizings for smaller work_mem,
+     * to avoid large jumps in memory usage.
+     */
+    maxBlockSize = pg_prevpower2_size_t(work_mem * (Size) 1024 / 16);
+
+    /* But no bigger than ALLOCSET_DEFAULT_MAXSIZE */
+    maxBlockSize = Min(maxBlockSize, ALLOCSET_DEFAULT_MAXSIZE);
+
+    /* and no smaller than ALLOCSET_DEFAULT_INITSIZE */
+    maxBlockSize = Max(maxBlockSize, ALLOCSET_DEFAULT_INITSIZE);
+
+    aggstate->hash_tablecxt = BumpContextCreate(aggstate->ss.ps.state->es_query_cxt,
+                                                "HashAgg table context",
+                                                ALLOCSET_DEFAULT_MINSIZE,
+                                                ALLOCSET_DEFAULT_INITSIZE,
+                                                maxBlockSize);
+}
+
 /*
  * Choose a reasonable number of buckets for the initial hash table size.
  */
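The clamp on maxBlockSize is easiest to see with numbers. A standalone sketch follows, with prevpower2() and the ALLOCSET_* constants as local stand-ins for the PostgreSQL definitions (8 kB and 8 MB are their usual values):

#include <stdio.h>
#include <stddef.h>

/* Stand-ins for PostgreSQL's allocset defaults. */
#define ALLOCSET_DEFAULT_INITSIZE  (8 * 1024)        /* 8 kB */
#define ALLOCSET_DEFAULT_MAXSIZE   (8 * 1024 * 1024) /* 8 MB */

/* Largest power of two <= num; stand-in for pg_prevpower2_size_t(). */
static size_t
prevpower2(size_t num)
{
    size_t      result = 1;

    while (result <= num / 2)
        result *= 2;
    return result;
}

/* Mirror the maxBlockSize computation in hash_create_memory(). */
static size_t
choose_max_block_size(size_t work_mem_kb)
{
    size_t      maxBlockSize = prevpower2(work_mem_kb * 1024 / 16);

    if (maxBlockSize > ALLOCSET_DEFAULT_MAXSIZE)
        maxBlockSize = ALLOCSET_DEFAULT_MAXSIZE;
    if (maxBlockSize < ALLOCSET_DEFAULT_INITSIZE)
        maxBlockSize = ALLOCSET_DEFAULT_INITSIZE;
    return maxBlockSize;
}

int
main(void)
{
    /* work_mem of 64 kB, 4 MB (the default), and 1 GB */
    size_t      settings[] = {64, 4096, 1048576};

    /* prints 8192 (clamped up), 262144, and 8388608 (clamped down) */
    for (int i = 0; i < 3; i++)
        printf("work_mem=%zukB -> maxBlockSize=%zu bytes\n",
               settings[i], choose_max_block_size(settings[i]));
    return 0;
}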
@@ -2642,6 +2712,7 @@ agg_refill_hash_table(AggState *aggstate)
 
     /* free memory and reset hash tables */
     ReScanExprContext(aggstate->hashcontext);
+    MemoryContextReset(aggstate->hash_tablecxt);
     for (int setno = 0; setno < aggstate->num_hashes; setno++)
         ResetTupleHashTable(aggstate->perhash[setno].hashtable);
 
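This wholesale reset is the only way to reclaim entry memory: bump chunks carry no per-chunk header, so they cannot be pfree'd individually (assert-enabled builds raise an error on the attempt). A sketch of that lifecycle, assuming backend code where the memory-context API is available:

#include "postgres.h"
#include "utils/memutils.h"

/* Sketch: a reset-only Bump context, as used here for the hash entries. */
static void
bump_lifecycle_sketch(void)
{
    MemoryContext tablecxt = BumpContextCreate(CurrentMemoryContext,
                                               "sketch table context",
                                               ALLOCSET_DEFAULT_MINSIZE,
                                               ALLOCSET_DEFAULT_INITSIZE,
                                               ALLOCSET_DEFAULT_MAXSIZE);
    char       *entry = MemoryContextAlloc(tablecxt, 64);

    /* pfree(entry) is not supported by the bump allocator; skipping the
     * chunk header is what makes its allocations cheap. */
    (void) entry;

    MemoryContextReset(tablecxt);   /* reclaims every entry at once */
    MemoryContextDelete(tablecxt);
}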
@@ -3326,7 +3397,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
     }
 
     if (use_hashing)
-        aggstate->hashcontext = CreateWorkExprContext(estate);
+        hash_create_memory(aggstate);
 
     ExecAssignExprContext(estate, &aggstate->ss.ps);
 
@@ -3621,9 +3692,6 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
         Plan       *outerplan = outerPlan(node);
         uint64      totalGroups = 0;
 
-        aggstate->hash_metacxt = AllocSetContextCreate(aggstate->ss.ps.state->es_query_cxt,
-                                                       "HashAgg meta context",
-                                                       ALLOCSET_DEFAULT_SIZES);
         aggstate->hash_spill_rslot = ExecInitExtraTupleSlot(estate, scanDesc,
                                                             &TTSOpsMinimalTuple);
         aggstate->hash_spill_wslot = ExecInitExtraTupleSlot(estate, scanDesc,
@@ -4368,6 +4436,12 @@ ExecEndAgg(AggState *node)
         MemoryContextDelete(node->hash_metacxt);
         node->hash_metacxt = NULL;
     }
+    if (node->hash_tablecxt != NULL)
+    {
+        MemoryContextDelete(node->hash_tablecxt);
+        node->hash_tablecxt = NULL;
+    }
+
 
     for (transno = 0; transno < node->numtrans; transno++)
     {
@@ -4484,6 +4558,7 @@ ExecReScanAgg(AggState *node)
     node->hash_ngroups_current = 0;
 
     ReScanExprContext(node->hashcontext);
+    MemoryContextReset(node->hash_tablecxt);
     /* Rebuild an empty hash table */
     build_hash_tables(node);
     node->table_filled = false;