22
22
#include "nodes/makefuncs.h"
23
23
#include "nodes/nodeFuncs.h"
24
24
#include "nodes/plannodes.h"
25
+ #include "optimizer/cost.h"
25
26
#include "optimizer/optimizer.h"
26
27
#include "optimizer/pathnode.h"
27
28
#include "optimizer/paths.h"
28
29
#include "partitioning/partbounds.h"
29
30
#include "utils/lsyscache.h"
30
31
32
+ /* Consider reordering of GROUP BY keys? */
33
+ bool enable_group_by_reordering = true;
31
34
32
35
static bool pathkey_is_redundant (PathKey * new_pathkey , List * pathkeys );
33
36
static bool matches_boolean_partition_clause (RestrictInfo * rinfo ,
@@ -350,6 +353,202 @@ pathkeys_contained_in(List *keys1, List *keys2)
350
353
return false;
351
354
}
352
355
356
+ /*
357
+ * group_keys_reorder_by_pathkeys
358
+ * Reorder GROUP BY keys to match the input pathkeys.
359
+ *
360
+ * Function returns new lists (pathkeys and clauses), original GROUP BY lists
361
+ * stay untouched.
362
+ *
363
+ * Returns the number of GROUP BY keys with a matching pathkey.
364
+ */
365
+ static int
366
+ group_keys_reorder_by_pathkeys (List * pathkeys , List * * group_pathkeys ,
367
+ List * * group_clauses ,
368
+ int num_groupby_pathkeys )
369
+ {
370
+ List * new_group_pathkeys = NIL ,
371
+ * new_group_clauses = NIL ;
372
+ ListCell * lc ;
373
+ int n ;
374
+
375
+ if (pathkeys == NIL || * group_pathkeys == NIL )
376
+ return 0 ;
377
+
378
+ /*
379
+ * Walk the pathkeys (determining ordering of the input path) and see if
380
+ * there's a matching GROUP BY key. If we find one, we append it to the
381
+ * list, and do the same for the clauses.
382
+ *
383
+ * Once we find the first pathkey without a matching GROUP BY key, the
384
+ * rest of the pathkeys are useless and can't be used to evaluate the
385
+ * grouping, so we abort the loop and ignore the remaining pathkeys.
386
+ */
387
+ foreach (lc , pathkeys )
388
+ {
389
+ PathKey * pathkey = (PathKey * ) lfirst (lc );
390
+ SortGroupClause * sgc ;
391
+
392
+ /*
393
+ * Pathkeys are built in a way that allows simply comparing pointers.
394
+ * Give up if we can't find the matching pointer. Also give up if
395
+ * there is no sortclause reference for some reason.
396
+ */
397
+ if (foreach_current_index (lc ) >= num_groupby_pathkeys ||
398
+ !list_member_ptr (* group_pathkeys , pathkey ) ||
399
+ pathkey -> pk_eclass -> ec_sortref == 0 )
400
+ break ;
401
+
402
+ /*
403
+ * Since 1349d27 pathkey coming from underlying node can be in the
404
+ * root->group_pathkeys but not in the processed_groupClause. So, we
405
+ * should be careful here.
406
+ */
407
+ sgc = get_sortgroupref_clause_noerr (pathkey -> pk_eclass -> ec_sortref ,
408
+ * group_clauses );
409
+ if (!sgc )
410
+ /* The grouping clause does not cover this pathkey */
411
+ break ;
412
+
413
+ /*
414
+ * Sort group clause should have an ordering operator as long as there
415
+ * is an associated pathkey.
416
+ */
417
+ Assert (OidIsValid (sgc -> sortop ));
418
+
419
+ new_group_pathkeys = lappend (new_group_pathkeys , pathkey );
420
+ new_group_clauses = lappend (new_group_clauses , sgc );
421
+ }
422
+
423
+ /* remember the number of pathkeys with a matching GROUP BY key */
424
+ n = list_length (new_group_pathkeys );
425
+
426
+ /* append the remaining group pathkeys (will be treated as not sorted) */
427
+ * group_pathkeys = list_concat_unique_ptr (new_group_pathkeys ,
428
+ * group_pathkeys );
429
+ * group_clauses = list_concat_unique_ptr (new_group_clauses ,
430
+ * group_clauses );
431
+
432
+ return n ;
433
+ }
434
+
435
+ /*
436
+ * pathkeys_are_duplicate
437
+ * Check if give pathkeys are already contained the list of
438
+ * PathKeyInfo's.
439
+ */
440
+ static bool
441
+ pathkeys_are_duplicate (List * infos , List * pathkeys )
442
+ {
443
+ ListCell * lc ;
444
+
445
+ foreach (lc , infos )
446
+ {
447
+ PathKeyInfo * info = lfirst_node (PathKeyInfo , lc );
448
+
449
+ if (compare_pathkeys (pathkeys , info -> pathkeys ) == PATHKEYS_EQUAL )
450
+ return true;
451
+ }
452
+ return false;
453
+ }
454
+
455
+ /*
456
+ * get_useful_group_keys_orderings
457
+ * Determine which orderings of GROUP BY keys are potentially interesting.
458
+ *
459
+ * Returns a list of PathKeyInfo items, each representing an interesting
460
+ * ordering of GROUP BY keys. Each item stores pathkeys and clauses in the
461
+ * matching order.
462
+ *
463
+ * The function considers (and keeps) multiple GROUP BY orderings:
464
+ *
465
+ * - the original ordering, as specified by the GROUP BY clause,
466
+ * - GROUP BY keys reordered to match 'path' ordering (as much as possible),
467
+ * - GROUP BY keys to match target ORDER BY clause (as much as possible).
468
+ */
469
+ List *
470
+ get_useful_group_keys_orderings (PlannerInfo * root , Path * path )
471
+ {
472
+ Query * parse = root -> parse ;
473
+ List * infos = NIL ;
474
+ PathKeyInfo * info ;
475
+
476
+ List * pathkeys = root -> group_pathkeys ;
477
+ List * clauses = root -> processed_groupClause ;
478
+
479
+ /* always return at least the original pathkeys/clauses */
480
+ info = makeNode (PathKeyInfo );
481
+ info -> pathkeys = pathkeys ;
482
+ info -> clauses = clauses ;
483
+ infos = lappend (infos , info );
484
+
485
+ /*
486
+ * Should we try generating alternative orderings of the group keys? If
487
+ * not, we produce only the order specified in the query, i.e. the
488
+ * optimization is effectively disabled.
489
+ */
490
+ if (!enable_group_by_reordering )
491
+ return infos ;
492
+
493
+ /*
494
+ * Grouping sets have own and more complex logic to decide the ordering.
495
+ */
496
+ if (parse -> groupingSets )
497
+ return infos ;
498
+
499
+ /*
500
+ * If the path is sorted in some way, try reordering the group keys to
501
+ * match the path as much of the ordering as possible. Then thanks to
502
+ * incremental sort we would get this sort as cheap as possible.
503
+ */
504
+ if (path -> pathkeys &&
505
+ !pathkeys_contained_in (path -> pathkeys , root -> group_pathkeys ))
506
+ {
507
+ int n ;
508
+
509
+ n = group_keys_reorder_by_pathkeys (path -> pathkeys , & pathkeys , & clauses ,
510
+ root -> num_groupby_pathkeys );
511
+
512
+ if (n > 0 &&
513
+ (enable_incremental_sort || n == root -> num_groupby_pathkeys ) &&
514
+ !pathkeys_are_duplicate (infos , pathkeys ))
515
+ {
516
+ info = makeNode (PathKeyInfo );
517
+ info -> pathkeys = pathkeys ;
518
+ info -> clauses = clauses ;
519
+
520
+ infos = lappend (infos , info );
521
+ }
522
+ }
523
+
524
+ /*
525
+ * Try reordering pathkeys to minimize the sort cost (this time consider
526
+ * the ORDER BY clause).
527
+ */
528
+ if (root -> sort_pathkeys &&
529
+ !pathkeys_contained_in (root -> sort_pathkeys , root -> group_pathkeys ))
530
+ {
531
+ int n ;
532
+
533
+ n = group_keys_reorder_by_pathkeys (root -> sort_pathkeys , & pathkeys ,
534
+ & clauses ,
535
+ root -> num_groupby_pathkeys );
536
+
537
+ if (n > 0 &&
538
+ (enable_incremental_sort || n == list_length (root -> sort_pathkeys )) &&
539
+ !pathkeys_are_duplicate (infos , pathkeys ))
540
+ {
541
+ info = makeNode (PathKeyInfo );
542
+ info -> pathkeys = pathkeys ;
543
+ info -> clauses = clauses ;
544
+
545
+ infos = lappend (infos , info );
546
+ }
547
+ }
548
+
549
+ return infos ;
550
+ }
551
+
353
552
/*
354
553
* pathkeys_count_contained_in
355
554
* Same as pathkeys_contained_in, but also sets length of longest
@@ -1939,6 +2138,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
1939
2138
return n_common_pathkeys ;
1940
2139
}
1941
2140
2141
+ /*
2142
+ * pathkeys_useful_for_grouping
2143
+ * Count the number of pathkeys that are useful for grouping (instead of
2144
+ * explicit sort)
2145
+ *
2146
+ * Group pathkeys could be reordered to benefit from the ordering. The
2147
+ * ordering may not be "complete" and may require incremental sort, but that's
2148
+ * fine. So we simply count prefix pathkeys with a matching group key, and
2149
+ * stop once we find the first pathkey without a match.
2150
+ *
2151
+ * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
2152
+ * pathkeys are useful for grouping, and we might do incremental sort to get
2153
+ * path ordered by (a,b,e).
2154
+ *
2155
+ * This logic is necessary to retain paths with ordering not matching grouping
2156
+ * keys directly, without the reordering.
2157
+ *
2158
+ * Returns the length of pathkey prefix with matching group keys.
2159
+ */
2160
+ static int
2161
+ pathkeys_useful_for_grouping (PlannerInfo * root , List * pathkeys )
2162
+ {
2163
+ ListCell * key ;
2164
+ int n = 0 ;
2165
+
2166
+ /* no special ordering requested for grouping */
2167
+ if (root -> group_pathkeys == NIL )
2168
+ return 0 ;
2169
+
2170
+ /* unordered path */
2171
+ if (pathkeys == NIL )
2172
+ return 0 ;
2173
+
2174
+ /* walk the pathkeys and search for matching group key */
2175
+ foreach (key , pathkeys )
2176
+ {
2177
+ PathKey * pathkey = (PathKey * ) lfirst (key );
2178
+
2179
+ /* no matching group key, we're done */
2180
+ if (!list_member_ptr (root -> group_pathkeys , pathkey ))
2181
+ break ;
2182
+
2183
+ n ++ ;
2184
+ }
2185
+
2186
+ return n ;
2187
+ }
2188
+
1942
2189
/*
1943
2190
* truncate_useless_pathkeys
1944
2191
* Shorten the given pathkey list to just the useful pathkeys.
@@ -1953,6 +2200,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
1953
2200
1954
2201
nuseful = pathkeys_useful_for_merging (root , rel , pathkeys );
1955
2202
nuseful2 = pathkeys_useful_for_ordering (root , pathkeys );
2203
+ if (nuseful2 > nuseful )
2204
+ nuseful = nuseful2 ;
2205
+ nuseful2 = pathkeys_useful_for_grouping (root , pathkeys );
1956
2206
if (nuseful2 > nuseful )
1957
2207
nuseful = nuseful2 ;
1958
2208
@@ -1988,6 +2238,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
1988
2238
{
1989
2239
if (rel -> joininfo != NIL || rel -> has_eclass_joins )
1990
2240
return true; /* might be able to use pathkeys for merging */
2241
+ if (root -> group_pathkeys != NIL )
2242
+ return true; /* might be able to use pathkeys for grouping */
1991
2243
if (root -> query_pathkeys != NIL )
1992
2244
return true; /* might be able to use them for ordering */
1993
2245
return false; /* definitely useless */
0 commit comments