Skip to content

Commit 775a06d

Browse files
committed
Make group_similar_or_args() reorder clause list as little as possible
Currently, group_similar_or_args() permutes original positions of clauses independently on whether it manages to find any groups of similar clauses. While we are not providing any strict warranties on saving the original order of OR-clauses, it is preferred that the original order be modified as little as possible. This commit changes the reordering algorithm of group_similar_or_args() in the following way. We reorder each group of similar clauses so that the first item of the group stays in place, but all the other items are moved after it. So, if there are no similar clauses, the order of clauses stays the same. When there are some groups, only required reordering happens while the rest of the clauses remain in their places. Reported-by: Andrei Lepikhov <lepihov@gmail.com> Discussion: https://postgr.es/m/3ac7c436-81e1-4191-9caf-b0dd70b51511%40gmail.com Reviewed-by: Pavel Borisov <pashkin.elfe@gmail.com> Reviewed-by: Andrei Lepikhov <lepihov@gmail.com> Reviewed-by: Alena Rybakina <a.rybakina@postgrespro.ru>
1 parent 519338a commit 775a06d

File tree

5 files changed

+141
-42
lines changed

5 files changed

+141
-42
lines changed

src/backend/optimizer/path/indxpath.c

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1189,6 +1189,8 @@ typedef struct
11891189
Oid inputcollid; /* OID of the OpClause input collation */
11901190
int argindex; /* index of the clause in the list of
11911191
* arguments */
1192+
int groupindex; /* value of argindex for the fist clause in
1193+
* the group of similar clauses */
11921194
} OrArgIndexMatch;
11931195

11941196
/*
@@ -1229,6 +1231,29 @@ or_arg_index_match_cmp(const void *a, const void *b)
12291231
return 0;
12301232
}
12311233

1234+
/*
1235+
* Another comparison function for OrArgIndexMatch. It sorts groups together
1236+
* using groupindex. The group items are then sorted by argindex.
1237+
*/
1238+
static int
1239+
or_arg_index_match_cmp_group(const void *a, const void *b)
1240+
{
1241+
const OrArgIndexMatch *match_a = (const OrArgIndexMatch *) a;
1242+
const OrArgIndexMatch *match_b = (const OrArgIndexMatch *) b;
1243+
1244+
if (match_a->groupindex < match_b->groupindex)
1245+
return -1;
1246+
else if (match_a->groupindex > match_b->groupindex)
1247+
return 1;
1248+
1249+
if (match_a->argindex < match_b->argindex)
1250+
return -1;
1251+
else if (match_a->argindex > match_b->argindex)
1252+
return 1;
1253+
1254+
return 0;
1255+
}
1256+
12321257
/*
12331258
* group_similar_or_args
12341259
* Transform incoming OR-restrictinfo into a list of sub-restrictinfos,
@@ -1282,6 +1307,7 @@ group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo)
12821307

12831308
i++;
12841309
matches[i].argindex = i;
1310+
matches[i].groupindex = i;
12851311
matches[i].indexnum = -1;
12861312
matches[i].colnum = -1;
12871313
matches[i].opno = InvalidOid;
@@ -1400,9 +1426,40 @@ group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo)
14001426
return orargs;
14011427
}
14021428

1403-
/* Sort clauses to make similar clauses go together */
1429+
/*
1430+
* Sort clauses to make similar clauses go together. But at the same
1431+
* time, we would like to change the order of clauses as little as
1432+
* possible. To do so, we reorder each group of similar clauses so that
1433+
* the first item of the group stays in place, and all the other items are
1434+
* moved after it. So, if there are no similar clauses, the order of
1435+
* clauses stays the same. When there are some groups, required
1436+
* reordering happens while the rest of the clauses remain in their
1437+
* places. That is achieved by assigning a 'groupindex' to each clause:
1438+
* the number of the first item in the group in the original clause list.
1439+
*/
14041440
qsort(matches, n, sizeof(OrArgIndexMatch), or_arg_index_match_cmp);
14051441

1442+
/* Assign groupindex to the sorted clauses */
1443+
for (i = 1; i < n; i++)
1444+
{
1445+
/*
1446+
* When two clauses are similar and should belong to the same group,
1447+
* copy the 'groupindex' from the previous clause. Given we are
1448+
* considering clauses in direct order, all the clauses would have a
1449+
* 'groupindex' equal to the 'groupindex' of the first clause in the
1450+
* group.
1451+
*/
1452+
if (matches[i].indexnum == matches[i - 1].indexnum &&
1453+
matches[i].colnum == matches[i - 1].colnum &&
1454+
matches[i].opno == matches[i - 1].opno &&
1455+
matches[i].inputcollid == matches[i - 1].inputcollid &&
1456+
matches[i].indexnum != -1)
1457+
matches[i].groupindex = matches[i - 1].groupindex;
1458+
}
1459+
1460+
/* Re-sort clauses first by groupindex then by argindex */
1461+
qsort(matches, n, sizeof(OrArgIndexMatch), or_arg_index_match_cmp_group);
1462+
14061463
/*
14071464
* Group similar clauses into single sub-restrictinfo. Side effect: the
14081465
* resulting list of restrictions will be sorted by indexnum and colnum.

src/test/regress/expected/create_index.out

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1899,13 +1899,13 @@ SELECT * FROM tenk1
18991899
QUERY PLAN
19001900
-------------------------------------------------------------------------------------------------------------------------------------------
19011901
Bitmap Heap Scan on tenk1
1902-
Recheck Cond: (((thousand = 42) AND (tenthous IS NULL)) OR ((thousand = 42) AND ((tenthous = 1) OR (tenthous = 3) OR (tenthous = 42))))
1902+
Recheck Cond: (((thousand = 42) AND ((tenthous = 1) OR (tenthous = 3) OR (tenthous = 42))) OR ((thousand = 42) AND (tenthous IS NULL)))
19031903
Filter: ((tenthous = 1) OR (tenthous = 3) OR (tenthous = 42) OR (tenthous IS NULL))
19041904
-> BitmapOr
1905-
-> Bitmap Index Scan on tenk1_thous_tenthous
1906-
Index Cond: ((thousand = 42) AND (tenthous IS NULL))
19071905
-> Bitmap Index Scan on tenk1_thous_tenthous
19081906
Index Cond: ((thousand = 42) AND (tenthous = ANY ('{1,3,42}'::integer[])))
1907+
-> Bitmap Index Scan on tenk1_thous_tenthous
1908+
Index Cond: ((thousand = 42) AND (tenthous IS NULL))
19091909
(8 rows)
19101910

19111911
EXPLAIN (COSTS OFF)
@@ -1938,13 +1938,13 @@ SELECT * FROM tenk1
19381938
QUERY PLAN
19391939
-----------------------------------------------------------------------------------------------------------------------------------------------------
19401940
Bitmap Heap Scan on tenk1
1941-
Recheck Cond: (((thousand = 42) AND ((tenthous = '3'::bigint) OR (tenthous = '42'::bigint))) OR ((thousand = 42) AND (tenthous = '1'::smallint)))
1941+
Recheck Cond: (((thousand = 42) AND (tenthous = '1'::smallint)) OR ((thousand = 42) AND ((tenthous = '3'::bigint) OR (tenthous = '42'::bigint))))
19421942
Filter: ((tenthous = '1'::smallint) OR (tenthous = '3'::bigint) OR (tenthous = '42'::bigint))
19431943
-> BitmapOr
1944-
-> Bitmap Index Scan on tenk1_thous_tenthous
1945-
Index Cond: ((thousand = 42) AND (tenthous = ANY ('{3,42}'::bigint[])))
19461944
-> Bitmap Index Scan on tenk1_thous_tenthous
19471945
Index Cond: ((thousand = 42) AND (tenthous = '1'::smallint))
1946+
-> Bitmap Index Scan on tenk1_thous_tenthous
1947+
Index Cond: ((thousand = 42) AND (tenthous = ANY ('{3,42}'::bigint[])))
19481948
(8 rows)
19491949

19501950
EXPLAIN (COSTS OFF)
@@ -2129,16 +2129,16 @@ SELECT count(*) FROM tenk1
21292129
---------------------------------------------------------------------------------------------------------------------------
21302130
Aggregate
21312131
-> Bitmap Heap Scan on tenk1
2132-
Recheck Cond: ((hundred = 42) AND (((thousand = 99) AND (tenthous = 2)) OR ((thousand = 42) OR (thousand = 41))))
2132+
Recheck Cond: ((hundred = 42) AND (((thousand = 42) OR (thousand = 41)) OR ((thousand = 99) AND (tenthous = 2))))
21332133
Filter: ((thousand = 42) OR (thousand = 41) OR ((thousand = 99) AND (tenthous = 2)))
21342134
-> BitmapAnd
21352135
-> Bitmap Index Scan on tenk1_hundred
21362136
Index Cond: (hundred = 42)
21372137
-> BitmapOr
2138-
-> Bitmap Index Scan on tenk1_thous_tenthous
2139-
Index Cond: ((thousand = 99) AND (tenthous = 2))
21402138
-> Bitmap Index Scan on tenk1_thous_tenthous
21412139
Index Cond: (thousand = ANY ('{42,41}'::integer[]))
2140+
-> Bitmap Index Scan on tenk1_thous_tenthous
2141+
Index Cond: ((thousand = 99) AND (tenthous = 2))
21422142
(12 rows)
21432143

21442144
SELECT count(*) FROM tenk1
@@ -3248,6 +3248,37 @@ SELECT b.relname,
32483248
(2 rows)
32493249

32503250
DROP TABLE concur_temp_tab_1, concur_temp_tab_2, reindex_temp_before;
3251+
-- No OR-clause groupings should happen, and there should be no clause
3252+
-- permutations in the filtering conditions we could see in the EXPLAIN.
3253+
EXPLAIN (COSTS OFF)
3254+
SELECT * FROM tenk1 WHERE unique1 < 1 OR hundred < 2;
3255+
QUERY PLAN
3256+
--------------------------------------------------
3257+
Bitmap Heap Scan on tenk1
3258+
Recheck Cond: ((unique1 < 1) OR (hundred < 2))
3259+
-> BitmapOr
3260+
-> Bitmap Index Scan on tenk1_unique1
3261+
Index Cond: (unique1 < 1)
3262+
-> Bitmap Index Scan on tenk1_hundred
3263+
Index Cond: (hundred < 2)
3264+
(7 rows)
3265+
3266+
-- OR clauses in the 'unique1' column are grouped, so clause permutation
3267+
-- occurs. W e can see it in the 'Recheck Cond': the second clause involving
3268+
-- the 'unique1' column goes just after the first one.
3269+
EXPLAIN (COSTS OFF)
3270+
SELECT * FROM tenk1 WHERE unique1 < 1 OR unique1 < 3 OR hundred < 2;
3271+
QUERY PLAN
3272+
---------------------------------------------------------------------
3273+
Bitmap Heap Scan on tenk1
3274+
Recheck Cond: (((unique1 < 1) OR (unique1 < 3)) OR (hundred < 2))
3275+
-> BitmapOr
3276+
-> Bitmap Index Scan on tenk1_unique1
3277+
Index Cond: (unique1 < ANY ('{1,3}'::integer[]))
3278+
-> Bitmap Index Scan on tenk1_hundred
3279+
Index Cond: (hundred < 2)
3280+
(7 rows)
3281+
32513282
-- Check bitmap scan can consider similar OR arguments separately without
32523283
-- grouping them into SAOP.
32533284
CREATE TABLE bitmap_split_or (a int NOT NULL, b int NOT NULL, c int NOT NULL);

src/test/regress/expected/join.out

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4333,20 +4333,20 @@ select * from tenk1 a join tenk1 b on
43334333
Nested Loop
43344334
Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR ((a.unique2 = 3) AND (b.hundred = 4)))
43354335
-> Bitmap Heap Scan on tenk1 b
4336-
Recheck Cond: ((hundred = 4) OR (unique1 = 2))
4336+
Recheck Cond: ((unique1 = 2) OR (hundred = 4))
43374337
-> BitmapOr
4338-
-> Bitmap Index Scan on tenk1_hundred
4339-
Index Cond: (hundred = 4)
43404338
-> Bitmap Index Scan on tenk1_unique1
43414339
Index Cond: (unique1 = 2)
4340+
-> Bitmap Index Scan on tenk1_hundred
4341+
Index Cond: (hundred = 4)
43424342
-> Materialize
43434343
-> Bitmap Heap Scan on tenk1 a
4344-
Recheck Cond: ((unique2 = 3) OR (unique1 = 1))
4344+
Recheck Cond: ((unique1 = 1) OR (unique2 = 3))
43454345
-> BitmapOr
4346-
-> Bitmap Index Scan on tenk1_unique2
4347-
Index Cond: (unique2 = 3)
43484346
-> Bitmap Index Scan on tenk1_unique1
43494347
Index Cond: (unique1 = 1)
4348+
-> Bitmap Index Scan on tenk1_unique2
4349+
Index Cond: (unique2 = 3)
43504350
(17 rows)
43514351

43524352
explain (costs off)
@@ -4360,12 +4360,12 @@ select * from tenk1 a join tenk1 b on
43604360
Filter: ((unique1 = 2) OR (ten = 4))
43614361
-> Materialize
43624362
-> Bitmap Heap Scan on tenk1 a
4363-
Recheck Cond: ((unique2 = 3) OR (unique1 = 1))
4363+
Recheck Cond: ((unique1 = 1) OR (unique2 = 3))
43644364
-> BitmapOr
4365-
-> Bitmap Index Scan on tenk1_unique2
4366-
Index Cond: (unique2 = 3)
43674365
-> Bitmap Index Scan on tenk1_unique1
43684366
Index Cond: (unique1 = 1)
4367+
-> Bitmap Index Scan on tenk1_unique2
4368+
Index Cond: (unique2 = 3)
43694369
(12 rows)
43704370

43714371
explain (costs off)
@@ -4377,21 +4377,21 @@ select * from tenk1 a join tenk1 b on
43774377
Nested Loop
43784378
Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4)))
43794379
-> Bitmap Heap Scan on tenk1 b
4380-
Recheck Cond: ((hundred = 4) OR (unique1 = 2))
4380+
Recheck Cond: ((unique1 = 2) OR (hundred = 4))
43814381
-> BitmapOr
4382-
-> Bitmap Index Scan on tenk1_hundred
4383-
Index Cond: (hundred = 4)
43844382
-> Bitmap Index Scan on tenk1_unique1
43854383
Index Cond: (unique1 = 2)
4384+
-> Bitmap Index Scan on tenk1_hundred
4385+
Index Cond: (hundred = 4)
43864386
-> Materialize
43874387
-> Bitmap Heap Scan on tenk1 a
4388-
Recheck Cond: (((unique2 = 3) OR (unique2 = 7)) OR (unique1 = 1))
4388+
Recheck Cond: ((unique1 = 1) OR ((unique2 = 3) OR (unique2 = 7)))
43894389
Filter: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7))
43904390
-> BitmapOr
4391-
-> Bitmap Index Scan on tenk1_unique2
4392-
Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
43934391
-> Bitmap Index Scan on tenk1_unique1
43944392
Index Cond: (unique1 = 1)
4393+
-> Bitmap Index Scan on tenk1_unique2
4394+
Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
43954395
(18 rows)
43964396

43974397
explain (costs off)
@@ -4403,21 +4403,21 @@ select * from tenk1 a join tenk1 b on
44034403
Nested Loop
44044404
Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4)))
44054405
-> Bitmap Heap Scan on tenk1 b
4406-
Recheck Cond: ((hundred = 4) OR (unique1 = 2))
4406+
Recheck Cond: ((unique1 = 2) OR (hundred = 4))
44074407
-> BitmapOr
4408-
-> Bitmap Index Scan on tenk1_hundred
4409-
Index Cond: (hundred = 4)
44104408
-> Bitmap Index Scan on tenk1_unique1
44114409
Index Cond: (unique1 = 2)
4410+
-> Bitmap Index Scan on tenk1_hundred
4411+
Index Cond: (hundred = 4)
44124412
-> Materialize
44134413
-> Bitmap Heap Scan on tenk1 a
4414-
Recheck Cond: (((unique2 = 3) OR (unique2 = 7)) OR (unique1 = 1))
4414+
Recheck Cond: ((unique1 = 1) OR ((unique2 = 3) OR (unique2 = 7)))
44154415
Filter: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7))
44164416
-> BitmapOr
4417-
-> Bitmap Index Scan on tenk1_unique2
4418-
Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
44194417
-> Bitmap Index Scan on tenk1_unique1
44204418
Index Cond: (unique1 = 1)
4419+
-> Bitmap Index Scan on tenk1_unique2
4420+
Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
44214421
(18 rows)
44224422

44234423
explain (costs off)
@@ -4431,15 +4431,15 @@ select * from tenk1 a join tenk1 b on
44314431
-> Seq Scan on tenk1 b
44324432
-> Materialize
44334433
-> Bitmap Heap Scan on tenk1 a
4434-
Recheck Cond: (((unique2 = 3) OR (unique2 = 7)) OR ((unique1 = 3) OR (unique1 = 1)) OR (unique1 < 20))
4434+
Recheck Cond: ((unique1 < 20) OR ((unique1 = 3) OR (unique1 = 1)) OR ((unique2 = 3) OR (unique2 = 7)))
44354435
Filter: ((unique1 < 20) OR (unique1 = 3) OR (unique1 = 1) OR (unique2 = 3) OR (unique2 = 7))
44364436
-> BitmapOr
4437-
-> Bitmap Index Scan on tenk1_unique2
4438-
Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
4439-
-> Bitmap Index Scan on tenk1_unique1
4440-
Index Cond: (unique1 = ANY ('{3,1}'::integer[]))
44414437
-> Bitmap Index Scan on tenk1_unique1
44424438
Index Cond: (unique1 < 20)
4439+
-> Bitmap Index Scan on tenk1_unique1
4440+
Index Cond: (unique1 = ANY ('{3,1}'::integer[]))
4441+
-> Bitmap Index Scan on tenk1_unique2
4442+
Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
44434443
(14 rows)
44444444

44454445
--

src/test/regress/expected/partition_join.out

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2568,24 +2568,24 @@ where not exists (select 1 from prtx2
25682568
-> Seq Scan on prtx1_1
25692569
Filter: ((a < 20) AND (c = 91))
25702570
-> Bitmap Heap Scan on prtx2_1
2571-
Recheck Cond: ((c = 99) OR (b = (prtx1_1.b + 1)))
2571+
Recheck Cond: ((b = (prtx1_1.b + 1)) OR (c = 99))
25722572
Filter: (a = prtx1_1.a)
25732573
-> BitmapOr
2574-
-> Bitmap Index Scan on prtx2_1_c_idx
2575-
Index Cond: (c = 99)
25762574
-> Bitmap Index Scan on prtx2_1_b_idx
25772575
Index Cond: (b = (prtx1_1.b + 1))
2576+
-> Bitmap Index Scan on prtx2_1_c_idx
2577+
Index Cond: (c = 99)
25782578
-> Nested Loop Anti Join
25792579
-> Seq Scan on prtx1_2
25802580
Filter: ((a < 20) AND (c = 91))
25812581
-> Bitmap Heap Scan on prtx2_2
2582-
Recheck Cond: ((c = 99) OR (b = (prtx1_2.b + 1)))
2582+
Recheck Cond: ((b = (prtx1_2.b + 1)) OR (c = 99))
25832583
Filter: (a = prtx1_2.a)
25842584
-> BitmapOr
2585-
-> Bitmap Index Scan on prtx2_2_c_idx
2586-
Index Cond: (c = 99)
25872585
-> Bitmap Index Scan on prtx2_2_b_idx
25882586
Index Cond: (b = (prtx1_2.b + 1))
2587+
-> Bitmap Index Scan on prtx2_2_c_idx
2588+
Index Cond: (c = 99)
25892589
(23 rows)
25902590

25912591
select * from prtx1

src/test/regress/sql/create_index.sql

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1355,6 +1355,17 @@ SELECT b.relname,
13551355
ORDER BY 1;
13561356
DROP TABLE concur_temp_tab_1, concur_temp_tab_2, reindex_temp_before;
13571357

1358+
-- No OR-clause groupings should happen, and there should be no clause
1359+
-- permutations in the filtering conditions we could see in the EXPLAIN.
1360+
EXPLAIN (COSTS OFF)
1361+
SELECT * FROM tenk1 WHERE unique1 < 1 OR hundred < 2;
1362+
1363+
-- OR clauses in the 'unique1' column are grouped, so clause permutation
1364+
-- occurs. W e can see it in the 'Recheck Cond': the second clause involving
1365+
-- the 'unique1' column goes just after the first one.
1366+
EXPLAIN (COSTS OFF)
1367+
SELECT * FROM tenk1 WHERE unique1 < 1 OR unique1 < 3 OR hundred < 2;
1368+
13581369
-- Check bitmap scan can consider similar OR arguments separately without
13591370
-- grouping them into SAOP.
13601371
CREATE TABLE bitmap_split_or (a int NOT NULL, b int NOT NULL, c int NOT NULL);

0 commit comments

Comments
 (0)