Skip to content

Commit 5619125

Browse files
authored
corr update (oracle-samples#166)
1 parent 8ba6b78 commit 5619125

File tree

3 files changed

+23
-5
lines changed

3 files changed

+23
-5
lines changed

optimizer/column_groups/corr_from_plan.sql

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,13 @@
1313
-- Y/N where Y - will create the column groups immediately
1414
-- N - will print the column group creation script only
1515
--
16+
--
17+
-- The cursor C1 includes some predicates I've commented out
18+
-- If uncommented, they will reduce the number of columns comapared, but this
19+
-- risks missing some correlated columns. I chose to leave these ideas
20+
-- visible, but I think the best way to speed things up
21+
-- is to reduce the row sample percentage.
22+
--
1623
var create_now varchar2(1)
1724
set echo off
1825
column tab_owner format a20
@@ -83,7 +90,7 @@ declare
8390
select t1.column_name c1, t2.column_name c2
8491
from w t1, w t2 /* , (select num_rows from dba_tables where owner = :ownname and table_name = :tabname) t */
8592
where t1.column_name > t2.column_name
86-
and greatest(t1.num_distinct,t2.num_distinct)/least(t1.num_distinct,t2.num_distinct)<2 /* Similar number of distinct values */
93+
--and greatest(t1.num_distinct,t2.num_distinct)/least(t1.num_distinct,t2.num_distinct)<2 /* Similar number of distinct values? */
8794
--and t1.num_distinct < t.num_rows/10 /* Perhaps eliminate sequenced columns? */
8895
order by t1.column_name;
8996
c number(6,5);

optimizer/column_groups/corr_from_sts.sql

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
-- Correlation is set to 80% - an arbitary figure
66
-- Data types limited
77
-- Only columns with shorter strings compared
8-
-- Columns checked must have a 'similar' number of distinct values (NDVs must not differ by 2X)
98
-- A sample of rows can be used to speed up execution time - which can be substantial
109
--
1110
-- Parameters:
@@ -71,6 +70,12 @@ declare
7170
-- column pairs that have similar NDV - so some NULL cases will be missed.
7271
-- There is also an assumption that longer strings are rarely used in comparison
7372
--
73+
-- The cursor W includes some predicates I've commented out
74+
-- If uncommented, they will reduce the number of columns comapared, but this
75+
-- risks missing some correlated columns. I chose to leave these ideas
76+
-- visible, but I think the best way to speed things up
77+
-- is to reduce the row sample percentage.
78+
--
7479
cursor c1 is
7580
with w as (
7681
select column_name, num_distinct
@@ -86,7 +91,7 @@ declare
8691
select t1.column_name c1, t2.column_name c2
8792
from w t1, w t2 /* , (select num_rows from dba_tables where owner = :ownname and table_name = :tabname) t */
8893
where t1.column_name > t2.column_name
89-
and greatest(t1.num_distinct,t2.num_distinct)/least(t1.num_distinct,t2.num_distinct)<2 /* Similar number of distinct values */
94+
--and greatest(t1.num_distinct,t2.num_distinct)/least(t1.num_distinct,t2.num_distinct)<2 /* Similar number of distinct values? */
9095
--and t1.num_distinct < t.num_rows/10 /* Perhaps eliminate sequenced columns? */
9196
order by t1.column_name;
9297
c number(6,5);

optimizer/column_groups/corr_from_table.sql

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
-- Correlation is set to 80% - an arbitary figure
66
-- Data types limited
77
-- Only columns with shorter strings compared
8-
-- Columns checked must have a 'similar' number of distinct values (NDVs must not differ by 2X)
98
-- A sample of rows can be used to speed up execution time - which can be substantial
109
--
1110
-- Parameters:
@@ -15,6 +14,13 @@
1514
-- Y/N - Y to create the column groups immediately
1615
-- N to spool SQL to create column groups
1716
--
17+
--
18+
-- The cursor C1 includes some predicates I've commented out
19+
-- If uncommented, they will reduce the number of columns comapared, but this
20+
-- risks missing some correlated columns. I chose to leave these ideas
21+
-- visible, but I think the best way to speed things up
22+
-- is to reduce the row sample percentage
23+
--
1824
var create_now varchar2(1)
1925
set echo off
2026
column tab_owner format a20
@@ -80,7 +86,7 @@ declare
8086
select t1.column_name c1, t2.column_name c2
8187
from w t1, w t2 /* , (select num_rows from dba_tables where owner = :ownname and table_name = :tabname) t */
8288
where t1.column_name > t2.column_name
83-
and greatest(t1.num_distinct,t2.num_distinct)/least(t1.num_distinct,t2.num_distinct)<2 /* Similar number of distinct values */
89+
--and greatest(t1.num_distinct,t2.num_distinct)/least(t1.num_distinct,t2.num_distinct)<2 /* Similar number of distinct values? */
8490
--and t1.num_distinct < t.num_rows/10 /* Perhaps eliminate sequenced columns? */
8591
order by t1.column_name;
8692
c number(6,5);

0 commit comments

Comments
 (0)