|
17 | 17 |
|
18 | 18 | #include <math.h>
|
19 | 19 |
|
| 20 | +#include "access/sysattr.h" |
20 | 21 | #include "catalog/pg_class.h"
|
21 | 22 | #include "catalog/pg_operator.h"
|
| 23 | +#include "catalog/pg_type.h" |
22 | 24 | #include "foreign/fdwapi.h"
|
| 25 | +#include "nodes/makefuncs.h" |
23 | 26 | #include "nodes/nodeFuncs.h"
|
24 | 27 | #ifdef OPTIMIZER_DEBUG
|
25 | 28 | #include "nodes/print.h"
|
@@ -98,6 +101,7 @@ static void subquery_push_qual(Query *subquery,
|
98 | 101 | RangeTblEntry *rte, Index rti, Node *qual);
|
99 | 102 | static void recurse_push_qual(Node *setOp, Query *topquery,
|
100 | 103 | RangeTblEntry *rte, Index rti, Node *qual);
|
| 104 | +static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel); |
101 | 105 |
|
102 | 106 |
|
103 | 107 | /*
|
@@ -1124,7 +1128,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
|
1124 | 1128 | /*
|
1125 | 1129 | * Must copy the Query so that planning doesn't mess up the RTE contents
|
1126 | 1130 | * (really really need to fix the planner to not scribble on its input,
|
1127 |
| - * someday). |
| 1131 | + * someday ... but see remove_unused_subquery_outputs to start with). |
1128 | 1132 | */
|
1129 | 1133 | subquery = copyObject(subquery);
|
1130 | 1134 |
|
@@ -1198,6 +1202,12 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
|
1198 | 1202 |
|
1199 | 1203 | pfree(unsafeColumns);
|
1200 | 1204 |
|
| 1205 | + /* |
| 1206 | + * The upper query might not use all the subquery's output columns; if |
| 1207 | + * not, we can simplify. |
| 1208 | + */ |
| 1209 | + remove_unused_subquery_outputs(subquery, rel); |
| 1210 | + |
1201 | 1211 | /*
|
1202 | 1212 | * We can safely pass the outer tuple_fraction down to the subquery if the
|
1203 | 1213 | * outer level has no joining, aggregation, or sorting to do. Otherwise
|
@@ -2032,6 +2042,129 @@ recurse_push_qual(Node *setOp, Query *topquery,
|
2032 | 2042 | }
|
2033 | 2043 | }
|
2034 | 2044 |
|
| 2045 | +/***************************************************************************** |
| 2046 | + * SIMPLIFYING SUBQUERY TARGETLISTS |
| 2047 | + *****************************************************************************/ |
| 2048 | + |
| 2049 | +/* |
| 2050 | + * remove_unused_subquery_outputs |
| 2051 | + * Remove subquery targetlist items we don't need |
| 2052 | + * |
| 2053 | + * It's possible, even likely, that the upper query does not read all the |
| 2054 | + * output columns of the subquery. We can remove any such outputs that are |
| 2055 | + * not needed by the subquery itself (e.g., as sort/group columns) and do not |
| 2056 | + * affect semantics otherwise (e.g., volatile functions can't be removed). |
| 2057 | + * This is useful not only because we might be able to remove expensive-to- |
| 2058 | + * compute expressions, but because deletion of output columns might allow |
| 2059 | + * optimizations such as join removal to occur within the subquery. |
| 2060 | + * |
| 2061 | + * To avoid affecting column numbering in the targetlist, we don't physically |
| 2062 | + * remove unused tlist entries, but rather replace their expressions with NULL |
| 2063 | + * constants. This is implemented by modifying subquery->targetList. |
| 2064 | + */ |
| 2065 | +static void |
| 2066 | +remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel) |
| 2067 | +{ |
| 2068 | + Bitmapset *attrs_used = NULL; |
| 2069 | + ListCell *lc; |
| 2070 | + |
| 2071 | + /* |
| 2072 | + * Do nothing if subquery has UNION/INTERSECT/EXCEPT: in principle we |
| 2073 | + * could update all the child SELECTs' tlists, but it seems not worth the |
| 2074 | + * trouble presently. |
| 2075 | + */ |
| 2076 | + if (subquery->setOperations) |
| 2077 | + return; |
| 2078 | + |
| 2079 | + /* |
| 2080 | + * If subquery has regular DISTINCT (not DISTINCT ON), we're wasting our |
| 2081 | + * time: all its output columns must be used in the distinctClause. |
| 2082 | + */ |
| 2083 | + if (subquery->distinctClause && !subquery->hasDistinctOn) |
| 2084 | + return; |
| 2085 | + |
| 2086 | + /* |
| 2087 | + * Collect a bitmap of all the output column numbers used by the upper |
| 2088 | + * query. |
| 2089 | + * |
| 2090 | + * Add all the attributes needed for joins or final output. Note: we must |
| 2091 | + * look at reltargetlist, not the attr_needed data, because attr_needed |
| 2092 | + * isn't computed for inheritance child rels, cf set_append_rel_size(). |
| 2093 | + * (XXX might be worth changing that sometime.) |
| 2094 | + */ |
| 2095 | + pull_varattnos((Node *) rel->reltargetlist, rel->relid, &attrs_used); |
| 2096 | + |
| 2097 | + /* Add all the attributes used by un-pushed-down restriction clauses. */ |
| 2098 | + foreach(lc, rel->baserestrictinfo) |
| 2099 | + { |
| 2100 | + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); |
| 2101 | + |
| 2102 | + pull_varattnos((Node *) rinfo->clause, rel->relid, &attrs_used); |
| 2103 | + } |
| 2104 | + |
| 2105 | + /* |
| 2106 | + * If there's a whole-row reference to the subquery, we can't remove |
| 2107 | + * anything. |
| 2108 | + */ |
| 2109 | + if (bms_is_member(0 - FirstLowInvalidHeapAttributeNumber, attrs_used)) |
| 2110 | + return; |
| 2111 | + |
| 2112 | + /* |
| 2113 | + * Run through the tlist and zap entries we don't need. It's okay to |
| 2114 | + * modify the tlist items in-place because set_subquery_pathlist made a |
| 2115 | + * copy of the subquery. |
| 2116 | + */ |
| 2117 | + foreach(lc, subquery->targetList) |
| 2118 | + { |
| 2119 | + TargetEntry *tle = (TargetEntry *) lfirst(lc); |
| 2120 | + |
| 2121 | + /* |
| 2122 | + * If it has a sortgroupref number, it's used in some sort/group |
| 2123 | + * clause so we'd better not remove it. Also, don't remove any |
| 2124 | + * resjunk columns, since their reason for being has nothing to do |
| 2125 | + * with anybody reading the subquery's output. (It's likely that |
| 2126 | + * resjunk columns in a sub-SELECT would always have ressortgroupref |
| 2127 | + * set, but even if they don't, it seems imprudent to remove them.) |
| 2128 | + */ |
| 2129 | + if (tle->ressortgroupref || tle->resjunk) |
| 2130 | + continue; |
| 2131 | + |
| 2132 | + /* |
| 2133 | + * If it's used by the upper query, we can't remove it. |
| 2134 | + */ |
| 2135 | + if (bms_is_member(tle->resno - FirstLowInvalidHeapAttributeNumber, |
| 2136 | + attrs_used)) |
| 2137 | + continue; |
| 2138 | + |
| 2139 | + /* |
| 2140 | + * If it contains a set-returning function, we can't remove it since |
| 2141 | + * that could change the number of rows returned by the subquery. |
| 2142 | + */ |
| 2143 | + if (expression_returns_set((Node *) tle->expr)) |
| 2144 | + continue; |
| 2145 | + |
| 2146 | + /* |
| 2147 | + * If it contains volatile functions, we daren't remove it for fear |
| 2148 | + * that the user is expecting their side-effects to happen. |
| 2149 | + */ |
| 2150 | + if (contain_volatile_functions((Node *) tle->expr)) |
| 2151 | + continue; |
| 2152 | + |
| 2153 | + /* |
| 2154 | + * OK, we don't need it. Replace the expression with a NULL constant. |
| 2155 | + * We can just make the constant be of INT4 type, since nothing's |
| 2156 | + * going to look at it anyway. |
| 2157 | + */ |
| 2158 | + tle->expr = (Expr *) makeConst(INT4OID, |
| 2159 | + -1, |
| 2160 | + InvalidOid, |
| 2161 | + sizeof(int32), |
| 2162 | + (Datum) 0, |
| 2163 | + true, /* isnull */ |
| 2164 | + true /* byval */ ); |
| 2165 | + } |
| 2166 | +} |
| 2167 | + |
2035 | 2168 | /*****************************************************************************
|
2036 | 2169 | * DEBUG SUPPORT
|
2037 | 2170 | *****************************************************************************/
|
|
0 commit comments