Skip to content

Commit 51db645

Browse files
committed
Repair error noticed by Roberto Cornacchia: selectivity code
was rejecting negative attnums as bogus, which of course they are not. Add code to get_attdisbursion to produce a useful value for OID attribute, since VACUUM does not store stats for system attributes. Also, repair bug that's been in eqjoinsel for a long time: it was taking the max of the two columns' disbursions, whereas it should use the min.
1 parent 4550096 commit 51db645

File tree

5 files changed

+66
-37
lines changed

5 files changed

+66
-37
lines changed

src/backend/optimizer/path/clausesel.c

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.25 1999/07/25 23:07:24 tgl Exp $
10+
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.26 1999/09/09 02:35:47 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -230,24 +230,24 @@ compute_clause_selec(Query *root, Node *clause)
230230
int flag;
231231

232232
get_relattval(clause, 0, &relidx, &attno, &constval, &flag);
233-
if (relidx <= 0 || attno <= 0)
233+
if (relidx && attno)
234+
s1 = (Cost) restriction_selectivity(oprrest,
235+
opno,
236+
getrelid(relidx,
237+
root->rtable),
238+
attno,
239+
constval,
240+
flag);
241+
else
234242
{
235243
/*
236-
* attno can be Invalid if the clause had a function in it,
244+
* attno can be 0 if the clause had a function in it,
237245
* i.e. WHERE myFunc(f) = 10
238246
*
239247
* XXX should be FIXED to use function selectivity
240248
*/
241249
s1 = (Cost) (0.5);
242250
}
243-
else
244-
s1 = (Cost) restriction_selectivity(oprrest,
245-
opno,
246-
getrelid(relidx,
247-
root->rtable),
248-
attno,
249-
constval,
250-
flag);
251251
}
252252
}
253253
else
@@ -274,7 +274,8 @@ compute_clause_selec(Query *root, Node *clause)
274274
attno2;
275275

276276
get_rels_atts(clause, &relid1, &attno1, &relid2, &attno2);
277-
if (relid1 > 0 && relid2 > 0 && attno1 > 0 && attno2 > 0)
277+
if (relid1 && relid2 && attno1 && attno2)
278+
278279
s1 = (Cost) join_selectivity(oprjoin,
279280
opno,
280281
getrelid(relid1,

src/backend/optimizer/util/clauses.c

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.50 1999/08/26 05:09:05 tgl Exp $
10+
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.51 1999/09/09 02:35:53 tgl Exp $
1111
*
1212
* HISTORY
1313
* AUTHOR DATE MAJOR EVENT
@@ -116,7 +116,10 @@ make_opclause(Oper *op, Var *leftop, Var *rightop)
116116
*
117117
* Returns the left operand of a clause of the form (op expr expr)
118118
* or (op expr)
119-
* NB: it is assumed (for now) that all expr must be Var nodes
119+
*
120+
* NB: for historical reasons, the result is declared Var *, even
121+
* though many callers can cope with results that are not Vars.
122+
* The result really ought to be declared Expr * or Node *.
120123
*/
121124
Var *
122125
get_leftop(Expr *clause)
@@ -549,8 +552,11 @@ NumRelids(Node *clause)
549552
* if the "something" is a constant, the value of the constant
550553
* flags indicating whether a constant was found, and on which side.
551554
* Default values are returned if the expression is too complicated,
552-
* specifically -1 for the relid and attno, 0 for the constant value.
553-
* Note that InvalidAttrNumber is *not* -1, but 0.
555+
* specifically 0 for the relid and attno, 0 for the constant value.
556+
*
557+
* Note that negative attno values are *not* invalid, but represent
558+
* system attributes such as OID. It's sufficient to check for relid=0
559+
* to determine whether the routine succeeded.
554560
*/
555561
void
556562
get_relattval(Node *clause,
@@ -610,8 +616,8 @@ get_relattval(Node *clause,
610616
{
611617
/* Duh, it's too complicated for me... */
612618
default_results:
613-
*relid = -1;
614-
*attno = -1;
619+
*relid = 0;
620+
*attno = 0;
615621
*constval = 0;
616622
*flag = 0;
617623
return;
@@ -663,7 +669,7 @@ static int is_single_func(Node *node)
663669
* for a joinclause.
664670
*
665671
* If the clause is not of the form (var op var) or if any of the vars
666-
* refer to nested attributes, then -1's are returned.
672+
* refer to nested attributes, then zeroes are returned.
667673
*
668674
*/
669675
void
@@ -674,10 +680,10 @@ get_rels_atts(Node *clause,
674680
AttrNumber *attno2)
675681
{
676682
/* set default values */
677-
*relid1 = -1;
678-
*attno1 = -1;
679-
*relid2 = -1;
680-
*attno2 = -1;
683+
*relid1 = 0;
684+
*attno1 = 0;
685+
*relid2 = 0;
686+
*attno2 = 0;
681687

682688
if (is_opclause(clause))
683689
{

src/backend/optimizer/util/plancat.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.36 1999/07/25 23:07:26 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.37 1999/09/09 02:35:53 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -288,7 +288,7 @@ index_selectivity(Query *root,
288288
}
289289

290290
/*
291-
* restriction_selectivity in lisp system.
291+
* restriction_selectivity
292292
*
293293
* NOTE: The routine is now merged with RestrictionClauseSelectivity
294294
* as defined in plancat.c
@@ -298,7 +298,7 @@ index_selectivity(Query *root,
298298
* operator relation, by calling the function manager.
299299
*
300300
* XXX The assumption in the selectivity procedures is that if the
301-
* relation OIDs or attribute numbers are -1, then the clause
301+
* relation OIDs or attribute numbers are 0, then the clause
302302
* isn't of the form (op var const).
303303
*/
304304
Cost
@@ -337,7 +337,7 @@ restriction_selectivity(Oid functionObjectId,
337337
* information.
338338
*
339339
* XXX The assumption in the selectivity procedures is that if the
340-
* relation OIDs or attribute numbers are -1, then the clause
340+
* relation OIDs or attribute numbers are 0, then the clause
341341
* isn't of the form (op var var).
342342
*/
343343
Cost

src/backend/utils/adt/selfuncs.c

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
*
1111
*
1212
* IDENTIFICATION
13-
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.39 1999/08/21 00:56:18 tgl Exp $
13+
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.40 1999/09/09 02:35:58 tgl Exp $
1414
*
1515
*-------------------------------------------------------------------------
1616
*/
@@ -31,7 +31,7 @@
3131
#include "utils/syscache.h"
3232

3333
/* N is not a valid var/constant or relation id */
34-
#define NONVALUE(N) ((N) == -1)
34+
#define NONVALUE(N) ((N) == 0)
3535

3636
/* are we looking at a functional index selectivity request? */
3737
#define FunctionalSelectivity(nIndKeys,attNum) ((attNum)==InvalidAttrNumber)
@@ -170,7 +170,9 @@ eqsel(Oid opid,
170170
else
171171
{
172172
/* No VACUUM ANALYZE stats available, so make a guess using
173-
* the disbursion stat (if we have that, which is unlikely...)
173+
* the disbursion stat (if we have that, which is unlikely
174+
* for a normal attribute; but for a system attribute we may
175+
* be able to estimate it).
174176
*/
175177
selec = get_attdisbursion(relid, attno, 0.01);
176178
}
@@ -366,7 +368,7 @@ eqjoinsel(Oid opid,
366368
float64 result;
367369
float64data num1,
368370
num2,
369-
max;
371+
min;
370372

371373
result = (float64) palloc(sizeof(float64data));
372374
if (NONVALUE(attno1) || NONVALUE(relid1) ||
@@ -376,11 +378,23 @@ eqjoinsel(Oid opid,
376378
{
377379
num1 = get_attdisbursion(relid1, attno1, 0.01);
378380
num2 = get_attdisbursion(relid2, attno2, 0.01);
379-
max = (num1 > num2) ? num1 : num2;
380-
if (max <= 0)
381-
*result = 1.0;
382-
else
383-
*result = max;
381+
/*
382+
* The join selectivity cannot be more than num2, since each
383+
* tuple in table 1 could match no more than num2 fraction of
384+
* tuples in table 2 (and that's only if the table-1 tuple
385+
* matches the most common value in table 2, so probably it's
386+
* less). By the same reasoning it is not more than num1.
387+
* The min is therefore an upper bound.
388+
*
389+
* XXX can we make a better estimate here? Using the nullfrac
390+
* statistic might be helpful, for example. Assuming the operator
391+
* is strict (does not succeed for null inputs) then the selectivity
392+
* couldn't be more than (1-nullfrac1)*(1-nullfrac2), which might
393+
* be usefully small if there are many nulls. How about applying
394+
* the operator to the most common values?
395+
*/
396+
min = (num1 < num2) ? num1 : num2;
397+
*result = min;
384398
}
385399
return result;
386400
}

src/backend/utils/cache/lsyscache.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Copyright (c) 1994, Regents of the University of California
77
*
88
* IDENTIFICATION
9-
* $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.33 1999/08/16 02:06:25 tgl Exp $
9+
* $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.34 1999/09/09 02:36:04 tgl Exp $
1010
*
1111
* NOTES
1212
* Eventually, the index information should go through here, too.
@@ -223,6 +223,14 @@ get_attdisbursion(Oid relid, AttrNumber attnum, double min_estimate)
223223
if (disbursion < 0.0) /* VACUUM thinks there are no duplicates */
224224
return 1.0 / (double) ntuples;
225225

226+
/*
227+
* VACUUM ANALYZE does not compute disbursion for system attributes,
228+
* but some of them can reasonably be assumed unique anyway.
229+
*/
230+
if (attnum == ObjectIdAttributeNumber ||
231+
attnum == SelfItemPointerAttributeNumber)
232+
return 1.0 / (double) ntuples;
233+
226234
/*
227235
* VACUUM ANALYZE has not been run for this table.
228236
* Produce an estimate = 1/numtuples. This may produce

0 commit comments

Comments
 (0)