Skip to content

Commit 165fa27

Browse files
committed
Fix assorted issues in convert_to_scalar().
If convert_to_scalar is passed a pair of datatypes it can't cope with, its former behavior was just to elog(ERROR). While this is OK so far as the core code is concerned, there's extension code that would like to use scalarltsel/scalargtsel/etc as selectivity estimators for operators that work on non-core datatypes, and this behavior is a show-stopper for that use-case. If we simply allow convert_to_scalar to return FALSE instead of outright failing, then the main logic of scalarltsel/scalargtsel will work fine for any operator that behaves like a scalar inequality comparison. The lack of conversion capability will mean that we can't estimate to better than histogram-bin-width precision, since the code will effectively assume that the comparison constant falls at the middle of its bin. But that's still a lot better than nothing. (Someday we should provide a way for extension code to supply a custom version of convert_to_scalar, but today is not that day.) While poking at this issue, we noted that the existing code for handling type bytea in convert_to_scalar is several bricks shy of a load. It assumes without checking that if the comparison value is type bytea, the bounds values are too; in the worst case this could lead to a crash. It also fails to detoast the input values, so that the comparison result is complete garbage if any input is toasted out-of-line, compressed, or even just short-header. I'm not sure how often such cases actually occur --- the bounds values, at least, are probably safe since they are elements of an array and hence can't be toasted. But that doesn't make this code OK. Back-patch to all supported branches, partly because author requested that, but mostly because of the bytea bugs. The change in API for the exposed routine convert_network_to_scalar() is theoretically a back-patch hazard, but it seems pretty unlikely that any third-party code is calling that function directly. Tomas Vondra, with some adjustments by me Discussion: https://postgr.es/m/b68441b6-d18f-13ab-b43b-9a72188a4e02@2ndquadrant.com
1 parent 947f06c commit 165fa27

File tree

4 files changed

+90
-57
lines changed

4 files changed

+90
-57
lines changed

contrib/btree_gist/btree_inet.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,11 @@ gbt_inet_compress(PG_FUNCTION_ARGS)
9999
if (entry->leafkey)
100100
{
101101
inetKEY *r = (inetKEY *) palloc(sizeof(inetKEY));
102+
bool failure = false;
102103

103104
retval = palloc(sizeof(GISTENTRY));
104-
r->lower = convert_network_to_scalar(entry->key, INETOID);
105+
r->lower = convert_network_to_scalar(entry->key, INETOID, &failure);
106+
Assert(!failure);
105107
r->upper = r->lower;
106108
gistentryinit(*retval, PointerGetDatum(r),
107109
entry->rel, entry->page,
@@ -118,13 +120,18 @@ Datum
118120
gbt_inet_consistent(PG_FUNCTION_ARGS)
119121
{
120122
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
121-
double query = convert_network_to_scalar(PG_GETARG_DATUM(1), INETOID);
123+
Datum dquery = PG_GETARG_DATUM(1);
122124
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
123125

124126
/* Oid subtype = PG_GETARG_OID(3); */
125127
bool *recheck = (bool *) PG_GETARG_POINTER(4);
126128
inetKEY *kkk = (inetKEY *) DatumGetPointer(entry->key);
127129
GBT_NUMKEY_R key;
130+
double query;
131+
bool failure = false;
132+
133+
query = convert_network_to_scalar(dquery, INETOID, &failure);
134+
Assert(!failure);
128135

129136
/* All cases served by this function are inexact */
130137
*recheck = true;

src/backend/utils/adt/network.c

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -864,9 +864,12 @@ network_hostmask(PG_FUNCTION_ARGS)
864864
* Convert a value of a network datatype to an approximate scalar value.
865865
* This is used for estimating selectivities of inequality operators
866866
* involving network types.
867+
*
868+
* On failure (e.g., unsupported typid), set *failure to true;
869+
* otherwise, that variable is not changed.
867870
*/
868871
double
869-
convert_network_to_scalar(Datum value, Oid typid)
872+
convert_network_to_scalar(Datum value, Oid typid, bool *failure)
870873
{
871874
switch (typid)
872875
{
@@ -893,8 +896,6 @@ convert_network_to_scalar(Datum value, Oid typid)
893896
res += ip_addr(ip)[i];
894897
}
895898
return res;
896-
897-
break;
898899
}
899900
case MACADDROID:
900901
{
@@ -908,11 +909,7 @@ convert_network_to_scalar(Datum value, Oid typid)
908909
}
909910
}
910911

911-
/*
912-
* Can't get here unless someone tries to use scalarltsel/scalargtsel on
913-
* an operator with one network and one non-network operand.
914-
*/
915-
elog(ERROR, "unsupported type: %u", typid);
912+
*failure = true;
916913
return 0;
917914
}
918915

src/backend/utils/adt/selfuncs.c

Lines changed: 75 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ static double eqjoinsel_semi(Oid operator,
164164
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
165165
Datum lobound, Datum hibound, Oid boundstypid,
166166
double *scaledlobound, double *scaledhibound);
167-
static double convert_numeric_to_scalar(Datum value, Oid typid);
167+
static double convert_numeric_to_scalar(Datum value, Oid typid, bool *failure);
168168
static void convert_string_to_scalar(char *value,
169169
double *scaledvalue,
170170
char *lobound,
@@ -181,8 +181,9 @@ static double convert_one_string_to_scalar(char *value,
181181
int rangelo, int rangehi);
182182
static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
183183
int rangelo, int rangehi);
184-
static char *convert_string_datum(Datum value, Oid typid);
185-
static double convert_timevalue_to_scalar(Datum value, Oid typid);
184+
static char *convert_string_datum(Datum value, Oid typid, bool *failure);
185+
static double convert_timevalue_to_scalar(Datum value, Oid typid,
186+
bool *failure);
186187
static void examine_simple_variable(PlannerInfo *root, Var *var,
187188
VariableStatData *vardata);
188189
static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
@@ -527,7 +528,8 @@ neqsel(PG_FUNCTION_ARGS)
527528
*
528529
* This routine works for any datatype (or pair of datatypes) known to
529530
* convert_to_scalar(). If it is applied to some other datatype,
530-
* it will return a default estimate.
531+
* it will return an approximate estimate based on assuming that the constant
532+
* value falls in the middle of the bin identified by binary search.
531533
*/
532534
static double
533535
scalarineqsel(PlannerInfo *root, Oid operator, bool isgt,
@@ -3605,10 +3607,15 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
36053607
Datum lobound, Datum hibound, Oid boundstypid,
36063608
double *scaledlobound, double *scaledhibound)
36073609
{
3610+
bool failure = false;
3611+
36083612
/*
36093613
* Both the valuetypid and the boundstypid should exactly match the
3610-
* declared input type(s) of the operator we are invoked for, so we just
3611-
* error out if either is not recognized.
3614+
* declared input type(s) of the operator we are invoked for. However,
3615+
* extensions might try to use scalarineqsel as estimator for operators
3616+
* with input type(s) we don't handle here; in such cases, we want to
3617+
* return false, not fail. In any case, we mustn't assume that valuetypid
3618+
* and boundstypid are identical.
36123619
*
36133620
* XXX The histogram we are interpolating between points of could belong
36143621
* to a column that's only binary-compatible with the declared type. In
@@ -3641,10 +3648,13 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
36413648
case REGTYPEOID:
36423649
case REGCONFIGOID:
36433650
case REGDICTIONARYOID:
3644-
*scaledvalue = convert_numeric_to_scalar(value, valuetypid);
3645-
*scaledlobound = convert_numeric_to_scalar(lobound, boundstypid);
3646-
*scaledhibound = convert_numeric_to_scalar(hibound, boundstypid);
3647-
return true;
3651+
*scaledvalue = convert_numeric_to_scalar(value, valuetypid,
3652+
&failure);
3653+
*scaledlobound = convert_numeric_to_scalar(lobound, boundstypid,
3654+
&failure);
3655+
*scaledhibound = convert_numeric_to_scalar(hibound, boundstypid,
3656+
&failure);
3657+
return !failure;
36483658

36493659
/*
36503660
* Built-in string types
@@ -3655,9 +3665,20 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
36553665
case TEXTOID:
36563666
case NAMEOID:
36573667
{
3658-
char *valstr = convert_string_datum(value, valuetypid);
3659-
char *lostr = convert_string_datum(lobound, boundstypid);
3660-
char *histr = convert_string_datum(hibound, boundstypid);
3668+
char *valstr = convert_string_datum(value, valuetypid,
3669+
&failure);
3670+
char *lostr = convert_string_datum(lobound, boundstypid,
3671+
&failure);
3672+
char *histr = convert_string_datum(hibound, boundstypid,
3673+
&failure);
3674+
3675+
/*
3676+
* Bail out if any of the values is not of string type. We
3677+
* might leak converted strings for the other value(s), but
3678+
* that's not worth troubling over.
3679+
*/
3680+
if (failure)
3681+
return false;
36613682

36623683
convert_string_to_scalar(valstr, scaledvalue,
36633684
lostr, scaledlobound,
@@ -3673,6 +3694,9 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
36733694
*/
36743695
case BYTEAOID:
36753696
{
3697+
/* We only support bytea vs bytea comparison */
3698+
if (boundstypid != BYTEAOID)
3699+
return false;
36763700
convert_bytea_to_scalar(value, scaledvalue,
36773701
lobound, scaledlobound,
36783702
hibound, scaledhibound);
@@ -3691,21 +3715,27 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
36913715
case TINTERVALOID:
36923716
case TIMEOID:
36933717
case TIMETZOID:
3694-
*scaledvalue = convert_timevalue_to_scalar(value, valuetypid);
3695-
*scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid);
3696-
*scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid);
3697-
return true;
3718+
*scaledvalue = convert_timevalue_to_scalar(value, valuetypid,
3719+
&failure);
3720+
*scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid,
3721+
&failure);
3722+
*scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid,
3723+
&failure);
3724+
return !failure;
36983725

36993726
/*
37003727
* Built-in network types
37013728
*/
37023729
case INETOID:
37033730
case CIDROID:
37043731
case MACADDROID:
3705-
*scaledvalue = convert_network_to_scalar(value, valuetypid);
3706-
*scaledlobound = convert_network_to_scalar(lobound, boundstypid);
3707-
*scaledhibound = convert_network_to_scalar(hibound, boundstypid);
3708-
return true;
3732+
*scaledvalue = convert_network_to_scalar(value, valuetypid,
3733+
&failure);
3734+
*scaledlobound = convert_network_to_scalar(lobound, boundstypid,
3735+
&failure);
3736+
*scaledhibound = convert_network_to_scalar(hibound, boundstypid,
3737+
&failure);
3738+
return !failure;
37093739
}
37103740
/* Don't know how to convert */
37113741
*scaledvalue = *scaledlobound = *scaledhibound = 0;
@@ -3714,9 +3744,12 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
37143744

37153745
/*
37163746
* Do convert_to_scalar()'s work for any numeric data type.
3747+
*
3748+
* On failure (e.g., unsupported typid), set *failure to true;
3749+
* otherwise, that variable is not changed.
37173750
*/
37183751
static double
3719-
convert_numeric_to_scalar(Datum value, Oid typid)
3752+
convert_numeric_to_scalar(Datum value, Oid typid, bool *failure)
37203753
{
37213754
switch (typid)
37223755
{
@@ -3750,11 +3783,7 @@ convert_numeric_to_scalar(Datum value, Oid typid)
37503783
return (double) DatumGetObjectId(value);
37513784
}
37523785

3753-
/*
3754-
* Can't get here unless someone tries to use scalarltsel/scalargtsel on
3755-
* an operator with one numeric and one non-numeric operand.
3756-
*/
3757-
elog(ERROR, "unsupported type: %u", typid);
3786+
*failure = true;
37583787
return 0;
37593788
}
37603789

@@ -3897,11 +3926,14 @@ convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
38973926
/*
38983927
* Convert a string-type Datum into a palloc'd, null-terminated string.
38993928
*
3929+
* On failure (e.g., unsupported typid), set *failure to true;
3930+
* otherwise, that variable is not changed. (We'll return NULL on failure.)
3931+
*
39003932
* When using a non-C locale, we must pass the string through strxfrm()
39013933
* before continuing, so as to generate correct locale-specific results.
39023934
*/
39033935
static char *
3904-
convert_string_datum(Datum value, Oid typid)
3936+
convert_string_datum(Datum value, Oid typid, bool *failure)
39053937
{
39063938
char *val;
39073939

@@ -3925,12 +3957,7 @@ convert_string_datum(Datum value, Oid typid)
39253957
break;
39263958
}
39273959
default:
3928-
3929-
/*
3930-
* Can't get here unless someone tries to use scalarltsel on an
3931-
* operator with one string and one non-string operand.
3932-
*/
3933-
elog(ERROR, "unsupported type: %u", typid);
3960+
*failure = true;
39343961
return NULL;
39353962
}
39363963

@@ -4010,16 +4037,19 @@ convert_bytea_to_scalar(Datum value,
40104037
Datum hibound,
40114038
double *scaledhibound)
40124039
{
4040+
bytea *valuep = DatumGetByteaPP(value);
4041+
bytea *loboundp = DatumGetByteaPP(lobound);
4042+
bytea *hiboundp = DatumGetByteaPP(hibound);
40134043
int rangelo,
40144044
rangehi,
4015-
valuelen = VARSIZE(DatumGetPointer(value)) - VARHDRSZ,
4016-
loboundlen = VARSIZE(DatumGetPointer(lobound)) - VARHDRSZ,
4017-
hiboundlen = VARSIZE(DatumGetPointer(hibound)) - VARHDRSZ,
4045+
valuelen = VARSIZE_ANY_EXHDR(valuep),
4046+
loboundlen = VARSIZE_ANY_EXHDR(loboundp),
4047+
hiboundlen = VARSIZE_ANY_EXHDR(hiboundp),
40184048
i,
40194049
minlen;
4020-
unsigned char *valstr = (unsigned char *) VARDATA(DatumGetPointer(value)),
4021-
*lostr = (unsigned char *) VARDATA(DatumGetPointer(lobound)),
4022-
*histr = (unsigned char *) VARDATA(DatumGetPointer(hibound));
4050+
unsigned char *valstr = (unsigned char *) VARDATA_ANY(valuep);
4051+
unsigned char *lostr = (unsigned char *) VARDATA_ANY(loboundp);
4052+
unsigned char *histr = (unsigned char *) VARDATA_ANY(hiboundp);
40234053

40244054
/*
40254055
* Assume bytea data is uniformly distributed across all byte values.
@@ -4086,9 +4116,12 @@ convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
40864116

40874117
/*
40884118
* Do convert_to_scalar()'s work for any timevalue data type.
4119+
*
4120+
* On failure (e.g., unsupported typid), set *failure to true;
4121+
* otherwise, that variable is not changed.
40894122
*/
40904123
static double
4091-
convert_timevalue_to_scalar(Datum value, Oid typid)
4124+
convert_timevalue_to_scalar(Datum value, Oid typid, bool *failure)
40924125
{
40934126
switch (typid)
40944127
{
@@ -4152,11 +4185,7 @@ convert_timevalue_to_scalar(Datum value, Oid typid)
41524185
}
41534186
}
41544187

4155-
/*
4156-
* Can't get here unless someone tries to use scalarltsel/scalargtsel on
4157-
* an operator with one timevalue and one non-timevalue operand.
4158-
*/
4159-
elog(ERROR, "unsupported type: %u", typid);
4188+
*failure = true;
41604189
return 0;
41614190
}
41624191

src/include/utils/builtins.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -926,7 +926,7 @@ extern Datum network_host(PG_FUNCTION_ARGS);
926926
extern Datum network_show(PG_FUNCTION_ARGS);
927927
extern Datum inet_abbrev(PG_FUNCTION_ARGS);
928928
extern Datum cidr_abbrev(PG_FUNCTION_ARGS);
929-
extern double convert_network_to_scalar(Datum value, Oid typid);
929+
extern double convert_network_to_scalar(Datum value, Oid typid, bool *failure);
930930
extern Datum inet_to_cidr(PG_FUNCTION_ARGS);
931931
extern Datum inet_set_masklen(PG_FUNCTION_ARGS);
932932
extern Datum cidr_set_masklen(PG_FUNCTION_ARGS);

0 commit comments

Comments
 (0)