Skip to content

Commit cd3dfb0

Browse files
committed
Make contrib/btree_gist's GiST penalty function a bit saner.
The previous coding supposed that the first differing bytes in two varlena datums must have the same sign difference as their overall comparison result. This is obviously bogus for text strings in non-C locales, and probably wrong for numeric, and even for bytea I think it was wrong on machines where char is signed. When the assumption failed, the function could deliver a zero or negative penalty in situations where such a result is quite ridiculous, leading the core GiST code to make very bad page-split decisions. To fix, take the absolute values of the byte-level differences. Also, switch the code to using unsigned char not just char, so that the behavior will be consistent whether char is signed or not. Per investigation of a trouble report from Tomas Vondra. Back-patch to all supported branches.
1 parent 500889a commit cd3dfb0

File tree

1 file changed

+10
-14
lines changed

1 file changed

+10
-14
lines changed

contrib/btree_gist/btree_utils_var.c

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -106,14 +106,12 @@ gbt_var_leaf2node(GBT_VARKEY *leaf, const gbtree_vinfo *tinfo)
106106
static int32
107107
gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo)
108108
{
109-
110109
GBT_VARKEY_R r = gbt_var_key_readable(node);
111110
int32 i = 0;
112111
int32 l = 0;
113112
int32 t1len = VARSIZE(r.lower) - VARHDRSZ;
114113
int32 t2len = VARSIZE(r.upper) - VARHDRSZ;
115114
int32 ml = Min(t1len, t2len);
116-
117115
char *p1 = VARDATA(r.lower);
118116
char *p2 = VARDATA(r.upper);
119117

@@ -124,7 +122,6 @@ gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo)
124122
{
125123
if (tinfo->eml > 1 && l == 0)
126124
{
127-
128125
if ((l = pg_mblen(p1)) != pg_mblen(p2))
129126
{
130127
return i;
@@ -367,13 +364,14 @@ gbt_var_penalty(float *res, const GISTENTRY *o, const GISTENTRY *n,
367364
GBT_VARKEY *newe = (GBT_VARKEY *) DatumGetPointer(n->key);
368365
GBT_VARKEY_R ok,
369366
nk;
370-
GBT_VARKEY *tmp = NULL;
371367

372368
*res = 0.0;
373369

374370
nk = gbt_var_key_readable(newe);
375371
if (nk.lower == nk.upper) /* leaf */
376372
{
373+
GBT_VARKEY *tmp;
374+
377375
tmp = gbt_var_leaf2node(newe, tinfo);
378376
if (tmp != newe)
379377
nk = gbt_var_key_readable(tmp);
@@ -388,7 +386,7 @@ gbt_var_penalty(float *res, const GISTENTRY *o, const GISTENTRY *n,
388386
gbt_bytea_pf_match(ok.upper, nk.upper, tinfo))))
389387
{
390388
Datum d = PointerGetDatum(0);
391-
double dres = 0.0;
389+
double dres;
392390
int32 ol,
393391
ul;
394392

@@ -399,20 +397,18 @@ gbt_var_penalty(float *res, const GISTENTRY *o, const GISTENTRY *n,
399397

400398
if (ul < ol)
401399
{
402-
dres = (ol - ul); /* lost of common prefix len */
400+
dres = (ol - ul); /* reduction of common prefix len */
403401
}
404402
else
405403
{
406404
GBT_VARKEY_R uk = gbt_var_key_readable((GBT_VARKEY *) DatumGetPointer(d));
405+
unsigned char tmp[4];
407406

408-
char tmp[4];
409-
410-
tmp[0] = ((VARSIZE(ok.lower) - VARHDRSZ) == ul) ? (CHAR_MIN) : (VARDATA(ok.lower)[ul]);
411-
tmp[1] = ((VARSIZE(uk.lower) - VARHDRSZ) == ul) ? (CHAR_MIN) : (VARDATA(uk.lower)[ul]);
412-
tmp[2] = ((VARSIZE(ok.upper) - VARHDRSZ) == ul) ? (CHAR_MIN) : (VARDATA(ok.upper)[ul]);
413-
tmp[3] = ((VARSIZE(uk.upper) - VARHDRSZ) == ul) ? (CHAR_MIN) : (VARDATA(uk.upper)[ul]);
414-
dres = (tmp[0] - tmp[1]) +
415-
(tmp[3] - tmp[2]);
407+
tmp[0] = (unsigned char) (((VARSIZE(ok.lower) - VARHDRSZ) <= ul) ? 0 : (VARDATA(ok.lower)[ul]));
408+
tmp[1] = (unsigned char) (((VARSIZE(uk.lower) - VARHDRSZ) <= ul) ? 0 : (VARDATA(uk.lower)[ul]));
409+
tmp[2] = (unsigned char) (((VARSIZE(ok.upper) - VARHDRSZ) <= ul) ? 0 : (VARDATA(ok.upper)[ul]));
410+
tmp[3] = (unsigned char) (((VARSIZE(uk.upper) - VARHDRSZ) <= ul) ? 0 : (VARDATA(uk.upper)[ul]));
411+
dres = Abs(tmp[0] - tmp[1]) + Abs(tmp[3] - tmp[2]);
416412
dres /= 256.0;
417413
}
418414

0 commit comments

Comments
 (0)