Skip to content

Commit f504baa

Browse files
committed
Make contrib/btree_gist's GiST penalty function a bit saner.
The previous coding supposed that the first differing bytes in two varlena datums must have the same sign difference as their overall comparison result. This is obviously bogus for text strings in non-C locales and probably wrong for numeric; even for bytea, I think it was wrong on machines where char is signed. When the assumption failed, the function could deliver a zero or negative penalty in situations where such a result is quite ridiculous, leading the core GiST code to make very bad page-split decisions. To fix, take the absolute values of the byte-level differences. Also, switch the code to using unsigned char rather than plain char, so that the behavior will be consistent whether char is signed or not. Per investigation of a trouble report from Tomas Vondra. Back-patch to all supported branches.
1 parent 45b315e commit f504baa

File tree

1 file changed

+10
-14
lines changed

1 file changed

+10
-14
lines changed

contrib/btree_gist/btree_utils_var.c

Lines changed: 10 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -92,14 +92,12 @@ gbt_var_leaf2node(GBT_VARKEY *leaf, const gbtree_vinfo *tinfo)
9292
static int32
9393
gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo)
9494
{
95-
9695
GBT_VARKEY_R r = gbt_var_key_readable(node);
9796
int32 i = 0;
9897
int32 l = 0;
9998
int32 t1len = VARSIZE(r.lower) - VARHDRSZ;
10099
int32 t2len = VARSIZE(r.upper) - VARHDRSZ;
101100
int32 ml = Min(t1len, t2len);
102-
103101
char *p1 = VARDATA(r.lower);
104102
char *p2 = VARDATA(r.upper);
105103

@@ -110,7 +108,6 @@ gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo)
110108
{
111109
if (tinfo->eml > 1 && l == 0)
112110
{
113-
114111
if ((l = pg_mblen(p1)) != pg_mblen(p2))
115112
{
116113
return i;
@@ -378,13 +375,14 @@ gbt_var_penalty(float *res, const GISTENTRY *o, const GISTENTRY *n, const gbtree
378375
GBT_VARKEY *newe = (GBT_VARKEY *) DatumGetPointer(n->key);
379376
GBT_VARKEY_R ok,
380377
nk;
381-
GBT_VARKEY *tmp = NULL;
382378

383379
*res = 0.0;
384380

385381
nk = gbt_var_key_readable(newe);
386382
if (nk.lower == nk.upper) /* leaf */
387383
{
384+
GBT_VARKEY *tmp;
385+
388386
tmp = gbt_var_leaf2node(newe, tinfo);
389387
if (tmp != newe)
390388
nk = gbt_var_key_readable(tmp);
@@ -401,7 +399,7 @@ gbt_var_penalty(float *res, const GISTENTRY *o, const GISTENTRY *n, const gbtree
401399
))
402400
{
403401
Datum d = PointerGetDatum(0);
404-
double dres = 0.0;
402+
double dres;
405403
int32 ol,
406404
ul;
407405

@@ -412,20 +410,18 @@ gbt_var_penalty(float *res, const GISTENTRY *o, const GISTENTRY *n, const gbtree
412410

413411
if (ul < ol)
414412
{
415-
dres = (ol - ul); /* lost of common prefix len */
413+
dres = (ol - ul); /* reduction of common prefix len */
416414
}
417415
else
418416
{
419417
GBT_VARKEY_R uk = gbt_var_key_readable((GBT_VARKEY *) DatumGetPointer(d));
418+
unsigned char tmp[4];
420419

421-
char tmp[4];
422-
423-
tmp[0] = ((VARSIZE(ok.lower) - VARHDRSZ) == ul) ? (CHAR_MIN) : (VARDATA(ok.lower)[ul]);
424-
tmp[1] = ((VARSIZE(uk.lower) - VARHDRSZ) == ul) ? (CHAR_MIN) : (VARDATA(uk.lower)[ul]);
425-
tmp[2] = ((VARSIZE(ok.upper) - VARHDRSZ) == ul) ? (CHAR_MIN) : (VARDATA(ok.upper)[ul]);
426-
tmp[3] = ((VARSIZE(uk.upper) - VARHDRSZ) == ul) ? (CHAR_MIN) : (VARDATA(uk.upper)[ul]);
427-
dres = (tmp[0] - tmp[1]) +
428-
(tmp[3] - tmp[2]);
420+
tmp[0] = (unsigned char) (((VARSIZE(ok.lower) - VARHDRSZ) <= ul) ? 0 : (VARDATA(ok.lower)[ul]));
421+
tmp[1] = (unsigned char) (((VARSIZE(uk.lower) - VARHDRSZ) <= ul) ? 0 : (VARDATA(uk.lower)[ul]));
422+
tmp[2] = (unsigned char) (((VARSIZE(ok.upper) - VARHDRSZ) <= ul) ? 0 : (VARDATA(ok.upper)[ul]));
423+
tmp[3] = (unsigned char) (((VARSIZE(uk.upper) - VARHDRSZ) <= ul) ? 0 : (VARDATA(uk.upper)[ul]));
424+
dres = Abs(tmp[0] - tmp[1]) + Abs(tmp[3] - tmp[2]);
429425
dres /= 256.0;
430426
}
431427

0 commit comments

Comments
 (0)