Skip to content

Commit 8a17afe

Browse files
committed
Fix failure to zero-pad the result of bitshiftright().
If the bitstring length is not a multiple of 8, we'd shift the rightmost bits into the pad space, which must be zeroes --- bit_cmp, for one, depends on that. This'd lead to the result failing to compare equal to what it should compare equal to, as reported in bug #16013 from Daryl Waycott. This is, if memory serves, not the first such bug in the bitstring functions. In hopes of making it the last one, do a bit more work than minimally necessary to fix the bug: * Add assertion checks to bit_out() and varbit_out() to complain if they are given incorrectly-padded input. This will improve the odds that manual testing of any new patch finds problems. * Encapsulate the padding-related logic in macros to make it easier to use. Also, remove unnecessary padding logic from bit_or() and bitxor(). Somebody had already noted that we need not re-pad the result of bit_and() since the inputs are required to be the same length, but failed to extrapolate that to the other two. Also, move a comment block that once was near the head of varbit.c (but people kept putting other stuff in front of it), to put it in the header block. Note for the release notes: if anyone has inconsistent data as a result of saving the output of bitshiftright() in a table, it's possible to fix it with something like UPDATE mytab SET bitcol = ~(~bitcol) WHERE bitcol != ~(~bitcol); This has been broken since day one, so back-patch to all supported branches. Discussion: https://postgr.es/m/16013-c2765b6996aacae9@postgresql.org
1 parent 29e47f8 commit 8a17afe

File tree

4 files changed

+154
-89
lines changed

4 files changed

+154
-89
lines changed

src/backend/utils/adt/varbit.c

Lines changed: 77 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,21 @@
33
* varbit.c
44
* Functions for the SQL datatypes BIT() and BIT VARYING().
55
*
6+
* The data structure contains the following elements:
7+
* header -- length of the whole data structure (incl header)
8+
* in bytes (as with all varying length datatypes)
9+
* data section -- private data section for the bits data structures
10+
* bitlength -- length of the bit string in bits
11+
* bitdata -- bit string, most significant byte first
12+
*
13+
* The length of the bitdata vector should always be exactly as many
14+
* bytes as are needed for the given bitlength. If the bitlength is
15+
* not a multiple of 8, the extra low-order padding bits of the last
16+
* byte must be zeroes.
17+
*
18+
* attypmod is defined as the length of the bit string in bits, or for
19+
* varying bits the maximum length.
20+
*
621
* Code originally contributed by Adriaan Joubert.
722
*
823
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
@@ -24,6 +39,40 @@
2439

2540
#define HEXDIG(z) ((z)<10 ? ((z)+'0') : ((z)-10+'A'))
2641

42+
/* Mask off any bits that should be zero in the last byte of a bitstring */
43+
#define VARBIT_PAD(vb) \
44+
do { \
45+
int32 pad_ = VARBITPAD(vb); \
46+
Assert(pad_ >= 0 && pad_ < BITS_PER_BYTE); \
47+
if (pad_ > 0) \
48+
*(VARBITS(vb) + VARBITBYTES(vb) - 1) &= BITMASK << pad_; \
49+
} while (0)
50+
51+
/*
52+
* Many functions work byte-by-byte, so they have a pointer handy to the
53+
* last-plus-one byte, which saves a cycle or two.
54+
*/
55+
#define VARBIT_PAD_LAST(vb, ptr) \
56+
do { \
57+
int32 pad_ = VARBITPAD(vb); \
58+
Assert(pad_ >= 0 && pad_ < BITS_PER_BYTE); \
59+
if (pad_ > 0) \
60+
*((ptr) - 1) &= BITMASK << pad_; \
61+
} while (0)
62+
63+
/* Assert proper padding of a bitstring */
64+
#ifdef USE_ASSERT_CHECKING
65+
#define VARBIT_CORRECTLY_PADDED(vb) \
66+
do { \
67+
int32 pad_ = VARBITPAD(vb); \
68+
Assert(pad_ >= 0 && pad_ < BITS_PER_BYTE); \
69+
Assert(pad_ == 0 || \
70+
(*(VARBITS(vb) + VARBITBYTES(vb) - 1) & ~(BITMASK << pad_)) == 0); \
71+
} while (0)
72+
#else
73+
#define VARBIT_CORRECTLY_PADDED(vb) ((void) 0)
74+
#endif
75+
2776
static VarBit *bit_catenate(VarBit *arg1, VarBit *arg2);
2877
static VarBit *bitsubstring(VarBit *arg, int32 s, int32 l,
2978
bool length_not_specified);
@@ -84,24 +133,6 @@ anybit_typmodout(int32 typmod)
84133
}
85134

86135

87-
/*----------
88-
* attypmod -- contains the length of the bit string in bits, or for
89-
* varying bits the maximum length.
90-
*
91-
* The data structure contains the following elements:
92-
* header -- length of the whole data structure (incl header)
93-
* in bytes. (as with all varying length datatypes)
94-
* data section -- private data section for the bits data structures
95-
* bitlength -- length of the bit string in bits
96-
* bitdata -- bit string, most significant byte first
97-
*
98-
* The length of the bitdata vector should always be exactly as many
99-
* bytes as are needed for the given bitlength. If the bitlength is
100-
* not a multiple of 8, the extra low-order padding bits of the last
101-
* byte must be zeroes.
102-
*----------
103-
*/
104-
105136
/*
106137
* bit_in -
107138
* converts a char string to the internal representation of a bitstring.
@@ -261,6 +292,9 @@ bit_out(PG_FUNCTION_ARGS)
261292
len,
262293
bitlen;
263294

295+
/* Assertion to help catch any bit functions that don't pad correctly */
296+
VARBIT_CORRECTLY_PADDED(s);
297+
264298
bitlen = VARBITLEN(s);
265299
len = (bitlen + 3) / 4;
266300
result = (char *) palloc(len + 2);
@@ -301,8 +335,6 @@ bit_recv(PG_FUNCTION_ARGS)
301335
VarBit *result;
302336
int len,
303337
bitlen;
304-
int ipad;
305-
bits8 mask;
306338

307339
bitlen = pq_getmsgint(buf, sizeof(int32));
308340
if (bitlen < 0 || bitlen > VARBITMAXLEN)
@@ -327,13 +359,8 @@ bit_recv(PG_FUNCTION_ARGS)
327359

328360
pq_copymsgbytes(buf, (char *) VARBITS(result), VARBITBYTES(result));
329361

330-
/* Make sure last byte is zero-padded if needed */
331-
ipad = VARBITPAD(result);
332-
if (ipad > 0)
333-
{
334-
mask = BITMASK << ipad;
335-
*(VARBITS(result) + VARBITBYTES(result) - 1) &= mask;
336-
}
362+
/* Make sure last byte is correctly zero-padded */
363+
VARBIT_PAD(result);
337364

338365
PG_RETURN_VARBIT_P(result);
339366
}
@@ -364,8 +391,6 @@ bit(PG_FUNCTION_ARGS)
364391
bool isExplicit = PG_GETARG_BOOL(2);
365392
VarBit *result;
366393
int rlen;
367-
int ipad;
368-
bits8 mask;
369394

370395
/* No work if typmod is invalid or supplied data matches it already */
371396
if (len <= 0 || len > VARBITMAXLEN || len == VARBITLEN(arg))
@@ -391,12 +416,7 @@ bit(PG_FUNCTION_ARGS)
391416
* if source data was shorter than target length (we assume the last byte
392417
* of the source data was itself correctly zero-padded).
393418
*/
394-
ipad = VARBITPAD(result);
395-
if (ipad > 0)
396-
{
397-
mask = BITMASK << ipad;
398-
*(VARBITS(result) + VARBITBYTES(result) - 1) &= mask;
399-
}
419+
VARBIT_PAD(result);
400420

401421
PG_RETURN_VARBIT_P(result);
402422
}
@@ -571,6 +591,9 @@ varbit_out(PG_FUNCTION_ARGS)
571591
k,
572592
len;
573593

594+
/* Assertion to help catch any bit functions that don't pad correctly */
595+
VARBIT_CORRECTLY_PADDED(s);
596+
574597
len = VARBITLEN(s);
575598
result = (char *) palloc(len + 1);
576599
sp = VARBITS(s);
@@ -617,8 +640,6 @@ varbit_recv(PG_FUNCTION_ARGS)
617640
VarBit *result;
618641
int len,
619642
bitlen;
620-
int ipad;
621-
bits8 mask;
622643

623644
bitlen = pq_getmsgint(buf, sizeof(int32));
624645
if (bitlen < 0 || bitlen > VARBITMAXLEN)
@@ -643,13 +664,8 @@ varbit_recv(PG_FUNCTION_ARGS)
643664

644665
pq_copymsgbytes(buf, (char *) VARBITS(result), VARBITBYTES(result));
645666

646-
/* Make sure last byte is zero-padded if needed */
647-
ipad = VARBITPAD(result);
648-
if (ipad > 0)
649-
{
650-
mask = BITMASK << ipad;
651-
*(VARBITS(result) + VARBITBYTES(result) - 1) &= mask;
652-
}
667+
/* Make sure last byte is correctly zero-padded */
668+
VARBIT_PAD(result);
653669

654670
PG_RETURN_VARBIT_P(result);
655671
}
@@ -718,8 +734,6 @@ varbit(PG_FUNCTION_ARGS)
718734
bool isExplicit = PG_GETARG_BOOL(2);
719735
VarBit *result;
720736
int rlen;
721-
int ipad;
722-
bits8 mask;
723737

724738
/* No work if typmod is invalid or supplied data matches it already */
725739
if (len <= 0 || len >= VARBITLEN(arg))
@@ -738,13 +752,8 @@ varbit(PG_FUNCTION_ARGS)
738752

739753
memcpy(VARBITS(result), VARBITS(arg), VARBITBYTES(result));
740754

741-
/* Make sure last byte is zero-padded if needed */
742-
ipad = VARBITPAD(result);
743-
if (ipad > 0)
744-
{
745-
mask = BITMASK << ipad;
746-
*(VARBITS(result) + VARBITBYTES(result) - 1) &= mask;
747-
}
755+
/* Make sure last byte is correctly zero-padded */
756+
VARBIT_PAD(result);
748757

749758
PG_RETURN_VARBIT_P(result);
750759
}
@@ -1002,6 +1011,8 @@ bit_catenate(VarBit *arg1, VarBit *arg2)
10021011
}
10031012
}
10041013

1014+
/* The pad bits should be already zero at this point */
1015+
10051016
return result;
10061017
}
10071018

@@ -1035,14 +1046,12 @@ bitsubstring(VarBit *arg, int32 s, int32 l, bool length_not_specified)
10351046
int bitlen,
10361047
rbitlen,
10371048
len,
1038-
ipad = 0,
10391049
ishift,
10401050
i;
10411051
int e,
10421052
s1,
10431053
e1;
1044-
bits8 mask,
1045-
*r,
1054+
bits8 *r,
10461055
*ps;
10471056

10481057
bitlen = VARBITLEN(arg);
@@ -1107,13 +1116,9 @@ bitsubstring(VarBit *arg, int32 s, int32 l, bool length_not_specified)
11071116
r++;
11081117
}
11091118
}
1110-
/* Do we need to pad at the end? */
1111-
ipad = VARBITPAD(result);
1112-
if (ipad > 0)
1113-
{
1114-
mask = BITMASK << ipad;
1115-
*(VARBITS(result) + len - 1) &= mask;
1116-
}
1119+
1120+
/* Make sure last byte is correctly zero-padded */
1121+
VARBIT_PAD(result);
11171122
}
11181123

11191124
return result;
@@ -1236,7 +1241,7 @@ bit_and(PG_FUNCTION_ARGS)
12361241
for (i = 0; i < VARBITBYTES(arg1); i++)
12371242
*r++ = *p1++ & *p2++;
12381243

1239-
/* Padding is not needed as & of 0 pad is 0 */
1244+
/* Padding is not needed as & of 0 pads is 0 */
12401245

12411246
PG_RETURN_VARBIT_P(result);
12421247
}
@@ -1258,7 +1263,6 @@ bit_or(PG_FUNCTION_ARGS)
12581263
bits8 *p1,
12591264
*p2,
12601265
*r;
1261-
bits8 mask;
12621266

12631267
bitlen1 = VARBITLEN(arg1);
12641268
bitlen2 = VARBITLEN(arg2);
@@ -1277,13 +1281,7 @@ bit_or(PG_FUNCTION_ARGS)
12771281
for (i = 0; i < VARBITBYTES(arg1); i++)
12781282
*r++ = *p1++ | *p2++;
12791283

1280-
/* Pad the result */
1281-
mask = BITMASK << VARBITPAD(result);
1282-
if (mask)
1283-
{
1284-
r--;
1285-
*r &= mask;
1286-
}
1284+
/* Padding is not needed as | of 0 pads is 0 */
12871285

12881286
PG_RETURN_VARBIT_P(result);
12891287
}
@@ -1305,7 +1303,6 @@ bitxor(PG_FUNCTION_ARGS)
13051303
bits8 *p1,
13061304
*p2,
13071305
*r;
1308-
bits8 mask;
13091306

13101307
bitlen1 = VARBITLEN(arg1);
13111308
bitlen2 = VARBITLEN(arg2);
@@ -1325,13 +1322,7 @@ bitxor(PG_FUNCTION_ARGS)
13251322
for (i = 0; i < VARBITBYTES(arg1); i++)
13261323
*r++ = *p1++ ^ *p2++;
13271324

1328-
/* Pad the result */
1329-
mask = BITMASK << VARBITPAD(result);
1330-
if (mask)
1331-
{
1332-
r--;
1333-
*r &= mask;
1334-
}
1325+
/* Padding is not needed as ^ of 0 pads is 0 */
13351326

13361327
PG_RETURN_VARBIT_P(result);
13371328
}
@@ -1347,7 +1338,6 @@ bitnot(PG_FUNCTION_ARGS)
13471338
VarBit *result;
13481339
bits8 *p,
13491340
*r;
1350-
bits8 mask;
13511341

13521342
result = (VarBit *) palloc(VARSIZE(arg));
13531343
SET_VARSIZE(result, VARSIZE(arg));
@@ -1358,13 +1348,8 @@ bitnot(PG_FUNCTION_ARGS)
13581348
for (; p < VARBITEND(arg); p++)
13591349
*r++ = ~*p;
13601350

1361-
/* Pad the result */
1362-
mask = BITMASK << VARBITPAD(result);
1363-
if (mask)
1364-
{
1365-
r--;
1366-
*r &= mask;
1367-
}
1351+
/* Must zero-pad the result, because extra bits are surely 1's here */
1352+
VARBIT_PAD_LAST(result, r);
13681353

13691354
PG_RETURN_VARBIT_P(result);
13701355
}
@@ -1431,6 +1416,8 @@ bitshiftleft(PG_FUNCTION_ARGS)
14311416
*r = 0;
14321417
}
14331418

1419+
/* The pad bits should be already zero at this point */
1420+
14341421
PG_RETURN_VARBIT_P(result);
14351422
}
14361423

@@ -1497,6 +1484,8 @@ bitshiftright(PG_FUNCTION_ARGS)
14971484
if ((++r) < VARBITEND(result))
14981485
*r = (*p << (BITS_PER_BYTE - ishift)) & BITMASK;
14991486
}
1487+
/* We may have shifted 1's into the pad bits, so fix that */
1488+
VARBIT_PAD_LAST(result, r);
15001489
}
15011490

15021491
PG_RETURN_VARBIT_P(result);

src/include/utils/varbit.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@
2121

2222
/*
2323
* Modeled on struct varlena from postgres.h, but data type is bits8.
24+
*
25+
* Caution: if bit_len is not a multiple of BITS_PER_BYTE, the low-order
26+
* bits of the last byte of bit_dat[] are unused and MUST be zeroes.
27+
* (This allows bit_cmp() to not bother masking the last byte.)
28+
* Also, there should not be any excess bytes counted in the header length.
2429
*/
2530
typedef struct
2631
{

0 commit comments

Comments
 (0)