|
23 | 23 | /*
|
24 | 24 | * For encoding purposes, item pointers are represented as 64-bit unsigned
|
25 | 25 | * integers. The lowest 11 bits represent the offset number, and the next
|
26 |
| - * lowest 32 bits are the block number. That leaves 17 bits unused, ie. |
| 26 | + * lowest 32 bits are the block number. That leaves 21 bits unused, i.e. |
27 | 27 | * only 43 low bits are used.
|
28 | 28 | *
|
| 29 | + * 11 bits is enough for the offset number, because MaxHeapTuplesPerPage < |
| 30 | + * 2^11 on all supported block sizes. We are frugal with the bits, because |
| 31 | + * smaller integers use fewer bytes in the varbyte encoding, saving disk |
| 32 | + * space. (If we get a new table AM in the future that wants to use the full |
| 33 | + * range of possible offset numbers, we'll need to change this.) |
| 34 | + * |
29 | 35 | * These 43-bit integers are encoded using varbyte encoding. In each byte,
|
30 | 36 | * the 7 low bits contain data, while the highest bit is a continuation bit.
|
31 | 37 | * When the continuation bit is set, the next byte is part of the same
|
32 |
| - * integer, otherwise this is the last byte of this integer. 43 bits fit |
33 |
| - * conveniently in at most 6 bytes when varbyte encoded (the 6th byte does |
34 |
| - * not need a continuation bit, because we know the max size to be 43 bits): |
| 38 | + * integer, otherwise this is the last byte of this integer. 43 bits need |
| 39 | + * at most 7 bytes in this encoding: |
35 | 40 | *
|
36 | 41 | * 0XXXXXXX
|
37 | 42 | * 1XXXXXXX 0XXXXYYY
|
38 | 43 | * 1XXXXXXX 1XXXXYYY 0YYYYYYY
|
39 | 44 | * 1XXXXXXX 1XXXXYYY 1YYYYYYY 0YYYYYYY
|
40 | 45 | * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY
|
41 |
| - * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY YYYYYYYY |
| 46 | + * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY |
| 47 | + * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0uuuuuuY |
42 | 48 | *
|
43 | 49 | * X = bits used for offset number
|
44 | 50 | * Y = bits used for block number
|
| 51 | + * u = unused bit |
45 | 52 | *
|
46 | 53 | * The bytes are in stored in little-endian order.
|
47 | 54 | *
|
|
73 | 80 | */
|
74 | 81 | #define MaxHeapTuplesPerPageBits 11
|
75 | 82 |
|
| 83 | +/* Max. number of bytes needed to encode the largest supported integer. */ |
| 84 | +#define MaxBytesPerInteger 7 |
| 85 | + |
76 | 86 | static inline uint64
|
77 | 87 | itemptr_to_uint64(const ItemPointer iptr)
|
78 | 88 | {
|
@@ -130,33 +140,40 @@ decode_varbyte(unsigned char **ptr)
|
130 | 140 | unsigned char *p = *ptr;
|
131 | 141 | uint64 c;
|
132 | 142 |
|
| 143 | + /* 1st byte */ |
133 | 144 | c = *(p++);
|
134 | 145 | val = c & 0x7F;
|
135 | 146 | if (c & 0x80)
|
136 | 147 | {
|
| 148 | + /* 2nd byte */ |
137 | 149 | c = *(p++);
|
138 | 150 | val |= (c & 0x7F) << 7;
|
139 | 151 | if (c & 0x80)
|
140 | 152 | {
|
| 153 | + /* 3rd byte */ |
141 | 154 | c = *(p++);
|
142 | 155 | val |= (c & 0x7F) << 14;
|
143 | 156 | if (c & 0x80)
|
144 | 157 | {
|
| 158 | + /* 4th byte */ |
145 | 159 | c = *(p++);
|
146 | 160 | val |= (c & 0x7F) << 21;
|
147 | 161 | if (c & 0x80)
|
148 | 162 | {
|
| 163 | + /* 5th byte */ |
149 | 164 | c = *(p++);
|
150 | 165 | val |= (c & 0x7F) << 28;
|
151 | 166 | if (c & 0x80)
|
152 | 167 | {
|
| 168 | + /* 6th byte */ |
153 | 169 | c = *(p++);
|
154 | 170 | val |= (c & 0x7F) << 35;
|
155 | 171 | if (c & 0x80)
|
156 | 172 | {
|
157 |
| - /* last byte, no continuation bit */ |
| 173 | + /* 7th byte, should not have continuation bit */ |
158 | 174 | c = *(p++);
|
159 | 175 | val |= c << 42;
|
| 176 | + Assert((c & 0x80) == 0); |
160 | 177 | }
|
161 | 178 | }
|
162 | 179 | }
|
@@ -212,15 +229,15 @@ ginCompressPostingList(const ItemPointer ipd, int nipd, int maxsize,
|
212 | 229 |
|
213 | 230 | Assert(val > prev);
|
214 | 231 |
|
215 |
| - if (endptr - ptr >= 6) |
| 232 | + if (endptr - ptr >= MaxBytesPerInteger) |
216 | 233 | encode_varbyte(delta, &ptr);
|
217 | 234 | else
|
218 | 235 | {
|
219 | 236 | /*
|
220 |
| - * There are less than 6 bytes left. Have to check if the next |
| 237 | + * There are less than 7 bytes left. Have to check if the next |
221 | 238 | * item fits in that space before writing it out.
|
222 | 239 | */
|
223 |
| - unsigned char buf[6]; |
| 240 | + unsigned char buf[MaxBytesPerInteger]; |
224 | 241 | unsigned char *p = buf;
|
225 | 242 |
|
226 | 243 | encode_varbyte(delta, &p);
|
|
0 commit comments