|
23 | 23 | /*
|
24 | 24 | * For encoding purposes, item pointers are represented as 64-bit unsigned
|
25 | 25 | * integers. The lowest 11 bits represent the offset number, and the next
|
26 |
| - * lowest 32 bits are the block number. That leaves 17 bits unused, i.e. |
| 26 | + * lowest 32 bits are the block number. That leaves 21 bits unused, i.e. |
27 | 27 | * only 43 low bits are used.
|
28 | 28 | *
|
| 29 | + * 11 bits is enough for the offset number, because MaxHeapTuplesPerPage < |
| 30 | + * 2^11 on all supported block sizes. We are frugal with the bits, because |
| 31 | + * smaller integers use fewer bytes in the varbyte encoding, saving disk |
| 32 | + * space. (If we get a new table AM in the future that wants to use the full |
| 33 | + * range of possible offset numbers, we'll need to change this.) |
| 34 | + * |
29 | 35 | * These 43-bit integers are encoded using varbyte encoding. In each byte,
|
30 | 36 | * the 7 low bits contain data, while the highest bit is a continuation bit.
|
31 | 37 | * When the continuation bit is set, the next byte is part of the same
|
32 |
| - * integer, otherwise this is the last byte of this integer. 43 bits fit |
33 |
| - * conveniently in at most 6 bytes when varbyte encoded (the 6th byte does |
34 |
| - * not need a continuation bit, because we know the max size to be 43 bits): |
| 38 | + * integer, otherwise this is the last byte of this integer. 43 bits need |
| 39 | + * at most 7 bytes in this encoding: |
35 | 40 | *
|
36 | 41 | * 0XXXXXXX
|
37 | 42 | * 1XXXXXXX 0XXXXYYY
|
38 | 43 | * 1XXXXXXX 1XXXXYYY 0YYYYYYY
|
39 | 44 | * 1XXXXXXX 1XXXXYYY 1YYYYYYY 0YYYYYYY
|
40 | 45 | * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY
|
41 |
| - * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY YYYYYYYY |
| 46 | + * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY |
| 47 | + * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0uuuuuuY |
42 | 48 | *
|
43 | 49 | * X = bits used for offset number
|
44 | 50 | * Y = bits used for block number
|
| 51 | + * u = unused bit |
45 | 52 | *
|
46 | 53 | * The bytes are in stored in little-endian order.
|
47 | 54 | *
|
|
73 | 80 | */
|
74 | 81 | #define MaxHeapTuplesPerPageBits 11
|
75 | 82 |
|
| 83 | +/* Max. number of bytes needed to encode the largest supported integer. */ |
| 84 | +#define MaxBytesPerInteger 7 |
| 85 | + |
76 | 86 | static inline uint64
|
77 | 87 | itemptr_to_uint64(const ItemPointer iptr)
|
78 | 88 | {
|
@@ -126,33 +136,40 @@ decode_varbyte(unsigned char **ptr)
|
126 | 136 | unsigned char *p = *ptr;
|
127 | 137 | uint64 c;
|
128 | 138 |
|
| 139 | + /* 1st byte */ |
129 | 140 | c = *(p++);
|
130 | 141 | val = c & 0x7F;
|
131 | 142 | if (c & 0x80)
|
132 | 143 | {
|
| 144 | + /* 2nd byte */ |
133 | 145 | c = *(p++);
|
134 | 146 | val |= (c & 0x7F) << 7;
|
135 | 147 | if (c & 0x80)
|
136 | 148 | {
|
| 149 | + /* 3rd byte */ |
137 | 150 | c = *(p++);
|
138 | 151 | val |= (c & 0x7F) << 14;
|
139 | 152 | if (c & 0x80)
|
140 | 153 | {
|
| 154 | + /* 4th byte */ |
141 | 155 | c = *(p++);
|
142 | 156 | val |= (c & 0x7F) << 21;
|
143 | 157 | if (c & 0x80)
|
144 | 158 | {
|
| 159 | + /* 5th byte */ |
145 | 160 | c = *(p++);
|
146 | 161 | val |= (c & 0x7F) << 28;
|
147 | 162 | if (c & 0x80)
|
148 | 163 | {
|
| 164 | + /* 6th byte */ |
149 | 165 | c = *(p++);
|
150 | 166 | val |= (c & 0x7F) << 35;
|
151 | 167 | if (c & 0x80)
|
152 | 168 | {
|
153 |
| - /* last byte, no continuation bit */ |
| 169 | + /* 7th byte, should not have continuation bit */ |
154 | 170 | c = *(p++);
|
155 | 171 | val |= c << 42;
|
| 172 | + Assert((c & 0x80) == 0); |
156 | 173 | }
|
157 | 174 | }
|
158 | 175 | }
|
@@ -208,15 +225,15 @@ ginCompressPostingList(const ItemPointer ipd, int nipd, int maxsize,
|
208 | 225 |
|
209 | 226 | Assert(val > prev);
|
210 | 227 |
|
211 |
| - if (endptr - ptr >= 6) |
| 228 | + if (endptr - ptr >= MaxBytesPerInteger) |
212 | 229 | encode_varbyte(delta, &ptr);
|
213 | 230 | else
|
214 | 231 | {
|
215 | 232 | /*
|
216 |
| - * There are less than 6 bytes left. Have to check if the next |
| 233 | + * There are less than 7 bytes left. Have to check if the next |
217 | 234 | * item fits in that space before writing it out.
|
218 | 235 | */
|
219 |
| - unsigned char buf[6]; |
| 236 | + unsigned char buf[MaxBytesPerInteger]; |
220 | 237 | unsigned char *p = buf;
|
221 | 238 |
|
222 | 239 | encode_varbyte(delta, &p);
|
|
0 commit comments