Skip to content

Commit 3309731

Browse files
committed
Missing file
1 parent 8fb66d1 commit 3309731

File tree

1 file changed

+326
-0
lines changed

1 file changed

+326
-0
lines changed
Lines changed: 326 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,326 @@
1+
/**
2+
* This code is released under the
3+
* Apache License Version 2.0 http://www.apache.org/licenses/.
4+
*
5+
* (c) Daniel Lemire, http://lemire.me/en/
6+
*/
7+
package me.lemire.integercompression;
8+
9+
import java.nio.ByteBuffer;
10+
import java.util.Arrays;
11+
12+
/**
13+
* This class is similar to FastPFOR but uses a small block size.
14+
*
15+
* Note that this does not use differential coding: if you are working on sorted
16+
* lists, use IntegratedFastPFOR instead.
17+
*
18+
* For multi-threaded applications, each thread should use its own FastPFOR
19+
* object.
20+
*
21+
* @author Daniel Lemire
22+
*/
23+
public final class FastPFOR128 implements IntegerCODEC,SkippableIntegerCODEC {
24+
final static int OVERHEAD_OF_EACH_EXCEPT = 8;
25+
/**
26+
*
27+
*/
28+
public final static int DEFAULT_PAGE_SIZE = 65536;
29+
/**
30+
*
31+
*/
32+
public final static int DEFAULT_BLOCK_SIZE = 256;
33+
34+
final int blockSize = DEFAULT_BLOCK_SIZE;
35+
final int pageSize;
36+
final int[][] dataTobePacked = new int[33][];
37+
final ByteBuffer byteContainer;
38+
39+
// Working area for compress and uncompress.
40+
int[] dataPointers;
41+
int[] freqs;
42+
final int[] bestbbestcexceptmaxb = new int[3];
43+
44+
/**
45+
* Construct the FastPFOR CODEC.
46+
*
47+
* @param pagesize
48+
* the desired page size (recommended value is FastPFOR.DEFAULT_PAGE_SIZE)
49+
*/
50+
public FastPFOR128(int pagesize) {
51+
pageSize = pagesize;
52+
// Initiate arrrays.
53+
byteContainer = ByteBuffer.allocateDirect(3 * pageSize
54+
/ blockSize + pageSize);
55+
for (int k = 1; k < dataTobePacked.length; ++k)
56+
dataTobePacked[k] = new int[pageSize / 32 * 4]; // heuristic
57+
}
58+
/**
59+
* Construct the fastPFOR CODEC with default parameters.
60+
*/
61+
public FastPFOR128() {
62+
this(DEFAULT_PAGE_SIZE);
63+
}
64+
65+
/**
66+
* Compress data in blocks of BLOCK_SIZE integers (if fewer than BLOCK_SIZE integers
67+
* are provided, nothing is done).
68+
*
69+
* @see IntegerCODEC#compress(int[], IntWrapper, int, int[], IntWrapper)
70+
*/
71+
@Override
72+
public void headlessCompress(int[] in, IntWrapper inpos, int inlength,
73+
int[] out, IntWrapper outpos) {
74+
inlength = Util.greatestMultiple(inlength, blockSize);
75+
if (inlength == 0)
76+
return;
77+
78+
79+
// Allocate memory for working area.
80+
dataPointers = new int[33];
81+
freqs = new int[33];
82+
83+
final int finalinpos = inpos.get() + inlength;
84+
while (inpos.get() != finalinpos) {
85+
int thissize = Math.min(pageSize,
86+
finalinpos - inpos.get());
87+
encodePage(in, inpos, thissize, out, outpos);
88+
}
89+
90+
dataPointers = null;
91+
freqs = null;
92+
}
93+
94+
private void getBestBFromData(int[] in, int pos) {
95+
Arrays.fill(freqs, 0);
96+
for (int k = pos, k_end = pos + blockSize; k < k_end; ++k) {
97+
freqs[Util.bits(in[k])]++;
98+
}
99+
bestbbestcexceptmaxb[0] = 32;
100+
while (freqs[bestbbestcexceptmaxb[0]] == 0)
101+
bestbbestcexceptmaxb[0]--;
102+
bestbbestcexceptmaxb[2] = bestbbestcexceptmaxb[0];
103+
int bestcost = bestbbestcexceptmaxb[0] * blockSize;
104+
int cexcept = 0;
105+
bestbbestcexceptmaxb[1] = cexcept;
106+
for (int b = bestbbestcexceptmaxb[0] - 1; b >= 0; --b) {
107+
cexcept += freqs[b + 1];
108+
if (cexcept == blockSize)
109+
break;
110+
// the extra 8 is the cost of storing maxbits
111+
int thiscost = cexcept * OVERHEAD_OF_EACH_EXCEPT
112+
+ cexcept * (bestbbestcexceptmaxb[2] - b) + b
113+
* blockSize + 8;
114+
if(bestbbestcexceptmaxb[2] - b == 1) thiscost -= cexcept;
115+
if (thiscost < bestcost) {
116+
bestcost = thiscost;
117+
bestbbestcexceptmaxb[0] = b;
118+
bestbbestcexceptmaxb[1] = cexcept;
119+
}
120+
}
121+
}
122+
123+
private void encodePage(int[] in, IntWrapper inpos, int thissize,
124+
int[] out, IntWrapper outpos) {
125+
final int headerpos = outpos.get();
126+
outpos.increment();
127+
int tmpoutpos = outpos.get();
128+
129+
// Clear working area.
130+
Arrays.fill(dataPointers, 0);
131+
byteContainer.clear();
132+
133+
int tmpinpos = inpos.get();
134+
for (final int finalinpos = tmpinpos + thissize - blockSize; tmpinpos <= finalinpos; tmpinpos += blockSize) {
135+
getBestBFromData(in, tmpinpos);
136+
final int tmpbestb = bestbbestcexceptmaxb[0];
137+
byteContainer.put((byte)bestbbestcexceptmaxb[0]);
138+
byteContainer.put((byte)bestbbestcexceptmaxb[1]);
139+
if (bestbbestcexceptmaxb[1] > 0) {
140+
byteContainer.put((byte)bestbbestcexceptmaxb[2]);
141+
final int index = bestbbestcexceptmaxb[2]
142+
- bestbbestcexceptmaxb[0];
143+
if (dataPointers[index]
144+
+ bestbbestcexceptmaxb[1] >= dataTobePacked[index].length) {
145+
int newsize = 2 * (dataPointers[index] + bestbbestcexceptmaxb[1]);
146+
// make sure it is a multiple of 32
147+
newsize = Util
148+
.greatestMultiple(newsize + 31, 32);
149+
dataTobePacked[index] = Arrays.copyOf(
150+
dataTobePacked[index], newsize);
151+
}
152+
for (int k = 0; k < blockSize; ++k) {
153+
if ((in[k + tmpinpos] >>> bestbbestcexceptmaxb[0]) != 0) {
154+
// we have an exception
155+
byteContainer.put((byte) k);
156+
dataTobePacked[index][dataPointers[index]++] = in[k
157+
+ tmpinpos] >>> tmpbestb;
158+
}
159+
}
160+
161+
}
162+
for (int k = 0; k < blockSize; k += 32) {
163+
BitPacking.fastpack(in, tmpinpos + k, out,
164+
tmpoutpos, tmpbestb);
165+
tmpoutpos += tmpbestb;
166+
}
167+
}
168+
inpos.set(tmpinpos);
169+
out[headerpos] = tmpoutpos - headerpos;
170+
final int bytesize = byteContainer.position();
171+
while ((byteContainer.position() & 3) != 0)
172+
byteContainer.put((byte) 0);
173+
out[tmpoutpos++] = bytesize;
174+
final int howmanyints = byteContainer.position() / 4;
175+
byteContainer.flip();
176+
byteContainer.asIntBuffer().get(out, tmpoutpos, howmanyints);
177+
tmpoutpos += howmanyints;
178+
int bitmap = 0;
179+
for (int k = 2; k <= 32; ++k) {
180+
if (dataPointers[k] != 0)
181+
bitmap |= (1 << (k - 1));
182+
}
183+
out[tmpoutpos++] = bitmap;
184+
185+
for (int k = 2; k <= 32; ++k) {
186+
if (dataPointers[k] != 0) {
187+
out[tmpoutpos++] = dataPointers[k];// size
188+
int j = 0;
189+
for (; j < dataPointers[k]; j += 32) {
190+
BitPacking.fastpack(dataTobePacked[k],
191+
j, out, tmpoutpos, k);
192+
tmpoutpos += k;
193+
}
194+
int overflow = j - dataPointers[k];
195+
tmpoutpos -= overflow * k / 32;
196+
}
197+
}
198+
outpos.set(tmpoutpos);
199+
}
200+
201+
/**
202+
* Uncompress data in blocks of integers. In this particular case,
203+
* the inlength parameter is ignored: it is deduced from the compressed
204+
* data.
205+
*
206+
* @see IntegerCODEC#compress(int[], IntWrapper, int, int[], IntWrapper)
207+
*/
208+
@Override
209+
public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
210+
int[] out, IntWrapper outpos, int mynvalue) {
211+
if (inlength == 0)
212+
return;
213+
mynvalue = Util.greatestMultiple(mynvalue, blockSize);
214+
215+
dataPointers = new int[33];
216+
217+
int finalout = outpos.get() + mynvalue;
218+
while (outpos.get() != finalout) {
219+
int thissize = Math.min(pageSize,
220+
finalout - outpos.get());
221+
decodePage(in, inpos, out, outpos, thissize);
222+
}
223+
224+
dataPointers = null;
225+
}
226+
227+
private void decodePage(int[] in, IntWrapper inpos, int[] out,
228+
IntWrapper outpos, int thissize) {
229+
final int initpos = inpos.get();
230+
final int wheremeta = in[inpos.get()];
231+
inpos.increment();
232+
int inexcept = initpos + wheremeta;
233+
final int bytesize = in[inexcept++];
234+
byteContainer.clear();
235+
byteContainer.asIntBuffer().put(in, inexcept, (bytesize + 3) / 4);
236+
inexcept += (bytesize + 3)/ 4;
237+
238+
final int bitmap = in[inexcept++];
239+
for (int k = 2; k <= 32; ++k) {
240+
if ((bitmap & (1 << (k - 1))) != 0) {
241+
int size = in[inexcept++];
242+
int roundedup = Util
243+
.greatestMultiple(size + 31, 32);
244+
if (dataTobePacked[k].length < roundedup)
245+
dataTobePacked[k] = new int[roundedup];
246+
if(inexcept + roundedup/32*k <= in.length) {
247+
int j = 0;
248+
for (; j < size; j += 32) {
249+
BitPacking.fastunpack(in, inexcept,
250+
dataTobePacked[k], j, k);
251+
inexcept += k;
252+
}
253+
int overflow = j - size;
254+
inexcept -= overflow * k / 32;
255+
} else {
256+
int j = 0;
257+
int[] buf = new int[roundedup/32*k];
258+
int initinexcept = inexcept;
259+
System.arraycopy(in, inexcept, buf, 0, in.length - inexcept);
260+
for (; j < size; j += 32) {
261+
BitPacking.fastunpack(buf, inexcept-initinexcept,
262+
dataTobePacked[k], j, k);
263+
inexcept += k;
264+
}
265+
int overflow = j - size;
266+
inexcept -= overflow * k / 32;
267+
}
268+
}
269+
}
270+
Arrays.fill(dataPointers, 0);
271+
int tmpoutpos = outpos.get();
272+
int tmpinpos = inpos.get();
273+
274+
for (int run = 0, run_end = thissize / blockSize; run < run_end; ++run, tmpoutpos += blockSize) {
275+
final int b = byteContainer.get();
276+
final int cexcept = byteContainer.get() & 0xFF;
277+
for (int k = 0; k < blockSize; k += 32) {
278+
BitPacking.fastunpack(in, tmpinpos, out,
279+
tmpoutpos + k, b);
280+
tmpinpos += b;
281+
}
282+
if (cexcept > 0) {
283+
final int maxbits = byteContainer.get();
284+
final int index = maxbits - b;
285+
if(index == 1) {
286+
for (int k = 0; k < cexcept; ++k) {
287+
final int pos = byteContainer.get() &0xFF;
288+
out[pos + tmpoutpos] |= 1 << b;
289+
}
290+
} else {
291+
for (int k = 0; k < cexcept; ++k) {
292+
final int pos = byteContainer.get() &0xFF;
293+
final int exceptvalue = dataTobePacked[index][dataPointers[index]++];
294+
out[pos + tmpoutpos] |= exceptvalue << b;
295+
}
296+
}
297+
}
298+
}
299+
outpos.set(tmpoutpos);
300+
inpos.set(inexcept);
301+
}
302+
@Override
303+
public void compress(int[] in, IntWrapper inpos, int inlength, int[] out,
304+
IntWrapper outpos) {
305+
inlength = Util.greatestMultiple(inlength, blockSize);
306+
if (inlength == 0)
307+
return;
308+
out[outpos.get()] = inlength;
309+
outpos.increment();
310+
headlessCompress(in, inpos, inlength, out, outpos);
311+
}
312+
313+
@Override
314+
public void uncompress(int[] in, IntWrapper inpos, int inlength, int[] out,
315+
IntWrapper outpos) {
316+
if (inlength == 0)
317+
return;
318+
final int outlength = in[inpos.get()];
319+
inpos.increment();
320+
headlessUncompress(in, inpos, inlength, out, outpos, outlength);
321+
}
322+
@Override
323+
public String toString() {
324+
return this.getClass().getSimpleName();
325+
}
326+
}

0 commit comments

Comments
 (0)