diff --git a/.gitignore b/.gitignore
index 53960d2..88b3320 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
.classpath
+.settings
.project
*.class
*.csv
diff --git a/pom.xml b/pom.xml
index fc6912b..a960b2c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -48,6 +48,12 @@
4.13.1
test
+
+ org.roaringbitmap
+ RoaringBitmap
+ 0.9.35
+ test
+
GitHub Issue Tracking
diff --git a/src/main/java/me/lemire/integercompression/ByteIntegerCODEC.java b/src/main/java/me/lemire/integercompression/ByteIntegerCODEC.java
index 47d4f57..6e8f903 100644
--- a/src/main/java/me/lemire/integercompression/ByteIntegerCODEC.java
+++ b/src/main/java/me/lemire/integercompression/ByteIntegerCODEC.java
@@ -18,9 +18,9 @@ public interface ByteIntegerCODEC {
* Compress data from an array to another array.
*
* Both inpos and outpos are modified to represent how much data was
- * read and written to if 12 ints (inlength = 12) are compressed to 3
+ * read and written to. If 12 ints (inlength = 12) are compressed to 3
* bytes, then inpos will be incremented by 12 while outpos will be
- * incremented by 3 we use IntWrapper to pass the values by reference.
+ * incremented by 3. We use IntWrapper to pass the values by reference.
*
* @param in
* input array
diff --git a/src/main/java/me/lemire/integercompression/DeltaZigzagVariableByte.java b/src/main/java/me/lemire/integercompression/DeltaZigzagVariableByte.java
index 4b2f896..ca9d0ad 100644
--- a/src/main/java/me/lemire/integercompression/DeltaZigzagVariableByte.java
+++ b/src/main/java/me/lemire/integercompression/DeltaZigzagVariableByte.java
@@ -105,7 +105,7 @@ public void uncompress(int[] inBuf, IntWrapper inPos, int inLen,
int ip = inPos.get();
int op = outPos.get();
- int vbcNum = 0, vbcShift = 24; // Varialbe Byte Context.
+ int vbcNum = 0, vbcShift = 24; // Variable Byte Context.
final int inPosLast = ip + inLen;
while (ip < inPosLast) {
// Fetch a byte value.
diff --git a/src/main/java/me/lemire/integercompression/IntegerCODEC.java b/src/main/java/me/lemire/integercompression/IntegerCODEC.java
index 7929e48..f2c9c7a 100644
--- a/src/main/java/me/lemire/integercompression/IntegerCODEC.java
+++ b/src/main/java/me/lemire/integercompression/IntegerCODEC.java
@@ -18,9 +18,9 @@ public interface IntegerCODEC {
* Compress data from an array to another array.
*
* Both inpos and outpos are modified to represent how much data was
- * read and written to if 12 ints (inlength = 12) are compressed to 3
+ * read and written to. If 12 ints (inlength = 12) are compressed to 3
* ints, then inpos will be incremented by 12 while outpos will be
- * incremented by 3 we use IntWrapper to pass the values by reference.
+ * incremented by 3. We use IntWrapper to pass the values by reference.
*
* @param in
* input array
diff --git a/src/main/java/me/lemire/integercompression/SkippableIntegerCODEC.java b/src/main/java/me/lemire/integercompression/SkippableIntegerCODEC.java
index c10d2f0..4568d71 100644
--- a/src/main/java/me/lemire/integercompression/SkippableIntegerCODEC.java
+++ b/src/main/java/me/lemire/integercompression/SkippableIntegerCODEC.java
@@ -10,7 +10,7 @@
/**
* Interface describing a standard CODEC to compress integers. This is a
- * variation on the IntegerCODEC interface meant to be used for random access.
+ * variation on the IntegerCODEC interface meant to be used for head access.
*
* The main difference is that we must specify the number of integers we wish to
* decode. This information should be stored elsewhere.
@@ -25,8 +25,8 @@ public interface SkippableIntegerCODEC {
* Compress data from an array to another array.
*
* Both inpos and outpos are modified to represent how much data was read
- * and written to if 12 ints (inlength = 12) are compressed to 3 ints, then
- * inpos will be incremented by 12 while outpos will be incremented by 3 we
+ * and written to. If 12 ints (inlength = 12) are compressed to 3 ints, then
+ * inpos will be incremented by 12 while outpos will be incremented by 3. We
* use IntWrapper to pass the values by reference.
*
* @param in
diff --git a/src/main/java/me/lemire/integercompression/VariableByte.java b/src/main/java/me/lemire/integercompression/VariableByte.java
index 5b25c43..09e479b 100644
--- a/src/main/java/me/lemire/integercompression/VariableByte.java
+++ b/src/main/java/me/lemire/integercompression/VariableByte.java
@@ -122,8 +122,11 @@ public void uncompress(int[] in, IntWrapper inpos, int inlength, int[] out,
for (int v = 0, shift = 0; p < finalp;) {
val = in[p];
int c = (byte) (val >>> s);
+ // Shift to next byte
s += 8;
+ // Shift to next integer if s==32
p += s>>5;
+ // cycle from 31 to 0
s = s & 31;
v += ((c & 127) << shift);
if ((c & 128) == 128) {
@@ -187,8 +190,11 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o
for (int v = 0, shift = 0; tmpoutpos < finaloutpos;) {
val = in[p];
int c = val >>> s;
+ // Shift to next byte
s += 8;
+ // Shift to next integer if s==32
p += s>>5;
+ // cycle from 31 to 0
s = s & 31;
v += ((c & 127) << shift);
if ((c & 128) == 128) {
diff --git a/src/main/java/me/lemire/longcompression/ByteLongCODEC.java b/src/main/java/me/lemire/longcompression/ByteLongCODEC.java
new file mode 100644
index 0000000..e405370
--- /dev/null
+++ b/src/main/java/me/lemire/longcompression/ByteLongCODEC.java
@@ -0,0 +1,62 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+
+package me.lemire.longcompression;
+
+import me.lemire.integercompression.IntWrapper;
+
+/**
+ * Interface describing a CODEC to compress longs to bytes.
+ *
+ * @author Benoit Lacelle
+ *
+ */
+public interface ByteLongCODEC {
+ /**
+ * Compress data from an array to another array.
+ *
+ * Both inpos and outpos are modified to represent how much data was
+ * read and written to. If 12 longs (inlength = 12) are compressed to 3
+ * bytes, then inpos will be incremented by 12 while outpos will be
+ * incremented by 3. We use IntWrapper to pass the values by reference.
+ *
+ * @param in
+ * input array
+ * @param inpos
+ * location in the input array
+ * @param inlength
+ * how many longs to compress
+ * @param out
+ * output array
+ * @param outpos
+ * where to write in the output array
+ */
+ public void compress(long[] in, IntWrapper inpos, int inlength,
+ byte[] out, IntWrapper outpos);
+
+ /**
+ * Uncompress data from an array to another array.
+ *
+ * Both inpos and outpos parameters are modified to indicate new
+ * positions after read/write.
+ *
+ * @param in
+ * array containing data in compressed form
+ * @param inpos
+ * where to start reading in the array
+ * @param inlength
+ * length of the compressed data (ignored by some
+ * schemes)
+ * @param out
+ * array where to write the uncompressed output
+ * @param outpos
+ * where to write the uncompressed output in out
+ */
+ public void uncompress(byte[] in, IntWrapper inpos, int inlength,
+ long[] out, IntWrapper outpos);
+
+}
diff --git a/src/main/java/me/lemire/longcompression/IntegratedLongCODEC.java b/src/main/java/me/lemire/longcompression/IntegratedLongCODEC.java
new file mode 100644
index 0000000..b21ef68
--- /dev/null
+++ b/src/main/java/me/lemire/longcompression/IntegratedLongCODEC.java
@@ -0,0 +1,11 @@
+package me.lemire.longcompression;
+
+/**
+ * Marker interface: same contract as {@link LongCODEC}, except that it indicates that delta
+ * coding is "integrated", so that you don't need a separate step for delta coding.
+ *
+ * @author Benoit Lacelle
+ */
+public interface IntegratedLongCODEC extends LongCODEC {
+
+}
diff --git a/src/main/java/me/lemire/longcompression/LongAs2IntsCodec.java b/src/main/java/me/lemire/longcompression/LongAs2IntsCodec.java
new file mode 100644
index 0000000..3b2bc76
--- /dev/null
+++ b/src/main/java/me/lemire/longcompression/LongAs2IntsCodec.java
@@ -0,0 +1,189 @@
+package me.lemire.longcompression;
+
+import java.util.Arrays;
+
+import me.lemire.integercompression.BinaryPacking;
+import me.lemire.integercompression.Composition;
+import me.lemire.integercompression.IntCompressor;
+import me.lemire.integercompression.IntWrapper;
+import me.lemire.integercompression.IntegerCODEC;
+import me.lemire.integercompression.VariableByte;
+
+/**
+ * A {@link LongCODEC} which splits each long into a high part (32 first bits) and a low part (32 last bits).
+ *
+ * @author Benoit Lacelle
+ *
+ */
+public class LongAs2IntsCodec implements LongCODEC {
+ final IntegerCODEC highPartsCodec;
+ final IntegerCODEC lowPartsCodec;
+
+ public LongAs2IntsCodec(IntegerCODEC highPartsCodec, IntegerCODEC lowPartsCodec) { // one int codec per 32-bit half
+ this.highPartsCodec = highPartsCodec; // used for the values returned by RoaringIntPacking.high
+ this.lowPartsCodec = lowPartsCodec; // used for the values returned by RoaringIntPacking.low
+ }
+
+ /**
+ * Default codecs: {@link VariableByte} for highParts, as we expect longs to be slightly above Integer.MAX_VALUE
+ * (hence highParts to be small and positive integers). For lowParts, we rely on {@link IntCompressor} default IntegerCODEC
+ */
+ public LongAs2IntsCodec() {
+ this(new VariableByte(), new Composition(new BinaryPacking(), new VariableByte()));
+ }
+
+ @Override
+ public void compress(long[] in, IntWrapper inpos, int inlength, long[] out, IntWrapper outpos) { // output layout: [nbHigh, high ints..., nbLow, low ints...] packed two ints per long
+ if (inlength == 0) {
+ return;
+ }
+
+ int[] highParts = new int[inlength];
+ int[] lowParts = new int[inlength];
+
+ for (int i = 0; i < inlength; i++) { // split every input long into its two 32-bit halves
+ int inPosition = inpos.get() + i;
+
+ highParts[i] = RoaringIntPacking.high(in[inPosition]);
+ lowParts[i] = RoaringIntPacking.low(in[inPosition]);
+ }
+
+ // TODO What would be a relevant buffer size?
+ int[] buffer = new int[inlength * 16];
+
+ int outPosition = outpos.get();
+
+ boolean hasLeftover;
+ {
+ // The first integer is reserved to hold the number of compressed ints
+ IntWrapper highPartsOutPosition = new IntWrapper(1);
+
+ highPartsCodec.compress(highParts, new IntWrapper(), inlength, buffer, highPartsOutPosition);
+
+ // Record the compressedHighParts length (the reserved slot itself is not counted)
+ buffer[0] = highPartsOutPosition.get() - 1;
+
+ for (int i = 0; i < highPartsOutPosition.get() / 2; i++) { // emit every complete pair of ints as one long
+ long pack = RoaringIntPacking.pack(buffer[i * 2], buffer[i * 2 + 1]);
+ out[outPosition++] = pack;
+ }
+
+ if (1 == highPartsOutPosition.get() % 2) {
+ // Odd number of ints: move the unpaired trailing integer to the front of the buffer
+ hasLeftover = true;
+ buffer[0] = buffer[highPartsOutPosition.get() - 1];
+ } else {
+ hasLeftover = false;
+ }
+ }
+
+ {
+ // The first integer is reserved to hold the number of compressed ints
+ IntWrapper lowPartsOutPosition = new IntWrapper(1);
+ if (hasLeftover) {
+ // Keep the trailing int from highParts before the reserved int from lowParts compressed length
+ lowPartsOutPosition.set(2);
+ }
+
+ lowPartsCodec.compress(lowParts, new IntWrapper(0), inlength, buffer, lowPartsOutPosition);
+
+ // Record the compressedLowParts length (excluding the reserved slot and the leftover int, if any)
+ buffer[hasLeftover ? 1 : 0] = lowPartsOutPosition.get() - (hasLeftover ? 2 : 1);
+
+ for (int i = 0; i < lowPartsOutPosition.get() / 2; i++) { // emit every complete pair of ints as one long
+ long pack = RoaringIntPacking.pack(buffer[i * 2], buffer[i * 2 + 1]);
+ out[outPosition++] = pack;
+ }
+
+ if (1 == lowPartsOutPosition.get() % 2) {
+ // The trailing integer is packed with a 0
+ long pack = RoaringIntPacking.pack(buffer[lowPartsOutPosition.get() - 1], 0);
+ out[outPosition++] = pack;
+ }
+ }
+
+ inpos.add(inlength);
+ outpos.set(outPosition);
+ }
+
+ /**
+ * inlength is not used to locate the compressed ints (their counts are read from the stream); it only sizes the scratch buffer and advances inpos. We may rely on it instead of storing the compressedLowPart length
+ */
+ @Override
+ public void uncompress(long[] in, IntWrapper inpos, int inlength, long[] out, IntWrapper outpos) {
+ if (inlength == 0) {
+ return;
+ }
+
+ int longIndex = inpos.get();
+
+ int nbCompressedHighParts = RoaringIntPacking.high(in[longIndex]); // first int of the stream: number of compressed high-part ints
+ int[] compressedHighParts = new int[nbCompressedHighParts];
+
+ // !highPart as we just read the highPart for nbCompressedHighParts
+ boolean highPart = false;
+ for (int i = 0; i < nbCompressedHighParts; i++) { // unpack the ints, alternating between the low and high half of each long
+ int nextInt;
+ if (highPart) {
+ nextInt = RoaringIntPacking.high(in[longIndex + (i + 1) / 2]);
+ } else {
+ nextInt = RoaringIntPacking.low(in[longIndex + (i + 1) / 2]);
+ }
+ compressedHighParts[i] = nextInt;
+
+ highPart = !highPart;
+ }
+
+ // TODO What would be a relevant buffer size?
+ int[] buffer = new int[inlength * 16];
+
+ IntWrapper highPartsOutPosition = new IntWrapper();
+ highPartsCodec.uncompress(compressedHighParts,
+ new IntWrapper(),
+ compressedHighParts.length,
+ buffer,
+ highPartsOutPosition);
+ int[] highParts = Arrays.copyOf(buffer, highPartsOutPosition.get()); // copy out: buffer is reused below for the low parts
+
+ // +1 as we initially read nbCompressedHighParts
+ int intIndexNbCompressedLowParts = longIndex * 2 + 1 + nbCompressedHighParts; // index counted in ints, not longs
+ int nbCompressedLowParts;
+ if (highPart) {
+ nbCompressedLowParts = RoaringIntPacking.high(in[intIndexNbCompressedLowParts / 2]);
+ } else {
+ nbCompressedLowParts = RoaringIntPacking.low(in[intIndexNbCompressedLowParts / 2]);
+ }
+ highPart = !highPart;
+
+ int[] compressedLowParts = new int[nbCompressedLowParts];
+ for (int i = 0; i < nbCompressedLowParts; i++) { // same alternating unpacking as for the high parts
+ int nextInt;
+ if (highPart) {
+ nextInt = RoaringIntPacking.high(in[(intIndexNbCompressedLowParts + 1 + i) / 2]);
+ } else {
+ nextInt = RoaringIntPacking.low(in[(intIndexNbCompressedLowParts + 1 + i) / 2]);
+ }
+ compressedLowParts[i] = nextInt;
+
+ highPart = !highPart;
+ }
+
+ IntWrapper lowPartsOutPosition = new IntWrapper();
+ lowPartsCodec.uncompress(compressedLowParts,
+ new IntWrapper(),
+ compressedLowParts.length,
+ buffer,
+ lowPartsOutPosition);
+ int[] lowParts = Arrays.copyOf(buffer, lowPartsOutPosition.get());
+ assert highParts.length == lowParts.length;
+
+ int outposition = outpos.get();
+ for (int i = 0; i < highParts.length; i++) { // recombine each (high, low) pair into the original long
+ out[outposition++] = RoaringIntPacking.pack(highParts[i], lowParts[i]);
+ }
+
+ inpos.add(inlength);
+ outpos.set(outposition);
+ }
+
+}
diff --git a/src/main/java/me/lemire/longcompression/LongCODEC.java b/src/main/java/me/lemire/longcompression/LongCODEC.java
new file mode 100644
index 0000000..c0f67b2
--- /dev/null
+++ b/src/main/java/me/lemire/longcompression/LongCODEC.java
@@ -0,0 +1,62 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+
+package me.lemire.longcompression;
+
+import me.lemire.integercompression.IntWrapper;
+
+/**
+ * Interface describing a standard CODEC to compress longs.
+ *
+ * @author Benoit Lacelle
+ *
+ */
+public interface LongCODEC {
+ /**
+ * Compress data from an array to another array.
+ *
+ * Both inpos and outpos are modified to represent how much data was
+ * read and written to. If 12 longs (inlength = 12) are compressed to 3
+ * longs, then inpos will be incremented by 12 while outpos will be
+ * incremented by 3. We use IntWrapper to pass the values by reference.
+ *
+ * @param in
+ * input array
+ * @param inpos
+ * location in the input array
+ * @param inlength
+ * how many longs to compress
+ * @param out
+ * output array
+ * @param outpos
+ * where to write in the output array
+ */
+ public void compress(long[] in, IntWrapper inpos, int inlength,
+ long[] out, IntWrapper outpos);
+
+ /**
+ * Uncompress data from an array to another array.
+ *
+ * Both inpos and outpos parameters are modified to indicate new
+ * positions after read/write.
+ *
+ * @param in
+ * array containing data in compressed form
+ * @param inpos
+ * where to start reading in the array
+ * @param inlength
+ * length of the compressed data (ignored by some
+ * schemes)
+ * @param out
+ * array where to write the uncompressed output
+ * @param outpos
+ * where to write the uncompressed output in out
+ */
+ public void uncompress(long[] in, IntWrapper inpos, int inlength,
+ long[] out, IntWrapper outpos);
+
+}
diff --git a/src/main/java/me/lemire/longcompression/LongComposition.java b/src/main/java/me/lemire/longcompression/LongComposition.java
new file mode 100644
index 0000000..1394a78
--- /dev/null
+++ b/src/main/java/me/lemire/longcompression/LongComposition.java
@@ -0,0 +1,71 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+package me.lemire.longcompression;
+
+import me.lemire.integercompression.IntWrapper;
+
+/**
+ * Helper class to compose schemes.
+ *
+ * @author Benoit Lacelle
+ */
+public class LongComposition implements LongCODEC {
+ LongCODEC F1, F2;
+
+ /**
+ * Compose a scheme from a first one (f1) and a second one (f2). The
+ * first one is called first and then the second one tries to compress
+ * whatever remains from the first run.
+ *
+ * By convention, the first scheme should be such that if, during
+ * decoding, a zero word (64-bit here) is first encountered, then there is no
+ * output.
+ *
+ * @param f1
+ * first codec
+ * @param f2
+ * second codec
+ */
+ public LongComposition(LongCODEC f1, LongCODEC f2) {
+ F1 = f1;
+ F2 = f2;
+ }
+
+ @Override
+ public void compress(long[] in, IntWrapper inpos, int inlength,
+ long[] out, IntWrapper outpos) {
+ if (inlength == 0) {
+ return;
+ }
+ int inposInit = inpos.get();
+ int outposInit = outpos.get();
+ F1.compress(in, inpos, inlength, out, outpos);
+ if (outpos.get() == outposInit) { // F1 wrote nothing: emit a single zero long in its place
+ out[outposInit] = 0;
+ outpos.increment();
+ }
+ inlength -= inpos.get() - inposInit; // what F1 did not consume is left for F2
+ F2.compress(in, inpos, inlength, out, outpos);
+ }
+
+ @Override
+ public void uncompress(long[] in, IntWrapper inpos, int inlength,
+ long[] out, IntWrapper outpos) {
+ if (inlength == 0)
+ return;
+ final int init = inpos.get();
+ F1.uncompress(in, inpos, inlength, out, outpos);
+ inlength -= inpos.get() - init; // remaining compressed input is handed to F2
+ F2.uncompress(in, inpos, inlength, out, outpos);
+ }
+
+ @Override
+ public String toString() {
+ return F1.toString() + " + " + F2.toString();
+ }
+
+}
diff --git a/src/main/java/me/lemire/longcompression/LongJustCopy.java b/src/main/java/me/lemire/longcompression/LongJustCopy.java
new file mode 100644
index 0000000..7a5a67a
--- /dev/null
+++ b/src/main/java/me/lemire/longcompression/LongJustCopy.java
@@ -0,0 +1,52 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+
+package me.lemire.longcompression;
+
+import me.lemire.integercompression.IntWrapper;
+
+/**
+ * Pass-through codec: copies the longs verbatim from input to output, without compressing them.
+ * @author Benoit Lacelle
+ */
+public final class LongJustCopy implements LongCODEC, SkippableLongCODEC {
+
+ @Override
+ public void headlessCompress(long[] in, IntWrapper inpos, int inlength,
+ long[] out, IntWrapper outpos) {
+ System.arraycopy(in, inpos.get(), out, outpos.get(), inlength); // plain copy of inlength longs
+ inpos.add(inlength);
+ outpos.add(inlength);
+ }
+
+ @Override
+ public void uncompress(long[] in, IntWrapper inpos, int inlength,
+ long[] out, IntWrapper outpos) {
+ headlessUncompress(in,inpos,inlength,out,outpos,inlength); // nothing was compressed, so num == inlength
+ }
+
+ @Override
+ public String toString() {
+ return this.getClass().getSimpleName();
+ }
+
+ @Override
+ public void headlessUncompress(long[] in, IntWrapper inpos, int inlength,
+ long[] out, IntWrapper outpos, int num) {
+ System.arraycopy(in, inpos.get(), out, outpos.get(), num); // copies num longs; inlength is not read here
+ inpos.add(num);
+ outpos.add(num);
+
+ }
+
+ @Override
+ public void compress(long[] in, IntWrapper inpos, int inlength,
+ long[] out, IntWrapper outpos) {
+ headlessCompress(in,inpos,inlength,out,outpos);
+ }
+
+}
diff --git a/src/main/java/me/lemire/longcompression/LongUtil.java b/src/main/java/me/lemire/longcompression/LongUtil.java
new file mode 100644
index 0000000..c06433f
--- /dev/null
+++ b/src/main/java/me/lemire/longcompression/LongUtil.java
@@ -0,0 +1,22 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+
+package me.lemire.longcompression;
+
+/**
+ * These are unofficial helpers related to long compression
+ *
+ * @author Benoit Lacelle
+ * @deprecated unofficial helpers (presumably debugging aids); NOTE(review): confirm the intended replacement, if any
+ */
+@Deprecated
+public class LongUtil {
+
+ protected static String longToBinaryWithLeading(long l) { // binary form of l, left-padded with '0' to 64 characters
+ return String.format("%64s", Long.toBinaryString(l)).replace(' ', '0');
+ }
+}
diff --git a/src/main/java/me/lemire/longcompression/LongVariableByte.java b/src/main/java/me/lemire/longcompression/LongVariableByte.java
new file mode 100644
index 0000000..478db20
--- /dev/null
+++ b/src/main/java/me/lemire/longcompression/LongVariableByte.java
@@ -0,0 +1,343 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+package me.lemire.longcompression;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.LongBuffer;
+
+import me.lemire.integercompression.IntWrapper;
+
+/**
+ * Implementation of variable-byte. For best performance, use it using the
+ * ByteLongCODEC interface.
+ *
+ * Note that this does not use differential coding: if you are working on sorted
+ * lists, you must compute the deltas separately.
+ *
+ * @author Benoit Lacelle
+ */
+public class LongVariableByte implements LongCODEC, ByteLongCODEC, SkippableLongCODEC {
+
+ private static byte extract7bits(int i, long val) { // i-th group of 7 bits of val (group 0 = least significant)
+ return (byte) ((val >>> (7 * i)) & ((1 << 7) - 1)); // mask 0x7F leaves the top bit of the byte clear
+ }
+
+ private static byte extract7bitsmaskless(int i, long val) { // like extract7bits but without the 0x7F mask
+ return (byte) ((val >>> (7 * i))); // the byte cast keeps the low 8 bits of the shifted value
+ }
+ @Override
+ public void compress(long[] in, IntWrapper inpos, int inlength, long[] out,
+ IntWrapper outpos) {
+ headlessCompress(in, inpos, inlength, out, outpos); // this codec writes no header, so both entry points are identical
+ }
+
+ @Override
+ public void headlessCompress(long[] in, IntWrapper inpos, int inlength, long[] out, // variable-byte encodes inlength longs, packed 8 bytes per output long
+ IntWrapper outpos) {
+ if (inlength == 0)
+ return;
+ // Worst case: we write 10 bytes per long, hence 2 longs for a long, hence 16 bytes per long
+ ByteBuffer buf = makeBuffer(inlength * 16);
+ buf.order(ByteOrder.LITTLE_ENDIAN);
+ for (int k = inpos.get(); k < inpos.get() + inlength; ++k) {
+ final long val = in[k];
+ // Each value is written as 7-bit groups, least significant first; the last byte has its top bit set
+ if (val >= 0 && val < (1 << 7)) {
+ buf.put((byte) (val | (1 << 7)));
+ } else if (val >= 0 && val < (1 << 14)) {
+ buf.put((byte) extract7bits(0, val));
+ buf.put((byte) (extract7bitsmaskless(1, (val)) | (1 << 7)));
+ } else if (val >= 0 && val < (1 << 21)) {
+ buf.put((byte) extract7bits(0, val));
+ buf.put((byte) extract7bits(1, val));
+ buf.put((byte) (extract7bitsmaskless(2, (val)) | (1 << 7)));
+ } else if (val >= 0 && val < (1 << 28)) {
+ buf.put((byte) extract7bits(0, val));
+ buf.put((byte) extract7bits(1, val));
+ buf.put((byte) extract7bits(2, val));
+ buf.put((byte) (extract7bitsmaskless(3, (val)) | (1 << 7)));
+ } else if (val >= 0 && val < (1L << 35)) {
+ buf.put((byte) extract7bits(0, val));
+ buf.put((byte) extract7bits(1, val));
+ buf.put((byte) extract7bits(2, val));
+ buf.put((byte) extract7bits(3, val));
+ buf.put((byte) (extract7bitsmaskless(4, (val)) | (1 << 7)));
+ } else if (val >= 0 && val < (1L << 42)) {
+ buf.put((byte) extract7bits(0, val));
+ buf.put((byte) extract7bits(1, val));
+ buf.put((byte) extract7bits(2, val));
+ buf.put((byte) extract7bits(3, val));
+ buf.put((byte) extract7bits(4, val));
+ buf.put((byte) (extract7bitsmaskless(5, (val)) | (1 << 7)));
+ } else if (val >= 0 && val < (1L << 49)) {
+ buf.put((byte) extract7bits(0, val));
+ buf.put((byte) extract7bits(1, val));
+ buf.put((byte) extract7bits(2, val));
+ buf.put((byte) extract7bits(3, val));
+ buf.put((byte) extract7bits(4, val));
+ buf.put((byte) extract7bits(5, val));
+ buf.put((byte) (extract7bitsmaskless(6, (val)) | (1 << 7)));
+ } else if (val >= 0 && val < (1L << 56)) {
+ buf.put((byte) extract7bits(0, val));
+ buf.put((byte) extract7bits(1, val));
+ buf.put((byte) extract7bits(2, val));
+ buf.put((byte) extract7bits(3, val));
+ buf.put((byte) extract7bits(4, val));
+ buf.put((byte) extract7bits(5, val));
+ buf.put((byte) extract7bits(6, val));
+ buf.put((byte) (extract7bitsmaskless(7, (val)) | (1 << 7)));
+ } else if (val >= 0) { // any remaining non-negative long fits in 63 bits; (1L << 63) is Long.MIN_VALUE, so "val < (1L << 63)" was never true
+ buf.put((byte) extract7bits(0, val));
+ buf.put((byte) extract7bits(1, val));
+ buf.put((byte) extract7bits(2, val));
+ buf.put((byte) extract7bits(3, val));
+ buf.put((byte) extract7bits(4, val));
+ buf.put((byte) extract7bits(5, val));
+ buf.put((byte) extract7bits(6, val));
+ buf.put((byte) extract7bits(7, val));
+ buf.put((byte) (extract7bitsmaskless(8, (val)) | (1 << 7)));
+ } else { // negative values need all 64 bits, hence 10 bytes
+ buf.put((byte) extract7bits(0, val));
+ buf.put((byte) extract7bits(1, val));
+ buf.put((byte) extract7bits(2, val));
+ buf.put((byte) extract7bits(3, val));
+ buf.put((byte) extract7bits(4, val));
+ buf.put((byte) extract7bits(5, val));
+ buf.put((byte) extract7bits(6, val));
+ buf.put((byte) extract7bits(7, val));
+ buf.put((byte) extract7bits(8, val));
+ buf.put((byte) (extract7bitsmaskless(9, (val)) | (1 << 7)));
+ }
+ }
+ while (buf.position() % 8 != 0) // pad with zero bytes up to a whole number of longs
+ buf.put((byte) 0);
+ final int length = buf.position();
+ buf.flip();
+ LongBuffer ibuf = buf.asLongBuffer();
+ ibuf.get(out, outpos.get(), length / 8);
+ outpos.add(length / 8);
+ inpos.add(inlength);
+ }
+
+ @Override
+ public void compress(long[] in, IntWrapper inpos, int inlength, byte[] out, // variable-byte encodes inlength longs straight into the byte array
+ IntWrapper outpos) {
+ if (inlength == 0)
+ return;
+ int outpostmp = outpos.get();
+ for (int k = inpos.get(); k < inpos.get() + inlength; ++k) { // 7-bit groups, least significant first; last byte has its top bit set
+ final long val = in[k];
+ if (val >= 0 && val < (1 << 7)) {
+ out[outpostmp++] = (byte) (val | (1 << 7));
+ } else if (val >= 0 && val < (1 << 14)) {
+ out[outpostmp++] = (byte) extract7bits(0, val);
+ out[outpostmp++] = (byte) (extract7bitsmaskless(1, (val)) | (1 << 7));
+ } else if (val >= 0 && val < (1 << 21)) {
+ out[outpostmp++] = (byte) extract7bits(0, val);
+ out[outpostmp++] = (byte) extract7bits(1, val);
+ out[outpostmp++] = (byte) (extract7bitsmaskless(2, (val)) | (1 << 7));
+ } else if (val >= 0 && val < (1 << 28)) {
+ out[outpostmp++] = (byte) extract7bits(0, val);
+ out[outpostmp++] = (byte) extract7bits(1, val);
+ out[outpostmp++] = (byte) extract7bits(2, val);
+ out[outpostmp++] = (byte) (extract7bitsmaskless(3, (val)) | (1 << 7));
+ } else if (val >= 0 && val < (1L << 35)) {
+ out[outpostmp++] = (byte) extract7bits(0, val);
+ out[outpostmp++] = (byte) extract7bits(1, val);
+ out[outpostmp++] = (byte) extract7bits(2, val);
+ out[outpostmp++] = (byte) extract7bits(3, val);
+ out[outpostmp++] = (byte) (extract7bitsmaskless(4, (val)) | (1 << 7));
+ } else if (val >= 0 && val < (1L << 42)) {
+ out[outpostmp++] = (byte) extract7bits(0, val);
+ out[outpostmp++] = (byte) extract7bits(1, val);
+ out[outpostmp++] = (byte) extract7bits(2, val);
+ out[outpostmp++] = (byte) extract7bits(3, val);
+ out[outpostmp++] = (byte) extract7bits(4, val);
+ out[outpostmp++] = (byte) (extract7bitsmaskless(5, (val)) | (1 << 7));
+ } else if (val >= 0 && val < (1L << 49)) {
+ out[outpostmp++] = (byte) extract7bits(0, val);
+ out[outpostmp++] = (byte) extract7bits(1, val);
+ out[outpostmp++] = (byte) extract7bits(2, val);
+ out[outpostmp++] = (byte) extract7bits(3, val);
+ out[outpostmp++] = (byte) extract7bits(4, val);
+ out[outpostmp++] = (byte) extract7bits(5, val);
+ out[outpostmp++] = (byte) (extract7bitsmaskless(6, (val)) | (1 << 7));
+ } else if (val >= 0 && val < (1L << 56)) {
+ out[outpostmp++] = (byte) extract7bits(0, val);
+ out[outpostmp++] = (byte) extract7bits(1, val);
+ out[outpostmp++] = (byte) extract7bits(2, val);
+ out[outpostmp++] = (byte) extract7bits(3, val);
+ out[outpostmp++] = (byte) extract7bits(4, val);
+ out[outpostmp++] = (byte) extract7bits(5, val);
+ out[outpostmp++] = (byte) extract7bits(6, val);
+ out[outpostmp++] = (byte) (extract7bitsmaskless(7, (val)) | (1 << 7));
+ } else if (val >= 0) { // any remaining non-negative long fits in 63 bits; (1L << 63) is Long.MIN_VALUE, so "val < (1L << 63)" was never true
+ out[outpostmp++] = (byte) extract7bits(0, val);
+ out[outpostmp++] = (byte) extract7bits(1, val);
+ out[outpostmp++] = (byte) extract7bits(2, val);
+ out[outpostmp++] = (byte) extract7bits(3, val);
+ out[outpostmp++] = (byte) extract7bits(4, val);
+ out[outpostmp++] = (byte) extract7bits(5, val);
+ out[outpostmp++] = (byte) extract7bits(6, val);
+ out[outpostmp++] = (byte) extract7bits(7, val);
+ out[outpostmp++] = (byte) (extract7bitsmaskless(8, (val)) | (1 << 7));
+ } else {
+ // Negative values need all 64 bits, hence 10 bytes
+ out[outpostmp++] = (byte) extract7bits(0, val);
+ out[outpostmp++] = (byte) extract7bits(1, val);
+ out[outpostmp++] = (byte) extract7bits(2, val);
+ out[outpostmp++] = (byte) extract7bits(3, val);
+ out[outpostmp++] = (byte) extract7bits(4, val);
+ out[outpostmp++] = (byte) extract7bits(5, val);
+ out[outpostmp++] = (byte) extract7bits(6, val);
+ out[outpostmp++] = (byte) extract7bits(7, val);
+ out[outpostmp++] = (byte) extract7bits(8, val);
+ out[outpostmp++] = (byte) (extract7bitsmaskless(9, (val)) | (1 << 7));
+ }
+ }
+ outpos.set(outpostmp);
+ inpos.add(inlength);
+ }
+
+ @Override
+ public void uncompress(long[] in, IntWrapper inpos, int inlength, long[] out, // decodes the variable-byte stream held in inlength longs
+ IntWrapper outpos) {
+ int s = 0; // bit offset of the current byte inside in[p]
+ long val = 0;
+ int p = inpos.get();
+ int finalp = inpos.get() + inlength;
+ int tmpoutpos = outpos.get();
+ for (long v = 0, shift = 0; p < finalp;) { // v accumulates the value being decoded, shift is its next bit position
+ val = in[p];
+ // Extract the current byte; the cast sign-extends it, but only bits 0-7 are inspected below
+ long c = (byte) (val >>> s);
+ // Shift to next byte
+ s += 8;
+ // Shift to next long if s==64
+ p += s>>6;
+ // cycle from 63 to 0
+ s = s & 63;
+ v += ((c & 127) << shift);
+ if ((c & 128) == 128) { // top bit set marks the last byte of a value
+ out[tmpoutpos++] = v;
+ v = 0;
+ shift = 0;
+ } else
+ shift += 7;
+ assert shift < 64;
+ }
+ outpos.set(tmpoutpos);
+ inpos.add(inlength);
+ }
+
+ @Override
+ public void uncompress(byte[] in, IntWrapper inpos, int inlength, // decodes inlength variable-byte encoded bytes into longs
+ long[] out, IntWrapper outpos) {
+ int p = inpos.get(); // absolute read position in the input array
+ int finalp = inpos.get() + inlength;
+ int tmpoutpos = outpos.get();
+ for (long v = 0; p < finalp; out[tmpoutpos++] = v) { // the update clause stores v after every decoded value, including on continue
+ v = in[p] & 0x7F;
+ if (in[p] < 0) { // a negative byte has its top bit set: last byte of the value
+ p += 1;
+ continue;
+ }
+ v = ((in[p + 1] & 0x7F) << 7) | v;
+ if (in[p + 1] < 0) {
+ p += 2;
+ continue;
+ }
+ v = ((in[p + 2] & 0x7F) << 14) | v;
+ if (in[p + 2] < 0 ) {
+ p += 3;
+ continue;
+ }
+ v = ((in[p + 3] & 0x7F) << 21) | v;
+ if (in[p + 3] < 0) {
+ p += 4;
+ continue;
+ }
+ v = (((long) in[p + 4] & 0x7F) << 28) | v; // from here on the shift exceeds 31 bits, hence the widening to long
+ if (in[p + 4] < 0) {
+ p += 5;
+ continue;
+ }
+ v = (((long) in[p + 5] & 0x7F) << 35) | v;
+ if (in[p + 5] < 0) {
+ p += 6;
+ continue;
+ }
+ v = (((long) in[p + 6] & 0x7F) << 42) | v;
+ if (in[p + 6] < 0) {
+ p += 7;
+ continue;
+ }
+ v = (((long) in[p + 7] & 0x7F) << 49) | v;
+ if (in[p + 7] < 0) {
+ p += 8;
+ continue;
+ }
+ v = (((long) in[p + 8] & 0x7F) << 56) | v;
+ if (in[p + 8] < 0) {
+ p += 9;
+ continue;
+ }
+ v = (((long) in[p + 9] & 0x7F) << 63) | v; // tenth byte: only its lowest bit survives the shift
+ p += 10;
+ }
+ outpos.set(tmpoutpos);
+ inpos.set(p); // p is already absolute (it started at inpos.get()); add(p) double-counted a non-zero start offset
+ }
+
+ @Override
+ public String toString() { // codec name as displayed, e.g. when composed with other codecs
+ return this.getClass().getSimpleName();
+ }
+
+ @Override
+ public void headlessUncompress(long[] in, IntWrapper inpos, int inlength, long[] out, // decodes exactly num values; inlength is not read
+ IntWrapper outpos, int num) {
+ int s = 0; // bit offset of the current byte inside in[p]
+ long val = 0;
+ int p = inpos.get();
+ int tmpoutpos = outpos.get();
+ int finaloutpos = num + tmpoutpos;
+ for (long v = 0, shift = 0; tmpoutpos < finaloutpos;) { // stop on the number of decoded values, not on the input length
+ val = in[p];
+ // No byte cast here: only bits 0-7 of c are inspected by the masks below
+ long c = val >>> s;
+ // Shift to next byte
+ s += 8;
+ // Shift to next long if s == 64
+ p += s>>6;
+ // cycle from 63 to 0
+ s = s & 63;
+ v += ((c & 127) << shift);
+ if ((c & 128) == 128) { // top bit set marks the last byte of a value
+ out[tmpoutpos++] = v;
+ v = 0;
+ shift = 0;
+ } else
+ shift += 7;
+ assert shift < 64;
+ }
+ outpos.set(tmpoutpos);
+ inpos.set(p + (s!=0 ? 1 : 0)); // if we stopped in the middle of a long, skip the rest of it
+ }
+
+ /**
+ * Creates a new buffer of the requested size. In case you need a different way to
+ * allocate buffers, you can override this method with a custom behavior. The default
+ * implementation allocates a new Java direct {@link ByteBuffer} on each invocation.
+ * @param sizeInBytes capacity of the buffer to create, in bytes
+ * @return the newly allocated buffer
+ */
+ protected ByteBuffer makeBuffer(int sizeInBytes) {
+ return ByteBuffer.allocateDirect(sizeInBytes);
+ }
+}
diff --git a/src/main/java/me/lemire/longcompression/RoaringIntPacking.java b/src/main/java/me/lemire/longcompression/RoaringIntPacking.java
new file mode 100644
index 0000000..f109ab3
--- /dev/null
+++ b/src/main/java/me/lemire/longcompression/RoaringIntPacking.java
@@ -0,0 +1,108 @@
+/*
+ * (c) the authors Licensed under the Apache License, Version 2.0.
+ */
+package me.lemire.longcompression;
+
+import java.math.BigInteger;
+import java.util.Comparator;
+
+/**
+ * Holds the logic for packing two ints into one long, and for splitting a long back
+ * into its high and low 32-bit halves. According to the original RoaringBitmap
+ * sources this is used by {@code Roaring64NavigableMap}, which splits each input long
+ * in two ints: one is the key of a NavigableMap, the other is added to a bitmap.
+ *
+ * @author Benoit Lacelle
+ *
+ */
+// Duplicated from RoaringBitmap
+class RoaringIntPacking {
+
+    /**
+     * Extracts the upper half of a long.
+     *
+     * @param id any long, positive or negative
+     * @return an int holding the 32 highest order bits of information of the input long
+     */
+    public static int high(long id) {
+        return (int) (id >> 32);
+    }
+
+    /**
+     * Extracts the lower half of a long.
+     *
+     * @param id any long, positive or negative
+     * @return an int holding the 32 lowest order bits of information of the input long
+     */
+    public static int low(long id) {
+        return (int) id;
+    }
+
+    /**
+     * Packs two ints into a long; the inverse of {@link #high(long)} and {@link #low(long)}.
+     *
+     * @param high an integer representing the highest order bits of the output long
+     * @param low an integer representing the lowest order bits of the output long
+     * @return a long packing together the integers as computed by
+     *         {@link RoaringIntPacking#high(long)} and {@link RoaringIntPacking#low(long)}
+     */
+    // https://stackoverflow.com/questions/12772939/java-storing-two-ints-in-a-long
+    public static long pack(int high, int low) {
+        // mask the low half so that its sign bit does not leak into the high half
+        return (((long) high) << 32) | (low & 0xffffffffL);
+    }
+
+
+    /**
+     * Returns the largest possible high half under the requested ordering.
+     *
+     * @param signedLongs true if longs should be considered as signed longs
+     * @return {@link Integer#MAX_VALUE} for signed ordering; {@code -1} (all bits set,
+     *         i.e. the largest unsigned int) for unsigned ordering
+     */
+    public static int highestHigh(boolean signedLongs) {
+        if (signedLongs) {
+            return Integer.MAX_VALUE;
+        } else {
+            return -1;
+        }
+    }
+
+    /**
+     * @return A comparator ordering ints as unsigned values: a negative int is greater
+     *         than {@link Integer#MAX_VALUE}
+     */
+    public static Comparator<Integer> unsignedComparator() {
+        // anonymous class rather than a lambda: this file deliberately avoids JDK 8 APIs
+        return new Comparator<Integer>() {
+
+            @Override
+            public int compare(Integer o1, Integer o2) {
+                return compareUnsigned(o1, o2);
+            }
+        };
+    }
+
+    /**
+     * Compares two {@code int} values numerically treating the values as unsigned.
+     *
+     * @param x the first {@code int} to compare
+     * @param y the second {@code int} to compare
+     * @return the value {@code 0} if {@code x == y}; a value less than {@code 0} if {@code x < y} as
+     *         unsigned values; and a value greater than {@code 0} if {@code x > y} as unsigned values
+     */
+    // Duplicated from jdk8 Integer.compareUnsigned
+    public static int compareUnsigned(int x, int y) {
+        // adding MIN_VALUE flips the sign bit, mapping unsigned order onto signed order
+        return Integer.compare(x + Integer.MIN_VALUE, y + Integer.MIN_VALUE);
+    }
+
+    /** the constant 2^64 */
+    private static final BigInteger TWO_64 = BigInteger.ONE.shiftLeft(64);
+
+    /**
+     * Formats a long as an unsigned decimal string.
+     *
+     * JDK8 Long.toUnsignedString was too complex to backport. Go for a slow version relying on
+     * BigInteger
+     *
+     * @param l the long to format, interpreted as an unsigned 64-bit value
+     * @return the decimal representation of the unsigned value
+     */
+    // https://stackoverflow.com/questions/7031198/java-signed-long-to-unsigned-long-string
+    static String toUnsignedString(long l) {
+        BigInteger b = BigInteger.valueOf(l);
+        if (b.signum() < 0) {
+            // negative signed values are the unsigned values in the range 2^63 to 2^64 - 1
+            b = b.add(TWO_64);
+        }
+        return b.toString();
+    }
+}
diff --git a/src/main/java/me/lemire/longcompression/SkippableLongCODEC.java b/src/main/java/me/lemire/longcompression/SkippableLongCODEC.java
new file mode 100644
index 0000000..e3e7b84
--- /dev/null
+++ b/src/main/java/me/lemire/longcompression/SkippableLongCODEC.java
@@ -0,0 +1,69 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+
+package me.lemire.longcompression;
+
+import me.lemire.integercompression.IntWrapper;
+
+/**
+ * Interface describing a standard CODEC to compress longs. This is a
+ * variation on the LongCODEC interface in which the caller supplies the
+ * number of values to decode.
+ *
+ * The main difference is that we must specify the number of longs we wish to
+ * decode. This information should be stored elsewhere.
+ *
+ * This interface was designed by the Terrier team for their search engine.
+ *
+ * @author Benoit Lacelle
+ *
+ */
+public interface SkippableLongCODEC {
+ /**
+ * Compress data from an array to another array.
+ *
+ * Both inpos and outpos are modified to represent how much data was read
+ * and written to. If 12 longs (inlength = 12) are compressed to 3 longs, then
+ * inpos will be incremented by 12 while outpos will be incremented by 3. We
+ * use IntWrapper to pass the values by reference.
+ *
+ * @param in
+ * input array
+ * @param inpos
+ * location in the input array
+ * @param inlength
+ * how many longs to compress
+ * @param out
+ * output array
+ * @param outpos
+ * where to write in the output array
+ */
+ public void headlessCompress(long[] in, IntWrapper inpos, int inlength, long[] out,
+ IntWrapper outpos);
+
+ /**
+ * Uncompress data from an array to another array.
+ *
+ * Both inpos and outpos parameters are modified to indicate new positions
+ * after read/write.
+ *
+ * @param in
+ * array containing data in compressed form
+ * @param inpos
+ * where to start reading in the array
+ * @param inlength
+ * length of the compressed data (ignored by some schemes)
+ * @param out
+ * array where to write the uncompressed output
+ * @param outpos
+ * where to write the uncompressed output in out
+ * @param num
+ * number of longs we want to decode, the actual number of longs decoded can be less
+ */
+ public void headlessUncompress(long[] in, IntWrapper inpos, int inlength, long[] out,
+ IntWrapper outpos, int num);
+
+}
diff --git a/src/main/java/me/lemire/longcompression/SkippableLongComposition.java b/src/main/java/me/lemire/longcompression/SkippableLongComposition.java
new file mode 100644
index 0000000..5568489
--- /dev/null
+++ b/src/main/java/me/lemire/longcompression/SkippableLongComposition.java
@@ -0,0 +1,70 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+package me.lemire.longcompression;
+
+import me.lemire.integercompression.IntWrapper;
+
+/**
+ * Helper class to compose schemes: a first codec handles as much of the input as
+ * it can, and a second codec handles whatever remains.
+ *
+ * @author Benoit Lacelle
+ */
+public class SkippableLongComposition implements SkippableLongCODEC {
+    SkippableLongCODEC F1, F2;
+
+    /**
+     * Compose a scheme from a first one (f1) and a second one (f2). The first
+     * one is called first and then the second one tries to compress whatever
+     * remains from the first run.
+     *
+     * By convention, the first scheme should be such that if, during decoding,
+     * a zero word (a 64-bit zero, since the streams are long arrays) is first
+     * encountered, then there is no output.
+     *
+     * @param f1
+     *            first codec
+     * @param f2
+     *            second codec
+     */
+    public SkippableLongComposition(SkippableLongCODEC f1,
+            SkippableLongCODEC f2) {
+        F1 = f1;
+        F2 = f2;
+    }
+
+    /**
+     * Compresses with the first codec, then hands the longs it did not consume
+     * to the second codec.
+     *
+     * @param in
+     *            input array
+     * @param inpos
+     *            location in the input array; advanced past the consumed longs
+     * @param inlength
+     *            how many longs to compress
+     * @param out
+     *            output array
+     * @param outpos
+     *            where to write in the output array; advanced past the written longs
+     */
+    @Override
+    public void headlessCompress(long[] in, IntWrapper inpos, int inlength, long[] out,
+            IntWrapper outpos) {
+        int init = inpos.get();
+        int outposInit = outpos.get();
+        F1.headlessCompress(in, inpos, inlength, out, outpos);
+        if (outpos.get() == outposInit) {
+            // the first codec wrote nothing: emit a zero word so that the decoder
+            // can tell that the first codec contributed no output
+            out[outposInit] = 0;
+            outpos.increment();
+        }
+        inlength -= inpos.get() - init;
+        F2.headlessCompress(in, inpos, inlength, out, outpos);
+    }
+
+    /**
+     * Decodes with the first codec, then asks the second codec for the values
+     * that are still missing.
+     *
+     * @param in
+     *            array containing data in compressed form
+     * @param inpos
+     *            where to start reading in the array; advanced past the consumed longs
+     * @param inlength
+     *            length of the compressed data
+     * @param out
+     *            array where to write the uncompressed output
+     * @param outpos
+     *            where to write in out; advanced past the decoded longs
+     * @param num
+     *            total number of longs to decode
+     */
+    @Override
+    public void headlessUncompress(long[] in, IntWrapper inpos, int inlength, long[] out,
+            IntWrapper outpos, int num) {
+        int init = inpos.get();
+        int outposInit = outpos.get();
+        F1.headlessUncompress(in, inpos, inlength, out, outpos, num);
+        if (inpos.get() == init) {
+            // the first codec consumed nothing: skip the placeholder word
+            inpos.increment();
+        }
+        inlength -= inpos.get() - init;
+        // subtract only what the first codec decoded, not the absolute output
+        // position, so that a non-zero starting outpos does not shrink the request
+        num -= outpos.get() - outposInit;
+        F2.headlessUncompress(in, inpos, inlength, out, outpos, num);
+    }
+
+    @Override
+    public String toString() {
+        return F1.toString() + "+" + F2.toString();
+    }
+
+}
diff --git a/src/main/java/me/lemire/longcompression/differential/LongDelta.java b/src/main/java/me/lemire/longcompression/differential/LongDelta.java
new file mode 100644
index 0000000..2b0e077
--- /dev/null
+++ b/src/main/java/me/lemire/longcompression/differential/LongDelta.java
@@ -0,0 +1,150 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+
+package me.lemire.longcompression.differential;
+
+/**
+ * Generic class to compute differential coding on arrays of longs.
+ *
+ * @author Benoit Lacelle
+ *
+ */
+public final class LongDelta {
+
+    /**
+     * Apply differential coding (in-place).
+     *
+     * @param data
+     *            data to be modified
+     */
+    public static void delta(long[] data) {
+        // walk backwards so that each predecessor is still the original value
+        for (int i = data.length - 1; i > 0; --i) {
+            data[i] -= data[i - 1];
+        }
+    }
+
+    /**
+     * Apply differential coding (in-place) given an initial value.
+     *
+     * @param data
+     *            data to be modified
+     * @param start
+     *            starting index
+     * @param length
+     *            number of longs to process; when not positive, nothing is
+     *            modified and {@code init} is returned
+     * @param init
+     *            initial value (a long, so that the value returned by a
+     *            previous call can be passed back without narrowing)
+     * @return next initial value
+     */
+    public static long delta(long[] data, int start, int length, long init) {
+        if (length <= 0) {
+            return init;
+        }
+        final long nextinit = data[start + length - 1];
+        for (int i = length - 1; i > 0; --i) {
+            data[start + i] -= data[start + i - 1];
+        }
+        data[start] -= init;
+        return nextinit;
+    }
+
+    /**
+     * Compute differential coding given an initial value. Output is written
+     * to a provided array: must have length "length" or better.
+     *
+     * @param data
+     *            input data (left unmodified)
+     * @param start
+     *            starting index
+     * @param length
+     *            number of longs to process; when not positive, nothing is
+     *            written and {@code init} is returned
+     * @param init
+     *            initial value
+     * @param out
+     *            output array
+     * @return next initial value
+     */
+    public static long delta(long[] data, int start, int length, long init,
+            long[] out) {
+        if (length <= 0) {
+            return init;
+        }
+        for (int i = length - 1; i > 0; --i) {
+            out[i] = data[start + i] - data[start + i - 1];
+        }
+        out[0] = data[start] - init;
+        return data[start + length - 1];
+    }
+
+    /**
+     * Undo differential coding (in-place). Effectively computes a prefix
+     * sum.
+     *
+     * @param data
+     *            to be modified.
+     */
+    public static void inverseDelta(long[] data) {
+        for (int i = 1; i < data.length; ++i) {
+            data[i] += data[i - 1];
+        }
+    }
+
+    /**
+     * Undo differential coding (in-place). Effectively computes a prefix
+     * sum. Like inverseDelta, but with the main loop unrolled four times.
+     *
+     * @param data
+     *            to be modified
+     */
+    public static void fastinverseDelta(long[] data) {
+        final int n = data.length;
+        if (n == 0) {
+            return;
+        }
+        int i = 1;
+        long a = data[0];
+        // unrolled part: four elements per iteration while four remain
+        for (; i + 4 <= n; i += 4) {
+            a = data[i] += a;
+            a = data[i + 1] += a;
+            a = data[i + 2] += a;
+            a = data[i + 3] += a;
+        }
+        // leftover elements (at most three)
+        for (; i < n; ++i) {
+            data[i] += data[i - 1];
+        }
+    }
+
+    /**
+     * Undo differential coding (in-place). Effectively computes a prefix
+     * sum. Like inverseDelta, but unrolled. Uses an initial value.
+     *
+     * @param data
+     *            to be modified
+     * @param start
+     *            starting index
+     * @param length
+     *            number of longs to process; when not positive, nothing is
+     *            modified and {@code init} is returned
+     * @param init
+     *            initial value
+     * @return next initial value
+     */
+    public static long fastinverseDelta(long[] data, int start, int length,
+            long init) {
+        if (length <= 0) {
+            return init;
+        }
+        data[start] += init;
+        int i = 1;
+        long a = data[start];
+        // unrolled part: four elements per iteration while four remain
+        for (; i + 4 <= length; i += 4) {
+            a = data[start + i] += a;
+            a = data[start + i + 1] += a;
+            a = data[start + i + 2] += a;
+            a = data[start + i + 3] += a;
+        }
+        // leftover elements (at most three)
+        for (; i < length; ++i) {
+            data[start + i] += data[start + i - 1];
+        }
+        return data[start + length - 1];
+    }
+
+}
diff --git a/src/test/java/me/lemire/integercompression/AdhocTest.java b/src/test/java/me/lemire/integercompression/AdhocTest.java
index bced6c0..8fd4049 100644
--- a/src/test/java/me/lemire/integercompression/AdhocTest.java
+++ b/src/test/java/me/lemire/integercompression/AdhocTest.java
@@ -1,3 +1,10 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+
package me.lemire.integercompression;
import org.junit.Assert;
diff --git a/src/test/java/me/lemire/integercompression/BasicTest.java b/src/test/java/me/lemire/integercompression/BasicTest.java
index e88293e..b5f292e 100644
--- a/src/test/java/me/lemire/integercompression/BasicTest.java
+++ b/src/test/java/me/lemire/integercompression/BasicTest.java
@@ -1,3 +1,10 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+
package me.lemire.integercompression;
import java.util.Arrays;
@@ -22,7 +29,7 @@
*/
@SuppressWarnings({ "static-method" })
public class BasicTest {
- IntegerCODEC[] codecs = {
+ final IntegerCODEC[] codecs = {
new IntegratedComposition(new IntegratedBinaryPacking(),
new IntegratedVariableByte()),
new JustCopy(),
@@ -44,7 +51,7 @@ public class BasicTest {
new DeltaZigzagVariableByte()) };
/**
- *
+ * Tests compression into the output array at various offsets
*/
@Test
public void saulTest() {
diff --git a/src/test/java/me/lemire/integercompression/BoundaryTest.java b/src/test/java/me/lemire/integercompression/BoundaryTest.java
index ede2e9f..128b431 100644
--- a/src/test/java/me/lemire/integercompression/BoundaryTest.java
+++ b/src/test/java/me/lemire/integercompression/BoundaryTest.java
@@ -1,3 +1,10 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+
package me.lemire.integercompression;
import java.util.Arrays;
diff --git a/src/test/java/me/lemire/integercompression/ByteBasicTest.java b/src/test/java/me/lemire/integercompression/ByteBasicTest.java
index c2f5b6f..93112c3 100644
--- a/src/test/java/me/lemire/integercompression/ByteBasicTest.java
+++ b/src/test/java/me/lemire/integercompression/ByteBasicTest.java
@@ -1,3 +1,10 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+
package me.lemire.integercompression;
import java.util.Arrays;
diff --git a/src/test/java/me/lemire/integercompression/DeltaZigzagEncodingTest.java b/src/test/java/me/lemire/integercompression/DeltaZigzagEncodingTest.java
index 5e0923d..ae42c1d 100644
--- a/src/test/java/me/lemire/integercompression/DeltaZigzagEncodingTest.java
+++ b/src/test/java/me/lemire/integercompression/DeltaZigzagEncodingTest.java
@@ -1,7 +1,10 @@
-/*
+/**
* This code is released under the
* Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
*/
+
package me.lemire.integercompression;
import org.junit.Test;
diff --git a/src/test/java/me/lemire/integercompression/ExampleTest.java b/src/test/java/me/lemire/integercompression/ExampleTest.java
index 300983c..f6038b8 100644
--- a/src/test/java/me/lemire/integercompression/ExampleTest.java
+++ b/src/test/java/me/lemire/integercompression/ExampleTest.java
@@ -1,3 +1,10 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+
package me.lemire.integercompression;
import me.lemire.integercompression.differential.*;
diff --git a/src/test/java/me/lemire/integercompression/IntCompressorTest.java b/src/test/java/me/lemire/integercompression/IntCompressorTest.java
index 34b8946..79e51fc 100644
--- a/src/test/java/me/lemire/integercompression/IntCompressorTest.java
+++ b/src/test/java/me/lemire/integercompression/IntCompressorTest.java
@@ -1,3 +1,10 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+
package me.lemire.integercompression;
import java.util.Arrays;
diff --git a/src/test/java/me/lemire/integercompression/ResourcedTest.java b/src/test/java/me/lemire/integercompression/ResourcedTest.java
index 61b8e58..34f1d05 100644
--- a/src/test/java/me/lemire/integercompression/ResourcedTest.java
+++ b/src/test/java/me/lemire/integercompression/ResourcedTest.java
@@ -1,3 +1,10 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+
package me.lemire.integercompression;
import java.util.ArrayList;
diff --git a/src/test/java/me/lemire/integercompression/SkippableBasicTest.java b/src/test/java/me/lemire/integercompression/SkippableBasicTest.java
index d965992..93c1784 100644
--- a/src/test/java/me/lemire/integercompression/SkippableBasicTest.java
+++ b/src/test/java/me/lemire/integercompression/SkippableBasicTest.java
@@ -1,3 +1,10 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+
package me.lemire.integercompression;
import java.util.Arrays;
@@ -12,7 +19,7 @@
*/
@SuppressWarnings({ "static-method" })
public class SkippableBasicTest {
- SkippableIntegerCODEC[] codecs = {
+ final SkippableIntegerCODEC[] codecs = {
new JustCopy(),
new VariableByte(),
new SkippableComposition(new BinaryPacking(), new VariableByte()),
diff --git a/src/test/java/me/lemire/integercompression/TestUtils.java b/src/test/java/me/lemire/integercompression/TestUtils.java
index a0820ab..7ce51b3 100644
--- a/src/test/java/me/lemire/integercompression/TestUtils.java
+++ b/src/test/java/me/lemire/integercompression/TestUtils.java
@@ -1,3 +1,10 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+
package me.lemire.integercompression;
import java.util.Arrays;
@@ -123,7 +130,7 @@ public static void assertSymmetry(IntegerCODEC codec, int... orig) {
assertArrayEquals(orig, target);
}
- protected static int[] compress(IntegerCODEC codec, int[] data) {
+ public static int[] compress(IntegerCODEC codec, int[] data) {
int[] outBuf = new int[data.length * 4];
IntWrapper inPos = new IntWrapper();
IntWrapper outPos = new IntWrapper();
diff --git a/src/test/java/me/lemire/integercompression/XorBinaryPackingTest.java b/src/test/java/me/lemire/integercompression/XorBinaryPackingTest.java
index 3201b02..650eb4b 100644
--- a/src/test/java/me/lemire/integercompression/XorBinaryPackingTest.java
+++ b/src/test/java/me/lemire/integercompression/XorBinaryPackingTest.java
@@ -1,7 +1,10 @@
/**
* This code is released under the
* Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
*/
+
package me.lemire.integercompression;
import java.util.Arrays;
diff --git a/src/test/java/me/lemire/longcompression/LongBasicTest.java b/src/test/java/me/lemire/longcompression/LongBasicTest.java
new file mode 100644
index 0000000..5aa3551
--- /dev/null
+++ b/src/test/java/me/lemire/longcompression/LongBasicTest.java
@@ -0,0 +1,396 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+
+package me.lemire.longcompression;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.util.Arrays;
+
+import org.junit.Test;
+
+import me.lemire.integercompression.BinaryPacking;
+import me.lemire.integercompression.Composition;
+import me.lemire.integercompression.FastPFOR;
+import me.lemire.integercompression.FastPFOR128;
+import me.lemire.integercompression.IntWrapper;
+import me.lemire.integercompression.JustCopy;
+import me.lemire.integercompression.NewPFD;
+import me.lemire.integercompression.NewPFDS16;
+import me.lemire.integercompression.NewPFDS9;
+import me.lemire.integercompression.OptPFD;
+import me.lemire.integercompression.OptPFDS16;
+import me.lemire.integercompression.OptPFDS9;
+import me.lemire.integercompression.Simple9;
+import me.lemire.integercompression.VariableByte;
+import me.lemire.integercompression.differential.Delta;
+import me.lemire.integercompression.differential.IntegratedBinaryPacking;
+import me.lemire.integercompression.differential.IntegratedComposition;
+import me.lemire.integercompression.differential.IntegratedVariableByte;
+import me.lemire.longcompression.differential.LongDelta;
+import me.lemire.longcompression.synth.LongClusteredDataGenerator;
+
+/**
+ * Just some basic sanity tests.
+ *
+ * @author Benoit Lacelle
+ */
+@SuppressWarnings({ "static-method" })
+public class LongBasicTest {
+ final LongCODEC[] codecs = {
+ new LongJustCopy(),
+ new LongVariableByte(),
+ new LongAs2IntsCodec()};
+
+ /**
+ * This tests with a compressed array with various offset
+ */
+ @Test
+ public void saulTest() {
+ for (LongCODEC C : codecs) {
+ for (int x = 0; x < 50; ++x) {
+ long[] a = { 2, 3, 4, 5 };
+ long[] b = new long[90];
+ long[] c = new long[a.length];
+
+ IntWrapper aOffset = new IntWrapper(0);
+ IntWrapper bOffset = new IntWrapper(x);
+ C.compress(a, aOffset, a.length, b, bOffset);
+ int len = bOffset.get() - x;
+
+ bOffset.set(x);
+ IntWrapper cOffset = new IntWrapper(0);
+ C.uncompress(b, bOffset, len, c, cOffset);
+ if(!Arrays.equals(a, c)) {
+ System.out.println("Problem with "+C);
+ }
+ assertArrayEquals(a, c);
+
+ }
+ }
+ }
+ /**
+ *
+ */
+ @Test
+ public void varyingLengthTest() {
+ int N = 4096;
+ long[] data = new long[N];
+ for (int k = 0; k < N; ++k)
+ data[k] = k;
+ for (LongCODEC c : codecs) {
+ System.out.println("[BasicTest.varyingLengthTest] codec = " + c);
+ for (int L = 1; L <= 128; L++) {
+ long[] comp = LongTestUtils.compress(c, Arrays.copyOf(data, L));
+ long[] answer = LongTestUtils.uncompress(c, comp, L);
+ for (int k = 0; k < L; ++k)
+ if (answer[k] != data[k])
+ throw new RuntimeException("bug");
+ }
+ for (int L = 128; L <= N; L *= 2) {
+ long[] comp = LongTestUtils.compress(c, Arrays.copyOf(data, L));
+ long[] answer = LongTestUtils.uncompress(c, comp, L);
+ for (int k = 0; k < L; ++k)
+ if (answer[k] != data[k]) {
+ System.out.println(Arrays.toString(Arrays.copyOf(
+ answer, L)));
+ System.out.println(Arrays.toString(Arrays.copyOf(data,
+ L)));
+ throw new RuntimeException("bug");
+ }
+ }
+
+ }
+ }
+
+ /**
+ *
+ */
+ @Test
+ public void varyingLengthTest2() {
+ int N = 128;
+ long[] data = new long[N];
+ data[127] = -1;
+ for (LongCODEC c : codecs) {
+ System.out.println("[BasicTest.varyingLengthTest2] codec = " + c);
+ try {
+ // CODEC Simple9 is limited to "small" integers.
+ if (c.getClass().equals(
+ Class.forName("me.lemire.integercompression.Simple9")))
+ continue;
+ } catch (ClassNotFoundException e) {
+ e.printStackTrace();
+ }
+ try {
+ // CODEC Simple16 is limited to "small" integers.
+ if (c.getClass().equals(
+ Class.forName("me.lemire.integercompression.Simple16")))
+ continue;
+ } catch (ClassNotFoundException e) {
+ e.printStackTrace();
+ }
+ try {
+ // CODEC GroupSimple9 is limited to "small" integers.
+ if (c.getClass().equals(
+ Class.forName("me.lemire.integercompression.GroupSimple9")))
+ continue;
+ } catch (ClassNotFoundException e) {
+ e.printStackTrace();
+ }
+
+ for (int L = 1; L <= 128; L++) {
+ long[] comp = LongTestUtils.compress(c, Arrays.copyOf(data, L));
+ long[] answer = LongTestUtils.uncompress(c, comp, L);
+ for (int k = 0; k < L; ++k)
+ if (answer[k] != data[k])
+ throw new RuntimeException("bug");
+ }
+ for (int L = 128; L <= N; L *= 2) {
+ long[] comp = LongTestUtils.compress(c, Arrays.copyOf(data, L));
+ long[] answer = LongTestUtils.uncompress(c, comp, L);
+ for (int k = 0; k < L; ++k)
+ if (answer[k] != data[k])
+ throw new RuntimeException("bug");
+ }
+
+ }
+ }
+
+ /**
+ *
+ */
+ @Test
+ public void checkVariousCases() {
+ for (LongCODEC c : codecs) {
+ testZeroInZeroOut(c);
+ test(c, c, 5, 10);
+ test(c, c, 5, 14);
+ test(c, c, 2, 18);
+ // TODO Unclear which codec should manage an empty output array or not
+ // Some IntegerCodec does not output anything if the input is smaller than some block size
+ // testSpurious(c);
+ testUnsorted(c);
+ testUnsorted2(c);
+ testUnsorted3(c);
+ }
+ }
+
+ /**
+ * check that the codecs can be inverted.
+ */
+ @Test
+ public void basictest() {
+ for (LongCODEC codec : codecs) {
+ test(codec, 5, 10);
+ test(codec, 5, 14);
+ test(codec, 2, 18);
+ }
+ }
+
+ private static void testSpurious(LongCODEC c) {
+ long[] x = new long[1024];
+ long[] y = new long[0];
+ IntWrapper i0 = new IntWrapper(0);
+ IntWrapper i1 = new IntWrapper(0);
+ for (int inlength = 0; inlength < 32; ++inlength) {
+ c.compress(x, i0, inlength, y, i1);
+ assertEquals(0, i1.intValue());
+ }
+ }
+
+ private static void testZeroInZeroOut(LongCODEC c) {
+ long[] x = new long[0];
+ long[] y = new long[0];
+ IntWrapper i0 = new IntWrapper(0);
+ IntWrapper i1 = new IntWrapper(0);
+ c.compress(x, i0, 0, y, i1);
+ assertEquals(0, i1.intValue());
+
+ long[] out = new long[0];
+ IntWrapper outpos = new IntWrapper(0);
+ c.uncompress(y, i1, 0, out, outpos);
+ assertEquals(0, outpos.intValue());
+ }
+
+ private static void test(LongCODEC c, LongCODEC co, int N, int nbr) {
+ LongClusteredDataGenerator cdg = new LongClusteredDataGenerator();
+ for (int sparsity = 1; sparsity < 31 - nbr; sparsity += 4) {
+ long[][] data = new long[N][];
+ int max = (1 << (nbr + sparsity));
+ for (int k = 0; k < N; ++k) {
+ data[k] = cdg.generateClustered((1 << nbr), max);
+ }
+ testCodec(c, co, data, max);
+ }
+ }
+
+ private static void test(LongCODEC codec, int N, int nbr) {
+ LongClusteredDataGenerator cdg = new LongClusteredDataGenerator();
+ System.out.println("[BasicTest.test] N = " + N + " " + nbr);
+ for (int sparsity = 1; sparsity < 63 - nbr; sparsity += 4) {
+ long[][] data = new long[N][];
+ long max = (1L << (nbr + sparsity));
+ for (int k = 0; k < N; ++k) {
+ data[k] = cdg.generateClustered((1 << nbr), max);
+ }
+
+ testCodec(codec, codec, data, max);
+ }
+ }
+
+ private static void testCodec(LongCODEC c, LongCODEC co,
+ long[][] data, long max) {
+ int N = data.length;
+ int maxlength = 0;
+ for (int k = 0; k < N; ++k) {
+ if (data[k].length > maxlength)
+ maxlength = data[k].length;
+ }
+ long[] buffer = new long[maxlength + 1024];
+ long[] dataout = new long[4 * maxlength + 1024];
+ // 4x + 1024 to account for the possibility of some negative
+ // compression.
+ IntWrapper inpos = new IntWrapper();
+ IntWrapper outpos = new IntWrapper();
+ for (int k = 0; k < N; ++k) {
+ long[] backupdata = Arrays.copyOf(data[k], data[k].length);
+
+ inpos.set(1);
+ outpos.set(0);
+ if (!(c instanceof IntegratedLongCODEC)) {
+ LongDelta.delta(backupdata);
+ }
+ c.compress(backupdata, inpos, backupdata.length - inpos.get(),
+ dataout, outpos);
+ final int thiscompsize = outpos.get() + 1;
+ inpos.set(0);
+ outpos.set(1);
+ buffer[0] = backupdata[0];
+ co.uncompress(dataout, inpos, thiscompsize - 1, buffer, outpos);
+ if (!(c instanceof IntegratedLongCODEC))
+ LongDelta.fastinverseDelta(buffer);
+
+ // Check assertions.
+ assertEquals("length is not match", outpos.get(), data[k].length);
+ long[] bufferCutout = Arrays.copyOf(buffer, outpos.get());
+ assertArrayEquals("failed to reconstruct original data", data[k],
+ bufferCutout);
+ }
+ }
+
+ /**
+ * @param codec
+ * provided codec
+ */
+ public void testUnsorted(LongCODEC codec) {
+ int[] lengths = { 133, 1026, 1333333 };
+ for (int N : lengths) {
+ long[] data = new long[N];
+ // initialize the data (most will be small)
+ for (int k = 0; k < N; k += 1)
+ data[k] = 3;
+ // throw some larger values
+ for (int k = 0; k < N; k += 5)
+ data[k] = 100;
+ for (int k = 0; k < N; k += 533)
+ data[k] = 10000;
+ data[5] = -311;
+ // could need more compressing
+ long[] compressed = new long[(int) Math.ceil(N * 1.01) + 1024];
+ IntWrapper inputoffset = new IntWrapper(0);
+ IntWrapper outputoffset = new IntWrapper(0);
+ codec.compress(data, inputoffset, data.length, compressed,
+ outputoffset);
+ // we can repack the data: (optional)
+ compressed = Arrays.copyOf(compressed, outputoffset.intValue());
+
+ long[] recovered = new long[N];
+ IntWrapper recoffset = new IntWrapper(0);
+ codec.uncompress(compressed, new IntWrapper(0), compressed.length,
+ recovered, recoffset);
+ assertArrayEquals(data, recovered);
+ }
+ }
+
+ private void testUnsorted2(LongCODEC codec) {
+ long[] data = new long[128];
+ data[5] = -1;
+ long[] compressed = new long[1024];
+ IntWrapper inputoffset = new IntWrapper(0);
+ IntWrapper outputoffset = new IntWrapper(0);
+ codec.compress(data, inputoffset, data.length, compressed, outputoffset);
+ // we can repack the data: (optional)
+ compressed = Arrays.copyOf(compressed, outputoffset.intValue());
+
+ long[] recovered = new long[128];
+ IntWrapper recoffset = new IntWrapper(0);
+ codec.uncompress(compressed, new IntWrapper(0), compressed.length,
+ recovered, recoffset);
+ assertArrayEquals(data, recovered);
+ }
+
+ private void testUnsorted3(LongCODEC codec) {
+ long[] data = new long[128];
+ data[127] = -1;
+ long[] compressed = new long[1024];
+ IntWrapper inputoffset = new IntWrapper(0);
+ IntWrapper outputoffset = new IntWrapper(0);
+ codec.compress(data, inputoffset, data.length, compressed, outputoffset);
+ // we can repack the data: (optional)
+ compressed = Arrays.copyOf(compressed, outputoffset.intValue());
+
+ long[] recovered = new long[128];
+ IntWrapper recoffset = new IntWrapper(0);
+ codec.uncompress(compressed, new IntWrapper(0), compressed.length,
+ recovered, recoffset);
+ assertArrayEquals(data, recovered);
+ }
+
+ /**
+ *
+ */
+ @Test
+ public void fastPforTest() {
+ // proposed by Stefan Ackermann (https://github.com/Stivo)
+ for (LongCODEC codec : codecs) {
+ int N = FastPFOR.BLOCK_SIZE;
+ long[] data = new long[N];
+ for (int i = 0; i < N; i++)
+ data[i] = 0;
+ data[126] = -1;
+ long[] comp = LongTestUtils.compress(codec, Arrays.copyOf(data, N));
+ long[] answer = LongTestUtils.uncompress(codec, comp, N);
+ for (int k = 0; k < N; ++k)
+ if (answer[k] != data[k])
+ throw new RuntimeException("bug " + k + " " + answer[k]
+ + " != " + data[k]);
+ }
+ }
+
+ /**
+ *
+ */
+ @Test
+ public void fastPfor128Test() {
+ // proposed by Stefan Ackermann (https://github.com/Stivo)
+ for (LongCODEC codec : codecs) {
+ int N = FastPFOR128.BLOCK_SIZE;
+ long[] data = new long[N];
+ for (int i = 0; i < N; i++)
+ data[i] = 0;
+ data[126] = -1;
+ long[] comp = LongTestUtils.compress(codec, Arrays.copyOf(data, N));
+ long[] answer = LongTestUtils.uncompress(codec, comp, N);
+ for (int k = 0; k < N; ++k)
+ if (answer[k] != data[k])
+ throw new RuntimeException("bug " + k + " " + answer[k]
+ + " != " + data[k]);
+ }
+ }
+
+}
diff --git a/src/test/java/me/lemire/longcompression/LongTestUtils.java b/src/test/java/me/lemire/longcompression/LongTestUtils.java
new file mode 100644
index 0000000..a44e665
--- /dev/null
+++ b/src/test/java/me/lemire/longcompression/LongTestUtils.java
@@ -0,0 +1,133 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+
+package me.lemire.longcompression;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.Arrays;
+
+import me.lemire.integercompression.IntWrapper;
+
+/**
+ * Static utility methods for test.
+ */
+public class LongTestUtils {
+
+ protected static void dumpIntArray(long[] data, String label) {
+ System.out.print(label);
+ for (int i = 0; i < data.length; ++i) {
+ if (i % 6 == 0) {
+ System.out.println();
+ }
+ System.out.format(" %1$11d", data[i]);
+ }
+ System.out.println();
+ }
+
+ protected static void dumpIntArrayAsHex(long[] data, String label) {
+ System.out.print(label);
+ for (int i = 0; i < data.length; ++i) {
+ if (i % 8 == 0) {
+ System.out.println();
+ }
+ System.out.format(" %1$08X", data[i]);
+ }
+ System.out.println();
+ }
+
+ /**
+ * Check that compress and uncompress keep original array.
+ *
+ * @param codec CODEC to test.
+ * @param orig original integers
+ */
+ public static void assertSymmetry(LongCODEC codec, long... orig) {
+ // There are some cases that compressed array is bigger than original
+ // array. So output array for compress must be larger.
+ //
+ // Example:
+ // - VariableByte compresses an array like [ -1 ].
+ // - Composition compresses a short array.
+ final int EXTEND = 1;
+
+ long[] compressed = new long[orig.length + EXTEND];
+ IntWrapper c_inpos = new IntWrapper(0);
+ IntWrapper c_outpos = new IntWrapper(0);
+ codec.compress(orig, c_inpos, orig.length, compressed,
+ c_outpos);
+
+ assertTrue(c_outpos.get() <= orig.length + EXTEND);
+
+ // Uncompress an array.
+ long[] uncompressed = new long[orig.length];
+ IntWrapper u_inpos = new IntWrapper(0);
+ IntWrapper u_outpos = new IntWrapper(0);
+ codec.uncompress(compressed, u_inpos, c_outpos.get(),
+ uncompressed, u_outpos);
+
+ // Compare between uncompressed and orig arrays.
+ long[] target = Arrays.copyOf(uncompressed, u_outpos.get());
+ assertArrayEquals(orig, target);
+ }
+
+ protected static long[] compress(LongCODEC codec, long[] data) {
+ long[] outBuf = new long[data.length * 8];
+ IntWrapper inPos = new IntWrapper();
+ IntWrapper outPos = new IntWrapper();
+ codec.compress(data, inPos, data.length, outBuf, outPos);
+ return Arrays.copyOf(outBuf, outPos.get());
+ }
+
+ protected static long[] uncompress(LongCODEC codec, long[] data, int len) {
+ long[] outBuf = new long[len + 1024];
+ IntWrapper inPos = new IntWrapper();
+ IntWrapper outPos = new IntWrapper();
+ codec.uncompress(data, inPos, data.length, outBuf, outPos);
+ return Arrays.copyOf(outBuf, outPos.get());
+ }
+
+
+
+ protected static byte[] compress(ByteLongCODEC codec, long[] data) {
+ byte[] outBuf = new byte[data.length * 4 * 4];
+ IntWrapper inPos = new IntWrapper();
+ IntWrapper outPos = new IntWrapper();
+ codec.compress(data, inPos, data.length, outBuf, outPos);
+ return Arrays.copyOf(outBuf, outPos.get());
+ }
+
+ protected static long[] uncompress(ByteLongCODEC codec, byte[] data, int len) {
+ long[] outBuf = new long[len + 1024];
+ IntWrapper inPos = new IntWrapper();
+ IntWrapper outPos = new IntWrapper();
+ codec.uncompress(data, inPos, data.length, outBuf, outPos);
+ return Arrays.copyOf(outBuf, outPos.get());
+ }
+
+ protected static long[] compressHeadless(SkippableLongCODEC codec, long[] data) {
+ long[] outBuf = new long[data.length * 4];
+ IntWrapper inPos = new IntWrapper();
+ IntWrapper outPos = new IntWrapper();
+ codec.headlessCompress(data, inPos, data.length, outBuf, outPos);
+ return Arrays.copyOf(outBuf, outPos.get());
+ }
+
+ protected static long[] uncompressHeadless(SkippableLongCODEC codec, long[] data, int len) {
+ long[] outBuf = new long[len + 1024];
+ IntWrapper inPos = new IntWrapper();
+ IntWrapper outPos = new IntWrapper();
+ codec.headlessUncompress(data, inPos, data.length, outBuf, outPos,len);
+ if(outPos.get() < len) throw new RuntimeException("Insufficient output.");
+ return Arrays.copyOf(outBuf, outPos.get());
+ }
+
+ public static String longToBinaryWithLeading(long l) {
+ return String.format("%64s", Long.toBinaryString(l)).replace(' ', '0');
+ }
+}
diff --git a/src/test/java/me/lemire/longcompression/SkippableLongBasicTest.java b/src/test/java/me/lemire/longcompression/SkippableLongBasicTest.java
new file mode 100644
index 0000000..e900c9c
--- /dev/null
+++ b/src/test/java/me/lemire/longcompression/SkippableLongBasicTest.java
@@ -0,0 +1,145 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+
+package me.lemire.longcompression;
+
+import java.util.Arrays;
+
+import org.junit.Test;
+
+import me.lemire.integercompression.IntWrapper;
+import me.lemire.integercompression.TestUtils;
+import me.lemire.integercompression.VariableByte;
+
+
+/**
+ * Just some basic sanity tests.
+ *
+ * @author Benoit Lacelle
+ */
+@SuppressWarnings({ "static-method" })
+public class SkippableLongBasicTest {
+ final SkippableLongCODEC[] codecs = {
+ new LongJustCopy(),
+ new LongVariableByte(), };
+
+
+ /**
+ *
+ */
+ @Test
+ public void consistentTest() {
+ int N = 4096;
+ long[] data = new long[N];
+ long[] rev = new long[N];
+ for (int k = 0; k < N; ++k)
+ data[k] = k % 128;
+ for (SkippableLongCODEC c : codecs) {
+ System.out.println("[SkippeableBasicTest.consistentTest] codec = "
+ + c);
+ long[] outBuf = new long[N + 1024];
+ for (int n = 0; n <= N; ++n) {
+ IntWrapper inPos = new IntWrapper();
+ IntWrapper outPos = new IntWrapper();
+ c.headlessCompress(data, inPos, n, outBuf, outPos);
+
+ IntWrapper inPoso = new IntWrapper();
+ IntWrapper outPoso = new IntWrapper();
+ c.headlessUncompress(outBuf, inPoso, outPos.get(), rev,
+ outPoso, n);
+ if (outPoso.get() != n) {
+ throw new RuntimeException("bug "+n);
+ }
+ if (inPoso.get() != outPos.get()) {
+ throw new RuntimeException("bug "+n+" "+inPoso.get()+" "+outPos.get());
+ }
+ for (int j = 0; j < n; ++j)
+ if (data[j] != rev[j]) {
+ throw new RuntimeException("bug");
+ }
+ }
+ }
+ }
+
+
+ /**
+ *
+ */
+ @Test
+ public void varyingLengthTest() {
+ int N = 4096;
+ long[] data = new long[N];
+ for (int k = 0; k < N; ++k)
+ data[k] = k;
+ for (SkippableLongCODEC c : codecs) {
+ System.out.println("[SkippeableBasicTest.varyingLengthTest] codec = "+c);
+ for (int L = 1; L <= 128; L++) {
+ long[] comp = LongTestUtils.compressHeadless(c, Arrays.copyOf(data, L));
+ long[] answer = LongTestUtils.uncompressHeadless(c, comp, L);
+ for (int k = 0; k < L; ++k)
+ if (answer[k] != data[k])
+ throw new RuntimeException("bug "+c.toString()+" "+k+" "+answer[k]+" "+data[k]);
+ }
+ for (int L = 128; L <= N; L *= 2) {
+ long[] comp = LongTestUtils.compressHeadless(c, Arrays.copyOf(data, L));
+ long[] answer = LongTestUtils.uncompressHeadless(c, comp, L);
+ for (int k = 0; k < L; ++k)
+ if (answer[k] != data[k])
+ throw new RuntimeException("bug");
+ }
+
+ }
+ }
+
+ /**
+ *
+ */
+ @Test
+ public void varyingLengthTest2() {
+ int N = 128;
+ long[] data = new long[N];
+ data[127] = -1;
+ for (SkippableLongCODEC c : codecs) {
+ System.out.println("[SkippeableBasicTest.varyingLengthTest2] codec = "+c);
+
+ try {
+ // CODEC Simple9 is limited to "small" integers.
+ if (c.getClass().equals(
+ Class.forName("me.lemire.integercompression.Simple9")))
+ continue;
+ } catch (ClassNotFoundException e) {
+ e.printStackTrace();
+ }
+ try {
+ // CODEC Simple16 is limited to "small" integers.
+ if (c.getClass().equals(
+ Class.forName("me.lemire.integercompression.Simple16")))
+ continue;
+ } catch (ClassNotFoundException e) {
+ e.printStackTrace();
+ }
+ for (int L = 1; L <= 128; L++) {
+ long[] comp = LongTestUtils.compressHeadless(c, Arrays.copyOf(data, L));
+ long[] answer = LongTestUtils.uncompressHeadless(c, comp, L);
+ for (int k = 0; k < L; ++k)
+ if (answer[k] != data[k]) {
+ throw new RuntimeException("L=" + L + ": bug at k = "+k+" "+answer[k]+" "+data[k]+" for "+c.toString());
+ }
+ }
+ for (int L = 128; L <= N; L *= 2) {
+ long[] comp = LongTestUtils.compressHeadless(c, Arrays.copyOf(data, L));
+ long[] answer = LongTestUtils.uncompressHeadless(c, comp, L);
+ for (int k = 0; k < L; ++k)
+ if (answer[k] != data[k])
+ throw new RuntimeException("bug");
+ }
+
+ }
+ }
+
+
+}
diff --git a/src/test/java/me/lemire/longcompression/TestLongAs2IntsCodec.java b/src/test/java/me/lemire/longcompression/TestLongAs2IntsCodec.java
new file mode 100644
index 0000000..00bb52a
--- /dev/null
+++ b/src/test/java/me/lemire/longcompression/TestLongAs2IntsCodec.java
@@ -0,0 +1,106 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+
+package me.lemire.longcompression;
+
+import java.util.stream.LongStream;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Edge-cases having caused issue specifically with LongVariableByte.
+ *
+ * @author Benoit Lacelle
+ */
+public class TestLongAs2IntsCodec {
+ final LongAs2IntsCodec codec = new LongAs2IntsCodec();
+
+ private void checkConsistency(LongCODEC codec, long[] array) {
+ {
+ long[] compressed = LongTestUtils.compress(codec, array);
+ long[] uncompressed = LongTestUtils.uncompress(codec, compressed, array.length);
+
+ Assert.assertArrayEquals(array, uncompressed);
+ }
+
+ if (codec instanceof ByteLongCODEC) {
+ byte[] compressed = LongTestUtils.compress((ByteLongCODEC) codec, array);
+ long[] uncompressed = LongTestUtils.uncompress((ByteLongCODEC) codec, compressed, array.length);
+
+ Assert.assertArrayEquals(array, uncompressed);
+ }
+
+ if (codec instanceof SkippableLongCODEC) {
+ long[] compressed = LongTestUtils.compressHeadless((SkippableLongCODEC) codec, array);
+ long[] uncompressed =
+ LongTestUtils.uncompressHeadless((SkippableLongCODEC) codec, compressed, array.length);
+
+ Assert.assertArrayEquals(array, uncompressed);
+ }
+ }
+
+ @Test
+ public void testCodec_Zero() {
+ checkConsistency(codec, new long[] { 0 });
+ }
+
+ @Test
+ public void testCodec_Minus1() {
+ checkConsistency(codec, new long[] { -1 });
+ }
+
+ @Test
+ public void testCodec_ZeroTimes8Minus1() {
+ checkConsistency(codec, new long[] { 0, 0, 0, 0, 0, 0, 0, 0, -1 });
+ }
+
+ @Test
+ public void testCodec_ZeroTimes127Minus1() {
+ long[] array = LongStream.concat(LongStream.range(0, 127).map(l -> 0), LongStream.of(-1)).toArray();
+
+ checkConsistency(codec, array);
+ }
+
+ @Test
+ public void testCodec_ZeroTimes128Minus1() {
+ long[] array = LongStream.concat(LongStream.range(0, 128).map(l -> 0), LongStream.of(-1)).toArray();
+
+ checkConsistency(codec, array);
+ }
+
+ @Test
+ public void testCodec_MinValue() {
+ checkConsistency(codec, new long[] { Long.MIN_VALUE });
+ }
+
+ @Test
+ public void testCodec_ZeroMinValue() {
+ checkConsistency(codec, new long[] { 0, Long.MIN_VALUE });
+ }
+
+ @Test
+ public void testCodec_allPowerOfTwo() {
+ checkConsistency(codec, new long[] { 1L << 42 });
+ for (int i = 0; i < 64; i++) {
+ checkConsistency(codec, new long[] { 1L << i });
+ }
+ }
+
+ @Test
+ public void testCodec_ZeroThenAllPowerOfTwo() {
+ for (int i = 0; i < 64; i++) {
+ checkConsistency(codec, new long[] { 0, 1L << i });
+ }
+ }
+
+ @Test
+ public void testCodec_intermediateHighPowerOfTwo() {
+ Assert.assertEquals(3, LongTestUtils.compress((LongCODEC) codec, new long[] { 1L << 42 }).length);
+ }
+
+}
diff --git a/src/test/java/me/lemire/longcompression/TestLongVariableByte.java b/src/test/java/me/lemire/longcompression/TestLongVariableByte.java
new file mode 100644
index 0000000..15613f2
--- /dev/null
+++ b/src/test/java/me/lemire/longcompression/TestLongVariableByte.java
@@ -0,0 +1,103 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+
+package me.lemire.longcompression;
+
+import java.util.stream.LongStream;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Edge-cases having caused issue specifically with LongVariableByte.
+ *
+ * @author Benoit Lacelle
+ */
+public class TestLongVariableByte {
+ final LongVariableByte codec = new LongVariableByte();
+
+ private void checkConsistency(LongCODEC codec, long[] array) {
+ {
+ long[] compressed = LongTestUtils.compress(codec, array);
+ long[] uncompressed = LongTestUtils.uncompress(codec, compressed, array.length);
+
+ Assert.assertArrayEquals(array, uncompressed);
+ }
+
+ if (codec instanceof ByteLongCODEC) {
+ byte[] compressed = LongTestUtils.compress((ByteLongCODEC) codec, array);
+ long[] uncompressed = LongTestUtils.uncompress((ByteLongCODEC) codec, compressed, array.length);
+
+ Assert.assertArrayEquals(array, uncompressed);
+ }
+
+ if (codec instanceof SkippableLongCODEC) {
+ long[] compressed = LongTestUtils.compressHeadless((SkippableLongCODEC) codec, array);
+ long[] uncompressed =
+ LongTestUtils.uncompressHeadless((SkippableLongCODEC) codec, compressed, array.length);
+
+ Assert.assertArrayEquals(array, uncompressed);
+ }
+ }
+
+ @Test
+ public void testCodec_ZeroMinus1() {
+ checkConsistency(codec, new long[] { -1 });
+ }
+
+ @Test
+ public void testCodec_ZeroTimes8Minus1() {
+ checkConsistency(codec, new long[] { 0, 0, 0, 0, 0, 0, 0, 0, -1 });
+ }
+
+ @Test
+ public void testCodec_ZeroTimes127Minus1() {
+ long[] array = LongStream.concat(LongStream.range(0, 127).map(l -> 0), LongStream.of(-1)).toArray();
+
+ checkConsistency(codec, array);
+ }
+
+ @Test
+ public void testCodec_ZeroTimes128Minus1() {
+ long[] array = LongStream.concat(LongStream.range(0, 128).map(l -> 0), LongStream.of(-1)).toArray();
+
+ checkConsistency(codec, array);
+ }
+
+ @Test
+ public void testCodec_MinValue() {
+ checkConsistency(codec, new long[] { Long.MIN_VALUE });
+ }
+
+ @Test
+ public void testCodec_ZeroMinValue() {
+ checkConsistency(codec, new long[] { 0, Long.MIN_VALUE });
+ }
+
+ @Test
+ public void testCodec_allPowerOfTwo() {
+ checkConsistency(codec, new long[] { 1L << 42 });
+ for (int i = 0; i < 64; i++) {
+ checkConsistency(codec, new long[] { 1L << i });
+ }
+ }
+
+ @Test
+ public void testCodec_ZeroThenAllPowerOfTwo() {
+ for (int i = 0; i < 64; i++) {
+ checkConsistency(codec, new long[] { 0, 1L << i });
+ }
+ }
+
+ @Test
+ public void testCodec_intermediateHighPowerOfTwo() {
+ Assert.assertEquals(1, LongTestUtils.compress((LongCODEC) codec, new long[] { 1L << 42 }).length);
+ Assert.assertEquals(7, LongTestUtils.compress((ByteLongCODEC) codec, new long[] { 1L << 42 }).length);
+ Assert.assertEquals(1, LongTestUtils.compressHeadless((SkippableLongCODEC) codec, new long[] { 1L << 42 }).length);
+ }
+
+}
diff --git a/src/test/java/me/lemire/longcompression/synth/LongClusteredDataGenerator.java b/src/test/java/me/lemire/longcompression/synth/LongClusteredDataGenerator.java
new file mode 100644
index 0000000..5b90ee0
--- /dev/null
+++ b/src/test/java/me/lemire/longcompression/synth/LongClusteredDataGenerator.java
@@ -0,0 +1,91 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+package me.lemire.longcompression.synth;
+
+import me.lemire.integercompression.synth.ClusteredDataGenerator;
+
+/**
+ * This class will generate lists of random longs based on the clustered
+ * model:
+ *
+ * Reference: Vo Ngoc Anh and Alistair Moffat. 2010. Index compression using
+ * 64-bit words. Softw. Pract. Exper.40, 2 (February 2010), 131-147.
+ *
+ * @author Benoit Lacelle
+ * @see ClusteredDataGenerator
+ */
+public class LongClusteredDataGenerator {
+
+ final LongUniformDataGenerator unidg = new LongUniformDataGenerator();
+
+ /**
+ * Creates a generator; its uniform helper (unidg) is built with the no-argument, unseeded constructor.
+ */
+ public LongClusteredDataGenerator() {
+ }
+
+ void fillUniform(long[] array, int offset, int length, long Min, long Max) { // writes length values starting at array[offset]
+ long[] v = this.unidg.generateUniform(length, Max - Min); // values are drawn relative to 0 ...
+ for (int k = 0; k < v.length; ++k)
+ array[k + offset] = Min + v[k]; // ... then shifted up by Min
+ }
+
+ /** Recursively splits [Min, Max) at a random cut, filling each half uniformly or as nested clusters. */
+ void fillClustered(long[] array, int offset, int length, long Min, long Max) {
+ final long range = Max - Min;
+ if ((range == length) || (length <= 10)) {
+ fillUniform(array, offset, length, Min, Max);
+ return;
+ }
+ // Scale the random double before casting: "(long) nextDouble()" alone is always 0.
+ final long slack = range - length - 1;
+ final long cut = length / 2 + ((slack > 0) ? (long) (this.unidg.rand.nextDouble() * slack) : 0);
+ final double p = this.unidg.rand.nextDouble();
+ if (p < 0.25) {
+ fillUniform(array, offset, length / 2, Min, Min + cut);
+ fillClustered(array, offset + length / 2, length
+ - length / 2, Min + cut, Max);
+ } else if (p < 0.5) {
+ fillClustered(array, offset, length / 2, Min, Min + cut);
+ fillUniform(array, offset + length / 2, length - length
+ / 2, Min + cut, Max);
+ } else {
+ fillClustered(array, offset, length / 2, Min, Min + cut);
+ fillClustered(array, offset + length / 2, length
+ - length / 2, Min + cut, Max);
+ }
+ }
+
+ /**
+ * Generates N random longs following the clustered model, with values starting at 0 and bounded by Max.
+ *
+ * @param N
+ * number of longs to generate
+ * @param Max
+ * upper bound on the generated values
+ * @return array containing the longs
+ */
+ public long[] generateClustered(int N, long Max) {
+ long[] array = new long[N];
+ fillClustered(array, 0, N, 0, Max); // fill the whole array over the value range starting at 0
+ return array;
+ }
+
+ /**
+ * Little demo program: generates 20 clustered values bounded by 1000 and prints one per line.
+ *
+ * @param args
+ * arguments are ignored
+ */
+ public static void main(final String[] args) {
+ long[] example = (new LongClusteredDataGenerator())
+ .generateClustered(20, 1000);
+ for (int k = 0; k < example.length; ++k)
+ System.out.println(example[k]);
+ }
+}
diff --git a/src/test/java/me/lemire/longcompression/synth/LongUniformDataGenerator.java b/src/test/java/me/lemire/longcompression/synth/LongUniformDataGenerator.java
new file mode 100644
index 0000000..4d435f2
--- /dev/null
+++ b/src/test/java/me/lemire/longcompression/synth/LongUniformDataGenerator.java
@@ -0,0 +1,125 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+package me.lemire.longcompression.synth;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Random;
+import java.util.Set;
+
+import org.roaringbitmap.longlong.Roaring64Bitmap;
+
+import me.lemire.integercompression.synth.UniformDataGenerator;
+
+/**
+ * This class will generate "uniform" lists of random longs.
+ *
+ * @author Benoit Lacelle
+ * @see UniformDataGenerator
+ */
+public class LongUniformDataGenerator {
+ /**
+ * Constructs a generator of random arrays backed by an unseeded {@link Random}.
+ */
+ public LongUniformDataGenerator() {
+ this.rand = new Random(); // replaces the instance created by the field initializer
+ }
+
+ /**
+ * @param seed
+ * seed handed to the {@link Random} that drives all generation
+ */
+ public LongUniformDataGenerator(final int seed) {
+ this.rand = new Random(seed);
+ }
+
+ /**
+ * Generates N distinct random longs in [0, Max), sorted ascending; throws RuntimeException if N > Max.
+ */
+ long[] generateUniformHash(int N, long Max) {
+ if (N > Max)
+ throw new RuntimeException("not possible");
+ long[] ans = new long[N];
+ Set<Long> s = new HashSet<>(); // duplicates are absorbed by the set, so keep drawing until N distinct values exist
+ while (s.size() < N)
+ s.add((long) (this.rand.nextDouble() * Max));
+ Iterator<Long> i = s.iterator();
+ for (int k = 0; k < N; ++k)
+ ans[k] = i.next().longValue();
+ Arrays.sort(ans);
+ return ans;
+ }
+
+ /**
+ * Outputs, in ascending order, the longs of [0,Max) that are not in x; expects x sorted ascending with distinct values.
+ */
+ static long[] negate(long[] x, long Max) {
+ int newLength = saturatedCast(Max - x.length);
+ long[] ans = new long[newLength];
+ long i = 0; // value cursor; long so that comparing against values above Integer.MAX_VALUE cannot overflow
+ int c = 0;
+ for (int j = 0; j < x.length; ++j) {
+ long v = x[j];
+ for (; i < v; ++i)
+ ans[c++] = i;
+ ++i; // skip v itself
+ }
+ while (c < ans.length)
+ ans[c++] = i++;
+ return ans;
+ }
+
+ private static int saturatedCast(long toInt) { // narrows to int, clamping values above Integer.MAX_VALUE
+ if (toInt > Integer.MAX_VALUE) {
+ return Integer.MAX_VALUE;
+ } else {
+ return (int) toInt; // no lower clamp: values below Integer.MIN_VALUE would wrap
+ }
+ }
+
+ /**
+ * Generates N distinct random longs from 0 to Max, choosing a strategy from the density N / Max.
+ *
+ * @param N
+ * number of longs to generate
+ * @param Max
+ * bound on the value of longs
+ * @return an array containing randomly selected longs
+ */
+ public long[] generateUniform(int N, long Max) {
+ assert N >= 0;
+ assert Max >= 0;
+ if (2L * N > Max) { // dense request: draw the (smaller) complement and negate it; 2L avoids int overflow
+ return negate(generateUniform(saturatedCast(Max - N), Max), Max);
+ }
+ if (2048L * N > Max) // moderately dense: bitmap-based rejection; 2048L avoids int overflow
+ return generateUniformBitmap(N, Max);
+ return generateUniformHash(N, Max);
+ }
+
+ /**
+ * Generates N distinct random longs from 0 to Max, using a Roaring64Bitmap to reject duplicates.
+ */
+ long[] generateUniformBitmap(int N, long Max) {
+ if (N > Max)
+ throw new RuntimeException("not possible");
+ Roaring64Bitmap bs = new Roaring64Bitmap();
+ int cardinality = 0; // number of distinct values added so far
+ while (cardinality < N) {
+ long v = (long) (rand.nextDouble() * Max);
+ if (!bs.contains(v)) { // only count values not drawn before
+ bs.add(v);
+ cardinality++;
+ }
+ }
+ return bs.toArray();
+ }
+
+ Random rand = new Random(); // source of randomness; both constructors reassign it
+
+}
\ No newline at end of file