@@ -146,6 +146,7 @@ class LshTable
146
146
*/
147
147
LshTable ()
148
148
{
149
+ feature_size_ = 0 ;
149
150
}
150
151
151
152
/* * Default constructor
@@ -156,7 +157,7 @@ class LshTable
156
157
*/
157
158
LshTable (unsigned int feature_size, unsigned int key_size, std::vector<size_t > & indices)
158
159
{
159
- ( void ) feature_size;
160
+ feature_size_ = feature_size;
160
161
(void )key_size;
161
162
(void )indices;
162
163
std::cerr << " LSH is not implemented for that type" << std::endl;
@@ -335,6 +336,8 @@ class LshTable
335
336
*/
336
337
unsigned int key_size_;
337
338
339
+ unsigned int feature_size_;
340
+
338
341
// Members only used for the unsigned char specialization
339
342
/* * The mask to apply to a feature to get the hash key
340
343
* Only used in the unsigned char case
@@ -350,9 +353,10 @@ inline LshTable<unsigned char>::LshTable( unsigned int feature_size,
350
353
unsigned int subsignature_size,
351
354
std::vector<size_t > & indices )
352
355
{
356
+ feature_size_ = feature_size;
353
357
initialize (subsignature_size);
354
358
// Allocate the mask
355
- mask_ = std::vector<size_t >((size_t ) ceil (( float )( feature_size * sizeof (char )) / ( float ) sizeof (size_t ) ), 0 );
359
+ mask_ = std::vector<size_t >((feature_size * sizeof (char ) + sizeof ( size_t ) - 1 ) / sizeof (size_t ), 0 );
356
360
357
361
// Generate a random set of order of subsignature_size_ bits
358
362
for (unsigned int i = 0 ; i < key_size_; ++i) {
@@ -391,6 +395,7 @@ inline size_t LshTable<unsigned char>::getKey(const unsigned char* feature) cons
391
395
{
392
396
// no need to check if T is divisible by sizeof(size_t) like in the Hamming
393
397
// distance computation as we have a mask
398
+ // FIXIT: This is a bad assumption, because we read tail bytes past the end of the allocated feature buffer
394
399
const size_t * feature_block_ptr = reinterpret_cast <const size_t *> ((const void *)feature);
395
400
396
401
// Figure out the subsignature of the feature
@@ -399,10 +404,20 @@ inline size_t LshTable<unsigned char>::getKey(const unsigned char* feature) cons
399
404
size_t subsignature = 0 ;
400
405
size_t bit_index = 1 ;
401
406
402
- for (std::vector< size_t >::const_iterator pmask_block = mask_. begin (); pmask_block != mask_. end (); ++pmask_block ) {
407
+ for (unsigned i = 0 ; i < feature_size_; i += sizeof ( size_t ) ) {
403
408
// get the mask and signature blocks
404
- size_t feature_block = *feature_block_ptr;
405
- size_t mask_block = *pmask_block;
409
+ size_t feature_block;
410
+ if (i <= feature_size_ - sizeof (size_t ))
411
+ {
412
+ feature_block = *feature_block_ptr;
413
+ }
414
+ else
415
+ {
416
+ size_t tmp = 0 ;
417
+ memcpy (&tmp, feature_block_ptr, feature_size_ - i); // preserve bytes order
418
+ feature_block = tmp;
419
+ }
420
+ size_t mask_block = mask_[i / sizeof (size_t )];
406
421
while (mask_block) {
407
422
// Get the lowest set bit in the mask block
408
423
size_t lowest_bit = mask_block & (-(ptrdiff_t )mask_block);
0 commit comments