Skip to content

Commit

Permalink
added code cache for better performance on larger strings + calculate…
Browse files Browse the repository at this point in the history
… the first row in the initialization loop
  • Loading branch information
gustf committed Jun 1, 2017
1 parent f7f0cf8 commit 4d5890f
Show file tree
Hide file tree
Showing 5 changed files with 165 additions and 26 deletions.
24 changes: 12 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,19 @@ levenshtein('kitten', 'sitting');

```
$ npm run bench
```
```
655,992 op/s » js-levenshtein
542,796 op/s » leven
497,966 op/s » talisman
386,839 op/s » levenshtein-edit-distance
254,941 op/s » fast-levenshtein
69,857 op/s » levenshtein-component
21,688 op/s » levdist
24,631 op/s » ld
21,834 op/s » natural
13,984 op/s » levenshtein
js-levenshtein
745,590 op/s » js-levenshtein/cached
667,378 op/s » js-levenshtein
533,400 op/s » leven
525,475 op/s » talisman
381,433 op/s » levenshtein-edit-distance
287,224 op/s » fast-levenshtein
80,848 op/s » levenshtein-component
24,762 op/s » levdist
27,333 op/s » ld
19,505 op/s » natural
13,413 op/s » levenshtein
```


Expand Down
5 changes: 5 additions & 0 deletions bench.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ const levenshtein1 = require('levenshtein');
const talisman = require('talisman/metrics/distance/levenshtein');
const leven = require('leven');
const levenshtein = require('./');
const cached = require('./cached');

function run(fn)
{
Expand All @@ -30,6 +31,10 @@ function run(fn)

suite('js-levenshtein', function()
{
bench('js-levenshtein/cached', function()
{
run(cached);
});
bench('js-levenshtein', function()
{
run(levenshtein);
Expand Down
113 changes: 113 additions & 0 deletions cached.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
'use strict';
module.exports = (function()
{
var vector = [];

function _min(d0, d1, d2, bx, ay)
{
return d0 < d1 || d2 < d1
? d0 > d2
? d2 + 1
: d0 + 1
: bx === ay
? d1
: d1 + 1;
}

return function(a, b)
{
if (a === b) {
return 0;
}

if (a.length > b.length) {
var tmp = a;
a = b;
b = tmp;
}

var la = a.length;
var lb = b.length;

while (la > 0 && (a.charCodeAt(la - 1) === b.charCodeAt(lb - 1))) {
la--;
lb--;
}

var offset = 0;

while (offset < la && (a.charCodeAt(offset) === b.charCodeAt(offset))) {
offset++;
}

la -= offset;
lb -= offset;

if (la === 0 || lb === 1) {
return lb;
}

var x;
var y;
var d0;
var d1;
var d2;
var d3;
var dd;
var dy;
var ay;
var bx0;
var bx1;
var bx2;
var bx3;

if (vector.length < la << 1) {
vector = new Array(la << 1);
}

bx0 = b.charCodeAt(offset);
dd = 1;
for (y = 0; y < la; y++) {
vector[la + y] = a.charCodeAt(offset + y);
vector[y] = dd = dd < y ? dd + 1 : bx0 === vector[la + y] ? y : y + 1;
}

for (x = 1; (x + 3) < lb;) {
bx0 = b.charCodeAt(offset + (d0 = x));
bx1 = b.charCodeAt(offset + (d1 = x + 1));
bx2 = b.charCodeAt(offset + (d2 = x + 2));
bx3 = b.charCodeAt(offset + (d3 = x + 3));
dd = (x += 4);
for (y = 0; y < la;) {
ay = vector[la + y];
dy = vector[y];
d0 = _min(dy, d0, d1, bx0, ay);
d1 = _min(d0, d1, d2, bx1, ay);
d2 = _min(d1, d2, d3, bx2, ay);
dd = _min(d2, d3, dd, bx3, ay);
vector[y++] = dd;
d3 = d2;
d2 = d1;
d1 = d0;
d0 = dy;
}
}

for (; x < lb;) {
bx0 = b.charCodeAt(offset + (d0 = x));
dd = ++x;
for (y = 0; y < la; y++) {
dy = vector[y];
vector[y] = dd = dy < d0 || dd < d0
? (dy > dd ? dd + 1 : dy + 1)
: bx0 === vector[la + y]
? d0
: d0 + 1;
d0 = dy;
}
}

return dd;
};
})();

30 changes: 16 additions & 14 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
'use strict';
module.exports = (function()
{

function _min(d0, d1, d2, bx, ay)
{
return d0 < d1 || d2 < d1
Expand Down Expand Up @@ -42,12 +41,10 @@ module.exports = (function()
la -= offset;
lb -= offset;

if (la === 0) {
if (la === 0 || lb === 1) {
return lb;
}

var dist = new Array(la);

var x;
var y;
var d0;
Expand All @@ -62,24 +59,29 @@ module.exports = (function()
var bx2;
var bx3;

for (y = 0; y < la;) {
dist[y] = ++y;
var vector = new Array(la << 1);

bx0 = b.charCodeAt(offset);
dd = 1;
for (y = 0; y < la; y++) {
vector[la + y] = a.charCodeAt(offset + y);
vector[y] = dd = dd < y ? dd + 1 : bx0 === vector[la + y] ? y : y + 1;
}

for (x = 0; (x + 3) < lb;) {
for (x = 1; (x + 3) < lb;) {
bx0 = b.charCodeAt(offset + (d0 = x));
bx1 = b.charCodeAt(offset + (d1 = x + 1));
bx2 = b.charCodeAt(offset + (d2 = x + 2));
bx3 = b.charCodeAt(offset + (d3 = x + 3));
dd = (x += 4);
for (y = 0; y < la;) {
ay = a.charCodeAt(offset + y);
dy = dist[y];
ay = vector[la + y];
dy = vector[y];
d0 = _min(dy, d0, d1, bx0, ay);
d1 = _min(d0, d1, d2, bx1, ay);
d2 = _min(d1, d2, d3, bx2, ay);
dd = _min(d2, d3, dd, bx3, ay);
dist[y++] = dd;
vector[y++] = dd;
d3 = d2;
d2 = d1;
d1 = d0;
Expand All @@ -91,10 +93,10 @@ module.exports = (function()
bx0 = b.charCodeAt(offset + (d0 = x));
dd = ++x;
for (y = 0; y < la; y++) {
dy = dist[y];
dist[y] = dd = dy < d0 || dd < d0
? (dy > dd ? dd + 1 : dy + 1)
: bx0 === a.charCodeAt(offset + y)
dy = vector[y];
vector[y] = dd = dy < d0 || dd < d0
? dy > dd ? dd + 1 : dy + 1
: bx0 === vector[la + y]
? d0
: d0 + 1;
d0 = dy;
Expand Down
19 changes: 19 additions & 0 deletions test.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import test from 'ava';
import levenshtein from './';
import cached from './cached';

test(t =>
{
Expand All @@ -19,3 +20,21 @@ test(t =>
t.is(levenshtein('因為我是中國人所以我會說中文', '因為我是英國人所以我會說英文'), 2);
});

test(t =>
{
t.is(cached('a', 'b'), 1);
t.is(cached('ab', 'ac'), 1);
t.is(cached('ac', 'bc'), 1);
t.is(cached('abc', 'axc'), 1);
t.is(cached('kitten', 'sitting'), 3);
t.is(cached('xabxcdxxefxgx', '1ab2cd34ef5g6'), 6);
t.is(cached('cat', 'cow'), 2);
t.is(cached('xabxcdxxefxgx', 'abcdefg'), 6);
t.is(cached('javawasneat', 'scalaisgreat'), 7);
t.is(cached('example', 'samples'), 3);
t.is(cached('sturgeon', 'urgently'), 6);
t.is(cached('levenshtein', 'frankenstein'), 6);
t.is(cached('distance', 'difference'), 5);
t.is(cached('因為我是中國人所以我會說中文', '因為我是英國人所以我會說英文'), 2);
});

0 comments on commit 4d5890f

Please sign in to comment.