Skip to content

Commit 0bef8b3

Browse files
committed
add Lempel-Ziv Factorization and subsequence automaton
1 parent e8fa285 commit 0bef8b3

6 files changed

+84
-35
lines changed

README.md

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,17 @@ Collections of some commonly used algorithms.
88
+ [x] manacher
99
+ [x] 字符串hash
1010
+ [x] EERTREE
11-
+ [ ] 后缀数组
12-
+ [x] 倍增
11+
+ [x] 后缀数组
12+
+ [x] Prefix Doubling
1313
+ [x] SA-IS
14-
+ [ ] DC3
15-
+ [ ] GSACA
1614
+ [x] 后缀树
1715
+ [x] 后缀自动机
18-
+ [ ] 子序列自动机
16+
+ [x] 子序列自动机
1917
+ [ ] AC自动机
2018
+ [x] Palindromic Factorization
2119
+ [x] Lyndon Word Factorization
2220
+ [ ] Square Factorization
21+
+ [x] Lempel-Ziv Factorization
2322
+ [ ] Repetitions
2423

2524
## Graph Algorithms

string-utility/SequenceAutomaton.cc

Lines changed: 0 additions & 28 deletions
This file was deleted.
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#include <cstring>
2+
#include <vector>
3+
#include <utility>
4+
5+
using PII = std::pair<int, int>;
6+
7+
/**
8+
* Implement this paper:
9+
* Simpler and Faster Lempel Ziv Factorization
10+
*
11+
* s: input string
12+
* sa: suffix array of s
13+
* rk: inverse suffix array
14+
* return a list of (len, prevOcc)
15+
**/
16+
std::vector<PII> LempelZiv(char s[], int sa[], int rk[]) {
17+
int n = strlen(s);
18+
std::vector<int> psv(n + 1), nsv(n + 1), stack;
19+
// computer psv and nsv from suffix array
20+
for (int i = 0; i < n; ++i) {
21+
int x = sa[i];
22+
while (!stack.empty() && sa[stack.back()] > x) {
23+
nsv[stack.back()] = x;
24+
stack.pop_back();
25+
}
26+
psv[i] = stack.empty() ? -1 : sa[stack.back()];
27+
stack.push_back(i);
28+
}
29+
while (!stack.empty()) {
30+
nsv[stack.back()] = -1;
31+
stack.pop_back();
32+
}
33+
// computer Lempel-Ziv Factorization
34+
std::vector<PII> lz;
35+
lz.push_back(PII(0, s[0]));
36+
for (int p = 1; p < n; ) {
37+
int lpf = 0, prev = psv[p];
38+
for (auto &j: {psv[p], nsv[p]}) if (~j) {
39+
int l = 0;
40+
while (p + l < n && s[j + l] == s[p + l]) ++l;
41+
if (l > lpf) lpf = l, prev = j;
42+
}
43+
if (lpf) lz.push_back(PII(lpf, prev));
44+
else lz.push_back(PII(1, s[p]));
45+
p += std::max(1, lpf);
46+
}
47+
return lz;
48+
}

string-utility/sa-is.cc

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,14 @@
77
*
88
* time complexity: O(n)
99
*/
10+
#include <algorithm>
11+
1012
namespace SA {
13+
const static int N = 100000 + 10;
1114
int sa[N], rk[N], ht[N], s[N<<1], t[N<<1], p[N], cnt[N], cur[N];
1215
#define pushS(x) sa[cur[s[x]]--] = x
1316
#define pushL(x) sa[cur[s[x]]++] = x
14-
#define inducedSort(v) fill_n(sa, n, -1); fill_n(cnt, m, 0); \
17+
#define inducedSort(v) std::fill_n(sa, n, -1); std::fill_n(cnt, m, 0); \
1518
for (int i = 0; i < n; i++) cnt[s[i]]++; \
1619
for (int i = 1; i < m; i++) cnt[i] += cnt[i-1]; \
1720
for (int i = 0; i < m; i++) cur[i] = cnt[i]-1; \
@@ -39,7 +42,7 @@ void sais(int n, int m, int *s, int *t, int *p) {
3942
template<typename T>
4043
int mapCharToInt(int n, const T *str) {
4144
int m = *max_element(str, str+n);
42-
fill_n(rk, m+1, 0);
45+
std::fill_n(rk, m+1, 0);
4346
for (int i = 0; i < n; i++) rk[str[i]] = 1;
4447
for (int i = 0; i < m; i++) rk[i+1] += rk[i];
4548
for (int i = 0; i < n; i++) s[i] = rk[str[i]] - 1;
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/**
2+
* Directed acyclic subsequence graph
3+
* - recognize all the subsequences of a string.
4+
*
5+
* time complexity: O(n * \sigma)
6+
**/
7+
#include <cstring>
8+
#include <algorithm>
9+
10+
struct DASG {
11+
static const int SIZE = 100000 + 10, SIGMA = 26;
12+
struct Node{
13+
Node *ch[SIGMA];
14+
Node() {memset(ch, 0, sizeof(ch));}
15+
} pool[SIZE], *sz, *rt, *last[SIGMA];
16+
void init() {
17+
sz = pool; rt = sz++;
18+
std::fill(last, last + SIGMA, rt);
19+
}
20+
void add(int x) {
21+
Node *np = sz++;
22+
for (Node *p = last[x]; p < np; ++p) {
23+
if (!p->ch[x]) p->ch[x] = np;
24+
}
25+
last[x] = np;
26+
}
27+
};
File renamed without changes.

0 commit comments

Comments
 (0)