|
| 1 | +#include "../data-structure/RangeMinimumQuery.cc" |
| 2 | +#include <cstring> |
| 3 | +#include <vector> |
| 4 | +#include <algorithm> |
| 5 | + |
| 6 | +// Two Efficient Algorithms for Linear Suffix Array Construction |
| 7 | +#define pushS(x) sa[--cur[(int)s[x]]] = x |
| 8 | +#define pushL(x) sa[cur[(int)s[x]]++] = x |
| 9 | +class SuffixArray { |
| 10 | +public: |
| 11 | + std::vector<int> sa; |
| 12 | + std::vector<int> rank; |
| 13 | + std::vector<int> lcp; |
| 14 | + SchieberVishkinRMQ<int> lcpRMQ; |
| 15 | + |
| 16 | + template <class T> void build(const T *s, int n); |
| 17 | + template <class T> void build(const T *s, int n, int m); |
| 18 | + |
| 19 | + int size() const { |
| 20 | + return sa.size() - 1; |
| 21 | + } |
| 22 | + int computeLCP(int i, int j) const { |
| 23 | + if (i == j) return size() - i; |
| 24 | + int x = rank[i], y = rank[j]; |
| 25 | + if (x > y) std::swap(x, y); |
| 26 | + return lcp[lcpRMQ.query(x + 1, y)]; |
| 27 | + } |
| 28 | + |
| 29 | +private: |
| 30 | + using SLTypes = std::vector<bool>; |
| 31 | + int *buffer, *freq, *cur; |
| 32 | + int len; |
| 33 | + |
| 34 | + void buildRankTable(); |
| 35 | + void buildLCPArrayRMQ(); |
| 36 | + template <class T> void computeLCPArray(const T *s); |
| 37 | + template <class T> void countFrequence(const T *s, int n, int m); |
| 38 | + template <class T> void induce(const T *s, int *sa, int m, const SLTypes &t); |
| 39 | + template <class T> void sa_is(const T *s, int *sa, int n, int m); |
| 40 | +}; |
| 41 | + |
| 42 | +template <class T> |
| 43 | +void SuffixArray::build(const T *s, int n) { |
| 44 | + this->len = n; |
| 45 | + int m = *std::max_element(s, s + n) + 1; |
| 46 | + build(s, n, m); |
| 47 | + buildRankTable(); |
| 48 | + computeLCPArray(s); |
| 49 | + buildLCPArrayRMQ(); |
| 50 | +} |
| 51 | + |
| 52 | +template <class T> |
| 53 | +void SuffixArray::build(const T *s, int n, int m) { |
| 54 | + sa.resize(n + 1); |
| 55 | + if (n == 0) sa[0] = 0; |
| 56 | + else { |
| 57 | + // memory usage: sa + buffer + types |
| 58 | + // = 4 * (n + max(m * 2, n)) + 2 * n / 8 + O(1) bytes |
| 59 | + std::vector<int> buffer((std::max(m, (n + 1) / 2) + 1) * 2); |
| 60 | + this->buffer = &buffer[0]; |
| 61 | + sa_is<T>(s, &sa[0], n + 1, m); |
| 62 | + } |
| 63 | +} |
| 64 | + |
| 65 | +void SuffixArray::buildRankTable() { |
| 66 | + int n = size() + 1; |
| 67 | + rank.resize(n); |
| 68 | + for (int i = 0; i < n; ++i) rank[sa[i]] = i; |
| 69 | +} |
| 70 | + |
| 71 | +void SuffixArray::buildLCPArrayRMQ() { |
| 72 | + lcpRMQ.build(&lcp[0], size() + 1); |
| 73 | +} |
| 74 | + |
| 75 | +template <class T> |
| 76 | +void SuffixArray::computeLCPArray(const T *s) { |
| 77 | + const int n = size() + 1; |
| 78 | + lcp.resize(n); |
| 79 | + for (int i = 0, h = lcp[0] = 0; i < n; ++i) { |
| 80 | + int j = sa[rank[i] - 1]; |
| 81 | + while (i + h < n && j + h < n && s[i + h] == s[j + h]) ++h; |
| 82 | + if (lcp[rank[i]] = h) --h; |
| 83 | + } |
| 84 | +} |
| 85 | + |
| 86 | +template <class T> |
| 87 | +void SuffixArray::countFrequence(const T *s, int n, int m) { |
| 88 | + memset(freq, 0, sizeof(int) * m); |
| 89 | + for (int i = 0; i < n; ++i) ++freq[(int)s[i]]; |
| 90 | + for (int i = 1; i < m; ++i) freq[i] += freq[i - 1]; |
| 91 | + memcpy(cur, freq, sizeof(int) * m); |
| 92 | +} |
| 93 | + |
| 94 | +template <class T> |
| 95 | +void SuffixArray::induce(const T *s, int *sa, int m, const SLTypes &t) { |
| 96 | + const int n = t.size(); |
| 97 | + memcpy(cur + 1, freq, sizeof(int) * (m - 1)); |
| 98 | + for (int i = 0; i < n; ++i) { |
| 99 | + int p = sa[i] - 1; |
| 100 | + if (p >= 0 && t[p]) pushL(p); |
| 101 | + } |
| 102 | + memcpy(cur, freq, sizeof(int) * m); |
| 103 | + for (int i = n - 1; i > 0; --i) { |
| 104 | + int p = sa[i] - 1; |
| 105 | + if (p >= 0 && !t[p]) pushS(p); |
| 106 | + } |
| 107 | +} |
| 108 | + |
| 109 | +template <class T> |
| 110 | +void SuffixArray::sa_is(const T *s, int *sa, int n, int m) { |
| 111 | + SLTypes t(n); memset(sa, 0, sizeof(int) * n); |
| 112 | + for (int i = n - 2; ~i; --i) { |
| 113 | + t[i] = (s[i] == s[i + 1]) ? t[i + 1] : s[i] > s[i + 1]; |
| 114 | + } |
| 115 | + freq = buffer, cur = buffer + m; |
| 116 | + countFrequence(s, n, m); |
| 117 | + for (int i = 1; i < n; ++i) if (t[i - 1] > t[i]) pushS(i); |
| 118 | + induce(s, sa, m, t); |
| 119 | + int n1 = 0, order = 0; |
| 120 | + for (int i = 0, p; i < n; ++i) { |
| 121 | + if ((p = sa[i]) && t[p - 1] > t[p]) sa[n1++] = p; |
| 122 | + } |
| 123 | + int *s1 = sa + n1; |
| 124 | + memset(s1, -1, sizeof(int) * (n - n1)); |
| 125 | + s1[(sa[0] - 1) / 2] = order++; |
| 126 | + for (int i = 1; i < n1; ++i) { |
| 127 | + bool diff = true; |
| 128 | + for (int x = sa[i - 1], y = sa[i]; ; ++x, ++y) { |
| 129 | + if (s[x] != s[y] || t[x] != t[y]) break; |
| 130 | + else if (t[x] > t[x + 1] && t[y] > t[y + 1]) { |
| 131 | + diff = (s[x + 1] != s[y + 1]); break; |
| 132 | + } |
| 133 | + } |
| 134 | + s1[(sa[i] - 1) / 2] = (order += diff) - 1; |
| 135 | + } |
| 136 | + for (int i = 0, x = 0; i < n - n1; ++i) { |
| 137 | + if (~s1[i]) s1[x++] = s1[i]; |
| 138 | + } |
| 139 | + if (order != n1) { |
| 140 | + sa_is<int>(s1, sa, n1, order); |
| 141 | + for (int i = 0; i < n1; ++i) s1[sa[i]] = i; |
| 142 | + } |
| 143 | + for (int i = 1, j = 0; i < n; ++i) { |
| 144 | + if (t[i - 1] > t[i]) sa[s1[j++]] = -i; |
| 145 | + } |
| 146 | + memset(s1, 0, sizeof(int) * (n - n1)); |
| 147 | + freq = buffer, cur = buffer + m; |
| 148 | + countFrequence(s, n, m); |
| 149 | + for (int i = n1 - 1; ~i; --i) pushS(-sa[i]); |
| 150 | + induce(s, sa, m, t); |
| 151 | +} |
0 commit comments