|
| 1 | +#include <cstdio> |
| 2 | +#include <vector> |
| 3 | +#include <cassert> |
| 4 | +#include <functional> |
| 5 | +#include <algorithm> |
| 6 | + |
| 7 | +// given first m items init[0..m-1] and coefficents trans[0..m-1] or |
| 8 | +// given first 2 *m items init[0..2m-1], it will compute trans[0..m-1] |
| 9 | +// for you. trans[0..m] should be given as that |
| 10 | +// init[m] = sum_{i=0}^{m-1} init[i] * trans[i] |
| 11 | +struct LinearRecurrence { |
| 12 | + using int64 = long long; |
| 13 | + using vec = std::vector<int64>; |
| 14 | + |
| 15 | + static void extand(vec &a, size_t d, int64 value = 0) { |
| 16 | + if (d <= a.size()) return; |
| 17 | + a.resize(d, value); |
| 18 | + } |
| 19 | + static vec BerlekampMassey(const vec &s, int64 mod) { |
| 20 | + std::function<int64(int64)> inverse = [&](int64 a) { |
| 21 | + return a == 1 ? 1 : (int64)(mod - mod / a) * inverse(mod % a) % mod; |
| 22 | + }; |
| 23 | + vec A = {1}, B = {1}; |
| 24 | + int64 b = s[0]; |
| 25 | + for (size_t i = 1, m = 1; i < s.size(); ++i, m++) { |
| 26 | + int64 d = 0; |
| 27 | + for (size_t j = 0; j < A.size(); ++j) { |
| 28 | + d += A[j] * s[i - j] % mod; |
| 29 | + } |
| 30 | + if (!(d %= mod)) continue; |
| 31 | + if (2 * (A.size() - 1) <= i) { |
| 32 | + auto temp = A; |
| 33 | + extand(A, B.size() + m); |
| 34 | + int64 coef = d * inverse(b) % mod; |
| 35 | + for (size_t j = 0; j < B.size(); ++j) { |
| 36 | + A[j + m] -= coef * B[j] % mod; |
| 37 | + if (A[j + m] < 0) A[j + m] += mod; |
| 38 | + } |
| 39 | + B = temp, b = d, m = 0; |
| 40 | + } else { |
| 41 | + extand(A, B.size() + m); |
| 42 | + int64 coef = d * inverse(b) % mod; |
| 43 | + for (size_t j = 0; j < B.size(); ++j) { |
| 44 | + A[j + m] -= coef * B[j] % mod; |
| 45 | + if (A[j + m] < 0) A[j + m] += mod; |
| 46 | + } |
| 47 | + } |
| 48 | + } |
| 49 | + return A; |
| 50 | + } |
| 51 | + static void exgcd(int64 a, int64 b, int64 &g, int64 &x, int64 &y) { |
| 52 | + if (!b) x = 1, y = 0, g = a; |
| 53 | + else { |
| 54 | + exgcd(b, a % b, g, y, x); |
| 55 | + y -= x * (a / b); |
| 56 | + } |
| 57 | + } |
| 58 | + static int64 crt(const vec &c, const vec &m) { |
| 59 | + int n = c.size(); |
| 60 | + int64 M = 1, ans = 0; |
| 61 | + for (int i = 0; i < n; ++i) M *= m[i]; |
| 62 | + for (int i = 0; i < n; ++i) { |
| 63 | + int64 x, y, g, tm = M / m[i]; |
| 64 | + exgcd(tm, m[i], g, x, y); |
| 65 | + ans = (ans + tm * x * c[i] % M) % M; |
| 66 | + } |
| 67 | + return (ans + M) % M; |
| 68 | + } |
| 69 | + static vec ReedsSloane(const vec &s, int64 mod) { |
| 70 | + auto inverse = [] (int64 a, int64 m) { |
| 71 | + int64 d, x, y; |
| 72 | + exgcd(a, m, d, x, y); |
| 73 | + return d == 1 ? (x % m + m) % m : -1; |
| 74 | + }; |
| 75 | + auto L = [] (const vec &a, const vec &b) { |
| 76 | + int da = (a.size() > 1 || (a.size() == 1 && a[0])) ? a.size() - 1 : -1000; |
| 77 | + int db = (b.size() > 1 || (b.size() == 1 && b[0])) ? b.size() - 1 : -1000; |
| 78 | + return std::max(da, db + 1); |
| 79 | + }; |
| 80 | + auto prime_power = [&] (const vec &s, int64 mod, int64 p, int64 e) { |
| 81 | + // linear feedback shift register mod p^e, p is prime |
| 82 | + std::vector<vec> a(e), b(e), an(e), bn(e), ao(e), bo(e); |
| 83 | + vec t(e), u(e), r(e), to(e, 1), uo(e), pw(e + 1);; |
| 84 | + pw[0] = 1; |
| 85 | + for (int i = pw[0] = 1; i <= e; ++i) pw[i] = pw[i - 1] * p; |
| 86 | + for (int64 i = 0; i < e; ++i) { |
| 87 | + a[i] = {pw[i]}, an[i] = {pw[i]}; |
| 88 | + b[i] = {0}, bn[i] = {s[0] * pw[i] % mod}; |
| 89 | + t[i] = s[0] * pw[i] % mod; |
| 90 | + if (t[i] == 0) { |
| 91 | + t[i] = 1, u[i] = e; |
| 92 | + } else { |
| 93 | + for (u[i] = 0; t[i] % p == 0; t[i] /= p, ++u[i]); |
| 94 | + } |
| 95 | + } |
| 96 | + for (size_t k = 1; k < s.size(); ++k) { |
| 97 | + for (int g = 0; g < e; ++g) { |
| 98 | + if (L(an[g], bn[g]) > L(a[g], b[g])) { |
| 99 | + ao[g] = a[e - 1 - u[g]]; |
| 100 | + bo[g] = b[e - 1 - u[g]]; |
| 101 | + to[g] = t[e - 1 - u[g]]; |
| 102 | + uo[g] = u[e - 1 - u[g]]; |
| 103 | + r[g] = k - 1; |
| 104 | + } |
| 105 | + } |
| 106 | + a = an, b = bn; |
| 107 | + for (int o = 0; o < e; ++o) { |
| 108 | + int64 d = 0; |
| 109 | + for (size_t i = 0; i < a[o].size() && i <= k; ++i) { |
| 110 | + d = (d + a[o][i] * s[k - i]) % mod; |
| 111 | + } |
| 112 | + if (d == 0) { |
| 113 | + t[o] = 1, u[o] = e; |
| 114 | + } else { |
| 115 | + for (u[o] = 0, t[o] = d; t[o] % p == 0; t[o] /= p, ++u[o]); |
| 116 | + int g = e - 1 - u[o]; |
| 117 | + if (L(a[g], b[g]) == 0) { |
| 118 | + extand(bn[o], k + 1); |
| 119 | + bn[o][k] = (bn[o][k] + d) % mod; |
| 120 | + } else { |
| 121 | + int64 coef = t[o] * inverse(to[g], mod) % mod * pw[u[o] - uo[g]] % mod; |
| 122 | + int m = k - r[g]; |
| 123 | + extand(an[o], ao[g].size() + m); |
| 124 | + extand(bn[o], bo[g].size() + m); |
| 125 | + for (size_t i = 0; i < ao[g].size(); ++i) { |
| 126 | + an[o][i + m] -= coef * ao[g][i] % mod; |
| 127 | + if (an[o][i + m] < 0) an[o][i + m] += mod; |
| 128 | + } |
| 129 | + while (an[o].size() && an[o].back() == 0) an[o].pop_back(); |
| 130 | + for (size_t i = 0; i < bo[g].size(); ++i) { |
| 131 | + bn[o][i + m] -= coef * bo[g][i] % mod; |
| 132 | + if (bn[o][i + m] < 0) bn[o][i + m] -= mod; |
| 133 | + } |
| 134 | + while (bn[o].size() && bn[o].back() == 0) bn[o].pop_back(); |
| 135 | + } |
| 136 | + } |
| 137 | + } |
| 138 | + } |
| 139 | + return std::make_pair(an[0], bn[0]); |
| 140 | + }; |
| 141 | + |
| 142 | + std::vector<std::tuple<int64, int64, int>> fac; |
| 143 | + for (int64 i = 2; i * i <= mod; ++i) if (mod % i == 0) { |
| 144 | + int64 cnt = 0, pw = 1; |
| 145 | + while (mod % i == 0) mod /= i, ++cnt, pw *= i; |
| 146 | + fac.emplace_back(pw, i, cnt); |
| 147 | + } |
| 148 | + if (mod > 1) fac.emplace_back(mod, mod, 1); |
| 149 | + std::vector<vec> as; |
| 150 | + size_t n = 0; |
| 151 | + for (auto &&x: fac) { |
| 152 | + int64 mod, p, e; |
| 153 | + vec a, b; |
| 154 | + std::tie(mod, p, e) = x; |
| 155 | + auto ss = s; |
| 156 | + for (auto &&x: ss) x %= mod; |
| 157 | + std::tie(a, b) = prime_power(ss, mod, p, e); |
| 158 | + as.emplace_back(a); |
| 159 | + n = std::max(n, a.size()); |
| 160 | + } |
| 161 | + vec a(n), c(as.size()), m(as.size()); |
| 162 | + for (size_t i = 0; i < n; ++i) { |
| 163 | + for (size_t j = 0; j < as.size(); ++j) { |
| 164 | + m[j] = std::get<0>(fac[j]); |
| 165 | + c[j] = i < as[j].size() ? as[j][i] : 0; |
| 166 | + } |
| 167 | + a[i] = crt(c, m); |
| 168 | + } |
| 169 | + return a; |
| 170 | + } |
| 171 | + |
| 172 | + LinearRecurrence(const vec &s, const vec &c, int64 mod): |
| 173 | + init(s), trans(c), mod(mod), m(s.size()) {} |
| 174 | + LinearRecurrence(const vec &s, int64 mod, bool is_prime = true): mod(mod) { |
| 175 | + vec A; |
| 176 | + if (is_prime) A = BerlekampMassey(s, mod); |
| 177 | + else A = ReedsSloane(s, mod); |
| 178 | + if (A.empty()) A = {0}; |
| 179 | + m = A.size() - 1; |
| 180 | + trans.resize(m); |
| 181 | + for (int i = 0; i < m; ++i) { |
| 182 | + trans[i] = (mod - A[i + 1]) % mod; |
| 183 | + } |
| 184 | + std::reverse(trans.begin(), trans.end()); |
| 185 | + for (auto &&x: trans) printf("%lld ", x); |
| 186 | + puts(""); |
| 187 | + init = {s.begin(), s.begin() + m}; |
| 188 | + } |
| 189 | + int64 calc(int64 n) { |
| 190 | + if (mod == 1) return 0; |
| 191 | + if (n < m) return init[n]; |
| 192 | + vec v(m), u(m << 1); |
| 193 | + int msk = !!n; |
| 194 | + for (int64 m = n; m > 1; m >>= 1) msk <<= 1; |
| 195 | + v[0] = 1 % mod; |
| 196 | + for (int x = 0; msk; msk >>= 1, x <<= 1) { |
| 197 | + std::fill_n(u.begin(), m * 2, 0); |
| 198 | + x |= !!(n & msk); |
| 199 | + if (x < m) u[x] = 1 % mod; |
| 200 | + else {// can be optimized by fft/ntt |
| 201 | + for (int i = 0; i < m; ++i) { |
| 202 | + for (int j = 0, t = i + (x & 1); j < m; ++j, ++t) { |
| 203 | + u[t] = (u[t] + v[i] * v[j]) % mod; |
| 204 | + } |
| 205 | + } |
| 206 | + for (int i = m * 2 - 1; i >= m; --i) { |
| 207 | + for (int j = 0, t = i - m; j < m; ++j, ++t) { |
| 208 | + u[t] = (u[t] + trans[j] * u[i]) % mod; |
| 209 | + } |
| 210 | + } |
| 211 | + } |
| 212 | + v = {u.begin(), u.begin() + m}; |
| 213 | + } |
| 214 | + int64 ret = 0; |
| 215 | + for (int i = 0; i < m; ++i) { |
| 216 | + ret = (ret + v[i] * init[i]) % mod; |
| 217 | + } |
| 218 | + return ret; |
| 219 | + } |
| 220 | + |
| 221 | + vec init, trans; |
| 222 | + int64 mod; |
| 223 | + int m; |
| 224 | +}; |
0 commit comments