|
| 1 | +<!--?title Rabin-Karp Algorithm--> |
| 2 | + |
| 3 | +# Rabin-Karp Algorithm for string matching in O(|S| + |T|) |
| 4 | + |
| 5 | + |
| 6 | +This algorithm is based on the concept of hashing, so if you are not familiar with string hashing, kindly refer to the [String Hashing](./string/string-hashing.html) article. |
| 7 | + |
| 8 | + |
| 9 | +This algorithm was authored by Rabin and Karp in 1987. |
| 10 | + |
| 11 | +Problem: Given two strings - a pattern $S$ and a text $T$, determine if the pattern appears in the text and if it does, enumerate all its |
| 12 | +occurences in $O(|S| + |T|)$ time. |
| 13 | + |
| 14 | +Algorithm: Calculate the hash for the pattern $S$. Calculate hash values for all the prefixes of the text $T$. Now, we can compare a substring of length $|S|$ with $S$ in constant time using the calculated hashes. So, compare each substring of length $|S|$ with the pattern. This will take a total of $O(|T|)$ time. Hence the final complexity of the algorithm is $O(|T| + |S|)$ where $O(|S|)$ is required for calculating the hash of the pattern and $O(|T|)$ for comparing each substring of length $|S|$ with the pattern. |
| 15 | + |
| 16 | + |
| 17 | +## Implementation |
| 18 | + |
| 19 | + string s, t; // input |
| 20 | + |
| 21 | + // calculate all powers of p |
| 22 | + const int p = 31; |
| 23 | + vector<unsigned long long> p_pow(max(s.length(), t.length())); |
| 24 | + p_pow[0] = 1; |
| 25 | + for (size_t i = 1; i < p_pow.size(); ++i) |
| 26 | + p_pow[i] = p_pow[i-1] * p; |
| 27 | + |
| 28 | + // calculate hashes of all prefixes of text T |
| 29 | + vector<unsigned long long> h(t.length()); |
| 30 | + for (size_t i = 0; i < t.length(); i++) |
| 31 | + { |
| 32 | + h[i] = (t[i] - 'a' + 1) * p_pow[i]; |
| 33 | + if (i) h[i] + = h[i - 1]; |
| 34 | + } |
| 35 | + |
| 36 | + // calculate the hash of the pattern S |
| 37 | + unsigned long long h_s = 0; |
| 38 | + for (size_t i = 0; i < s.length(); i++) |
| 39 | + h_s += (s[i] - 'a' + 1) * p_pow[i]; |
| 40 | + |
| 41 | + // iterate over all substrings of T having length |S| and compare them |
| 42 | + // with S |
| 43 | + for (size_t i = 0; i + s.length() - 1 < t.length(); i++) |
| 44 | + { |
| 45 | + unsigned long long cur_h = h[i + s.length () - 1]; |
| 46 | + if (i) cur_h -= h [i - 1]; |
| 47 | + |
| 48 | + // get the hashes multiplied to the same degree of p and compare them |
| 49 | + if (cur_h == H_s * p_pow[i]) |
| 50 | + cout << i << ''; |
| 51 | + } |
| 52 | + |
| 53 | +## Practice Problems |
| 54 | + |
| 55 | +* [Pattern Find - SPOJ](http://www.spoj.com/problems/NAJPF/) |
| 56 | + |
0 commit comments