2
2
3
3
import java .util .ArrayList ;
4
4
import java .util .HashMap ;
5
+ import java .util .HashSet ;
5
6
import java .util .List ;
6
7
import java .util .Map ;
8
+ import java .util .Set ;
7
9
8
10
public class _187 {
9
11
public static class Solution1 {
@@ -22,4 +24,46 @@ public List<String> findRepeatedDnaSequences(String s) {
22
24
return repeatedSequences ;
23
25
}
24
26
}
27
+
28
+ public static class Solution2 {
29
+ /**
30
+ * Use Rolling Hash/Rabin-Karp algorithm to significantly speed up the search.
31
+ *
32
+ * Rolling Hash/Rabin-Karp algorithm:
33
+ * Instead of comparing the entire string to the other, we compare only the hash after adding the incoming character
34
+ * and removing the outgoing character, this could be done in constant time.
35
+ * Back to this problem, since there are only 4 characters, we only need 2 bits to represent each character:
36
+ * 00 -> A
37
+ * 01 -> C
38
+ * 10 -> G
39
+ * 11 -> T
40
+ * so for a DNA sequence that is 10 character long, a total of 10 * 2 = 20 bits is good enough, this is much smaller than
41
+ * an Integer (32-bit) in most modern programming languages, so using one integer could well represent one DNA sequence.
42
+ * Thus we could do bit manipulation to implement the removal of the outgoing character and the addition of the incoming character.
43
+ *
44
+ * <<= 2 will shift the integer to the left, i.e. removing the outgoing character;
45
+ * |= val will add the incoming character.
46
+ */
47
+ public List <String > findRepeatedDnaSequences (String s ) {
48
+ Set <Integer > seen1stTime = new HashSet <>();
49
+ Set <Integer > seen2ndTime = new HashSet <>();
50
+ List <String > ans = new ArrayList <>();
51
+ char [] map = new char [26 ];
52
+ map ['A' - 'A' ] = 0 ;
53
+ map ['C' - 'A' ] = 1 ;
54
+ map ['G' - 'A' ] = 2 ;
55
+ map ['T' - 'A' ] = 3 ;
56
+ for (int i = 0 ; i < s .length () - 9 ; i ++) {
57
+ int hash = 0 ;
58
+ for (int j = i ; j < i + 10 ; j ++) {
59
+ hash <<= 2 ;
60
+ hash |= map [s .charAt (j ) - 'A' ];
61
+ }
62
+ if (!seen1stTime .add (hash ) && seen2ndTime .add (hash )) {
63
+ ans .add (s .substring (i , i + 10 ));
64
+ }
65
+ }
66
+ return ans ;
67
+ }
68
+ }
25
69
}
0 commit comments