Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit a3b9465

Browse files
committedOct 15, 2021
add a solution for 187
1 parent 1d5b363 commit a3b9465

File tree

2 files changed

+70
-17
lines changed

2 files changed

+70
-17
lines changed
 

‎src/main/java/com/fishercoder/solutions/_187.java

+44
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22

33
import java.util.ArrayList;
44
import java.util.HashMap;
5+
import java.util.HashSet;
56
import java.util.List;
67
import java.util.Map;
8+
import java.util.Set;
79

810
public class _187 {
911
public static class Solution1 {
@@ -22,4 +24,46 @@ public List<String> findRepeatedDnaSequences(String s) {
2224
return repeatedSequences;
2325
}
2426
}
27+
28+
public static class Solution2 {
29+
/**
30+
* Use Rolling Hash/Rabin-Karp algorithm to significantly speed up the search.
31+
*
32+
* Rolling Hash/Rabin-Karp algorithm:
33+
* Instead of comparing the entire string to the other, we compare only the hash after adding the incoming character
34+
* and removing the outgoing character, this could be done in constant time.
35+
* Back to this problem, since there are only 4 characters, we only need 2 bits to represent each character:
36+
* 00 -> A
37+
* 01 -> C
38+
* 10 -> G
39+
* 11 -> T
40+
* so for a DNA sequence that is 10 character long, a total of 10 * 2 = 20 bits is good enough, this is much smaller than
41+
* an Integer (32-bit) in most modern programming languages, so using one integer could well represent one DNA sequence.
42+
* Thus we could do bit manipulation to implement the removal of the outgoing character and the addition of the incoming character.
43+
*
44+
* <<= 2 will shift the integer to the left, i.e. removing the outgoing character;
45+
* |= val will add the incoming character.
46+
*/
47+
public List<String> findRepeatedDnaSequences(String s) {
48+
Set<Integer> seen1stTime = new HashSet<>();
49+
Set<Integer> seen2ndTime = new HashSet<>();
50+
List<String> ans = new ArrayList<>();
51+
char[] map = new char[26];
52+
map['A' - 'A'] = 0;
53+
map['C' - 'A'] = 1;
54+
map['G' - 'A'] = 2;
55+
map['T' - 'A'] = 3;
56+
for (int i = 0; i < s.length() - 9; i++) {
57+
int hash = 0;
58+
for (int j = i; j < i + 10; j++) {
59+
hash <<= 2;
60+
hash |= map[s.charAt(j) - 'A'];
61+
}
62+
if (!seen1stTime.add(hash) && seen2ndTime.add(hash)) {
63+
ans.add(s.substring(i, i + 10));
64+
}
65+
}
66+
return ans;
67+
}
68+
}
2569
}

‎src/test/java/com/fishercoder/_187Test.java

+26-17
Original file line numberDiff line numberDiff line change
@@ -11,23 +11,32 @@
1111
import static org.junit.Assert.assertEquals;
1212

1313
public class _187Test {
14-
private static _187.Solution1 solution1;
15-
private static String s;
16-
private static List<String> expected;
17-
private static List<String> actual;
14+
private static _187.Solution1 solution1;
15+
private static _187.Solution2 solution2;
16+
private static String s;
17+
private static List<String> expected;
1818

19-
@BeforeClass
20-
public static void setup() {
21-
solution1 = new _187.Solution1();
22-
}
19+
@BeforeClass
20+
public static void setup() {
21+
solution1 = new _187.Solution1();
22+
solution2 = new _187.Solution2();
23+
}
2324

24-
@Test
25-
public void test1() {
26-
s = "AAAAAAAAAAA";
27-
System.out.println(s.length());
28-
actual = solution1.findRepeatedDnaSequences(s);
29-
expected = new ArrayList<>(Arrays.asList("AAAAAAAAAA"));
30-
System.out.println(expected.get(0).length());
31-
assertEquals(expected, actual);
32-
}
25+
@Test
26+
public void test1() {
27+
s = "AAAAAAAAAAA";
28+
System.out.println(s.length());
29+
expected = new ArrayList<>(Arrays.asList("AAAAAAAAAA"));
30+
assertEquals(expected, solution1.findRepeatedDnaSequences(s));
31+
assertEquals(expected, solution2.findRepeatedDnaSequences(s));
32+
}
33+
34+
@Test
35+
public void test2() {
36+
s = "AAAAAAAAAAAAA";
37+
System.out.println(s.length());
38+
expected = new ArrayList<>(Arrays.asList("AAAAAAAAAA"));
39+
assertEquals(expected, solution1.findRepeatedDnaSequences(s));
40+
assertEquals(expected, solution2.findRepeatedDnaSequences(s));
41+
}
3342
}

0 commit comments

Comments
 (0)
Failed to load comments.