Skip to content

Commit 5ff004e

Browse files
authored
Improved logic of Boyer Moore & added few edge cases in test (williamfiset#187)
1 parent 1266ede commit 5ff004e

File tree

2 files changed

+102
-21
lines changed

2 files changed

+102
-21
lines changed

src/main/java/com/williamfiset/algorithms/strings/BoyerMooreStringSearch.java

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,35 +17,43 @@ public class BoyerMooreStringSearch {
1717
* @return List of indexes where the pattern occurs
1818
*/
1919
public List<Integer> findOccurrences(String text, String pattern) {
20-
if (isNull(text) || isNull(pattern)) {
20+
if (isNull(text)
21+
|| isNull(pattern)
22+
|| pattern.length() > text.length()
23+
|| pattern.length() == 0) {
2124
return new ArrayList<>();
2225
}
2326
List<Integer> occurrences = new ArrayList<>();
24-
25-
Map<Character, Integer> skipTable = new HashMap<>();
26-
for (int i = 0; i < pattern.length(); i++) {
27-
skipTable.put(pattern.charAt(i), pattern.length() - i - 1);
28-
}
27+
Map<Character, Integer> skipTable = generateSkipTable(pattern);
2928

3029
int textIndex = pattern.length() - 1;
30+
int patternIndex = pattern.length() - 1;
3131
while (textIndex < text.length()) {
32-
int patternIndex = 0;
33-
boolean match = true;
34-
while (patternIndex < pattern.length() && match) {
35-
if (text.charAt(textIndex - patternIndex)
36-
!= pattern.charAt(pattern.length() - patternIndex - 1)) {
37-
match = false;
38-
textIndex +=
39-
skipTable.getOrDefault(text.charAt(textIndex - patternIndex), pattern.length());
32+
if (patternIndex >= 0 && pattern.charAt(patternIndex) == text.charAt(textIndex)) {
33+
if (patternIndex == 0) {
34+
occurrences.add(textIndex);
35+
} else {
36+
textIndex--;
4037
}
41-
patternIndex++;
42-
}
43-
if (match) {
44-
occurrences.add(textIndex - (pattern.length() - 1));
45-
textIndex += pattern.length();
38+
patternIndex--;
39+
} else {
40+
textIndex =
41+
textIndex
42+
+ pattern.length()
43+
- Math.min(
44+
Math.max(patternIndex, 0),
45+
1 + skipTable.getOrDefault(text.charAt(textIndex), 0));
46+
patternIndex = pattern.length() - 1;
4647
}
4748
}
48-
4949
return occurrences;
5050
}
51+
52+
private Map<Character, Integer> generateSkipTable(String pattern) {
53+
Map<Character, Integer> characterIndexMap = new HashMap<>();
54+
for (int charIndex = 0; charIndex < pattern.length(); charIndex++) {
55+
characterIndexMap.put(pattern.charAt(charIndex), charIndex);
56+
}
57+
return characterIndexMap;
58+
}
5159
}

src/test/java/com/williamfiset/algorithms/strings/BoyerMooreStringSearchTest.java

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,24 @@
11
package com.williamfiset.algorithms.strings;
22

33
import static com.google.common.truth.Truth.assertThat;
4+
import static java.util.Objects.isNull;
45

6+
import java.util.ArrayList;
7+
import java.util.List;
8+
import java.util.Random;
59
import org.junit.Before;
610
import org.junit.Test;
711

812
public class BoyerMooreStringSearchTest {
913

1014
private BoyerMooreStringSearch underTest;
15+
private Random random;
16+
private final int MAX_ITERATION = 20;
1117

1218
@Before
1319
public void setup() {
1420
underTest = new BoyerMooreStringSearch();
21+
random = new Random();
1522
}
1623

1724
@Test
@@ -35,12 +42,78 @@ public void shouldReturnOneOccurrence() {
3542
}
3643

3744
@Test
38-
public void shouldReturnMultiplyOccurrences() {
45+
public void shouldReturnMultipleOccurrences() {
46+
assertThat(underTest.findOccurrences("SAAT TE", "TE")).containsExactly(5);
3947
assertThat(
4048
underTest.findOccurrences("Sample text for testing the Boyer-Moore algorithm.", "te"))
4149
.containsExactly(7, 16);
4250
assertThat(underTest.findOccurrences("Sample text for testing the Boyer-Moore algorithm.", " "))
4351
.containsExactly(6, 11, 15, 23, 27, 39);
52+
assertThat(underTest.findOccurrences("AABAACAADAABAABA", "AABA")).containsExactly(0, 9, 12);
53+
assertThat(underTest.findOccurrences("AAAAAAA", "AA")).containsExactly(0, 1, 2, 3, 4, 5);
54+
}
55+
56+
@Test
57+
public void shouldReturnEmptyForPatternLengthLargerThenText() {
58+
assertThat(underTest.findOccurrences("This is a Test Text", "This is a test Pattern"))
59+
.isEmpty();
60+
}
61+
62+
@Test
63+
public void shouldReturnDynamicString() {
64+
int runLength = random.nextInt(MAX_ITERATION) + 1;
65+
for (int run = 0; run < runLength; run++) {
66+
int upperCharText = random.nextInt(3);
67+
int upperCharPattern = random.nextInt(3);
68+
int maxLengthText =
69+
random.nextInt(1000) + 100; // random length of the string between [100=1000]
70+
int maxLengthPattern = random.nextInt(10);
71+
String text = generateRandomString(upperCharText, maxLengthText);
72+
String pattern = generateRandomString(upperCharPattern, maxLengthPattern);
73+
assertThat(underTest.findOccurrences(text, pattern))
74+
.containsExactlyElementsIn(getOccurrencesBruteForce(text, pattern));
75+
}
76+
}
77+
78+
/**
79+
* @param text the text being searched in
80+
* @param pattern the pattern that needs to be searched in text
81+
* @return a list of beginning index of text where pattern exits
82+
*/
83+
private List<Integer> getOccurrencesBruteForce(String text, String pattern) {
84+
if (isNull(text)
85+
|| isNull(pattern)
86+
|| text.length() < pattern.length()
87+
|| pattern.length() == 0) {
88+
return new ArrayList<>();
89+
}
90+
List<Integer> occurrences = new ArrayList<>();
91+
for (int textIndex = 0; textIndex < text.length() - pattern.length() + 1; textIndex++) {
92+
boolean match = true;
93+
for (int patIndex = 0; patIndex < pattern.length(); patIndex++) {
94+
if (text.charAt(textIndex + patIndex) != pattern.charAt(patIndex)) {
95+
match = false;
96+
break;
97+
}
98+
}
99+
if (match) {
100+
occurrences.add(textIndex);
101+
}
102+
}
103+
return occurrences;
104+
}
105+
106+
/**
107+
* @param upperLimitAscii Largest element in the random string
108+
* @param length Length of the random string
109+
* @return Returns a random string containing character between [a-z]
110+
*/
111+
private String generateRandomString(int upperLimitAscii, int length) {
112+
return random
113+
.ints('a', 'a' + upperLimitAscii + 1)
114+
.limit(length)
115+
.collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append)
116+
.toString();
44117
}
45118

46119
// TODO(william): Add a test that compares this implementation of Boyermoore

0 commit comments

Comments
 (0)