Skip to content

Commit 7a448d0

Browse files
authored
Create Web Crawler.java
1 parent 2ab62d1 commit 7a448d0

File tree

1 file changed

+31
-0
lines changed

1 file changed

+31
-0
lines changed

Medium/Web Crawler.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/**
2+
* // This is the HtmlParser's API interface.
3+
* // You should not implement it, or speculate about its implementation
4+
* interface HtmlParser {
5+
* public List<String> getUrls(String url) {}
6+
* }
7+
*/
8+
class Solution {
9+
public List<String> crawl(String startUrl, HtmlParser htmlParser) {
10+
Set<String> visited = new HashSet<>();
11+
Stack<String> stack = new Stack<>();
12+
stack.push(startUrl);
13+
String hostname = getHostname(startUrl);
14+
while(!stack.isEmpty()) {
15+
String popped = stack.pop();
16+
visited.add(popped);
17+
List<String> connectedUrls = htmlParser.getUrls(popped);
18+
for (String url : connectedUrls) {
19+
if (!visited.contains(url) && url.contains(hostname)) {
20+
stack.push(url);
21+
}
22+
}
23+
}
24+
return new ArrayList<>(visited);
25+
}
26+
27+
private String getHostname(String url) {
28+
String[] splits = url.split("/");
29+
return splits[2];
30+
}
31+
}

0 commit comments

Comments
 (0)