/**
 * // This is the HtmlParser's API interface.
 * // You should not implement it, or speculate about its implementation
 * interface HtmlParser {
 *     public List<String> getUrls(String url) {}
 * }
 */
+ class Solution {
9
+ /**
10
+ * Solution uses the approach for the DFS. However, rather than creating an immutable function and collecting all the elements that have
11
+ * been processed and returned successfully from the recursive calls, we keep on appending them to the final set.
12
+ *
13
+ * This set is finally returned as a list.
14
+ */
15
+ public List <String > crawl (String startUrl , HtmlParser htmlParser ) {
16
+ final String hostName = this .getHostName (startUrl );
17
+ final Set <String > visitedUrls = Collections .synchronizedSet (new HashSet ());
18
+ this .crawlUrl (startUrl , htmlParser , hostName , visitedUrls );
19
+ return new ArrayList (visitedUrls );
20
+ }
21
+
22
+ private void crawlUrl (String startUrl ,
23
+ HtmlParser htmlParser ,
24
+ String domainName ,
25
+ Set <String > visitedUrls ) {
26
+
27
+ visitedUrls .add (startUrl );
28
+ htmlParser .getUrls (startUrl )
29
+ .parallelStream ()
30
+ .filter (url -> getHostName (url ).equals (domainName ))
31
+ .filter (url -> !visitedUrls .contains (url ))
32
+ .forEach (url -> crawlUrl (url , htmlParser , domainName , visitedUrls ));
33
+ }
34
+
35
+ private String getHostName (String url ) {
36
+ final int index = url .indexOf ("/" , 7 ); // Finding the indexes for the url post protocol.
37
+ return (index == -1 ) ? url : url .substring (0 , index );
38
+ }
39
+ }
0 commit comments