File tree Expand file tree Collapse file tree 1 file changed +17
-6
lines changed Expand file tree Collapse file tree 1 file changed +17
-6
lines changed Original file line number Diff line number Diff line change 10
10
11
11
12
12
def crawl(url):
    """
    Crawls a page and collects the links found in it.

    Arguments:
    - url: URL of the page to crawl

    Return:
    - List of unique absolute links, in the order they first appear in
      the page text; an empty list if the response status is not 200
    """
    req = requests.get(url)

    # Check if successful
    if req.status_code != 200:
        return []

    # Resolve each link against the page URL *before* de-duplicating.
    # Different raw spellings (e.g. a relative path and its absolute form)
    # can resolve to the same absolute URL, so de-duplicating the raw
    # matches first would still let duplicates through.
    found_links = []
    seen = set()
    for link in link_re.findall(req.text):
        absolute = urljoin(url, link)
        if absolute not in seen:
            seen.add(absolute)
            found_links.append(absolute)

    print("\n Found {} unique links".format(len(found_links)))

    for link in found_links:
        print(link)

    return found_links
32
42
33
43
if __name__ == '__main__':
    # Script entry point: prompt for the start URL and crawl it.
    crawl(input("Enter a url to crawl: "))
You can’t perform that action at this time.
0 commit comments