Commit 94d65bc
Remove broken links (public-api-lists#16)
* Add broken-links.md. This file contains the list of broken links removed from readme.md. Some links may be temporarily down or may have moved, so we can investigate them and restore working versions in the future.

* Add an automated broken-links collector. To use it, download readme.md (raw file) and broken_link_finder.py and execute them on your machine. The program searches the entire readme (raw file) and collects the broken links in the error.txt file. Some webpages return an error to the script because of bot-protection features but work fine when tried manually in a browser, so a manual check is needed only on the filtered links saved in error.txt. The program thus narrows down the search for broken links.

* Remove broken links. There are a total of 70 links that don't work, whether temporarily or permanently; some pages may have moved. They were removed from readme.md and saved in broken-links.md for future reference.
1 parent 21478e6 commit 94d65bc
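
For reference, the core of the check is simple: fetch each URL and treat any 4xx/5xx response, or a connection failure, as a broken link. Below is a minimal standalone sketch of that idea; the function name, the `timeout` value, and the example URL are assumptions for illustration, not part of the committed script.

import requests

def looks_broken(url):
    # Sketch only: treat 4xx/5xx status codes and unreachable hosts as broken.
    try:
        return requests.get(url, timeout=10).status_code >= 400
    except requests.exceptions.RequestException:
        return True

print(looks_broken('https://example.com'))  # False while the page is up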

File tree

3 files changed: +194 / -70 lines

.github/broken-link-collectorr.py

Lines changed: 116 additions & 0 deletions
@@ -0,0 +1,116 @@
"""
Note:
This program filters out most of the good links and collects the links that
returned an error, saving them in a separate file.
Some links may work in a browser but not in Python because of the webpage's
security features, so a manual check on those filtered records is needed.
Since the program has already filtered out most of the good links, checking
the remaining links is quick.
The program takes a while to run, depending on internet speed.

Instructions:
Download the bad_link_filter script and the readme as raw files,
then execute the script on your machine.
The bad links will be saved in the error.txt file.
Then manually check the links mentioned in error.txt and remove the good
links from that file.
"""

import requests


def is_url_working(url):  # Check the status code of the webpage
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-us,en;q=0.5',
            'Accept-Encoding': 'gzip,deflate',
            'Connection': 'keep-alive',
            'Access-Control-Allow-Methods': 'POST',
            'Access-Control-Allow-Headers': 'X-PINGOTHER, Content-Type',
            'Pragma': 'no-cache',
            'Cache-Control': 'no-cache',
        }
        proxies = {"http": None, "https": None}
        response = requests.get(url, headers=headers, proxies=proxies)
        status = response.status_code
        if status >= 400:  # 4xx/5xx responses are treated as broken links
            return status
    except requests.exceptions.ConnectionError:
        return 'HTTPSConnectionPool error'
    except Exception as e:
        return e


def collect_error_links(indexes):  # Check every link, section by section
    error_links = []
    print('In progress; completed sections will be shown below. Please wait a while.')
    for index, section in indexes.items():
        for title, row in section.items():
            error = is_url_working(row['link'])
            if error:
                error_links.append({
                    'index': index,
                    'title': title,
                    'link': row['link'],
                    'error': error,
                })
        print(index, 'section completed')
    return error_links


def get_lines_from_file(location):  # Open and read the file, dropping empty lines and surrounding spaces
    with open(location, 'r') as file:
        lines = [line.strip() for line in file.readlines() if line.strip()]
    return lines


def line_to_dict(line):  # Convert an API table row to a dict
    line = line.strip().split('|')
    name, link = line[1].strip().split('](')
    name, link = name[1:], link[:-1]  # Strip the surrounding [ and ) of the markdown link
    row = {
        'link': link,
        'description': line[2],
        'auth': line[3],
        'https': line[4],
        'cors': line[5],
    }
    return name, row


def section_to_dict(lines, ind):  # Convert one section to a dict
    section = {}
    while ind < len(lines):
        if 'Back to Index' in lines[ind]:  # End of the section
            break
        name, row = line_to_dict(lines[ind])
        section[name] = row
        ind += 1
    return ind, section


def get_section_wise_dict(lines):  # Convert the unstructured lines to a section-wise dict
    ind = 0
    indexes = {}
    while ind < len(lines):
        if '###' in lines[ind]:  # Entering a section
            name = lines[ind][3:].strip()
            # Skip the section heading plus the table header and separator rows
            ind, indexes[name] = section_to_dict(lines, ind + 3)
        ind += 1
    return indexes


def link_to_error_file(error_links):  # Write the bad links to a file for the manual check
    lines = []
    for row in error_links:
        statement = '| {} | [{}]({}) | {} |'.format(row['index'], row['title'], row['link'], str(row['error']))
        lines.append(statement)
    with open('error.txt', 'w') as file:
        file.write('\n#Manual check has to be done on following links#\n\n')
        file.write('| Section | API | Error/Status Code |\n')
        file.write('|---|---|---|\n')
        for line in lines:
            file.write(line)
            file.write('\n')
    print("Written to file")
    print('A manual check has to be done for the links saved in error.txt')


location = input('Location of the raw public-api readme file: ')  # Get the location of the raw readme file
lines = get_lines_from_file(location)
indexes = get_section_wise_dict(lines)
error_links = collect_error_links(indexes)
link_to_error_file(error_links)
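
To show what the parser expects, here is a minimal sketch of the row handling in line_to_dict, run on a made-up readme entry; the API name and URL below are examples for illustration, not necessarily entries from the actual list.

row = '| [Cat Facts](https://catfact.ninja/) | Daily cat facts | No | Yes | No |'

cells = row.strip().split('|')
name, link = cells[1].strip().split('](')
name, link = name[1:], link[:-1]  # strip the leading "[" and trailing ")"
print(name, '->', link)           # Cat Facts -> https://catfact.ninja/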
