Skip to content

Commit 729d281

Browse files
committed
way better link checker
1 parent 20ba81b commit 729d281

File tree

2 files changed

+86
-32
lines changed

2 files changed

+86
-32
lines changed

advanced/datatypes.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ True
5555

5656
We can also convert anything [iterable](../basics/loops.md#summary) to a
5757
set [by calling the
58-
class](../basics/classes.md#why-should-I-use-custom-classes-in-my-projects).
58+
class](../basics/classes.md#why-should-i-use-custom-classes-in-my-projects).
5959

6060
```python
6161
>>> set('hello')

linkcheck.py

Lines changed: 85 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
3939
[some website](http://github.com/)
4040
[another website](https://github.com/)
41-
[local header](#some-header)
41+
[local link](#some-title)
4242
"""
4343

4444
import os
@@ -47,7 +47,7 @@
4747
import common
4848

4949

50-
def check(filepath, target):
50+
def check(this_file, target, title, titledict):
5151
"""Check if a link's target is like it should be.
5252
5353
Return an error message string or "ok".
@@ -57,45 +57,99 @@ def check(filepath, target):
5757
# be added later.
5858
return "ok"
5959

60-
if '#' in target:
61-
where = target.index('#')
62-
if where == 0:
63-
# It's a link to a title in the same file, we need to skip it.
64-
return "ok"
65-
target = target[:where]
60+
path = posixpath.join(posixpath.dirname(this_file), target)
61+
path = posixpath.normpath(path)
62+
real_path = common.slashfix(path)
6663

67-
path = posixpath.join(posixpath.dirname(filepath), target)
68-
realpath = common.slashfix(path)
69-
if not os.path.exists(realpath):
64+
if not os.path.exists(real_path):
7065
return "doesn't exist"
66+
7167
if target.endswith('/'):
7268
# A directory.
73-
if os.path.isdir(realpath):
74-
return "ok"
75-
return "not a directory"
69+
if not os.path.isdir(real_path):
70+
return "not a directory"
7671
else:
7772
# A file.
78-
if os.path.isfile(realpath):
79-
return "ok"
80-
return "not a file"
73+
if not os.path.isfile(real_path):
74+
return "not a file"
75+
76+
if title is not None and title not in titledict[path]:
77+
return "no title named %s" % title
78+
return "ok"
79+
80+
81+
def find_titles(filename):
    """Read titles of a markdown file and return a list of them.

    A title is any line that starts with '#' and is not inside a ```
    fenced code block. Each title is passed through common.header_link()
    after its leading '#' characters and surrounding whitespace have been
    stripped.
    """
    result = []

    with common.slashfix_open(filename, 'r') as f:
        in_code_block = False
        for line in f:
            if in_code_block:
                # Only a bare ``` line closes a code block. Tracking the
                # state with a flag (instead of a nested readline() loop)
                # means an unterminated block simply ends at end of file
                # rather than looping forever.
                if line.rstrip() == '```':
                    in_code_block = False
            elif line.startswith('```'):
                # skip the block's content to avoid detecting comments
                # in code samples as titles
                in_code_block = True
            elif line.startswith('#'):
                # found a title
                result.append(common.header_link(line.lstrip('#').strip()))

    return result
97+
98+
99+
def find_links(this_file):
    """Read links of a markdown file.

    Return a list of (file, title, lineno) tuples. The link target is
    split at its first '#': the part before it is the file and the part
    after it is the title. title is None when the target contains no '#'.
    For a link that starts with '#', like [blabla](#hi), file is the
    basename of this_file.
    """
    links = []

    with common.slashfix_open(this_file, 'r') as f:
        for match, lineno in common.find_links(f):
            target = match.group(2)
            linked_file, separator, title = target.partition('#')
            if not separator:
                # no '#' at all, so the link has no title part
                title = None
            elif not linked_file:
                # link to this file, [blabla](#hi)
                linked_file = posixpath.basename(this_file)

            links.append((linked_file, title, lineno))

    return links
121+
122+
123+
def get_line(filename, lineno):
    """Return line number lineno of a file, counting from 1.

    Raise ValueError if no line with that number is found in the file.
    """
    with common.slashfix_open(filename, 'r') as f:
        current = 0
        for line in f:
            current += 1
            if current == lineno:
                return line
    raise ValueError("%s is less than %d lines long" % (filename, lineno))
81130

82131

83132
def main():
84-
print("Searching and checking links...")
85-
broken = 0
86-
total = 0
133+
print("Searching for titles and links...")
134+
titledict = {} # {filename: [title1, title2, ...]}
135+
linkdict = {} # {filename: [(file, title, lineno), ...]}
87136
for path in common.get_markdown_files():
88-
with common.slashfix_open(path, 'r') as f:
89-
for match, lineno in common.find_links(f):
90-
text, target = match.groups()
91-
status = check(path, target)
92-
if status != "ok":
93-
# The .group(0) is not perfect, but it's good enough.
94-
print(" file %s, line %d: %s" % (path, lineno, status))
95-
print(" " + match.group(0))
96-
print()
97-
broken += 1
98-
total += 1
137+
titledict[path] = find_titles(path)
138+
linkdict[path] = find_links(path)
139+
140+
print("Checking the links...")
141+
total = 0
142+
broken = 0
143+
144+
for filename, linklist in linkdict.items():
145+
for target, title, lineno in linklist:
146+
status = check(filename, target, title, titledict)
147+
if status != "ok":
148+
print(" file %s, line %d: %s" % (filename, lineno, status))
149+
print(" %s" % get_line(filename, lineno))
150+
broken += 1
151+
total += 1
152+
99153
print("%d/%d links seem to be broken." % (broken, total))
100154

101155

0 commit comments

Comments
 (0)