Skip to content

Commit 5c8c4c5

Browse files
committed
Get the generator in a working state
1 parent c475cbf commit 5c8c4c5

File tree

1 file changed

+175
-64
lines changed

1 file changed

+175
-64
lines changed

irrelevant/json_generator.py

Lines changed: 175 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,121 +1,235 @@
1+
import json
12
import pprint
23
fname = "/Users/300041709/code/self/wtfpython/README.md"
34
examples = []
45

56
# The globals
67
current_example = 1
8+
sequence_num = 1
79
current_section_name = ""
810

911

10-
def parse_example_parts(lines):
12+
STATEMENT_PREFIXES = ["...", ">>> ", "$ "]
13+
14+
15+
def generate_code_block(statements, output):
    """Build a "code" block record and advance the global sequence counter.

    Args:
        statements: Lines of source code belonging to the block.
        output: Lines of output associated with those statements.

    Returns:
        A dict with keys "type", "sequence_num", "statements" and "output".
        "sequence_num" is the value of the module-level counter at call time.
    """
    global sequence_num
    assigned_num = sequence_num
    # Every generated block consumes one sequence number.
    sequence_num = assigned_num + 1
    return {
        "type": "code",
        "sequence_num": assigned_num,
        "statements": statements,
        "output": output
    }
25+
26+
27+
def generate_markdown_block(lines):
    """Build a "markdown" block record and advance the global sequence counter.

    Args:
        lines: The text lines that make up the markdown block.

    Returns:
        A dict with keys "type", "sequence_num" and "value". "sequence_num"
        is the value of the module-level counter at call time.
    """
    global sequence_num
    assigned_num = sequence_num
    # Every generated block consumes one sequence number.
    sequence_num = assigned_num + 1
    return {
        "type": "markdown",
        "sequence_num": assigned_num,
        "value": lines
    }
36+
37+
def is_interactive_statement(line):
    """Return True when *line* begins with one of STATEMENT_PREFIXES."""
    return any(line.startswith(marker) for marker in STATEMENT_PREFIXES)
42+
43+
def _collect_blocks(lines, first_line, is_section_end, sink):
    """Append markdown/code blocks to *sink* until a section terminator.

    Args:
        lines: Iterator yielding the remaining lines of the document.
        first_line: The line to start processing from (already consumed
            from *lines* by the caller).
        is_section_end: Callable taking a line and returning True when the
            current section is over.
        sink: List that receives the generated block dicts, in order.

    Returns:
        The line for which ``is_section_end`` returned True.

    Raises:
        StopIteration: If *lines* is exhausted before a terminator is seen.
    """
    text = []
    statements, output = [], []
    line = first_line
    while not is_section_end(line):
        if not line.startswith("```"):
            # Plain text outside of a fenced snippet.
            text.append(line)
            line = next(lines)
            continue

        # A fence opens a snippet: whatever was gathered so far is text.
        if text:
            sink.append(generate_markdown_block(text))
            text = []

        is_interactive = False
        line = next(lines)
        while not line.startswith("```"):
            if is_interactive_statement(line):
                is_interactive = True
                # A new statement after some output starts a new code block.
                if output:
                    sink.append(generate_code_block(statements, output))
                    statements, output = [], []
                statements.append(line)
            elif is_interactive:
                # Un-prefixed line inside an interactive snippet: output.
                output.append(line)
            else:
                # Un-prefixed line in a non-interactive snippet: plain code.
                statements.append(line)
            line = next(lines)

        # Snippet is over (closing fence reached).
        sink.append(generate_code_block(statements, output))
        statements, output = [], []
        line = next(lines)

    # Flush any trailing text gathered before the terminator.
    if text:
        sink.append(generate_markdown_block(text))
    return line


def parse_example_parts(lines, example_title_line):
    """Split one example into its "build_up" and "explanation" blocks.

    Lines are consumed starting with *example_title_line*. Everything up to
    the first H4 line ("#### ") goes to "build_up"; everything from there up
    to a "---" line or the next H3 line ("### ") goes to "explanation".

    Args:
        lines: Iterator yielding the document lines that follow the title.
        example_title_line: The example's title line, processed first.

    Returns:
        A tuple ``(next_line, parts)`` where *next_line* is the line that
        ended the explanation and *parts* is a dict with the list-valued
        keys "build_up" and "explanation".

    Raises:
        StopIteration: If *lines* runs out before the example is complete;
            it is deliberately left to propagate to the caller.
    """
    parts = {
        "build_up": [],
        "explanation": []
    }

    # Store build_up till an H4 (explanation) is encountered.
    next_line = _collect_blocks(
        lines,
        example_title_line,
        lambda line: line.startswith("#### "),
        parts["build_up"],
    )

    # Store explanation until --- or another H3 is encountered. These blocks
    # belong to "explanation", not "build_up".
    next_line = _collect_blocks(
        lines,
        next_line,
        lambda line: line.startswith("---") or line.startswith("### "),
        parts["explanation"],
    )

    return next_line, parts
118137

138+
def remove_from_beginning(tokens, line):
    """Strip leading tokens from *line*.

    Each token is checked in order against the current start of the line;
    when it matches, only that leading occurrence is removed. Occurrences
    of the token elsewhere in the line are left untouched.

    Args:
        tokens: Iterable of prefix strings to strip.
        line: The string to clean.

    Returns:
        The line with matching leading tokens removed.
    """
    for token in tokens:
        if line.startswith(token):
            # Slice rather than str.replace, which would also delete every
            # later occurrence of the token inside the line.
            line = line[len(token):]
    return line
143+
144+
145+
def inspect_and_sanitize_code_lines(lines):
    """Strip statement prefixes from code lines and detect print calls.

    Args:
        lines: Iterable of raw code lines.

    Returns:
        A tuple ``(is_print_present, sanitized_lines)``. The flag is True
        when any sanitized line starts with "print " or "print(".
    """
    cleaned = [remove_from_beginning(STATEMENT_PREFIXES, raw) for raw in lines]
    has_print = any(
        entry.startswith("print ") or entry.startswith("print(")
        for entry in cleaned
    )
    return has_print, cleaned
155+
156+
def convert_to_cells(cell_contents):
    """Translate parsed blocks into notebook cell dictionaries.

    Blocks of type "markdown" become markdown cells. Blocks of type "code"
    become code cells whose single output is a stdout stream when a print
    statement is present, and an execute_result otherwise. Blocks of any
    other type are skipped.

    Args:
        cell_contents: Iterable of block dicts carrying a "type" key.

    Returns:
        A list of cell dicts in the same order as the accepted blocks.
    """
    notebook_cells = []
    for block in cell_contents:
        kind = block["type"]

        if kind == "markdown":
            # TODO: add metadata later
            notebook_cells.append({
                "cell_type": "markdown",
                "metadata": {},
                "source": block["value"]
            })
            continue

        if kind != "code":
            # Unknown block types produce no cell.
            continue

        has_print, source_lines = inspect_and_sanitize_code_lines(block["statements"])
        if has_print:
            # Printed text is represented as a stdout stream output.
            cell = {
                "cell_type": "code",
                "metadata": {
                    "collapsed": True
                },
                "execution_count": None,
                "outputs": [{
                    "name": "stdout",
                    "output_type": "stream",
                    "text": block["output"]
                }],
                "source": source_lines
            }
        else:
            # Without a print, the output is recorded as an execute_result.
            cell = {
                "cell_type": "code",
                "execution_count": None,
                "metadata": {
                    "collapsed": True
                },
                "outputs": [{
                    "data": {
                        "text/plain": block["output"]
                    },
                    "output_type": "execute_result",
                    "metadata": {},
                    "execution_count": None
                }],
                "source": source_lines
            }
        notebook_cells.append(cell)

    return notebook_cells
207+
208+
209+
def convert_to_notebook(parsed_json, notebook_path="test.ipynb"):
    """Assemble parsed examples into a notebook dict and write it as JSON.

    For every example, the "build_up" blocks are converted to cells first,
    followed by the "explanation" blocks. The resulting notebook is
    pretty-printed to stdout and dumped to *notebook_path*.

    Args:
        parsed_json: Iterable of example dicts, each with a "parts" dict
            that may contain "build_up" and "explanation" lists.
        notebook_path: Destination file for the notebook JSON. Defaults to
            "test.ipynb". Defined up front so that an empty *parsed_json*
            still produces a (cell-less) notebook instead of failing.
    """
    result = {
        "cells": [],
        "metadata": {},
        "nbformat": 4,
        "nbformat_minor": 2
    }
    for example in parsed_json:
        parts = example["parts"]
        # Order matters: the build-up precedes its explanation.
        for section in ("build_up", "explanation"):
            blocks = parts.get(section)
            if blocks:
                result["cells"] += convert_to_cells(blocks)

    pprint.pprint(result, indent=2)
    with open(notebook_path, "w") as f:
        json.dump(result, f)
231+
232+
119233

120234
with open(fname, 'r+', encoding="utf-8") as f:
121235
lines = iter(f.readlines())
@@ -126,6 +240,7 @@ def parse_example_parts(lines):
126240
if line.startswith("## "):
127241
# A section is encountered
128242
current_section_name = line.replace("## ", "").strip()
243+
section_text = []
129244
line = next(lines)
130245
# Until a new section is encountered
131246
while not (line.startswith("## " )):
@@ -138,19 +253,15 @@ def parse_example_parts(lines):
138253
"title": line.replace("### ", ""),
139254
"section": current_section_name
140255
}
141-
line, example_details["parts"] = parse_example_parts(lines)
256+
line, example_details["parts"] = parse_example_parts(lines, line)
142257
result.append(example_details)
143258
current_example += 1
144259
else:
145-
# todo catch section text
260+
section_text.append(line)
146261
line = next(lines)
147262
else:
148263
line = next(lines)
149264

150265
except StopIteration:
151266
pprint.pprint(result, indent=2)
152-
print(len(result))
153-
154-
155-
156-
267+
convert_to_notebook(result)

0 commit comments

Comments
 (0)