1
+ import json
1
2
import pprint
2
3
# Path of the markdown file that the script at the bottom of this module
# opens and parses.
fname = "/Users/300041709/code/self/wtfpython/README.md"
examples = []

# The globals: mutable parser state shared across the module.
# `sequence_num` is bumped by the block generators, `current_example` by the
# main parsing loop.
current_section_name = ""
current_example = sequence_num = 1
8
10
9
11
10
- def parse_example_parts (lines ):
12
# Prefixes that mark a snippet line as an interactively typed statement
# (as opposed to its output). Checked with str.startswith, in this order.
STATEMENT_PREFIXES = [
    "...",
    ">>> ",
    "$ ",
]
13
+
14
+
15
def generate_code_block(statements, output):
    """Build a "code" block record and advance the global sequence counter.

    Args:
        statements: Lines of code belonging to the block (stored as-is).
        output: Lines of output associated with the statements (stored as-is).

    Returns:
        A dict with the keys "type", "sequence_num", "statements" and
        "output", stamped with the value of the module-level `sequence_num`
        at call time.
    """
    global sequence_num
    assigned_num = sequence_num
    # Every generated block consumes exactly one sequence number.
    sequence_num = assigned_num + 1
    return {
        "type": "code",
        "sequence_num": assigned_num,
        "statements": statements,
        "output": output,
    }
25
+
26
+
27
def generate_markdown_block(lines):
    """Build a "markdown" block record and advance the global sequence counter.

    Args:
        lines: The markdown lines that make up the block (stored as-is).

    Returns:
        A dict with the keys "type", "sequence_num" and "value", stamped with
        the value of the module-level `sequence_num` at call time.
    """
    global sequence_num
    assigned_num = sequence_num
    # Every generated block consumes exactly one sequence number.
    sequence_num = assigned_num + 1
    return {
        "type": "markdown",
        "sequence_num": assigned_num,
        "value": lines,
    }
36
+
37
def is_interactive_statement(line):
    """Return True if `line` starts with any entry of STATEMENT_PREFIXES."""
    return any(map(line.startswith, STATEMENT_PREFIXES))
42
+
43
def _parse_blocks_until(lines, next_line, is_boundary):
    """Collect markdown/code blocks, starting at `next_line`, until a boundary.

    Lines outside ``` fences are gathered into markdown blocks. Lines inside
    a fence are split into code blocks: each time an interactive statement
    follows already-collected output, the statements/output gathered so far
    are flushed as their own block.

    Args:
        lines: Iterator over the remaining lines of the document.
        next_line: The first line to examine (already pulled from `lines`).
        is_boundary: Callable taking a line; parsing stops at the first
            non-fenced line for which it returns a true value.

    Returns:
        A `(boundary_line, blocks)` tuple, where `blocks` is the list of
        records produced by generate_markdown_block / generate_code_block.

    Raises:
        StopIteration: If `lines` runs out before a boundary line is seen.
    """
    blocks = []
    content = []
    while not is_boundary(next_line):
        if not next_line.startswith("```"):
            # Plain text: keep accumulating.
            content.append(next_line)
            next_line = next(lines)
            continue

        # A snippet opens; whatever was gathered until now is text.
        if content:
            blocks.append(generate_markdown_block(content))
            content = []

        statements, output = [], []
        is_interactive = False
        next_line = next(lines)  # skip the opening fence itself
        while not next_line.startswith("```"):
            if is_interactive_statement(next_line):
                is_interactive = True
                if output:
                    # A new statement after some output starts a new block.
                    blocks.append(generate_code_block(statements, output))
                    statements, output = [], []
                statements.append(next_line)
            elif is_interactive:
                # Un-prefixed line in an interactive snippet: treat as output.
                output.append(next_line)
            else:
                # Un-prefixed line in a plain snippet: ordinary code.
                statements.append(next_line)
            next_line = next(lines)

        # Snippet is over.
        blocks.append(generate_code_block(statements, output))
        next_line = next(lines)  # skip the closing fence

    if content:
        blocks.append(generate_markdown_block(content))
    return next_line, blocks


def parse_example_parts(lines, example_title_line):
    """Split one example into its "build_up" and "explanation" blocks.

    The build-up runs from the example's title line up to the first line
    starting with "#### "; the explanation runs from there up to the first
    line starting with "---" or "### ".

    Args:
        lines: Iterator over the remaining lines of the document.
        example_title_line: The example's title line; it becomes the first
            line of the build-up.

    Returns:
        A `(next_line, parts)` tuple: `next_line` is the line that ended the
        explanation, and `parts` is a dict with "build_up" and "explanation"
        lists of block records.

    Raises:
        StopIteration: Propagated from `next(lines)` if the input ends before
            the example is complete.
    """
    next_line, build_up = _parse_blocks_until(
        lines, example_title_line, lambda line: line.startswith("#### ")
    )
    # Explanation blocks go under "explanation"; previously the snippet and
    # pre-snippet text of this section were wrongly filed under "build_up".
    next_line, explanation = _parse_blocks_until(
        lines, next_line, lambda line: line.startswith(("---", "### "))
    )
    parts = {
        "build_up": build_up,
        "explanation": explanation,
    }
    return next_line, parts
118
137
138
def remove_from_beginning(tokens, line):
    """Strip leading tokens from `line`.

    Each token is tried in order against the (possibly already shortened)
    line. Only the leading occurrence is removed, so the same text appearing
    later in the line is left untouched.

    Args:
        tokens: Iterable of prefix strings to strip.
        line: The line to clean.

    Returns:
        The line with any matching leading tokens removed.
    """
    for token in tokens:
        if line.startswith(token):
            # Slice rather than str.replace: replace would also delete every
            # later occurrence of the token (e.g. the `...` in `x = [...]`).
            line = line[len(token):]
    return line
143
+
144
+
145
def inspect_and_sanitize_code_lines(lines):
    """Strip statement prefixes from code lines and detect print calls.

    Args:
        lines: Iterable of raw snippet lines.

    Returns:
        A `(is_print_present, sanitized)` tuple: `sanitized` is the list of
        lines after passing each through remove_from_beginning with
        STATEMENT_PREFIXES, and `is_print_present` is True when any sanitized
        line starts with "print " or "print(".
    """
    sanitized = [remove_from_beginning(STATEMENT_PREFIXES, raw) for raw in lines]
    has_print = any(
        cleaned.startswith(("print ", "print(")) for cleaned in sanitized
    )
    return has_print, sanitized
155
+
156
def convert_to_cells(cell_contents):
    """Turn parsed block records into notebook cell dicts.

    "markdown" records become markdown cells. "code" records become code
    cells whose single output is a stdout stream when the sanitized source
    contains a print call, and an execute_result otherwise. Records of any
    other type are ignored.

    Args:
        cell_contents: Iterable of block records (dicts with a "type" key).

    Returns:
        The list of cell dicts, in input order.
    """
    notebook_cells = []
    for block in cell_contents:
        kind = block["type"]

        if kind == "markdown":
            # todo add metadata later
            notebook_cells.append({
                "cell_type": "markdown",
                "metadata": {},
                "source": block["value"],
            })
            continue

        if kind != "code":
            continue

        has_print, source = inspect_and_sanitize_code_lines(block["statements"])
        if has_print:
            stream_output = {
                "name": "stdout",
                "output_type": "stream",
                "text": block["output"],
            }
            cell = {
                "cell_type": "code",
                "metadata": {"collapsed": True},
                "execution_count": None,
                "outputs": [stream_output],
                "source": source,
            }
        else:
            result_output = {
                "data": {"text/plain": block["output"]},
                "output_type": "execute_result",
                "metadata": {},
                "execution_count": None,
            }
            cell = {
                "cell_type": "code",
                "execution_count": None,
                "metadata": {"collapsed": True},
                "outputs": [result_output],
                "source": source,
            }
        notebook_cells.append(cell)

    return notebook_cells
207
+
208
+
209
def convert_to_notebook(parsed_json, notebook_path="test.ipynb"):
    """Assemble parsed examples into one notebook and write it to disk.

    For every example, the cells of its "build_up" part are added first,
    followed by the cells of its "explanation" part. The resulting notebook
    dict is pretty-printed to stdout and dumped as JSON to `notebook_path`.

    Args:
        parsed_json: Iterable of example dicts, each with a "parts" dict that
            may hold "build_up" and "explanation" lists of block records.
        notebook_path: Destination file for the notebook JSON. Defaults to
            "test.ipynb", the previously hard-coded location.

    Returns:
        None.
    """
    # The path is a parameter rather than a loop-local, so an empty
    # `parsed_json` no longer hits an unbound name when opening the file.
    result = {
        "cells": [],
        "metadata": {},
        "nbformat": 4,
        "nbformat_minor": 2
    }
    for example in parsed_json:
        parts = example["parts"]
        for part_name in ("build_up", "explanation"):
            blocks = parts.get(part_name)
            if blocks:
                result["cells"] += convert_to_cells(blocks)

    pprint.pprint(result, indent=2)
    with open(notebook_path, "w", encoding="utf-8") as f:
        json.dump(result, f)
231
+
232
+
119
233
120
234
with open (fname , 'r+' , encoding = "utf-8" ) as f :
121
235
lines = iter (f .readlines ())
@@ -126,6 +240,7 @@ def parse_example_parts(lines):
126
240
if line .startswith ("## " ):
127
241
# A section is encountered
128
242
current_section_name = line .replace ("## " , "" ).strip ()
243
+ section_text = []
129
244
line = next (lines )
130
245
# Until a new section is encountered
131
246
while not (line .startswith ("## " )):
@@ -138,19 +253,15 @@ def parse_example_parts(lines):
138
253
"title" : line .replace ("### " , "" ),
139
254
"section" : current_section_name
140
255
}
141
- line , example_details ["parts" ] = parse_example_parts (lines )
256
+ line , example_details ["parts" ] = parse_example_parts (lines , line )
142
257
result .append (example_details )
143
258
current_example += 1
144
259
else :
145
- # todo catch section text
260
+ section_text . append ( line )
146
261
line = next (lines )
147
262
else :
148
263
line = next (lines )
149
264
150
265
except StopIteration :
151
266
pprint .pprint (result , indent = 2 )
152
- print (len (result ))
153
-
154
-
155
-
156
-
267
+ convert_to_notebook (result )
0 commit comments