Skip to content

Commit 2e6aaf6

Browse files
committed
Removed unnecessary work around Jinja; now we just toss stuff to make HTML parsing work
1 parent f756cab commit 2e6aaf6

File tree

3 files changed

+37
-144
lines changed

3 files changed

+37
-144
lines changed

html5lib/html5parser.py

Lines changed: 1 addition & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -166,19 +166,10 @@ def mainLoop(self):
166166
CommentToken = tokenTypes["Comment"]
167167
DoctypeToken = tokenTypes["Doctype"]
168168
ParseErrorToken = tokenTypes["ParseError"]
169-
JinjaStatementStartTag = tokenTypes["JinjaStatementStartTag"]
170-
JinjaStatementEndTag = tokenTypes["JinjaStatementEndTag"]
171-
JinjaStatementTag = tokenTypes["JinjaStatementTag"]
172-
JinjaVariableStartTag = tokenTypes["JinjaVariableStartTag"]
173-
JinjaVariableEndTag = tokenTypes["JinjaVariableEndTag"]
174-
JinjaVariable = tokenTypes["JinjaVariable"]
175-
JinjaPipe = tokenTypes["JinjaPipe"]
176-
JinjaFilter = tokenTypes["JinjaFilter"]
177169

178170
for token in self.normalizedTokens():
179171
new_token = token
180172
while new_token is not None:
181-
log.debug(u"Token {} Phase = {}".format(new_token, self.phase))
182173
currentNode = self.tree.openElements[-1] if self.tree.openElements else None
183174
currentNodeNamespace = currentNode.namespace if currentNode else None
184175
currentNodeName = currentNode.name if currentNode else None
@@ -189,10 +180,7 @@ def mainLoop(self):
189180
self.parseError(new_token["data"], new_token.get("datavars", {}))
190181
new_token = None
191182
else:
192-
if type in (JinjaVariableStartTag, JinjaVariableEndTag, JinjaVariable, JinjaFilter, JinjaPipe):
193-
log.debug(u"Type is a jinja tag")
194-
phase = self.phases["inJinjaVariable"]
195-
elif (
183+
if (
196184
len(self.tree.openElements) == 0 or
197185
currentNodeNamespace == self.tree.defaultNamespace or
198186
(self.isMathMLTextIntegrationPoint(currentNode) and
@@ -220,22 +208,6 @@ def mainLoop(self):
220208
new_token = phase.processComment(new_token)
221209
elif type == DoctypeToken:
222210
new_token = phase.processDoctype(new_token)
223-
elif type == JinjaStatementStartTag:
224-
new_token = phase.processJinjaStatementStartTag(new_token)
225-
elif type == JinjaStatementEndTag:
226-
new_token = phase.processJinjaStatementEndTag(new_token)
227-
elif type == JinjaStatementTag:
228-
new_token = phase.processJinjaStatementTag(new_token)
229-
elif type == JinjaVariableStartTag:
230-
new_token = phase.processJinjaVariableStartTag(new_token)
231-
elif type == JinjaVariableEndTag:
232-
new_token = phase.processJinjaVariableEndTag(new_token)
233-
elif type == JinjaVariable:
234-
new_token = phase.processJinjaVariable(new_token)
235-
elif type == JinjaPipe:
236-
new_token = phase.processJinjaPipe(new_token)
237-
elif type == JinjaFilter:
238-
new_token = phase.processJinjaFilter(new_token)
239211

240212
if (type == StartTagToken and token["selfClosing"]
241213
and not token["selfClosingAcknowledged"]):
@@ -432,7 +404,6 @@ def resetInsertionMode(self):
432404
new_phase = self.phases["inBody"]
433405
break
434406

435-
#log.debug(u"Changing phase to {}".format(new_phase))
436407
self.phase = new_phase
437408

438409
def parseRCDataRawtext(self, token, contentType):
@@ -450,7 +421,6 @@ def parseRCDataRawtext(self, token, contentType):
450421

451422
self.originalPhase = self.phase
452423

453-
log.debug(u"Changing phase to text")
454424
self.phase = self.phases["text"]
455425

456426

@@ -551,44 +521,6 @@ def startTagHtml(self, token):
551521
def processEndTag(self, token):
552522
return self.endTagHandler[token["name"]](token)
553523

554-
class InJinjaVariablePhase(Phase):
555-
def processJinjaVariableStartTag(self, token):
556-
log = logging.getLogger('html5lib')
557-
log.debug(u"InJinja: Start Tag")
558-
self.tree.reconstructActiveFormattingElements()
559-
self.tree.insertElement(token)
560-
561-
def processJinjaVariableEndTag(self, token):
562-
log = logging.getLogger('html5lib')
563-
log.debug(u"InJinja: End Tag {}".format(token["name"]))
564-
for node in self.tree.openElements[::-1]:
565-
log.debug(u"InJinja: Open tag {} token {}".format(node, token))
566-
if node.name == token["name"]:
567-
self.tree.generateImpliedEndTags(exclude=token["name"])
568-
log.debug(u"InJinja: Implied end tag {} {}".format(self.tree.openElements[-1].name, token["name"]))
569-
if self.tree.openElements[-1].name != token["name"]:
570-
self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
571-
while self.tree.openElements.pop() != node:
572-
pass
573-
break
574-
else:
575-
if node.nameTuple in specialElements:
576-
log.debug(u"Nametuple {} in {}".format(node.nameTuple, specialElements))
577-
self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
578-
break
579-
580-
def processJinjaVariable(self, token):
581-
element = self.tree.createElementWithoutNamespace(token)
582-
self.tree.openElements[-1].appendChild(element)
583-
584-
def processJinjaPipe(self, token):
585-
element = self.tree.createElementWithoutNamespace(token)
586-
self.tree.openElements[-1].appendChild(element)
587-
588-
def processJinjaFilter(self, token):
589-
element = self.tree.createElementWithoutNamespace(token)
590-
self.tree.openElements[-1].appendChild(element)
591-
592524
class InitialPhase(Phase):
593525
def processSpaceCharacters(self, token):
594526
pass
@@ -882,8 +814,6 @@ def startTagOther(self, token):
882814
def endTagHead(self, token):
883815
node = self.parser.tree.openElements.pop()
884816
assert node.name == "head", "Expected head got %s" % node.name
885-
log = logging.getLogger(u"html5lib")
886-
log.debug(u"Switching phase to afterHead")
887817
self.parser.phase = self.parser.phases["afterHead"]
888818

889819
def endTagHtmlBodyBr(self, token):
@@ -894,8 +824,6 @@ def endTagOther(self, token):
894824
self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
895825

896826
def anythingElse(self):
897-
log = logging.getLogger(u"html5lib")
898-
log.debug(u"Implied end head tag")
899827
self.endTagHead(impliedTagToken("head"))
900828

901829
# XXX If we implement a parser for which scripting is disabled we need to
@@ -966,8 +894,6 @@ def endTagOther(self, token):
966894

967895
def anythingElse(self):
968896
self.tree.insertElement(impliedTagToken("body", "StartTag"))
969-
log = logging.getLogger(u"html5lib")
970-
log.debug(u"Changing phase to body")
971897
self.parser.phase = self.parser.phases["inBody"]
972898
self.parser.framesetOK = True
973899

@@ -2793,7 +2719,6 @@ def processEndTag(self, token):
27932719
"inHead": InHeadPhase,
27942720
# XXX "inHeadNoscript": InHeadNoScriptPhase,
27952721
"afterHead": AfterHeadPhase,
2796-
"inJinjaVariable": InJinjaVariablePhase,
27972722
"inBody": InBodyPhase,
27982723
"text": TextPhase,
27992724
"inTable": InTablePhase,

html5lib/tokenizer.py

Lines changed: 36 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -294,23 +294,21 @@ def jinjaOpenState(self):
294294
data = self.stream.char()
295295

296296
if data == "{":
297-
self.currentToken = {
298-
"type": tokenTypes["JinjaVariableStartTag"],
299-
"name": u"jinjavariabletag", "data": {},
300-
"namespace": None,
301-
"selfClosing": False
302-
}
297+
#self.currentToken = {
298+
#"type": tokenTypes["JinjaVariableStartTag"],
299+
#"name": "{{", "data": {},
300+
#"selfClosing": False
301+
#}
303302

304-
self.tokenQueue.append(self.currentToken)
303+
#self.tokenQueue.append(self.currentToken)
305304

306305
self.state = self.jinjaVariableState
307306
elif data == "%":
308-
self.tokenQueue.append({
309-
"type": tokenTypes["JinjaStatementStartTag"],
310-
"name": "{%", "data": {},
311-
"namespace": None,
312-
"selfClosing": False
313-
})
307+
#self.tokenQueue.append({
308+
#"type": tokenTypes["JinjaStatementStartTag"],
309+
#"name": "{%", "data": {},
310+
#"selfClosing": False
311+
#})
314312

315313
self.state = self.jinjaStatementState
316314

@@ -322,23 +320,19 @@ def jinjaStatementEndState(self):
322320
data = self.stream.char()
323321

324322
if data == "}":
325-
self.tokenQueue.append({
326-
"type": tokenTypes["JinjaStatementEndTag"],
327-
"name": "%}", "data": [],
328-
"selfClosing": False
329-
})
323+
#self.tokenQueue.append({
324+
#"type": tokenTypes["JinjaStatementEndTag"],
325+
#"name": "%}", "data": [],
326+
#"selfClosing": False
327+
#})
330328
self.state = self.dataState
331329
elif data is EOF:
332330
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
333331
"expected-jinja-statement-closing-tag-but-got-eof",
334332
"datavars": {"data": data}})
335333
self.state = self.dataState
336334
else:
337-
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
338-
"expected-jinja-statement-closing-tag-but-got-char",
339-
"datavars": {"data": data}})
340-
self.stream.unget(data)
341-
self.state = self.bogusCommentState
335+
self.state = self.jinjaStatementState
342336

343337
#self.state = self.dataState
344338
return True
@@ -348,23 +342,19 @@ def jinjaVariableEndState(self):
348342
data = self.stream.char()
349343

350344
if data == "}":
351-
self.tokenQueue.append({
352-
"type": tokenTypes["JinjaVariableEndTag"],
353-
"name": u"jinjavariabletag", "data": [],
354-
"selfClosing": False
355-
})
345+
#self.tokenQueue.append({
346+
#"type": tokenTypes["JinjaVariableEndTag"],
347+
#"name": "}}", "data": [],
348+
#"selfClosing": False
349+
#})
356350
self.state = self.dataState
357351
elif data is EOF:
358352
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
359353
"expected-jinja-variable-closing-tag-but-got-eof",
360354
"datavars": {"data": data}})
361355
self.state = self.dataState
362356
else:
363-
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
364-
"expected-jinja-variable-closing-tag-but-got-char",
365-
"datavars": {"data": data}})
366-
self.stream.unget(data)
367-
self.state = self.bogusCommentState
357+
self.state = self.jinjaStatementState
368358

369359
#self.state = self.dataState
370360
return True
@@ -376,12 +366,12 @@ def jinjaStatementState(self):
376366
self.state = self.jinjaStatementEndState
377367
elif data is EOF:
378368
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
379-
"eof-in-jinja-statement"})
369+
"missing-jinja-closing-brace"})
380370
self.state = self.dataState
381371
else:
382372
chars = self.stream.charsUntil(("%", "\u0000"))
383-
self.tokenQueue.append({"type": tokenTypes["JinjaStatementTag"], "data":
384-
data + chars})
373+
#self.tokenQueue.append({"type": tokenTypes["JinjaStatementTag"], "data":
374+
#data + chars})
385375

386376
return True
387377

@@ -390,46 +380,25 @@ def jinjaVariableState(self):
390380

391381
if data == "}":
392382
self.state = self.jinjaVariableEndState
393-
#elif data == "(":
394-
#self.state = self.jinjaArgState
395383
elif data is EOF:
396384
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
397-
"eof-in-jinja-variable"})
385+
"missing-jinja-closing-brace"})
398386
self.state = self.dataState
399387
elif data in spaceCharacters:
400388
# Skip spaces
401389
pass
402390
elif data == "|":
403-
self.currentToken = {"type": tokenTypes["JinjaPipe"],
404-
"name": "jinjapipe", "selfClosing": True, "data": {
405-
"value": data,
406-
"position": self.stream.position(),
407-
}}
408-
self.tokenQueue.append(self.currentToken)
391+
pass
409392
# If this is the first token after the variable start tag
410-
elif self.currentToken['type'] == tokenTypes["JinjaVariableStartTag"]:
411-
#log.debug(u"Got start tag {}".format(("|", "}", "\u0000") | spaceCharacters))
412-
413-
chars = self.stream.charsUntil(frozenset(("(", "|", "}", "\u0000")) | spaceCharacters)
414-
self.currentToken = {"type": tokenTypes["JinjaVariable"],
415-
"name": "jinjavariable", "selfClosing": True, "data": {
416-
"value": data + chars,
417-
"position": self.stream.position(),
418-
}}
419-
self.tokenQueue.append(self.currentToken)
420-
elif self.currentToken['type'] == tokenTypes["JinjaPipe"]:
421-
chars = self.stream.charsUntil(frozenset(("(", "|", "}", "\u0000")) | spaceCharacters)
422-
self.currentToken = {"type": tokenTypes["JinjaFilter"],
423-
"name": "jinjafilter", "selfClosing": True, "data": {
424-
"value": data + chars,
425-
"position": self.stream.position(),
426-
}}
427-
self.tokenQueue.append(self.currentToken)
428393
else:
429-
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
430-
"expected-jinja-pipe-got-character"})
431-
self.stream.unget(data)
432-
self.state = self.bogusCommentState
394+
chars = self.stream.charsUntil(frozenset(("}", "\u0000")) | spaceCharacters)
395+
#self.currentToken = {"type": tokenTypes["JinjaFilterTag"], "data":
396+
#data + chars}
397+
#self.tokenQueue.append(self.currentToken)
398+
#else:
399+
#chars = self.stream.charsUntil(("}", "\u0000"))
400+
#self.tokenQueue.append({"type": tokenTypes["JinjaVariableTag"], "data":
401+
#data + chars})
433402

434403
return True
435404

html5lib/treebuilders/etree.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@ def _getAttributes(self):
6363
return self._element.attrib
6464

6565
def _setAttributes(self, attributes):
66-
log.debug(u"Attributes {}".format(attributes))
6766
# Delete existing attributes first
6867
# XXX - there may be a better way to do this...
6968
for key in list(self._element.attrib.keys()):

0 commit comments

Comments
 (0)