Skip to content

Commit 5246944

Browse files
committed
WIP - Jinja parsing
1 parent 53a0132 commit 5246944

File tree

5 files changed

+105
-14
lines changed

5 files changed

+105
-14
lines changed

html5lib/constants.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3091,7 +3091,8 @@
30913091
"JinjaStatementTag": 10,
30923092
"JinjaVariableStartTag": 11,
30933093
"JinjaVariableEndTag": 12,
3094-
"JinjaVariableTag": 13
3094+
"JinjaVariable": 13,
3095+
"JinjaFilter": 14
30953096
}
30963097

30973098
tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"],

html5lib/html5parser.py

Lines changed: 50 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,8 @@ def mainLoop(self):
171171
JinjaStatementTag = tokenTypes["JinjaStatementTag"]
172172
JinjaVariableStartTag = tokenTypes["JinjaVariableStartTag"]
173173
JinjaVariableEndTag = tokenTypes["JinjaVariableEndTag"]
174-
JinjaVariableTag = tokenTypes["JinjaVariableTag"]
174+
JinjaVariable = tokenTypes["JinjaVariable"]
175+
JinjaFilter = tokenTypes["JinjaFilter"]
175176

176177
for token in self.normalizedTokens():
177178
new_token = token
@@ -187,7 +188,11 @@ def mainLoop(self):
187188
self.parseError(new_token["data"], new_token.get("datavars", {}))
188189
new_token = None
189190
else:
190-
if (len(self.tree.openElements) == 0 or
191+
if type in (JinjaVariableStartTag, JinjaVariableEndTag, JinjaVariable, JinjaFilter):
192+
log.debug(u"Type is a jinja tag")
193+
phase = self.phases["inJinjaVariable"]
194+
elif (
195+
len(self.tree.openElements) == 0 or
191196
currentNodeNamespace == self.tree.defaultNamespace or
192197
(self.isMathMLTextIntegrationPoint(currentNode) and
193198
((type == StartTagToken and
@@ -224,8 +229,10 @@ def mainLoop(self):
224229
new_token = phase.processJinjaVariableStartTag(new_token)
225230
elif type == JinjaVariableEndTag:
226231
new_token = phase.processJinjaVariableEndTag(new_token)
227-
elif type == JinjaVariableTag:
228-
new_token = phase.processJinjaVariableTag(new_token)
232+
elif type == JinjaVariable:
233+
new_token = phase.processJinjaVariable(new_token)
234+
elif type == JinjaFilter:
235+
new_token = phase.processJinjaFilter(new_token)
229236

230237
if (type == StartTagToken and token["selfClosing"]
231238
and not token["selfClosingAcknowledged"]):
@@ -516,7 +523,10 @@ def processJinjaVariableStartTag(self, token):
516523
def processJinjaVariableEndTag(self, token):
517524
pass
518525

519-
def processJinjaVariableTag(self, token):
526+
def processJinjaVariable(self, token):
    """Default handler for ``JinjaVariable`` tokens: ignore the token.

    :arg token: the ``JinjaVariable`` token handed over by the main loop

    :returns: ``None``
    """
    pass

def processJinjaFilter(self, token):
    """Default handler for ``JinjaFilter`` tokens: ignore the token.

    The main loop dispatches filter tokens by calling
    ``phase.processJinjaFilter(token)``, so the handler has to carry
    exactly this name to be reachable.

    :arg token: the ``JinjaFilter`` token handed over by the main loop

    :returns: ``None``
    """
    pass

# Backward-compatible alias: the handler was originally introduced under
# this name, which did not match the name used by the main loop.
processJinjaFilterTag = processJinjaFilter
521531

522532
def processStartTag(self, token):
@@ -535,6 +545,40 @@ def startTagHtml(self, token):
535545
def processEndTag(self, token):
536546
return self.endTagHandler[token["name"]](token)
537547

548+
class InJinjaVariablePhase(Phase):
    """Phase that handles the tokens of a Jinja variable expression.

    The start tag is inserted through ``tree.insertElement``; variable and
    filter tokens are turned into elements and appended to the last entry
    of ``tree.openElements``; the end tag pops open elements until the
    matching one has been removed.
    """

    def processJinjaVariableStartTag(self, token):
        """Insert an element for the opening tag of a Jinja variable.

        :arg token: the ``JinjaVariableStartTag`` token
        """
        log = logging.getLogger('html5lib')
        log.debug(u"InJinja: Start Tag")
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(token)

    def processJinjaVariableEndTag(self, token):
        """Close the open element whose name matches the end tag.

        Walks a reversed copy of ``tree.openElements``. On the first node
        whose name equals ``token["name"]``, implied end tags are
        generated, a parse error is reported if the last open element is
        still a different one, and elements are popped until that node
        has been removed. If a node listed in ``specialElements`` is met
        first, a parse error is reported and the end tag is dropped.

        :arg token: the ``JinjaVariableEndTag`` token
        """
        log = logging.getLogger('html5lib')
        name = token["name"]
        # Lazy %-style arguments: the messages are only rendered when
        # debug logging is actually enabled for this logger.
        log.debug(u"InJinja: End Tag %s", name)

        # Iterate over a copy, the stack itself is mutated below.
        for node in self.tree.openElements[::-1]:
            log.debug(u"InJinja: Open tag %s token %s", node, token)

            if node.name != name:
                if node.nameTuple in specialElements:
                    log.debug(u"Nametuple %s in %s",
                              node.nameTuple, specialElements)
                    self.parser.parseError("unexpected-end-tag",
                                           {"name": name})
                    break
                continue

            self.tree.generateImpliedEndTags(exclude=name)
            log.debug(u"InJinja: Implied end tag %s %s",
                      self.tree.openElements[-1].name, name)
            if self.tree.openElements[-1].name != name:
                self.parser.parseError("unexpected-end-tag", {"name": name})
            # Pop everything above the matching node, and the node itself.
            while self.tree.openElements.pop() != node:
                pass
            break

    def _appendElementForToken(self, token):
        """Create an element from *token* and append it to the last open element."""
        element = self.tree.createElementWithoutNamespace(token)
        self.tree.openElements[-1].appendChild(element)

    def processJinjaVariable(self, token):
        """Append an element for a ``JinjaVariable`` token to the last open element.

        :arg token: the ``JinjaVariable`` token
        """
        self._appendElementForToken(token)

    def processJinjaFilter(self, token):
        """Append an element for a ``JinjaFilter`` token to the last open element.

        :arg token: the ``JinjaFilter`` token
        """
        self._appendElementForToken(token)
581+
538582
class InitialPhase(Phase):
539583
def processSpaceCharacters(self, token):
540584
pass
@@ -2739,6 +2783,7 @@ def processEndTag(self, token):
27392783
"inHead": InHeadPhase,
27402784
# XXX "inHeadNoscript": InHeadNoScriptPhase,
27412785
"afterHead": AfterHeadPhase,
2786+
"inJinjaVariable": InJinjaVariablePhase,
27422787
"inBody": InBodyPhase,
27432788
"text": TextPhase,
27442789
"inTable": InTablePhase,

html5lib/tokenizer.py

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -294,17 +294,21 @@ def jinjaOpenState(self):
294294
data = self.stream.char()
295295

296296
if data == "{":
297-
self.tokenQueue.append({
297+
self.currentToken = {
298298
"type": tokenTypes["JinjaVariableStartTag"],
299-
"name": "{{", "data": [],
299+
"name": u"jinjavariabletag", "data": {},
300+
"namespace": None,
300301
"selfClosing": False
301-
})
302+
}
303+
304+
self.tokenQueue.append(self.currentToken)
302305

303306
self.state = self.jinjaVariableState
304307
elif data == "%":
305308
self.tokenQueue.append({
306309
"type": tokenTypes["JinjaStatementStartTag"],
307-
"name": "{%", "data": [],
310+
"name": "{%", "data": {},
311+
"namespace": None,
308312
"selfClosing": False
309313
})
310314

@@ -346,7 +350,7 @@ def jinjaVariableEndState(self):
346350
if data == "}":
347351
self.tokenQueue.append({
348352
"type": tokenTypes["JinjaVariableEndTag"],
349-
"name": "}}", "data": [],
353+
"name": u"jinjavariabletag", "data": [],
350354
"selfClosing": False
351355
})
352356
self.state = self.dataState
@@ -390,10 +394,34 @@ def jinjaVariableState(self):
390394
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
391395
"eof-in-jinja-variable"})
392396
self.state = self.dataState
397+
elif data in spaceCharacters:
398+
# Skip spaces
399+
pass
400+
elif data == "|":
401+
pass
402+
# If this is the first token after the variable start tag
403+
elif self.currentToken['type'] == tokenTypes["JinjaVariableStartTag"]:
404+
#log.debug(u"Got start tag {}".format(("|", "}", "\u0000") | spaceCharacters))
405+
406+
chars = self.stream.charsUntil(frozenset(("|", "}", "\u0000")) | spaceCharacters)
407+
self.currentToken = {"type": tokenTypes["JinjaVariable"],
408+
"name": "jinjavariable", "selfClosing": True, "data": {
409+
"value": data + chars,
410+
"position": self.stream.position(),
411+
}}
412+
self.tokenQueue.append(self.currentToken)
393413
else:
394-
chars = self.stream.charsUntil(("}", "\u0000"))
395-
self.tokenQueue.append({"type": tokenTypes["JinjaVariableTag"], "data":
396-
data + chars})
414+
chars = self.stream.charsUntil(frozenset(("|", "}", "\u0000")) | spaceCharacters)
415+
self.currentToken = {"type": tokenTypes["JinjaFilter"],
416+
"name": "jinjafilter", "selfClosing": True, "data": {
417+
"value": data + chars,
418+
"position": self.stream.position(),
419+
}}
420+
self.tokenQueue.append(self.currentToken)
421+
#else:
422+
#chars = self.stream.charsUntil(("}", "\u0000"))
423+
#self.tokenQueue.append({"type": tokenTypes["JinjaVariableTag"], "data":
424+
#data + chars})
397425

398426
return True
399427

html5lib/treebuilders/_base.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@
33

44
from ..constants import scopingElements, tableInsertModeElements, namespaces
55

6+
import logging
7+
8+
log = logging.getLogger("html5lib")
9+
10+
611
# The scope markers are inserted when entering object elements,
712
# marquees, table cells, and table captions, and are used to prevent formatting
813
# from "leaking" into tables, object elements, and marquees.
@@ -269,6 +274,13 @@ def createElement(self, token):
269274
element.attributes = token["data"]
270275
return element
271276

277+
def createElementWithoutNamespace(self, token):
    """Build an element from *token* without passing a namespace.

    ``self.elementClass`` is called with the token's name only, the
    resulting element's ``attributes`` are set to the token's ``data``,
    and the element is returned without being inserted anywhere.

    :arg token: a token dict providing ``"name"`` and ``"data"``

    :returns: the newly created element
    """
    element = self.elementClass(token["name"])
    element.attributes = token["data"]
    return element
283+
272284
def _getInsertFromTable(self):
273285
return self._insertFromTable
274286

html5lib/treebuilders/etree.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33

44
import re
55

6+
import logging
7+
68
from . import _base
79
from .. import ihatexml
810
from .. import constants
@@ -11,6 +13,8 @@
1113

1214
tag_regexp = re.compile("{([^}]*)}(.*)")
1315

16+
log = logging.getLogger("html5lib")
17+
1418

1519
def getETreeBuilder(ElementTreeImplementation, fullTree=False):
1620
ElementTree = ElementTreeImplementation
@@ -59,6 +63,7 @@ def _getAttributes(self):
5963
return self._element.attrib
6064

6165
def _setAttributes(self, attributes):
66+
log.debug(u"Attributes {}".format(attributes))
6267
# Delete existing attributes first
6368
# XXX - there may be a better way to do this...
6469
for key in list(self._element.attrib.keys()):

0 commit comments

Comments
 (0)