Skip to content

Commit 9578106

Browse files
author
Edward Z. Yang
committed
Sketchy XML commits. Doesn't actually work.
--HG-- branch : sketchyxml
1 parent 1ccae06 commit 9578106

File tree

2 files changed

+42
-31
lines changed

2 files changed

+42
-31
lines changed

library/HTML5/TreeBuilder.php

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
// XERROR - with regards to parse errors
3232
// XSCRIPT - with regards to scripting mode
3333
// XENCODING - with regards to encoding (for reparsing tests)
34+
// XSKETCHY - godawful workarounds
3435

3536
class HTML5_TreeBuilder {
3637
public $stack = array();
@@ -128,6 +129,15 @@ private function strConst($number) {
128129
const NS_XML = 'http://www.w3.org/XML/1998/namespace';
129130
const NS_XMLNS = 'http://www.w3.org/2000/xmlns/';
130131

132+
public $nsToPrefix = array(
133+
self::NS_HTML => '',
134+
self::NS_MATHML => 'math:',
135+
self::NS_SVG => 'svg:',
136+
self::NS_XLINK => 'xlink:',
137+
self::NS_XML => 'xml:',
138+
self::NS_XMLNS => 'xmlns:',
139+
);
140+
131141
public function __construct() {
132142
$this->mode = self::INITIAL;
133143
$this->dom = new DOMDocument;
@@ -140,7 +150,6 @@ public function __construct() {
140150

141151
// Process tag tokens
142152
public function emitToken($token, $mode = null) {
143-
// XXX: ignore parse errors... why are we emitting them, again?
144153
if ($token['type'] === HTML5_Tokenizer::PARSEERROR) return;
145154
if ($mode === null) $mode = $this->mode;
146155

@@ -195,9 +204,6 @@ public function emitToken($token, $mode = null) {
195204
* doctype attribute of the Document object. */
196205
if (!isset($token['public'])) $token['public'] = null;
197206
if (!isset($token['system'])) $token['system'] = null;
198-
// Yes this is hacky. I'm kind of annoyed that I can't appendChild
199-
// a doctype to DOMDocument. Maybe I haven't chanted the right
200-
// syllables.
201207
$impl = new DOMImplementation();
202208
// This call can fail for particularly pathological cases (namely,
203209
// the qualifiedName parameter ($token['name']) could be missing.
@@ -1753,7 +1759,7 @@ public function emitToken($token, $mode = null) {
17531759
* elements with an entry for the new element, and
17541760
* let node be the new element. */
17551761
// we don't know what the token is anymore
1756-
$clone = $node->cloneNode();
1762+
$clone = $this->cloneNode($node);
17571763
$a_pos = array_search($node, $this->a_formatting, true);
17581764
$s_pos = array_search($node, $this->stack, true);
17591765
$this->a_formatting[$a_pos] = $clone;
@@ -1794,7 +1800,7 @@ public function emitToken($token, $mode = null) {
17941800

17951801
/* 8. Create an element for the token for which the
17961802
* formatting element was created. */
1797-
$clone = $formatting_element->cloneNode();
1803+
$clone = $this->cloneNode($formatting_element);
17981804

17991805
/* 9. Take all of the child nodes of the furthest
18001806
block and append them to the element created in the
@@ -3177,7 +3183,7 @@ private function reconstructActiveFormattingElements() {
31773183
}
31783184

31793185
/* 8. Perform a shallow clone of the element entry to obtain clone. */
3180-
$clone = $entry->cloneNode();
3186+
$clone = $this->cloneNode($entry);
31813187

31823188
/* 9. Append clone to the current node and push it onto the stack
31833189
of open elements so that it is the new current node. */
@@ -3672,22 +3678,25 @@ public function insertForeignElement($token, $namespaceURI) {
36723678
if (!empty($token['attr'])) {
36733679
foreach ($token['attr'] as $kp) {
36743680
$attr = $kp['name'];
3681+
// XSKETCHY: this entire thing is a hack to get around
3682+
// DOM's really bad XML implementation
36753683
if (is_array($attr)) {
36763684
$ns = $attr[2];
36773685
$attr = $attr[1];
36783686
} else {
36793687
$ns = self::NS_HTML;
36803688
}
3681-
if (!$el->hasAttributeNS($ns, $attr)) {
3682-
// XSKETCHY: work around godawful libxml bug
3683-
if ($ns === self::NS_XLINK) {
3684-
$el->setAttribute('xlink:'.$attr, $kp['value']);
3685-
} elseif ($ns === self::NS_HTML) {
3686-
// Another godawful libxml bug
3687-
$el->setAttribute($attr, $kp['value']);
3688-
} else {
3689-
$el->setAttributeNS($ns, $attr, $kp['value']);
3689+
if ($ns === self::NS_XML) {
3690+
// this is special cased since DOM converts xml:lang
3691+
// into lang
3692+
$el->setAttributeNS($ns, $attr, $kp['value']);
3693+
} else {
3694+
$prefix = $this->nsToPrefix[$ns];
3695+
$el->setAttribute($prefix.$attr, $kp['value']);
3696+
if (!isset($el->html5_namespaced)) {
3697+
$el->html5_namespaced = array();
36903698
}
3699+
$el->html5_namespaced[$prefix.$attr] = true;
36913700
}
36923701
}
36933702
}
@@ -3701,6 +3710,14 @@ public function insertForeignElement($token, $namespaceURI) {
37013710
* value is not the XLink Namespace, that is a parse error. */
37023711
}
37033712

3713+
/**
 * Clones a node and carries the html5_namespaced bookkeeping over to the copy.
 *
 * The clone is produced by calling the node's own cloneNode() with no
 * arguments. insertForeignElement() records prefixed attribute names in
 * an ad-hoc html5_namespaced property on the element; when the source
 * node has that property set, it is copied onto the clone as well
 * (presumably because the native clone does not carry it over -- TODO confirm).
 *
 * @param object $node Node to clone; presumably a DOMElement -- verify against callers.
 * @return object The clone, with html5_namespaced copied when it was set on $node.
 */
private function cloneNode($node) {
3714+
$clone = $node->cloneNode();
3715+
// Only copy the marker array when the source node actually has one.
if (isset($node->html5_namespaced)) {
3716+
$clone->html5_namespaced = $node->html5_namespaced;
3717+
}
3718+
return $clone;
3719+
}
3720+
37043721
public function save() {
37053722
$this->dom->normalize();
37063723
if (!$this->fragment) {

tests/HTML5/TestData.php

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -117,22 +117,16 @@ public static function strDom($node, $prefix = '| ') {
117117
}
118118
$text = "<{$ns}{$next->tagName}>";
119119
foreach ($next->attributes as $attr) {
120-
$ans = '';
121-
switch ($attr->namespaceURI) {
122-
case HTML5_TreeBuilder::NS_MATHML:
123-
$ans = 'math '; break;
124-
case HTML5_TreeBuilder::NS_SVG:
125-
$ans = 'svg '; break;
126-
case HTML5_TreeBuilder::NS_XLINK:
127-
$ans = 'xlink '; break;
128-
case HTML5_TreeBuilder::NS_XML:
129-
$ans = 'xml '; break;
130-
case HTML5_TreeBuilder::NS_XMLNS:
131-
$ans = 'xmlns '; break;
120+
// XSKETCHY
121+
$name = $attr->name;
122+
if ($attr->namespaceURI === HTML5_TreeBuilder::NS_XML) {
123+
$name = "xml $name";
124+
} else {
125+
if (isset($next->html5_namespaced[$name])) {
126+
$name = str_replace(':', ' ', $name);
127+
}
132128
}
133-
// XSKETCHY: needed for our horrible xlink hack
134-
$name = str_replace(':', ' ', $attr->localName);
135-
$subnodes[] = "{$ans}{$name}=\"{$attr->value}\"";
129+
$subnodes[] = "{$name}=\"{$attr->value}\"";
136130
}
137131
sort($subnodes);
138132
break;

0 commit comments

Comments
 (0)