From 9578106c7e75b1318f445666a726d37836af97d1 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang ext:(%22)" Date: Sat, 6 Jun 2009 15:35:46 -0400 Subject: [PATCH] Sketchy XML commits. Doesn't actually work. --HG-- branch : sketchyxml --- library/HTML5/TreeBuilder.php | 49 +++++++++++++++++++++++------------ tests/HTML5/TestData.php | 24 +++++++---------- 2 files changed, 42 insertions(+), 31 deletions(-) diff --git a/library/HTML5/TreeBuilder.php b/library/HTML5/TreeBuilder.php index c6e4b1f..359e225 100644 --- a/library/HTML5/TreeBuilder.php +++ b/library/HTML5/TreeBuilder.php @@ -31,6 +31,7 @@ // XERROR - with regards to parse errors // XSCRIPT - with regards to scripting mode // XENCODING - with regards to encoding (for reparsing tests) +// XSKETCHY - godawful workarounds class HTML5_TreeBuilder { public $stack = array(); @@ -128,6 +129,15 @@ private function strConst($number) { const NS_XML = 'http://www.w3.org/XML/1998/namespace'; const NS_XMLNS = 'http://www.w3.org/2000/xmlns/'; + public $nsToPrefix = array( + self::NS_HTML => '', + self::NS_MATHML => 'math:', + self::NS_SVG => 'svg:', + self::NS_XLINK => 'xlink:', + self::NS_XML => 'xml:', + self::NS_XMLNS => 'xmlns:', + ); + public function __construct() { $this->mode = self::INITIAL; $this->dom = new DOMDocument; @@ -140,7 +150,6 @@ public function __construct() { // Process tag tokens public function emitToken($token, $mode = null) { - // XXX: ignore parse errors... why are we emitting them, again? if ($token['type'] === HTML5_Tokenizer::PARSEERROR) return; if ($mode === null) $mode = $this->mode; @@ -195,9 +204,6 @@ public function emitToken($token, $mode = null) { * doctype attribute of the Document object. */ if (!isset($token['public'])) $token['public'] = null; if (!isset($token['system'])) $token['system'] = null; - // Yes this is hacky. I'm kind of annoyed that I can't appendChild - // a doctype to DOMDocument. Maybe I haven't chanted the right - // syllables. $impl = new DOMImplementation(); // This call can fail for particularly pathological cases (namely, // the qualifiedName parameter ($token['name']) could be missing. @@ -1753,7 +1759,7 @@ public function emitToken($token, $mode = null) { * elements with an entry for the new element, and * let node be the new element. */ // we don't know what the token is anymore - $clone = $node->cloneNode(); + $clone = $this->cloneNode($node); $a_pos = array_search($node, $this->a_formatting, true); $s_pos = array_search($node, $this->stack, true); $this->a_formatting[$a_pos] = $clone; @@ -1794,7 +1800,7 @@ public function emitToken($token, $mode = null) { /* 8. Create an element for the token for which the * formatting element was created. */ - $clone = $formatting_element->cloneNode(); + $clone = $this->cloneNode($formatting_element); /* 9. Take all of the child nodes of the furthest block and append them to the element created in the @@ -3177,7 +3183,7 @@ private function reconstructActiveFormattingElements() { } /* 8. Perform a shallow clone of the element entry to obtain clone. */ - $clone = $entry->cloneNode(); + $clone = $this->cloneNode($entry); /* 9. Append clone to the current node and push it onto the stack of open elements so that it is the new current node. */ @@ -3672,22 +3678,25 @@ public function insertForeignElement($token, $namespaceURI) { if (!empty($token['attr'])) { foreach ($token['attr'] as $kp) { $attr = $kp['name']; + // XSKETCHY: this entire thing is a hack to get around + // DOM's really bad XML implementation if (is_array($attr)) { $ns = $attr[2]; $attr = $attr[1]; } else { $ns = self::NS_HTML; } - if (!$el->hasAttributeNS($ns, $attr)) { - // XSKETCHY: work around godawful libxml bug - if ($ns === self::NS_XLINK) { - $el->setAttribute('xlink:'.$attr, $kp['value']); - } elseif ($ns === self::NS_HTML) { - // Another godawful libxml bug - $el->setAttribute($attr, $kp['value']); - } else { - $el->setAttributeNS($ns, $attr, $kp['value']); + if ($ns === self::NS_XML) { + // this is special cased since DOM converts xml:lang + // into lang + $el->setAttributeNS($ns, $attr, $kp['value']); + } else { + $prefix = $this->nsToPrefix[$ns]; + $el->setAttribute($prefix.$attr, $kp['value']); + if (!isset($el->html5_namespaced)) { + $el->html5_namespaced = array(); } + $el->html5_namespaced[$prefix.$attr] = true; } } } @@ -3701,6 +3710,14 @@ public function insertForeignElement($token, $namespaceURI) { * value is not the XLink Namespace, that is a parse error. */ } + private function cloneNode($node) { + $clone = $node->cloneNode(); + if (isset($node->html5_namespaced)) { + $clone->html5_namespaced = $node->html5_namespaced; + } + return $clone; + } + public function save() { $this->dom->normalize(); if (!$this->fragment) { diff --git a/tests/HTML5/TestData.php b/tests/HTML5/TestData.php index 2f72142..5cea72f 100644 --- a/tests/HTML5/TestData.php +++ b/tests/HTML5/TestData.php @@ -117,22 +117,16 @@ public static function strDom($node, $prefix = '| ') { } $text = "<{$ns}{$next->tagName}>"; foreach ($next->attributes as $attr) { - $ans = ''; - switch ($attr->namespaceURI) { - case HTML5_TreeBuilder::NS_MATHML: - $ans = 'math '; break; - case HTML5_TreeBuilder::NS_SVG: - $ans = 'svg '; break; - case HTML5_TreeBuilder::NS_XLINK: - $ans = 'xlink '; break; - case HTML5_TreeBuilder::NS_XML: - $ans = 'xml '; break; - case HTML5_TreeBuilder::NS_XMLNS: - $ans = 'xmlns '; break; + // XSKETCHY + $name = $attr->name; + if ($attr->namespaceURI === HTML5_TreeBuilder::NS_XML) { + $name = "xml $name"; + } else { + if (isset($next->html5_namespaced[$name])) { + $name = str_replace(':', ' ', $name); + } } - // XSKETCHY: needed for our horrible xlink hack - $name = str_replace(':', ' ', $attr->localName); - $subnodes[] = "{$ans}{$name}=\"{$attr->value}\""; + $subnodes[] = "{$name}=\"{$attr->value}\""; } sort($subnodes); break;