python-openxml
diff --git a/‎.gitignore
Lines changed: 2 additions & 0 deletions b/‎.gitignore
Lines changed: 2 additions & 0 deletions
diff --git a/‎HISTORY.rst b/‎HISTORY.rst
diff --git a/‎LICENSE
Lines changed: 20 additions & 0 deletions b/‎LICENSE
Lines changed: 20 additions & 0 deletions
diff --git a/‎MANIFEST.in
Lines changed: 3 additions & 0 deletions b/‎MANIFEST.in
Lines changed: 3 additions & 0 deletions
diff --git a/‎Makefile
Lines changed: 23 additions & 0 deletions b/‎Makefile
Lines changed: 23 additions & 0 deletions
diff --git a/‎README.rst
Lines changed: 155 additions & 0 deletions b/‎README.rst
Lines changed: 155 additions & 0 deletions
diff --git a/‎cxml/__init__.py
Lines changed: 29 additions & 0 deletions b/‎cxml/__init__.py
Lines changed: 29 additions & 0 deletions
diff --git a/‎cxml/lexer.py
Lines changed: 139 additions & 0 deletions b/‎cxml/lexer.py
Lines changed: 139 additions & 0 deletions
diff --git a/‎cxml/lib/__init__.py b/‎cxml/lib/__init__.py
@@ -1 +1,3 @@
+.coverage
 /_scratch/
+/.tox/
@@ -0,0 +1,20 @@
+The MIT License (MIT)
+Copyright (c) 2013 Steve Canny, https://github.com/scanny
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
@@ -0,0 +1,3 @@
+include HISTORY.rst LICENSE README.rst tox.ini
+recursive-include tests *.py
+recursive-include tests *.txt
@@ -0,0 +1,23 @@
+PYTHON = python
+SETUP  = $(PYTHON) ./setup.py
+
+# Every target below is a command rather than a file to be built. Declaring
+# them all phony keeps a stray file or directory of the same name (e.g.
+# `test` or `coverage`) from making the target look up to date.
+.PHONY: help clean coverage sdist test
+
+# First rule in the file, so a bare `make` prints this usage summary.
+help:
+	@echo "Please use \`make <target>' where <target> is one or more of"
+	@echo "  clean     delete intermediate work product and start fresh"
+	@echo "  coverage  run the tests and report line coverage for cxml"
+	@echo "  sdist     build a source distribution with setup.py"
+	@echo "  test      clean, then run flake8 and py.test"
+
+# Remove compiled bytecode, build output, and coverage data.
+clean:
+	find . -type f -name \*.pyc -exec rm {} \;
+	rm -rf dist *.egg-info .coverage .DS_Store
+
+# Run the test suite under coverage, listing the lines that were missed.
+coverage:
+	py.test --cov-report term-missing --cov=cxml tests/
+
+# Build a source distribution via setup.py.
+sdist:
+	$(SETUP) sdist
+
+# Lint first, then run the tests; `clean` runs beforehand as a prerequisite.
+test: clean
+	flake8
+	py.test -x
@@ -0,0 +1,155 @@
+
+cxml - Compact XML translator
+=============================
+
+.. highlight:: python
+
+`cxml` translates a Compact XML (CXML) expression into the corresponding
+pretty-printed XML snippet. For example::
+
+    from cxml import xml
+
+    xml('w:p/(w:pPr/w:jc{w:val=right},w:r/w:t"Right-aligned")')
+
+.. highlight:: xml
+
+becomes::
+
+    <w:p xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
+      <w:pPr>
+        <w:jc w:val="right"/>
+      </w:pPr>
+      <w:r>
+        <w:t>Right-aligned</w:t>
+      </w:r>
+    </w:p>
+
+
+Who cares?
+----------
+
+The motivation for a compact XML expression language arose out of the testing
+requirements of the `python-docx` and `python-pptx` libraries. The
+*WordprocessingML* and *PresentationML* file formats are XML-based and many
+operations in those libraries involve the recognition or modification of XML.
+The tests then require a great many XML snippets to test all the possible
+combinations the code must recognize or produce.
+
+Including full-sized XML snippets in the test code is both distracting and
+tedious. By compressing the specification of a snippet to fit on a single
+line (in most cases), the test code is much more compact and expressive.
+
+
+Syntax
+------
+
+CXML syntax borrows from that of XPath.
+
+.. highlight:: python
+
+An element is specified by its name::
+
+    >>> xml('foobar')
+    <foobar/>
+
+A child is specified by name following a slash::
+
+    >>> xml('foo/bar')
+    <foo>
+      <bar/>
+    </foo>
+
+XML output is pretty-printed with 2-space indentation.
+
+Multiple child elements are specified by separating them with a comma and
+enclosing them in parentheses::
+
+    >>> xml('foo/(bar,baz)')
+    <foo>
+      <bar/>
+      <baz/>
+    </foo>
+
+Element attributes are specified in braces after the element name::
+
+    >>> xml('foo{a=b}')
+    <foo a="b"/>
+
+Multiple attributes are separated by commas::
+
+    >>> xml('foo{a=b,b=c}')
+    <foo a="b" b="c"/>
+
+Whitespace is permitted (and ignored) between tokens in most places; however,
+after using CXML quite a bit, I don't find it useful::
+
+    >>> xml(' foo {a=b, b=c}')
+    <foo a="b" b="c"/>
+
+Attribute text may be surrounded by double-quotes, which is handy when the
+text contains a comma or a closing brace::
+
+    >>> xml('foo{a=b,b="c,}g"}')
+    <foo a="b" b="c,}g"/>
+
+Text immediately following the attributes' closing brace is interpreted as
+the text of the element. Whitespace within the text is preserved::
+
+    >>> xml('foo{a=b,b=c} bar ')
+    <foo a="b" b="c"> bar </foo>
+
+Element text may also be enclosed in quotes, which allows it to contain
+a comma or slash that would otherwise be interpreted as the next token::
+
+    >>> xml('foo{a=b}"bar/baz, barfoo"')
+    <foo a="b">bar/baz, barfoo</foo>
+
+An element having a namespace prefix appears with the corresponding namespace
+declaration::
+
+    >>> xml('a:foo')
+    <a:foo xmlns:a="http://foo/a"/>
+
+A different namespace prefix in a descendant element causes the corresponding
+namespace declaration to be added to the root element, in the order
+encountered::
+
+    >>> xml('a:foo/(b:bar,c:baz)')
+    <a:foo xmlns:a="http://foo/a" xmlns:b="http://foo/b" xmlns:c="http://foo/c">
+      <b:bar/>
+      <c:baz/>
+    </a:foo>
+
+A namespace can be explicitly declared as an attribute of an element, in
+which case it will appear whether a child element in that namespace is
+present or not::
+
+    >>> xml('a:foo{b:}')
+    <a:foo xmlns:a="http://foo/a" xmlns:b="http://foo/b"/>
+
+An explicit namespace appears immediately after the root element namespace
+(if it has one) when placed on the root element. This allows namespace
+declarations to appear in a different order than the order encountered. This
+is occasionally handy when matching XML by its string value.
+
+An explicit namespace may also be placed on a child element, in which case
+the corresponding namespace declaration appears on that child rather than the
+root element::
+
+    >>> xml('a:foo/b:bar{b:,c:}')
+    <a:foo xmlns:a="http://foo/a">
+      <b:bar xmlns:b="http://foo/b" xmlns:c="http://foo/c"/>
+    </a:foo>
+
+Putting all these together, a reasonably complex XML snippet can be condensed
+quite a bit::
+
+    >>> xml('w:p/(w:pPr/w:jc{w:val=right},w:r/w:t"Right-aligned")')
+    <w:p xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
+      <w:pPr>
+        <w:jc w:val="right"/>
+      </w:pPr>
+      <w:r>
+        <w:t>Right-aligned</w:t>
+      </w:r>
+    </w:p>
@@ -0,0 +1,29 @@
+# encoding: utf-8
+
+"""
+API for CXML translator.
+"""
+
+from __future__ import (
+    absolute_import, division, print_function, unicode_literals
+)
+
+
+__version__ = '0.9.6'
+
+
+from .lexer import CxmlLexer
+from .parser import CxmlParser
+from .symbols import root
+from .translator import CxmlTranslator
+
+
+def xml(cxml):
+    """
+    Return the XML generated from *cxml*.
+    """
+    lexer = CxmlLexer(cxml)
+    parser = CxmlParser(lexer)
+    root_ast = parser.parse(root)
+    root_element = CxmlTranslator.translate(root_ast)
+    return root_element.xml
@@ -0,0 +1,139 @@
+# encoding: utf-8
+
+"""
+Lexical analyzer (a.k.a. lexer or tokenizer) for the CXML language.
+"""
+
+from __future__ import (
+    absolute_import, division, print_function, unicode_literals
+)
+
+from .lib.lexer import Lexer
+
+from .symbols import (
+    COLON, COMMA, EQUAL, LBRACE, LPAREN, NAME, RBRACE, RPAREN, SLASH, SNTL,
+    TEXT
+)
+
+
+# building blocks for the character classes below: ASCII letters and digits
+alphas = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
+nums = '0123456789'
+
+# a NAME token is started by a letter or underscore ...
+name_start_chars = alphas + '_'
+# ... and continues over letters, digits, underscore, hyphen, and period
+name_chars = alphas + nums + '_-.'
+
+# each of these characters is lexed as its own single-character token
+punctuation = ':,=/{}()'
+
+
+class CxmlLexer(Lexer):
+    """
+    Lexer object for CXML.
+    """
+    def _lex_start(self):
+        """
+        The starting and fallback state of the lexer, where it is in-between
+        tokens.
+        """
+        # should only be entering this state in-between tokens
+        assert self._start == self._pos
+
+        peek = self._peek
+
+        # test EOF first to avoid __contains__ errors
+        if peek is None:
+            return self._lex_eof
+
+        # ignore whitespace as a priority
+        elif peek == ' ':
+            return self._lex_whitespace
+
+        elif peek in name_start_chars:
+            return self._lex_name
+
+        elif peek in punctuation:
+            return self._lex_punctuation
+
+        elif peek == '"':
+            return self._lex_quoted_string
+
+        else:
+            raise SyntaxError(
+                "at character '%s' in '%s'" % (peek, self._input)
+            )
+
+    def _lex_eof(self):
+        """
+        Emit `SNTL` token and end parsing by returning |None|.
+        """
+        assert self._start == self._pos == self._len
+        self._emit(SNTL)
+        return None
+
+    def _lex_name(self):
+        """
+        Emit maximal sequence of name characters.
+        """
+        self._accept_run(name_chars)
+        self._emit(NAME)
+        return self._lex_start
+
+    def _lex_punctuation(self):
+        """
+        Emit the appropriate single-character punctuation token, such as
+        COLON.
+        """
+        symbol = self._next()
+
+        token_type = {
+            ':': COLON, ',': COMMA, '{': LBRACE, '}': RBRACE,
+            '=': EQUAL, '/': SLASH, '(': LPAREN, ')': RPAREN,
+        }[symbol]
+
+        self._emit(token_type)
+        return self._lex_text if symbol in '=}' else self._lex_start
+
+    def _lex_quoted_string(self):
+        """
+        Emit the text of a quoted string as a TEXT token, discarding the
+        enclosing quote characters.
+        """
+        # skip over opening quote
+        self._skip()
+
+        # accept any character until another double-quote or EOF
+        self._accept_until('"')
+        self._emit(TEXT)
+
+        # raise unterminated if next character not closing quote
+        if self._peek != '"':
+            raise SyntaxError("unterminated quote")
+        self._skip()
+
+        return self._lex_start
+
+    def _lex_text(self):
+        """
+        Parse a string value, either a quoted string or a raw string, which
+        is terminated by a comma, closing brace, slash, or right paren.
+        """
+        peek = self._peek
+
+        if peek is None:
+            return self._lex_eof
+
+        if peek == '"':
+            return self._lex_quoted_string
+
+        if peek not in ',}/)':
+            self._accept_until(',}/)')
+            self._emit(TEXT)
+
+        return self._lex_start
+
+    def _lex_whitespace(self):
+        """
+        Consume all whitespace at current position and ignore it.
+        """
+        self._accept_run(' ')
+        self._ignore()
+        return self._lex_start
Original file line number	Diff line number	Diff line change
`@@ -1 +1,3 @@`
	`1`	`+.coverage`
`1`	`2`	`/_scratch/`
	`3`	`+/.tox/`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+include HISTORY.rst LICENSE README.rst tox.ini`
	`2`	`+recursive-include tests *.py`
	`3`	`+recursive-include tests *.txt`