User:John Vandenberg/wiki xml
Processing MediaWiki syntax using XML.
flexbisonparse
Python support
In the flexbisonparse directory, add the following files:
- "pymod.c"
/*
 * pymod.c -- Python binding that exposes the wikitext parser as the
 * "wikiparse" extension module (Python 2 C API: Py_InitModule3).
 */
#include "Python.h"

/*
 * Parser entry point. Only declared here; the definition lives outside this
 * file (presumably in one of the other sources linked into the extension).
 */
const char *wikiparse_do_parse(const char *input);

char wikiparse_toxml__doc__[] =
    "toxml(wikitext) -- Convert Wikitext to XML\n";

/*
 * toxml(wikitext) -> str
 *
 * Parses a single string argument out of `args`, hands it to
 * wikiparse_do_parse() and returns the result as a new Python string.
 *
 * Returns NULL (with the Python exception already set by
 * PyArg_ParseTuple) when the argument is missing or not a string.
 * Py_BuildValue("s", ...) maps a NULL C string to None, so a NULL result
 * from the parser reaches the caller as None rather than as an error.
 *
 * NOTE(review): Py_BuildValue copies the buffer; this function never frees
 * `xml`. If wikiparse_do_parse() returns heap memory owned by the caller,
 * that is a leak -- confirm the ownership contract of the parser.
 */
PyObject *wikiparse_toxml(PyObject *self, PyObject *args)
{
    const char *wikitext;
    const char *xml;

    (void)self; /* module-level function: `self` is not used */

    if (!PyArg_ParseTuple(args, "s", &wikitext)) {
        return NULL;
    }

    xml = wikiparse_do_parse(wikitext);
    return Py_BuildValue("s", xml);
}

/* Method table for the module; terminated by the all-NULL sentinel entry. */
static PyMethodDef wikiparse_functions[] = {
    {"toxml", (PyCFunction)wikiparse_toxml, METH_VARARGS, wikiparse_toxml__doc__},
    {NULL, NULL, 0, NULL}
};

/*
 * Module entry point (module-initialization function).
 *
 * Declared with PyMODINIT_FUNC so the symbol carries whatever export /
 * linkage decoration the platform needs for the interpreter to find it.
 * The module object returned by Py_InitModule3 is not needed afterwards,
 * so it is deliberately discarded instead of being kept in an unused local.
 */
PyMODINIT_FUNC initwikiparse(void)
{
    /* Create the module and add the functions */
    (void)Py_InitModule3("wikiparse", wikiparse_functions,
                         "MediaWiki syntax parser");
}
- setup.py
"""distutils build script for the ``wikiparse`` C extension module."""
from distutils.core import Extension, setup

# C sources compiled into the extension. pymod.c is the Python binding; the
# remaining files are presumably the parser sources that `make` leaves in
# this directory -- confirm against the Makefile.
c_sources = [
    "pymod.c",
    "lex.yy.c",
    "wikiparse.tab.c",
    "parsetree.c",
]

wikiparse_extension = Extension("wikiparse", c_sources)

setup(
    name="wikiparse",
    version="0.1",
    ext_modules=[wikiparse_extension],
)
- wikiparse.py
# Command-line driver: converts wikitext to XML via the ``wikiparse``
# extension module.
#
# NOTE(review): this script has the same name as the extension module it
# imports. When it is run from its own directory, ``import wikiparse`` may
# resolve to this file instead of the compiled module -- confirm, and
# consider renaming the script.
import wikiparse
import sys


def parse(page):
    """Return the XML produced by ``wikiparse.toxml`` for the wikitext ``page``."""
    return wikiparse.toxml(page)


def main():
    """Print the XML for the literal "hello", then for everything on stdin."""
    # Single parenthesised argument: prints the same on Python 2 and 3.
    print("%s" % parse("hello"))
    # Join all input lines with an empty separator so the parser sees the
    # whole document (line endings are kept by readlines()).
    print("%s" % parse("".join(sys.stdin.readlines())))


if __name__ == "__main__":
    main()
Execute:
$ make
$ python setup.py install
$ cat test.txt | python ./wikiparse.py | xmllint --format -