html5lib
diff --git a/benchmarks/bench_html.py b/benchmarks/bench_html.py
Lines changed: 55 additions & 0 deletions
diff --git a/benchmarks/bench_wpt.py b/benchmarks/bench_wpt.py
Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,55 @@
+import io
+import os
+import sys
+
+import pyperf
+
+sys.path[0:0] = [os.path.join(os.path.dirname(__file__), "..")]
+import html5lib  # noqa: E402
+
+
+def bench_parse(fh):
+    """Parse the document in *fh* once with html5lib's lxml tree builder.
+
+    Registered with ``runner.bench_func`` in the ``__main__`` block, which
+    hands it the in-memory copy of ``data/html.html``.
+    """
+    # Rewind first so every call parses from the start of the buffer.
+    fh.seek(0)
+    html5lib.parse(fh, treebuilder="lxml", useChardet=False)
+
+
+def bench_serialize(loops, fh):
+    """Time ``loops`` serializations of the document parsed from *fh*.
+
+    The document is parsed once, before the first clock reading, so only the
+    ``html5lib.serialize`` calls fall inside the measured interval.
+
+    :arg loops: number of serializations to run inside the timed section
+    :arg fh: seekable file object holding the HTML source
+    :returns: difference between two ``pyperf.perf_counter()`` readings taken
+        around the serialization loop
+    """
+    fh.seek(0)
+    doc = html5lib.parse(fh, treebuilder="lxml", useChardet=False)
+
+    # Build the range before reading the clock to keep setup out of the timing.
+    range_it = range(loops)
+    t0 = pyperf.perf_counter()
+
+    # The index is unused; ``_`` avoids rebinding the ``loops`` argument.
+    for _ in range_it:
+        html5lib.serialize(doc, tree="lxml", encoding="ascii", inject_meta_charset=False)
+
+    return pyperf.perf_counter() - t0
+
+
+# Names accepted for the optional positional "benchmark" argument; all of
+# them run when none is given on the command line.
+BENCHMARKS = ["parse", "serialize"]
+
+
+def add_cmdline_args(cmd, args):
+    """Append the selected benchmark name, if any, to the command list *cmd*.
+
+    Passed to ``pyperf.Runner`` as its ``add_cmdline_args`` callback.
+    """
+    chosen = args.benchmark
+    if not chosen:
+        return
+    cmd.append(chosen)
+
+
+if __name__ == "__main__":
+    runner = pyperf.Runner(add_cmdline_args=add_cmdline_args)
+    runner.metadata["description"] = "Run benchmarks based on Anolis"
+    runner.argparser.add_argument("benchmark", nargs="?", choices=BENCHMARKS)
+
+    args = runner.parse_args()
+    # A single named benchmark narrows the run; otherwise run them all.
+    benchmarks = (args.benchmark,) if args.benchmark else BENCHMARKS
+
+    # Load the fixture into memory once so both benchmarks share one buffer.
+    html_path = os.path.join(os.path.dirname(__file__), "data", "html.html")
+    with open(html_path, "rb") as fh:
+        source = io.BytesIO(fh.read())
+
+    # (benchmark key, runner registration method, reported name, callable)
+    registrations = (
+        ("parse", runner.bench_func, "html_parse", bench_parse),
+        ("serialize", runner.bench_time_func, "html_serialize", bench_serialize),
+    )
+    for key, register, label, func in registrations:
+        if key in benchmarks:
+            register(label, func, source)
@@ -0,0 +1,45 @@
+import io
+import os
+import sys
+
+import pyperf
+
+sys.path[0:0] = [os.path.join(os.path.dirname(__file__), "..")]
+import html5lib  # noqa: E402
+
+
+def bench_html5lib(fh):
+    """Parse the document in *fh* once with html5lib's etree tree builder.
+
+    Registered with ``runner.bench_func`` once per WPT file in the
+    ``__main__`` block.
+    """
+    # Rewind first so every call parses from the start of the buffer.
+    fh.seek(0)
+    html5lib.parse(fh, treebuilder="etree", useChardet=False)
+
+
+def add_cmdline_args(cmd, args):
+    """Append the selected benchmark name, if any, to the command list *cmd*.
+
+    Passed to ``pyperf.Runner`` as its ``add_cmdline_args`` callback.
+    """
+    chosen = args.benchmark
+    if not chosen:
+        return
+    cmd.append(chosen)
+
+
+# Maps benchmark name (the file name without its ".html" suffix) to the path
+# of each .html file found under data/wpt next to this script.  Names are not
+# qualified by directory, so if two files share a name the one walked last
+# wins.
+BENCHMARKS = {
+    f[: -len(".html")]: os.path.join(root, f)
+    for root, _dirs, files in os.walk(
+        os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "wpt")
+    )
+    for f in files
+    if f.endswith(".html")
+}
+
+
+if __name__ == "__main__":
+    runner = pyperf.Runner(add_cmdline_args=add_cmdline_args)
+    runner.metadata["description"] = "Run parser benchmarks from WPT"
+    runner.argparser.add_argument("benchmark", nargs="?", choices=sorted(BENCHMARKS))
+
+    args = runner.parse_args()
+    # A single named benchmark narrows the run; otherwise run every file, sorted by name.
+    benchmarks = (args.benchmark,) if args.benchmark else sorted(BENCHMARKS)
+
+    for bench in benchmarks:
+        # Read each fixture fully into memory before registering it.
+        with open(BENCHMARKS[bench], "rb") as raw:
+            buffered = io.BytesIO(raw.read())
+
+        runner.bench_func("wpt_%s" % bench, bench_html5lib, buffered)