From de8fd9328e9deb0d1ec596d7486686ea3cb688c2 Mon Sep 17 00:00:00 2001 From: "Felix (xq) Queißner" Date: Tue, 9 Jun 2020 18:27:38 +0200 Subject: Includes cmark markdown parser library. --- lib/cmark/test/pathological_tests.py | 169 +++++++++++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 lib/cmark/test/pathological_tests.py (limited to 'lib/cmark/test/pathological_tests.py') diff --git a/lib/cmark/test/pathological_tests.py b/lib/cmark/test/pathological_tests.py new file mode 100644 index 0000000..76cc913 --- /dev/null +++ b/lib/cmark/test/pathological_tests.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import re +import argparse +import sys +import platform +import itertools +import multiprocessing +import time +from cmark import CMark + +TIMEOUT = 5 + +parser = argparse.ArgumentParser(description='Run cmark tests.') +parser.add_argument('--program', dest='program', nargs='?', default=None, + help='program to test') +parser.add_argument('--library-dir', dest='library_dir', nargs='?', + default=None, help='directory containing dynamic library') +args = parser.parse_args(sys.argv[1:]) + +allowed_failures = {"many references": True} + +cmark = CMark(prog=args.program, library_dir=args.library_dir) + +def hash_collisions(): + REFMAP_SIZE = 16 + COUNT = 50000 + + def badhash(ref): + h = 0 + for c in ref: + a = (h << 6) & 0xFFFFFFFF + b = (h << 16) & 0xFFFFFFFF + h = ord(c) + a + b - h + h = h & 0xFFFFFFFF + + return (h % REFMAP_SIZE) == 0 + + keys = ("x%d" % i for i in itertools.count()) + collisions = itertools.islice((k for k in keys if badhash(k)), COUNT) + bad_key = next(collisions) + + document = ''.join("[%s]: /url\n\n[%s]\n\n" % (key, bad_key) for key in collisions) + + return document, re.compile("(

\[%s\]

\n){%d}" % (bad_key, COUNT-1)) + + +# list of pairs consisting of input and a regex that must match the output. +pathological = { + # note - some pythons have limit of 65535 for {num-matches} in re. + "nested strong emph": + (("*a **a " * 65000) + "b" + (" a** a*" * 65000), + re.compile("(a a ){65000}b( a a){65000}")), + "many emph closers with no openers": + (("a_ " * 65000), + re.compile("(a[_] ){64999}a_")), + "many emph openers with no closers": + (("_a " * 65000), + re.compile("(_a ){64999}_a")), + "many link closers with no openers": + (("a]" * 65000), + re.compile("(a\]){65000}")), + "many link openers with no closers": + (("[a" * 65000), + re.compile("(\[a){65000}")), + "mismatched openers and closers": + (("*a_ " * 50000), + re.compile("([*]a[_] ){49999}[*]a_")), + "openers and closers multiple of 3": + (("a**b" + ("c* " * 50000)), + re.compile("a[*][*]b(c[*] ){49999}c[*]")), + "link openers and emph closers": + (("[ a_" * 50000), + re.compile("(\[ a_){50000}")), + "pattern [ (]( repeated": + (("[ (](" * 80000), + re.compile("(\[ \(\]\(){80000}")), + "hard link/emph case": + ("**x [a*b**c*](d)", + re.compile("\\*\\*x ab\\*\\*c")), + "nested brackets": + (("[" * 50000) + "a" + ("]" * 50000), + re.compile("\[{50000}a\]{50000}")), + "nested block quotes": + ((("> " * 50000) + "a"), + re.compile("(
\n){50000}")), + "deeply nested lists": + ("".join(map(lambda x: (" " * x + "* a\n"), range(0,1000))), + re.compile("\n){999}")), + "U+0000 in input": + ("abc\u0000de\u0000", + re.compile("abc\ufffd?de\ufffd?")), + "backticks": + ("".join(map(lambda x: ("e" + "`" * x), range(1,5000))), + re.compile("^

[e`]*

\n$")), + "unclosed links A": + ("[a]( 0: + print("Ignoring these allowed failures:") + for x in results['ignored']: + print(x) + if failed == 0 and errored == 0: + exit(0) + else: + exit(1) + +if __name__ == "__main__": + run_tests() -- cgit v1.2.3