diff options
| author | Felix (xq) Queißner <git@mq32.de> | 2020-06-09 18:27:38 +0200 |
|---|---|---|
| committer | Felix (xq) Queißner <git@mq32.de> | 2020-06-09 18:27:38 +0200 |
| commit | de8fd9328e9deb0d1ec596d7486686ea3cb688c2 (patch) | |
| tree | 9c25d7568dee522c1d2ed7b6d238d1dad54163f6 /lib/cmark/tools/make_entities_inc.py | |
| parent | 661ddc244793102ee0720871c4edcd64f80bc744 (diff) | |
| download | kristall-de8fd9328e9deb0d1ec596d7486686ea3cb688c2.tar.gz | |
Includes cmark markdown parser library.
Diffstat (limited to 'lib/cmark/tools/make_entities_inc.py')
| -rw-r--r-- | lib/cmark/tools/make_entities_inc.py | 32 |
1 files changed, 32 insertions, 0 deletions
# Creates C data structures for binary lookup table of entities,
# using python's html5 entity data.
# Usage: python3 tools/make_entities_inc.py > src/entities.inc

# Import the submodule explicitly: a bare `import html` only exposes
# `html.entities` as a side effect of how the package initialises itself.
import html.entities

# Size of the `bytes` array in the emitted struct.  Every table row writes
# the UTF-8 bytes of the expansion followed by a terminating 0, so an
# expansion may be at most ENTITY_BYTES_CAPACITY - 1 bytes long.
ENTITY_BYTES_CAPACITY = 8

# Fixed preamble of the generated file.
HEADER = """/* Autogenerated by tools/make_entities_inc.py */

struct cmark_entity_node {
  unsigned char *entity;
  unsigned char bytes[%d];
};

#define CMARK_ENTITY_MIN_LENGTH 2
#define CMARK_ENTITY_MAX_LENGTH 32""" % ENTITY_BYTES_CAPACITY

entities5 = html.entities.html5

# remove keys without semicolons.  For some reason the list
# has duplicates of a few things, like auml, one with and one
# without a semicolon.
# Each element is (name without the trailing ';', UTF-8 encoded expansion);
# the list is sorted so the emitted table is ordered by entity name.
entities = sorted(
    (name[:-1], text.encode('utf-8'))
    for name, text in entities5.items()
    if name.endswith(';')
)


def format_entry(ent, bs):
    """Return the C initializer line for a single entity.

    ent -- entity name (str, without '&' or ';').
    bs  -- UTF-8 encoded expansion (bytes).

    Raises ValueError if the expansion plus its terminating 0 would not fit
    into the struct's fixed-size `bytes` array.
    """
    if len(bs) + 1 > ENTITY_BYTES_CAPACITY:
        raise ValueError(
            "entity %r expands to %d bytes; at most %d fit in the table"
            % (ent, len(bs), ENTITY_BYTES_CAPACITY - 1)
        )
    return '{(unsigned char*)"' + ent + '", {' + ', '.join(map(str, bs)) + ', 0}},'


def render(entries):
    """Return the full text of entities.inc (without a trailing newline).

    entries -- sequence of (name, utf8_bytes) pairs, already in the order
               they should appear in the table.
    """
    lines = [
        HEADER,
        "#define CMARK_NUM_ENTITIES " + str(len(entries)),
        # The leading newline yields the blank line before the table.
        "\nstatic const struct cmark_entity_node cmark_entities[] = {",
    ]
    lines.extend(format_entry(ent, bs) for (ent, bs) in entries)
    lines.append("};")
    return "\n".join(lines)


def main():
    """Write the generated table to standard output."""
    print(render(entities))


if __name__ == "__main__":
    main()
