aboutsummaryrefslogtreecommitdiff
path: root/lib/cmark/tools/make_entities_inc.py
diff options
context:
space:
mode:
authorFelix (xq) Queißner <git@mq32.de>2020-06-09 18:27:38 +0200
committerFelix (xq) Queißner <git@mq32.de>2020-06-09 18:27:38 +0200
commitde8fd9328e9deb0d1ec596d7486686ea3cb688c2 (patch)
tree9c25d7568dee522c1d2ed7b6d238d1dad54163f6 /lib/cmark/tools/make_entities_inc.py
parent661ddc244793102ee0720871c4edcd64f80bc744 (diff)
downloadkristall-de8fd9328e9deb0d1ec596d7486686ea3cb688c2.tar.gz
Includes cmark markdown parser library.
Diffstat (limited to 'lib/cmark/tools/make_entities_inc.py')
-rw-r--r--lib/cmark/tools/make_entities_inc.py32
1 files changed, 32 insertions, 0 deletions
diff --git a/lib/cmark/tools/make_entities_inc.py b/lib/cmark/tools/make_entities_inc.py
new file mode 100644
index 0000000..7b8ee41
--- /dev/null
+++ b/lib/cmark/tools/make_entities_inc.py
@@ -0,0 +1,32 @@
+# Creates C data structures for binary lookup table of entities,
+# using python's html5 entity data.
+# Usage: python3 tools/make_entities_inc.py > src/entities.inc
+
+import html.entities
+
+entities5 = html.entities.html5
+
+# Keep only the names that end in a semicolon, dropping the semicolon itself.
+# HTML5 also accepts a few legacy names, like auml, without the semicolon, so
+# the table lists those twice; the semicolon-less duplicates are skipped here.
+entities = sorted((k[:-1], v.encode('utf-8')) for k, v in entities5.items() if k.endswith(';'))
+
+# Print out the header:
+print("""/* Autogenerated by tools/make_entities_inc.py */
+
+struct cmark_entity_node {
+ unsigned char *entity;
+ unsigned char bytes[8];
+};
+
+#define CMARK_ENTITY_MIN_LENGTH 2
+#define CMARK_ENTITY_MAX_LENGTH 32""")
+
+print("#define CMARK_NUM_ENTITIES " + str(len(entities)))
+
+print("\nstatic const struct cmark_entity_node cmark_entities[] = {")
+
+for ent, bs in entities:
+    print('{(unsigned char*)"' + ent + '", {' + ', '.join(map(str, bs)) + ', 0}},')
+
+print("};")