aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md51
-rw-r--r--serializer.c326
-rw-r--r--serializer.h87
3 files changed, 464 insertions, 0 deletions
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f3b35ed
--- /dev/null
+++ b/README.md
@@ -0,0 +1,51 @@
+Serializer/deserializer library for C99
+=======================================
+
+This library aims to provide a portable way to read/write data streams where
+endianness and alignment requirements need to be taken into account, e.g.
+network protocols, file formats, etc. As the title specifies, only a
+C99-compliant compiler is required.
+
+Only 8-bit and 16/32-bit little-endian and big-endian values are supported.
+This means no support for bit fields, as some aspects of bit fields are
+implementation-defined.
+
+Usage
+------
+
+```{c}
+#include "serializer.h"
+#include <stdint.h>
+#include <stdio.h>
+
+int main()
+{
+ struct
+ {
+ uint8_t a;
+ uint16_t b;
+ uint32_t c;
+ uint16_t d;
+ } ex;
+
+ static const uint8_t data[] =
+ {
+ 0x01, 0x33, 0xFF, 0xAC, 0xBB, 0xFA, 0xFA, 0xDE, 0xDE
+ };
+
+ deserialize("1/le2/be4/be2", &ex, sizeof ex, data, sizeof data);
+ printf("a=%X, b=%X, c=%X, d=%X\n",
+ ex.a, ex.b, ex.c, ex.d);
+ return 0;
+}
+```
+Output
+------
+`a=1, b=FF33, c=ACBBFAFA, d=DEDE`
+
+TODO
+----
+Only deserialization is implemented. Serialization will be implemented in the future.
+
+64-bit support.
diff --git a/serializer.c b/serializer.c
new file mode 100644
index 0000000..287ee22
--- /dev/null
+++ b/serializer.c
@@ -0,0 +1,326 @@
+/*
+ Copyright 2020 Xavier Del Campo Romero
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "serializer.h"
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdint.h>
+
+/* Reports whether the host stores the least significant byte of an integer
+ * first. Returns true on little-endian hosts and false otherwise. */
+static bool little_endian(void)
+{
+    const unsigned int probe = 1;
+
+    /* Inspect the first byte in memory and compare it against 1. Merely
+     * testing a multi-byte value for non-zero would succeed on either byte
+     * order. */
+    return *(const unsigned char *)&probe == 1;
+}
+
+/* Serialization is not implemented yet: this only validates its arguments.
+ * Returns SERIALIZER_ERR_INVALID_ARG if format, dst or src is null or if sz
+ * is zero, and SERIALIZER_OK otherwise. Neither buffer is accessed and src_sz
+ * is not used. */
+enum serializer_err serialize(const char *format, void *dst, size_t sz, const void *src, const size_t src_sz)
+{
+    const bool args_ok = format && dst && sz && src;
+
+    return args_ok ? SERIALIZER_OK : SERIALIZER_ERR_INVALID_ARG;
+}
+
+/* Result of feeding one format character to the tokenizer.
+ * The first five constants are complete value tokens; deserialize() uses
+ * them directly as indices into its size and reader lookup tables, so their
+ * order must stay in sync with those tables. */
+enum token
+{
+ TOKEN_8BIT, /* "1": one 8-bit value. */
+ TOKEN_LE16BIT, /* "le2": one little-endian 16-bit value. */
+ TOKEN_BE16BIT, /* "be2": one big-endian 16-bit value. */
+ TOKEN_LE32BIT, /* "le4": one little-endian 32-bit value. */
+ TOKEN_BE32BIT, /* "be4": one big-endian 32-bit value. */
+ TOKEN_NOT_READY, /* Character accepted, token not complete yet. */
+ TOKEN_ERROR /* Character is not valid in the current state. */
+};
+
+/* Tokenizer states. get_token() uses the state as an index into its handler
+ * table, and deserialize() resets the state to 0 after each complete token,
+ * i.e. to ENDIAN_SPECIFIER_OR_8BIT_OR_SLASH, which must remain the first
+ * constant. */
+enum state
+{
+ ENDIAN_SPECIFIER_OR_8BIT_OR_SLASH, /* Expecting '/', '1', 'l' or 'b'. */
+ CHARACTER_E, /* Expecting the 'e' of "le"/"be". */
+ MULTI_BYTE_NUM /* Expecting the byte count: '2' or '4'. */
+};
+
+/* Byte order selected by the 'l'/'b' prefix of a multi-byte token.
+ * NOTE(review): LITTLE_ENDIAN and BIG_ENDIAN may clash with macros of the
+ * same name that some platform headers (e.g. <endian.h>) define - confirm
+ * this translation unit never ends up including them. */
+enum endianness
+{
+ ENDIANESS_UNKNOWN, /* No byte-order prefix has been parsed yet. */
+ LITTLE_ENDIAN, /* Selected by 'l'. */
+ BIG_ENDIAN /* Selected by 'b'. */
+};
+
+/* Handles a character at the start of a token.
+ * '/' is a separator and is skipped, '1' is a complete 8-bit token, and
+ * 'l'/'b' record the byte order in *endianness and switch *state to expect
+ * the following 'e'. Any other character yields TOKEN_ERROR. */
+static enum token get_spec_8bit(const char c, enum state *const state, enum endianness *const endianness)
+{
+    if (c == '/')
+    {
+        return TOKEN_NOT_READY;
+    }
+
+    if (c == '1')
+    {
+        return TOKEN_8BIT;
+    }
+
+    if (c == 'l' || c == 'b')
+    {
+        *state = CHARACTER_E;
+        *endianness = (c == 'l') ? LITTLE_ENDIAN : BIG_ENDIAN;
+        return TOKEN_NOT_READY;
+    }
+
+    return TOKEN_ERROR;
+}
+
+/* Handles the second character of an "le"/"be" prefix.
+ * Only 'e' is accepted; it switches *state to expect the byte count.
+ * endianness is part of the common handler signature and is not used here. */
+static enum token get_ech(const char c, enum state *const state, enum endianness *const endianness)
+{
+    (void)endianness;
+
+    if (c != 'e')
+    {
+        return TOKEN_ERROR;
+    }
+
+    *state = MULTI_BYTE_NUM;
+    return TOKEN_NOT_READY;
+}
+
+/* Handles the byte-count digit that ends a multi-byte token.
+ * '2' and '4' combine with the byte order stored in *endianness to form the
+ * 16-bit and 32-bit tokens respectively. Any other digit, or a byte order
+ * that is neither little nor big endian, yields TOKEN_ERROR.
+ * state is part of the common handler signature and is not modified here. */
+static enum token get_multibyte(const char c, enum state *const state, enum endianness *const endianness)
+{
+    const bool little = *endianness == LITTLE_ENDIAN;
+    const bool big = *endianness == BIG_ENDIAN;
+
+    (void)state;
+
+    if (!little && !big)
+    {
+        return TOKEN_ERROR;
+    }
+
+    if (c == '2')
+    {
+        return little ? TOKEN_LE16BIT : TOKEN_BE16BIT;
+    }
+
+    if (c == '4')
+    {
+        return little ? TOKEN_LE32BIT : TOKEN_BE32BIT;
+    }
+
+    return TOKEN_ERROR;
+}
+
+/* Feeds one format character to the tokenizer by dispatching to the handler
+ * registered for the current *state. The handler may update *state and
+ * *endianness; its result is returned unchanged.
+ * *state is used as a table index without a range check, so it must hold one
+ * of the enum state constants. */
+static enum token get_token(const char c, enum state *const state, enum endianness *const endianness)
+{
+    typedef enum token (*handler_fn)(char c, enum state *state, enum endianness *endianness);
+
+    static const handler_fn handlers[] =
+    {
+        [ENDIAN_SPECIFIER_OR_8BIT_OR_SLASH] = get_spec_8bit,
+        [CHARACTER_E] = get_ech,
+        [MULTI_BYTE_NUM] = get_multibyte
+    };
+
+    const handler_fn handle = handlers[*state];
+
+    return handle(c, state, endianness);
+}
+
+/* Copies a single byte from src to dst. */
+static void read8(void *const dst, const void *const src)
+{
+    const uint8_t *const in = src;
+    uint8_t *const out = dst;
+
+    *out = *in;
+}
+
+/* Copies four bytes from src to dst in reverse order.
+ * dst and src must not overlap, since bytes are written one at a time.
+ * Returns the number of bytes written, which is always 4. */
+static size_t swap32(void *dst, const void *const src)
+{
+    /* Index through typed byte pointers: arithmetic on void pointers is a
+     * compiler extension, not C99. */
+    const uint8_t *const in = src;
+    uint8_t *const out = dst;
+
+    out[0] = in[3];
+    out[1] = in[2];
+    out[2] = in[1];
+    out[3] = in[0];
+
+    return sizeof (uint32_t);
+}
+
+/* Stores the 32-bit big-endian value found at src into dst in host byte
+ * order: the bytes are reversed when little_endian() reports a little-endian
+ * host and copied verbatim otherwise. */
+static void readbe32(void *const dst, const void *const src)
+{
+    if (!little_endian())
+    {
+        /* Host order already matches the stream. */
+        memmove(dst, src, sizeof (uint32_t));
+        return;
+    }
+
+    swap32(dst, src);
+}
+
+/* Stores the 32-bit little-endian value found at src into dst in host byte
+ * order: the bytes are copied verbatim when little_endian() reports a
+ * little-endian host and reversed otherwise. */
+static void readle32(void *const dst, const void *const src)
+{
+    const bool same_order = little_endian();
+
+    if (same_order)
+    {
+        memmove(dst, src, sizeof (uint32_t));
+    }
+    else
+    {
+        swap32(dst, src);
+    }
+}
+
+/* Copies two bytes from src to dst in reverse order.
+ * dst and src must not overlap, since bytes are written one at a time.
+ * Returns the number of bytes written, which is always 2. */
+static size_t swap16(void *dst, const void *const src)
+{
+    /* Index through typed byte pointers: arithmetic on void pointers is a
+     * compiler extension, not C99. */
+    const uint8_t *const in = src;
+    uint8_t *const out = dst;
+
+    out[0] = in[1];
+    out[1] = in[0];
+
+    return sizeof (uint16_t);
+}
+
+/* Stores the 16-bit big-endian value found at src into dst in host byte
+ * order: the bytes are reversed when little_endian() reports a little-endian
+ * host and copied verbatim otherwise. */
+static void readbe16(void *const dst, const void *const src)
+{
+    if (!little_endian())
+    {
+        /* Host order already matches the stream. */
+        memmove(dst, src, sizeof (uint16_t));
+        return;
+    }
+
+    swap16(dst, src);
+}
+
+/* Stores the 16-bit little-endian value found at src into dst in host byte
+ * order: the bytes are copied verbatim when little_endian() reports a
+ * little-endian host and reversed otherwise. */
+static void readle16(void *const dst, const void *const src)
+{
+    const bool same_order = little_endian();
+
+    if (same_order)
+    {
+        memmove(dst, src, sizeof (uint16_t));
+    }
+    else
+    {
+        swap16(dst, src);
+    }
+}
+
+/* Reads the values described by format from the packed byte stream src and
+ * stores them into dst in host byte order.
+ *
+ * Values are consumed from src back to back. In dst, each value is written
+ * at the next offset that is a multiple of its own size, counted from the
+ * start of dst.
+ * NOTE(review): this matches a struct layout only where each type's
+ * alignment equals its size and dst itself is suitably aligned - confirm for
+ * the target ABI.
+ *
+ * Returns:
+ *  - SERIALIZER_ERR_INVALID_ARG if format, dst or src is null or sz is zero.
+ *  - SERIALIZER_ERR_FORMAT if format contains an invalid character, ends in
+ *    the middle of a token (e.g. "1/le") or describes no values at all.
+ *  - SERIALIZER_ERR_IN_OVERFLOW if src holds fewer bytes than format needs.
+ *  - SERIALIZER_ERR_OUT_OVERFLOW if the values do not fit into sz bytes.
+ *  - SERIALIZER_OK otherwise.
+ * On error, values parsed before the failure have already been written. */
+enum serializer_err deserialize(const char *format,
+    void *const dst,
+    const size_t sz,
+    const void *const src,
+    const size_t src_sz)
+{
+    /* Both tables are indexed by the value tokens of enum token. */
+    static const size_t sizes[] =
+    {
+        [TOKEN_8BIT] = sizeof (uint8_t),
+        [TOKEN_LE16BIT] = sizeof (uint16_t),
+        [TOKEN_BE16BIT] = sizeof (uint16_t),
+        [TOKEN_LE32BIT] = sizeof (uint32_t),
+        [TOKEN_BE32BIT] = sizeof (uint32_t)
+    };
+
+    static void (*const readers[])(void *dst, const void *src) =
+    {
+        [TOKEN_8BIT] = read8,
+        [TOKEN_LE16BIT] = readle16,
+        [TOKEN_BE16BIT] = readbe16,
+        [TOKEN_LE32BIT] = readle32,
+        [TOKEN_BE32BIT] = readbe32
+    };
+
+    enum state state = ENDIAN_SPECIFIER_OR_8BIT_OR_SLASH;
+    enum endianness endianness = ENDIANESS_UNKNOWN;
+    size_t in_sz = 0, out_sz = 0;
+
+    if (!format || !dst || !sz || !src)
+    {
+        return SERIALIZER_ERR_INVALID_ARG;
+    }
+
+    for (; *format; format++)
+    {
+        const enum token token = get_token(*format, &state, &endianness);
+
+        if (token == TOKEN_ERROR)
+        {
+            return SERIALIZER_ERR_FORMAT;
+        }
+
+        if (token == TOKEN_NOT_READY)
+        {
+            continue;
+        }
+
+        const size_t st = sizes[token];
+        const size_t padding = out_sz % st;
+
+        /* Round the output offset up to a multiple of the value size. */
+        if (padding)
+        {
+            out_sz += st - padding;
+        }
+
+        /* in_sz never exceeds src_sz, so the subtraction cannot wrap. */
+        if (st > src_sz - in_sz)
+        {
+            return SERIALIZER_ERR_IN_OVERFLOW;
+        }
+
+        /* Padding may have pushed out_sz past sz; check before subtracting. */
+        if (out_sz > sz || st > sz - out_sz)
+        {
+            return SERIALIZER_ERR_OUT_OVERFLOW;
+        }
+
+        readers[token]((uint8_t *)dst + out_sz, (const uint8_t *)src + in_sz);
+        in_sz += st;
+        out_sz += st;
+        state = ENDIAN_SPECIFIER_OR_8BIT_OR_SLASH;
+    }
+
+    /* A format that stops after 'l'/'b' or "le"/"be" is truncated. */
+    if (state != ENDIAN_SPECIFIER_OR_8BIT_OR_SLASH)
+    {
+        return SERIALIZER_ERR_FORMAT;
+    }
+
+    /* A format made only of separators describes nothing to read. */
+    if (!out_sz)
+    {
+        return SERIALIZER_ERR_FORMAT;
+    }
+
+    return SERIALIZER_OK;
+}
diff --git a/serializer.h b/serializer.h
new file mode 100644
index 0000000..091e0e8
--- /dev/null
+++ b/serializer.h
@@ -0,0 +1,87 @@
+/*
+ Copyright 2020 Xavier Del Campo Romero
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef SERIALIZER_H
+#define SERIALIZER_H
+
+#include <stddef.h>
+
+/* C++ compilers normally do not define __STDC_VERSION__, so only enforce the
+ * C99 requirement when compiling as C; otherwise the extern "C" wrapper below
+ * could never be reached from a C++ translation unit. */
+#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L)
+#error C99 support is mandatory for serializer
+#endif /* !__cplusplus && __STDC_VERSION__ < 199901L */
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif /* __cplusplus */
+
+/* Tokens used by this library:
+ * '1': 8-bit value.
+ * 'leN': little endian N-byte value.
+ * 'beN': big endian N-byte value.
+ * Where N: 2 or 4.
+ * Tokens must be placed without spaces or other symbols.
+ * For example, "11le2be41" means:
+ * - 2 8-bit values.
+ * - 1 little-endian 16-bit value.
+ * - 1 big-endian 32-bit value.
+ * - 1 8-bit value.
+ * For increased readability, optional slashes can be placed
+ * between tokens. For example: "1/1/le2/be4/1". */
+
+/**
+ * Error code list used by this library.
+ */
+enum serializer_err
+{
+ SERIALIZER_OK, /**< Serialize/deserialize operation was successful. */
+ SERIALIZER_ERR_INVALID_ARG, /**< An invalid argument has been given. */
+ SERIALIZER_ERR_FORMAT, /**< An error occurred while parsing the format string. */
+ SERIALIZER_ERR_IN_OVERFLOW, /**< Input data holds fewer bytes than specified by format. */
+ SERIALIZER_ERR_OUT_OVERFLOW /**< Input data does not fit into destination buffer. */
+};
+
+/**
+ * Intended to dump source buffer with given format and alignment into
+ * destination buffer.
+ * @param format Data structure format as null-terminated string.
+ * @param dst Destination buffer.
+ * @param sz Size of the destination buffer in bytes.
+ * @param src Source buffer.
+ * @param src_sz Size of the source buffer in bytes.
+ * @return Returns one of the error codes from @ref serializer_err .
+ * @see Possible tokens for the format string on the comments above.
+ * @attention Serialization is not implemented yet: the current implementation
+ * only checks that @p format , @p dst and @p src are non-null and that @p sz
+ * is non-zero, and returns SERIALIZER_OK without writing to @p dst .
+ */
+enum serializer_err serialize(const char *format, void *dst, size_t sz, const void *src, size_t src_sz);
+
+/**
+ * Dumps source buffer with given format into destination buffer with given alignment.
+ * Values are read back to back from the source buffer, converted to host byte
+ * order, and each one is written to the destination buffer at the next offset
+ * that is a multiple of its own size.
+ * @param format Data structure format as null-terminated string.
+ * @param dst Destination buffer.
+ * @param sz Size of the destination buffer in bytes. Must not be zero.
+ * @param src Source buffer.
+ * @param src_sz Size of the source buffer in bytes.
+ * @return Returns one of the error codes from @ref serializer_err :
+ * SERIALIZER_ERR_INVALID_ARG if a pointer is null or @p sz is zero,
+ * SERIALIZER_ERR_FORMAT if the format string contains an invalid character
+ * or describes no values, SERIALIZER_ERR_IN_OVERFLOW if @p src is too short
+ * and SERIALIZER_ERR_OUT_OVERFLOW if @p dst is too small.
+ * @see Possible tokens for the format string on the comments above.
+ * @attention Buffer contents are undefined if an error occurs.
+ */
+enum serializer_err deserialize(const char *format, void *dst, size_t sz, const void *src, size_t src_sz);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* SERIALIZER_H */