diff options
-rwxr-xr-x | build.sh | 6 | ||||
-rw-r--r-- | main.c | 25 | ||||
-rw-r--r-- | msgpack.c | 267 | ||||
-rw-r--r-- | msgpack.h | 242 | ||||
-rw-r--r-- | msgpack.old.c | 244 | ||||
-rw-r--r-- | spec.md | 553 |
6 files changed, 1337 insertions, 0 deletions
diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..504f926
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+set -xe
+
+gcc -g -Wall -Wextra main.c msgpack.c -o main
+# --error-exitcode makes memory errors reported by valgrind fail the script
+valgrind --error-exitcode=1 ./main
@@ -0,0 +1,25 @@
+#include <inttypes.h>
+#include <stdio.h>
+#include "msgpack.h"
+
+/* Bodies handed to MSGPACK_CHECK2. t, e and a are the type, error code and
+ * extra-info variables the macro declares from the call's return value. */
+#define BODY_SUCCESS printf("%s\n", msgpack_type_string[t])
+#define BODY_ERROR { \
+        printf("ERROR %s %s %s\n", msgpack_error_string[e], \
+               msgpack_type_string[t], msgpack_type_string[a]); \
+        return 1; \
+    }
+
+int main(void)
+{
+    // char buf[] = {0x0F};
+    // int b = 0;
+    // MSGPACK_CHECK2(msgpack_read_bool(&msgpack_init(buf, 1, NULL), &b),(t, e, a),
+    //     BODY_SUCCESS, BODY_ERROR);
+    // printf("VALUE %d\n", b);
+
+    // char buf[] = {0xD3, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x01};
+    /* "uint 8" holding 255. The casts keep the initialisers representable
+     * where plain char is signed. */
+    char buf[] = {(char)0xCC, (char)0xFF};
+    union mp_int m = { .i = 0 };
+    MSGPACK_CHECK2(msgpack_read_int(&msgpack_init(buf, sizeof(buf), NULL), &m),(t, e, a),
+        BODY_SUCCESS, BODY_ERROR);
+    /* m.u is a uint64_t, so print it with the matching conversion */
+    printf("VALUE %" PRIu64 "\n", m.u);
+    return 0;
+}
diff --git a/msgpack.c b/msgpack.c
new file mode 100644
index 0000000..b5ba7ae
--- /dev/null
+++ b/msgpack.c
@@ -0,0 +1,267 @@
+#include "msgpack.h"
+
+#include <limits.h>
+#include <string.h>
+
+/*
+ * Big-endian loads, assembled one byte at a time. Unlike casting the buffer
+ * to uintN_t* they work at any alignment, do not alias the buffer through a
+ * wider integer type, and do not need the non-standard <endian.h>.
+ */
+static inline uint16_t load_be16(const void *p)
+{
+    const unsigned char *b = p;
+    return (uint16_t)(((unsigned)b[0] << 8) | b[1]);
+}
+
+static inline uint32_t load_be32(const void *p)
+{
+    const unsigned char *b = p;
+    return ((uint32_t)b[0] << 24) | ((uint32_t)b[1] << 16)
+         | ((uint32_t)b[2] <<  8) |  (uint32_t)b[3];
+}
+
+static inline uint64_t load_be64(const void *p)
+{
+    const unsigned char *b = p;
+    return ((uint64_t)load_be32(b) << 32) | load_be32(b + 4);
+}
+
+/*
+ * Decodes the 4 big-endian bytes at p as a float into m->f.
+ * The union is cleared first so that on little-endian hosts m->_u still
+ * holds the zero-extended bit pattern; the bits are then copied straight
+ * into the float member so m->f is also right on big-endian hosts.
+ */
+static inline void store_f32(union mp_float *m, const void *p)
+{
+    const uint32_t bits = load_be32(p);
+    _Static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");
+    m->_u = 0;
+    memcpy(&m->f, &bits, sizeof bits);
+}
+
+#define I8(b)  (*(const int8_t *)(b))
+#define I16(b) ((int16_t)load_be16(b))
+#define I32(b) ((int32_t)load_be32(b))
+#define I64(b) ((int64_t)load_be64(b))
+
+#define U8(b)  (*(const uint8_t *)(b))
+#define U16(b) (load_be16(b))
+#define U32(b) (load_be32(b))
+#define U64(b) (load_be64(b))
+
+/* Runs on_fail when the pack holds fewer than n bytes. */
+#define ENOUGH_BYTES(pack, n, on_fail) \
+    do { if((pack)->size < (n)) { on_fail; } } while(0)
+
+// range low, range high
+// Every range is aligned to its own size (a power of two), which is what
+// lets pack_fmt() collapse a byte onto range_low by clearing the low bits.
+#define RANGES(X) \
+    X(0x00, 0x7F) \
+    X(0x80, 0x8F) \
+    X(0x90, 0x9F) \
+    X(0xA0, 0xBF) \
+    X(0xE0, 0xFF)
+
+// byte, fmt, type, _, offset, length
+// FMT_UNKNOWN must stay the first entry: it becomes enumerator 0, which is
+// also what every byte without an entry in byte_to_fmt[] maps to.
+#define FORMAT_NIL(X) \
+    X(0xC1, FMT_UNKNOWN, MSGPACK_UNKNOWN, ;, 1, 0) \
+    X(0xC0, FMT_NIL,     MSGPACK_NIL,     ;, 1, 0)
+
+// byte, fmt, type, _, offset, length, data
+// 0xC2 is false and 0xC3 is true (see "bool format family" in spec.md)
+#define FORMAT_BOOL(X) \
+    X(0xC2, FMT_BOOL_FALSE, MSGPACK_BOOL, ;, 1, 0, 0) \
+    X(0xC3, FMT_BOOL_TRUE,  MSGPACK_BOOL, ;, 1, 0, 1)
+
+// byte, fmt, type, subtype, offset, length, data
+#define FORMAT_INT(X) \
+    X(0x00, FMT_FIX_UINT, MSGPACK_INT, MSGPACK_INT_UNSIGNED, 1, 0, m->u = U8(pack->bin)) \
+    X(0xE0, FMT_FIX_INT,  MSGPACK_INT, MSGPACK_INT_SIGNED,   1, 0, m->i = I8(pack->bin)) \
+    X(0xCC, FMT_U8,       MSGPACK_INT, MSGPACK_INT_UNSIGNED, 1, 1, m->u = U8(pack->bin+1)) \
+    X(0xCD, FMT_U16,      MSGPACK_INT, MSGPACK_INT_UNSIGNED, 1, 2, m->u = U16(pack->bin+1)) \
+    X(0xCE, FMT_U32,      MSGPACK_INT, MSGPACK_INT_UNSIGNED, 1, 4, m->u = U32(pack->bin+1)) \
+    X(0xCF, FMT_U64,      MSGPACK_INT, MSGPACK_INT_UNSIGNED, 1, 8, m->u = U64(pack->bin+1)) \
+    X(0xD0, FMT_I8,       MSGPACK_INT, MSGPACK_INT_SIGNED,   1, 1, m->i = I8(pack->bin+1)) \
+    X(0xD1, FMT_I16,      MSGPACK_INT, MSGPACK_INT_SIGNED,   1, 2, m->i = I16(pack->bin+1)) \
+    X(0xD2, FMT_I32,      MSGPACK_INT, MSGPACK_INT_SIGNED,   1, 4, m->i = I32(pack->bin+1)) \
+    X(0xD3, FMT_I64,      MSGPACK_INT, MSGPACK_INT_SIGNED,   1, 8, m->i = I64(pack->bin+1))
+
+// byte, fmt, type, subtype, offset, length, data
+#define FORMAT_FLOAT(X) \
+    X(0xCA, FMT_FLOAT,  MSGPACK_FLOAT, MSGPACK_FLOAT_32, 1, 4, store_f32(m, pack->bin+1)) \
+    X(0xCB, FMT_DOUBLE, MSGPACK_FLOAT, MSGPACK_FLOAT_64, 1, 8, m->_u = U64(pack->bin+1))
+
+// byte, fmt, type, subtype, offset, length
+// offset is the header size (format byte plus the length field); the length
+// field is read unsigned so payloads of 128..255 bytes are not sign-extended.
+#define FORMAT_RAW(X) \
+    X(0xA0, FMT_FIX_STR, MSGPACK_RAW, MSGPACK_RAW_STRING, 1, pack->bin[0] & 0x1F) \
+    X(0xC4, FMT_BIN8,    MSGPACK_RAW, MSGPACK_RAW_BIN,    2, U8(pack->bin+1)) \
+    X(0xC5, FMT_BIN16,   MSGPACK_RAW, MSGPACK_RAW_BIN,    3, U16(pack->bin+1)) \
+    X(0xC6, FMT_BIN32,   MSGPACK_RAW, MSGPACK_RAW_BIN,    5, U32(pack->bin+1)) \
+    X(0xD9, FMT_STR8,    MSGPACK_RAW, MSGPACK_RAW_STRING, 2, U8(pack->bin+1)) \
+    X(0xDA, FMT_STR16,   MSGPACK_RAW, MSGPACK_RAW_STRING, 3, U16(pack->bin+1)) \
+    X(0xDB, FMT_STR32,   MSGPACK_RAW, MSGPACK_RAW_STRING, 5, U32(pack->bin+1))
+
+// byte, fmt, type, elements, offset, length
+// For arrays and maps "length" is the size of the whole object, header
+// included, as returned by elements_length().
+#define FORMAT_ARRAY(X) \
+    X(0x90, FMT_FIX_ARRAY, MSGPACK_ARRAY, (size_t)(pack->bin[0] & 0x0F), 1, elements_length(pack, fmt)) \
+    X(0xDC, FMT_ARRAY16,   MSGPACK_ARRAY, (size_t)U16(pack->bin+1),      3, elements_length(pack, fmt)) \
+    X(0xDD, FMT_ARRAY32,   MSGPACK_ARRAY, (size_t)U32(pack->bin+1),      5, elements_length(pack, fmt))
+// a map with N pairs is walked as 2*N consecutive objects
+#define FORMAT_MAP(X) \
+    X(0x80, FMT_FIX_MAP, MSGPACK_MAP, 2 * (size_t)(pack->bin[0] & 0x0F), 1, elements_length(pack, fmt)) \
+    X(0xDE, FMT_MAP16,   MSGPACK_MAP, 2 * (size_t)U16(pack->bin+1),      3, elements_length(pack, fmt)) \
+    X(0xDF, FMT_MAP32,   MSGPACK_MAP, 2 * (size_t)U32(pack->bin+1),      5, elements_length(pack, fmt))
+
+// ext formats are not implemented yet; their bytes decode as FMT_UNKNOWN
+#define FORMAT_EXT(X)
+
+#define FORMATS(X) \
+    FORMAT_NIL  (X) \
+    FORMAT_BOOL (X) \
+    FORMAT_INT  (X) \
+    FORMAT_FLOAT(X) \
+    FORMAT_RAW  (X) \
+    FORMAT_ARRAY(X) \
+    FORMAT_MAP  (X) \
+    FORMAT_EXT  (X)
+
+#define X_FMT_ENUM(_byte, fmt, ...)       fmt,
+#define X_FMT_TYPE(_byte, fmt, type, ...) [fmt] = type,
+#define X_BYTE_FMT( byte, fmt, ...)       [byte] = fmt,
+
+enum msgpack_fmt {
+    FORMATS(X_FMT_ENUM)
+};
+
+_Static_assert(FMT_UNKNOWN == 0,
+               "bytes missing from byte_to_fmt[] must decode as FMT_UNKNOWN");
+
+static const enum msgpack_type fmt_to_type[] = {
+    FORMATS(X_FMT_TYPE)
+};
+/* Indexed by the first byte after pack_fmt() has masked the ranged formats,
+ * so the largest index in use is 0xE0. */
+static const enum msgpack_fmt byte_to_fmt[] = {
+    FORMATS(X_BYTE_FMT)
+};
+
+static enum msgpack_fmt pack_fmt(const msgpack_t *pack);
+static size_t elements_length(msgpack_t *pack, enum msgpack_fmt fmt);
+static size_t pack_size(msgpack_t *pack);
+
+
+/*
+ * Return-code encoding: bits [2:0] type, bits [5:3] error, bits [6..] extra
+ * info, sign bit set on error. INT_MIN supplies the sign bit; shifting 1
+ * into it would be undefined behaviour for a signed int.
+ */
+#define SUCCESS(type)    SUCCESS2(type, 0)
+#define ERROR(type, err) ERROR2(type, err, 0)
+#define SUCCESS2(type, rest) \
+    (((rest) << 6) | (type))
+#define ERROR2(type, err, rest) \
+    (INT_MIN | ((rest) << 6) | ((err) << 3) | (type))
+
+/*
+ * Common argument validation for the msgpack_read_* functions: rejects a
+ * missing or empty pack, a nil value (format byte 0xC0) and a NULL output
+ * pointer. The byte is compared as unsigned char because plain char may be
+ * signed, in which case it could never equal 0xC0.
+ */
+#define CHECK_READ_ARGS(type, pack, m) \
+    do { \
+        if(!(pack) || !(pack)->bin || (pack)->size == 0) \
+            return ERROR((type), MSGPACK_ERROR_INVALID_PACK); \
+        if((unsigned char)(pack)->bin[0] == 0xC0) \
+            return ERROR2((type), MSGPACK_ERROR_WRONG_TYPE, MSGPACK_NIL); \
+        if(!(m)) \
+            return ERROR((type), MSGPACK_ERROR_INVALID_ARGUMENT); \
+    } while(0)
+
+
+
+#define X_READ_INT(_byte, x_fmt, x_type, x_subtype, x_offset, x_length, x_data) \
+    case x_fmt: { \
+        ENOUGH_BYTES(pack, (x_offset) + (x_length), \
+            return ERROR2(x_type, MSGPACK_ERROR_UNEXPECTED_END, x_subtype)); \
+        x_data; \
+        return SUCCESS2(x_type, x_subtype); \
+    }
+
+/*
+ * Reads the integer at the start of pack into m (m->u for the unsigned
+ * formats, m->i for the signed ones). On success the extra-info bits carry
+ * MSGPACK_INT_SIGNED or MSGPACK_INT_UNSIGNED; on a type mismatch they carry
+ * the type that was actually found.
+ */
+int msgpack_read_int(const msgpack_t *pack, union mp_int *m)
+{
+    CHECK_READ_ARGS(MSGPACK_INT, pack, m);
+
+    enum msgpack_fmt fmt = pack_fmt(pack);
+
+    switch(fmt) {
+        FORMAT_INT(X_READ_INT)
+        default:
+            return ERROR2(MSGPACK_INT, MSGPACK_ERROR_WRONG_TYPE, fmt_to_type[fmt]);
+    }
+}
+
+#define X_READ_BOOL(_byte, x_fmt, x_type, _, _offset, _length, x_data) \
+    case x_fmt: { \
+        *m = x_data; \
+        return SUCCESS(x_type); \
+    }
+
+/* Reads the bool at the start of pack into *m as 0 (false) or 1 (true). */
+int msgpack_read_bool(const msgpack_t *pack, int *m)
+{
+    CHECK_READ_ARGS(MSGPACK_BOOL, pack, m);
+
+    enum msgpack_fmt fmt = pack_fmt(pack);
+
+    switch(fmt) {
+        FORMAT_BOOL(X_READ_BOOL)
+        default:
+            return ERROR2(MSGPACK_BOOL, MSGPACK_ERROR_WRONG_TYPE, fmt_to_type[fmt]);
+    }
+}
+
+#define X_READ_FLOAT(...) X_READ_INT(__VA_ARGS__)
+
+/*
+ * Reads the float at the start of pack into m: m->f for float 32, m->d for
+ * float 64. The extra-info bits of a successful return say which one.
+ */
+int msgpack_read_float(const msgpack_t *pack, union mp_float *m)
+{
+    CHECK_READ_ARGS(MSGPACK_FLOAT, pack, m);
+
+    enum msgpack_fmt fmt = pack_fmt(pack);
+
+    switch(fmt) {
+        FORMAT_FLOAT(X_READ_FLOAT)
+        default:
+            return ERROR2(MSGPACK_FLOAT, MSGPACK_ERROR_WRONG_TYPE, fmt_to_type[fmt]);
+    }
+}
+
+/* Declared but not implemented yet in this file. */
+int msgpack_read_raw (const msgpack_t *pack, struct mp_bin *m);
+int msgpack_read_raw_cpy (const msgpack_t *pack, struct mp_bin *m);
+int msgpack_read_ext (const msgpack_t *pack, struct mp_bin *m);
+int msgpack_read_ext_cpy (const msgpack_t *pack, struct mp_bin *m);
+
+int msgpack_read_array (msgpack_t *pack, size_t *length);
+int msgpack_read_map (msgpack_t *pack, size_t *length);
+int msgpack_read_array2 (const msgpack_t *pack, struct mp_array *m);
+int msgpack_read_map2 (const msgpack_t *pack, struct mp_map *m);
+
+
+int msgpack_write_int (msgpack_t *pack, const union mp_int *m, int subtype);
+int msgpack_write_bool (msgpack_t *pack, const int *m);
+int msgpack_write_float (msgpack_t *pack, const union mp_float *m, int subtype);
+
+int msgpack_write_raw (msgpack_t *pack, const struct mp_bin *m, int subtype);
+int msgpack_write_ext (msgpack_t *pack, const struct mp_bin *m, int subtype);
+
+int msgpack_write_array (msgpack_t *pack, const size_t *length);
+int msgpack_write_map (msgpack_t *pack, const size_t *length);
+
+
+#define X_COMPLEX_LENGTH(_byte, x_fmt, _type, x_elements, x_offset, x_length) \
+    case x_fmt: { \
+        ENOUGH_BYTES(pack, (x_offset), return 0); \
+        offset = (x_offset); \
+        elements = (x_elements); \
+    } break;
+
+/*
+ * Returns the size in bytes of the whole array or map at the start of pack,
+ * header included, by walking each contained object with pack_size().
+ * Returns 0 when fmt is not an array/map format, when the pack is truncated
+ * or when an element cannot be sized. Nested containers recurse through
+ * pack_size().
+ */
+static size_t elements_length(msgpack_t *pack, enum msgpack_fmt fmt)
+{
+    size_t offset = 0;
+    size_t elements = 0;
+
+    switch(fmt) {
+        FORMAT_ARRAY(X_COMPLEX_LENGTH)
+        FORMAT_MAP(X_COMPLEX_LENGTH)
+        default: return 0;
+    }
+
+    for(size_t i = 0; i < elements; i++) {
+        msgpack_t new_pack = {pack->bin + offset,
+            pack->size - offset, pack->membuf};
+        size_t len = pack_size(&new_pack);
+        if(len == 0) return 0;
+
+        offset += len;
+        ENOUGH_BYTES(pack, offset, return 0);
+    }
+
+    return offset;
+}
+
+#define X_PACK_SIZE(_byte, x_fmt, _type, _, x_offset, x_length, ...) \
+    case x_fmt: { \
+        ENOUGH_BYTES(pack, (x_offset), return 0); \
+        offset = (x_offset); \
+        length = (x_length); \
+    } break;
+
+/*
+ * Returns the encoded size in bytes of the object at the start of pack, or 0
+ * when it cannot be determined (empty pack, unknown or unsupported format
+ * byte, truncated header, malformed container).
+ */
+static size_t pack_size(msgpack_t *pack)
+{
+    enum msgpack_fmt fmt = pack_fmt(pack);
+
+    size_t offset = 0;
+    size_t length = 0;
+
+    /* 0xC1 and every byte without a table entry (e.g. the ext formats) */
+    if(fmt == FMT_UNKNOWN) return 0;
+
+    switch(fmt) {
+        FORMATS(X_PACK_SIZE)
+        default: return 0;
+    }
+
+    /* elements_length() already counts the container header, and reports
+     * failure as 0, so its result is the answer as-is */
+    if(fmt_to_type[fmt] == MSGPACK_ARRAY || fmt_to_type[fmt] == MSGPACK_MAP)
+        return length;
+
+    return offset + length;
+}
+
+#define X_MASK_RANGE(range_low, range_high) \
+    if(byte >= range_low && byte <= range_high) \
+        byte &= ~(uint8_t)(range_high-range_low); \
+    else
+
+/*
+ * Maps the first byte of pack to its format. Bytes inside one of RANGES are
+ * first collapsed onto the low end of their range, which is the key used in
+ * byte_to_fmt[]. Returns FMT_UNKNOWN for an empty pack.
+ */
+static enum msgpack_fmt pack_fmt(const msgpack_t *pack)
+{
+    ENOUGH_BYTES(pack, 1, return FMT_UNKNOWN);
+    unsigned char byte = pack->bin[0];
+
+    RANGES(X_MASK_RANGE){};
+    return byte_to_fmt[byte];
+}
diff --git a/msgpack.h b/msgpack.h
new file mode 100644
index 0000000..fc3ab9a
--- /dev/null
+++ b/msgpack.h
@@ -0,0 +1,242 @@
+#ifndef MSGPACK_H
+#define MSGPACK_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/* -- Data structures --
+ *
+ * These structs are used for decoding the MessagePack format.
+ * They follow the format specification and their members are as
+ * straight-forward as possible.
+ *
+ * They are meant to be stack allocated, and be passed as pointers
+ * to the appropriate function and be filled.
This library does no
+ * memory allocations by default (except one function that returns
+ * a generic and needs a function ptr to an allocator to be passed
+ * as an argument, see 'msgpack_read(...)')
+ * NOTE(review): 'msgpack_read' is not declared anywhere in this header.
+ *
+ * msgpack_t:
+ *   - stores the current substring and its size
+ *   - can use a membuf to cache the length of
+ *     its substrings
+ *   - created with the 'msgpack_init' macro, by passing
+ *     the binary string, size, and optional membuf
+ *     (set to null if not using it)
+ *
+ * struct mp_membuf:
+ *   - used to store substring length
+ *   - passed down other 'msgpack_t's
+ *     in arrays and maps
+ *   - created with the 'membuf_init' macro by
+ *     passing the starting address of the
+ *     binary string as an 'uintptr_t' and
+ *     an already allocated size_t *, with
+ *     as many members as bytes in the bin
+ *     string
+ *
+ * ...
+ *
+ */
+
+typedef struct mp_msgpack {
+    char *bin;      /* first byte of the encoded object */
+    size_t size;    /* number of bytes available at bin */
+
+    struct mp_membuf {
+        uintptr_t offset;   /* address of the start of the whole binary string */
+        size_t *buf;        /* caller-allocated length cache */
+    } *membuf;              /* optional, may be NULL */
+} msgpack_t;
+
+/* Both macros expand to compound literals, so the value lives in the
+ * enclosing block and its address may be taken directly. */
+#define msgpack_init(bin, size, membuf) \
+    ((msgpack_t){(bin), (size), (membuf)})
+/* Initialises {offset, buf} from the macro's own arguments (the previous
+ * expansion named the undeclared identifiers 'offset' and 'buf'). */
+#define membuf_init(uintptr_bin, size_t_buf) \
+    ((struct mp_membuf){(uintptr_bin), (size_t_buf)})
+
+/* Integer value: read 'u' for the unsigned subtypes, 'i' for the signed ones */
+union mp_int {
+    int64_t i;
+    uint64_t u;
+};
+
+/* Float value: 'f' for float 32, 'd' for float 64; '_u' is the raw bit
+ * pattern used while decoding */
+union mp_float {
+    float f;
+    double d;
+    uint64_t _u;
+};
+
+struct mp_array {
+    msgpack_t *members;
+    size_t length;
+};
+
+struct mp_map {
+    msgpack_t *keys;
+    msgpack_t *members;
+    size_t length;
+};
+
+struct mp_bin {
+    char* bin;
+    size_t size;
+};
+
+/* NOTE(review): placeholder. A struct without members is a GNU extension,
+ * not ISO C; give it members before relying on it. */
+struct mp_timestamp {
+    // ...
+};
+
+
+/* -- API for this library --
+ *
+ * Arguments:
+ *   - pack is a ptr to a struct that holds the msgpack binary
+ *   - m is a ptr to allocated memory that will be filled by the function
+ *
+ * Return value:
+ *   - The [2-0] bits are the type of the
+ *     read value (useful for msgpack_read);
+ *   - If an error is encountered, the sign bit
+ *     is set, and;
+ *   - The [5-3] bits are the error code.
+ *
+ * NOTE(review): enum msgpack_type has nine main types, so MSGPACK_UNKNOWN
+ * (value 8) does not fit in the three type bits.
+ *
+ * Use the MSGPACK_CHECK functions to easily
+ * manage errors.
+ *
+ * Example:
+ *   0b|1|...|00000000|001|101
+ *      |  |     |      |   |
+ *     err | byte info  | type[5]
+ *         |            |
+ *   not important   error[1]
+ *
+ * i.e. a failed call (sign bit set) whose type field is 5 (MSGPACK_ARRAY)
+ * and whose error field is 1 (MSGPACK_ERROR_WRONG_TYPE), using the enum
+ * values declared further down in this header.
+ *
+ */
+
+/* Scalar readers: decode the value at the start of 'pack' into '*m'. */
+int msgpack_read_int (const msgpack_t *pack, union mp_int *m);
+int msgpack_read_bool (const msgpack_t *pack, int *m);
+int msgpack_read_float (const msgpack_t *pack, union mp_float *m);
+
+/* Raw (str/bin) and ext readers.
+ * NOTE(review): the '_cpy' variants presumably copy the payload instead of
+ * pointing into 'pack'; confirm once they are implemented. */
+int msgpack_read_raw (const msgpack_t *pack, struct mp_bin *m);
+int msgpack_read_raw_cpy (const msgpack_t *pack, struct mp_bin *m);
+int msgpack_read_ext (const msgpack_t *pack, struct mp_bin *m);
+int msgpack_read_ext_cpy (const msgpack_t *pack, struct mp_bin *m);
+
+/* Container readers. */
+int msgpack_read_array (msgpack_t *pack, size_t *length);
+int msgpack_read_map (msgpack_t *pack, size_t *length);
+int msgpack_read_array2 (const msgpack_t *pack, struct mp_array *m);
+int msgpack_read_map2 (const msgpack_t *pack, struct mp_map *m);
+
+
+/* Writers; 'subtype' is expected to be one of the MSGPACK_*_ subtypes. */
+int msgpack_write_int (msgpack_t *pack, const union mp_int *m, int subtype);
+int msgpack_write_bool (msgpack_t *pack, const int *m);
+int msgpack_write_float (msgpack_t *pack, const union mp_float *m, int subtype);
+
+int msgpack_write_raw (msgpack_t *pack, const struct mp_bin *m, int subtype);
+int msgpack_write_ext (msgpack_t *pack, const struct mp_bin *m, int subtype);
+
+int msgpack_write_array (msgpack_t *pack, const size_t *length);
+int msgpack_write_map (msgpack_t *pack, const size_t *length);
+
+/* -- MessagePack Types --
+ *
+ * These types are defined by the messagepack specification
+ * and X-Macros are used to generate enums and arrays with
+ * their string counterparts.
+ *
+ * There are subtypes for some of the main types,
+ * which are not present in the specification, but
+ * can be useful for the client
+ *
+ */
+
+#define MSGPACK_TYPES(X) \
+    X(MSGPACK_NIL) \
+    X(MSGPACK_INT) \
+    X(MSGPACK_BOOL) \
+    X(MSGPACK_FLOAT) \
+    X(MSGPACK_RAW) \
+    X(MSGPACK_ARRAY) \
+    X(MSGPACK_MAP) \
+    X(MSGPACK_EXT) \
+    X(MSGPACK_UNKNOWN)
+
+#define MSGPACK_SUBTYPES(X) \
+    X(MSGPACK_INT_SIGNED) \
+    X(MSGPACK_INT_UNSIGNED) \
+    X(MSGPACK_FLOAT_32) \
+    X(MSGPACK_FLOAT_64) \
+    X(MSGPACK_RAW_STRING) \
+    X(MSGPACK_RAW_BIN) \
+    X(MSGPACK_EXT_TIMESTAMP)
+
+#define MSGPACK_ERRORS(X) \
+    X(MSGPACK_ERROR_UNKNOWN) \
+    X(MSGPACK_ERROR_WRONG_TYPE) \
+    X(MSGPACK_ERROR_UNSUFFICIENT_CAPACITY) \
+    X(MSGPACK_ERROR_UNEXPECTED_END) \
+    X(MSGPACK_ERROR_INVALID_PACK) \
+    X(MSGPACK_ERROR_INVALID_ARGUMENT)
+
+#define X_TO_ENUM(e) e,
+#define X_TO_STRING(e) [e] = #e,
+
+/* Main types first, then the subtypes, in one enum so a single string table
+ * can name both. */
+enum msgpack_type {
+    MSGPACK_TYPES(X_TO_ENUM)
+    MSGPACK_SUBTYPES(X_TO_ENUM)
+};
+
+enum msgpack_error {
+    MSGPACK_ERRORS(X_TO_ENUM)
+};
+
+static const char * const msgpack_type_string[] = {
+    MSGPACK_TYPES(X_TO_STRING)
+    MSGPACK_SUBTYPES(X_TO_STRING)
+};
+static const char * const msgpack_error_string[] = {
+    MSGPACK_ERRORS(X_TO_STRING)
+};
+
+/* -- Macros for error handling --
+ *
+ * Arguments:
+ *   - call is the function call to a msgpack_*** function;
+ *   - body_*** is a scope that is invoked upon error or success,
+ *     it uses the variable names from the 'arg' variable;
+ *   - arg is the parenthesised list of names for the variables that hold the:
+ *       - msgpack type;
+ *       - msgpack error code;
+ *       - extra byte data,
+ *     which are extracted from the return value of the function call.
+ *
+ * A body must not contain a comma outside parentheses, otherwise the
+ * preprocessor splits it into several macro arguments.
+ *
+ * Example (for a wrong-type error the type field is the type that was asked
+ * for and the extra data is the type that was found):
+ *   MSGPACK_CHECK(msgpack_read_bool(&pack, &b), (t, e, a), {
+ *       if(e == MSGPACK_ERROR_WRONG_TYPE) {
+ *           err("msgpack: %s, expected type %s, but got type %s",
+ *               msgpack_error_string[e],
+ *               msgpack_type_string[t],
+ *               msgpack_type_string[a]);
+ *       }
+ *   });
+ *
+ */
+
+
+
+/* Internal helpers. Names avoid the leading double underscore, which is
+ * reserved for the implementation. */
+#define MSGPACK_EXPAND_(...) __VA_ARGS__
+#define MSGPACK_CHECK_DEFER_(...) MSGPACK_CHECK_IMPL_(__VA_ARGS__)
+/* The return code is decoded from its unsigned bit pattern, so no negative
+ * value is ever right-shifted. The (void) casts keep -Wextra quiet when a
+ * body ignores one of the variables. */
+#define MSGPACK_CHECK_IMPL_(call, body_suc, body_err, type_var, err_var, rest_var) \
+    do { \
+        const int msgpack_check_ret_ = (call); \
+        const unsigned msgpack_check_bits_ = (unsigned)msgpack_check_ret_; \
+        int (type_var) = (int)(msgpack_check_bits_ & 0x7u); \
+        int (err_var)  = (int)((msgpack_check_bits_ >> 3) & 0x7u); \
+        int (rest_var) = (int)((msgpack_check_bits_ >> 6) & 0xFFu); \
+        (void)(type_var); (void)(err_var); (void)(rest_var); \
+        if(msgpack_check_ret_ >= 0) { body_suc; } \
+        else { body_err; } \
+    } while(0)
+
+#define MSGPACK_CHECK(call, arg, body_err) \
+    MSGPACK_CHECK_DEFER_(call, ;, body_err, MSGPACK_EXPAND_ arg)
+#define MSGPACK_CHECK2(call, arg, body_suc, body_err) \
+    MSGPACK_CHECK_DEFER_(call, body_suc, body_err, MSGPACK_EXPAND_ arg)
+
+#endif
diff --git a/msgpack.old.c b/msgpack.old.c
new file mode 100644
index 0000000..9a7be3c
--- /dev/null
+++ b/msgpack.old.c
@@ -0,0 +1,244 @@
+// NOTE(review): earlier draft of the decoder. build.sh compiles only main.c
+// and msgpack.c, so this file is not part of the build; the code below is
+// left exactly as committed.
+#include "msgpack.h"
+
+// read big endian ints
+#define M_I16(b) (b[0] << 8) | (b[1])
+#define M_I32(b) (b[0] << 24) | (b[1] << 16) \
+    | (b[2] << 8) | (b[3])
+#define M_I64(b) (b[0] << 56) | (b[1] << 48) \
+    | (b[2] << 40) | (b[3] << 32) \
+    | (b[4] << 24) | (b[5] << 16) \
+    | (b[6] << 8) | (b[7])
+
+// Todo: add memtable for the lengths, the index is the current index from the array,
+// the msgpack_t struct has a pointer to such a table
+// macro making sure there are enough bytes before reading them
+#define ENOUGH_BYTES(pack, n, on_fail) if((pack)->size < (n)) { on_fail; }
+#define EB(pack, n) ENOUGH_BYTES(pack, (n)+1, return 0)
+
+// byte, type, format, length expr, condition
+#define BYTE_FORMATS(X, pack) \
+    X(0xc0, MSGPACK_NIL, FMT_NIL, 0, ;) \
+    X(0xc1, MSGPACK_UNKNOWN, FMT_UNUSED, 0, ;) \
+    X(0xc2, MSGPACK_BOOL, FMT_FALSE, 0, ;) \
+    X(0xc3, MSGPACK_BOOL, FMT_TRUE, 0, ;) \
+    \
+    X(0xc4, MSGPACK_RAW, FMT_BIN8, 1+ pack->bin[1], EB(pack, 1)) \
+    X(0xc5, MSGPACK_RAW, FMT_BIN16, 2+ M_I16(pack->bin+1), EB(pack, 2)) \
+    X(0xc6, MSGPACK_RAW, FMT_BIN32, 4+ M_I32(pack->bin+1), EB(pack, 4)) \
+    X(0xc7, MSGPACK_EXT, FMT_EXT8, 1+ pack->bin[1], EB(pack, 1)) \
+    X(0xc8, MSGPACK_EXT, FMT_EXT16, 2+ M_I16(pack->bin+1), EB(pack, 2)) \
+    X(0xc9, MSGPACK_EXT, FMT_EXT32, 4+ M_I32(pack->bin+1), EB(pack, 4)) \
+    \
+    X(0xca, MSGPACK_FLOAT, FMT_F32, 4, ;) \
+    X(0xcb, MSGPACK_FLOAT, FMT_F64, 8, ;) \
+    X(0xcc, MSGPACK_INT, FMT_U8, 1, ;) \
+    X(0xcd, MSGPACK_INT, FMT_U16, 2, ;) \
+    X(0xce, MSGPACK_INT, FMT_U32, 4, ;) \
+    X(0xcf, MSGPACK_INT, FMT_U64, 8, ;) \
+    X(0xd0, MSGPACK_INT, FMT_I8, 1, ;) \
+    X(0xd1, MSGPACK_INT, FMT_I16, 2, ;) \
+    X(0xd2, MSGPACK_INT, FMT_I32, 4, ;) \
+    X(0xd3, MSGPACK_INT, FMT_I64, 8, ;) \
+    X(0xd4, MSGPACK_EXT, FMT_FIX_EXT1, 1+ 1, ;) \
+    X(0xd5, MSGPACK_EXT, FMT_FIX_EXT2, 1+ 2, ;) \
+    X(0xd6, MSGPACK_EXT, FMT_FIX_EXT4, 1+ 4, ;) \
+    X(0xd7, MSGPACK_EXT, FMT_FIX_EXT8, 1+ 8, ;) \
+    X(0xd8, MSGPACK_EXT, FMT_FIX_EXT16, 1+ 16, ;) \
+    \
+    X(0xd9, MSGPACK_RAW, FMT_STR8, 1+ pack->bin[1], EB(pack, 1)) \
+    X(0xda, MSGPACK_RAW, FMT_STR16, 2+ M_I16(pack->bin+1), EB(pack, 2)) \
+    X(0xdb, MSGPACK_RAW, FMT_STR32, 4+ M_I32(pack->bin+1), EB(pack, 4)) \
+    \
+    X(0xdc, MSGPACK_ARRAY, FMT_ARRAY16, 2+ complex_length(pack, FMT_ARRAY16), EB(pack, 2)) \
+    X(0xdd, MSGPACK_ARRAY, FMT_ARRAY32, 4+ complex_length(pack, FMT_ARRAY32), EB(pack, 4)) \
+    X(0xde, MSGPACK_MAP, FMT_MAP16, 2+ complex_length(pack, FMT_MAP16), EB(pack, 2)) \
+    X(0xdf, MSGPACK_MAP, FMT_MAP32, 4+ complex_length(pack, FMT_MAP32), EB(pack, 4))
+
+// just mask these ranges, don't check them
+// use the logical and of the ranges as a mask
+
+// range low, range high, type, format, length expr
+#define RANGE_FORMATS(X, pack) \
+    X(0x00, 0x7f, MSGPACK_INT, FMT_FIX_U, 0) \
+    X(0x80, 0x8f, MSGPACK_MAP, FMT_FIX_MAP, complex_length(pack, FMT_FIX_MAP)) \
+    X(0x90, 0x9f, MSGPACK_ARRAY, FMT_FIX_ARRAY, complex_length(pack, FMT_FIX_ARRAY)) \
+    X(0xa0, 0xbf, MSGPACK_STRING, FMT_FIX_STR, pack[0] & 01F) \
+    X(0xe0, 0xff, MSGPACK_INT, FMT_FIX_I, 0)
+
+#define X_TO_ENUM2(_1, _2, fmt, ...) fmt,
+#define X_TO_ENUM3(_1, _2, _3, fmt, ...) fmt,
+
+enum msgpack_fmt {
+    BYTE_FORMATS(X_TO_ENUM2,;)
+    RANGE_FORMATS(X_TO_ENUM3,;)
+};
+
+#define X_TO_FMT_TYPE1(_1, type, fmt, ...) [fmt] = type,
+#define X_TO_FMT_TYPE2(_1, _2, type, fmt, ...) [fmt] = type,
+
+static const enum msgpack_type fmt_to_type[] = {
+    BYTE_FORMATS(X_TO_FMT_TYPE1,;)
+    RANGE_FORMATS(X_TO_FMT_TYPE2,;)
+};
+
+
+static size_t complex_length(msgpack_t *pack, enum msgpack_fmt fmt);
+static size_t pack_lenght(msgpack_t pack);
+static enum msgpack_fmt pack_fmt(msgpack_t pack);
+
+#define SUCCESS(type) (type)
+#define ERROR(type, err, rest) ((rest << 6) | (err << 3) | (type))
+
+#define CHECK_ARGS(type, pack, m) \
+    do { \
+        if(!(pack) || !(pack)->bin || (pack)->size == 0) \
+            return ERROR((type), MSGPACK_ERROR_INVALID_PACK, 0); \
+        if((pack)->bin[0] == 0xc) \
+            return ERROR((type), MSGPACK_ERROR_WRONG_TYPE, MSGPACK_NIL);\
+        if(!(m)) \
+            return ERROR((type), MSGPACK_ERROR_INVALID_ARGUMENT, 0); \
+    } while(0)
+
+int msgpack_read(msgpack_t *pack, msgpack_allocator_t alloc, void **m_ptr);
+
+int msgpack_read_int(msgpack_t *pack, msgpack_int_t *m)
+{
+    CHECK_ARGS(MSGPACK_INT, pack, m);
+
+    enum msgpack_fmt fmt = pack_fmt(pack);
+    switch(fmt) {
+    default:
+        return ERROR(MSGPACK_INT, MSGPACK_ERROR_WRONG_TYPE, fmt_to_type[fmt]);
+    }
+
+    return SUCCESS(MSGPACK_INT);
+}
+
+int msgpack_read_bool(msgpack_t *pack, msgpack_bool_t *m);
+int msgpack_read_float(msgpack_t *pack, msgpack_int_t *m);
+int msgpack_read_string(msgpack_t *pack, msgpack_string_t *m);
+int msgpack_read_bin(msgpack_t *pack, msgpack_bin_t *m);
+
+int msgpack_read_array(msgpack_t *pack, msgpack_array_t *m)
+{
+    CHECK_ARGS(MSGPACK_ARRAY, pack, m);
+
+    enum msgpack_fmt fmt = pack_fmt(pack);
+    size_t offset = 0;
+    size_t elements = 0;
+
+    switch(fmt) {
+    case FMT_FIX_ARRAY:
+        offset = 1;
+        elements = pack->bin[0] & 0x0F;
+        break;
+    case FMT_ARRAY16:
+        offset = 3;
+        ENOUGH_BYTES(pack, offset, return 0);
+        elements = M_I16(pack->bin+1);
+        break;
+    case FMT_ARRAY32:
+        offset = 5;
+        ENOUGH_BYTES(pack, offset, return 0);
+        elements = M_I32(pack->bin+1);
+        break;
+    default:
+        return ERROR(MSGPACK_ARRAY, MSGPACK_ERROR_WRONG_TYPE, fmt_to_type[fmt]);
+    }
+
+    if(m->length < elements) {
+        m->length = elements;
+        return ERROR(MSGPACK_ARRAY, MSGPACK_ERROR_UNSUFFICIENT_CAPACITY, 0);
+    }
+
+    for(size_t i = 0; i < elements; i++) {
+        msgpack_t new_pack = {pack->bin+offset, size-offset};
+        size_t len = pack_lenght(&new_pack) + 1;
+
+        ENOUGH_BYTES(pack, offset + len,
+            return ERROR(MSGPACK_ARRAY, MSGPACK_ERROR_UNEXPECTED_END, 0));
+
+        m->members[i].bin = pack->bin + offset;
+        m->members[i].size = len;
+
+        offset += len;
+    }
+
+    return SUCCESS(MSGPACK_ARRAY);
+}
+
+int msgpack_read_map(msgpack_t *pack, msgpack_map_t *m);
+
+static size_t complex_length(msgpack_t *pack, enum msgpack_fmt fmt)
+{
+    ENOUGH_BYTES(pack, 1, return 0);
+
+    size_t offset = 0;
+    size_t elements = 0;
+
+    switch(fmt) {
+    case FMT_FIX_MAP:
+    case FMT_FIX_ARRAY:
+        offset = 1;
+        elements = pack->bin[0] & 0x0F;
+        break;
+    case FMT_MAP16:
+    case FMT_ARRAY16:
+        offset = 3;
+        ENOUGH_BYTES(pack, offset, return 0);
+        elements = M_I16(pack->bin+1);
+        break;
+    case FMT_MAP32:
+    case FMT_ARRAY32:
+        offset = 5;
+        ENOUGH_BYTES(pack, offset, return 0);
+        elements = M_I32(pack->bin+1);
+        break;
+    }
+
+    if(fmt_to_type[fmt] == MSGPACK_MAP) elements *= 2;
+
+    for(size_t i = 0; i < elements; i++) {
+        msgpack_t new_pack = {pack->bin+offset, size-offset};
+        size_t len = pack_lenght(new_pack) + 1;
+        if(len == 0) return 0;
+
+        offset += len;
+        ENOUGH_BYTES(pack, offset, return 0);
+    }
+
+    return offset - 1;
+}
+
+#define WRAP_BYTE_FORMATS(X, pack) \
+    switch(pack->bin[0]) { BYTE_FORMATS(X, pack) }
+#define WRAP_RANGE_FORMATS(X, pack) \
+    char byte = pack->bin[0]; RANGE_FORMATS(X, pack) {}
+
+#define BYTE_TO_LENGHT(byte, _type, _fmt, length_expr, cond) \
+    case byte: cond; return (length_expr) +1; break;
+#define BYTE_TO_TYPE(byte, type, _fmt _length_expr, _cond) \
+    case byte: return type; break;
+
+#define RANGE_TO_LENGHT(bytel, byteh, _type, _fmt, length_expr) \
+    if(bytel <= byte && byteh >= pack->bin[0]) { return (length_expr) + 1; } else
+#define RANGE_TO_TYPE(bytel, byteh, type, _fmt, _length) \
+    if(bytel <= byte && byteh >= pack->bin[0]) { return type; } else
+
+static size_t pack_lenght(msgpack_t pack)
+{
+    ENOUGH_BYTES(pack, 1, return 0);
+    WRAP_BYTE_FORMATS(BYTE_TO_LENGTH, pack);
+    WRAP_RANGE_FORMATS(RANGE_TO_LENGTH, pack);
+
+    return 0;
+}
+
+static enum msgpack_fmt pack_fmt(msgpack_t pack)
+{
+    ENOUGH_BYTES(pack, 1, return 0);
+    WRAP_BYTE_FORMATS(BYTE_TO_TYPE, pack);
+    WRAP_RANGE_FORMATS(RANGE_TO_TYPE, pack);
+
+    return FMT_UNUSED;
+}
@@ -0,0 +1,553 @@
+# MessagePack specification
+
+MessagePack is an object serialization specification like JSON.
+
+MessagePack has two concepts: **type system** and **formats**.
+
+Serialization is conversion from application objects into MessagePack formats via MessagePack type system.
+
+Deserialization is conversion from MessagePack formats into application objects via MessagePack type system.
+
+    Serialization:
+        Application objects
+        --> MessagePack type system
+        --> MessagePack formats (byte array)
+
+    Deserialization:
+        MessagePack formats (byte array)
+        --> MessagePack type system
+        --> Application objects
+
+This document describes the MessagePack type system, MessagePack formats and conversion of them.
+ +## Table of contents + +* MessagePack specification + * [Type system](#type-system) + * [Limitation](#limitation) + * [Extension types](#extension-types) + * [Formats](#formats) + * [Overview](#overview) + * [Notation in diagrams](#notation-in-diagrams) + * [nil format](#nil-format) + * [bool format family](#bool-format-family) + * [int format family](#int-format-family) + * [float format family](#float-format-family) + * [str format family](#str-format-family) + * [bin format family](#bin-format-family) + * [array format family](#array-format-family) + * [map format family](#map-format-family) + * [ext format family](#ext-format-family) + * [Timestamp extension type](#timestamp-extension-type) + * [Serialization: type to format conversion](#serialization-type-to-format-conversion) + * [Deserialization: format to type conversion](#deserialization-format-to-type-conversion) + * [Future discussion](#future-discussion) + * [Profile](#profile) + * [Implementation guidelines](#implementation-guidelines) + * [Upgrading MessagePack specification](#upgrading-messagepack-specification) + +## Type system + +* Types + * **Integer** represents an integer + * **Nil** represents nil + * **Boolean** represents true or false + * **Float** represents a IEEE 754 double precision floating point number including NaN and Infinity + * **Raw** + * **String** extending Raw type represents a UTF-8 string + * **Binary** extending Raw type represents a byte array + * **Array** represents a sequence of objects + * **Map** represents key-value pairs of objects + * **Extension** represents a tuple of type information and a byte array where type information is an integer whose meaning is defined by applications or MessagePack specification + * **Timestamp** represents an instantaneous point on the time-line in the world that is independent from time zones or calendars. Maximum precision is nanoseconds. 
+ +### Limitation + +* a value of an Integer object is limited from `-(2^63)` up to `(2^64)-1` +* maximum length of a Binary object is `(2^32)-1` +* maximum byte size of a String object is `(2^32)-1` +* String objects may contain invalid byte sequences and the behavior of a deserializer depends on the actual implementation when it receives an invalid byte sequence + * Deserializers should provide functionality to get the original byte array so that applications can decide how to handle the object +* maximum number of elements of an Array object is `(2^32)-1` +* maximum number of key-value associations of a Map object is `(2^32)-1` + +### Extension types + +MessagePack allows applications to define application-specific types using the Extension type. +Extension type consists of an integer and a byte array where the integer represents a kind of types and the byte array represents data. + +Applications can assign `0` to `127` to store application-specific type information. An example usage is that application defines `type = 0` as the application's unique type system, and stores name of a type and values of the type at the payload. + +MessagePack reserves `-1` to `-128` for future extension to add predefined types. These types will be added to exchange more types without using pre-shared statically-typed schema across different programming environments. + + [0, 127]: application-specific types + [-128, -1]: reserved for predefined types + +Because extension types are intended to be added, old applications may not implement all of them. However, they can still handle such type as one of Extension types. Therefore, applications can decide whether they reject unknown Extension types, accept as opaque data, or transfer to another application without touching payload of them. + +Here is the list of predefined extension types. Formats of the types are defined at [Formats](#formats-timestamp) section.
+ +Name | Type +--------- | ---- +Timestamp | -1 + +## Formats + +### Overview + +format name | first byte (in binary) | first byte (in hex) +--------------- | ---------------------- | ------------------- +positive fixint | 0xxxxxxx | 0x00 - 0x7f +fixmap | 1000xxxx | 0x80 - 0x8f +fixarray | 1001xxxx | 0x90 - 0x9f +fixstr | 101xxxxx | 0xa0 - 0xbf +nil | 11000000 | 0xc0 +(never used) | 11000001 | 0xc1 +false | 11000010 | 0xc2 +true | 11000011 | 0xc3 +bin 8 | 11000100 | 0xc4 +bin 16 | 11000101 | 0xc5 +bin 32 | 11000110 | 0xc6 +ext 8 | 11000111 | 0xc7 +ext 16 | 11001000 | 0xc8 +ext 32 | 11001001 | 0xc9 +float 32 | 11001010 | 0xca +float 64 | 11001011 | 0xcb +uint 8 | 11001100 | 0xcc +uint 16 | 11001101 | 0xcd +uint 32 | 11001110 | 0xce +uint 64 | 11001111 | 0xcf +int 8 | 11010000 | 0xd0 +int 16 | 11010001 | 0xd1 +int 32 | 11010010 | 0xd2 +int 64 | 11010011 | 0xd3 +fixext 1 | 11010100 | 0xd4 +fixext 2 | 11010101 | 0xd5 +fixext 4 | 11010110 | 0xd6 +fixext 8 | 11010111 | 0xd7 +fixext 16 | 11011000 | 0xd8 +str 8 | 11011001 | 0xd9 +str 16 | 11011010 | 0xda +str 32 | 11011011 | 0xdb +array 16 | 11011100 | 0xdc +array 32 | 11011101 | 0xdd +map 16 | 11011110 | 0xde +map 32 | 11011111 | 0xdf +negative fixint | 111xxxxx | 0xe0 - 0xff + +### Notation in diagrams + + one byte: + +--------+ + | | + +--------+ + + a variable number of bytes: + +========+ + | | + +========+ + + variable number of objects stored in MessagePack format: + +~~~~~~~~~~~~~~~~~+ + | | + +~~~~~~~~~~~~~~~~~+ + +`X`, `Y`, `Z` and `A` are the symbols that will be replaced by an actual bit. + +### nil format + +Nil format stores nil in 1 byte. + + nil: + +--------+ + | 0xc0 | + +--------+ + +### bool format family + +Bool format family stores false or true in 1 byte. + + false: + +--------+ + | 0xc2 | + +--------+ + + true: + +--------+ + | 0xc3 | + +--------+ + +### int format family + +Int format family stores an integer in 1, 2, 3, 5, or 9 bytes. 
+ + positive fixint stores 7-bit positive integer + +--------+ + |0XXXXXXX| + +--------+ + + negative fixint stores 5-bit negative integer + +--------+ + |111YYYYY| + +--------+ + + * 0XXXXXXX is 8-bit unsigned integer + * 111YYYYY is 8-bit signed integer + + uint 8 stores a 8-bit unsigned integer + +--------+--------+ + | 0xcc |ZZZZZZZZ| + +--------+--------+ + + uint 16 stores a 16-bit big-endian unsigned integer + +--------+--------+--------+ + | 0xcd |ZZZZZZZZ|ZZZZZZZZ| + +--------+--------+--------+ + + uint 32 stores a 32-bit big-endian unsigned integer + +--------+--------+--------+--------+--------+ + | 0xce |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ| + +--------+--------+--------+--------+--------+ + + uint 64 stores a 64-bit big-endian unsigned integer + +--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | 0xcf |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ| + +--------+--------+--------+--------+--------+--------+--------+--------+--------+ + + int 8 stores a 8-bit signed integer + +--------+--------+ + | 0xd0 |ZZZZZZZZ| + +--------+--------+ + + int 16 stores a 16-bit big-endian signed integer + +--------+--------+--------+ + | 0xd1 |ZZZZZZZZ|ZZZZZZZZ| + +--------+--------+--------+ + + int 32 stores a 32-bit big-endian signed integer + +--------+--------+--------+--------+--------+ + | 0xd2 |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ| + +--------+--------+--------+--------+--------+ + + int 64 stores a 64-bit big-endian signed integer + +--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | 0xd3 |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ| + +--------+--------+--------+--------+--------+--------+--------+--------+--------+ + +### float format family + +Float format family stores a floating point number in 5 bytes or 9 bytes. 
+ + float 32 stores a floating point number in IEEE 754 single precision floating point number format: + +--------+--------+--------+--------+--------+ + | 0xca |XXXXXXXX|XXXXXXXX|XXXXXXXX|XXXXXXXX| + +--------+--------+--------+--------+--------+ + + float 64 stores a floating point number in IEEE 754 double precision floating point number format: + +--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | 0xcb |YYYYYYYY|YYYYYYYY|YYYYYYYY|YYYYYYYY|YYYYYYYY|YYYYYYYY|YYYYYYYY|YYYYYYYY| + +--------+--------+--------+--------+--------+--------+--------+--------+--------+ + + where + * XXXXXXXX_XXXXXXXX_XXXXXXXX_XXXXXXXX is a big-endian IEEE 754 single precision floating point number. + Extension of precision from single-precision to double-precision does not lose precision. + * YYYYYYYY_YYYYYYYY_YYYYYYYY_YYYYYYYY_YYYYYYYY_YYYYYYYY_YYYYYYYY_YYYYYYYY is a big-endian + IEEE 754 double precision floating point number + +### str format family + +Str format family stores a byte array in 1, 2, 3, or 5 bytes of extra bytes in addition to the size of the byte array. 
+ + fixstr stores a byte array whose length is up to 31 bytes: + +--------+========+ + |101XXXXX| data | + +--------+========+ + + str 8 stores a byte array whose length is up to (2^8)-1 bytes: + +--------+--------+========+ + | 0xd9 |YYYYYYYY| data | + +--------+--------+========+ + + str 16 stores a byte array whose length is up to (2^16)-1 bytes: + +--------+--------+--------+========+ + | 0xda |ZZZZZZZZ|ZZZZZZZZ| data | + +--------+--------+--------+========+ + + str 32 stores a byte array whose length is up to (2^32)-1 bytes: + +--------+--------+--------+--------+--------+========+ + | 0xdb |AAAAAAAA|AAAAAAAA|AAAAAAAA|AAAAAAAA| data | + +--------+--------+--------+--------+--------+========+ + + where + * XXXXX is a 5-bit unsigned integer which represents N + * YYYYYYYY is an 8-bit unsigned integer which represents N + * ZZZZZZZZ_ZZZZZZZZ is a 16-bit big-endian unsigned integer which represents N + * AAAAAAAA_AAAAAAAA_AAAAAAAA_AAAAAAAA is a 32-bit big-endian unsigned integer which represents N + * N is the length of data + +### bin format family + +Bin format family stores a byte array in 2, 3, or 5 bytes of extra bytes in addition to the size of the byte array. 
+ + bin 8 stores a byte array whose length is up to (2^8)-1 bytes: + +--------+--------+========+ + | 0xc4 |XXXXXXXX| data | + +--------+--------+========+ + + bin 16 stores a byte array whose length is up to (2^16)-1 bytes: + +--------+--------+--------+========+ + | 0xc5 |YYYYYYYY|YYYYYYYY| data | + +--------+--------+--------+========+ + + bin 32 stores a byte array whose length is up to (2^32)-1 bytes: + +--------+--------+--------+--------+--------+========+ + | 0xc6 |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ| data | + +--------+--------+--------+--------+--------+========+ + + where + * XXXXXXXX is an 8-bit unsigned integer which represents N + * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N + * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a 32-bit big-endian unsigned integer which represents N + * N is the length of data + +### array format family + +Array format family stores a sequence of elements in 1, 3, or 5 bytes of extra bytes in addition to the elements. + + fixarray stores an array whose length is up to 15 elements: + +--------+~~~~~~~~~~~~~~~~~+ + |1001XXXX| N objects | + +--------+~~~~~~~~~~~~~~~~~+ + + array 16 stores an array whose length is up to (2^16)-1 elements: + +--------+--------+--------+~~~~~~~~~~~~~~~~~+ + | 0xdc |YYYYYYYY|YYYYYYYY| N objects | + +--------+--------+--------+~~~~~~~~~~~~~~~~~+ + + array 32 stores an array whose length is up to (2^32)-1 elements: + +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+ + | 0xdd |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ| N objects | + +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+ + + where + * XXXX is a 4-bit unsigned integer which represents N + * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N + * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a 32-bit big-endian unsigned integer which represents N + * N is the size of an array + +### map format family + +Map format family stores a sequence of key-value pairs in 1, 3, or 5 bytes 
of extra bytes in addition to the key-value pairs. + + fixmap stores a map whose length is up to 15 elements + +--------+~~~~~~~~~~~~~~~~~+ + |1000XXXX| N*2 objects | + +--------+~~~~~~~~~~~~~~~~~+ + + map 16 stores a map whose length is up to (2^16)-1 elements + +--------+--------+--------+~~~~~~~~~~~~~~~~~+ + | 0xde |YYYYYYYY|YYYYYYYY| N*2 objects | + +--------+--------+--------+~~~~~~~~~~~~~~~~~+ + + map 32 stores a map whose length is up to (2^32)-1 elements + +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+ + | 0xdf |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ| N*2 objects | + +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+ + + where + * XXXX is a 4-bit unsigned integer which represents N + * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N + * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a 32-bit big-endian unsigned integer which represents N + * N is the size of a map + * odd elements in objects are keys of a map + * the next element of a key is its associated value + +### ext format family + +Ext format family stores a tuple of an integer and a byte array. 
+ + fixext 1 stores an integer and a byte array whose length is 1 byte + +--------+--------+--------+ + | 0xd4 | type | data | + +--------+--------+--------+ + + fixext 2 stores an integer and a byte array whose length is 2 bytes + +--------+--------+--------+--------+ + | 0xd5 | type | data | + +--------+--------+--------+--------+ + + fixext 4 stores an integer and a byte array whose length is 4 bytes + +--------+--------+--------+--------+--------+--------+ + | 0xd6 | type | data | + +--------+--------+--------+--------+--------+--------+ + + fixext 8 stores an integer and a byte array whose length is 8 bytes + +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | 0xd7 | type | data | + +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + + fixext 16 stores an integer and a byte array whose length is 16 bytes + +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | 0xd8 | type | data + +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + +--------+--------+--------+--------+--------+--------+--------+--------+ + data (cont.) 
| + +--------+--------+--------+--------+--------+--------+--------+--------+ + + ext 8 stores an integer and a byte array whose length is up to (2^8)-1 bytes: + +--------+--------+--------+========+ + | 0xc7 |XXXXXXXX| type | data | + +--------+--------+--------+========+ + + ext 16 stores an integer and a byte array whose length is up to (2^16)-1 bytes: + +--------+--------+--------+--------+========+ + | 0xc8 |YYYYYYYY|YYYYYYYY| type | data | + +--------+--------+--------+--------+========+ + + ext 32 stores an integer and a byte array whose length is up to (2^32)-1 bytes: + +--------+--------+--------+--------+--------+--------+========+ + | 0xc9 |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ| type | data | + +--------+--------+--------+--------+--------+--------+========+ + + where + * XXXXXXXX is an 8-bit unsigned integer which represents N + * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N + * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a 32-bit big-endian unsigned integer which represents N + * N is the length of data + * type is a signed 8-bit integer + * type < 0 is reserved for future extension including 2-byte type information + +### Timestamp extension type + +Timestamp extension type is assigned to extension type `-1`. It defines 3 formats: 32-bit format, 64-bit format, and 96-bit format. + + timestamp 32 stores the number of seconds that have elapsed since 1970-01-01 00:00:00 UTC + in a 32-bit unsigned integer: + +--------+--------+--------+--------+--------+--------+ + | 0xd6 | -1 | seconds in 32-bit unsigned int | + +--------+--------+--------+--------+--------+--------+ + + timestamp 64 stores the number of seconds and nanoseconds that have elapsed since 1970-01-01 00:00:00 UTC + in a 34-bit unsigned integer and a 30-bit unsigned integer, respectively: + +--------+--------+--------+--------+--------+------|-+--------+--------+--------+--------+ + | 0xd7 | -1 | nanosec. 
in 30-bit unsigned int | seconds in 34-bit unsigned int | + +--------+--------+--------+--------+--------+------^-+--------+--------+--------+--------+ + + timestamp 96 stores the number of seconds and nanoseconds that have elapsed since 1970-01-01 00:00:00 UTC + in 64-bit signed integer and 32-bit unsigned integer: + +--------+--------+--------+--------+--------+--------+--------+ + | 0xc7 | 12 | -1 |nanoseconds in 32-bit unsigned int | + +--------+--------+--------+--------+--------+--------+--------+ + +--------+--------+--------+--------+--------+--------+--------+--------+ + seconds in 64-bit signed int | + +--------+--------+--------+--------+--------+--------+--------+--------+ + +* Timestamp 32 format can represent a timestamp in [1970-01-01 00:00:00 UTC, 2106-02-07 06:28:16 UTC) range. Nanoseconds part is 0. +* Timestamp 64 format can represent a timestamp in [1970-01-01 00:00:00.000000000 UTC, 2514-05-30 01:53:04.000000000 UTC) range. +* Timestamp 96 format can represent a timestamp in [-292277022657-01-27 08:29:52 UTC, 292277026596-12-04 15:30:08.000000000 UTC) range. +* In timestamp 64 and timestamp 96 formats, nanoseconds must not be larger than 999999999. 
+ +Pseudo code for serialization: + + struct timespec { + long tv_sec; // seconds + long tv_nsec; // nanoseconds + } time; + if ((time.tv_sec >> 34) == 0) { + uint64_t data64 = (time.tv_nsec << 34) | time.tv_sec; + if ((data64 & 0xffffffff00000000L) == 0) { + // timestamp 32 + uint32_t data32 = data64; + serialize(0xd6, -1, data32) + } + else { + // timestamp 64 + serialize(0xd7, -1, data64) + } + } + else { + // timestamp 96 + serialize(0xc7, 12, -1, time.tv_nsec, time.tv_sec) + } + +Pseudo code for deserialization: + + ExtensionValue value = deserialize_ext_type(); + struct timespec result; + switch(value.length) { + case 4: + uint32_t data32 = value.payload; + result.tv_nsec = 0; + result.tv_sec = data32; + break; + case 8: + uint64_t data64 = value.payload; + result.tv_nsec = data64 >> 34; + result.tv_sec = data64 & 0x00000003ffffffffL; + break; + case 12: + uint32_t data32 = value.payload; + uint64_t data64 = value.payload + 4; + result.tv_nsec = data32; + result.tv_sec = data64; + break; + default: + // error + } + +## Serialization: type to format conversion + +MessagePack serializers convert MessagePack types into formats as follows: + +source types | output format +------------ | --------------------------------------------------------------------------------------- +Integer | int format family (positive fixint, negative fixint, int 8/16/32/64 or uint 8/16/32/64) +Nil | nil +Boolean | bool format family (false or true) +Float | float format family (float 32/64) +String | str format family (fixstr or str 8/16/32) +Binary | bin format family (bin 8/16/32) +Array | array format family (fixarray or array 16/32) +Map | map format family (fixmap or map 16/32) +Extension | ext format family (fixext or ext 8/16/32) + +If an object can be represented in multiple possible output formats, serializers SHOULD use the format which represents the data in the smallest number of bytes. 
+ +## Deserialization: format to type conversion + +MessagePack deserializers convert MessagePack formats into types as follows: + +source formats | output type +-------------------------------------------------------------------- | ----------- +positive fixint, negative fixint, int 8/16/32/64 and uint 8/16/32/64 | Integer +nil | Nil +false and true | Boolean +float 32/64 | Float +fixstr and str 8/16/32 | String +bin 8/16/32 | Binary +fixarray and array 16/32 | Array +fixmap and map 16/32 | Map +fixext and ext 8/16/32 | Extension + +## Future discussion + +### Profile + +Profile is the idea that applications restrict the semantics of MessagePack while sharing the same syntax to adapt MessagePack for certain use cases. + +For example, applications may remove Binary type, restrict keys of map objects to be String type, and put some restrictions to make the semantics compatible with JSON. Applications which use schema may remove String and Binary types and deal with byte arrays as Raw type. Applications which use hash (digest) of serialized data may sort keys of maps to make the serialized data deterministic. + +## Implementation guidelines + +### Upgrading MessagePack specification + +The MessagePack specification has changed with this revision. +Here is a guideline to upgrade existing MessagePack implementations: + +* In a minor release, deserializers support the bin format family and str 8 format. The type of deserialized objects should be the same as raw 16 (== str 16) or raw 32 (== str 32) +* In a major release, serializers distinguish Binary type and String type using bin format family and str format family + * At the same time, serializers should offer "compatibility mode" which doesn't use bin format family and str 8 format + + +___ + + MessagePack specification + Last modified at 2017-08-09 22:42:07 -0700 + Sadayuki Furuhashi © 2013-04-21 21:52:33 -0700 |