12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843 |
- /*
- * hipack-parser.c
- * Copyright (C) 2015 Adrian Perez <aperez@igalia.com>
- *
- * Distributed under terms of the MIT license.
- */
- #include "hipack.h"
- #include <assert.h>
- #include <string.h>
- #include <stdbool.h>
- #include <stdlib.h>
- #include <ctype.h>
- #include <errno.h>
/* Message stored in the reader's error field when the input callback fails. */
const char *HIPACK_READ_ERROR = "Error reading from input";
/*
 * Outcome of a parsing step. A pointer to one of these is threaded
 * through every parser function so that failures can be propagated.
 */
typedef enum status {
    kStatusOk = 0,   /* No failure so far. */
    kStatusEof,      /* Accepted by hipack_read(); not set by this file. */
    kStatusError,    /* Malformed input. */
    kStatusIoError,  /* The input callback returned HIPACK_IO_ERROR. */
} status_t;
/*
 * Parser state. The member order matters: hipack_read() initializes
 * "column" positionally, right after the designated ".line" member.
 */
struct parser {
    int         (*getchar) (void*);  /* Input callback.                      */
    void         *getchar_data;      /* Opaque argument for the callback.    */
    int           look;              /* Look-ahead: last character read.     */
    unsigned int  line;              /* Current line, for error reporting.   */
    unsigned int  column;            /* Current column, for error reporting. */
    const char   *error;             /* Error message, or NULL if none set.  */
};
/*
 * Shorthands for the two parameters every parsing function receives:
 * the parser state ("p") and the status output ("status").
 */
#define P struct parser* p
#define S status_t *status

/*
 * CHECK_OK is written in place of the trailing "status" argument of a
 * call, e.g. "nextchar (p, CHECK_OK);". It closes the call, then jumps
 * to the enclosing function's "error" label when the callee left a
 * status other than kStatusOk. The unbalanced parentheses are
 * intentional: the final "((void) 0" is closed by the ")" that the
 * caller writes after CHECK_OK.
 */
#define CHECK_OK status); \
    if (*status != kStatusOk) goto error; \
    ((void) 0

/* Balances the parenthesis above so that editors keep indenting sanely. */
#define DUMMY ) /* Makes autoindentation work. */
#undef DUMMY

/* Placeholder value: a boolean without annotations. */
#define DUMMY_VALUE ((hipack_value_t) { .type = HIPACK_BOOL, .annot = NULL })
- static hipack_value_t parse_value (P, S);
- static void parse_keyval_items (P, hipack_dict_t *result, int eos, S);
- static inline bool
- string_to_intrinsic_annot (const hipack_string_t *hstr, hipack_type_t *type)
- {
- assert (type);
- static const struct {
- const char * const str;
- int type;
- } annots[] = {
- { ".int", HIPACK_INTEGER },
- { ".float", HIPACK_FLOAT },
- { ".bool", HIPACK_BOOL },
- { ".string", HIPACK_STRING },
- { ".list", HIPACK_LIST },
- { ".dict", HIPACK_DICT },
- };
- if (hstr->size < 4)
- return false;
- for (uint8_t i = 0; i < sizeof (annots) / sizeof (annots[0]); i++) {
- if (strncmp (annots[i].str, (const char*) hstr->data, hstr->size) == 0) {
- *type = annots[i].type;
- return true;
- }
- }
- return false;
- }
/*
 * Tells whether "ch" is one of the four whitespace characters handled
 * by the parser: horizontal tab, new line, carriage return and space.
 */
static inline bool
is_hipack_whitespace (int ch)
{
    return ch == 0x09   /* Horizontal tab.  */
        || ch == 0x0A   /* New line.        */
        || ch == 0x0D   /* Carriage return. */
        || ch == 0x20;  /* Space.           */
}
/*
 * Tells whether "ch" may be part of a key. Everything is accepted
 * except whitespace and the structural characters "[]{}:,".
 */
static inline bool
is_hipack_key_character (int ch)
{
    /* Keys do not contain whitespace. */
    if (ch == 0x09 || ch == 0x0A || ch == 0x0D || ch == 0x20)
        return false;

    /* These characters are forbidden in keys by the spec. */
    return ch != '[' && ch != ']'
        && ch != '{' && ch != '}'
        && ch != ':' && ch != ',';
}
/*
 * Tells whether "ch" can appear in a numeric literal: a decimal digit,
 * a hexadecimal letter in either case (which also covers the exponent
 * marker "e"), a sign or the decimal dot.
 */
static inline bool
is_number_char (int ch)
{
    if (ch >= '0' && ch <= '9')
        return true;
    if ((ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'))
        return true;
    return ch == '.' || ch == '+' || ch == '-';
}
/* Tells whether "ch" is an octal digit other than zero ('1' to '7'). */
static inline bool
is_octal_nonzero_digit (int ch)
{
    return ch >= '1' && ch <= '7';
}
/*
 * Converts a hexadecimal digit character (either case) to its numeric
 * value, from 0 to 15. The argument must be a valid hexadecimal digit:
 * anything else trips the assertion, or aborts the program when
 * assertions are compiled out.
 */
static inline int
xdigit_to_int (int xdigit)
{
    assert ((xdigit >= '0' && xdigit <= '9') ||
            (xdigit >= 'A' && xdigit <= 'F') ||
            (xdigit >= 'a' && xdigit <= 'f'));

    if (xdigit >= '0' && xdigit <= '9')
        return xdigit - '0';
    if (xdigit >= 'a' && xdigit <= 'f')
        return 0xA + (xdigit - 'a');
    if (xdigit >= 'A' && xdigit <= 'F')
        return 0xA + (xdigit - 'A');

    abort ();
}
- static inline int
- nextchar_raw (P, S)
- {
- int ch = (*p->getchar) (p->getchar_data);
- switch (ch) {
- case HIPACK_IO_ERROR:
- *status = kStatusIoError;
- /* fall-through */
- case HIPACK_IO_EOF:
- break;
- case '\n':
- p->column = 0;
- p->line++;
- /* fall-through */
- default:
- p->column++;
- }
- return ch;
- }
- static inline void
- nextchar (P, S)
- {
- do {
- p->look = nextchar_raw (p, CHECK_OK);
- if (p->look == '#') {
- while (p->look != '\n' && p->look != HIPACK_IO_EOF) {
- p->look = nextchar_raw (p, CHECK_OK);
- }
- }
- } while (p->look != HIPACK_IO_EOF && p->look == '#');
- error:
- /* noop */;
- }
- static inline void
- skipwhite (P, S)
- {
- while (p->look != HIPACK_IO_EOF && is_hipack_whitespace (p->look))
- nextchar (p, status);
- }
- static inline void
- matchchar (P, int ch, const char *errmsg, S)
- {
- if (p->look == ch) {
- nextchar (p, CHECK_OK);
- return;
- }
- p->error = errmsg ? errmsg : "unexpected input";
- *status = kStatusError;
- error:
- return;
- }
/*
 * Allocation tunables, overridable at build time. Buffers for strings
 * and lists grow in multiples of the *_CHUNK_SIZE values. The
 * *_POW_SIZE values are defined but not used by the resize functions
 * yet (see the TODO notes there). The list defaults follow the string
 * ones.
 */
#if !defined(HIPACK_STRING_CHUNK_SIZE)
# define HIPACK_STRING_CHUNK_SIZE 32
#endif /* !HIPACK_STRING_CHUNK_SIZE */

#if !defined(HIPACK_STRING_POW_SIZE)
# define HIPACK_STRING_POW_SIZE 512
#endif /* !HIPACK_STRING_POW_SIZE */

#if !defined(HIPACK_LIST_CHUNK_SIZE)
# define HIPACK_LIST_CHUNK_SIZE HIPACK_STRING_CHUNK_SIZE
#endif /* !HIPACK_LIST_CHUNK_SIZE */

#if !defined(HIPACK_LIST_POW_SIZE)
# define HIPACK_LIST_POW_SIZE HIPACK_STRING_POW_SIZE
#endif /* !HIPACK_LIST_POW_SIZE */
- static hipack_string_t*
- string_resize (hipack_string_t *hstr, uint32_t *alloc, uint32_t size)
- {
- /* TODO: Use HIPACK_STRING_POW_SIZE. */
- if (size) {
- uint32_t new_size = HIPACK_STRING_CHUNK_SIZE *
- ((size / HIPACK_STRING_CHUNK_SIZE) + 1);
- if (new_size < size) {
- new_size = size;
- }
- if (new_size != *alloc) {
- *alloc = new_size;
- new_size = sizeof (hipack_string_t) + new_size * sizeof (uint8_t);
- hstr = hipack_alloc_array_extra (hstr, new_size,
- sizeof (uint8_t),
- sizeof (hipack_string_t));
- }
- hstr->size = size;
- } else {
- hipack_alloc_free (hstr);
- hstr = NULL;
- *alloc = 0;
- }
- return hstr;
- }
- static hipack_list_t*
- list_resize (hipack_list_t *list, uint32_t *alloc, uint32_t size)
- {
- /* TODO: Use HIPACK_LIST_POW_SIZE. */
- if (size) {
- uint32_t new_size = HIPACK_LIST_CHUNK_SIZE *
- ((size / HIPACK_LIST_CHUNK_SIZE) + 1);
- if (new_size < size) {
- new_size = size;
- }
- if (new_size != *alloc) {
- *alloc = new_size;
- list = hipack_alloc_array_extra (list, new_size,
- sizeof (hipack_value_t),
- sizeof (hipack_list_t));
- }
- list->size = size;
- } else {
- hipack_alloc_free (list);
- list = NULL;
- *alloc = 0;
- }
- return list;
- }
- /* On empty (missing) keys, NULL is returned. */
- static hipack_string_t*
- parse_key (P, S)
- {
- hipack_string_t *hstr = NULL;
- uint32_t alloc_size = 0;
- uint32_t size = 0;
- while (p->look != HIPACK_IO_EOF && is_hipack_key_character (p->look)) {
- hstr = string_resize (hstr, &alloc_size, size + 1);
- hstr->data[size++] = p->look;
- nextchar (p, CHECK_OK);
- }
- return hstr;
- error:
- hipack_string_free (hstr);
- return NULL;
- }
- static void
- parse_string (P, hipack_value_t *result, S)
- {
- hipack_string_t *hstr = NULL;
- uint32_t alloc_size = 0;
- uint32_t size = 0;
- matchchar (p, '"', NULL, CHECK_OK);
- while (p->look != '"' && p->look != HIPACK_IO_EOF) {
- /* Handle escapes. */
- if (p->look == '\\') {
- int extra;
- p->look = nextchar_raw (p, CHECK_OK);
- switch (p->look) {
- case '"' : p->look = '"' ; break;
- case 'n' : p->look = '\n'; break;
- case 'r' : p->look = '\r'; break;
- case 't' : p->look = '\t'; break;
- case '\\': p->look = '\\'; break;
- default:
- /* Hex number. */
- extra = nextchar_raw (p, CHECK_OK);
- if (!isxdigit (extra) || !isxdigit (p->look)) {
- p->error = "invalid escape sequence";
- *status = kStatusError;
- goto error;
- }
- p->look = (xdigit_to_int (p->look) * 16) +
- xdigit_to_int (extra);
- break;
- }
- }
- hstr = string_resize (hstr, &alloc_size, size + 1);
- hstr->data[size++] = p->look;
- /* Read next character from the string. */
- p->look = nextchar_raw (p, CHECK_OK);
- }
- matchchar (p, '"', "unterminated string value", CHECK_OK);
- result->type = HIPACK_STRING;
- result->v_string = hstr ? hstr : hipack_string_new_from_lstring ("", 0);
- return;
- error:
- hipack_string_free (hstr);
- return;
- }
- static void
- parse_list (P, hipack_value_t *result, S)
- {
- hipack_list_t *list = NULL;
- uint32_t alloc_size = 0;
- uint32_t size = 0;
- matchchar (p, '[', NULL, CHECK_OK);
- skipwhite (p, CHECK_OK);
- while (p->look != ']') {
- hipack_value_t value = parse_value (p, CHECK_OK);
- list = list_resize (list, &alloc_size, size + 1);
- list->data[size++] = value;
- bool got_whitespace = is_hipack_whitespace (p->look);
- skipwhite (p, CHECK_OK);
- /* There must either a comma or whitespace after the value. */
- if (p->look == ',') {
- nextchar (p, CHECK_OK);
- } else if (!got_whitespace && !is_hipack_whitespace (p->look)) {
- break;
- }
- skipwhite (p, CHECK_OK);
- }
- matchchar (p, ']', "unterminated list value", CHECK_OK);
- result->type = HIPACK_LIST;
- result->v_list = list ? list : hipack_list_new (0);
- return;
- error:
- hipack_list_free (list);
- return;
- }
- static void
- parse_dict (P, hipack_value_t *result, S)
- {
- hipack_dict_t *dict = hipack_dict_new ();
- matchchar (p, '{', NULL, CHECK_OK);
- skipwhite (p, CHECK_OK);
- parse_keyval_items (p, dict, '}', CHECK_OK);
- matchchar (p, '}', "unterminated dict value", CHECK_OK);
- result->type = HIPACK_DICT;
- result->v_dict = dict;
- return;
- error:
- hipack_dict_free (dict);
- return;
- }
- static void
- parse_bool (P, hipack_value_t *result, S)
- {
- result->type = HIPACK_BOOL;
- if (p->look == 'T' || p->look == 't') {
- nextchar (p, CHECK_OK);
- matchchar (p, 'r', NULL, CHECK_OK);
- matchchar (p, 'u', NULL, CHECK_OK);
- matchchar (p, 'e', NULL, CHECK_OK);
- result->v_bool = true;
- } else if (p->look == 'F' || p->look == 'f') {
- nextchar (p, CHECK_OK);
- matchchar (p, 'a', NULL, CHECK_OK);
- matchchar (p, 'l', NULL, CHECK_OK);
- matchchar (p, 's', NULL, CHECK_OK);
- matchchar (p, 'e', NULL, CHECK_OK);
- result->v_bool = false;
- }
- return;
- error:
- p->error = "invalid boolean value";
- }
- static void
- parse_number (P, hipack_value_t *result, S)
- {
- hipack_string_t *hstr = NULL;
- uint32_t alloc_size = 0;
- uint32_t size = 0;
- #define SAVE_LOOK( ) \
- hstr = string_resize (hstr, &alloc_size, size + 1); \
- hstr->data[size++] = p->look
- /* Optional sign. */
- bool has_sign = false;
- if (p->look == '-' || p->look == '+') {
- SAVE_LOOK ();
- has_sign = true;
- nextchar (p, CHECK_OK);
- }
- /* Octal/hexadecimal numbers. */
- bool is_octal = false;
- bool is_hex = false;
- if (p->look == '0') {
- SAVE_LOOK ();
- nextchar (p, CHECK_OK);
- if (p->look == 'x' || p->look == 'X') {
- SAVE_LOOK ();
- nextchar (p, CHECK_OK);
- is_hex = true;
- } else if (is_octal_nonzero_digit (p->look)) {
- is_octal = true;
- }
- }
- /* Read the rest of the number. */
- bool dot_seen = false;
- bool exp_seen = false;
- while (p->look != HIPACK_IO_EOF && is_number_char (p->look)) {
- if (!is_hex && (p->look == 'e' || p->look == 'E')) {
- if (exp_seen || is_octal) {
- *status = kStatusError;
- goto error;
- }
- exp_seen = true;
- /* Handle the optional sign of the exponent. */
- SAVE_LOOK ();
- nextchar (p, CHECK_OK);
- if (p->look == '-' || p->look == '+') {
- SAVE_LOOK ();
- nextchar (p, CHECK_OK);
- }
- } else {
- if (p->look == '.') {
- if (dot_seen || is_hex || is_octal) {
- *status = kStatusError;
- goto error;
- }
- dot_seen = true;
- }
- if (p->look == '-' || p->look == '+') {
- *status = kStatusError;
- goto error;
- }
- SAVE_LOOK ();
- nextchar (p, CHECK_OK);
- }
- }
- if (!size) {
- *status = kStatusError;
- goto error;
- }
- /* Zero-terminate, to use with the libc conversion functions. */
- hstr = string_resize (hstr, &alloc_size, size + 1);
- hstr->data[size++] = '\0';
- char *endptr = NULL;
- if (is_hex) {
- assert (!is_octal);
- assert (!exp_seen);
- assert (!dot_seen);
- char *endptr = NULL;
- long v = strtol ((const char*) hstr->data, &endptr, 16);
- /* TODO: Check for overflow. */
- result->type = HIPACK_INTEGER;
- result->v_integer = (int32_t) v;
- } else if (is_octal) {
- assert (!is_hex);
- assert (!exp_seen);
- assert (!dot_seen);
- long v = strtol ((const char*) hstr->data, &endptr, 8);
- /* TODO: Check for overflow. */
- result->type = HIPACK_INTEGER;
- result->v_integer = (int32_t) v;
- } else if (dot_seen || exp_seen) {
- assert (!is_hex);
- assert (!is_octal);
- result->type = HIPACK_FLOAT;
- result->v_float = strtod ((const char*) hstr->data, &endptr);
- } else {
- assert (!is_hex);
- assert (!is_octal);
- assert (!exp_seen);
- assert (!dot_seen);
- long v = strtol ((const char*) hstr->data, &endptr, 10);
- /* TODO: Check for overflow. */
- result->type = HIPACK_INTEGER;
- result->v_integer = (int32_t) v;
- }
- if (endptr && *endptr != '\0') {
- *status = kStatusError;
- goto error;
- }
- hipack_string_free (hstr);
- return;
- error:
- p->error = "invalid numeric value";
- hipack_string_free (hstr);
- }
- static bool
- parse_annotations (P, hipack_value_t *result, S)
- {
- hipack_string_t *key = NULL;
- bool type_annot = false;
- while (p->look == ':') {
- p->look = nextchar_raw (p, CHECK_OK);
- key = parse_key (p, CHECK_OK);
- skipwhite (p, CHECK_OK); /* TODO: Move after checking duplicates. */
- /* Check for intrinsic type annotations. */
- assert (key->size > 0);
- if (key->data[0] == '.') {
- hipack_type_t annot_type;
- bool found = string_to_intrinsic_annot (key, &annot_type);
- if (found) {
- if (type_annot && annot_type != result->type) {
- p->error = "multiple intrinsic type annotations";
- goto error;
- }
- result->type = annot_type;
- type_annot = true;
- } else {
- p->error = "invalid intrinsic annotation";
- goto error;
- }
- } else {
- /* Check if the annotation is already in the set. */
- if (result->annot && hipack_dict_get (result->annot, key)) {
- p->error = "duplicate annotation";
- goto error;
- }
- /* Add the annotation to the set. */
- if (!result->annot)
- result->annot = hipack_dict_new ();
- static const hipack_value_t annot_present = {
- .type = HIPACK_BOOL,
- .v_bool = true,
- };
- hipack_dict_set_adopt_key (result->annot, &key, &annot_present);
- }
- }
- return type_annot;
- error:
- if (key)
- hipack_string_free (key);
- *status = kStatusError;
- return false;
- }
- static hipack_value_t
- parse_value (P, S)
- {
- hipack_value_t result = DUMMY_VALUE;
- bool type_annot = parse_annotations (p, &result, CHECK_OK);
- const hipack_type_t expected_type = result.type;
- switch (p->look) {
- case '"': /* String */
- parse_string (p, &result, CHECK_OK);
- break;
- case '[': /* List */
- parse_list (p, &result, CHECK_OK);
- break;
- case '{': /* Dict */
- parse_dict (p, &result, CHECK_OK);
- break;
- case 'T': /* Bool */
- case 't':
- case 'F':
- case 'f':
- parse_bool (p, &result, CHECK_OK);
- break;
- default: /* Integer or Float */
- parse_number (p, &result, CHECK_OK);
- break;
- }
- if (type_annot && expected_type != result.type) {
- p->error = "annotated type does not match value type";
- *status = kStatusError;
- goto error;
- }
- return result;
- error:
- hipack_value_free (&result);
- return DUMMY_VALUE;
- }
- static void
- parse_keyval_items (P, hipack_dict_t *result, int eos, S)
- {
- hipack_value_t value = DUMMY_VALUE;
- hipack_string_t *key = NULL;
- while (p->look != eos) {
- key = parse_key (p, CHECK_OK);
- if (!key) {
- p->error = "missing dictionary key";
- *status = kStatusError;
- goto error;
- }
- bool got_separator = false;
- if (is_hipack_whitespace (p->look)) {
- got_separator = true;
- skipwhite (p, CHECK_OK);
- } else switch (p->look) {
- case ':':
- nextchar (p, CHECK_OK);
- skipwhite (p, CHECK_OK);
- /* fall-through */
- case '{':
- case '[':
- got_separator = true;
- break;
- }
- if (!got_separator) {
- p->error = "missing separator";
- *status = kStatusError;
- goto error;
- }
- value = parse_value (p, CHECK_OK);
- hipack_dict_set_adopt_key (result, &key, &value);
- /*
- * There must be either a comma or a whitespace after the value,
- * or the end-of-sequence character.
- */
- if (p->look == ',') {
- nextchar (p, CHECK_OK);
- } else if (p->look != eos && !is_hipack_whitespace (p->look)) {
- break;
- }
- skipwhite (p, CHECK_OK);
- }
- return;
- error:
- hipack_string_free (key);
- hipack_value_free (&value);
- }
- static hipack_dict_t*
- parse_message (P, S)
- {
- hipack_dict_t *result = hipack_dict_new ();
- nextchar (p, CHECK_OK);
- skipwhite (p, CHECK_OK);
- if (p->look == HIPACK_IO_ERROR) {
- *status = kStatusIoError;
- } else if (p->look == '{') {
- /* Input starts with a Dict marker. */
- nextchar (p, CHECK_OK);
- skipwhite (p, CHECK_OK);
- parse_keyval_items (p, result, '}', CHECK_OK);
- matchchar (p, '}', "unterminated message", CHECK_OK);
- } else {
- parse_keyval_items (p, result, HIPACK_IO_EOF, CHECK_OK);
- }
- return result;
- error:
- hipack_dict_free (result);
- return NULL;
- }
- hipack_dict_t*
- hipack_read (hipack_reader_t *reader)
- {
- assert (reader);
- /*
- * Copy the reader function (and its data pointer) into the parser
- * structure. The rest of the fields are used as results, so the
- * reader structure can be cleaned up right after.
- */
- struct parser p = {
- .getchar = reader->getchar,
- .getchar_data = reader->getchar_data,
- .line = 1,
- 0,
- };
- memset (reader, 0x00, sizeof (hipack_reader_t));
- status_t status = kStatusOk;
- hipack_dict_t *result = parse_message (&p, &status);
- switch (status) {
- case kStatusOk:
- assert (result);
- break;
- case kStatusError:
- assert (!result);
- assert (p.error);
- break;
- case kStatusIoError:
- p.error = HIPACK_READ_ERROR;
- hipack_dict_free (result);
- result = NULL;
- break;
- case kStatusEof:
- break;
- }
- reader->error = p.error;
- reader->error_line = p.line;
- reader->error_column = p.column;
- return result;
- }
- int
- hipack_stdio_getchar (void *fp)
- {
- assert (fp);
- int ch = fgetc ((FILE*) fp);
- if (ch == EOF) {
- return ferror ((FILE*) fp) ? HIPACK_IO_ERROR : HIPACK_IO_EOF;
- }
- return ch;
- }
|