[LTP] [PATCH v2 2/5] lib: Add minimalistic json parser

Cyril Hrubis chrubis@suse.cz
Wed Feb 24 17:50:42 CET 2021


Signed-off-by: Cyril Hrubis <chrubis@suse.cz>
---
 include/tst_json.h | 177 ++++++++++++
 lib/tst_json.c     | 679 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 856 insertions(+)
 create mode 100644 include/tst_json.h
 create mode 100644 lib/tst_json.c

diff --git a/include/tst_json.h b/include/tst_json.h
new file mode 100644
index 000000000..4b3669824
--- /dev/null
+++ b/include/tst_json.h
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: LGPL-2.1-or-later
+/*
+ * Copyright (C) 2021 Cyril Hrubis <metan@ucw.cz>
+ */
+
+#ifndef TST_JSON_H
+#define TST_JSON_H
+
+#include <stdio.h>
+
+#define TST_JSON_ERR_MAX 128
+#define TST_JSON_ID_MAX 64
+
+enum tst_json_type {
+	TST_JSON_VOID = 0,
+	TST_JSON_INT,
+	TST_JSON_STR,
+	TST_JSON_OBJ,
+	TST_JSON_ARR,
+};
+
+struct tst_json_buf {
+	/** Pointer to a null terminated JSON string */
+	const char *json;
+	/** A length of the JSON string */
+	size_t len;
+	/** A current offset into the JSON string */
+	size_t off;
+	/** An offset to the start of the last array or object */
+	size_t sub_off;
+	/** An error message, an empty string means that no error was set */
+	char err[TST_JSON_ERR_MAX];
+	char buf[]; /* trailing storage, tst_json_load() puts the file data here */
+};
+
+struct tst_json_val {
+	enum tst_json_type type;
+
+	/** A user supplied buffer and its size, string values are stored there. */
+	char *buf;
+	size_t buf_size;
+
+	/** A union to store the parsed value into, val_str points to buf. */
+	union {
+		long val_int;
+		const char *val_str;
+	};
+
+	/** An ID for object values */
+	char id[TST_JSON_ID_MAX];
+};
+
+/*
+ * @brief Resets the parser.
+ *
+ * Resets the parse offset and clears errors.
+ *
+ * @buf An tst_json buffer
+ */
+static inline void tst_json_reset(struct tst_json_buf *buf)
+{
+	buf->off = 0;
+	buf->err[0] = 0;
+}
+
+/*
+ * @brief Fills the buffer error.
+ *
+ * Once buffer error is set all parsing functions return immediately with type
+ * set to TST_JSON_VOID.
+ *
+ * @buf An tst_json buffer
+ * @fmt A printf like format string
+ * @... A printf like parameters
+ */
+void tst_json_err(struct tst_json_buf *buf, const char *fmt, ...)
+               __attribute__((format (printf, 2, 3)));
+
+/*
+ * @brief Prints error into a file.
+ *
+ * The error takes into consideration the current offset in the buffer and
+ * prints a few preceding lines along with the exact position of the error.
+ *
+ * @f A file to print the error to.
+ * @buf An tst_json buffer.
+ */
+void tst_json_err_print(FILE *f, struct tst_json_buf *buf);
+
+/*
+ * @brief Returns true if error was encountered.
+ *
+ * @buf An tst_json buffer.
+ * @return True if error was encountered false otherwise.
+ */
+static inline int tst_json_is_err(struct tst_json_buf *buf)
+{
+	return !!buf->err[0];
+}
+
+/*
+ * @brief Checks if result has a valid type.
+ *
+ * @res An tst_json value.
+ * @return Zero if result is not valid, non-zero otherwise.
+ */
+static inline int tst_json_valid(struct tst_json_val *res)
+{
+	return !!res->type;
+}
+
+/*
+ * @brief Returns the type of next element in buffer.
+ *
+ * @buf An tst_json buffer.
+ * @return A type of next element in the buffer.
+ */
+enum tst_json_type tst_json_next_type(struct tst_json_buf *buf);
+
+/*
+ * @brief Returns if first element in JSON is object or array.
+ *
+ * @buf An tst_json buffer.
+ * @return On success returns TST_JSON_OBJ or TST_JSON_ARR. On failure TST_JSON_VOID.
+ */
+enum tst_json_type tst_json_start(struct tst_json_buf *buf);
+
+/*
+ * @brief Starts parsing of an JSON object.
+ *
+ * @buf An tst_json buffer.
+ * @res An tst_json result.
+ */
+int tst_json_obj_first(struct tst_json_buf *buf, struct tst_json_val *res);
+int tst_json_obj_next(struct tst_json_buf *buf, struct tst_json_val *res);
+
+#define TST_JSON_OBJ_FOREACH(buf, res) \
+	for (tst_json_obj_first(buf, res); tst_json_valid(res); tst_json_obj_next(buf, res))
+
+/*
+ * @brief Skips parsing of an JSON object.
+ *
+ * @buf An tst_json buffer.
+ * @return Zero on success, non-zero otherwise.
+ */
+int tst_json_obj_skip(struct tst_json_buf *buf);
+
+int tst_json_arr_first(struct tst_json_buf *buf, struct tst_json_val *res);
+int tst_json_arr_next(struct tst_json_buf *buf, struct tst_json_val *res);
+
+#define TST_JSON_ARR_FOREACH(buf, res) \
+	for (tst_json_arr_first(buf, res); tst_json_valid(res); tst_json_arr_next(buf, res))
+
+/*
+ * @brief Skips parsing of an JSON array.
+ *
+ * @buf An tst_json buffer.
+ * @return Zero on success, non-zero otherwise.
+ */
+int tst_json_arr_skip(struct tst_json_buf *buf);
+
+/*
+ * @brief Loads a file into an tst_json buffer.
+ *
+ * @path A path to a file.
+ * @return An tst_json buffer or NULL in a case of a failure.
+ */
+struct tst_json_buf *tst_json_load(const char *path);
+
+/*
+ * @brief Frees an tst_json buffer.
+ *
+ * @buf An tst_json buffer allocated by tst_json_load() function.
+ */
+void tst_json_free(struct tst_json_buf *buf);
+
+#endif /* TST_JSON_H */
diff --git a/lib/tst_json.c b/lib/tst_json.c
new file mode 100644
index 000000000..3a4cb9d0b
--- /dev/null
+++ b/lib/tst_json.c
@@ -0,0 +1,679 @@
+// SPDX-License-Identifier: LGPL-2.1-or-later
+/*
+ * Copyright (C) 2021 Cyril Hrubis <metan@ucw.cz>
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include "tst_json.h"
+
+static inline int buf_empty(struct tst_json_buf *buf)
+{
+	return buf->off >= buf->len;
+}
+
+/* Skips whitespace; returns non-zero if the end of the buffer was reached. */
+static int eatws(struct tst_json_buf *buf)
+{
+	while (!buf_empty(buf)) {
+		switch (buf->json[buf->off]) {
+		case ' ':
+		case '\t':
+		case '\n':
+		case '\r': /* CR is JSON whitespace (RFC 8259), e.g. CRLF files */
+		case '\f':
+		break;
+		default:
+			return 0;
+		}
+		buf->off += 1;
+	}
+	return 1;
+}
+
+static char getb(struct tst_json_buf *buf)
+{
+	if (buf_empty(buf))
+		return 0;
+
+	return buf->json[buf->off++];
+}
+
+static char peekb(struct tst_json_buf *buf)
+{
+	if (buf_empty(buf))
+		return 0;
+
+	return buf->json[buf->off];
+}
+
+static int eatb(struct tst_json_buf *buf, char ch)
+{
+	if (peekb(buf) != ch)
+		return 0;
+
+	getb(buf);
+	return 1;
+}
+
+static int hex2val(unsigned char b)
+{
+	switch (b) {
+	case '0' ... '9':
+		return b - '0';
+	case 'a' ... 'f':
+		return b - 'a' + 10;
+	case 'A' ... 'F':
+		return b - 'A' + 10;
+	default:
+		return -1;
+	}
+}
+
+static int32_t parse_ucode_cp(struct tst_json_buf *buf)
+{
+	int ret = 0, v, i;
+
+	for (i = 0; i < 4; i++) {
+		if ((v = hex2val(getb(buf))) < 0)
+			goto err;
+		ret *= 16;
+		ret += v;
+	}
+
+	return ret;
+err:
+	tst_json_err(buf, "Expected four hexadecimal digits");
+	return -1;
+}
+
+static unsigned int utf8_bytes(uint32_t ucode_cp)
+{
+	if (ucode_cp < 0x0080)
+		return 1;
+
+	if (ucode_cp < 0x0800)
+		return 2;
+
+	if (ucode_cp < 0x10000)
+		return 3;
+
+	return 4;
+}
+
+/* Encodes ucode_cp as UTF-8 into buf (no NUL appended); returns bytes written. */
+static int to_utf8(uint32_t ucode_cp, char *buf)
+{
+	if (ucode_cp < 0x0080) {
+		buf[0] = ucode_cp & 0x007f; /* ASCII: keep all seven low bits */
+		return 1;
+	}
+
+	if (ucode_cp < 0x0800) {
+		buf[0] = 0xc0 | (0x1f & (ucode_cp>>6));
+		buf[1] = 0x80 | (0x3f & ucode_cp);
+		return 2;
+	}
+
+	if (ucode_cp < 0x10000) {
+		buf[0] = 0xe0 | (0x0f & (ucode_cp>>12));
+		buf[1] = 0x80 | (0x3f & (ucode_cp>>6));
+		buf[2] = 0x80 | (0x3f & ucode_cp);
+		return 3;
+	}
+	buf[0] = 0xf0 | (0x07 & (ucode_cp>>18));
+	buf[1] = 0x80 | (0x3f & (ucode_cp>>12));
+	buf[2] = 0x80 | (0x3f & (ucode_cp>>6));
+	buf[3] = 0x80 | (0x3f & ucode_cp);
+	return 4;
+}
+
+/* Parses 4 hex digits of a unicode escape; returns UTF-8 length, 0 on error. */
+static unsigned int parse_ucode_esc(struct tst_json_buf *buf, char *str,
+                                    size_t off, size_t len)
+{
+	int32_t ucode = parse_ucode_cp(buf);
+
+	if (ucode < 0)
+		return 0;
+	/* No output buffer: return the length, code point zero is not an error. */
+	if (!str)
+		return utf8_bytes(ucode);
+	/* The encoded bytes plus the terminating NUL have to fit into str. */
+	if (utf8_bytes(ucode) + 1 > len - off) {
+		tst_json_err(buf, "String buffer too short!");
+		return 0;
+	}
+	return to_utf8(ucode, str+off);
+}
+
+static int copy_str(struct tst_json_buf *buf, char *str, size_t len)
+{
+	size_t pos = 0;
+	int esc = 0;
+	unsigned int l;
+
+	eatb(buf, '"');
+
+	for (;;) {
+		if (buf_empty(buf)) {
+			tst_json_err(buf, "Unterminated string");
+			return 1;
+		}
+
+		if (!esc && eatb(buf, '"')) {
+			if (str)
+				str[pos] = 0;
+			return 0;
+		}
+
+		char b = getb(buf);
+
+		if (!esc && b == '\\') {
+			esc = 1;
+			continue;
+		}
+
+		if (esc) {
+			switch (b) {
+			case '"':
+			case '\\':
+			case '/':
+			break;
+			case 'b':
+				b = '\b';
+			break;
+			case 'f':
+				b = '\f';
+			break;
+			case 'n':
+				b = '\n';
+			break;
+			case 'r':
+				b = '\r';
+			break;
+			case 't':
+				b = '\t';
+			break;
+			case 'u':
+				if (!(l = parse_ucode_esc(buf, str, pos, len)))
+					return 1;
+				pos += l;
+				b = 0;
+			break;
+			default:
+				tst_json_err(buf, "Invalid escape \\%c", b);
+				return 1;
+			}
+			esc = 0;
+		}
+
+		if (str && b) {
+			if (pos + 1 >= len) {
+				tst_json_err(buf, "String buffer too short!");
+				return 1;
+			}
+
+			str[pos++] = b;
+		}
+	}
+
+	return 1;
+}
+
+static int copy_id_str(struct tst_json_buf *buf, char *str, size_t len)
+{
+	size_t pos = 0;
+
+	if (eatws(buf))
+		goto err0;
+
+	if (!eatb(buf, '"'))
+		goto err0;
+
+	for (;;) {
+		if (buf_empty(buf)) {
+			tst_json_err(buf, "Unterminated ID string");
+			return 1;
+		}
+
+		if (eatb(buf, '"')) {
+			str[pos] = 0;
+			break;
+		}
+
+		if (pos >= len-1) {
+			tst_json_err(buf, "ID string too long");
+			return 1;
+		}
+
+		str[pos++] = getb(buf);
+	}
+
+	if (eatws(buf))
+		goto err1;
+
+	if (!eatb(buf, ':'))
+		goto err1;
+
+	return 0;
+err0:
+	tst_json_err(buf, "Expected ID string");
+	return 1;
+err1:
+	tst_json_err(buf, "Expected ':' after ID string");
+	return 1;
+}
+
+static int is_digit(char b)
+{
+	switch (b) {
+	case '0' ... '9':
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+static int get_number(struct tst_json_buf *buf, struct tst_json_val *res)
+{
+	long val = 0;
+	int neg = 0;
+
+	if (peekb(buf) == '-') {
+		neg = 1;
+		getb(buf);
+		if (!is_digit(peekb(buf))) {
+			tst_json_err(buf, "Expected digits after '-'");
+			return 1;
+		}
+	}
+
+	while (is_digit(peekb(buf))) {
+		val *= 10;
+		val += getb(buf) - '0';
+		//TODO: overflow?
+	}
+
+	if (neg)
+		val = -val;
+
+	res->val_int = val;
+
+	return 0;
+}
+
+int tst_json_obj_skip(struct tst_json_buf *buf)
+{
+	struct tst_json_val res = {};
+
+	TST_JSON_OBJ_FOREACH(buf, &res) {
+		switch (res.type) {
+		case TST_JSON_OBJ:
+			if (tst_json_obj_skip(buf))
+				return 1;
+		break;
+		case TST_JSON_ARR:
+			if (tst_json_arr_skip(buf))
+				return 1;
+		break;
+		default:
+		break;
+		}
+	}
+
+	return 0;
+}
+
+int tst_json_arr_skip(struct tst_json_buf *buf)
+{
+	struct tst_json_val res = {};
+
+	TST_JSON_ARR_FOREACH(buf, &res) {
+		switch (res.type) {
+		case TST_JSON_OBJ:
+			if (tst_json_obj_skip(buf))
+				return 1;
+		break;
+		case TST_JSON_ARR:
+			if (tst_json_arr_skip(buf))
+				return 1;
+		break;
+		default:
+		break;
+		}
+	}
+
+	return 0;
+}
+
+enum tst_json_type tst_json_next_type(struct tst_json_buf *buf)
+{
+	if (eatws(buf)) {
+		tst_json_err(buf, "Unexpected end");
+		return TST_JSON_VOID;
+	}
+
+	char b = peekb(buf);
+
+	switch (b) {
+	case '{':
+		return TST_JSON_OBJ;
+	case '[':
+		return TST_JSON_ARR;
+	case '"':
+		return TST_JSON_STR;
+	case '-':
+	case '0' ... '9':
+		return TST_JSON_INT;
+	default:
+		tst_json_err(buf, "Expected object, array, number or string");
+		return TST_JSON_VOID;
+	}
+}
+
+enum tst_json_type tst_json_start(struct tst_json_buf *buf)
+{
+	enum tst_json_type type = tst_json_next_type(buf);
+
+	switch (type) {
+	case TST_JSON_ARR:
+	case TST_JSON_OBJ:
+	case TST_JSON_VOID:
+	break;
+	case TST_JSON_INT:
+	case TST_JSON_STR:
+		tst_json_err(buf, "JSON can start only with array or object");
+		type = TST_JSON_VOID;
+	break;
+	}
+
+	return type;
+}
+
+/* Parses the next value into res; returns 1 on success, 0 with type VOID. */
+static int get_value(struct tst_json_buf *buf, struct tst_json_val *res)
+{
+	res->type = tst_json_next_type(buf);
+
+	switch (res->type) {
+	case TST_JSON_STR:
+		if (copy_str(buf, res->buf, res->buf_size)) {
+			res->type = TST_JSON_VOID;
+			return 0;
+		}
+		res->val_str = res->buf;
+		return 1;
+	case TST_JSON_INT:
+		if (get_number(buf, res))
+			res->type = TST_JSON_VOID; /* no half parsed numbers */
+		return res->type == TST_JSON_INT;
+	case TST_JSON_VOID:
+		return 0;
+	case TST_JSON_ARR:
+	case TST_JSON_OBJ:
+		buf->sub_off = buf->off;
+		return 1;
+	}
+	return 1;
+}
+
+static int pre_next(struct tst_json_buf *buf, struct tst_json_val *res)
+{
+	if (!eatb(buf, ',')) {
+		tst_json_err(buf, "Expected ','");
+		res->type = TST_JSON_VOID;
+		return 1;
+	}
+
+	if (eatws(buf)) {
+		tst_json_err(buf, "Unexpected end");
+		res->type = TST_JSON_VOID;
+		return 1;
+	}
+
+	return 0;
+}
+
+/* Returns 1 and voids res at the closing b or at an unexpected end of input. */
+static int check_end(struct tst_json_buf *buf, struct tst_json_val *res, char b)
+{
+	if (eatws(buf)) {
+		tst_json_err(buf, "Unexpected end");
+		res->type = TST_JSON_VOID;
+		return 1;
+	}
+	if (eatb(buf, b)) {
+		res->type = TST_JSON_VOID;
+		return 1;
+	}
+	return 0;
+}
+
+static int obj_next(struct tst_json_buf *buf, struct tst_json_val *res)
+{
+	res->type = TST_JSON_VOID; /* a bad ID must not leave a stale type behind */
+	if (copy_id_str(buf, res->id, sizeof(res->id)))
+		return 0;
+	return get_value(buf, res);
+}
+
+static int check_err(struct tst_json_buf *buf, struct tst_json_val *res)
+{
+	if (tst_json_is_err(buf)) {
+		res->type = TST_JSON_VOID;
+		return 1;
+	}
+
+	return 0;
+}
+
+int tst_json_obj_next(struct tst_json_buf *buf, struct tst_json_val *res)
+{
+	if (check_err(buf, res))
+		return 0;
+
+	if (check_end(buf, res, '}'))
+		return 0;
+
+	if (pre_next(buf, res))
+		return 0;
+
+	return obj_next(buf, res);
+}
+
+static int any_first(struct tst_json_buf *buf, char b)
+{
+	if (eatws(buf)) {
+		tst_json_err(buf, "Unexpected end");
+		return 1;
+	}
+
+	if (!eatb(buf, b)) {
+		tst_json_err(buf, "Expected '%c'", b);
+		return 1;
+	}
+
+	return 0;
+}
+
+int tst_json_obj_first(struct tst_json_buf *buf, struct tst_json_val *res)
+{
+	if (check_err(buf, res))
+		return 0;
+	/* any_first() sets the error on failure, check_err() then voids res. */
+	if (any_first(buf, '{') && check_err(buf, res))
+		return 0;
+
+	if (check_end(buf, res, '}'))
+		return 0;
+
+	return obj_next(buf, res);
+}
+
+static int arr_next(struct tst_json_buf *buf, struct tst_json_val *res)
+{
+	return get_value(buf, res);
+}
+
+int tst_json_arr_first(struct tst_json_buf *buf, struct tst_json_val *res)
+{
+	if (check_err(buf, res))
+		return 0;
+	/* any_first() sets the error on failure, check_err() then voids res. */
+	if (any_first(buf, '[') && check_err(buf, res))
+		return 0;
+
+	if (check_end(buf, res, ']'))
+		return 0;
+
+	return arr_next(buf, res);
+}
+
+int tst_json_arr_next(struct tst_json_buf *buf, struct tst_json_val *res)
+{
+	if (check_err(buf, res))
+		return 0;
+
+	if (check_end(buf, res, ']'))
+		return 0;
+
+	if (pre_next(buf, res))
+		return 0;
+
+	return arr_next(buf, res);
+}
+
+void tst_json_err(struct tst_json_buf *buf, const char *fmt, ...)
+{
+	va_list va;
+
+	va_start(va, fmt);
+	vsnprintf(buf->err, TST_JSON_ERR_MAX, fmt, va);
+	va_end(va);
+}
+
+static void print_line(FILE *f, const char *line)
+{
+	while (*line && *line != '\n')
+		fputc(*(line++), f);
+}
+
+static void print_spaces(FILE *f, size_t count)
+{
+	while (count--)
+		fputc(' ', f);
+}
+
+static void print_spaceline(FILE *f, const char *line, size_t count)
+{
+	size_t i;
+
+	for (i = 0; i < count; i++)
+		fputc(line[i] == '\t' ? '\t' : ' ', f);
+}
+
+#define ERR_LINES 10
+
+#define MIN(A, B) (((A) < (B)) ? (A) : (B)) /* evaluates its arguments twice */
+
+void tst_json_err_print(FILE *f, struct tst_json_buf *buf)
+{
+	ssize_t i;
+	const char *lines[ERR_LINES] = {};
+	size_t cur_line = 0;
+	size_t cur_off = 0;
+	size_t last_off = buf->off;
+
+	for (;;) {
+		lines[(cur_line++) % ERR_LINES] = buf->json + cur_off;
+
+		while (cur_off < buf->len && buf->json[cur_off] != '\n')
+			cur_off++;
+
+		if (cur_off >= buf->off)
+			break;
+
+		cur_off++;
+		last_off = buf->off - cur_off;
+	}
+
+	fprintf(f, "Parse error at line %zu\n\n", cur_line);
+
+	size_t idx = 0;
+
+	for (i = MIN(ERR_LINES, cur_line); i > 0; i--) {
+		idx = (cur_line - i) % ERR_LINES;
+		fprintf(f, "%03zu: ", cur_line - i + 1);
+		print_line(f, lines[idx]);
+		fputc('\n', f);
+	}
+
+	print_spaces(f, 5);
+	print_spaceline(f, lines[idx], last_off);
+	fprintf(f, "^\n");
+	fprintf(f, "%s\n", buf->err);
+}
+
+struct tst_json_buf *tst_json_load(const char *path)
+{
+	int fd = open(path, O_RDONLY);
+	struct tst_json_buf *ret;
+	ssize_t res;
+	off_t len, off = 0;
+
+	if (fd < 0)
+		return NULL;
+
+	len = lseek(fd, 0, SEEK_END);
+	if (len == (off_t)-1) {
+		fprintf(stderr, "lseek() failed\n");
+		goto err0;
+	}
+
+	if (lseek(fd, 0, SEEK_SET) == (off_t)-1) {
+		fprintf(stderr, "lseek() failed\n");
+		goto err0;
+	}
+
+	ret = malloc(sizeof(struct tst_json_buf) + len + 1);
+	if (!ret) {
+		fprintf(stderr, "malloc() failed\n");
+		goto err0;
+	}
+
+	memset(ret, 0, sizeof(*ret));
+
+	ret->buf[len] = 0;
+	ret->len = len;
+	ret->json = ret->buf;
+	/* read() may be short; zero means the file shrank, fail rather than spin. */
+	while (off < len) {
+		res = read(fd, ret->buf + off, len - off);
+		if (res <= 0) {
+			fprintf(stderr, "read() failed or file truncated\n");
+			goto err1;
+		}
+
+		off += res;
+	}
+
+	close(fd);
+
+	return ret;
+err1:
+	free(ret);
+err0:
+	close(fd);
+	return NULL;
+}
+
+void tst_json_free(struct tst_json_buf *buf)
+{
+	free(buf);
+}
-- 
2.26.2



More information about the ltp mailing list