/*------------------------------------------------------------------------- * * test_json_parser_incremental.c * Test program for incremental JSON parser * * Copyright (c) 2024, PostgreSQL Global Development Group * * IDENTIFICATION * src/test/modules/test_json_parser/test_json_parser_incremental.c * * This program tests incremental parsing of json. The input is fed into * the parser in very small chunks. In practice you would normally use * much larger chunks, but doing this makes it more likely that the * full range of increment handling, especially in the lexer, is exercised. * If the "-c SIZE" option is provided, that chunk size is used instead * of the default of 60. * * If the -s flag is given, the program does semantic processing. This should * just mirror back the json, albeit with white space changes. * * The argument specifies the file containing the JSON input. * *------------------------------------------------------------------------- */ #include "postgres_fe.h" #include #include #include #include #include "common/jsonapi.h" #include "common/logging.h" #include "lib/stringinfo.h" #include "mb/pg_wchar.h" #include "pg_getopt.h" #define BUFSIZE 6000 #define DEFAULT_CHUNK_SIZE 60 typedef struct DoState { JsonLexContext *lex; bool elem_is_first; StringInfo buf; } DoState; static void usage(const char *progname); static void escape_json(StringInfo buf, const char *str); /* semantic action functions for parser */ static JsonParseErrorType do_object_start(void *state); static JsonParseErrorType do_object_end(void *state); static JsonParseErrorType do_object_field_start(void *state, char *fname, bool isnull); static JsonParseErrorType do_object_field_end(void *state, char *fname, bool isnull); static JsonParseErrorType do_array_start(void *state); static JsonParseErrorType do_array_end(void *state); static JsonParseErrorType do_array_element_start(void *state, bool isnull); static JsonParseErrorType do_array_element_end(void *state, bool isnull); static JsonParseErrorType do_scalar(void *state, char *token, JsonTokenType tokentype); JsonSemAction sem = { .object_start = do_object_start, .object_end = do_object_end, .object_field_start = do_object_field_start, .object_field_end = do_object_field_end, .array_start = do_array_start, .array_end = do_array_end, .array_element_start = do_array_element_start, .array_element_end = do_array_element_end, .scalar = do_scalar }; int main(int argc, char **argv) { char buff[BUFSIZE]; FILE *json_file; JsonParseErrorType result; JsonLexContext lex; StringInfoData json; int n_read; size_t chunk_size = DEFAULT_CHUNK_SIZE; struct stat statbuf; off_t bytes_left; JsonSemAction *testsem = &nullSemAction; char *testfile; int c; bool need_strings = false; pg_logging_init(argv[0]); while ((c = getopt(argc, argv, "c:s")) != -1) { switch (c) { case 'c': /* chunksize */ sscanf(optarg, "%zu", &chunk_size); if (chunk_size > BUFSIZE) pg_fatal("chunk size cannot exceed %d", BUFSIZE); break; case 's': /* do semantic processing */ testsem = &sem; sem.semstate = palloc(sizeof(struct DoState)); ((struct DoState *) sem.semstate)->lex = &lex; ((struct DoState *) sem.semstate)->buf = makeStringInfo(); need_strings = true; break; } } if (optind < argc) { testfile = pg_strdup(argv[optind]); optind++; } else { usage(argv[0]); exit(1); } makeJsonLexContextIncremental(&lex, PG_UTF8, need_strings); initStringInfo(&json); if ((json_file = fopen(testfile, "r")) == NULL) pg_fatal("error opening input: %m"); if (fstat(fileno(json_file), &statbuf) != 0) pg_fatal("error statting input: %m"); bytes_left = statbuf.st_size; for (;;) { /* We will break when there's nothing left to read */ if (bytes_left < chunk_size) chunk_size = bytes_left; n_read = fread(buff, 1, chunk_size, json_file); if (n_read < chunk_size) pg_fatal("error reading input file: %d", ferror(json_file)); appendBinaryStringInfo(&json, buff, n_read); /* * Append some trailing junk to the buffer passed to the parser. This * helps us ensure that the parser does the right thing even if the * chunk isn't terminated with a '\0'. */ appendStringInfoString(&json, "1+23 trailing junk"); bytes_left -= n_read; if (bytes_left > 0) { result = pg_parse_json_incremental(&lex, testsem, json.data, n_read, false); if (result != JSON_INCOMPLETE) { fprintf(stderr, "%s\n", json_errdetail(result, &lex)); exit(1); } resetStringInfo(&json); } else { result = pg_parse_json_incremental(&lex, testsem, json.data, n_read, true); if (result != JSON_SUCCESS) { fprintf(stderr, "%s\n", json_errdetail(result, &lex)); exit(1); } if (!need_strings) printf("SUCCESS!\n"); break; } } fclose(json_file); exit(0); } /* * The semantic routines here essentially just output the same json, except * for white space. We could pretty print it but there's no need for our * purposes. The result should be able to be fed to any JSON processor * such as jq for validation. */ static JsonParseErrorType do_object_start(void *state) { DoState *_state = (DoState *) state; printf("{\n"); _state->elem_is_first = true; return JSON_SUCCESS; } static JsonParseErrorType do_object_end(void *state) { DoState *_state = (DoState *) state; printf("\n}\n"); _state->elem_is_first = false; return JSON_SUCCESS; } static JsonParseErrorType do_object_field_start(void *state, char *fname, bool isnull) { DoState *_state = (DoState *) state; if (!_state->elem_is_first) printf(",\n"); resetStringInfo(_state->buf); escape_json(_state->buf, fname); printf("%s: ", _state->buf->data); _state->elem_is_first = false; return JSON_SUCCESS; } static JsonParseErrorType do_object_field_end(void *state, char *fname, bool isnull) { /* nothing to do really */ return JSON_SUCCESS; } static JsonParseErrorType do_array_start(void *state) { DoState *_state = (DoState *) state; printf("[\n"); _state->elem_is_first = true; return JSON_SUCCESS; } static JsonParseErrorType do_array_end(void *state) { DoState *_state = (DoState *) state; printf("\n]\n"); _state->elem_is_first = false; return JSON_SUCCESS; } static JsonParseErrorType do_array_element_start(void *state, bool isnull) { DoState *_state = (DoState *) state; if (!_state->elem_is_first) printf(",\n"); _state->elem_is_first = false; return JSON_SUCCESS; } static JsonParseErrorType do_array_element_end(void *state, bool isnull) { /* nothing to do */ return JSON_SUCCESS; } static JsonParseErrorType do_scalar(void *state, char *token, JsonTokenType tokentype) { DoState *_state = (DoState *) state; if (tokentype == JSON_TOKEN_STRING) { resetStringInfo(_state->buf); escape_json(_state->buf, token); printf("%s", _state->buf->data); } else printf("%s", token); return JSON_SUCCESS; } /* copied from backend code */ static void escape_json(StringInfo buf, const char *str) { const char *p; appendStringInfoCharMacro(buf, '"'); for (p = str; *p; p++) { switch (*p) { case '\b': appendStringInfoString(buf, "\\b"); break; case '\f': appendStringInfoString(buf, "\\f"); break; case '\n': appendStringInfoString(buf, "\\n"); break; case '\r': appendStringInfoString(buf, "\\r"); break; case '\t': appendStringInfoString(buf, "\\t"); break; case '"': appendStringInfoString(buf, "\\\""); break; case '\\': appendStringInfoString(buf, "\\\\"); break; default: if ((unsigned char) *p < ' ') appendStringInfo(buf, "\\u%04x", (int) *p); else appendStringInfoCharMacro(buf, *p); break; } } appendStringInfoCharMacro(buf, '"'); } static void usage(const char *progname) { fprintf(stderr, "Usage: %s [OPTION ...] testfile\n", progname); fprintf(stderr, "Options:\n"); fprintf(stderr, " -c chunksize size of piece fed to parser (default 64)n"); fprintf(stderr, " -s do semantic processing\n"); }