From 42fa4b660143b66bea1fb90793ec90054e170c93 Mon Sep 17 00:00:00 2001 From: Andrew Dunstan Date: Tue, 9 Apr 2024 15:30:48 -0400 Subject: [PATCH] Assorted minor cleanups in the test_json_parser module Per gripes from Michael Paquier Discussion: https://postgr.es/m/ZhTQ6_w1vwOhqTQI@paquier.xyz Along the way, also clean up a handful of typos in 3311ea86ed and ea7b4e9a2a, found by Alexander Lakhin, and a couple of stylistic snafus noted by Daniel Westermann and Daniel Gustafsson. --- src/backend/backup/basebackup_incremental.c | 6 ++-- src/common/jsonapi.c | 6 ++-- src/common/parse_manifest.c | 2 +- src/test/modules/test_json_parser/README | 19 ++++++------- .../test_json_parser_incremental.c | 28 ++++++++++++++----- .../test_json_parser/test_json_parser_perf.c | 11 ++++---- 6 files changed, 43 insertions(+), 29 deletions(-) diff --git a/src/backend/backup/basebackup_incremental.c b/src/backend/backup/basebackup_incremental.c index 330a229401..4ad9d7cad0 100644 --- a/src/backend/backup/basebackup_incremental.c +++ b/src/backend/backup/basebackup_incremental.c @@ -34,7 +34,7 @@ #define BLOCKS_PER_READ 512 /* - * we expect the find the last lines of the manifest, including the checksum, + * We expect to find the last lines of the manifest, including the checksum, * in the last MIN_CHUNK bytes of the manifest. We trigger an incremental * parse step if we are about to overflow MAX_CHUNK bytes. */ @@ -88,8 +88,8 @@ struct IncrementalBackupInfo * Files extracted from the backup manifest. * * We don't really need this information, because we use WAL summaries to - * figure what's changed. It would be unsafe to just rely on the list of - * files that existed before, because it's possible for a file to be + * figure out what's changed. It would be unsafe to just rely on the list + * of files that existed before, because it's possible for a file to be * removed and a new one created with the same name and different * contents. In such cases, the whole file must still be sent. We can tell * from the WAL summaries whether that happened, but not from the file diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c index 9dfbc397c0..12fabcaccf 100644 --- a/src/common/jsonapi.c +++ b/src/common/jsonapi.c @@ -356,7 +356,7 @@ makeJsonLexContextCstringLen(JsonLexContext *lex, char *json, * need explicit stacks for predictions, field names and null indicators, but * we don't need the input, that will be handed in bit by bit to the * parse routine. We also need an accumulator for partial tokens in case - * the boundary between chunks happns to fall in the middle of a token. + * the boundary between chunks happens to fall in the middle of a token. */ #define JS_STACK_CHUNK_SIZE 64 #define JS_MAX_PROD_LEN 10 /* more than we need */ @@ -1414,9 +1414,9 @@ json_lex(JsonLexContext *lex) } /* - * Add any remaining alpha_numeric chars. This takes care of the + * Add any remaining alphanumeric chars. This takes care of the * {null, false, true} literals as well as any trailing - * alpha-numeric junk on non-string tokens. + * alphanumeric junk on non-string tokens. */ for (int i = added; i < lex->input_length; i++) { diff --git a/src/common/parse_manifest.c b/src/common/parse_manifest.c index a94e3d6b15..821fba3967 100644 --- a/src/common/parse_manifest.c +++ b/src/common/parse_manifest.c @@ -806,7 +806,7 @@ json_manifest_finalize_wal_range(JsonManifestParseState *parse) * the rest of the file. * * For an incremental parse, this will just be called on the last chunk of the - * manifest, and the cryptohash context paswed in. For a non-incremental + * manifest, and the cryptohash context passed in. For a non-incremental * parse incr_ctx will be NULL. */ static void diff --git a/src/test/modules/test_json_parser/README b/src/test/modules/test_json_parser/README index 7e410db24b..ceccd499f4 100644 --- a/src/test/modules/test_json_parser/README +++ b/src/test/modules/test_json_parser/README @@ -4,22 +4,21 @@ Module `test_json_parser` This module contains two programs for testing the json parsers. - `test_json_parser_incremental` is for testing the incremental parser, It - reads in a file and pases it in very small chunks (60 bytes at a time) to - the incremental parser. It's not meant to be a speed test but to test the - accuracy of the incremental parser. It takes one argument: the name of the - input file. + reads in a file and passes it in very small chunks (default is 60 bytes at a + time) to the incremental parser. It's not meant to be a speed test but to + test the accuracy of the incremental parser. There are two option arguments, + "-c nn" specifies an alternative chunk size, and "-s" specifies using + semantic routines. The semantic routines re-output the json, although not in + a very pretty form. The required non-option argument is the input file name. - `test_json_parser_perf` is for speed testing both the standard recursive descent parser and the non-recursive incremental parser. If given the `-i` flag it uses the non-recursive parser, - otherwise the stardard parser. The remaining flags are the number of + otherwise the standard parser. The remaining flags are the number of parsing iterations and the file containing the input. Even when using the non-recursive parser, the input is passed to the parser in a single chunk. The results are thus comparable to those of the standard parser. -The easiest way to use these is to run `make check` and `make speed-check` - -The sample input file is a small extract from a list of `delicious` +The sample input file is a small, sanitized extract from a list of `delicious` bookmarks taken some years ago, all wrapped in a single json -array. 10,000 iterations of parsing this file gives a reasonable -benchmark, and that is what the `speed-check` target does. +array. diff --git a/src/test/modules/test_json_parser/test_json_parser_incremental.c b/src/test/modules/test_json_parser/test_json_parser_incremental.c index c28db05647..835cdc9efd 100644 --- a/src/test/modules/test_json_parser/test_json_parser_incremental.c +++ b/src/test/modules/test_json_parser/test_json_parser_incremental.c @@ -3,16 +3,17 @@ * test_json_parser_incremental.c * Test program for incremental JSON parser * - * Copyright (c) 2023, PostgreSQL Global Development Group + * Copyright (c) 2024, PostgreSQL Global Development Group * * IDENTIFICATION * src/test/modules/test_json_parser/test_json_parser_incremental.c * - * This progam tests incremental parsing of json. The input is fed into + * This program tests incremental parsing of json. The input is fed into * the parser in very small chunks. In practice you would normally use * much larger chunks, but doing this makes it more likely that the - * full range of incement handling, especially in the lexer, is exercised. - * If the "-c SIZE" option is provided, that chunk size is used instead. + * full range of increment handling, especially in the lexer, is exercised. + * If the "-c SIZE" option is provided, that chunk size is used instead + * of the default of 60. * * The argument specifies the file containing the JSON input. * @@ -31,6 +32,9 @@ #include "mb/pg_wchar.h" #include "pg_getopt.h" +#define BUFSIZE 6000 +#define DEFAULT_CHUNK_SIZE 60 + typedef struct DoState { JsonLexContext *lex; @@ -67,14 +71,13 @@ JsonSemAction sem = { int main(int argc, char **argv) { - /* max delicious line length is less than this */ - char buff[6001]; + char buff[BUFSIZE]; FILE *json_file; JsonParseErrorType result; JsonLexContext lex; StringInfoData json; int n_read; - size_t chunk_size = 60; + size_t chunk_size = DEFAULT_CHUNK_SIZE; struct stat statbuf; off_t bytes_left; JsonSemAction *testsem = &nullSemAction; @@ -88,6 +91,11 @@ main(int argc, char **argv) { case 'c': /* chunksize */ sscanf(optarg, "%zu", &chunk_size); + if (chunk_size > BUFSIZE) + { + fprintf(stderr, "chunk size cannot exceed %d\n", BUFSIZE); + exit(1); + } break; case 's': /* do semantic processing */ testsem = &sem; @@ -121,6 +129,12 @@ main(int argc, char **argv) { n_read = fread(buff, 1, chunk_size, json_file); appendBinaryStringInfo(&json, buff, n_read); + + /* + * Append some trailing junk to the buffer passed to the parser. This + * helps us ensure that the parser does the right thing even if the + * chunk isn't terminated with a '\0'. + */ appendStringInfoString(&json, "1+23 trailing junk"); bytes_left -= n_read; if (bytes_left > 0) diff --git a/src/test/modules/test_json_parser/test_json_parser_perf.c b/src/test/modules/test_json_parser/test_json_parser_perf.c index 517dc8529a..c463046848 100644 --- a/src/test/modules/test_json_parser/test_json_parser_perf.c +++ b/src/test/modules/test_json_parser/test_json_parser_perf.c @@ -1,14 +1,14 @@ /*------------------------------------------------------------------------- * * test_json_parser_perf.c - * Performancet est program for both flavors of the JSON parser + * Performance test program for both flavors of the JSON parser * - * Copyright (c) 2023, PostgreSQL Global Development Group + * Copyright (c) 2024, PostgreSQL Global Development Group * * IDENTIFICATION * src/test/modules/test_json_parser/test_json_parser_perf.c * - * This progam tests either the standard (recursive descent) JSON parser + * This program tests either the standard (recursive descent) JSON parser * or the incremental (table driven) parser, but without breaking the input * into chunks in the latter case. Thus it can be used to compare the pure * parsing speed of the two parsers. If the "-i" option is used, then the @@ -28,11 +28,12 @@ #include #include +#define BUFSIZE 6000 + int main(int argc, char **argv) { - /* max delicious line length is less than this */ - char buff[6001]; + char buff[BUFSIZE]; FILE *json_file; JsonParseErrorType result; JsonLexContext *lex;