From 8e3c58e6e459b285d37edb6129e412ed25cd90c1 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 6 Sep 2020 13:57:10 -0400 Subject: [PATCH] Refactor pg_get_line() to expose an alternative StringInfo-based API. Letting the caller provide a StringInfo to read into is helpful when the caller needs to merge lines or otherwise modify the data after it's been read. Notably, now the code added by commit 8f8154a50 can use pg_get_line_append() instead of having its own copy of that logic. A follow-on commit will also make use of this. Also, since StringInfo buffers are a minimum of 1KB long, blindly using pg_get_line() in a loop can eat a lot more memory than one would expect. I discovered for instance that commit e0f05cd5b caused initdb to consume circa 10MB to read postgres.bki, even though that's under 1MB worth of data. A less memory-hungry alternative is to re-use the same StringInfo for all lines and pg_strdup the results. Discussion: https://postgr.es/m/1315832.1599345736@sss.pgh.pa.us --- src/backend/libpq/hba.c | 40 +++++++------------- src/bin/initdb/initdb.c | 12 ++++-- src/common/pg_get_line.c | 74 +++++++++++++++++++++++++------------ src/include/common/string.h | 3 ++ 4 files changed, 77 insertions(+), 52 deletions(-) diff --git a/src/backend/libpq/hba.c b/src/backend/libpq/hba.c index 5991a21cf2..9f106653f3 100644 --- a/src/backend/libpq/hba.c +++ b/src/backend/libpq/hba.c @@ -502,33 +502,8 @@ tokenize_file(const char *filename, FILE *file, List **tok_lines, int elevel) /* Collect the next input line, handling backslash continuations */ resetStringInfo(&buf); - while (!feof(file) && !ferror(file)) + while (pg_get_line_append(file, &buf)) { - /* Make sure there's a reasonable amount of room in the buffer */ - enlargeStringInfo(&buf, 128); - - /* Read some data, appending it to what we already have */ - if (fgets(buf.data + buf.len, buf.maxlen - buf.len, file) == NULL) - { - int save_errno = errno; - - if (!ferror(file)) - break; /* normal EOF */ - /* I/O error! */ - ereport(elevel, - (errcode_for_file_access(), - errmsg("could not read file \"%s\": %m", filename))); - err_msg = psprintf("could not read file \"%s\": %s", - filename, strerror(save_errno)); - resetStringInfo(&buf); - break; - } - buf.len += strlen(buf.data + buf.len); - - /* If we haven't got a whole line, loop to read more */ - if (!(buf.len > 0 && buf.data[buf.len - 1] == '\n')) - continue; - /* Strip trailing newline, including \r in case we're on Windows */ buf.len = pg_strip_crlf(buf.data); @@ -551,6 +526,19 @@ tokenize_file(const char *filename, FILE *file, List **tok_lines, int elevel) break; } + if (ferror(file)) + { + /* I/O error! */ + int save_errno = errno; + + ereport(elevel, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", filename))); + err_msg = psprintf("could not read file \"%s\": %s", + filename, strerror(save_errno)); + break; + } + /* Parse fields */ lineptr = buf.data; while (*lineptr && err_msg == NULL) diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 861b8817b9..37e0d7ceab 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -470,9 +470,9 @@ readfile(const char *path) { char **result; FILE *infile; + StringInfoData line; int maxlines; int n; - char *ln; if ((infile = fopen(path, "r")) == NULL) { @@ -480,11 +480,13 @@ readfile(const char *path) exit(1); } + initStringInfo(&line); + maxlines = 1024; result = (char **) pg_malloc(maxlines * sizeof(char *)); n = 0; - while ((ln = pg_get_line(infile)) != NULL) + while (pg_get_line_append(infile, &line)) { /* make sure there will be room for a trailing NULL pointer */ if (n >= maxlines - 1) @@ -493,10 +495,14 @@ readfile(const char *path) result = (char **) pg_realloc(result, maxlines * sizeof(char *)); } - result[n++] = ln; + result[n++] = pg_strdup(line.data); + + resetStringInfo(&line); } result[n] = NULL; + pfree(line.data); + fclose(infile); return result; diff --git a/src/common/pg_get_line.c b/src/common/pg_get_line.c index 38433675d4..2fb8e19893 100644 --- a/src/common/pg_get_line.c +++ b/src/common/pg_get_line.c @@ -41,6 +41,11 @@ * Note that while I/O errors are reflected back to the caller to be * dealt with, an OOM condition for the palloc'd buffer will not be; * there'll be an ereport(ERROR) or exit(1) inside stringinfo.c. + * + * Also note that the palloc'd buffer is usually a lot longer than + * strictly necessary, so it may be inadvisable to use this function + * to collect lots of long-lived data. A less memory-hungry option + * is to use pg_get_line_append() in a loop, then pstrdup() each line. */ char * pg_get_line(FILE *stream) @@ -49,21 +54,7 @@ pg_get_line(FILE *stream) initStringInfo(&buf); - /* Read some data, appending it to whatever we already have */ - while (fgets(buf.data + buf.len, buf.maxlen - buf.len, stream) != NULL) - { - buf.len += strlen(buf.data + buf.len); - - /* Done if we have collected a newline */ - if (buf.len > 0 && buf.data[buf.len - 1] == '\n') - return buf.data; - - /* Make some more room in the buffer, and loop to read more data */ - enlargeStringInfo(&buf, 128); - } - - /* Did fgets() fail because of an I/O error? */ - if (ferror(stream)) + if (!pg_get_line_append(stream, &buf)) { /* ensure that free() doesn't mess up errno */ int save_errno = errno; @@ -73,13 +64,50 @@ pg_get_line(FILE *stream) return NULL; } - /* If we read no data before reaching EOF, we should return NULL */ - if (buf.len == 0) - { - pfree(buf.data); - return NULL; - } - - /* No newline at EOF ... so return what we have */ return buf.data; } + +/* + * pg_get_line_append() + * + * This has similar behavior to pg_get_line(), and thence to fgets(), + * except that the collected data is appended to whatever is in *buf. + * + * Returns true if a line was successfully collected (including the + * case of a non-newline-terminated line at EOF). Returns false if + * there was an I/O error or no data was available before EOF. + * (Check ferror(stream) to distinguish these cases.) + * + * In the false-result case, the contents of *buf are logically unmodified, + * though it's possible that the buffer has been resized. + */ +bool +pg_get_line_append(FILE *stream, StringInfo buf) +{ + int orig_len = buf->len; + + /* Read some data, appending it to whatever we already have */ + while (fgets(buf->data + buf->len, buf->maxlen - buf->len, stream) != NULL) + { + buf->len += strlen(buf->data + buf->len); + + /* Done if we have collected a newline */ + if (buf->len > orig_len && buf->data[buf->len - 1] == '\n') + return true; + + /* Make some more room in the buffer, and loop to read more data */ + enlargeStringInfo(buf, 128); + } + + /* Check for I/O errors and EOF */ + if (ferror(stream) || buf->len == orig_len) + { + /* Discard any data we collected before detecting error */ + buf->len = orig_len; + buf->data[orig_len] = '\0'; + return false; + } + + /* No newline at EOF, but we did collect some data */ + return true; +} diff --git a/src/include/common/string.h b/src/include/common/string.h index 18aa1dc5aa..50c241a811 100644 --- a/src/include/common/string.h +++ b/src/include/common/string.h @@ -10,6 +10,8 @@ #ifndef COMMON_STRING_H #define COMMON_STRING_H +struct StringInfoData; /* avoid including stringinfo.h here */ + /* functions in src/common/string.c */ extern bool pg_str_endswith(const char *str, const char *end); extern int strtoint(const char *pg_restrict str, char **pg_restrict endptr, @@ -19,6 +21,7 @@ extern int pg_strip_crlf(char *str); /* functions in src/common/pg_get_line.c */ extern char *pg_get_line(FILE *stream); +extern bool pg_get_line_append(FILE *stream, struct StringInfoData *buf); /* functions in src/common/sprompt.c */ extern char *simple_prompt(const char *prompt, bool echo);