Refactor pg_get_line() to expose an alternative StringInfo-based API.

Letting the caller provide a StringInfo to read into is helpful when
the caller needs to merge lines or otherwise modify the data after
it's been read.  Notably, now the code added by commit 8f8154a50
can use pg_get_line_append() instead of having its own copy of that
logic.  A follow-on commit will also make use of this.

Also, since StringInfo buffers are a minimum of 1KB long, blindly
using pg_get_line() in a loop can eat a lot more memory than one would
expect.  I discovered for instance that commit e0f05cd5b caused initdb
to consume circa 10MB to read postgres.bki, even though that's under
1MB worth of data.  A less memory-hungry alternative is to re-use the
same StringInfo for all lines and pg_strdup the results.

Discussion: https://postgr.es/m/1315832.1599345736@sss.pgh.pa.us
This commit is contained in:
Tom Lane 2020-09-06 13:57:10 -04:00
parent 68b603e1a9
commit 8e3c58e6e4
4 changed files with 77 additions and 52 deletions

View File

@ -502,33 +502,8 @@ tokenize_file(const char *filename, FILE *file, List **tok_lines, int elevel)
/* Collect the next input line, handling backslash continuations */
resetStringInfo(&buf);
while (!feof(file) && !ferror(file))
while (pg_get_line_append(file, &buf))
{
/* Make sure there's a reasonable amount of room in the buffer */
enlargeStringInfo(&buf, 128);
/* Read some data, appending it to what we already have */
if (fgets(buf.data + buf.len, buf.maxlen - buf.len, file) == NULL)
{
int save_errno = errno;
if (!ferror(file))
break; /* normal EOF */
/* I/O error! */
ereport(elevel,
(errcode_for_file_access(),
errmsg("could not read file \"%s\": %m", filename)));
err_msg = psprintf("could not read file \"%s\": %s",
filename, strerror(save_errno));
resetStringInfo(&buf);
break;
}
buf.len += strlen(buf.data + buf.len);
/* If we haven't got a whole line, loop to read more */
if (!(buf.len > 0 && buf.data[buf.len - 1] == '\n'))
continue;
/* Strip trailing newline, including \r in case we're on Windows */
buf.len = pg_strip_crlf(buf.data);
@ -551,6 +526,19 @@ tokenize_file(const char *filename, FILE *file, List **tok_lines, int elevel)
break;
}
if (ferror(file))
{
/* I/O error! */
int save_errno = errno;
ereport(elevel,
(errcode_for_file_access(),
errmsg("could not read file \"%s\": %m", filename)));
err_msg = psprintf("could not read file \"%s\": %s",
filename, strerror(save_errno));
break;
}
/* Parse fields */
lineptr = buf.data;
while (*lineptr && err_msg == NULL)

View File

@ -470,9 +470,9 @@ readfile(const char *path)
{
char **result;
FILE *infile;
StringInfoData line;
int maxlines;
int n;
char *ln;
if ((infile = fopen(path, "r")) == NULL)
{
@ -480,11 +480,13 @@ readfile(const char *path)
exit(1);
}
initStringInfo(&line);
maxlines = 1024;
result = (char **) pg_malloc(maxlines * sizeof(char *));
n = 0;
while ((ln = pg_get_line(infile)) != NULL)
while (pg_get_line_append(infile, &line))
{
/* make sure there will be room for a trailing NULL pointer */
if (n >= maxlines - 1)
@ -493,10 +495,14 @@ readfile(const char *path)
result = (char **) pg_realloc(result, maxlines * sizeof(char *));
}
result[n++] = ln;
result[n++] = pg_strdup(line.data);
resetStringInfo(&line);
}
result[n] = NULL;
pfree(line.data);
fclose(infile);
return result;

View File

@ -41,6 +41,11 @@
* Note that while I/O errors are reflected back to the caller to be
* dealt with, an OOM condition for the palloc'd buffer will not be;
* there'll be an ereport(ERROR) or exit(1) inside stringinfo.c.
*
* Also note that the palloc'd buffer is usually a lot longer than
* strictly necessary, so it may be inadvisable to use this function
* to collect lots of long-lived data. A less memory-hungry option
* is to use pg_get_line_append() in a loop, then pstrdup() each line.
*/
char *
pg_get_line(FILE *stream)
@ -49,21 +54,7 @@ pg_get_line(FILE *stream)
initStringInfo(&buf);
/* Read some data, appending it to whatever we already have */
while (fgets(buf.data + buf.len, buf.maxlen - buf.len, stream) != NULL)
{
buf.len += strlen(buf.data + buf.len);
/* Done if we have collected a newline */
if (buf.len > 0 && buf.data[buf.len - 1] == '\n')
return buf.data;
/* Make some more room in the buffer, and loop to read more data */
enlargeStringInfo(&buf, 128);
}
/* Did fgets() fail because of an I/O error? */
if (ferror(stream))
if (!pg_get_line_append(stream, &buf))
{
/* ensure that free() doesn't mess up errno */
int save_errno = errno;
@ -73,13 +64,50 @@ pg_get_line(FILE *stream)
return NULL;
}
/* If we read no data before reaching EOF, we should return NULL */
if (buf.len == 0)
{
pfree(buf.data);
return NULL;
}
/* No newline at EOF ... so return what we have */
return buf.data;
}
/*
* pg_get_line_append()
*
* This has similar behavior to pg_get_line(), and thence to fgets(),
* except that the collected data is appended to whatever is in *buf.
*
* Returns true if a line was successfully collected (including the
* case of a non-newline-terminated line at EOF). Returns false if
* there was an I/O error or no data was available before EOF.
* (Check ferror(stream) to distinguish these cases.)
*
* In the false-result case, the contents of *buf are logically unmodified,
* though it's possible that the buffer has been resized.
*/
bool
pg_get_line_append(FILE *stream, StringInfo buf)
{
int orig_len = buf->len;
/* Read some data, appending it to whatever we already have */
while (fgets(buf->data + buf->len, buf->maxlen - buf->len, stream) != NULL)
{
buf->len += strlen(buf->data + buf->len);
/* Done if we have collected a newline */
if (buf->len > orig_len && buf->data[buf->len - 1] == '\n')
return true;
/* Make some more room in the buffer, and loop to read more data */
enlargeStringInfo(buf, 128);
}
/* Check for I/O errors and EOF */
if (ferror(stream) || buf->len == orig_len)
{
/* Discard any data we collected before detecting error */
buf->len = orig_len;
buf->data[orig_len] = '\0';
return false;
}
/* No newline at EOF, but we did collect some data */
return true;
}

View File

@ -10,6 +10,8 @@
#ifndef COMMON_STRING_H
#define COMMON_STRING_H
struct StringInfoData; /* avoid including stringinfo.h here */
/* functions in src/common/string.c */
extern bool pg_str_endswith(const char *str, const char *end);
extern int strtoint(const char *pg_restrict str, char **pg_restrict endptr,
@ -19,6 +21,7 @@ extern int pg_strip_crlf(char *str);
/* functions in src/common/pg_get_line.c */
extern char *pg_get_line(FILE *stream);
extern bool pg_get_line_append(FILE *stream, struct StringInfoData *buf);
/* functions in src/common/sprompt.c */
extern char *simple_prompt(const char *prompt, bool echo);