mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-10-05 17:26:56 +02:00
ca3b37487b
Backpatch-through: 9.5
147 lines
3.5 KiB
C
147 lines
3.5 KiB
C
/*-------------------------------------------------------------------------
 *
 * ts_utils.c
 *		various support functions
 *
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/tsearch/ts_utils.c
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
#include "postgres.h"
|
|
|
|
#include <ctype.h>
|
|
|
|
#include "miscadmin.h"
|
|
#include "tsearch/ts_locale.h"
|
|
#include "tsearch/ts_utils.h"
|
|
|
|
|
|
/*
|
|
* Given the base name and extension of a tsearch config file, return
|
|
* its full path name. The base name is assumed to be user-supplied,
|
|
* and is checked to prevent pathname attacks. The extension is assumed
|
|
* to be safe.
|
|
*
|
|
* The result is a palloc'd string.
|
|
*/
|
|
char *
|
|
get_tsearch_config_filename(const char *basename,
|
|
const char *extension)
|
|
{
|
|
char sharepath[MAXPGPATH];
|
|
char *result;
|
|
|
|
/*
|
|
* We limit the basename to contain a-z, 0-9, and underscores. This may
|
|
* be overly restrictive, but we don't want to allow access to anything
|
|
* outside the tsearch_data directory, so for instance '/' *must* be
|
|
* rejected, and on some platforms '\' and ':' are risky as well. Allowing
|
|
* uppercase might result in incompatible behavior between case-sensitive
|
|
* and case-insensitive filesystems, and non-ASCII characters create other
|
|
* interesting risks, so on the whole a tight policy seems best.
|
|
*/
|
|
if (strspn(basename, "abcdefghijklmnopqrstuvwxyz0123456789_") != strlen(basename))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("invalid text search configuration file name \"%s\"",
|
|
basename)));
|
|
|
|
get_share_path(my_exec_path, sharepath);
|
|
result = palloc(MAXPGPATH);
|
|
snprintf(result, MAXPGPATH, "%s/tsearch_data/%s.%s",
|
|
sharepath, basename, extension);
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Reads a stop-word file. Each word is run through 'wordop'
|
|
* function, if given. wordop may either modify the input in-place,
|
|
* or palloc a new version.
|
|
*/
|
|
void
|
|
readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *))
|
|
{
|
|
char **stop = NULL;
|
|
|
|
s->len = 0;
|
|
if (fname && *fname)
|
|
{
|
|
char *filename = get_tsearch_config_filename(fname, "stop");
|
|
tsearch_readline_state trst;
|
|
char *line;
|
|
int reallen = 0;
|
|
|
|
if (!tsearch_readline_begin(&trst, filename))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
|
errmsg("could not open stop-word file \"%s\": %m",
|
|
filename)));
|
|
|
|
while ((line = tsearch_readline(&trst)) != NULL)
|
|
{
|
|
char *pbuf = line;
|
|
|
|
/* Trim trailing space */
|
|
while (*pbuf && !t_isspace(pbuf))
|
|
pbuf += pg_mblen(pbuf);
|
|
*pbuf = '\0';
|
|
|
|
/* Skip empty lines */
|
|
if (*line == '\0')
|
|
{
|
|
pfree(line);
|
|
continue;
|
|
}
|
|
|
|
if (s->len >= reallen)
|
|
{
|
|
if (reallen == 0)
|
|
{
|
|
reallen = 64;
|
|
stop = (char **) palloc(sizeof(char *) * reallen);
|
|
}
|
|
else
|
|
{
|
|
reallen *= 2;
|
|
stop = (char **) repalloc((void *) stop,
|
|
sizeof(char *) * reallen);
|
|
}
|
|
}
|
|
|
|
if (wordop)
|
|
{
|
|
stop[s->len] = wordop(line);
|
|
if (stop[s->len] != line)
|
|
pfree(line);
|
|
}
|
|
else
|
|
stop[s->len] = line;
|
|
|
|
(s->len)++;
|
|
}
|
|
|
|
tsearch_readline_end(&trst);
|
|
pfree(filename);
|
|
}
|
|
|
|
s->stop = stop;
|
|
|
|
/* Sort to allow binary searching */
|
|
if (s->stop && s->len > 0)
|
|
qsort(s->stop, s->len, sizeof(char *), pg_qsort_strcmp);
|
|
}
|
|
|
|
bool
|
|
searchstoplist(StopList *s, char *key)
|
|
{
|
|
return (s->stop && s->len > 0 &&
|
|
bsearch(&key, s->stop, s->len,
|
|
sizeof(char *), pg_qsort_strcmp)) ? true : false;
|
|
}
|