postgresql/contrib/hstore/hstore_io.c
2006-10-04 00:30:14 +00:00

474 lines
8.9 KiB
C

#include "hstore.h"
#include <ctype.h>
PG_MODULE_MAGIC;
typedef struct
{
char *begin;
char *ptr;
char *cur;
char *word;
int wordlen;
Pairs *pairs;
int pcur;
int plen;
} HSParser;
#define RESIZEPRSBUF \
do { \
if ( state->cur - state->word + 1 >= state->wordlen ) \
{ \
int4 clen = state->cur - state->word; \
state->wordlen *= 2; \
state->word = (char*)repalloc( (void*)state->word, state->wordlen ); \
state->cur = state->word + clen; \
} \
} while (0)
#define GV_WAITVAL 0
#define GV_INVAL 1
#define GV_INESCVAL 2
#define GV_WAITESCIN 3
#define GV_WAITESCESCIN 4
static bool
get_val(HSParser * state, bool ignoreeq, bool *escaped)
{
int st = GV_WAITVAL;
state->wordlen = 32;
state->cur = state->word = palloc(state->wordlen);
*escaped = false;
while (1)
{
if (st == GV_WAITVAL)
{
if (*(state->ptr) == '"')
{
*escaped = true;
st = GV_INESCVAL;
}
else if (*(state->ptr) == '\0')
{
return false;
}
else if (*(state->ptr) == '=' && !ignoreeq)
{
elog(ERROR, "Syntax error near '%c' at postion %d", *(state->ptr), (int4) (state->ptr - state->begin));
}
else if (*(state->ptr) == '\\')
{
st = GV_WAITESCIN;
}
else if (!isspace((unsigned char) *(state->ptr)))
{
*(state->cur) = *(state->ptr);
state->cur++;
st = GV_INVAL;
}
}
else if (st == GV_INVAL)
{
if (*(state->ptr) == '\\')
{
st = GV_WAITESCIN;
}
else if (*(state->ptr) == '=' && !ignoreeq)
{
state->ptr--;
return true;
}
else if (*(state->ptr) == ',' && ignoreeq)
{
state->ptr--;
return true;
}
else if (isspace((unsigned char) *(state->ptr)))
{
return true;
}
else if (*(state->ptr) == '\0')
{
state->ptr--;
return true;
}
else
{
RESIZEPRSBUF;
*(state->cur) = *(state->ptr);
state->cur++;
}
}
else if (st == GV_INESCVAL)
{
if (*(state->ptr) == '\\')
{
st = GV_WAITESCESCIN;
}
else if (*(state->ptr) == '"')
{
return true;
}
else if (*(state->ptr) == '\0')
{
elog(ERROR, "Unexpected end of string");
}
else
{
RESIZEPRSBUF;
*(state->cur) = *(state->ptr);
state->cur++;
}
}
else if (st == GV_WAITESCIN)
{
if (*(state->ptr) == '\0')
elog(ERROR, "Unexpected end of string");
RESIZEPRSBUF;
*(state->cur) = *(state->ptr);
state->cur++;
st = GV_INVAL;
}
else if (st == GV_WAITESCESCIN)
{
if (*(state->ptr) == '\0')
elog(ERROR, "Unexpected end of string");
RESIZEPRSBUF;
*(state->cur) = *(state->ptr);
state->cur++;
st = GV_INESCVAL;
}
else
elog(ERROR, "Unknown state %d at postion line %d in file '%s'", st, __LINE__, __FILE__);
state->ptr++;
}
return false;
}
#define WKEY 0
#define WVAL 1
#define WEQ 2
#define WGT 3
#define WDEL 4
static void
parse_hstore(HSParser * state)
{
int st = WKEY;
bool escaped = false;
state->plen = 16;
state->pairs = (Pairs *) palloc(sizeof(Pairs) * state->plen);
state->pcur = 0;
state->ptr = state->begin;
state->word = NULL;
while (1)
{
if (st == WKEY)
{
if (!get_val(state, false, &escaped))
return;
if (state->pcur >= state->plen)
{
state->plen *= 2;
state->pairs = (Pairs *) repalloc(state->pairs, sizeof(Pairs) * state->plen);
}
state->pairs[state->pcur].key = state->word;
state->pairs[state->pcur].keylen = state->cur - state->word;
state->pairs[state->pcur].val = NULL;
state->word = NULL;
st = WEQ;
}
else if (st == WEQ)
{
if (*(state->ptr) == '=')
{
st = WGT;
}
else if (*(state->ptr) == '\0')
{
elog(ERROR, "Unexpectd end of string");
}
else if (!isspace((unsigned char) *(state->ptr)))
{
elog(ERROR, "Syntax error near '%c' at postion %d", *(state->ptr), (int4) (state->ptr - state->begin));
}
}
else if (st == WGT)
{
if (*(state->ptr) == '>')
{
st = WVAL;
}
else if (*(state->ptr) == '\0')
{
elog(ERROR, "Unexpectd end of string");
}
else
{
elog(ERROR, "Syntax error near '%c' at postion %d", *(state->ptr), (int4) (state->ptr - state->begin));
}
}
else if (st == WVAL)
{
if (!get_val(state, true, &escaped))
elog(ERROR, "Unexpected end of string");
state->pairs[state->pcur].val = state->word;
state->pairs[state->pcur].vallen = state->cur - state->word;
state->pairs[state->pcur].isnull = false;
state->pairs[state->pcur].needfree = true;
if (state->cur - state->word == 4 && !escaped)
{
state->word[4] = '\0';
if (0 == pg_strcasecmp(state->word, "null"))
state->pairs[state->pcur].isnull = true;
}
state->word = NULL;
state->pcur++;
st = WDEL;
}
else if (st == WDEL)
{
if (*(state->ptr) == ',')
{
st = WKEY;
}
else if (*(state->ptr) == '\0')
{
return;
}
else if (!isspace((unsigned char) *(state->ptr)))
{
elog(ERROR, "Syntax error near '%c' at postion %d", *(state->ptr), (int4) (state->ptr - state->begin));
}
}
else
elog(ERROR, "Unknown state %d at line %d in file '%s'", st, __LINE__, __FILE__);
state->ptr++;
}
}
int
comparePairs(const void *a, const void *b)
{
if (((Pairs *) a)->keylen == ((Pairs *) b)->keylen)
{
int res = strncmp(
((Pairs *) a)->key,
((Pairs *) b)->key,
((Pairs *) a)->keylen
);
if (res)
return res;
/* guarantee that neddfree willl be later */
if (((Pairs *) b)->needfree == ((Pairs *) a)->needfree)
return 0;
else if (((Pairs *) a)->needfree)
return 1;
else
return -1;
}
return (((Pairs *) a)->keylen > ((Pairs *) b)->keylen) ? 1 : -1;
}
int
uniquePairs(Pairs * a, int4 l, int4 *buflen)
{
Pairs *ptr,
*res;
*buflen = 0;
if (l < 2)
{
if (l == 1)
*buflen = a->keylen + ((a->isnull) ? 0 : a->vallen);
return l;
}
qsort((void *) a, l, sizeof(Pairs), comparePairs);
ptr = a + 1;
res = a;
while (ptr - a < l)
{
if (ptr->keylen == res->keylen && strncmp(ptr->key, res->key, res->keylen) == 0)
{
if (ptr->needfree)
{
pfree(ptr->key);
pfree(ptr->val);
}
}
else
{
*buflen += res->keylen + ((res->isnull) ? 0 : res->vallen);
res++;
memcpy(res, ptr, sizeof(Pairs));
}
ptr++;
}
*buflen += res->keylen + ((res->isnull) ? 0 : res->vallen);
return res + 1 - a;
}
static void
freeHSParse(HSParser * state)
{
int i;
if (state->word)
pfree(state->word);
for (i = 0; i < state->pcur; i++)
if (state->pairs[i].needfree)
{
if (state->pairs[i].key)
pfree(state->pairs[i].key);
if (state->pairs[i].val)
pfree(state->pairs[i].val);
}
pfree(state->pairs);
}
PG_FUNCTION_INFO_V1(hstore_in);
Datum hstore_in(PG_FUNCTION_ARGS);
Datum
hstore_in(PG_FUNCTION_ARGS)
{
HSParser state;
int4 len,
buflen,
i;
HStore *out;
HEntry *entries;
char *ptr;
state.begin = PG_GETARG_CSTRING(0);
parse_hstore(&state);
if (state.pcur == 0)
{
freeHSParse(&state);
len = CALCDATASIZE(0, 0);
out = palloc(len);
out->len = len;
out->size = 0;
PG_RETURN_POINTER(out);
}
state.pcur = uniquePairs(state.pairs, state.pcur, &buflen);
len = CALCDATASIZE(state.pcur, buflen);
out = palloc(len);
out->len = len;
out->size = state.pcur;
entries = ARRPTR(out);
ptr = STRPTR(out);
for (i = 0; i < out->size; i++)
{
entries[i].keylen = state.pairs[i].keylen;
entries[i].pos = ptr - STRPTR(out);
memcpy(ptr, state.pairs[i].key, state.pairs[i].keylen);
ptr += entries[i].keylen;
entries[i].valisnull = state.pairs[i].isnull;
if (entries[i].valisnull)
entries[i].vallen = 4; /* null */
else
{
entries[i].vallen = state.pairs[i].vallen;
memcpy(ptr, state.pairs[i].val, state.pairs[i].vallen);
ptr += entries[i].vallen;
}
}
freeHSParse(&state);
PG_RETURN_POINTER(out);
}
static char *
cpw(char *dst, char *src, int len)
{
char *ptr = src;
while (ptr - src < len)
{
if (*ptr == '"' || *ptr == '\\')
*dst++ = '\\';
*dst++ = *ptr++;
}
return dst;
}
PG_FUNCTION_INFO_V1(hstore_out);
Datum hstore_out(PG_FUNCTION_ARGS);
Datum
hstore_out(PG_FUNCTION_ARGS)
{
HStore *in = PG_GETARG_HS(0);
int buflen,
i;
char *out,
*ptr;
char *base = STRPTR(in);
HEntry *entries = ARRPTR(in);
if (in->size == 0)
{
out = palloc(1);
*out = '\0';
PG_FREE_IF_COPY(in, 0);
PG_RETURN_CSTRING(out);
}
buflen = (4 /* " */ + 2 /* => */ + 2 /* , */ ) * in->size +
2 /* esc */ * (in->len - CALCDATASIZE(in->size, 0));
out = ptr = palloc(buflen);
for (i = 0; i < in->size; i++)
{
*ptr++ = '"';
ptr = cpw(ptr, base + entries[i].pos, entries[i].keylen);
*ptr++ = '"';
*ptr++ = '=';
*ptr++ = '>';
if (entries[i].valisnull)
{
*ptr++ = 'N';
*ptr++ = 'U';
*ptr++ = 'L';
*ptr++ = 'L';
}
else
{
*ptr++ = '"';
ptr = cpw(ptr, base + entries[i].pos + entries[i].keylen, entries[i].vallen);
*ptr++ = '"';
}
if (i + 1 != in->size)
{
*ptr++ = ',';
*ptr++ = ' ';
}
}
*ptr = '\0';
PG_FREE_IF_COPY(in, 0);
PG_RETURN_CSTRING(out);
}