/*
 * $PostgreSQL: pgsql/contrib/hstore/hstore_io.c,v 1.8 2008/05/12 00:00:42 alvherre Exp $
 *
 * Text input and output functions for the hstore type: a small hand-written
 * state-machine parser for the "key=>value, ..." syntax, de-duplication of
 * the parsed pairs, and conversion to/from the on-disk HStore layout.
 */
#include "postgres.h"

#include <ctype.h>

#include "hstore.h"

PG_MODULE_MAGIC;

/*
 * Working state of the text parser.
 */
typedef struct
{
	char	   *begin;			/* start of the input string (used to report
								 * error positions) */
	char	   *ptr;			/* current read position in the input */
	char	   *cur;			/* current write position inside "word" */
	char	   *word;			/* buffer holding the token being collected,
								 * or NULL when no token is pending */
	int			wordlen;		/* allocated size of "word" in bytes */

	Pairs	   *pairs;			/* growable array of parsed pairs */
	int			pcur;			/* number of completed pairs in "pairs" */
	int			plen;			/* allocated capacity of "pairs" */
} HSParser;

/*
 * Make sure there is room for one more byte (plus one spare) in the token
 * buffer of the local variable "state", doubling the buffer when needed.
 * state->cur is re-based onto the reallocated buffer.
 */
#define RESIZEPRSBUF \
do { \
	if ( state->cur - state->word + 1 >= state->wordlen ) \
	{ \
		int4 clen = state->cur - state->word; \
		state->wordlen *= 2; \
		state->word = (char*)repalloc( (void*)state->word, state->wordlen ); \
		state->cur = state->word + clen; \
	} \
} while (0)

/* States of the token scanner in get_val() */
#define GV_WAITVAL 0			/* skipping leading whitespace */
#define GV_INVAL 1				/* inside an unquoted token */
#define GV_INESCVAL 2			/* inside a double-quoted token */
#define GV_WAITESCIN 3			/* after a backslash in an unquoted token */
#define GV_WAITESCESCIN 4		/* after a backslash in a quoted token */

/*
 * Scan one key or value token starting at state->ptr.
 *
 * A fresh token buffer is allocated into state->word on every call; on
 * success the token occupies state->word .. state->cur (not NUL-terminated).
 *
 * ignoreeq: false while scanning a key ('=' ends an unquoted token and is a
 *			 syntax error at the start of a token); true while scanning a
 *			 value (',' ends an unquoted token and '=' is an ordinary
 *			 character).
 * escaped:  set to true if the token was written in double quotes.
 *
 * Returns true if a token was read, false if the end of the input was
 * reached before any token started.  When an unquoted token is ended by a
 * delimiter or by the end of input, state->ptr is stepped back by one so
 * that the caller's unconditional state->ptr++ lands on that delimiter.
 *
 * Raises ERROR on a misplaced '=' or on input that ends inside a quoted
 * token or right after a backslash.
 */
static bool
get_val(HSParser * state, bool ignoreeq, bool *escaped)
{
	int			st = GV_WAITVAL;

	state->wordlen = 32;
	state->cur = state->word = palloc(state->wordlen);
	*escaped = false;

	while (1)
	{
		if (st == GV_WAITVAL)
		{
			if (*(state->ptr) == '"')
			{
				*escaped = true;
				st = GV_INESCVAL;
			}
			else if (*(state->ptr) == '\0')
			{
				return false;
			}
			else if (*(state->ptr) == '=' && !ignoreeq)
			{
				elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int4) (state->ptr - state->begin));
			}
			else if (*(state->ptr) == '\\')
			{
				st = GV_WAITESCIN;
			}
			else if (!isspace((unsigned char) *(state->ptr)))
			{
				/* first byte of a fresh 32-byte buffer: no resize needed */
				*(state->cur) = *(state->ptr);
				state->cur++;
				st = GV_INVAL;
			}
		}
		else if (st == GV_INVAL)
		{
			if (*(state->ptr) == '\\')
			{
				st = GV_WAITESCIN;
			}
			else if (*(state->ptr) == '=' && !ignoreeq)
			{
				/* push the delimiter back for the caller */
				state->ptr--;
				return true;
			}
			else if (*(state->ptr) == ',' && ignoreeq)
			{
				state->ptr--;
				return true;
			}
			else if (isspace((unsigned char) *(state->ptr)))
			{
				/* the whitespace itself is consumed by the caller's ptr++ */
				return true;
			}
			else if (*(state->ptr) == '\0')
			{
				/* let the caller see the terminator */
				state->ptr--;
				return true;
			}
			else
			{
				RESIZEPRSBUF;
				*(state->cur) = *(state->ptr);
				state->cur++;
			}
		}
		else if (st == GV_INESCVAL)
		{
			if (*(state->ptr) == '\\')
			{
				st = GV_WAITESCESCIN;
			}
			else if (*(state->ptr) == '"')
			{
				return true;
			}
			else if (*(state->ptr) == '\0')
			{
				elog(ERROR, "Unexpected end of string");
			}
			else
			{
				RESIZEPRSBUF;
				*(state->cur) = *(state->ptr);
				state->cur++;
			}
		}
		else if (st == GV_WAITESCIN)
		{
			/* the byte after a backslash is taken literally */
			if (*(state->ptr) == '\0')
				elog(ERROR, "Unexpected end of string");
			RESIZEPRSBUF;
			*(state->cur) = *(state->ptr);
			state->cur++;
			st = GV_INVAL;
		}
		else if (st == GV_WAITESCESCIN)
		{
			/* same, but return to the quoted state afterwards */
			if (*(state->ptr) == '\0')
				elog(ERROR, "Unexpected end of string");
			RESIZEPRSBUF;
			*(state->cur) = *(state->ptr);
			state->cur++;
			st = GV_INESCVAL;
		}
		else
			elog(ERROR, "Unknown state %d at position line %d in file '%s'", st, __LINE__, __FILE__);

		state->ptr++;
	}

	/* not reached; present only to give every path a return value */
	return false;
}

/* States of the pair-level parser in parse_hstore() */
#define WKEY	0				/* expecting a key */
#define WVAL	1				/* expecting a value */
#define WEQ 2					/* expecting '=' of "=>" */
#define WGT 3					/* expecting '>' of "=>" */
#define WDEL	4				/* expecting ',' or end of input */

/*
 * Parse the whole input string at state->begin into state->pairs.
 *
 * On return state->pcur holds the number of completed key/value pairs.
 * Parsing stops successfully at the end of input, either after a value or
 * where a key would start.  Raises ERROR on malformed input.
 */
static void
parse_hstore(HSParser * state)
{
	int			st = WKEY;
	bool		escaped = false;

	state->plen = 16;
	state->pairs = (Pairs *) palloc(sizeof(Pairs) * state->plen);
	state->pcur = 0;
	state->ptr = state->begin;
	state->word = NULL;

	while (1)
	{
		if (st == WKEY)
		{
			if (!get_val(state, false, &escaped))
				return;
			/* grow the pair array before storing into slot pcur */
			if (state->pcur >= state->plen)
			{
				state->plen *= 2;
				state->pairs = (Pairs *) repalloc(state->pairs, sizeof(Pairs) * state->plen);
			}
			/* the pair takes over the token buffer */
			state->pairs[state->pcur].key = state->word;
			state->pairs[state->pcur].keylen = state->cur - state->word;
			state->pairs[state->pcur].val = NULL;
			state->word = NULL;
			st = WEQ;
		}
		else if (st == WEQ)
		{
			if (*(state->ptr) == '=')
			{
				st = WGT;
			}
			else if (*(state->ptr) == '\0')
			{
				elog(ERROR, "Unexpected end of string");
			}
			else if (!isspace((unsigned char) *(state->ptr)))
			{
				elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int4) (state->ptr - state->begin));
			}
		}
		else if (st == WGT)
		{
			if (*(state->ptr) == '>')
			{
				st = WVAL;
			}
			else if (*(state->ptr) == '\0')
			{
				elog(ERROR, "Unexpected end of string");
			}
			else
			{
				elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int4) (state->ptr - state->begin));
			}
		}
		else if (st == WVAL)
		{
			if (!get_val(state, true, &escaped))
				elog(ERROR, "Unexpected end of string");
			state->pairs[state->pcur].val = state->word;
			state->pairs[state->pcur].vallen = state->cur - state->word;
			state->pairs[state->pcur].isnull = false;
			state->pairs[state->pcur].needfree = true;
			/* an unquoted 4-byte value spelling "null" (any case) is NULL */
			if (state->cur - state->word == 4 && !escaped)
			{
				/* safe: the token buffer is at least 32 bytes long */
				state->word[4] = '\0';
				if (0 == pg_strcasecmp(state->word, "null"))
					state->pairs[state->pcur].isnull = true;
			}
			state->word = NULL;
			state->pcur++;
			st = WDEL;
		}
		else if (st == WDEL)
		{
			if (*(state->ptr) == ',')
			{
				st = WKEY;
			}
			else if (*(state->ptr) == '\0')
			{
				return;
			}
			else if (!isspace((unsigned char) *(state->ptr)))
			{
				elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int4) (state->ptr - state->begin));
			}
		}
		else
			elog(ERROR, "Unknown state %d at line %d in file '%s'", st, __LINE__, __FILE__);

		state->ptr++;
	}
}

/*
 * qsort comparator for Pairs: shorter keys sort first, keys of equal length
 * are ordered by strncmp over the key bytes.  Among pairs with equal keys,
 * one with needfree set sorts after one without.
 */
int
comparePairs(const void *a, const void *b)
{
	if (((Pairs *) a)->keylen == ((Pairs *) b)->keylen)
	{
		int			res = strncmp(
								  ((Pairs *) a)->key,
								  ((Pairs *) b)->key,
								  ((Pairs *) a)->keylen
		);

		if (res)
			return res;

		/* guarantee that needfree will be later */
		if (((Pairs *) b)->needfree == ((Pairs *) a)->needfree)
			return 0;
		else if (((Pairs *) a)->needfree)
			return 1;
		else
			return -1;
	}
	return (((Pairs *) a)->keylen > ((Pairs *) b)->keylen) ? 1 : -1;
}

/*
 * Sort the array "a" of "l" pairs and collapse pairs with equal keys in
 * place, keeping the first one in comparePairs() order.  Key and value of a
 * dropped duplicate are pfree'd when its needfree flag is set.
 *
 * *buflen receives the total number of key and value bytes of the surviving
 * pairs; NULL values contribute no value bytes.
 *
 * Returns the number of surviving pairs, stored at a[0 .. result-1].
 */
int
uniquePairs(Pairs * a, int4 l, int4 *buflen)
{
	Pairs	   *ptr,
			   *res;

	*buflen = 0;
	if (l < 2)
	{
		/* zero or one pair: nothing to sort or collapse */
		if (l == 1)
			*buflen = a->keylen + ((a->isnull) ? 0 : a->vallen);
		return l;
	}

	qsort((void *) a, l, sizeof(Pairs), comparePairs);

	/* "res" is the last kept pair, "ptr" the candidate being examined */
	ptr = a + 1;
	res = a;
	while (ptr - a < l)
	{
		if (ptr->keylen == res->keylen && strncmp(ptr->key, res->key, res->keylen) == 0)
		{
			/* duplicate key: drop the candidate */
			if (ptr->needfree)
			{
				pfree(ptr->key);
				pfree(ptr->val);
			}
		}
		else
		{
			/* new key: account for the finished pair and keep the candidate */
			*buflen += res->keylen + ((res->isnull) ? 0 : res->vallen);
			res++;
			memcpy(res, ptr, sizeof(Pairs));
		}

		ptr++;
	}

	/* account for the last kept pair */
	*buflen += res->keylen + ((res->isnull) ? 0 : res->vallen);
	return res + 1 - a;
}

/*
 * Release everything the parser allocated: a pending token buffer, the key
 * and value of each of the first state->pcur pairs that has needfree set,
 * and the pair array itself.  The pointers are checked before pfree().
 */
static void
freeHSParse(HSParser * state)
{
	int			i;

	if (state->word)
		pfree(state->word);
	for (i = 0; i < state->pcur; i++)
		if (state->pairs[i].needfree)
		{
			if (state->pairs[i].key)
				pfree(state->pairs[i].key);
			if (state->pairs[i].val)
				pfree(state->pairs[i].val);
		}
	pfree(state->pairs);
}

PG_FUNCTION_INFO_V1(hstore_in);
Datum		hstore_in(PG_FUNCTION_ARGS);

/*
 * Input function: convert the C-string argument into an HStore value.
 *
 * The string is parsed into pairs, duplicates are removed with
 * uniquePairs(), and the result is laid out as an entry array (ARRPTR)
 * followed by a string area (STRPTR) in which each key is immediately
 * followed by its value.  NULL values occupy no bytes in the string area,
 * although their entry's vallen is set to 4.
 */
Datum
hstore_in(PG_FUNCTION_ARGS)
{
	HSParser	state;
	int4		len,
				buflen,
				i;
	HStore	   *out;
	HEntry	   *entries;
	char	   *ptr;

	state.begin = PG_GETARG_CSTRING(0);

	parse_hstore(&state);

	if (state.pcur == 0)
	{
		/* no pairs at all: return an empty hstore */
		freeHSParse(&state);
		len = CALCDATASIZE(0, 0);
		out = palloc(len);
		SET_VARSIZE(out, len);
		out->size = 0;
		PG_RETURN_POINTER(out);
	}

	/* pcur now counts unique pairs only; freeHSParse relies on that */
	state.pcur = uniquePairs(state.pairs, state.pcur, &buflen);

	len = CALCDATASIZE(state.pcur, buflen);
	out = palloc(len);
	SET_VARSIZE(out, len);
	out->size = state.pcur;

	entries = ARRPTR(out);
	ptr = STRPTR(out);

	for (i = 0; i < out->size; i++)
	{
		entries[i].keylen = state.pairs[i].keylen;
		entries[i].pos = ptr - STRPTR(out);
		memcpy(ptr, state.pairs[i].key, state.pairs[i].keylen);
		ptr += entries[i].keylen;

		entries[i].valisnull = state.pairs[i].isnull;
		if (entries[i].valisnull)
			entries[i].vallen = 4;		/* null */
		else
		{
			entries[i].vallen = state.pairs[i].vallen;
			memcpy(ptr, state.pairs[i].val, state.pairs[i].vallen);
			ptr += entries[i].vallen;
		}
	}

	freeHSParse(&state);
	PG_RETURN_POINTER(out);
}

/*
 * Copy "len" bytes from "src" to "dst", inserting a backslash before every
 * double quote and backslash.  Returns the position just past the last byte
 * written; at most 2 * len bytes are written.
 */
static char *
cpw(char *dst, char *src, int len)
{
	char	   *ptr = src;

	while (ptr - src < len)
	{
		if (*ptr == '"' || *ptr == '\\')
			*dst++ = '\\';
		*dst++ = *ptr++;
	}
	return dst;
}

PG_FUNCTION_INFO_V1(hstore_out);
Datum		hstore_out(PG_FUNCTION_ARGS);

/*
 * Output function: convert an HStore value to its text form
 * "key"=>"value", "key"=>NULL, ...
 *
 * Keys and non-NULL values are always double-quoted, with embedded quotes
 * and backslashes backslash-escaped.  An empty hstore yields an empty
 * string.
 */
Datum
hstore_out(PG_FUNCTION_ARGS)
{
	HStore	   *in = PG_GETARG_HS(0);
	Size		buflen;
	int			i;
	char	   *out,
			   *ptr;
	char	   *base = STRPTR(in);
	HEntry	   *entries = ARRPTR(in);

	if (in->size == 0)
	{
		out = palloc(1);
		*out = '\0';
		PG_FREE_IF_COPY(in, 0);
		PG_RETURN_CSTRING(out);
	}

	/*
	 * Size the buffer for the worst case of every entry.  Deriving the size
	 * from the varlena length is not enough: a NULL value stores no bytes
	 * in the string area but still prints as the 4-byte word NULL, and the
	 * terminating NUL needs a byte as well.
	 */
	buflen = 1;					/* trailing NUL */
	for (i = 0; i < in->size; i++)
	{
		/* quotes around the key, "=>" and ", "; each key byte may be escaped */
		buflen += 6 + 2 * (Size) entries[i].keylen;
		/* NULL, or quotes around the value with each byte possibly escaped */
		if (entries[i].valisnull)
			buflen += 4;
		else
			buflen += 2 + 2 * (Size) entries[i].vallen;
	}

	out = ptr = palloc(buflen);
	for (i = 0; i < in->size; i++)
	{
		*ptr++ = '"';
		ptr = cpw(ptr, base + entries[i].pos, entries[i].keylen);
		*ptr++ = '"';
		*ptr++ = '=';
		*ptr++ = '>';
		if (entries[i].valisnull)
		{
			*ptr++ = 'N';
			*ptr++ = 'U';
			*ptr++ = 'L';
			*ptr++ = 'L';
		}
		else
		{
			/* the value bytes directly follow the key bytes */
			*ptr++ = '"';
			ptr = cpw(ptr, base + entries[i].pos + entries[i].keylen, entries[i].vallen);
			*ptr++ = '"';
		}

		/* separator between entries, but not after the last one */
		if (i + 1 != in->size)
		{
			*ptr++ = ',';
			*ptr++ = ' ';
		}
	}
	*ptr = '\0';

	PG_FREE_IF_COPY(in, 0);
	PG_RETURN_CSTRING(out);
}