postgresql/contrib/hstore/hstore_io.c
Tom Lane beca984e5f Fix bugs in plpgsql and ecpg caused by assuming that isspace() would only
return true for exactly the characters treated as whitespace by their flex
scanners.  Per report from Victor Snezhko and subsequent investigation.

Also fix a passel of unsafe usages of <ctype.h> functions, that is, ye olde
char-vs-unsigned-char issue.  I won't miss <ctype.h> when we are finally
able to stop using it.
2006-09-22 21:39:58 +00:00

371 lines
8.6 KiB
C

#include "hstore.h"
#include <ctype.h>
PG_MODULE_MAGIC;
typedef struct {
char *begin;
char *ptr;
char *cur;
char *word;
int wordlen;
Pairs *pairs;
int pcur;
int plen;
} HSParser;
#define RESIZEPRSBUF \
do { \
if ( state->cur - state->word + 1 >= state->wordlen ) \
{ \
int4 clen = state->cur - state->word; \
state->wordlen *= 2; \
state->word = (char*)repalloc( (void*)state->word, state->wordlen ); \
state->cur = state->word + clen; \
} \
} while (0)
#define GV_WAITVAL 0
#define GV_INVAL 1
#define GV_INESCVAL 2
#define GV_WAITESCIN 3
#define GV_WAITESCESCIN 4
static bool
get_val( HSParser *state, bool ignoreeq, bool *escaped ) {
int st = GV_WAITVAL;
state->wordlen=32;
state->cur = state->word = palloc( state->wordlen );
*escaped=false;
while(1) {
if ( st == GV_WAITVAL ) {
if ( *(state->ptr) == '"' ) {
*escaped=true;
st = GV_INESCVAL;
} else if ( *(state->ptr) == '\0' ) {
return false;
} else if ( *(state->ptr) == '=' && !ignoreeq ) {
elog(ERROR,"Syntax error near '%c' at postion %d", *(state->ptr), (int4)(state->ptr-state->begin));
} else if ( *(state->ptr) == '\\' ) {
st = GV_WAITESCIN;
} else if ( !isspace((unsigned char) *(state->ptr)) ) {
*(state->cur) = *(state->ptr);
state->cur++;
st = GV_INVAL;
}
} else if ( st == GV_INVAL ) {
if ( *(state->ptr) == '\\' ) {
st = GV_WAITESCIN;
} else if ( *(state->ptr) == '=' && !ignoreeq ) {
state->ptr--;
return true;
} else if ( *(state->ptr) == ',' && ignoreeq ) {
state->ptr--;
return true;
} else if ( isspace((unsigned char) *(state->ptr)) ) {
return true;
} else if ( *(state->ptr) == '\0' ) {
state->ptr--;
return true;
} else {
RESIZEPRSBUF;
*(state->cur) = *(state->ptr);
state->cur++;
}
} else if ( st == GV_INESCVAL ) {
if ( *(state->ptr) == '\\' ) {
st = GV_WAITESCESCIN;
} else if ( *(state->ptr) == '"' ) {
return true;
} else if ( *(state->ptr) == '\0' ) {
elog(ERROR,"Unexpected end of string");
} else {
RESIZEPRSBUF;
*(state->cur) = *(state->ptr);
state->cur++;
}
} else if ( st == GV_WAITESCIN ) {
if ( *(state->ptr) == '\0' )
elog(ERROR,"Unexpected end of string");
RESIZEPRSBUF;
*(state->cur) = *(state->ptr);
state->cur++;
st = GV_INVAL;
} else if ( st == GV_WAITESCESCIN ) {
if ( *(state->ptr) == '\0' )
elog(ERROR,"Unexpected end of string");
RESIZEPRSBUF;
*(state->cur) = *(state->ptr);
state->cur++;
st = GV_INESCVAL;
} else
elog(ERROR,"Unknown state %d at postion line %d in file '%s'", st, __LINE__, __FILE__);
state->ptr++;
}
return false;
}
#define WKEY 0
#define WVAL 1
#define WEQ 2
#define WGT 3
#define WDEL 4
static void
parse_hstore( HSParser *state ) {
int st = WKEY;
bool escaped=false;
state->plen=16;
state->pairs = (Pairs*)palloc( sizeof(Pairs) * state->plen );
state->pcur=0;
state->ptr = state->begin;
state->word=NULL;
while(1) {
if (st == WKEY) {
if ( !get_val(state, false, &escaped) )
return;
if ( state->pcur >= state->plen ) {
state->plen *= 2;
state->pairs = (Pairs*)repalloc( state->pairs, sizeof(Pairs) * state->plen );
}
state->pairs[ state->pcur ].key = state->word;
state->pairs[ state->pcur ].keylen = state->cur - state->word;
state->pairs[ state->pcur ].val=NULL;
state->word=NULL;
st = WEQ;
} else if ( st == WEQ ) {
if ( *(state->ptr) == '=' ) {
st = WGT;
} else if ( *(state->ptr) == '\0' ) {
elog(ERROR,"Unexpectd end of string");
} else if (!isspace((unsigned char) *(state->ptr))) {
elog(ERROR,"Syntax error near '%c' at postion %d", *(state->ptr), (int4)(state->ptr-state->begin));
}
} else if ( st == WGT ) {
if ( *(state->ptr) == '>' ) {
st = WVAL;
} else if ( *(state->ptr) == '\0' ) {
elog(ERROR,"Unexpectd end of string");
} else {
elog(ERROR,"Syntax error near '%c' at postion %d", *(state->ptr), (int4)(state->ptr-state->begin));
}
} else if ( st == WVAL ) {
if ( !get_val(state, true, &escaped) )
elog(ERROR,"Unexpected end of string");
state->pairs[ state->pcur ].val = state->word;
state->pairs[ state->pcur ].vallen = state->cur - state->word;
state->pairs[ state->pcur ].isnull = false;
state->pairs[ state->pcur ].needfree = true;
if ( state->cur - state->word == 4 && !escaped) {
state->word[4] = '\0';
if ( 0==pg_strcasecmp(state->word, "null") )
state->pairs[ state->pcur ].isnull=true;
}
state->word=NULL;
state->pcur++;
st = WDEL;
} else if ( st == WDEL ) {
if ( *(state->ptr) == ',' ) {
st = WKEY;
} else if ( *(state->ptr) == '\0' ) {
return;
} else if (!isspace((unsigned char) *(state->ptr))) {
elog(ERROR,"Syntax error near '%c' at postion %d", *(state->ptr), (int4)(state->ptr-state->begin));
}
} else
elog(ERROR,"Unknown state %d at line %d in file '%s'", st, __LINE__, __FILE__);
state->ptr++;
}
}
int
comparePairs(const void *a, const void *b) {
if ( ((Pairs*)a)->keylen == ((Pairs*)b)->keylen ) {
int res = strncmp(
((Pairs*)a)->key,
((Pairs*)b)->key,
((Pairs*)a)->keylen
);
if ( res )
return res;
/* guarantee that neddfree willl be later */
if ( ((Pairs*)b)->needfree == ((Pairs*)a)->needfree )
return 0;
else if ( ((Pairs*)a)->needfree )
return 1;
else
return -1;
}
return ( ((Pairs*)a)->keylen > ((Pairs*)b)->keylen ) ? 1 : -1;
}
int
uniquePairs(Pairs * a, int4 l, int4 *buflen) {
Pairs *ptr, *res;
*buflen=0;
if ( l < 2 ) {
if ( l==1 )
*buflen = a->keylen + ((a->isnull) ? 0 : a->vallen) ;
return l;
}
qsort((void *) a, l, sizeof(Pairs), comparePairs);
ptr=a+1;
res=a;
while( ptr - a < l ) {
if ( ptr->keylen == res->keylen && strncmp( ptr->key, res->key, res->keylen )==0 ) {
if ( ptr->needfree ) {
pfree(ptr->key);
pfree(ptr->val);
}
} else {
*buflen += res->keylen + (( res->isnull ) ? 0 : res->vallen);
res++;
memcpy(res,ptr,sizeof(Pairs));
}
ptr++;
}
*buflen += res->keylen + (( res->isnull ) ? 0 : res->vallen);
return res + 1 - a;
}
static void
freeHSParse(HSParser *state) {
int i;
if ( state->word ) pfree( state->word );
for (i=0;i<state->pcur;i++)
if ( state->pairs[i].needfree ) {
if (state->pairs[i].key) pfree(state->pairs[i].key);
if (state->pairs[i].val) pfree(state->pairs[i].val);
}
pfree( state->pairs );
}
PG_FUNCTION_INFO_V1(hstore_in);
Datum hstore_in(PG_FUNCTION_ARGS);
Datum
hstore_in(PG_FUNCTION_ARGS) {
HSParser state;
int4 len,buflen,i;
HStore *out;
HEntry *entries;
char *ptr;
state.begin = PG_GETARG_CSTRING(0);
parse_hstore(&state);
if ( state.pcur == 0 ) {
freeHSParse(&state);
len = CALCDATASIZE(0,0);
out = palloc(len);
out->len=len;
out->size=0;
PG_RETURN_POINTER(out);
}
state.pcur = uniquePairs(state.pairs, state.pcur, &buflen);
len=CALCDATASIZE(state.pcur, buflen);
out = palloc(len);
out->len=len;
out->size=state.pcur;
entries=ARRPTR(out);
ptr = STRPTR(out);
for(i=0;i<out->size;i++) {
entries[i].keylen = state.pairs[i].keylen;
entries[i].pos = ptr - STRPTR(out);
memcpy(ptr, state.pairs[i].key, state.pairs[i].keylen);
ptr+=entries[i].keylen;
entries[i].valisnull = state.pairs[i].isnull;
if ( entries[i].valisnull )
entries[i].vallen=4; /* null */
else {
entries[i].vallen = state.pairs[i].vallen;
memcpy(ptr, state.pairs[i].val,state.pairs[i].vallen);
ptr+=entries[i].vallen;
}
}
freeHSParse(&state);
PG_RETURN_POINTER(out);
}
static char*
cpw(char *dst, char *src, int len) {
char *ptr = src;
while(ptr-src<len) {
if ( *ptr == '"' || *ptr == '\\' )
*dst++='\\';
*dst++ = *ptr++;
}
return dst;
}
PG_FUNCTION_INFO_V1(hstore_out);
Datum hstore_out(PG_FUNCTION_ARGS);
Datum
hstore_out(PG_FUNCTION_ARGS) {
HStore *in = PG_GETARG_HS(0);
int buflen,i;
char *out,*ptr;
char *base = STRPTR(in);
HEntry *entries = ARRPTR(in);
if ( in->size==0 ) {
out=palloc(1);
*out='\0';
PG_FREE_IF_COPY(in,0);
PG_RETURN_CSTRING(out);
}
buflen = ( 4 /* " */ + 2 /* => */ + 2 /*, */ )*in->size +
2 /* esc */ * ( in->len - CALCDATASIZE(in->size,0) );
out=ptr=palloc(buflen);
for(i=0;i<in->size;i++) {
*ptr++='"';
ptr = cpw( ptr, base + entries[i].pos, entries[i].keylen );
*ptr++='"';
*ptr++='=';
*ptr++='>';
if ( entries[i].valisnull ) {
*ptr++='N';
*ptr++='U';
*ptr++='L';
*ptr++='L';
} else {
*ptr++='"';
ptr = cpw( ptr, base + entries[i].pos + entries[i].keylen, entries[i].vallen );
*ptr++='"';
}
if ( i+1 != in->size ) {
*ptr++=',';
*ptr++=' ';
}
}
*ptr='\0';
PG_FREE_IF_COPY(in,0);
PG_RETURN_CSTRING(out);
}