/*
 * txtquery io
 * Teodor Sigaev
 * contrib/ltree/ltxtquery_io.c
 */
#include "postgres.h"

#include <ctype.h>

#include "crc32.h"
#include "libpq/pqformat.h"
#include "ltree.h"
#include "miscadmin.h"
#include "nodes/miscnodes.h"
#include "varatt.h"


/* parser's states */
#define WAITOPERAND 1
#define INOPERAND 2
#define WAITOPERATOR 3

/*
 * node of query tree, also used
 * for storing polish notation in parser
 *
 * distance and length are unsigned so that every value pushquery() accepts
 * (distance <= 0xffff, length <= 0xff) is stored without relying on
 * signed wraparound.
 */
typedef struct NODE
{
	int32		type;
	int32		val;
	uint16		distance;
	uint16		length;
	uint16		flag;
	struct NODE *next;
} NODE;

typedef struct
{
	char	   *buf;			/* current position in the query string */
	int32		state;			/* one of the parser's states above */
	int32		count;			/* number of currently open parentheses */
	struct Node *escontext;		/* where soft errors are reported */
	/* reverse polish notation in list (for temporary usage) */
	NODE	   *str;
	/* number in str */
	int32		num;

	/* user-friendly operand */
	int32		lenop;			/* allocated size of op */
	int32		sumlen;			/* bytes of op in use, including terminators */
	char	   *op;				/* start of the operand buffer */
	char	   *curop;			/* next free byte in op */
} QPRS_STATE;

/*
 * get token from query string
 *
 * Returns one of OPR, OPEN, VAL, CLOSE, END or ERR.  For OPR, *val holds the
 * operator character; for VAL, *strval / *lenval describe the operand text
 * inside the query string and *flag holds its modifier bits.
 *
 * caller needs to check if a soft-error was set if the result is ERR.
 */
static int32
gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint16 *flag)
{
	int			charlen;

	for (;;)
	{
		charlen = pg_mblen(state->buf);

		switch (state->state)
		{
			case WAITOPERAND:
				if (t_iseq(state->buf, '!'))
				{
					(state->buf)++;
					*val = (int32) '!';
					return OPR;
				}
				else if (t_iseq(state->buf, '('))
				{
					state->count++;
					(state->buf)++;
					return OPEN;
				}
				else if (ISLABEL(state->buf))
				{
					/* first character of an operand */
					state->state = INOPERAND;
					*strval = state->buf;
					*lenval = charlen;
					*flag = 0;
				}
				else if (!t_isspace(state->buf))
					ereturn(state->escontext, ERR,
							(errcode(ERRCODE_SYNTAX_ERROR),
							 errmsg("operand syntax error")));
				break;
			case INOPERAND:
				if (ISLABEL(state->buf))
				{
					/* label characters may not follow a modifier */
					if (*flag)
						ereturn(state->escontext, ERR,
								(errcode(ERRCODE_SYNTAX_ERROR),
								 errmsg("modifiers syntax error")));
					*lenval += charlen;
				}
				else if (t_iseq(state->buf, '%'))
					*flag |= LVAR_SUBLEXEME;
				else if (t_iseq(state->buf, '@'))
					*flag |= LVAR_INCASE;
				else if (t_iseq(state->buf, '*'))
					*flag |= LVAR_ANYEND;
				else
				{
					/* operand ended; leave the current character unread */
					state->state = WAITOPERATOR;
					return VAL;
				}
				break;
			case WAITOPERATOR:
				if (t_iseq(state->buf, '&') || t_iseq(state->buf, '|'))
				{
					state->state = WAITOPERAND;
					*val = (int32) *(state->buf);
					(state->buf)++;
					return OPR;
				}
				else if (t_iseq(state->buf, ')'))
				{
					(state->buf)++;
					state->count--;
					return (state->count < 0) ? ERR : CLOSE;
				}
				else if (*(state->buf) == '\0')
				{
					/* end of input is only valid with balanced parentheses */
					return (state->count) ? ERR : END;
				}
				else if (!t_isspace(state->buf))
				{
					/*
					 * Any whitespace may separate an operand from the next
					 * operator, the same as is allowed in WAITOPERAND.
					 */
					return ERR;
				}
				break;
			default:
				return ERR;
				break;
		}

		state->buf += charlen;
	}

	/* should not get here */
}

/*
 * push new one in polish notation reverse view
 *
 * Returns false (after reporting through state->escontext) if distance or
 * lenval is out of range, true otherwise.
 */
static bool
pushquery(QPRS_STATE *state, int32 type, int32 val, int32 distance, int32 lenval, uint16 flag)
{
	NODE	   *tmp;

	/* validate first, so that a soft-error return does not leak a node */
	if (distance > 0xffff)
		ereturn(state->escontext, false,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("value is too big")));
	if (lenval > 0xff)
		ereturn(state->escontext, false,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("operand is too long")));

	tmp = (NODE *) palloc(sizeof(NODE));
	tmp->type = type;
	tmp->val = val;
	tmp->flag = flag;
	tmp->distance = distance;
	tmp->length = lenval;

	/* link at the head of the list, so the list is in reverse order */
	tmp->next = state->str;
	state->str = tmp;
	state->num++;
	return true;
}

/*
 * This function is used for query text parsing
 *
 * Pushes an operand node whose value is the CRC of the operand text, then
 * appends the NUL-terminated text itself to the operand buffer.
 * Returns false if an error was reported through state->escontext.
 */
static bool
pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag)
{
	if (lenval > 0xffff)
		ereturn(state->escontext, false,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("word is too long")));

	/* the node's distance is the operand's offset in the operand buffer */
	if (!pushquery(state, type, ltree_crc32_sz(strval, lenval),
				   state->curop - state->op, lenval, flag))
		return false;

	/* enlarge the operand buffer until the text and its terminator fit */
	while (state->curop - state->op + lenval + 1 >= state->lenop)
	{
		int32		tmp = state->curop - state->op;

		state->lenop *= 2;
		state->op = (char *) repalloc(state->op, state->lenop);
		state->curop = state->op + tmp;
	}
	memcpy(state->curop, strval, lenval);
	state->curop += lenval;
	*(state->curop) = '\0';
	state->curop++;
	state->sumlen += lenval + 1;
	return true;
}

/* capacity of the pending-operator stack used by one makepol() level */
#define STACKDEPTH		32

/*
 * make polish notation of query
 *
 * Consumes tokens until end of input or the matching ')' and pushes the
 * corresponding nodes.  Returns END on success and ERR on failure.
 */
static int32
makepol(QPRS_STATE *state)
{
	int32		val = 0,
				type;
	int32		lenval = 0;
	char	   *strval = NULL;
	int32		stack[STACKDEPTH];
	int32		lenstack = 0;
	uint16		flag = 0;

	/* since this function recurses, it could be driven to stack overflow */
	check_stack_depth();

	while ((type = gettoken_query(state, &val, &lenval, &strval, &flag)) != END)
	{
		switch (type)
		{
			case VAL:
				if (!pushval_asis(state, VAL, strval, lenval, flag))
					return ERR;
				/* emit pending '&' and '!' operators */
				while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
									stack[lenstack - 1] == (int32) '!'))
				{
					lenstack--;
					if (!pushquery(state, OPR, stack[lenstack], 0, 0, 0))
						return ERR;
				}
				break;
			case OPR:
				if (lenstack && val == (int32) '|')
				{
					if (!pushquery(state, OPR, val, 0, 0, 0))
						return ERR;
				}
				else
				{
					if (lenstack == STACKDEPTH)
						/* internal error */
						elog(ERROR, "stack too short");
					stack[lenstack] = val;
					lenstack++;
				}
				break;
			case OPEN:
				/* parse the parenthesized subexpression recursively */
				if (makepol(state) == ERR)
					return ERR;
				while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
									stack[lenstack - 1] == (int32) '!'))
				{
					lenstack--;
					if (!pushquery(state, OPR, stack[lenstack], 0, 0, 0))
						return ERR;
				}
				break;
			case CLOSE:
				/* end of this subexpression: emit everything still pending */
				while (lenstack)
				{
					lenstack--;
					if (!pushquery(state, OPR, stack[lenstack], 0, 0, 0))
						return ERR;
				}
				return END;
				break;
			case ERR:
				/* a soft error was already reported by the tokenizer */
				if (SOFT_ERROR_OCCURRED(state->escontext))
					return ERR;
				/* fall through */
			default:
				ereturn(state->escontext, ERR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("syntax error")));
		}
	}

	/* end of input: emit everything still pending */
	while (lenstack)
	{
		lenstack--;
		if (!pushquery(state, OPR, stack[lenstack], 0, 0, 0))
			return ERR;
	}
	return END;
}

/*
 * Walk the item array starting at *pos and fill in the "left" field of
 * every item; *pos is advanced past the subtree that was processed.
 */
static void
findoprnd(ITEM *ptr, int32 *pos)
{
	/* since this function recurses, it could be driven to stack overflow. */
	check_stack_depth();

	if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
	{
		/* operand: no children */
		ptr[*pos].left = 0;
		(*pos)++;
	}
	else if (ptr[*pos].val == (int32) '!')
	{
		/* unary operator: its only operand follows immediately */
		ptr[*pos].left = 1;
		(*pos)++;
		findoprnd(ptr, pos);
	}
	else
	{
		/* binary operator: "left" is the offset past the first subtree */
		ITEM	   *curitem = &ptr[*pos];
		int32		tmp = *pos;

		(*pos)++;
		findoprnd(ptr, pos);
		curitem->left = *pos - tmp;
		findoprnd(ptr, pos);
	}
}


/*
 * input
 *
 * Parses buf into a newly allocated ltxtquery.  Returns NULL if an error
 * was reported through escontext.
 */
static ltxtquery *
queryin(char *buf, struct Node *escontext)
{
	QPRS_STATE	state;
	int32		i;
	ltxtquery  *query;
	int32		commonlen;
	ITEM	   *ptr;
	NODE	   *tmp;
	int32		pos = 0;

	/* init state */
	state.buf = buf;
	state.state = WAITOPERAND;
	state.count = 0;
	state.num = 0;
	state.str = NULL;
	state.escontext = escontext;

	/* init list of operand */
	state.sumlen = 0;
	state.lenop = 64;
	state.curop = state.op = (char *) palloc(state.lenop);
	*(state.curop) = '\0';

	/* parse query & make polish notation (postfix, but in reverse order) */
	if (makepol(&state) == ERR)
		return NULL;
	if (!state.num)
		ereturn(escontext, NULL,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("syntax error"),
				 errdetail("Empty query.")));

	if (LTXTQUERY_TOO_BIG(state.num, state.sumlen))
		ereturn(escontext, NULL,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("ltxtquery is too large")));
	commonlen = COMPUTESIZE(state.num, state.sumlen);

	query = (ltxtquery *) palloc0(commonlen);
	SET_VARSIZE(query, commonlen);
	query->size = state.num;
	ptr = GETQUERY(query);

	/* set item in polish notation */
	for (i = 0; i < state.num; i++)
	{
		ptr[i].type = state.str->type;
		ptr[i].val = state.str->val;
		ptr[i].distance = state.str->distance;
		ptr[i].length = state.str->length;
		ptr[i].flag = state.str->flag;
		tmp = state.str->next;
		pfree(state.str);
		state.str = tmp;
	}

	/* set user-friendly operand view */
	memcpy(GETOPERAND(query), state.op, state.sumlen);
	pfree(state.op);

	/* set left operand's position for every operator */
	pos = 0;
	findoprnd(ptr, &pos);

	return query;
}

/*
 * in without morphology
 */
PG_FUNCTION_INFO_V1(ltxtq_in);
Datum
ltxtq_in(PG_FUNCTION_ARGS)
{
	ltxtquery  *res;

	if ((res = queryin(PG_GETARG_CSTRING(0), fcinfo->context)) == NULL)
		PG_RETURN_NULL();
	PG_RETURN_POINTER(res);
}

/*
 * ltxtquery type recv function
 *
 * The type is sent as text in binary mode, so this is almost the same
 * as the input function, but it's prefixed with a version number so we
 * can change the binary format sent in future if necessary. For now,
 * only version 1 is supported.
 */
PG_FUNCTION_INFO_V1(ltxtq_recv);
Datum
ltxtq_recv(PG_FUNCTION_ARGS)
{
	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
	int			version = pq_getmsgint(buf, 1);
	char	   *str;
	int			nbytes;
	ltxtquery  *res;

	if (version != 1)
		elog(ERROR, "unsupported ltxtquery version number %d", version);

	str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
	/* no soft-error context here, so parse errors are reported directly */
	res = queryin(str, NULL);
	pfree(str);

	PG_RETURN_POINTER(res);
}

/*
 * out function
 */
typedef struct
{
	ITEM	   *curpol;			/* next item to print */
	char	   *buf;			/* start of the output buffer */
	char	   *cur;			/* current end of the output text */
	char	   *op;				/* start of the query's operand text */
	int32		buflen;			/* allocated size of buf */
} INFIX;

/* enlarge inf->buf until addsize more bytes plus a terminator fit */
#define RESIZEBUF(inf,addsize) \
do { \
	while (((inf)->cur - (inf)->buf) + (addsize) + 1 >= (inf)->buflen) \
	{ \
		int32 len = (inf)->cur - (inf)->buf; \
		(inf)->buflen *= 2; \
		(inf)->buf = (char *) repalloc((void *) (inf)->buf, (inf)->buflen); \
		(inf)->cur = (inf)->buf + len; \
	} \
} while (0)

/*
 * recursive walk on tree and print it in
 * infix (human-readable) view
 *
 * "first" is true when no parentheses are needed around a '|' expression
 * at this level.
 */
static void
infix(INFIX *in, bool first)
{
	/* since this function recurses, it could be driven to stack overflow. */
	check_stack_depth();

	if (in->curpol->type == VAL)
	{
		/* operand: copy its text, then append its modifier characters */
		char	   *op = in->op + in->curpol->distance;

		RESIZEBUF(in, in->curpol->length * 2 + 5);
		while (*op)
		{
			*(in->cur) = *op;
			op++;
			in->cur++;
		}
		if (in->curpol->flag & LVAR_SUBLEXEME)
		{
			*(in->cur) = '%';
			in->cur++;
		}
		if (in->curpol->flag & LVAR_INCASE)
		{
			*(in->cur) = '@';
			in->cur++;
		}
		if (in->curpol->flag & LVAR_ANYEND)
		{
			*(in->cur) = '*';
			in->cur++;
		}
		*(in->cur) = '\0';
		in->curpol++;
	}
	else if (in->curpol->val == (int32) '!')
	{
		bool		isopr = false;

		RESIZEBUF(in, 1);
		*(in->cur) = '!';
		in->cur++;
		*(in->cur) = '\0';
		in->curpol++;

		/* parenthesize the negated expression if it is itself an operator */
		if (in->curpol->type == OPR)
		{
			isopr = true;
			RESIZEBUF(in, 2);
			sprintf(in->cur, "( ");
			in->cur = strchr(in->cur, '\0');
		}
		infix(in, isopr);
		if (isopr)
		{
			RESIZEBUF(in, 2);
			sprintf(in->cur, " )");
			in->cur = strchr(in->cur, '\0');
		}
	}
	else
	{
		int32		op = in->curpol->val;
		INFIX		nrm;

		in->curpol++;
		if (op == (int32) '|' && !first)
		{
			RESIZEBUF(in, 2);
			sprintf(in->cur, "( ");
			in->cur = strchr(in->cur, '\0');
		}

		/* the right operand comes first, so print it to a scratch buffer */
		nrm.curpol = in->curpol;
		nrm.op = in->op;
		nrm.buflen = 16;
		nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);

		/* get right operand */
		infix(&nrm, false);

		/* get & print left operand */
		in->curpol = nrm.curpol;
		infix(in, false);

		/* print operator & right operand */
		RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
		sprintf(in->cur, " %c %s", op, nrm.buf);
		in->cur = strchr(in->cur, '\0');
		pfree(nrm.buf);

		if (op == (int32) '|' && !first)
		{
			RESIZEBUF(in, 2);
			sprintf(in->cur, " )");
			in->cur = strchr(in->cur, '\0');
		}
	}
}

/*
 * Render query in infix form into a newly palloc'd, NUL-terminated string.
 * Raises an error if the query contains no items.
 */
static char *
ltxtquery_to_cstring(ltxtquery *query)
{
	INFIX		nrm;

	if (query->size == 0)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("syntax error"),
				 errdetail("Empty query.")));

	nrm.curpol = GETQUERY(query);
	nrm.buflen = 32;
	nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
	*(nrm.cur) = '\0';
	nrm.op = GETOPERAND(query);
	infix(&nrm, true);

	return nrm.buf;
}

PG_FUNCTION_INFO_V1(ltxtq_out);
Datum
ltxtq_out(PG_FUNCTION_ARGS)
{
	ltxtquery  *query = PG_GETARG_LTXTQUERY_P(0);

	PG_RETURN_CSTRING(ltxtquery_to_cstring(query));
}

/*
 * ltxtquery type send function
 *
 * The type is sent as text in binary mode, so this is almost the same
 * as the output function, but it's prefixed with a version number so we
 * can change the binary format sent in future if necessary. For now,
 * only version 1 is supported.
 */
PG_FUNCTION_INFO_V1(ltxtq_send);
Datum
ltxtq_send(PG_FUNCTION_ARGS)
{
	ltxtquery  *query = PG_GETARG_LTXTQUERY_P(0);
	StringInfoData buf;
	int			version = 1;
	char	   *str;

	str = ltxtquery_to_cstring(query);

	pq_begintypsend(&buf);
	pq_sendint8(&buf, version);
	pq_sendtext(&buf, str, strlen(str));
	pfree(str);

	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}