/*-------------------------------------------------------------------------
 *
 * queryjumblefuncs.c
 *	 Query normalization and fingerprinting.
 *
 * Normalization is a process whereby similar queries, typically differing only
 * in their constants (though the exact rules are somewhat more subtle than
 * that) are recognized as equivalent, and are tracked as a single entry.  This
 * is particularly useful for non-prepared queries.
 *
 * Normalization is implemented by fingerprinting queries, selectively
 * serializing those fields of each query tree's nodes that are judged to be
 * essential to the query.  This is referred to as a query jumble.  This is
 * distinct from a regular serialization in that various extraneous
 * information is ignored as irrelevant or not essential to the query, such
 * as the collations of Vars and, most notably, the values of constants.
 *
 * This jumble is acquired at the end of parse analysis of each query, and
 * a 64-bit hash of it is stored into the query's Query.queryId field.
 * The server then copies this value around, making it available in plan
 * tree(s) generated from the query.  The executor can then use this value
 * to blame query costs on the proper queryId.
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/nodes/queryjumblefuncs.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "common/hashfn.h"
#include "miscadmin.h"
#include "nodes/queryjumble.h"
#include "parser/scansup.h"

#define JUMBLE_SIZE 1024 /* query serialization buffer size */

/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;

/* True when compute_query_id is ON, or AUTO and a module requests them */
bool query_id_enabled = false;

static void AppendJumble(JumbleState *jstate,
						 const unsigned char *item, Size size);
static void RecordConstLocation(JumbleState *jstate, int location);
static void _jumbleNode(JumbleState *jstate, Node *node);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node);

/*
 * Given a possibly multi-statement source string, confine our attention to the
 * relevant part of the string.
 *
 * query:	 the full source string (never modified).
 * location: in/out.  On entry, the byte offset of the statement within
 *			 "query", or a negative value if unknown.  On exit, the offset
 *			 after any leading whitespace has been skipped.
 * len:		 in/out.  On entry, the statement length; zero or a negative
 *			 value means "rest of string".  On exit, the length after
 *			 leading and trailing whitespace has been trimmed.
 *
 * Returns a pointer into the original string at the adjusted location.
 * Since nothing is copied or overwritten, the returned text is delimited by
 * *len, not by a terminator.
 */
const char *
CleanQuerytext(const char *query, int *location, int *len)
{
	int query_location = *location;
	int query_len = *len;

	/* First apply starting offset, unless it's -1 (unknown). */
	if (query_location >= 0)
	{
		Assert(query_location <= strlen(query));
		query += query_location;
		/* Length of 0 (or -1) means "rest of string" */
		if (query_len <= 0)
			query_len = strlen(query);
		else
			Assert(query_len <= strlen(query));
	}
	else
	{
		/* If query location is unknown, distrust query_len as well */
		query_location = 0;
		query_len = strlen(query);
	}

	/*
	 * Discard leading and trailing whitespace, too.  Use scanner_isspace()
	 * not libc's isspace(), because we want to match the lexer's behavior.
	 */
	while (query_len > 0 && scanner_isspace(query[0]))
		query++, query_location++, query_len--;
	while (query_len > 0 && scanner_isspace(query[query_len - 1]))
		query_len--;

	*location = query_location;
	*len = query_len;

	return query;
}

/*
 * JumbleQuery: compute the jumble of a Query tree and set its query ID.
 *
 * Allocates a JumbleState with palloc, walks the tree rooted at "query" to
 * fill the jumble buffer, hashes the buffer contents (seed 0) and stores the
 * resulting 64-bit value in query->queryId.
 *
 * Returns the JumbleState, which is not freed here.  On return it holds the
 * constant locations recorded during the walk (clocations and
 * clocations_count) and the highest PARAM_EXTERN parameter id seen.
 *
 * Query ID computation must be enabled (asserted).
 */
JumbleState *
JumbleQuery(Query *query)
{
	JumbleState *jstate = NULL;

	Assert(IsQueryIdEnabled());

	jstate = (JumbleState *) palloc(sizeof(JumbleState));

	/* Set up workspace for query jumbling */
	jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
	jstate->jumble_len = 0;
	/* initial capacity of the constant-location array; grown on demand */
	jstate->clocations_buf_size = 32;
	jstate->clocations = (LocationLen *)
		palloc(jstate->clocations_buf_size * sizeof(LocationLen));
	jstate->clocations_count = 0;
	jstate->highest_extern_param_id = 0;

	/* Compute query ID and mark the Query node with it */
	_jumbleNode(jstate, (Node *) query);
	query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
													  jstate->jumble_len,
													  0));

	/*
	 * If we are unlucky enough to get a hash of zero, use 1 instead for
	 * normal statements and 2 for utility queries.
	 */
	if (query->queryId == UINT64CONST(0))
	{
		if (query->utilityStmt)
			query->queryId = UINT64CONST(2);
		else
			query->queryId = UINT64CONST(1);
	}

	return jstate;
}

/*
 * Enables query identifier computation.
 *
 * Third-party plugins can use this function to inform core that they require
 * a query identifier to be computed.
 *
 * The request is ignored when compute_query_id is COMPUTE_QUERY_ID_OFF.
 */
void
EnableQueryId(void)
{
	if (compute_query_id != COMPUTE_QUERY_ID_OFF)
		query_id_enabled = true;
}

/*
 * AppendJumble: Append a value that is substantive in a given query to
 * the current jumble.
 *
 * "item" points to "size" bytes that are copied onto the end of
 * jstate->jumble; jstate->jumble_len is updated to match.  An item that does
 * not fit in the remaining space is copied in pieces, folding the buffer
 * down to a hash each time it fills up.
 */
static void
AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
{
	unsigned char *jumble = jstate->jumble;
	Size jumble_len = jstate->jumble_len;

	/*
	 * Whenever the jumble buffer is full, we hash the current contents and
	 * reset the buffer to contain just that hash value, thus relying on the
	 * hash to summarize everything so far.
	 */
	while (size > 0)
	{
		Size part_size;

		if (jumble_len >= JUMBLE_SIZE)
		{
			uint64 start_hash;

			start_hash = DatumGetUInt64(hash_any_extended(jumble,
														  JUMBLE_SIZE, 0));
			/* the hash now occupies the first sizeof(start_hash) bytes */
			memcpy(jumble, &start_hash, sizeof(start_hash));
			jumble_len = sizeof(start_hash);
		}
		/* copy as much of the item as the remaining buffer space allows */
		part_size = Min(size, JUMBLE_SIZE - jumble_len);
		memcpy(jumble + jumble_len, item, part_size);
		jumble_len += part_size;
		item += part_size;
		size -= part_size;
	}
	jstate->jumble_len = jumble_len;
}

/*
 * Record location of constant within query string of query tree
 * that is currently being walked.
 *
 * Negative locations are ignored.  Otherwise the location is appended to
 * jstate->clocations, doubling the array's capacity first if it is full.
 */
static void
RecordConstLocation(JumbleState *jstate, int location)
{
	/* -1 indicates unknown or undefined location */
	if (location >= 0)
	{
		/* enlarge array if needed */
		if (jstate->clocations_count >= jstate->clocations_buf_size)
		{
			jstate->clocations_buf_size *= 2;
			jstate->clocations = (LocationLen *)
				repalloc(jstate->clocations,
						 jstate->clocations_buf_size *
						 sizeof(LocationLen));
		}
		jstate->clocations[jstate->clocations_count].location = location;
		/* initialize lengths to -1 to simplify third-party module usage */
		jstate->clocations[jstate->clocations_count].length = -1;
		jstate->clocations_count++;
	}
}

/*
 * Helper macros for the per-node jumbling functions.
 *
 * All of them except JUMBLE_FIELD_SINGLE expect a local variable "expr"
 * pointing to the node being processed, and all of them expect a local
 * variable "jstate" holding the JumbleState:
 *
 * JUMBLE_NODE(item)		 recurse into the child node expr->item
 * JUMBLE_LOCATION(location) record expr->location as a constant location
 * JUMBLE_FIELD(item)		 append the raw bytes of the field expr->item
 * JUMBLE_FIELD_SINGLE(item) append the raw bytes of an arbitrary lvalue
 * JUMBLE_STRING(str)		 append the string expr->str including its
 *							 terminating NUL; a NULL pointer appends nothing
 */
#define JUMBLE_NODE(item) \
	_jumbleNode(jstate, (Node *) expr->item)
#define JUMBLE_LOCATION(location) \
	RecordConstLocation(jstate, expr->location)
#define JUMBLE_FIELD(item) \
	AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
	AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
#define JUMBLE_STRING(str) \
do { \
	if (expr->str) \
		AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \
} while(0)

/*
 * NOTE(review): the contents of this included file are not visible here;
 * presumably it defines the per-node-type jumbling functions written in
 * terms of the macros above -- confirm against the build.
 */
#include "queryjumblefuncs.funcs.c"

/*
 * _jumbleNode: jumble one node, recursing into its children as needed.
 *
 * A NULL node contributes nothing to the jumble.  As a side effect, the
 * highest paramid of any PARAM_EXTERN Param node visited is tracked in
 * jstate->highest_extern_param_id.
 */
static void
_jumbleNode(JumbleState *jstate, Node *node)
{
	/* must be named "expr": the JUMBLE_* macros refer to it */
	Node *expr = node;

	if (expr == NULL)
		return;

	/* Guard against stack overflow due to overly complex expressions */
	check_stack_depth();

	/*
	 * We always emit the node's NodeTag, then any additional fields that are
	 * considered significant, and then we recurse to any child nodes.
	 */
	JUMBLE_FIELD(type);

	switch (nodeTag(expr))
	{
			/*
			 * NOTE(review): contents not visible here; presumably the
			 * case labels dispatching each node tag to its jumbling
			 * function -- confirm against the build.
			 */
#include "queryjumblefuncs.switch.c"

		case T_List:
		case T_IntList:
		case T_OidList:
		case T_XidList:
			_jumbleList(jstate, expr);
			break;

		default:
			/* Only a warning, since we can stumble along anyway */
			elog(WARNING, "unrecognized node type: %d",
				 (int) nodeTag(expr));
			break;
	}

	/* Special cases to handle outside the automated code */
	switch (nodeTag(expr))
	{
		case T_Param:
			{
				Param *p = (Param *) node;

				/*
				 * Update the highest Param id seen, in order to start
				 * normalization correctly.
				 */
				if (p->paramkind == PARAM_EXTERN &&
					p->paramid > jstate->highest_extern_param_id)
					jstate->highest_extern_param_id = p->paramid;
			}
			break;
		default:
			break;
	}
}

/*
 * _jumbleList: jumble the elements of a List node, in list order.
 *
 * Elements of a T_List are jumbled recursively as nodes; elements of
 * integer, OID and XID lists are appended as raw values.  Any other list
 * type raises an error.
 */
static void
_jumbleList(JumbleState *jstate, Node *node)
{
	List *expr = (List *) node;
	ListCell *l;

	switch (expr->type)
	{
		case T_List:
			foreach(l, expr)
				_jumbleNode(jstate, lfirst(l));
			break;
		case T_IntList:
			foreach(l, expr)
				JUMBLE_FIELD_SINGLE(lfirst_int(l));
			break;
		case T_OidList:
			foreach(l, expr)
				JUMBLE_FIELD_SINGLE(lfirst_oid(l));
			break;
		case T_XidList:
			foreach(l, expr)
				JUMBLE_FIELD_SINGLE(lfirst_xid(l));
			break;
		default:
			elog(ERROR, "unrecognized list node type: %d",
				 (int) expr->type);
			return;
	}
}

/*
 * _jumbleA_Const: jumble an A_Const node.
 *
 * The isnull flag is always appended.  For a non-null constant, the node
 * tag of the contained value follows, and then the value itself: integer
 * and boolean values as raw fields, and float, string and bit-string values
 * as NUL-terminated strings.  Any other value type raises an error.
 */
static void
_jumbleA_Const(JumbleState *jstate, Node *node)
{
	A_Const *expr = (A_Const *) node;

	JUMBLE_FIELD(isnull);
	if (!expr->isnull)
	{
		JUMBLE_FIELD(val.node.type);
		switch (nodeTag(&expr->val))
		{
			case T_Integer:
				JUMBLE_FIELD(val.ival.ival);
				break;
			case T_Float:
				JUMBLE_STRING(val.fval.fval);
				break;
			case T_Boolean:
				JUMBLE_FIELD(val.boolval.boolval);
				break;
			case T_String:
				JUMBLE_STRING(val.sval.sval);
				break;
			case T_BitString:
				JUMBLE_STRING(val.bsval.bsval);
				break;
			default:
				elog(ERROR, "unrecognized node type: %d",
					 (int) nodeTag(&expr->val));
				break;
		}
	}
}

/*
 * _jumbleRangeTblEntry: jumble a RangeTblEntry node.
 *
 * The rtekind is always appended, followed by only those fields selected
 * below for that kind of entry.  RTE_RESULT entries contribute nothing
 * further; an unknown rtekind raises an error.
 */
static void
_jumbleRangeTblEntry(JumbleState *jstate, Node *node)
{
	RangeTblEntry *expr = (RangeTblEntry *) node;

	JUMBLE_FIELD(rtekind);
	switch (expr->rtekind)
	{
		case RTE_RELATION:
			JUMBLE_FIELD(relid);
			JUMBLE_NODE(tablesample);
			JUMBLE_FIELD(inh);
			break;
		case RTE_SUBQUERY:
			JUMBLE_NODE(subquery);
			break;
		case RTE_JOIN:
			JUMBLE_FIELD(jointype);
			break;
		case RTE_FUNCTION:
			JUMBLE_NODE(functions);
			break;
		case RTE_TABLEFUNC:
			JUMBLE_NODE(tablefunc);
			break;
		case RTE_VALUES:
			JUMBLE_NODE(values_lists);
			break;
		case RTE_CTE:

			/*
			 * Depending on the CTE name here isn't ideal, but it's the only
			 * info we have to identify the referenced WITH item.
			 */
			JUMBLE_STRING(ctename);
			JUMBLE_FIELD(ctelevelsup);
			break;
		case RTE_NAMEDTUPLESTORE:
			JUMBLE_STRING(enrname);
			break;
		case RTE_RESULT:
			break;
		default:
			elog(ERROR, "unrecognized RTE kind: %d", (int) expr->rtekind);
			break;
	}
}