Show ignored constants as "$N" rather than "?" in pg_stat_statements.

The trouble with the original choice here is that "?" is a valid (and
indeed used) operator name, so that you could end up with ambiguous
statement texts like "SELECT ? ? ?".  With this patch, you instead
see "SELECT $1 ? $2", which seems significantly more readable.
The numbers used for this purpose begin after the last actual $N parameter
in the particular query.  The conflict with external parameters has its own
potential for confusion of course, but it was agreed to be an improvement
over the previous behavior.

Lukas Fittl

Discussion: https://postgr.es/m/CAP53PkxeaCuwYmF-A4J5z2-qk5fYFo5_NH3gpXGJJBxv1DMwEw@mail.gmail.com
This commit is contained in:
Tom Lane 2017-03-27 20:14:36 -04:00
parent 6462238f0d
commit a6f22e8356
4 changed files with 142 additions and 56 deletions

View File

@ -68,6 +68,13 @@ SELECT 1 AS i UNION SELECT 2 ORDER BY i;
2 2
(2 rows) (2 rows)
-- ? operator
select '{"a":1, "b":2}'::jsonb ? 'b';
?column?
----------
t
(1 row)
-- cte -- cte
WITH t(f) AS ( WITH t(f) AS (
VALUES (1.0), (2.0) VALUES (1.0), (2.0)
@ -79,24 +86,35 @@ WITH t(f) AS (
2.0 2.0
(2 rows) (2 rows)
-- prepared statement with parameter
PREPARE pgss_test (int) AS SELECT $1, 'test' LIMIT 1;
EXECUTE pgss_test(1);
?column? | ?column?
----------+----------
1 | test
(1 row)
DEALLOCATE pgss_test;
SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
query | calls | rows query | calls | rows
-----------------------------------------+-------+------ ---------------------------------------------------+-------+------
SELECT ? +| 4 | 4 PREPARE pgss_test (int) AS SELECT $1, $2 LIMIT $3 | 1 | 1
+| | SELECT $1 +| 4 | 4
AS "text" | | +| |
SELECT ? + ? | 2 | 2 AS "text" | |
SELECT ? + ? + ? AS "add" | 3 | 3 SELECT $1 + $2 | 2 | 2
SELECT ? AS "float" | 1 | 1 SELECT $1 + $2 + $3 AS "add" | 3 | 3
SELECT ? AS "int" | 2 | 2 SELECT $1 AS "float" | 1 | 1
SELECT ? AS i UNION SELECT ? ORDER BY i | 1 | 2 SELECT $1 AS "int" | 2 | 2
SELECT ? || ? | 1 | 1 SELECT $1 AS i UNION SELECT $2 ORDER BY i | 1 | 2
SELECT pg_stat_statements_reset() | 1 | 1 SELECT $1 || $2 | 1 | 1
WITH t(f) AS ( +| 1 | 2 SELECT pg_stat_statements_reset() | 1 | 1
VALUES (?), (?) +| | WITH t(f) AS ( +| 1 | 2
) +| | VALUES ($1), ($2) +| |
SELECT f FROM t ORDER BY f | | ) +| |
(9 rows) SELECT f FROM t ORDER BY f | |
select $1::jsonb ? $2 | 1 | 1
(11 rows)
-- --
-- CRUD: INSERT SELECT UPDATE DELETE on test table -- CRUD: INSERT SELECT UPDATE DELETE on test table
@ -108,7 +126,7 @@ SELECT pg_stat_statements_reset();
(1 row) (1 row)
-- utility "create table" should not be shown -- utility "create table" should not be shown
CREATE TABLE test (a int, b char(20)); CREATE TEMP TABLE test (a int, b char(20));
INSERT INTO test VALUES(generate_series(1, 10), 'aaa'); INSERT INTO test VALUES(generate_series(1, 10), 'aaa');
UPDATE test SET b = 'bbb' WHERE a > 7; UPDATE test SET b = 'bbb' WHERE a > 7;
DELETE FROM test WHERE a > 9; DELETE FROM test WHERE a > 9;
@ -125,6 +143,8 @@ BEGIN \;
UPDATE test SET b = '555' WHERE a = 5 \; UPDATE test SET b = '555' WHERE a = 5 \;
UPDATE test SET b = '666' WHERE a = 6 \; UPDATE test SET b = '666' WHERE a = 6 \;
COMMIT ; COMMIT ;
-- many INSERT values
INSERT INTO test (a, b) VALUES (1, 'a'), (2, 'b'), (3, 'c');
-- SELECT with constants -- SELECT with constants
SELECT * FROM test WHERE a > 5 ORDER BY a ; SELECT * FROM test WHERE a > 5 ORDER BY a ;
a | b a | b
@ -147,8 +167,11 @@ SELECT *
SELECT * FROM test ORDER BY a; SELECT * FROM test ORDER BY a;
a | b a | b
---+---------------------- ---+----------------------
1 | a
1 | 111 1 | 111
2 | b
2 | 222 2 | 222
3 | c
3 | 333 3 | 333
4 | 444 4 | 444
5 | 555 5 | 555
@ -156,19 +179,35 @@ SELECT * FROM test ORDER BY a;
7 | aaa 7 | aaa
8 | bbb 8 | bbb
9 | bbb 9 | bbb
(9 rows) (12 rows)
-- SELECT with IN clause
SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5);
a | b
---+----------------------
1 | 111
2 | 222
3 | 333
4 | 444
5 | 555
1 | a
2 | b
3 | c
(8 rows)
SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
query | calls | rows query | calls | rows
---------------------------------------------------+-------+------ -------------------------------------------------------------+-------+------
DELETE FROM test WHERE a > ? | 1 | 1 DELETE FROM test WHERE a > $1 | 1 | 1
INSERT INTO test VALUES(generate_series(?, ?), ?) | 1 | 10 INSERT INTO test (a, b) VALUES ($1, $2), ($3, $4), ($5, $6) | 1 | 3
SELECT * FROM test ORDER BY a | 1 | 9 INSERT INTO test VALUES(generate_series($1, $2), $3) | 1 | 10
SELECT * FROM test WHERE a > ? ORDER BY a | 2 | 4 SELECT * FROM test ORDER BY a | 1 | 12
SELECT pg_stat_statements_reset() | 1 | 1 SELECT * FROM test WHERE a > $1 ORDER BY a | 2 | 4
UPDATE test SET b = ? WHERE a = ? | 6 | 6 SELECT * FROM test WHERE a IN ($1, $2, $3, $4, $5) | 1 | 8
UPDATE test SET b = ? WHERE a > ? | 1 | 3 SELECT pg_stat_statements_reset() | 1 | 1
(7 rows) UPDATE test SET b = $1 WHERE a = $2 | 6 | 6
UPDATE test SET b = $1 WHERE a > $2 | 1 | 3
(9 rows)
-- --
-- pg_stat_statements.track = none -- pg_stat_statements.track = none
@ -251,9 +290,9 @@ SELECT PLUS_ONE(10);
SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
query | calls | rows query | calls | rows
-----------------------------------+-------+------ -----------------------------------+-------+------
SELECT ?::TEXT | 1 | 1 SELECT $1::TEXT | 1 | 1
SELECT PLUS_ONE(?) | 2 | 2 SELECT PLUS_ONE($1) | 2 | 2
SELECT PLUS_TWO(?) | 2 | 2 SELECT PLUS_TWO($1) | 2 | 2
SELECT pg_stat_statements_reset() | 1 | 1 SELECT pg_stat_statements_reset() | 1 | 1
(4 rows) (4 rows)
@ -308,10 +347,10 @@ SELECT PLUS_ONE(1);
SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
query | calls | rows query | calls | rows
-----------------------------------+-------+------ -----------------------------------+-------+------
SELECT (i + ? + ?)::INTEGER | 2 | 2 SELECT (i + $2 + $3)::INTEGER | 2 | 2
SELECT (i + ?)::INTEGER LIMIT ? | 2 | 2 SELECT (i + $2)::INTEGER LIMIT $3 | 2 | 2
SELECT PLUS_ONE(?) | 2 | 2 SELECT PLUS_ONE($1) | 2 | 2
SELECT PLUS_TWO(?) | 2 | 2 SELECT PLUS_TWO($1) | 2 | 2
SELECT pg_stat_statements_reset() | 1 | 1 SELECT pg_stat_statements_reset() | 1 | 1
(5 rows) (5 rows)
@ -352,7 +391,7 @@ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
DROP FUNCTION PLUS_TWO(INTEGER) | 1 | 0 DROP FUNCTION PLUS_TWO(INTEGER) | 1 | 0
DROP TABLE IF EXISTS test | 3 | 0 DROP TABLE IF EXISTS test | 3 | 0
DROP TABLE test | 1 | 0 DROP TABLE test | 1 | 0
SELECT ? | 1 | 1 SELECT $1 | 1 | 1
SELECT pg_stat_statements_reset() | 1 | 1 SELECT pg_stat_statements_reset() | 1 | 1
(8 rows) (8 rows)

View File

@ -27,10 +27,10 @@
* to blame query costs on the proper queryId. * to blame query costs on the proper queryId.
* *
* To facilitate presenting entries to users, we create "representative" query * To facilitate presenting entries to users, we create "representative" query
* strings in which constants are replaced with '?' characters, to make it * strings in which constants are replaced with parameter symbols ($n), to
* clearer what a normalized entry can represent. To save on shared memory, * make it clearer what a normalized entry can represent. To save on shared
* and to avoid having to truncate oversized query strings, we store these * memory, and to avoid having to truncate oversized query strings, we store
* strings in a temporary external query-texts file. Offsets into this * these strings in a temporary external query-texts file. Offsets into this
* file are kept in shared memory. * file are kept in shared memory.
* *
* Note about locking issues: to create or delete an entry in the shared * Note about locking issues: to create or delete an entry in the shared
@ -219,6 +219,9 @@ typedef struct pgssJumbleState
/* Current number of valid entries in clocations array */ /* Current number of valid entries in clocations array */
int clocations_count; int clocations_count;
/* highest Param id we've seen, in order to start normalization correctly */
int highest_extern_param_id;
} pgssJumbleState; } pgssJumbleState;
/*---- Local variables ----*/ /*---- Local variables ----*/
@ -803,6 +806,7 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query)
jstate.clocations = (pgssLocationLen *) jstate.clocations = (pgssLocationLen *)
palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen)); palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
jstate.clocations_count = 0; jstate.clocations_count = 0;
jstate.highest_extern_param_id = 0;
/* Compute query ID and mark the Query node with it */ /* Compute query ID and mark the Query node with it */
JumbleQuery(&jstate, query); JumbleQuery(&jstate, query);
@ -2482,6 +2486,10 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
APP_JUMB(p->paramkind); APP_JUMB(p->paramkind);
APP_JUMB(p->paramid); APP_JUMB(p->paramid);
APP_JUMB(p->paramtype); APP_JUMB(p->paramtype);
/* Also, track the highest external Param id */
if (p->paramkind == PARAM_EXTERN &&
p->paramid > jstate->highest_extern_param_id)
jstate->highest_extern_param_id = p->paramid;
} }
break; break;
case T_Aggref: case T_Aggref:
@ -2874,7 +2882,7 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
break; break;
case T_TableFunc: case T_TableFunc:
{ {
TableFunc *tablefunc = (TableFunc *) node; TableFunc *tablefunc = (TableFunc *) node;
JumbleExpr(jstate, tablefunc->docexpr); JumbleExpr(jstate, tablefunc->docexpr);
JumbleExpr(jstate, tablefunc->rowexpr); JumbleExpr(jstate, tablefunc->rowexpr);
@ -2938,7 +2946,8 @@ RecordConstLocation(pgssJumbleState *jstate, int location)
* of interest, so it's worth doing.) * of interest, so it's worth doing.)
* *
* *query_len_p contains the input string length, and is updated with * *query_len_p contains the input string length, and is updated with
* the result string length (which cannot be longer) on exit. * the result string length on exit. The resulting string might be longer
* or shorter depending on what happens with replacement of constants.
* *
* Returns a palloc'd string. * Returns a palloc'd string.
*/ */
@ -2949,6 +2958,7 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query,
char *norm_query; char *norm_query;
int query_len = *query_len_p; int query_len = *query_len_p;
int i, int i,
norm_query_buflen, /* Space allowed for norm_query */
len_to_wrt, /* Length (in bytes) to write */ len_to_wrt, /* Length (in bytes) to write */
quer_loc = 0, /* Source query byte location */ quer_loc = 0, /* Source query byte location */
n_quer_loc = 0, /* Normalized query byte location */ n_quer_loc = 0, /* Normalized query byte location */
@ -2961,8 +2971,17 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query,
*/ */
fill_in_constant_lengths(jstate, query, query_loc); fill_in_constant_lengths(jstate, query, query_loc);
/*
* Allow for $n symbols to be longer than the constants they replace.
* Constants must take at least one byte in text form, while a $n symbol
* certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
* could refine that limit based on the max value of n for the current
* query, but it hardly seems worth any extra effort to do so.
*/
norm_query_buflen = query_len + jstate->clocations_count * 10;
/* Allocate result buffer */ /* Allocate result buffer */
norm_query = palloc(query_len + 1); norm_query = palloc(norm_query_buflen + 1);
for (i = 0; i < jstate->clocations_count; i++) for (i = 0; i < jstate->clocations_count; i++)
{ {
@ -2986,8 +3005,9 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query,
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt; n_quer_loc += len_to_wrt;
/* And insert a '?' in place of the constant token */ /* And insert a param symbol in place of the constant token */
norm_query[n_quer_loc++] = '?'; n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
i + 1 + jstate->highest_extern_param_id);
quer_loc = off + tok_len; quer_loc = off + tok_len;
last_off = off; last_off = off;
@ -3004,7 +3024,7 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query,
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt; n_quer_loc += len_to_wrt;
Assert(n_quer_loc <= query_len); Assert(n_quer_loc <= norm_query_buflen);
norm_query[n_quer_loc] = '\0'; norm_query[n_quer_loc] = '\0';
*query_len_p = n_quer_loc; *query_len_p = n_quer_loc;

View File

@ -37,12 +37,20 @@ SELECT :add + 1 + 1 AS "add" \gset
-- set operator -- set operator
SELECT 1 AS i UNION SELECT 2 ORDER BY i; SELECT 1 AS i UNION SELECT 2 ORDER BY i;
-- ? operator
select '{"a":1, "b":2}'::jsonb ? 'b';
-- cte -- cte
WITH t(f) AS ( WITH t(f) AS (
VALUES (1.0), (2.0) VALUES (1.0), (2.0)
) )
SELECT f FROM t ORDER BY f; SELECT f FROM t ORDER BY f;
-- prepared statement with parameter
PREPARE pgss_test (int) AS SELECT $1, 'test' LIMIT 1;
EXECUTE pgss_test(1);
DEALLOCATE pgss_test;
SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
-- --
@ -51,7 +59,7 @@ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
SELECT pg_stat_statements_reset(); SELECT pg_stat_statements_reset();
-- utility "create table" should not be shown -- utility "create table" should not be shown
CREATE TABLE test (a int, b char(20)); CREATE TEMP TABLE test (a int, b char(20));
INSERT INTO test VALUES(generate_series(1, 10), 'aaa'); INSERT INTO test VALUES(generate_series(1, 10), 'aaa');
UPDATE test SET b = 'bbb' WHERE a > 7; UPDATE test SET b = 'bbb' WHERE a > 7;
@ -74,6 +82,9 @@ UPDATE test SET b = '555' WHERE a = 5 \;
UPDATE test SET b = '666' WHERE a = 6 \; UPDATE test SET b = '666' WHERE a = 6 \;
COMMIT ; COMMIT ;
-- many INSERT values
INSERT INTO test (a, b) VALUES (1, 'a'), (2, 'b'), (3, 'c');
-- SELECT with constants -- SELECT with constants
SELECT * FROM test WHERE a > 5 ORDER BY a ; SELECT * FROM test WHERE a > 5 ORDER BY a ;
@ -85,6 +96,9 @@ SELECT *
-- SELECT without constants -- SELECT without constants
SELECT * FROM test ORDER BY a; SELECT * FROM test ORDER BY a;
-- SELECT with IN clause
SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5);
SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
-- --

View File

@ -244,11 +244,12 @@
</para> </para>
<para> <para>
When a constant's value has been ignored for purposes of matching the When a constant's value has been ignored for purposes of matching the query
query to other queries, the constant is replaced by <literal>?</literal> to other queries, the constant is replaced by a parameter symbol, such
in the <structname>pg_stat_statements</> display. The rest of the query as <literal>$1</literal>, in the <structname>pg_stat_statements</>
text is that of the first query that had the particular display.
<structfield>queryid</> hash value associated with the The rest of the query text is that of the first query that had the
particular <structfield>queryid</> hash value associated with the
<structname>pg_stat_statements</> entry. <structname>pg_stat_statements</> entry.
</para> </para>
@ -301,6 +302,18 @@
replicas. If in doubt, direct testing is recommended. replicas. If in doubt, direct testing is recommended.
</para> </para>
<para>
The parameter symbols used to replace constants in
representative query texts start from the next number after the
highest <literal>$</><replaceable>n</> parameter in the original query
text, or <literal>$1</> if there was none. It's worth noting that in
some cases there may be hidden parameter symbols that affect this
numbering. For example, <application>PL/pgSQL</> uses hidden parameter
symbols to insert values of function local variables into queries, so that
a <application>PL/pgSQL</> statement like <literal>SELECT i + 1 INTO j</>
would have representative text like <literal>SELECT i + $2</>.
</para>
<para> <para>
The representative query texts are kept in an external disk file, and do The representative query texts are kept in an external disk file, and do
not consume shared memory. Therefore, even very lengthy query texts can not consume shared memory. Therefore, even very lengthy query texts can
@ -481,13 +494,13 @@ bench=# SELECT query, calls, total_time, rows, 100.0 * shared_blks_hit /
nullif(shared_blks_hit + shared_blks_read, 0) AS hit_percent nullif(shared_blks_hit + shared_blks_read, 0) AS hit_percent
FROM pg_stat_statements ORDER BY total_time DESC LIMIT 5; FROM pg_stat_statements ORDER BY total_time DESC LIMIT 5;
-[ RECORD 1 ]--------------------------------------------------------------------- -[ RECORD 1 ]---------------------------------------------------------------------
query | UPDATE pgbench_branches SET bbalance = bbalance + ? WHERE bid = ?; query | UPDATE pgbench_branches SET bbalance = bbalance + $1 WHERE bid = $2;
calls | 3000 calls | 3000
total_time | 9609.00100000002 total_time | 9609.00100000002
rows | 2836 rows | 2836
hit_percent | 99.9778970000200936 hit_percent | 99.9778970000200936
-[ RECORD 2 ]--------------------------------------------------------------------- -[ RECORD 2 ]---------------------------------------------------------------------
query | UPDATE pgbench_tellers SET tbalance = tbalance + ? WHERE tid = ?; query | UPDATE pgbench_tellers SET tbalance = tbalance + $1 WHERE tid = $2;
calls | 3000 calls | 3000
total_time | 8015.156 total_time | 8015.156
rows | 2990 rows | 2990
@ -499,7 +512,7 @@ total_time | 310.624
rows | 100000 rows | 100000
hit_percent | 0.30395136778115501520 hit_percent | 0.30395136778115501520
-[ RECORD 4 ]--------------------------------------------------------------------- -[ RECORD 4 ]---------------------------------------------------------------------
query | UPDATE pgbench_accounts SET abalance = abalance + ? WHERE aid = ?; query | UPDATE pgbench_accounts SET abalance = abalance + $1 WHERE aid = $2;
calls | 3000 calls | 3000
total_time | 271.741999999997 total_time | 271.741999999997
rows | 3000 rows | 3000