Show ignored constants as "$N" rather than "?" in pg_stat_statements.

The trouble with the original choice here is that "?" is a valid (and
indeed used) operator name, so that you could end up with ambiguous
statement texts like "SELECT ? ? ?".  With this patch, you instead
see "SELECT $1 ? $2", which seems significantly more readable.
The numbers used for this purpose begin after the last actual $N parameter
in the particular query.  The conflict with external parameters has its own
potential for confusion of course, but it was agreed to be an improvement
over the previous behavior.

Lukas Fittl

Discussion: https://postgr.es/m/CAP53PkxeaCuwYmF-A4J5z2-qk5fYFo5_NH3gpXGJJBxv1DMwEw@mail.gmail.com
This commit is contained in:
Tom Lane 2017-03-27 20:14:36 -04:00
parent 6462238f0d
commit a6f22e8356
4 changed files with 142 additions and 56 deletions

View File

@ -68,6 +68,13 @@ SELECT 1 AS i UNION SELECT 2 ORDER BY i;
2
(2 rows)
-- ? operator
select '{"a":1, "b":2}'::jsonb ? 'b';
?column?
----------
t
(1 row)
-- cte
WITH t(f) AS (
VALUES (1.0), (2.0)
@ -79,24 +86,35 @@ WITH t(f) AS (
2.0
(2 rows)
-- prepared statement with parameter
PREPARE pgss_test (int) AS SELECT $1, 'test' LIMIT 1;
EXECUTE pgss_test(1);
?column? | ?column?
----------+----------
1 | test
(1 row)
DEALLOCATE pgss_test;
SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
query | calls | rows
-----------------------------------------+-------+------
SELECT ? +| 4 | 4
+| |
AS "text" | |
SELECT ? + ? | 2 | 2
SELECT ? + ? + ? AS "add" | 3 | 3
SELECT ? AS "float" | 1 | 1
SELECT ? AS "int" | 2 | 2
SELECT ? AS i UNION SELECT ? ORDER BY i | 1 | 2
SELECT ? || ? | 1 | 1
SELECT pg_stat_statements_reset() | 1 | 1
WITH t(f) AS ( +| 1 | 2
VALUES (?), (?) +| |
) +| |
SELECT f FROM t ORDER BY f | |
(9 rows)
query | calls | rows
---------------------------------------------------+-------+------
PREPARE pgss_test (int) AS SELECT $1, $2 LIMIT $3 | 1 | 1
SELECT $1 +| 4 | 4
+| |
AS "text" | |
SELECT $1 + $2 | 2 | 2
SELECT $1 + $2 + $3 AS "add" | 3 | 3
SELECT $1 AS "float" | 1 | 1
SELECT $1 AS "int" | 2 | 2
SELECT $1 AS i UNION SELECT $2 ORDER BY i | 1 | 2
SELECT $1 || $2 | 1 | 1
SELECT pg_stat_statements_reset() | 1 | 1
WITH t(f) AS ( +| 1 | 2
VALUES ($1), ($2) +| |
) +| |
SELECT f FROM t ORDER BY f | |
select $1::jsonb ? $2 | 1 | 1
(11 rows)
--
-- CRUD: INSERT SELECT UPDATE DELETE on test table
@ -108,7 +126,7 @@ SELECT pg_stat_statements_reset();
(1 row)
-- utility "create table" should not be shown
CREATE TABLE test (a int, b char(20));
CREATE TEMP TABLE test (a int, b char(20));
INSERT INTO test VALUES(generate_series(1, 10), 'aaa');
UPDATE test SET b = 'bbb' WHERE a > 7;
DELETE FROM test WHERE a > 9;
@ -125,6 +143,8 @@ BEGIN \;
UPDATE test SET b = '555' WHERE a = 5 \;
UPDATE test SET b = '666' WHERE a = 6 \;
COMMIT ;
-- many INSERT values
INSERT INTO test (a, b) VALUES (1, 'a'), (2, 'b'), (3, 'c');
-- SELECT with constants
SELECT * FROM test WHERE a > 5 ORDER BY a ;
a | b
@ -147,8 +167,11 @@ SELECT *
SELECT * FROM test ORDER BY a;
a | b
---+----------------------
1 | a
1 | 111
2 | b
2 | 222
3 | c
3 | 333
4 | 444
5 | 555
@ -156,19 +179,35 @@ SELECT * FROM test ORDER BY a;
7 | aaa
8 | bbb
9 | bbb
(9 rows)
(12 rows)
-- SELECT with IN clause
SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5);
a | b
---+----------------------
1 | 111
2 | 222
3 | 333
4 | 444
5 | 555
1 | a
2 | b
3 | c
(8 rows)
SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
query | calls | rows
---------------------------------------------------+-------+------
DELETE FROM test WHERE a > ? | 1 | 1
INSERT INTO test VALUES(generate_series(?, ?), ?) | 1 | 10
SELECT * FROM test ORDER BY a | 1 | 9
SELECT * FROM test WHERE a > ? ORDER BY a | 2 | 4
SELECT pg_stat_statements_reset() | 1 | 1
UPDATE test SET b = ? WHERE a = ? | 6 | 6
UPDATE test SET b = ? WHERE a > ? | 1 | 3
(7 rows)
query | calls | rows
-------------------------------------------------------------+-------+------
DELETE FROM test WHERE a > $1 | 1 | 1
INSERT INTO test (a, b) VALUES ($1, $2), ($3, $4), ($5, $6) | 1 | 3
INSERT INTO test VALUES(generate_series($1, $2), $3) | 1 | 10
SELECT * FROM test ORDER BY a | 1 | 12
SELECT * FROM test WHERE a > $1 ORDER BY a | 2 | 4
SELECT * FROM test WHERE a IN ($1, $2, $3, $4, $5) | 1 | 8
SELECT pg_stat_statements_reset() | 1 | 1
UPDATE test SET b = $1 WHERE a = $2 | 6 | 6
UPDATE test SET b = $1 WHERE a > $2 | 1 | 3
(9 rows)
--
-- pg_stat_statements.track = none
@ -251,9 +290,9 @@ SELECT PLUS_ONE(10);
SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
query | calls | rows
-----------------------------------+-------+------
SELECT ?::TEXT | 1 | 1
SELECT PLUS_ONE(?) | 2 | 2
SELECT PLUS_TWO(?) | 2 | 2
SELECT $1::TEXT | 1 | 1
SELECT PLUS_ONE($1) | 2 | 2
SELECT PLUS_TWO($1) | 2 | 2
SELECT pg_stat_statements_reset() | 1 | 1
(4 rows)
@ -308,10 +347,10 @@ SELECT PLUS_ONE(1);
SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
query | calls | rows
-----------------------------------+-------+------
SELECT (i + ? + ?)::INTEGER | 2 | 2
SELECT (i + ?)::INTEGER LIMIT ? | 2 | 2
SELECT PLUS_ONE(?) | 2 | 2
SELECT PLUS_TWO(?) | 2 | 2
SELECT (i + $2 + $3)::INTEGER | 2 | 2
SELECT (i + $2)::INTEGER LIMIT $3 | 2 | 2
SELECT PLUS_ONE($1) | 2 | 2
SELECT PLUS_TWO($1) | 2 | 2
SELECT pg_stat_statements_reset() | 1 | 1
(5 rows)
@ -352,7 +391,7 @@ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
DROP FUNCTION PLUS_TWO(INTEGER) | 1 | 0
DROP TABLE IF EXISTS test | 3 | 0
DROP TABLE test | 1 | 0
SELECT ? | 1 | 1
SELECT $1 | 1 | 1
SELECT pg_stat_statements_reset() | 1 | 1
(8 rows)

View File

@ -27,10 +27,10 @@
* to blame query costs on the proper queryId.
*
* To facilitate presenting entries to users, we create "representative" query
* strings in which constants are replaced with '?' characters, to make it
* clearer what a normalized entry can represent. To save on shared memory,
* and to avoid having to truncate oversized query strings, we store these
* strings in a temporary external query-texts file. Offsets into this
* strings in which constants are replaced with parameter symbols ($n), to
* make it clearer what a normalized entry can represent. To save on shared
* memory, and to avoid having to truncate oversized query strings, we store
* these strings in a temporary external query-texts file. Offsets into this
* file are kept in shared memory.
*
* Note about locking issues: to create or delete an entry in the shared
@ -219,6 +219,9 @@ typedef struct pgssJumbleState
/* Current number of valid entries in clocations array */
int clocations_count;
/* highest Param id we've seen, in order to start normalization correctly */
int highest_extern_param_id;
} pgssJumbleState;
/*---- Local variables ----*/
@ -803,6 +806,7 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query)
jstate.clocations = (pgssLocationLen *)
palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
jstate.clocations_count = 0;
jstate.highest_extern_param_id = 0;
/* Compute query ID and mark the Query node with it */
JumbleQuery(&jstate, query);
@ -2482,6 +2486,10 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
APP_JUMB(p->paramkind);
APP_JUMB(p->paramid);
APP_JUMB(p->paramtype);
/* Also, track the highest external Param id */
if (p->paramkind == PARAM_EXTERN &&
p->paramid > jstate->highest_extern_param_id)
jstate->highest_extern_param_id = p->paramid;
}
break;
case T_Aggref:
@ -2874,7 +2882,7 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
break;
case T_TableFunc:
{
TableFunc *tablefunc = (TableFunc *) node;
TableFunc *tablefunc = (TableFunc *) node;
JumbleExpr(jstate, tablefunc->docexpr);
JumbleExpr(jstate, tablefunc->rowexpr);
@ -2938,7 +2946,8 @@ RecordConstLocation(pgssJumbleState *jstate, int location)
* of interest, so it's worth doing.)
*
* *query_len_p contains the input string length, and is updated with
* the result string length (which cannot be longer) on exit.
* the result string length on exit. The resulting string might be longer
* or shorter depending on what happens with replacement of constants.
*
* Returns a palloc'd string.
*/
@ -2949,6 +2958,7 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query,
char *norm_query;
int query_len = *query_len_p;
int i,
norm_query_buflen, /* Space allowed for norm_query */
len_to_wrt, /* Length (in bytes) to write */
quer_loc = 0, /* Source query byte location */
n_quer_loc = 0, /* Normalized query byte location */
@ -2961,8 +2971,17 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query,
*/
fill_in_constant_lengths(jstate, query, query_loc);
/*
* Allow for $n symbols to be longer than the constants they replace.
* Constants must take at least one byte in text form, while a $n symbol
* certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
* could refine that limit based on the max value of n for the current
* query, but it hardly seems worth any extra effort to do so.
*/
norm_query_buflen = query_len + jstate->clocations_count * 10;
/* Allocate result buffer */
norm_query = palloc(query_len + 1);
norm_query = palloc(norm_query_buflen + 1);
for (i = 0; i < jstate->clocations_count; i++)
{
@ -2986,8 +3005,9 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query,
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt;
/* And insert a '?' in place of the constant token */
norm_query[n_quer_loc++] = '?';
/* And insert a param symbol in place of the constant token */
n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
i + 1 + jstate->highest_extern_param_id);
quer_loc = off + tok_len;
last_off = off;
@ -3004,7 +3024,7 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query,
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt;
Assert(n_quer_loc <= query_len);
Assert(n_quer_loc <= norm_query_buflen);
norm_query[n_quer_loc] = '\0';
*query_len_p = n_quer_loc;

View File

@ -37,12 +37,20 @@ SELECT :add + 1 + 1 AS "add" \gset
-- set operator
SELECT 1 AS i UNION SELECT 2 ORDER BY i;
-- ? operator
select '{"a":1, "b":2}'::jsonb ? 'b';
-- cte
WITH t(f) AS (
VALUES (1.0), (2.0)
)
SELECT f FROM t ORDER BY f;
-- prepared statement with parameter
PREPARE pgss_test (int) AS SELECT $1, 'test' LIMIT 1;
EXECUTE pgss_test(1);
DEALLOCATE pgss_test;
SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
--
@ -51,7 +59,7 @@ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
SELECT pg_stat_statements_reset();
-- utility "create table" should not be shown
CREATE TABLE test (a int, b char(20));
CREATE TEMP TABLE test (a int, b char(20));
INSERT INTO test VALUES(generate_series(1, 10), 'aaa');
UPDATE test SET b = 'bbb' WHERE a > 7;
@ -74,6 +82,9 @@ UPDATE test SET b = '555' WHERE a = 5 \;
UPDATE test SET b = '666' WHERE a = 6 \;
COMMIT ;
-- many INSERT values
INSERT INTO test (a, b) VALUES (1, 'a'), (2, 'b'), (3, 'c');
-- SELECT with constants
SELECT * FROM test WHERE a > 5 ORDER BY a ;
@ -85,6 +96,9 @@ SELECT *
-- SELECT without constants
SELECT * FROM test ORDER BY a;
-- SELECT with IN clause
SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5);
SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
--

View File

@ -244,11 +244,12 @@
</para>
<para>
When a constant's value has been ignored for purposes of matching the
query to other queries, the constant is replaced by <literal>?</literal>
in the <structname>pg_stat_statements</> display. The rest of the query
text is that of the first query that had the particular
<structfield>queryid</> hash value associated with the
When a constant's value has been ignored for purposes of matching the query
to other queries, the constant is replaced by a parameter symbol, such
as <literal>$1</literal>, in the <structname>pg_stat_statements</>
display.
The rest of the query text is that of the first query that had the
particular <structfield>queryid</> hash value associated with the
<structname>pg_stat_statements</> entry.
</para>
@ -301,6 +302,18 @@
replicas. If in doubt, direct testing is recommended.
</para>
<para>
The parameter symbols used to replace constants in
representative query texts start from the next number after the
highest <literal>$</><replaceable>n</> parameter in the original query
text, or <literal>$1</> if there was none. It's worth noting that in
some cases there may be hidden parameter symbols that affect this
numbering. For example, <application>PL/pgSQL</> uses hidden parameter
symbols to insert values of function local variables into queries, so that
a <application>PL/pgSQL</> statement like <literal>SELECT i + 1 INTO j</>
would have representative text like <literal>SELECT i + $2</>.
</para>
<para>
The representative query texts are kept in an external disk file, and do
not consume shared memory. Therefore, even very lengthy query texts can
@ -481,13 +494,13 @@ bench=# SELECT query, calls, total_time, rows, 100.0 * shared_blks_hit /
nullif(shared_blks_hit + shared_blks_read, 0) AS hit_percent
FROM pg_stat_statements ORDER BY total_time DESC LIMIT 5;
-[ RECORD 1 ]---------------------------------------------------------------------
query | UPDATE pgbench_branches SET bbalance = bbalance + ? WHERE bid = ?;
query | UPDATE pgbench_branches SET bbalance = bbalance + $1 WHERE bid = $2;
calls | 3000
total_time | 9609.00100000002
rows | 2836
hit_percent | 99.9778970000200936
-[ RECORD 2 ]---------------------------------------------------------------------
query | UPDATE pgbench_tellers SET tbalance = tbalance + ? WHERE tid = ?;
query | UPDATE pgbench_tellers SET tbalance = tbalance + $1 WHERE tid = $2;
calls | 3000
total_time | 8015.156
rows | 2990
@ -499,7 +512,7 @@ total_time | 310.624
rows | 100000
hit_percent | 0.30395136778115501520
-[ RECORD 4 ]---------------------------------------------------------------------
query | UPDATE pgbench_accounts SET abalance = abalance + ? WHERE aid = ?;
query | UPDATE pgbench_accounts SET abalance = abalance + $1 WHERE aid = $2;
calls | 3000
total_time | 271.741999999997
rows | 3000