postgresql/contrib/postgres_fdw/expected/postgres_fdw.out

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

11917 lines
562 KiB
Plaintext
Raw Normal View History

-- ===================================================================
-- create FDW objects
-- ===================================================================
CREATE EXTENSION postgres_fdw;
CREATE SERVER testserver1 FOREIGN DATA WRAPPER postgres_fdw;
DO $d$
BEGIN
EXECUTE $$CREATE SERVER loopback FOREIGN DATA WRAPPER postgres_fdw
OPTIONS (dbname '$$||current_database()||$$',
port '$$||current_setting('port')||$$'
)$$;
EXECUTE $$CREATE SERVER loopback2 FOREIGN DATA WRAPPER postgres_fdw
OPTIONS (dbname '$$||current_database()||$$',
port '$$||current_setting('port')||$$'
)$$;
EXECUTE $$CREATE SERVER loopback3 FOREIGN DATA WRAPPER postgres_fdw
OPTIONS (dbname '$$||current_database()||$$',
port '$$||current_setting('port')||$$'
)$$;
END;
$d$;
CREATE USER MAPPING FOR public SERVER testserver1
OPTIONS (user 'value', password 'value');
CREATE USER MAPPING FOR CURRENT_USER SERVER loopback;
CREATE USER MAPPING FOR CURRENT_USER SERVER loopback2;
CREATE USER MAPPING FOR public SERVER loopback3;
-- ===================================================================
-- create objects used through FDW loopback server
-- ===================================================================
CREATE TYPE user_enum AS ENUM ('foo', 'bar', 'buz');
CREATE SCHEMA "S 1";
CREATE TABLE "S 1"."T 1" (
"C 1" int NOT NULL,
c2 int NOT NULL,
c3 text,
c4 timestamptz,
c5 timestamp,
c6 varchar(10),
c7 char(10),
c8 user_enum,
CONSTRAINT t1_pkey PRIMARY KEY ("C 1")
);
CREATE TABLE "S 1"."T 2" (
c1 int NOT NULL,
c2 text,
CONSTRAINT t2_pkey PRIMARY KEY (c1)
);
CREATE TABLE "S 1"."T 3" (
c1 int NOT NULL,
c2 int NOT NULL,
c3 text,
CONSTRAINT t3_pkey PRIMARY KEY (c1)
);
CREATE TABLE "S 1"."T 4" (
c1 int NOT NULL,
c2 int NOT NULL,
c3 text,
CONSTRAINT t4_pkey PRIMARY KEY (c1)
);
-- Disable autovacuum for these tables to avoid unexpected effects of that
ALTER TABLE "S 1"."T 1" SET (autovacuum_enabled = 'false');
ALTER TABLE "S 1"."T 2" SET (autovacuum_enabled = 'false');
ALTER TABLE "S 1"."T 3" SET (autovacuum_enabled = 'false');
ALTER TABLE "S 1"."T 4" SET (autovacuum_enabled = 'false');
INSERT INTO "S 1"."T 1"
SELECT id,
id % 10,
to_char(id, 'FM00000'),
'1970-01-01'::timestamptz + ((id % 100) || ' days')::interval,
'1970-01-01'::timestamp + ((id % 100) || ' days')::interval,
id % 10,
id % 10,
'foo'::user_enum
FROM generate_series(1, 1000) id;
INSERT INTO "S 1"."T 2"
SELECT id,
'AAA' || to_char(id, 'FM000')
FROM generate_series(1, 100) id;
INSERT INTO "S 1"."T 3"
SELECT id,
id + 1,
'AAA' || to_char(id, 'FM000')
FROM generate_series(1, 100) id;
DELETE FROM "S 1"."T 3" WHERE c1 % 2 != 0; -- delete for outer join tests
INSERT INTO "S 1"."T 4"
SELECT id,
id + 1,
'AAA' || to_char(id, 'FM000')
FROM generate_series(1, 100) id;
DELETE FROM "S 1"."T 4" WHERE c1 % 3 != 0; -- delete for outer join tests
ANALYZE "S 1"."T 1";
ANALYZE "S 1"."T 2";
ANALYZE "S 1"."T 3";
ANALYZE "S 1"."T 4";
-- ===================================================================
-- create foreign tables
-- ===================================================================
CREATE FOREIGN TABLE ft1 (
c0 int,
c1 int NOT NULL,
c2 int NOT NULL,
c3 text,
c4 timestamptz,
c5 timestamp,
c6 varchar(10),
c7 char(10) default 'ft1',
c8 user_enum
) SERVER loopback;
ALTER FOREIGN TABLE ft1 DROP COLUMN c0;
CREATE FOREIGN TABLE ft2 (
c1 int NOT NULL,
c2 int NOT NULL,
cx int,
c3 text,
c4 timestamptz,
c5 timestamp,
c6 varchar(10),
c7 char(10) default 'ft2',
c8 user_enum
) SERVER loopback;
ALTER FOREIGN TABLE ft2 DROP COLUMN cx;
CREATE FOREIGN TABLE ft4 (
c1 int NOT NULL,
c2 int NOT NULL,
c3 text
) SERVER loopback OPTIONS (schema_name 'S 1', table_name 'T 3');
CREATE FOREIGN TABLE ft5 (
c1 int NOT NULL,
c2 int NOT NULL,
c3 text
) SERVER loopback OPTIONS (schema_name 'S 1', table_name 'T 4');
CREATE FOREIGN TABLE ft6 (
c1 int NOT NULL,
c2 int NOT NULL,
c3 text
) SERVER loopback2 OPTIONS (schema_name 'S 1', table_name 'T 4');
CREATE FOREIGN TABLE ft7 (
c1 int NOT NULL,
c2 int NOT NULL,
c3 text
) SERVER loopback3 OPTIONS (schema_name 'S 1', table_name 'T 4');
-- ===================================================================
-- tests for validator
-- ===================================================================
-- requiressl and some other parameters are omitted because
-- valid values for them depend on configure options
ALTER SERVER testserver1 OPTIONS (
use_remote_estimate 'false',
updatable 'true',
fdw_startup_cost '123.456',
fdw_tuple_cost '0.123',
service 'value',
connect_timeout 'value',
dbname 'value',
host 'value',
hostaddr 'value',
port 'value',
--client_encoding 'value',
application_name 'value',
--fallback_application_name 'value',
keepalives 'value',
keepalives_idle 'value',
keepalives_interval 'value',
tcp_user_timeout 'value',
-- requiressl 'value',
sslcompression 'value',
sslmode 'value',
sslcert 'value',
sslkey 'value',
sslrootcert 'value',
sslcrl 'value',
--requirepeer 'value',
krbsrvname 'value',
gsslib 'value',
gssdelegation 'value'
--replication 'value'
);
-- Error, invalid list syntax
ALTER SERVER testserver1 OPTIONS (ADD extensions 'foo; bar');
ERROR: parameter "extensions" must be a list of extension names
-- OK but gets a warning
ALTER SERVER testserver1 OPTIONS (ADD extensions 'foo, bar');
WARNING: extension "foo" is not installed
WARNING: extension "bar" is not installed
ALTER SERVER testserver1 OPTIONS (DROP extensions);
ALTER USER MAPPING FOR public SERVER testserver1
OPTIONS (DROP user, DROP password);
-- Attempt to add a valid option that's not allowed in a user mapping
ALTER USER MAPPING FOR public SERVER testserver1
OPTIONS (ADD sslmode 'require');
ERROR: invalid option "sslmode"
-- But we can add valid ones fine
ALTER USER MAPPING FOR public SERVER testserver1
OPTIONS (ADD sslpassword 'dummy');
-- Ensure valid options we haven't used in a user mapping yet are
-- permitted to check validation.
ALTER USER MAPPING FOR public SERVER testserver1
OPTIONS (ADD sslkey 'value', ADD sslcert 'value');
ALTER FOREIGN TABLE ft1 OPTIONS (schema_name 'S 1', table_name 'T 1');
ALTER FOREIGN TABLE ft2 OPTIONS (schema_name 'S 1', table_name 'T 1');
ALTER FOREIGN TABLE ft1 ALTER COLUMN c1 OPTIONS (column_name 'C 1');
ALTER FOREIGN TABLE ft2 ALTER COLUMN c1 OPTIONS (column_name 'C 1');
\det+
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
List of foreign tables
Schema | Table | Server | FDW options | Description
--------+-------+-----------+---------------------------------------+-------------
public | ft1 | loopback | (schema_name 'S 1', table_name 'T 1') |
public | ft2 | loopback | (schema_name 'S 1', table_name 'T 1') |
public | ft4 | loopback | (schema_name 'S 1', table_name 'T 3') |
public | ft5 | loopback | (schema_name 'S 1', table_name 'T 4') |
public | ft6 | loopback2 | (schema_name 'S 1', table_name 'T 4') |
public | ft7 | loopback3 | (schema_name 'S 1', table_name 'T 4') |
postgres_fdw: Fix tests for CLOBBER_CACHE_ALWAYS. The regression tests added in commits 708d165ddb and 411ae64997 caused buildfarm failures when CLOBBER_CACHE_ALWAYS was enabled. This commit stabilizes those tests. The foreign server connections established by postgres_fdw behaves differently depending on whether CLOBBER_CACHE_ALWAYS is enabled or not. If it's not enabled, those connections are cached. On the other hand, if it's enabled, when the connections are established outside transaction block, they are not cached (i.e., they are immediately closed at the end of query that established them). So the subsequent postgres_fdw_get_connections() cannot list those connections and postgres_fdw_disconnect() cannot close them (because they are already closed). When the connections are established inside transaction block, they are cached whether CLOBBER_CACHE_ALWAYS was enabled or not. But if it's enabled, they are immediately marked as invalid, otherwise not. This causes the subsequent postgres_fdw_get_connections() to return different result in "valid" column depending on whether CLOBBER_CACHE_ALWAYS was enabled or not. This commit prevents the above differences of behavior from affecting the regression tests. Per buildfarm failure on trilobite. Original patch by Bharath Rupireddy. I (Fujii Masao) extracted the regression test fix from that and revised it a bit. Reported-by: Tom Lane Author: Bharath Rupireddy Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/2688508.1611865371@sss.pgh.pa.us
2021-01-30 02:12:22 +01:00
(6 rows)
-- Test that alteration of server options causes reconnection
-- Remote's errors might be non-English, so hide them to ensure stable results
\set VERBOSITY terse
SELECT c3, c4 FROM ft1 ORDER BY c3, c1 LIMIT 1; -- should work
c3 | c4
-------+------------------------------
00001 | Fri Jan 02 00:00:00 1970 PST
(1 row)
ALTER SERVER loopback OPTIONS (SET dbname 'no such database');
SELECT c3, c4 FROM ft1 ORDER BY c3, c1 LIMIT 1; -- should fail
ERROR: could not connect to server "loopback"
DO $d$
BEGIN
EXECUTE $$ALTER SERVER loopback
OPTIONS (SET dbname '$$||current_database()||$$')$$;
END;
$d$;
SELECT c3, c4 FROM ft1 ORDER BY c3, c1 LIMIT 1; -- should work again
c3 | c4
-------+------------------------------
00001 | Fri Jan 02 00:00:00 1970 PST
(1 row)
-- Test that alteration of user mapping options causes reconnection
ALTER USER MAPPING FOR CURRENT_USER SERVER loopback
OPTIONS (ADD user 'no such user');
SELECT c3, c4 FROM ft1 ORDER BY c3, c1 LIMIT 1; -- should fail
ERROR: could not connect to server "loopback"
ALTER USER MAPPING FOR CURRENT_USER SERVER loopback
OPTIONS (DROP user);
SELECT c3, c4 FROM ft1 ORDER BY c3, c1 LIMIT 1; -- should work again
c3 | c4
-------+------------------------------
00001 | Fri Jan 02 00:00:00 1970 PST
(1 row)
\set VERBOSITY default
-- Now we should be able to run ANALYZE.
-- To exercise multiple code paths, we use local stats on ft1
-- and remote-estimate mode on ft2.
ANALYZE ft1;
ALTER FOREIGN TABLE ft2 OPTIONS (use_remote_estimate 'true');
-- ===================================================================
-- test error case for create publication on foreign table
-- ===================================================================
CREATE PUBLICATION testpub_ftbl FOR TABLE ft1; -- should fail
ERROR: cannot add relation "ft1" to publication
DETAIL: This operation is not supported for foreign tables.
-- ===================================================================
-- simple queries
-- ===================================================================
-- single table without alias
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (COSTS OFF) SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10;
QUERY PLAN
---------------------
Foreign Scan on ft1
(1 row)
SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
-----+----+-------+------------------------------+--------------------------+----+------------+-----
101 | 1 | 00101 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo
102 | 2 | 00102 | Sat Jan 03 00:00:00 1970 PST | Sat Jan 03 00:00:00 1970 | 2 | 2 | foo
103 | 3 | 00103 | Sun Jan 04 00:00:00 1970 PST | Sun Jan 04 00:00:00 1970 | 3 | 3 | foo
104 | 4 | 00104 | Mon Jan 05 00:00:00 1970 PST | Mon Jan 05 00:00:00 1970 | 4 | 4 | foo
105 | 5 | 00105 | Tue Jan 06 00:00:00 1970 PST | Tue Jan 06 00:00:00 1970 | 5 | 5 | foo
106 | 6 | 00106 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo
107 | 7 | 00107 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7 | 7 | foo
108 | 8 | 00108 | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8 | 8 | foo
109 | 9 | 00109 | Sat Jan 10 00:00:00 1970 PST | Sat Jan 10 00:00:00 1970 | 9 | 9 | foo
110 | 0 | 00110 | Sun Jan 11 00:00:00 1970 PST | Sun Jan 11 00:00:00 1970 | 0 | 0 | foo
(10 rows)
-- single table with alias - also test that tableoid sort is not pushed to remote side
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1, t1.tableoid OFFSET 100 LIMIT 10;
QUERY PLAN
-------------------------------------------------------------------------------------
Limit
Output: c1, c2, c3, c4, c5, c6, c7, c8, tableoid
-> Sort
Output: c1, c2, c3, c4, c5, c6, c7, c8, tableoid
Sort Key: t1.c3, t1.c1, t1.tableoid
-> Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8, tableoid
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
(8 rows)
SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1, t1.tableoid OFFSET 100 LIMIT 10;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
-----+----+-------+------------------------------+--------------------------+----+------------+-----
101 | 1 | 00101 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo
102 | 2 | 00102 | Sat Jan 03 00:00:00 1970 PST | Sat Jan 03 00:00:00 1970 | 2 | 2 | foo
103 | 3 | 00103 | Sun Jan 04 00:00:00 1970 PST | Sun Jan 04 00:00:00 1970 | 3 | 3 | foo
104 | 4 | 00104 | Mon Jan 05 00:00:00 1970 PST | Mon Jan 05 00:00:00 1970 | 4 | 4 | foo
105 | 5 | 00105 | Tue Jan 06 00:00:00 1970 PST | Tue Jan 06 00:00:00 1970 | 5 | 5 | foo
106 | 6 | 00106 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo
107 | 7 | 00107 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7 | 7 | foo
108 | 8 | 00108 | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8 | 8 | foo
109 | 9 | 00109 | Sat Jan 10 00:00:00 1970 PST | Sat Jan 10 00:00:00 1970 | 9 | 9 | foo
110 | 0 | 00110 | Sun Jan 11 00:00:00 1970 PST | Sun Jan 11 00:00:00 1970 | 0 | 0 | foo
(10 rows)
-- whole-row reference
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) SELECT t1 FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: t1.*, c3, c1
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER BY c3 ASC NULLS LAST, "C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint
(3 rows)
SELECT t1 FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
t1
--------------------------------------------------------------------------------------------
(101,1,00101,"Fri Jan 02 00:00:00 1970 PST","Fri Jan 02 00:00:00 1970",1,"1 ",foo)
(102,2,00102,"Sat Jan 03 00:00:00 1970 PST","Sat Jan 03 00:00:00 1970",2,"2 ",foo)
(103,3,00103,"Sun Jan 04 00:00:00 1970 PST","Sun Jan 04 00:00:00 1970",3,"3 ",foo)
(104,4,00104,"Mon Jan 05 00:00:00 1970 PST","Mon Jan 05 00:00:00 1970",4,"4 ",foo)
(105,5,00105,"Tue Jan 06 00:00:00 1970 PST","Tue Jan 06 00:00:00 1970",5,"5 ",foo)
(106,6,00106,"Wed Jan 07 00:00:00 1970 PST","Wed Jan 07 00:00:00 1970",6,"6 ",foo)
(107,7,00107,"Thu Jan 08 00:00:00 1970 PST","Thu Jan 08 00:00:00 1970",7,"7 ",foo)
(108,8,00108,"Fri Jan 09 00:00:00 1970 PST","Fri Jan 09 00:00:00 1970",8,"8 ",foo)
(109,9,00109,"Sat Jan 10 00:00:00 1970 PST","Sat Jan 10 00:00:00 1970",9,"9 ",foo)
(110,0,00110,"Sun Jan 11 00:00:00 1970 PST","Sun Jan 11 00:00:00 1970",0,"0 ",foo)
(10 rows)
-- empty result
SELECT * FROM ft1 WHERE false;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
----+----+----+----+----+----+----+----
(0 rows)
-- with WHERE clause
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE t1.c1 = 101 AND t1.c6 = '1' AND t1.c7 >= '1';
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((c7 >= '1')) AND (("C 1" = 101)) AND ((c6 = '1'))
(3 rows)
SELECT * FROM ft1 t1 WHERE t1.c1 = 101 AND t1.c6 = '1' AND t1.c7 >= '1';
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
-----+----+-------+------------------------------+--------------------------+----+------------+-----
101 | 1 | 00101 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo
(1 row)
-- with FOR UPDATE/SHARE
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = 101 FOR UPDATE;
QUERY PLAN
----------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8, t1.*
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = 101)) FOR UPDATE
(3 rows)
SELECT * FROM ft1 t1 WHERE c1 = 101 FOR UPDATE;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
-----+----+-------+------------------------------+--------------------------+----+------------+-----
101 | 1 | 00101 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo
(1 row)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = 102 FOR SHARE;
QUERY PLAN
---------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8, t1.*
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = 102)) FOR SHARE
(3 rows)
SELECT * FROM ft1 t1 WHERE c1 = 102 FOR SHARE;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
-----+----+-------+------------------------------+--------------------------+----+------------+-----
102 | 2 | 00102 | Sat Jan 03 00:00:00 1970 PST | Sat Jan 03 00:00:00 1970 | 2 | 2 | foo
(1 row)
-- aggregate
SELECT COUNT(*) FROM ft1 t1;
count
-------
1000
(1 row)
-- subquery
SELECT * FROM ft1 t1 WHERE t1.c3 IN (SELECT c3 FROM ft2 t2 WHERE c1 <= 10) ORDER BY c1;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
----+----+-------+------------------------------+--------------------------+----+------------+-----
1 | 1 | 00001 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo
2 | 2 | 00002 | Sat Jan 03 00:00:00 1970 PST | Sat Jan 03 00:00:00 1970 | 2 | 2 | foo
3 | 3 | 00003 | Sun Jan 04 00:00:00 1970 PST | Sun Jan 04 00:00:00 1970 | 3 | 3 | foo
4 | 4 | 00004 | Mon Jan 05 00:00:00 1970 PST | Mon Jan 05 00:00:00 1970 | 4 | 4 | foo
5 | 5 | 00005 | Tue Jan 06 00:00:00 1970 PST | Tue Jan 06 00:00:00 1970 | 5 | 5 | foo
6 | 6 | 00006 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo
7 | 7 | 00007 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7 | 7 | foo
8 | 8 | 00008 | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8 | 8 | foo
9 | 9 | 00009 | Sat Jan 10 00:00:00 1970 PST | Sat Jan 10 00:00:00 1970 | 9 | 9 | foo
10 | 0 | 00010 | Sun Jan 11 00:00:00 1970 PST | Sun Jan 11 00:00:00 1970 | 0 | 0 | foo
(10 rows)
-- subquery+MAX
SELECT * FROM ft1 t1 WHERE t1.c3 = (SELECT MAX(c3) FROM ft2 t2) ORDER BY c1;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
------+----+-------+------------------------------+--------------------------+----+------------+-----
1000 | 0 | 01000 | Thu Jan 01 00:00:00 1970 PST | Thu Jan 01 00:00:00 1970 | 0 | 0 | foo
(1 row)
-- used in CTE
WITH t1 AS (SELECT * FROM ft1 WHERE c1 <= 10) SELECT t2.c1, t2.c2, t2.c3, t2.c4 FROM t1, ft2 t2 WHERE t1.c1 = t2.c1 ORDER BY t1.c1;
c1 | c2 | c3 | c4
----+----+-------+------------------------------
1 | 1 | 00001 | Fri Jan 02 00:00:00 1970 PST
2 | 2 | 00002 | Sat Jan 03 00:00:00 1970 PST
3 | 3 | 00003 | Sun Jan 04 00:00:00 1970 PST
4 | 4 | 00004 | Mon Jan 05 00:00:00 1970 PST
5 | 5 | 00005 | Tue Jan 06 00:00:00 1970 PST
6 | 6 | 00006 | Wed Jan 07 00:00:00 1970 PST
7 | 7 | 00007 | Thu Jan 08 00:00:00 1970 PST
8 | 8 | 00008 | Fri Jan 09 00:00:00 1970 PST
9 | 9 | 00009 | Sat Jan 10 00:00:00 1970 PST
10 | 0 | 00010 | Sun Jan 11 00:00:00 1970 PST
(10 rows)
-- fixed values
SELECT 'fixed', NULL FROM ft1 t1 WHERE c1 = 1;
?column? | ?column?
----------+----------
fixed |
(1 row)
-- Test forcing the remote server to produce sorted data for a merge join.
SET enable_hashjoin TO false;
SET enable_nestloop TO false;
-- inner join; expressions in the clauses appear in the equivalence class list
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2."C 1" FROM ft2 t1 JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------------------------
Limit
Output: t1.c1, t2."C 1"
-> Merge Join
Output: t1.c1, t2."C 1"
Inner Unique: true
Merge Cond: (t1.c1 = t2."C 1")
-> Foreign Scan on public.ft2 t1
Output: t1.c1
Remote SQL: SELECT "C 1" FROM "S 1"."T 1" ORDER BY "C 1" ASC NULLS LAST
-> Index Only Scan using t1_pkey on "S 1"."T 1" t2
Output: t2."C 1"
(11 rows)
SELECT t1.c1, t2."C 1" FROM ft2 t1 JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10;
c1 | C 1
-----+-----
101 | 101
102 | 102
103 | 103
104 | 104
105 | 105
106 | 106
107 | 107
108 | 108
109 | 109
110 | 110
(10 rows)
-- outer join; expressions in the clauses do not appear in equivalence class
-- list but no output change as compared to the previous query
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2."C 1" FROM ft2 t1 LEFT JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------------------------
Limit
Output: t1.c1, t2."C 1"
-> Merge Left Join
Output: t1.c1, t2."C 1"
Inner Unique: true
Merge Cond: (t1.c1 = t2."C 1")
-> Foreign Scan on public.ft2 t1
Output: t1.c1
Remote SQL: SELECT "C 1" FROM "S 1"."T 1" ORDER BY "C 1" ASC NULLS LAST
-> Index Only Scan using t1_pkey on "S 1"."T 1" t2
Output: t2."C 1"
(11 rows)
SELECT t1.c1, t2."C 1" FROM ft2 t1 LEFT JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10;
c1 | C 1
-----+-----
101 | 101
102 | 102
103 | 103
104 | 104
105 | 105
106 | 106
107 | 107
108 | 108
109 | 109
110 | 110
(10 rows)
-- A join between local table and foreign join. ORDER BY clause is added to the
-- foreign join so that the local table can be joined using merge join strategy.
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1."C 1" FROM "S 1"."T 1" t1 left join ft1 t2 join ft2 t3 on (t2.c1 = t3.c1) on (t3.c1 = t1."C 1") OFFSET 100 LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: t1."C 1"
-> Merge Right Join
Output: t1."C 1"
Inner Unique: true
Merge Cond: (t3.c1 = t1."C 1")
-> Foreign Scan
Output: t3.c1
Relations: (public.ft1 t2) INNER JOIN (public.ft2 t3)
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Remote SQL: SELECT r3."C 1" FROM ("S 1"."T 1" r2 INNER JOIN "S 1"."T 1" r3 ON (((r3."C 1" = r2."C 1")))) ORDER BY r2."C 1" ASC NULLS LAST
-> Index Only Scan using t1_pkey on "S 1"."T 1" t1
Output: t1."C 1"
(12 rows)
SELECT t1."C 1" FROM "S 1"."T 1" t1 left join ft1 t2 join ft2 t3 on (t2.c1 = t3.c1) on (t3.c1 = t1."C 1") OFFSET 100 LIMIT 10;
C 1
-----
101
102
103
104
105
106
107
108
109
110
(10 rows)
-- Test similar to above, except that the full join prevents any equivalence
-- classes from being merged. This produces single relation equivalence classes
-- included in join restrictions.
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1."C 1", t2.c1, t3.c1 FROM "S 1"."T 1" t1 left join ft1 t2 full join ft2 t3 on (t2.c1 = t3.c1) on (t3.c1 = t1."C 1") OFFSET 100 LIMIT 10;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: t1."C 1", t2.c1, t3.c1
-> Merge Right Join
Output: t1."C 1", t2.c1, t3.c1
Inner Unique: true
Merge Cond: (t3.c1 = t1."C 1")
-> Foreign Scan
Output: t3.c1, t2.c1
Relations: (public.ft2 t3) LEFT JOIN (public.ft1 t2)
Remote SQL: SELECT r3."C 1", r2."C 1" FROM ("S 1"."T 1" r3 LEFT JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r3."C 1")))) ORDER BY r3."C 1" ASC NULLS LAST
-> Index Only Scan using t1_pkey on "S 1"."T 1" t1
Output: t1."C 1"
(12 rows)
SELECT t1."C 1", t2.c1, t3.c1 FROM "S 1"."T 1" t1 left join ft1 t2 full join ft2 t3 on (t2.c1 = t3.c1) on (t3.c1 = t1."C 1") OFFSET 100 LIMIT 10;
C 1 | c1 | c1
-----+-----+-----
101 | 101 | 101
102 | 102 | 102
103 | 103 | 103
104 | 104 | 104
105 | 105 | 105
106 | 106 | 106
107 | 107 | 107
108 | 108 | 108
109 | 109 | 109
110 | 110 | 110
(10 rows)
-- Test similar to above with all full outer joins
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1."C 1", t2.c1, t3.c1 FROM "S 1"."T 1" t1 full join ft1 t2 full join ft2 t3 on (t2.c1 = t3.c1) on (t3.c1 = t1."C 1") OFFSET 100 LIMIT 10;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: t1."C 1", t2.c1, t3.c1
-> Merge Full Join
Output: t1."C 1", t2.c1, t3.c1
Inner Unique: true
Merge Cond: (t3.c1 = t1."C 1")
-> Foreign Scan
Output: t2.c1, t3.c1
Relations: (public.ft1 t2) FULL JOIN (public.ft2 t3)
Remote SQL: SELECT r2."C 1", r3."C 1" FROM ("S 1"."T 1" r2 FULL JOIN "S 1"."T 1" r3 ON (((r2."C 1" = r3."C 1")))) ORDER BY r3."C 1" ASC NULLS LAST
-> Index Only Scan using t1_pkey on "S 1"."T 1" t1
Output: t1."C 1"
(12 rows)
SELECT t1."C 1", t2.c1, t3.c1 FROM "S 1"."T 1" t1 full join ft1 t2 full join ft2 t3 on (t2.c1 = t3.c1) on (t3.c1 = t1."C 1") OFFSET 100 LIMIT 10;
C 1 | c1 | c1
-----+-----+-----
101 | 101 | 101
102 | 102 | 102
103 | 103 | 103
104 | 104 | 104
105 | 105 | 105
106 | 106 | 106
107 | 107 | 107
108 | 108 | 108
109 | 109 | 109
110 | 110 | 110
(10 rows)
RESET enable_hashjoin;
RESET enable_nestloop;
-- Test executing assertion in estimate_path_cost_size() that makes sure that
-- retrieved_rows for foreign rel re-used to cost pre-sorted foreign paths is
-- a sensible value even when the rel has tuples=0
CREATE TABLE loct_empty (c1 int NOT NULL, c2 text);
CREATE FOREIGN TABLE ft_empty (c1 int NOT NULL, c2 text)
SERVER loopback OPTIONS (table_name 'loct_empty');
INSERT INTO loct_empty
SELECT id, 'AAA' || to_char(id, 'FM000') FROM generate_series(1, 100) id;
DELETE FROM loct_empty;
ANALYZE ft_empty;
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft_empty ORDER BY c1;
QUERY PLAN
-------------------------------------------------------------------------------
Foreign Scan on public.ft_empty
Output: c1, c2
Remote SQL: SELECT c1, c2 FROM public.loct_empty ORDER BY c1 ASC NULLS LAST
(3 rows)
-- ===================================================================
-- WHERE with remotely-executable conditions
-- ===================================================================
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE t1.c1 = 1; -- Var, OpExpr(b), Const
QUERY PLAN
---------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = 1))
(3 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE t1.c1 = 100 AND t1.c2 = 0; -- BoolExpr
QUERY PLAN
--------------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = 100)) AND ((c2 = 0))
(3 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 IS NULL; -- NullTest
QUERY PLAN
-------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" IS NULL))
(3 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 IS NOT NULL; -- NullTest
QUERY PLAN
-----------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" IS NOT NULL))
(3 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE round(abs(c1), 0) = 1; -- FuncExpr
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((round(abs("C 1"), 0) = 1::numeric))
(3 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = -c1; -- OpExpr(l)
QUERY PLAN
-----------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = (- "C 1")))
(3 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE (c1 IS NOT NULL) IS DISTINCT FROM (c1 IS NOT NULL); -- DistinctExpr
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((("C 1" IS NOT NULL) IS DISTINCT FROM ("C 1" IS NOT NULL)))
(3 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = ANY(ARRAY[c2, 1, c1 + 0]); -- ScalarArrayOpExpr
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = ANY (ARRAY[c2, 1, ("C 1" + 0)])))
(3 rows)
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = (ARRAY[c1,c2,3])[1]; -- SubscriptingRef
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = ((ARRAY["C 1", c2, 3])[1])))
(3 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c6 = E'foo''s\\bar'; -- check special chars
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
QUERY PLAN
-------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((c6 = E'foo''s\\bar'))
(3 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c8 = 'foo'; -- can't be sent to remote
QUERY PLAN
-------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Filter: (t1.c8 = 'foo'::user_enum)
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
(4 rows)
-- parameterized remote path for foreign table
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM "S 1"."T 1" a, ft2 b WHERE a."C 1" = 47 AND b.c1 = a.c2;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------
Nested Loop
Output: a."C 1", a.c2, a.c3, a.c4, a.c5, a.c6, a.c7, a.c8, b.c1, b.c2, b.c3, b.c4, b.c5, b.c6, b.c7, b.c8
-> Index Scan using t1_pkey on "S 1"."T 1" a
Output: a."C 1", a.c2, a.c3, a.c4, a.c5, a.c6, a.c7, a.c8
Index Cond: (a."C 1" = 47)
-> Foreign Scan on public.ft2 b
Output: b.c1, b.c2, b.c3, b.c4, b.c5, b.c6, b.c7, b.c8
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = $1::integer))
(8 rows)
SELECT * FROM "S 1"."T 1" a, ft2 b WHERE a."C 1" = 47 AND b.c1 = a.c2;
C 1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 | c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
-----+----+-------+------------------------------+--------------------------+----+------------+-----+----+----+-------+------------------------------+--------------------------+----+------------+-----
47 | 7 | 00047 | Tue Feb 17 00:00:00 1970 PST | Tue Feb 17 00:00:00 1970 | 7 | 7 | foo | 7 | 7 | 00007 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7 | 7 | foo
(1 row)
-- check both safe and unsafe join conditions
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM ft2 a, ft2 b
WHERE a.c2 = 6 AND b.c1 = a.c1 AND a.c8 = 'foo' AND b.c7 = upper(a.c7);
QUERY PLAN
-------------------------------------------------------------------------------------------------------------
Nested Loop
Output: a.c1, a.c2, a.c3, a.c4, a.c5, a.c6, a.c7, a.c8, b.c1, b.c2, b.c3, b.c4, b.c5, b.c6, b.c7, b.c8
-> Foreign Scan on public.ft2 a
Output: a.c1, a.c2, a.c3, a.c4, a.c5, a.c6, a.c7, a.c8
Filter: (a.c8 = 'foo'::user_enum)
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((c2 = 6))
-> Foreign Scan on public.ft2 b
Output: b.c1, b.c2, b.c3, b.c4, b.c5, b.c6, b.c7, b.c8
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Filter: ((b.c7)::text = upper((a.c7)::text))
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (($1::integer = "C 1"))
(10 rows)
SELECT * FROM ft2 a, ft2 b
WHERE a.c2 = 6 AND b.c1 = a.c1 AND a.c8 = 'foo' AND b.c7 = upper(a.c7);
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 | c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
-----+----+-------+------------------------------+--------------------------+----+------------+-----+-----+----+-------+------------------------------+--------------------------+----+------------+-----
6 | 6 | 00006 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo | 6 | 6 | 00006 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo
16 | 6 | 00016 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo | 16 | 6 | 00016 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo
26 | 6 | 00026 | Tue Jan 27 00:00:00 1970 PST | Tue Jan 27 00:00:00 1970 | 6 | 6 | foo | 26 | 6 | 00026 | Tue Jan 27 00:00:00 1970 PST | Tue Jan 27 00:00:00 1970 | 6 | 6 | foo
36 | 6 | 00036 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo | 36 | 6 | 00036 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo
46 | 6 | 00046 | Mon Feb 16 00:00:00 1970 PST | Mon Feb 16 00:00:00 1970 | 6 | 6 | foo | 46 | 6 | 00046 | Mon Feb 16 00:00:00 1970 PST | Mon Feb 16 00:00:00 1970 | 6 | 6 | foo
56 | 6 | 00056 | Thu Feb 26 00:00:00 1970 PST | Thu Feb 26 00:00:00 1970 | 6 | 6 | foo | 56 | 6 | 00056 | Thu Feb 26 00:00:00 1970 PST | Thu Feb 26 00:00:00 1970 | 6 | 6 | foo
66 | 6 | 00066 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo | 66 | 6 | 00066 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo
76 | 6 | 00076 | Wed Mar 18 00:00:00 1970 PST | Wed Mar 18 00:00:00 1970 | 6 | 6 | foo | 76 | 6 | 00076 | Wed Mar 18 00:00:00 1970 PST | Wed Mar 18 00:00:00 1970 | 6 | 6 | foo
86 | 6 | 00086 | Sat Mar 28 00:00:00 1970 PST | Sat Mar 28 00:00:00 1970 | 6 | 6 | foo | 86 | 6 | 00086 | Sat Mar 28 00:00:00 1970 PST | Sat Mar 28 00:00:00 1970 | 6 | 6 | foo
96 | 6 | 00096 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo | 96 | 6 | 00096 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo
106 | 6 | 00106 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo | 106 | 6 | 00106 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo
116 | 6 | 00116 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo | 116 | 6 | 00116 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo
126 | 6 | 00126 | Tue Jan 27 00:00:00 1970 PST | Tue Jan 27 00:00:00 1970 | 6 | 6 | foo | 126 | 6 | 00126 | Tue Jan 27 00:00:00 1970 PST | Tue Jan 27 00:00:00 1970 | 6 | 6 | foo
136 | 6 | 00136 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo | 136 | 6 | 00136 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo
146 | 6 | 00146 | Mon Feb 16 00:00:00 1970 PST | Mon Feb 16 00:00:00 1970 | 6 | 6 | foo | 146 | 6 | 00146 | Mon Feb 16 00:00:00 1970 PST | Mon Feb 16 00:00:00 1970 | 6 | 6 | foo
156 | 6 | 00156 | Thu Feb 26 00:00:00 1970 PST | Thu Feb 26 00:00:00 1970 | 6 | 6 | foo | 156 | 6 | 00156 | Thu Feb 26 00:00:00 1970 PST | Thu Feb 26 00:00:00 1970 | 6 | 6 | foo
166 | 6 | 00166 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo | 166 | 6 | 00166 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo
176 | 6 | 00176 | Wed Mar 18 00:00:00 1970 PST | Wed Mar 18 00:00:00 1970 | 6 | 6 | foo | 176 | 6 | 00176 | Wed Mar 18 00:00:00 1970 PST | Wed Mar 18 00:00:00 1970 | 6 | 6 | foo
186 | 6 | 00186 | Sat Mar 28 00:00:00 1970 PST | Sat Mar 28 00:00:00 1970 | 6 | 6 | foo | 186 | 6 | 00186 | Sat Mar 28 00:00:00 1970 PST | Sat Mar 28 00:00:00 1970 | 6 | 6 | foo
196 | 6 | 00196 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo | 196 | 6 | 00196 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo
206 | 6 | 00206 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo | 206 | 6 | 00206 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo
216 | 6 | 00216 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo | 216 | 6 | 00216 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo
226 | 6 | 00226 | Tue Jan 27 00:00:00 1970 PST | Tue Jan 27 00:00:00 1970 | 6 | 6 | foo | 226 | 6 | 00226 | Tue Jan 27 00:00:00 1970 PST | Tue Jan 27 00:00:00 1970 | 6 | 6 | foo
236 | 6 | 00236 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo | 236 | 6 | 00236 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo
246 | 6 | 00246 | Mon Feb 16 00:00:00 1970 PST | Mon Feb 16 00:00:00 1970 | 6 | 6 | foo | 246 | 6 | 00246 | Mon Feb 16 00:00:00 1970 PST | Mon Feb 16 00:00:00 1970 | 6 | 6 | foo
256 | 6 | 00256 | Thu Feb 26 00:00:00 1970 PST | Thu Feb 26 00:00:00 1970 | 6 | 6 | foo | 256 | 6 | 00256 | Thu Feb 26 00:00:00 1970 PST | Thu Feb 26 00:00:00 1970 | 6 | 6 | foo
266 | 6 | 00266 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo | 266 | 6 | 00266 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo
276 | 6 | 00276 | Wed Mar 18 00:00:00 1970 PST | Wed Mar 18 00:00:00 1970 | 6 | 6 | foo | 276 | 6 | 00276 | Wed Mar 18 00:00:00 1970 PST | Wed Mar 18 00:00:00 1970 | 6 | 6 | foo
286 | 6 | 00286 | Sat Mar 28 00:00:00 1970 PST | Sat Mar 28 00:00:00 1970 | 6 | 6 | foo | 286 | 6 | 00286 | Sat Mar 28 00:00:00 1970 PST | Sat Mar 28 00:00:00 1970 | 6 | 6 | foo
296 | 6 | 00296 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo | 296 | 6 | 00296 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo
306 | 6 | 00306 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo | 306 | 6 | 00306 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo
316 | 6 | 00316 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo | 316 | 6 | 00316 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo
326 | 6 | 00326 | Tue Jan 27 00:00:00 1970 PST | Tue Jan 27 00:00:00 1970 | 6 | 6 | foo | 326 | 6 | 00326 | Tue Jan 27 00:00:00 1970 PST | Tue Jan 27 00:00:00 1970 | 6 | 6 | foo
336 | 6 | 00336 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo | 336 | 6 | 00336 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo
346 | 6 | 00346 | Mon Feb 16 00:00:00 1970 PST | Mon Feb 16 00:00:00 1970 | 6 | 6 | foo | 346 | 6 | 00346 | Mon Feb 16 00:00:00 1970 PST | Mon Feb 16 00:00:00 1970 | 6 | 6 | foo
356 | 6 | 00356 | Thu Feb 26 00:00:00 1970 PST | Thu Feb 26 00:00:00 1970 | 6 | 6 | foo | 356 | 6 | 00356 | Thu Feb 26 00:00:00 1970 PST | Thu Feb 26 00:00:00 1970 | 6 | 6 | foo
366 | 6 | 00366 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo | 366 | 6 | 00366 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo
376 | 6 | 00376 | Wed Mar 18 00:00:00 1970 PST | Wed Mar 18 00:00:00 1970 | 6 | 6 | foo | 376 | 6 | 00376 | Wed Mar 18 00:00:00 1970 PST | Wed Mar 18 00:00:00 1970 | 6 | 6 | foo
386 | 6 | 00386 | Sat Mar 28 00:00:00 1970 PST | Sat Mar 28 00:00:00 1970 | 6 | 6 | foo | 386 | 6 | 00386 | Sat Mar 28 00:00:00 1970 PST | Sat Mar 28 00:00:00 1970 | 6 | 6 | foo
396 | 6 | 00396 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo | 396 | 6 | 00396 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo
406 | 6 | 00406 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo | 406 | 6 | 00406 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo
416 | 6 | 00416 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo | 416 | 6 | 00416 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo
426 | 6 | 00426 | Tue Jan 27 00:00:00 1970 PST | Tue Jan 27 00:00:00 1970 | 6 | 6 | foo | 426 | 6 | 00426 | Tue Jan 27 00:00:00 1970 PST | Tue Jan 27 00:00:00 1970 | 6 | 6 | foo
436 | 6 | 00436 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo | 436 | 6 | 00436 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo
446 | 6 | 00446 | Mon Feb 16 00:00:00 1970 PST | Mon Feb 16 00:00:00 1970 | 6 | 6 | foo | 446 | 6 | 00446 | Mon Feb 16 00:00:00 1970 PST | Mon Feb 16 00:00:00 1970 | 6 | 6 | foo
456 | 6 | 00456 | Thu Feb 26 00:00:00 1970 PST | Thu Feb 26 00:00:00 1970 | 6 | 6 | foo | 456 | 6 | 00456 | Thu Feb 26 00:00:00 1970 PST | Thu Feb 26 00:00:00 1970 | 6 | 6 | foo
466 | 6 | 00466 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo | 466 | 6 | 00466 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo
476 | 6 | 00476 | Wed Mar 18 00:00:00 1970 PST | Wed Mar 18 00:00:00 1970 | 6 | 6 | foo | 476 | 6 | 00476 | Wed Mar 18 00:00:00 1970 PST | Wed Mar 18 00:00:00 1970 | 6 | 6 | foo
486 | 6 | 00486 | Sat Mar 28 00:00:00 1970 PST | Sat Mar 28 00:00:00 1970 | 6 | 6 | foo | 486 | 6 | 00486 | Sat Mar 28 00:00:00 1970 PST | Sat Mar 28 00:00:00 1970 | 6 | 6 | foo
496 | 6 | 00496 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo | 496 | 6 | 00496 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo
506 | 6 | 00506 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo | 506 | 6 | 00506 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo
516 | 6 | 00516 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo | 516 | 6 | 00516 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo
526 | 6 | 00526 | Tue Jan 27 00:00:00 1970 PST | Tue Jan 27 00:00:00 1970 | 6 | 6 | foo | 526 | 6 | 00526 | Tue Jan 27 00:00:00 1970 PST | Tue Jan 27 00:00:00 1970 | 6 | 6 | foo
536 | 6 | 00536 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo | 536 | 6 | 00536 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo
546 | 6 | 00546 | Mon Feb 16 00:00:00 1970 PST | Mon Feb 16 00:00:00 1970 | 6 | 6 | foo | 546 | 6 | 00546 | Mon Feb 16 00:00:00 1970 PST | Mon Feb 16 00:00:00 1970 | 6 | 6 | foo
556 | 6 | 00556 | Thu Feb 26 00:00:00 1970 PST | Thu Feb 26 00:00:00 1970 | 6 | 6 | foo | 556 | 6 | 00556 | Thu Feb 26 00:00:00 1970 PST | Thu Feb 26 00:00:00 1970 | 6 | 6 | foo
566 | 6 | 00566 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo | 566 | 6 | 00566 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo
576 | 6 | 00576 | Wed Mar 18 00:00:00 1970 PST | Wed Mar 18 00:00:00 1970 | 6 | 6 | foo | 576 | 6 | 00576 | Wed Mar 18 00:00:00 1970 PST | Wed Mar 18 00:00:00 1970 | 6 | 6 | foo
586 | 6 | 00586 | Sat Mar 28 00:00:00 1970 PST | Sat Mar 28 00:00:00 1970 | 6 | 6 | foo | 586 | 6 | 00586 | Sat Mar 28 00:00:00 1970 PST | Sat Mar 28 00:00:00 1970 | 6 | 6 | foo
596 | 6 | 00596 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo | 596 | 6 | 00596 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo
606 | 6 | 00606 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo | 606 | 6 | 00606 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo
616 | 6 | 00616 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo | 616 | 6 | 00616 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo
626 | 6 | 00626 | Tue Jan 27 00:00:00 1970 PST | Tue Jan 27 00:00:00 1970 | 6 | 6 | foo | 626 | 6 | 00626 | Tue Jan 27 00:00:00 1970 PST | Tue Jan 27 00:00:00 1970 | 6 | 6 | foo
636 | 6 | 00636 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo | 636 | 6 | 00636 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo
646 | 6 | 00646 | Mon Feb 16 00:00:00 1970 PST | Mon Feb 16 00:00:00 1970 | 6 | 6 | foo | 646 | 6 | 00646 | Mon Feb 16 00:00:00 1970 PST | Mon Feb 16 00:00:00 1970 | 6 | 6 | foo
656 | 6 | 00656 | Thu Feb 26 00:00:00 1970 PST | Thu Feb 26 00:00:00 1970 | 6 | 6 | foo | 656 | 6 | 00656 | Thu Feb 26 00:00:00 1970 PST | Thu Feb 26 00:00:00 1970 | 6 | 6 | foo
666 | 6 | 00666 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo | 666 | 6 | 00666 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo
676 | 6 | 00676 | Wed Mar 18 00:00:00 1970 PST | Wed Mar 18 00:00:00 1970 | 6 | 6 | foo | 676 | 6 | 00676 | Wed Mar 18 00:00:00 1970 PST | Wed Mar 18 00:00:00 1970 | 6 | 6 | foo
686 | 6 | 00686 | Sat Mar 28 00:00:00 1970 PST | Sat Mar 28 00:00:00 1970 | 6 | 6 | foo | 686 | 6 | 00686 | Sat Mar 28 00:00:00 1970 PST | Sat Mar 28 00:00:00 1970 | 6 | 6 | foo
696 | 6 | 00696 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo | 696 | 6 | 00696 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo
706 | 6 | 00706 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo | 706 | 6 | 00706 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo
716 | 6 | 00716 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo | 716 | 6 | 00716 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo
726 | 6 | 00726 | Tue Jan 27 00:00:00 1970 PST | Tue Jan 27 00:00:00 1970 | 6 | 6 | foo | 726 | 6 | 00726 | Tue Jan 27 00:00:00 1970 PST | Tue Jan 27 00:00:00 1970 | 6 | 6 | foo
736 | 6 | 00736 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo | 736 | 6 | 00736 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo
746 | 6 | 00746 | Mon Feb 16 00:00:00 1970 PST | Mon Feb 16 00:00:00 1970 | 6 | 6 | foo | 746 | 6 | 00746 | Mon Feb 16 00:00:00 1970 PST | Mon Feb 16 00:00:00 1970 | 6 | 6 | foo
756 | 6 | 00756 | Thu Feb 26 00:00:00 1970 PST | Thu Feb 26 00:00:00 1970 | 6 | 6 | foo | 756 | 6 | 00756 | Thu Feb 26 00:00:00 1970 PST | Thu Feb 26 00:00:00 1970 | 6 | 6 | foo
766 | 6 | 00766 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo | 766 | 6 | 00766 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo
776 | 6 | 00776 | Wed Mar 18 00:00:00 1970 PST | Wed Mar 18 00:00:00 1970 | 6 | 6 | foo | 776 | 6 | 00776 | Wed Mar 18 00:00:00 1970 PST | Wed Mar 18 00:00:00 1970 | 6 | 6 | foo
786 | 6 | 00786 | Sat Mar 28 00:00:00 1970 PST | Sat Mar 28 00:00:00 1970 | 6 | 6 | foo | 786 | 6 | 00786 | Sat Mar 28 00:00:00 1970 PST | Sat Mar 28 00:00:00 1970 | 6 | 6 | foo
796 | 6 | 00796 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo | 796 | 6 | 00796 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo
806 | 6 | 00806 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo | 806 | 6 | 00806 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo
816 | 6 | 00816 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo | 816 | 6 | 00816 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo
826 | 6 | 00826 | Tue Jan 27 00:00:00 1970 PST | Tue Jan 27 00:00:00 1970 | 6 | 6 | foo | 826 | 6 | 00826 | Tue Jan 27 00:00:00 1970 PST | Tue Jan 27 00:00:00 1970 | 6 | 6 | foo
836 | 6 | 00836 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo | 836 | 6 | 00836 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo
846 | 6 | 00846 | Mon Feb 16 00:00:00 1970 PST | Mon Feb 16 00:00:00 1970 | 6 | 6 | foo | 846 | 6 | 00846 | Mon Feb 16 00:00:00 1970 PST | Mon Feb 16 00:00:00 1970 | 6 | 6 | foo
856 | 6 | 00856 | Thu Feb 26 00:00:00 1970 PST | Thu Feb 26 00:00:00 1970 | 6 | 6 | foo | 856 | 6 | 00856 | Thu Feb 26 00:00:00 1970 PST | Thu Feb 26 00:00:00 1970 | 6 | 6 | foo
866 | 6 | 00866 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo | 866 | 6 | 00866 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo
876 | 6 | 00876 | Wed Mar 18 00:00:00 1970 PST | Wed Mar 18 00:00:00 1970 | 6 | 6 | foo | 876 | 6 | 00876 | Wed Mar 18 00:00:00 1970 PST | Wed Mar 18 00:00:00 1970 | 6 | 6 | foo
886 | 6 | 00886 | Sat Mar 28 00:00:00 1970 PST | Sat Mar 28 00:00:00 1970 | 6 | 6 | foo | 886 | 6 | 00886 | Sat Mar 28 00:00:00 1970 PST | Sat Mar 28 00:00:00 1970 | 6 | 6 | foo
896 | 6 | 00896 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo | 896 | 6 | 00896 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo
906 | 6 | 00906 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo | 906 | 6 | 00906 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo
916 | 6 | 00916 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo | 916 | 6 | 00916 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo
926 | 6 | 00926 | Tue Jan 27 00:00:00 1970 PST | Tue Jan 27 00:00:00 1970 | 6 | 6 | foo | 926 | 6 | 00926 | Tue Jan 27 00:00:00 1970 PST | Tue Jan 27 00:00:00 1970 | 6 | 6 | foo
936 | 6 | 00936 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo | 936 | 6 | 00936 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo
946 | 6 | 00946 | Mon Feb 16 00:00:00 1970 PST | Mon Feb 16 00:00:00 1970 | 6 | 6 | foo | 946 | 6 | 00946 | Mon Feb 16 00:00:00 1970 PST | Mon Feb 16 00:00:00 1970 | 6 | 6 | foo
956 | 6 | 00956 | Thu Feb 26 00:00:00 1970 PST | Thu Feb 26 00:00:00 1970 | 6 | 6 | foo | 956 | 6 | 00956 | Thu Feb 26 00:00:00 1970 PST | Thu Feb 26 00:00:00 1970 | 6 | 6 | foo
966 | 6 | 00966 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo | 966 | 6 | 00966 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo
976 | 6 | 00976 | Wed Mar 18 00:00:00 1970 PST | Wed Mar 18 00:00:00 1970 | 6 | 6 | foo | 976 | 6 | 00976 | Wed Mar 18 00:00:00 1970 PST | Wed Mar 18 00:00:00 1970 | 6 | 6 | foo
986 | 6 | 00986 | Sat Mar 28 00:00:00 1970 PST | Sat Mar 28 00:00:00 1970 | 6 | 6 | foo | 986 | 6 | 00986 | Sat Mar 28 00:00:00 1970 PST | Sat Mar 28 00:00:00 1970 | 6 | 6 | foo
996 | 6 | 00996 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo | 996 | 6 | 00996 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo
(100 rows)
-- bug before 9.3.5 due to sloppy handling of remote-estimate parameters
SELECT * FROM ft1 WHERE c1 = ANY (ARRAY(SELECT c1 FROM ft2 WHERE c1 < 5));
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
----+----+-------+------------------------------+--------------------------+----+------------+-----
1 | 1 | 00001 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo
2 | 2 | 00002 | Sat Jan 03 00:00:00 1970 PST | Sat Jan 03 00:00:00 1970 | 2 | 2 | foo
3 | 3 | 00003 | Sun Jan 04 00:00:00 1970 PST | Sun Jan 04 00:00:00 1970 | 3 | 3 | foo
4 | 4 | 00004 | Mon Jan 05 00:00:00 1970 PST | Mon Jan 05 00:00:00 1970 | 4 | 4 | foo
(4 rows)
SELECT * FROM ft2 WHERE c1 = ANY (ARRAY(SELECT c1 FROM ft1 WHERE c1 < 5));
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
----+----+-------+------------------------------+--------------------------+----+------------+-----
1 | 1 | 00001 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo
2 | 2 | 00002 | Sat Jan 03 00:00:00 1970 PST | Sat Jan 03 00:00:00 1970 | 2 | 2 | foo
3 | 3 | 00003 | Sun Jan 04 00:00:00 1970 PST | Sun Jan 04 00:00:00 1970 | 3 | 3 | foo
4 | 4 | 00004 | Mon Jan 05 00:00:00 1970 PST | Mon Jan 05 00:00:00 1970 | 4 | 4 | foo
(4 rows)
-- we should not push order by clause with volatile expressions or unsafe
-- collations
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM ft2 ORDER BY ft2.c1, random();
QUERY PLAN
-------------------------------------------------------------------------------
Sort
Output: c1, c2, c3, c4, c5, c6, c7, c8, (random())
Sort Key: ft2.c1, (random())
-> Foreign Scan on public.ft2
Output: c1, c2, c3, c4, c5, c6, c7, c8, random()
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
(6 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM ft2 ORDER BY ft2.c1, ft2.c3 collate "C";
QUERY PLAN
-------------------------------------------------------------------------------
Sort
Output: c1, c2, c3, c4, c5, c6, c7, c8, ((c3)::text)
Sort Key: ft2.c1, ft2.c3 COLLATE "C"
-> Foreign Scan on public.ft2
Output: c1, c2, c3, c4, c5, c6, c7, c8, c3
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
(6 rows)
-- user-defined operator/function
CREATE FUNCTION postgres_fdw_abs(int) RETURNS int AS $$
BEGIN
RETURN abs($1);
END
$$ LANGUAGE plpgsql IMMUTABLE;
CREATE OPERATOR === (
LEFTARG = int,
RIGHTARG = int,
PROCEDURE = int4eq,
COMMUTATOR = ===
);
-- built-in operators and functions can be shipped for remote execution
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT count(c3) FROM ft1 t1 WHERE t1.c1 = abs(t1.c2);
QUERY PLAN
---------------------------------------------------------------------------
Foreign Scan
Output: (count(c3))
Relations: Aggregate on (public.ft1 t1)
Remote SQL: SELECT count(c3) FROM "S 1"."T 1" WHERE (("C 1" = abs(c2)))
(4 rows)
SELECT count(c3) FROM ft1 t1 WHERE t1.c1 = abs(t1.c2);
count
-------
9
(1 row)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT count(c3) FROM ft1 t1 WHERE t1.c1 = t1.c2;
QUERY PLAN
----------------------------------------------------------------------
Foreign Scan
Output: (count(c3))
Relations: Aggregate on (public.ft1 t1)
Remote SQL: SELECT count(c3) FROM "S 1"."T 1" WHERE (("C 1" = c2))
(4 rows)
SELECT count(c3) FROM ft1 t1 WHERE t1.c1 = t1.c2;
count
-------
9
(1 row)
-- by default, user-defined ones cannot
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT count(c3) FROM ft1 t1 WHERE t1.c1 = postgres_fdw_abs(t1.c2);
QUERY PLAN
-----------------------------------------------------------
Aggregate
Output: count(c3)
-> Foreign Scan on public.ft1 t1
Output: c3
Filter: (t1.c1 = postgres_fdw_abs(t1.c2))
Remote SQL: SELECT "C 1", c2, c3 FROM "S 1"."T 1"
(6 rows)
SELECT count(c3) FROM ft1 t1 WHERE t1.c1 = postgres_fdw_abs(t1.c2);
count
-------
9
(1 row)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT count(c3) FROM ft1 t1 WHERE t1.c1 === t1.c2;
QUERY PLAN
-----------------------------------------------------------
Aggregate
Output: count(c3)
-> Foreign Scan on public.ft1 t1
Output: c3
Filter: (t1.c1 === t1.c2)
Remote SQL: SELECT "C 1", c2, c3 FROM "S 1"."T 1"
(6 rows)
SELECT count(c3) FROM ft1 t1 WHERE t1.c1 === t1.c2;
count
-------
9
(1 row)
-- ORDER BY can be shipped, though
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM ft1 t1 WHERE t1.c1 === t1.c2 order by t1.c2 limit 1;
QUERY PLAN
----------------------------------------------------------------------------------------------------------
Limit
Output: c1, c2, c3, c4, c5, c6, c7, c8
-> Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Filter: (t1.c1 === t1.c2)
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER BY c2 ASC NULLS LAST
(6 rows)
SELECT * FROM ft1 t1 WHERE t1.c1 === t1.c2 order by t1.c2 limit 1;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
----+----+-------+------------------------------+--------------------------+----+------------+-----
1 | 1 | 00001 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo
(1 row)
-- but let's put them in an extension ...
ALTER EXTENSION postgres_fdw ADD FUNCTION postgres_fdw_abs(int);
ALTER EXTENSION postgres_fdw ADD OPERATOR === (int, int);
ALTER SERVER loopback OPTIONS (ADD extensions 'postgres_fdw');
-- ... now they can be shipped
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT count(c3) FROM ft1 t1 WHERE t1.c1 = postgres_fdw_abs(t1.c2);
QUERY PLAN
-----------------------------------------------------------------------------------------------
Foreign Scan
Output: (count(c3))
Relations: Aggregate on (public.ft1 t1)
Remote SQL: SELECT count(c3) FROM "S 1"."T 1" WHERE (("C 1" = public.postgres_fdw_abs(c2)))
(4 rows)
SELECT count(c3) FROM ft1 t1 WHERE t1.c1 = postgres_fdw_abs(t1.c2);
count
-------
9
(1 row)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT count(c3) FROM ft1 t1 WHERE t1.c1 === t1.c2;
QUERY PLAN
-----------------------------------------------------------------------------------------
Foreign Scan
Output: (count(c3))
Relations: Aggregate on (public.ft1 t1)
Remote SQL: SELECT count(c3) FROM "S 1"."T 1" WHERE (("C 1" OPERATOR(public.===) c2))
(4 rows)
SELECT count(c3) FROM ft1 t1 WHERE t1.c1 === t1.c2;
count
-------
9
(1 row)
-- and both ORDER BY and LIMIT can be shipped
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM ft1 t1 WHERE t1.c1 === t1.c2 order by t1.c2 limit 1;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" OPERATOR(public.===) c2)) ORDER BY c2 ASC NULLS LAST LIMIT 1::bigint
(3 rows)
SELECT * FROM ft1 t1 WHERE t1.c1 === t1.c2 order by t1.c2 limit 1;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
----+----+-------+------------------------------+--------------------------+----+------------+-----
1 | 1 | 00001 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo
(1 row)
-- Test CASE pushdown
EXPLAIN (VERBOSE, COSTS OFF)
SELECT c1,c2,c3 FROM ft2 WHERE CASE WHEN c1 > 990 THEN c1 END < 1000 ORDER BY c1;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft2
Output: c1, c2, c3
Remote SQL: SELECT "C 1", c2, c3 FROM "S 1"."T 1" WHERE (((CASE WHEN ("C 1" > 990) THEN "C 1" ELSE NULL::integer END) < 1000)) ORDER BY "C 1" ASC NULLS LAST
(3 rows)
SELECT c1,c2,c3 FROM ft2 WHERE CASE WHEN c1 > 990 THEN c1 END < 1000 ORDER BY c1;
c1 | c2 | c3
-----+----+-------
991 | 1 | 00991
992 | 2 | 00992
993 | 3 | 00993
994 | 4 | 00994
995 | 5 | 00995
996 | 6 | 00996
997 | 7 | 00997
998 | 8 | 00998
999 | 9 | 00999
(9 rows)
-- Nested CASE
EXPLAIN (VERBOSE, COSTS OFF)
SELECT c1,c2,c3 FROM ft2 WHERE CASE CASE WHEN c2 > 0 THEN c2 END WHEN 100 THEN 601 WHEN c2 THEN c2 ELSE 0 END > 600 ORDER BY c1;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft2
Output: c1, c2, c3
Remote SQL: SELECT "C 1", c2, c3 FROM "S 1"."T 1" WHERE (((CASE (CASE WHEN (c2 > 0) THEN c2 ELSE NULL::integer END) WHEN 100 THEN 601 WHEN c2 THEN c2 ELSE 0 END) > 600)) ORDER BY "C 1" ASC NULLS LAST
(3 rows)
SELECT c1,c2,c3 FROM ft2 WHERE CASE CASE WHEN c2 > 0 THEN c2 END WHEN 100 THEN 601 WHEN c2 THEN c2 ELSE 0 END > 600 ORDER BY c1;
c1 | c2 | c3
----+----+----
(0 rows)
-- CASE arg WHEN
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM ft1 WHERE c1 > (CASE mod(c1, 4) WHEN 0 THEN 1 WHEN 2 THEN 50 ELSE 100 END);
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" > (CASE mod("C 1", 4) WHEN 0 THEN 1 WHEN 2 THEN 50 ELSE 100 END)))
(3 rows)
-- CASE cannot be pushed down because of unshippable arg clause
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM ft1 WHERE c1 > (CASE random()::integer WHEN 0 THEN 1 WHEN 2 THEN 50 ELSE 100 END);
QUERY PLAN
-----------------------------------------------------------------------------------------
Foreign Scan on public.ft1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Filter: (ft1.c1 > CASE (random())::integer WHEN 0 THEN 1 WHEN 2 THEN 50 ELSE 100 END)
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
(4 rows)
-- these are shippable
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM ft1 WHERE CASE c6 WHEN 'foo' THEN true ELSE c3 < 'bar' END;
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1
Output: c1, c2, c3, c4, c5, c6, c7, c8
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((CASE c6 WHEN 'foo'::text THEN true ELSE (c3 < 'bar') END))
(3 rows)
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM ft1 WHERE CASE c3 WHEN c6 THEN true ELSE c3 < 'bar' END;
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1
Output: c1, c2, c3, c4, c5, c6, c7, c8
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((CASE c3 WHEN c6 THEN true ELSE (c3 < 'bar') END))
(3 rows)
-- but this is not because of collation
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM ft1 WHERE CASE c3 COLLATE "C" WHEN c6 THEN true ELSE c3 < 'bar' END;
QUERY PLAN
-------------------------------------------------------------------------------------
Foreign Scan on public.ft1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Filter: CASE (ft1.c3)::text WHEN ft1.c6 THEN true ELSE (ft1.c3 < 'bar'::text) END
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
(4 rows)
-- a regconfig constant referring to this text search configuration
-- is initially unshippable
CREATE TEXT SEARCH CONFIGURATION public.custom_search
(COPY = pg_catalog.english);
EXPLAIN (VERBOSE, COSTS OFF)
SELECT c1, to_tsvector('custom_search'::regconfig, c3) FROM ft1
WHERE c1 = 642 AND length(to_tsvector('custom_search'::regconfig, c3)) > 0;
QUERY PLAN
-------------------------------------------------------------------------
Foreign Scan on public.ft1
Output: c1, to_tsvector('custom_search'::regconfig, c3)
Filter: (length(to_tsvector('custom_search'::regconfig, ft1.c3)) > 0)
Remote SQL: SELECT "C 1", c3 FROM "S 1"."T 1" WHERE (("C 1" = 642))
(4 rows)
SELECT c1, to_tsvector('custom_search'::regconfig, c3) FROM ft1
WHERE c1 = 642 AND length(to_tsvector('custom_search'::regconfig, c3)) > 0;
c1 | to_tsvector
-----+-------------
642 | '00642':1
(1 row)
-- but if it's in a shippable extension, it can be shipped
ALTER EXTENSION postgres_fdw ADD TEXT SEARCH CONFIGURATION public.custom_search;
-- however, that doesn't flush the shippability cache, so do a quick reconnect
\c -
EXPLAIN (VERBOSE, COSTS OFF)
SELECT c1, to_tsvector('custom_search'::regconfig, c3) FROM ft1
WHERE c1 = 642 AND length(to_tsvector('custom_search'::regconfig, c3)) > 0;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1
Output: c1, to_tsvector('custom_search'::regconfig, c3)
Remote SQL: SELECT "C 1", c3 FROM "S 1"."T 1" WHERE (("C 1" = 642)) AND ((length(to_tsvector('public.custom_search'::regconfig, c3)) > 0))
(3 rows)
SELECT c1, to_tsvector('custom_search'::regconfig, c3) FROM ft1
WHERE c1 = 642 AND length(to_tsvector('custom_search'::regconfig, c3)) > 0;
c1 | to_tsvector
-----+-------------
642 | '00642':1
(1 row)
-- ===================================================================
-- JOIN queries
-- ===================================================================
-- Analyze ft4 and ft5 so that we have better statistics. These tables do not
-- have use_remote_estimate set.
ANALYZE ft4;
ANALYZE ft5;
-- join two tables
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t2.c1, t1.c3
Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint
(4 rows)
SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
c1 | c1
-----+-----
101 | 101
102 | 102
103 | 103
104 | 104
105 | 105
106 | 106
107 | 107
108 | 108
109 | 109
110 | 110
(10 rows)
-- join three tables
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c2, t3.c3 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) JOIN ft4 t3 ON (t3.c1 = t1.c1) ORDER BY t1.c3, t1.c1 OFFSET 10 LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t2.c2, t3.c3, t1.c3
Relations: ((public.ft1 t1) INNER JOIN (public.ft2 t2)) INNER JOIN (public.ft4 t3)
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Remote SQL: SELECT r1."C 1", r2.c2, r4.c3, r1.c3 FROM (("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1")))) INNER JOIN "S 1"."T 3" r4 ON (((r1."C 1" = r4.c1)))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 10::bigint
(4 rows)
SELECT t1.c1, t2.c2, t3.c3 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) JOIN ft4 t3 ON (t3.c1 = t1.c1) ORDER BY t1.c3, t1.c1 OFFSET 10 LIMIT 10;
c1 | c2 | c3
----+----+--------
22 | 2 | AAA022
24 | 4 | AAA024
26 | 6 | AAA026
28 | 8 | AAA028
30 | 0 | AAA030
32 | 2 | AAA032
34 | 4 | AAA034
36 | 6 | AAA036
38 | 8 | AAA038
40 | 0 | AAA040
(10 rows)
-- left outer join
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c1 FROM ft4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t2.c1
Relations: (public.ft4 t1) LEFT JOIN (public.ft5 t2)
Remote SQL: SELECT r1.c1, r2.c1 FROM ("S 1"."T 3" r1 LEFT JOIN "S 1"."T 4" r2 ON (((r1.c1 = r2.c1)))) ORDER BY r1.c1 ASC NULLS LAST, r2.c1 ASC NULLS LAST LIMIT 10::bigint OFFSET 10::bigint
(4 rows)
SELECT t1.c1, t2.c1 FROM ft4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10;
c1 | c1
----+----
22 |
24 | 24
26 |
28 |
30 | 30
32 |
34 |
36 | 36
38 |
40 |
(10 rows)
-- left outer join three tables
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 LEFT JOIN ft2 t2 ON (t1.c1 = t2.c1) LEFT JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t2.c2, t3.c3
Relations: ((public.ft2 t1) LEFT JOIN (public.ft2 t2)) LEFT JOIN (public.ft4 t3)
Remote SQL: SELECT r1."C 1", r2.c2, r4.c3 FROM (("S 1"."T 1" r1 LEFT JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) LEFT JOIN "S 1"."T 3" r4 ON (((r2."C 1" = r4.c1)))) LIMIT 10::bigint OFFSET 10::bigint
(4 rows)
SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 LEFT JOIN ft2 t2 ON (t1.c1 = t2.c1) LEFT JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10;
c1 | c2 | c3
----+----+--------
11 | 1 |
12 | 2 | AAA012
13 | 3 |
14 | 4 | AAA014
15 | 5 |
16 | 6 | AAA016
17 | 7 |
18 | 8 | AAA018
19 | 9 |
20 | 0 | AAA020
(10 rows)
-- left outer join + placement of clauses.
-- clauses within the nullable side are not pulled up, but top level clause on
-- non-nullable side is pushed into non-nullable side
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t1.c2, t2.c1, t2.c2 FROM ft4 t1 LEFT JOIN (SELECT * FROM ft5 WHERE c1 < 10) t2 ON (t1.c1 = t2.c1) WHERE t1.c1 < 10;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t1.c2, ft5.c1, ft5.c2
Relations: (public.ft4 t1) LEFT JOIN (public.ft5)
Remote SQL: SELECT r1.c1, r1.c2, r4.c1, r4.c2 FROM ("S 1"."T 3" r1 LEFT JOIN "S 1"."T 4" r4 ON (((r1.c1 = r4.c1)) AND ((r4.c1 < 10)))) WHERE ((r1.c1 < 10))
(4 rows)
SELECT t1.c1, t1.c2, t2.c1, t2.c2 FROM ft4 t1 LEFT JOIN (SELECT * FROM ft5 WHERE c1 < 10) t2 ON (t1.c1 = t2.c1) WHERE t1.c1 < 10;
c1 | c2 | c1 | c2
----+----+----+----
2 | 3 | |
4 | 5 | |
6 | 7 | 6 | 7
8 | 9 | |
(4 rows)
-- clauses within the nullable side are not pulled up, but the top level clause
-- on nullable side is not pushed down into nullable side
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t1.c2, t2.c1, t2.c2 FROM ft4 t1 LEFT JOIN (SELECT * FROM ft5 WHERE c1 < 10) t2 ON (t1.c1 = t2.c1)
WHERE (t2.c1 < 10 OR t2.c1 IS NULL) AND t1.c1 < 10;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t1.c2, ft5.c1, ft5.c2
Relations: (public.ft4 t1) LEFT JOIN (public.ft5)
Remote SQL: SELECT r1.c1, r1.c2, r4.c1, r4.c2 FROM ("S 1"."T 3" r1 LEFT JOIN "S 1"."T 4" r4 ON (((r1.c1 = r4.c1)) AND ((r4.c1 < 10)))) WHERE (((r4.c1 < 10) OR (r4.c1 IS NULL))) AND ((r1.c1 < 10))
(4 rows)
SELECT t1.c1, t1.c2, t2.c1, t2.c2 FROM ft4 t1 LEFT JOIN (SELECT * FROM ft5 WHERE c1 < 10) t2 ON (t1.c1 = t2.c1)
WHERE (t2.c1 < 10 OR t2.c1 IS NULL) AND t1.c1 < 10;
c1 | c2 | c1 | c2
----+----+----+----
2 | 3 | |
4 | 5 | |
6 | 7 | 6 | 7
8 | 9 | |
(4 rows)
-- right outer join
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c1 FROM ft5 t1 RIGHT JOIN ft4 t2 ON (t1.c1 = t2.c1) ORDER BY t2.c1, t1.c1 OFFSET 10 LIMIT 10;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t2.c1
Relations: (public.ft4 t2) LEFT JOIN (public.ft5 t1)
Remote SQL: SELECT r1.c1, r2.c1 FROM ("S 1"."T 3" r2 LEFT JOIN "S 1"."T 4" r1 ON (((r1.c1 = r2.c1)))) ORDER BY r2.c1 ASC NULLS LAST, r1.c1 ASC NULLS LAST LIMIT 10::bigint OFFSET 10::bigint
(4 rows)
SELECT t1.c1, t2.c1 FROM ft5 t1 RIGHT JOIN ft4 t2 ON (t1.c1 = t2.c1) ORDER BY t2.c1, t1.c1 OFFSET 10 LIMIT 10;
c1 | c1
----+----
| 22
24 | 24
| 26
| 28
30 | 30
| 32
| 34
36 | 36
| 38
| 40
(10 rows)
-- right outer join three tables
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 RIGHT JOIN ft2 t2 ON (t1.c1 = t2.c1) RIGHT JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t2.c2, t3.c3
Relations: ((public.ft4 t3) LEFT JOIN (public.ft2 t2)) LEFT JOIN (public.ft2 t1)
Remote SQL: SELECT r1."C 1", r2.c2, r4.c3 FROM (("S 1"."T 3" r4 LEFT JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r4.c1)))) LEFT JOIN "S 1"."T 1" r1 ON (((r1."C 1" = r2."C 1")))) LIMIT 10::bigint OFFSET 10::bigint
(4 rows)
SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 RIGHT JOIN ft2 t2 ON (t1.c1 = t2.c1) RIGHT JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10;
c1 | c2 | c3
----+----+--------
22 | 2 | AAA022
24 | 4 | AAA024
26 | 6 | AAA026
28 | 8 | AAA028
30 | 0 | AAA030
32 | 2 | AAA032
34 | 4 | AAA034
36 | 6 | AAA036
38 | 8 | AAA038
40 | 0 | AAA040
(10 rows)
-- full outer join
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t2.c1
Relations: (public.ft4 t1) FULL JOIN (public.ft5 t2)
Remote SQL: SELECT r1.c1, r2.c1 FROM ("S 1"."T 3" r1 FULL JOIN "S 1"."T 4" r2 ON (((r1.c1 = r2.c1)))) ORDER BY r1.c1 ASC NULLS LAST, r2.c1 ASC NULLS LAST LIMIT 10::bigint OFFSET 45::bigint
(4 rows)
SELECT t1.c1, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10;
c1 | c1
-----+----
92 |
94 |
96 | 96
98 |
100 |
| 3
| 9
| 15
| 21
| 27
(10 rows)
-- full outer join with restrictions on the joining relations
-- a. the joining relations are both base relations
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: ft4.c1, ft5.c1
Relations: (public.ft4) FULL JOIN (public.ft5)
Remote SQL: SELECT s4.c1, s5.c1 FROM ((SELECT c1 FROM "S 1"."T 3" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s4(c1) FULL JOIN (SELECT c1 FROM "S 1"."T 4" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s5(c1) ON (((s4.c1 = s5.c1)))) ORDER BY s4.c1 ASC NULLS LAST, s5.c1 ASC NULLS LAST
(4 rows)
SELECT t1.c1, t2.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1;
c1 | c1
----+----
50 |
52 |
54 | 54
56 |
58 |
60 | 60
| 51
| 57
(8 rows)
EXPLAIN (VERBOSE, COSTS OFF)
SELECT 1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t2 ON (TRUE) OFFSET 10 LIMIT 10;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: 1
Relations: (public.ft4) FULL JOIN (public.ft5)
Remote SQL: SELECT NULL FROM ((SELECT NULL FROM "S 1"."T 3" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s4 FULL JOIN (SELECT NULL FROM "S 1"."T 4" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s5 ON (TRUE)) LIMIT 10::bigint OFFSET 10::bigint
(4 rows)
SELECT 1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t2 ON (TRUE) OFFSET 10 LIMIT 10;
?column?
----------
1
1
1
1
1
1
1
1
1
1
(10 rows)
-- b. one of the joining relations is a base relation and the other is a join
-- relation
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT t2.c1, t3.c1 FROM ft4 t2 LEFT JOIN ft5 t3 ON (t2.c1 = t3.c1) WHERE (t2.c1 between 50 and 60)) ss(a, b) ON (t1.c1 = ss.a) ORDER BY t1.c1, ss.a, ss.b;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: ft4.c1, t2.c1, t3.c1
Relations: (public.ft4) FULL JOIN ((public.ft4 t2) LEFT JOIN (public.ft5 t3))
Remote SQL: SELECT s4.c1, s8.c1, s8.c2 FROM ((SELECT c1 FROM "S 1"."T 3" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s4(c1) FULL JOIN (SELECT r5.c1, r6.c1 FROM ("S 1"."T 3" r5 LEFT JOIN "S 1"."T 4" r6 ON (((r5.c1 = r6.c1)))) WHERE ((r5.c1 >= 50)) AND ((r5.c1 <= 60))) s8(c1, c2) ON (((s4.c1 = s8.c1)))) ORDER BY s4.c1 ASC NULLS LAST, s8.c1 ASC NULLS LAST, s8.c2 ASC NULLS LAST
(4 rows)
SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT t2.c1, t3.c1 FROM ft4 t2 LEFT JOIN ft5 t3 ON (t2.c1 = t3.c1) WHERE (t2.c1 between 50 and 60)) ss(a, b) ON (t1.c1 = ss.a) ORDER BY t1.c1, ss.a, ss.b;
c1 | a | b
----+----+----
50 | 50 |
52 | 52 |
54 | 54 | 54
56 | 56 |
58 | 58 |
60 | 60 | 60
(6 rows)
-- c. test deparsing the remote query as nested subqueries
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT t2.c1, t3.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t2 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t3 ON (t2.c1 = t3.c1) WHERE t2.c1 IS NULL OR t2.c1 IS NOT NULL) ss(a, b) ON (t1.c1 = ss.a) ORDER BY t1.c1, ss.a, ss.b;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: ft4.c1, ft4_1.c1, ft5.c1
Make postgres_fdw's "Relations" output agree with the rest of EXPLAIN. The relation aliases shown in the "Relations" line for a foreign scan didn't always agree with those used in the rest of EXPLAIN's output. The regression test result changes appearing here provide examples. It's really impossible for postgres_fdw to duplicate EXPLAIN's alias assignment logic during postgresGetForeignRelSize(), because of the de-duplication that EXPLAIN does on a global basis --- and anyway, trying to duplicate that would be unmaintainable. Instead, just put numeric rangetable indexes into the string, and convert those to table names/aliases in postgresExplainForeignScan, which does have access to the results of ruleutils.c's alias assignment logic. Aside from being more reliable, this shifts some work from planning to EXPLAIN, which is a good tradeoff for performance. (I also changed from using StringInfo to using psprintf, which makes the code slightly simpler and reduces its memory consumption.) A kluge required by this solution is that we have to reverse-engineer the rtoffset applied by setrefs.c. If that logic ever fails (presumably because the member tables of a join got offset by different amounts), we'll need some more cooperation with setrefs.c to keep things straight. But for now, there's no need for that. Arguably this is a back-patchable bug fix, but since this is a mostly cosmetic issue and there have been no field complaints, I'll refrain for now. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-02 22:31:03 +01:00
Relations: (public.ft4) FULL JOIN ((public.ft4 ft4_1) FULL JOIN (public.ft5))
Remote SQL: SELECT s4.c1, s10.c1, s10.c2 FROM ((SELECT c1 FROM "S 1"."T 3" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s4(c1) FULL JOIN (SELECT s8.c1, s9.c1 FROM ((SELECT c1 FROM "S 1"."T 3" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s8(c1) FULL JOIN (SELECT c1 FROM "S 1"."T 4" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s9(c1) ON (((s8.c1 = s9.c1)))) WHERE (((s8.c1 IS NULL) OR (s8.c1 IS NOT NULL)))) s10(c1, c2) ON (((s4.c1 = s10.c1)))) ORDER BY s4.c1 ASC NULLS LAST, s10.c1 ASC NULLS LAST, s10.c2 ASC NULLS LAST
(4 rows)
SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT t2.c1, t3.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t2 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t3 ON (t2.c1 = t3.c1) WHERE t2.c1 IS NULL OR t2.c1 IS NOT NULL) ss(a, b) ON (t1.c1 = ss.a) ORDER BY t1.c1, ss.a, ss.b;
c1 | a | b
----+----+----
50 | 50 |
52 | 52 |
54 | 54 | 54
56 | 56 |
58 | 58 |
60 | 60 | 60
| | 51
| | 57
(8 rows)
-- d. test deparsing rowmarked relations as subqueries
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM "S 1"."T 3" WHERE c1 = 50) t1 INNER JOIN (SELECT t2.c1, t3.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t2 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t3 ON (t2.c1 = t3.c1) WHERE t2.c1 IS NULL OR t2.c1 IS NOT NULL) ss(a, b) ON (TRUE) ORDER BY t1.c1, ss.a, ss.b FOR UPDATE OF t1;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
LockRows
Output: "T 3".c1, ft4.c1, ft5.c1, "T 3".ctid, ft4.*, ft5.*
-> Nested Loop
Output: "T 3".c1, ft4.c1, ft5.c1, "T 3".ctid, ft4.*, ft5.*
-> Foreign Scan
Output: ft4.c1, ft4.*, ft5.c1, ft5.*
Relations: (public.ft4) FULL JOIN (public.ft5)
Remote SQL: SELECT s8.c1, s8.c2, s9.c1, s9.c2 FROM ((SELECT c1, ROW(c1, c2, c3) FROM "S 1"."T 3" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s8(c1, c2) FULL JOIN (SELECT c1, ROW(c1, c2, c3) FROM "S 1"."T 4" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s9(c1, c2) ON (((s8.c1 = s9.c1)))) WHERE (((s8.c1 IS NULL) OR (s8.c1 IS NOT NULL))) ORDER BY s8.c1 ASC NULLS LAST, s9.c1 ASC NULLS LAST
-> Sort
Output: ft4.c1, ft4.*, ft5.c1, ft5.*
Sort Key: ft4.c1, ft5.c1
-> Hash Full Join
Output: ft4.c1, ft4.*, ft5.c1, ft5.*
Hash Cond: (ft4.c1 = ft5.c1)
Filter: ((ft4.c1 IS NULL) OR (ft4.c1 IS NOT NULL))
-> Foreign Scan on public.ft4
Output: ft4.c1, ft4.*
Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 3" WHERE ((c1 >= 50)) AND ((c1 <= 60))
-> Hash
Output: ft5.c1, ft5.*
-> Foreign Scan on public.ft5
Output: ft5.c1, ft5.*
Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 4" WHERE ((c1 >= 50)) AND ((c1 <= 60))
-> Materialize
Output: "T 3".c1, "T 3".ctid
-> Seq Scan on "S 1"."T 3"
Output: "T 3".c1, "T 3".ctid
Filter: ("T 3".c1 = 50)
(28 rows)
SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM "S 1"."T 3" WHERE c1 = 50) t1 INNER JOIN (SELECT t2.c1, t3.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t2 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t3 ON (t2.c1 = t3.c1) WHERE t2.c1 IS NULL OR t2.c1 IS NOT NULL) ss(a, b) ON (TRUE) ORDER BY t1.c1, ss.a, ss.b FOR UPDATE OF t1;
c1 | a | b
----+----+----
50 | 50 |
50 | 52 |
50 | 54 | 54
50 | 56 |
50 | 58 |
50 | 60 | 60
50 | | 51
50 | | 57
(8 rows)
-- full outer join + inner join
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c1, t3.c1 FROM ft4 t1 INNER JOIN ft5 t2 ON (t1.c1 = t2.c1 + 1 and t1.c1 between 50 and 60) FULL JOIN ft4 t3 ON (t2.c1 = t3.c1) ORDER BY t1.c1, t2.c1, t3.c1 LIMIT 10;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t2.c1, t3.c1
Relations: ((public.ft4 t1) INNER JOIN (public.ft5 t2)) FULL JOIN (public.ft4 t3)
Remote SQL: SELECT r1.c1, r2.c1, r4.c1 FROM (("S 1"."T 3" r1 INNER JOIN "S 1"."T 4" r2 ON (((r1.c1 = (r2.c1 + 1))) AND ((r1.c1 >= 50)) AND ((r1.c1 <= 60)))) FULL JOIN "S 1"."T 3" r4 ON (((r2.c1 = r4.c1)))) ORDER BY r1.c1 ASC NULLS LAST, r2.c1 ASC NULLS LAST, r4.c1 ASC NULLS LAST LIMIT 10::bigint
(4 rows)
SELECT t1.c1, t2.c1, t3.c1 FROM ft4 t1 INNER JOIN ft5 t2 ON (t1.c1 = t2.c1 + 1 and t1.c1 between 50 and 60) FULL JOIN ft4 t3 ON (t2.c1 = t3.c1) ORDER BY t1.c1, t2.c1, t3.c1 LIMIT 10;
c1 | c1 | c1
----+----+----
52 | 51 |
58 | 57 |
| | 2
| | 4
| | 6
| | 8
| | 10
| | 12
| | 14
| | 16
(10 rows)
-- full outer join three tables
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 FULL JOIN ft2 t2 ON (t1.c1 = t2.c1) FULL JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t2.c2, t3.c3
Relations: ((public.ft2 t1) FULL JOIN (public.ft2 t2)) FULL JOIN (public.ft4 t3)
Remote SQL: SELECT r1."C 1", r2.c2, r4.c3 FROM (("S 1"."T 1" r1 FULL JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) FULL JOIN "S 1"."T 3" r4 ON (((r2."C 1" = r4.c1)))) LIMIT 10::bigint OFFSET 10::bigint
(4 rows)
SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 FULL JOIN ft2 t2 ON (t1.c1 = t2.c1) FULL JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10;
c1 | c2 | c3
----+----+--------
11 | 1 |
12 | 2 | AAA012
13 | 3 |
14 | 4 | AAA014
15 | 5 |
16 | 6 | AAA016
17 | 7 |
18 | 8 | AAA018
19 | 9 |
20 | 0 | AAA020
(10 rows)
-- full outer join + right outer join
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 FULL JOIN ft2 t2 ON (t1.c1 = t2.c1) RIGHT JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t2.c2, t3.c3
Relations: ((public.ft4 t3) LEFT JOIN (public.ft2 t2)) LEFT JOIN (public.ft2 t1)
Remote SQL: SELECT r1."C 1", r2.c2, r4.c3 FROM (("S 1"."T 3" r4 LEFT JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r4.c1)))) LEFT JOIN "S 1"."T 1" r1 ON (((r1."C 1" = r2."C 1")))) LIMIT 10::bigint OFFSET 10::bigint
(4 rows)
SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 FULL JOIN ft2 t2 ON (t1.c1 = t2.c1) RIGHT JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10;
c1 | c2 | c3
----+----+--------
22 | 2 | AAA022
24 | 4 | AAA024
26 | 6 | AAA026
28 | 8 | AAA028
30 | 0 | AAA030
32 | 2 | AAA032
34 | 4 | AAA034
36 | 6 | AAA036
38 | 8 | AAA038
40 | 0 | AAA040
(10 rows)
-- right outer join + full outer join
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 RIGHT JOIN ft2 t2 ON (t1.c1 = t2.c1) FULL JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t2.c2, t3.c3
Relations: ((public.ft2 t2) LEFT JOIN (public.ft2 t1)) FULL JOIN (public.ft4 t3)
Remote SQL: SELECT r1."C 1", r2.c2, r4.c3 FROM (("S 1"."T 1" r2 LEFT JOIN "S 1"."T 1" r1 ON (((r1."C 1" = r2."C 1")))) FULL JOIN "S 1"."T 3" r4 ON (((r2."C 1" = r4.c1)))) LIMIT 10::bigint OFFSET 10::bigint
(4 rows)
SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 RIGHT JOIN ft2 t2 ON (t1.c1 = t2.c1) FULL JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10;
c1 | c2 | c3
----+----+--------
11 | 1 |
12 | 2 | AAA012
13 | 3 |
14 | 4 | AAA014
15 | 5 |
16 | 6 | AAA016
17 | 7 |
18 | 8 | AAA018
19 | 9 |
20 | 0 | AAA020
(10 rows)
-- full outer join + left outer join
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 FULL JOIN ft2 t2 ON (t1.c1 = t2.c1) LEFT JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t2.c2, t3.c3
Relations: ((public.ft2 t1) FULL JOIN (public.ft2 t2)) LEFT JOIN (public.ft4 t3)
Remote SQL: SELECT r1."C 1", r2.c2, r4.c3 FROM (("S 1"."T 1" r1 FULL JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) LEFT JOIN "S 1"."T 3" r4 ON (((r2."C 1" = r4.c1)))) LIMIT 10::bigint OFFSET 10::bigint
(4 rows)
SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 FULL JOIN ft2 t2 ON (t1.c1 = t2.c1) LEFT JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10;
c1 | c2 | c3
----+----+--------
11 | 1 |
12 | 2 | AAA012
13 | 3 |
14 | 4 | AAA014
15 | 5 |
16 | 6 | AAA016
17 | 7 |
18 | 8 | AAA018
19 | 9 |
20 | 0 | AAA020
(10 rows)
-- left outer join + full outer join
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 LEFT JOIN ft2 t2 ON (t1.c1 = t2.c1) FULL JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t2.c2, t3.c3
Relations: ((public.ft2 t1) LEFT JOIN (public.ft2 t2)) FULL JOIN (public.ft4 t3)
Remote SQL: SELECT r1."C 1", r2.c2, r4.c3 FROM (("S 1"."T 1" r1 LEFT JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) FULL JOIN "S 1"."T 3" r4 ON (((r2."C 1" = r4.c1)))) LIMIT 10::bigint OFFSET 10::bigint
(4 rows)
SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 LEFT JOIN ft2 t2 ON (t1.c1 = t2.c1) FULL JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10;
c1 | c2 | c3
----+----+--------
11 | 1 |
12 | 2 | AAA012
13 | 3 |
14 | 4 | AAA014
15 | 5 |
16 | 6 | AAA016
17 | 7 |
18 | 8 | AAA018
19 | 9 |
20 | 0 | AAA020
(10 rows)
SET enable_memoize TO off;
-- right outer join + left outer join
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 RIGHT JOIN ft2 t2 ON (t1.c1 = t2.c1) LEFT JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t2.c2, t3.c3
Relations: ((public.ft2 t2) LEFT JOIN (public.ft2 t1)) LEFT JOIN (public.ft4 t3)
Remote SQL: SELECT r1."C 1", r2.c2, r4.c3 FROM (("S 1"."T 1" r2 LEFT JOIN "S 1"."T 1" r1 ON (((r1."C 1" = r2."C 1")))) LEFT JOIN "S 1"."T 3" r4 ON (((r2."C 1" = r4.c1)))) LIMIT 10::bigint OFFSET 10::bigint
(4 rows)
SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 RIGHT JOIN ft2 t2 ON (t1.c1 = t2.c1) LEFT JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10;
c1 | c2 | c3
----+----+--------
11 | 1 |
12 | 2 | AAA012
13 | 3 |
14 | 4 | AAA014
15 | 5 |
16 | 6 | AAA016
17 | 7 |
18 | 8 | AAA018
19 | 9 |
20 | 0 | AAA020
(10 rows)
RESET enable_memoize;
-- left outer join + right outer join
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 LEFT JOIN ft2 t2 ON (t1.c1 = t2.c1) RIGHT JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t2.c2, t3.c3
Relations: (public.ft4 t3) LEFT JOIN ((public.ft2 t1) INNER JOIN (public.ft2 t2))
Remote SQL: SELECT r1."C 1", r2.c2, r4.c3 FROM ("S 1"."T 3" r4 LEFT JOIN ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ON (((r2."C 1" = r4.c1)))) LIMIT 10::bigint OFFSET 10::bigint
(4 rows)
SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 LEFT JOIN ft2 t2 ON (t1.c1 = t2.c1) RIGHT JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10;
c1 | c2 | c3
----+----+--------
22 | 2 | AAA022
24 | 4 | AAA024
26 | 6 | AAA026
28 | 8 | AAA028
30 | 0 | AAA030
32 | 2 | AAA032
34 | 4 | AAA034
36 | 6 | AAA036
38 | 8 | AAA038
40 | 0 | AAA040
(10 rows)
-- full outer join + WHERE clause, only matched rows
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) WHERE (t1.c1 = t2.c1 OR t1.c1 IS NULL) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: t1.c1, t2.c1
-> Sort
Output: t1.c1, t2.c1
Sort Key: t1.c1, t2.c1
-> Foreign Scan
Output: t1.c1, t2.c1
Relations: (public.ft4 t1) FULL JOIN (public.ft5 t2)
Remote SQL: SELECT r1.c1, r2.c1 FROM ("S 1"."T 3" r1 FULL JOIN "S 1"."T 4" r2 ON (((r1.c1 = r2.c1)))) WHERE (((r1.c1 = r2.c1) OR (r1.c1 IS NULL)))
(9 rows)
SELECT t1.c1, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) WHERE (t1.c1 = t2.c1 OR t1.c1 IS NULL) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10;
c1 | c1
----+----
66 | 66
72 | 72
78 | 78
84 | 84
90 | 90
96 | 96
| 3
| 9
| 15
| 21
(10 rows)
-- full outer join + WHERE clause with shippable extensions set
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c2, t1.c3 FROM ft1 t1 FULL JOIN ft2 t2 ON (t1.c1 = t2.c1) WHERE postgres_fdw_abs(t1.c1) > 0 OFFSET 10 LIMIT 10;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t2.c2, t1.c3
Relations: (public.ft1 t1) FULL JOIN (public.ft2 t2)
Remote SQL: SELECT r1."C 1", r2.c2, r1.c3 FROM ("S 1"."T 1" r1 FULL JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) WHERE ((public.postgres_fdw_abs(r1."C 1") > 0)) LIMIT 10::bigint OFFSET 10::bigint
(4 rows)
ALTER SERVER loopback OPTIONS (DROP extensions);
-- full outer join + WHERE clause with shippable extensions not set
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c2, t1.c3 FROM ft1 t1 FULL JOIN ft2 t2 ON (t1.c1 = t2.c1) WHERE postgres_fdw_abs(t1.c1) > 0 OFFSET 10 LIMIT 10;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------
Limit
Output: t1.c1, t2.c2, t1.c3
-> Foreign Scan
Output: t1.c1, t2.c2, t1.c3
Filter: (postgres_fdw_abs(t1.c1) > 0)
Relations: (public.ft1 t1) FULL JOIN (public.ft2 t2)
Remote SQL: SELECT r1."C 1", r2.c2, r1.c3 FROM ("S 1"."T 1" r1 FULL JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1"))))
(7 rows)
ALTER SERVER loopback OPTIONS (ADD extensions 'postgres_fdw');
-- join two tables with FOR UPDATE clause
-- tests whole-row reference for row marks
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE OF t1;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint FOR UPDATE OF r1
(4 rows)
SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE OF t1;
c1 | c1
-----+-----
101 | 101
102 | 102
103 | 103
104 | 104
105 | 105
106 | 106
107 | 107
108 | 108
109 | 109
110 | 110
(10 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint FOR UPDATE OF r1 FOR UPDATE OF r2
(4 rows)
SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE;
c1 | c1
-----+-----
101 | 101
102 | 102
103 | 103
104 | 104
105 | 105
106 | 106
107 | 107
108 | 108
109 | 109
110 | 110
(10 rows)
-- join two tables with FOR SHARE clause
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE OF t1;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint FOR SHARE OF r1
(4 rows)
SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE OF t1;
c1 | c1
-----+-----
101 | 101
102 | 102
103 | 103
104 | 104
105 | 105
106 | 106
107 | 107
108 | 108
109 | 109
110 | 110
(10 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint FOR SHARE OF r1 FOR SHARE OF r2
(4 rows)
SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE;
c1 | c1
-----+-----
101 | 101
102 | 102
103 | 103
104 | 104
105 | 105
106 | 106
107 | 107
108 | 108
109 | 109
110 | 110
(10 rows)
-- join in CTE
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
Allow user control of CTE materialization, and change the default behavior. Historically we've always materialized the full output of a CTE query, treating WITH as an optimization fence (so that, for example, restrictions from the outer query cannot be pushed into it). This is appropriate when the CTE query is INSERT/UPDATE/DELETE, or is recursive; but when the CTE query is non-recursive and side-effect-free, there's no hazard of changing the query results by pushing restrictions down. Another argument for materialization is that it can avoid duplicate computation of an expensive WITH query --- but that only applies if the WITH query is called more than once in the outer query. Even then it could still be a net loss, if each call has restrictions that would allow just a small part of the WITH query to be computed. Hence, let's change the behavior for WITH queries that are non-recursive and side-effect-free. By default, we will inline them into the outer query (removing the optimization fence) if they are called just once. If they are called more than once, we will keep the old behavior by default, but the user can override this and force inlining by specifying NOT MATERIALIZED. Lastly, the user can force the old behavior by specifying MATERIALIZED; this would mainly be useful when the query had deliberately been employing WITH as an optimization fence to prevent a poor choice of plan. Andreas Karlsson, Andrew Gierth, David Fetter Discussion: https://postgr.es/m/87sh48ffhb.fsf@news-spur.riddles.org.uk
2019-02-16 22:11:12 +01:00
WITH t (c1_1, c1_3, c2_1) AS MATERIALIZED (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: t.c1_1, t.c2_1, t.c1_3
CTE t
-> Foreign Scan
Output: t1.c1, t1.c3, t2.c1
Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Remote SQL: SELECT r1."C 1", r1.c3, r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1"))))
-> Sort
Output: t.c1_1, t.c2_1, t.c1_3
Sort Key: t.c1_3, t.c1_1
-> CTE Scan on t
Output: t.c1_1, t.c2_1, t.c1_3
(12 rows)
Allow user control of CTE materialization, and change the default behavior. Historically we've always materialized the full output of a CTE query, treating WITH as an optimization fence (so that, for example, restrictions from the outer query cannot be pushed into it). This is appropriate when the CTE query is INSERT/UPDATE/DELETE, or is recursive; but when the CTE query is non-recursive and side-effect-free, there's no hazard of changing the query results by pushing restrictions down. Another argument for materialization is that it can avoid duplicate computation of an expensive WITH query --- but that only applies if the WITH query is called more than once in the outer query. Even then it could still be a net loss, if each call has restrictions that would allow just a small part of the WITH query to be computed. Hence, let's change the behavior for WITH queries that are non-recursive and side-effect-free. By default, we will inline them into the outer query (removing the optimization fence) if they are called just once. If they are called more than once, we will keep the old behavior by default, but the user can override this and force inlining by specifying NOT MATERIALIZED. Lastly, the user can force the old behavior by specifying MATERIALIZED; this would mainly be useful when the query had deliberately been employing WITH as an optimization fence to prevent a poor choice of plan. Andreas Karlsson, Andrew Gierth, David Fetter Discussion: https://postgr.es/m/87sh48ffhb.fsf@news-spur.riddles.org.uk
2019-02-16 22:11:12 +01:00
WITH t (c1_1, c1_3, c2_1) AS MATERIALIZED (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10;
c1_1 | c2_1
------+------
101 | 101
102 | 102
103 | 103
104 | 104
105 | 105
106 | 106
107 | 107
108 | 108
109 | 109
110 | 110
(10 rows)
-- ctid with whole-row reference
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.ctid, t1, t2, t1.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.ctid, t1.*, t2.*, t1.c1, t1.c3
Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Remote SQL: SELECT r1.ctid, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END, r1."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint
(4 rows)
-- SEMI JOIN, not pushed down
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1 FROM ft1 t1 WHERE EXISTS (SELECT 1 FROM ft2 t2 WHERE t1.c1 = t2.c1) ORDER BY t1.c1 OFFSET 100 LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------------------------
Limit
Output: t1.c1
-> Merge Semi Join
Output: t1.c1
Merge Cond: (t1.c1 = t2.c1)
-> Foreign Scan on public.ft1 t1
Output: t1.c1
Remote SQL: SELECT "C 1" FROM "S 1"."T 1" ORDER BY "C 1" ASC NULLS LAST
-> Foreign Scan on public.ft2 t2
Output: t2.c1
Remote SQL: SELECT "C 1" FROM "S 1"."T 1" ORDER BY "C 1" ASC NULLS LAST
(11 rows)
SELECT t1.c1 FROM ft1 t1 WHERE EXISTS (SELECT 1 FROM ft2 t2 WHERE t1.c1 = t2.c1) ORDER BY t1.c1 OFFSET 100 LIMIT 10;
c1
-----
101
102
103
104
105
106
107
108
109
110
(10 rows)
-- ANTI JOIN, not pushed down
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1 FROM ft1 t1 WHERE NOT EXISTS (SELECT 1 FROM ft2 t2 WHERE t1.c1 = t2.c2) ORDER BY t1.c1 OFFSET 100 LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------------------------
Limit
Output: t1.c1
-> Merge Anti Join
Output: t1.c1
Merge Cond: (t1.c1 = t2.c2)
-> Foreign Scan on public.ft1 t1
Output: t1.c1
Remote SQL: SELECT "C 1" FROM "S 1"."T 1" ORDER BY "C 1" ASC NULLS LAST
-> Foreign Scan on public.ft2 t2
Output: t2.c2
Remote SQL: SELECT c2 FROM "S 1"."T 1" ORDER BY c2 ASC NULLS LAST
(11 rows)
SELECT t1.c1 FROM ft1 t1 WHERE NOT EXISTS (SELECT 1 FROM ft2 t2 WHERE t1.c1 = t2.c2) ORDER BY t1.c1 OFFSET 100 LIMIT 10;
c1
-----
110
111
112
113
114
115
116
117
118
119
(10 rows)
-- CROSS JOIN can be pushed down
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c1 FROM ft1 t1 CROSS JOIN ft2 t2 ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c1, t2.c1
Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) ORDER BY r1."C 1" ASC NULLS LAST, r2."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint
(4 rows)
SELECT t1.c1, t2.c1 FROM ft1 t1 CROSS JOIN ft2 t2 ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10;
c1 | c1
----+-----
1 | 101
1 | 102
1 | 103
1 | 104
1 | 105
1 | 106
1 | 107
1 | 108
1 | 109
1 | 110
(10 rows)
-- different server, not pushed down. No result expected.
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c1 FROM ft5 t1 JOIN ft6 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------------------------
Limit
Output: t1.c1, t2.c1
-> Merge Join
Output: t1.c1, t2.c1
Merge Cond: (t2.c1 = t1.c1)
-> Foreign Scan on public.ft6 t2
Output: t2.c1, t2.c2, t2.c3
Remote SQL: SELECT c1 FROM "S 1"."T 4" ORDER BY c1 ASC NULLS LAST
-> Materialize
Output: t1.c1, t1.c2, t1.c3
-> Foreign Scan on public.ft5 t1
Output: t1.c1, t1.c2, t1.c3
Remote SQL: SELECT c1 FROM "S 1"."T 4" ORDER BY c1 ASC NULLS LAST
(13 rows)
SELECT t1.c1, t2.c1 FROM ft5 t1 JOIN ft6 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10;
c1 | c1
----+----
(0 rows)
-- unsafe join conditions (c8 has a UDT), not pushed down. Practically a CROSS
-- JOIN since c8 in both tables has same value.
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c1 FROM ft1 t1 LEFT JOIN ft2 t2 ON (t1.c8 = t2.c8) ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10;
QUERY PLAN
-------------------------------------------------------------------------
Limit
Output: t1.c1, t2.c1
-> Sort
Output: t1.c1, t2.c1
Sort Key: t1.c1, t2.c1
-> Merge Left Join
Output: t1.c1, t2.c1
Merge Cond: (t1.c8 = t2.c8)
-> Sort
Output: t1.c1, t1.c8
Sort Key: t1.c8
-> Foreign Scan on public.ft1 t1
Output: t1.c1, t1.c8
Remote SQL: SELECT "C 1", c8 FROM "S 1"."T 1"
-> Sort
Output: t2.c1, t2.c8
Sort Key: t2.c8
-> Foreign Scan on public.ft2 t2
Output: t2.c1, t2.c8
Remote SQL: SELECT "C 1", c8 FROM "S 1"."T 1"
(20 rows)
SELECT t1.c1, t2.c1 FROM ft1 t1 LEFT JOIN ft2 t2 ON (t1.c8 = t2.c8) ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10;
c1 | c1
----+-----
1 | 101
1 | 102
1 | 103
1 | 104
1 | 105
1 | 106
1 | 107
1 | 108
1 | 109
1 | 110
(10 rows)
-- unsafe conditions on one side (c8 has a UDT), not pushed down.
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c1 FROM ft1 t1 LEFT JOIN ft2 t2 ON (t1.c1 = t2.c1) WHERE t1.c8 = 'foo' ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
QUERY PLAN
-----------------------------------------------------------------------------
Limit
Output: t1.c1, t2.c1, t1.c3
-> Sort
Output: t1.c1, t2.c1, t1.c3
Sort Key: t1.c3, t1.c1
-> Hash Right Join
Output: t1.c1, t2.c1, t1.c3
Hash Cond: (t2.c1 = t1.c1)
-> Foreign Scan on public.ft2 t2
Output: t2.c1
Remote SQL: SELECT "C 1" FROM "S 1"."T 1"
-> Hash
Output: t1.c1, t1.c3
-> Foreign Scan on public.ft1 t1
Output: t1.c1, t1.c3
Filter: (t1.c8 = 'foo'::user_enum)
Remote SQL: SELECT "C 1", c3, c8 FROM "S 1"."T 1"
(17 rows)
SELECT t1.c1, t2.c1 FROM ft1 t1 LEFT JOIN ft2 t2 ON (t1.c1 = t2.c1) WHERE t1.c8 = 'foo' ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
c1 | c1
-----+-----
101 | 101
102 | 102
103 | 103
104 | 104
105 | 105
106 | 106
107 | 107
108 | 108
109 | 109
110 | 110
(10 rows)
-- join where unsafe to pushdown condition in WHERE clause has a column not
-- in the SELECT clause. In this test unsafe clause needs to have column
-- references from both joining sides so that the clause is not pushed down
-- into one of the joining sides.
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) WHERE t1.c8 = t2.c8 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: t1.c1, t2.c1, t1.c3
-> Sort
Output: t1.c1, t2.c1, t1.c3
Sort Key: t1.c3, t1.c1
-> Foreign Scan
Output: t1.c1, t2.c1, t1.c3
Filter: (t1.c8 = t2.c8)
Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, r1.c8, r2.c8 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1"))))
(10 rows)
SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) WHERE t1.c8 = t2.c8 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
c1 | c1
-----+-----
101 | 101
102 | 102
103 | 103
104 | 104
105 | 105
106 | 106
107 | 107
108 | 108
109 | 109
110 | 110
(10 rows)
-- Aggregate after UNION, for testing setrefs
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1c1, avg(t1c1 + t2c1) FROM (SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) UNION SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) AS t (t1c1, t2c1) GROUP BY t1c1 ORDER BY t1c1 OFFSET 100 LIMIT 10;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: t1.c1, (avg((t1.c1 + t2.c1)))
-> Sort
Output: t1.c1, (avg((t1.c1 + t2.c1)))
Sort Key: t1.c1
-> HashAggregate
Output: t1.c1, avg((t1.c1 + t2.c1))
Group Key: t1.c1
-> HashAggregate
Output: t1.c1, t2.c1
Group Key: t1.c1, t2.c1
-> Append
-> Foreign Scan
Output: t1.c1, t2.c1
Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1"))))
-> Foreign Scan
Output: t1_1.c1, t2_1.c1
Make postgres_fdw's "Relations" output agree with the rest of EXPLAIN. The relation aliases shown in the "Relations" line for a foreign scan didn't always agree with those used in the rest of EXPLAIN's output. The regression test result changes appearing here provide examples. It's really impossible for postgres_fdw to duplicate EXPLAIN's alias assignment logic during postgresGetForeignRelSize(), because of the de-duplication that EXPLAIN does on a global basis --- and anyway, trying to duplicate that would be unmaintainable. Instead, just put numeric rangetable indexes into the string, and convert those to table names/aliases in postgresExplainForeignScan, which does have access to the results of ruleutils.c's alias assignment logic. Aside from being more reliable, this shifts some work from planning to EXPLAIN, which is a good tradeoff for performance. (I also changed from using StringInfo to using psprintf, which makes the code slightly simpler and reduces its memory consumption.) A kluge required by this solution is that we have to reverse-engineer the rtoffset applied by setrefs.c. If that logic ever fails (presumably because the member tables of a join got offset by different amounts), we'll need some more cooperation with setrefs.c to keep things straight. But for now, there's no need for that. Arguably this is a back-patchable bug fix, but since this is a mostly cosmetic issue and there have been no field complaints, I'll refrain for now. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-02 22:31:03 +01:00
Relations: (public.ft1 t1_1) INNER JOIN (public.ft2 t2_1)
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1"))))
(20 rows)
SELECT t1c1, avg(t1c1 + t2c1) FROM (SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) UNION SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) AS t (t1c1, t2c1) GROUP BY t1c1 ORDER BY t1c1 OFFSET 100 LIMIT 10;
t1c1 | avg
------+----------------------
101 | 202.0000000000000000
102 | 204.0000000000000000
103 | 206.0000000000000000
104 | 208.0000000000000000
105 | 210.0000000000000000
106 | 212.0000000000000000
107 | 214.0000000000000000
108 | 216.0000000000000000
109 | 218.0000000000000000
110 | 220.0000000000000000
(10 rows)
-- join with lateral reference
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM ft1 t2, ft2 t3 WHERE t2.c1 = t3.c1 AND t2.c2 = t1.c2) q ORDER BY t1."C 1" OFFSET 10 LIMIT 10;
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: t1."C 1"
-> Nested Loop
Output: t1."C 1"
-> Index Scan using t1_pkey on "S 1"."T 1" t1
Output: t1."C 1", t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
-> Memoize
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
Cache Key: t1.c2
Cache Mode: binary
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
-> Subquery Scan on q
-> HashAggregate
Output: t2.c1, t3.c1
Remove redundant grouping and DISTINCT columns. Avoid explicitly grouping by columns that we know are redundant for sorting, for example we need group by only one of x and y in SELECT ... WHERE x = y GROUP BY x, y This comes up more often than you might think, as shown by the changes in the regression tests. It's nearly free to detect too, since we are just piggybacking on the existing logic that detects redundant pathkeys. (In some of the existing plans that change, it's visible that a sort step preceding the grouping step already didn't bother to sort by the redundant column, making the old plan a bit silly-looking.) To do this, build processed_groupClause and processed_distinctClause lists that omit any provably-redundant sort items, and consult those not the originals where relevant. This means that within the planner, one should usually consult root->processed_groupClause or root->processed_distinctClause if one wants to know which columns are to be grouped on; but to check whether grouping or distinct-ing is happening at all, check non-NIL-ness of parse->groupClause or parse->distinctClause. This is comparable to longstanding rules about handling the HAVING clause, so I don't think it'll be a huge maintenance problem. nodeAgg.c also needs minor mods, because it's now possible to generate AGG_PLAIN and AGG_SORTED Agg nodes with zero grouping columns. Patch by me; thanks to Richard Guo and David Rowley for review. Discussion: https://postgr.es/m/185315.1672179489@sss.pgh.pa.us
2023-01-18 18:37:57 +01:00
Group Key: t2.c1
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
-> Foreign Scan
Output: t2.c1, t3.c1
Relations: (public.ft1 t2) INNER JOIN (public.ft2 t3)
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1")) AND ((r1.c2 = $1::integer))))
(17 rows)
SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM ft1 t2, ft2 t3 WHERE t2.c1 = t3.c1 AND t2.c2 = t1.c2) q ORDER BY t1."C 1" OFFSET 10 LIMIT 10;
C 1
-----
1
1
1
1
1
1
1
1
1
1
(10 rows)
-- join with pseudoconstant quals, not pushed down.
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1 AND CURRENT_USER = SESSION_USER) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
QUERY PLAN
-------------------------------------------------------------------------------
Limit
Output: t1.c1, t2.c1, t1.c3
-> Sort
Output: t1.c1, t2.c1, t1.c3
Sort Key: t1.c3, t1.c1
-> Result
Output: t1.c1, t2.c1, t1.c3
One-Time Filter: (CURRENT_USER = SESSION_USER)
-> Hash Join
Output: t1.c1, t1.c3, t2.c1
Hash Cond: (t2.c1 = t1.c1)
-> Foreign Scan on public.ft2 t2
Output: t2.c1
Remote SQL: SELECT "C 1" FROM "S 1"."T 1"
-> Hash
Output: t1.c1, t1.c3
-> Foreign Scan on public.ft1 t1
Output: t1.c1, t1.c3
Remote SQL: SELECT "C 1", c3 FROM "S 1"."T 1"
(19 rows)
-- non-Var items in targetlist of the nullable rel of a join preventing
-- push-down in some cases
-- unable to push {ft1, ft2}
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT q.a, ft2.c1 FROM (SELECT 13 FROM ft1 WHERE c1 = 13) q(a) RIGHT JOIN ft2 ON (q.a = ft2.c1) WHERE ft2.c1 BETWEEN 10 AND 15;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------
Nested Loop Left Join
Output: (13), ft2.c1
Join Filter: (13 = ft2.c1)
-> Foreign Scan on public.ft2
Output: ft2.c1
Remote SQL: SELECT "C 1" FROM "S 1"."T 1" WHERE (("C 1" >= 10)) AND (("C 1" <= 15)) ORDER BY "C 1" ASC NULLS LAST
-> Materialize
Output: (13)
-> Foreign Scan on public.ft1
Output: 13
Remote SQL: SELECT NULL FROM "S 1"."T 1" WHERE (("C 1" = 13))
(11 rows)
SELECT q.a, ft2.c1 FROM (SELECT 13 FROM ft1 WHERE c1 = 13) q(a) RIGHT JOIN ft2 ON (q.a = ft2.c1) WHERE ft2.c1 BETWEEN 10 AND 15;
a | c1
----+----
| 10
| 11
| 12
13 | 13
| 14
| 15
(6 rows)
-- ok to push {ft1, ft2} but not {ft1, ft2, ft4}
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT ft4.c1, q.* FROM ft4 LEFT JOIN (SELECT 13, ft1.c1, ft2.c1 FROM ft1 RIGHT JOIN ft2 ON (ft1.c1 = ft2.c1) WHERE ft1.c1 = 12) q(a, b, c) ON (ft4.c1 = q.b) WHERE ft4.c1 BETWEEN 10 AND 15;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Nested Loop Left Join
Output: ft4.c1, (13), ft1.c1, ft2.c1
Join Filter: (ft4.c1 = ft1.c1)
-> Foreign Scan on public.ft4
Output: ft4.c1, ft4.c2, ft4.c3
Remote SQL: SELECT c1 FROM "S 1"."T 3" WHERE ((c1 >= 10)) AND ((c1 <= 15))
-> Materialize
Output: ft1.c1, ft2.c1, (13)
-> Foreign Scan
Output: ft1.c1, ft2.c1, 13
Relations: (public.ft1) INNER JOIN (public.ft2)
Remote SQL: SELECT r4."C 1", r5."C 1" FROM ("S 1"."T 1" r4 INNER JOIN "S 1"."T 1" r5 ON (((r5."C 1" = 12)) AND ((r4."C 1" = 12)))) ORDER BY r4."C 1" ASC NULLS LAST
(12 rows)
SELECT ft4.c1, q.* FROM ft4 LEFT JOIN (SELECT 13, ft1.c1, ft2.c1 FROM ft1 RIGHT JOIN ft2 ON (ft1.c1 = ft2.c1) WHERE ft1.c1 = 12) q(a, b, c) ON (ft4.c1 = q.b) WHERE ft4.c1 BETWEEN 10 AND 15;
c1 | a | b | c
----+----+----+----
10 | | |
12 | 13 | 12 | 12
14 | | |
(3 rows)
-- join with nullable side with some columns with null values
UPDATE ft5 SET c3 = null where c1 % 9 = 0;
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT ft5, ft5.c1, ft5.c2, ft5.c3, ft4.c1, ft4.c2 FROM ft5 left join ft4 on ft5.c1 = ft4.c1 WHERE ft4.c1 BETWEEN 10 and 30 ORDER BY ft5.c1, ft4.c1;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
Foreign Scan
Output: ft5.*, ft5.c1, ft5.c2, ft5.c3, ft4.c1, ft4.c2
Relations: (public.ft5) INNER JOIN (public.ft4)
Remote SQL: SELECT CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1.c1, r1.c2, r1.c3) END, r1.c1, r1.c2, r1.c3, r2.c1, r2.c2 FROM ("S 1"."T 4" r1 INNER JOIN "S 1"."T 3" r2 ON (((r1.c1 = r2.c1)) AND ((r2.c1 >= 10)) AND ((r2.c1 <= 30)))) ORDER BY r1.c1 ASC NULLS LAST
(4 rows)
SELECT ft5, ft5.c1, ft5.c2, ft5.c3, ft4.c1, ft4.c2 FROM ft5 left join ft4 on ft5.c1 = ft4.c1 WHERE ft4.c1 BETWEEN 10 and 30 ORDER BY ft5.c1, ft4.c1;
ft5 | c1 | c2 | c3 | c1 | c2
----------------+----+----+--------+----+----
(12,13,AAA012) | 12 | 13 | AAA012 | 12 | 13
(18,19,) | 18 | 19 | | 18 | 19
(24,25,AAA024) | 24 | 25 | AAA024 | 24 | 25
(30,31,AAA030) | 30 | 31 | AAA030 | 30 | 31
(4 rows)
-- multi-way join involving multiple merge joins
-- (this case used to have EPQ-related planning problems)
CREATE TABLE local_tbl (c1 int NOT NULL, c2 int NOT NULL, c3 text, CONSTRAINT local_tbl_pkey PRIMARY KEY (c1));
INSERT INTO local_tbl SELECT id, id % 10, to_char(id, 'FM0000') FROM generate_series(1, 1000) id;
ANALYZE local_tbl;
SET enable_nestloop TO false;
SET enable_hashjoin TO false;
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM ft1, ft2, ft4, ft5, local_tbl WHERE ft1.c1 = ft2.c1 AND ft1.c2 = ft4.c1
AND ft1.c2 = ft5.c1 AND ft1.c2 = local_tbl.c1 AND ft1.c1 < 100 AND ft2.c1 < 100 FOR UPDATE;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
LockRows
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8, ft4.c1, ft4.c2, ft4.c3, ft5.c1, ft5.c2, ft5.c3, local_tbl.c1, local_tbl.c2, local_tbl.c3, ft1.*, ft2.*, ft4.*, ft5.*, local_tbl.ctid
-> Merge Join
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8, ft4.c1, ft4.c2, ft4.c3, ft5.c1, ft5.c2, ft5.c3, local_tbl.c1, local_tbl.c2, local_tbl.c3, ft1.*, ft2.*, ft4.*, ft5.*, local_tbl.ctid
Inner Unique: true
Merge Cond: (ft1.c2 = local_tbl.c1)
-> Foreign Scan
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8, ft2.*, ft4.c1, ft4.c2, ft4.c3, ft4.*, ft5.c1, ft5.c2, ft5.c3, ft5.*
Relations: (((public.ft1) INNER JOIN (public.ft2)) INNER JOIN (public.ft4)) INNER JOIN (public.ft5)
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Remote SQL: SELECT r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END, r3.c1, r3.c2, r3.c3, CASE WHEN (r3.*)::text IS NOT NULL THEN ROW(r3.c1, r3.c2, r3.c3) END, r4.c1, r4.c2, r4.c3, CASE WHEN (r4.*)::text IS NOT NULL THEN ROW(r4.c1, r4.c2, r4.c3) END FROM ((("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1")) AND ((r2."C 1" < 100)) AND ((r1."C 1" < 100)))) INNER JOIN "S 1"."T 3" r3 ON (((r1.c2 = r3.c1)))) INNER JOIN "S 1"."T 4" r4 ON (((r1.c2 = r4.c1)))) ORDER BY r1.c2 ASC NULLS LAST FOR UPDATE OF r1 FOR UPDATE OF r2 FOR UPDATE OF r3 FOR UPDATE OF r4
-> Merge Join
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8, ft2.*, ft4.c1, ft4.c2, ft4.c3, ft4.*, ft5.c1, ft5.c2, ft5.c3, ft5.*
Merge Cond: (ft1.c2 = ft5.c1)
-> Merge Join
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8, ft2.*, ft4.c1, ft4.c2, ft4.c3, ft4.*
Merge Cond: (ft1.c2 = ft4.c1)
-> Sort
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8, ft2.*
Sort Key: ft1.c2
-> Merge Join
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8, ft2.*
Merge Cond: (ft1.c1 = ft2.c1)
-> Sort
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*
Sort Key: ft1.c1
-> Foreign Scan on public.ft1
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 100)) FOR UPDATE
-> Materialize
Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8, ft2.*
-> Foreign Scan on public.ft2
Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8, ft2.*
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 100)) ORDER BY "C 1" ASC NULLS LAST FOR UPDATE
-> Sort
Output: ft4.c1, ft4.c2, ft4.c3, ft4.*
Sort Key: ft4.c1
-> Foreign Scan on public.ft4
Output: ft4.c1, ft4.c2, ft4.c3, ft4.*
Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 3" FOR UPDATE
-> Sort
Output: ft5.c1, ft5.c2, ft5.c3, ft5.*
Sort Key: ft5.c1
-> Foreign Scan on public.ft5
Output: ft5.c1, ft5.c2, ft5.c3, ft5.*
Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 4" FOR UPDATE
-> Index Scan using local_tbl_pkey on public.local_tbl
Output: local_tbl.c1, local_tbl.c2, local_tbl.c3, local_tbl.ctid
(47 rows)
SELECT * FROM ft1, ft2, ft4, ft5, local_tbl WHERE ft1.c1 = ft2.c1 AND ft1.c2 = ft4.c1
AND ft1.c2 = ft5.c1 AND ft1.c2 = local_tbl.c1 AND ft1.c1 < 100 AND ft2.c1 < 100 FOR UPDATE;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 | c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 | c1 | c2 | c3 | c1 | c2 | c3 | c1 | c2 | c3
----+----+-------+------------------------------+--------------------------+----+------------+-----+----+----+-------+------------------------------+--------------------------+----+------------+-----+----+----+--------+----+----+--------+----+----+------
6 | 6 | 00006 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo | 6 | 6 | 00006 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo | 6 | 7 | AAA006 | 6 | 7 | AAA006 | 6 | 6 | 0006
16 | 6 | 00016 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo | 16 | 6 | 00016 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo | 6 | 7 | AAA006 | 6 | 7 | AAA006 | 6 | 6 | 0006
26 | 6 | 00026 | Tue Jan 27 00:00:00 1970 PST | Tue Jan 27 00:00:00 1970 | 6 | 6 | foo | 26 | 6 | 00026 | Tue Jan 27 00:00:00 1970 PST | Tue Jan 27 00:00:00 1970 | 6 | 6 | foo | 6 | 7 | AAA006 | 6 | 7 | AAA006 | 6 | 6 | 0006
36 | 6 | 00036 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo | 36 | 6 | 00036 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo | 6 | 7 | AAA006 | 6 | 7 | AAA006 | 6 | 6 | 0006
46 | 6 | 00046 | Mon Feb 16 00:00:00 1970 PST | Mon Feb 16 00:00:00 1970 | 6 | 6 | foo | 46 | 6 | 00046 | Mon Feb 16 00:00:00 1970 PST | Mon Feb 16 00:00:00 1970 | 6 | 6 | foo | 6 | 7 | AAA006 | 6 | 7 | AAA006 | 6 | 6 | 0006
56 | 6 | 00056 | Thu Feb 26 00:00:00 1970 PST | Thu Feb 26 00:00:00 1970 | 6 | 6 | foo | 56 | 6 | 00056 | Thu Feb 26 00:00:00 1970 PST | Thu Feb 26 00:00:00 1970 | 6 | 6 | foo | 6 | 7 | AAA006 | 6 | 7 | AAA006 | 6 | 6 | 0006
66 | 6 | 00066 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo | 66 | 6 | 00066 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo | 6 | 7 | AAA006 | 6 | 7 | AAA006 | 6 | 6 | 0006
76 | 6 | 00076 | Wed Mar 18 00:00:00 1970 PST | Wed Mar 18 00:00:00 1970 | 6 | 6 | foo | 76 | 6 | 00076 | Wed Mar 18 00:00:00 1970 PST | Wed Mar 18 00:00:00 1970 | 6 | 6 | foo | 6 | 7 | AAA006 | 6 | 7 | AAA006 | 6 | 6 | 0006
86 | 6 | 00086 | Sat Mar 28 00:00:00 1970 PST | Sat Mar 28 00:00:00 1970 | 6 | 6 | foo | 86 | 6 | 00086 | Sat Mar 28 00:00:00 1970 PST | Sat Mar 28 00:00:00 1970 | 6 | 6 | foo | 6 | 7 | AAA006 | 6 | 7 | AAA006 | 6 | 6 | 0006
96 | 6 | 00096 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo | 96 | 6 | 00096 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo | 6 | 7 | AAA006 | 6 | 7 | AAA006 | 6 | 6 | 0006
(10 rows)
RESET enable_nestloop;
RESET enable_hashjoin;
-- test that add_paths_with_pathkeys_for_rel() arranges for the epq_path to
-- return columns needed by the parent ForeignScan node
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM local_tbl LEFT JOIN (SELECT ft1.*, COALESCE(ft1.c3 || ft2.c3, 'foobar') FROM ft1 INNER JOIN ft2 ON (ft1.c1 = ft2.c1 AND ft1.c1 < 100)) ss ON (local_tbl.c1 = ss.c1) ORDER BY local_tbl.c1 FOR UPDATE OF local_tbl;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
LockRows
Output: local_tbl.c1, local_tbl.c2, local_tbl.c3, ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, (COALESCE((ft1.c3 || ft2.c3), 'foobar'::text)), local_tbl.ctid, ft1.*, ft2.*
-> Merge Left Join
Output: local_tbl.c1, local_tbl.c2, local_tbl.c3, ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, (COALESCE((ft1.c3 || ft2.c3), 'foobar'::text)), local_tbl.ctid, ft1.*, ft2.*
Merge Cond: (local_tbl.c1 = ft1.c1)
-> Index Scan using local_tbl_pkey on public.local_tbl
Output: local_tbl.c1, local_tbl.c2, local_tbl.c3, local_tbl.ctid
-> Materialize
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.*, (COALESCE((ft1.c3 || ft2.c3), 'foobar'::text))
-> Foreign Scan
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.*, COALESCE((ft1.c3 || ft2.c3), 'foobar'::text)
Relations: (public.ft1) INNER JOIN (public.ft2)
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Remote SQL: SELECT r4."C 1", r4.c2, r4.c3, r4.c4, r4.c5, r4.c6, r4.c7, r4.c8, CASE WHEN (r4.*)::text IS NOT NULL THEN ROW(r4."C 1", r4.c2, r4.c3, r4.c4, r4.c5, r4.c6, r4.c7, r4.c8) END, CASE WHEN (r5.*)::text IS NOT NULL THEN ROW(r5."C 1", r5.c2, r5.c3, r5.c4, r5.c5, r5.c6, r5.c7, r5.c8) END, r5.c3 FROM ("S 1"."T 1" r4 INNER JOIN "S 1"."T 1" r5 ON (((r5."C 1" = r4."C 1")) AND ((r4."C 1" < 100)))) ORDER BY r4."C 1" ASC NULLS LAST
-> Result
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.*, ft2.c3
-> Sort
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.*, (COALESCE((ft1.c3 || ft2.c3), 'foobar'::text)), ft2.c3
Sort Key: ft1.c1
-> Hash Join
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.*, COALESCE((ft1.c3 || ft2.c3), 'foobar'::text), ft2.c3
Hash Cond: (ft1.c1 = ft2.c1)
-> Foreign Scan on public.ft1
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 100))
-> Hash
Output: ft2.*, ft2.c1, ft2.c3
-> Foreign Scan on public.ft2
Output: ft2.*, ft2.c1, ft2.c3
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
(29 rows)
ALTER SERVER loopback OPTIONS (DROP extensions);
ALTER SERVER loopback OPTIONS (ADD fdw_startup_cost '10000.0');
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM local_tbl LEFT JOIN (SELECT ft1.* FROM ft1 INNER JOIN ft2 ON (ft1.c1 = ft2.c1 AND ft1.c1 < 100 AND (ft1.c1 - postgres_fdw_abs(ft2.c2)) = 0)) ss ON (local_tbl.c3 = ss.c3) ORDER BY local_tbl.c1 FOR UPDATE OF local_tbl;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
LockRows
Output: local_tbl.c1, local_tbl.c2, local_tbl.c3, ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, local_tbl.ctid, ft1.*, ft2.*
-> Nested Loop Left Join
Output: local_tbl.c1, local_tbl.c2, local_tbl.c3, ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, local_tbl.ctid, ft1.*, ft2.*
Join Filter: (local_tbl.c3 = ft1.c3)
-> Index Scan using local_tbl_pkey on public.local_tbl
Output: local_tbl.c1, local_tbl.c2, local_tbl.c3, local_tbl.ctid
-> Materialize
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.*
-> Foreign Scan
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.*
Filter: ((ft1.c1 - postgres_fdw_abs(ft2.c2)) = 0)
Relations: (public.ft1) INNER JOIN (public.ft2)
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Remote SQL: SELECT r4."C 1", r4.c2, r4.c3, r4.c4, r4.c5, r4.c6, r4.c7, r4.c8, CASE WHEN (r4.*)::text IS NOT NULL THEN ROW(r4."C 1", r4.c2, r4.c3, r4.c4, r4.c5, r4.c6, r4.c7, r4.c8) END, CASE WHEN (r5.*)::text IS NOT NULL THEN ROW(r5."C 1", r5.c2, r5.c3, r5.c4, r5.c5, r5.c6, r5.c7, r5.c8) END, r5.c2 FROM ("S 1"."T 1" r4 INNER JOIN "S 1"."T 1" r5 ON (((r5."C 1" = r4."C 1")) AND ((r4."C 1" < 100)))) ORDER BY r4.c3 ASC NULLS LAST
-> Sort
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.*, ft2.c2
Sort Key: ft1.c3
-> Merge Join
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.*, ft2.c2
Merge Cond: (ft1.c1 = ft2.c1)
Join Filter: ((ft1.c1 - postgres_fdw_abs(ft2.c2)) = 0)
-> Sort
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*
Sort Key: ft1.c1
-> Foreign Scan on public.ft1
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 100))
-> Materialize
Output: ft2.*, ft2.c1, ft2.c2
-> Foreign Scan on public.ft2
Output: ft2.*, ft2.c1, ft2.c2
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER BY "C 1" ASC NULLS LAST
(32 rows)
ALTER SERVER loopback OPTIONS (DROP fdw_startup_cost);
ALTER SERVER loopback OPTIONS (ADD extensions 'postgres_fdw');
DROP TABLE local_tbl;
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
-- check join pushdown in situations where multiple userids are involved
CREATE ROLE regress_view_owner SUPERUSER;
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
CREATE USER MAPPING FOR regress_view_owner SERVER loopback;
GRANT SELECT ON ft4 TO regress_view_owner;
GRANT SELECT ON ft5 TO regress_view_owner;
CREATE VIEW v4 AS SELECT * FROM ft4;
CREATE VIEW v5 AS SELECT * FROM ft5;
ALTER VIEW v5 OWNER TO regress_view_owner;
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c2 FROM v4 t1 LEFT JOIN v5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10; -- can't be pushed down, different view owners
QUERY PLAN
----------------------------------------------------------------------
Limit
Output: ft4.c1, ft5.c2, ft5.c1
-> Sort
Output: ft4.c1, ft5.c2, ft5.c1
Sort Key: ft4.c1, ft5.c1
-> Hash Left Join
Output: ft4.c1, ft5.c2, ft5.c1
Hash Cond: (ft4.c1 = ft5.c1)
-> Foreign Scan on public.ft4
Output: ft4.c1, ft4.c2, ft4.c3
Remote SQL: SELECT c1 FROM "S 1"."T 3"
-> Hash
Output: ft5.c2, ft5.c1
-> Foreign Scan on public.ft5
Output: ft5.c2, ft5.c1
Remote SQL: SELECT c1, c2 FROM "S 1"."T 4"
(16 rows)
SELECT t1.c1, t2.c2 FROM v4 t1 LEFT JOIN v5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10;
c1 | c2
----+----
22 |
24 | 25
26 |
28 |
30 | 31
32 |
34 |
36 | 37
38 |
40 |
(10 rows)
ALTER VIEW v4 OWNER TO regress_view_owner;
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c2 FROM v4 t1 LEFT JOIN v5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10; -- can be pushed down
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
Output: ft4.c1, ft5.c2, ft5.c1
Relations: (public.ft4) LEFT JOIN (public.ft5)
Get rid of the "new" and "old" entries in a view's rangetable. The rule system needs "old" and/or "new" pseudo-RTEs in rule actions that are ON INSERT/UPDATE/DELETE. Historically it's put such entries into the ON SELECT rules of views as well, but those are really quite vestigial. The only thing we've used them for is to carry the view's relid forward to AcquireExecutorLocks (so that we can re-lock the view to verify it hasn't changed before re-using a plan) and to carry its relid and permissions data forward to execution-time permissions checks. What we can do instead of that is to retain these fields of the RTE_RELATION RTE for the view even after we convert it to an RTE_SUBQUERY RTE. This requires a tiny amount of extra complication in the planner and AcquireExecutorLocks, but on the other hand we can get rid of the logic that moves that data from one place to another. The principal immediate benefit of doing this, aside from a small saving in the pg_rewrite data for views, is that these pseudo-RTEs no longer trigger ruleutils.c's heuristic about qualifying variable names when the rangetable's length is more than 1. That results in quite a number of small simplifications in regression test outputs, which are all to the good IMO. Bump catversion because we need to dump a few more fields of RTE_SUBQUERY RTEs. While those will always be zeroes anyway in stored rules (because we'd never populate them until query rewrite) they are useful for debugging, and it seems like we'd better make sure to transmit such RTEs accurately in plans sent to parallel workers. I don't think the executor actually examines these fields after startup, but someday it might. This is a second attempt at committing 1b4d280ea. The difference from the first time is that now we can add some filtering rules to AdjustUpgrade.pm to allow cross-version upgrade testing to pass despite all the cosmetic changes in CREATE VIEW outputs. Amit Langote (filtering rules by me) Discussion: https://postgr.es/m/CA+HiwqEf7gPN4Hn+LoZ4tP2q_Qt7n3vw7-6fJKOf92tSEnX6Gg@mail.gmail.com Discussion: https://postgr.es/m/891521.1673657296@sss.pgh.pa.us
2023-01-18 19:23:57 +01:00
Remote SQL: SELECT r4.c1, r5.c2, r5.c1 FROM ("S 1"."T 3" r4 LEFT JOIN "S 1"."T 4" r5 ON (((r4.c1 = r5.c1)))) ORDER BY r4.c1 ASC NULLS LAST, r5.c1 ASC NULLS LAST LIMIT 10::bigint OFFSET 10::bigint
(4 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
SELECT t1.c1, t2.c2 FROM v4 t1 LEFT JOIN v5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10;
c1 | c2
----+----
22 |
24 | 25
26 |
28 |
30 | 31
32 |
34 |
36 | 37
38 |
40 |
(10 rows)
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c2 FROM v4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10; -- can't be pushed down, view owner not current user
QUERY PLAN
----------------------------------------------------------------------
Limit
Output: ft4.c1, t2.c2, t2.c1
-> Sort
Output: ft4.c1, t2.c2, t2.c1
Sort Key: ft4.c1, t2.c1
-> Hash Left Join
Output: ft4.c1, t2.c2, t2.c1
Hash Cond: (ft4.c1 = t2.c1)
-> Foreign Scan on public.ft4
Output: ft4.c1, ft4.c2, ft4.c3
Remote SQL: SELECT c1 FROM "S 1"."T 3"
-> Hash
Output: t2.c2, t2.c1
-> Foreign Scan on public.ft5 t2
Output: t2.c2, t2.c1
Remote SQL: SELECT c1, c2 FROM "S 1"."T 4"
(16 rows)
SELECT t1.c1, t2.c2 FROM v4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10;
c1 | c2
----+----
22 |
24 | 25
26 |
28 |
30 | 31
32 |
34 |
36 | 37
38 |
40 |
(10 rows)
ALTER VIEW v4 OWNER TO CURRENT_USER;
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c2 FROM v4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10; -- can be pushed down
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
Output: ft4.c1, t2.c2, t2.c1
Relations: (public.ft4) LEFT JOIN (public.ft5 t2)
Get rid of the "new" and "old" entries in a view's rangetable. The rule system needs "old" and/or "new" pseudo-RTEs in rule actions that are ON INSERT/UPDATE/DELETE. Historically it's put such entries into the ON SELECT rules of views as well, but those are really quite vestigial. The only thing we've used them for is to carry the view's relid forward to AcquireExecutorLocks (so that we can re-lock the view to verify it hasn't changed before re-using a plan) and to carry its relid and permissions data forward to execution-time permissions checks. What we can do instead of that is to retain these fields of the RTE_RELATION RTE for the view even after we convert it to an RTE_SUBQUERY RTE. This requires a tiny amount of extra complication in the planner and AcquireExecutorLocks, but on the other hand we can get rid of the logic that moves that data from one place to another. The principal immediate benefit of doing this, aside from a small saving in the pg_rewrite data for views, is that these pseudo-RTEs no longer trigger ruleutils.c's heuristic about qualifying variable names when the rangetable's length is more than 1. That results in quite a number of small simplifications in regression test outputs, which are all to the good IMO. Bump catversion because we need to dump a few more fields of RTE_SUBQUERY RTEs. While those will always be zeroes anyway in stored rules (because we'd never populate them until query rewrite) they are useful for debugging, and it seems like we'd better make sure to transmit such RTEs accurately in plans sent to parallel workers. I don't think the executor actually examines these fields after startup, but someday it might. This is a second attempt at committing 1b4d280ea. The difference from the first time is that now we can add some filtering rules to AdjustUpgrade.pm to allow cross-version upgrade testing to pass despite all the cosmetic changes in CREATE VIEW outputs. Amit Langote (filtering rules by me) Discussion: https://postgr.es/m/CA+HiwqEf7gPN4Hn+LoZ4tP2q_Qt7n3vw7-6fJKOf92tSEnX6Gg@mail.gmail.com Discussion: https://postgr.es/m/891521.1673657296@sss.pgh.pa.us
2023-01-18 19:23:57 +01:00
Remote SQL: SELECT r4.c1, r2.c2, r2.c1 FROM ("S 1"."T 3" r4 LEFT JOIN "S 1"."T 4" r2 ON (((r4.c1 = r2.c1)))) ORDER BY r4.c1 ASC NULLS LAST, r2.c1 ASC NULLS LAST LIMIT 10::bigint OFFSET 10::bigint
(4 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
SELECT t1.c1, t2.c2 FROM v4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10;
c1 | c2
----+----
22 |
24 | 25
26 |
28 |
30 | 31
32 |
34 |
36 | 37
38 |
40 |
(10 rows)
ALTER VIEW v4 OWNER TO regress_view_owner;
-- ====================================================================
-- Check that userid to use when querying the remote table is correctly
-- propagated into foreign rels present in subqueries under an UNION ALL
-- ====================================================================
CREATE ROLE regress_view_owner_another;
ALTER VIEW v4 OWNER TO regress_view_owner_another;
GRANT SELECT ON ft4 TO regress_view_owner_another;
ALTER FOREIGN TABLE ft4 OPTIONS (ADD use_remote_estimate 'true');
-- The following should query the remote backing table of ft4 as user
-- regress_view_owner_another, the view owner, though it fails as expected
-- due to the lack of a user mapping for that user.
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM v4;
ERROR: user mapping not found for "regress_view_owner_another"
-- Likewise, but with the query under an UNION ALL
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM (SELECT * FROM v4 UNION ALL SELECT * FROM v4);
ERROR: user mapping not found for "regress_view_owner_another"
-- Should not get that error once a user mapping is created
CREATE USER MAPPING FOR regress_view_owner_another SERVER loopback OPTIONS (password_required 'false');
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM v4;
QUERY PLAN
--------------------------------------------------
Foreign Scan on public.ft4
Output: ft4.c1, ft4.c2, ft4.c3
Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 3"
(3 rows)
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM (SELECT * FROM v4 UNION ALL SELECT * FROM v4);
QUERY PLAN
--------------------------------------------------------
Append
-> Foreign Scan on public.ft4
Output: ft4.c1, ft4.c2, ft4.c3
Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 3"
-> Foreign Scan on public.ft4 ft4_1
Output: ft4_1.c1, ft4_1.c2, ft4_1.c3
Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 3"
(7 rows)
DROP USER MAPPING FOR regress_view_owner_another SERVER loopback;
DROP OWNED BY regress_view_owner_another;
DROP ROLE regress_view_owner_another;
ALTER FOREIGN TABLE ft4 OPTIONS (SET use_remote_estimate 'false');
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
-- cleanup
DROP OWNED BY regress_view_owner;
DROP ROLE regress_view_owner;
-- ===================================================================
-- Aggregate and grouping queries
-- ===================================================================
-- Simple aggregates
explain (verbose, costs off)
select count(c6), sum(c1), avg(c1), min(c2), max(c1), stddev(c2), sum(c1) * (random() <= 1)::int as sum2 from ft1 where c2 < 5 group by c2 order by 1, 2;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: (count(c6)), (sum(c1)), (avg(c1)), (min(c2)), (max(c1)), (stddev(c2)), ((sum(c1)) * ((random() <= '1'::double precision))::integer), c2
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT count(c6), sum("C 1"), avg("C 1"), min(c2), max("C 1"), stddev(c2), c2 FROM "S 1"."T 1" WHERE ((c2 < 5)) GROUP BY 7 ORDER BY count(c6) ASC NULLS LAST, sum("C 1") ASC NULLS LAST
(4 rows)
select count(c6), sum(c1), avg(c1), min(c2), max(c1), stddev(c2), sum(c1) * (random() <= 1)::int as sum2 from ft1 where c2 < 5 group by c2 order by 1, 2;
count | sum | avg | min | max | stddev | sum2
-------+-------+----------------------+-----+------+--------+-------
100 | 49600 | 496.0000000000000000 | 1 | 991 | 0 | 49600
100 | 49700 | 497.0000000000000000 | 2 | 992 | 0 | 49700
100 | 49800 | 498.0000000000000000 | 3 | 993 | 0 | 49800
100 | 49900 | 499.0000000000000000 | 4 | 994 | 0 | 49900
100 | 50500 | 505.0000000000000000 | 0 | 1000 | 0 | 50500
(5 rows)
explain (verbose, costs off)
select count(c6), sum(c1), avg(c1), min(c2), max(c1), stddev(c2), sum(c1) * (random() <= 1)::int as sum2 from ft1 where c2 < 5 group by c2 order by 1, 2 limit 1;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: (count(c6)), (sum(c1)), (avg(c1)), (min(c2)), (max(c1)), (stddev(c2)), ((sum(c1)) * ((random() <= '1'::double precision))::integer), c2
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT count(c6), sum("C 1"), avg("C 1"), min(c2), max("C 1"), stddev(c2), c2 FROM "S 1"."T 1" WHERE ((c2 < 5)) GROUP BY 7 ORDER BY count(c6) ASC NULLS LAST, sum("C 1") ASC NULLS LAST LIMIT 1::bigint
(4 rows)
select count(c6), sum(c1), avg(c1), min(c2), max(c1), stddev(c2), sum(c1) * (random() <= 1)::int as sum2 from ft1 where c2 < 5 group by c2 order by 1, 2 limit 1;
count | sum | avg | min | max | stddev | sum2
-------+-------+----------------------+-----+-----+--------+-------
100 | 49600 | 496.0000000000000000 | 1 | 991 | 0 | 49600
(1 row)
-- Aggregate is not pushed down as aggregation contains random()
explain (verbose, costs off)
select sum(c1 * (random() <= 1)::int) as sum, avg(c1) from ft1;
QUERY PLAN
-------------------------------------------------------------------------------
Aggregate
Output: sum((c1 * ((random() <= '1'::double precision))::integer)), avg(c1)
-> Foreign Scan on public.ft1
Output: c1
Remote SQL: SELECT "C 1" FROM "S 1"."T 1"
(5 rows)
-- Aggregate over join query
explain (verbose, costs off)
select count(*), sum(t1.c1), avg(t2.c1) from ft1 t1 inner join ft1 t2 on (t1.c2 = t2.c2) where t1.c2 = 6;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: (count(*)), (sum(t1.c1)), (avg(t2.c1))
Relations: Aggregate on ((public.ft1 t1) INNER JOIN (public.ft1 t2))
Remote SQL: SELECT count(*), sum(r1."C 1"), avg(r2."C 1") FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2.c2 = 6)) AND ((r1.c2 = 6))))
(4 rows)
select count(*), sum(t1.c1), avg(t2.c1) from ft1 t1 inner join ft1 t2 on (t1.c2 = t2.c2) where t1.c2 = 6;
count | sum | avg
-------+---------+----------------------
10000 | 5010000 | 501.0000000000000000
(1 row)
-- Not pushed down due to local conditions present in underneath input rel
explain (verbose, costs off)
select sum(t1.c1), count(t2.c1) from ft1 t1 inner join ft2 t2 on (t1.c1 = t2.c1) where ((t1.c1 * t2.c1)/(t1.c1 * t2.c1)) * random() <= 1;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------
Aggregate
Output: sum(t1.c1), count(t2.c1)
-> Foreign Scan
Output: t1.c1, t2.c1
Filter: (((((t1.c1 * t2.c1) / (t1.c1 * t2.c1)))::double precision * random()) <= '1'::double precision)
Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1"))))
(7 rows)
-- GROUP BY clause having expressions
explain (verbose, costs off)
select c2/2, sum(c2) * (c2/2) from ft1 group by c2/2 order by c2/2;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: ((c2 / 2)), ((sum(c2) * (c2 / 2)))
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT (c2 / 2), (sum(c2) * (c2 / 2)) FROM "S 1"."T 1" GROUP BY 1 ORDER BY (c2 / 2) ASC NULLS LAST
(4 rows)
select c2/2, sum(c2) * (c2/2) from ft1 group by c2/2 order by c2/2;
?column? | ?column?
----------+----------
0 | 0
1 | 500
2 | 1800
3 | 3900
4 | 6800
(5 rows)
-- Aggregates in subquery are pushed down.
Remove pessimistic cost penalization from Incremental Sort When incremental sorts were added in v13 a 1.5x pessimism factor was added to the cost modal. Seemingly this was done because the cost modal only has an estimate of the total number of input rows and the number of presorted groups. It assumes that the input rows will be evenly distributed throughout the presorted groups. The 1.5x pessimism factor was added to slightly reduce the likelihood of incremental sorts being used in the hope to avoid performance regressions where an incremental sort plan was picked and turned out slower due to a large skew in the number of rows in the presorted groups. An additional quirk with the path generation code meant that we could consider both a sort and an incremental sort on paths with presorted keys. This meant that with the pessimism factor, it was possible that we opted to perform a sort rather than an incremental sort when the given path had presorted keys. Here we remove the 1.5x pessimism factor to allow incremental sorts to have a fairer chance at being chosen against a full sort. Previously we would generally create a sort path on the cheapest input path (if that wasn't sorted already) and incremental sort paths on any path which had presorted keys. This meant that if the cheapest input path wasn't completely sorted but happened to have presorted keys, we would create a full sort path *and* an incremental sort path on that input path. Here we change this logic so that if there are presorted keys, we only create an incremental sort path, and create sort paths only when a full sort is required. Both the removal of the cost pessimism factor and the changes made to the path generation make it more likely that incremental sorts will now be chosen. That, of course, as with teaching the planner any new tricks, means an increased likelihood that the planner will perform an incremental sort when it's not the best method. Our standard escape hatch for these cases is an enable_* GUC. enable_incremental_sort already exists for this. This came out of a report by Pavel Luzanov where he mentioned that the master branch was choosing to perform a Seq Scan -> Sort -> Group Aggregate for his query with an ORDER BY aggregate function. The v15 plan for his query performed an Index Scan -> Group Aggregate, of course, the aggregate performed the final sort internally in nodeAgg.c for the aggregate's ORDER BY. The ideal plan would have been to use the index, which provided partially sorted input then use an incremental sort to provide the aggregate with the sorted input. This was not being chosen due to the pessimism in the incremental sort cost modal, so here we remove that and rationalize the path generation so that sort and incremental sort plans don't have to needlessly compete. We assume that it's senseless to ever use a full sort on a given input path where an incremental sort can be performed. Reported-by: Pavel Luzanov Reviewed-by: Richard Guo Discussion: https://postgr.es/m/9f61ddbf-2989-1536-b31e-6459370a6baa%40postgrespro.ru
2022-12-16 03:22:23 +01:00
set enable_incremental_sort = off;
explain (verbose, costs off)
select count(x.a), sum(x.a) from (select c2 a, sum(c1) b from ft1 group by c2, sqrt(c1) order by 1, 2) x;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------
Aggregate
Output: count(ft1.c2), sum(ft1.c2)
-> Foreign Scan
Output: ft1.c2, (sum(ft1.c1)), (sqrt((ft1.c1)::double precision))
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT c2, sum("C 1"), sqrt("C 1") FROM "S 1"."T 1" GROUP BY 1, 3 ORDER BY c2 ASC NULLS LAST, sum("C 1") ASC NULLS LAST
(6 rows)
select count(x.a), sum(x.a) from (select c2 a, sum(c1) b from ft1 group by c2, sqrt(c1) order by 1, 2) x;
count | sum
-------+------
1000 | 4500
(1 row)
Remove pessimistic cost penalization from Incremental Sort When incremental sorts were added in v13 a 1.5x pessimism factor was added to the cost modal. Seemingly this was done because the cost modal only has an estimate of the total number of input rows and the number of presorted groups. It assumes that the input rows will be evenly distributed throughout the presorted groups. The 1.5x pessimism factor was added to slightly reduce the likelihood of incremental sorts being used in the hope to avoid performance regressions where an incremental sort plan was picked and turned out slower due to a large skew in the number of rows in the presorted groups. An additional quirk with the path generation code meant that we could consider both a sort and an incremental sort on paths with presorted keys. This meant that with the pessimism factor, it was possible that we opted to perform a sort rather than an incremental sort when the given path had presorted keys. Here we remove the 1.5x pessimism factor to allow incremental sorts to have a fairer chance at being chosen against a full sort. Previously we would generally create a sort path on the cheapest input path (if that wasn't sorted already) and incremental sort paths on any path which had presorted keys. This meant that if the cheapest input path wasn't completely sorted but happened to have presorted keys, we would create a full sort path *and* an incremental sort path on that input path. Here we change this logic so that if there are presorted keys, we only create an incremental sort path, and create sort paths only when a full sort is required. Both the removal of the cost pessimism factor and the changes made to the path generation make it more likely that incremental sorts will now be chosen. That, of course, as with teaching the planner any new tricks, means an increased likelihood that the planner will perform an incremental sort when it's not the best method. Our standard escape hatch for these cases is an enable_* GUC. enable_incremental_sort already exists for this. This came out of a report by Pavel Luzanov where he mentioned that the master branch was choosing to perform a Seq Scan -> Sort -> Group Aggregate for his query with an ORDER BY aggregate function. The v15 plan for his query performed an Index Scan -> Group Aggregate, of course, the aggregate performed the final sort internally in nodeAgg.c for the aggregate's ORDER BY. The ideal plan would have been to use the index, which provided partially sorted input then use an incremental sort to provide the aggregate with the sorted input. This was not being chosen due to the pessimism in the incremental sort cost modal, so here we remove that and rationalize the path generation so that sort and incremental sort plans don't have to needlessly compete. We assume that it's senseless to ever use a full sort on a given input path where an incremental sort can be performed. Reported-by: Pavel Luzanov Reviewed-by: Richard Guo Discussion: https://postgr.es/m/9f61ddbf-2989-1536-b31e-6459370a6baa%40postgrespro.ru
2022-12-16 03:22:23 +01:00
reset enable_incremental_sort;
-- Aggregate is still pushed down by taking unshippable expression out
explain (verbose, costs off)
select c2 * (random() <= 1)::int as sum1, sum(c1) * c2 as sum2 from ft1 group by c2 order by 1, 2;
QUERY PLAN
---------------------------------------------------------------------------------------------------
Sort
Output: ((c2 * ((random() <= '1'::double precision))::integer)), ((sum(c1) * c2)), c2
Sort Key: ((ft1.c2 * ((random() <= '1'::double precision))::integer)), ((sum(ft1.c1) * ft1.c2))
-> Foreign Scan
Output: (c2 * ((random() <= '1'::double precision))::integer), ((sum(c1) * c2)), c2
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT (sum("C 1") * c2), c2 FROM "S 1"."T 1" GROUP BY 2
(7 rows)
select c2 * (random() <= 1)::int as sum1, sum(c1) * c2 as sum2 from ft1 group by c2 order by 1, 2;
sum1 | sum2
------+--------
0 | 0
1 | 49600
2 | 99400
3 | 149400
4 | 199600
5 | 250000
6 | 300600
7 | 351400
8 | 402400
9 | 453600
(10 rows)
-- Aggregate with unshippable GROUP BY clause are not pushed
explain (verbose, costs off)
select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::int order by 1;
QUERY PLAN
------------------------------------------------------------------------------
Sort
Output: ((c2 * ((random() <= '1'::double precision))::integer))
Sort Key: ((ft2.c2 * ((random() <= '1'::double precision))::integer))
-> HashAggregate
Output: ((c2 * ((random() <= '1'::double precision))::integer))
Group Key: (ft2.c2 * ((random() <= '1'::double precision))::integer)
-> Foreign Scan on public.ft2
Output: (c2 * ((random() <= '1'::double precision))::integer)
Remote SQL: SELECT c2 FROM "S 1"."T 1"
(9 rows)
-- GROUP BY clause in various forms, cardinal, alias and constant expression
explain (verbose, costs off)
select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
Remove redundant grouping and DISTINCT columns. Avoid explicitly grouping by columns that we know are redundant for sorting, for example we need group by only one of x and y in SELECT ... WHERE x = y GROUP BY x, y This comes up more often than you might think, as shown by the changes in the regression tests. It's nearly free to detect too, since we are just piggybacking on the existing logic that detects redundant pathkeys. (In some of the existing plans that change, it's visible that a sort step preceding the grouping step already didn't bother to sort by the redundant column, making the old plan a bit silly-looking.) To do this, build processed_groupClause and processed_distinctClause lists that omit any provably-redundant sort items, and consult those not the originals where relevant. This means that within the planner, one should usually consult root->processed_groupClause or root->processed_distinctClause if one wants to know which columns are to be grouped on; but to check whether grouping or distinct-ing is happening at all, check non-NIL-ness of parse->groupClause or parse->distinctClause. This is comparable to longstanding rules about handling the HAVING clause, so I don't think it'll be a huge maintenance problem. nodeAgg.c also needs minor mods, because it's now possible to generate AGG_PLAIN and AGG_SORTED Agg nodes with zero grouping columns. Patch by me; thanks to Richard Guo and David Rowley for review. Discussion: https://postgr.es/m/185315.1672179489@sss.pgh.pa.us
2023-01-18 18:37:57 +01:00
QUERY PLAN
------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: (count(c2)), c2, 5, 7.0, 9
Remove redundant grouping and DISTINCT columns. Avoid explicitly grouping by columns that we know are redundant for sorting, for example we need group by only one of x and y in SELECT ... WHERE x = y GROUP BY x, y This comes up more often than you might think, as shown by the changes in the regression tests. It's nearly free to detect too, since we are just piggybacking on the existing logic that detects redundant pathkeys. (In some of the existing plans that change, it's visible that a sort step preceding the grouping step already didn't bother to sort by the redundant column, making the old plan a bit silly-looking.) To do this, build processed_groupClause and processed_distinctClause lists that omit any provably-redundant sort items, and consult those not the originals where relevant. This means that within the planner, one should usually consult root->processed_groupClause or root->processed_distinctClause if one wants to know which columns are to be grouped on; but to check whether grouping or distinct-ing is happening at all, check non-NIL-ness of parse->groupClause or parse->distinctClause. This is comparable to longstanding rules about handling the HAVING clause, so I don't think it'll be a huge maintenance problem. nodeAgg.c also needs minor mods, because it's now possible to generate AGG_PLAIN and AGG_SORTED Agg nodes with zero grouping columns. Patch by me; thanks to Richard Guo and David Rowley for review. Discussion: https://postgr.es/m/185315.1672179489@sss.pgh.pa.us
2023-01-18 18:37:57 +01:00
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
(4 rows)
select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
w | x | y | z
-----+---+---+-----
100 | 0 | 5 | 7.0
100 | 1 | 5 | 7.0
100 | 2 | 5 | 7.0
100 | 3 | 5 | 7.0
100 | 4 | 5 | 7.0
100 | 5 | 5 | 7.0
100 | 6 | 5 | 7.0
100 | 7 | 5 | 7.0
100 | 8 | 5 | 7.0
100 | 9 | 5 | 7.0
(10 rows)
-- GROUP BY clause referring to same column multiple times
-- Also, ORDER BY contains an aggregate function
explain (verbose, costs off)
select c2, c2 from ft1 where c2 > 6 group by 1, 2 order by sum(c1);
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: c2, c2, (sum(c1))
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT c2, c2, sum("C 1") FROM "S 1"."T 1" WHERE ((c2 > 6)) GROUP BY 1, 2 ORDER BY sum("C 1") ASC NULLS LAST
(4 rows)
select c2, c2 from ft1 where c2 > 6 group by 1, 2 order by sum(c1);
c2 | c2
----+----
7 | 7
8 | 8
9 | 9
(3 rows)
-- Testing HAVING clause shippability
explain (verbose, costs off)
select c2, sum(c1) from ft2 group by c2 having avg(c1) < 500 and sum(c1) < 49800 order by c2;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: c2, (sum(c1))
Relations: Aggregate on (public.ft2)
Remote SQL: SELECT c2, sum("C 1") FROM "S 1"."T 1" GROUP BY 1 HAVING ((avg("C 1") < 500::numeric)) AND ((sum("C 1") < 49800)) ORDER BY c2 ASC NULLS LAST
(4 rows)
select c2, sum(c1) from ft2 group by c2 having avg(c1) < 500 and sum(c1) < 49800 order by c2;
c2 | sum
----+-------
1 | 49600
2 | 49700
(2 rows)
-- Unshippable HAVING clause will be evaluated locally, and other qual in HAVING clause is pushed down
explain (verbose, costs off)
select count(*) from (select c5, count(c1) from ft1 group by c5, sqrt(c2) having (avg(c1) / avg(c1)) * random() <= 1 and avg(c1) < 500) x;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------
Aggregate
Output: count(*)
-> Foreign Scan
Output: ft1.c5, NULL::bigint, (sqrt((ft1.c2)::double precision))
Filter: (((((avg(ft1.c1)) / (avg(ft1.c1))))::double precision * random()) <= '1'::double precision)
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT c5, NULL::bigint, sqrt(c2), avg("C 1") FROM "S 1"."T 1" GROUP BY 1, 3 HAVING ((avg("C 1") < 500::numeric))
(7 rows)
select count(*) from (select c5, count(c1) from ft1 group by c5, sqrt(c2) having (avg(c1) / avg(c1)) * random() <= 1 and avg(c1) < 500) x;
count
-------
49
(1 row)
-- Aggregate in HAVING clause is not pushable, and thus aggregation is not pushed down
explain (verbose, costs off)
select sum(c1) from ft1 group by c2 having avg(c1 * (random() <= 1)::int) > 100 order by 1;
QUERY PLAN
---------------------------------------------------------------------------------------------------
Sort
Output: (sum(c1)), c2
Sort Key: (sum(ft1.c1))
-> HashAggregate
Output: sum(c1), c2
Group Key: ft1.c2
Filter: (avg((ft1.c1 * ((random() <= '1'::double precision))::integer)) > '100'::numeric)
-> Foreign Scan on public.ft1
Output: c1, c2
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1"
(10 rows)
Avoid postgres_fdw crash for a targetlist entry that's just a Param. foreign_grouping_ok() is willing to put fairly arbitrary expressions into the targetlist of a remote SELECT that's doing grouping or aggregation on the remote side, including expressions that have no foreign component to them at all. This is possibly a bit dubious from an efficiency standpoint; but it rises to the level of a crash-causing bug if the expression is just a Param or non-foreign Var. In that case, the expression will necessarily also appear in the fdw_exprs list of values we need to send to the remote server, and then setrefs.c's set_foreignscan_references will mistakenly replace the fdw_exprs entry with a Var referencing the targetlist result. The root cause of this problem is bad design in commit e7cb7ee14: it put logic into set_foreignscan_references that IMV is postgres_fdw-specific, and yet this bug shows that it isn't postgres_fdw-specific enough. The transformation being done on fdw_exprs assumes that fdw_exprs is to be evaluated with the fdw_scan_tlist as input, which is not how postgres_fdw uses it; yet it could be the right thing for some other FDW. (In the bigger picture, setrefs.c has no business assuming this for the other expression fields of a ForeignScan either.) The right fix therefore would be to expand the FDW API so that the FDW could inform setrefs.c how it intends to evaluate these various expressions. We can't change that in the back branches though, and we also can't just summarily change setrefs.c's behavior there, or we're likely to break external FDWs. As a stopgap, therefore, hack up postgres_fdw so that it won't attempt to send targetlist entries that look exactly like the fdw_exprs entries they'd produce. In most cases this actually produces a superior plan, IMO, with less data needing to be transmitted and returned; so we probably ought to think harder about whether we should ship tlist expressions at all when they don't contain any foreign Vars or Aggs. But that's an optimization not a bug fix so I left it for later. One case where this produces an inferior plan is where the expression in question is actually a GROUP BY expression: then the restriction prevents us from using remote grouping. It might be possible to work around that (since that would reduce to group-by-a-constant on the remote side); but it seems like a pretty unlikely corner case, so I'm not sure it's worth expending effort solely to improve that. In any case the right long-term answer is to fix the API as sketched above, and then revert this hack. Per bug #15781 from Sean Johnston. Back-patch to v10 where the problem was introduced. Discussion: https://postgr.es/m/15781-2601b1002bad087c@postgresql.org
2019-04-27 19:15:54 +02:00
-- Remote aggregate in combination with a local Param (for the output
-- of an initplan) can be trouble, per bug #15781
explain (verbose, costs off)
select exists(select 1 from pg_enum), sum(c1) from ft1;
QUERY PLAN
--------------------------------------------------
Foreign Scan
Output: $0, (sum(ft1.c1))
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT sum("C 1") FROM "S 1"."T 1"
InitPlan 1 (returns $0)
-> Seq Scan on pg_catalog.pg_enum
(6 rows)
select exists(select 1 from pg_enum), sum(c1) from ft1;
exists | sum
--------+--------
t | 500500
(1 row)
explain (verbose, costs off)
select exists(select 1 from pg_enum), sum(c1) from ft1 group by 1;
QUERY PLAN
---------------------------------------------------
GroupAggregate
Remove redundant grouping and DISTINCT columns. Avoid explicitly grouping by columns that we know are redundant for sorting, for example we need group by only one of x and y in SELECT ... WHERE x = y GROUP BY x, y This comes up more often than you might think, as shown by the changes in the regression tests. It's nearly free to detect too, since we are just piggybacking on the existing logic that detects redundant pathkeys. (In some of the existing plans that change, it's visible that a sort step preceding the grouping step already didn't bother to sort by the redundant column, making the old plan a bit silly-looking.) To do this, build processed_groupClause and processed_distinctClause lists that omit any provably-redundant sort items, and consult those not the originals where relevant. This means that within the planner, one should usually consult root->processed_groupClause or root->processed_distinctClause if one wants to know which columns are to be grouped on; but to check whether grouping or distinct-ing is happening at all, check non-NIL-ness of parse->groupClause or parse->distinctClause. This is comparable to longstanding rules about handling the HAVING clause, so I don't think it'll be a huge maintenance problem. nodeAgg.c also needs minor mods, because it's now possible to generate AGG_PLAIN and AGG_SORTED Agg nodes with zero grouping columns. Patch by me; thanks to Richard Guo and David Rowley for review. Discussion: https://postgr.es/m/185315.1672179489@sss.pgh.pa.us
2023-01-18 18:37:57 +01:00
Output: $0, sum(ft1.c1)
Avoid postgres_fdw crash for a targetlist entry that's just a Param. foreign_grouping_ok() is willing to put fairly arbitrary expressions into the targetlist of a remote SELECT that's doing grouping or aggregation on the remote side, including expressions that have no foreign component to them at all. This is possibly a bit dubious from an efficiency standpoint; but it rises to the level of a crash-causing bug if the expression is just a Param or non-foreign Var. In that case, the expression will necessarily also appear in the fdw_exprs list of values we need to send to the remote server, and then setrefs.c's set_foreignscan_references will mistakenly replace the fdw_exprs entry with a Var referencing the targetlist result. The root cause of this problem is bad design in commit e7cb7ee14: it put logic into set_foreignscan_references that IMV is postgres_fdw-specific, and yet this bug shows that it isn't postgres_fdw-specific enough. The transformation being done on fdw_exprs assumes that fdw_exprs is to be evaluated with the fdw_scan_tlist as input, which is not how postgres_fdw uses it; yet it could be the right thing for some other FDW. (In the bigger picture, setrefs.c has no business assuming this for the other expression fields of a ForeignScan either.) The right fix therefore would be to expand the FDW API so that the FDW could inform setrefs.c how it intends to evaluate these various expressions. We can't change that in the back branches though, and we also can't just summarily change setrefs.c's behavior there, or we're likely to break external FDWs. As a stopgap, therefore, hack up postgres_fdw so that it won't attempt to send targetlist entries that look exactly like the fdw_exprs entries they'd produce. In most cases this actually produces a superior plan, IMO, with less data needing to be transmitted and returned; so we probably ought to think harder about whether we should ship tlist expressions at all when they don't contain any foreign Vars or Aggs. But that's an optimization not a bug fix so I left it for later. One case where this produces an inferior plan is where the expression in question is actually a GROUP BY expression: then the restriction prevents us from using remote grouping. It might be possible to work around that (since that would reduce to group-by-a-constant on the remote side); but it seems like a pretty unlikely corner case, so I'm not sure it's worth expending effort solely to improve that. In any case the right long-term answer is to fix the API as sketched above, and then revert this hack. Per bug #15781 from Sean Johnston. Back-patch to v10 where the problem was introduced. Discussion: https://postgr.es/m/15781-2601b1002bad087c@postgresql.org
2019-04-27 19:15:54 +02:00
InitPlan 1 (returns $0)
-> Seq Scan on pg_catalog.pg_enum
-> Foreign Scan on public.ft1
Remove redundant grouping and DISTINCT columns. Avoid explicitly grouping by columns that we know are redundant for sorting, for example we need group by only one of x and y in SELECT ... WHERE x = y GROUP BY x, y This comes up more often than you might think, as shown by the changes in the regression tests. It's nearly free to detect too, since we are just piggybacking on the existing logic that detects redundant pathkeys. (In some of the existing plans that change, it's visible that a sort step preceding the grouping step already didn't bother to sort by the redundant column, making the old plan a bit silly-looking.) To do this, build processed_groupClause and processed_distinctClause lists that omit any provably-redundant sort items, and consult those not the originals where relevant. This means that within the planner, one should usually consult root->processed_groupClause or root->processed_distinctClause if one wants to know which columns are to be grouped on; but to check whether grouping or distinct-ing is happening at all, check non-NIL-ness of parse->groupClause or parse->distinctClause. This is comparable to longstanding rules about handling the HAVING clause, so I don't think it'll be a huge maintenance problem. nodeAgg.c also needs minor mods, because it's now possible to generate AGG_PLAIN and AGG_SORTED Agg nodes with zero grouping columns. Patch by me; thanks to Richard Guo and David Rowley for review. Discussion: https://postgr.es/m/185315.1672179489@sss.pgh.pa.us
2023-01-18 18:37:57 +01:00
Output: ft1.c1
Avoid postgres_fdw crash for a targetlist entry that's just a Param. foreign_grouping_ok() is willing to put fairly arbitrary expressions into the targetlist of a remote SELECT that's doing grouping or aggregation on the remote side, including expressions that have no foreign component to them at all. This is possibly a bit dubious from an efficiency standpoint; but it rises to the level of a crash-causing bug if the expression is just a Param or non-foreign Var. In that case, the expression will necessarily also appear in the fdw_exprs list of values we need to send to the remote server, and then setrefs.c's set_foreignscan_references will mistakenly replace the fdw_exprs entry with a Var referencing the targetlist result. The root cause of this problem is bad design in commit e7cb7ee14: it put logic into set_foreignscan_references that IMV is postgres_fdw-specific, and yet this bug shows that it isn't postgres_fdw-specific enough. The transformation being done on fdw_exprs assumes that fdw_exprs is to be evaluated with the fdw_scan_tlist as input, which is not how postgres_fdw uses it; yet it could be the right thing for some other FDW. (In the bigger picture, setrefs.c has no business assuming this for the other expression fields of a ForeignScan either.) The right fix therefore would be to expand the FDW API so that the FDW could inform setrefs.c how it intends to evaluate these various expressions. We can't change that in the back branches though, and we also can't just summarily change setrefs.c's behavior there, or we're likely to break external FDWs. As a stopgap, therefore, hack up postgres_fdw so that it won't attempt to send targetlist entries that look exactly like the fdw_exprs entries they'd produce. In most cases this actually produces a superior plan, IMO, with less data needing to be transmitted and returned; so we probably ought to think harder about whether we should ship tlist expressions at all when they don't contain any foreign Vars or Aggs. But that's an optimization not a bug fix so I left it for later. One case where this produces an inferior plan is where the expression in question is actually a GROUP BY expression: then the restriction prevents us from using remote grouping. It might be possible to work around that (since that would reduce to group-by-a-constant on the remote side); but it seems like a pretty unlikely corner case, so I'm not sure it's worth expending effort solely to improve that. In any case the right long-term answer is to fix the API as sketched above, and then revert this hack. Per bug #15781 from Sean Johnston. Back-patch to v10 where the problem was introduced. Discussion: https://postgr.es/m/15781-2601b1002bad087c@postgresql.org
2019-04-27 19:15:54 +02:00
Remote SQL: SELECT "C 1" FROM "S 1"."T 1"
Remove redundant grouping and DISTINCT columns. Avoid explicitly grouping by columns that we know are redundant for sorting, for example we need group by only one of x and y in SELECT ... WHERE x = y GROUP BY x, y This comes up more often than you might think, as shown by the changes in the regression tests. It's nearly free to detect too, since we are just piggybacking on the existing logic that detects redundant pathkeys. (In some of the existing plans that change, it's visible that a sort step preceding the grouping step already didn't bother to sort by the redundant column, making the old plan a bit silly-looking.) To do this, build processed_groupClause and processed_distinctClause lists that omit any provably-redundant sort items, and consult those not the originals where relevant. This means that within the planner, one should usually consult root->processed_groupClause or root->processed_distinctClause if one wants to know which columns are to be grouped on; but to check whether grouping or distinct-ing is happening at all, check non-NIL-ness of parse->groupClause or parse->distinctClause. This is comparable to longstanding rules about handling the HAVING clause, so I don't think it'll be a huge maintenance problem. nodeAgg.c also needs minor mods, because it's now possible to generate AGG_PLAIN and AGG_SORTED Agg nodes with zero grouping columns. Patch by me; thanks to Richard Guo and David Rowley for review. Discussion: https://postgr.es/m/185315.1672179489@sss.pgh.pa.us
2023-01-18 18:37:57 +01:00
(7 rows)
Avoid postgres_fdw crash for a targetlist entry that's just a Param. foreign_grouping_ok() is willing to put fairly arbitrary expressions into the targetlist of a remote SELECT that's doing grouping or aggregation on the remote side, including expressions that have no foreign component to them at all. This is possibly a bit dubious from an efficiency standpoint; but it rises to the level of a crash-causing bug if the expression is just a Param or non-foreign Var. In that case, the expression will necessarily also appear in the fdw_exprs list of values we need to send to the remote server, and then setrefs.c's set_foreignscan_references will mistakenly replace the fdw_exprs entry with a Var referencing the targetlist result. The root cause of this problem is bad design in commit e7cb7ee14: it put logic into set_foreignscan_references that IMV is postgres_fdw-specific, and yet this bug shows that it isn't postgres_fdw-specific enough. The transformation being done on fdw_exprs assumes that fdw_exprs is to be evaluated with the fdw_scan_tlist as input, which is not how postgres_fdw uses it; yet it could be the right thing for some other FDW. (In the bigger picture, setrefs.c has no business assuming this for the other expression fields of a ForeignScan either.) The right fix therefore would be to expand the FDW API so that the FDW could inform setrefs.c how it intends to evaluate these various expressions. We can't change that in the back branches though, and we also can't just summarily change setrefs.c's behavior there, or we're likely to break external FDWs. As a stopgap, therefore, hack up postgres_fdw so that it won't attempt to send targetlist entries that look exactly like the fdw_exprs entries they'd produce. In most cases this actually produces a superior plan, IMO, with less data needing to be transmitted and returned; so we probably ought to think harder about whether we should ship tlist expressions at all when they don't contain any foreign Vars or Aggs. But that's an optimization not a bug fix so I left it for later. One case where this produces an inferior plan is where the expression in question is actually a GROUP BY expression: then the restriction prevents us from using remote grouping. It might be possible to work around that (since that would reduce to group-by-a-constant on the remote side); but it seems like a pretty unlikely corner case, so I'm not sure it's worth expending effort solely to improve that. In any case the right long-term answer is to fix the API as sketched above, and then revert this hack. Per bug #15781 from Sean Johnston. Back-patch to v10 where the problem was introduced. Discussion: https://postgr.es/m/15781-2601b1002bad087c@postgresql.org
2019-04-27 19:15:54 +02:00
select exists(select 1 from pg_enum), sum(c1) from ft1 group by 1;
exists | sum
--------+--------
t | 500500
(1 row)
-- Testing ORDER BY, DISTINCT, FILTER, Ordered-sets and VARIADIC within aggregates
-- ORDER BY within aggregate, same column used to order
explain (verbose, costs off)
select array_agg(c1 order by c1) from ft1 where c1 < 100 group by c2 order by 1;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: (array_agg(c1 ORDER BY c1)), c2
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT array_agg("C 1" ORDER BY "C 1" ASC NULLS LAST), c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) GROUP BY 2 ORDER BY array_agg("C 1" ORDER BY "C 1" ASC NULLS LAST) ASC NULLS LAST
(4 rows)
select array_agg(c1 order by c1) from ft1 where c1 < 100 group by c2 order by 1;
array_agg
--------------------------------
{1,11,21,31,41,51,61,71,81,91}
{2,12,22,32,42,52,62,72,82,92}
{3,13,23,33,43,53,63,73,83,93}
{4,14,24,34,44,54,64,74,84,94}
{5,15,25,35,45,55,65,75,85,95}
{6,16,26,36,46,56,66,76,86,96}
{7,17,27,37,47,57,67,77,87,97}
{8,18,28,38,48,58,68,78,88,98}
{9,19,29,39,49,59,69,79,89,99}
{10,20,30,40,50,60,70,80,90}
(10 rows)
-- ORDER BY within aggregate, different column used to order also using DESC
explain (verbose, costs off)
select array_agg(c5 order by c1 desc) from ft2 where c2 = 6 and c1 < 50;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: (array_agg(c5 ORDER BY c1 DESC))
Relations: Aggregate on (public.ft2)
Remote SQL: SELECT array_agg(c5 ORDER BY "C 1" DESC NULLS FIRST) FROM "S 1"."T 1" WHERE (("C 1" < 50)) AND ((c2 = 6))
(4 rows)
select array_agg(c5 order by c1 desc) from ft2 where c2 = 6 and c1 < 50;
array_agg
------------------------------------------------------------------------------------------------------------------------------------------
{"Mon Feb 16 00:00:00 1970","Fri Feb 06 00:00:00 1970","Tue Jan 27 00:00:00 1970","Sat Jan 17 00:00:00 1970","Wed Jan 07 00:00:00 1970"}
(1 row)
-- DISTINCT within aggregate
explain (verbose, costs off)
select array_agg(distinct (t1.c1)%5) from ft4 t1 full join ft5 t2 on (t1.c1 = t2.c1) where t1.c1 < 20 or (t1.c1 is null and t2.c1 < 5) group by (t2.c1)%3 order by 1;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: (array_agg(DISTINCT (t1.c1 % 5))), ((t2.c1 % 3))
Relations: Aggregate on ((public.ft4 t1) FULL JOIN (public.ft5 t2))
Remote SQL: SELECT array_agg(DISTINCT (r1.c1 % 5)), (r2.c1 % 3) FROM ("S 1"."T 3" r1 FULL JOIN "S 1"."T 4" r2 ON (((r1.c1 = r2.c1)))) WHERE (((r1.c1 < 20) OR ((r1.c1 IS NULL) AND (r2.c1 < 5)))) GROUP BY 2 ORDER BY array_agg(DISTINCT (r1.c1 % 5)) ASC NULLS LAST
(4 rows)
select array_agg(distinct (t1.c1)%5) from ft4 t1 full join ft5 t2 on (t1.c1 = t2.c1) where t1.c1 < 20 or (t1.c1 is null and t2.c1 < 5) group by (t2.c1)%3 order by 1;
array_agg
--------------
{0,1,2,3,4}
{1,2,3,NULL}
(2 rows)
-- DISTINCT combined with ORDER BY within aggregate
explain (verbose, costs off)
select array_agg(distinct (t1.c1)%5 order by (t1.c1)%5) from ft4 t1 full join ft5 t2 on (t1.c1 = t2.c1) where t1.c1 < 20 or (t1.c1 is null and t2.c1 < 5) group by (t2.c1)%3 order by 1;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: (array_agg(DISTINCT (t1.c1 % 5) ORDER BY (t1.c1 % 5))), ((t2.c1 % 3))
Relations: Aggregate on ((public.ft4 t1) FULL JOIN (public.ft5 t2))
Remote SQL: SELECT array_agg(DISTINCT (r1.c1 % 5) ORDER BY ((r1.c1 % 5)) ASC NULLS LAST), (r2.c1 % 3) FROM ("S 1"."T 3" r1 FULL JOIN "S 1"."T 4" r2 ON (((r1.c1 = r2.c1)))) WHERE (((r1.c1 < 20) OR ((r1.c1 IS NULL) AND (r2.c1 < 5)))) GROUP BY 2 ORDER BY array_agg(DISTINCT (r1.c1 % 5) ORDER BY ((r1.c1 % 5)) ASC NULLS LAST) ASC NULLS LAST
(4 rows)
select array_agg(distinct (t1.c1)%5 order by (t1.c1)%5) from ft4 t1 full join ft5 t2 on (t1.c1 = t2.c1) where t1.c1 < 20 or (t1.c1 is null and t2.c1 < 5) group by (t2.c1)%3 order by 1;
array_agg
--------------
{0,1,2,3,4}
{1,2,3,NULL}
(2 rows)
explain (verbose, costs off)
select array_agg(distinct (t1.c1)%5 order by (t1.c1)%5 desc nulls last) from ft4 t1 full join ft5 t2 on (t1.c1 = t2.c1) where t1.c1 < 20 or (t1.c1 is null and t2.c1 < 5) group by (t2.c1)%3 order by 1;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: (array_agg(DISTINCT (t1.c1 % 5) ORDER BY (t1.c1 % 5) DESC NULLS LAST)), ((t2.c1 % 3))
Relations: Aggregate on ((public.ft4 t1) FULL JOIN (public.ft5 t2))
Remote SQL: SELECT array_agg(DISTINCT (r1.c1 % 5) ORDER BY ((r1.c1 % 5)) DESC NULLS LAST), (r2.c1 % 3) FROM ("S 1"."T 3" r1 FULL JOIN "S 1"."T 4" r2 ON (((r1.c1 = r2.c1)))) WHERE (((r1.c1 < 20) OR ((r1.c1 IS NULL) AND (r2.c1 < 5)))) GROUP BY 2 ORDER BY array_agg(DISTINCT (r1.c1 % 5) ORDER BY ((r1.c1 % 5)) DESC NULLS LAST) ASC NULLS LAST
(4 rows)
select array_agg(distinct (t1.c1)%5 order by (t1.c1)%5 desc nulls last) from ft4 t1 full join ft5 t2 on (t1.c1 = t2.c1) where t1.c1 < 20 or (t1.c1 is null and t2.c1 < 5) group by (t2.c1)%3 order by 1;
array_agg
--------------
{3,2,1,NULL}
{4,3,2,1,0}
(2 rows)
-- FILTER within aggregate
explain (verbose, costs off)
select sum(c1) filter (where c1 < 100 and c2 > 5) from ft1 group by c2 order by 1 nulls last;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: (sum(c1) FILTER (WHERE ((c1 < 100) AND (c2 > 5)))), c2
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT sum("C 1") FILTER (WHERE (("C 1" < 100) AND (c2 > 5))), c2 FROM "S 1"."T 1" GROUP BY 2 ORDER BY sum("C 1") FILTER (WHERE (("C 1" < 100) AND (c2 > 5))) ASC NULLS LAST
(4 rows)
select sum(c1) filter (where c1 < 100 and c2 > 5) from ft1 group by c2 order by 1 nulls last;
sum
-----
510
520
530
540
(10 rows)
-- DISTINCT, ORDER BY and FILTER within aggregate
explain (verbose, costs off)
select sum(c1%3), sum(distinct c1%3 order by c1%3) filter (where c1%3 < 2), c2 from ft1 where c2 = 6 group by c2;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: (sum((c1 % 3))), (sum(DISTINCT (c1 % 3) ORDER BY (c1 % 3)) FILTER (WHERE ((c1 % 3) < 2))), c2
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT sum(("C 1" % 3)), sum(DISTINCT ("C 1" % 3) ORDER BY (("C 1" % 3)) ASC NULLS LAST) FILTER (WHERE (("C 1" % 3) < 2)), c2 FROM "S 1"."T 1" WHERE ((c2 = 6)) GROUP BY 3
(4 rows)
select sum(c1%3), sum(distinct c1%3 order by c1%3) filter (where c1%3 < 2), c2 from ft1 where c2 = 6 group by c2;
sum | sum | c2
-----+-----+----
99 | 1 | 6
(1 row)
-- Outer query is aggregation query
explain (verbose, costs off)
select distinct (select count(*) filter (where t2.c2 = 6 and t2.c1 < 10) from ft1 t1 where t1.c1 = 6) from ft2 t2 where t2.c2 % 6 = 0 order by 1;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------
Unique
Output: ((SubPlan 1))
-> Sort
Output: ((SubPlan 1))
Sort Key: ((SubPlan 1))
-> Foreign Scan
Output: (SubPlan 1)
Relations: Aggregate on (public.ft2 t2)
Remote SQL: SELECT count(*) FILTER (WHERE ((c2 = 6) AND ("C 1" < 10))) FROM "S 1"."T 1" WHERE (((c2 % 6) = 0))
SubPlan 1
-> Foreign Scan on public.ft1 t1
Output: (count(*) FILTER (WHERE ((t2.c2 = 6) AND (t2.c1 < 10))))
Remote SQL: SELECT NULL FROM "S 1"."T 1" WHERE (("C 1" = 6))
(13 rows)
select distinct (select count(*) filter (where t2.c2 = 6 and t2.c1 < 10) from ft1 t1 where t1.c1 = 6) from ft2 t2 where t2.c2 % 6 = 0 order by 1;
count
-------
1
(1 row)
-- Inner query is aggregation query
explain (verbose, costs off)
select distinct (select count(t1.c1) filter (where t2.c2 = 6 and t2.c1 < 10) from ft1 t1 where t1.c1 = 6) from ft2 t2 where t2.c2 % 6 = 0 order by 1;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------
Unique
Output: ((SubPlan 1))
-> Sort
Output: ((SubPlan 1))
Sort Key: ((SubPlan 1))
-> Foreign Scan on public.ft2 t2
Output: (SubPlan 1)
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (((c2 % 6) = 0))
SubPlan 1
-> Foreign Scan
Output: (count(t1.c1) FILTER (WHERE ((t2.c2 = 6) AND (t2.c1 < 10))))
Relations: Aggregate on (public.ft1 t1)
Remote SQL: SELECT count("C 1") FILTER (WHERE (($1::integer = 6) AND ($2::integer < 10))) FROM "S 1"."T 1" WHERE (("C 1" = 6))
(13 rows)
select distinct (select count(t1.c1) filter (where t2.c2 = 6 and t2.c1 < 10) from ft1 t1 where t1.c1 = 6) from ft2 t2 where t2.c2 % 6 = 0 order by 1;
count
-------
0
1
(2 rows)
-- Aggregate not pushed down as FILTER condition is not pushable
explain (verbose, costs off)
select sum(c1) filter (where (c1 / c1) * random() <= 1) from ft1 group by c2 order by 1;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------
Sort
Output: (sum(c1) FILTER (WHERE ((((c1 / c1))::double precision * random()) <= '1'::double precision))), c2
Sort Key: (sum(ft1.c1) FILTER (WHERE ((((ft1.c1 / ft1.c1))::double precision * random()) <= '1'::double precision)))
-> HashAggregate
Output: sum(c1) FILTER (WHERE ((((c1 / c1))::double precision * random()) <= '1'::double precision)), c2
Group Key: ft1.c2
-> Foreign Scan on public.ft1
Output: c1, c2
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1"
(9 rows)
explain (verbose, costs off)
select sum(c2) filter (where c2 in (select c2 from ft1 where c2 < 5)) from ft1;
QUERY PLAN
-------------------------------------------------------------------
Aggregate
Output: sum(ft1.c2) FILTER (WHERE (hashed SubPlan 1))
-> Foreign Scan on public.ft1
Output: ft1.c2
Remote SQL: SELECT c2 FROM "S 1"."T 1"
SubPlan 1
-> Foreign Scan on public.ft1 ft1_1
Output: ft1_1.c2
Remote SQL: SELECT c2 FROM "S 1"."T 1" WHERE ((c2 < 5))
(9 rows)
-- Ordered-sets within aggregate
explain (verbose, costs off)
select c2, rank('10'::varchar) within group (order by c6), percentile_cont(c2/10::numeric) within group (order by c1) from ft1 where c2 < 10 group by c2 having percentile_cont(c2/10::numeric) within group (order by c1) < 500 order by c2;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort
Output: c2, (rank('10'::character varying) WITHIN GROUP (ORDER BY c6)), (percentile_cont((((c2)::numeric / '10'::numeric))::double precision) WITHIN GROUP (ORDER BY ((c1)::double precision)))
Sort Key: ft1.c2
-> Foreign Scan
Output: c2, (rank('10'::character varying) WITHIN GROUP (ORDER BY c6)), (percentile_cont((((c2)::numeric / '10'::numeric))::double precision) WITHIN GROUP (ORDER BY ((c1)::double precision)))
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT c2, rank('10'::character varying) WITHIN GROUP (ORDER BY c6 ASC NULLS LAST), percentile_cont((c2 / 10::numeric)) WITHIN GROUP (ORDER BY ("C 1") ASC NULLS LAST) FROM "S 1"."T 1" WHERE ((c2 < 10)) GROUP BY 1 HAVING ((percentile_cont((c2 / 10::numeric)) WITHIN GROUP (ORDER BY ("C 1") ASC NULLS LAST) < 500::double precision))
(7 rows)
select c2, rank('10'::varchar) within group (order by c6), percentile_cont(c2/10::numeric) within group (order by c1) from ft1 where c2 < 10 group by c2 having percentile_cont(c2/10::numeric) within group (order by c1) < 500 order by c2;
c2 | rank | percentile_cont
----+------+-----------------
0 | 101 | 10
1 | 101 | 100
2 | 1 | 200
3 | 1 | 300
4 | 1 | 400
(5 rows)
-- Using multiple arguments within aggregates
explain (verbose, costs off)
select c1, rank(c1, c2) within group (order by c1, c2) from ft1 group by c1, c2 having c1 = 6 order by 1;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: c1, (rank(c1, c2) WITHIN GROUP (ORDER BY c1, c2)), c2
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT "C 1", rank("C 1", c2) WITHIN GROUP (ORDER BY "C 1" ASC NULLS LAST, c2 ASC NULLS LAST), c2 FROM "S 1"."T 1" WHERE (("C 1" = 6)) GROUP BY 1, 3
(4 rows)
select c1, rank(c1, c2) within group (order by c1, c2) from ft1 group by c1, c2 having c1 = 6 order by 1;
c1 | rank
----+------
6 | 1
(1 row)
-- User defined function for user defined aggregate, VARIADIC
create function least_accum(anyelement, variadic anyarray)
returns anyelement language sql as
'select least($1, min($2[i])) from generate_subscripts($2,1) g(i)';
create aggregate least_agg(variadic items anyarray) (
stype = anyelement, sfunc = least_accum
);
-- Disable hash aggregation for plan stability.
set enable_hashagg to false;
-- Not pushed down due to user defined aggregate
explain (verbose, costs off)
select c2, least_agg(c1) from ft1 group by c2 order by c2;
QUERY PLAN
----------------------------------------------------------------------------------
GroupAggregate
Output: c2, least_agg(VARIADIC ARRAY[c1])
Group Key: ft1.c2
-> Foreign Scan on public.ft1
Output: c2, c1
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" ORDER BY c2 ASC NULLS LAST
(6 rows)
-- Add function and aggregate into extension
alter extension postgres_fdw add function least_accum(anyelement, variadic anyarray);
alter extension postgres_fdw add aggregate least_agg(variadic items anyarray);
alter server loopback options (set extensions 'postgres_fdw');
-- Now aggregate will be pushed. Aggregate will display VARIADIC argument.
explain (verbose, costs off)
select c2, least_agg(c1) from ft1 where c2 < 100 group by c2 order by c2;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------
Sort
Output: c2, (least_agg(VARIADIC ARRAY[c1]))
Sort Key: ft1.c2
-> Foreign Scan
Output: c2, (least_agg(VARIADIC ARRAY[c1]))
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT c2, public.least_agg(VARIADIC ARRAY["C 1"]) FROM "S 1"."T 1" WHERE ((c2 < 100)) GROUP BY 1
(7 rows)
select c2, least_agg(c1) from ft1 where c2 < 100 group by c2 order by c2;
c2 | least_agg
----+-----------
0 | 10
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 6
7 | 7
8 | 8
9 | 9
(10 rows)
-- Remove function and aggregate from extension
alter extension postgres_fdw drop function least_accum(anyelement, variadic anyarray);
alter extension postgres_fdw drop aggregate least_agg(variadic items anyarray);
alter server loopback options (set extensions 'postgres_fdw');
-- Not pushed down as we have dropped objects from extension.
explain (verbose, costs off)
select c2, least_agg(c1) from ft1 group by c2 order by c2;
QUERY PLAN
----------------------------------------------------------------------------------
GroupAggregate
Output: c2, least_agg(VARIADIC ARRAY[c1])
Group Key: ft1.c2
-> Foreign Scan on public.ft1
Output: c2, c1
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" ORDER BY c2 ASC NULLS LAST
(6 rows)
-- Cleanup
reset enable_hashagg;
drop aggregate least_agg(variadic items anyarray);
drop function least_accum(anyelement, variadic anyarray);
-- Testing USING OPERATOR() in ORDER BY within aggregate.
-- For this, we need user defined operators along with operator family and
-- operator class. Create those and then add them in extension. Note that
-- user defined objects are considered unshippable unless they are part of
-- the extension.
create operator public.<^ (
leftarg = int4,
rightarg = int4,
procedure = int4eq
);
create operator public.=^ (
leftarg = int4,
rightarg = int4,
procedure = int4lt
);
create operator public.>^ (
leftarg = int4,
rightarg = int4,
procedure = int4gt
);
create operator family my_op_family using btree;
create function my_op_cmp(a int, b int) returns int as
$$begin return btint4cmp(a, b); end $$ language plpgsql;
create operator class my_op_class for type int using btree family my_op_family as
operator 1 public.<^,
operator 3 public.=^,
operator 5 public.>^,
function 1 my_op_cmp(int, int);
-- This will not be pushed as user defined sort operator is not part of the
-- extension yet.
explain (verbose, costs off)
select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
Improve performance of ORDER BY / DISTINCT aggregates ORDER BY / DISTINCT aggreagtes have, since implemented in Postgres, been executed by always performing a sort in nodeAgg.c to sort the tuples in the current group into the correct order before calling the transition function on the sorted tuples. This was not great as often there might be an index that could have provided pre-sorted input and allowed the transition functions to be called as the rows come in, rather than having to store them in a tuplestore in order to sort them once all the tuples for the group have arrived. Here we change the planner so it requests a path with a sort order which supports the most amount of ORDER BY / DISTINCT aggregate functions and add new code to the executor to allow it to support the processing of ORDER BY / DISTINCT aggregates where the tuples are already sorted in the correct order. Since there can be many ORDER BY / DISTINCT aggregates in any given query level, it's very possible that we can't find an order that suits all of these aggregates. The sort order that the planner chooses is simply the one that suits the most aggregate functions. We take the most strictly sorted variation of each order and see how many aggregate functions can use that, then we try again with the order of the remaining aggregates to see if another order would suit more aggregate functions. For example: SELECT agg(a ORDER BY a),agg2(a ORDER BY a,b) ... would request the sort order to be {a, b} because {a} is a subset of the sort order of {a,b}, but; SELECT agg(a ORDER BY a),agg2(a ORDER BY c) ... would just pick a plan ordered by {a} (we give precedence to aggregates which are earlier in the targetlist). SELECT agg(a ORDER BY a),agg2(a ORDER BY b),agg3(a ORDER BY b) ... would choose to order by {b} since two aggregates suit that vs just one that requires input ordered by {a}. Author: David Rowley Reviewed-by: Ronan Dunklau, James Coleman, Ranier Vilela, Richard Guo, Tom Lane Discussion: https://postgr.es/m/CAApHDvpHzfo92%3DR4W0%2BxVua3BUYCKMckWAmo-2t_KiXN-wYH%3Dw%40mail.gmail.com
2022-08-02 13:11:45 +02:00
QUERY PLAN
--------------------------------------------------------------------------------------------------
GroupAggregate
Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2
Improve performance of ORDER BY / DISTINCT aggregates ORDER BY / DISTINCT aggreagtes have, since implemented in Postgres, been executed by always performing a sort in nodeAgg.c to sort the tuples in the current group into the correct order before calling the transition function on the sorted tuples. This was not great as often there might be an index that could have provided pre-sorted input and allowed the transition functions to be called as the rows come in, rather than having to store them in a tuplestore in order to sort them once all the tuples for the group have arrived. Here we change the planner so it requests a path with a sort order which supports the most amount of ORDER BY / DISTINCT aggregate functions and add new code to the executor to allow it to support the processing of ORDER BY / DISTINCT aggregates where the tuples are already sorted in the correct order. Since there can be many ORDER BY / DISTINCT aggregates in any given query level, it's very possible that we can't find an order that suits all of these aggregates. The sort order that the planner chooses is simply the one that suits the most aggregate functions. We take the most strictly sorted variation of each order and see how many aggregate functions can use that, then we try again with the order of the remaining aggregates to see if another order would suit more aggregate functions. For example: SELECT agg(a ORDER BY a),agg2(a ORDER BY a,b) ... would request the sort order to be {a, b} because {a} is a subset of the sort order of {a,b}, but; SELECT agg(a ORDER BY a),agg2(a ORDER BY c) ... would just pick a plan ordered by {a} (we give precedence to aggregates which are earlier in the targetlist). SELECT agg(a ORDER BY a),agg2(a ORDER BY b),agg3(a ORDER BY b) ... would choose to order by {b} since two aggregates suit that vs just one that requires input ordered by {a}. Author: David Rowley Reviewed-by: Ronan Dunklau, James Coleman, Ranier Vilela, Richard Guo, Tom Lane Discussion: https://postgr.es/m/CAApHDvpHzfo92%3DR4W0%2BxVua3BUYCKMckWAmo-2t_KiXN-wYH%3Dw%40mail.gmail.com
2022-08-02 13:11:45 +02:00
-> Sort
Remove redundant grouping and DISTINCT columns. Avoid explicitly grouping by columns that we know are redundant for sorting, for example we need group by only one of x and y in SELECT ... WHERE x = y GROUP BY x, y This comes up more often than you might think, as shown by the changes in the regression tests. It's nearly free to detect too, since we are just piggybacking on the existing logic that detects redundant pathkeys. (In some of the existing plans that change, it's visible that a sort step preceding the grouping step already didn't bother to sort by the redundant column, making the old plan a bit silly-looking.) To do this, build processed_groupClause and processed_distinctClause lists that omit any provably-redundant sort items, and consult those not the originals where relevant. This means that within the planner, one should usually consult root->processed_groupClause or root->processed_distinctClause if one wants to know which columns are to be grouped on; but to check whether grouping or distinct-ing is happening at all, check non-NIL-ness of parse->groupClause or parse->distinctClause. This is comparable to longstanding rules about handling the HAVING clause, so I don't think it'll be a huge maintenance problem. nodeAgg.c also needs minor mods, because it's now possible to generate AGG_PLAIN and AGG_SORTED Agg nodes with zero grouping columns. Patch by me; thanks to Richard Guo and David Rowley for review. Discussion: https://postgr.es/m/185315.1672179489@sss.pgh.pa.us
2023-01-18 18:37:57 +01:00
Output: c1, c2
Improve performance of ORDER BY / DISTINCT aggregates ORDER BY / DISTINCT aggreagtes have, since implemented in Postgres, been executed by always performing a sort in nodeAgg.c to sort the tuples in the current group into the correct order before calling the transition function on the sorted tuples. This was not great as often there might be an index that could have provided pre-sorted input and allowed the transition functions to be called as the rows come in, rather than having to store them in a tuplestore in order to sort them once all the tuples for the group have arrived. Here we change the planner so it requests a path with a sort order which supports the most amount of ORDER BY / DISTINCT aggregate functions and add new code to the executor to allow it to support the processing of ORDER BY / DISTINCT aggregates where the tuples are already sorted in the correct order. Since there can be many ORDER BY / DISTINCT aggregates in any given query level, it's very possible that we can't find an order that suits all of these aggregates. The sort order that the planner chooses is simply the one that suits the most aggregate functions. We take the most strictly sorted variation of each order and see how many aggregate functions can use that, then we try again with the order of the remaining aggregates to see if another order would suit more aggregate functions. For example: SELECT agg(a ORDER BY a),agg2(a ORDER BY a,b) ... would request the sort order to be {a, b} because {a} is a subset of the sort order of {a,b}, but; SELECT agg(a ORDER BY a),agg2(a ORDER BY c) ... would just pick a plan ordered by {a} (we give precedence to aggregates which are earlier in the targetlist). SELECT agg(a ORDER BY a),agg2(a ORDER BY b),agg3(a ORDER BY b) ... would choose to order by {b} since two aggregates suit that vs just one that requires input ordered by {a}. Author: David Rowley Reviewed-by: Ronan Dunklau, James Coleman, Ranier Vilela, Richard Guo, Tom Lane Discussion: https://postgr.es/m/CAApHDvpHzfo92%3DR4W0%2BxVua3BUYCKMckWAmo-2t_KiXN-wYH%3Dw%40mail.gmail.com
2022-08-02 13:11:45 +02:00
Sort Key: ft2.c1 USING <^
-> Foreign Scan on public.ft2
Remove redundant grouping and DISTINCT columns. Avoid explicitly grouping by columns that we know are redundant for sorting, for example we need group by only one of x and y in SELECT ... WHERE x = y GROUP BY x, y This comes up more often than you might think, as shown by the changes in the regression tests. It's nearly free to detect too, since we are just piggybacking on the existing logic that detects redundant pathkeys. (In some of the existing plans that change, it's visible that a sort step preceding the grouping step already didn't bother to sort by the redundant column, making the old plan a bit silly-looking.) To do this, build processed_groupClause and processed_distinctClause lists that omit any provably-redundant sort items, and consult those not the originals where relevant. This means that within the planner, one should usually consult root->processed_groupClause or root->processed_distinctClause if one wants to know which columns are to be grouped on; but to check whether grouping or distinct-ing is happening at all, check non-NIL-ness of parse->groupClause or parse->distinctClause. This is comparable to longstanding rules about handling the HAVING clause, so I don't think it'll be a huge maintenance problem. nodeAgg.c also needs minor mods, because it's now possible to generate AGG_PLAIN and AGG_SORTED Agg nodes with zero grouping columns. Patch by me; thanks to Richard Guo and David Rowley for review. Discussion: https://postgr.es/m/185315.1672179489@sss.pgh.pa.us
2023-01-18 18:37:57 +01:00
Output: c1, c2
Improve performance of ORDER BY / DISTINCT aggregates ORDER BY / DISTINCT aggreagtes have, since implemented in Postgres, been executed by always performing a sort in nodeAgg.c to sort the tuples in the current group into the correct order before calling the transition function on the sorted tuples. This was not great as often there might be an index that could have provided pre-sorted input and allowed the transition functions to be called as the rows come in, rather than having to store them in a tuplestore in order to sort them once all the tuples for the group have arrived. Here we change the planner so it requests a path with a sort order which supports the most amount of ORDER BY / DISTINCT aggregate functions and add new code to the executor to allow it to support the processing of ORDER BY / DISTINCT aggregates where the tuples are already sorted in the correct order. Since there can be many ORDER BY / DISTINCT aggregates in any given query level, it's very possible that we can't find an order that suits all of these aggregates. The sort order that the planner chooses is simply the one that suits the most aggregate functions. We take the most strictly sorted variation of each order and see how many aggregate functions can use that, then we try again with the order of the remaining aggregates to see if another order would suit more aggregate functions. For example: SELECT agg(a ORDER BY a),agg2(a ORDER BY a,b) ... would request the sort order to be {a, b} because {a} is a subset of the sort order of {a,b}, but; SELECT agg(a ORDER BY a),agg2(a ORDER BY c) ... would just pick a plan ordered by {a} (we give precedence to aggregates which are earlier in the targetlist). SELECT agg(a ORDER BY a),agg2(a ORDER BY b),agg3(a ORDER BY b) ... would choose to order by {b} since two aggregates suit that vs just one that requires input ordered by {a}. Author: David Rowley Reviewed-by: Ronan Dunklau, James Coleman, Ranier Vilela, Richard Guo, Tom Lane Discussion: https://postgr.es/m/CAApHDvpHzfo92%3DR4W0%2BxVua3BUYCKMckWAmo-2t_KiXN-wYH%3Dw%40mail.gmail.com
2022-08-02 13:11:45 +02:00
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
Remove redundant grouping and DISTINCT columns. Avoid explicitly grouping by columns that we know are redundant for sorting, for example we need group by only one of x and y in SELECT ... WHERE x = y GROUP BY x, y This comes up more often than you might think, as shown by the changes in the regression tests. It's nearly free to detect too, since we are just piggybacking on the existing logic that detects redundant pathkeys. (In some of the existing plans that change, it's visible that a sort step preceding the grouping step already didn't bother to sort by the redundant column, making the old plan a bit silly-looking.) To do this, build processed_groupClause and processed_distinctClause lists that omit any provably-redundant sort items, and consult those not the originals where relevant. This means that within the planner, one should usually consult root->processed_groupClause or root->processed_distinctClause if one wants to know which columns are to be grouped on; but to check whether grouping or distinct-ing is happening at all, check non-NIL-ness of parse->groupClause or parse->distinctClause. This is comparable to longstanding rules about handling the HAVING clause, so I don't think it'll be a huge maintenance problem. nodeAgg.c also needs minor mods, because it's now possible to generate AGG_PLAIN and AGG_SORTED Agg nodes with zero grouping columns. Patch by me; thanks to Richard Guo and David Rowley for review. Discussion: https://postgr.es/m/185315.1672179489@sss.pgh.pa.us
2023-01-18 18:37:57 +01:00
(8 rows)
-- This should not be pushed either.
explain (verbose, costs off)
select * from ft2 order by c1 using operator(public.<^);
QUERY PLAN
-------------------------------------------------------------------------------
Sort
Output: c1, c2, c3, c4, c5, c6, c7, c8
Sort Key: ft2.c1 USING <^
-> Foreign Scan on public.ft2
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
(6 rows)
-- Update local stats on ft2
ANALYZE ft2;
-- Add into extension
alter extension postgres_fdw add operator class my_op_class using btree;
alter extension postgres_fdw add function my_op_cmp(a int, b int);
alter extension postgres_fdw add operator family my_op_family using btree;
alter extension postgres_fdw add operator public.<^(int, int);
alter extension postgres_fdw add operator public.=^(int, int);
alter extension postgres_fdw add operator public.>^(int, int);
alter server loopback options (set extensions 'postgres_fdw');
-- Now this will be pushed as sort operator is part of the extension.
Improve performance of ORDER BY / DISTINCT aggregates ORDER BY / DISTINCT aggreagtes have, since implemented in Postgres, been executed by always performing a sort in nodeAgg.c to sort the tuples in the current group into the correct order before calling the transition function on the sorted tuples. This was not great as often there might be an index that could have provided pre-sorted input and allowed the transition functions to be called as the rows come in, rather than having to store them in a tuplestore in order to sort them once all the tuples for the group have arrived. Here we change the planner so it requests a path with a sort order which supports the most amount of ORDER BY / DISTINCT aggregate functions and add new code to the executor to allow it to support the processing of ORDER BY / DISTINCT aggregates where the tuples are already sorted in the correct order. Since there can be many ORDER BY / DISTINCT aggregates in any given query level, it's very possible that we can't find an order that suits all of these aggregates. The sort order that the planner chooses is simply the one that suits the most aggregate functions. We take the most strictly sorted variation of each order and see how many aggregate functions can use that, then we try again with the order of the remaining aggregates to see if another order would suit more aggregate functions. For example: SELECT agg(a ORDER BY a),agg2(a ORDER BY a,b) ... would request the sort order to be {a, b} because {a} is a subset of the sort order of {a,b}, but; SELECT agg(a ORDER BY a),agg2(a ORDER BY c) ... would just pick a plan ordered by {a} (we give precedence to aggregates which are earlier in the targetlist). SELECT agg(a ORDER BY a),agg2(a ORDER BY b),agg3(a ORDER BY b) ... would choose to order by {b} since two aggregates suit that vs just one that requires input ordered by {a}. Author: David Rowley Reviewed-by: Ronan Dunklau, James Coleman, Ranier Vilela, Richard Guo, Tom Lane Discussion: https://postgr.es/m/CAApHDvpHzfo92%3DR4W0%2BxVua3BUYCKMckWAmo-2t_KiXN-wYH%3Dw%40mail.gmail.com
2022-08-02 13:11:45 +02:00
alter server loopback options (add fdw_tuple_cost '0.5');
explain (verbose, costs off)
select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: (array_agg(c1 ORDER BY c1 USING <^ NULLS LAST)), c2
Relations: Aggregate on (public.ft2)
Remote SQL: SELECT array_agg("C 1" ORDER BY "C 1" USING OPERATOR(public.<^) NULLS LAST), c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6)) GROUP BY 2
(4 rows)
select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
array_agg
--------------------------------
{6,16,26,36,46,56,66,76,86,96}
(1 row)
Improve performance of ORDER BY / DISTINCT aggregates ORDER BY / DISTINCT aggreagtes have, since implemented in Postgres, been executed by always performing a sort in nodeAgg.c to sort the tuples in the current group into the correct order before calling the transition function on the sorted tuples. This was not great as often there might be an index that could have provided pre-sorted input and allowed the transition functions to be called as the rows come in, rather than having to store them in a tuplestore in order to sort them once all the tuples for the group have arrived. Here we change the planner so it requests a path with a sort order which supports the most amount of ORDER BY / DISTINCT aggregate functions and add new code to the executor to allow it to support the processing of ORDER BY / DISTINCT aggregates where the tuples are already sorted in the correct order. Since there can be many ORDER BY / DISTINCT aggregates in any given query level, it's very possible that we can't find an order that suits all of these aggregates. The sort order that the planner chooses is simply the one that suits the most aggregate functions. We take the most strictly sorted variation of each order and see how many aggregate functions can use that, then we try again with the order of the remaining aggregates to see if another order would suit more aggregate functions. For example: SELECT agg(a ORDER BY a),agg2(a ORDER BY a,b) ... would request the sort order to be {a, b} because {a} is a subset of the sort order of {a,b}, but; SELECT agg(a ORDER BY a),agg2(a ORDER BY c) ... would just pick a plan ordered by {a} (we give precedence to aggregates which are earlier in the targetlist). SELECT agg(a ORDER BY a),agg2(a ORDER BY b),agg3(a ORDER BY b) ... would choose to order by {b} since two aggregates suit that vs just one that requires input ordered by {a}. Author: David Rowley Reviewed-by: Ronan Dunklau, James Coleman, Ranier Vilela, Richard Guo, Tom Lane Discussion: https://postgr.es/m/CAApHDvpHzfo92%3DR4W0%2BxVua3BUYCKMckWAmo-2t_KiXN-wYH%3Dw%40mail.gmail.com
2022-08-02 13:11:45 +02:00
alter server loopback options (drop fdw_tuple_cost);
-- This should be pushed too.
explain (verbose, costs off)
select * from ft2 order by c1 using operator(public.<^);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft2
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER BY "C 1" USING OPERATOR(public.<^) NULLS LAST
(3 rows)
-- Remove from extension
alter extension postgres_fdw drop operator class my_op_class using btree;
alter extension postgres_fdw drop function my_op_cmp(a int, b int);
alter extension postgres_fdw drop operator family my_op_family using btree;
alter extension postgres_fdw drop operator public.<^(int, int);
alter extension postgres_fdw drop operator public.=^(int, int);
alter extension postgres_fdw drop operator public.>^(int, int);
alter server loopback options (set extensions 'postgres_fdw');
-- This will not be pushed as sort operator is now removed from the extension.
explain (verbose, costs off)
select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
Improve performance of ORDER BY / DISTINCT aggregates ORDER BY / DISTINCT aggreagtes have, since implemented in Postgres, been executed by always performing a sort in nodeAgg.c to sort the tuples in the current group into the correct order before calling the transition function on the sorted tuples. This was not great as often there might be an index that could have provided pre-sorted input and allowed the transition functions to be called as the rows come in, rather than having to store them in a tuplestore in order to sort them once all the tuples for the group have arrived. Here we change the planner so it requests a path with a sort order which supports the most amount of ORDER BY / DISTINCT aggregate functions and add new code to the executor to allow it to support the processing of ORDER BY / DISTINCT aggregates where the tuples are already sorted in the correct order. Since there can be many ORDER BY / DISTINCT aggregates in any given query level, it's very possible that we can't find an order that suits all of these aggregates. The sort order that the planner chooses is simply the one that suits the most aggregate functions. We take the most strictly sorted variation of each order and see how many aggregate functions can use that, then we try again with the order of the remaining aggregates to see if another order would suit more aggregate functions. For example: SELECT agg(a ORDER BY a),agg2(a ORDER BY a,b) ... would request the sort order to be {a, b} because {a} is a subset of the sort order of {a,b}, but; SELECT agg(a ORDER BY a),agg2(a ORDER BY c) ... would just pick a plan ordered by {a} (we give precedence to aggregates which are earlier in the targetlist). SELECT agg(a ORDER BY a),agg2(a ORDER BY b),agg3(a ORDER BY b) ... would choose to order by {b} since two aggregates suit that vs just one that requires input ordered by {a}. Author: David Rowley Reviewed-by: Ronan Dunklau, James Coleman, Ranier Vilela, Richard Guo, Tom Lane Discussion: https://postgr.es/m/CAApHDvpHzfo92%3DR4W0%2BxVua3BUYCKMckWAmo-2t_KiXN-wYH%3Dw%40mail.gmail.com
2022-08-02 13:11:45 +02:00
QUERY PLAN
--------------------------------------------------------------------------------------------------
GroupAggregate
Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2
Improve performance of ORDER BY / DISTINCT aggregates ORDER BY / DISTINCT aggreagtes have, since implemented in Postgres, been executed by always performing a sort in nodeAgg.c to sort the tuples in the current group into the correct order before calling the transition function on the sorted tuples. This was not great as often there might be an index that could have provided pre-sorted input and allowed the transition functions to be called as the rows come in, rather than having to store them in a tuplestore in order to sort them once all the tuples for the group have arrived. Here we change the planner so it requests a path with a sort order which supports the most amount of ORDER BY / DISTINCT aggregate functions and add new code to the executor to allow it to support the processing of ORDER BY / DISTINCT aggregates where the tuples are already sorted in the correct order. Since there can be many ORDER BY / DISTINCT aggregates in any given query level, it's very possible that we can't find an order that suits all of these aggregates. The sort order that the planner chooses is simply the one that suits the most aggregate functions. We take the most strictly sorted variation of each order and see how many aggregate functions can use that, then we try again with the order of the remaining aggregates to see if another order would suit more aggregate functions. For example: SELECT agg(a ORDER BY a),agg2(a ORDER BY a,b) ... would request the sort order to be {a, b} because {a} is a subset of the sort order of {a,b}, but; SELECT agg(a ORDER BY a),agg2(a ORDER BY c) ... would just pick a plan ordered by {a} (we give precedence to aggregates which are earlier in the targetlist). SELECT agg(a ORDER BY a),agg2(a ORDER BY b),agg3(a ORDER BY b) ... would choose to order by {b} since two aggregates suit that vs just one that requires input ordered by {a}. Author: David Rowley Reviewed-by: Ronan Dunklau, James Coleman, Ranier Vilela, Richard Guo, Tom Lane Discussion: https://postgr.es/m/CAApHDvpHzfo92%3DR4W0%2BxVua3BUYCKMckWAmo-2t_KiXN-wYH%3Dw%40mail.gmail.com
2022-08-02 13:11:45 +02:00
-> Sort
Remove redundant grouping and DISTINCT columns. Avoid explicitly grouping by columns that we know are redundant for sorting, for example we need group by only one of x and y in SELECT ... WHERE x = y GROUP BY x, y This comes up more often than you might think, as shown by the changes in the regression tests. It's nearly free to detect too, since we are just piggybacking on the existing logic that detects redundant pathkeys. (In some of the existing plans that change, it's visible that a sort step preceding the grouping step already didn't bother to sort by the redundant column, making the old plan a bit silly-looking.) To do this, build processed_groupClause and processed_distinctClause lists that omit any provably-redundant sort items, and consult those not the originals where relevant. This means that within the planner, one should usually consult root->processed_groupClause or root->processed_distinctClause if one wants to know which columns are to be grouped on; but to check whether grouping or distinct-ing is happening at all, check non-NIL-ness of parse->groupClause or parse->distinctClause. This is comparable to longstanding rules about handling the HAVING clause, so I don't think it'll be a huge maintenance problem. nodeAgg.c also needs minor mods, because it's now possible to generate AGG_PLAIN and AGG_SORTED Agg nodes with zero grouping columns. Patch by me; thanks to Richard Guo and David Rowley for review. Discussion: https://postgr.es/m/185315.1672179489@sss.pgh.pa.us
2023-01-18 18:37:57 +01:00
Output: c1, c2
Improve performance of ORDER BY / DISTINCT aggregates ORDER BY / DISTINCT aggreagtes have, since implemented in Postgres, been executed by always performing a sort in nodeAgg.c to sort the tuples in the current group into the correct order before calling the transition function on the sorted tuples. This was not great as often there might be an index that could have provided pre-sorted input and allowed the transition functions to be called as the rows come in, rather than having to store them in a tuplestore in order to sort them once all the tuples for the group have arrived. Here we change the planner so it requests a path with a sort order which supports the most amount of ORDER BY / DISTINCT aggregate functions and add new code to the executor to allow it to support the processing of ORDER BY / DISTINCT aggregates where the tuples are already sorted in the correct order. Since there can be many ORDER BY / DISTINCT aggregates in any given query level, it's very possible that we can't find an order that suits all of these aggregates. The sort order that the planner chooses is simply the one that suits the most aggregate functions. We take the most strictly sorted variation of each order and see how many aggregate functions can use that, then we try again with the order of the remaining aggregates to see if another order would suit more aggregate functions. For example: SELECT agg(a ORDER BY a),agg2(a ORDER BY a,b) ... would request the sort order to be {a, b} because {a} is a subset of the sort order of {a,b}, but; SELECT agg(a ORDER BY a),agg2(a ORDER BY c) ... would just pick a plan ordered by {a} (we give precedence to aggregates which are earlier in the targetlist). SELECT agg(a ORDER BY a),agg2(a ORDER BY b),agg3(a ORDER BY b) ... would choose to order by {b} since two aggregates suit that vs just one that requires input ordered by {a}. Author: David Rowley Reviewed-by: Ronan Dunklau, James Coleman, Ranier Vilela, Richard Guo, Tom Lane Discussion: https://postgr.es/m/CAApHDvpHzfo92%3DR4W0%2BxVua3BUYCKMckWAmo-2t_KiXN-wYH%3Dw%40mail.gmail.com
2022-08-02 13:11:45 +02:00
Sort Key: ft2.c1 USING <^
-> Foreign Scan on public.ft2
Remove redundant grouping and DISTINCT columns. Avoid explicitly grouping by columns that we know are redundant for sorting, for example we need group by only one of x and y in SELECT ... WHERE x = y GROUP BY x, y This comes up more often than you might think, as shown by the changes in the regression tests. It's nearly free to detect too, since we are just piggybacking on the existing logic that detects redundant pathkeys. (In some of the existing plans that change, it's visible that a sort step preceding the grouping step already didn't bother to sort by the redundant column, making the old plan a bit silly-looking.) To do this, build processed_groupClause and processed_distinctClause lists that omit any provably-redundant sort items, and consult those not the originals where relevant. This means that within the planner, one should usually consult root->processed_groupClause or root->processed_distinctClause if one wants to know which columns are to be grouped on; but to check whether grouping or distinct-ing is happening at all, check non-NIL-ness of parse->groupClause or parse->distinctClause. This is comparable to longstanding rules about handling the HAVING clause, so I don't think it'll be a huge maintenance problem. nodeAgg.c also needs minor mods, because it's now possible to generate AGG_PLAIN and AGG_SORTED Agg nodes with zero grouping columns. Patch by me; thanks to Richard Guo and David Rowley for review. Discussion: https://postgr.es/m/185315.1672179489@sss.pgh.pa.us
2023-01-18 18:37:57 +01:00
Output: c1, c2
Improve performance of ORDER BY / DISTINCT aggregates ORDER BY / DISTINCT aggreagtes have, since implemented in Postgres, been executed by always performing a sort in nodeAgg.c to sort the tuples in the current group into the correct order before calling the transition function on the sorted tuples. This was not great as often there might be an index that could have provided pre-sorted input and allowed the transition functions to be called as the rows come in, rather than having to store them in a tuplestore in order to sort them once all the tuples for the group have arrived. Here we change the planner so it requests a path with a sort order which supports the most amount of ORDER BY / DISTINCT aggregate functions and add new code to the executor to allow it to support the processing of ORDER BY / DISTINCT aggregates where the tuples are already sorted in the correct order. Since there can be many ORDER BY / DISTINCT aggregates in any given query level, it's very possible that we can't find an order that suits all of these aggregates. The sort order that the planner chooses is simply the one that suits the most aggregate functions. We take the most strictly sorted variation of each order and see how many aggregate functions can use that, then we try again with the order of the remaining aggregates to see if another order would suit more aggregate functions. For example: SELECT agg(a ORDER BY a),agg2(a ORDER BY a,b) ... would request the sort order to be {a, b} because {a} is a subset of the sort order of {a,b}, but; SELECT agg(a ORDER BY a),agg2(a ORDER BY c) ... would just pick a plan ordered by {a} (we give precedence to aggregates which are earlier in the targetlist). SELECT agg(a ORDER BY a),agg2(a ORDER BY b),agg3(a ORDER BY b) ... would choose to order by {b} since two aggregates suit that vs just one that requires input ordered by {a}. Author: David Rowley Reviewed-by: Ronan Dunklau, James Coleman, Ranier Vilela, Richard Guo, Tom Lane Discussion: https://postgr.es/m/CAApHDvpHzfo92%3DR4W0%2BxVua3BUYCKMckWAmo-2t_KiXN-wYH%3Dw%40mail.gmail.com
2022-08-02 13:11:45 +02:00
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
Remove redundant grouping and DISTINCT columns. Avoid explicitly grouping by columns that we know are redundant for sorting, for example we need group by only one of x and y in SELECT ... WHERE x = y GROUP BY x, y This comes up more often than you might think, as shown by the changes in the regression tests. It's nearly free to detect too, since we are just piggybacking on the existing logic that detects redundant pathkeys. (In some of the existing plans that change, it's visible that a sort step preceding the grouping step already didn't bother to sort by the redundant column, making the old plan a bit silly-looking.) To do this, build processed_groupClause and processed_distinctClause lists that omit any provably-redundant sort items, and consult those not the originals where relevant. This means that within the planner, one should usually consult root->processed_groupClause or root->processed_distinctClause if one wants to know which columns are to be grouped on; but to check whether grouping or distinct-ing is happening at all, check non-NIL-ness of parse->groupClause or parse->distinctClause. This is comparable to longstanding rules about handling the HAVING clause, so I don't think it'll be a huge maintenance problem. nodeAgg.c also needs minor mods, because it's now possible to generate AGG_PLAIN and AGG_SORTED Agg nodes with zero grouping columns. Patch by me; thanks to Richard Guo and David Rowley for review. Discussion: https://postgr.es/m/185315.1672179489@sss.pgh.pa.us
2023-01-18 18:37:57 +01:00
(8 rows)
-- Cleanup
drop operator class my_op_class using btree;
drop function my_op_cmp(a int, b int);
drop operator family my_op_family using btree;
drop operator public.>^(int, int);
drop operator public.=^(int, int);
drop operator public.<^(int, int);
-- Input relation to aggregate push down hook is not safe to pushdown and thus
-- the aggregate cannot be pushed down to foreign server.
explain (verbose, costs off)
select count(t1.c3) from ft2 t1 left join ft2 t2 on (t1.c1 = random() * t2.c2);
QUERY PLAN
-------------------------------------------------------------------------------------------
Aggregate
Output: count(t1.c3)
-> Nested Loop Left Join
Output: t1.c3
Join Filter: ((t1.c1)::double precision = (random() * (t2.c2)::double precision))
-> Foreign Scan on public.ft2 t1
Output: t1.c3, t1.c1
Remote SQL: SELECT "C 1", c3 FROM "S 1"."T 1"
-> Materialize
Output: t2.c2
-> Foreign Scan on public.ft2 t2
Output: t2.c2
Remote SQL: SELECT c2 FROM "S 1"."T 1"
(13 rows)
-- Subquery in FROM clause having aggregate
explain (verbose, costs off)
select count(*), x.b from ft1, (select c2 a, sum(c1) b from ft1 group by c2) x where ft1.c2 = x.a group by x.b order by 1, 2;
QUERY PLAN
-----------------------------------------------------------------------------------------------
Sort
Output: (count(*)), x.b
Sort Key: (count(*)), x.b
-> HashAggregate
Output: count(*), x.b
Group Key: x.b
-> Hash Join
Output: x.b
Inner Unique: true
Hash Cond: (ft1.c2 = x.a)
-> Foreign Scan on public.ft1
Output: ft1.c2
Remote SQL: SELECT c2 FROM "S 1"."T 1"
-> Hash
Output: x.b, x.a
-> Subquery Scan on x
Output: x.b, x.a
-> Foreign Scan
Output: ft1_1.c2, (sum(ft1_1.c1))
Make postgres_fdw's "Relations" output agree with the rest of EXPLAIN. The relation aliases shown in the "Relations" line for a foreign scan didn't always agree with those used in the rest of EXPLAIN's output. The regression test result changes appearing here provide examples. It's really impossible for postgres_fdw to duplicate EXPLAIN's alias assignment logic during postgresGetForeignRelSize(), because of the de-duplication that EXPLAIN does on a global basis --- and anyway, trying to duplicate that would be unmaintainable. Instead, just put numeric rangetable indexes into the string, and convert those to table names/aliases in postgresExplainForeignScan, which does have access to the results of ruleutils.c's alias assignment logic. Aside from being more reliable, this shifts some work from planning to EXPLAIN, which is a good tradeoff for performance. (I also changed from using StringInfo to using psprintf, which makes the code slightly simpler and reduces its memory consumption.) A kluge required by this solution is that we have to reverse-engineer the rtoffset applied by setrefs.c. If that logic ever fails (presumably because the member tables of a join got offset by different amounts), we'll need some more cooperation with setrefs.c to keep things straight. But for now, there's no need for that. Arguably this is a back-patchable bug fix, but since this is a mostly cosmetic issue and there have been no field complaints, I'll refrain for now. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-02 22:31:03 +01:00
Relations: Aggregate on (public.ft1 ft1_1)
Remote SQL: SELECT c2, sum("C 1") FROM "S 1"."T 1" GROUP BY 1
(21 rows)
select count(*), x.b from ft1, (select c2 a, sum(c1) b from ft1 group by c2) x where ft1.c2 = x.a group by x.b order by 1, 2;
count | b
-------+-------
100 | 49600
100 | 49700
100 | 49800
100 | 49900
100 | 50000
100 | 50100
100 | 50200
100 | 50300
100 | 50400
100 | 50500
(10 rows)
-- FULL join with IS NULL check in HAVING
explain (verbose, costs off)
select avg(t1.c1), sum(t2.c1) from ft4 t1 full join ft5 t2 on (t1.c1 = t2.c1) group by t2.c1 having (avg(t1.c1) is null and sum(t2.c1) < 10) or sum(t2.c1) is null order by 1 nulls last, 2;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: (avg(t1.c1)), (sum(t2.c1)), t2.c1
Relations: Aggregate on ((public.ft4 t1) FULL JOIN (public.ft5 t2))
Remote SQL: SELECT avg(r1.c1), sum(r2.c1), r2.c1 FROM ("S 1"."T 3" r1 FULL JOIN "S 1"."T 4" r2 ON (((r1.c1 = r2.c1)))) GROUP BY 3 HAVING ((((avg(r1.c1) IS NULL) AND (sum(r2.c1) < 10)) OR (sum(r2.c1) IS NULL))) ORDER BY avg(r1.c1) ASC NULLS LAST, sum(r2.c1) ASC NULLS LAST
(4 rows)
select avg(t1.c1), sum(t2.c1) from ft4 t1 full join ft5 t2 on (t1.c1 = t2.c1) group by t2.c1 having (avg(t1.c1) is null and sum(t2.c1) < 10) or sum(t2.c1) is null order by 1 nulls last, 2;
avg | sum
---------------------+-----
51.0000000000000000 |
| 3
| 9
(3 rows)
-- Aggregate over FULL join needing to deparse the joining relations as
-- subqueries.
explain (verbose, costs off)
select count(*), sum(t1.c1), avg(t2.c1) from (select c1 from ft4 where c1 between 50 and 60) t1 full join (select c1 from ft5 where c1 between 50 and 60) t2 on (t1.c1 = t2.c1);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: (count(*)), (sum(ft4.c1)), (avg(ft5.c1))
Relations: Aggregate on ((public.ft4) FULL JOIN (public.ft5))
Remote SQL: SELECT count(*), sum(s4.c1), avg(s5.c1) FROM ((SELECT c1 FROM "S 1"."T 3" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s4(c1) FULL JOIN (SELECT c1 FROM "S 1"."T 4" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s5(c1) ON (((s4.c1 = s5.c1))))
(4 rows)
select count(*), sum(t1.c1), avg(t2.c1) from (select c1 from ft4 where c1 between 50 and 60) t1 full join (select c1 from ft5 where c1 between 50 and 60) t2 on (t1.c1 = t2.c1);
count | sum | avg
-------+-----+---------------------
8 | 330 | 55.5000000000000000
(1 row)
-- ORDER BY expression is part of the target list but not pushed down to
-- foreign server.
explain (verbose, costs off)
select sum(c2) * (random() <= 1)::int as sum from ft1 order by 1;
QUERY PLAN
--------------------------------------------------------------------------------
Sort
Output: (((sum(c2)) * ((random() <= '1'::double precision))::integer))
Sort Key: (((sum(ft1.c2)) * ((random() <= '1'::double precision))::integer))
-> Foreign Scan
Output: ((sum(c2)) * ((random() <= '1'::double precision))::integer)
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT sum(c2) FROM "S 1"."T 1"
(7 rows)
select sum(c2) * (random() <= 1)::int as sum from ft1 order by 1;
sum
------
4500
(1 row)
-- LATERAL join, with parameterization
set enable_hashagg to false;
explain (verbose, costs off)
select c2, sum from "S 1"."T 1" t1, lateral (select sum(t2.c1 + t1."C 1") sum from ft2 t2 group by t2.c1) qry where t1.c2 * 2 = qry.sum and t1.c2 < 3 and t1."C 1" < 100 order by 1;
QUERY PLAN
------------------------------------------------------------------------------------------------------
Sort
Output: t1.c2, qry.sum
Sort Key: t1.c2
-> Nested Loop
Output: t1.c2, qry.sum
-> Index Scan using t1_pkey on "S 1"."T 1" t1
Output: t1."C 1", t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
Index Cond: (t1."C 1" < 100)
Filter: (t1.c2 < 3)
-> Subquery Scan on qry
Output: qry.sum, t2.c1
Filter: ((t1.c2 * 2) = qry.sum)
-> Foreign Scan
Output: (sum((t2.c1 + t1."C 1"))), t2.c1
Relations: Aggregate on (public.ft2 t2)
Remote SQL: SELECT sum(("C 1" + $1::integer)), "C 1" FROM "S 1"."T 1" GROUP BY 2
(16 rows)
select c2, sum from "S 1"."T 1" t1, lateral (select sum(t2.c1 + t1."C 1") sum from ft2 t2 group by t2.c1) qry where t1.c2 * 2 = qry.sum and t1.c2 < 3 and t1."C 1" < 100 order by 1;
c2 | sum
----+-----
1 | 2
2 | 4
(2 rows)
reset enable_hashagg;
Split create_foreignscan_path() into three functions. Up to now postgres_fdw has been using create_foreignscan_path() to generate not only base-relation paths, but also paths for foreign joins and foreign upperrels. This is wrong, because create_foreignscan_path() calls get_baserel_parampathinfo() which will only do the right thing for baserels. It accidentally fails to fail for unparameterized paths, which are the only ones postgres_fdw (thought it) was handling, but we really need different APIs for the baserel and join cases. In HEAD, the best thing to do seems to be to split up the baserel, joinrel, and upperrel cases into three functions so that they can have different APIs. I haven't actually given create_foreign_join_path a different API in this commit: we should spend a bit of time thinking about just what we want to do there, since perhaps FDWs would want to do something different from the build-up-a-join-pairwise approach that get_joinrel_parampathinfo expects. In the meantime, since postgres_fdw isn't prepared to generate parameterized joins anyway, just give it a defense against trying to plan joins with lateral refs. In addition (and this is what triggered this whole mess) fix bug #15613 from Srinivasan S A, by teaching file_fdw and postgres_fdw that plain baserel foreign paths still have outer refs if the relation has lateral_relids. Add some assertions in relnode.c to catch future occurrences of the same error --- in particular, to catch other FDWs doing that, but also as backstop against core-code mistakes like the one fixed by commit bdd9a99aa. Bug #15613 also needs to be fixed in the back branches, but the appropriate fix will look quite a bit different there, since we don't want to assume that existing FDWs get the word right away. Discussion: https://postgr.es/m/15613-092be1be9576c728@postgresql.org
2019-02-07 18:59:47 +01:00
-- bug #15613: bad plan for foreign table scan with lateral reference
EXPLAIN (VERBOSE, COSTS OFF)
SELECT ref_0.c2, subq_1.*
FROM
"S 1"."T 1" AS ref_0,
LATERAL (
SELECT ref_0."C 1" c1, subq_0.*
FROM (SELECT ref_0.c2, ref_1.c3
FROM ft1 AS ref_1) AS subq_0
RIGHT JOIN ft2 AS ref_3 ON (subq_0.c3 = ref_3.c3)
) AS subq_1
WHERE ref_0."C 1" < 10 AND subq_1.c3 = '00001'
ORDER BY ref_0."C 1";
QUERY PLAN
---------------------------------------------------------------------------------------------------------
Nested Loop
Output: ref_0.c2, ref_0."C 1", (ref_0.c2), ref_1.c3, ref_0."C 1"
-> Nested Loop
Output: ref_0.c2, ref_0."C 1", ref_1.c3, (ref_0.c2)
-> Index Scan using t1_pkey on "S 1"."T 1" ref_0
Output: ref_0."C 1", ref_0.c2, ref_0.c3, ref_0.c4, ref_0.c5, ref_0.c6, ref_0.c7, ref_0.c8
Index Cond: (ref_0."C 1" < 10)
-> Foreign Scan on public.ft1 ref_1
Output: ref_1.c3, ref_0.c2
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
Remote SQL: SELECT c3 FROM "S 1"."T 1" WHERE ((c3 = '00001'))
Split create_foreignscan_path() into three functions. Up to now postgres_fdw has been using create_foreignscan_path() to generate not only base-relation paths, but also paths for foreign joins and foreign upperrels. This is wrong, because create_foreignscan_path() calls get_baserel_parampathinfo() which will only do the right thing for baserels. It accidentally fails to fail for unparameterized paths, which are the only ones postgres_fdw (thought it) was handling, but we really need different APIs for the baserel and join cases. In HEAD, the best thing to do seems to be to split up the baserel, joinrel, and upperrel cases into three functions so that they can have different APIs. I haven't actually given create_foreign_join_path a different API in this commit: we should spend a bit of time thinking about just what we want to do there, since perhaps FDWs would want to do something different from the build-up-a-join-pairwise approach that get_joinrel_parampathinfo expects. In the meantime, since postgres_fdw isn't prepared to generate parameterized joins anyway, just give it a defense against trying to plan joins with lateral refs. In addition (and this is what triggered this whole mess) fix bug #15613 from Srinivasan S A, by teaching file_fdw and postgres_fdw that plain baserel foreign paths still have outer refs if the relation has lateral_relids. Add some assertions in relnode.c to catch future occurrences of the same error --- in particular, to catch other FDWs doing that, but also as backstop against core-code mistakes like the one fixed by commit bdd9a99aa. Bug #15613 also needs to be fixed in the back branches, but the appropriate fix will look quite a bit different there, since we don't want to assume that existing FDWs get the word right away. Discussion: https://postgr.es/m/15613-092be1be9576c728@postgresql.org
2019-02-07 18:59:47 +01:00
-> Materialize
Output: ref_3.c3
-> Foreign Scan on public.ft2 ref_3
Output: ref_3.c3
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
Remote SQL: SELECT c3 FROM "S 1"."T 1" WHERE ((c3 = '00001'))
Split create_foreignscan_path() into three functions. Up to now postgres_fdw has been using create_foreignscan_path() to generate not only base-relation paths, but also paths for foreign joins and foreign upperrels. This is wrong, because create_foreignscan_path() calls get_baserel_parampathinfo() which will only do the right thing for baserels. It accidentally fails to fail for unparameterized paths, which are the only ones postgres_fdw (thought it) was handling, but we really need different APIs for the baserel and join cases. In HEAD, the best thing to do seems to be to split up the baserel, joinrel, and upperrel cases into three functions so that they can have different APIs. I haven't actually given create_foreign_join_path a different API in this commit: we should spend a bit of time thinking about just what we want to do there, since perhaps FDWs would want to do something different from the build-up-a-join-pairwise approach that get_joinrel_parampathinfo expects. In the meantime, since postgres_fdw isn't prepared to generate parameterized joins anyway, just give it a defense against trying to plan joins with lateral refs. In addition (and this is what triggered this whole mess) fix bug #15613 from Srinivasan S A, by teaching file_fdw and postgres_fdw that plain baserel foreign paths still have outer refs if the relation has lateral_relids. Add some assertions in relnode.c to catch future occurrences of the same error --- in particular, to catch other FDWs doing that, but also as backstop against core-code mistakes like the one fixed by commit bdd9a99aa. Bug #15613 also needs to be fixed in the back branches, but the appropriate fix will look quite a bit different there, since we don't want to assume that existing FDWs get the word right away. Discussion: https://postgr.es/m/15613-092be1be9576c728@postgresql.org
2019-02-07 18:59:47 +01:00
(15 rows)
SELECT ref_0.c2, subq_1.*
FROM
"S 1"."T 1" AS ref_0,
LATERAL (
SELECT ref_0."C 1" c1, subq_0.*
FROM (SELECT ref_0.c2, ref_1.c3
FROM ft1 AS ref_1) AS subq_0
RIGHT JOIN ft2 AS ref_3 ON (subq_0.c3 = ref_3.c3)
) AS subq_1
WHERE ref_0."C 1" < 10 AND subq_1.c3 = '00001'
ORDER BY ref_0."C 1";
c2 | c1 | c2 | c3
----+----+----+-------
1 | 1 | 1 | 00001
2 | 2 | 2 | 00001
3 | 3 | 3 | 00001
4 | 4 | 4 | 00001
5 | 5 | 5 | 00001
6 | 6 | 6 | 00001
7 | 7 | 7 | 00001
8 | 8 | 8 | 00001
9 | 9 | 9 | 00001
(9 rows)
-- Check with placeHolderVars
explain (verbose, costs off)
select sum(q.a), count(q.b) from ft4 left join (select 13, avg(ft1.c1), sum(ft2.c1) from ft1 right join ft2 on (ft1.c1 = ft2.c1)) q(a, b, c) on (ft4.c1 <= q.b);
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------
Aggregate
Output: sum(q.a), count(q.b)
-> Nested Loop Left Join
Output: q.a, q.b
Inner Unique: true
Join Filter: ((ft4.c1)::numeric <= q.b)
-> Foreign Scan on public.ft4
Output: ft4.c1, ft4.c2, ft4.c3
Remote SQL: SELECT c1 FROM "S 1"."T 3"
-> Materialize
Output: q.a, q.b
-> Subquery Scan on q
Output: q.a, q.b
-> Foreign Scan
Output: 13, (avg(ft1.c1)), NULL::bigint
Relations: Aggregate on ((public.ft2) LEFT JOIN (public.ft1))
Remote SQL: SELECT 13, avg(r1."C 1"), NULL::bigint FROM ("S 1"."T 1" r2 LEFT JOIN "S 1"."T 1" r1 ON (((r1."C 1" = r2."C 1"))))
(17 rows)
select sum(q.a), count(q.b) from ft4 left join (select 13, avg(ft1.c1), sum(ft2.c1) from ft1 right join ft2 on (ft1.c1 = ft2.c1)) q(a, b, c) on (ft4.c1 <= q.b);
sum | count
-----+-------
650 | 50
(1 row)
-- Not supported cases
-- Grouping sets
explain (verbose, costs off)
select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last;
QUERY PLAN
------------------------------------------------------------------------------
Sort
Output: c2, (sum(c1))
Sort Key: ft1.c2
-> MixedAggregate
Output: c2, sum(c1)
Hash Key: ft1.c2
Group Key: ()
-> Foreign Scan on public.ft1
Output: c2, c1
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3))
(10 rows)
select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last;
c2 | sum
----+--------
0 | 50500
1 | 49600
2 | 49700
| 149800
(4 rows)
explain (verbose, costs off)
select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last;
QUERY PLAN
------------------------------------------------------------------------------
Sort
Output: c2, (sum(c1))
Sort Key: ft1.c2
-> MixedAggregate
Output: c2, sum(c1)
Hash Key: ft1.c2
Group Key: ()
-> Foreign Scan on public.ft1
Output: c2, c1
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3))
(10 rows)
select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last;
c2 | sum
----+--------
0 | 50500
1 | 49600
2 | 49700
| 149800
(4 rows)
explain (verbose, costs off)
select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last;
QUERY PLAN
----------------------------------------------------------------------------------
Sort
Output: c2, c6, (sum(c1))
Sort Key: ft1.c2, ft1.c6
-> HashAggregate
Output: c2, c6, sum(c1)
Hash Key: ft1.c2
Hash Key: ft1.c6
-> Foreign Scan on public.ft1
Output: c2, c6, c1
Remote SQL: SELECT "C 1", c2, c6 FROM "S 1"."T 1" WHERE ((c2 < 3))
(10 rows)
select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last;
c2 | c6 | sum
----+----+-------
0 | | 50500
1 | | 49600
2 | | 49700
| 0 | 50500
| 1 | 49600
| 2 | 49700
(6 rows)
explain (verbose, costs off)
select c2, sum(c1), grouping(c2) from ft1 where c2 < 3 group by c2 order by 1 nulls last;
QUERY PLAN
------------------------------------------------------------------------------
Sort
Output: c2, (sum(c1)), (GROUPING(c2))
Sort Key: ft1.c2
-> HashAggregate
Output: c2, sum(c1), GROUPING(c2)
Group Key: ft1.c2
-> Foreign Scan on public.ft1
Output: c2, c1
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3))
(9 rows)
select c2, sum(c1), grouping(c2) from ft1 where c2 < 3 group by c2 order by 1 nulls last;
c2 | sum | grouping
----+-------+----------
0 | 50500 | 0
1 | 49600 | 0
2 | 49700 | 0
(3 rows)
-- DISTINCT itself is not pushed down, whereas underneath aggregate is pushed
explain (verbose, costs off)
select distinct sum(c1)/1000 s from ft2 where c2 < 6 group by c2 order by 1;
QUERY PLAN
-------------------------------------------------------------------------------------------------------
Unique
Output: ((sum(c1) / 1000)), c2
-> Sort
Output: ((sum(c1) / 1000)), c2
Sort Key: ((sum(ft2.c1) / 1000))
-> Foreign Scan
Output: ((sum(c1) / 1000)), c2
Relations: Aggregate on (public.ft2)
Remote SQL: SELECT (sum("C 1") / 1000), c2 FROM "S 1"."T 1" WHERE ((c2 < 6)) GROUP BY 2
(9 rows)
select distinct sum(c1)/1000 s from ft2 where c2 < 6 group by c2 order by 1;
s
----
49
50
(2 rows)
-- WindowAgg
explain (verbose, costs off)
select c2, sum(c2), count(c2) over (partition by c2%2) from ft2 where c2 < 10 group by c2 order by 1;
QUERY PLAN
------------------------------------------------------------------------------------------------------------
Sort
Output: c2, (sum(c2)), (count(c2) OVER (?)), ((c2 % 2))
Sort Key: ft2.c2
-> WindowAgg
Output: c2, (sum(c2)), count(c2) OVER (?), ((c2 % 2))
-> Sort
Output: c2, ((c2 % 2)), (sum(c2))
Sort Key: ((ft2.c2 % 2))
-> Foreign Scan
Output: c2, ((c2 % 2)), (sum(c2))
Relations: Aggregate on (public.ft2)
Remote SQL: SELECT c2, (c2 % 2), sum(c2) FROM "S 1"."T 1" WHERE ((c2 < 10)) GROUP BY 1
(12 rows)
select c2, sum(c2), count(c2) over (partition by c2%2) from ft2 where c2 < 10 group by c2 order by 1;
c2 | sum | count
----+-----+-------
0 | 0 | 5
1 | 100 | 5
2 | 200 | 5
3 | 300 | 5
4 | 400 | 5
5 | 500 | 5
6 | 600 | 5
7 | 700 | 5
8 | 800 | 5
9 | 900 | 5
(10 rows)
explain (verbose, costs off)
select c2, array_agg(c2) over (partition by c2%2 order by c2 desc) from ft1 where c2 < 10 group by c2 order by 1;
QUERY PLAN
---------------------------------------------------------------------------------------------------
Sort
Output: c2, (array_agg(c2) OVER (?)), ((c2 % 2))
Sort Key: ft1.c2
-> WindowAgg
Output: c2, array_agg(c2) OVER (?), ((c2 % 2))
-> Sort
Output: c2, ((c2 % 2))
Sort Key: ((ft1.c2 % 2)), ft1.c2 DESC
-> Foreign Scan
Output: c2, ((c2 % 2))
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT c2, (c2 % 2) FROM "S 1"."T 1" WHERE ((c2 < 10)) GROUP BY 1
(12 rows)
select c2, array_agg(c2) over (partition by c2%2 order by c2 desc) from ft1 where c2 < 10 group by c2 order by 1;
c2 | array_agg
----+-------------
0 | {8,6,4,2,0}
1 | {9,7,5,3,1}
2 | {8,6,4,2}
3 | {9,7,5,3}
4 | {8,6,4}
5 | {9,7,5}
6 | {8,6}
7 | {9,7}
8 | {8}
9 | {9}
(10 rows)
explain (verbose, costs off)
select c2, array_agg(c2) over (partition by c2%2 order by c2 range between current row and unbounded following) from ft1 where c2 < 10 group by c2 order by 1;
QUERY PLAN
---------------------------------------------------------------------------------------------------
Sort
Output: c2, (array_agg(c2) OVER (?)), ((c2 % 2))
Sort Key: ft1.c2
-> WindowAgg
Output: c2, array_agg(c2) OVER (?), ((c2 % 2))
-> Sort
Output: c2, ((c2 % 2))
Sort Key: ((ft1.c2 % 2)), ft1.c2
-> Foreign Scan
Output: c2, ((c2 % 2))
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT c2, (c2 % 2) FROM "S 1"."T 1" WHERE ((c2 < 10)) GROUP BY 1
(12 rows)
select c2, array_agg(c2) over (partition by c2%2 order by c2 range between current row and unbounded following) from ft1 where c2 < 10 group by c2 order by 1;
c2 | array_agg
----+-------------
0 | {0,2,4,6,8}
1 | {1,3,5,7,9}
2 | {2,4,6,8}
3 | {3,5,7,9}
4 | {4,6,8}
5 | {5,7,9}
6 | {6,8}
7 | {7,9}
8 | {8}
9 | {9}
(10 rows)
-- ===================================================================
-- parameterized queries
-- ===================================================================
-- simple join
PREPARE st1(int, int) AS SELECT t1.c3, t2.c3 FROM ft1 t1, ft2 t2 WHERE t1.c1 = $1 AND t2.c1 = $2;
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st1(1, 2);
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: t1.c3, t2.c3
Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
Remote SQL: SELECT r1.c3, r2.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = 2)) AND ((r1."C 1" = 1))))
(4 rows)
EXECUTE st1(1, 1);
c3 | c3
-------+-------
00001 | 00001
(1 row)
EXECUTE st1(101, 101);
c3 | c3
-------+-------
00101 | 00101
(1 row)
-- subquery using stable function (can't be sent to remote)
PREPARE st2(int) AS SELECT * FROM ft1 t1 WHERE t1.c1 < $2 AND t1.c3 IN (SELECT c3 FROM ft2 t2 WHERE c1 > $1 AND date(c4) = '1970-01-17'::date) ORDER BY c1;
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st2(10, 20);
QUERY PLAN
----------------------------------------------------------------------------------------------------------
Sort
Output: t1.c1, t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
Sort Key: t1.c1
-> Nested Loop Semi Join
Output: t1.c1, t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Join Filter: (t2.c3 = t1.c3)
-> Foreign Scan on public.ft1 t1
Output: t1.c1, t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 20))
-> Materialize
Output: t2.c3
-> Foreign Scan on public.ft2 t2
Output: t2.c3
Filter: (date(t2.c4) = '01-17-1970'::date)
Remote SQL: SELECT c3, c4 FROM "S 1"."T 1" WHERE (("C 1" > 10))
(15 rows)
EXECUTE st2(10, 20);
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
----+----+-------+------------------------------+--------------------------+----+------------+-----
16 | 6 | 00016 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo
(1 row)
EXECUTE st2(101, 121);
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
-----+----+-------+------------------------------+--------------------------+----+------------+-----
116 | 6 | 00116 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo
(1 row)
-- subquery using immutable function (can be sent to remote)
PREPARE st3(int) AS SELECT * FROM ft1 t1 WHERE t1.c1 < $2 AND t1.c3 IN (SELECT c3 FROM ft2 t2 WHERE c1 > $1 AND date(c5) = '1970-01-17'::date) ORDER BY c1;
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st3(10, 20);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------
Sort
Output: t1.c1, t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
Sort Key: t1.c1
-> Nested Loop Semi Join
Output: t1.c1, t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Join Filter: (t2.c3 = t1.c3)
-> Foreign Scan on public.ft1 t1
Output: t1.c1, t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 20))
-> Materialize
Output: t2.c3
-> Foreign Scan on public.ft2 t2
Output: t2.c3
Remote SQL: SELECT c3 FROM "S 1"."T 1" WHERE (("C 1" > 10)) AND ((date(c5) = '1970-01-17'::date))
(14 rows)
EXECUTE st3(10, 20);
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
----+----+-------+------------------------------+--------------------------+----+------------+-----
16 | 6 | 00016 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo
(1 row)
EXECUTE st3(20, 30);
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
----+----+----+----+----+----+----+----
(0 rows)
-- custom plan should be chosen initially
PREPARE st4(int) AS SELECT * FROM ft1 t1 WHERE t1.c1 = $1;
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st4(1);
QUERY PLAN
---------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = 1))
(3 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st4(1);
QUERY PLAN
---------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = 1))
(3 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st4(1);
QUERY PLAN
---------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = 1))
(3 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st4(1);
QUERY PLAN
---------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = 1))
(3 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st4(1);
QUERY PLAN
---------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = 1))
(3 rows)
-- once we try it enough times, should switch to generic plan
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st4(1);
QUERY PLAN
-------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = $1::integer))
(3 rows)
-- value of $1 should not be sent to remote
PREPARE st5(user_enum,int) AS SELECT * FROM ft1 t1 WHERE c8 = $1 and c1 = $2;
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st5('foo', 1);
QUERY PLAN
---------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Filter: (t1.c8 = 'foo'::user_enum)
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = 1))
(4 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st5('foo', 1);
QUERY PLAN
---------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Filter: (t1.c8 = 'foo'::user_enum)
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = 1))
(4 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st5('foo', 1);
QUERY PLAN
---------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Filter: (t1.c8 = 'foo'::user_enum)
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = 1))
(4 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st5('foo', 1);
QUERY PLAN
---------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Filter: (t1.c8 = 'foo'::user_enum)
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = 1))
(4 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st5('foo', 1);
QUERY PLAN
---------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Filter: (t1.c8 = 'foo'::user_enum)
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = 1))
(4 rows)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st5('foo', 1);
QUERY PLAN
-------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Filter: (t1.c8 = $1)
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = $1::integer))
(4 rows)
EXECUTE st5('foo', 1);
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
----+----+-------+------------------------------+--------------------------+----+------------+-----
1 | 1 | 00001 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo
(1 row)
-- altering FDW options requires replanning
PREPARE st6 AS SELECT * FROM ft1 t1 WHERE t1.c1 = t1.c2;
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st6;
QUERY PLAN
----------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = c2))
(3 rows)
PREPARE st7 AS INSERT INTO ft1 (c1,c2,c3) VALUES (1001,101,'foo');
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st7;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Insert on public.ft1
Remote SQL: INSERT INTO "S 1"."T 1"("C 1", c2, c3, c4, c5, c6, c7, c8) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
Batch Size: 1
-> Result
Output: NULL::integer, 1001, 101, 'foo'::text, NULL::timestamp with time zone, NULL::timestamp without time zone, NULL::character varying, 'ft1 '::character(10), NULL::user_enum
(5 rows)
ALTER TABLE "S 1"."T 1" RENAME TO "T 0";
ALTER FOREIGN TABLE ft1 OPTIONS (SET table_name 'T 0');
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st6;
QUERY PLAN
----------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 0" WHERE (("C 1" = c2))
(3 rows)
EXECUTE st6;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
----+----+-------+------------------------------+--------------------------+----+------------+-----
1 | 1 | 00001 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo
2 | 2 | 00002 | Sat Jan 03 00:00:00 1970 PST | Sat Jan 03 00:00:00 1970 | 2 | 2 | foo
3 | 3 | 00003 | Sun Jan 04 00:00:00 1970 PST | Sun Jan 04 00:00:00 1970 | 3 | 3 | foo
4 | 4 | 00004 | Mon Jan 05 00:00:00 1970 PST | Mon Jan 05 00:00:00 1970 | 4 | 4 | foo
5 | 5 | 00005 | Tue Jan 06 00:00:00 1970 PST | Tue Jan 06 00:00:00 1970 | 5 | 5 | foo
6 | 6 | 00006 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo
7 | 7 | 00007 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7 | 7 | foo
8 | 8 | 00008 | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8 | 8 | foo
9 | 9 | 00009 | Sat Jan 10 00:00:00 1970 PST | Sat Jan 10 00:00:00 1970 | 9 | 9 | foo
(9 rows)
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st7;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Insert on public.ft1
Remote SQL: INSERT INTO "S 1"."T 0"("C 1", c2, c3, c4, c5, c6, c7, c8) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
Batch Size: 1
-> Result
Output: NULL::integer, 1001, 101, 'foo'::text, NULL::timestamp with time zone, NULL::timestamp without time zone, NULL::character varying, 'ft1 '::character(10), NULL::user_enum
(5 rows)
ALTER TABLE "S 1"."T 0" RENAME TO "T 1";
ALTER FOREIGN TABLE ft1 OPTIONS (SET table_name 'T 1');
PREPARE st8 AS SELECT count(c3) FROM ft1 t1 WHERE t1.c1 === t1.c2;
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st8;
QUERY PLAN
-----------------------------------------------------------------------------------------
Foreign Scan
Output: (count(c3))
Relations: Aggregate on (public.ft1 t1)
Remote SQL: SELECT count(c3) FROM "S 1"."T 1" WHERE (("C 1" OPERATOR(public.===) c2))
(4 rows)
ALTER SERVER loopback OPTIONS (DROP extensions);
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st8;
QUERY PLAN
-----------------------------------------------------------
Aggregate
Output: count(c3)
-> Foreign Scan on public.ft1 t1
Output: c3
Filter: (t1.c1 === t1.c2)
Remote SQL: SELECT "C 1", c2, c3 FROM "S 1"."T 1"
(6 rows)
EXECUTE st8;
count
-------
9
(1 row)
ALTER SERVER loopback OPTIONS (ADD extensions 'postgres_fdw');
-- cleanup
DEALLOCATE st1;
DEALLOCATE st2;
DEALLOCATE st3;
DEALLOCATE st4;
DEALLOCATE st5;
DEALLOCATE st6;
DEALLOCATE st7;
DEALLOCATE st8;
-- System columns, except ctid and oid, should not be sent to remote
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM ft1 t1 WHERE t1.tableoid = 'pg_class'::regclass LIMIT 1;
QUERY PLAN
-------------------------------------------------------------------------------
Limit
Output: c1, c2, c3, c4, c5, c6, c7, c8
-> Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Filter: (t1.tableoid = '1259'::oid)
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
(6 rows)
SELECT * FROM ft1 t1 WHERE t1.tableoid = 'ft1'::regclass LIMIT 1;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
----+----+-------+------------------------------+--------------------------+----+------------+-----
1 | 1 | 00001 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo
(1 row)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT tableoid::regclass, * FROM ft1 t1 LIMIT 1;
QUERY PLAN
-----------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: (tableoid)::regclass, c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" LIMIT 1::bigint
(3 rows)
SELECT tableoid::regclass, * FROM ft1 t1 LIMIT 1;
tableoid | c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
----------+----+----+-------+------------------------------+--------------------------+----+------------+-----
ft1 | 1 | 1 | 00001 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo
(1 row)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM ft1 t1 WHERE t1.ctid = '(0,2)';
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
QUERY PLAN
--------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((ctid = '(0,2)'))
(3 rows)
SELECT * FROM ft1 t1 WHERE t1.ctid = '(0,2)';
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
----+----+-------+------------------------------+--------------------------+----+------------+-----
2 | 2 | 00002 | Sat Jan 03 00:00:00 1970 PST | Sat Jan 03 00:00:00 1970 | 2 | 2 | foo
(1 row)
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT ctid, * FROM ft1 t1 LIMIT 1;
QUERY PLAN
-----------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: ctid, c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8, ctid FROM "S 1"."T 1" LIMIT 1::bigint
(3 rows)
SELECT ctid, * FROM ft1 t1 LIMIT 1;
ctid | c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
-------+----+----+-------+------------------------------+--------------------------+----+------------+-----
(0,1) | 1 | 1 | 00001 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo
(1 row)
-- ===================================================================
-- used in PL/pgSQL function
-- ===================================================================
CREATE OR REPLACE FUNCTION f_test(p_c1 int) RETURNS int AS $$
DECLARE
v_c1 int;
BEGIN
SELECT c1 INTO v_c1 FROM ft1 WHERE c1 = p_c1 LIMIT 1;
PERFORM c1 FROM ft1 WHERE c1 = p_c1 AND p_c1 = v_c1 LIMIT 1;
RETURN v_c1;
END;
$$ LANGUAGE plpgsql;
SELECT f_test(100);
f_test
--------
100
(1 row)
DROP FUNCTION f_test(int);
-- ===================================================================
2020-09-08 03:09:22 +02:00
-- REINDEX
-- ===================================================================
-- remote table is not created here
CREATE FOREIGN TABLE reindex_foreign (c1 int, c2 int)
SERVER loopback2 OPTIONS (table_name 'reindex_local');
REINDEX TABLE reindex_foreign; -- error
ERROR: "reindex_foreign" is not a table or materialized view
REINDEX TABLE CONCURRENTLY reindex_foreign; -- error
ERROR: "reindex_foreign" is not a table or materialized view
DROP FOREIGN TABLE reindex_foreign;
-- partitions and foreign tables
CREATE TABLE reind_fdw_parent (c1 int) PARTITION BY RANGE (c1);
CREATE TABLE reind_fdw_0_10 PARTITION OF reind_fdw_parent
FOR VALUES FROM (0) TO (10);
CREATE FOREIGN TABLE reind_fdw_10_20 PARTITION OF reind_fdw_parent
FOR VALUES FROM (10) TO (20)
SERVER loopback OPTIONS (table_name 'reind_local_10_20');
REINDEX TABLE reind_fdw_parent; -- ok
REINDEX TABLE CONCURRENTLY reind_fdw_parent; -- ok
DROP TABLE reind_fdw_parent;
-- ===================================================================
-- conversion error
-- ===================================================================
ALTER FOREIGN TABLE ft1 ALTER COLUMN c8 TYPE int;
SELECT * FROM ft1 ftx(x1,x2,x3,x4,x5,x6,x7,x8) WHERE x1 = 1; -- ERROR
ERROR: invalid input syntax for type integer: "foo"
CONTEXT: column "x8" of foreign table "ftx"
SELECT ftx.x1, ft2.c2, ftx.x8 FROM ft1 ftx(x1,x2,x3,x4,x5,x6,x7,x8), ft2
WHERE ftx.x1 = ft2.c1 AND ftx.x1 = 1; -- ERROR
ERROR: invalid input syntax for type integer: "foo"
CONTEXT: column "x8" of foreign table "ftx"
SELECT ftx.x1, ft2.c2, ftx FROM ft1 ftx(x1,x2,x3,x4,x5,x6,x7,x8), ft2
WHERE ftx.x1 = ft2.c1 AND ftx.x1 = 1; -- ERROR
ERROR: invalid input syntax for type integer: "foo"
CONTEXT: whole-row reference to foreign table "ftx"
SELECT sum(c2), array_agg(c8) FROM ft1 GROUP BY c8; -- ERROR
ERROR: invalid input syntax for type integer: "foo"
CONTEXT: processing expression at position 2 in select list
ANALYZE ft1; -- ERROR
ERROR: invalid input syntax for type integer: "foo"
CONTEXT: column "c8" of foreign table "ft1"
ALTER FOREIGN TABLE ft1 ALTER COLUMN c8 TYPE user_enum;
-- ===================================================================
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
-- local type can be different from remote type in some cases,
-- in particular if similarly-named operators do equivalent things
-- ===================================================================
ALTER FOREIGN TABLE ft1 ALTER COLUMN c8 TYPE text;
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM ft1 WHERE c8 = 'foo' LIMIT 1;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((c8 = 'foo')) LIMIT 1::bigint
(3 rows)
SELECT * FROM ft1 WHERE c8 = 'foo' LIMIT 1;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
----+----+-------+------------------------------+--------------------------+----+------------+-----
1 | 1 | 00001 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo
(1 row)
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM ft1 WHERE 'foo' = c8 LIMIT 1;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (('foo' = c8)) LIMIT 1::bigint
(3 rows)
SELECT * FROM ft1 WHERE 'foo' = c8 LIMIT 1;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
----+----+-------+------------------------------+--------------------------+----+------------+-----
1 | 1 | 00001 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo
(1 row)
-- we declared c8 to be text locally, but it's still the same type on
-- the remote which will balk if we try to do anything incompatible
-- with that remote type
SELECT * FROM ft1 WHERE c8 LIKE 'foo' LIMIT 1; -- ERROR
ERROR: operator does not exist: public.user_enum ~~ unknown
HINT: No operator matches the given name and argument types. You might need to add explicit type casts.
CONTEXT: remote SQL command: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((c8 ~~ 'foo')) LIMIT 1::bigint
SELECT * FROM ft1 WHERE c8::text LIKE 'foo' LIMIT 1; -- ERROR; cast not pushed down
ERROR: operator does not exist: public.user_enum ~~ unknown
HINT: No operator matches the given name and argument types. You might need to add explicit type casts.
CONTEXT: remote SQL command: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((c8 ~~ 'foo')) LIMIT 1::bigint
ALTER FOREIGN TABLE ft1 ALTER COLUMN c8 TYPE user_enum;
-- ===================================================================
-- subtransaction
-- + local/remote error doesn't break cursor
-- ===================================================================
BEGIN;
DECLARE c CURSOR FOR SELECT * FROM ft1 ORDER BY c1;
FETCH c;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
----+----+-------+------------------------------+--------------------------+----+------------+-----
1 | 1 | 00001 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo
(1 row)
SAVEPOINT s;
ERROR OUT; -- ERROR
ERROR: syntax error at or near "ERROR"
LINE 1: ERROR OUT;
^
ROLLBACK TO s;
FETCH c;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
----+----+-------+------------------------------+--------------------------+----+------------+-----
2 | 2 | 00002 | Sat Jan 03 00:00:00 1970 PST | Sat Jan 03 00:00:00 1970 | 2 | 2 | foo
(1 row)
SAVEPOINT s;
SELECT * FROM ft1 WHERE 1 / (c1 - 1) > 0; -- ERROR
ERROR: division by zero
CONTEXT: remote SQL command: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (((1 / ("C 1" - 1)) > 0))
ROLLBACK TO s;
FETCH c;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
----+----+-------+------------------------------+--------------------------+----+------------+-----
3 | 3 | 00003 | Sun Jan 04 00:00:00 1970 PST | Sun Jan 04 00:00:00 1970 | 3 | 3 | foo
(1 row)
SELECT * FROM ft1 ORDER BY c1 LIMIT 1;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
----+----+-------+------------------------------+--------------------------+----+------------+-----
1 | 1 | 00001 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo
(1 row)
COMMIT;
-- ===================================================================
-- test handling of collations
-- ===================================================================
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
create table loct3 (f1 text collate "C" unique, f2 text, f3 varchar(10) unique);
create foreign table ft3 (f1 text collate "C", f2 text, f3 varchar(10))
server loopback options (table_name 'loct3', use_remote_estimate 'true');
-- can be sent to remote
explain (verbose, costs off) select * from ft3 where f1 = 'foo';
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
QUERY PLAN
------------------------------------------------------------------------
Foreign Scan on public.ft3
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
Output: f1, f2, f3
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
Remote SQL: SELECT f1, f2, f3 FROM public.loct3 WHERE ((f1 = 'foo'))
(3 rows)
explain (verbose, costs off) select * from ft3 where f1 COLLATE "C" = 'foo';
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
QUERY PLAN
------------------------------------------------------------------------
Foreign Scan on public.ft3
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
Output: f1, f2, f3
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
Remote SQL: SELECT f1, f2, f3 FROM public.loct3 WHERE ((f1 = 'foo'))
(3 rows)
explain (verbose, costs off) select * from ft3 where f2 = 'foo';
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
QUERY PLAN
------------------------------------------------------------------------
Foreign Scan on public.ft3
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
Output: f1, f2, f3
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
Remote SQL: SELECT f1, f2, f3 FROM public.loct3 WHERE ((f2 = 'foo'))
(3 rows)
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
explain (verbose, costs off) select * from ft3 where f3 = 'foo';
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
QUERY PLAN
------------------------------------------------------------------------
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
Foreign Scan on public.ft3
Output: f1, f2, f3
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
Remote SQL: SELECT f1, f2, f3 FROM public.loct3 WHERE ((f3 = 'foo'))
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
(3 rows)
explain (verbose, costs off) select * from ft3 f, loct3 l
where f.f3 = l.f3 and l.f1 = 'foo';
QUERY PLAN
--------------------------------------------------------------------------------------------------
Nested Loop
Output: f.f1, f.f2, f.f3, l.f1, l.f2, l.f3
-> Index Scan using loct3_f1_key on public.loct3 l
Output: l.f1, l.f2, l.f3
Index Cond: (l.f1 = 'foo'::text)
-> Foreign Scan on public.ft3 f
Output: f.f1, f.f2, f.f3
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Remote SQL: SELECT f1, f2, f3 FROM public.loct3 WHERE ((f3 = $1::character varying(10)))
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
(8 rows)
-- can't be sent to remote
explain (verbose, costs off) select * from ft3 where f1 COLLATE "POSIX" = 'foo';
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
QUERY PLAN
---------------------------------------------------
Foreign Scan on public.ft3
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
Output: f1, f2, f3
Filter: ((ft3.f1)::text = 'foo'::text)
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
Remote SQL: SELECT f1, f2, f3 FROM public.loct3
(4 rows)
explain (verbose, costs off) select * from ft3 where f1 = 'foo' COLLATE "C";
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
QUERY PLAN
---------------------------------------------------
Foreign Scan on public.ft3
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
Output: f1, f2, f3
Filter: (ft3.f1 = 'foo'::text COLLATE "C")
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
Remote SQL: SELECT f1, f2, f3 FROM public.loct3
(4 rows)
explain (verbose, costs off) select * from ft3 where f2 COLLATE "C" = 'foo';
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
QUERY PLAN
---------------------------------------------------
Foreign Scan on public.ft3
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
Output: f1, f2, f3
Filter: ((ft3.f2)::text = 'foo'::text)
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
Remote SQL: SELECT f1, f2, f3 FROM public.loct3
(4 rows)
explain (verbose, costs off) select * from ft3 where f2 = 'foo' COLLATE "C";
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
QUERY PLAN
---------------------------------------------------
Foreign Scan on public.ft3
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
Output: f1, f2, f3
Filter: (ft3.f2 = 'foo'::text COLLATE "C")
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
Remote SQL: SELECT f1, f2, f3 FROM public.loct3
(4 rows)
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
explain (verbose, costs off) select * from ft3 f, loct3 l
where f.f3 = l.f3 COLLATE "POSIX" and l.f1 = 'foo';
QUERY PLAN
-------------------------------------------------------------
Hash Join
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
Output: f.f1, f.f2, f.f3, l.f1, l.f2, l.f3
Inner Unique: true
Hash Cond: ((f.f3)::text = (l.f3)::text)
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
-> Foreign Scan on public.ft3 f
Output: f.f1, f.f2, f.f3
Remote SQL: SELECT f1, f2, f3 FROM public.loct3
-> Hash
Output: l.f1, l.f2, l.f3
-> Index Scan using loct3_f1_key on public.loct3 l
Output: l.f1, l.f2, l.f3
Index Cond: (l.f1 = 'foo'::text)
(12 rows)
Improve handling of collations in contrib/postgres_fdw. If we have a local Var of say varchar type with default collation, and we apply a RelabelType to convert that to text with default collation, we don't want to consider that as creating an FDW_COLLATE_UNSAFE situation. It should be okay to compare that to a remote Var, so long as the remote Var determines the comparison collation. (When we actually ship such an expression to the remote side, the local Var would become a Param with default collation, meaning the remote Var would in fact control the comparison collation, because non-default implicit collation overrides default implicit collation in parse_collate.c.) To fix, be more precise about what FDW_COLLATE_NONE means: it applies either to a noncollatable data type or to a collatable type with default collation, if that collation can't be traced to a remote Var. (When it can, FDW_COLLATE_SAFE is appropriate.) We were essentially using that interpretation already at the Var/Const/Param level, but we weren't bubbling it up properly. An alternative fix would be to introduce a separate FDW_COLLATE_DEFAULT value to describe the second situation, but that would add more code without changing the actual behavior, so it didn't seem worthwhile. Also, since we're clarifying the rule to be that we care about whether operator/function input collations match, there seems no need to fail immediately upon seeing a Const/Param/non-foreign-Var with nondefault collation. We only have to reject if it appears in a collation-sensitive context (for example, "var IS NOT NULL" is perfectly safe from a collation standpoint, whatever collation the var has). So just set the state to UNSAFE rather than failing immediately. Per report from Jeevan Chalke. This essentially corrects some sloppy thinking in commit ed3ddf918b59545583a4b374566bc1148e75f593, so back-patch to 9.3 where that logic appeared.
2015-09-24 18:47:29 +02:00
-- ===================================================================
-- test writable foreign table stuff
-- ===================================================================
EXPLAIN (verbose, costs off)
INSERT INTO ft2 (c1,c2,c3) SELECT c1+1000,c2+100, c3 || c3 FROM ft2 LIMIT 20;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Insert on public.ft2
Remote SQL: INSERT INTO "S 1"."T 1"("C 1", c2, c3, c4, c5, c6, c7, c8) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
Batch Size: 1
-> Subquery Scan on "*SELECT*"
Output: "*SELECT*"."?column?", "*SELECT*"."?column?_1", NULL::integer, "*SELECT*"."?column?_2", NULL::timestamp with time zone, NULL::timestamp without time zone, NULL::character varying, 'ft2 '::character(10), NULL::user_enum
-> Foreign Scan on public.ft2 ft2_1
Output: (ft2_1.c1 + 1000), (ft2_1.c2 + 100), (ft2_1.c3 || ft2_1.c3)
Remote SQL: SELECT "C 1", c2, c3 FROM "S 1"."T 1" LIMIT 20::bigint
(8 rows)
INSERT INTO ft2 (c1,c2,c3) SELECT c1+1000,c2+100, c3 || c3 FROM ft2 LIMIT 20;
INSERT INTO ft2 (c1,c2,c3)
VALUES (1101,201,'aaa'), (1102,202,'bbb'), (1103,203,'ccc') RETURNING *;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
------+-----+-----+----+----+----+------------+----
1101 | 201 | aaa | | | | ft2 |
1102 | 202 | bbb | | | | ft2 |
1103 | 203 | ccc | | | | ft2 |
(3 rows)
INSERT INTO ft2 (c1,c2,c3) VALUES (1104,204,'ddd'), (1105,205,'eee');
EXPLAIN (verbose, costs off)
UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3; -- can be pushed down
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
QUERY PLAN
----------------------------------------------------------------------------------------------------------------
Update on public.ft2
-> Foreign Update on public.ft2
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
Remote SQL: UPDATE "S 1"."T 1" SET c2 = (c2 + 300), c3 = (c3 || '_update3') WHERE ((("C 1" % 10) = 3))
(3 rows)
UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3;
EXPLAIN (verbose, costs off)
UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *; -- can be pushed down
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------
Update on public.ft2
Output: c1, c2, c3, c4, c5, c6, c7, c8
-> Foreign Update on public.ft2
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
Remote SQL: UPDATE "S 1"."T 1" SET c2 = (c2 + 400), c3 = (c3 || '_update7') WHERE ((("C 1" % 10) = 7)) RETURNING "C 1", c2, c3, c4, c5, c6, c7, c8
(4 rows)
UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
------+-----+--------------------+------------------------------+--------------------------+----+------------+-----
7 | 407 | 00007_update7 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7 | 7 | foo
17 | 407 | 00017_update7 | Sun Jan 18 00:00:00 1970 PST | Sun Jan 18 00:00:00 1970 | 7 | 7 | foo
27 | 407 | 00027_update7 | Wed Jan 28 00:00:00 1970 PST | Wed Jan 28 00:00:00 1970 | 7 | 7 | foo
37 | 407 | 00037_update7 | Sat Feb 07 00:00:00 1970 PST | Sat Feb 07 00:00:00 1970 | 7 | 7 | foo
47 | 407 | 00047_update7 | Tue Feb 17 00:00:00 1970 PST | Tue Feb 17 00:00:00 1970 | 7 | 7 | foo
57 | 407 | 00057_update7 | Fri Feb 27 00:00:00 1970 PST | Fri Feb 27 00:00:00 1970 | 7 | 7 | foo
67 | 407 | 00067_update7 | Mon Mar 09 00:00:00 1970 PST | Mon Mar 09 00:00:00 1970 | 7 | 7 | foo
77 | 407 | 00077_update7 | Thu Mar 19 00:00:00 1970 PST | Thu Mar 19 00:00:00 1970 | 7 | 7 | foo
87 | 407 | 00087_update7 | Sun Mar 29 00:00:00 1970 PST | Sun Mar 29 00:00:00 1970 | 7 | 7 | foo
97 | 407 | 00097_update7 | Wed Apr 08 00:00:00 1970 PST | Wed Apr 08 00:00:00 1970 | 7 | 7 | foo
107 | 407 | 00107_update7 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7 | 7 | foo
117 | 407 | 00117_update7 | Sun Jan 18 00:00:00 1970 PST | Sun Jan 18 00:00:00 1970 | 7 | 7 | foo
127 | 407 | 00127_update7 | Wed Jan 28 00:00:00 1970 PST | Wed Jan 28 00:00:00 1970 | 7 | 7 | foo
137 | 407 | 00137_update7 | Sat Feb 07 00:00:00 1970 PST | Sat Feb 07 00:00:00 1970 | 7 | 7 | foo
147 | 407 | 00147_update7 | Tue Feb 17 00:00:00 1970 PST | Tue Feb 17 00:00:00 1970 | 7 | 7 | foo
157 | 407 | 00157_update7 | Fri Feb 27 00:00:00 1970 PST | Fri Feb 27 00:00:00 1970 | 7 | 7 | foo
167 | 407 | 00167_update7 | Mon Mar 09 00:00:00 1970 PST | Mon Mar 09 00:00:00 1970 | 7 | 7 | foo
177 | 407 | 00177_update7 | Thu Mar 19 00:00:00 1970 PST | Thu Mar 19 00:00:00 1970 | 7 | 7 | foo
187 | 407 | 00187_update7 | Sun Mar 29 00:00:00 1970 PST | Sun Mar 29 00:00:00 1970 | 7 | 7 | foo
197 | 407 | 00197_update7 | Wed Apr 08 00:00:00 1970 PST | Wed Apr 08 00:00:00 1970 | 7 | 7 | foo
207 | 407 | 00207_update7 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7 | 7 | foo
217 | 407 | 00217_update7 | Sun Jan 18 00:00:00 1970 PST | Sun Jan 18 00:00:00 1970 | 7 | 7 | foo
227 | 407 | 00227_update7 | Wed Jan 28 00:00:00 1970 PST | Wed Jan 28 00:00:00 1970 | 7 | 7 | foo
237 | 407 | 00237_update7 | Sat Feb 07 00:00:00 1970 PST | Sat Feb 07 00:00:00 1970 | 7 | 7 | foo
247 | 407 | 00247_update7 | Tue Feb 17 00:00:00 1970 PST | Tue Feb 17 00:00:00 1970 | 7 | 7 | foo
257 | 407 | 00257_update7 | Fri Feb 27 00:00:00 1970 PST | Fri Feb 27 00:00:00 1970 | 7 | 7 | foo
267 | 407 | 00267_update7 | Mon Mar 09 00:00:00 1970 PST | Mon Mar 09 00:00:00 1970 | 7 | 7 | foo
277 | 407 | 00277_update7 | Thu Mar 19 00:00:00 1970 PST | Thu Mar 19 00:00:00 1970 | 7 | 7 | foo
287 | 407 | 00287_update7 | Sun Mar 29 00:00:00 1970 PST | Sun Mar 29 00:00:00 1970 | 7 | 7 | foo
297 | 407 | 00297_update7 | Wed Apr 08 00:00:00 1970 PST | Wed Apr 08 00:00:00 1970 | 7 | 7 | foo
307 | 407 | 00307_update7 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7 | 7 | foo
317 | 407 | 00317_update7 | Sun Jan 18 00:00:00 1970 PST | Sun Jan 18 00:00:00 1970 | 7 | 7 | foo
327 | 407 | 00327_update7 | Wed Jan 28 00:00:00 1970 PST | Wed Jan 28 00:00:00 1970 | 7 | 7 | foo
337 | 407 | 00337_update7 | Sat Feb 07 00:00:00 1970 PST | Sat Feb 07 00:00:00 1970 | 7 | 7 | foo
347 | 407 | 00347_update7 | Tue Feb 17 00:00:00 1970 PST | Tue Feb 17 00:00:00 1970 | 7 | 7 | foo
357 | 407 | 00357_update7 | Fri Feb 27 00:00:00 1970 PST | Fri Feb 27 00:00:00 1970 | 7 | 7 | foo
367 | 407 | 00367_update7 | Mon Mar 09 00:00:00 1970 PST | Mon Mar 09 00:00:00 1970 | 7 | 7 | foo
377 | 407 | 00377_update7 | Thu Mar 19 00:00:00 1970 PST | Thu Mar 19 00:00:00 1970 | 7 | 7 | foo
387 | 407 | 00387_update7 | Sun Mar 29 00:00:00 1970 PST | Sun Mar 29 00:00:00 1970 | 7 | 7 | foo
397 | 407 | 00397_update7 | Wed Apr 08 00:00:00 1970 PST | Wed Apr 08 00:00:00 1970 | 7 | 7 | foo
407 | 407 | 00407_update7 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7 | 7 | foo
417 | 407 | 00417_update7 | Sun Jan 18 00:00:00 1970 PST | Sun Jan 18 00:00:00 1970 | 7 | 7 | foo
427 | 407 | 00427_update7 | Wed Jan 28 00:00:00 1970 PST | Wed Jan 28 00:00:00 1970 | 7 | 7 | foo
437 | 407 | 00437_update7 | Sat Feb 07 00:00:00 1970 PST | Sat Feb 07 00:00:00 1970 | 7 | 7 | foo
447 | 407 | 00447_update7 | Tue Feb 17 00:00:00 1970 PST | Tue Feb 17 00:00:00 1970 | 7 | 7 | foo
457 | 407 | 00457_update7 | Fri Feb 27 00:00:00 1970 PST | Fri Feb 27 00:00:00 1970 | 7 | 7 | foo
467 | 407 | 00467_update7 | Mon Mar 09 00:00:00 1970 PST | Mon Mar 09 00:00:00 1970 | 7 | 7 | foo
477 | 407 | 00477_update7 | Thu Mar 19 00:00:00 1970 PST | Thu Mar 19 00:00:00 1970 | 7 | 7 | foo
487 | 407 | 00487_update7 | Sun Mar 29 00:00:00 1970 PST | Sun Mar 29 00:00:00 1970 | 7 | 7 | foo
497 | 407 | 00497_update7 | Wed Apr 08 00:00:00 1970 PST | Wed Apr 08 00:00:00 1970 | 7 | 7 | foo
507 | 407 | 00507_update7 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7 | 7 | foo
517 | 407 | 00517_update7 | Sun Jan 18 00:00:00 1970 PST | Sun Jan 18 00:00:00 1970 | 7 | 7 | foo
527 | 407 | 00527_update7 | Wed Jan 28 00:00:00 1970 PST | Wed Jan 28 00:00:00 1970 | 7 | 7 | foo
537 | 407 | 00537_update7 | Sat Feb 07 00:00:00 1970 PST | Sat Feb 07 00:00:00 1970 | 7 | 7 | foo
547 | 407 | 00547_update7 | Tue Feb 17 00:00:00 1970 PST | Tue Feb 17 00:00:00 1970 | 7 | 7 | foo
557 | 407 | 00557_update7 | Fri Feb 27 00:00:00 1970 PST | Fri Feb 27 00:00:00 1970 | 7 | 7 | foo
567 | 407 | 00567_update7 | Mon Mar 09 00:00:00 1970 PST | Mon Mar 09 00:00:00 1970 | 7 | 7 | foo
577 | 407 | 00577_update7 | Thu Mar 19 00:00:00 1970 PST | Thu Mar 19 00:00:00 1970 | 7 | 7 | foo
587 | 407 | 00587_update7 | Sun Mar 29 00:00:00 1970 PST | Sun Mar 29 00:00:00 1970 | 7 | 7 | foo
597 | 407 | 00597_update7 | Wed Apr 08 00:00:00 1970 PST | Wed Apr 08 00:00:00 1970 | 7 | 7 | foo
607 | 407 | 00607_update7 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7 | 7 | foo
617 | 407 | 00617_update7 | Sun Jan 18 00:00:00 1970 PST | Sun Jan 18 00:00:00 1970 | 7 | 7 | foo
627 | 407 | 00627_update7 | Wed Jan 28 00:00:00 1970 PST | Wed Jan 28 00:00:00 1970 | 7 | 7 | foo
637 | 407 | 00637_update7 | Sat Feb 07 00:00:00 1970 PST | Sat Feb 07 00:00:00 1970 | 7 | 7 | foo
647 | 407 | 00647_update7 | Tue Feb 17 00:00:00 1970 PST | Tue Feb 17 00:00:00 1970 | 7 | 7 | foo
657 | 407 | 00657_update7 | Fri Feb 27 00:00:00 1970 PST | Fri Feb 27 00:00:00 1970 | 7 | 7 | foo
667 | 407 | 00667_update7 | Mon Mar 09 00:00:00 1970 PST | Mon Mar 09 00:00:00 1970 | 7 | 7 | foo
677 | 407 | 00677_update7 | Thu Mar 19 00:00:00 1970 PST | Thu Mar 19 00:00:00 1970 | 7 | 7 | foo
687 | 407 | 00687_update7 | Sun Mar 29 00:00:00 1970 PST | Sun Mar 29 00:00:00 1970 | 7 | 7 | foo
697 | 407 | 00697_update7 | Wed Apr 08 00:00:00 1970 PST | Wed Apr 08 00:00:00 1970 | 7 | 7 | foo
707 | 407 | 00707_update7 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7 | 7 | foo
717 | 407 | 00717_update7 | Sun Jan 18 00:00:00 1970 PST | Sun Jan 18 00:00:00 1970 | 7 | 7 | foo
727 | 407 | 00727_update7 | Wed Jan 28 00:00:00 1970 PST | Wed Jan 28 00:00:00 1970 | 7 | 7 | foo
737 | 407 | 00737_update7 | Sat Feb 07 00:00:00 1970 PST | Sat Feb 07 00:00:00 1970 | 7 | 7 | foo
747 | 407 | 00747_update7 | Tue Feb 17 00:00:00 1970 PST | Tue Feb 17 00:00:00 1970 | 7 | 7 | foo
757 | 407 | 00757_update7 | Fri Feb 27 00:00:00 1970 PST | Fri Feb 27 00:00:00 1970 | 7 | 7 | foo
767 | 407 | 00767_update7 | Mon Mar 09 00:00:00 1970 PST | Mon Mar 09 00:00:00 1970 | 7 | 7 | foo
777 | 407 | 00777_update7 | Thu Mar 19 00:00:00 1970 PST | Thu Mar 19 00:00:00 1970 | 7 | 7 | foo
787 | 407 | 00787_update7 | Sun Mar 29 00:00:00 1970 PST | Sun Mar 29 00:00:00 1970 | 7 | 7 | foo
797 | 407 | 00797_update7 | Wed Apr 08 00:00:00 1970 PST | Wed Apr 08 00:00:00 1970 | 7 | 7 | foo
807 | 407 | 00807_update7 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7 | 7 | foo
817 | 407 | 00817_update7 | Sun Jan 18 00:00:00 1970 PST | Sun Jan 18 00:00:00 1970 | 7 | 7 | foo
827 | 407 | 00827_update7 | Wed Jan 28 00:00:00 1970 PST | Wed Jan 28 00:00:00 1970 | 7 | 7 | foo
837 | 407 | 00837_update7 | Sat Feb 07 00:00:00 1970 PST | Sat Feb 07 00:00:00 1970 | 7 | 7 | foo
847 | 407 | 00847_update7 | Tue Feb 17 00:00:00 1970 PST | Tue Feb 17 00:00:00 1970 | 7 | 7 | foo
857 | 407 | 00857_update7 | Fri Feb 27 00:00:00 1970 PST | Fri Feb 27 00:00:00 1970 | 7 | 7 | foo
867 | 407 | 00867_update7 | Mon Mar 09 00:00:00 1970 PST | Mon Mar 09 00:00:00 1970 | 7 | 7 | foo
877 | 407 | 00877_update7 | Thu Mar 19 00:00:00 1970 PST | Thu Mar 19 00:00:00 1970 | 7 | 7 | foo
887 | 407 | 00887_update7 | Sun Mar 29 00:00:00 1970 PST | Sun Mar 29 00:00:00 1970 | 7 | 7 | foo
897 | 407 | 00897_update7 | Wed Apr 08 00:00:00 1970 PST | Wed Apr 08 00:00:00 1970 | 7 | 7 | foo
907 | 407 | 00907_update7 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7 | 7 | foo
917 | 407 | 00917_update7 | Sun Jan 18 00:00:00 1970 PST | Sun Jan 18 00:00:00 1970 | 7 | 7 | foo
927 | 407 | 00927_update7 | Wed Jan 28 00:00:00 1970 PST | Wed Jan 28 00:00:00 1970 | 7 | 7 | foo
937 | 407 | 00937_update7 | Sat Feb 07 00:00:00 1970 PST | Sat Feb 07 00:00:00 1970 | 7 | 7 | foo
947 | 407 | 00947_update7 | Tue Feb 17 00:00:00 1970 PST | Tue Feb 17 00:00:00 1970 | 7 | 7 | foo
957 | 407 | 00957_update7 | Fri Feb 27 00:00:00 1970 PST | Fri Feb 27 00:00:00 1970 | 7 | 7 | foo
967 | 407 | 00967_update7 | Mon Mar 09 00:00:00 1970 PST | Mon Mar 09 00:00:00 1970 | 7 | 7 | foo
977 | 407 | 00977_update7 | Thu Mar 19 00:00:00 1970 PST | Thu Mar 19 00:00:00 1970 | 7 | 7 | foo
987 | 407 | 00987_update7 | Sun Mar 29 00:00:00 1970 PST | Sun Mar 29 00:00:00 1970 | 7 | 7 | foo
997 | 407 | 00997_update7 | Wed Apr 08 00:00:00 1970 PST | Wed Apr 08 00:00:00 1970 | 7 | 7 | foo
1007 | 507 | 0000700007_update7 | | | | ft2 |
1017 | 507 | 0001700017_update7 | | | | ft2 |
(102 rows)
EXPLAIN (verbose, costs off)
UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT
FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9; -- can be pushed down
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Update on public.ft2
-> Foreign Update
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
Remote SQL: UPDATE "S 1"."T 1" r1 SET c2 = (r1.c2 + 500), c3 = (r1.c3 || '_update9'), c7 = 'ft2 '::character(10) FROM "S 1"."T 1" r2 WHERE ((r1.c2 = r2."C 1")) AND (((r2."C 1" % 10) = 9))
(3 rows)
UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT
FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9;
EXPLAIN (verbose, costs off)
DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4; -- can be pushed down
QUERY PLAN
--------------------------------------------------------------------------------------------
Delete on public.ft2
Output: c1, c4
-> Foreign Delete on public.ft2
Remote SQL: DELETE FROM "S 1"."T 1" WHERE ((("C 1" % 10) = 5)) RETURNING "C 1", c4
(4 rows)
DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4;
c1 | c4
------+------------------------------
5 | Tue Jan 06 00:00:00 1970 PST
15 | Fri Jan 16 00:00:00 1970 PST
25 | Mon Jan 26 00:00:00 1970 PST
35 | Thu Feb 05 00:00:00 1970 PST
45 | Sun Feb 15 00:00:00 1970 PST
55 | Wed Feb 25 00:00:00 1970 PST
65 | Sat Mar 07 00:00:00 1970 PST
75 | Tue Mar 17 00:00:00 1970 PST
85 | Fri Mar 27 00:00:00 1970 PST
95 | Mon Apr 06 00:00:00 1970 PST
105 | Tue Jan 06 00:00:00 1970 PST
115 | Fri Jan 16 00:00:00 1970 PST
125 | Mon Jan 26 00:00:00 1970 PST
135 | Thu Feb 05 00:00:00 1970 PST
145 | Sun Feb 15 00:00:00 1970 PST
155 | Wed Feb 25 00:00:00 1970 PST
165 | Sat Mar 07 00:00:00 1970 PST
175 | Tue Mar 17 00:00:00 1970 PST
185 | Fri Mar 27 00:00:00 1970 PST
195 | Mon Apr 06 00:00:00 1970 PST
205 | Tue Jan 06 00:00:00 1970 PST
215 | Fri Jan 16 00:00:00 1970 PST
225 | Mon Jan 26 00:00:00 1970 PST
235 | Thu Feb 05 00:00:00 1970 PST
245 | Sun Feb 15 00:00:00 1970 PST
255 | Wed Feb 25 00:00:00 1970 PST
265 | Sat Mar 07 00:00:00 1970 PST
275 | Tue Mar 17 00:00:00 1970 PST
285 | Fri Mar 27 00:00:00 1970 PST
295 | Mon Apr 06 00:00:00 1970 PST
305 | Tue Jan 06 00:00:00 1970 PST
315 | Fri Jan 16 00:00:00 1970 PST
325 | Mon Jan 26 00:00:00 1970 PST
335 | Thu Feb 05 00:00:00 1970 PST
345 | Sun Feb 15 00:00:00 1970 PST
355 | Wed Feb 25 00:00:00 1970 PST
365 | Sat Mar 07 00:00:00 1970 PST
375 | Tue Mar 17 00:00:00 1970 PST
385 | Fri Mar 27 00:00:00 1970 PST
395 | Mon Apr 06 00:00:00 1970 PST
405 | Tue Jan 06 00:00:00 1970 PST
415 | Fri Jan 16 00:00:00 1970 PST
425 | Mon Jan 26 00:00:00 1970 PST
435 | Thu Feb 05 00:00:00 1970 PST
445 | Sun Feb 15 00:00:00 1970 PST
455 | Wed Feb 25 00:00:00 1970 PST
465 | Sat Mar 07 00:00:00 1970 PST
475 | Tue Mar 17 00:00:00 1970 PST
485 | Fri Mar 27 00:00:00 1970 PST
495 | Mon Apr 06 00:00:00 1970 PST
505 | Tue Jan 06 00:00:00 1970 PST
515 | Fri Jan 16 00:00:00 1970 PST
525 | Mon Jan 26 00:00:00 1970 PST
535 | Thu Feb 05 00:00:00 1970 PST
545 | Sun Feb 15 00:00:00 1970 PST
555 | Wed Feb 25 00:00:00 1970 PST
565 | Sat Mar 07 00:00:00 1970 PST
575 | Tue Mar 17 00:00:00 1970 PST
585 | Fri Mar 27 00:00:00 1970 PST
595 | Mon Apr 06 00:00:00 1970 PST
605 | Tue Jan 06 00:00:00 1970 PST
615 | Fri Jan 16 00:00:00 1970 PST
625 | Mon Jan 26 00:00:00 1970 PST
635 | Thu Feb 05 00:00:00 1970 PST
645 | Sun Feb 15 00:00:00 1970 PST
655 | Wed Feb 25 00:00:00 1970 PST
665 | Sat Mar 07 00:00:00 1970 PST
675 | Tue Mar 17 00:00:00 1970 PST
685 | Fri Mar 27 00:00:00 1970 PST
695 | Mon Apr 06 00:00:00 1970 PST
705 | Tue Jan 06 00:00:00 1970 PST
715 | Fri Jan 16 00:00:00 1970 PST
725 | Mon Jan 26 00:00:00 1970 PST
735 | Thu Feb 05 00:00:00 1970 PST
745 | Sun Feb 15 00:00:00 1970 PST
755 | Wed Feb 25 00:00:00 1970 PST
765 | Sat Mar 07 00:00:00 1970 PST
775 | Tue Mar 17 00:00:00 1970 PST
785 | Fri Mar 27 00:00:00 1970 PST
795 | Mon Apr 06 00:00:00 1970 PST
805 | Tue Jan 06 00:00:00 1970 PST
815 | Fri Jan 16 00:00:00 1970 PST
825 | Mon Jan 26 00:00:00 1970 PST
835 | Thu Feb 05 00:00:00 1970 PST
845 | Sun Feb 15 00:00:00 1970 PST
855 | Wed Feb 25 00:00:00 1970 PST
865 | Sat Mar 07 00:00:00 1970 PST
875 | Tue Mar 17 00:00:00 1970 PST
885 | Fri Mar 27 00:00:00 1970 PST
895 | Mon Apr 06 00:00:00 1970 PST
905 | Tue Jan 06 00:00:00 1970 PST
915 | Fri Jan 16 00:00:00 1970 PST
925 | Mon Jan 26 00:00:00 1970 PST
935 | Thu Feb 05 00:00:00 1970 PST
945 | Sun Feb 15 00:00:00 1970 PST
955 | Wed Feb 25 00:00:00 1970 PST
965 | Sat Mar 07 00:00:00 1970 PST
975 | Tue Mar 17 00:00:00 1970 PST
985 | Fri Mar 27 00:00:00 1970 PST
995 | Mon Apr 06 00:00:00 1970 PST
1005 |
1015 |
1105 |
(103 rows)
EXPLAIN (verbose, costs off)
DELETE FROM ft2 USING ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 2; -- can be pushed down
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------
Delete on public.ft2
-> Foreign Delete
Remote SQL: DELETE FROM "S 1"."T 1" r1 USING "S 1"."T 1" r2 WHERE ((r1.c2 = r2."C 1")) AND (((r2."C 1" % 10) = 2))
(3 rows)
DELETE FROM ft2 USING ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 2;
SELECT c1,c2,c3,c4 FROM ft2 ORDER BY c1;
c1 | c2 | c3 | c4
------+-----+--------------------+------------------------------
1 | 1 | 00001 | Fri Jan 02 00:00:00 1970 PST
3 | 303 | 00003_update3 | Sun Jan 04 00:00:00 1970 PST
4 | 4 | 00004 | Mon Jan 05 00:00:00 1970 PST
6 | 6 | 00006 | Wed Jan 07 00:00:00 1970 PST
7 | 407 | 00007_update7 | Thu Jan 08 00:00:00 1970 PST
8 | 8 | 00008 | Fri Jan 09 00:00:00 1970 PST
9 | 509 | 00009_update9 | Sat Jan 10 00:00:00 1970 PST
10 | 0 | 00010 | Sun Jan 11 00:00:00 1970 PST
11 | 1 | 00011 | Mon Jan 12 00:00:00 1970 PST
13 | 303 | 00013_update3 | Wed Jan 14 00:00:00 1970 PST
14 | 4 | 00014 | Thu Jan 15 00:00:00 1970 PST
16 | 6 | 00016 | Sat Jan 17 00:00:00 1970 PST
17 | 407 | 00017_update7 | Sun Jan 18 00:00:00 1970 PST
18 | 8 | 00018 | Mon Jan 19 00:00:00 1970 PST
19 | 509 | 00019_update9 | Tue Jan 20 00:00:00 1970 PST
20 | 0 | 00020 | Wed Jan 21 00:00:00 1970 PST
21 | 1 | 00021 | Thu Jan 22 00:00:00 1970 PST
23 | 303 | 00023_update3 | Sat Jan 24 00:00:00 1970 PST
24 | 4 | 00024 | Sun Jan 25 00:00:00 1970 PST
26 | 6 | 00026 | Tue Jan 27 00:00:00 1970 PST
27 | 407 | 00027_update7 | Wed Jan 28 00:00:00 1970 PST
28 | 8 | 00028 | Thu Jan 29 00:00:00 1970 PST
29 | 509 | 00029_update9 | Fri Jan 30 00:00:00 1970 PST
30 | 0 | 00030 | Sat Jan 31 00:00:00 1970 PST
31 | 1 | 00031 | Sun Feb 01 00:00:00 1970 PST
33 | 303 | 00033_update3 | Tue Feb 03 00:00:00 1970 PST
34 | 4 | 00034 | Wed Feb 04 00:00:00 1970 PST
36 | 6 | 00036 | Fri Feb 06 00:00:00 1970 PST
37 | 407 | 00037_update7 | Sat Feb 07 00:00:00 1970 PST
38 | 8 | 00038 | Sun Feb 08 00:00:00 1970 PST
39 | 509 | 00039_update9 | Mon Feb 09 00:00:00 1970 PST
40 | 0 | 00040 | Tue Feb 10 00:00:00 1970 PST
41 | 1 | 00041 | Wed Feb 11 00:00:00 1970 PST
43 | 303 | 00043_update3 | Fri Feb 13 00:00:00 1970 PST
44 | 4 | 00044 | Sat Feb 14 00:00:00 1970 PST
46 | 6 | 00046 | Mon Feb 16 00:00:00 1970 PST
47 | 407 | 00047_update7 | Tue Feb 17 00:00:00 1970 PST
48 | 8 | 00048 | Wed Feb 18 00:00:00 1970 PST
49 | 509 | 00049_update9 | Thu Feb 19 00:00:00 1970 PST
50 | 0 | 00050 | Fri Feb 20 00:00:00 1970 PST
51 | 1 | 00051 | Sat Feb 21 00:00:00 1970 PST
53 | 303 | 00053_update3 | Mon Feb 23 00:00:00 1970 PST
54 | 4 | 00054 | Tue Feb 24 00:00:00 1970 PST
56 | 6 | 00056 | Thu Feb 26 00:00:00 1970 PST
57 | 407 | 00057_update7 | Fri Feb 27 00:00:00 1970 PST
58 | 8 | 00058 | Sat Feb 28 00:00:00 1970 PST
59 | 509 | 00059_update9 | Sun Mar 01 00:00:00 1970 PST
60 | 0 | 00060 | Mon Mar 02 00:00:00 1970 PST
61 | 1 | 00061 | Tue Mar 03 00:00:00 1970 PST
63 | 303 | 00063_update3 | Thu Mar 05 00:00:00 1970 PST
64 | 4 | 00064 | Fri Mar 06 00:00:00 1970 PST
66 | 6 | 00066 | Sun Mar 08 00:00:00 1970 PST
67 | 407 | 00067_update7 | Mon Mar 09 00:00:00 1970 PST
68 | 8 | 00068 | Tue Mar 10 00:00:00 1970 PST
69 | 509 | 00069_update9 | Wed Mar 11 00:00:00 1970 PST
70 | 0 | 00070 | Thu Mar 12 00:00:00 1970 PST
71 | 1 | 00071 | Fri Mar 13 00:00:00 1970 PST
73 | 303 | 00073_update3 | Sun Mar 15 00:00:00 1970 PST
74 | 4 | 00074 | Mon Mar 16 00:00:00 1970 PST
76 | 6 | 00076 | Wed Mar 18 00:00:00 1970 PST
77 | 407 | 00077_update7 | Thu Mar 19 00:00:00 1970 PST
78 | 8 | 00078 | Fri Mar 20 00:00:00 1970 PST
79 | 509 | 00079_update9 | Sat Mar 21 00:00:00 1970 PST
80 | 0 | 00080 | Sun Mar 22 00:00:00 1970 PST
81 | 1 | 00081 | Mon Mar 23 00:00:00 1970 PST
83 | 303 | 00083_update3 | Wed Mar 25 00:00:00 1970 PST
84 | 4 | 00084 | Thu Mar 26 00:00:00 1970 PST
86 | 6 | 00086 | Sat Mar 28 00:00:00 1970 PST
87 | 407 | 00087_update7 | Sun Mar 29 00:00:00 1970 PST
88 | 8 | 00088 | Mon Mar 30 00:00:00 1970 PST
89 | 509 | 00089_update9 | Tue Mar 31 00:00:00 1970 PST
90 | 0 | 00090 | Wed Apr 01 00:00:00 1970 PST
91 | 1 | 00091 | Thu Apr 02 00:00:00 1970 PST
93 | 303 | 00093_update3 | Sat Apr 04 00:00:00 1970 PST
94 | 4 | 00094 | Sun Apr 05 00:00:00 1970 PST
96 | 6 | 00096 | Tue Apr 07 00:00:00 1970 PST
97 | 407 | 00097_update7 | Wed Apr 08 00:00:00 1970 PST
98 | 8 | 00098 | Thu Apr 09 00:00:00 1970 PST
99 | 509 | 00099_update9 | Fri Apr 10 00:00:00 1970 PST
100 | 0 | 00100 | Thu Jan 01 00:00:00 1970 PST
101 | 1 | 00101 | Fri Jan 02 00:00:00 1970 PST
103 | 303 | 00103_update3 | Sun Jan 04 00:00:00 1970 PST
104 | 4 | 00104 | Mon Jan 05 00:00:00 1970 PST
106 | 6 | 00106 | Wed Jan 07 00:00:00 1970 PST
107 | 407 | 00107_update7 | Thu Jan 08 00:00:00 1970 PST
108 | 8 | 00108 | Fri Jan 09 00:00:00 1970 PST
109 | 509 | 00109_update9 | Sat Jan 10 00:00:00 1970 PST
110 | 0 | 00110 | Sun Jan 11 00:00:00 1970 PST
111 | 1 | 00111 | Mon Jan 12 00:00:00 1970 PST
113 | 303 | 00113_update3 | Wed Jan 14 00:00:00 1970 PST
114 | 4 | 00114 | Thu Jan 15 00:00:00 1970 PST
116 | 6 | 00116 | Sat Jan 17 00:00:00 1970 PST
117 | 407 | 00117_update7 | Sun Jan 18 00:00:00 1970 PST
118 | 8 | 00118 | Mon Jan 19 00:00:00 1970 PST
119 | 509 | 00119_update9 | Tue Jan 20 00:00:00 1970 PST
120 | 0 | 00120 | Wed Jan 21 00:00:00 1970 PST
121 | 1 | 00121 | Thu Jan 22 00:00:00 1970 PST
123 | 303 | 00123_update3 | Sat Jan 24 00:00:00 1970 PST
124 | 4 | 00124 | Sun Jan 25 00:00:00 1970 PST
126 | 6 | 00126 | Tue Jan 27 00:00:00 1970 PST
127 | 407 | 00127_update7 | Wed Jan 28 00:00:00 1970 PST
128 | 8 | 00128 | Thu Jan 29 00:00:00 1970 PST
129 | 509 | 00129_update9 | Fri Jan 30 00:00:00 1970 PST
130 | 0 | 00130 | Sat Jan 31 00:00:00 1970 PST
131 | 1 | 00131 | Sun Feb 01 00:00:00 1970 PST
133 | 303 | 00133_update3 | Tue Feb 03 00:00:00 1970 PST
134 | 4 | 00134 | Wed Feb 04 00:00:00 1970 PST
136 | 6 | 00136 | Fri Feb 06 00:00:00 1970 PST
137 | 407 | 00137_update7 | Sat Feb 07 00:00:00 1970 PST
138 | 8 | 00138 | Sun Feb 08 00:00:00 1970 PST
139 | 509 | 00139_update9 | Mon Feb 09 00:00:00 1970 PST
140 | 0 | 00140 | Tue Feb 10 00:00:00 1970 PST
141 | 1 | 00141 | Wed Feb 11 00:00:00 1970 PST
143 | 303 | 00143_update3 | Fri Feb 13 00:00:00 1970 PST
144 | 4 | 00144 | Sat Feb 14 00:00:00 1970 PST
146 | 6 | 00146 | Mon Feb 16 00:00:00 1970 PST
147 | 407 | 00147_update7 | Tue Feb 17 00:00:00 1970 PST
148 | 8 | 00148 | Wed Feb 18 00:00:00 1970 PST
149 | 509 | 00149_update9 | Thu Feb 19 00:00:00 1970 PST
150 | 0 | 00150 | Fri Feb 20 00:00:00 1970 PST
151 | 1 | 00151 | Sat Feb 21 00:00:00 1970 PST
153 | 303 | 00153_update3 | Mon Feb 23 00:00:00 1970 PST
154 | 4 | 00154 | Tue Feb 24 00:00:00 1970 PST
156 | 6 | 00156 | Thu Feb 26 00:00:00 1970 PST
157 | 407 | 00157_update7 | Fri Feb 27 00:00:00 1970 PST
158 | 8 | 00158 | Sat Feb 28 00:00:00 1970 PST
159 | 509 | 00159_update9 | Sun Mar 01 00:00:00 1970 PST
160 | 0 | 00160 | Mon Mar 02 00:00:00 1970 PST
161 | 1 | 00161 | Tue Mar 03 00:00:00 1970 PST
163 | 303 | 00163_update3 | Thu Mar 05 00:00:00 1970 PST
164 | 4 | 00164 | Fri Mar 06 00:00:00 1970 PST
166 | 6 | 00166 | Sun Mar 08 00:00:00 1970 PST
167 | 407 | 00167_update7 | Mon Mar 09 00:00:00 1970 PST
168 | 8 | 00168 | Tue Mar 10 00:00:00 1970 PST
169 | 509 | 00169_update9 | Wed Mar 11 00:00:00 1970 PST
170 | 0 | 00170 | Thu Mar 12 00:00:00 1970 PST
171 | 1 | 00171 | Fri Mar 13 00:00:00 1970 PST
173 | 303 | 00173_update3 | Sun Mar 15 00:00:00 1970 PST
174 | 4 | 00174 | Mon Mar 16 00:00:00 1970 PST
176 | 6 | 00176 | Wed Mar 18 00:00:00 1970 PST
177 | 407 | 00177_update7 | Thu Mar 19 00:00:00 1970 PST
178 | 8 | 00178 | Fri Mar 20 00:00:00 1970 PST
179 | 509 | 00179_update9 | Sat Mar 21 00:00:00 1970 PST
180 | 0 | 00180 | Sun Mar 22 00:00:00 1970 PST
181 | 1 | 00181 | Mon Mar 23 00:00:00 1970 PST
183 | 303 | 00183_update3 | Wed Mar 25 00:00:00 1970 PST
184 | 4 | 00184 | Thu Mar 26 00:00:00 1970 PST
186 | 6 | 00186 | Sat Mar 28 00:00:00 1970 PST
187 | 407 | 00187_update7 | Sun Mar 29 00:00:00 1970 PST
188 | 8 | 00188 | Mon Mar 30 00:00:00 1970 PST
189 | 509 | 00189_update9 | Tue Mar 31 00:00:00 1970 PST
190 | 0 | 00190 | Wed Apr 01 00:00:00 1970 PST
191 | 1 | 00191 | Thu Apr 02 00:00:00 1970 PST
193 | 303 | 00193_update3 | Sat Apr 04 00:00:00 1970 PST
194 | 4 | 00194 | Sun Apr 05 00:00:00 1970 PST
196 | 6 | 00196 | Tue Apr 07 00:00:00 1970 PST
197 | 407 | 00197_update7 | Wed Apr 08 00:00:00 1970 PST
198 | 8 | 00198 | Thu Apr 09 00:00:00 1970 PST
199 | 509 | 00199_update9 | Fri Apr 10 00:00:00 1970 PST
200 | 0 | 00200 | Thu Jan 01 00:00:00 1970 PST
201 | 1 | 00201 | Fri Jan 02 00:00:00 1970 PST
203 | 303 | 00203_update3 | Sun Jan 04 00:00:00 1970 PST
204 | 4 | 00204 | Mon Jan 05 00:00:00 1970 PST
206 | 6 | 00206 | Wed Jan 07 00:00:00 1970 PST
207 | 407 | 00207_update7 | Thu Jan 08 00:00:00 1970 PST
208 | 8 | 00208 | Fri Jan 09 00:00:00 1970 PST
209 | 509 | 00209_update9 | Sat Jan 10 00:00:00 1970 PST
210 | 0 | 00210 | Sun Jan 11 00:00:00 1970 PST
211 | 1 | 00211 | Mon Jan 12 00:00:00 1970 PST
213 | 303 | 00213_update3 | Wed Jan 14 00:00:00 1970 PST
214 | 4 | 00214 | Thu Jan 15 00:00:00 1970 PST
216 | 6 | 00216 | Sat Jan 17 00:00:00 1970 PST
217 | 407 | 00217_update7 | Sun Jan 18 00:00:00 1970 PST
218 | 8 | 00218 | Mon Jan 19 00:00:00 1970 PST
219 | 509 | 00219_update9 | Tue Jan 20 00:00:00 1970 PST
220 | 0 | 00220 | Wed Jan 21 00:00:00 1970 PST
221 | 1 | 00221 | Thu Jan 22 00:00:00 1970 PST
223 | 303 | 00223_update3 | Sat Jan 24 00:00:00 1970 PST
224 | 4 | 00224 | Sun Jan 25 00:00:00 1970 PST
226 | 6 | 00226 | Tue Jan 27 00:00:00 1970 PST
227 | 407 | 00227_update7 | Wed Jan 28 00:00:00 1970 PST
228 | 8 | 00228 | Thu Jan 29 00:00:00 1970 PST
229 | 509 | 00229_update9 | Fri Jan 30 00:00:00 1970 PST
230 | 0 | 00230 | Sat Jan 31 00:00:00 1970 PST
231 | 1 | 00231 | Sun Feb 01 00:00:00 1970 PST
233 | 303 | 00233_update3 | Tue Feb 03 00:00:00 1970 PST
234 | 4 | 00234 | Wed Feb 04 00:00:00 1970 PST
236 | 6 | 00236 | Fri Feb 06 00:00:00 1970 PST
237 | 407 | 00237_update7 | Sat Feb 07 00:00:00 1970 PST
238 | 8 | 00238 | Sun Feb 08 00:00:00 1970 PST
239 | 509 | 00239_update9 | Mon Feb 09 00:00:00 1970 PST
240 | 0 | 00240 | Tue Feb 10 00:00:00 1970 PST
241 | 1 | 00241 | Wed Feb 11 00:00:00 1970 PST
243 | 303 | 00243_update3 | Fri Feb 13 00:00:00 1970 PST
244 | 4 | 00244 | Sat Feb 14 00:00:00 1970 PST
246 | 6 | 00246 | Mon Feb 16 00:00:00 1970 PST
247 | 407 | 00247_update7 | Tue Feb 17 00:00:00 1970 PST
248 | 8 | 00248 | Wed Feb 18 00:00:00 1970 PST
249 | 509 | 00249_update9 | Thu Feb 19 00:00:00 1970 PST
250 | 0 | 00250 | Fri Feb 20 00:00:00 1970 PST
251 | 1 | 00251 | Sat Feb 21 00:00:00 1970 PST
253 | 303 | 00253_update3 | Mon Feb 23 00:00:00 1970 PST
254 | 4 | 00254 | Tue Feb 24 00:00:00 1970 PST
256 | 6 | 00256 | Thu Feb 26 00:00:00 1970 PST
257 | 407 | 00257_update7 | Fri Feb 27 00:00:00 1970 PST
258 | 8 | 00258 | Sat Feb 28 00:00:00 1970 PST
259 | 509 | 00259_update9 | Sun Mar 01 00:00:00 1970 PST
260 | 0 | 00260 | Mon Mar 02 00:00:00 1970 PST
261 | 1 | 00261 | Tue Mar 03 00:00:00 1970 PST
263 | 303 | 00263_update3 | Thu Mar 05 00:00:00 1970 PST
264 | 4 | 00264 | Fri Mar 06 00:00:00 1970 PST
266 | 6 | 00266 | Sun Mar 08 00:00:00 1970 PST
267 | 407 | 00267_update7 | Mon Mar 09 00:00:00 1970 PST
268 | 8 | 00268 | Tue Mar 10 00:00:00 1970 PST
269 | 509 | 00269_update9 | Wed Mar 11 00:00:00 1970 PST
270 | 0 | 00270 | Thu Mar 12 00:00:00 1970 PST
271 | 1 | 00271 | Fri Mar 13 00:00:00 1970 PST
273 | 303 | 00273_update3 | Sun Mar 15 00:00:00 1970 PST
274 | 4 | 00274 | Mon Mar 16 00:00:00 1970 PST
276 | 6 | 00276 | Wed Mar 18 00:00:00 1970 PST
277 | 407 | 00277_update7 | Thu Mar 19 00:00:00 1970 PST
278 | 8 | 00278 | Fri Mar 20 00:00:00 1970 PST
279 | 509 | 00279_update9 | Sat Mar 21 00:00:00 1970 PST
280 | 0 | 00280 | Sun Mar 22 00:00:00 1970 PST
281 | 1 | 00281 | Mon Mar 23 00:00:00 1970 PST
283 | 303 | 00283_update3 | Wed Mar 25 00:00:00 1970 PST
284 | 4 | 00284 | Thu Mar 26 00:00:00 1970 PST
286 | 6 | 00286 | Sat Mar 28 00:00:00 1970 PST
287 | 407 | 00287_update7 | Sun Mar 29 00:00:00 1970 PST
288 | 8 | 00288 | Mon Mar 30 00:00:00 1970 PST
289 | 509 | 00289_update9 | Tue Mar 31 00:00:00 1970 PST
290 | 0 | 00290 | Wed Apr 01 00:00:00 1970 PST
291 | 1 | 00291 | Thu Apr 02 00:00:00 1970 PST
293 | 303 | 00293_update3 | Sat Apr 04 00:00:00 1970 PST
294 | 4 | 00294 | Sun Apr 05 00:00:00 1970 PST
296 | 6 | 00296 | Tue Apr 07 00:00:00 1970 PST
297 | 407 | 00297_update7 | Wed Apr 08 00:00:00 1970 PST
298 | 8 | 00298 | Thu Apr 09 00:00:00 1970 PST
299 | 509 | 00299_update9 | Fri Apr 10 00:00:00 1970 PST
300 | 0 | 00300 | Thu Jan 01 00:00:00 1970 PST
301 | 1 | 00301 | Fri Jan 02 00:00:00 1970 PST
303 | 303 | 00303_update3 | Sun Jan 04 00:00:00 1970 PST
304 | 4 | 00304 | Mon Jan 05 00:00:00 1970 PST
306 | 6 | 00306 | Wed Jan 07 00:00:00 1970 PST
307 | 407 | 00307_update7 | Thu Jan 08 00:00:00 1970 PST
308 | 8 | 00308 | Fri Jan 09 00:00:00 1970 PST
309 | 509 | 00309_update9 | Sat Jan 10 00:00:00 1970 PST
310 | 0 | 00310 | Sun Jan 11 00:00:00 1970 PST
311 | 1 | 00311 | Mon Jan 12 00:00:00 1970 PST
313 | 303 | 00313_update3 | Wed Jan 14 00:00:00 1970 PST
314 | 4 | 00314 | Thu Jan 15 00:00:00 1970 PST
316 | 6 | 00316 | Sat Jan 17 00:00:00 1970 PST
317 | 407 | 00317_update7 | Sun Jan 18 00:00:00 1970 PST
318 | 8 | 00318 | Mon Jan 19 00:00:00 1970 PST
319 | 509 | 00319_update9 | Tue Jan 20 00:00:00 1970 PST
320 | 0 | 00320 | Wed Jan 21 00:00:00 1970 PST
321 | 1 | 00321 | Thu Jan 22 00:00:00 1970 PST
323 | 303 | 00323_update3 | Sat Jan 24 00:00:00 1970 PST
324 | 4 | 00324 | Sun Jan 25 00:00:00 1970 PST
326 | 6 | 00326 | Tue Jan 27 00:00:00 1970 PST
327 | 407 | 00327_update7 | Wed Jan 28 00:00:00 1970 PST
328 | 8 | 00328 | Thu Jan 29 00:00:00 1970 PST
329 | 509 | 00329_update9 | Fri Jan 30 00:00:00 1970 PST
330 | 0 | 00330 | Sat Jan 31 00:00:00 1970 PST
331 | 1 | 00331 | Sun Feb 01 00:00:00 1970 PST
333 | 303 | 00333_update3 | Tue Feb 03 00:00:00 1970 PST
334 | 4 | 00334 | Wed Feb 04 00:00:00 1970 PST
336 | 6 | 00336 | Fri Feb 06 00:00:00 1970 PST
337 | 407 | 00337_update7 | Sat Feb 07 00:00:00 1970 PST
338 | 8 | 00338 | Sun Feb 08 00:00:00 1970 PST
339 | 509 | 00339_update9 | Mon Feb 09 00:00:00 1970 PST
340 | 0 | 00340 | Tue Feb 10 00:00:00 1970 PST
341 | 1 | 00341 | Wed Feb 11 00:00:00 1970 PST
343 | 303 | 00343_update3 | Fri Feb 13 00:00:00 1970 PST
344 | 4 | 00344 | Sat Feb 14 00:00:00 1970 PST
346 | 6 | 00346 | Mon Feb 16 00:00:00 1970 PST
347 | 407 | 00347_update7 | Tue Feb 17 00:00:00 1970 PST
348 | 8 | 00348 | Wed Feb 18 00:00:00 1970 PST
349 | 509 | 00349_update9 | Thu Feb 19 00:00:00 1970 PST
350 | 0 | 00350 | Fri Feb 20 00:00:00 1970 PST
351 | 1 | 00351 | Sat Feb 21 00:00:00 1970 PST
353 | 303 | 00353_update3 | Mon Feb 23 00:00:00 1970 PST
354 | 4 | 00354 | Tue Feb 24 00:00:00 1970 PST
356 | 6 | 00356 | Thu Feb 26 00:00:00 1970 PST
357 | 407 | 00357_update7 | Fri Feb 27 00:00:00 1970 PST
358 | 8 | 00358 | Sat Feb 28 00:00:00 1970 PST
359 | 509 | 00359_update9 | Sun Mar 01 00:00:00 1970 PST
360 | 0 | 00360 | Mon Mar 02 00:00:00 1970 PST
361 | 1 | 00361 | Tue Mar 03 00:00:00 1970 PST
363 | 303 | 00363_update3 | Thu Mar 05 00:00:00 1970 PST
364 | 4 | 00364 | Fri Mar 06 00:00:00 1970 PST
366 | 6 | 00366 | Sun Mar 08 00:00:00 1970 PST
367 | 407 | 00367_update7 | Mon Mar 09 00:00:00 1970 PST
368 | 8 | 00368 | Tue Mar 10 00:00:00 1970 PST
369 | 509 | 00369_update9 | Wed Mar 11 00:00:00 1970 PST
370 | 0 | 00370 | Thu Mar 12 00:00:00 1970 PST
371 | 1 | 00371 | Fri Mar 13 00:00:00 1970 PST
373 | 303 | 00373_update3 | Sun Mar 15 00:00:00 1970 PST
374 | 4 | 00374 | Mon Mar 16 00:00:00 1970 PST
376 | 6 | 00376 | Wed Mar 18 00:00:00 1970 PST
377 | 407 | 00377_update7 | Thu Mar 19 00:00:00 1970 PST
378 | 8 | 00378 | Fri Mar 20 00:00:00 1970 PST
379 | 509 | 00379_update9 | Sat Mar 21 00:00:00 1970 PST
380 | 0 | 00380 | Sun Mar 22 00:00:00 1970 PST
381 | 1 | 00381 | Mon Mar 23 00:00:00 1970 PST
383 | 303 | 00383_update3 | Wed Mar 25 00:00:00 1970 PST
384 | 4 | 00384 | Thu Mar 26 00:00:00 1970 PST
386 | 6 | 00386 | Sat Mar 28 00:00:00 1970 PST
387 | 407 | 00387_update7 | Sun Mar 29 00:00:00 1970 PST
388 | 8 | 00388 | Mon Mar 30 00:00:00 1970 PST
389 | 509 | 00389_update9 | Tue Mar 31 00:00:00 1970 PST
390 | 0 | 00390 | Wed Apr 01 00:00:00 1970 PST
391 | 1 | 00391 | Thu Apr 02 00:00:00 1970 PST
393 | 303 | 00393_update3 | Sat Apr 04 00:00:00 1970 PST
394 | 4 | 00394 | Sun Apr 05 00:00:00 1970 PST
396 | 6 | 00396 | Tue Apr 07 00:00:00 1970 PST
397 | 407 | 00397_update7 | Wed Apr 08 00:00:00 1970 PST
398 | 8 | 00398 | Thu Apr 09 00:00:00 1970 PST
399 | 509 | 00399_update9 | Fri Apr 10 00:00:00 1970 PST
400 | 0 | 00400 | Thu Jan 01 00:00:00 1970 PST
401 | 1 | 00401 | Fri Jan 02 00:00:00 1970 PST
403 | 303 | 00403_update3 | Sun Jan 04 00:00:00 1970 PST
404 | 4 | 00404 | Mon Jan 05 00:00:00 1970 PST
406 | 6 | 00406 | Wed Jan 07 00:00:00 1970 PST
407 | 407 | 00407_update7 | Thu Jan 08 00:00:00 1970 PST
408 | 8 | 00408 | Fri Jan 09 00:00:00 1970 PST
409 | 509 | 00409_update9 | Sat Jan 10 00:00:00 1970 PST
410 | 0 | 00410 | Sun Jan 11 00:00:00 1970 PST
411 | 1 | 00411 | Mon Jan 12 00:00:00 1970 PST
413 | 303 | 00413_update3 | Wed Jan 14 00:00:00 1970 PST
414 | 4 | 00414 | Thu Jan 15 00:00:00 1970 PST
416 | 6 | 00416 | Sat Jan 17 00:00:00 1970 PST
417 | 407 | 00417_update7 | Sun Jan 18 00:00:00 1970 PST
418 | 8 | 00418 | Mon Jan 19 00:00:00 1970 PST
419 | 509 | 00419_update9 | Tue Jan 20 00:00:00 1970 PST
420 | 0 | 00420 | Wed Jan 21 00:00:00 1970 PST
421 | 1 | 00421 | Thu Jan 22 00:00:00 1970 PST
423 | 303 | 00423_update3 | Sat Jan 24 00:00:00 1970 PST
424 | 4 | 00424 | Sun Jan 25 00:00:00 1970 PST
426 | 6 | 00426 | Tue Jan 27 00:00:00 1970 PST
427 | 407 | 00427_update7 | Wed Jan 28 00:00:00 1970 PST
428 | 8 | 00428 | Thu Jan 29 00:00:00 1970 PST
429 | 509 | 00429_update9 | Fri Jan 30 00:00:00 1970 PST
430 | 0 | 00430 | Sat Jan 31 00:00:00 1970 PST
431 | 1 | 00431 | Sun Feb 01 00:00:00 1970 PST
433 | 303 | 00433_update3 | Tue Feb 03 00:00:00 1970 PST
434 | 4 | 00434 | Wed Feb 04 00:00:00 1970 PST
436 | 6 | 00436 | Fri Feb 06 00:00:00 1970 PST
437 | 407 | 00437_update7 | Sat Feb 07 00:00:00 1970 PST
438 | 8 | 00438 | Sun Feb 08 00:00:00 1970 PST
439 | 509 | 00439_update9 | Mon Feb 09 00:00:00 1970 PST
440 | 0 | 00440 | Tue Feb 10 00:00:00 1970 PST
441 | 1 | 00441 | Wed Feb 11 00:00:00 1970 PST
443 | 303 | 00443_update3 | Fri Feb 13 00:00:00 1970 PST
444 | 4 | 00444 | Sat Feb 14 00:00:00 1970 PST
446 | 6 | 00446 | Mon Feb 16 00:00:00 1970 PST
447 | 407 | 00447_update7 | Tue Feb 17 00:00:00 1970 PST
448 | 8 | 00448 | Wed Feb 18 00:00:00 1970 PST
449 | 509 | 00449_update9 | Thu Feb 19 00:00:00 1970 PST
450 | 0 | 00450 | Fri Feb 20 00:00:00 1970 PST
451 | 1 | 00451 | Sat Feb 21 00:00:00 1970 PST
453 | 303 | 00453_update3 | Mon Feb 23 00:00:00 1970 PST
454 | 4 | 00454 | Tue Feb 24 00:00:00 1970 PST
456 | 6 | 00456 | Thu Feb 26 00:00:00 1970 PST
457 | 407 | 00457_update7 | Fri Feb 27 00:00:00 1970 PST
458 | 8 | 00458 | Sat Feb 28 00:00:00 1970 PST
459 | 509 | 00459_update9 | Sun Mar 01 00:00:00 1970 PST
460 | 0 | 00460 | Mon Mar 02 00:00:00 1970 PST
461 | 1 | 00461 | Tue Mar 03 00:00:00 1970 PST
463 | 303 | 00463_update3 | Thu Mar 05 00:00:00 1970 PST
464 | 4 | 00464 | Fri Mar 06 00:00:00 1970 PST
466 | 6 | 00466 | Sun Mar 08 00:00:00 1970 PST
467 | 407 | 00467_update7 | Mon Mar 09 00:00:00 1970 PST
468 | 8 | 00468 | Tue Mar 10 00:00:00 1970 PST
469 | 509 | 00469_update9 | Wed Mar 11 00:00:00 1970 PST
470 | 0 | 00470 | Thu Mar 12 00:00:00 1970 PST
471 | 1 | 00471 | Fri Mar 13 00:00:00 1970 PST
473 | 303 | 00473_update3 | Sun Mar 15 00:00:00 1970 PST
474 | 4 | 00474 | Mon Mar 16 00:00:00 1970 PST
476 | 6 | 00476 | Wed Mar 18 00:00:00 1970 PST
477 | 407 | 00477_update7 | Thu Mar 19 00:00:00 1970 PST
478 | 8 | 00478 | Fri Mar 20 00:00:00 1970 PST
479 | 509 | 00479_update9 | Sat Mar 21 00:00:00 1970 PST
480 | 0 | 00480 | Sun Mar 22 00:00:00 1970 PST
481 | 1 | 00481 | Mon Mar 23 00:00:00 1970 PST
483 | 303 | 00483_update3 | Wed Mar 25 00:00:00 1970 PST
484 | 4 | 00484 | Thu Mar 26 00:00:00 1970 PST
486 | 6 | 00486 | Sat Mar 28 00:00:00 1970 PST
487 | 407 | 00487_update7 | Sun Mar 29 00:00:00 1970 PST
488 | 8 | 00488 | Mon Mar 30 00:00:00 1970 PST
489 | 509 | 00489_update9 | Tue Mar 31 00:00:00 1970 PST
490 | 0 | 00490 | Wed Apr 01 00:00:00 1970 PST
491 | 1 | 00491 | Thu Apr 02 00:00:00 1970 PST
493 | 303 | 00493_update3 | Sat Apr 04 00:00:00 1970 PST
494 | 4 | 00494 | Sun Apr 05 00:00:00 1970 PST
496 | 6 | 00496 | Tue Apr 07 00:00:00 1970 PST
497 | 407 | 00497_update7 | Wed Apr 08 00:00:00 1970 PST
498 | 8 | 00498 | Thu Apr 09 00:00:00 1970 PST
499 | 509 | 00499_update9 | Fri Apr 10 00:00:00 1970 PST
500 | 0 | 00500 | Thu Jan 01 00:00:00 1970 PST
501 | 1 | 00501 | Fri Jan 02 00:00:00 1970 PST
503 | 303 | 00503_update3 | Sun Jan 04 00:00:00 1970 PST
504 | 4 | 00504 | Mon Jan 05 00:00:00 1970 PST
506 | 6 | 00506 | Wed Jan 07 00:00:00 1970 PST
507 | 407 | 00507_update7 | Thu Jan 08 00:00:00 1970 PST
508 | 8 | 00508 | Fri Jan 09 00:00:00 1970 PST
509 | 509 | 00509_update9 | Sat Jan 10 00:00:00 1970 PST
510 | 0 | 00510 | Sun Jan 11 00:00:00 1970 PST
511 | 1 | 00511 | Mon Jan 12 00:00:00 1970 PST
513 | 303 | 00513_update3 | Wed Jan 14 00:00:00 1970 PST
514 | 4 | 00514 | Thu Jan 15 00:00:00 1970 PST
516 | 6 | 00516 | Sat Jan 17 00:00:00 1970 PST
517 | 407 | 00517_update7 | Sun Jan 18 00:00:00 1970 PST
518 | 8 | 00518 | Mon Jan 19 00:00:00 1970 PST
519 | 509 | 00519_update9 | Tue Jan 20 00:00:00 1970 PST
520 | 0 | 00520 | Wed Jan 21 00:00:00 1970 PST
521 | 1 | 00521 | Thu Jan 22 00:00:00 1970 PST
523 | 303 | 00523_update3 | Sat Jan 24 00:00:00 1970 PST
524 | 4 | 00524 | Sun Jan 25 00:00:00 1970 PST
526 | 6 | 00526 | Tue Jan 27 00:00:00 1970 PST
527 | 407 | 00527_update7 | Wed Jan 28 00:00:00 1970 PST
528 | 8 | 00528 | Thu Jan 29 00:00:00 1970 PST
529 | 509 | 00529_update9 | Fri Jan 30 00:00:00 1970 PST
530 | 0 | 00530 | Sat Jan 31 00:00:00 1970 PST
531 | 1 | 00531 | Sun Feb 01 00:00:00 1970 PST
533 | 303 | 00533_update3 | Tue Feb 03 00:00:00 1970 PST
534 | 4 | 00534 | Wed Feb 04 00:00:00 1970 PST
536 | 6 | 00536 | Fri Feb 06 00:00:00 1970 PST
537 | 407 | 00537_update7 | Sat Feb 07 00:00:00 1970 PST
538 | 8 | 00538 | Sun Feb 08 00:00:00 1970 PST
539 | 509 | 00539_update9 | Mon Feb 09 00:00:00 1970 PST
540 | 0 | 00540 | Tue Feb 10 00:00:00 1970 PST
541 | 1 | 00541 | Wed Feb 11 00:00:00 1970 PST
543 | 303 | 00543_update3 | Fri Feb 13 00:00:00 1970 PST
544 | 4 | 00544 | Sat Feb 14 00:00:00 1970 PST
546 | 6 | 00546 | Mon Feb 16 00:00:00 1970 PST
547 | 407 | 00547_update7 | Tue Feb 17 00:00:00 1970 PST
548 | 8 | 00548 | Wed Feb 18 00:00:00 1970 PST
549 | 509 | 00549_update9 | Thu Feb 19 00:00:00 1970 PST
550 | 0 | 00550 | Fri Feb 20 00:00:00 1970 PST
551 | 1 | 00551 | Sat Feb 21 00:00:00 1970 PST
553 | 303 | 00553_update3 | Mon Feb 23 00:00:00 1970 PST
554 | 4 | 00554 | Tue Feb 24 00:00:00 1970 PST
556 | 6 | 00556 | Thu Feb 26 00:00:00 1970 PST
557 | 407 | 00557_update7 | Fri Feb 27 00:00:00 1970 PST
558 | 8 | 00558 | Sat Feb 28 00:00:00 1970 PST
559 | 509 | 00559_update9 | Sun Mar 01 00:00:00 1970 PST
560 | 0 | 00560 | Mon Mar 02 00:00:00 1970 PST
561 | 1 | 00561 | Tue Mar 03 00:00:00 1970 PST
563 | 303 | 00563_update3 | Thu Mar 05 00:00:00 1970 PST
564 | 4 | 00564 | Fri Mar 06 00:00:00 1970 PST
566 | 6 | 00566 | Sun Mar 08 00:00:00 1970 PST
567 | 407 | 00567_update7 | Mon Mar 09 00:00:00 1970 PST
568 | 8 | 00568 | Tue Mar 10 00:00:00 1970 PST
569 | 509 | 00569_update9 | Wed Mar 11 00:00:00 1970 PST
570 | 0 | 00570 | Thu Mar 12 00:00:00 1970 PST
571 | 1 | 00571 | Fri Mar 13 00:00:00 1970 PST
573 | 303 | 00573_update3 | Sun Mar 15 00:00:00 1970 PST
574 | 4 | 00574 | Mon Mar 16 00:00:00 1970 PST
576 | 6 | 00576 | Wed Mar 18 00:00:00 1970 PST
577 | 407 | 00577_update7 | Thu Mar 19 00:00:00 1970 PST
578 | 8 | 00578 | Fri Mar 20 00:00:00 1970 PST
579 | 509 | 00579_update9 | Sat Mar 21 00:00:00 1970 PST
580 | 0 | 00580 | Sun Mar 22 00:00:00 1970 PST
581 | 1 | 00581 | Mon Mar 23 00:00:00 1970 PST
583 | 303 | 00583_update3 | Wed Mar 25 00:00:00 1970 PST
584 | 4 | 00584 | Thu Mar 26 00:00:00 1970 PST
586 | 6 | 00586 | Sat Mar 28 00:00:00 1970 PST
587 | 407 | 00587_update7 | Sun Mar 29 00:00:00 1970 PST
588 | 8 | 00588 | Mon Mar 30 00:00:00 1970 PST
589 | 509 | 00589_update9 | Tue Mar 31 00:00:00 1970 PST
590 | 0 | 00590 | Wed Apr 01 00:00:00 1970 PST
591 | 1 | 00591 | Thu Apr 02 00:00:00 1970 PST
593 | 303 | 00593_update3 | Sat Apr 04 00:00:00 1970 PST
594 | 4 | 00594 | Sun Apr 05 00:00:00 1970 PST
596 | 6 | 00596 | Tue Apr 07 00:00:00 1970 PST
597 | 407 | 00597_update7 | Wed Apr 08 00:00:00 1970 PST
598 | 8 | 00598 | Thu Apr 09 00:00:00 1970 PST
599 | 509 | 00599_update9 | Fri Apr 10 00:00:00 1970 PST
600 | 0 | 00600 | Thu Jan 01 00:00:00 1970 PST
601 | 1 | 00601 | Fri Jan 02 00:00:00 1970 PST
603 | 303 | 00603_update3 | Sun Jan 04 00:00:00 1970 PST
604 | 4 | 00604 | Mon Jan 05 00:00:00 1970 PST
606 | 6 | 00606 | Wed Jan 07 00:00:00 1970 PST
607 | 407 | 00607_update7 | Thu Jan 08 00:00:00 1970 PST
608 | 8 | 00608 | Fri Jan 09 00:00:00 1970 PST
609 | 509 | 00609_update9 | Sat Jan 10 00:00:00 1970 PST
610 | 0 | 00610 | Sun Jan 11 00:00:00 1970 PST
611 | 1 | 00611 | Mon Jan 12 00:00:00 1970 PST
613 | 303 | 00613_update3 | Wed Jan 14 00:00:00 1970 PST
614 | 4 | 00614 | Thu Jan 15 00:00:00 1970 PST
616 | 6 | 00616 | Sat Jan 17 00:00:00 1970 PST
617 | 407 | 00617_update7 | Sun Jan 18 00:00:00 1970 PST
618 | 8 | 00618 | Mon Jan 19 00:00:00 1970 PST
619 | 509 | 00619_update9 | Tue Jan 20 00:00:00 1970 PST
620 | 0 | 00620 | Wed Jan 21 00:00:00 1970 PST
621 | 1 | 00621 | Thu Jan 22 00:00:00 1970 PST
623 | 303 | 00623_update3 | Sat Jan 24 00:00:00 1970 PST
624 | 4 | 00624 | Sun Jan 25 00:00:00 1970 PST
626 | 6 | 00626 | Tue Jan 27 00:00:00 1970 PST
627 | 407 | 00627_update7 | Wed Jan 28 00:00:00 1970 PST
628 | 8 | 00628 | Thu Jan 29 00:00:00 1970 PST
629 | 509 | 00629_update9 | Fri Jan 30 00:00:00 1970 PST
630 | 0 | 00630 | Sat Jan 31 00:00:00 1970 PST
631 | 1 | 00631 | Sun Feb 01 00:00:00 1970 PST
633 | 303 | 00633_update3 | Tue Feb 03 00:00:00 1970 PST
634 | 4 | 00634 | Wed Feb 04 00:00:00 1970 PST
636 | 6 | 00636 | Fri Feb 06 00:00:00 1970 PST
637 | 407 | 00637_update7 | Sat Feb 07 00:00:00 1970 PST
638 | 8 | 00638 | Sun Feb 08 00:00:00 1970 PST
639 | 509 | 00639_update9 | Mon Feb 09 00:00:00 1970 PST
640 | 0 | 00640 | Tue Feb 10 00:00:00 1970 PST
641 | 1 | 00641 | Wed Feb 11 00:00:00 1970 PST
643 | 303 | 00643_update3 | Fri Feb 13 00:00:00 1970 PST
644 | 4 | 00644 | Sat Feb 14 00:00:00 1970 PST
646 | 6 | 00646 | Mon Feb 16 00:00:00 1970 PST
647 | 407 | 00647_update7 | Tue Feb 17 00:00:00 1970 PST
648 | 8 | 00648 | Wed Feb 18 00:00:00 1970 PST
649 | 509 | 00649_update9 | Thu Feb 19 00:00:00 1970 PST
650 | 0 | 00650 | Fri Feb 20 00:00:00 1970 PST
651 | 1 | 00651 | Sat Feb 21 00:00:00 1970 PST
653 | 303 | 00653_update3 | Mon Feb 23 00:00:00 1970 PST
654 | 4 | 00654 | Tue Feb 24 00:00:00 1970 PST
656 | 6 | 00656 | Thu Feb 26 00:00:00 1970 PST
657 | 407 | 00657_update7 | Fri Feb 27 00:00:00 1970 PST
658 | 8 | 00658 | Sat Feb 28 00:00:00 1970 PST
659 | 509 | 00659_update9 | Sun Mar 01 00:00:00 1970 PST
660 | 0 | 00660 | Mon Mar 02 00:00:00 1970 PST
661 | 1 | 00661 | Tue Mar 03 00:00:00 1970 PST
663 | 303 | 00663_update3 | Thu Mar 05 00:00:00 1970 PST
664 | 4 | 00664 | Fri Mar 06 00:00:00 1970 PST
666 | 6 | 00666 | Sun Mar 08 00:00:00 1970 PST
667 | 407 | 00667_update7 | Mon Mar 09 00:00:00 1970 PST
668 | 8 | 00668 | Tue Mar 10 00:00:00 1970 PST
669 | 509 | 00669_update9 | Wed Mar 11 00:00:00 1970 PST
670 | 0 | 00670 | Thu Mar 12 00:00:00 1970 PST
671 | 1 | 00671 | Fri Mar 13 00:00:00 1970 PST
673 | 303 | 00673_update3 | Sun Mar 15 00:00:00 1970 PST
674 | 4 | 00674 | Mon Mar 16 00:00:00 1970 PST
676 | 6 | 00676 | Wed Mar 18 00:00:00 1970 PST
677 | 407 | 00677_update7 | Thu Mar 19 00:00:00 1970 PST
678 | 8 | 00678 | Fri Mar 20 00:00:00 1970 PST
679 | 509 | 00679_update9 | Sat Mar 21 00:00:00 1970 PST
680 | 0 | 00680 | Sun Mar 22 00:00:00 1970 PST
681 | 1 | 00681 | Mon Mar 23 00:00:00 1970 PST
683 | 303 | 00683_update3 | Wed Mar 25 00:00:00 1970 PST
684 | 4 | 00684 | Thu Mar 26 00:00:00 1970 PST
686 | 6 | 00686 | Sat Mar 28 00:00:00 1970 PST
687 | 407 | 00687_update7 | Sun Mar 29 00:00:00 1970 PST
688 | 8 | 00688 | Mon Mar 30 00:00:00 1970 PST
689 | 509 | 00689_update9 | Tue Mar 31 00:00:00 1970 PST
690 | 0 | 00690 | Wed Apr 01 00:00:00 1970 PST
691 | 1 | 00691 | Thu Apr 02 00:00:00 1970 PST
693 | 303 | 00693_update3 | Sat Apr 04 00:00:00 1970 PST
694 | 4 | 00694 | Sun Apr 05 00:00:00 1970 PST
696 | 6 | 00696 | Tue Apr 07 00:00:00 1970 PST
697 | 407 | 00697_update7 | Wed Apr 08 00:00:00 1970 PST
698 | 8 | 00698 | Thu Apr 09 00:00:00 1970 PST
699 | 509 | 00699_update9 | Fri Apr 10 00:00:00 1970 PST
700 | 0 | 00700 | Thu Jan 01 00:00:00 1970 PST
701 | 1 | 00701 | Fri Jan 02 00:00:00 1970 PST
703 | 303 | 00703_update3 | Sun Jan 04 00:00:00 1970 PST
704 | 4 | 00704 | Mon Jan 05 00:00:00 1970 PST
706 | 6 | 00706 | Wed Jan 07 00:00:00 1970 PST
707 | 407 | 00707_update7 | Thu Jan 08 00:00:00 1970 PST
708 | 8 | 00708 | Fri Jan 09 00:00:00 1970 PST
709 | 509 | 00709_update9 | Sat Jan 10 00:00:00 1970 PST
710 | 0 | 00710 | Sun Jan 11 00:00:00 1970 PST
711 | 1 | 00711 | Mon Jan 12 00:00:00 1970 PST
713 | 303 | 00713_update3 | Wed Jan 14 00:00:00 1970 PST
714 | 4 | 00714 | Thu Jan 15 00:00:00 1970 PST
716 | 6 | 00716 | Sat Jan 17 00:00:00 1970 PST
717 | 407 | 00717_update7 | Sun Jan 18 00:00:00 1970 PST
718 | 8 | 00718 | Mon Jan 19 00:00:00 1970 PST
719 | 509 | 00719_update9 | Tue Jan 20 00:00:00 1970 PST
720 | 0 | 00720 | Wed Jan 21 00:00:00 1970 PST
721 | 1 | 00721 | Thu Jan 22 00:00:00 1970 PST
723 | 303 | 00723_update3 | Sat Jan 24 00:00:00 1970 PST
724 | 4 | 00724 | Sun Jan 25 00:00:00 1970 PST
726 | 6 | 00726 | Tue Jan 27 00:00:00 1970 PST
727 | 407 | 00727_update7 | Wed Jan 28 00:00:00 1970 PST
728 | 8 | 00728 | Thu Jan 29 00:00:00 1970 PST
729 | 509 | 00729_update9 | Fri Jan 30 00:00:00 1970 PST
730 | 0 | 00730 | Sat Jan 31 00:00:00 1970 PST
731 | 1 | 00731 | Sun Feb 01 00:00:00 1970 PST
733 | 303 | 00733_update3 | Tue Feb 03 00:00:00 1970 PST
734 | 4 | 00734 | Wed Feb 04 00:00:00 1970 PST
736 | 6 | 00736 | Fri Feb 06 00:00:00 1970 PST
737 | 407 | 00737_update7 | Sat Feb 07 00:00:00 1970 PST
738 | 8 | 00738 | Sun Feb 08 00:00:00 1970 PST
739 | 509 | 00739_update9 | Mon Feb 09 00:00:00 1970 PST
740 | 0 | 00740 | Tue Feb 10 00:00:00 1970 PST
741 | 1 | 00741 | Wed Feb 11 00:00:00 1970 PST
743 | 303 | 00743_update3 | Fri Feb 13 00:00:00 1970 PST
744 | 4 | 00744 | Sat Feb 14 00:00:00 1970 PST
746 | 6 | 00746 | Mon Feb 16 00:00:00 1970 PST
747 | 407 | 00747_update7 | Tue Feb 17 00:00:00 1970 PST
748 | 8 | 00748 | Wed Feb 18 00:00:00 1970 PST
749 | 509 | 00749_update9 | Thu Feb 19 00:00:00 1970 PST
750 | 0 | 00750 | Fri Feb 20 00:00:00 1970 PST
751 | 1 | 00751 | Sat Feb 21 00:00:00 1970 PST
753 | 303 | 00753_update3 | Mon Feb 23 00:00:00 1970 PST
754 | 4 | 00754 | Tue Feb 24 00:00:00 1970 PST
756 | 6 | 00756 | Thu Feb 26 00:00:00 1970 PST
757 | 407 | 00757_update7 | Fri Feb 27 00:00:00 1970 PST
758 | 8 | 00758 | Sat Feb 28 00:00:00 1970 PST
759 | 509 | 00759_update9 | Sun Mar 01 00:00:00 1970 PST
760 | 0 | 00760 | Mon Mar 02 00:00:00 1970 PST
761 | 1 | 00761 | Tue Mar 03 00:00:00 1970 PST
763 | 303 | 00763_update3 | Thu Mar 05 00:00:00 1970 PST
764 | 4 | 00764 | Fri Mar 06 00:00:00 1970 PST
766 | 6 | 00766 | Sun Mar 08 00:00:00 1970 PST
767 | 407 | 00767_update7 | Mon Mar 09 00:00:00 1970 PST
768 | 8 | 00768 | Tue Mar 10 00:00:00 1970 PST
769 | 509 | 00769_update9 | Wed Mar 11 00:00:00 1970 PST
770 | 0 | 00770 | Thu Mar 12 00:00:00 1970 PST
771 | 1 | 00771 | Fri Mar 13 00:00:00 1970 PST
773 | 303 | 00773_update3 | Sun Mar 15 00:00:00 1970 PST
774 | 4 | 00774 | Mon Mar 16 00:00:00 1970 PST
776 | 6 | 00776 | Wed Mar 18 00:00:00 1970 PST
777 | 407 | 00777_update7 | Thu Mar 19 00:00:00 1970 PST
778 | 8 | 00778 | Fri Mar 20 00:00:00 1970 PST
779 | 509 | 00779_update9 | Sat Mar 21 00:00:00 1970 PST
780 | 0 | 00780 | Sun Mar 22 00:00:00 1970 PST
781 | 1 | 00781 | Mon Mar 23 00:00:00 1970 PST
783 | 303 | 00783_update3 | Wed Mar 25 00:00:00 1970 PST
784 | 4 | 00784 | Thu Mar 26 00:00:00 1970 PST
786 | 6 | 00786 | Sat Mar 28 00:00:00 1970 PST
787 | 407 | 00787_update7 | Sun Mar 29 00:00:00 1970 PST
788 | 8 | 00788 | Mon Mar 30 00:00:00 1970 PST
789 | 509 | 00789_update9 | Tue Mar 31 00:00:00 1970 PST
790 | 0 | 00790 | Wed Apr 01 00:00:00 1970 PST
791 | 1 | 00791 | Thu Apr 02 00:00:00 1970 PST
793 | 303 | 00793_update3 | Sat Apr 04 00:00:00 1970 PST
794 | 4 | 00794 | Sun Apr 05 00:00:00 1970 PST
796 | 6 | 00796 | Tue Apr 07 00:00:00 1970 PST
797 | 407 | 00797_update7 | Wed Apr 08 00:00:00 1970 PST
798 | 8 | 00798 | Thu Apr 09 00:00:00 1970 PST
799 | 509 | 00799_update9 | Fri Apr 10 00:00:00 1970 PST
800 | 0 | 00800 | Thu Jan 01 00:00:00 1970 PST
801 | 1 | 00801 | Fri Jan 02 00:00:00 1970 PST
803 | 303 | 00803_update3 | Sun Jan 04 00:00:00 1970 PST
804 | 4 | 00804 | Mon Jan 05 00:00:00 1970 PST
806 | 6 | 00806 | Wed Jan 07 00:00:00 1970 PST
807 | 407 | 00807_update7 | Thu Jan 08 00:00:00 1970 PST
808 | 8 | 00808 | Fri Jan 09 00:00:00 1970 PST
809 | 509 | 00809_update9 | Sat Jan 10 00:00:00 1970 PST
810 | 0 | 00810 | Sun Jan 11 00:00:00 1970 PST
811 | 1 | 00811 | Mon Jan 12 00:00:00 1970 PST
813 | 303 | 00813_update3 | Wed Jan 14 00:00:00 1970 PST
814 | 4 | 00814 | Thu Jan 15 00:00:00 1970 PST
816 | 6 | 00816 | Sat Jan 17 00:00:00 1970 PST
817 | 407 | 00817_update7 | Sun Jan 18 00:00:00 1970 PST
818 | 8 | 00818 | Mon Jan 19 00:00:00 1970 PST
819 | 509 | 00819_update9 | Tue Jan 20 00:00:00 1970 PST
820 | 0 | 00820 | Wed Jan 21 00:00:00 1970 PST
821 | 1 | 00821 | Thu Jan 22 00:00:00 1970 PST
823 | 303 | 00823_update3 | Sat Jan 24 00:00:00 1970 PST
824 | 4 | 00824 | Sun Jan 25 00:00:00 1970 PST
826 | 6 | 00826 | Tue Jan 27 00:00:00 1970 PST
827 | 407 | 00827_update7 | Wed Jan 28 00:00:00 1970 PST
828 | 8 | 00828 | Thu Jan 29 00:00:00 1970 PST
829 | 509 | 00829_update9 | Fri Jan 30 00:00:00 1970 PST
830 | 0 | 00830 | Sat Jan 31 00:00:00 1970 PST
831 | 1 | 00831 | Sun Feb 01 00:00:00 1970 PST
833 | 303 | 00833_update3 | Tue Feb 03 00:00:00 1970 PST
834 | 4 | 00834 | Wed Feb 04 00:00:00 1970 PST
836 | 6 | 00836 | Fri Feb 06 00:00:00 1970 PST
837 | 407 | 00837_update7 | Sat Feb 07 00:00:00 1970 PST
838 | 8 | 00838 | Sun Feb 08 00:00:00 1970 PST
839 | 509 | 00839_update9 | Mon Feb 09 00:00:00 1970 PST
840 | 0 | 00840 | Tue Feb 10 00:00:00 1970 PST
841 | 1 | 00841 | Wed Feb 11 00:00:00 1970 PST
843 | 303 | 00843_update3 | Fri Feb 13 00:00:00 1970 PST
844 | 4 | 00844 | Sat Feb 14 00:00:00 1970 PST
846 | 6 | 00846 | Mon Feb 16 00:00:00 1970 PST
847 | 407 | 00847_update7 | Tue Feb 17 00:00:00 1970 PST
848 | 8 | 00848 | Wed Feb 18 00:00:00 1970 PST
849 | 509 | 00849_update9 | Thu Feb 19 00:00:00 1970 PST
850 | 0 | 00850 | Fri Feb 20 00:00:00 1970 PST
851 | 1 | 00851 | Sat Feb 21 00:00:00 1970 PST
853 | 303 | 00853_update3 | Mon Feb 23 00:00:00 1970 PST
854 | 4 | 00854 | Tue Feb 24 00:00:00 1970 PST
856 | 6 | 00856 | Thu Feb 26 00:00:00 1970 PST
857 | 407 | 00857_update7 | Fri Feb 27 00:00:00 1970 PST
858 | 8 | 00858 | Sat Feb 28 00:00:00 1970 PST
859 | 509 | 00859_update9 | Sun Mar 01 00:00:00 1970 PST
860 | 0 | 00860 | Mon Mar 02 00:00:00 1970 PST
861 | 1 | 00861 | Tue Mar 03 00:00:00 1970 PST
863 | 303 | 00863_update3 | Thu Mar 05 00:00:00 1970 PST
864 | 4 | 00864 | Fri Mar 06 00:00:00 1970 PST
866 | 6 | 00866 | Sun Mar 08 00:00:00 1970 PST
867 | 407 | 00867_update7 | Mon Mar 09 00:00:00 1970 PST
868 | 8 | 00868 | Tue Mar 10 00:00:00 1970 PST
869 | 509 | 00869_update9 | Wed Mar 11 00:00:00 1970 PST
870 | 0 | 00870 | Thu Mar 12 00:00:00 1970 PST
871 | 1 | 00871 | Fri Mar 13 00:00:00 1970 PST
873 | 303 | 00873_update3 | Sun Mar 15 00:00:00 1970 PST
874 | 4 | 00874 | Mon Mar 16 00:00:00 1970 PST
876 | 6 | 00876 | Wed Mar 18 00:00:00 1970 PST
877 | 407 | 00877_update7 | Thu Mar 19 00:00:00 1970 PST
878 | 8 | 00878 | Fri Mar 20 00:00:00 1970 PST
879 | 509 | 00879_update9 | Sat Mar 21 00:00:00 1970 PST
880 | 0 | 00880 | Sun Mar 22 00:00:00 1970 PST
881 | 1 | 00881 | Mon Mar 23 00:00:00 1970 PST
883 | 303 | 00883_update3 | Wed Mar 25 00:00:00 1970 PST
884 | 4 | 00884 | Thu Mar 26 00:00:00 1970 PST
886 | 6 | 00886 | Sat Mar 28 00:00:00 1970 PST
887 | 407 | 00887_update7 | Sun Mar 29 00:00:00 1970 PST
888 | 8 | 00888 | Mon Mar 30 00:00:00 1970 PST
889 | 509 | 00889_update9 | Tue Mar 31 00:00:00 1970 PST
890 | 0 | 00890 | Wed Apr 01 00:00:00 1970 PST
891 | 1 | 00891 | Thu Apr 02 00:00:00 1970 PST
893 | 303 | 00893_update3 | Sat Apr 04 00:00:00 1970 PST
894 | 4 | 00894 | Sun Apr 05 00:00:00 1970 PST
896 | 6 | 00896 | Tue Apr 07 00:00:00 1970 PST
897 | 407 | 00897_update7 | Wed Apr 08 00:00:00 1970 PST
898 | 8 | 00898 | Thu Apr 09 00:00:00 1970 PST
899 | 509 | 00899_update9 | Fri Apr 10 00:00:00 1970 PST
900 | 0 | 00900 | Thu Jan 01 00:00:00 1970 PST
901 | 1 | 00901 | Fri Jan 02 00:00:00 1970 PST
903 | 303 | 00903_update3 | Sun Jan 04 00:00:00 1970 PST
904 | 4 | 00904 | Mon Jan 05 00:00:00 1970 PST
906 | 6 | 00906 | Wed Jan 07 00:00:00 1970 PST
907 | 407 | 00907_update7 | Thu Jan 08 00:00:00 1970 PST
908 | 8 | 00908 | Fri Jan 09 00:00:00 1970 PST
909 | 509 | 00909_update9 | Sat Jan 10 00:00:00 1970 PST
910 | 0 | 00910 | Sun Jan 11 00:00:00 1970 PST
911 | 1 | 00911 | Mon Jan 12 00:00:00 1970 PST
913 | 303 | 00913_update3 | Wed Jan 14 00:00:00 1970 PST
914 | 4 | 00914 | Thu Jan 15 00:00:00 1970 PST
916 | 6 | 00916 | Sat Jan 17 00:00:00 1970 PST
917 | 407 | 00917_update7 | Sun Jan 18 00:00:00 1970 PST
918 | 8 | 00918 | Mon Jan 19 00:00:00 1970 PST
919 | 509 | 00919_update9 | Tue Jan 20 00:00:00 1970 PST
920 | 0 | 00920 | Wed Jan 21 00:00:00 1970 PST
921 | 1 | 00921 | Thu Jan 22 00:00:00 1970 PST
923 | 303 | 00923_update3 | Sat Jan 24 00:00:00 1970 PST
924 | 4 | 00924 | Sun Jan 25 00:00:00 1970 PST
926 | 6 | 00926 | Tue Jan 27 00:00:00 1970 PST
927 | 407 | 00927_update7 | Wed Jan 28 00:00:00 1970 PST
928 | 8 | 00928 | Thu Jan 29 00:00:00 1970 PST
929 | 509 | 00929_update9 | Fri Jan 30 00:00:00 1970 PST
930 | 0 | 00930 | Sat Jan 31 00:00:00 1970 PST
931 | 1 | 00931 | Sun Feb 01 00:00:00 1970 PST
933 | 303 | 00933_update3 | Tue Feb 03 00:00:00 1970 PST
934 | 4 | 00934 | Wed Feb 04 00:00:00 1970 PST
936 | 6 | 00936 | Fri Feb 06 00:00:00 1970 PST
937 | 407 | 00937_update7 | Sat Feb 07 00:00:00 1970 PST
938 | 8 | 00938 | Sun Feb 08 00:00:00 1970 PST
939 | 509 | 00939_update9 | Mon Feb 09 00:00:00 1970 PST
940 | 0 | 00940 | Tue Feb 10 00:00:00 1970 PST
941 | 1 | 00941 | Wed Feb 11 00:00:00 1970 PST
943 | 303 | 00943_update3 | Fri Feb 13 00:00:00 1970 PST
944 | 4 | 00944 | Sat Feb 14 00:00:00 1970 PST
946 | 6 | 00946 | Mon Feb 16 00:00:00 1970 PST
947 | 407 | 00947_update7 | Tue Feb 17 00:00:00 1970 PST
948 | 8 | 00948 | Wed Feb 18 00:00:00 1970 PST
949 | 509 | 00949_update9 | Thu Feb 19 00:00:00 1970 PST
950 | 0 | 00950 | Fri Feb 20 00:00:00 1970 PST
951 | 1 | 00951 | Sat Feb 21 00:00:00 1970 PST
953 | 303 | 00953_update3 | Mon Feb 23 00:00:00 1970 PST
954 | 4 | 00954 | Tue Feb 24 00:00:00 1970 PST
956 | 6 | 00956 | Thu Feb 26 00:00:00 1970 PST
957 | 407 | 00957_update7 | Fri Feb 27 00:00:00 1970 PST
958 | 8 | 00958 | Sat Feb 28 00:00:00 1970 PST
959 | 509 | 00959_update9 | Sun Mar 01 00:00:00 1970 PST
960 | 0 | 00960 | Mon Mar 02 00:00:00 1970 PST
961 | 1 | 00961 | Tue Mar 03 00:00:00 1970 PST
963 | 303 | 00963_update3 | Thu Mar 05 00:00:00 1970 PST
964 | 4 | 00964 | Fri Mar 06 00:00:00 1970 PST
966 | 6 | 00966 | Sun Mar 08 00:00:00 1970 PST
967 | 407 | 00967_update7 | Mon Mar 09 00:00:00 1970 PST
968 | 8 | 00968 | Tue Mar 10 00:00:00 1970 PST
969 | 509 | 00969_update9 | Wed Mar 11 00:00:00 1970 PST
970 | 0 | 00970 | Thu Mar 12 00:00:00 1970 PST
971 | 1 | 00971 | Fri Mar 13 00:00:00 1970 PST
973 | 303 | 00973_update3 | Sun Mar 15 00:00:00 1970 PST
974 | 4 | 00974 | Mon Mar 16 00:00:00 1970 PST
976 | 6 | 00976 | Wed Mar 18 00:00:00 1970 PST
977 | 407 | 00977_update7 | Thu Mar 19 00:00:00 1970 PST
978 | 8 | 00978 | Fri Mar 20 00:00:00 1970 PST
979 | 509 | 00979_update9 | Sat Mar 21 00:00:00 1970 PST
980 | 0 | 00980 | Sun Mar 22 00:00:00 1970 PST
981 | 1 | 00981 | Mon Mar 23 00:00:00 1970 PST
983 | 303 | 00983_update3 | Wed Mar 25 00:00:00 1970 PST
984 | 4 | 00984 | Thu Mar 26 00:00:00 1970 PST
986 | 6 | 00986 | Sat Mar 28 00:00:00 1970 PST
987 | 407 | 00987_update7 | Sun Mar 29 00:00:00 1970 PST
988 | 8 | 00988 | Mon Mar 30 00:00:00 1970 PST
989 | 509 | 00989_update9 | Tue Mar 31 00:00:00 1970 PST
990 | 0 | 00990 | Wed Apr 01 00:00:00 1970 PST
991 | 1 | 00991 | Thu Apr 02 00:00:00 1970 PST
993 | 303 | 00993_update3 | Sat Apr 04 00:00:00 1970 PST
994 | 4 | 00994 | Sun Apr 05 00:00:00 1970 PST
996 | 6 | 00996 | Tue Apr 07 00:00:00 1970 PST
997 | 407 | 00997_update7 | Wed Apr 08 00:00:00 1970 PST
998 | 8 | 00998 | Thu Apr 09 00:00:00 1970 PST
999 | 509 | 00999_update9 | Fri Apr 10 00:00:00 1970 PST
1000 | 0 | 01000 | Thu Jan 01 00:00:00 1970 PST
1001 | 101 | 0000100001 |
1003 | 403 | 0000300003_update3 |
1004 | 104 | 0000400004 |
1006 | 106 | 0000600006 |
1007 | 507 | 0000700007_update7 |
1008 | 108 | 0000800008 |
1009 | 609 | 0000900009_update9 |
1010 | 100 | 0001000010 |
1011 | 101 | 0001100011 |
1013 | 403 | 0001300013_update3 |
1014 | 104 | 0001400014 |
1016 | 106 | 0001600016 |
1017 | 507 | 0001700017_update7 |
1018 | 108 | 0001800018 |
1019 | 609 | 0001900019_update9 |
1020 | 100 | 0002000020 |
1101 | 201 | aaa |
1103 | 503 | ccc_update3 |
1104 | 204 | ddd |
(819 rows)
EXPLAIN (verbose, costs off)
INSERT INTO ft2 (c1,c2,c3) VALUES (1200,999,'foo') RETURNING tableoid::regclass;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Insert on public.ft2
In the planner, replace an empty FROM clause with a dummy RTE. The fact that "SELECT expression" has no base relations has long been a thorn in the side of the planner. It makes it hard to flatten a sub-query that looks like that, or is a trivial VALUES() item, because the planner generally uses relid sets to identify sub-relations, and such a sub-query would have an empty relid set if we flattened it. prepjointree.c contains some baroque logic that works around this in certain special cases --- but there is a much better answer. We can replace an empty FROM clause with a dummy RTE that acts like a table of one row and no columns, and then there are no such corner cases to worry about. Instead we need some logic to get rid of useless dummy RTEs, but that's simpler and covers more cases than what was there before. For really trivial cases, where the query is just "SELECT expression" and nothing else, there's a hazard that adding the extra RTE makes for a noticeable slowdown; even though it's not much processing, there's not that much for the planner to do overall. However testing says that the penalty is very small, close to the noise level. In more complex queries, this is able to find optimizations that we could not find before. The new RTE type is called RTE_RESULT, since the "scan" plan type it gives rise to is a Result node (the same plan we produced for a "SELECT expression" query before). To avoid confusion, rename the old ResultPath path type to GroupResultPath, reflecting that it's only used in degenerate grouping cases where we know the query produces just one grouped row. (It wouldn't work to unify the two cases, because there are different rules about where the associated quals live during query_planner.) Note: although this touches readfuncs.c, I don't think a catversion bump is required, because the added case can't occur in stored rules, only plans. Patch by me, reviewed by David Rowley and Mark Dilger Discussion: https://postgr.es/m/15944.1521127664@sss.pgh.pa.us
2019-01-28 23:54:10 +01:00
Output: (ft2.tableoid)::regclass
Remote SQL: INSERT INTO "S 1"."T 1"("C 1", c2, c3, c4, c5, c6, c7, c8) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
Batch Size: 1
-> Result
Output: 1200, 999, NULL::integer, 'foo'::text, NULL::timestamp with time zone, NULL::timestamp without time zone, NULL::character varying, 'ft2 '::character(10), NULL::user_enum
(6 rows)
INSERT INTO ft2 (c1,c2,c3) VALUES (1200,999,'foo') RETURNING tableoid::regclass;
tableoid
----------
ft2
(1 row)
EXPLAIN (verbose, costs off)
UPDATE ft2 SET c3 = 'bar' WHERE c1 = 1200 RETURNING tableoid::regclass; -- can be pushed down
QUERY PLAN
------------------------------------------------------------------------------------
Update on public.ft2
Output: (tableoid)::regclass
-> Foreign Update on public.ft2
Remote SQL: UPDATE "S 1"."T 1" SET c3 = 'bar'::text WHERE (("C 1" = 1200))
(4 rows)
UPDATE ft2 SET c3 = 'bar' WHERE c1 = 1200 RETURNING tableoid::regclass;
tableoid
----------
ft2
(1 row)
EXPLAIN (verbose, costs off)
DELETE FROM ft2 WHERE c1 = 1200 RETURNING tableoid::regclass; -- can be pushed down
QUERY PLAN
--------------------------------------------------------------------
Delete on public.ft2
Output: (tableoid)::regclass
-> Foreign Delete on public.ft2
Remote SQL: DELETE FROM "S 1"."T 1" WHERE (("C 1" = 1200))
(4 rows)
DELETE FROM ft2 WHERE c1 = 1200 RETURNING tableoid::regclass;
tableoid
----------
ft2
(1 row)
-- Test UPDATE/DELETE with RETURNING on a three-table join
INSERT INTO ft2 (c1,c2,c3)
SELECT id, id - 1200, to_char(id, 'FM00000') FROM generate_series(1201, 1300) id;
EXPLAIN (verbose, costs off)
UPDATE ft2 SET c3 = 'foo'
FROM ft4 INNER JOIN ft5 ON (ft4.c1 = ft5.c1)
WHERE ft2.c1 > 1200 AND ft2.c2 = ft4.c1
RETURNING ft2, ft2.*, ft4, ft4.*; -- can be pushed down
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Update on public.ft2
Output: ft2.*, ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8, ft4.*, ft4.c1, ft4.c2, ft4.c3
-> Foreign Update
Remote SQL: UPDATE "S 1"."T 1" r1 SET c3 = 'foo'::text FROM ("S 1"."T 3" r2 INNER JOIN "S 1"."T 4" r3 ON (TRUE)) WHERE ((r2.c1 = r3.c1)) AND ((r1.c2 = r2.c1)) AND ((r1."C 1" > 1200)) RETURNING r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2.c1, r2.c2, r2.c3) END, r2.c1, r2.c2, r2.c3
(4 rows)
UPDATE ft2 SET c3 = 'foo'
FROM ft4 INNER JOIN ft5 ON (ft4.c1 = ft5.c1)
WHERE ft2.c1 > 1200 AND ft2.c2 = ft4.c1
RETURNING ft2, ft2.*, ft4, ft4.*;
ft2 | c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 | ft4 | c1 | c2 | c3
--------------------------------+------+----+-----+----+----+----+------------+----+----------------+----+----+--------
(1206,6,foo,,,,"ft2 ",) | 1206 | 6 | foo | | | | ft2 | | (6,7,AAA006) | 6 | 7 | AAA006
(1212,12,foo,,,,"ft2 ",) | 1212 | 12 | foo | | | | ft2 | | (12,13,AAA012) | 12 | 13 | AAA012
(1218,18,foo,,,,"ft2 ",) | 1218 | 18 | foo | | | | ft2 | | (18,19,AAA018) | 18 | 19 | AAA018
(1224,24,foo,,,,"ft2 ",) | 1224 | 24 | foo | | | | ft2 | | (24,25,AAA024) | 24 | 25 | AAA024
(1230,30,foo,,,,"ft2 ",) | 1230 | 30 | foo | | | | ft2 | | (30,31,AAA030) | 30 | 31 | AAA030
(1236,36,foo,,,,"ft2 ",) | 1236 | 36 | foo | | | | ft2 | | (36,37,AAA036) | 36 | 37 | AAA036
(1242,42,foo,,,,"ft2 ",) | 1242 | 42 | foo | | | | ft2 | | (42,43,AAA042) | 42 | 43 | AAA042
(1248,48,foo,,,,"ft2 ",) | 1248 | 48 | foo | | | | ft2 | | (48,49,AAA048) | 48 | 49 | AAA048
(1254,54,foo,,,,"ft2 ",) | 1254 | 54 | foo | | | | ft2 | | (54,55,AAA054) | 54 | 55 | AAA054
(1260,60,foo,,,,"ft2 ",) | 1260 | 60 | foo | | | | ft2 | | (60,61,AAA060) | 60 | 61 | AAA060
(1266,66,foo,,,,"ft2 ",) | 1266 | 66 | foo | | | | ft2 | | (66,67,AAA066) | 66 | 67 | AAA066
(1272,72,foo,,,,"ft2 ",) | 1272 | 72 | foo | | | | ft2 | | (72,73,AAA072) | 72 | 73 | AAA072
(1278,78,foo,,,,"ft2 ",) | 1278 | 78 | foo | | | | ft2 | | (78,79,AAA078) | 78 | 79 | AAA078
(1284,84,foo,,,,"ft2 ",) | 1284 | 84 | foo | | | | ft2 | | (84,85,AAA084) | 84 | 85 | AAA084
(1290,90,foo,,,,"ft2 ",) | 1290 | 90 | foo | | | | ft2 | | (90,91,AAA090) | 90 | 91 | AAA090
(1296,96,foo,,,,"ft2 ",) | 1296 | 96 | foo | | | | ft2 | | (96,97,AAA096) | 96 | 97 | AAA096
(16 rows)
EXPLAIN (verbose, costs off)
DELETE FROM ft2
USING ft4 LEFT JOIN ft5 ON (ft4.c1 = ft5.c1)
WHERE ft2.c1 > 1200 AND ft2.c1 % 10 = 0 AND ft2.c2 = ft4.c1
RETURNING 100; -- can be pushed down
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Delete on public.ft2
Output: 100
-> Foreign Delete
Remote SQL: DELETE FROM "S 1"."T 1" r1 USING ("S 1"."T 3" r2 LEFT JOIN "S 1"."T 4" r3 ON (((r2.c1 = r3.c1)))) WHERE ((r1.c2 = r2.c1)) AND ((r1."C 1" > 1200)) AND (((r1."C 1" % 10) = 0))
(4 rows)
DELETE FROM ft2
USING ft4 LEFT JOIN ft5 ON (ft4.c1 = ft5.c1)
WHERE ft2.c1 > 1200 AND ft2.c1 % 10 = 0 AND ft2.c2 = ft4.c1
RETURNING 100;
?column?
----------
100
100
100
100
100
100
100
100
100
100
(10 rows)
DELETE FROM ft2 WHERE ft2.c1 > 1200;
-- Test UPDATE with a MULTIEXPR sub-select
-- (maybe someday this'll be remotely executable, but not today)
EXPLAIN (verbose, costs off)
UPDATE ft2 AS target SET (c2, c7) = (
SELECT c2 * 10, c7
FROM ft2 AS src
WHERE target.c1 = src.c1
) WHERE c1 > 1100;
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------
Update on public.ft2 target
Remote SQL: UPDATE "S 1"."T 1" SET c2 = $2, c7 = $3 WHERE ctid = $1
-> Foreign Scan on public.ft2 target
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: $1, $2, (SubPlan 1 (returns $1,$2)), target.ctid, target.*
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8, ctid FROM "S 1"."T 1" WHERE (("C 1" > 1100)) FOR UPDATE
SubPlan 1 (returns $1,$2)
-> Foreign Scan on public.ft2 src
Output: (src.c2 * 10), src.c7
Remote SQL: SELECT c2, c7 FROM "S 1"."T 1" WHERE (($1::integer = "C 1"))
(9 rows)
UPDATE ft2 AS target SET (c2, c7) = (
SELECT c2 * 10, c7
FROM ft2 AS src
WHERE target.c1 = src.c1
) WHERE c1 > 1100;
UPDATE ft2 AS target SET (c2) = (
SELECT c2 / 10
FROM ft2 AS src
WHERE target.c1 = src.c1
) WHERE c1 > 1100;
-- Test UPDATE involving a join that can be pushed down,
-- but a SET clause that can't be
EXPLAIN (VERBOSE, COSTS OFF)
UPDATE ft2 d SET c2 = CASE WHEN random() >= 0 THEN d.c2 ELSE 0 END
FROM ft2 AS t WHERE d.c1 = t.c1 AND d.c1 > 1000;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Update on public.ft2 d
Remote SQL: UPDATE "S 1"."T 1" SET c2 = $2 WHERE ctid = $1
-> Foreign Scan
Output: CASE WHEN (random() >= '0'::double precision) THEN d.c2 ELSE 0 END, d.ctid, d.*, t.*
Relations: (public.ft2 d) INNER JOIN (public.ft2 t)
Remote SQL: SELECT r1.c2, r1.ctid, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")) AND ((r1."C 1" > 1000)))) FOR UPDATE OF r1
-> Hash Join
Output: d.c2, d.ctid, d.*, t.*
Hash Cond: (d.c1 = t.c1)
-> Foreign Scan on public.ft2 d
Output: d.c2, d.ctid, d.*, d.c1
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8, ctid FROM "S 1"."T 1" WHERE (("C 1" > 1000)) ORDER BY "C 1" ASC NULLS LAST FOR UPDATE
-> Hash
Output: t.*, t.c1
-> Foreign Scan on public.ft2 t
Output: t.*, t.c1
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
(17 rows)
UPDATE ft2 d SET c2 = CASE WHEN random() >= 0 THEN d.c2 ELSE 0 END
FROM ft2 AS t WHERE d.c1 = t.c1 AND d.c1 > 1000;
-- Test UPDATE/DELETE with WHERE or JOIN/ON conditions containing
-- user-defined operators/functions
ALTER SERVER loopback OPTIONS (DROP extensions);
INSERT INTO ft2 (c1,c2,c3)
SELECT id, id % 10, to_char(id, 'FM00000') FROM generate_series(2001, 2010) id;
EXPLAIN (verbose, costs off)
UPDATE ft2 SET c3 = 'bar' WHERE postgres_fdw_abs(c1) > 2000 RETURNING *; -- can't be pushed down
QUERY PLAN
----------------------------------------------------------------------------------------------------------
Update on public.ft2
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: UPDATE "S 1"."T 1" SET c3 = $2 WHERE ctid = $1 RETURNING "C 1", c2, c3, c4, c5, c6, c7, c8
-> Foreign Scan on public.ft2
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: 'bar'::text, ctid, ft2.*
Filter: (postgres_fdw_abs(ft2.c1) > 2000)
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8, ctid FROM "S 1"."T 1" FOR UPDATE
(7 rows)
UPDATE ft2 SET c3 = 'bar' WHERE postgres_fdw_abs(c1) > 2000 RETURNING *;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
------+----+-----+----+----+----+------------+----
2001 | 1 | bar | | | | ft2 |
2002 | 2 | bar | | | | ft2 |
2003 | 3 | bar | | | | ft2 |
2004 | 4 | bar | | | | ft2 |
2005 | 5 | bar | | | | ft2 |
2006 | 6 | bar | | | | ft2 |
2007 | 7 | bar | | | | ft2 |
2008 | 8 | bar | | | | ft2 |
2009 | 9 | bar | | | | ft2 |
2010 | 0 | bar | | | | ft2 |
(10 rows)
EXPLAIN (verbose, costs off)
UPDATE ft2 SET c3 = 'baz'
FROM ft4 INNER JOIN ft5 ON (ft4.c1 = ft5.c1)
WHERE ft2.c1 > 2000 AND ft2.c2 === ft4.c1
RETURNING ft2.*, ft4.*, ft5.*; -- can't be pushed down
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Update on public.ft2
Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8, ft4.c1, ft4.c2, ft4.c3, ft5.c1, ft5.c2, ft5.c3
Remote SQL: UPDATE "S 1"."T 1" SET c3 = $2 WHERE ctid = $1 RETURNING "C 1", c2, c3, c4, c5, c6, c7, c8
-> Nested Loop
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: 'baz'::text, ft2.ctid, ft2.*, ft4.*, ft5.*, ft4.c1, ft4.c2, ft4.c3, ft5.c1, ft5.c2, ft5.c3
Join Filter: (ft2.c2 === ft4.c1)
-> Foreign Scan on public.ft2
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: ft2.ctid, ft2.*, ft2.c2
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8, ctid FROM "S 1"."T 1" WHERE (("C 1" > 2000)) FOR UPDATE
-> Foreign Scan
Output: ft4.*, ft4.c1, ft4.c2, ft4.c3, ft5.*, ft5.c1, ft5.c2, ft5.c3
Relations: (public.ft4) INNER JOIN (public.ft5)
Remote SQL: SELECT CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2.c1, r2.c2, r2.c3) END, r2.c1, r2.c2, r2.c3, CASE WHEN (r3.*)::text IS NOT NULL THEN ROW(r3.c1, r3.c2, r3.c3) END, r3.c1, r3.c2, r3.c3 FROM ("S 1"."T 3" r2 INNER JOIN "S 1"."T 4" r3 ON (((r2.c1 = r3.c1))))
-> Hash Join
Output: ft4.*, ft4.c1, ft4.c2, ft4.c3, ft5.*, ft5.c1, ft5.c2, ft5.c3
Hash Cond: (ft4.c1 = ft5.c1)
-> Foreign Scan on public.ft4
Output: ft4.*, ft4.c1, ft4.c2, ft4.c3
Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 3"
-> Hash
Output: ft5.*, ft5.c1, ft5.c2, ft5.c3
-> Foreign Scan on public.ft5
Output: ft5.*, ft5.c1, ft5.c2, ft5.c3
Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 4"
(24 rows)
UPDATE ft2 SET c3 = 'baz'
FROM ft4 INNER JOIN ft5 ON (ft4.c1 = ft5.c1)
WHERE ft2.c1 > 2000 AND ft2.c2 === ft4.c1
RETURNING ft2.*, ft4.*, ft5.*;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 | c1 | c2 | c3 | c1 | c2 | c3
------+----+-----+----+----+----+------------+----+----+----+--------+----+----+--------
2006 | 6 | baz | | | | ft2 | | 6 | 7 | AAA006 | 6 | 7 | AAA006
(1 row)
EXPLAIN (verbose, costs off)
DELETE FROM ft2
USING ft4 INNER JOIN ft5 ON (ft4.c1 === ft5.c1)
WHERE ft2.c1 > 2000 AND ft2.c2 = ft4.c1
RETURNING ft2.c1, ft2.c2, ft2.c3; -- can't be pushed down
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Delete on public.ft2
Output: ft2.c1, ft2.c2, ft2.c3
Remote SQL: DELETE FROM "S 1"."T 1" WHERE ctid = $1 RETURNING "C 1", c2, c3
-> Foreign Scan
Output: ft2.ctid, ft4.*, ft5.*
Filter: (ft4.c1 === ft5.c1)
Relations: ((public.ft2) INNER JOIN (public.ft4)) INNER JOIN (public.ft5)
Remote SQL: SELECT r1.ctid, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2.c1, r2.c2, r2.c3) END, CASE WHEN (r3.*)::text IS NOT NULL THEN ROW(r3.c1, r3.c2, r3.c3) END, r2.c1, r3.c1 FROM (("S 1"."T 1" r1 INNER JOIN "S 1"."T 3" r2 ON (((r1.c2 = r2.c1)) AND ((r1."C 1" > 2000)))) INNER JOIN "S 1"."T 4" r3 ON (TRUE)) FOR UPDATE OF r1
-> Nested Loop
Output: ft2.ctid, ft4.*, ft5.*, ft4.c1, ft5.c1
-> Nested Loop
Output: ft2.ctid, ft4.*, ft4.c1
Join Filter: (ft2.c2 = ft4.c1)
-> Foreign Scan on public.ft2
Output: ft2.ctid, ft2.c2
Remote SQL: SELECT c2, ctid FROM "S 1"."T 1" WHERE (("C 1" > 2000)) FOR UPDATE
-> Foreign Scan on public.ft4
Output: ft4.*, ft4.c1
Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 3"
-> Foreign Scan on public.ft5
Output: ft5.*, ft5.c1
Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 4"
(22 rows)
DELETE FROM ft2
USING ft4 INNER JOIN ft5 ON (ft4.c1 === ft5.c1)
WHERE ft2.c1 > 2000 AND ft2.c2 = ft4.c1
RETURNING ft2.c1, ft2.c2, ft2.c3;
c1 | c2 | c3
------+----+-----
2006 | 6 | baz
(1 row)
DELETE FROM ft2 WHERE ft2.c1 > 2000;
ALTER SERVER loopback OPTIONS (ADD extensions 'postgres_fdw');
-- Test that trigger on remote table works as expected
CREATE OR REPLACE FUNCTION "S 1".F_BRTRIG() RETURNS trigger AS $$
BEGIN
NEW.c3 = NEW.c3 || '_trig_update';
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER t1_br_insert BEFORE INSERT OR UPDATE
ON "S 1"."T 1" FOR EACH ROW EXECUTE PROCEDURE "S 1".F_BRTRIG();
INSERT INTO ft2 (c1,c2,c3) VALUES (1208, 818, 'fff') RETURNING *;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
------+-----+-----------------+----+----+----+------------+----
1208 | 818 | fff_trig_update | | | | ft2 |
(1 row)
INSERT INTO ft2 (c1,c2,c3,c6) VALUES (1218, 818, 'ggg', '(--;') RETURNING *;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
------+-----+-----------------+----+----+------+------------+----
1218 | 818 | ggg_trig_update | | | (--; | ft2 |
(1 row)
UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
------+-----+------------------------+------------------------------+--------------------------+----+------------+-----
8 | 608 | 00008_trig_update | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8 | 8 | foo
18 | 608 | 00018_trig_update | Mon Jan 19 00:00:00 1970 PST | Mon Jan 19 00:00:00 1970 | 8 | 8 | foo
28 | 608 | 00028_trig_update | Thu Jan 29 00:00:00 1970 PST | Thu Jan 29 00:00:00 1970 | 8 | 8 | foo
38 | 608 | 00038_trig_update | Sun Feb 08 00:00:00 1970 PST | Sun Feb 08 00:00:00 1970 | 8 | 8 | foo
48 | 608 | 00048_trig_update | Wed Feb 18 00:00:00 1970 PST | Wed Feb 18 00:00:00 1970 | 8 | 8 | foo
58 | 608 | 00058_trig_update | Sat Feb 28 00:00:00 1970 PST | Sat Feb 28 00:00:00 1970 | 8 | 8 | foo
68 | 608 | 00068_trig_update | Tue Mar 10 00:00:00 1970 PST | Tue Mar 10 00:00:00 1970 | 8 | 8 | foo
78 | 608 | 00078_trig_update | Fri Mar 20 00:00:00 1970 PST | Fri Mar 20 00:00:00 1970 | 8 | 8 | foo
88 | 608 | 00088_trig_update | Mon Mar 30 00:00:00 1970 PST | Mon Mar 30 00:00:00 1970 | 8 | 8 | foo
98 | 608 | 00098_trig_update | Thu Apr 09 00:00:00 1970 PST | Thu Apr 09 00:00:00 1970 | 8 | 8 | foo
108 | 608 | 00108_trig_update | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8 | 8 | foo
118 | 608 | 00118_trig_update | Mon Jan 19 00:00:00 1970 PST | Mon Jan 19 00:00:00 1970 | 8 | 8 | foo
128 | 608 | 00128_trig_update | Thu Jan 29 00:00:00 1970 PST | Thu Jan 29 00:00:00 1970 | 8 | 8 | foo
138 | 608 | 00138_trig_update | Sun Feb 08 00:00:00 1970 PST | Sun Feb 08 00:00:00 1970 | 8 | 8 | foo
148 | 608 | 00148_trig_update | Wed Feb 18 00:00:00 1970 PST | Wed Feb 18 00:00:00 1970 | 8 | 8 | foo
158 | 608 | 00158_trig_update | Sat Feb 28 00:00:00 1970 PST | Sat Feb 28 00:00:00 1970 | 8 | 8 | foo
168 | 608 | 00168_trig_update | Tue Mar 10 00:00:00 1970 PST | Tue Mar 10 00:00:00 1970 | 8 | 8 | foo
178 | 608 | 00178_trig_update | Fri Mar 20 00:00:00 1970 PST | Fri Mar 20 00:00:00 1970 | 8 | 8 | foo
188 | 608 | 00188_trig_update | Mon Mar 30 00:00:00 1970 PST | Mon Mar 30 00:00:00 1970 | 8 | 8 | foo
198 | 608 | 00198_trig_update | Thu Apr 09 00:00:00 1970 PST | Thu Apr 09 00:00:00 1970 | 8 | 8 | foo
208 | 608 | 00208_trig_update | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8 | 8 | foo
218 | 608 | 00218_trig_update | Mon Jan 19 00:00:00 1970 PST | Mon Jan 19 00:00:00 1970 | 8 | 8 | foo
228 | 608 | 00228_trig_update | Thu Jan 29 00:00:00 1970 PST | Thu Jan 29 00:00:00 1970 | 8 | 8 | foo
238 | 608 | 00238_trig_update | Sun Feb 08 00:00:00 1970 PST | Sun Feb 08 00:00:00 1970 | 8 | 8 | foo
248 | 608 | 00248_trig_update | Wed Feb 18 00:00:00 1970 PST | Wed Feb 18 00:00:00 1970 | 8 | 8 | foo
258 | 608 | 00258_trig_update | Sat Feb 28 00:00:00 1970 PST | Sat Feb 28 00:00:00 1970 | 8 | 8 | foo
268 | 608 | 00268_trig_update | Tue Mar 10 00:00:00 1970 PST | Tue Mar 10 00:00:00 1970 | 8 | 8 | foo
278 | 608 | 00278_trig_update | Fri Mar 20 00:00:00 1970 PST | Fri Mar 20 00:00:00 1970 | 8 | 8 | foo
288 | 608 | 00288_trig_update | Mon Mar 30 00:00:00 1970 PST | Mon Mar 30 00:00:00 1970 | 8 | 8 | foo
298 | 608 | 00298_trig_update | Thu Apr 09 00:00:00 1970 PST | Thu Apr 09 00:00:00 1970 | 8 | 8 | foo
308 | 608 | 00308_trig_update | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8 | 8 | foo
318 | 608 | 00318_trig_update | Mon Jan 19 00:00:00 1970 PST | Mon Jan 19 00:00:00 1970 | 8 | 8 | foo
328 | 608 | 00328_trig_update | Thu Jan 29 00:00:00 1970 PST | Thu Jan 29 00:00:00 1970 | 8 | 8 | foo
338 | 608 | 00338_trig_update | Sun Feb 08 00:00:00 1970 PST | Sun Feb 08 00:00:00 1970 | 8 | 8 | foo
348 | 608 | 00348_trig_update | Wed Feb 18 00:00:00 1970 PST | Wed Feb 18 00:00:00 1970 | 8 | 8 | foo
358 | 608 | 00358_trig_update | Sat Feb 28 00:00:00 1970 PST | Sat Feb 28 00:00:00 1970 | 8 | 8 | foo
368 | 608 | 00368_trig_update | Tue Mar 10 00:00:00 1970 PST | Tue Mar 10 00:00:00 1970 | 8 | 8 | foo
378 | 608 | 00378_trig_update | Fri Mar 20 00:00:00 1970 PST | Fri Mar 20 00:00:00 1970 | 8 | 8 | foo
388 | 608 | 00388_trig_update | Mon Mar 30 00:00:00 1970 PST | Mon Mar 30 00:00:00 1970 | 8 | 8 | foo
398 | 608 | 00398_trig_update | Thu Apr 09 00:00:00 1970 PST | Thu Apr 09 00:00:00 1970 | 8 | 8 | foo
408 | 608 | 00408_trig_update | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8 | 8 | foo
418 | 608 | 00418_trig_update | Mon Jan 19 00:00:00 1970 PST | Mon Jan 19 00:00:00 1970 | 8 | 8 | foo
428 | 608 | 00428_trig_update | Thu Jan 29 00:00:00 1970 PST | Thu Jan 29 00:00:00 1970 | 8 | 8 | foo
438 | 608 | 00438_trig_update | Sun Feb 08 00:00:00 1970 PST | Sun Feb 08 00:00:00 1970 | 8 | 8 | foo
448 | 608 | 00448_trig_update | Wed Feb 18 00:00:00 1970 PST | Wed Feb 18 00:00:00 1970 | 8 | 8 | foo
458 | 608 | 00458_trig_update | Sat Feb 28 00:00:00 1970 PST | Sat Feb 28 00:00:00 1970 | 8 | 8 | foo
468 | 608 | 00468_trig_update | Tue Mar 10 00:00:00 1970 PST | Tue Mar 10 00:00:00 1970 | 8 | 8 | foo
478 | 608 | 00478_trig_update | Fri Mar 20 00:00:00 1970 PST | Fri Mar 20 00:00:00 1970 | 8 | 8 | foo
488 | 608 | 00488_trig_update | Mon Mar 30 00:00:00 1970 PST | Mon Mar 30 00:00:00 1970 | 8 | 8 | foo
498 | 608 | 00498_trig_update | Thu Apr 09 00:00:00 1970 PST | Thu Apr 09 00:00:00 1970 | 8 | 8 | foo
508 | 608 | 00508_trig_update | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8 | 8 | foo
518 | 608 | 00518_trig_update | Mon Jan 19 00:00:00 1970 PST | Mon Jan 19 00:00:00 1970 | 8 | 8 | foo
528 | 608 | 00528_trig_update | Thu Jan 29 00:00:00 1970 PST | Thu Jan 29 00:00:00 1970 | 8 | 8 | foo
538 | 608 | 00538_trig_update | Sun Feb 08 00:00:00 1970 PST | Sun Feb 08 00:00:00 1970 | 8 | 8 | foo
548 | 608 | 00548_trig_update | Wed Feb 18 00:00:00 1970 PST | Wed Feb 18 00:00:00 1970 | 8 | 8 | foo
558 | 608 | 00558_trig_update | Sat Feb 28 00:00:00 1970 PST | Sat Feb 28 00:00:00 1970 | 8 | 8 | foo
568 | 608 | 00568_trig_update | Tue Mar 10 00:00:00 1970 PST | Tue Mar 10 00:00:00 1970 | 8 | 8 | foo
578 | 608 | 00578_trig_update | Fri Mar 20 00:00:00 1970 PST | Fri Mar 20 00:00:00 1970 | 8 | 8 | foo
588 | 608 | 00588_trig_update | Mon Mar 30 00:00:00 1970 PST | Mon Mar 30 00:00:00 1970 | 8 | 8 | foo
598 | 608 | 00598_trig_update | Thu Apr 09 00:00:00 1970 PST | Thu Apr 09 00:00:00 1970 | 8 | 8 | foo
608 | 608 | 00608_trig_update | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8 | 8 | foo
618 | 608 | 00618_trig_update | Mon Jan 19 00:00:00 1970 PST | Mon Jan 19 00:00:00 1970 | 8 | 8 | foo
628 | 608 | 00628_trig_update | Thu Jan 29 00:00:00 1970 PST | Thu Jan 29 00:00:00 1970 | 8 | 8 | foo
638 | 608 | 00638_trig_update | Sun Feb 08 00:00:00 1970 PST | Sun Feb 08 00:00:00 1970 | 8 | 8 | foo
648 | 608 | 00648_trig_update | Wed Feb 18 00:00:00 1970 PST | Wed Feb 18 00:00:00 1970 | 8 | 8 | foo
658 | 608 | 00658_trig_update | Sat Feb 28 00:00:00 1970 PST | Sat Feb 28 00:00:00 1970 | 8 | 8 | foo
668 | 608 | 00668_trig_update | Tue Mar 10 00:00:00 1970 PST | Tue Mar 10 00:00:00 1970 | 8 | 8 | foo
678 | 608 | 00678_trig_update | Fri Mar 20 00:00:00 1970 PST | Fri Mar 20 00:00:00 1970 | 8 | 8 | foo
688 | 608 | 00688_trig_update | Mon Mar 30 00:00:00 1970 PST | Mon Mar 30 00:00:00 1970 | 8 | 8 | foo
698 | 608 | 00698_trig_update | Thu Apr 09 00:00:00 1970 PST | Thu Apr 09 00:00:00 1970 | 8 | 8 | foo
708 | 608 | 00708_trig_update | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8 | 8 | foo
718 | 608 | 00718_trig_update | Mon Jan 19 00:00:00 1970 PST | Mon Jan 19 00:00:00 1970 | 8 | 8 | foo
728 | 608 | 00728_trig_update | Thu Jan 29 00:00:00 1970 PST | Thu Jan 29 00:00:00 1970 | 8 | 8 | foo
738 | 608 | 00738_trig_update | Sun Feb 08 00:00:00 1970 PST | Sun Feb 08 00:00:00 1970 | 8 | 8 | foo
748 | 608 | 00748_trig_update | Wed Feb 18 00:00:00 1970 PST | Wed Feb 18 00:00:00 1970 | 8 | 8 | foo
758 | 608 | 00758_trig_update | Sat Feb 28 00:00:00 1970 PST | Sat Feb 28 00:00:00 1970 | 8 | 8 | foo
768 | 608 | 00768_trig_update | Tue Mar 10 00:00:00 1970 PST | Tue Mar 10 00:00:00 1970 | 8 | 8 | foo
778 | 608 | 00778_trig_update | Fri Mar 20 00:00:00 1970 PST | Fri Mar 20 00:00:00 1970 | 8 | 8 | foo
788 | 608 | 00788_trig_update | Mon Mar 30 00:00:00 1970 PST | Mon Mar 30 00:00:00 1970 | 8 | 8 | foo
798 | 608 | 00798_trig_update | Thu Apr 09 00:00:00 1970 PST | Thu Apr 09 00:00:00 1970 | 8 | 8 | foo
808 | 608 | 00808_trig_update | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8 | 8 | foo
818 | 608 | 00818_trig_update | Mon Jan 19 00:00:00 1970 PST | Mon Jan 19 00:00:00 1970 | 8 | 8 | foo
828 | 608 | 00828_trig_update | Thu Jan 29 00:00:00 1970 PST | Thu Jan 29 00:00:00 1970 | 8 | 8 | foo
838 | 608 | 00838_trig_update | Sun Feb 08 00:00:00 1970 PST | Sun Feb 08 00:00:00 1970 | 8 | 8 | foo
848 | 608 | 00848_trig_update | Wed Feb 18 00:00:00 1970 PST | Wed Feb 18 00:00:00 1970 | 8 | 8 | foo
858 | 608 | 00858_trig_update | Sat Feb 28 00:00:00 1970 PST | Sat Feb 28 00:00:00 1970 | 8 | 8 | foo
868 | 608 | 00868_trig_update | Tue Mar 10 00:00:00 1970 PST | Tue Mar 10 00:00:00 1970 | 8 | 8 | foo
878 | 608 | 00878_trig_update | Fri Mar 20 00:00:00 1970 PST | Fri Mar 20 00:00:00 1970 | 8 | 8 | foo
888 | 608 | 00888_trig_update | Mon Mar 30 00:00:00 1970 PST | Mon Mar 30 00:00:00 1970 | 8 | 8 | foo
898 | 608 | 00898_trig_update | Thu Apr 09 00:00:00 1970 PST | Thu Apr 09 00:00:00 1970 | 8 | 8 | foo
908 | 608 | 00908_trig_update | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8 | 8 | foo
918 | 608 | 00918_trig_update | Mon Jan 19 00:00:00 1970 PST | Mon Jan 19 00:00:00 1970 | 8 | 8 | foo
928 | 608 | 00928_trig_update | Thu Jan 29 00:00:00 1970 PST | Thu Jan 29 00:00:00 1970 | 8 | 8 | foo
938 | 608 | 00938_trig_update | Sun Feb 08 00:00:00 1970 PST | Sun Feb 08 00:00:00 1970 | 8 | 8 | foo
948 | 608 | 00948_trig_update | Wed Feb 18 00:00:00 1970 PST | Wed Feb 18 00:00:00 1970 | 8 | 8 | foo
958 | 608 | 00958_trig_update | Sat Feb 28 00:00:00 1970 PST | Sat Feb 28 00:00:00 1970 | 8 | 8 | foo
968 | 608 | 00968_trig_update | Tue Mar 10 00:00:00 1970 PST | Tue Mar 10 00:00:00 1970 | 8 | 8 | foo
978 | 608 | 00978_trig_update | Fri Mar 20 00:00:00 1970 PST | Fri Mar 20 00:00:00 1970 | 8 | 8 | foo
988 | 608 | 00988_trig_update | Mon Mar 30 00:00:00 1970 PST | Mon Mar 30 00:00:00 1970 | 8 | 8 | foo
998 | 608 | 00998_trig_update | Thu Apr 09 00:00:00 1970 PST | Thu Apr 09 00:00:00 1970 | 8 | 8 | foo
1008 | 708 | 0000800008_trig_update | | | | ft2 |
1018 | 708 | 0001800018_trig_update | | | | ft2 |
(102 rows)
-- Test errors thrown on remote side during update
ALTER TABLE "S 1"."T 1" ADD CONSTRAINT c2positive CHECK (c2 >= 0);
INSERT INTO ft1(c1, c2) VALUES(11, 12); -- duplicate key
ERROR: duplicate key value violates unique constraint "t1_pkey"
DETAIL: Key ("C 1")=(11) already exists.
CONTEXT: remote SQL command: INSERT INTO "S 1"."T 1"("C 1", c2, c3, c4, c5, c6, c7, c8) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE. The newly added ON CONFLICT clause allows to specify an alternative to raising a unique or exclusion constraint violation error when inserting. ON CONFLICT refers to constraints that can either be specified using a inference clause (by specifying the columns of a unique constraint) or by naming a unique or exclusion constraint. DO NOTHING avoids the constraint violation, without touching the pre-existing row. DO UPDATE SET ... [WHERE ...] updates the pre-existing tuple, and has access to both the tuple proposed for insertion and the existing tuple; the optional WHERE clause can be used to prevent an update from being executed. The UPDATE SET and WHERE clauses have access to the tuple proposed for insertion using the "magic" EXCLUDED alias, and to the pre-existing tuple using the table name or its alias. This feature is often referred to as upsert. This is implemented using a new infrastructure called "speculative insertion". It is an optimistic variant of regular insertion that first does a pre-check for existing tuples and then attempts an insert. If a violating tuple was inserted concurrently, the speculatively inserted tuple is deleted and a new attempt is made. If the pre-check finds a matching tuple the alternative DO NOTHING or DO UPDATE action is taken. If the insertion succeeds without detecting a conflict, the tuple is deemed inserted. To handle the possible ambiguity between the excluded alias and a table named excluded, and for convenience with long relation names, INSERT INTO now can alias its target table. Bumps catversion as stored rules change. Author: Peter Geoghegan, with significant contributions from Heikki Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes. Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs, Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
INSERT INTO ft1(c1, c2) VALUES(11, 12) ON CONFLICT DO NOTHING; -- works
INSERT INTO ft1(c1, c2) VALUES(11, 12) ON CONFLICT (c1, c2) DO NOTHING; -- unsupported
ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
INSERT INTO ft1(c1, c2) VALUES(11, 12) ON CONFLICT (c1, c2) DO UPDATE SET c3 = 'ffg'; -- unsupported
ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
INSERT INTO ft1(c1, c2) VALUES(1111, -2); -- c2positive
ERROR: new row for relation "T 1" violates check constraint "c2positive"
DETAIL: Failing row contains (1111, -2, null, null, null, null, ft1 , null).
CONTEXT: remote SQL command: INSERT INTO "S 1"."T 1"("C 1", c2, c3, c4, c5, c6, c7, c8) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
UPDATE ft1 SET c2 = -c2 WHERE c1 = 1; -- c2positive
ERROR: new row for relation "T 1" violates check constraint "c2positive"
DETAIL: Failing row contains (1, -1, 00001_trig_update, 1970-01-02 08:00:00+00, 1970-01-02 00:00:00, 1, 1 , foo).
CONTEXT: remote SQL command: UPDATE "S 1"."T 1" SET c2 = (- c2) WHERE (("C 1" = 1))
-- Test savepoint/rollback behavior
select c2, count(*) from ft2 where c2 < 500 group by 1 order by 1;
c2 | count
-----+-------
0 | 100
1 | 100
4 | 100
6 | 100
100 | 2
101 | 2
104 | 2
106 | 2
201 | 1
204 | 1
303 | 100
403 | 2
407 | 100
(13 rows)
select c2, count(*) from "S 1"."T 1" where c2 < 500 group by 1 order by 1;
c2 | count
-----+-------
0 | 100
1 | 100
4 | 100
6 | 100
100 | 2
101 | 2
104 | 2
106 | 2
201 | 1
204 | 1
303 | 100
403 | 2
407 | 100
(13 rows)
begin;
update ft2 set c2 = 42 where c2 = 0;
select c2, count(*) from ft2 where c2 < 500 group by 1 order by 1;
c2 | count
-----+-------
1 | 100
4 | 100
6 | 100
42 | 100
100 | 2
101 | 2
104 | 2
106 | 2
201 | 1
204 | 1
303 | 100
403 | 2
407 | 100
(13 rows)
savepoint s1;
update ft2 set c2 = 44 where c2 = 4;
select c2, count(*) from ft2 where c2 < 500 group by 1 order by 1;
c2 | count
-----+-------
1 | 100
6 | 100
42 | 100
44 | 100
100 | 2
101 | 2
104 | 2
106 | 2
201 | 1
204 | 1
303 | 100
403 | 2
407 | 100
(13 rows)
release savepoint s1;
select c2, count(*) from ft2 where c2 < 500 group by 1 order by 1;
c2 | count
-----+-------
1 | 100
6 | 100
42 | 100
44 | 100
100 | 2
101 | 2
104 | 2
106 | 2
201 | 1
204 | 1
303 | 100
403 | 2
407 | 100
(13 rows)
savepoint s2;
update ft2 set c2 = 46 where c2 = 6;
select c2, count(*) from ft2 where c2 < 500 group by 1 order by 1;
c2 | count
-----+-------
1 | 100
42 | 100
44 | 100
46 | 100
100 | 2
101 | 2
104 | 2
106 | 2
201 | 1
204 | 1
303 | 100
403 | 2
407 | 100
(13 rows)
rollback to savepoint s2;
select c2, count(*) from ft2 where c2 < 500 group by 1 order by 1;
c2 | count
-----+-------
1 | 100
6 | 100
42 | 100
44 | 100
100 | 2
101 | 2
104 | 2
106 | 2
201 | 1
204 | 1
303 | 100
403 | 2
407 | 100
(13 rows)
release savepoint s2;
select c2, count(*) from ft2 where c2 < 500 group by 1 order by 1;
c2 | count
-----+-------
1 | 100
6 | 100
42 | 100
44 | 100
100 | 2
101 | 2
104 | 2
106 | 2
201 | 1
204 | 1
303 | 100
403 | 2
407 | 100
(13 rows)
savepoint s3;
update ft2 set c2 = -2 where c2 = 42 and c1 = 10; -- fail on remote side
ERROR: new row for relation "T 1" violates check constraint "c2positive"
DETAIL: Failing row contains (10, -2, 00010_trig_update_trig_update, 1970-01-11 08:00:00+00, 1970-01-11 00:00:00, 0, 0 , foo).
CONTEXT: remote SQL command: UPDATE "S 1"."T 1" SET c2 = (-2) WHERE ((c2 = 42)) AND (("C 1" = 10))
rollback to savepoint s3;
select c2, count(*) from ft2 where c2 < 500 group by 1 order by 1;
c2 | count
-----+-------
1 | 100
6 | 100
42 | 100
44 | 100
100 | 2
101 | 2
104 | 2
106 | 2
201 | 1
204 | 1
303 | 100
403 | 2
407 | 100
(13 rows)
release savepoint s3;
select c2, count(*) from ft2 where c2 < 500 group by 1 order by 1;
c2 | count
-----+-------
1 | 100
6 | 100
42 | 100
44 | 100
100 | 2
101 | 2
104 | 2
106 | 2
201 | 1
204 | 1
303 | 100
403 | 2
407 | 100
(13 rows)
-- none of the above is committed yet remotely
select c2, count(*) from "S 1"."T 1" where c2 < 500 group by 1 order by 1;
c2 | count
-----+-------
0 | 100
1 | 100
4 | 100
6 | 100
100 | 2
101 | 2
104 | 2
106 | 2
201 | 1
204 | 1
303 | 100
403 | 2
407 | 100
(13 rows)
commit;
select c2, count(*) from ft2 where c2 < 500 group by 1 order by 1;
c2 | count
-----+-------
1 | 100
6 | 100
42 | 100
44 | 100
100 | 2
101 | 2
104 | 2
106 | 2
201 | 1
204 | 1
303 | 100
403 | 2
407 | 100
(13 rows)
select c2, count(*) from "S 1"."T 1" where c2 < 500 group by 1 order by 1;
c2 | count
-----+-------
1 | 100
6 | 100
42 | 100
44 | 100
100 | 2
101 | 2
104 | 2
106 | 2
201 | 1
204 | 1
303 | 100
403 | 2
407 | 100
(13 rows)
VACUUM ANALYZE "S 1"."T 1";
-- Above DMLs add data with c6 as NULL in ft1, so test ORDER BY NULLS LAST and NULLs
-- FIRST behavior here.
-- ORDER BY DESC NULLS LAST options
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 ORDER BY c6 DESC NULLS LAST, c1 OFFSET 795 LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER BY c6 DESC NULLS LAST, "C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 795::bigint
(3 rows)
SELECT * FROM ft1 ORDER BY c6 DESC NULLS LAST, c1 OFFSET 795 LIMIT 10;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
------+-----+--------------------+------------------------------+--------------------------+------+------------+-----
960 | 42 | 00960_trig_update | Mon Mar 02 00:00:00 1970 PST | Mon Mar 02 00:00:00 1970 | 0 | 0 | foo
970 | 42 | 00970_trig_update | Thu Mar 12 00:00:00 1970 PST | Thu Mar 12 00:00:00 1970 | 0 | 0 | foo
980 | 42 | 00980_trig_update | Sun Mar 22 00:00:00 1970 PST | Sun Mar 22 00:00:00 1970 | 0 | 0 | foo
990 | 42 | 00990_trig_update | Wed Apr 01 00:00:00 1970 PST | Wed Apr 01 00:00:00 1970 | 0 | 0 | foo
1000 | 42 | 01000_trig_update | Thu Jan 01 00:00:00 1970 PST | Thu Jan 01 00:00:00 1970 | 0 | 0 | foo
1218 | 818 | ggg_trig_update | | | (--; | ft2 |
1001 | 101 | 0000100001 | | | | ft2 |
1003 | 403 | 0000300003_update3 | | | | ft2 |
1004 | 104 | 0000400004 | | | | ft2 |
1006 | 106 | 0000600006 | | | | ft2 |
(10 rows)
-- ORDER BY DESC NULLS FIRST options
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 ORDER BY c6 DESC NULLS FIRST, c1 OFFSET 15 LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER BY c6 DESC NULLS FIRST, "C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 15::bigint
(3 rows)
SELECT * FROM ft1 ORDER BY c6 DESC NULLS FIRST, c1 OFFSET 15 LIMIT 10;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
------+-----+-----------------+------------------------------+--------------------------+----+------------+-----
1020 | 100 | 0002000020 | | | | ft2 |
1101 | 201 | aaa | | | | ft2 |
1103 | 503 | ccc_update3 | | | | ft2 |
1104 | 204 | ddd | | | | ft2 |
1208 | 818 | fff_trig_update | | | | ft2 |
9 | 509 | 00009_update9 | Sat Jan 10 00:00:00 1970 PST | Sat Jan 10 00:00:00 1970 | 9 | ft2 | foo
19 | 509 | 00019_update9 | Tue Jan 20 00:00:00 1970 PST | Tue Jan 20 00:00:00 1970 | 9 | ft2 | foo
29 | 509 | 00029_update9 | Fri Jan 30 00:00:00 1970 PST | Fri Jan 30 00:00:00 1970 | 9 | ft2 | foo
39 | 509 | 00039_update9 | Mon Feb 09 00:00:00 1970 PST | Mon Feb 09 00:00:00 1970 | 9 | ft2 | foo
49 | 509 | 00049_update9 | Thu Feb 19 00:00:00 1970 PST | Thu Feb 19 00:00:00 1970 | 9 | ft2 | foo
(10 rows)
-- ORDER BY ASC NULLS FIRST options
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 ORDER BY c6 ASC NULLS FIRST, c1 OFFSET 15 LIMIT 10;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER BY c6 ASC NULLS FIRST, "C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 15::bigint
(3 rows)
SELECT * FROM ft1 ORDER BY c6 ASC NULLS FIRST, c1 OFFSET 15 LIMIT 10;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
------+-----+-------------------+------------------------------+--------------------------+------+------------+-----
1020 | 100 | 0002000020 | | | | ft2 |
1101 | 201 | aaa | | | | ft2 |
1103 | 503 | ccc_update3 | | | | ft2 |
1104 | 204 | ddd | | | | ft2 |
1208 | 818 | fff_trig_update | | | | ft2 |
1218 | 818 | ggg_trig_update | | | (--; | ft2 |
10 | 42 | 00010_trig_update | Sun Jan 11 00:00:00 1970 PST | Sun Jan 11 00:00:00 1970 | 0 | 0 | foo
20 | 42 | 00020_trig_update | Wed Jan 21 00:00:00 1970 PST | Wed Jan 21 00:00:00 1970 | 0 | 0 | foo
30 | 42 | 00030_trig_update | Sat Jan 31 00:00:00 1970 PST | Sat Jan 31 00:00:00 1970 | 0 | 0 | foo
40 | 42 | 00040_trig_update | Tue Feb 10 00:00:00 1970 PST | Tue Feb 10 00:00:00 1970 | 0 | 0 | foo
(10 rows)
-- ===================================================================
-- test check constraints
-- ===================================================================
-- Consistent check constraints provide consistent results
ALTER FOREIGN TABLE ft1 ADD CONSTRAINT ft1_c2positive CHECK (c2 >= 0);
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) SELECT count(*) FROM ft1 WHERE c2 < 0;
QUERY PLAN
-----------------------------------------------------------------
Foreign Scan
Output: (count(*))
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT count(*) FROM "S 1"."T 1" WHERE ((c2 < 0))
(4 rows)
SELECT count(*) FROM ft1 WHERE c2 < 0;
count
-------
0
(1 row)
SET constraint_exclusion = 'on';
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) SELECT count(*) FROM ft1 WHERE c2 < 0;
QUERY PLAN
--------------------------------
Aggregate
Output: count(*)
-> Result
One-Time Filter: false
(4 rows)
SELECT count(*) FROM ft1 WHERE c2 < 0;
count
-------
0
(1 row)
RESET constraint_exclusion;
-- check constraint is enforced on the remote side, not locally
INSERT INTO ft1(c1, c2) VALUES(1111, -2); -- c2positive
ERROR: new row for relation "T 1" violates check constraint "c2positive"
DETAIL: Failing row contains (1111, -2, null, null, null, null, ft1 , null).
CONTEXT: remote SQL command: INSERT INTO "S 1"."T 1"("C 1", c2, c3, c4, c5, c6, c7, c8) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
UPDATE ft1 SET c2 = -c2 WHERE c1 = 1; -- c2positive
ERROR: new row for relation "T 1" violates check constraint "c2positive"
DETAIL: Failing row contains (1, -1, 00001_trig_update, 1970-01-02 08:00:00+00, 1970-01-02 00:00:00, 1, 1 , foo).
CONTEXT: remote SQL command: UPDATE "S 1"."T 1" SET c2 = (- c2) WHERE (("C 1" = 1))
ALTER FOREIGN TABLE ft1 DROP CONSTRAINT ft1_c2positive;
-- But inconsistent check constraints provide inconsistent results
ALTER FOREIGN TABLE ft1 ADD CONSTRAINT ft1_c2negative CHECK (c2 < 0);
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) SELECT count(*) FROM ft1 WHERE c2 >= 0;
QUERY PLAN
------------------------------------------------------------------
Foreign Scan
Output: (count(*))
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT count(*) FROM "S 1"."T 1" WHERE ((c2 >= 0))
(4 rows)
SELECT count(*) FROM ft1 WHERE c2 >= 0;
count
-------
821
(1 row)
SET constraint_exclusion = 'on';
Avoid invalidating all foreign-join cached plans when user mappings change. We must not push down a foreign join when the foreign tables involved should be accessed under different user mappings. Previously we tried to enforce that rule literally during planning, but that meant that the resulting plans were dependent on the current contents of the pg_user_mapping catalog, and we had to blow away all cached plans containing any remote join when anything at all changed in pg_user_mapping. This could have been improved somewhat, but the fact that a syscache inval callback has very limited info about what changed made it hard to do better within that design. Instead, let's change the planner to not consider user mappings per se, but to allow a foreign join if both RTEs have the same checkAsUser value. If they do, then they necessarily will use the same user mapping at runtime, and we don't need to know specifically which one that is. Post-plan-time changes in pg_user_mapping no longer require any plan invalidation. This rule does give up some optimization ability, to wit where two foreign table references come from views with different owners or one's from a view and one's directly in the query, but nonetheless the same user mapping would have applied. We'll sacrifice the first case, but to not regress more than we have to in the second case, allow a foreign join involving both zero and nonzero checkAsUser values if the nonzero one is the same as the prevailing effective userID. In that case, mark the plan as only runnable by that userID. The plancache code already had a notion of plans being userID-specific, in order to support RLS. It was a little confused though, in particular lacking clarity of thought as to whether it was the rewritten query or just the finished plan that's dependent on the userID. Rearrange that code so that it's clearer what depends on which, and so that the same logic applies to both RLS-injected role dependency and foreign-join-injected role dependency. Note that this patch doesn't remove the other issue mentioned in the original complaint, which is that while we'll reliably stop using a foreign join if it's disallowed in a new context, we might fail to start using a foreign join if it's now allowed, but we previously created a generic cached plan that didn't use one. It was agreed that the chance of winning that way was not high enough to justify the much larger number of plan invalidations that would have to occur if we tried to cause it to happen. In passing, clean up randomly-varying spelling of EXPLAIN commands in postgres_fdw.sql, and fix a COSTS ON example that had been allowed to leak into the committed tests. This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the previous attempt at ensuring we wouldn't push down foreign joins that span permissions contexts. Etsuro Fujita and Tom Lane Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
EXPLAIN (VERBOSE, COSTS OFF) SELECT count(*) FROM ft1 WHERE c2 >= 0;
QUERY PLAN
--------------------------------
Aggregate
Output: count(*)
-> Result
One-Time Filter: false
(4 rows)
SELECT count(*) FROM ft1 WHERE c2 >= 0;
count
-------
0
(1 row)
RESET constraint_exclusion;
-- local check constraint is not actually enforced
INSERT INTO ft1(c1, c2) VALUES(1111, 2);
UPDATE ft1 SET c2 = c2 + 1 WHERE c1 = 1;
ALTER FOREIGN TABLE ft1 DROP CONSTRAINT ft1_c2negative;
-- ===================================================================
-- test WITH CHECK OPTION constraints
-- ===================================================================
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
CREATE FUNCTION row_before_insupd_trigfunc() RETURNS trigger AS $$BEGIN NEW.a := NEW.a + 10; RETURN NEW; END$$ LANGUAGE plpgsql;
CREATE TABLE base_tbl (a int, b int);
ALTER TABLE base_tbl SET (autovacuum_enabled = 'false');
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
CREATE TRIGGER row_before_insupd_trigger BEFORE INSERT OR UPDATE ON base_tbl FOR EACH ROW EXECUTE PROCEDURE row_before_insupd_trigfunc();
CREATE FOREIGN TABLE foreign_tbl (a int, b int)
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
SERVER loopback OPTIONS (table_name 'base_tbl');
CREATE VIEW rw_view AS SELECT * FROM foreign_tbl
WHERE a < b WITH CHECK OPTION;
\d+ rw_view
View "public.rw_view"
Column | Type | Collation | Nullable | Default | Storage | Description
--------+---------+-----------+----------+---------+---------+-------------
a | integer | | | | plain |
b | integer | | | | plain |
View definition:
Get rid of the "new" and "old" entries in a view's rangetable. The rule system needs "old" and/or "new" pseudo-RTEs in rule actions that are ON INSERT/UPDATE/DELETE. Historically it's put such entries into the ON SELECT rules of views as well, but those are really quite vestigial. The only thing we've used them for is to carry the view's relid forward to AcquireExecutorLocks (so that we can re-lock the view to verify it hasn't changed before re-using a plan) and to carry its relid and permissions data forward to execution-time permissions checks. What we can do instead of that is to retain these fields of the RTE_RELATION RTE for the view even after we convert it to an RTE_SUBQUERY RTE. This requires a tiny amount of extra complication in the planner and AcquireExecutorLocks, but on the other hand we can get rid of the logic that moves that data from one place to another. The principal immediate benefit of doing this, aside from a small saving in the pg_rewrite data for views, is that these pseudo-RTEs no longer trigger ruleutils.c's heuristic about qualifying variable names when the rangetable's length is more than 1. That results in quite a number of small simplifications in regression test outputs, which are all to the good IMO. Bump catversion because we need to dump a few more fields of RTE_SUBQUERY RTEs. While those will always be zeroes anyway in stored rules (because we'd never populate them until query rewrite) they are useful for debugging, and it seems like we'd better make sure to transmit such RTEs accurately in plans sent to parallel workers. I don't think the executor actually examines these fields after startup, but someday it might. This is a second attempt at committing 1b4d280ea. The difference from the first time is that now we can add some filtering rules to AdjustUpgrade.pm to allow cross-version upgrade testing to pass despite all the cosmetic changes in CREATE VIEW outputs. Amit Langote (filtering rules by me) Discussion: https://postgr.es/m/CA+HiwqEf7gPN4Hn+LoZ4tP2q_Qt7n3vw7-6fJKOf92tSEnX6Gg@mail.gmail.com Discussion: https://postgr.es/m/891521.1673657296@sss.pgh.pa.us
2023-01-18 19:23:57 +01:00
SELECT a,
b
FROM foreign_tbl
Get rid of the "new" and "old" entries in a view's rangetable. The rule system needs "old" and/or "new" pseudo-RTEs in rule actions that are ON INSERT/UPDATE/DELETE. Historically it's put such entries into the ON SELECT rules of views as well, but those are really quite vestigial. The only thing we've used them for is to carry the view's relid forward to AcquireExecutorLocks (so that we can re-lock the view to verify it hasn't changed before re-using a plan) and to carry its relid and permissions data forward to execution-time permissions checks. What we can do instead of that is to retain these fields of the RTE_RELATION RTE for the view even after we convert it to an RTE_SUBQUERY RTE. This requires a tiny amount of extra complication in the planner and AcquireExecutorLocks, but on the other hand we can get rid of the logic that moves that data from one place to another. The principal immediate benefit of doing this, aside from a small saving in the pg_rewrite data for views, is that these pseudo-RTEs no longer trigger ruleutils.c's heuristic about qualifying variable names when the rangetable's length is more than 1. That results in quite a number of small simplifications in regression test outputs, which are all to the good IMO. Bump catversion because we need to dump a few more fields of RTE_SUBQUERY RTEs. While those will always be zeroes anyway in stored rules (because we'd never populate them until query rewrite) they are useful for debugging, and it seems like we'd better make sure to transmit such RTEs accurately in plans sent to parallel workers. I don't think the executor actually examines these fields after startup, but someday it might. This is a second attempt at committing 1b4d280ea. The difference from the first time is that now we can add some filtering rules to AdjustUpgrade.pm to allow cross-version upgrade testing to pass despite all the cosmetic changes in CREATE VIEW outputs. Amit Langote (filtering rules by me) Discussion: https://postgr.es/m/CA+HiwqEf7gPN4Hn+LoZ4tP2q_Qt7n3vw7-6fJKOf92tSEnX6Gg@mail.gmail.com Discussion: https://postgr.es/m/891521.1673657296@sss.pgh.pa.us
2023-01-18 19:23:57 +01:00
WHERE a < b;
Options: check_option=cascaded
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
INSERT INTO rw_view VALUES (0, 5);
QUERY PLAN
--------------------------------------------------------------------------------
Insert on public.foreign_tbl
Remote SQL: INSERT INTO public.base_tbl(a, b) VALUES ($1, $2) RETURNING a, b
Batch Size: 1
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
-> Result
Output: 0, 5
(5 rows)
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
INSERT INTO rw_view VALUES (0, 5); -- should fail
ERROR: new row violates check option for view "rw_view"
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
DETAIL: Failing row contains (10, 5).
EXPLAIN (VERBOSE, COSTS OFF)
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
INSERT INTO rw_view VALUES (0, 15);
QUERY PLAN
--------------------------------------------------------------------------------
Insert on public.foreign_tbl
Remote SQL: INSERT INTO public.base_tbl(a, b) VALUES ($1, $2) RETURNING a, b
Batch Size: 1
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
-> Result
Output: 0, 15
(5 rows)
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
INSERT INTO rw_view VALUES (0, 15); -- ok
SELECT * FROM foreign_tbl;
a | b
----+----
10 | 15
(1 row)
EXPLAIN (VERBOSE, COSTS OFF)
UPDATE rw_view SET b = b + 5;
QUERY PLAN
---------------------------------------------------------------------------------------
Update on public.foreign_tbl
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
Remote SQL: UPDATE public.base_tbl SET b = $2 WHERE ctid = $1 RETURNING a, b
-> Foreign Scan on public.foreign_tbl
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: (foreign_tbl.b + 5), foreign_tbl.ctid, foreign_tbl.*
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
Remote SQL: SELECT a, b, ctid FROM public.base_tbl WHERE ((a < b)) FOR UPDATE
(5 rows)
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
UPDATE rw_view SET b = b + 5; -- should fail
ERROR: new row violates check option for view "rw_view"
DETAIL: Failing row contains (20, 20).
EXPLAIN (VERBOSE, COSTS OFF)
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
UPDATE rw_view SET b = b + 15;
QUERY PLAN
---------------------------------------------------------------------------------------
Update on public.foreign_tbl
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
Remote SQL: UPDATE public.base_tbl SET b = $2 WHERE ctid = $1 RETURNING a, b
-> Foreign Scan on public.foreign_tbl
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: (foreign_tbl.b + 15), foreign_tbl.ctid, foreign_tbl.*
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
Remote SQL: SELECT a, b, ctid FROM public.base_tbl WHERE ((a < b)) FOR UPDATE
(5 rows)
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
UPDATE rw_view SET b = b + 15; -- ok
SELECT * FROM foreign_tbl;
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
a | b
----+----
20 | 30
(1 row)
-- We don't allow batch insert when there are any WCO constraints
ALTER SERVER loopback OPTIONS (ADD batch_size '10');
EXPLAIN (VERBOSE, COSTS OFF)
INSERT INTO rw_view VALUES (0, 15), (0, 5);
QUERY PLAN
--------------------------------------------------------------------------------
Insert on public.foreign_tbl
Remote SQL: INSERT INTO public.base_tbl(a, b) VALUES ($1, $2) RETURNING a, b
Batch Size: 1
-> Values Scan on "*VALUES*"
Output: "*VALUES*".column1, "*VALUES*".column2
(5 rows)
INSERT INTO rw_view VALUES (0, 15), (0, 5); -- should fail
ERROR: new row violates check option for view "rw_view"
DETAIL: Failing row contains (10, 5).
SELECT * FROM foreign_tbl;
a | b
----+----
20 | 30
(1 row)
ALTER SERVER loopback OPTIONS (DROP batch_size);
DROP FOREIGN TABLE foreign_tbl CASCADE;
NOTICE: drop cascades to view rw_view
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
DROP TRIGGER row_before_insupd_trigger ON base_tbl;
DROP TABLE base_tbl;
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
-- test WCO for partitions
CREATE TABLE child_tbl (a int, b int);
ALTER TABLE child_tbl SET (autovacuum_enabled = 'false');
CREATE TRIGGER row_before_insupd_trigger BEFORE INSERT OR UPDATE ON child_tbl FOR EACH ROW EXECUTE PROCEDURE row_before_insupd_trigfunc();
CREATE FOREIGN TABLE foreign_tbl (a int, b int)
SERVER loopback OPTIONS (table_name 'child_tbl');
CREATE TABLE parent_tbl (a int, b int) PARTITION BY RANGE(a);
ALTER TABLE parent_tbl ATTACH PARTITION foreign_tbl FOR VALUES FROM (0) TO (100);
-- Detach and re-attach once, to stress the concurrent detach case.
ALTER TABLE parent_tbl DETACH PARTITION foreign_tbl CONCURRENTLY;
ALTER TABLE parent_tbl ATTACH PARTITION foreign_tbl FOR VALUES FROM (0) TO (100);
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
CREATE VIEW rw_view AS SELECT * FROM parent_tbl
WHERE a < b WITH CHECK OPTION;
\d+ rw_view
View "public.rw_view"
Column | Type | Collation | Nullable | Default | Storage | Description
--------+---------+-----------+----------+---------+---------+-------------
a | integer | | | | plain |
b | integer | | | | plain |
View definition:
Get rid of the "new" and "old" entries in a view's rangetable. The rule system needs "old" and/or "new" pseudo-RTEs in rule actions that are ON INSERT/UPDATE/DELETE. Historically it's put such entries into the ON SELECT rules of views as well, but those are really quite vestigial. The only thing we've used them for is to carry the view's relid forward to AcquireExecutorLocks (so that we can re-lock the view to verify it hasn't changed before re-using a plan) and to carry its relid and permissions data forward to execution-time permissions checks. What we can do instead of that is to retain these fields of the RTE_RELATION RTE for the view even after we convert it to an RTE_SUBQUERY RTE. This requires a tiny amount of extra complication in the planner and AcquireExecutorLocks, but on the other hand we can get rid of the logic that moves that data from one place to another. The principal immediate benefit of doing this, aside from a small saving in the pg_rewrite data for views, is that these pseudo-RTEs no longer trigger ruleutils.c's heuristic about qualifying variable names when the rangetable's length is more than 1. That results in quite a number of small simplifications in regression test outputs, which are all to the good IMO. Bump catversion because we need to dump a few more fields of RTE_SUBQUERY RTEs. While those will always be zeroes anyway in stored rules (because we'd never populate them until query rewrite) they are useful for debugging, and it seems like we'd better make sure to transmit such RTEs accurately in plans sent to parallel workers. I don't think the executor actually examines these fields after startup, but someday it might. This is a second attempt at committing 1b4d280ea. The difference from the first time is that now we can add some filtering rules to AdjustUpgrade.pm to allow cross-version upgrade testing to pass despite all the cosmetic changes in CREATE VIEW outputs. Amit Langote (filtering rules by me) Discussion: https://postgr.es/m/CA+HiwqEf7gPN4Hn+LoZ4tP2q_Qt7n3vw7-6fJKOf92tSEnX6Gg@mail.gmail.com Discussion: https://postgr.es/m/891521.1673657296@sss.pgh.pa.us
2023-01-18 19:23:57 +01:00
SELECT a,
b
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
FROM parent_tbl
Get rid of the "new" and "old" entries in a view's rangetable. The rule system needs "old" and/or "new" pseudo-RTEs in rule actions that are ON INSERT/UPDATE/DELETE. Historically it's put such entries into the ON SELECT rules of views as well, but those are really quite vestigial. The only thing we've used them for is to carry the view's relid forward to AcquireExecutorLocks (so that we can re-lock the view to verify it hasn't changed before re-using a plan) and to carry its relid and permissions data forward to execution-time permissions checks. What we can do instead of that is to retain these fields of the RTE_RELATION RTE for the view even after we convert it to an RTE_SUBQUERY RTE. This requires a tiny amount of extra complication in the planner and AcquireExecutorLocks, but on the other hand we can get rid of the logic that moves that data from one place to another. The principal immediate benefit of doing this, aside from a small saving in the pg_rewrite data for views, is that these pseudo-RTEs no longer trigger ruleutils.c's heuristic about qualifying variable names when the rangetable's length is more than 1. That results in quite a number of small simplifications in regression test outputs, which are all to the good IMO. Bump catversion because we need to dump a few more fields of RTE_SUBQUERY RTEs. While those will always be zeroes anyway in stored rules (because we'd never populate them until query rewrite) they are useful for debugging, and it seems like we'd better make sure to transmit such RTEs accurately in plans sent to parallel workers. I don't think the executor actually examines these fields after startup, but someday it might. This is a second attempt at committing 1b4d280ea. The difference from the first time is that now we can add some filtering rules to AdjustUpgrade.pm to allow cross-version upgrade testing to pass despite all the cosmetic changes in CREATE VIEW outputs. Amit Langote (filtering rules by me) Discussion: https://postgr.es/m/CA+HiwqEf7gPN4Hn+LoZ4tP2q_Qt7n3vw7-6fJKOf92tSEnX6Gg@mail.gmail.com Discussion: https://postgr.es/m/891521.1673657296@sss.pgh.pa.us
2023-01-18 19:23:57 +01:00
WHERE a < b;
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
Options: check_option=cascaded
EXPLAIN (VERBOSE, COSTS OFF)
INSERT INTO rw_view VALUES (0, 5);
QUERY PLAN
-----------------------------
Insert on public.parent_tbl
-> Result
Output: 0, 5
(3 rows)
INSERT INTO rw_view VALUES (0, 5); -- should fail
ERROR: new row violates check option for view "rw_view"
DETAIL: Failing row contains (10, 5).
EXPLAIN (VERBOSE, COSTS OFF)
INSERT INTO rw_view VALUES (0, 15);
QUERY PLAN
-----------------------------
Insert on public.parent_tbl
-> Result
Output: 0, 15
(3 rows)
INSERT INTO rw_view VALUES (0, 15); -- ok
SELECT * FROM foreign_tbl;
a | b
----+----
10 | 15
(1 row)
EXPLAIN (VERBOSE, COSTS OFF)
UPDATE rw_view SET b = b + 5;
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
QUERY PLAN
------------------------------------------------------------------------------------------------
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
Update on public.parent_tbl
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Foreign Update on public.foreign_tbl parent_tbl_1
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
Remote SQL: UPDATE public.child_tbl SET b = $2 WHERE ctid = $1 RETURNING a, b
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
-> Foreign Scan on public.foreign_tbl parent_tbl_1
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: (parent_tbl_1.b + 5), parent_tbl_1.tableoid, parent_tbl_1.ctid, parent_tbl_1.*
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
Remote SQL: SELECT a, b, ctid FROM public.child_tbl WHERE ((a < b)) FOR UPDATE
(6 rows)
UPDATE rw_view SET b = b + 5; -- should fail
ERROR: new row violates check option for view "rw_view"
DETAIL: Failing row contains (20, 20).
EXPLAIN (VERBOSE, COSTS OFF)
UPDATE rw_view SET b = b + 15;
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
QUERY PLAN
-------------------------------------------------------------------------------------------------
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
Update on public.parent_tbl
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Foreign Update on public.foreign_tbl parent_tbl_1
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
Remote SQL: UPDATE public.child_tbl SET b = $2 WHERE ctid = $1 RETURNING a, b
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
-> Foreign Scan on public.foreign_tbl parent_tbl_1
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: (parent_tbl_1.b + 15), parent_tbl_1.tableoid, parent_tbl_1.ctid, parent_tbl_1.*
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
Remote SQL: SELECT a, b, ctid FROM public.child_tbl WHERE ((a < b)) FOR UPDATE
(6 rows)
UPDATE rw_view SET b = b + 15; -- ok
SELECT * FROM foreign_tbl;
a | b
----+----
20 | 30
(1 row)
-- We don't allow batch insert when there are any WCO constraints
ALTER SERVER loopback OPTIONS (ADD batch_size '10');
EXPLAIN (VERBOSE, COSTS OFF)
INSERT INTO rw_view VALUES (0, 15), (0, 5);
QUERY PLAN
--------------------------------------------------------
Insert on public.parent_tbl
-> Values Scan on "*VALUES*"
Output: "*VALUES*".column1, "*VALUES*".column2
(3 rows)
INSERT INTO rw_view VALUES (0, 15), (0, 5); -- should fail
ERROR: new row violates check option for view "rw_view"
DETAIL: Failing row contains (10, 5).
SELECT * FROM foreign_tbl;
a | b
----+----
20 | 30
(1 row)
ALTER SERVER loopback OPTIONS (DROP batch_size);
Fix WITH CHECK OPTION on views referencing postgres_fdw tables. If a view references a foreign table, and the foreign table has a BEFORE INSERT trigger, then it's possible for a tuple inserted or updated through the view to be changed such that it violates the view's WITH CHECK OPTION constraint. Before this commit, postgres_fdw handled this case inconsistently. A RETURNING clause on the INSERT or UPDATE statement targeting the view would cause the finally-inserted tuple to be read back, and the WITH CHECK OPTION violation would throw an error. But without a RETURNING clause, postgres_fdw would not read the final tuple back, and WITH CHECK OPTION would not throw an error for the violation (or may throw an error when there is no real violation). AFTER ROW triggers on the foreign table had a similar effect as a RETURNING clause on the INSERT or UPDATE statement. To fix, this commit retrieves the attributes needed to enforce the WITH CHECK OPTION constraint along with the attributes needed for the RETURNING clause (if any) from the remote side. Thus, the WITH CHECK OPTION constraint is always evaluated against the final tuple after any triggers on the remote side. This fix may be considered inconsistent with CHECK constraints declared on foreign tables, which are not enforced locally at all (because the constraint is on a remote object). The discussion concluded that this difference is reasonable, because the WITH CHECK OPTION is a constraint on the local view (not any remote object); therefore it only makes sense to enforce its WITH CHECK OPTION constraint locally. Author: Etsuro Fujita Reviewed-by: Arthur Zakirov, Stephen Frost Discussion: https://www.postgresql.org/message-id/7eb58fab-fd3b-781b-ac33-f7cfec96021f%40lab.ntt.co.jp
2018-07-08 09:14:51 +02:00
DROP FOREIGN TABLE foreign_tbl CASCADE;
DROP TRIGGER row_before_insupd_trigger ON child_tbl;
DROP TABLE parent_tbl CASCADE;
NOTICE: drop cascades to view rw_view
DROP FUNCTION row_before_insupd_trigfunc;
-- Try a more complex permutation of WCO where there are multiple levels of
-- partitioned tables with columns not all in the same order
CREATE TABLE parent_tbl (a int, b text, c numeric) PARTITION BY RANGE(a);
CREATE TABLE sub_parent (c numeric, a int, b text) PARTITION BY RANGE(a);
ALTER TABLE parent_tbl ATTACH PARTITION sub_parent FOR VALUES FROM (1) TO (10);
CREATE TABLE child_local (b text, c numeric, a int);
CREATE FOREIGN TABLE child_foreign (b text, c numeric, a int)
SERVER loopback OPTIONS (table_name 'child_local');
ALTER TABLE sub_parent ATTACH PARTITION child_foreign FOR VALUES FROM (1) TO (10);
CREATE VIEW rw_view AS SELECT * FROM parent_tbl WHERE a < 5 WITH CHECK OPTION;
INSERT INTO parent_tbl (a) VALUES(1),(5);
EXPLAIN (VERBOSE, COSTS OFF)
UPDATE rw_view SET b = 'text', c = 123.456;
QUERY PLAN
-------------------------------------------------------------------------------------------------
Update on public.parent_tbl
Foreign Update on public.child_foreign parent_tbl_1
Remote SQL: UPDATE public.child_local SET b = $2, c = $3 WHERE ctid = $1 RETURNING a
-> Foreign Scan on public.child_foreign parent_tbl_1
Output: 'text'::text, 123.456, parent_tbl_1.tableoid, parent_tbl_1.ctid, parent_tbl_1.*
Remote SQL: SELECT b, c, a, ctid FROM public.child_local WHERE ((a < 5)) FOR UPDATE
(6 rows)
UPDATE rw_view SET b = 'text', c = 123.456;
SELECT * FROM parent_tbl ORDER BY a;
a | b | c
---+------+---------
1 | text | 123.456
5 | |
(2 rows)
DROP VIEW rw_view;
DROP TABLE child_local;
DROP FOREIGN TABLE child_foreign;
DROP TABLE sub_parent;
DROP TABLE parent_tbl;
-- ===================================================================
-- test serial columns (ie, sequence-based defaults)
-- ===================================================================
create table loc1 (f1 serial, f2 text);
alter table loc1 set (autovacuum_enabled = 'false');
create foreign table rem1 (f1 serial, f2 text)
server loopback options(table_name 'loc1');
select pg_catalog.setval('rem1_f1_seq', 10, false);
setval
--------
10
(1 row)
insert into loc1(f2) values('hi');
insert into rem1(f2) values('hi remote');
insert into loc1(f2) values('bye');
insert into rem1(f2) values('bye remote');
select * from loc1;
f1 | f2
----+------------
1 | hi
10 | hi remote
2 | bye
11 | bye remote
(4 rows)
select * from rem1;
f1 | f2
----+------------
1 | hi
10 | hi remote
2 | bye
11 | bye remote
(4 rows)
-- ===================================================================
-- test generated columns
-- ===================================================================
create table gloc1 (
a int,
b int generated always as (a * 2) stored);
alter table gloc1 set (autovacuum_enabled = 'false');
create foreign table grem1 (
a int,
b int generated always as (a * 2) stored)
server loopback options(table_name 'gloc1');
explain (verbose, costs off)
insert into grem1 (a) values (1), (2);
QUERY PLAN
-------------------------------------------------------------------
Insert on public.grem1
Remote SQL: INSERT INTO public.gloc1(a, b) VALUES ($1, DEFAULT)
Batch Size: 1
-> Values Scan on "*VALUES*"
Output: "*VALUES*".column1, NULL::integer
(5 rows)
insert into grem1 (a) values (1), (2);
explain (verbose, costs off)
update grem1 set a = 22 where a = 2;
QUERY PLAN
------------------------------------------------------------------------------------
Update on public.grem1
Remote SQL: UPDATE public.gloc1 SET a = $2, b = DEFAULT WHERE ctid = $1
-> Foreign Scan on public.grem1
Output: 22, ctid, grem1.*
Remote SQL: SELECT a, b, ctid FROM public.gloc1 WHERE ((a = 2)) FOR UPDATE
(5 rows)
update grem1 set a = 22 where a = 2;
select * from gloc1;
a | b
----+----
1 | 2
22 | 44
(2 rows)
select * from grem1;
a | b
----+----
1 | 2
22 | 44
(2 rows)
delete from grem1;
-- test copy from
copy grem1 from stdin;
select * from gloc1;
a | b
---+---
1 | 2
2 | 4
(2 rows)
select * from grem1;
a | b
---+---
1 | 2
2 | 4
(2 rows)
delete from grem1;
-- test batch insert
alter server loopback options (add batch_size '10');
explain (verbose, costs off)
insert into grem1 (a) values (1), (2);
QUERY PLAN
-------------------------------------------------------------------
Insert on public.grem1
Remote SQL: INSERT INTO public.gloc1(a, b) VALUES ($1, DEFAULT)
Batch Size: 10
-> Values Scan on "*VALUES*"
Output: "*VALUES*".column1, NULL::integer
(5 rows)
insert into grem1 (a) values (1), (2);
select * from gloc1;
a | b
---+---
1 | 2
2 | 4
(2 rows)
select * from grem1;
a | b
---+---
1 | 2
2 | 4
(2 rows)
delete from grem1;
-- batch insert with foreign partitions.
-- This schema uses two partitions, one local and one remote with a modulo
-- to loop across all of them in batches.
create table tab_batch_local (id int, data text);
insert into tab_batch_local select i, 'test'|| i from generate_series(1, 45) i;
create table tab_batch_sharded (id int, data text) partition by hash(id);
create table tab_batch_sharded_p0 partition of tab_batch_sharded
for values with (modulus 2, remainder 0);
create table tab_batch_sharded_p1_remote (id int, data text);
create foreign table tab_batch_sharded_p1 partition of tab_batch_sharded
for values with (modulus 2, remainder 1)
server loopback options (table_name 'tab_batch_sharded_p1_remote');
insert into tab_batch_sharded select * from tab_batch_local;
select count(*) from tab_batch_sharded;
count
-------
45
(1 row)
drop table tab_batch_local;
drop table tab_batch_sharded;
drop table tab_batch_sharded_p1_remote;
alter server loopback options (drop batch_size);
-- ===================================================================
-- test local triggers
-- ===================================================================
-- Trigger functions "borrowed" from triggers regress test.
CREATE FUNCTION trigger_func() RETURNS trigger LANGUAGE plpgsql AS $$
BEGIN
RAISE NOTICE 'trigger_func(%) called: action = %, when = %, level = %',
TG_ARGV[0], TG_OP, TG_WHEN, TG_LEVEL;
RETURN NULL;
END;$$;
CREATE TRIGGER trig_stmt_before BEFORE DELETE OR INSERT OR UPDATE OR TRUNCATE ON rem1
FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
CREATE TRIGGER trig_stmt_after AFTER DELETE OR INSERT OR UPDATE OR TRUNCATE ON rem1
FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
CREATE OR REPLACE FUNCTION trigger_data() RETURNS trigger
LANGUAGE plpgsql AS $$
declare
oldnew text[];
relid text;
argstr text;
begin
relid := TG_relid::regclass;
argstr := '';
for i in 0 .. TG_nargs - 1 loop
if i > 0 then
argstr := argstr || ', ';
end if;
argstr := argstr || TG_argv[i];
end loop;
RAISE NOTICE '%(%) % % % ON %',
tg_name, argstr, TG_when, TG_level, TG_OP, relid;
oldnew := '{}'::text[];
if TG_OP != 'INSERT' then
oldnew := array_append(oldnew, format('OLD: %s', OLD));
end if;
if TG_OP != 'DELETE' then
oldnew := array_append(oldnew, format('NEW: %s', NEW));
end if;
RAISE NOTICE '%', array_to_string(oldnew, ',');
if TG_OP = 'DELETE' then
return OLD;
else
return NEW;
end if;
end;
$$;
-- Test basic functionality
CREATE TRIGGER trig_row_before
BEFORE INSERT OR UPDATE OR DELETE ON rem1
FOR EACH ROW EXECUTE PROCEDURE trigger_data(23,'skidoo');
CREATE TRIGGER trig_row_after
AFTER INSERT OR UPDATE OR DELETE ON rem1
FOR EACH ROW EXECUTE PROCEDURE trigger_data(23,'skidoo');
delete from rem1;
NOTICE: trigger_func(<NULL>) called: action = DELETE, when = BEFORE, level = STATEMENT
NOTICE: trig_row_before(23, skidoo) BEFORE ROW DELETE ON rem1
NOTICE: OLD: (1,hi)
NOTICE: trig_row_before(23, skidoo) BEFORE ROW DELETE ON rem1
NOTICE: OLD: (10,"hi remote")
NOTICE: trig_row_before(23, skidoo) BEFORE ROW DELETE ON rem1
NOTICE: OLD: (2,bye)
NOTICE: trig_row_before(23, skidoo) BEFORE ROW DELETE ON rem1
NOTICE: OLD: (11,"bye remote")
NOTICE: trig_row_after(23, skidoo) AFTER ROW DELETE ON rem1
NOTICE: OLD: (1,hi)
NOTICE: trig_row_after(23, skidoo) AFTER ROW DELETE ON rem1
NOTICE: OLD: (10,"hi remote")
NOTICE: trig_row_after(23, skidoo) AFTER ROW DELETE ON rem1
NOTICE: OLD: (2,bye)
NOTICE: trig_row_after(23, skidoo) AFTER ROW DELETE ON rem1
NOTICE: OLD: (11,"bye remote")
NOTICE: trigger_func(<NULL>) called: action = DELETE, when = AFTER, level = STATEMENT
insert into rem1 values(1,'insert');
NOTICE: trigger_func(<NULL>) called: action = INSERT, when = BEFORE, level = STATEMENT
NOTICE: trig_row_before(23, skidoo) BEFORE ROW INSERT ON rem1
NOTICE: NEW: (1,insert)
NOTICE: trig_row_after(23, skidoo) AFTER ROW INSERT ON rem1
NOTICE: NEW: (1,insert)
NOTICE: trigger_func(<NULL>) called: action = INSERT, when = AFTER, level = STATEMENT
update rem1 set f2 = 'update' where f1 = 1;
NOTICE: trigger_func(<NULL>) called: action = UPDATE, when = BEFORE, level = STATEMENT
NOTICE: trig_row_before(23, skidoo) BEFORE ROW UPDATE ON rem1
NOTICE: OLD: (1,insert),NEW: (1,update)
NOTICE: trig_row_after(23, skidoo) AFTER ROW UPDATE ON rem1
NOTICE: OLD: (1,insert),NEW: (1,update)
NOTICE: trigger_func(<NULL>) called: action = UPDATE, when = AFTER, level = STATEMENT
update rem1 set f2 = f2 || f2;
NOTICE: trigger_func(<NULL>) called: action = UPDATE, when = BEFORE, level = STATEMENT
NOTICE: trig_row_before(23, skidoo) BEFORE ROW UPDATE ON rem1
NOTICE: OLD: (1,update),NEW: (1,updateupdate)
NOTICE: trig_row_after(23, skidoo) AFTER ROW UPDATE ON rem1
NOTICE: OLD: (1,update),NEW: (1,updateupdate)
NOTICE: trigger_func(<NULL>) called: action = UPDATE, when = AFTER, level = STATEMENT
truncate rem1;
NOTICE: trigger_func(<NULL>) called: action = TRUNCATE, when = BEFORE, level = STATEMENT
NOTICE: trigger_func(<NULL>) called: action = TRUNCATE, when = AFTER, level = STATEMENT
-- cleanup
DROP TRIGGER trig_row_before ON rem1;
DROP TRIGGER trig_row_after ON rem1;
DROP TRIGGER trig_stmt_before ON rem1;
DROP TRIGGER trig_stmt_after ON rem1;
DELETE from rem1;
-- Test multiple AFTER ROW triggers on a foreign table
CREATE TRIGGER trig_row_after1
AFTER INSERT OR UPDATE OR DELETE ON rem1
FOR EACH ROW EXECUTE PROCEDURE trigger_data(23,'skidoo');
CREATE TRIGGER trig_row_after2
AFTER INSERT OR UPDATE OR DELETE ON rem1
FOR EACH ROW EXECUTE PROCEDURE trigger_data(23,'skidoo');
insert into rem1 values(1,'insert');
NOTICE: trig_row_after1(23, skidoo) AFTER ROW INSERT ON rem1
NOTICE: NEW: (1,insert)
NOTICE: trig_row_after2(23, skidoo) AFTER ROW INSERT ON rem1
NOTICE: NEW: (1,insert)
update rem1 set f2 = 'update' where f1 = 1;
NOTICE: trig_row_after1(23, skidoo) AFTER ROW UPDATE ON rem1
NOTICE: OLD: (1,insert),NEW: (1,update)
NOTICE: trig_row_after2(23, skidoo) AFTER ROW UPDATE ON rem1
NOTICE: OLD: (1,insert),NEW: (1,update)
update rem1 set f2 = f2 || f2;
NOTICE: trig_row_after1(23, skidoo) AFTER ROW UPDATE ON rem1
NOTICE: OLD: (1,update),NEW: (1,updateupdate)
NOTICE: trig_row_after2(23, skidoo) AFTER ROW UPDATE ON rem1
NOTICE: OLD: (1,update),NEW: (1,updateupdate)
delete from rem1;
NOTICE: trig_row_after1(23, skidoo) AFTER ROW DELETE ON rem1
NOTICE: OLD: (1,updateupdate)
NOTICE: trig_row_after2(23, skidoo) AFTER ROW DELETE ON rem1
NOTICE: OLD: (1,updateupdate)
-- cleanup
DROP TRIGGER trig_row_after1 ON rem1;
DROP TRIGGER trig_row_after2 ON rem1;
-- Test WHEN conditions
CREATE TRIGGER trig_row_before_insupd
BEFORE INSERT OR UPDATE ON rem1
FOR EACH ROW
WHEN (NEW.f2 like '%update%')
EXECUTE PROCEDURE trigger_data(23,'skidoo');
CREATE TRIGGER trig_row_after_insupd
AFTER INSERT OR UPDATE ON rem1
FOR EACH ROW
WHEN (NEW.f2 like '%update%')
EXECUTE PROCEDURE trigger_data(23,'skidoo');
-- Insert or update not matching: nothing happens
INSERT INTO rem1 values(1, 'insert');
UPDATE rem1 set f2 = 'test';
-- Insert or update matching: triggers are fired
INSERT INTO rem1 values(2, 'update');
NOTICE: trig_row_before_insupd(23, skidoo) BEFORE ROW INSERT ON rem1
NOTICE: NEW: (2,update)
NOTICE: trig_row_after_insupd(23, skidoo) AFTER ROW INSERT ON rem1
NOTICE: NEW: (2,update)
UPDATE rem1 set f2 = 'update update' where f1 = '2';
NOTICE: trig_row_before_insupd(23, skidoo) BEFORE ROW UPDATE ON rem1
NOTICE: OLD: (2,update),NEW: (2,"update update")
NOTICE: trig_row_after_insupd(23, skidoo) AFTER ROW UPDATE ON rem1
NOTICE: OLD: (2,update),NEW: (2,"update update")
CREATE TRIGGER trig_row_before_delete
BEFORE DELETE ON rem1
FOR EACH ROW
WHEN (OLD.f2 like '%update%')
EXECUTE PROCEDURE trigger_data(23,'skidoo');
CREATE TRIGGER trig_row_after_delete
AFTER DELETE ON rem1
FOR EACH ROW
WHEN (OLD.f2 like '%update%')
EXECUTE PROCEDURE trigger_data(23,'skidoo');
-- Trigger is fired for f1=2, not for f1=1
DELETE FROM rem1;
NOTICE: trig_row_before_delete(23, skidoo) BEFORE ROW DELETE ON rem1
NOTICE: OLD: (2,"update update")
NOTICE: trig_row_after_delete(23, skidoo) AFTER ROW DELETE ON rem1
NOTICE: OLD: (2,"update update")
-- cleanup
DROP TRIGGER trig_row_before_insupd ON rem1;
DROP TRIGGER trig_row_after_insupd ON rem1;
DROP TRIGGER trig_row_before_delete ON rem1;
DROP TRIGGER trig_row_after_delete ON rem1;
-- Test various RETURN statements in BEFORE triggers.
CREATE FUNCTION trig_row_before_insupdate() RETURNS TRIGGER AS $$
BEGIN
NEW.f2 := NEW.f2 || ' triggered !';
RETURN NEW;
END
$$ language plpgsql;
CREATE TRIGGER trig_row_before_insupd
BEFORE INSERT OR UPDATE ON rem1
FOR EACH ROW EXECUTE PROCEDURE trig_row_before_insupdate();
-- The new values should have 'triggered' appended
INSERT INTO rem1 values(1, 'insert');
SELECT * from loc1;
f1 | f2
----+--------------------
1 | insert triggered !
(1 row)
INSERT INTO rem1 values(2, 'insert') RETURNING f2;
f2
--------------------
insert triggered !
(1 row)
SELECT * from loc1;
f1 | f2
----+--------------------
1 | insert triggered !
2 | insert triggered !
(2 rows)
UPDATE rem1 set f2 = '';
SELECT * from loc1;
f1 | f2
----+--------------
1 | triggered !
2 | triggered !
(2 rows)
UPDATE rem1 set f2 = 'skidoo' RETURNING f2;
f2
--------------------
skidoo triggered !
skidoo triggered !
(2 rows)
SELECT * from loc1;
f1 | f2
----+--------------------
1 | skidoo triggered !
2 | skidoo triggered !
(2 rows)
EXPLAIN (verbose, costs off)
UPDATE rem1 set f1 = 10; -- all columns should be transmitted
QUERY PLAN
-----------------------------------------------------------------------
Update on public.rem1
Remote SQL: UPDATE public.loc1 SET f1 = $2, f2 = $3 WHERE ctid = $1
-> Foreign Scan on public.rem1
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: 10, ctid, rem1.*
Remote SQL: SELECT f1, f2, ctid FROM public.loc1 FOR UPDATE
(5 rows)
UPDATE rem1 set f1 = 10;
SELECT * from loc1;
f1 | f2
----+--------------------------------
10 | skidoo triggered ! triggered !
10 | skidoo triggered ! triggered !
(2 rows)
DELETE FROM rem1;
-- Add a second trigger, to check that the changes are propagated correctly
-- from trigger to trigger
CREATE TRIGGER trig_row_before_insupd2
BEFORE INSERT OR UPDATE ON rem1
FOR EACH ROW EXECUTE PROCEDURE trig_row_before_insupdate();
INSERT INTO rem1 values(1, 'insert');
SELECT * from loc1;
f1 | f2
----+--------------------------------
1 | insert triggered ! triggered !
(1 row)
INSERT INTO rem1 values(2, 'insert') RETURNING f2;
f2
--------------------------------
insert triggered ! triggered !
(1 row)
SELECT * from loc1;
f1 | f2
----+--------------------------------
1 | insert triggered ! triggered !
2 | insert triggered ! triggered !
(2 rows)
UPDATE rem1 set f2 = '';
SELECT * from loc1;
f1 | f2
----+--------------------------
1 | triggered ! triggered !
2 | triggered ! triggered !
(2 rows)
UPDATE rem1 set f2 = 'skidoo' RETURNING f2;
f2
--------------------------------
skidoo triggered ! triggered !
skidoo triggered ! triggered !
(2 rows)
SELECT * from loc1;
f1 | f2
----+--------------------------------
1 | skidoo triggered ! triggered !
2 | skidoo triggered ! triggered !
(2 rows)
DROP TRIGGER trig_row_before_insupd ON rem1;
DROP TRIGGER trig_row_before_insupd2 ON rem1;
DELETE from rem1;
INSERT INTO rem1 VALUES (1, 'test');
-- Test with a trigger returning NULL
CREATE FUNCTION trig_null() RETURNS TRIGGER AS $$
BEGIN
RETURN NULL;
END
$$ language plpgsql;
CREATE TRIGGER trig_null
BEFORE INSERT OR UPDATE OR DELETE ON rem1
FOR EACH ROW EXECUTE PROCEDURE trig_null();
-- Nothing should have changed.
INSERT INTO rem1 VALUES (2, 'test2');
SELECT * from loc1;
f1 | f2
----+------
1 | test
(1 row)
UPDATE rem1 SET f2 = 'test2';
SELECT * from loc1;
f1 | f2
----+------
1 | test
(1 row)
DELETE from rem1;
SELECT * from loc1;
f1 | f2
----+------
1 | test
(1 row)
DROP TRIGGER trig_null ON rem1;
DELETE from rem1;
-- Test a combination of local and remote triggers
CREATE TRIGGER trig_row_before
BEFORE INSERT OR UPDATE OR DELETE ON rem1
FOR EACH ROW EXECUTE PROCEDURE trigger_data(23,'skidoo');
CREATE TRIGGER trig_row_after
AFTER INSERT OR UPDATE OR DELETE ON rem1
FOR EACH ROW EXECUTE PROCEDURE trigger_data(23,'skidoo');
CREATE TRIGGER trig_local_before BEFORE INSERT OR UPDATE ON loc1
FOR EACH ROW EXECUTE PROCEDURE trig_row_before_insupdate();
INSERT INTO rem1(f2) VALUES ('test');
NOTICE: trig_row_before(23, skidoo) BEFORE ROW INSERT ON rem1
NOTICE: NEW: (12,test)
NOTICE: trig_row_after(23, skidoo) AFTER ROW INSERT ON rem1
NOTICE: NEW: (12,"test triggered !")
UPDATE rem1 SET f2 = 'testo';
NOTICE: trig_row_before(23, skidoo) BEFORE ROW UPDATE ON rem1
NOTICE: OLD: (12,"test triggered !"),NEW: (12,testo)
NOTICE: trig_row_after(23, skidoo) AFTER ROW UPDATE ON rem1
NOTICE: OLD: (12,"test triggered !"),NEW: (12,"testo triggered !")
-- Test returning a system attribute
INSERT INTO rem1(f2) VALUES ('test') RETURNING ctid;
NOTICE: trig_row_before(23, skidoo) BEFORE ROW INSERT ON rem1
NOTICE: NEW: (13,test)
NOTICE: trig_row_after(23, skidoo) AFTER ROW INSERT ON rem1
NOTICE: NEW: (13,"test triggered !")
ctid
--------
(0,25)
(1 row)
-- cleanup
DROP TRIGGER trig_row_before ON rem1;
DROP TRIGGER trig_row_after ON rem1;
DROP TRIGGER trig_local_before ON loc1;
-- Test direct foreign table modification functionality
EXPLAIN (verbose, costs off)
DELETE FROM rem1; -- can be pushed down
QUERY PLAN
---------------------------------------------
Delete on public.rem1
-> Foreign Delete on public.rem1
Remote SQL: DELETE FROM public.loc1
(3 rows)
EXPLAIN (verbose, costs off)
DELETE FROM rem1 WHERE false; -- currently can't be pushed down
QUERY PLAN
-------------------------------------------------------
Delete on public.rem1
Remote SQL: DELETE FROM public.loc1 WHERE ctid = $1
-> Result
Output: ctid
One-Time Filter: false
(5 rows)
-- Test with statement-level triggers
CREATE TRIGGER trig_stmt_before
BEFORE DELETE OR INSERT OR UPDATE ON rem1
FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
EXPLAIN (verbose, costs off)
UPDATE rem1 set f2 = ''; -- can be pushed down
QUERY PLAN
----------------------------------------------------------
Update on public.rem1
-> Foreign Update on public.rem1
Remote SQL: UPDATE public.loc1 SET f2 = ''::text
(3 rows)
EXPLAIN (verbose, costs off)
DELETE FROM rem1; -- can be pushed down
QUERY PLAN
---------------------------------------------
Delete on public.rem1
-> Foreign Delete on public.rem1
Remote SQL: DELETE FROM public.loc1
(3 rows)
DROP TRIGGER trig_stmt_before ON rem1;
CREATE TRIGGER trig_stmt_after
AFTER DELETE OR INSERT OR UPDATE ON rem1
FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
EXPLAIN (verbose, costs off)
UPDATE rem1 set f2 = ''; -- can be pushed down
QUERY PLAN
----------------------------------------------------------
Update on public.rem1
-> Foreign Update on public.rem1
Remote SQL: UPDATE public.loc1 SET f2 = ''::text
(3 rows)
EXPLAIN (verbose, costs off)
DELETE FROM rem1; -- can be pushed down
QUERY PLAN
---------------------------------------------
Delete on public.rem1
-> Foreign Delete on public.rem1
Remote SQL: DELETE FROM public.loc1
(3 rows)
DROP TRIGGER trig_stmt_after ON rem1;
-- Test with row-level ON INSERT triggers
CREATE TRIGGER trig_row_before_insert
BEFORE INSERT ON rem1
FOR EACH ROW EXECUTE PROCEDURE trigger_data(23,'skidoo');
EXPLAIN (verbose, costs off)
UPDATE rem1 set f2 = ''; -- can be pushed down
QUERY PLAN
----------------------------------------------------------
Update on public.rem1
-> Foreign Update on public.rem1
Remote SQL: UPDATE public.loc1 SET f2 = ''::text
(3 rows)
EXPLAIN (verbose, costs off)
DELETE FROM rem1; -- can be pushed down
QUERY PLAN
---------------------------------------------
Delete on public.rem1
-> Foreign Delete on public.rem1
Remote SQL: DELETE FROM public.loc1
(3 rows)
DROP TRIGGER trig_row_before_insert ON rem1;
CREATE TRIGGER trig_row_after_insert
AFTER INSERT ON rem1
FOR EACH ROW EXECUTE PROCEDURE trigger_data(23,'skidoo');
EXPLAIN (verbose, costs off)
UPDATE rem1 set f2 = ''; -- can be pushed down
QUERY PLAN
----------------------------------------------------------
Update on public.rem1
-> Foreign Update on public.rem1
Remote SQL: UPDATE public.loc1 SET f2 = ''::text
(3 rows)
EXPLAIN (verbose, costs off)
DELETE FROM rem1; -- can be pushed down
QUERY PLAN
---------------------------------------------
Delete on public.rem1
-> Foreign Delete on public.rem1
Remote SQL: DELETE FROM public.loc1
(3 rows)
DROP TRIGGER trig_row_after_insert ON rem1;
-- Test with row-level ON UPDATE triggers
CREATE TRIGGER trig_row_before_update
BEFORE UPDATE ON rem1
FOR EACH ROW EXECUTE PROCEDURE trigger_data(23,'skidoo');
EXPLAIN (verbose, costs off)
UPDATE rem1 set f2 = ''; -- can't be pushed down
QUERY PLAN
-----------------------------------------------------------------------
Update on public.rem1
Remote SQL: UPDATE public.loc1 SET f1 = $2, f2 = $3 WHERE ctid = $1
-> Foreign Scan on public.rem1
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: ''::text, ctid, rem1.*
Remote SQL: SELECT f1, f2, ctid FROM public.loc1 FOR UPDATE
(5 rows)
EXPLAIN (verbose, costs off)
DELETE FROM rem1; -- can be pushed down
QUERY PLAN
---------------------------------------------
Delete on public.rem1
-> Foreign Delete on public.rem1
Remote SQL: DELETE FROM public.loc1
(3 rows)
DROP TRIGGER trig_row_before_update ON rem1;
CREATE TRIGGER trig_row_after_update
AFTER UPDATE ON rem1
FOR EACH ROW EXECUTE PROCEDURE trigger_data(23,'skidoo');
EXPLAIN (verbose, costs off)
UPDATE rem1 set f2 = ''; -- can't be pushed down
QUERY PLAN
-------------------------------------------------------------------------------
Update on public.rem1
Remote SQL: UPDATE public.loc1 SET f2 = $2 WHERE ctid = $1 RETURNING f1, f2
-> Foreign Scan on public.rem1
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: ''::text, ctid, rem1.*
Remote SQL: SELECT f1, f2, ctid FROM public.loc1 FOR UPDATE
(5 rows)
EXPLAIN (verbose, costs off)
DELETE FROM rem1; -- can be pushed down
QUERY PLAN
---------------------------------------------
Delete on public.rem1
-> Foreign Delete on public.rem1
Remote SQL: DELETE FROM public.loc1
(3 rows)
DROP TRIGGER trig_row_after_update ON rem1;
-- Test with row-level ON DELETE triggers
CREATE TRIGGER trig_row_before_delete
BEFORE DELETE ON rem1
FOR EACH ROW EXECUTE PROCEDURE trigger_data(23,'skidoo');
EXPLAIN (verbose, costs off)
UPDATE rem1 set f2 = ''; -- can be pushed down
QUERY PLAN
----------------------------------------------------------
Update on public.rem1
-> Foreign Update on public.rem1
Remote SQL: UPDATE public.loc1 SET f2 = ''::text
(3 rows)
EXPLAIN (verbose, costs off)
DELETE FROM rem1; -- can't be pushed down
QUERY PLAN
---------------------------------------------------------------------
Delete on public.rem1
Remote SQL: DELETE FROM public.loc1 WHERE ctid = $1
-> Foreign Scan on public.rem1
Output: ctid, rem1.*
Remote SQL: SELECT f1, f2, ctid FROM public.loc1 FOR UPDATE
(5 rows)
DROP TRIGGER trig_row_before_delete ON rem1;
CREATE TRIGGER trig_row_after_delete
AFTER DELETE ON rem1
FOR EACH ROW EXECUTE PROCEDURE trigger_data(23,'skidoo');
EXPLAIN (verbose, costs off)
UPDATE rem1 set f2 = ''; -- can be pushed down
QUERY PLAN
----------------------------------------------------------
Update on public.rem1
-> Foreign Update on public.rem1
Remote SQL: UPDATE public.loc1 SET f2 = ''::text
(3 rows)
EXPLAIN (verbose, costs off)
DELETE FROM rem1; -- can't be pushed down
QUERY PLAN
------------------------------------------------------------------------
Delete on public.rem1
Remote SQL: DELETE FROM public.loc1 WHERE ctid = $1 RETURNING f1, f2
-> Foreign Scan on public.rem1
Output: ctid, rem1.*
Remote SQL: SELECT f1, f2, ctid FROM public.loc1 FOR UPDATE
(5 rows)
DROP TRIGGER trig_row_after_delete ON rem1;
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
-- ===================================================================
-- test inheritance features
-- ===================================================================
CREATE TABLE a (aa TEXT);
CREATE TABLE loct (aa TEXT, bb TEXT);
ALTER TABLE a SET (autovacuum_enabled = 'false');
ALTER TABLE loct SET (autovacuum_enabled = 'false');
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
CREATE FOREIGN TABLE b (bb TEXT) INHERITS (a)
SERVER loopback OPTIONS (table_name 'loct');
INSERT INTO a(aa) VALUES('aaa');
INSERT INTO a(aa) VALUES('aaaa');
INSERT INTO a(aa) VALUES('aaaaa');
INSERT INTO b(aa) VALUES('bbb');
INSERT INTO b(aa) VALUES('bbbb');
INSERT INTO b(aa) VALUES('bbbbb');
SELECT tableoid::regclass, * FROM a;
tableoid | aa
----------+-------
a | aaa
a | aaaa
a | aaaaa
b | bbb
b | bbbb
b | bbbbb
(6 rows)
SELECT tableoid::regclass, * FROM b;
tableoid | aa | bb
----------+-------+----
b | bbb |
b | bbbb |
b | bbbbb |
(3 rows)
SELECT tableoid::regclass, * FROM ONLY a;
tableoid | aa
----------+-------
a | aaa
a | aaaa
a | aaaaa
(3 rows)
UPDATE a SET aa = 'zzzzzz' WHERE aa LIKE 'aaaa%';
SELECT tableoid::regclass, * FROM a;
tableoid | aa
----------+--------
a | aaa
a | zzzzzz
a | zzzzzz
b | bbb
b | bbbb
b | bbbbb
(6 rows)
SELECT tableoid::regclass, * FROM b;
tableoid | aa | bb
----------+-------+----
b | bbb |
b | bbbb |
b | bbbbb |
(3 rows)
SELECT tableoid::regclass, * FROM ONLY a;
tableoid | aa
----------+--------
a | aaa
a | zzzzzz
a | zzzzzz
(3 rows)
UPDATE b SET aa = 'new';
SELECT tableoid::regclass, * FROM a;
tableoid | aa
----------+--------
a | aaa
a | zzzzzz
a | zzzzzz
b | new
b | new
b | new
(6 rows)
SELECT tableoid::regclass, * FROM b;
tableoid | aa | bb
----------+-----+----
b | new |
b | new |
b | new |
(3 rows)
SELECT tableoid::regclass, * FROM ONLY a;
tableoid | aa
----------+--------
a | aaa
a | zzzzzz
a | zzzzzz
(3 rows)
UPDATE a SET aa = 'newtoo';
SELECT tableoid::regclass, * FROM a;
tableoid | aa
----------+--------
a | newtoo
a | newtoo
a | newtoo
b | newtoo
b | newtoo
b | newtoo
(6 rows)
SELECT tableoid::regclass, * FROM b;
tableoid | aa | bb
----------+--------+----
b | newtoo |
b | newtoo |
b | newtoo |
(3 rows)
SELECT tableoid::regclass, * FROM ONLY a;
tableoid | aa
----------+--------
a | newtoo
a | newtoo
a | newtoo
(3 rows)
DELETE FROM a;
SELECT tableoid::regclass, * FROM a;
tableoid | aa
----------+----
(0 rows)
SELECT tableoid::regclass, * FROM b;
tableoid | aa | bb
----------+----+----
(0 rows)
SELECT tableoid::regclass, * FROM ONLY a;
tableoid | aa
----------+----
(0 rows)
DROP TABLE a CASCADE;
NOTICE: drop cascades to foreign table b
DROP TABLE loct;
-- Check SELECT FOR UPDATE/SHARE with an inherited source table
create table loct1 (f1 int, f2 int, f3 int);
create table loct2 (f1 int, f2 int, f3 int);
alter table loct1 set (autovacuum_enabled = 'false');
alter table loct2 set (autovacuum_enabled = 'false');
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
create table foo (f1 int, f2 int);
create foreign table foo2 (f3 int) inherits (foo)
server loopback options (table_name 'loct1');
create table bar (f1 int, f2 int);
create foreign table bar2 (f3 int) inherits (bar)
server loopback options (table_name 'loct2');
alter table foo set (autovacuum_enabled = 'false');
alter table bar set (autovacuum_enabled = 'false');
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
insert into foo values(1,1);
insert into foo values(3,3);
insert into foo2 values(2,2,2);
insert into foo2 values(4,4,4);
insert into bar values(1,11);
insert into bar values(2,22);
insert into bar values(6,66);
insert into bar2 values(3,33,33);
insert into bar2 values(4,44,44);
insert into bar2 values(7,77,77);
explain (verbose, costs off)
select * from bar where f1 in (select f1 from foo) for update;
QUERY PLAN
----------------------------------------------------------------------------------------------
LockRows
Output: bar.f1, bar.f2, bar.ctid, foo.ctid, bar.*, bar.tableoid, foo.*, foo.tableoid
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
-> Hash Join
Output: bar.f1, bar.f2, bar.ctid, foo.ctid, bar.*, bar.tableoid, foo.*, foo.tableoid
Inner Unique: true
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
Hash Cond: (bar.f1 = foo.f1)
-> Append
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Seq Scan on public.bar bar_1
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Output: bar_1.f1, bar_1.f2, bar_1.ctid, bar_1.*, bar_1.tableoid
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Foreign Scan on public.bar2 bar_2
Output: bar_2.f1, bar_2.f2, bar_2.ctid, bar_2.*, bar_2.tableoid
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct2 FOR UPDATE
-> Hash
Output: foo.ctid, foo.f1, foo.*, foo.tableoid
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
-> HashAggregate
Output: foo.ctid, foo.f1, foo.*, foo.tableoid
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
Group Key: foo.f1
-> Append
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Seq Scan on public.foo foo_1
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Output: foo_1.ctid, foo_1.f1, foo_1.*, foo_1.tableoid
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Foreign Scan on public.foo2 foo_2
Output: foo_2.ctid, foo_2.f1, foo_2.*, foo_2.tableoid
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct1
(23 rows)
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
select * from bar where f1 in (select f1 from foo) for update;
f1 | f2
----+----
1 | 11
2 | 22
3 | 33
4 | 44
(4 rows)
explain (verbose, costs off)
select * from bar where f1 in (select f1 from foo) for share;
QUERY PLAN
----------------------------------------------------------------------------------------------
LockRows
Output: bar.f1, bar.f2, bar.ctid, foo.ctid, bar.*, bar.tableoid, foo.*, foo.tableoid
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
-> Hash Join
Output: bar.f1, bar.f2, bar.ctid, foo.ctid, bar.*, bar.tableoid, foo.*, foo.tableoid
Inner Unique: true
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
Hash Cond: (bar.f1 = foo.f1)
-> Append
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Seq Scan on public.bar bar_1
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Output: bar_1.f1, bar_1.f2, bar_1.ctid, bar_1.*, bar_1.tableoid
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Foreign Scan on public.bar2 bar_2
Output: bar_2.f1, bar_2.f2, bar_2.ctid, bar_2.*, bar_2.tableoid
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct2 FOR SHARE
-> Hash
Output: foo.ctid, foo.f1, foo.*, foo.tableoid
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
-> HashAggregate
Output: foo.ctid, foo.f1, foo.*, foo.tableoid
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
Group Key: foo.f1
-> Append
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Seq Scan on public.foo foo_1
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Output: foo_1.ctid, foo_1.f1, foo_1.*, foo_1.tableoid
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Foreign Scan on public.foo2 foo_2
Output: foo_2.ctid, foo_2.f1, foo_2.*, foo_2.tableoid
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct1
(23 rows)
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
select * from bar where f1 in (select f1 from foo) for share;
f1 | f2
----+----
1 | 11
2 | 22
3 | 33
4 | 44
(4 rows)
-- Now check SELECT FOR UPDATE/SHARE with an inherited source table,
-- where the parent is itself a foreign table
create table loct4 (f1 int, f2 int, f3 int);
create foreign table foo2child (f3 int) inherits (foo2)
server loopback options (table_name 'loct4');
NOTICE: moving and merging column "f3" with inherited definition
DETAIL: User-specified column moved to the position of the inherited column.
explain (verbose, costs off)
select * from bar where f1 in (select f1 from foo2) for share;
QUERY PLAN
--------------------------------------------------------------------------------------
LockRows
Output: bar.f1, bar.f2, bar.ctid, foo2.*, bar.*, bar.tableoid, foo2.tableoid
-> Hash Join
Output: bar.f1, bar.f2, bar.ctid, foo2.*, bar.*, bar.tableoid, foo2.tableoid
Inner Unique: true
Hash Cond: (bar.f1 = foo2.f1)
-> Append
-> Seq Scan on public.bar bar_1
Output: bar_1.f1, bar_1.f2, bar_1.ctid, bar_1.*, bar_1.tableoid
-> Foreign Scan on public.bar2 bar_2
Output: bar_2.f1, bar_2.f2, bar_2.ctid, bar_2.*, bar_2.tableoid
Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct2 FOR SHARE
-> Hash
Output: foo2.*, foo2.f1, foo2.tableoid
-> HashAggregate
Output: foo2.*, foo2.f1, foo2.tableoid
Group Key: foo2.f1
-> Append
-> Foreign Scan on public.foo2 foo2_1
Output: foo2_1.*, foo2_1.f1, foo2_1.tableoid
Remote SQL: SELECT f1, f2, f3 FROM public.loct1
-> Foreign Scan on public.foo2child foo2_2
Output: foo2_2.*, foo2_2.f1, foo2_2.tableoid
Remote SQL: SELECT f1, f2, f3 FROM public.loct4
(24 rows)
select * from bar where f1 in (select f1 from foo2) for share;
f1 | f2
----+----
2 | 22
4 | 44
(2 rows)
drop foreign table foo2child;
-- And with a local child relation of the foreign table parent
create table foo2child (f3 int) inherits (foo2);
NOTICE: moving and merging column "f3" with inherited definition
DETAIL: User-specified column moved to the position of the inherited column.
explain (verbose, costs off)
select * from bar where f1 in (select f1 from foo2) for share;
QUERY PLAN
-------------------------------------------------------------------------------------------------
LockRows
Output: bar.f1, bar.f2, bar.ctid, foo2.*, bar.*, bar.tableoid, foo2.ctid, foo2.tableoid
-> Hash Join
Output: bar.f1, bar.f2, bar.ctid, foo2.*, bar.*, bar.tableoid, foo2.ctid, foo2.tableoid
Inner Unique: true
Hash Cond: (bar.f1 = foo2.f1)
-> Append
-> Seq Scan on public.bar bar_1
Output: bar_1.f1, bar_1.f2, bar_1.ctid, bar_1.*, bar_1.tableoid
-> Foreign Scan on public.bar2 bar_2
Output: bar_2.f1, bar_2.f2, bar_2.ctid, bar_2.*, bar_2.tableoid
Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct2 FOR SHARE
-> Hash
Output: foo2.*, foo2.f1, foo2.ctid, foo2.tableoid
-> HashAggregate
Output: foo2.*, foo2.f1, foo2.ctid, foo2.tableoid
Group Key: foo2.f1
-> Append
-> Foreign Scan on public.foo2 foo2_1
Output: foo2_1.*, foo2_1.f1, foo2_1.ctid, foo2_1.tableoid
Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct1
-> Seq Scan on public.foo2child foo2_2
Output: foo2_2.*, foo2_2.f1, foo2_2.ctid, foo2_2.tableoid
(23 rows)
select * from bar where f1 in (select f1 from foo2) for share;
f1 | f2
----+----
2 | 22
4 | 44
(2 rows)
drop table foo2child;
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
-- Check UPDATE with inherited target and an inherited source table
explain (verbose, costs off)
update bar set f2 = f2 + 100 where f1 in (select f1 from foo);
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
QUERY PLAN
-------------------------------------------------------------------------------------------------------
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
Update on public.bar
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Update on public.bar bar_1
Foreign Update on public.bar2 bar_2
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
Remote SQL: UPDATE public.loct2 SET f2 = $2 WHERE ctid = $1
-> Hash Join
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: (bar.f2 + 100), foo.ctid, bar.tableoid, bar.ctid, (NULL::record), foo.*, foo.tableoid
Inner Unique: true
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
Hash Cond: (bar.f1 = foo.f1)
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
-> Append
-> Seq Scan on public.bar bar_1
Output: bar_1.f2, bar_1.f1, bar_1.tableoid, bar_1.ctid, NULL::record
-> Foreign Scan on public.bar2 bar_2
Output: bar_2.f2, bar_2.f1, bar_2.tableoid, bar_2.ctid, bar_2.*
Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct2 FOR UPDATE
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
-> Hash
Output: foo.ctid, foo.f1, foo.*, foo.tableoid
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
-> HashAggregate
Output: foo.ctid, foo.f1, foo.*, foo.tableoid
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
Group Key: foo.f1
-> Append
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Seq Scan on public.foo foo_1
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Output: foo_1.ctid, foo_1.f1, foo_1.*, foo_1.tableoid
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Foreign Scan on public.foo2 foo_2
Output: foo_2.ctid, foo_2.f1, foo_2.*, foo_2.tableoid
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct1
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
(25 rows)
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
update bar set f2 = f2 + 100 where f1 in (select f1 from foo);
select tableoid::regclass, * from bar order by 1,2;
tableoid | f1 | f2
----------+----+-----
bar | 1 | 111
bar | 2 | 122
bar | 6 | 66
bar2 | 3 | 133
bar2 | 4 | 144
bar2 | 7 | 77
(6 rows)
-- Check UPDATE with inherited target and an appendrel subquery
explain (verbose, costs off)
update bar set f2 = f2 + 100
from
( select f1 from foo union all select f1+3 from foo ) ss
where bar.f1 = ss.f1;
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
QUERY PLAN
------------------------------------------------------------------------------------------------
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
Update on public.bar
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Update on public.bar bar_1
Foreign Update on public.bar2 bar_2
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
Remote SQL: UPDATE public.loct2 SET f2 = $2 WHERE ctid = $1
-> Merge Join
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: (bar.f2 + 100), (ROW(foo.f1)), bar.tableoid, bar.ctid, (NULL::record)
Merge Cond: (bar.f1 = foo.f1)
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
-> Sort
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: bar.f2, bar.f1, bar.tableoid, bar.ctid, (NULL::record)
Sort Key: bar.f1
-> Append
-> Seq Scan on public.bar bar_1
Output: bar_1.f2, bar_1.f1, bar_1.tableoid, bar_1.ctid, NULL::record
-> Foreign Scan on public.bar2 bar_2
Output: bar_2.f2, bar_2.f1, bar_2.tableoid, bar_2.ctid, bar_2.*
Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct2 FOR UPDATE
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
-> Sort
Output: (ROW(foo.f1)), foo.f1
Sort Key: foo.f1
-> Append
-> Seq Scan on public.foo
Output: ROW(foo.f1), foo.f1
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
-> Foreign Scan on public.foo2 foo_1
Output: ROW(foo_1.f1), foo_1.f1
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
Remote SQL: SELECT f1 FROM public.loct1
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
-> Seq Scan on public.foo foo_2
Output: ROW((foo_2.f1 + 3)), (foo_2.f1 + 3)
-> Foreign Scan on public.foo2 foo_3
Output: ROW((foo_3.f1 + 3)), (foo_3.f1 + 3)
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
Remote SQL: SELECT f1 FROM public.loct1
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
(30 rows)
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
update bar set f2 = f2 + 100
from
( select f1 from foo union all select f1+3 from foo ) ss
where bar.f1 = ss.f1;
select tableoid::regclass, * from bar order by 1,2;
tableoid | f1 | f2
----------+----+-----
bar | 1 | 211
bar | 2 | 222
bar | 6 | 166
bar2 | 3 | 233
bar2 | 4 | 244
bar2 | 7 | 177
(6 rows)
-- Test forcing the remote server to produce sorted data for a merge join,
-- but the foreign table is an inheritance child.
truncate table loct1;
truncate table only foo;
\set num_rows_foo 2000
insert into loct1 select generate_series(0, :num_rows_foo, 2), generate_series(0, :num_rows_foo, 2), generate_series(0, :num_rows_foo, 2);
insert into foo select generate_series(1, :num_rows_foo, 2), generate_series(1, :num_rows_foo, 2);
SET enable_hashjoin to false;
SET enable_nestloop to false;
alter foreign table foo2 options (use_remote_estimate 'true');
create index i_loct1_f1 on loct1(f1);
create index i_foo_f1 on foo(f1);
analyze foo;
analyze loct1;
-- inner join; expressions in the clauses appear in the equivalence class list
explain (verbose, costs off)
select foo.f1, loct1.f1 from foo join loct1 on (foo.f1 = loct1.f1) order by foo.f2 offset 10 limit 10;
QUERY PLAN
--------------------------------------------------------------------------------------------------
Limit
Output: foo.f1, loct1.f1, foo.f2
-> Sort
Output: foo.f1, loct1.f1, foo.f2
Sort Key: foo.f2
-> Merge Join
Output: foo.f1, loct1.f1, foo.f2
Merge Cond: (foo.f1 = loct1.f1)
-> Merge Append
Sort Key: foo.f1
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Index Scan using i_foo_f1 on public.foo foo_1
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Output: foo_1.f1, foo_1.f2
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Foreign Scan on public.foo2 foo_2
Output: foo_2.f1, foo_2.f2
Remote SQL: SELECT f1, f2 FROM public.loct1 ORDER BY f1 ASC NULLS LAST
-> Index Only Scan using i_loct1_f1 on public.loct1
Output: loct1.f1
(17 rows)
select foo.f1, loct1.f1 from foo join loct1 on (foo.f1 = loct1.f1) order by foo.f2 offset 10 limit 10;
f1 | f1
----+----
20 | 20
22 | 22
24 | 24
26 | 26
28 | 28
30 | 30
32 | 32
34 | 34
36 | 36
38 | 38
(10 rows)
-- outer join; expressions in the clauses do not appear in equivalence class
-- list but no output change as compared to the previous query
explain (verbose, costs off)
select foo.f1, loct1.f1 from foo left join loct1 on (foo.f1 = loct1.f1) order by foo.f2 offset 10 limit 10;
QUERY PLAN
--------------------------------------------------------------------------------------------------
Limit
Output: foo.f1, loct1.f1, foo.f2
-> Sort
Output: foo.f1, loct1.f1, foo.f2
Sort Key: foo.f2
-> Merge Left Join
Output: foo.f1, loct1.f1, foo.f2
Merge Cond: (foo.f1 = loct1.f1)
-> Merge Append
Sort Key: foo.f1
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Index Scan using i_foo_f1 on public.foo foo_1
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Output: foo_1.f1, foo_1.f2
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Foreign Scan on public.foo2 foo_2
Output: foo_2.f1, foo_2.f2
Remote SQL: SELECT f1, f2 FROM public.loct1 ORDER BY f1 ASC NULLS LAST
-> Index Only Scan using i_loct1_f1 on public.loct1
Output: loct1.f1
(17 rows)
select foo.f1, loct1.f1 from foo left join loct1 on (foo.f1 = loct1.f1) order by foo.f2 offset 10 limit 10;
f1 | f1
----+----
10 | 10
11 |
12 | 12
13 |
14 | 14
15 |
16 | 16
17 |
18 | 18
19 |
(10 rows)
RESET enable_hashjoin;
RESET enable_nestloop;
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
-- Test that WHERE CURRENT OF is not supported
begin;
declare c cursor for select * from bar where f1 = 7;
fetch from c;
f1 | f2
----+-----
7 | 177
(1 row)
update bar set f2 = null where current of c;
ERROR: WHERE CURRENT OF is not supported for this table type
rollback;
explain (verbose, costs off)
delete from foo where f1 < 5 returning *;
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
QUERY PLAN
--------------------------------------------------------------------------------------
Delete on public.foo
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: foo_1.f1, foo_1.f2
Delete on public.foo foo_1
Foreign Delete on public.foo2 foo_2
-> Append
-> Index Scan using i_foo_f1 on public.foo foo_1
Output: foo_1.tableoid, foo_1.ctid
Index Cond: (foo_1.f1 < 5)
-> Foreign Delete on public.foo2 foo_2
Remote SQL: DELETE FROM public.loct1 WHERE ((f1 < 5)) RETURNING f1, f2
(10 rows)
delete from foo where f1 < 5 returning *;
f1 | f2
----+----
1 | 1
3 | 3
0 | 0
2 | 2
4 | 4
(5 rows)
explain (verbose, costs off)
update bar set f2 = f2 + 100 returning *;
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
QUERY PLAN
------------------------------------------------------------------------------------------
Update on public.bar
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: bar_1.f1, bar_1.f2
Update on public.bar bar_1
Foreign Update on public.bar2 bar_2
-> Result
Output: (bar.f2 + 100), bar.tableoid, bar.ctid, (NULL::record)
-> Append
-> Seq Scan on public.bar bar_1
Output: bar_1.f2, bar_1.tableoid, bar_1.ctid, NULL::record
-> Foreign Update on public.bar2 bar_2
Remote SQL: UPDATE public.loct2 SET f2 = (f2 + 100) RETURNING f1, f2
(11 rows)
update bar set f2 = f2 + 100 returning *;
f1 | f2
----+-----
1 | 311
2 | 322
6 | 266
3 | 333
4 | 344
7 | 277
(6 rows)
Fix creation of resjunk tlist entries for inherited mixed UPDATE/DELETE. rewriteTargetListUD's processing is dependent on the relkind of the query's target table. That was fine at the time it was made to act that way, even for queries on inheritance trees, because all tables in an inheritance tree would necessarily be plain tables. However, the 9.5 feature addition allowing some members of an inheritance tree to be foreign tables broke the assumption that rewriteTargetListUD's output tlist could be applied to all child tables with nothing more than column-number mapping. This led to visible failures if foreign child tables had row-level triggers, and would also break in cases where child tables belonged to FDWs that used methods other than CTID for row identification. To fix, delay running rewriteTargetListUD until after the planner has expanded inheritance, so that it is applied separately to the (already mapped) tlist for each child table. We can conveniently call it from preprocess_targetlist. Refactor associated code slightly to avoid the need to heap_open the target relation multiple times during preprocess_targetlist. (The APIs remain a bit ugly, particularly around the point of which steps scribble on parse->targetList and which don't. But avoiding such scribbling would require a change in FDW callback APIs, which is more pain than it's worth.) Also fix ExecModifyTable to ensure that "tupleid" is reset to NULL when we transition from rows providing a CTID to rows that don't. (That's really an independent bug, but it manifests in much the same cases.) Add a regression test checking one manifestation of this problem, which was that row-level triggers on a foreign child table did not work right. Back-patch to 9.5 where the problem was introduced. Etsuro Fujita, reviewed by Ildus Kurbangaliev and Ashutosh Bapat Discussion: https://postgr.es/m/20170514150525.0346ba72@postgrespro.ru
2017-11-27 23:53:56 +01:00
-- Test that UPDATE/DELETE with inherited target works with row-level triggers
CREATE TRIGGER trig_row_before
BEFORE UPDATE OR DELETE ON bar2
FOR EACH ROW EXECUTE PROCEDURE trigger_data(23,'skidoo');
CREATE TRIGGER trig_row_after
AFTER UPDATE OR DELETE ON bar2
FOR EACH ROW EXECUTE PROCEDURE trigger_data(23,'skidoo');
explain (verbose, costs off)
update bar set f2 = f2 + 100;
QUERY PLAN
--------------------------------------------------------------------------------------------------------
Fix creation of resjunk tlist entries for inherited mixed UPDATE/DELETE. rewriteTargetListUD's processing is dependent on the relkind of the query's target table. That was fine at the time it was made to act that way, even for queries on inheritance trees, because all tables in an inheritance tree would necessarily be plain tables. However, the 9.5 feature addition allowing some members of an inheritance tree to be foreign tables broke the assumption that rewriteTargetListUD's output tlist could be applied to all child tables with nothing more than column-number mapping. This led to visible failures if foreign child tables had row-level triggers, and would also break in cases where child tables belonged to FDWs that used methods other than CTID for row identification. To fix, delay running rewriteTargetListUD until after the planner has expanded inheritance, so that it is applied separately to the (already mapped) tlist for each child table. We can conveniently call it from preprocess_targetlist. Refactor associated code slightly to avoid the need to heap_open the target relation multiple times during preprocess_targetlist. (The APIs remain a bit ugly, particularly around the point of which steps scribble on parse->targetList and which don't. But avoiding such scribbling would require a change in FDW callback APIs, which is more pain than it's worth.) Also fix ExecModifyTable to ensure that "tupleid" is reset to NULL when we transition from rows providing a CTID to rows that don't. (That's really an independent bug, but it manifests in much the same cases.) Add a regression test checking one manifestation of this problem, which was that row-level triggers on a foreign child table did not work right. Back-patch to 9.5 where the problem was introduced. Etsuro Fujita, reviewed by Ildus Kurbangaliev and Ashutosh Bapat Discussion: https://postgr.es/m/20170514150525.0346ba72@postgrespro.ru
2017-11-27 23:53:56 +01:00
Update on public.bar
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Update on public.bar bar_1
Foreign Update on public.bar2 bar_2
Remote SQL: UPDATE public.loct2 SET f1 = $2, f2 = $3, f3 = $4 WHERE ctid = $1 RETURNING f1, f2, f3
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
-> Result
Output: (bar.f2 + 100), bar.tableoid, bar.ctid, (NULL::record)
-> Append
-> Seq Scan on public.bar bar_1
Output: bar_1.f2, bar_1.tableoid, bar_1.ctid, NULL::record
-> Foreign Scan on public.bar2 bar_2
Output: bar_2.f2, bar_2.tableoid, bar_2.ctid, bar_2.*
Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct2 FOR UPDATE
(12 rows)
Fix creation of resjunk tlist entries for inherited mixed UPDATE/DELETE. rewriteTargetListUD's processing is dependent on the relkind of the query's target table. That was fine at the time it was made to act that way, even for queries on inheritance trees, because all tables in an inheritance tree would necessarily be plain tables. However, the 9.5 feature addition allowing some members of an inheritance tree to be foreign tables broke the assumption that rewriteTargetListUD's output tlist could be applied to all child tables with nothing more than column-number mapping. This led to visible failures if foreign child tables had row-level triggers, and would also break in cases where child tables belonged to FDWs that used methods other than CTID for row identification. To fix, delay running rewriteTargetListUD until after the planner has expanded inheritance, so that it is applied separately to the (already mapped) tlist for each child table. We can conveniently call it from preprocess_targetlist. Refactor associated code slightly to avoid the need to heap_open the target relation multiple times during preprocess_targetlist. (The APIs remain a bit ugly, particularly around the point of which steps scribble on parse->targetList and which don't. But avoiding such scribbling would require a change in FDW callback APIs, which is more pain than it's worth.) Also fix ExecModifyTable to ensure that "tupleid" is reset to NULL when we transition from rows providing a CTID to rows that don't. (That's really an independent bug, but it manifests in much the same cases.) Add a regression test checking one manifestation of this problem, which was that row-level triggers on a foreign child table did not work right. Back-patch to 9.5 where the problem was introduced. Etsuro Fujita, reviewed by Ildus Kurbangaliev and Ashutosh Bapat Discussion: https://postgr.es/m/20170514150525.0346ba72@postgrespro.ru
2017-11-27 23:53:56 +01:00
update bar set f2 = f2 + 100;
NOTICE: trig_row_before(23, skidoo) BEFORE ROW UPDATE ON bar2
NOTICE: OLD: (3,333,33),NEW: (3,433,33)
NOTICE: trig_row_before(23, skidoo) BEFORE ROW UPDATE ON bar2
NOTICE: OLD: (4,344,44),NEW: (4,444,44)
NOTICE: trig_row_before(23, skidoo) BEFORE ROW UPDATE ON bar2
NOTICE: OLD: (7,277,77),NEW: (7,377,77)
NOTICE: trig_row_after(23, skidoo) AFTER ROW UPDATE ON bar2
NOTICE: OLD: (3,333,33),NEW: (3,433,33)
NOTICE: trig_row_after(23, skidoo) AFTER ROW UPDATE ON bar2
NOTICE: OLD: (4,344,44),NEW: (4,444,44)
NOTICE: trig_row_after(23, skidoo) AFTER ROW UPDATE ON bar2
NOTICE: OLD: (7,277,77),NEW: (7,377,77)
explain (verbose, costs off)
delete from bar where f2 < 400;
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
QUERY PLAN
---------------------------------------------------------------------------------------------------
Fix creation of resjunk tlist entries for inherited mixed UPDATE/DELETE. rewriteTargetListUD's processing is dependent on the relkind of the query's target table. That was fine at the time it was made to act that way, even for queries on inheritance trees, because all tables in an inheritance tree would necessarily be plain tables. However, the 9.5 feature addition allowing some members of an inheritance tree to be foreign tables broke the assumption that rewriteTargetListUD's output tlist could be applied to all child tables with nothing more than column-number mapping. This led to visible failures if foreign child tables had row-level triggers, and would also break in cases where child tables belonged to FDWs that used methods other than CTID for row identification. To fix, delay running rewriteTargetListUD until after the planner has expanded inheritance, so that it is applied separately to the (already mapped) tlist for each child table. We can conveniently call it from preprocess_targetlist. Refactor associated code slightly to avoid the need to heap_open the target relation multiple times during preprocess_targetlist. (The APIs remain a bit ugly, particularly around the point of which steps scribble on parse->targetList and which don't. But avoiding such scribbling would require a change in FDW callback APIs, which is more pain than it's worth.) Also fix ExecModifyTable to ensure that "tupleid" is reset to NULL when we transition from rows providing a CTID to rows that don't. (That's really an independent bug, but it manifests in much the same cases.) Add a regression test checking one manifestation of this problem, which was that row-level triggers on a foreign child table did not work right. Back-patch to 9.5 where the problem was introduced. Etsuro Fujita, reviewed by Ildus Kurbangaliev and Ashutosh Bapat Discussion: https://postgr.es/m/20170514150525.0346ba72@postgrespro.ru
2017-11-27 23:53:56 +01:00
Delete on public.bar
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Delete on public.bar bar_1
Foreign Delete on public.bar2 bar_2
Fix creation of resjunk tlist entries for inherited mixed UPDATE/DELETE. rewriteTargetListUD's processing is dependent on the relkind of the query's target table. That was fine at the time it was made to act that way, even for queries on inheritance trees, because all tables in an inheritance tree would necessarily be plain tables. However, the 9.5 feature addition allowing some members of an inheritance tree to be foreign tables broke the assumption that rewriteTargetListUD's output tlist could be applied to all child tables with nothing more than column-number mapping. This led to visible failures if foreign child tables had row-level triggers, and would also break in cases where child tables belonged to FDWs that used methods other than CTID for row identification. To fix, delay running rewriteTargetListUD until after the planner has expanded inheritance, so that it is applied separately to the (already mapped) tlist for each child table. We can conveniently call it from preprocess_targetlist. Refactor associated code slightly to avoid the need to heap_open the target relation multiple times during preprocess_targetlist. (The APIs remain a bit ugly, particularly around the point of which steps scribble on parse->targetList and which don't. But avoiding such scribbling would require a change in FDW callback APIs, which is more pain than it's worth.) Also fix ExecModifyTable to ensure that "tupleid" is reset to NULL when we transition from rows providing a CTID to rows that don't. (That's really an independent bug, but it manifests in much the same cases.) Add a regression test checking one manifestation of this problem, which was that row-level triggers on a foreign child table did not work right. Back-patch to 9.5 where the problem was introduced. Etsuro Fujita, reviewed by Ildus Kurbangaliev and Ashutosh Bapat Discussion: https://postgr.es/m/20170514150525.0346ba72@postgrespro.ru
2017-11-27 23:53:56 +01:00
Remote SQL: DELETE FROM public.loct2 WHERE ctid = $1 RETURNING f1, f2, f3
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
-> Append
-> Seq Scan on public.bar bar_1
Output: bar_1.tableoid, bar_1.ctid, NULL::record
Filter: (bar_1.f2 < 400)
-> Foreign Scan on public.bar2 bar_2
Output: bar_2.tableoid, bar_2.ctid, bar_2.*
Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct2 WHERE ((f2 < 400)) FOR UPDATE
(11 rows)
Fix creation of resjunk tlist entries for inherited mixed UPDATE/DELETE. rewriteTargetListUD's processing is dependent on the relkind of the query's target table. That was fine at the time it was made to act that way, even for queries on inheritance trees, because all tables in an inheritance tree would necessarily be plain tables. However, the 9.5 feature addition allowing some members of an inheritance tree to be foreign tables broke the assumption that rewriteTargetListUD's output tlist could be applied to all child tables with nothing more than column-number mapping. This led to visible failures if foreign child tables had row-level triggers, and would also break in cases where child tables belonged to FDWs that used methods other than CTID for row identification. To fix, delay running rewriteTargetListUD until after the planner has expanded inheritance, so that it is applied separately to the (already mapped) tlist for each child table. We can conveniently call it from preprocess_targetlist. Refactor associated code slightly to avoid the need to heap_open the target relation multiple times during preprocess_targetlist. (The APIs remain a bit ugly, particularly around the point of which steps scribble on parse->targetList and which don't. But avoiding such scribbling would require a change in FDW callback APIs, which is more pain than it's worth.) Also fix ExecModifyTable to ensure that "tupleid" is reset to NULL when we transition from rows providing a CTID to rows that don't. (That's really an independent bug, but it manifests in much the same cases.) Add a regression test checking one manifestation of this problem, which was that row-level triggers on a foreign child table did not work right. Back-patch to 9.5 where the problem was introduced. Etsuro Fujita, reviewed by Ildus Kurbangaliev and Ashutosh Bapat Discussion: https://postgr.es/m/20170514150525.0346ba72@postgrespro.ru
2017-11-27 23:53:56 +01:00
delete from bar where f2 < 400;
NOTICE: trig_row_before(23, skidoo) BEFORE ROW DELETE ON bar2
NOTICE: OLD: (7,377,77)
NOTICE: trig_row_after(23, skidoo) AFTER ROW DELETE ON bar2
NOTICE: OLD: (7,377,77)
-- cleanup
Allow foreign tables to participate in inheritance. Foreign tables can now be inheritance children, or parents. Much of the system was already ready for this, but we had to fix a few things of course, mostly in the area of planner and executor handling of row locks. As side effects of this, allow foreign tables to have NOT VALID CHECK constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to disallow these things would've required bizarre and inconsistent special cases in inheritance behavior. Since foreign tables don't enforce CHECK constraints anyway, a NOT VALID one is a complete no-op, but that doesn't mean we shouldn't allow it. And it's possible that some FDWs might have use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops for most. An additional change in support of this is that when a ModifyTable node has multiple target tables, they will all now be explicitly identified in EXPLAIN output, for example: Update on pt1 (cost=0.00..321.05 rows=3541 width=46) Update on pt1 Foreign Update on ft1 Foreign Update on ft2 Update on child3 -> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46) -> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46) -> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46) -> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46) This was done mainly to provide an unambiguous place to attach "Remote SQL" fields, but it is useful for inherited updates even when no foreign tables are involved. Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
drop table foo cascade;
NOTICE: drop cascades to foreign table foo2
drop table bar cascade;
NOTICE: drop cascades to foreign table bar2
drop table loct1;
drop table loct2;
-- Test pushing down UPDATE/DELETE joins to the remote server
create table parent (a int, b text);
create table loct1 (a int, b text);
create table loct2 (a int, b text);
create foreign table remt1 (a int, b text)
server loopback options (table_name 'loct1');
create foreign table remt2 (a int, b text)
server loopback options (table_name 'loct2');
alter foreign table remt1 inherit parent;
insert into remt1 values (1, 'foo');
insert into remt1 values (2, 'bar');
insert into remt2 values (1, 'foo');
insert into remt2 values (2, 'bar');
analyze remt1;
analyze remt2;
explain (verbose, costs off)
update parent set b = parent.b || remt2.b from remt2 where parent.a = remt2.a returning *;
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
QUERY PLAN
----------------------------------------------------------------------------------------------------------------
Update on public.parent
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: parent_1.a, parent_1.b, remt2.a, remt2.b
Update on public.parent parent_1
Foreign Update on public.remt1 parent_2
Remote SQL: UPDATE public.loct1 SET b = $2 WHERE ctid = $1 RETURNING a, b
-> Nested Loop
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: (parent.b || remt2.b), remt2.*, remt2.a, remt2.b, parent.tableoid, parent.ctid, (NULL::record)
Join Filter: (parent.a = remt2.a)
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
-> Append
-> Seq Scan on public.parent parent_1
Output: parent_1.b, parent_1.a, parent_1.tableoid, parent_1.ctid, NULL::record
-> Foreign Scan on public.remt1 parent_2
Output: parent_2.b, parent_2.a, parent_2.tableoid, parent_2.ctid, parent_2.*
Remote SQL: SELECT a, b, ctid FROM public.loct1 FOR UPDATE
-> Materialize
Output: remt2.b, remt2.*, remt2.a
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
-> Foreign Scan on public.remt2
Output: remt2.b, remt2.*, remt2.a
Remote SQL: SELECT a, b FROM public.loct2
(19 rows)
update parent set b = parent.b || remt2.b from remt2 where parent.a = remt2.a returning *;
a | b | a | b
---+--------+---+-----
1 | foofoo | 1 | foo
2 | barbar | 2 | bar
(2 rows)
explain (verbose, costs off)
delete from parent using remt2 where parent.a = remt2.a returning parent;
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
QUERY PLAN
-----------------------------------------------------------------------------
Delete on public.parent
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: parent_1.*
Delete on public.parent parent_1
Foreign Delete on public.remt1 parent_2
Remote SQL: DELETE FROM public.loct1 WHERE ctid = $1 RETURNING a, b
-> Nested Loop
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: remt2.*, parent.tableoid, parent.ctid
Join Filter: (parent.a = remt2.a)
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
-> Append
-> Seq Scan on public.parent parent_1
Output: parent_1.a, parent_1.tableoid, parent_1.ctid
-> Foreign Scan on public.remt1 parent_2
Output: parent_2.a, parent_2.tableoid, parent_2.ctid
Remote SQL: SELECT a, ctid FROM public.loct1 FOR UPDATE
-> Materialize
Output: remt2.*, remt2.a
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
-> Foreign Scan on public.remt2
Output: remt2.*, remt2.a
Remote SQL: SELECT a, b FROM public.loct2
(19 rows)
delete from parent using remt2 where parent.a = remt2.a returning parent;
parent
------------
(1,foofoo)
(2,barbar)
(2 rows)
-- cleanup
drop foreign table remt1;
drop foreign table remt2;
drop table loct1;
drop table loct2;
drop table parent;
-- ===================================================================
-- test tuple routing for foreign-table partitions
-- ===================================================================
-- Test insert tuple routing
create table itrtest (a int, b text) partition by list (a);
create table loct1 (a int check (a in (1)), b text);
create foreign table remp1 (a int check (a in (1)), b text) server loopback options (table_name 'loct1');
create table loct2 (a int check (a in (2)), b text);
create foreign table remp2 (b text, a int check (a in (2))) server loopback options (table_name 'loct2');
alter table itrtest attach partition remp1 for values in (1);
alter table itrtest attach partition remp2 for values in (2);
insert into itrtest values (1, 'foo');
insert into itrtest values (1, 'bar') returning *;
a | b
---+-----
1 | bar
(1 row)
insert into itrtest values (2, 'baz');
insert into itrtest values (2, 'qux') returning *;
a | b
---+-----
2 | qux
(1 row)
insert into itrtest values (1, 'test1'), (2, 'test2') returning *;
a | b
---+-------
1 | test1
2 | test2
(2 rows)
select tableoid::regclass, * FROM itrtest;
tableoid | a | b
----------+---+-------
remp1 | 1 | foo
remp1 | 1 | bar
remp1 | 1 | test1
remp2 | 2 | baz
remp2 | 2 | qux
remp2 | 2 | test2
(6 rows)
select tableoid::regclass, * FROM remp1;
tableoid | a | b
----------+---+-------
remp1 | 1 | foo
remp1 | 1 | bar
remp1 | 1 | test1
(3 rows)
select tableoid::regclass, * FROM remp2;
tableoid | b | a
----------+-------+---
remp2 | baz | 2
remp2 | qux | 2
remp2 | test2 | 2
(3 rows)
delete from itrtest;
-- MERGE ought to fail cleanly
merge into itrtest using (select 1, 'foo') as source on (true)
when matched then do nothing;
ERROR: cannot execute MERGE on relation "remp1"
DETAIL: This operation is not supported for foreign tables.
create unique index loct1_idx on loct1 (a);
-- DO NOTHING without an inference specification is supported
insert into itrtest values (1, 'foo') on conflict do nothing returning *;
a | b
---+-----
1 | foo
(1 row)
insert into itrtest values (1, 'foo') on conflict do nothing returning *;
a | b
---+---
(0 rows)
-- But other cases are not supported
insert into itrtest values (1, 'bar') on conflict (a) do nothing;
ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
insert into itrtest values (1, 'bar') on conflict (a) do update set b = excluded.b;
ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
select tableoid::regclass, * FROM itrtest;
tableoid | a | b
----------+---+-----
remp1 | 1 | foo
(1 row)
delete from itrtest;
drop index loct1_idx;
-- Test that remote triggers work with insert tuple routing
create function br_insert_trigfunc() returns trigger as $$
begin
new.b := new.b || ' triggered !';
return new;
end
$$ language plpgsql;
create trigger loct1_br_insert_trigger before insert on loct1
for each row execute procedure br_insert_trigfunc();
create trigger loct2_br_insert_trigger before insert on loct2
for each row execute procedure br_insert_trigfunc();
-- The new values are concatenated with ' triggered !'
insert into itrtest values (1, 'foo') returning *;
a | b
---+-----------------
1 | foo triggered !
(1 row)
insert into itrtest values (2, 'qux') returning *;
a | b
---+-----------------
2 | qux triggered !
(1 row)
insert into itrtest values (1, 'test1'), (2, 'test2') returning *;
a | b
---+-------------------
1 | test1 triggered !
2 | test2 triggered !
(2 rows)
with result as (insert into itrtest values (1, 'test1'), (2, 'test2') returning *) select * from result;
a | b
---+-------------------
1 | test1 triggered !
2 | test2 triggered !
(2 rows)
drop trigger loct1_br_insert_trigger on loct1;
drop trigger loct2_br_insert_trigger on loct2;
drop table itrtest;
drop table loct1;
drop table loct2;
-- Test update tuple routing
create table utrtest (a int, b text) partition by list (a);
create table loct (a int check (a in (1)), b text);
create foreign table remp (a int check (a in (1)), b text) server loopback options (table_name 'loct');
create table locp (a int check (a in (2)), b text);
alter table utrtest attach partition remp for values in (1);
alter table utrtest attach partition locp for values in (2);
insert into utrtest values (1, 'foo');
insert into utrtest values (2, 'qux');
select tableoid::regclass, * FROM utrtest;
tableoid | a | b
----------+---+-----
remp | 1 | foo
locp | 2 | qux
(2 rows)
select tableoid::regclass, * FROM remp;
tableoid | a | b
----------+---+-----
remp | 1 | foo
(1 row)
select tableoid::regclass, * FROM locp;
tableoid | a | b
----------+---+-----
locp | 2 | qux
(1 row)
-- It's not allowed to move a row from a partition that is foreign to another
update utrtest set a = 2 where b = 'foo' returning *;
ERROR: new row for relation "loct" violates check constraint "loct_a_check"
DETAIL: Failing row contains (2, foo).
postgres_fdw: suppress casts on constants in limited cases. When deparsing an expression of the form "remote_var OP constant", we'd normally apply a cast to the constant to make sure that the remote parser thinks it's of the same type we do. However, doing so is often not necessary, and it causes problems if the user has intentionally declared the local column as being of a different type than the remote column. A plausible use-case for that is using text to represent a type that's an enum on the remote side. A comparison on such a column will get shipped as "var = 'foo'::text", which blows up on the remote side because there's no enum = text operator. But if we simply leave off the explicit cast, the comparison will do exactly what the user wants. It's possible to do this without major risk of semantic problems, by relying on the longstanding parser heuristic that "if one operand of an operator is of type unknown, while the other one has a known type, assume that the unknown operand is also of that type". Hence, this patch leaves off the cast only if (a) the operator inputs have the same type locally; (b) the constant will print as a string literal or NULL, both of which are initially taken as type unknown; and (c) the non-Const input is a plain foreign Var. Rule (c) guarantees that the remote parser will know the type of the non-Const input; moreover, it means that if this cast-omission does cause any semantic surprises, that can only happen in cases where the local column has a different type than the remote column. That wasn't guaranteed to work anyway, and this patch should represent a net usability gain for such cases. One point that I (tgl) remain slightly uncomfortable with is that we will ignore an implicit RelabelType when deciding if the non-Const input is a plain Var. That makes it a little squishy to argue that the remote should resolve the Const as being of the same type as its Var, because then our Const is not the same type as our Var. However, if we don't do that, then this hack won't work as desired if the user chooses to use varchar rather than text to represent some remote column. That seems useful, so do it like this for now. We might have to give up the RelabelType-ignoring bit if any problems surface. Dian Fay, with review and kibitzing by me Discussion: https://postgr.es/m/C9LU294V7K4F.34LRRDU449O45@lamia
2021-11-12 17:50:40 +01:00
CONTEXT: remote SQL command: UPDATE public.loct SET a = 2 WHERE ((b = 'foo')) RETURNING a, b
-- But the reverse is allowed
update utrtest set a = 1 where b = 'qux' returning *;
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
ERROR: cannot route tuples into foreign table to be updated "remp"
select tableoid::regclass, * FROM utrtest;
tableoid | a | b
----------+---+-----
remp | 1 | foo
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
locp | 2 | qux
(2 rows)
select tableoid::regclass, * FROM remp;
tableoid | a | b
----------+---+-----
remp | 1 | foo
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
(1 row)
select tableoid::regclass, * FROM locp;
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
tableoid | a | b
----------+---+-----
locp | 2 | qux
(1 row)
-- The executor should not let unexercised FDWs shut down
update utrtest set a = 1 where b = 'foo';
-- Test that remote triggers work with update tuple routing
create trigger loct_br_insert_trigger before insert on loct
for each row execute procedure br_insert_trigfunc();
delete from utrtest;
insert into utrtest values (2, 'qux');
-- Check case where the foreign partition is a subplan target rel
explain (verbose, costs off)
update utrtest set a = 1 where a = 1 or a = 2 returning *;
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
QUERY PLAN
----------------------------------------------------------------------------------------------------
Update on public.utrtest
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Output: utrtest_1.a, utrtest_1.b
Foreign Update on public.remp utrtest_1
Update on public.locp utrtest_2
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
-> Append
-> Foreign Update on public.remp utrtest_1
Remote SQL: UPDATE public.loct SET a = 1 WHERE (((a = 1) OR (a = 2))) RETURNING a, b
-> Seq Scan on public.locp utrtest_2
Output: 1, utrtest_2.tableoid, utrtest_2.ctid, NULL::record
Filter: ((utrtest_2.a = 1) OR (utrtest_2.a = 2))
(10 rows)
-- The new values are concatenated with ' triggered !'
update utrtest set a = 1 where a = 1 or a = 2 returning *;
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
ERROR: cannot route tuples into foreign table to be updated "remp"
delete from utrtest;
insert into utrtest values (2, 'qux');
-- Check case where the foreign partition isn't a subplan target rel
explain (verbose, costs off)
update utrtest set a = 1 where a = 2 returning *;
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
QUERY PLAN
-------------------------------------------------------
Update on public.utrtest
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Output: utrtest_1.a, utrtest_1.b
Update on public.locp utrtest_1
-> Seq Scan on public.locp utrtest_1
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: 1, utrtest_1.tableoid, utrtest_1.ctid
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Filter: (utrtest_1.a = 2)
(6 rows)
-- The new values are concatenated with ' triggered !'
update utrtest set a = 1 where a = 2 returning *;
a | b
---+-----------------
1 | qux triggered !
(1 row)
drop trigger loct_br_insert_trigger on loct;
postgres_fdw: Fix incorrect handling of row movement for remote partitions. Commit 3d956d9562 added support for update row movement in postgres_fdw. This patch fixes the following issues introduced by that commit: * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel that would be updated later, the UPDATE that used a direct modification plan modified those routed rows incorrectly because those routed rows were visible to the later UPDATE command. The right fix for this would be to have some way in postgres_fdw in which the later UPDATE command ignores those routed rows, but it seems hard to do so with the current infrastructure. For now throw an error in that case. * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel, fmstate created for the UPDATE that used a non-direct modification plan was mistakenly overridden by another fmstate created for inserting those routed rows into the partition. This caused 1) server crash when the partition would be updated later, and 2) resource leak when the partition had been already updated. To avoid that, adjust the treatment of the fmstate for the inserting. As for #1, since we would also have the incorrectness issue as mentioned above, error out in that case as well. Update the docs to mention that postgres_fdw currently does not handle the case where a remote partition chosen to insert a routed row into is also an UPDATE subplan target rel that will be updated later. Author: Amit Langote and Etsuro Fujita Reviewed-by: Amit Langote Backpatch-through: 11 where row movement in postgres_fdw was added Discussion: https://postgr.es/m/21e7eaa4-0d4d-20c2-a1f7-c7e96f4ce440@lab.ntt.co.jp
2019-04-24 11:31:50 +02:00
-- We can move rows to a foreign partition that has been updated already,
-- but can't move rows to a foreign partition that hasn't been updated yet
delete from utrtest;
insert into utrtest values (1, 'foo');
insert into utrtest values (2, 'qux');
-- Test the former case:
-- with a direct modification plan
explain (verbose, costs off)
update utrtest set a = 1 returning *;
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
QUERY PLAN
---------------------------------------------------------------------------
postgres_fdw: Fix incorrect handling of row movement for remote partitions. Commit 3d956d9562 added support for update row movement in postgres_fdw. This patch fixes the following issues introduced by that commit: * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel that would be updated later, the UPDATE that used a direct modification plan modified those routed rows incorrectly because those routed rows were visible to the later UPDATE command. The right fix for this would be to have some way in postgres_fdw in which the later UPDATE command ignores those routed rows, but it seems hard to do so with the current infrastructure. For now throw an error in that case. * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel, fmstate created for the UPDATE that used a non-direct modification plan was mistakenly overridden by another fmstate created for inserting those routed rows into the partition. This caused 1) server crash when the partition would be updated later, and 2) resource leak when the partition had been already updated. To avoid that, adjust the treatment of the fmstate for the inserting. As for #1, since we would also have the incorrectness issue as mentioned above, error out in that case as well. Update the docs to mention that postgres_fdw currently does not handle the case where a remote partition chosen to insert a routed row into is also an UPDATE subplan target rel that will be updated later. Author: Amit Langote and Etsuro Fujita Reviewed-by: Amit Langote Backpatch-through: 11 where row movement in postgres_fdw was added Discussion: https://postgr.es/m/21e7eaa4-0d4d-20c2-a1f7-c7e96f4ce440@lab.ntt.co.jp
2019-04-24 11:31:50 +02:00
Update on public.utrtest
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Output: utrtest_1.a, utrtest_1.b
Foreign Update on public.remp utrtest_1
Update on public.locp utrtest_2
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
-> Append
-> Foreign Update on public.remp utrtest_1
Remote SQL: UPDATE public.loct SET a = 1 RETURNING a, b
-> Seq Scan on public.locp utrtest_2
Output: 1, utrtest_2.tableoid, utrtest_2.ctid, NULL::record
(9 rows)
postgres_fdw: Fix incorrect handling of row movement for remote partitions. Commit 3d956d9562 added support for update row movement in postgres_fdw. This patch fixes the following issues introduced by that commit: * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel that would be updated later, the UPDATE that used a direct modification plan modified those routed rows incorrectly because those routed rows were visible to the later UPDATE command. The right fix for this would be to have some way in postgres_fdw in which the later UPDATE command ignores those routed rows, but it seems hard to do so with the current infrastructure. For now throw an error in that case. * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel, fmstate created for the UPDATE that used a non-direct modification plan was mistakenly overridden by another fmstate created for inserting those routed rows into the partition. This caused 1) server crash when the partition would be updated later, and 2) resource leak when the partition had been already updated. To avoid that, adjust the treatment of the fmstate for the inserting. As for #1, since we would also have the incorrectness issue as mentioned above, error out in that case as well. Update the docs to mention that postgres_fdw currently does not handle the case where a remote partition chosen to insert a routed row into is also an UPDATE subplan target rel that will be updated later. Author: Amit Langote and Etsuro Fujita Reviewed-by: Amit Langote Backpatch-through: 11 where row movement in postgres_fdw was added Discussion: https://postgr.es/m/21e7eaa4-0d4d-20c2-a1f7-c7e96f4ce440@lab.ntt.co.jp
2019-04-24 11:31:50 +02:00
update utrtest set a = 1 returning *;
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
ERROR: cannot route tuples into foreign table to be updated "remp"
postgres_fdw: Fix incorrect handling of row movement for remote partitions. Commit 3d956d9562 added support for update row movement in postgres_fdw. This patch fixes the following issues introduced by that commit: * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel that would be updated later, the UPDATE that used a direct modification plan modified those routed rows incorrectly because those routed rows were visible to the later UPDATE command. The right fix for this would be to have some way in postgres_fdw in which the later UPDATE command ignores those routed rows, but it seems hard to do so with the current infrastructure. For now throw an error in that case. * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel, fmstate created for the UPDATE that used a non-direct modification plan was mistakenly overridden by another fmstate created for inserting those routed rows into the partition. This caused 1) server crash when the partition would be updated later, and 2) resource leak when the partition had been already updated. To avoid that, adjust the treatment of the fmstate for the inserting. As for #1, since we would also have the incorrectness issue as mentioned above, error out in that case as well. Update the docs to mention that postgres_fdw currently does not handle the case where a remote partition chosen to insert a routed row into is also an UPDATE subplan target rel that will be updated later. Author: Amit Langote and Etsuro Fujita Reviewed-by: Amit Langote Backpatch-through: 11 where row movement in postgres_fdw was added Discussion: https://postgr.es/m/21e7eaa4-0d4d-20c2-a1f7-c7e96f4ce440@lab.ntt.co.jp
2019-04-24 11:31:50 +02:00
delete from utrtest;
insert into utrtest values (1, 'foo');
insert into utrtest values (2, 'qux');
-- with a non-direct modification plan
explain (verbose, costs off)
update utrtest set a = 1 from (values (1), (2)) s(x) where a = s.x returning *;
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
QUERY PLAN
------------------------------------------------------------------------------------------------
postgres_fdw: Fix incorrect handling of row movement for remote partitions. Commit 3d956d9562 added support for update row movement in postgres_fdw. This patch fixes the following issues introduced by that commit: * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel that would be updated later, the UPDATE that used a direct modification plan modified those routed rows incorrectly because those routed rows were visible to the later UPDATE command. The right fix for this would be to have some way in postgres_fdw in which the later UPDATE command ignores those routed rows, but it seems hard to do so with the current infrastructure. For now throw an error in that case. * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel, fmstate created for the UPDATE that used a non-direct modification plan was mistakenly overridden by another fmstate created for inserting those routed rows into the partition. This caused 1) server crash when the partition would be updated later, and 2) resource leak when the partition had been already updated. To avoid that, adjust the treatment of the fmstate for the inserting. As for #1, since we would also have the incorrectness issue as mentioned above, error out in that case as well. Update the docs to mention that postgres_fdw currently does not handle the case where a remote partition chosen to insert a routed row into is also an UPDATE subplan target rel that will be updated later. Author: Amit Langote and Etsuro Fujita Reviewed-by: Amit Langote Backpatch-through: 11 where row movement in postgres_fdw was added Discussion: https://postgr.es/m/21e7eaa4-0d4d-20c2-a1f7-c7e96f4ce440@lab.ntt.co.jp
2019-04-24 11:31:50 +02:00
Update on public.utrtest
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Output: utrtest_1.a, utrtest_1.b, "*VALUES*".column1
Foreign Update on public.remp utrtest_1
postgres_fdw: Fix incorrect handling of row movement for remote partitions. Commit 3d956d9562 added support for update row movement in postgres_fdw. This patch fixes the following issues introduced by that commit: * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel that would be updated later, the UPDATE that used a direct modification plan modified those routed rows incorrectly because those routed rows were visible to the later UPDATE command. The right fix for this would be to have some way in postgres_fdw in which the later UPDATE command ignores those routed rows, but it seems hard to do so with the current infrastructure. For now throw an error in that case. * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel, fmstate created for the UPDATE that used a non-direct modification plan was mistakenly overridden by another fmstate created for inserting those routed rows into the partition. This caused 1) server crash when the partition would be updated later, and 2) resource leak when the partition had been already updated. To avoid that, adjust the treatment of the fmstate for the inserting. As for #1, since we would also have the incorrectness issue as mentioned above, error out in that case as well. Update the docs to mention that postgres_fdw currently does not handle the case where a remote partition chosen to insert a routed row into is also an UPDATE subplan target rel that will be updated later. Author: Amit Langote and Etsuro Fujita Reviewed-by: Amit Langote Backpatch-through: 11 where row movement in postgres_fdw was added Discussion: https://postgr.es/m/21e7eaa4-0d4d-20c2-a1f7-c7e96f4ce440@lab.ntt.co.jp
2019-04-24 11:31:50 +02:00
Remote SQL: UPDATE public.loct SET a = $2 WHERE ctid = $1 RETURNING a, b
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Update on public.locp utrtest_2
postgres_fdw: Fix incorrect handling of row movement for remote partitions. Commit 3d956d9562 added support for update row movement in postgres_fdw. This patch fixes the following issues introduced by that commit: * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel that would be updated later, the UPDATE that used a direct modification plan modified those routed rows incorrectly because those routed rows were visible to the later UPDATE command. The right fix for this would be to have some way in postgres_fdw in which the later UPDATE command ignores those routed rows, but it seems hard to do so with the current infrastructure. For now throw an error in that case. * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel, fmstate created for the UPDATE that used a non-direct modification plan was mistakenly overridden by another fmstate created for inserting those routed rows into the partition. This caused 1) server crash when the partition would be updated later, and 2) resource leak when the partition had been already updated. To avoid that, adjust the treatment of the fmstate for the inserting. As for #1, since we would also have the incorrectness issue as mentioned above, error out in that case as well. Update the docs to mention that postgres_fdw currently does not handle the case where a remote partition chosen to insert a routed row into is also an UPDATE subplan target rel that will be updated later. Author: Amit Langote and Etsuro Fujita Reviewed-by: Amit Langote Backpatch-through: 11 where row movement in postgres_fdw was added Discussion: https://postgr.es/m/21e7eaa4-0d4d-20c2-a1f7-c7e96f4ce440@lab.ntt.co.jp
2019-04-24 11:31:50 +02:00
-> Hash Join
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: 1, "*VALUES*".*, "*VALUES*".column1, utrtest.tableoid, utrtest.ctid, utrtest.*
Hash Cond: (utrtest.a = "*VALUES*".column1)
-> Append
-> Foreign Scan on public.remp utrtest_1
Output: utrtest_1.a, utrtest_1.tableoid, utrtest_1.ctid, utrtest_1.*
Remote SQL: SELECT a, b, ctid FROM public.loct FOR UPDATE
-> Seq Scan on public.locp utrtest_2
Output: utrtest_2.a, utrtest_2.tableoid, utrtest_2.ctid, NULL::record
postgres_fdw: Fix incorrect handling of row movement for remote partitions. Commit 3d956d9562 added support for update row movement in postgres_fdw. This patch fixes the following issues introduced by that commit: * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel that would be updated later, the UPDATE that used a direct modification plan modified those routed rows incorrectly because those routed rows were visible to the later UPDATE command. The right fix for this would be to have some way in postgres_fdw in which the later UPDATE command ignores those routed rows, but it seems hard to do so with the current infrastructure. For now throw an error in that case. * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel, fmstate created for the UPDATE that used a non-direct modification plan was mistakenly overridden by another fmstate created for inserting those routed rows into the partition. This caused 1) server crash when the partition would be updated later, and 2) resource leak when the partition had been already updated. To avoid that, adjust the treatment of the fmstate for the inserting. As for #1, since we would also have the incorrectness issue as mentioned above, error out in that case as well. Update the docs to mention that postgres_fdw currently does not handle the case where a remote partition chosen to insert a routed row into is also an UPDATE subplan target rel that will be updated later. Author: Amit Langote and Etsuro Fujita Reviewed-by: Amit Langote Backpatch-through: 11 where row movement in postgres_fdw was added Discussion: https://postgr.es/m/21e7eaa4-0d4d-20c2-a1f7-c7e96f4ce440@lab.ntt.co.jp
2019-04-24 11:31:50 +02:00
-> Hash
Output: "*VALUES*".*, "*VALUES*".column1
-> Values Scan on "*VALUES*"
Output: "*VALUES*".*, "*VALUES*".column1
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
(18 rows)
postgres_fdw: Fix incorrect handling of row movement for remote partitions. Commit 3d956d9562 added support for update row movement in postgres_fdw. This patch fixes the following issues introduced by that commit: * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel that would be updated later, the UPDATE that used a direct modification plan modified those routed rows incorrectly because those routed rows were visible to the later UPDATE command. The right fix for this would be to have some way in postgres_fdw in which the later UPDATE command ignores those routed rows, but it seems hard to do so with the current infrastructure. For now throw an error in that case. * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel, fmstate created for the UPDATE that used a non-direct modification plan was mistakenly overridden by another fmstate created for inserting those routed rows into the partition. This caused 1) server crash when the partition would be updated later, and 2) resource leak when the partition had been already updated. To avoid that, adjust the treatment of the fmstate for the inserting. As for #1, since we would also have the incorrectness issue as mentioned above, error out in that case as well. Update the docs to mention that postgres_fdw currently does not handle the case where a remote partition chosen to insert a routed row into is also an UPDATE subplan target rel that will be updated later. Author: Amit Langote and Etsuro Fujita Reviewed-by: Amit Langote Backpatch-through: 11 where row movement in postgres_fdw was added Discussion: https://postgr.es/m/21e7eaa4-0d4d-20c2-a1f7-c7e96f4ce440@lab.ntt.co.jp
2019-04-24 11:31:50 +02:00
update utrtest set a = 1 from (values (1), (2)) s(x) where a = s.x returning *;
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
ERROR: cannot route tuples into foreign table to be updated "remp"
postgres_fdw: Fix incorrect handling of row movement for remote partitions. Commit 3d956d9562 added support for update row movement in postgres_fdw. This patch fixes the following issues introduced by that commit: * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel that would be updated later, the UPDATE that used a direct modification plan modified those routed rows incorrectly because those routed rows were visible to the later UPDATE command. The right fix for this would be to have some way in postgres_fdw in which the later UPDATE command ignores those routed rows, but it seems hard to do so with the current infrastructure. For now throw an error in that case. * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel, fmstate created for the UPDATE that used a non-direct modification plan was mistakenly overridden by another fmstate created for inserting those routed rows into the partition. This caused 1) server crash when the partition would be updated later, and 2) resource leak when the partition had been already updated. To avoid that, adjust the treatment of the fmstate for the inserting. As for #1, since we would also have the incorrectness issue as mentioned above, error out in that case as well. Update the docs to mention that postgres_fdw currently does not handle the case where a remote partition chosen to insert a routed row into is also an UPDATE subplan target rel that will be updated later. Author: Amit Langote and Etsuro Fujita Reviewed-by: Amit Langote Backpatch-through: 11 where row movement in postgres_fdw was added Discussion: https://postgr.es/m/21e7eaa4-0d4d-20c2-a1f7-c7e96f4ce440@lab.ntt.co.jp
2019-04-24 11:31:50 +02:00
-- Change the definition of utrtest so that the foreign partition get updated
-- after the local partition
delete from utrtest;
alter table utrtest detach partition remp;
drop foreign table remp;
alter table loct drop constraint loct_a_check;
alter table loct add check (a in (3));
create foreign table remp (a int check (a in (3)), b text) server loopback options (table_name 'loct');
alter table utrtest attach partition remp for values in (3);
insert into utrtest values (2, 'qux');
insert into utrtest values (3, 'xyzzy');
-- Test the latter case:
-- with a direct modification plan
explain (verbose, costs off)
update utrtest set a = 3 returning *;
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
QUERY PLAN
---------------------------------------------------------------------------
postgres_fdw: Fix incorrect handling of row movement for remote partitions. Commit 3d956d9562 added support for update row movement in postgres_fdw. This patch fixes the following issues introduced by that commit: * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel that would be updated later, the UPDATE that used a direct modification plan modified those routed rows incorrectly because those routed rows were visible to the later UPDATE command. The right fix for this would be to have some way in postgres_fdw in which the later UPDATE command ignores those routed rows, but it seems hard to do so with the current infrastructure. For now throw an error in that case. * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel, fmstate created for the UPDATE that used a non-direct modification plan was mistakenly overridden by another fmstate created for inserting those routed rows into the partition. This caused 1) server crash when the partition would be updated later, and 2) resource leak when the partition had been already updated. To avoid that, adjust the treatment of the fmstate for the inserting. As for #1, since we would also have the incorrectness issue as mentioned above, error out in that case as well. Update the docs to mention that postgres_fdw currently does not handle the case where a remote partition chosen to insert a routed row into is also an UPDATE subplan target rel that will be updated later. Author: Amit Langote and Etsuro Fujita Reviewed-by: Amit Langote Backpatch-through: 11 where row movement in postgres_fdw was added Discussion: https://postgr.es/m/21e7eaa4-0d4d-20c2-a1f7-c7e96f4ce440@lab.ntt.co.jp
2019-04-24 11:31:50 +02:00
Update on public.utrtest
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Output: utrtest_1.a, utrtest_1.b
Update on public.locp utrtest_1
Foreign Update on public.remp utrtest_2
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
-> Append
-> Seq Scan on public.locp utrtest_1
Output: 3, utrtest_1.tableoid, utrtest_1.ctid, NULL::record
-> Foreign Update on public.remp utrtest_2
Remote SQL: UPDATE public.loct SET a = 3 RETURNING a, b
(9 rows)
postgres_fdw: Fix incorrect handling of row movement for remote partitions. Commit 3d956d9562 added support for update row movement in postgres_fdw. This patch fixes the following issues introduced by that commit: * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel that would be updated later, the UPDATE that used a direct modification plan modified those routed rows incorrectly because those routed rows were visible to the later UPDATE command. The right fix for this would be to have some way in postgres_fdw in which the later UPDATE command ignores those routed rows, but it seems hard to do so with the current infrastructure. For now throw an error in that case. * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel, fmstate created for the UPDATE that used a non-direct modification plan was mistakenly overridden by another fmstate created for inserting those routed rows into the partition. This caused 1) server crash when the partition would be updated later, and 2) resource leak when the partition had been already updated. To avoid that, adjust the treatment of the fmstate for the inserting. As for #1, since we would also have the incorrectness issue as mentioned above, error out in that case as well. Update the docs to mention that postgres_fdw currently does not handle the case where a remote partition chosen to insert a routed row into is also an UPDATE subplan target rel that will be updated later. Author: Amit Langote and Etsuro Fujita Reviewed-by: Amit Langote Backpatch-through: 11 where row movement in postgres_fdw was added Discussion: https://postgr.es/m/21e7eaa4-0d4d-20c2-a1f7-c7e96f4ce440@lab.ntt.co.jp
2019-04-24 11:31:50 +02:00
update utrtest set a = 3 returning *; -- ERROR
ERROR: cannot route tuples into foreign table to be updated "remp"
-- with a non-direct modification plan
explain (verbose, costs off)
update utrtest set a = 3 from (values (2), (3)) s(x) where a = s.x returning *;
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
QUERY PLAN
-----------------------------------------------------------------------------------------------------
postgres_fdw: Fix incorrect handling of row movement for remote partitions. Commit 3d956d9562 added support for update row movement in postgres_fdw. This patch fixes the following issues introduced by that commit: * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel that would be updated later, the UPDATE that used a direct modification plan modified those routed rows incorrectly because those routed rows were visible to the later UPDATE command. The right fix for this would be to have some way in postgres_fdw in which the later UPDATE command ignores those routed rows, but it seems hard to do so with the current infrastructure. For now throw an error in that case. * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel, fmstate created for the UPDATE that used a non-direct modification plan was mistakenly overridden by another fmstate created for inserting those routed rows into the partition. This caused 1) server crash when the partition would be updated later, and 2) resource leak when the partition had been already updated. To avoid that, adjust the treatment of the fmstate for the inserting. As for #1, since we would also have the incorrectness issue as mentioned above, error out in that case as well. Update the docs to mention that postgres_fdw currently does not handle the case where a remote partition chosen to insert a routed row into is also an UPDATE subplan target rel that will be updated later. Author: Amit Langote and Etsuro Fujita Reviewed-by: Amit Langote Backpatch-through: 11 where row movement in postgres_fdw was added Discussion: https://postgr.es/m/21e7eaa4-0d4d-20c2-a1f7-c7e96f4ce440@lab.ntt.co.jp
2019-04-24 11:31:50 +02:00
Update on public.utrtest
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Output: utrtest_1.a, utrtest_1.b, "*VALUES*".column1
Update on public.locp utrtest_1
Foreign Update on public.remp utrtest_2
postgres_fdw: Fix incorrect handling of row movement for remote partitions. Commit 3d956d9562 added support for update row movement in postgres_fdw. This patch fixes the following issues introduced by that commit: * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel that would be updated later, the UPDATE that used a direct modification plan modified those routed rows incorrectly because those routed rows were visible to the later UPDATE command. The right fix for this would be to have some way in postgres_fdw in which the later UPDATE command ignores those routed rows, but it seems hard to do so with the current infrastructure. For now throw an error in that case. * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel, fmstate created for the UPDATE that used a non-direct modification plan was mistakenly overridden by another fmstate created for inserting those routed rows into the partition. This caused 1) server crash when the partition would be updated later, and 2) resource leak when the partition had been already updated. To avoid that, adjust the treatment of the fmstate for the inserting. As for #1, since we would also have the incorrectness issue as mentioned above, error out in that case as well. Update the docs to mention that postgres_fdw currently does not handle the case where a remote partition chosen to insert a routed row into is also an UPDATE subplan target rel that will be updated later. Author: Amit Langote and Etsuro Fujita Reviewed-by: Amit Langote Backpatch-through: 11 where row movement in postgres_fdw was added Discussion: https://postgr.es/m/21e7eaa4-0d4d-20c2-a1f7-c7e96f4ce440@lab.ntt.co.jp
2019-04-24 11:31:50 +02:00
Remote SQL: UPDATE public.loct SET a = $2 WHERE ctid = $1 RETURNING a, b
-> Hash Join
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
Output: 3, "*VALUES*".*, "*VALUES*".column1, utrtest.tableoid, utrtest.ctid, (NULL::record)
Hash Cond: (utrtest.a = "*VALUES*".column1)
-> Append
-> Seq Scan on public.locp utrtest_1
Output: utrtest_1.a, utrtest_1.tableoid, utrtest_1.ctid, NULL::record
-> Foreign Scan on public.remp utrtest_2
Output: utrtest_2.a, utrtest_2.tableoid, utrtest_2.ctid, utrtest_2.*
Remote SQL: SELECT a, b, ctid FROM public.loct FOR UPDATE
postgres_fdw: Fix incorrect handling of row movement for remote partitions. Commit 3d956d9562 added support for update row movement in postgres_fdw. This patch fixes the following issues introduced by that commit: * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel that would be updated later, the UPDATE that used a direct modification plan modified those routed rows incorrectly because those routed rows were visible to the later UPDATE command. The right fix for this would be to have some way in postgres_fdw in which the later UPDATE command ignores those routed rows, but it seems hard to do so with the current infrastructure. For now throw an error in that case. * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel, fmstate created for the UPDATE that used a non-direct modification plan was mistakenly overridden by another fmstate created for inserting those routed rows into the partition. This caused 1) server crash when the partition would be updated later, and 2) resource leak when the partition had been already updated. To avoid that, adjust the treatment of the fmstate for the inserting. As for #1, since we would also have the incorrectness issue as mentioned above, error out in that case as well. Update the docs to mention that postgres_fdw currently does not handle the case where a remote partition chosen to insert a routed row into is also an UPDATE subplan target rel that will be updated later. Author: Amit Langote and Etsuro Fujita Reviewed-by: Amit Langote Backpatch-through: 11 where row movement in postgres_fdw was added Discussion: https://postgr.es/m/21e7eaa4-0d4d-20c2-a1f7-c7e96f4ce440@lab.ntt.co.jp
2019-04-24 11:31:50 +02:00
-> Hash
Output: "*VALUES*".*, "*VALUES*".column1
-> Values Scan on "*VALUES*"
Output: "*VALUES*".*, "*VALUES*".column1
Rework planning and execution of UPDATE and DELETE. This patch makes two closely related sets of changes: 1. For UPDATE, the subplan of the ModifyTable node now only delivers the new values of the changed columns (i.e., the expressions computed in the query's SET clause) plus row identity information such as CTID. ModifyTable must re-fetch the original tuple to merge in the old values of any unchanged columns. The core advantage of this is that the changed columns are uniform across all tables of an inherited or partitioned target relation, whereas the other columns might not be. A secondary advantage, when the UPDATE involves joins, is that less data needs to pass through the plan tree. The disadvantage of course is an extra fetch of each tuple to be updated. However, that seems to be very nearly free in context; even worst-case tests don't show it to add more than a couple percent to the total query cost. At some point it might be interesting to combine the re-fetch with the tuple access that ModifyTable must do anyway to mark the old tuple dead; but that would require a good deal of refactoring and it seems it wouldn't buy all that much, so this patch doesn't attempt it. 2. For inherited UPDATE/DELETE, instead of generating a separate subplan for each target relation, we now generate a single subplan that is just exactly like a SELECT's plan, then stick ModifyTable on top of that. To let ModifyTable know which target relation a given incoming row refers to, a tableoid junk column is added to the row identity information. This gets rid of the horrid hack that was inheritance_planner(), eliminating O(N^2) planning cost and memory consumption in cases where there were many unprunable target relations. Point 2 of course requires point 1, so that there is a uniform definition of the non-junk columns to be returned by the subplan. We can't insist on uniform definition of the row identity junk columns however, if we want to keep the ability to have both plain and foreign tables in a partitioning hierarchy. Since it wouldn't scale very far to have every child table have its own row identity column, this patch includes provisions to merge similar row identity columns into one column of the subplan result. In particular, we can merge the whole-row Vars typically used as row identity by FDWs into one column by pretending they are type RECORD. (It's still okay for the actual composite Datums to be labeled with the table's rowtype OID, though.) There is more that can be done to file down residual inefficiencies in this patch, but it seems to be committable now. FDW authors should note several API changes: * The argument list for AddForeignUpdateTargets() has changed, and so has the method it must use for adding junk columns to the query. Call add_row_identity_var() instead of manipulating the parse tree directly. You might want to reconsider exactly what you're adding, too. * PlanDirectModify() must now work a little harder to find the ForeignScan plan node; if the foreign table is part of a partitioning hierarchy then the ForeignScan might not be the direct child of ModifyTable. See postgres_fdw for sample code. * To check whether a relation is a target relation, it's no longer sufficient to compare its relid to root->parse->resultRelation. Instead, check it against all_result_relids or leaf_result_relids, as appropriate. Amit Langote and Tom Lane Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
(18 rows)
postgres_fdw: Fix incorrect handling of row movement for remote partitions. Commit 3d956d9562 added support for update row movement in postgres_fdw. This patch fixes the following issues introduced by that commit: * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel that would be updated later, the UPDATE that used a direct modification plan modified those routed rows incorrectly because those routed rows were visible to the later UPDATE command. The right fix for this would be to have some way in postgres_fdw in which the later UPDATE command ignores those routed rows, but it seems hard to do so with the current infrastructure. For now throw an error in that case. * When a remote partition chosen to insert routed rows into was also an UPDATE subplan target rel, fmstate created for the UPDATE that used a non-direct modification plan was mistakenly overridden by another fmstate created for inserting those routed rows into the partition. This caused 1) server crash when the partition would be updated later, and 2) resource leak when the partition had been already updated. To avoid that, adjust the treatment of the fmstate for the inserting. As for #1, since we would also have the incorrectness issue as mentioned above, error out in that case as well. Update the docs to mention that postgres_fdw currently does not handle the case where a remote partition chosen to insert a routed row into is also an UPDATE subplan target rel that will be updated later. Author: Amit Langote and Etsuro Fujita Reviewed-by: Amit Langote Backpatch-through: 11 where row movement in postgres_fdw was added Discussion: https://postgr.es/m/21e7eaa4-0d4d-20c2-a1f7-c7e96f4ce440@lab.ntt.co.jp
2019-04-24 11:31:50 +02:00
update utrtest set a = 3 from (values (2), (3)) s(x) where a = s.x returning *; -- ERROR
ERROR: cannot route tuples into foreign table to be updated "remp"
drop table utrtest;
drop table loct;
-- Test copy tuple routing
create table ctrtest (a int, b text) partition by list (a);
create table loct1 (a int check (a in (1)), b text);
create foreign table remp1 (a int check (a in (1)), b text) server loopback options (table_name 'loct1');
create table loct2 (a int check (a in (2)), b text);
create foreign table remp2 (b text, a int check (a in (2))) server loopback options (table_name 'loct2');
alter table ctrtest attach partition remp1 for values in (1);
alter table ctrtest attach partition remp2 for values in (2);
copy ctrtest from stdin;
select tableoid::regclass, * FROM ctrtest;
tableoid | a | b
----------+---+-----
remp1 | 1 | foo
remp2 | 2 | qux
(2 rows)
select tableoid::regclass, * FROM remp1;
tableoid | a | b
----------+---+-----
remp1 | 1 | foo
(1 row)
select tableoid::regclass, * FROM remp2;
tableoid | b | a
----------+-----+---
remp2 | qux | 2
(1 row)
-- Copying into foreign partitions directly should work as well
copy remp1 from stdin;
select tableoid::regclass, * FROM remp1;
tableoid | a | b
----------+---+-----
remp1 | 1 | foo
remp1 | 1 | bar
(2 rows)
2022-10-13 11:45:00 +02:00
delete from ctrtest;
-- Test copy tuple routing with the batch_size option enabled
alter server loopback options (add batch_size '2');
copy ctrtest from stdin;
select tableoid::regclass, * FROM ctrtest;
tableoid | a | b
----------+---+-------
remp1 | 1 | foo
remp1 | 1 | bar
remp1 | 1 | test1
remp2 | 2 | baz
remp2 | 2 | qux
remp2 | 2 | test2
(6 rows)
select tableoid::regclass, * FROM remp1;
tableoid | a | b
----------+---+-------
remp1 | 1 | foo
remp1 | 1 | bar
remp1 | 1 | test1
(3 rows)
select tableoid::regclass, * FROM remp2;
tableoid | b | a
----------+-------+---
remp2 | baz | 2
remp2 | qux | 2
remp2 | test2 | 2
(3 rows)
delete from ctrtest;
alter server loopback options (drop batch_size);
drop table ctrtest;
drop table loct1;
drop table loct2;
-- ===================================================================
-- test COPY FROM
-- ===================================================================
create table loc2 (f1 int, f2 text);
alter table loc2 set (autovacuum_enabled = 'false');
create foreign table rem2 (f1 int, f2 text) server loopback options(table_name 'loc2');
-- Test basic functionality
copy rem2 from stdin;
select * from rem2;
f1 | f2
----+-----
1 | foo
2 | bar
(2 rows)
delete from rem2;
-- Test check constraints
alter table loc2 add constraint loc2_f1positive check (f1 >= 0);
alter foreign table rem2 add constraint rem2_f1positive check (f1 >= 0);
-- check constraint is enforced on the remote side, not locally
copy rem2 from stdin;
copy rem2 from stdin; -- ERROR
ERROR: new row for relation "loc2" violates check constraint "loc2_f1positive"
DETAIL: Failing row contains (-1, xyzzy).
CONTEXT: remote SQL command: INSERT INTO public.loc2(f1, f2) VALUES ($1, $2)
COPY rem2, line 1: "-1 xyzzy"
select * from rem2;
f1 | f2
----+-----
1 | foo
2 | bar
(2 rows)
alter foreign table rem2 drop constraint rem2_f1positive;
alter table loc2 drop constraint loc2_f1positive;
delete from rem2;
-- Test local triggers
create trigger trig_stmt_before before insert on rem2
for each statement execute procedure trigger_func();
create trigger trig_stmt_after after insert on rem2
for each statement execute procedure trigger_func();
create trigger trig_row_before before insert on rem2
for each row execute procedure trigger_data(23,'skidoo');
create trigger trig_row_after after insert on rem2
for each row execute procedure trigger_data(23,'skidoo');
copy rem2 from stdin;
NOTICE: trigger_func(<NULL>) called: action = INSERT, when = BEFORE, level = STATEMENT
NOTICE: trig_row_before(23, skidoo) BEFORE ROW INSERT ON rem2
NOTICE: NEW: (1,foo)
NOTICE: trig_row_before(23, skidoo) BEFORE ROW INSERT ON rem2
NOTICE: NEW: (2,bar)
NOTICE: trig_row_after(23, skidoo) AFTER ROW INSERT ON rem2
NOTICE: NEW: (1,foo)
NOTICE: trig_row_after(23, skidoo) AFTER ROW INSERT ON rem2
NOTICE: NEW: (2,bar)
NOTICE: trigger_func(<NULL>) called: action = INSERT, when = AFTER, level = STATEMENT
select * from rem2;
f1 | f2
----+-----
1 | foo
2 | bar
(2 rows)
drop trigger trig_row_before on rem2;
drop trigger trig_row_after on rem2;
drop trigger trig_stmt_before on rem2;
drop trigger trig_stmt_after on rem2;
delete from rem2;
create trigger trig_row_before_insert before insert on rem2
for each row execute procedure trig_row_before_insupdate();
-- The new values are concatenated with ' triggered !'
copy rem2 from stdin;
select * from rem2;
f1 | f2
----+-----------------
1 | foo triggered !
2 | bar triggered !
(2 rows)
drop trigger trig_row_before_insert on rem2;
delete from rem2;
create trigger trig_null before insert on rem2
for each row execute procedure trig_null();
-- Nothing happens
copy rem2 from stdin;
select * from rem2;
f1 | f2
----+----
(0 rows)
drop trigger trig_null on rem2;
delete from rem2;
-- Test remote triggers
create trigger trig_row_before_insert before insert on loc2
for each row execute procedure trig_row_before_insupdate();
-- The new values are concatenated with ' triggered !'
copy rem2 from stdin;
select * from rem2;
f1 | f2
----+-----------------
1 | foo triggered !
2 | bar triggered !
(2 rows)
drop trigger trig_row_before_insert on loc2;
delete from rem2;
create trigger trig_null before insert on loc2
for each row execute procedure trig_null();
-- Nothing happens
copy rem2 from stdin;
select * from rem2;
f1 | f2
----+----
(0 rows)
drop trigger trig_null on loc2;
delete from rem2;
-- Test a combination of local and remote triggers
create trigger rem2_trig_row_before before insert on rem2
for each row execute procedure trigger_data(23,'skidoo');
create trigger rem2_trig_row_after after insert on rem2
for each row execute procedure trigger_data(23,'skidoo');
create trigger loc2_trig_row_before_insert before insert on loc2
for each row execute procedure trig_row_before_insupdate();
copy rem2 from stdin;
NOTICE: rem2_trig_row_before(23, skidoo) BEFORE ROW INSERT ON rem2
NOTICE: NEW: (1,foo)
NOTICE: rem2_trig_row_before(23, skidoo) BEFORE ROW INSERT ON rem2
NOTICE: NEW: (2,bar)
NOTICE: rem2_trig_row_after(23, skidoo) AFTER ROW INSERT ON rem2
NOTICE: NEW: (1,"foo triggered !")
NOTICE: rem2_trig_row_after(23, skidoo) AFTER ROW INSERT ON rem2
NOTICE: NEW: (2,"bar triggered !")
select * from rem2;
f1 | f2
----+-----------------
1 | foo triggered !
2 | bar triggered !
(2 rows)
drop trigger rem2_trig_row_before on rem2;
drop trigger rem2_trig_row_after on rem2;
drop trigger loc2_trig_row_before_insert on loc2;
delete from rem2;
-- test COPY FROM with foreign table created in the same transaction
create table loc3 (f1 int, f2 text);
begin;
create foreign table rem3 (f1 int, f2 text)
server loopback options(table_name 'loc3');
copy rem3 from stdin;
commit;
select * from rem3;
f1 | f2
----+-----
1 | foo
2 | bar
(2 rows)
drop foreign table rem3;
drop table loc3;
2022-10-13 11:45:00 +02:00
-- Test COPY FROM with the batch_size option enabled
alter server loopback options (add batch_size '2');
-- Test basic functionality
copy rem2 from stdin;
select * from rem2;
f1 | f2
----+-----
1 | foo
2 | bar
3 | baz
(3 rows)
delete from rem2;
-- Test check constraints
alter table loc2 add constraint loc2_f1positive check (f1 >= 0);
alter foreign table rem2 add constraint rem2_f1positive check (f1 >= 0);
-- check constraint is enforced on the remote side, not locally
copy rem2 from stdin;
copy rem2 from stdin; -- ERROR
ERROR: new row for relation "loc2" violates check constraint "loc2_f1positive"
DETAIL: Failing row contains (-1, xyzzy).
CONTEXT: remote SQL command: INSERT INTO public.loc2(f1, f2) VALUES ($1, $2)
COPY rem2
select * from rem2;
f1 | f2
----+-----
1 | foo
2 | bar
3 | baz
(3 rows)
alter foreign table rem2 drop constraint rem2_f1positive;
alter table loc2 drop constraint loc2_f1positive;
delete from rem2;
-- Test remote triggers
create trigger trig_row_before_insert before insert on loc2
for each row execute procedure trig_row_before_insupdate();
-- The new values are concatenated with ' triggered !'
copy rem2 from stdin;
select * from rem2;
f1 | f2
----+-----------------
1 | foo triggered !
2 | bar triggered !
3 | baz triggered !
(3 rows)
drop trigger trig_row_before_insert on loc2;
delete from rem2;
create trigger trig_null before insert on loc2
for each row execute procedure trig_null();
-- Nothing happens
copy rem2 from stdin;
select * from rem2;
f1 | f2
----+----
(0 rows)
drop trigger trig_null on loc2;
delete from rem2;
-- Check with zero-column foreign table; batch insert will be disabled
alter table loc2 drop column f1;
alter table loc2 drop column f2;
alter table rem2 drop column f1;
alter table rem2 drop column f2;
copy rem2 from stdin;
select * from rem2;
--
(3 rows)
delete from rem2;
alter server loopback options (drop batch_size);
-- ===================================================================
-- test for TRUNCATE
-- ===================================================================
CREATE TABLE tru_rtable0 (id int primary key);
CREATE FOREIGN TABLE tru_ftable (id int)
SERVER loopback OPTIONS (table_name 'tru_rtable0');
INSERT INTO tru_rtable0 (SELECT x FROM generate_series(1,10) x);
CREATE TABLE tru_ptable (id int) PARTITION BY HASH(id);
CREATE TABLE tru_ptable__p0 PARTITION OF tru_ptable
FOR VALUES WITH (MODULUS 2, REMAINDER 0);
CREATE TABLE tru_rtable1 (id int primary key);
CREATE FOREIGN TABLE tru_ftable__p1 PARTITION OF tru_ptable
FOR VALUES WITH (MODULUS 2, REMAINDER 1)
SERVER loopback OPTIONS (table_name 'tru_rtable1');
INSERT INTO tru_ptable (SELECT x FROM generate_series(11,20) x);
CREATE TABLE tru_pk_table(id int primary key);
CREATE TABLE tru_fk_table(fkey int references tru_pk_table(id));
INSERT INTO tru_pk_table (SELECT x FROM generate_series(1,10) x);
INSERT INTO tru_fk_table (SELECT x % 10 + 1 FROM generate_series(5,25) x);
CREATE FOREIGN TABLE tru_pk_ftable (id int)
SERVER loopback OPTIONS (table_name 'tru_pk_table');
CREATE TABLE tru_rtable_parent (id int);
CREATE TABLE tru_rtable_child (id int);
CREATE FOREIGN TABLE tru_ftable_parent (id int)
SERVER loopback OPTIONS (table_name 'tru_rtable_parent');
CREATE FOREIGN TABLE tru_ftable_child () INHERITS (tru_ftable_parent)
SERVER loopback OPTIONS (table_name 'tru_rtable_child');
INSERT INTO tru_rtable_parent (SELECT x FROM generate_series(1,8) x);
INSERT INTO tru_rtable_child (SELECT x FROM generate_series(10, 18) x);
-- normal truncate
SELECT sum(id) FROM tru_ftable; -- 55
sum
-----
55
(1 row)
TRUNCATE tru_ftable;
SELECT count(*) FROM tru_rtable0; -- 0
count
-------
0
(1 row)
SELECT count(*) FROM tru_ftable; -- 0
count
-------
0
(1 row)
-- 'truncatable' option
ALTER SERVER loopback OPTIONS (ADD truncatable 'false');
TRUNCATE tru_ftable; -- error
ERROR: foreign table "tru_ftable" does not allow truncates
ALTER FOREIGN TABLE tru_ftable OPTIONS (ADD truncatable 'true');
TRUNCATE tru_ftable; -- accepted
ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
TRUNCATE tru_ftable; -- error
ERROR: foreign table "tru_ftable" does not allow truncates
ALTER SERVER loopback OPTIONS (DROP truncatable);
ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'false');
TRUNCATE tru_ftable; -- error
ERROR: foreign table "tru_ftable" does not allow truncates
ALTER FOREIGN TABLE tru_ftable OPTIONS (SET truncatable 'true');
TRUNCATE tru_ftable; -- accepted
-- partitioned table with both local and foreign tables as partitions
SELECT sum(id) FROM tru_ptable; -- 155
sum
-----
155
(1 row)
TRUNCATE tru_ptable;
SELECT count(*) FROM tru_ptable; -- 0
count
-------
0
(1 row)
SELECT count(*) FROM tru_ptable__p0; -- 0
count
-------
0
(1 row)
SELECT count(*) FROM tru_ftable__p1; -- 0
count
-------
0
(1 row)
SELECT count(*) FROM tru_rtable1; -- 0
count
-------
0
(1 row)
-- 'CASCADE' option
SELECT sum(id) FROM tru_pk_ftable; -- 55
sum
-----
55
(1 row)
TRUNCATE tru_pk_ftable; -- failed by FK reference
ERROR: cannot truncate a table referenced in a foreign key constraint
DETAIL: Table "tru_fk_table" references "tru_pk_table".
HINT: Truncate table "tru_fk_table" at the same time, or use TRUNCATE ... CASCADE.
CONTEXT: remote SQL command: TRUNCATE public.tru_pk_table CONTINUE IDENTITY RESTRICT
TRUNCATE tru_pk_ftable CASCADE;
SELECT count(*) FROM tru_pk_ftable; -- 0
count
-------
0
(1 row)
SELECT count(*) FROM tru_fk_table; -- also truncated,0
count
-------
0
(1 row)
-- truncate two tables at a command
INSERT INTO tru_ftable (SELECT x FROM generate_series(1,8) x);
INSERT INTO tru_pk_ftable (SELECT x FROM generate_series(3,10) x);
SELECT count(*) from tru_ftable; -- 8
count
-------
8
(1 row)
SELECT count(*) from tru_pk_ftable; -- 8
count
-------
8
(1 row)
TRUNCATE tru_ftable, tru_pk_ftable CASCADE;
SELECT count(*) from tru_ftable; -- 0
count
-------
0
(1 row)
SELECT count(*) from tru_pk_ftable; -- 0
count
-------
0
(1 row)
-- truncate with ONLY clause
-- Since ONLY is specified, the table tru_ftable_child that inherits
-- tru_ftable_parent locally is not truncated.
TRUNCATE ONLY tru_ftable_parent;
SELECT sum(id) FROM tru_ftable_parent; -- 126
sum
-----
126
(1 row)
TRUNCATE tru_ftable_parent;
SELECT count(*) FROM tru_ftable_parent; -- 0
count
-------
0
(1 row)
-- in case when remote table has inherited children
CREATE TABLE tru_rtable0_child () INHERITS (tru_rtable0);
INSERT INTO tru_rtable0 (SELECT x FROM generate_series(5,9) x);
INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(10,14) x);
SELECT sum(id) FROM tru_ftable; -- 95
sum
-----
95
(1 row)
-- Both parent and child tables in the foreign server are truncated
-- even though ONLY is specified because ONLY has no effect
-- when truncating a foreign table.
TRUNCATE ONLY tru_ftable;
SELECT count(*) FROM tru_ftable; -- 0
count
-------
0
(1 row)
INSERT INTO tru_rtable0 (SELECT x FROM generate_series(21,25) x);
INSERT INTO tru_rtable0_child (SELECT x FROM generate_series(26,30) x);
SELECT sum(id) FROM tru_ftable; -- 255
sum
-----
255
(1 row)
TRUNCATE tru_ftable; -- truncate both of parent and child
SELECT count(*) FROM tru_ftable; -- 0
count
-------
0
(1 row)
-- cleanup
DROP FOREIGN TABLE tru_ftable_parent, tru_ftable_child, tru_pk_ftable,tru_ftable__p1,tru_ftable;
DROP TABLE tru_rtable0, tru_rtable1, tru_ptable, tru_ptable__p0, tru_pk_table, tru_fk_table,
tru_rtable_parent,tru_rtable_child, tru_rtable0_child;
-- ===================================================================
-- test IMPORT FOREIGN SCHEMA
-- ===================================================================
CREATE SCHEMA import_source;
CREATE TABLE import_source.t1 (c1 int, c2 varchar NOT NULL);
CREATE TABLE import_source.t2 (c1 int default 42, c2 varchar NULL, c3 text collate "POSIX");
CREATE TYPE typ1 AS (m1 int, m2 varchar);
CREATE TABLE import_source.t3 (c1 timestamptz default now(), c2 typ1);
CREATE TABLE import_source."x 4" (c1 float8, "C 2" text, c3 varchar(42));
CREATE TABLE import_source."x 5" (c1 float8);
ALTER TABLE import_source."x 5" DROP COLUMN c1;
CREATE TABLE import_source."x 6" (c1 int, c2 int generated always as (c1 * 2) stored);
CREATE TABLE import_source.t4 (c1 int) PARTITION BY RANGE (c1);
CREATE TABLE import_source.t4_part PARTITION OF import_source.t4
FOR VALUES FROM (1) TO (100);
CREATE TABLE import_source.t4_part2 PARTITION OF import_source.t4
FOR VALUES FROM (100) TO (200);
CREATE SCHEMA import_dest1;
IMPORT FOREIGN SCHEMA import_source FROM SERVER loopback INTO import_dest1;
\det+ import_dest1.*
List of foreign tables
Schema | Table | Server | FDW options | Description
--------------+-------+----------+-------------------------------------------------+-------------
import_dest1 | t1 | loopback | (schema_name 'import_source', table_name 't1') |
import_dest1 | t2 | loopback | (schema_name 'import_source', table_name 't2') |
import_dest1 | t3 | loopback | (schema_name 'import_source', table_name 't3') |
import_dest1 | t4 | loopback | (schema_name 'import_source', table_name 't4') |
import_dest1 | x 4 | loopback | (schema_name 'import_source', table_name 'x 4') |
import_dest1 | x 5 | loopback | (schema_name 'import_source', table_name 'x 5') |
import_dest1 | x 6 | loopback | (schema_name 'import_source', table_name 'x 6') |
(7 rows)
\d import_dest1.*
Foreign table "import_dest1.t1"
Column | Type | Collation | Nullable | Default | FDW options
--------+-------------------+-----------+----------+---------+--------------------
c1 | integer | | | | (column_name 'c1')
c2 | character varying | | not null | | (column_name 'c2')
Server: loopback
FDW options: (schema_name 'import_source', table_name 't1')
Foreign table "import_dest1.t2"
Column | Type | Collation | Nullable | Default | FDW options
--------+-------------------+-----------+----------+---------+--------------------
c1 | integer | | | | (column_name 'c1')
c2 | character varying | | | | (column_name 'c2')
c3 | text | POSIX | | | (column_name 'c3')
Server: loopback
FDW options: (schema_name 'import_source', table_name 't2')
Foreign table "import_dest1.t3"
Column | Type | Collation | Nullable | Default | FDW options
--------+--------------------------+-----------+----------+---------+--------------------
c1 | timestamp with time zone | | | | (column_name 'c1')
c2 | typ1 | | | | (column_name 'c2')
Server: loopback
FDW options: (schema_name 'import_source', table_name 't3')
Foreign table "import_dest1.t4"
Column | Type | Collation | Nullable | Default | FDW options
--------+---------+-----------+----------+---------+--------------------
c1 | integer | | | | (column_name 'c1')
Server: loopback
FDW options: (schema_name 'import_source', table_name 't4')
Foreign table "import_dest1.x 4"
Column | Type | Collation | Nullable | Default | FDW options
--------+-----------------------+-----------+----------+---------+---------------------
c1 | double precision | | | | (column_name 'c1')
C 2 | text | | | | (column_name 'C 2')
c3 | character varying(42) | | | | (column_name 'c3')
Server: loopback
FDW options: (schema_name 'import_source', table_name 'x 4')
Foreign table "import_dest1.x 5"
Column | Type | Collation | Nullable | Default | FDW options
--------+------+-----------+----------+---------+-------------
Server: loopback
FDW options: (schema_name 'import_source', table_name 'x 5')
Foreign table "import_dest1.x 6"
Column | Type | Collation | Nullable | Default | FDW options
--------+---------+-----------+----------+-------------------------------------+--------------------
c1 | integer | | | | (column_name 'c1')
c2 | integer | | | generated always as (c1 * 2) stored | (column_name 'c2')
Server: loopback
FDW options: (schema_name 'import_source', table_name 'x 6')
-- Options
CREATE SCHEMA import_dest2;
IMPORT FOREIGN SCHEMA import_source FROM SERVER loopback INTO import_dest2
OPTIONS (import_default 'true');
\det+ import_dest2.*
List of foreign tables
Schema | Table | Server | FDW options | Description
--------------+-------+----------+-------------------------------------------------+-------------
import_dest2 | t1 | loopback | (schema_name 'import_source', table_name 't1') |
import_dest2 | t2 | loopback | (schema_name 'import_source', table_name 't2') |
import_dest2 | t3 | loopback | (schema_name 'import_source', table_name 't3') |
import_dest2 | t4 | loopback | (schema_name 'import_source', table_name 't4') |
import_dest2 | x 4 | loopback | (schema_name 'import_source', table_name 'x 4') |
import_dest2 | x 5 | loopback | (schema_name 'import_source', table_name 'x 5') |
import_dest2 | x 6 | loopback | (schema_name 'import_source', table_name 'x 6') |
(7 rows)
\d import_dest2.*
Foreign table "import_dest2.t1"
Column | Type | Collation | Nullable | Default | FDW options
--------+-------------------+-----------+----------+---------+--------------------
c1 | integer | | | | (column_name 'c1')
c2 | character varying | | not null | | (column_name 'c2')
Server: loopback
FDW options: (schema_name 'import_source', table_name 't1')
Foreign table "import_dest2.t2"
Column | Type | Collation | Nullable | Default | FDW options
--------+-------------------+-----------+----------+---------+--------------------
c1 | integer | | | 42 | (column_name 'c1')
c2 | character varying | | | | (column_name 'c2')
c3 | text | POSIX | | | (column_name 'c3')
Server: loopback
FDW options: (schema_name 'import_source', table_name 't2')
Foreign table "import_dest2.t3"
Column | Type | Collation | Nullable | Default | FDW options
--------+--------------------------+-----------+----------+---------+--------------------
c1 | timestamp with time zone | | | now() | (column_name 'c1')
c2 | typ1 | | | | (column_name 'c2')
Server: loopback
FDW options: (schema_name 'import_source', table_name 't3')
Foreign table "import_dest2.t4"
Column | Type | Collation | Nullable | Default | FDW options
--------+---------+-----------+----------+---------+--------------------
c1 | integer | | | | (column_name 'c1')
Server: loopback
FDW options: (schema_name 'import_source', table_name 't4')
Foreign table "import_dest2.x 4"
Column | Type | Collation | Nullable | Default | FDW options
--------+-----------------------+-----------+----------+---------+---------------------
c1 | double precision | | | | (column_name 'c1')
C 2 | text | | | | (column_name 'C 2')
c3 | character varying(42) | | | | (column_name 'c3')
Server: loopback
FDW options: (schema_name 'import_source', table_name 'x 4')
Foreign table "import_dest2.x 5"
Column | Type | Collation | Nullable | Default | FDW options
--------+------+-----------+----------+---------+-------------
Server: loopback
FDW options: (schema_name 'import_source', table_name 'x 5')
Foreign table "import_dest2.x 6"
Column | Type | Collation | Nullable | Default | FDW options
--------+---------+-----------+----------+-------------------------------------+--------------------
c1 | integer | | | | (column_name 'c1')
c2 | integer | | | generated always as (c1 * 2) stored | (column_name 'c2')
Server: loopback
FDW options: (schema_name 'import_source', table_name 'x 6')
CREATE SCHEMA import_dest3;
IMPORT FOREIGN SCHEMA import_source FROM SERVER loopback INTO import_dest3
OPTIONS (import_collate 'false', import_generated 'false', import_not_null 'false');
\det+ import_dest3.*
List of foreign tables
Schema | Table | Server | FDW options | Description
--------------+-------+----------+-------------------------------------------------+-------------
import_dest3 | t1 | loopback | (schema_name 'import_source', table_name 't1') |
import_dest3 | t2 | loopback | (schema_name 'import_source', table_name 't2') |
import_dest3 | t3 | loopback | (schema_name 'import_source', table_name 't3') |
import_dest3 | t4 | loopback | (schema_name 'import_source', table_name 't4') |
import_dest3 | x 4 | loopback | (schema_name 'import_source', table_name 'x 4') |
import_dest3 | x 5 | loopback | (schema_name 'import_source', table_name 'x 5') |
import_dest3 | x 6 | loopback | (schema_name 'import_source', table_name 'x 6') |
(7 rows)
\d import_dest3.*
Foreign table "import_dest3.t1"
Column | Type | Collation | Nullable | Default | FDW options
--------+-------------------+-----------+----------+---------+--------------------
c1 | integer | | | | (column_name 'c1')
c2 | character varying | | | | (column_name 'c2')
Server: loopback
FDW options: (schema_name 'import_source', table_name 't1')
Foreign table "import_dest3.t2"
Column | Type | Collation | Nullable | Default | FDW options
--------+-------------------+-----------+----------+---------+--------------------
c1 | integer | | | | (column_name 'c1')
c2 | character varying | | | | (column_name 'c2')
c3 | text | | | | (column_name 'c3')
Server: loopback
FDW options: (schema_name 'import_source', table_name 't2')
Foreign table "import_dest3.t3"
Column | Type | Collation | Nullable | Default | FDW options
--------+--------------------------+-----------+----------+---------+--------------------
c1 | timestamp with time zone | | | | (column_name 'c1')
c2 | typ1 | | | | (column_name 'c2')
Server: loopback
FDW options: (schema_name 'import_source', table_name 't3')
Foreign table "import_dest3.t4"
Column | Type | Collation | Nullable | Default | FDW options
--------+---------+-----------+----------+---------+--------------------
c1 | integer | | | | (column_name 'c1')
Server: loopback
FDW options: (schema_name 'import_source', table_name 't4')
Foreign table "import_dest3.x 4"
Column | Type | Collation | Nullable | Default | FDW options
--------+-----------------------+-----------+----------+---------+---------------------
c1 | double precision | | | | (column_name 'c1')
C 2 | text | | | | (column_name 'C 2')
c3 | character varying(42) | | | | (column_name 'c3')
Server: loopback
FDW options: (schema_name 'import_source', table_name 'x 4')
Foreign table "import_dest3.x 5"
Column | Type | Collation | Nullable | Default | FDW options
--------+------+-----------+----------+---------+-------------
Server: loopback
FDW options: (schema_name 'import_source', table_name 'x 5')
Foreign table "import_dest3.x 6"
Column | Type | Collation | Nullable | Default | FDW options
--------+---------+-----------+----------+---------+--------------------
c1 | integer | | | | (column_name 'c1')
c2 | integer | | | | (column_name 'c2')
Server: loopback
FDW options: (schema_name 'import_source', table_name 'x 6')
-- Check LIMIT TO and EXCEPT
CREATE SCHEMA import_dest4;
IMPORT FOREIGN SCHEMA import_source LIMIT TO (t1, nonesuch, t4_part)
FROM SERVER loopback INTO import_dest4;
\det+ import_dest4.*
List of foreign tables
Schema | Table | Server | FDW options | Description
--------------+---------+----------+-----------------------------------------------------+-------------
import_dest4 | t1 | loopback | (schema_name 'import_source', table_name 't1') |
import_dest4 | t4_part | loopback | (schema_name 'import_source', table_name 't4_part') |
(2 rows)
IMPORT FOREIGN SCHEMA import_source EXCEPT (t1, "x 4", nonesuch, t4_part)
FROM SERVER loopback INTO import_dest4;
\det+ import_dest4.*
List of foreign tables
Schema | Table | Server | FDW options | Description
--------------+---------+----------+-----------------------------------------------------+-------------
import_dest4 | t1 | loopback | (schema_name 'import_source', table_name 't1') |
import_dest4 | t2 | loopback | (schema_name 'import_source', table_name 't2') |
import_dest4 | t3 | loopback | (schema_name 'import_source', table_name 't3') |
import_dest4 | t4 | loopback | (schema_name 'import_source', table_name 't4') |
import_dest4 | t4_part | loopback | (schema_name 'import_source', table_name 't4_part') |
import_dest4 | x 5 | loopback | (schema_name 'import_source', table_name 'x 5') |
import_dest4 | x 6 | loopback | (schema_name 'import_source', table_name 'x 6') |
(7 rows)
-- Assorted error cases
IMPORT FOREIGN SCHEMA import_source FROM SERVER loopback INTO import_dest4;
ERROR: relation "t1" already exists
CONTEXT: importing foreign table "t1"
IMPORT FOREIGN SCHEMA nonesuch FROM SERVER loopback INTO import_dest4;
ERROR: schema "nonesuch" is not present on foreign server "loopback"
IMPORT FOREIGN SCHEMA nonesuch FROM SERVER loopback INTO notthere;
ERROR: schema "notthere" does not exist
IMPORT FOREIGN SCHEMA nonesuch FROM SERVER nowhere INTO notthere;
ERROR: server "nowhere" does not exist
-- Check case of a type present only on the remote server.
-- We can fake this by dropping the type locally in our transaction.
CREATE TYPE "Colors" AS ENUM ('red', 'green', 'blue');
CREATE TABLE import_source.t5 (c1 int, c2 text collate "C", "Col" "Colors");
CREATE SCHEMA import_dest5;
BEGIN;
DROP TYPE "Colors" CASCADE;
NOTICE: drop cascades to column Col of table import_source.t5
IMPORT FOREIGN SCHEMA import_source LIMIT TO (t5)
FROM SERVER loopback INTO import_dest5; -- ERROR
ERROR: type "public.Colors" does not exist
LINE 4: "Col" public."Colors" OPTIONS (column_name 'Col')
^
QUERY: CREATE FOREIGN TABLE t5 (
c1 integer OPTIONS (column_name 'c1'),
c2 text OPTIONS (column_name 'c2') COLLATE pg_catalog."C",
"Col" public."Colors" OPTIONS (column_name 'Col')
) SERVER loopback
OPTIONS (schema_name 'import_source', table_name 't5');
CONTEXT: importing foreign table "t5"
ROLLBACK;
BEGIN;
CREATE SERVER fetch101 FOREIGN DATA WRAPPER postgres_fdw OPTIONS( fetch_size '101' );
SELECT count(*)
FROM pg_foreign_server
WHERE srvname = 'fetch101'
AND srvoptions @> array['fetch_size=101'];
count
-------
1
(1 row)
ALTER SERVER fetch101 OPTIONS( SET fetch_size '202' );
SELECT count(*)
FROM pg_foreign_server
WHERE srvname = 'fetch101'
AND srvoptions @> array['fetch_size=101'];
count
-------
0
(1 row)
SELECT count(*)
FROM pg_foreign_server
WHERE srvname = 'fetch101'
AND srvoptions @> array['fetch_size=202'];
count
-------
1
(1 row)
CREATE FOREIGN TABLE table30000 ( x int ) SERVER fetch101 OPTIONS ( fetch_size '30000' );
SELECT COUNT(*)
FROM pg_foreign_table
WHERE ftrelid = 'table30000'::regclass
AND ftoptions @> array['fetch_size=30000'];
count
-------
1
(1 row)
ALTER FOREIGN TABLE table30000 OPTIONS ( SET fetch_size '60000');
SELECT COUNT(*)
FROM pg_foreign_table
WHERE ftrelid = 'table30000'::regclass
AND ftoptions @> array['fetch_size=30000'];
count
-------
0
(1 row)
SELECT COUNT(*)
FROM pg_foreign_table
WHERE ftrelid = 'table30000'::regclass
AND ftoptions @> array['fetch_size=60000'];
count
-------
1
(1 row)
ROLLBACK;
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
-- ===================================================================
-- test partitionwise joins
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
-- ===================================================================
SET enable_partitionwise_join=on;
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
CREATE TABLE fprt1 (a int, b int, c varchar) PARTITION BY RANGE(a);
CREATE TABLE fprt1_p1 (LIKE fprt1);
CREATE TABLE fprt1_p2 (LIKE fprt1);
ALTER TABLE fprt1_p1 SET (autovacuum_enabled = 'false');
ALTER TABLE fprt1_p2 SET (autovacuum_enabled = 'false');
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
INSERT INTO fprt1_p1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 249, 2) i;
INSERT INTO fprt1_p2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(250, 499, 2) i;
CREATE FOREIGN TABLE ftprt1_p1 PARTITION OF fprt1 FOR VALUES FROM (0) TO (250)
SERVER loopback OPTIONS (table_name 'fprt1_p1', use_remote_estimate 'true');
CREATE FOREIGN TABLE ftprt1_p2 PARTITION OF fprt1 FOR VALUES FROM (250) TO (500)
SERVER loopback OPTIONS (TABLE_NAME 'fprt1_p2');
ANALYZE fprt1;
ANALYZE fprt1_p1;
ANALYZE fprt1_p2;
CREATE TABLE fprt2 (a int, b int, c varchar) PARTITION BY RANGE(b);
CREATE TABLE fprt2_p1 (LIKE fprt2);
CREATE TABLE fprt2_p2 (LIKE fprt2);
ALTER TABLE fprt2_p1 SET (autovacuum_enabled = 'false');
ALTER TABLE fprt2_p2 SET (autovacuum_enabled = 'false');
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
INSERT INTO fprt2_p1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 249, 3) i;
INSERT INTO fprt2_p2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(250, 499, 3) i;
Disable support for partitionwise joins in problematic cases. Commit f49842d, which added support for partitionwise joins, built the child's tlist by applying adjust_appendrel_attrs() to the parent's. So in the case where the parent's included a whole-row Var for the parent, the child's contained a ConvertRowtypeExpr. To cope with that, that commit added code to the planner, such as setrefs.c, but some code paths still assumed that the tlist for a scan (or join) rel would only include Vars and PlaceHolderVars, which was true before that commit, causing errors: * When creating an explicit sort node for an input path for a mergejoin path for a child join, prepare_sort_from_pathkeys() threw the 'could not find pathkey item to sort' error. * When deparsing a relation participating in a pushed down child join as a subquery in contrib/postgres_fdw, get_relation_column_alias_ids() threw the 'unexpected expression in subquery output' error. * When performing set_plan_references() on a local join plan generated by contrib/postgres_fdw for EvalPlanQual support for a pushed down child join, fix_join_expr() threw the 'variable not found in subplan target lists' error. To fix these, two approaches have been proposed: one by Ashutosh Bapat and one by me. While the former keeps building the child's tlist with a ConvertRowtypeExpr, the latter builds it with a whole-row Var for the child not to violate the planner assumption, and tries to fix it up later, But both approaches need more work, so refuse to generate partitionwise join paths when whole-row Vars are involved, instead. We don't need to handle ConvertRowtypeExprs in the child's tlists for now, so this commit also removes the changes to the planner. Previously, partitionwise join computed attr_needed data for each child separately, and built the child join's tlist using that data, which also required an extra step for adding PlaceHolderVars to that tlist, but it would be more efficient to build it from the parent join's tlist through the adjust_appendrel_attrs() transformation. So this commit builds that list that way, and simplifies build_joinrel_tlist() and placeholder.c as well as part of set_append_rel_size() to basically what they were before partitionwise join went in. Back-patch to PG11 where partitionwise join was introduced. Report by Rajkumar Raghuwanshi. Analysis by Ashutosh Bapat, who also provided some of regression tests. Patch by me, reviewed by Robert Haas. Discussion: https://postgr.es/m/CAKcux6ktu-8tefLWtQuuZBYFaZA83vUzuRd7c1YHC-yEWyYFpg@mail.gmail.com
2018-08-31 13:34:06 +02:00
CREATE FOREIGN TABLE ftprt2_p1 (b int, c varchar, a int)
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
SERVER loopback OPTIONS (table_name 'fprt2_p1', use_remote_estimate 'true');
Disable support for partitionwise joins in problematic cases. Commit f49842d, which added support for partitionwise joins, built the child's tlist by applying adjust_appendrel_attrs() to the parent's. So in the case where the parent's included a whole-row Var for the parent, the child's contained a ConvertRowtypeExpr. To cope with that, that commit added code to the planner, such as setrefs.c, but some code paths still assumed that the tlist for a scan (or join) rel would only include Vars and PlaceHolderVars, which was true before that commit, causing errors: * When creating an explicit sort node for an input path for a mergejoin path for a child join, prepare_sort_from_pathkeys() threw the 'could not find pathkey item to sort' error. * When deparsing a relation participating in a pushed down child join as a subquery in contrib/postgres_fdw, get_relation_column_alias_ids() threw the 'unexpected expression in subquery output' error. * When performing set_plan_references() on a local join plan generated by contrib/postgres_fdw for EvalPlanQual support for a pushed down child join, fix_join_expr() threw the 'variable not found in subplan target lists' error. To fix these, two approaches have been proposed: one by Ashutosh Bapat and one by me. While the former keeps building the child's tlist with a ConvertRowtypeExpr, the latter builds it with a whole-row Var for the child not to violate the planner assumption, and tries to fix it up later, But both approaches need more work, so refuse to generate partitionwise join paths when whole-row Vars are involved, instead. We don't need to handle ConvertRowtypeExprs in the child's tlists for now, so this commit also removes the changes to the planner. Previously, partitionwise join computed attr_needed data for each child separately, and built the child join's tlist using that data, which also required an extra step for adding PlaceHolderVars to that tlist, but it would be more efficient to build it from the parent join's tlist through the adjust_appendrel_attrs() transformation. So this commit builds that list that way, and simplifies build_joinrel_tlist() and placeholder.c as well as part of set_append_rel_size() to basically what they were before partitionwise join went in. Back-patch to PG11 where partitionwise join was introduced. Report by Rajkumar Raghuwanshi. Analysis by Ashutosh Bapat, who also provided some of regression tests. Patch by me, reviewed by Robert Haas. Discussion: https://postgr.es/m/CAKcux6ktu-8tefLWtQuuZBYFaZA83vUzuRd7c1YHC-yEWyYFpg@mail.gmail.com
2018-08-31 13:34:06 +02:00
ALTER TABLE fprt2 ATTACH PARTITION ftprt2_p1 FOR VALUES FROM (0) TO (250);
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
CREATE FOREIGN TABLE ftprt2_p2 PARTITION OF fprt2 FOR VALUES FROM (250) TO (500)
SERVER loopback OPTIONS (table_name 'fprt2_p2', use_remote_estimate 'true');
ANALYZE fprt2;
ANALYZE fprt2_p1;
ANALYZE fprt2_p2;
-- inner join three tables
EXPLAIN (COSTS OFF)
SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER JOIN fprt1 t3 ON (t2.b = t3.a) WHERE t1.a % 25 =0 ORDER BY 1,2,3;
QUERY PLAN
-----------------------------------------------------------------------------------------------------
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
Sort
Sort Key: t1.a, t3.c
-> Append
-> Foreign Scan
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
Relations: ((ftprt1_p1 t1_1) INNER JOIN (ftprt2_p1 t2_1)) INNER JOIN (ftprt1_p1 t3_1)
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
-> Foreign Scan
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
Relations: ((ftprt1_p2 t1_2) INNER JOIN (ftprt2_p2 t2_2)) INNER JOIN (ftprt1_p2 t3_2)
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
(7 rows)
SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER JOIN fprt1 t3 ON (t2.b = t3.a) WHERE t1.a % 25 =0 ORDER BY 1,2,3;
a | b | c
-----+-----+------
0 | 0 | 0000
150 | 150 | 0003
250 | 250 | 0005
400 | 400 | 0008
(4 rows)
-- left outer join + nullable clause
EXPLAIN (VERBOSE, COSTS OFF)
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Output: t1.a, fprt2.b, fprt2.c
Relations: (public.ftprt1_p1 t1) LEFT JOIN (public.ftprt2_p1 fprt2)
Remote SQL: SELECT r5.a, r6.b, r6.c FROM (public.fprt1_p1 r5 LEFT JOIN public.fprt2_p1 r6 ON (((r5.a = r6.b)) AND ((r5.b = r6.a)) AND ((r6.a < 10)))) WHERE ((r5.a < 10)) ORDER BY r5.a ASC NULLS LAST, r6.b ASC NULLS LAST, r6.c ASC NULLS LAST
(4 rows)
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
a | b | c
---+---+------
0 | 0 | 0000
2 | |
4 | |
6 | 6 | 0000
8 | |
(5 rows)
Disable support for partitionwise joins in problematic cases. Commit f49842d, which added support for partitionwise joins, built the child's tlist by applying adjust_appendrel_attrs() to the parent's. So in the case where the parent's included a whole-row Var for the parent, the child's contained a ConvertRowtypeExpr. To cope with that, that commit added code to the planner, such as setrefs.c, but some code paths still assumed that the tlist for a scan (or join) rel would only include Vars and PlaceHolderVars, which was true before that commit, causing errors: * When creating an explicit sort node for an input path for a mergejoin path for a child join, prepare_sort_from_pathkeys() threw the 'could not find pathkey item to sort' error. * When deparsing a relation participating in a pushed down child join as a subquery in contrib/postgres_fdw, get_relation_column_alias_ids() threw the 'unexpected expression in subquery output' error. * When performing set_plan_references() on a local join plan generated by contrib/postgres_fdw for EvalPlanQual support for a pushed down child join, fix_join_expr() threw the 'variable not found in subplan target lists' error. To fix these, two approaches have been proposed: one by Ashutosh Bapat and one by me. While the former keeps building the child's tlist with a ConvertRowtypeExpr, the latter builds it with a whole-row Var for the child not to violate the planner assumption, and tries to fix it up later, But both approaches need more work, so refuse to generate partitionwise join paths when whole-row Vars are involved, instead. We don't need to handle ConvertRowtypeExprs in the child's tlists for now, so this commit also removes the changes to the planner. Previously, partitionwise join computed attr_needed data for each child separately, and built the child join's tlist using that data, which also required an extra step for adding PlaceHolderVars to that tlist, but it would be more efficient to build it from the parent join's tlist through the adjust_appendrel_attrs() transformation. So this commit builds that list that way, and simplifies build_joinrel_tlist() and placeholder.c as well as part of set_append_rel_size() to basically what they were before partitionwise join went in. Back-patch to PG11 where partitionwise join was introduced. Report by Rajkumar Raghuwanshi. Analysis by Ashutosh Bapat, who also provided some of regression tests. Patch by me, reviewed by Robert Haas. Discussion: https://postgr.es/m/CAKcux6ktu-8tefLWtQuuZBYFaZA83vUzuRd7c1YHC-yEWyYFpg@mail.gmail.com
2018-08-31 13:34:06 +02:00
-- with whole-row reference; partitionwise join does not apply
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
EXPLAIN (COSTS OFF)
Disable support for partitionwise joins in problematic cases. Commit f49842d, which added support for partitionwise joins, built the child's tlist by applying adjust_appendrel_attrs() to the parent's. So in the case where the parent's included a whole-row Var for the parent, the child's contained a ConvertRowtypeExpr. To cope with that, that commit added code to the planner, such as setrefs.c, but some code paths still assumed that the tlist for a scan (or join) rel would only include Vars and PlaceHolderVars, which was true before that commit, causing errors: * When creating an explicit sort node for an input path for a mergejoin path for a child join, prepare_sort_from_pathkeys() threw the 'could not find pathkey item to sort' error. * When deparsing a relation participating in a pushed down child join as a subquery in contrib/postgres_fdw, get_relation_column_alias_ids() threw the 'unexpected expression in subquery output' error. * When performing set_plan_references() on a local join plan generated by contrib/postgres_fdw for EvalPlanQual support for a pushed down child join, fix_join_expr() threw the 'variable not found in subplan target lists' error. To fix these, two approaches have been proposed: one by Ashutosh Bapat and one by me. While the former keeps building the child's tlist with a ConvertRowtypeExpr, the latter builds it with a whole-row Var for the child not to violate the planner assumption, and tries to fix it up later, But both approaches need more work, so refuse to generate partitionwise join paths when whole-row Vars are involved, instead. We don't need to handle ConvertRowtypeExprs in the child's tlists for now, so this commit also removes the changes to the planner. Previously, partitionwise join computed attr_needed data for each child separately, and built the child join's tlist using that data, which also required an extra step for adding PlaceHolderVars to that tlist, but it would be more efficient to build it from the parent join's tlist through the adjust_appendrel_attrs() transformation. So this commit builds that list that way, and simplifies build_joinrel_tlist() and placeholder.c as well as part of set_append_rel_size() to basically what they were before partitionwise join went in. Back-patch to PG11 where partitionwise join was introduced. Report by Rajkumar Raghuwanshi. Analysis by Ashutosh Bapat, who also provided some of regression tests. Patch by me, reviewed by Robert Haas. Discussion: https://postgr.es/m/CAKcux6ktu-8tefLWtQuuZBYFaZA83vUzuRd7c1YHC-yEWyYFpg@mail.gmail.com
2018-08-31 13:34:06 +02:00
SELECT t1.wr, t2.wr FROM (SELECT t1 wr, a FROM fprt1 t1 WHERE t1.a % 25 = 0) t1 FULL JOIN (SELECT t2 wr, b FROM fprt2 t2 WHERE t2.b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY 1,2;
QUERY PLAN
--------------------------------------------------------
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
Sort
Sort Key: ((t1.*)::fprt1), ((t2.*)::fprt2)
Disable support for partitionwise joins in problematic cases. Commit f49842d, which added support for partitionwise joins, built the child's tlist by applying adjust_appendrel_attrs() to the parent's. So in the case where the parent's included a whole-row Var for the parent, the child's contained a ConvertRowtypeExpr. To cope with that, that commit added code to the planner, such as setrefs.c, but some code paths still assumed that the tlist for a scan (or join) rel would only include Vars and PlaceHolderVars, which was true before that commit, causing errors: * When creating an explicit sort node for an input path for a mergejoin path for a child join, prepare_sort_from_pathkeys() threw the 'could not find pathkey item to sort' error. * When deparsing a relation participating in a pushed down child join as a subquery in contrib/postgres_fdw, get_relation_column_alias_ids() threw the 'unexpected expression in subquery output' error. * When performing set_plan_references() on a local join plan generated by contrib/postgres_fdw for EvalPlanQual support for a pushed down child join, fix_join_expr() threw the 'variable not found in subplan target lists' error. To fix these, two approaches have been proposed: one by Ashutosh Bapat and one by me. While the former keeps building the child's tlist with a ConvertRowtypeExpr, the latter builds it with a whole-row Var for the child not to violate the planner assumption, and tries to fix it up later, But both approaches need more work, so refuse to generate partitionwise join paths when whole-row Vars are involved, instead. We don't need to handle ConvertRowtypeExprs in the child's tlists for now, so this commit also removes the changes to the planner. Previously, partitionwise join computed attr_needed data for each child separately, and built the child join's tlist using that data, which also required an extra step for adding PlaceHolderVars to that tlist, but it would be more efficient to build it from the parent join's tlist through the adjust_appendrel_attrs() transformation. So this commit builds that list that way, and simplifies build_joinrel_tlist() and placeholder.c as well as part of set_append_rel_size() to basically what they were before partitionwise join went in. Back-patch to PG11 where partitionwise join was introduced. Report by Rajkumar Raghuwanshi. Analysis by Ashutosh Bapat, who also provided some of regression tests. Patch by me, reviewed by Robert Haas. Discussion: https://postgr.es/m/CAKcux6ktu-8tefLWtQuuZBYFaZA83vUzuRd7c1YHC-yEWyYFpg@mail.gmail.com
2018-08-31 13:34:06 +02:00
-> Hash Full Join
Hash Cond: (t1.a = t2.b)
-> Append
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Foreign Scan on ftprt1_p1 t1_1
-> Foreign Scan on ftprt1_p2 t1_2
Disable support for partitionwise joins in problematic cases. Commit f49842d, which added support for partitionwise joins, built the child's tlist by applying adjust_appendrel_attrs() to the parent's. So in the case where the parent's included a whole-row Var for the parent, the child's contained a ConvertRowtypeExpr. To cope with that, that commit added code to the planner, such as setrefs.c, but some code paths still assumed that the tlist for a scan (or join) rel would only include Vars and PlaceHolderVars, which was true before that commit, causing errors: * When creating an explicit sort node for an input path for a mergejoin path for a child join, prepare_sort_from_pathkeys() threw the 'could not find pathkey item to sort' error. * When deparsing a relation participating in a pushed down child join as a subquery in contrib/postgres_fdw, get_relation_column_alias_ids() threw the 'unexpected expression in subquery output' error. * When performing set_plan_references() on a local join plan generated by contrib/postgres_fdw for EvalPlanQual support for a pushed down child join, fix_join_expr() threw the 'variable not found in subplan target lists' error. To fix these, two approaches have been proposed: one by Ashutosh Bapat and one by me. While the former keeps building the child's tlist with a ConvertRowtypeExpr, the latter builds it with a whole-row Var for the child not to violate the planner assumption, and tries to fix it up later, But both approaches need more work, so refuse to generate partitionwise join paths when whole-row Vars are involved, instead. We don't need to handle ConvertRowtypeExprs in the child's tlists for now, so this commit also removes the changes to the planner. Previously, partitionwise join computed attr_needed data for each child separately, and built the child join's tlist using that data, which also required an extra step for adding PlaceHolderVars to that tlist, but it would be more efficient to build it from the parent join's tlist through the adjust_appendrel_attrs() transformation. So this commit builds that list that way, and simplifies build_joinrel_tlist() and placeholder.c as well as part of set_append_rel_size() to basically what they were before partitionwise join went in. Back-patch to PG11 where partitionwise join was introduced. Report by Rajkumar Raghuwanshi. Analysis by Ashutosh Bapat, who also provided some of regression tests. Patch by me, reviewed by Robert Haas. Discussion: https://postgr.es/m/CAKcux6ktu-8tefLWtQuuZBYFaZA83vUzuRd7c1YHC-yEWyYFpg@mail.gmail.com
2018-08-31 13:34:06 +02:00
-> Hash
-> Append
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Foreign Scan on ftprt2_p1 t2_1
-> Foreign Scan on ftprt2_p2 t2_2
Disable support for partitionwise joins in problematic cases. Commit f49842d, which added support for partitionwise joins, built the child's tlist by applying adjust_appendrel_attrs() to the parent's. So in the case where the parent's included a whole-row Var for the parent, the child's contained a ConvertRowtypeExpr. To cope with that, that commit added code to the planner, such as setrefs.c, but some code paths still assumed that the tlist for a scan (or join) rel would only include Vars and PlaceHolderVars, which was true before that commit, causing errors: * When creating an explicit sort node for an input path for a mergejoin path for a child join, prepare_sort_from_pathkeys() threw the 'could not find pathkey item to sort' error. * When deparsing a relation participating in a pushed down child join as a subquery in contrib/postgres_fdw, get_relation_column_alias_ids() threw the 'unexpected expression in subquery output' error. * When performing set_plan_references() on a local join plan generated by contrib/postgres_fdw for EvalPlanQual support for a pushed down child join, fix_join_expr() threw the 'variable not found in subplan target lists' error. To fix these, two approaches have been proposed: one by Ashutosh Bapat and one by me. While the former keeps building the child's tlist with a ConvertRowtypeExpr, the latter builds it with a whole-row Var for the child not to violate the planner assumption, and tries to fix it up later, But both approaches need more work, so refuse to generate partitionwise join paths when whole-row Vars are involved, instead. We don't need to handle ConvertRowtypeExprs in the child's tlists for now, so this commit also removes the changes to the planner. Previously, partitionwise join computed attr_needed data for each child separately, and built the child join's tlist using that data, which also required an extra step for adding PlaceHolderVars to that tlist, but it would be more efficient to build it from the parent join's tlist through the adjust_appendrel_attrs() transformation. So this commit builds that list that way, and simplifies build_joinrel_tlist() and placeholder.c as well as part of set_append_rel_size() to basically what they were before partitionwise join went in. Back-patch to PG11 where partitionwise join was introduced. Report by Rajkumar Raghuwanshi. Analysis by Ashutosh Bapat, who also provided some of regression tests. Patch by me, reviewed by Robert Haas. Discussion: https://postgr.es/m/CAKcux6ktu-8tefLWtQuuZBYFaZA83vUzuRd7c1YHC-yEWyYFpg@mail.gmail.com
2018-08-31 13:34:06 +02:00
(11 rows)
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
Disable support for partitionwise joins in problematic cases. Commit f49842d, which added support for partitionwise joins, built the child's tlist by applying adjust_appendrel_attrs() to the parent's. So in the case where the parent's included a whole-row Var for the parent, the child's contained a ConvertRowtypeExpr. To cope with that, that commit added code to the planner, such as setrefs.c, but some code paths still assumed that the tlist for a scan (or join) rel would only include Vars and PlaceHolderVars, which was true before that commit, causing errors: * When creating an explicit sort node for an input path for a mergejoin path for a child join, prepare_sort_from_pathkeys() threw the 'could not find pathkey item to sort' error. * When deparsing a relation participating in a pushed down child join as a subquery in contrib/postgres_fdw, get_relation_column_alias_ids() threw the 'unexpected expression in subquery output' error. * When performing set_plan_references() on a local join plan generated by contrib/postgres_fdw for EvalPlanQual support for a pushed down child join, fix_join_expr() threw the 'variable not found in subplan target lists' error. To fix these, two approaches have been proposed: one by Ashutosh Bapat and one by me. While the former keeps building the child's tlist with a ConvertRowtypeExpr, the latter builds it with a whole-row Var for the child not to violate the planner assumption, and tries to fix it up later, But both approaches need more work, so refuse to generate partitionwise join paths when whole-row Vars are involved, instead. We don't need to handle ConvertRowtypeExprs in the child's tlists for now, so this commit also removes the changes to the planner. Previously, partitionwise join computed attr_needed data for each child separately, and built the child join's tlist using that data, which also required an extra step for adding PlaceHolderVars to that tlist, but it would be more efficient to build it from the parent join's tlist through the adjust_appendrel_attrs() transformation. So this commit builds that list that way, and simplifies build_joinrel_tlist() and placeholder.c as well as part of set_append_rel_size() to basically what they were before partitionwise join went in. Back-patch to PG11 where partitionwise join was introduced. Report by Rajkumar Raghuwanshi. Analysis by Ashutosh Bapat, who also provided some of regression tests. Patch by me, reviewed by Robert Haas. Discussion: https://postgr.es/m/CAKcux6ktu-8tefLWtQuuZBYFaZA83vUzuRd7c1YHC-yEWyYFpg@mail.gmail.com
2018-08-31 13:34:06 +02:00
SELECT t1.wr, t2.wr FROM (SELECT t1 wr, a FROM fprt1 t1 WHERE t1.a % 25 = 0) t1 FULL JOIN (SELECT t2 wr, b FROM fprt2 t2 WHERE t2.b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY 1,2;
wr | wr
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
----------------+----------------
(0,0,0000) | (0,0,0000)
Disable support for partitionwise joins in problematic cases. Commit f49842d, which added support for partitionwise joins, built the child's tlist by applying adjust_appendrel_attrs() to the parent's. So in the case where the parent's included a whole-row Var for the parent, the child's contained a ConvertRowtypeExpr. To cope with that, that commit added code to the planner, such as setrefs.c, but some code paths still assumed that the tlist for a scan (or join) rel would only include Vars and PlaceHolderVars, which was true before that commit, causing errors: * When creating an explicit sort node for an input path for a mergejoin path for a child join, prepare_sort_from_pathkeys() threw the 'could not find pathkey item to sort' error. * When deparsing a relation participating in a pushed down child join as a subquery in contrib/postgres_fdw, get_relation_column_alias_ids() threw the 'unexpected expression in subquery output' error. * When performing set_plan_references() on a local join plan generated by contrib/postgres_fdw for EvalPlanQual support for a pushed down child join, fix_join_expr() threw the 'variable not found in subplan target lists' error. To fix these, two approaches have been proposed: one by Ashutosh Bapat and one by me. While the former keeps building the child's tlist with a ConvertRowtypeExpr, the latter builds it with a whole-row Var for the child not to violate the planner assumption, and tries to fix it up later, But both approaches need more work, so refuse to generate partitionwise join paths when whole-row Vars are involved, instead. We don't need to handle ConvertRowtypeExprs in the child's tlists for now, so this commit also removes the changes to the planner. Previously, partitionwise join computed attr_needed data for each child separately, and built the child join's tlist using that data, which also required an extra step for adding PlaceHolderVars to that tlist, but it would be more efficient to build it from the parent join's tlist through the adjust_appendrel_attrs() transformation. So this commit builds that list that way, and simplifies build_joinrel_tlist() and placeholder.c as well as part of set_append_rel_size() to basically what they were before partitionwise join went in. Back-patch to PG11 where partitionwise join was introduced. Report by Rajkumar Raghuwanshi. Analysis by Ashutosh Bapat, who also provided some of regression tests. Patch by me, reviewed by Robert Haas. Discussion: https://postgr.es/m/CAKcux6ktu-8tefLWtQuuZBYFaZA83vUzuRd7c1YHC-yEWyYFpg@mail.gmail.com
2018-08-31 13:34:06 +02:00
(50,50,0001) |
(100,100,0002) |
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
(150,150,0003) | (150,150,0003)
Disable support for partitionwise joins in problematic cases. Commit f49842d, which added support for partitionwise joins, built the child's tlist by applying adjust_appendrel_attrs() to the parent's. So in the case where the parent's included a whole-row Var for the parent, the child's contained a ConvertRowtypeExpr. To cope with that, that commit added code to the planner, such as setrefs.c, but some code paths still assumed that the tlist for a scan (or join) rel would only include Vars and PlaceHolderVars, which was true before that commit, causing errors: * When creating an explicit sort node for an input path for a mergejoin path for a child join, prepare_sort_from_pathkeys() threw the 'could not find pathkey item to sort' error. * When deparsing a relation participating in a pushed down child join as a subquery in contrib/postgres_fdw, get_relation_column_alias_ids() threw the 'unexpected expression in subquery output' error. * When performing set_plan_references() on a local join plan generated by contrib/postgres_fdw for EvalPlanQual support for a pushed down child join, fix_join_expr() threw the 'variable not found in subplan target lists' error. To fix these, two approaches have been proposed: one by Ashutosh Bapat and one by me. While the former keeps building the child's tlist with a ConvertRowtypeExpr, the latter builds it with a whole-row Var for the child not to violate the planner assumption, and tries to fix it up later, But both approaches need more work, so refuse to generate partitionwise join paths when whole-row Vars are involved, instead. We don't need to handle ConvertRowtypeExprs in the child's tlists for now, so this commit also removes the changes to the planner. Previously, partitionwise join computed attr_needed data for each child separately, and built the child join's tlist using that data, which also required an extra step for adding PlaceHolderVars to that tlist, but it would be more efficient to build it from the parent join's tlist through the adjust_appendrel_attrs() transformation. So this commit builds that list that way, and simplifies build_joinrel_tlist() and placeholder.c as well as part of set_append_rel_size() to basically what they were before partitionwise join went in. Back-patch to PG11 where partitionwise join was introduced. Report by Rajkumar Raghuwanshi. Analysis by Ashutosh Bapat, who also provided some of regression tests. Patch by me, reviewed by Robert Haas. Discussion: https://postgr.es/m/CAKcux6ktu-8tefLWtQuuZBYFaZA83vUzuRd7c1YHC-yEWyYFpg@mail.gmail.com
2018-08-31 13:34:06 +02:00
(200,200,0004) |
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
(250,250,0005) | (250,250,0005)
Disable support for partitionwise joins in problematic cases. Commit f49842d, which added support for partitionwise joins, built the child's tlist by applying adjust_appendrel_attrs() to the parent's. So in the case where the parent's included a whole-row Var for the parent, the child's contained a ConvertRowtypeExpr. To cope with that, that commit added code to the planner, such as setrefs.c, but some code paths still assumed that the tlist for a scan (or join) rel would only include Vars and PlaceHolderVars, which was true before that commit, causing errors: * When creating an explicit sort node for an input path for a mergejoin path for a child join, prepare_sort_from_pathkeys() threw the 'could not find pathkey item to sort' error. * When deparsing a relation participating in a pushed down child join as a subquery in contrib/postgres_fdw, get_relation_column_alias_ids() threw the 'unexpected expression in subquery output' error. * When performing set_plan_references() on a local join plan generated by contrib/postgres_fdw for EvalPlanQual support for a pushed down child join, fix_join_expr() threw the 'variable not found in subplan target lists' error. To fix these, two approaches have been proposed: one by Ashutosh Bapat and one by me. While the former keeps building the child's tlist with a ConvertRowtypeExpr, the latter builds it with a whole-row Var for the child not to violate the planner assumption, and tries to fix it up later, But both approaches need more work, so refuse to generate partitionwise join paths when whole-row Vars are involved, instead. We don't need to handle ConvertRowtypeExprs in the child's tlists for now, so this commit also removes the changes to the planner. Previously, partitionwise join computed attr_needed data for each child separately, and built the child join's tlist using that data, which also required an extra step for adding PlaceHolderVars to that tlist, but it would be more efficient to build it from the parent join's tlist through the adjust_appendrel_attrs() transformation. So this commit builds that list that way, and simplifies build_joinrel_tlist() and placeholder.c as well as part of set_append_rel_size() to basically what they were before partitionwise join went in. Back-patch to PG11 where partitionwise join was introduced. Report by Rajkumar Raghuwanshi. Analysis by Ashutosh Bapat, who also provided some of regression tests. Patch by me, reviewed by Robert Haas. Discussion: https://postgr.es/m/CAKcux6ktu-8tefLWtQuuZBYFaZA83vUzuRd7c1YHC-yEWyYFpg@mail.gmail.com
2018-08-31 13:34:06 +02:00
(300,300,0006) |
(350,350,0007) |
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
(400,400,0008) | (400,400,0008)
Disable support for partitionwise joins in problematic cases. Commit f49842d, which added support for partitionwise joins, built the child's tlist by applying adjust_appendrel_attrs() to the parent's. So in the case where the parent's included a whole-row Var for the parent, the child's contained a ConvertRowtypeExpr. To cope with that, that commit added code to the planner, such as setrefs.c, but some code paths still assumed that the tlist for a scan (or join) rel would only include Vars and PlaceHolderVars, which was true before that commit, causing errors: * When creating an explicit sort node for an input path for a mergejoin path for a child join, prepare_sort_from_pathkeys() threw the 'could not find pathkey item to sort' error. * When deparsing a relation participating in a pushed down child join as a subquery in contrib/postgres_fdw, get_relation_column_alias_ids() threw the 'unexpected expression in subquery output' error. * When performing set_plan_references() on a local join plan generated by contrib/postgres_fdw for EvalPlanQual support for a pushed down child join, fix_join_expr() threw the 'variable not found in subplan target lists' error. To fix these, two approaches have been proposed: one by Ashutosh Bapat and one by me. While the former keeps building the child's tlist with a ConvertRowtypeExpr, the latter builds it with a whole-row Var for the child not to violate the planner assumption, and tries to fix it up later, But both approaches need more work, so refuse to generate partitionwise join paths when whole-row Vars are involved, instead. We don't need to handle ConvertRowtypeExprs in the child's tlists for now, so this commit also removes the changes to the planner. Previously, partitionwise join computed attr_needed data for each child separately, and built the child join's tlist using that data, which also required an extra step for adding PlaceHolderVars to that tlist, but it would be more efficient to build it from the parent join's tlist through the adjust_appendrel_attrs() transformation. So this commit builds that list that way, and simplifies build_joinrel_tlist() and placeholder.c as well as part of set_append_rel_size() to basically what they were before partitionwise join went in. Back-patch to PG11 where partitionwise join was introduced. Report by Rajkumar Raghuwanshi. Analysis by Ashutosh Bapat, who also provided some of regression tests. Patch by me, reviewed by Robert Haas. Discussion: https://postgr.es/m/CAKcux6ktu-8tefLWtQuuZBYFaZA83vUzuRd7c1YHC-yEWyYFpg@mail.gmail.com
2018-08-31 13:34:06 +02:00
(450,450,0009) |
| (75,75,0001)
| (225,225,0004)
| (325,325,0006)
| (475,475,0009)
(14 rows)
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
-- join with lateral reference
EXPLAIN (COSTS OFF)
SELECT t1.a,t1.b FROM fprt1 t1, LATERAL (SELECT t2.a, t2.b FROM fprt2 t2 WHERE t1.a = t2.b AND t1.b = t2.a) q WHERE t1.a%25 = 0 ORDER BY 1,2;
QUERY PLAN
-----------------------------------------------------------------------
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
Sort
Sort Key: t1.a, t1.b
-> Append
-> Foreign Scan
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
Relations: (ftprt1_p1 t1_1) INNER JOIN (ftprt2_p1 t2_1)
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
-> Foreign Scan
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
Relations: (ftprt1_p2 t1_2) INNER JOIN (ftprt2_p2 t2_2)
Basic partition-wise join functionality. Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
(7 rows)
SELECT t1.a,t1.b FROM fprt1 t1, LATERAL (SELECT t2.a, t2.b FROM fprt2 t2 WHERE t1.a = t2.b AND t1.b = t2.a) q WHERE t1.a%25 = 0 ORDER BY 1,2;
a | b
-----+-----
0 | 0
150 | 150
250 | 250
400 | 400
(4 rows)
-- with PHVs, partitionwise join selected but no join pushdown
EXPLAIN (COSTS OFF)
SELECT t1.a, t1.phv, t2.b, t2.phv FROM (SELECT 't1_phv' phv, * FROM fprt1 WHERE a % 25 = 0) t1 FULL JOIN (SELECT 't2_phv' phv, * FROM fprt2 WHERE b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY t1.a, t2.b;
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
QUERY PLAN
-----------------------------------------------------------
Sort
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Sort Key: fprt1.a, fprt2.b
-> Append
-> Hash Full Join
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
Hash Cond: (fprt1_1.a = fprt2_1.b)
-> Foreign Scan on ftprt1_p1 fprt1_1
-> Hash
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Foreign Scan on ftprt2_p1 fprt2_1
-> Hash Full Join
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
Hash Cond: (fprt1_2.a = fprt2_2.b)
-> Foreign Scan on ftprt1_p2 fprt1_2
-> Hash
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Foreign Scan on ftprt2_p2 fprt2_2
(13 rows)
SELECT t1.a, t1.phv, t2.b, t2.phv FROM (SELECT 't1_phv' phv, * FROM fprt1 WHERE a % 25 = 0) t1 FULL JOIN (SELECT 't2_phv' phv, * FROM fprt2 WHERE b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY t1.a, t2.b;
a | phv | b | phv
-----+--------+-----+--------
0 | t1_phv | 0 | t2_phv
50 | t1_phv | |
100 | t1_phv | |
150 | t1_phv | 150 | t2_phv
200 | t1_phv | |
250 | t1_phv | 250 | t2_phv
300 | t1_phv | |
350 | t1_phv | |
400 | t1_phv | 400 | t2_phv
450 | t1_phv | |
| | 75 | t2_phv
| | 225 | t2_phv
| | 325 | t2_phv
| | 475 | t2_phv
(14 rows)
Disable support for partitionwise joins in problematic cases. Commit f49842d, which added support for partitionwise joins, built the child's tlist by applying adjust_appendrel_attrs() to the parent's. So in the case where the parent's included a whole-row Var for the parent, the child's contained a ConvertRowtypeExpr. To cope with that, that commit added code to the planner, such as setrefs.c, but some code paths still assumed that the tlist for a scan (or join) rel would only include Vars and PlaceHolderVars, which was true before that commit, causing errors: * When creating an explicit sort node for an input path for a mergejoin path for a child join, prepare_sort_from_pathkeys() threw the 'could not find pathkey item to sort' error. * When deparsing a relation participating in a pushed down child join as a subquery in contrib/postgres_fdw, get_relation_column_alias_ids() threw the 'unexpected expression in subquery output' error. * When performing set_plan_references() on a local join plan generated by contrib/postgres_fdw for EvalPlanQual support for a pushed down child join, fix_join_expr() threw the 'variable not found in subplan target lists' error. To fix these, two approaches have been proposed: one by Ashutosh Bapat and one by me. While the former keeps building the child's tlist with a ConvertRowtypeExpr, the latter builds it with a whole-row Var for the child not to violate the planner assumption, and tries to fix it up later, But both approaches need more work, so refuse to generate partitionwise join paths when whole-row Vars are involved, instead. We don't need to handle ConvertRowtypeExprs in the child's tlists for now, so this commit also removes the changes to the planner. Previously, partitionwise join computed attr_needed data for each child separately, and built the child join's tlist using that data, which also required an extra step for adding PlaceHolderVars to that tlist, but it would be more efficient to build it from the parent join's tlist through the adjust_appendrel_attrs() transformation. So this commit builds that list that way, and simplifies build_joinrel_tlist() and placeholder.c as well as part of set_append_rel_size() to basically what they were before partitionwise join went in. Back-patch to PG11 where partitionwise join was introduced. Report by Rajkumar Raghuwanshi. Analysis by Ashutosh Bapat, who also provided some of regression tests. Patch by me, reviewed by Robert Haas. Discussion: https://postgr.es/m/CAKcux6ktu-8tefLWtQuuZBYFaZA83vUzuRd7c1YHC-yEWyYFpg@mail.gmail.com
2018-08-31 13:34:06 +02:00
-- test FOR UPDATE; partitionwise join does not apply
EXPLAIN (COSTS OFF)
SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1;
QUERY PLAN
--------------------------------------------------------------
LockRows
-> Sort
Sort Key: t1.a
-> Hash Join
Hash Cond: (t2.b = t1.a)
-> Append
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Foreign Scan on ftprt2_p1 t2_1
-> Foreign Scan on ftprt2_p2 t2_2
Disable support for partitionwise joins in problematic cases. Commit f49842d, which added support for partitionwise joins, built the child's tlist by applying adjust_appendrel_attrs() to the parent's. So in the case where the parent's included a whole-row Var for the parent, the child's contained a ConvertRowtypeExpr. To cope with that, that commit added code to the planner, such as setrefs.c, but some code paths still assumed that the tlist for a scan (or join) rel would only include Vars and PlaceHolderVars, which was true before that commit, causing errors: * When creating an explicit sort node for an input path for a mergejoin path for a child join, prepare_sort_from_pathkeys() threw the 'could not find pathkey item to sort' error. * When deparsing a relation participating in a pushed down child join as a subquery in contrib/postgres_fdw, get_relation_column_alias_ids() threw the 'unexpected expression in subquery output' error. * When performing set_plan_references() on a local join plan generated by contrib/postgres_fdw for EvalPlanQual support for a pushed down child join, fix_join_expr() threw the 'variable not found in subplan target lists' error. To fix these, two approaches have been proposed: one by Ashutosh Bapat and one by me. While the former keeps building the child's tlist with a ConvertRowtypeExpr, the latter builds it with a whole-row Var for the child not to violate the planner assumption, and tries to fix it up later, But both approaches need more work, so refuse to generate partitionwise join paths when whole-row Vars are involved, instead. We don't need to handle ConvertRowtypeExprs in the child's tlists for now, so this commit also removes the changes to the planner. Previously, partitionwise join computed attr_needed data for each child separately, and built the child join's tlist using that data, which also required an extra step for adding PlaceHolderVars to that tlist, but it would be more efficient to build it from the parent join's tlist through the adjust_appendrel_attrs() transformation. So this commit builds that list that way, and simplifies build_joinrel_tlist() and placeholder.c as well as part of set_append_rel_size() to basically what they were before partitionwise join went in. Back-patch to PG11 where partitionwise join was introduced. Report by Rajkumar Raghuwanshi. Analysis by Ashutosh Bapat, who also provided some of regression tests. Patch by me, reviewed by Robert Haas. Discussion: https://postgr.es/m/CAKcux6ktu-8tefLWtQuuZBYFaZA83vUzuRd7c1YHC-yEWyYFpg@mail.gmail.com
2018-08-31 13:34:06 +02:00
-> Hash
-> Append
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Foreign Scan on ftprt1_p1 t1_1
-> Foreign Scan on ftprt1_p2 t1_2
Disable support for partitionwise joins in problematic cases. Commit f49842d, which added support for partitionwise joins, built the child's tlist by applying adjust_appendrel_attrs() to the parent's. So in the case where the parent's included a whole-row Var for the parent, the child's contained a ConvertRowtypeExpr. To cope with that, that commit added code to the planner, such as setrefs.c, but some code paths still assumed that the tlist for a scan (or join) rel would only include Vars and PlaceHolderVars, which was true before that commit, causing errors: * When creating an explicit sort node for an input path for a mergejoin path for a child join, prepare_sort_from_pathkeys() threw the 'could not find pathkey item to sort' error. * When deparsing a relation participating in a pushed down child join as a subquery in contrib/postgres_fdw, get_relation_column_alias_ids() threw the 'unexpected expression in subquery output' error. * When performing set_plan_references() on a local join plan generated by contrib/postgres_fdw for EvalPlanQual support for a pushed down child join, fix_join_expr() threw the 'variable not found in subplan target lists' error. To fix these, two approaches have been proposed: one by Ashutosh Bapat and one by me. While the former keeps building the child's tlist with a ConvertRowtypeExpr, the latter builds it with a whole-row Var for the child not to violate the planner assumption, and tries to fix it up later, But both approaches need more work, so refuse to generate partitionwise join paths when whole-row Vars are involved, instead. We don't need to handle ConvertRowtypeExprs in the child's tlists for now, so this commit also removes the changes to the planner. Previously, partitionwise join computed attr_needed data for each child separately, and built the child join's tlist using that data, which also required an extra step for adding PlaceHolderVars to that tlist, but it would be more efficient to build it from the parent join's tlist through the adjust_appendrel_attrs() transformation. So this commit builds that list that way, and simplifies build_joinrel_tlist() and placeholder.c as well as part of set_append_rel_size() to basically what they were before partitionwise join went in. Back-patch to PG11 where partitionwise join was introduced. Report by Rajkumar Raghuwanshi. Analysis by Ashutosh Bapat, who also provided some of regression tests. Patch by me, reviewed by Robert Haas. Discussion: https://postgr.es/m/CAKcux6ktu-8tefLWtQuuZBYFaZA83vUzuRd7c1YHC-yEWyYFpg@mail.gmail.com
2018-08-31 13:34:06 +02:00
(12 rows)
SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1;
a | b
-----+-----
0 | 0
150 | 150
250 | 250
400 | 400
(4 rows)
RESET enable_partitionwise_join;
-- ===================================================================
-- test partitionwise aggregates
-- ===================================================================
CREATE TABLE pagg_tab (a int, b int, c text) PARTITION BY RANGE(a);
CREATE TABLE pagg_tab_p1 (LIKE pagg_tab);
CREATE TABLE pagg_tab_p2 (LIKE pagg_tab);
CREATE TABLE pagg_tab_p3 (LIKE pagg_tab);
INSERT INTO pagg_tab_p1 SELECT i % 30, i % 50, to_char(i/30, 'FM0000') FROM generate_series(1, 3000) i WHERE (i % 30) < 10;
INSERT INTO pagg_tab_p2 SELECT i % 30, i % 50, to_char(i/30, 'FM0000') FROM generate_series(1, 3000) i WHERE (i % 30) < 20 and (i % 30) >= 10;
INSERT INTO pagg_tab_p3 SELECT i % 30, i % 50, to_char(i/30, 'FM0000') FROM generate_series(1, 3000) i WHERE (i % 30) < 30 and (i % 30) >= 20;
-- Create foreign partitions
CREATE FOREIGN TABLE fpagg_tab_p1 PARTITION OF pagg_tab FOR VALUES FROM (0) TO (10) SERVER loopback OPTIONS (table_name 'pagg_tab_p1');
CREATE FOREIGN TABLE fpagg_tab_p2 PARTITION OF pagg_tab FOR VALUES FROM (10) TO (20) SERVER loopback OPTIONS (table_name 'pagg_tab_p2');
CREATE FOREIGN TABLE fpagg_tab_p3 PARTITION OF pagg_tab FOR VALUES FROM (20) TO (30) SERVER loopback OPTIONS (table_name 'pagg_tab_p3');
ANALYZE pagg_tab;
ANALYZE fpagg_tab_p1;
ANALYZE fpagg_tab_p2;
ANALYZE fpagg_tab_p3;
-- When GROUP BY clause matches with PARTITION KEY.
-- Plan with partitionwise aggregates is disabled
SET enable_partitionwise_aggregate TO false;
EXPLAIN (COSTS OFF)
SELECT a, sum(b), min(b), count(*) FROM pagg_tab GROUP BY a HAVING avg(b) < 22 ORDER BY 1;
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
QUERY PLAN
-----------------------------------------------------------
Sort
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Sort Key: pagg_tab.a
-> HashAggregate
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Group Key: pagg_tab.a
Filter: (avg(pagg_tab.b) < '22'::numeric)
-> Append
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Foreign Scan on fpagg_tab_p1 pagg_tab_1
-> Foreign Scan on fpagg_tab_p2 pagg_tab_2
-> Foreign Scan on fpagg_tab_p3 pagg_tab_3
(9 rows)
-- Plan with partitionwise aggregates is enabled
SET enable_partitionwise_aggregate TO true;
EXPLAIN (COSTS OFF)
SELECT a, sum(b), min(b), count(*) FROM pagg_tab GROUP BY a HAVING avg(b) < 22 ORDER BY 1;
QUERY PLAN
-----------------------------------------------------------------
Sort
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Sort Key: pagg_tab.a
-> Append
-> Foreign Scan
Relations: Aggregate on (fpagg_tab_p1 pagg_tab)
-> Foreign Scan
Relations: Aggregate on (fpagg_tab_p2 pagg_tab_1)
-> Foreign Scan
Relations: Aggregate on (fpagg_tab_p3 pagg_tab_2)
(9 rows)
SELECT a, sum(b), min(b), count(*) FROM pagg_tab GROUP BY a HAVING avg(b) < 22 ORDER BY 1;
a | sum | min | count
----+------+-----+-------
0 | 2000 | 0 | 100
1 | 2100 | 1 | 100
10 | 2000 | 0 | 100
11 | 2100 | 1 | 100
20 | 2000 | 0 | 100
21 | 2100 | 1 | 100
(6 rows)
-- Check with whole-row reference
-- Should have all the columns in the target list for the given relation
EXPLAIN (VERBOSE, COSTS OFF)
SELECT a, count(t1) FROM pagg_tab t1 GROUP BY a HAVING avg(b) < 22 ORDER BY 1;
QUERY PLAN
------------------------------------------------------------------------
Sort
Output: t1.a, (count(((t1.*)::pagg_tab)))
Sort Key: t1.a
-> Append
-> HashAggregate
Output: t1.a, count(((t1.*)::pagg_tab))
Group Key: t1.a
Filter: (avg(t1.b) < '22'::numeric)
-> Foreign Scan on public.fpagg_tab_p1 t1
Output: t1.a, t1.*, t1.b
Remote SQL: SELECT a, b, c FROM public.pagg_tab_p1
-> HashAggregate
Output: t1_1.a, count(((t1_1.*)::pagg_tab))
Group Key: t1_1.a
Filter: (avg(t1_1.b) < '22'::numeric)
-> Foreign Scan on public.fpagg_tab_p2 t1_1
Output: t1_1.a, t1_1.*, t1_1.b
Remote SQL: SELECT a, b, c FROM public.pagg_tab_p2
-> HashAggregate
Output: t1_2.a, count(((t1_2.*)::pagg_tab))
Group Key: t1_2.a
Filter: (avg(t1_2.b) < '22'::numeric)
-> Foreign Scan on public.fpagg_tab_p3 t1_2
Output: t1_2.a, t1_2.*, t1_2.b
Remote SQL: SELECT a, b, c FROM public.pagg_tab_p3
(25 rows)
SELECT a, count(t1) FROM pagg_tab t1 GROUP BY a HAVING avg(b) < 22 ORDER BY 1;
a | count
----+-------
0 | 100
1 | 100
10 | 100
11 | 100
20 | 100
21 | 100
(6 rows)
-- When GROUP BY clause does not match with PARTITION KEY.
EXPLAIN (COSTS OFF)
SELECT b, avg(a), max(a), count(*) FROM pagg_tab GROUP BY b HAVING sum(a) < 700 ORDER BY 1;
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
QUERY PLAN
-----------------------------------------------------------------
Sort
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Sort Key: pagg_tab.b
-> Finalize HashAggregate
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Group Key: pagg_tab.b
Filter: (sum(pagg_tab.a) < 700)
-> Append
-> Partial HashAggregate
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Group Key: pagg_tab.b
-> Foreign Scan on fpagg_tab_p1 pagg_tab
-> Partial HashAggregate
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Group Key: pagg_tab_1.b
-> Foreign Scan on fpagg_tab_p2 pagg_tab_1
-> Partial HashAggregate
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Group Key: pagg_tab_2.b
-> Foreign Scan on fpagg_tab_p3 pagg_tab_2
(15 rows)
-- ===================================================================
-- access rights and superuser
-- ===================================================================
-- Non-superuser cannot create a FDW without a password in the connstr
CREATE ROLE regress_nosuper NOSUPERUSER;
GRANT USAGE ON FOREIGN DATA WRAPPER postgres_fdw TO regress_nosuper;
SET ROLE regress_nosuper;
SHOW is_superuser;
is_superuser
--------------
off
(1 row)
-- This will be OK, we can create the FDW
DO $d$
BEGIN
EXECUTE $$CREATE SERVER loopback_nopw FOREIGN DATA WRAPPER postgres_fdw
OPTIONS (dbname '$$||current_database()||$$',
port '$$||current_setting('port')||$$'
)$$;
END;
$d$;
-- But creation of user mappings for non-superusers should fail
CREATE USER MAPPING FOR public SERVER loopback_nopw;
CREATE USER MAPPING FOR CURRENT_USER SERVER loopback_nopw;
CREATE FOREIGN TABLE pg_temp.ft1_nopw (
c1 int NOT NULL,
c2 int NOT NULL,
c3 text,
c4 timestamptz,
c5 timestamp,
c6 varchar(10),
c7 char(10) default 'ft1',
c8 user_enum
) SERVER loopback_nopw OPTIONS (schema_name 'public', table_name 'ft1');
SELECT 1 FROM ft1_nopw LIMIT 1;
ERROR: password or GSSAPI delegated credentials required
DETAIL: Non-superusers must delegate GSSAPI credentials or provide a password in the user mapping.
-- If we add a password to the connstr it'll fail, because we don't allow passwords
-- in connstrs only in user mappings.
ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw');
ERROR: invalid option "password"
HINT: Perhaps you meant the option "passfile".
-- If we add a password for our user mapping instead, we should get a different
-- error because the password wasn't actually *used* when we run with trust auth.
--
-- This won't work with installcheck, but neither will most of the FDW checks.
ALTER USER MAPPING FOR CURRENT_USER SERVER loopback_nopw OPTIONS (ADD password 'dummypw');
SELECT 1 FROM ft1_nopw LIMIT 1;
ERROR: password or GSSAPI delegated credentials required
DETAIL: Non-superuser cannot connect if the server does not request a password or use GSSAPI with delegated credentials.
HINT: Target server's authentication method must be changed or password_required=false set in the user mapping attributes.
-- Unpriv user cannot make the mapping passwordless
ALTER USER MAPPING FOR CURRENT_USER SERVER loopback_nopw OPTIONS (ADD password_required 'false');
ERROR: password_required=false is superuser-only
HINT: User mappings with the password_required option set to false may only be created or modified by the superuser.
SELECT 1 FROM ft1_nopw LIMIT 1;
ERROR: password or GSSAPI delegated credentials required
DETAIL: Non-superuser cannot connect if the server does not request a password or use GSSAPI with delegated credentials.
HINT: Target server's authentication method must be changed or password_required=false set in the user mapping attributes.
RESET ROLE;
-- But the superuser can
ALTER USER MAPPING FOR regress_nosuper SERVER loopback_nopw OPTIONS (ADD password_required 'false');
SET ROLE regress_nosuper;
-- Should finally work now
SELECT 1 FROM ft1_nopw LIMIT 1;
?column?
----------
1
(1 row)
-- unpriv user also cannot set sslcert / sslkey on the user mapping
-- first set password_required so we see the right error messages
ALTER USER MAPPING FOR CURRENT_USER SERVER loopback_nopw OPTIONS (SET password_required 'true');
ALTER USER MAPPING FOR CURRENT_USER SERVER loopback_nopw OPTIONS (ADD sslcert 'foo.crt');
ERROR: sslcert and sslkey are superuser-only
HINT: User mappings with the sslcert or sslkey options set may only be created or modified by the superuser.
ALTER USER MAPPING FOR CURRENT_USER SERVER loopback_nopw OPTIONS (ADD sslkey 'foo.key');
ERROR: sslcert and sslkey are superuser-only
HINT: User mappings with the sslcert or sslkey options set may only be created or modified by the superuser.
-- We're done with the role named after a specific user and need to check the
-- changes to the public mapping.
DROP USER MAPPING FOR CURRENT_USER SERVER loopback_nopw;
-- This will fail again as it'll resolve the user mapping for public, which
-- lacks password_required=false
SELECT 1 FROM ft1_nopw LIMIT 1;
ERROR: password or GSSAPI delegated credentials required
DETAIL: Non-superusers must delegate GSSAPI credentials or provide a password in the user mapping.
RESET ROLE;
-- The user mapping for public is passwordless and lacks the password_required=false
-- mapping option, but will work because the current user is a superuser.
SELECT 1 FROM ft1_nopw LIMIT 1;
?column?
----------
1
(1 row)
-- cleanup
DROP USER MAPPING FOR public SERVER loopback_nopw;
DROP OWNED BY regress_nosuper;
DROP ROLE regress_nosuper;
-- Clean-up
RESET enable_partitionwise_aggregate;
-- Two-phase transactions are not supported.
BEGIN;
SELECT count(*) FROM ft1;
count
-------
822
(1 row)
-- error here
PREPARE TRANSACTION 'fdw_tpc';
ERROR: cannot PREPARE a transaction that has operated on postgres_fdw foreign tables
ROLLBACK;
WARNING: there is no transaction in progress
-- ===================================================================
-- reestablish new connection
-- ===================================================================
-- Change application_name of remote connection to special one
-- so that we can easily terminate the connection later.
ALTER SERVER loopback OPTIONS (application_name 'fdw_retry_check');
-- Make sure we have a remote connection.
SELECT 1 FROM ft1 LIMIT 1;
?column?
----------
1
(1 row)
-- Terminate the remote connection and wait for the termination to complete.
Harden postgres_fdw tests against unexpected cache flushes. postgres_fdw will close its remote session if an sinval cache reset occurs, since it's possible that that means some FDW parameters changed. We had two tests that were trying to ensure that the session remains alive by setting debug_discard_caches = 0; but that's not sufficient. Even though the tests seem stable enough in the buildfarm, they flap a lot under CI. In the first test, which is checking the ability to recover from a lost connection, we can stabilize the results by just not caring whether pg_terminate_backend() finds a victim backend. If a reset did happen, there won't be a session to terminate anymore, but the test can proceed anyway. (Arguably, we are then not testing the unintentional-disconnect case, but as long as that scenario is exercised in most runs I think it's fine; testing the reset-driven case is of value too.) In the second test, which is trying to verify the application_name displayed in pg_stat_activity by a remote session, we had a race condition in that the remote session might go away before we can fetch its pg_stat_activity entry. We can close that race and make the test more certainly test what it intends to by arranging things so that the remote session itself fetches its pg_stat_activity entry (based on PID rather than a somewhat-circular assumption about the application name). Both tests now demonstrably pass under debug_discard_caches = 1, so we can remove that hack. Back-patch into relevant back branches. Discussion: https://postgr.es/m/20230226194340.u44bkfgyz64c67i6@awork3.anarazel.de
2023-02-27 22:29:51 +01:00
-- (If a cache flush happens, the remote connection might have already been
-- dropped; so code this step in a way that doesn't fail if no connection.)
DO $$ BEGIN
PERFORM pg_terminate_backend(pid, 180000) FROM pg_stat_activity
WHERE application_name = 'fdw_retry_check';
Harden postgres_fdw tests against unexpected cache flushes. postgres_fdw will close its remote session if an sinval cache reset occurs, since it's possible that that means some FDW parameters changed. We had two tests that were trying to ensure that the session remains alive by setting debug_discard_caches = 0; but that's not sufficient. Even though the tests seem stable enough in the buildfarm, they flap a lot under CI. In the first test, which is checking the ability to recover from a lost connection, we can stabilize the results by just not caring whether pg_terminate_backend() finds a victim backend. If a reset did happen, there won't be a session to terminate anymore, but the test can proceed anyway. (Arguably, we are then not testing the unintentional-disconnect case, but as long as that scenario is exercised in most runs I think it's fine; testing the reset-driven case is of value too.) In the second test, which is trying to verify the application_name displayed in pg_stat_activity by a remote session, we had a race condition in that the remote session might go away before we can fetch its pg_stat_activity entry. We can close that race and make the test more certainly test what it intends to by arranging things so that the remote session itself fetches its pg_stat_activity entry (based on PID rather than a somewhat-circular assumption about the application name). Both tests now demonstrably pass under debug_discard_caches = 1, so we can remove that hack. Back-patch into relevant back branches. Discussion: https://postgr.es/m/20230226194340.u44bkfgyz64c67i6@awork3.anarazel.de
2023-02-27 22:29:51 +01:00
END $$;
-- This query should detect the broken connection when starting new remote
-- transaction, reestablish new connection, and then succeed.
BEGIN;
SELECT 1 FROM ft1 LIMIT 1;
?column?
----------
1
(1 row)
-- If we detect the broken connection when starting a new remote
-- subtransaction, we should fail instead of establishing a new connection.
-- Terminate the remote connection and wait for the termination to complete.
Harden postgres_fdw tests against unexpected cache flushes. postgres_fdw will close its remote session if an sinval cache reset occurs, since it's possible that that means some FDW parameters changed. We had two tests that were trying to ensure that the session remains alive by setting debug_discard_caches = 0; but that's not sufficient. Even though the tests seem stable enough in the buildfarm, they flap a lot under CI. In the first test, which is checking the ability to recover from a lost connection, we can stabilize the results by just not caring whether pg_terminate_backend() finds a victim backend. If a reset did happen, there won't be a session to terminate anymore, but the test can proceed anyway. (Arguably, we are then not testing the unintentional-disconnect case, but as long as that scenario is exercised in most runs I think it's fine; testing the reset-driven case is of value too.) In the second test, which is trying to verify the application_name displayed in pg_stat_activity by a remote session, we had a race condition in that the remote session might go away before we can fetch its pg_stat_activity entry. We can close that race and make the test more certainly test what it intends to by arranging things so that the remote session itself fetches its pg_stat_activity entry (based on PID rather than a somewhat-circular assumption about the application name). Both tests now demonstrably pass under debug_discard_caches = 1, so we can remove that hack. Back-patch into relevant back branches. Discussion: https://postgr.es/m/20230226194340.u44bkfgyz64c67i6@awork3.anarazel.de
2023-02-27 22:29:51 +01:00
DO $$ BEGIN
PERFORM pg_terminate_backend(pid, 180000) FROM pg_stat_activity
WHERE application_name = 'fdw_retry_check';
Harden postgres_fdw tests against unexpected cache flushes. postgres_fdw will close its remote session if an sinval cache reset occurs, since it's possible that that means some FDW parameters changed. We had two tests that were trying to ensure that the session remains alive by setting debug_discard_caches = 0; but that's not sufficient. Even though the tests seem stable enough in the buildfarm, they flap a lot under CI. In the first test, which is checking the ability to recover from a lost connection, we can stabilize the results by just not caring whether pg_terminate_backend() finds a victim backend. If a reset did happen, there won't be a session to terminate anymore, but the test can proceed anyway. (Arguably, we are then not testing the unintentional-disconnect case, but as long as that scenario is exercised in most runs I think it's fine; testing the reset-driven case is of value too.) In the second test, which is trying to verify the application_name displayed in pg_stat_activity by a remote session, we had a race condition in that the remote session might go away before we can fetch its pg_stat_activity entry. We can close that race and make the test more certainly test what it intends to by arranging things so that the remote session itself fetches its pg_stat_activity entry (based on PID rather than a somewhat-circular assumption about the application name). Both tests now demonstrably pass under debug_discard_caches = 1, so we can remove that hack. Back-patch into relevant back branches. Discussion: https://postgr.es/m/20230226194340.u44bkfgyz64c67i6@awork3.anarazel.de
2023-02-27 22:29:51 +01:00
END $$;
SAVEPOINT s;
-- The text of the error might vary across platforms, so only show SQLSTATE.
\set VERBOSITY sqlstate
SELECT 1 FROM ft1 LIMIT 1; -- should fail
ERROR: 08006
\set VERBOSITY default
COMMIT;
-- =============================================================================
-- test connection invalidation cases and postgres_fdw_get_connections function
-- =============================================================================
postgres_fdw: Fix tests for CLOBBER_CACHE_ALWAYS. The regression tests added in commits 708d165ddb and 411ae64997 caused buildfarm failures when CLOBBER_CACHE_ALWAYS was enabled. This commit stabilizes those tests. The foreign server connections established by postgres_fdw behaves differently depending on whether CLOBBER_CACHE_ALWAYS is enabled or not. If it's not enabled, those connections are cached. On the other hand, if it's enabled, when the connections are established outside transaction block, they are not cached (i.e., they are immediately closed at the end of query that established them). So the subsequent postgres_fdw_get_connections() cannot list those connections and postgres_fdw_disconnect() cannot close them (because they are already closed). When the connections are established inside transaction block, they are cached whether CLOBBER_CACHE_ALWAYS was enabled or not. But if it's enabled, they are immediately marked as invalid, otherwise not. This causes the subsequent postgres_fdw_get_connections() to return different result in "valid" column depending on whether CLOBBER_CACHE_ALWAYS was enabled or not. This commit prevents the above differences of behavior from affecting the regression tests. Per buildfarm failure on trilobite. Original patch by Bharath Rupireddy. I (Fujii Masao) extracted the regression test fix from that and revised it a bit. Reported-by: Tom Lane Author: Bharath Rupireddy Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/2688508.1611865371@sss.pgh.pa.us
2021-01-30 02:12:22 +01:00
-- Let's ensure to close all the existing cached connections.
SELECT 1 FROM postgres_fdw_disconnect_all();
?column?
----------
1
(1 row)
postgres_fdw: Fix tests for CLOBBER_CACHE_ALWAYS. The regression tests added in commits 708d165ddb and 411ae64997 caused buildfarm failures when CLOBBER_CACHE_ALWAYS was enabled. This commit stabilizes those tests. The foreign server connections established by postgres_fdw behaves differently depending on whether CLOBBER_CACHE_ALWAYS is enabled or not. If it's not enabled, those connections are cached. On the other hand, if it's enabled, when the connections are established outside transaction block, they are not cached (i.e., they are immediately closed at the end of query that established them). So the subsequent postgres_fdw_get_connections() cannot list those connections and postgres_fdw_disconnect() cannot close them (because they are already closed). When the connections are established inside transaction block, they are cached whether CLOBBER_CACHE_ALWAYS was enabled or not. But if it's enabled, they are immediately marked as invalid, otherwise not. This causes the subsequent postgres_fdw_get_connections() to return different result in "valid" column depending on whether CLOBBER_CACHE_ALWAYS was enabled or not. This commit prevents the above differences of behavior from affecting the regression tests. Per buildfarm failure on trilobite. Original patch by Bharath Rupireddy. I (Fujii Masao) extracted the regression test fix from that and revised it a bit. Reported-by: Tom Lane Author: Bharath Rupireddy Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/2688508.1611865371@sss.pgh.pa.us
2021-01-30 02:12:22 +01:00
-- No cached connections, so no records should be output.
SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1;
server_name
-------------
(0 rows)
-- This test case is for closing the connection in pgfdw_xact_callback
BEGIN;
-- Connection xact depth becomes 1 i.e. the connection is in midst of the xact.
SELECT 1 FROM ft1 LIMIT 1;
?column?
----------
1
(1 row)
SELECT 1 FROM ft7 LIMIT 1;
?column?
----------
1
(1 row)
postgres_fdw: Fix tests for CLOBBER_CACHE_ALWAYS. The regression tests added in commits 708d165ddb and 411ae64997 caused buildfarm failures when CLOBBER_CACHE_ALWAYS was enabled. This commit stabilizes those tests. The foreign server connections established by postgres_fdw behaves differently depending on whether CLOBBER_CACHE_ALWAYS is enabled or not. If it's not enabled, those connections are cached. On the other hand, if it's enabled, when the connections are established outside transaction block, they are not cached (i.e., they are immediately closed at the end of query that established them). So the subsequent postgres_fdw_get_connections() cannot list those connections and postgres_fdw_disconnect() cannot close them (because they are already closed). When the connections are established inside transaction block, they are cached whether CLOBBER_CACHE_ALWAYS was enabled or not. But if it's enabled, they are immediately marked as invalid, otherwise not. This causes the subsequent postgres_fdw_get_connections() to return different result in "valid" column depending on whether CLOBBER_CACHE_ALWAYS was enabled or not. This commit prevents the above differences of behavior from affecting the regression tests. Per buildfarm failure on trilobite. Original patch by Bharath Rupireddy. I (Fujii Masao) extracted the regression test fix from that and revised it a bit. Reported-by: Tom Lane Author: Bharath Rupireddy Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/2688508.1611865371@sss.pgh.pa.us
2021-01-30 02:12:22 +01:00
-- List all the existing cached connections. loopback and loopback3 should be
-- output.
SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1;
server_name
-------------
loopback
loopback3
(2 rows)
postgres_fdw: Fix tests for CLOBBER_CACHE_ALWAYS. The regression tests added in commits 708d165ddb and 411ae64997 caused buildfarm failures when CLOBBER_CACHE_ALWAYS was enabled. This commit stabilizes those tests. The foreign server connections established by postgres_fdw behaves differently depending on whether CLOBBER_CACHE_ALWAYS is enabled or not. If it's not enabled, those connections are cached. On the other hand, if it's enabled, when the connections are established outside transaction block, they are not cached (i.e., they are immediately closed at the end of query that established them). So the subsequent postgres_fdw_get_connections() cannot list those connections and postgres_fdw_disconnect() cannot close them (because they are already closed). When the connections are established inside transaction block, they are cached whether CLOBBER_CACHE_ALWAYS was enabled or not. But if it's enabled, they are immediately marked as invalid, otherwise not. This causes the subsequent postgres_fdw_get_connections() to return different result in "valid" column depending on whether CLOBBER_CACHE_ALWAYS was enabled or not. This commit prevents the above differences of behavior from affecting the regression tests. Per buildfarm failure on trilobite. Original patch by Bharath Rupireddy. I (Fujii Masao) extracted the regression test fix from that and revised it a bit. Reported-by: Tom Lane Author: Bharath Rupireddy Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/2688508.1611865371@sss.pgh.pa.us
2021-01-30 02:12:22 +01:00
-- Connections are not closed at the end of the alter and drop statements.
-- That's because the connections are in midst of this xact,
-- they are just marked as invalid in pgfdw_inval_callback.
ALTER SERVER loopback OPTIONS (ADD use_remote_estimate 'off');
DROP SERVER loopback3 CASCADE;
NOTICE: drop cascades to 2 other objects
DETAIL: drop cascades to user mapping for public on server loopback3
drop cascades to foreign table ft7
-- List all the existing cached connections. loopback and loopback3
-- should be output as invalid connections. Also the server name for
-- loopback3 should be NULL because the server was dropped.
SELECT * FROM postgres_fdw_get_connections() ORDER BY 1;
server_name | valid
-------------+-------
loopback | f
| f
postgres_fdw: Fix tests for CLOBBER_CACHE_ALWAYS. The regression tests added in commits 708d165ddb and 411ae64997 caused buildfarm failures when CLOBBER_CACHE_ALWAYS was enabled. This commit stabilizes those tests. The foreign server connections established by postgres_fdw behaves differently depending on whether CLOBBER_CACHE_ALWAYS is enabled or not. If it's not enabled, those connections are cached. On the other hand, if it's enabled, when the connections are established outside transaction block, they are not cached (i.e., they are immediately closed at the end of query that established them). So the subsequent postgres_fdw_get_connections() cannot list those connections and postgres_fdw_disconnect() cannot close them (because they are already closed). When the connections are established inside transaction block, they are cached whether CLOBBER_CACHE_ALWAYS was enabled or not. But if it's enabled, they are immediately marked as invalid, otherwise not. This causes the subsequent postgres_fdw_get_connections() to return different result in "valid" column depending on whether CLOBBER_CACHE_ALWAYS was enabled or not. This commit prevents the above differences of behavior from affecting the regression tests. Per buildfarm failure on trilobite. Original patch by Bharath Rupireddy. I (Fujii Masao) extracted the regression test fix from that and revised it a bit. Reported-by: Tom Lane Author: Bharath Rupireddy Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/2688508.1611865371@sss.pgh.pa.us
2021-01-30 02:12:22 +01:00
(2 rows)
postgres_fdw: Fix tests for CLOBBER_CACHE_ALWAYS. The regression tests added in commits 708d165ddb and 411ae64997 caused buildfarm failures when CLOBBER_CACHE_ALWAYS was enabled. This commit stabilizes those tests. The foreign server connections established by postgres_fdw behaves differently depending on whether CLOBBER_CACHE_ALWAYS is enabled or not. If it's not enabled, those connections are cached. On the other hand, if it's enabled, when the connections are established outside transaction block, they are not cached (i.e., they are immediately closed at the end of query that established them). So the subsequent postgres_fdw_get_connections() cannot list those connections and postgres_fdw_disconnect() cannot close them (because they are already closed). When the connections are established inside transaction block, they are cached whether CLOBBER_CACHE_ALWAYS was enabled or not. But if it's enabled, they are immediately marked as invalid, otherwise not. This causes the subsequent postgres_fdw_get_connections() to return different result in "valid" column depending on whether CLOBBER_CACHE_ALWAYS was enabled or not. This commit prevents the above differences of behavior from affecting the regression tests. Per buildfarm failure on trilobite. Original patch by Bharath Rupireddy. I (Fujii Masao) extracted the regression test fix from that and revised it a bit. Reported-by: Tom Lane Author: Bharath Rupireddy Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/2688508.1611865371@sss.pgh.pa.us
2021-01-30 02:12:22 +01:00
-- The invalid connections get closed in pgfdw_xact_callback during commit.
COMMIT;
postgres_fdw: Fix tests for CLOBBER_CACHE_ALWAYS. The regression tests added in commits 708d165ddb and 411ae64997 caused buildfarm failures when CLOBBER_CACHE_ALWAYS was enabled. This commit stabilizes those tests. The foreign server connections established by postgres_fdw behaves differently depending on whether CLOBBER_CACHE_ALWAYS is enabled or not. If it's not enabled, those connections are cached. On the other hand, if it's enabled, when the connections are established outside transaction block, they are not cached (i.e., they are immediately closed at the end of query that established them). So the subsequent postgres_fdw_get_connections() cannot list those connections and postgres_fdw_disconnect() cannot close them (because they are already closed). When the connections are established inside transaction block, they are cached whether CLOBBER_CACHE_ALWAYS was enabled or not. But if it's enabled, they are immediately marked as invalid, otherwise not. This causes the subsequent postgres_fdw_get_connections() to return different result in "valid" column depending on whether CLOBBER_CACHE_ALWAYS was enabled or not. This commit prevents the above differences of behavior from affecting the regression tests. Per buildfarm failure on trilobite. Original patch by Bharath Rupireddy. I (Fujii Masao) extracted the regression test fix from that and revised it a bit. Reported-by: Tom Lane Author: Bharath Rupireddy Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/2688508.1611865371@sss.pgh.pa.us
2021-01-30 02:12:22 +01:00
-- All cached connections were closed while committing above xact, so no
-- records should be output.
SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1;
server_name
-------------
(0 rows)
-- =======================================================================
-- test postgres_fdw_disconnect and postgres_fdw_disconnect_all functions
-- =======================================================================
postgres_fdw: Fix tests for CLOBBER_CACHE_ALWAYS. The regression tests added in commits 708d165ddb and 411ae64997 caused buildfarm failures when CLOBBER_CACHE_ALWAYS was enabled. This commit stabilizes those tests. The foreign server connections established by postgres_fdw behaves differently depending on whether CLOBBER_CACHE_ALWAYS is enabled or not. If it's not enabled, those connections are cached. On the other hand, if it's enabled, when the connections are established outside transaction block, they are not cached (i.e., they are immediately closed at the end of query that established them). So the subsequent postgres_fdw_get_connections() cannot list those connections and postgres_fdw_disconnect() cannot close them (because they are already closed). When the connections are established inside transaction block, they are cached whether CLOBBER_CACHE_ALWAYS was enabled or not. But if it's enabled, they are immediately marked as invalid, otherwise not. This causes the subsequent postgres_fdw_get_connections() to return different result in "valid" column depending on whether CLOBBER_CACHE_ALWAYS was enabled or not. This commit prevents the above differences of behavior from affecting the regression tests. Per buildfarm failure on trilobite. Original patch by Bharath Rupireddy. I (Fujii Masao) extracted the regression test fix from that and revised it a bit. Reported-by: Tom Lane Author: Bharath Rupireddy Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/2688508.1611865371@sss.pgh.pa.us
2021-01-30 02:12:22 +01:00
BEGIN;
-- Ensure to cache loopback connection.
SELECT 1 FROM ft1 LIMIT 1;
?column?
----------
1
(1 row)
-- Ensure to cache loopback2 connection.
SELECT 1 FROM ft6 LIMIT 1;
?column?
----------
1
(1 row)
-- List all the existing cached connections. loopback and loopback2 should be
-- output.
postgres_fdw: Fix tests for CLOBBER_CACHE_ALWAYS. The regression tests added in commits 708d165ddb and 411ae64997 caused buildfarm failures when CLOBBER_CACHE_ALWAYS was enabled. This commit stabilizes those tests. The foreign server connections established by postgres_fdw behaves differently depending on whether CLOBBER_CACHE_ALWAYS is enabled or not. If it's not enabled, those connections are cached. On the other hand, if it's enabled, when the connections are established outside transaction block, they are not cached (i.e., they are immediately closed at the end of query that established them). So the subsequent postgres_fdw_get_connections() cannot list those connections and postgres_fdw_disconnect() cannot close them (because they are already closed). When the connections are established inside transaction block, they are cached whether CLOBBER_CACHE_ALWAYS was enabled or not. But if it's enabled, they are immediately marked as invalid, otherwise not. This causes the subsequent postgres_fdw_get_connections() to return different result in "valid" column depending on whether CLOBBER_CACHE_ALWAYS was enabled or not. This commit prevents the above differences of behavior from affecting the regression tests. Per buildfarm failure on trilobite. Original patch by Bharath Rupireddy. I (Fujii Masao) extracted the regression test fix from that and revised it a bit. Reported-by: Tom Lane Author: Bharath Rupireddy Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/2688508.1611865371@sss.pgh.pa.us
2021-01-30 02:12:22 +01:00
SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1;
server_name
-------------
loopback
loopback2
(2 rows)
postgres_fdw: Fix tests for CLOBBER_CACHE_ALWAYS. The regression tests added in commits 708d165ddb and 411ae64997 caused buildfarm failures when CLOBBER_CACHE_ALWAYS was enabled. This commit stabilizes those tests. The foreign server connections established by postgres_fdw behaves differently depending on whether CLOBBER_CACHE_ALWAYS is enabled or not. If it's not enabled, those connections are cached. On the other hand, if it's enabled, when the connections are established outside transaction block, they are not cached (i.e., they are immediately closed at the end of query that established them). So the subsequent postgres_fdw_get_connections() cannot list those connections and postgres_fdw_disconnect() cannot close them (because they are already closed). When the connections are established inside transaction block, they are cached whether CLOBBER_CACHE_ALWAYS was enabled or not. But if it's enabled, they are immediately marked as invalid, otherwise not. This causes the subsequent postgres_fdw_get_connections() to return different result in "valid" column depending on whether CLOBBER_CACHE_ALWAYS was enabled or not. This commit prevents the above differences of behavior from affecting the regression tests. Per buildfarm failure on trilobite. Original patch by Bharath Rupireddy. I (Fujii Masao) extracted the regression test fix from that and revised it a bit. Reported-by: Tom Lane Author: Bharath Rupireddy Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/2688508.1611865371@sss.pgh.pa.us
2021-01-30 02:12:22 +01:00
-- Issue a warning and return false as loopback connection is still in use and
-- can not be closed.
postgres_fdw: Fix tests for CLOBBER_CACHE_ALWAYS. The regression tests added in commits 708d165ddb and 411ae64997 caused buildfarm failures when CLOBBER_CACHE_ALWAYS was enabled. This commit stabilizes those tests. The foreign server connections established by postgres_fdw behaves differently depending on whether CLOBBER_CACHE_ALWAYS is enabled or not. If it's not enabled, those connections are cached. On the other hand, if it's enabled, when the connections are established outside transaction block, they are not cached (i.e., they are immediately closed at the end of query that established them). So the subsequent postgres_fdw_get_connections() cannot list those connections and postgres_fdw_disconnect() cannot close them (because they are already closed). When the connections are established inside transaction block, they are cached whether CLOBBER_CACHE_ALWAYS was enabled or not. But if it's enabled, they are immediately marked as invalid, otherwise not. This causes the subsequent postgres_fdw_get_connections() to return different result in "valid" column depending on whether CLOBBER_CACHE_ALWAYS was enabled or not. This commit prevents the above differences of behavior from affecting the regression tests. Per buildfarm failure on trilobite. Original patch by Bharath Rupireddy. I (Fujii Masao) extracted the regression test fix from that and revised it a bit. Reported-by: Tom Lane Author: Bharath Rupireddy Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/2688508.1611865371@sss.pgh.pa.us
2021-01-30 02:12:22 +01:00
SELECT postgres_fdw_disconnect('loopback');
WARNING: cannot close connection for server "loopback" because it is still in use
postgres_fdw_disconnect
-------------------------
f
(1 row)
postgres_fdw: Fix tests for CLOBBER_CACHE_ALWAYS. The regression tests added in commits 708d165ddb and 411ae64997 caused buildfarm failures when CLOBBER_CACHE_ALWAYS was enabled. This commit stabilizes those tests. The foreign server connections established by postgres_fdw behaves differently depending on whether CLOBBER_CACHE_ALWAYS is enabled or not. If it's not enabled, those connections are cached. On the other hand, if it's enabled, when the connections are established outside transaction block, they are not cached (i.e., they are immediately closed at the end of query that established them). So the subsequent postgres_fdw_get_connections() cannot list those connections and postgres_fdw_disconnect() cannot close them (because they are already closed). When the connections are established inside transaction block, they are cached whether CLOBBER_CACHE_ALWAYS was enabled or not. But if it's enabled, they are immediately marked as invalid, otherwise not. This causes the subsequent postgres_fdw_get_connections() to return different result in "valid" column depending on whether CLOBBER_CACHE_ALWAYS was enabled or not. This commit prevents the above differences of behavior from affecting the regression tests. Per buildfarm failure on trilobite. Original patch by Bharath Rupireddy. I (Fujii Masao) extracted the regression test fix from that and revised it a bit. Reported-by: Tom Lane Author: Bharath Rupireddy Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/2688508.1611865371@sss.pgh.pa.us
2021-01-30 02:12:22 +01:00
-- List all the existing cached connections. loopback and loopback2 should be
-- output.
SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1;
server_name
-------------
loopback
loopback2
(2 rows)
-- Return false as connections are still in use, warnings are issued.
-- But disable warnings temporarily because the order of them is not stable.
SET client_min_messages = 'ERROR';
SELECT postgres_fdw_disconnect_all();
postgres_fdw_disconnect_all
-----------------------------
f
(1 row)
RESET client_min_messages;
COMMIT;
postgres_fdw: Fix tests for CLOBBER_CACHE_ALWAYS. The regression tests added in commits 708d165ddb and 411ae64997 caused buildfarm failures when CLOBBER_CACHE_ALWAYS was enabled. This commit stabilizes those tests. The foreign server connections established by postgres_fdw behaves differently depending on whether CLOBBER_CACHE_ALWAYS is enabled or not. If it's not enabled, those connections are cached. On the other hand, if it's enabled, when the connections are established outside transaction block, they are not cached (i.e., they are immediately closed at the end of query that established them). So the subsequent postgres_fdw_get_connections() cannot list those connections and postgres_fdw_disconnect() cannot close them (because they are already closed). When the connections are established inside transaction block, they are cached whether CLOBBER_CACHE_ALWAYS was enabled or not. But if it's enabled, they are immediately marked as invalid, otherwise not. This causes the subsequent postgres_fdw_get_connections() to return different result in "valid" column depending on whether CLOBBER_CACHE_ALWAYS was enabled or not. This commit prevents the above differences of behavior from affecting the regression tests. Per buildfarm failure on trilobite. Original patch by Bharath Rupireddy. I (Fujii Masao) extracted the regression test fix from that and revised it a bit. Reported-by: Tom Lane Author: Bharath Rupireddy Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/2688508.1611865371@sss.pgh.pa.us
2021-01-30 02:12:22 +01:00
-- Ensure that loopback2 connection is closed.
SELECT 1 FROM postgres_fdw_disconnect('loopback2');
?column?
----------
1
(1 row)
postgres_fdw: Fix tests for CLOBBER_CACHE_ALWAYS. The regression tests added in commits 708d165ddb and 411ae64997 caused buildfarm failures when CLOBBER_CACHE_ALWAYS was enabled. This commit stabilizes those tests. The foreign server connections established by postgres_fdw behaves differently depending on whether CLOBBER_CACHE_ALWAYS is enabled or not. If it's not enabled, those connections are cached. On the other hand, if it's enabled, when the connections are established outside transaction block, they are not cached (i.e., they are immediately closed at the end of query that established them). So the subsequent postgres_fdw_get_connections() cannot list those connections and postgres_fdw_disconnect() cannot close them (because they are already closed). When the connections are established inside transaction block, they are cached whether CLOBBER_CACHE_ALWAYS was enabled or not. But if it's enabled, they are immediately marked as invalid, otherwise not. This causes the subsequent postgres_fdw_get_connections() to return different result in "valid" column depending on whether CLOBBER_CACHE_ALWAYS was enabled or not. This commit prevents the above differences of behavior from affecting the regression tests. Per buildfarm failure on trilobite. Original patch by Bharath Rupireddy. I (Fujii Masao) extracted the regression test fix from that and revised it a bit. Reported-by: Tom Lane Author: Bharath Rupireddy Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/2688508.1611865371@sss.pgh.pa.us
2021-01-30 02:12:22 +01:00
SELECT server_name FROM postgres_fdw_get_connections() WHERE server_name = 'loopback2';
server_name
-------------
(0 rows)
postgres_fdw: Fix tests for CLOBBER_CACHE_ALWAYS. The regression tests added in commits 708d165ddb and 411ae64997 caused buildfarm failures when CLOBBER_CACHE_ALWAYS was enabled. This commit stabilizes those tests. The foreign server connections established by postgres_fdw behaves differently depending on whether CLOBBER_CACHE_ALWAYS is enabled or not. If it's not enabled, those connections are cached. On the other hand, if it's enabled, when the connections are established outside transaction block, they are not cached (i.e., they are immediately closed at the end of query that established them). So the subsequent postgres_fdw_get_connections() cannot list those connections and postgres_fdw_disconnect() cannot close them (because they are already closed). When the connections are established inside transaction block, they are cached whether CLOBBER_CACHE_ALWAYS was enabled or not. But if it's enabled, they are immediately marked as invalid, otherwise not. This causes the subsequent postgres_fdw_get_connections() to return different result in "valid" column depending on whether CLOBBER_CACHE_ALWAYS was enabled or not. This commit prevents the above differences of behavior from affecting the regression tests. Per buildfarm failure on trilobite. Original patch by Bharath Rupireddy. I (Fujii Masao) extracted the regression test fix from that and revised it a bit. Reported-by: Tom Lane Author: Bharath Rupireddy Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/2688508.1611865371@sss.pgh.pa.us
2021-01-30 02:12:22 +01:00
-- Return false as loopback2 connection is closed already.
SELECT postgres_fdw_disconnect('loopback2');
postgres_fdw_disconnect
-------------------------
f
(1 row)
-- Return an error as there is no foreign server with given name.
SELECT postgres_fdw_disconnect('unknownserver');
ERROR: server "unknownserver" does not exist
postgres_fdw: Fix tests for CLOBBER_CACHE_ALWAYS. The regression tests added in commits 708d165ddb and 411ae64997 caused buildfarm failures when CLOBBER_CACHE_ALWAYS was enabled. This commit stabilizes those tests. The foreign server connections established by postgres_fdw behaves differently depending on whether CLOBBER_CACHE_ALWAYS is enabled or not. If it's not enabled, those connections are cached. On the other hand, if it's enabled, when the connections are established outside transaction block, they are not cached (i.e., they are immediately closed at the end of query that established them). So the subsequent postgres_fdw_get_connections() cannot list those connections and postgres_fdw_disconnect() cannot close them (because they are already closed). When the connections are established inside transaction block, they are cached whether CLOBBER_CACHE_ALWAYS was enabled or not. But if it's enabled, they are immediately marked as invalid, otherwise not. This causes the subsequent postgres_fdw_get_connections() to return different result in "valid" column depending on whether CLOBBER_CACHE_ALWAYS was enabled or not. This commit prevents the above differences of behavior from affecting the regression tests. Per buildfarm failure on trilobite. Original patch by Bharath Rupireddy. I (Fujii Masao) extracted the regression test fix from that and revised it a bit. Reported-by: Tom Lane Author: Bharath Rupireddy Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/2688508.1611865371@sss.pgh.pa.us
2021-01-30 02:12:22 +01:00
-- Let's ensure to close all the existing cached connections.
SELECT 1 FROM postgres_fdw_disconnect_all();
?column?
----------
1
(1 row)
postgres_fdw: Fix tests for CLOBBER_CACHE_ALWAYS. The regression tests added in commits 708d165ddb and 411ae64997 caused buildfarm failures when CLOBBER_CACHE_ALWAYS was enabled. This commit stabilizes those tests. The foreign server connections established by postgres_fdw behaves differently depending on whether CLOBBER_CACHE_ALWAYS is enabled or not. If it's not enabled, those connections are cached. On the other hand, if it's enabled, when the connections are established outside transaction block, they are not cached (i.e., they are immediately closed at the end of query that established them). So the subsequent postgres_fdw_get_connections() cannot list those connections and postgres_fdw_disconnect() cannot close them (because they are already closed). When the connections are established inside transaction block, they are cached whether CLOBBER_CACHE_ALWAYS was enabled or not. But if it's enabled, they are immediately marked as invalid, otherwise not. This causes the subsequent postgres_fdw_get_connections() to return different result in "valid" column depending on whether CLOBBER_CACHE_ALWAYS was enabled or not. This commit prevents the above differences of behavior from affecting the regression tests. Per buildfarm failure on trilobite. Original patch by Bharath Rupireddy. I (Fujii Masao) extracted the regression test fix from that and revised it a bit. Reported-by: Tom Lane Author: Bharath Rupireddy Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/2688508.1611865371@sss.pgh.pa.us
2021-01-30 02:12:22 +01:00
-- No cached connections, so no records should be output.
SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1;
server_name
-------------
(0 rows)
-- =============================================================================
-- test case for having multiple cached connections for a foreign server
-- =============================================================================
CREATE ROLE regress_multi_conn_user1 SUPERUSER;
CREATE ROLE regress_multi_conn_user2 SUPERUSER;
CREATE USER MAPPING FOR regress_multi_conn_user1 SERVER loopback;
CREATE USER MAPPING FOR regress_multi_conn_user2 SERVER loopback;
postgres_fdw: Fix tests for CLOBBER_CACHE_ALWAYS. The regression tests added in commits 708d165ddb and 411ae64997 caused buildfarm failures when CLOBBER_CACHE_ALWAYS was enabled. This commit stabilizes those tests. The foreign server connections established by postgres_fdw behaves differently depending on whether CLOBBER_CACHE_ALWAYS is enabled or not. If it's not enabled, those connections are cached. On the other hand, if it's enabled, when the connections are established outside transaction block, they are not cached (i.e., they are immediately closed at the end of query that established them). So the subsequent postgres_fdw_get_connections() cannot list those connections and postgres_fdw_disconnect() cannot close them (because they are already closed). When the connections are established inside transaction block, they are cached whether CLOBBER_CACHE_ALWAYS was enabled or not. But if it's enabled, they are immediately marked as invalid, otherwise not. This causes the subsequent postgres_fdw_get_connections() to return different result in "valid" column depending on whether CLOBBER_CACHE_ALWAYS was enabled or not. This commit prevents the above differences of behavior from affecting the regression tests. Per buildfarm failure on trilobite. Original patch by Bharath Rupireddy. I (Fujii Masao) extracted the regression test fix from that and revised it a bit. Reported-by: Tom Lane Author: Bharath Rupireddy Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/2688508.1611865371@sss.pgh.pa.us
2021-01-30 02:12:22 +01:00
BEGIN;
-- Will cache loopback connection with user mapping for regress_multi_conn_user1
SET ROLE regress_multi_conn_user1;
SELECT 1 FROM ft1 LIMIT 1;
?column?
----------
1
(1 row)
RESET ROLE;
-- Will cache loopback connection with user mapping for regress_multi_conn_user2
SET ROLE regress_multi_conn_user2;
SELECT 1 FROM ft1 LIMIT 1;
?column?
----------
1
(1 row)
RESET ROLE;
-- Should output two connections for loopback server
postgres_fdw: Fix tests for CLOBBER_CACHE_ALWAYS. The regression tests added in commits 708d165ddb and 411ae64997 caused buildfarm failures when CLOBBER_CACHE_ALWAYS was enabled. This commit stabilizes those tests. The foreign server connections established by postgres_fdw behaves differently depending on whether CLOBBER_CACHE_ALWAYS is enabled or not. If it's not enabled, those connections are cached. On the other hand, if it's enabled, when the connections are established outside transaction block, they are not cached (i.e., they are immediately closed at the end of query that established them). So the subsequent postgres_fdw_get_connections() cannot list those connections and postgres_fdw_disconnect() cannot close them (because they are already closed). When the connections are established inside transaction block, they are cached whether CLOBBER_CACHE_ALWAYS was enabled or not. But if it's enabled, they are immediately marked as invalid, otherwise not. This causes the subsequent postgres_fdw_get_connections() to return different result in "valid" column depending on whether CLOBBER_CACHE_ALWAYS was enabled or not. This commit prevents the above differences of behavior from affecting the regression tests. Per buildfarm failure on trilobite. Original patch by Bharath Rupireddy. I (Fujii Masao) extracted the regression test fix from that and revised it a bit. Reported-by: Tom Lane Author: Bharath Rupireddy Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/2688508.1611865371@sss.pgh.pa.us
2021-01-30 02:12:22 +01:00
SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1;
server_name
-------------
loopback
loopback
(2 rows)
postgres_fdw: Fix tests for CLOBBER_CACHE_ALWAYS. The regression tests added in commits 708d165ddb and 411ae64997 caused buildfarm failures when CLOBBER_CACHE_ALWAYS was enabled. This commit stabilizes those tests. The foreign server connections established by postgres_fdw behaves differently depending on whether CLOBBER_CACHE_ALWAYS is enabled or not. If it's not enabled, those connections are cached. On the other hand, if it's enabled, when the connections are established outside transaction block, they are not cached (i.e., they are immediately closed at the end of query that established them). So the subsequent postgres_fdw_get_connections() cannot list those connections and postgres_fdw_disconnect() cannot close them (because they are already closed). When the connections are established inside transaction block, they are cached whether CLOBBER_CACHE_ALWAYS was enabled or not. But if it's enabled, they are immediately marked as invalid, otherwise not. This causes the subsequent postgres_fdw_get_connections() to return different result in "valid" column depending on whether CLOBBER_CACHE_ALWAYS was enabled or not. This commit prevents the above differences of behavior from affecting the regression tests. Per buildfarm failure on trilobite. Original patch by Bharath Rupireddy. I (Fujii Masao) extracted the regression test fix from that and revised it a bit. Reported-by: Tom Lane Author: Bharath Rupireddy Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/2688508.1611865371@sss.pgh.pa.us
2021-01-30 02:12:22 +01:00
COMMIT;
-- Let's ensure to close all the existing cached connections.
SELECT 1 FROM postgres_fdw_disconnect_all();
?column?
----------
1
(1 row)
postgres_fdw: Fix tests for CLOBBER_CACHE_ALWAYS. The regression tests added in commits 708d165ddb and 411ae64997 caused buildfarm failures when CLOBBER_CACHE_ALWAYS was enabled. This commit stabilizes those tests. The foreign server connections established by postgres_fdw behaves differently depending on whether CLOBBER_CACHE_ALWAYS is enabled or not. If it's not enabled, those connections are cached. On the other hand, if it's enabled, when the connections are established outside transaction block, they are not cached (i.e., they are immediately closed at the end of query that established them). So the subsequent postgres_fdw_get_connections() cannot list those connections and postgres_fdw_disconnect() cannot close them (because they are already closed). When the connections are established inside transaction block, they are cached whether CLOBBER_CACHE_ALWAYS was enabled or not. But if it's enabled, they are immediately marked as invalid, otherwise not. This causes the subsequent postgres_fdw_get_connections() to return different result in "valid" column depending on whether CLOBBER_CACHE_ALWAYS was enabled or not. This commit prevents the above differences of behavior from affecting the regression tests. Per buildfarm failure on trilobite. Original patch by Bharath Rupireddy. I (Fujii Masao) extracted the regression test fix from that and revised it a bit. Reported-by: Tom Lane Author: Bharath Rupireddy Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/2688508.1611865371@sss.pgh.pa.us
2021-01-30 02:12:22 +01:00
-- No cached connections, so no records should be output.
SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1;
server_name
-------------
(0 rows)
-- Clean up
DROP USER MAPPING FOR regress_multi_conn_user1 SERVER loopback;
DROP USER MAPPING FOR regress_multi_conn_user2 SERVER loopback;
DROP ROLE regress_multi_conn_user1;
DROP ROLE regress_multi_conn_user2;
-- ===================================================================
-- Test foreign server level option keep_connections
-- ===================================================================
-- By default, the connections associated with foreign server are cached i.e.
-- keep_connections option is on. Set it to off.
ALTER SERVER loopback OPTIONS (keep_connections 'off');
-- connection to loopback server is closed at the end of xact
-- as keep_connections was set to off.
SELECT 1 FROM ft1 LIMIT 1;
?column?
----------
1
(1 row)
-- No cached connections, so no records should be output.
SELECT server_name FROM postgres_fdw_get_connections() ORDER BY 1;
server_name
-------------
(0 rows)
ALTER SERVER loopback OPTIONS (SET keep_connections 'on');
-- ===================================================================
-- batch insert
-- ===================================================================
BEGIN;
CREATE SERVER batch10 FOREIGN DATA WRAPPER postgres_fdw OPTIONS( batch_size '10' );
SELECT count(*)
FROM pg_foreign_server
WHERE srvname = 'batch10'
AND srvoptions @> array['batch_size=10'];
count
-------
1
(1 row)
ALTER SERVER batch10 OPTIONS( SET batch_size '20' );
SELECT count(*)
FROM pg_foreign_server
WHERE srvname = 'batch10'
AND srvoptions @> array['batch_size=10'];
count
-------
0
(1 row)
SELECT count(*)
FROM pg_foreign_server
WHERE srvname = 'batch10'
AND srvoptions @> array['batch_size=20'];
count
-------
1
(1 row)
CREATE FOREIGN TABLE table30 ( x int ) SERVER batch10 OPTIONS ( batch_size '30' );
SELECT COUNT(*)
FROM pg_foreign_table
WHERE ftrelid = 'table30'::regclass
AND ftoptions @> array['batch_size=30'];
count
-------
1
(1 row)
ALTER FOREIGN TABLE table30 OPTIONS ( SET batch_size '40');
SELECT COUNT(*)
FROM pg_foreign_table
WHERE ftrelid = 'table30'::regclass
AND ftoptions @> array['batch_size=30'];
count
-------
0
(1 row)
SELECT COUNT(*)
FROM pg_foreign_table
WHERE ftrelid = 'table30'::regclass
AND ftoptions @> array['batch_size=40'];
count
-------
1
(1 row)
ROLLBACK;
CREATE TABLE batch_table ( x int );
CREATE FOREIGN TABLE ftable ( x int ) SERVER loopback OPTIONS ( table_name 'batch_table', batch_size '10' );
EXPLAIN (VERBOSE, COSTS OFF) INSERT INTO ftable SELECT * FROM generate_series(1, 10) i;
QUERY PLAN
-------------------------------------------------------------
Insert on public.ftable
Remote SQL: INSERT INTO public.batch_table(x) VALUES ($1)
Batch Size: 10
-> Function Scan on pg_catalog.generate_series i
Output: i.i
Function Call: generate_series(1, 10)
(6 rows)
INSERT INTO ftable SELECT * FROM generate_series(1, 10) i;
INSERT INTO ftable SELECT * FROM generate_series(11, 31) i;
INSERT INTO ftable VALUES (32);
INSERT INTO ftable VALUES (33), (34);
SELECT COUNT(*) FROM ftable;
count
-------
34
(1 row)
TRUNCATE batch_table;
DROP FOREIGN TABLE ftable;
-- Disable batch insert
CREATE FOREIGN TABLE ftable ( x int ) SERVER loopback OPTIONS ( table_name 'batch_table', batch_size '1' );
EXPLAIN (VERBOSE, COSTS OFF) INSERT INTO ftable VALUES (1), (2);
QUERY PLAN
-------------------------------------------------------------
Insert on public.ftable
Remote SQL: INSERT INTO public.batch_table(x) VALUES ($1)
Batch Size: 1
-> Values Scan on "*VALUES*"
Output: "*VALUES*".column1
(5 rows)
INSERT INTO ftable VALUES (1), (2);
SELECT COUNT(*) FROM ftable;
count
-------
2
(1 row)
-- Disable batch inserting into foreign tables with BEFORE ROW INSERT triggers
-- even if the batch_size option is enabled.
ALTER FOREIGN TABLE ftable OPTIONS ( SET batch_size '10' );
CREATE TRIGGER trig_row_before BEFORE INSERT ON ftable
FOR EACH ROW EXECUTE PROCEDURE trigger_data(23,'skidoo');
EXPLAIN (VERBOSE, COSTS OFF) INSERT INTO ftable VALUES (3), (4);
QUERY PLAN
-------------------------------------------------------------
Insert on public.ftable
Remote SQL: INSERT INTO public.batch_table(x) VALUES ($1)
Batch Size: 1
-> Values Scan on "*VALUES*"
Output: "*VALUES*".column1
(5 rows)
INSERT INTO ftable VALUES (3), (4);
NOTICE: trig_row_before(23, skidoo) BEFORE ROW INSERT ON ftable
NOTICE: NEW: (3)
NOTICE: trig_row_before(23, skidoo) BEFORE ROW INSERT ON ftable
NOTICE: NEW: (4)
SELECT COUNT(*) FROM ftable;
count
-------
4
(1 row)
-- Clean up
DROP TRIGGER trig_row_before ON ftable;
DROP FOREIGN TABLE ftable;
DROP TABLE batch_table;
-- Use partitioning
CREATE TABLE batch_table ( x int ) PARTITION BY HASH (x);
CREATE TABLE batch_table_p0 (LIKE batch_table);
CREATE FOREIGN TABLE batch_table_p0f
PARTITION OF batch_table
FOR VALUES WITH (MODULUS 3, REMAINDER 0)
SERVER loopback
OPTIONS (table_name 'batch_table_p0', batch_size '10');
CREATE TABLE batch_table_p1 (LIKE batch_table);
CREATE FOREIGN TABLE batch_table_p1f
PARTITION OF batch_table
FOR VALUES WITH (MODULUS 3, REMAINDER 1)
SERVER loopback
OPTIONS (table_name 'batch_table_p1', batch_size '1');
CREATE TABLE batch_table_p2
PARTITION OF batch_table
FOR VALUES WITH (MODULUS 3, REMAINDER 2);
INSERT INTO batch_table SELECT * FROM generate_series(1, 66) i;
SELECT COUNT(*) FROM batch_table;
count
-------
66
(1 row)
-- Clean up
DROP TABLE batch_table;
DROP TABLE batch_table_p0;
DROP TABLE batch_table_p1;
-- Check that batched mode also works for some inserts made during
-- cross-partition updates
CREATE TABLE batch_cp_upd_test (a int) PARTITION BY LIST (a);
CREATE TABLE batch_cp_upd_test1 (LIKE batch_cp_upd_test);
CREATE FOREIGN TABLE batch_cp_upd_test1_f
PARTITION OF batch_cp_upd_test
FOR VALUES IN (1)
SERVER loopback
OPTIONS (table_name 'batch_cp_upd_test1', batch_size '10');
CREATE TABLE batch_cp_upd_test2 PARTITION OF batch_cp_upd_test
FOR VALUES IN (2);
CREATE TABLE batch_cp_upd_test3 (LIKE batch_cp_upd_test);
CREATE FOREIGN TABLE batch_cp_upd_test3_f
PARTITION OF batch_cp_upd_test
FOR VALUES IN (3)
SERVER loopback
OPTIONS (table_name 'batch_cp_upd_test3', batch_size '1');
-- Create statement triggers on remote tables that "log" any INSERTs
-- performed on them.
CREATE TABLE cmdlog (cmd text);
CREATE FUNCTION log_stmt() RETURNS TRIGGER LANGUAGE plpgsql AS $$
BEGIN INSERT INTO public.cmdlog VALUES (TG_OP || ' on ' || TG_RELNAME); RETURN NULL; END;
$$;
CREATE TRIGGER stmt_trig AFTER INSERT ON batch_cp_upd_test1
FOR EACH STATEMENT EXECUTE FUNCTION log_stmt();
CREATE TRIGGER stmt_trig AFTER INSERT ON batch_cp_upd_test3
FOR EACH STATEMENT EXECUTE FUNCTION log_stmt();
-- This update moves rows from the local partition 'batch_cp_upd_test2' to the
-- foreign partition 'batch_cp_upd_test1', one that has insert batching
-- enabled, so a single INSERT for both rows.
INSERT INTO batch_cp_upd_test VALUES (2), (2);
UPDATE batch_cp_upd_test t SET a = 1 FROM (VALUES (1), (2)) s(a) WHERE t.a = s.a AND s.a = 2;
-- This one moves rows from the local partition 'batch_cp_upd_test2' to the
-- foreign partition 'batch_cp_upd_test2', one that has insert batching
-- disabled, so separate INSERTs for the two rows.
INSERT INTO batch_cp_upd_test VALUES (2), (2);
UPDATE batch_cp_upd_test t SET a = 3 FROM (VALUES (1), (2)) s(a) WHERE t.a = s.a AND s.a = 2;
SELECT tableoid::regclass, * FROM batch_cp_upd_test ORDER BY 1;
tableoid | a
----------------------+---
batch_cp_upd_test1_f | 1
batch_cp_upd_test1_f | 1
batch_cp_upd_test3_f | 3
batch_cp_upd_test3_f | 3
(4 rows)
-- Should see 1 INSERT on batch_cp_upd_test1 and 2 on batch_cp_upd_test3 as
-- described above.
SELECT * FROM cmdlog ORDER BY 1;
cmd
------------------------------
INSERT on batch_cp_upd_test1
INSERT on batch_cp_upd_test3
INSERT on batch_cp_upd_test3
(3 rows)
-- Clean up
DROP TABLE batch_cp_upd_test;
DROP TABLE batch_cp_upd_test1;
DROP TABLE batch_cp_upd_test3;
DROP TABLE cmdlog;
DROP FUNCTION log_stmt();
-- Use partitioning
ALTER SERVER loopback OPTIONS (ADD batch_size '10');
CREATE TABLE batch_table ( x int, field1 text, field2 text) PARTITION BY HASH (x);
CREATE TABLE batch_table_p0 (LIKE batch_table);
ALTER TABLE batch_table_p0 ADD CONSTRAINT p0_pkey PRIMARY KEY (x);
CREATE FOREIGN TABLE batch_table_p0f
PARTITION OF batch_table
FOR VALUES WITH (MODULUS 2, REMAINDER 0)
SERVER loopback
OPTIONS (table_name 'batch_table_p0');
CREATE TABLE batch_table_p1 (LIKE batch_table);
ALTER TABLE batch_table_p1 ADD CONSTRAINT p1_pkey PRIMARY KEY (x);
CREATE FOREIGN TABLE batch_table_p1f
PARTITION OF batch_table
FOR VALUES WITH (MODULUS 2, REMAINDER 1)
SERVER loopback
OPTIONS (table_name 'batch_table_p1');
INSERT INTO batch_table SELECT i, 'test'||i, 'test'|| i FROM generate_series(1, 50) i;
SELECT COUNT(*) FROM batch_table;
count
-------
50
(1 row)
SELECT * FROM batch_table ORDER BY x;
x | field1 | field2
----+--------+--------
1 | test1 | test1
2 | test2 | test2
3 | test3 | test3
4 | test4 | test4
5 | test5 | test5
6 | test6 | test6
7 | test7 | test7
8 | test8 | test8
9 | test9 | test9
10 | test10 | test10
11 | test11 | test11
12 | test12 | test12
13 | test13 | test13
14 | test14 | test14
15 | test15 | test15
16 | test16 | test16
17 | test17 | test17
18 | test18 | test18
19 | test19 | test19
20 | test20 | test20
21 | test21 | test21
22 | test22 | test22
23 | test23 | test23
24 | test24 | test24
25 | test25 | test25
26 | test26 | test26
27 | test27 | test27
28 | test28 | test28
29 | test29 | test29
30 | test30 | test30
31 | test31 | test31
32 | test32 | test32
33 | test33 | test33
34 | test34 | test34
35 | test35 | test35
36 | test36 | test36
37 | test37 | test37
38 | test38 | test38
39 | test39 | test39
40 | test40 | test40
41 | test41 | test41
42 | test42 | test42
43 | test43 | test43
44 | test44 | test44
45 | test45 | test45
46 | test46 | test46
47 | test47 | test47
48 | test48 | test48
49 | test49 | test49
50 | test50 | test50
(50 rows)
-- Clean up
DROP TABLE batch_table;
DROP TABLE batch_table_p0;
DROP TABLE batch_table_p1;
ALTER SERVER loopback OPTIONS (DROP batch_size);
-- Test that pending inserts are handled properly when needed
CREATE TABLE batch_table (a text, b int);
CREATE FOREIGN TABLE ftable (a text, b int)
SERVER loopback
OPTIONS (table_name 'batch_table', batch_size '2');
CREATE TABLE ltable (a text, b int);
CREATE FUNCTION ftable_rowcount_trigf() RETURNS trigger LANGUAGE plpgsql AS
$$
begin
raise notice '%: there are % rows in ftable',
TG_NAME, (SELECT count(*) FROM ftable);
if TG_OP = 'DELETE' then
return OLD;
else
return NEW;
end if;
end;
$$;
CREATE TRIGGER ftable_rowcount_trigger
BEFORE INSERT OR UPDATE OR DELETE ON ltable
FOR EACH ROW EXECUTE PROCEDURE ftable_rowcount_trigf();
WITH t AS (
INSERT INTO ltable VALUES ('AAA', 42), ('BBB', 42) RETURNING *
)
INSERT INTO ftable SELECT * FROM t;
NOTICE: ftable_rowcount_trigger: there are 0 rows in ftable
NOTICE: ftable_rowcount_trigger: there are 1 rows in ftable
SELECT * FROM ltable;
a | b
-----+----
AAA | 42
BBB | 42
(2 rows)
SELECT * FROM ftable;
a | b
-----+----
AAA | 42
BBB | 42
(2 rows)
DELETE FROM ftable;
WITH t AS (
UPDATE ltable SET b = b + 100 RETURNING *
)
INSERT INTO ftable SELECT * FROM t;
NOTICE: ftable_rowcount_trigger: there are 0 rows in ftable
NOTICE: ftable_rowcount_trigger: there are 1 rows in ftable
SELECT * FROM ltable;
a | b
-----+-----
AAA | 142
BBB | 142
(2 rows)
SELECT * FROM ftable;
a | b
-----+-----
AAA | 142
BBB | 142
(2 rows)
DELETE FROM ftable;
WITH t AS (
DELETE FROM ltable RETURNING *
)
INSERT INTO ftable SELECT * FROM t;
NOTICE: ftable_rowcount_trigger: there are 0 rows in ftable
NOTICE: ftable_rowcount_trigger: there are 1 rows in ftable
SELECT * FROM ltable;
a | b
---+---
(0 rows)
SELECT * FROM ftable;
a | b
-----+-----
AAA | 142
BBB | 142
(2 rows)
DELETE FROM ftable;
-- Clean up
DROP FOREIGN TABLE ftable;
DROP TABLE batch_table;
DROP TRIGGER ftable_rowcount_trigger ON ltable;
DROP TABLE ltable;
CREATE TABLE parent (a text, b int) PARTITION BY LIST (a);
CREATE TABLE batch_table (a text, b int);
CREATE FOREIGN TABLE ftable
PARTITION OF parent
FOR VALUES IN ('AAA')
SERVER loopback
OPTIONS (table_name 'batch_table', batch_size '2');
CREATE TABLE ltable
PARTITION OF parent
FOR VALUES IN ('BBB');
CREATE TRIGGER ftable_rowcount_trigger
BEFORE INSERT ON ltable
FOR EACH ROW EXECUTE PROCEDURE ftable_rowcount_trigf();
INSERT INTO parent VALUES ('AAA', 42), ('BBB', 42), ('AAA', 42), ('BBB', 42);
NOTICE: ftable_rowcount_trigger: there are 1 rows in ftable
NOTICE: ftable_rowcount_trigger: there are 2 rows in ftable
SELECT tableoid::regclass, * FROM parent;
tableoid | a | b
----------+-----+----
ftable | AAA | 42
ftable | AAA | 42
ltable | BBB | 42
ltable | BBB | 42
(4 rows)
-- Clean up
DROP FOREIGN TABLE ftable;
DROP TABLE batch_table;
DROP TRIGGER ftable_rowcount_trigger ON ltable;
DROP TABLE ltable;
DROP TABLE parent;
DROP FUNCTION ftable_rowcount_trigf;
Add support for asynchronous execution. This implements asynchronous execution, which runs multiple parts of a non-parallel-aware Append concurrently rather than serially to improve performance when possible. Currently, the only node type that can be run concurrently is a ForeignScan that is an immediate child of such an Append. In the case where such ForeignScans access data on different remote servers, this would run those ForeignScans concurrently, and overlap the remote operations to be performed simultaneously, so it'll improve the performance especially when the operations involve time-consuming ones such as remote join and remote aggregation. We may extend this to other node types such as joins or aggregates over ForeignScans in the future. This also adds the support for postgres_fdw, which is enabled by the table-level/server-level option "async_capable". The default is false. Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit is mostly based on the patch proposed by Robert Haas, but also uses stuff from the patch proposed by Kyotaro Horiguchi and from the patch proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and others. Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 11:45:00 +02:00
-- ===================================================================
-- test asynchronous execution
-- ===================================================================
ALTER SERVER loopback OPTIONS (DROP extensions);
ALTER SERVER loopback OPTIONS (ADD async_capable 'true');
ALTER SERVER loopback2 OPTIONS (ADD async_capable 'true');
CREATE TABLE async_pt (a int, b int, c text) PARTITION BY RANGE (a);
CREATE TABLE base_tbl1 (a int, b int, c text);
CREATE TABLE base_tbl2 (a int, b int, c text);
CREATE FOREIGN TABLE async_p1 PARTITION OF async_pt FOR VALUES FROM (1000) TO (2000)
SERVER loopback OPTIONS (table_name 'base_tbl1');
CREATE FOREIGN TABLE async_p2 PARTITION OF async_pt FOR VALUES FROM (2000) TO (3000)
SERVER loopback2 OPTIONS (table_name 'base_tbl2');
INSERT INTO async_p1 SELECT 1000 + i, i, to_char(i, 'FM0000') FROM generate_series(0, 999, 5) i;
INSERT INTO async_p2 SELECT 2000 + i, i, to_char(i, 'FM0000') FROM generate_series(0, 999, 5) i;
ANALYZE async_pt;
-- simple queries
CREATE TABLE result_tbl (a int, b int, c text);
EXPLAIN (VERBOSE, COSTS OFF)
INSERT INTO result_tbl SELECT * FROM async_pt WHERE b % 100 = 0;
QUERY PLAN
----------------------------------------------------------------------------------------
Insert on public.result_tbl
-> Append
-> Async Foreign Scan on public.async_p1 async_pt_1
Output: async_pt_1.a, async_pt_1.b, async_pt_1.c
Remote SQL: SELECT a, b, c FROM public.base_tbl1 WHERE (((b % 100) = 0))
-> Async Foreign Scan on public.async_p2 async_pt_2
Output: async_pt_2.a, async_pt_2.b, async_pt_2.c
Remote SQL: SELECT a, b, c FROM public.base_tbl2 WHERE (((b % 100) = 0))
(8 rows)
INSERT INTO result_tbl SELECT * FROM async_pt WHERE b % 100 = 0;
SELECT * FROM result_tbl ORDER BY a;
a | b | c
------+-----+------
1000 | 0 | 0000
1100 | 100 | 0100
1200 | 200 | 0200
1300 | 300 | 0300
1400 | 400 | 0400
1500 | 500 | 0500
1600 | 600 | 0600
1700 | 700 | 0700
1800 | 800 | 0800
1900 | 900 | 0900
2000 | 0 | 0000
2100 | 100 | 0100
2200 | 200 | 0200
2300 | 300 | 0300
2400 | 400 | 0400
2500 | 500 | 0500
2600 | 600 | 0600
2700 | 700 | 0700
2800 | 800 | 0800
2900 | 900 | 0900
(20 rows)
DELETE FROM result_tbl;
EXPLAIN (VERBOSE, COSTS OFF)
INSERT INTO result_tbl SELECT * FROM async_pt WHERE b === 505;
QUERY PLAN
----------------------------------------------------------------
Insert on public.result_tbl
-> Append
-> Async Foreign Scan on public.async_p1 async_pt_1
Output: async_pt_1.a, async_pt_1.b, async_pt_1.c
Filter: (async_pt_1.b === 505)
Remote SQL: SELECT a, b, c FROM public.base_tbl1
-> Async Foreign Scan on public.async_p2 async_pt_2
Output: async_pt_2.a, async_pt_2.b, async_pt_2.c
Filter: (async_pt_2.b === 505)
Remote SQL: SELECT a, b, c FROM public.base_tbl2
(10 rows)
INSERT INTO result_tbl SELECT * FROM async_pt WHERE b === 505;
SELECT * FROM result_tbl ORDER BY a;
a | b | c
------+-----+------
1505 | 505 | 0505
2505 | 505 | 0505
(2 rows)
DELETE FROM result_tbl;
EXPLAIN (VERBOSE, COSTS OFF)
INSERT INTO result_tbl SELECT a, b, 'AAA' || c FROM async_pt WHERE b === 505;
QUERY PLAN
---------------------------------------------------------------------------------
Insert on public.result_tbl
-> Append
-> Async Foreign Scan on public.async_p1 async_pt_1
Output: async_pt_1.a, async_pt_1.b, ('AAA'::text || async_pt_1.c)
Filter: (async_pt_1.b === 505)
Remote SQL: SELECT a, b, c FROM public.base_tbl1
-> Async Foreign Scan on public.async_p2 async_pt_2
Output: async_pt_2.a, async_pt_2.b, ('AAA'::text || async_pt_2.c)
Filter: (async_pt_2.b === 505)
Remote SQL: SELECT a, b, c FROM public.base_tbl2
(10 rows)
INSERT INTO result_tbl SELECT a, b, 'AAA' || c FROM async_pt WHERE b === 505;
SELECT * FROM result_tbl ORDER BY a;
a | b | c
------+-----+---------
1505 | 505 | AAA0505
2505 | 505 | AAA0505
(2 rows)
Add support for asynchronous execution. This implements asynchronous execution, which runs multiple parts of a non-parallel-aware Append concurrently rather than serially to improve performance when possible. Currently, the only node type that can be run concurrently is a ForeignScan that is an immediate child of such an Append. In the case where such ForeignScans access data on different remote servers, this would run those ForeignScans concurrently, and overlap the remote operations to be performed simultaneously, so it'll improve the performance especially when the operations involve time-consuming ones such as remote join and remote aggregation. We may extend this to other node types such as joins or aggregates over ForeignScans in the future. This also adds the support for postgres_fdw, which is enabled by the table-level/server-level option "async_capable". The default is false. Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit is mostly based on the patch proposed by Robert Haas, but also uses stuff from the patch proposed by Kyotaro Horiguchi and from the patch proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and others. Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 11:45:00 +02:00
DELETE FROM result_tbl;
-- Check case where multiple partitions use the same connection
CREATE TABLE base_tbl3 (a int, b int, c text);
CREATE FOREIGN TABLE async_p3 PARTITION OF async_pt FOR VALUES FROM (3000) TO (4000)
SERVER loopback2 OPTIONS (table_name 'base_tbl3');
INSERT INTO async_p3 SELECT 3000 + i, i, to_char(i, 'FM0000') FROM generate_series(0, 999, 5) i;
ANALYZE async_pt;
EXPLAIN (VERBOSE, COSTS OFF)
INSERT INTO result_tbl SELECT * FROM async_pt WHERE b === 505;
QUERY PLAN
----------------------------------------------------------------
Insert on public.result_tbl
-> Append
-> Async Foreign Scan on public.async_p1 async_pt_1
Output: async_pt_1.a, async_pt_1.b, async_pt_1.c
Filter: (async_pt_1.b === 505)
Remote SQL: SELECT a, b, c FROM public.base_tbl1
-> Async Foreign Scan on public.async_p2 async_pt_2
Output: async_pt_2.a, async_pt_2.b, async_pt_2.c
Filter: (async_pt_2.b === 505)
Remote SQL: SELECT a, b, c FROM public.base_tbl2
-> Async Foreign Scan on public.async_p3 async_pt_3
Output: async_pt_3.a, async_pt_3.b, async_pt_3.c
Filter: (async_pt_3.b === 505)
Remote SQL: SELECT a, b, c FROM public.base_tbl3
(14 rows)
INSERT INTO result_tbl SELECT * FROM async_pt WHERE b === 505;
SELECT * FROM result_tbl ORDER BY a;
a | b | c
------+-----+------
1505 | 505 | 0505
2505 | 505 | 0505
3505 | 505 | 0505
(3 rows)
DELETE FROM result_tbl;
DROP FOREIGN TABLE async_p3;
DROP TABLE base_tbl3;
-- Check case where the partitioned table has local/remote partitions
CREATE TABLE async_p3 PARTITION OF async_pt FOR VALUES FROM (3000) TO (4000);
INSERT INTO async_p3 SELECT 3000 + i, i, to_char(i, 'FM0000') FROM generate_series(0, 999, 5) i;
ANALYZE async_pt;
EXPLAIN (VERBOSE, COSTS OFF)
INSERT INTO result_tbl SELECT * FROM async_pt WHERE b === 505;
QUERY PLAN
----------------------------------------------------------------
Insert on public.result_tbl
-> Append
-> Async Foreign Scan on public.async_p1 async_pt_1
Output: async_pt_1.a, async_pt_1.b, async_pt_1.c
Filter: (async_pt_1.b === 505)
Remote SQL: SELECT a, b, c FROM public.base_tbl1
-> Async Foreign Scan on public.async_p2 async_pt_2
Output: async_pt_2.a, async_pt_2.b, async_pt_2.c
Filter: (async_pt_2.b === 505)
Remote SQL: SELECT a, b, c FROM public.base_tbl2
-> Seq Scan on public.async_p3 async_pt_3
Output: async_pt_3.a, async_pt_3.b, async_pt_3.c
Filter: (async_pt_3.b === 505)
(13 rows)
INSERT INTO result_tbl SELECT * FROM async_pt WHERE b === 505;
SELECT * FROM result_tbl ORDER BY a;
a | b | c
------+-----+------
1505 | 505 | 0505
2505 | 505 | 0505
3505 | 505 | 0505
(3 rows)
DELETE FROM result_tbl;
-- partitionwise joins
SET enable_partitionwise_join TO true;
CREATE TABLE join_tbl (a1 int, b1 int, c1 text, a2 int, b2 int, c2 text);
EXPLAIN (VERBOSE, COSTS OFF)
INSERT INTO join_tbl SELECT * FROM async_pt t1, async_pt t2 WHERE t1.a = t2.a AND t1.b = t2.b AND t1.b % 100 = 0;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Insert on public.join_tbl
-> Append
-> Async Foreign Scan
Output: t1_1.a, t1_1.b, t1_1.c, t2_1.a, t2_1.b, t2_1.c
Relations: (public.async_p1 t1_1) INNER JOIN (public.async_p1 t2_1)
Remote SQL: SELECT r5.a, r5.b, r5.c, r8.a, r8.b, r8.c FROM (public.base_tbl1 r5 INNER JOIN public.base_tbl1 r8 ON (((r5.a = r8.a)) AND ((r5.b = r8.b)) AND (((r5.b % 100) = 0))))
-> Async Foreign Scan
Output: t1_2.a, t1_2.b, t1_2.c, t2_2.a, t2_2.b, t2_2.c
Relations: (public.async_p2 t1_2) INNER JOIN (public.async_p2 t2_2)
Remote SQL: SELECT r6.a, r6.b, r6.c, r9.a, r9.b, r9.c FROM (public.base_tbl2 r6 INNER JOIN public.base_tbl2 r9 ON (((r6.a = r9.a)) AND ((r6.b = r9.b)) AND (((r6.b % 100) = 0))))
-> Hash Join
Output: t1_3.a, t1_3.b, t1_3.c, t2_3.a, t2_3.b, t2_3.c
Hash Cond: ((t2_3.a = t1_3.a) AND (t2_3.b = t1_3.b))
-> Seq Scan on public.async_p3 t2_3
Output: t2_3.a, t2_3.b, t2_3.c
-> Hash
Output: t1_3.a, t1_3.b, t1_3.c
-> Seq Scan on public.async_p3 t1_3
Output: t1_3.a, t1_3.b, t1_3.c
Filter: ((t1_3.b % 100) = 0)
(20 rows)
INSERT INTO join_tbl SELECT * FROM async_pt t1, async_pt t2 WHERE t1.a = t2.a AND t1.b = t2.b AND t1.b % 100 = 0;
SELECT * FROM join_tbl ORDER BY a1;
a1 | b1 | c1 | a2 | b2 | c2
------+-----+------+------+-----+------
1000 | 0 | 0000 | 1000 | 0 | 0000
1100 | 100 | 0100 | 1100 | 100 | 0100
1200 | 200 | 0200 | 1200 | 200 | 0200
1300 | 300 | 0300 | 1300 | 300 | 0300
1400 | 400 | 0400 | 1400 | 400 | 0400
1500 | 500 | 0500 | 1500 | 500 | 0500
1600 | 600 | 0600 | 1600 | 600 | 0600
1700 | 700 | 0700 | 1700 | 700 | 0700
1800 | 800 | 0800 | 1800 | 800 | 0800
1900 | 900 | 0900 | 1900 | 900 | 0900
2000 | 0 | 0000 | 2000 | 0 | 0000
2100 | 100 | 0100 | 2100 | 100 | 0100
2200 | 200 | 0200 | 2200 | 200 | 0200
2300 | 300 | 0300 | 2300 | 300 | 0300
2400 | 400 | 0400 | 2400 | 400 | 0400
2500 | 500 | 0500 | 2500 | 500 | 0500
2600 | 600 | 0600 | 2600 | 600 | 0600
2700 | 700 | 0700 | 2700 | 700 | 0700
2800 | 800 | 0800 | 2800 | 800 | 0800
2900 | 900 | 0900 | 2900 | 900 | 0900
3000 | 0 | 0000 | 3000 | 0 | 0000
3100 | 100 | 0100 | 3100 | 100 | 0100
3200 | 200 | 0200 | 3200 | 200 | 0200
3300 | 300 | 0300 | 3300 | 300 | 0300
3400 | 400 | 0400 | 3400 | 400 | 0400
3500 | 500 | 0500 | 3500 | 500 | 0500
3600 | 600 | 0600 | 3600 | 600 | 0600
3700 | 700 | 0700 | 3700 | 700 | 0700
3800 | 800 | 0800 | 3800 | 800 | 0800
3900 | 900 | 0900 | 3900 | 900 | 0900
(30 rows)
DELETE FROM join_tbl;
EXPLAIN (VERBOSE, COSTS OFF)
INSERT INTO join_tbl SELECT t1.a, t1.b, 'AAA' || t1.c, t2.a, t2.b, 'AAA' || t2.c FROM async_pt t1, async_pt t2 WHERE t1.a = t2.a AND t1.b = t2.b AND t1.b % 100 = 0;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Insert on public.join_tbl
-> Append
-> Async Foreign Scan
Output: t1_1.a, t1_1.b, ('AAA'::text || t1_1.c), t2_1.a, t2_1.b, ('AAA'::text || t2_1.c)
Relations: (public.async_p1 t1_1) INNER JOIN (public.async_p1 t2_1)
Remote SQL: SELECT r5.a, r5.b, r5.c, r8.a, r8.b, r8.c FROM (public.base_tbl1 r5 INNER JOIN public.base_tbl1 r8 ON (((r5.a = r8.a)) AND ((r5.b = r8.b)) AND (((r5.b % 100) = 0))))
-> Async Foreign Scan
Output: t1_2.a, t1_2.b, ('AAA'::text || t1_2.c), t2_2.a, t2_2.b, ('AAA'::text || t2_2.c)
Relations: (public.async_p2 t1_2) INNER JOIN (public.async_p2 t2_2)
Remote SQL: SELECT r6.a, r6.b, r6.c, r9.a, r9.b, r9.c FROM (public.base_tbl2 r6 INNER JOIN public.base_tbl2 r9 ON (((r6.a = r9.a)) AND ((r6.b = r9.b)) AND (((r6.b % 100) = 0))))
-> Hash Join
Output: t1_3.a, t1_3.b, ('AAA'::text || t1_3.c), t2_3.a, t2_3.b, ('AAA'::text || t2_3.c)
Hash Cond: ((t2_3.a = t1_3.a) AND (t2_3.b = t1_3.b))
-> Seq Scan on public.async_p3 t2_3
Output: t2_3.a, t2_3.b, t2_3.c
-> Hash
Output: t1_3.a, t1_3.b, t1_3.c
-> Seq Scan on public.async_p3 t1_3
Output: t1_3.a, t1_3.b, t1_3.c
Filter: ((t1_3.b % 100) = 0)
(20 rows)
INSERT INTO join_tbl SELECT t1.a, t1.b, 'AAA' || t1.c, t2.a, t2.b, 'AAA' || t2.c FROM async_pt t1, async_pt t2 WHERE t1.a = t2.a AND t1.b = t2.b AND t1.b % 100 = 0;
SELECT * FROM join_tbl ORDER BY a1;
a1 | b1 | c1 | a2 | b2 | c2
------+-----+---------+------+-----+---------
1000 | 0 | AAA0000 | 1000 | 0 | AAA0000
1100 | 100 | AAA0100 | 1100 | 100 | AAA0100
1200 | 200 | AAA0200 | 1200 | 200 | AAA0200
1300 | 300 | AAA0300 | 1300 | 300 | AAA0300
1400 | 400 | AAA0400 | 1400 | 400 | AAA0400
1500 | 500 | AAA0500 | 1500 | 500 | AAA0500
1600 | 600 | AAA0600 | 1600 | 600 | AAA0600
1700 | 700 | AAA0700 | 1700 | 700 | AAA0700
1800 | 800 | AAA0800 | 1800 | 800 | AAA0800
1900 | 900 | AAA0900 | 1900 | 900 | AAA0900
2000 | 0 | AAA0000 | 2000 | 0 | AAA0000
2100 | 100 | AAA0100 | 2100 | 100 | AAA0100
2200 | 200 | AAA0200 | 2200 | 200 | AAA0200
2300 | 300 | AAA0300 | 2300 | 300 | AAA0300
2400 | 400 | AAA0400 | 2400 | 400 | AAA0400
2500 | 500 | AAA0500 | 2500 | 500 | AAA0500
2600 | 600 | AAA0600 | 2600 | 600 | AAA0600
2700 | 700 | AAA0700 | 2700 | 700 | AAA0700
2800 | 800 | AAA0800 | 2800 | 800 | AAA0800
2900 | 900 | AAA0900 | 2900 | 900 | AAA0900
3000 | 0 | AAA0000 | 3000 | 0 | AAA0000
3100 | 100 | AAA0100 | 3100 | 100 | AAA0100
3200 | 200 | AAA0200 | 3200 | 200 | AAA0200
3300 | 300 | AAA0300 | 3300 | 300 | AAA0300
3400 | 400 | AAA0400 | 3400 | 400 | AAA0400
3500 | 500 | AAA0500 | 3500 | 500 | AAA0500
3600 | 600 | AAA0600 | 3600 | 600 | AAA0600
3700 | 700 | AAA0700 | 3700 | 700 | AAA0700
3800 | 800 | AAA0800 | 3800 | 800 | AAA0800
3900 | 900 | AAA0900 | 3900 | 900 | AAA0900
(30 rows)
Add support for asynchronous execution. This implements asynchronous execution, which runs multiple parts of a non-parallel-aware Append concurrently rather than serially to improve performance when possible. Currently, the only node type that can be run concurrently is a ForeignScan that is an immediate child of such an Append. In the case where such ForeignScans access data on different remote servers, this would run those ForeignScans concurrently, and overlap the remote operations to be performed simultaneously, so it'll improve the performance especially when the operations involve time-consuming ones such as remote join and remote aggregation. We may extend this to other node types such as joins or aggregates over ForeignScans in the future. This also adds the support for postgres_fdw, which is enabled by the table-level/server-level option "async_capable". The default is false. Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit is mostly based on the patch proposed by Robert Haas, but also uses stuff from the patch proposed by Kyotaro Horiguchi and from the patch proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and others. Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 11:45:00 +02:00
DELETE FROM join_tbl;
RESET enable_partitionwise_join;
-- Test rescan of an async Append node with do_exec_prune=false
SET enable_hashjoin TO false;
EXPLAIN (VERBOSE, COSTS OFF)
INSERT INTO join_tbl SELECT * FROM async_p1 t1, async_pt t2 WHERE t1.a = t2.a AND t1.b = t2.b AND t1.b % 100 = 0;
QUERY PLAN
----------------------------------------------------------------------------------------
Insert on public.join_tbl
-> Nested Loop
Output: t1.a, t1.b, t1.c, t2.a, t2.b, t2.c
Join Filter: ((t1.a = t2.a) AND (t1.b = t2.b))
-> Foreign Scan on public.async_p1 t1
Output: t1.a, t1.b, t1.c
Remote SQL: SELECT a, b, c FROM public.base_tbl1 WHERE (((b % 100) = 0))
-> Append
-> Async Foreign Scan on public.async_p1 t2_1
Output: t2_1.a, t2_1.b, t2_1.c
Remote SQL: SELECT a, b, c FROM public.base_tbl1
-> Async Foreign Scan on public.async_p2 t2_2
Output: t2_2.a, t2_2.b, t2_2.c
Remote SQL: SELECT a, b, c FROM public.base_tbl2
-> Seq Scan on public.async_p3 t2_3
Output: t2_3.a, t2_3.b, t2_3.c
(16 rows)
INSERT INTO join_tbl SELECT * FROM async_p1 t1, async_pt t2 WHERE t1.a = t2.a AND t1.b = t2.b AND t1.b % 100 = 0;
SELECT * FROM join_tbl ORDER BY a1;
a1 | b1 | c1 | a2 | b2 | c2
------+-----+------+------+-----+------
1000 | 0 | 0000 | 1000 | 0 | 0000
1100 | 100 | 0100 | 1100 | 100 | 0100
1200 | 200 | 0200 | 1200 | 200 | 0200
1300 | 300 | 0300 | 1300 | 300 | 0300
1400 | 400 | 0400 | 1400 | 400 | 0400
1500 | 500 | 0500 | 1500 | 500 | 0500
1600 | 600 | 0600 | 1600 | 600 | 0600
1700 | 700 | 0700 | 1700 | 700 | 0700
1800 | 800 | 0800 | 1800 | 800 | 0800
1900 | 900 | 0900 | 1900 | 900 | 0900
(10 rows)
DELETE FROM join_tbl;
RESET enable_hashjoin;
Add support for asynchronous execution. This implements asynchronous execution, which runs multiple parts of a non-parallel-aware Append concurrently rather than serially to improve performance when possible. Currently, the only node type that can be run concurrently is a ForeignScan that is an immediate child of such an Append. In the case where such ForeignScans access data on different remote servers, this would run those ForeignScans concurrently, and overlap the remote operations to be performed simultaneously, so it'll improve the performance especially when the operations involve time-consuming ones such as remote join and remote aggregation. We may extend this to other node types such as joins or aggregates over ForeignScans in the future. This also adds the support for postgres_fdw, which is enabled by the table-level/server-level option "async_capable". The default is false. Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit is mostly based on the patch proposed by Robert Haas, but also uses stuff from the patch proposed by Kyotaro Horiguchi and from the patch proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and others. Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 11:45:00 +02:00
-- Test interaction of async execution with plan-time partition pruning
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM async_pt WHERE a < 3000;
QUERY PLAN
-----------------------------------------------------------------------------
Append
-> Async Foreign Scan on public.async_p1 async_pt_1
Output: async_pt_1.a, async_pt_1.b, async_pt_1.c
Remote SQL: SELECT a, b, c FROM public.base_tbl1 WHERE ((a < 3000))
-> Async Foreign Scan on public.async_p2 async_pt_2
Output: async_pt_2.a, async_pt_2.b, async_pt_2.c
Remote SQL: SELECT a, b, c FROM public.base_tbl2 WHERE ((a < 3000))
(7 rows)
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM async_pt WHERE a < 2000;
QUERY PLAN
-----------------------------------------------------------------------
Foreign Scan on public.async_p1 async_pt
Output: async_pt.a, async_pt.b, async_pt.c
Remote SQL: SELECT a, b, c FROM public.base_tbl1 WHERE ((a < 2000))
(3 rows)
-- Test interaction of async execution with run-time partition pruning
SET plan_cache_mode TO force_generic_plan;
PREPARE async_pt_query (int, int) AS
INSERT INTO result_tbl SELECT * FROM async_pt WHERE a < $1 AND b === $2;
EXPLAIN (VERBOSE, COSTS OFF)
EXECUTE async_pt_query (3000, 505);
QUERY PLAN
------------------------------------------------------------------------------------------
Insert on public.result_tbl
-> Append
Subplans Removed: 1
-> Async Foreign Scan on public.async_p1 async_pt_1
Output: async_pt_1.a, async_pt_1.b, async_pt_1.c
Filter: (async_pt_1.b === $2)
Remote SQL: SELECT a, b, c FROM public.base_tbl1 WHERE ((a < $1::integer))
-> Async Foreign Scan on public.async_p2 async_pt_2
Output: async_pt_2.a, async_pt_2.b, async_pt_2.c
Filter: (async_pt_2.b === $2)
Remote SQL: SELECT a, b, c FROM public.base_tbl2 WHERE ((a < $1::integer))
(11 rows)
EXECUTE async_pt_query (3000, 505);
SELECT * FROM result_tbl ORDER BY a;
a | b | c
------+-----+------
1505 | 505 | 0505
2505 | 505 | 0505
(2 rows)
DELETE FROM result_tbl;
EXPLAIN (VERBOSE, COSTS OFF)
EXECUTE async_pt_query (2000, 505);
QUERY PLAN
------------------------------------------------------------------------------------------
Insert on public.result_tbl
-> Append
Subplans Removed: 2
-> Async Foreign Scan on public.async_p1 async_pt_1
Output: async_pt_1.a, async_pt_1.b, async_pt_1.c
Filter: (async_pt_1.b === $2)
Remote SQL: SELECT a, b, c FROM public.base_tbl1 WHERE ((a < $1::integer))
(7 rows)
EXECUTE async_pt_query (2000, 505);
SELECT * FROM result_tbl ORDER BY a;
a | b | c
------+-----+------
1505 | 505 | 0505
(1 row)
DELETE FROM result_tbl;
RESET plan_cache_mode;
CREATE TABLE local_tbl(a int, b int, c text);
INSERT INTO local_tbl VALUES (1505, 505, 'foo'), (2505, 505, 'bar');
ANALYZE local_tbl;
CREATE INDEX base_tbl1_idx ON base_tbl1 (a);
CREATE INDEX base_tbl2_idx ON base_tbl2 (a);
CREATE INDEX async_p3_idx ON async_p3 (a);
ANALYZE base_tbl1;
ANALYZE base_tbl2;
ANALYZE async_p3;
ALTER FOREIGN TABLE async_p1 OPTIONS (use_remote_estimate 'true');
ALTER FOREIGN TABLE async_p2 OPTIONS (use_remote_estimate 'true');
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM local_tbl, async_pt WHERE local_tbl.a = async_pt.a AND local_tbl.c = 'bar';
QUERY PLAN
------------------------------------------------------------------------------------------
Nested Loop
Output: local_tbl.a, local_tbl.b, local_tbl.c, async_pt.a, async_pt.b, async_pt.c
-> Seq Scan on public.local_tbl
Output: local_tbl.a, local_tbl.b, local_tbl.c
Filter: (local_tbl.c = 'bar'::text)
-> Append
-> Async Foreign Scan on public.async_p1 async_pt_1
Output: async_pt_1.a, async_pt_1.b, async_pt_1.c
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Remote SQL: SELECT a, b, c FROM public.base_tbl1 WHERE ((a = $1::integer))
Add support for asynchronous execution. This implements asynchronous execution, which runs multiple parts of a non-parallel-aware Append concurrently rather than serially to improve performance when possible. Currently, the only node type that can be run concurrently is a ForeignScan that is an immediate child of such an Append. In the case where such ForeignScans access data on different remote servers, this would run those ForeignScans concurrently, and overlap the remote operations to be performed simultaneously, so it'll improve the performance especially when the operations involve time-consuming ones such as remote join and remote aggregation. We may extend this to other node types such as joins or aggregates over ForeignScans in the future. This also adds the support for postgres_fdw, which is enabled by the table-level/server-level option "async_capable". The default is false. Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit is mostly based on the patch proposed by Robert Haas, but also uses stuff from the patch proposed by Kyotaro Horiguchi and from the patch proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and others. Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 11:45:00 +02:00
-> Async Foreign Scan on public.async_p2 async_pt_2
Output: async_pt_2.a, async_pt_2.b, async_pt_2.c
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Remote SQL: SELECT a, b, c FROM public.base_tbl2 WHERE ((a = $1::integer))
Add support for asynchronous execution. This implements asynchronous execution, which runs multiple parts of a non-parallel-aware Append concurrently rather than serially to improve performance when possible. Currently, the only node type that can be run concurrently is a ForeignScan that is an immediate child of such an Append. In the case where such ForeignScans access data on different remote servers, this would run those ForeignScans concurrently, and overlap the remote operations to be performed simultaneously, so it'll improve the performance especially when the operations involve time-consuming ones such as remote join and remote aggregation. We may extend this to other node types such as joins or aggregates over ForeignScans in the future. This also adds the support for postgres_fdw, which is enabled by the table-level/server-level option "async_capable". The default is false. Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit is mostly based on the patch proposed by Robert Haas, but also uses stuff from the patch proposed by Kyotaro Horiguchi and from the patch proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and others. Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 11:45:00 +02:00
-> Seq Scan on public.async_p3 async_pt_3
Output: async_pt_3.a, async_pt_3.b, async_pt_3.c
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Filter: (async_pt_3.a = local_tbl.a)
Add support for asynchronous execution. This implements asynchronous execution, which runs multiple parts of a non-parallel-aware Append concurrently rather than serially to improve performance when possible. Currently, the only node type that can be run concurrently is a ForeignScan that is an immediate child of such an Append. In the case where such ForeignScans access data on different remote servers, this would run those ForeignScans concurrently, and overlap the remote operations to be performed simultaneously, so it'll improve the performance especially when the operations involve time-consuming ones such as remote join and remote aggregation. We may extend this to other node types such as joins or aggregates over ForeignScans in the future. This also adds the support for postgres_fdw, which is enabled by the table-level/server-level option "async_capable". The default is false. Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit is mostly based on the patch proposed by Robert Haas, but also uses stuff from the patch proposed by Kyotaro Horiguchi and from the patch proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and others. Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 11:45:00 +02:00
(15 rows)
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
SELECT * FROM local_tbl, async_pt WHERE local_tbl.a = async_pt.a AND local_tbl.c = 'bar';
QUERY PLAN
-------------------------------------------------------------------------------
Nested Loop (actual rows=1 loops=1)
-> Seq Scan on local_tbl (actual rows=1 loops=1)
Filter: (c = 'bar'::text)
Rows Removed by Filter: 1
-> Append (actual rows=1 loops=1)
-> Async Foreign Scan on async_p1 async_pt_1 (never executed)
-> Async Foreign Scan on async_p2 async_pt_2 (actual rows=1 loops=1)
-> Seq Scan on async_p3 async_pt_3 (never executed)
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Filter: (a = local_tbl.a)
Add support for asynchronous execution. This implements asynchronous execution, which runs multiple parts of a non-parallel-aware Append concurrently rather than serially to improve performance when possible. Currently, the only node type that can be run concurrently is a ForeignScan that is an immediate child of such an Append. In the case where such ForeignScans access data on different remote servers, this would run those ForeignScans concurrently, and overlap the remote operations to be performed simultaneously, so it'll improve the performance especially when the operations involve time-consuming ones such as remote join and remote aggregation. We may extend this to other node types such as joins or aggregates over ForeignScans in the future. This also adds the support for postgres_fdw, which is enabled by the table-level/server-level option "async_capable". The default is false. Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit is mostly based on the patch proposed by Robert Haas, but also uses stuff from the patch proposed by Kyotaro Horiguchi and from the patch proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and others. Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 11:45:00 +02:00
(9 rows)
SELECT * FROM local_tbl, async_pt WHERE local_tbl.a = async_pt.a AND local_tbl.c = 'bar';
a | b | c | a | b | c
------+-----+-----+------+-----+------
2505 | 505 | bar | 2505 | 505 | 0505
(1 row)
ALTER FOREIGN TABLE async_p1 OPTIONS (DROP use_remote_estimate);
ALTER FOREIGN TABLE async_p2 OPTIONS (DROP use_remote_estimate);
DROP TABLE local_tbl;
DROP INDEX base_tbl1_idx;
DROP INDEX base_tbl2_idx;
DROP INDEX async_p3_idx;
-- UNION queries
EXPLAIN (VERBOSE, COSTS OFF)
INSERT INTO result_tbl
(SELECT a, b, 'AAA' || c FROM async_p1 ORDER BY a LIMIT 10)
UNION
(SELECT a, b, 'AAA' || c FROM async_p2 WHERE b < 10);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------
Insert on public.result_tbl
-> HashAggregate
Output: async_p1.a, async_p1.b, (('AAA'::text || async_p1.c))
Group Key: async_p1.a, async_p1.b, (('AAA'::text || async_p1.c))
-> Append
-> Async Foreign Scan on public.async_p1
Output: async_p1.a, async_p1.b, ('AAA'::text || async_p1.c)
Remote SQL: SELECT a, b, c FROM public.base_tbl1 ORDER BY a ASC NULLS LAST LIMIT 10::bigint
-> Async Foreign Scan on public.async_p2
Output: async_p2.a, async_p2.b, ('AAA'::text || async_p2.c)
Remote SQL: SELECT a, b, c FROM public.base_tbl2 WHERE ((b < 10))
(11 rows)
INSERT INTO result_tbl
(SELECT a, b, 'AAA' || c FROM async_p1 ORDER BY a LIMIT 10)
UNION
(SELECT a, b, 'AAA' || c FROM async_p2 WHERE b < 10);
SELECT * FROM result_tbl ORDER BY a;
a | b | c
------+----+---------
1000 | 0 | AAA0000
1005 | 5 | AAA0005
1010 | 10 | AAA0010
1015 | 15 | AAA0015
1020 | 20 | AAA0020
1025 | 25 | AAA0025
1030 | 30 | AAA0030
1035 | 35 | AAA0035
1040 | 40 | AAA0040
1045 | 45 | AAA0045
2000 | 0 | AAA0000
2005 | 5 | AAA0005
(12 rows)
DELETE FROM result_tbl;
EXPLAIN (VERBOSE, COSTS OFF)
INSERT INTO result_tbl
(SELECT a, b, 'AAA' || c FROM async_p1 ORDER BY a LIMIT 10)
UNION ALL
(SELECT a, b, 'AAA' || c FROM async_p2 WHERE b < 10);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------
Insert on public.result_tbl
-> Append
-> Async Foreign Scan on public.async_p1
Output: async_p1.a, async_p1.b, ('AAA'::text || async_p1.c)
Remote SQL: SELECT a, b, c FROM public.base_tbl1 ORDER BY a ASC NULLS LAST LIMIT 10::bigint
-> Async Foreign Scan on public.async_p2
Output: async_p2.a, async_p2.b, ('AAA'::text || async_p2.c)
Remote SQL: SELECT a, b, c FROM public.base_tbl2 WHERE ((b < 10))
(8 rows)
INSERT INTO result_tbl
(SELECT a, b, 'AAA' || c FROM async_p1 ORDER BY a LIMIT 10)
UNION ALL
(SELECT a, b, 'AAA' || c FROM async_p2 WHERE b < 10);
SELECT * FROM result_tbl ORDER BY a;
a | b | c
------+----+---------
1000 | 0 | AAA0000
1005 | 5 | AAA0005
1010 | 10 | AAA0010
1015 | 15 | AAA0015
1020 | 20 | AAA0020
1025 | 25 | AAA0025
1030 | 30 | AAA0030
1035 | 35 | AAA0035
1040 | 40 | AAA0040
1045 | 45 | AAA0045
2000 | 0 | AAA0000
2005 | 5 | AAA0005
(12 rows)
DELETE FROM result_tbl;
-- Disable async execution if we use gating Result nodes for pseudoconstant
-- quals
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM async_pt WHERE CURRENT_USER = SESSION_USER;
QUERY PLAN
----------------------------------------------------------------
Append
-> Result
Output: async_pt_1.a, async_pt_1.b, async_pt_1.c
One-Time Filter: (CURRENT_USER = SESSION_USER)
-> Foreign Scan on public.async_p1 async_pt_1
Output: async_pt_1.a, async_pt_1.b, async_pt_1.c
Remote SQL: SELECT a, b, c FROM public.base_tbl1
-> Result
Output: async_pt_2.a, async_pt_2.b, async_pt_2.c
One-Time Filter: (CURRENT_USER = SESSION_USER)
-> Foreign Scan on public.async_p2 async_pt_2
Output: async_pt_2.a, async_pt_2.b, async_pt_2.c
Remote SQL: SELECT a, b, c FROM public.base_tbl2
-> Result
Output: async_pt_3.a, async_pt_3.b, async_pt_3.c
One-Time Filter: (CURRENT_USER = SESSION_USER)
-> Seq Scan on public.async_p3 async_pt_3
Output: async_pt_3.a, async_pt_3.b, async_pt_3.c
(18 rows)
EXPLAIN (VERBOSE, COSTS OFF)
(SELECT * FROM async_p1 WHERE CURRENT_USER = SESSION_USER)
UNION ALL
(SELECT * FROM async_p2 WHERE CURRENT_USER = SESSION_USER);
QUERY PLAN
----------------------------------------------------------------
Append
-> Result
Output: async_p1.a, async_p1.b, async_p1.c
One-Time Filter: (CURRENT_USER = SESSION_USER)
-> Foreign Scan on public.async_p1
Output: async_p1.a, async_p1.b, async_p1.c
Remote SQL: SELECT a, b, c FROM public.base_tbl1
-> Result
Output: async_p2.a, async_p2.b, async_p2.c
One-Time Filter: (CURRENT_USER = SESSION_USER)
-> Foreign Scan on public.async_p2
Output: async_p2.a, async_p2.b, async_p2.c
Remote SQL: SELECT a, b, c FROM public.base_tbl2
(13 rows)
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM ((SELECT * FROM async_p1 WHERE b < 10) UNION ALL (SELECT * FROM async_p2 WHERE b < 10)) s WHERE CURRENT_USER = SESSION_USER;
QUERY PLAN
---------------------------------------------------------------------------------
Append
-> Result
Output: async_p1.a, async_p1.b, async_p1.c
One-Time Filter: (CURRENT_USER = SESSION_USER)
-> Foreign Scan on public.async_p1
Output: async_p1.a, async_p1.b, async_p1.c
Remote SQL: SELECT a, b, c FROM public.base_tbl1 WHERE ((b < 10))
-> Result
Output: async_p2.a, async_p2.b, async_p2.c
One-Time Filter: (CURRENT_USER = SESSION_USER)
-> Foreign Scan on public.async_p2
Output: async_p2.a, async_p2.b, async_p2.c
Remote SQL: SELECT a, b, c FROM public.base_tbl2 WHERE ((b < 10))
(13 rows)
Add support for asynchronous execution. This implements asynchronous execution, which runs multiple parts of a non-parallel-aware Append concurrently rather than serially to improve performance when possible. Currently, the only node type that can be run concurrently is a ForeignScan that is an immediate child of such an Append. In the case where such ForeignScans access data on different remote servers, this would run those ForeignScans concurrently, and overlap the remote operations to be performed simultaneously, so it'll improve the performance especially when the operations involve time-consuming ones such as remote join and remote aggregation. We may extend this to other node types such as joins or aggregates over ForeignScans in the future. This also adds the support for postgres_fdw, which is enabled by the table-level/server-level option "async_capable". The default is false. Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit is mostly based on the patch proposed by Robert Haas, but also uses stuff from the patch proposed by Kyotaro Horiguchi and from the patch proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and others. Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 11:45:00 +02:00
-- Test that pending requests are processed properly
SET enable_mergejoin TO false;
SET enable_hashjoin TO false;
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM async_pt t1, async_p2 t2 WHERE t1.a = t2.a AND t1.b === 505;
QUERY PLAN
----------------------------------------------------------------
Nested Loop
Output: t1.a, t1.b, t1.c, t2.a, t2.b, t2.c
Join Filter: (t1.a = t2.a)
-> Append
-> Async Foreign Scan on public.async_p1 t1_1
Output: t1_1.a, t1_1.b, t1_1.c
Filter: (t1_1.b === 505)
Remote SQL: SELECT a, b, c FROM public.base_tbl1
-> Async Foreign Scan on public.async_p2 t1_2
Output: t1_2.a, t1_2.b, t1_2.c
Filter: (t1_2.b === 505)
Remote SQL: SELECT a, b, c FROM public.base_tbl2
-> Seq Scan on public.async_p3 t1_3
Output: t1_3.a, t1_3.b, t1_3.c
Filter: (t1_3.b === 505)
-> Materialize
Output: t2.a, t2.b, t2.c
-> Foreign Scan on public.async_p2 t2
Output: t2.a, t2.b, t2.c
Remote SQL: SELECT a, b, c FROM public.base_tbl2
(20 rows)
SELECT * FROM async_pt t1, async_p2 t2 WHERE t1.a = t2.a AND t1.b === 505;
a | b | c | a | b | c
------+-----+------+------+-----+------
2505 | 505 | 0505 | 2505 | 505 | 0505
(1 row)
postgres_fdw: Fix handling of pending asynchronous requests. A pending asynchronous request is handled by process_pending_request(), which previously not only processed an in-progress remote query but performed ExecForeignScan() to produce a tuple to return to the local server asynchronously from the result of the remote query. But that led to a server crash when executing a query or led to an "InstrStartNode called twice in a row" or "InstrEndLoop called on running node" failure when doing EXPLAIN ANALYZE of it, in cases where the plan tree for it contained multiple async-capable nodes accessing the same initplan/subplan that contained multiple async-capable nodes scanning the same foreign tables as for the parent async-capable nodes, as reported by Andrey Lepikhov. The reason is that the second step in process_pending_request() invoked when executing the initplan/subplan for one of the parent async-capable nodes caused recursive execution of the initplan/subplan for another of the parent async-capable nodes. To fix, split process_pending_request() into the two steps and postpone the second step until ForeignAsyncConfigureWait() is called for each of the pending asynchronous requests. Also, in ExecAppendAsyncEventWait() we assumed that FDWs would register at least one wait event in a WaitEventSet created there when they were called from ForeignAsyncConfigureWait() in that function, but allow FDWs to register zero wait events in the WaitEventSet; modify ExecAppendAsyncEventWait() to just return in that case. Oversight in commit 27e1f1456. Back-patch to v14 where that commit went in. Andrey Lepikhov and Etsuro Fujita Discussion: https://postgr.es/m/fe5eaa19-1704-e4a4-76ee-3b9d37ade399@postgrespro.ru
2021-07-30 10:00:00 +02:00
CREATE TABLE local_tbl (a int, b int, c text);
INSERT INTO local_tbl VALUES (1505, 505, 'foo');
ANALYZE local_tbl;
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM local_tbl t1 LEFT JOIN (SELECT *, (SELECT count(*) FROM async_pt WHERE a < 3000) FROM async_pt WHERE a < 3000) t2 ON t1.a = t2.a;
QUERY PLAN
----------------------------------------------------------------------------------------
Nested Loop Left Join
Output: t1.a, t1.b, t1.c, async_pt.a, async_pt.b, async_pt.c, ($0)
Join Filter: (t1.a = async_pt.a)
InitPlan 1 (returns $0)
-> Aggregate
Output: count(*)
-> Append
-> Async Foreign Scan on public.async_p1 async_pt_4
Remote SQL: SELECT NULL FROM public.base_tbl1 WHERE ((a < 3000))
-> Async Foreign Scan on public.async_p2 async_pt_5
Remote SQL: SELECT NULL FROM public.base_tbl2 WHERE ((a < 3000))
-> Seq Scan on public.local_tbl t1
Output: t1.a, t1.b, t1.c
-> Append
-> Async Foreign Scan on public.async_p1 async_pt_1
Output: async_pt_1.a, async_pt_1.b, async_pt_1.c, $0
Remote SQL: SELECT a, b, c FROM public.base_tbl1 WHERE ((a < 3000))
-> Async Foreign Scan on public.async_p2 async_pt_2
Output: async_pt_2.a, async_pt_2.b, async_pt_2.c, $0
Remote SQL: SELECT a, b, c FROM public.base_tbl2 WHERE ((a < 3000))
(20 rows)
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
SELECT * FROM local_tbl t1 LEFT JOIN (SELECT *, (SELECT count(*) FROM async_pt WHERE a < 3000) FROM async_pt WHERE a < 3000) t2 ON t1.a = t2.a;
QUERY PLAN
-----------------------------------------------------------------------------------------
Nested Loop Left Join (actual rows=1 loops=1)
Join Filter: (t1.a = async_pt.a)
Rows Removed by Join Filter: 399
InitPlan 1 (returns $0)
-> Aggregate (actual rows=1 loops=1)
-> Append (actual rows=400 loops=1)
-> Async Foreign Scan on async_p1 async_pt_4 (actual rows=200 loops=1)
-> Async Foreign Scan on async_p2 async_pt_5 (actual rows=200 loops=1)
-> Seq Scan on local_tbl t1 (actual rows=1 loops=1)
-> Append (actual rows=400 loops=1)
-> Async Foreign Scan on async_p1 async_pt_1 (actual rows=200 loops=1)
-> Async Foreign Scan on async_p2 async_pt_2 (actual rows=200 loops=1)
(12 rows)
SELECT * FROM local_tbl t1 LEFT JOIN (SELECT *, (SELECT count(*) FROM async_pt WHERE a < 3000) FROM async_pt WHERE a < 3000) t2 ON t1.a = t2.a;
a | b | c | a | b | c | count
------+-----+-----+------+-----+------+-------
1505 | 505 | foo | 1505 | 505 | 0505 | 400
(1 row)
Add support for asynchronous execution. This implements asynchronous execution, which runs multiple parts of a non-parallel-aware Append concurrently rather than serially to improve performance when possible. Currently, the only node type that can be run concurrently is a ForeignScan that is an immediate child of such an Append. In the case where such ForeignScans access data on different remote servers, this would run those ForeignScans concurrently, and overlap the remote operations to be performed simultaneously, so it'll improve the performance especially when the operations involve time-consuming ones such as remote join and remote aggregation. We may extend this to other node types such as joins or aggregates over ForeignScans in the future. This also adds the support for postgres_fdw, which is enabled by the table-level/server-level option "async_capable". The default is false. Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit is mostly based on the patch proposed by Robert Haas, but also uses stuff from the patch proposed by Kyotaro Horiguchi and from the patch proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and others. Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 11:45:00 +02:00
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM async_pt t1 WHERE t1.b === 505 LIMIT 1;
QUERY PLAN
----------------------------------------------------------------
Limit
Output: t1.a, t1.b, t1.c
-> Append
-> Async Foreign Scan on public.async_p1 t1_1
Output: t1_1.a, t1_1.b, t1_1.c
Filter: (t1_1.b === 505)
Remote SQL: SELECT a, b, c FROM public.base_tbl1
-> Async Foreign Scan on public.async_p2 t1_2
Output: t1_2.a, t1_2.b, t1_2.c
Filter: (t1_2.b === 505)
Remote SQL: SELECT a, b, c FROM public.base_tbl2
-> Seq Scan on public.async_p3 t1_3
Output: t1_3.a, t1_3.b, t1_3.c
Filter: (t1_3.b === 505)
(14 rows)
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
SELECT * FROM async_pt t1 WHERE t1.b === 505 LIMIT 1;
QUERY PLAN
-------------------------------------------------------------------------
Limit (actual rows=1 loops=1)
-> Append (actual rows=1 loops=1)
-> Async Foreign Scan on async_p1 t1_1 (actual rows=0 loops=1)
Filter: (b === 505)
-> Async Foreign Scan on async_p2 t1_2 (actual rows=0 loops=1)
Filter: (b === 505)
-> Seq Scan on async_p3 t1_3 (actual rows=1 loops=1)
Filter: (b === 505)
Rows Removed by Filter: 101
(9 rows)
Add support for asynchronous execution. This implements asynchronous execution, which runs multiple parts of a non-parallel-aware Append concurrently rather than serially to improve performance when possible. Currently, the only node type that can be run concurrently is a ForeignScan that is an immediate child of such an Append. In the case where such ForeignScans access data on different remote servers, this would run those ForeignScans concurrently, and overlap the remote operations to be performed simultaneously, so it'll improve the performance especially when the operations involve time-consuming ones such as remote join and remote aggregation. We may extend this to other node types such as joins or aggregates over ForeignScans in the future. This also adds the support for postgres_fdw, which is enabled by the table-level/server-level option "async_capable". The default is false. Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit is mostly based on the patch proposed by Robert Haas, but also uses stuff from the patch proposed by Kyotaro Horiguchi and from the patch proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and others. Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 11:45:00 +02:00
SELECT * FROM async_pt t1 WHERE t1.b === 505 LIMIT 1;
a | b | c
------+-----+------
3505 | 505 | 0505
(1 row)
-- Check with foreign modify
CREATE TABLE base_tbl3 (a int, b int, c text);
CREATE FOREIGN TABLE remote_tbl (a int, b int, c text)
SERVER loopback OPTIONS (table_name 'base_tbl3');
INSERT INTO remote_tbl VALUES (2505, 505, 'bar');
CREATE TABLE base_tbl4 (a int, b int, c text);
CREATE FOREIGN TABLE insert_tbl (a int, b int, c text)
SERVER loopback OPTIONS (table_name 'base_tbl4');
EXPLAIN (VERBOSE, COSTS OFF)
INSERT INTO insert_tbl (SELECT * FROM local_tbl UNION ALL SELECT * FROM remote_tbl);
QUERY PLAN
-------------------------------------------------------------------------
Insert on public.insert_tbl
Remote SQL: INSERT INTO public.base_tbl4(a, b, c) VALUES ($1, $2, $3)
Batch Size: 1
-> Append
-> Seq Scan on public.local_tbl
Output: local_tbl.a, local_tbl.b, local_tbl.c
-> Async Foreign Scan on public.remote_tbl
Output: remote_tbl.a, remote_tbl.b, remote_tbl.c
Remote SQL: SELECT a, b, c FROM public.base_tbl3
(9 rows)
INSERT INTO insert_tbl (SELECT * FROM local_tbl UNION ALL SELECT * FROM remote_tbl);
SELECT * FROM insert_tbl ORDER BY a;
a | b | c
------+-----+-----
1505 | 505 | foo
2505 | 505 | bar
(2 rows)
-- Check with direct modify
EXPLAIN (VERBOSE, COSTS OFF)
WITH t AS (UPDATE remote_tbl SET c = c || c RETURNING *)
INSERT INTO join_tbl SELECT * FROM async_pt LEFT JOIN t ON (async_pt.a = t.a AND async_pt.b = t.b) WHERE async_pt.b === 505;
QUERY PLAN
----------------------------------------------------------------------------------------
Insert on public.join_tbl
CTE t
-> Update on public.remote_tbl
Output: remote_tbl.a, remote_tbl.b, remote_tbl.c
-> Foreign Update on public.remote_tbl
Remote SQL: UPDATE public.base_tbl3 SET c = (c || c) RETURNING a, b, c
-> Nested Loop Left Join
Output: async_pt.a, async_pt.b, async_pt.c, t.a, t.b, t.c
Join Filter: ((async_pt.a = t.a) AND (async_pt.b = t.b))
-> Append
-> Async Foreign Scan on public.async_p1 async_pt_1
Output: async_pt_1.a, async_pt_1.b, async_pt_1.c
Filter: (async_pt_1.b === 505)
Remote SQL: SELECT a, b, c FROM public.base_tbl1
-> Async Foreign Scan on public.async_p2 async_pt_2
Output: async_pt_2.a, async_pt_2.b, async_pt_2.c
Filter: (async_pt_2.b === 505)
Remote SQL: SELECT a, b, c FROM public.base_tbl2
-> Seq Scan on public.async_p3 async_pt_3
Output: async_pt_3.a, async_pt_3.b, async_pt_3.c
Filter: (async_pt_3.b === 505)
-> CTE Scan on t
Output: t.a, t.b, t.c
(23 rows)
WITH t AS (UPDATE remote_tbl SET c = c || c RETURNING *)
INSERT INTO join_tbl SELECT * FROM async_pt LEFT JOIN t ON (async_pt.a = t.a AND async_pt.b = t.b) WHERE async_pt.b === 505;
SELECT * FROM join_tbl ORDER BY a1;
a1 | b1 | c1 | a2 | b2 | c2
------+-----+------+------+-----+--------
1505 | 505 | 0505 | | |
2505 | 505 | 0505 | 2505 | 505 | barbar
3505 | 505 | 0505 | | |
(3 rows)
DELETE FROM join_tbl;
DROP TABLE local_tbl;
DROP FOREIGN TABLE remote_tbl;
DROP FOREIGN TABLE insert_tbl;
DROP TABLE base_tbl3;
DROP TABLE base_tbl4;
Add support for asynchronous execution. This implements asynchronous execution, which runs multiple parts of a non-parallel-aware Append concurrently rather than serially to improve performance when possible. Currently, the only node type that can be run concurrently is a ForeignScan that is an immediate child of such an Append. In the case where such ForeignScans access data on different remote servers, this would run those ForeignScans concurrently, and overlap the remote operations to be performed simultaneously, so it'll improve the performance especially when the operations involve time-consuming ones such as remote join and remote aggregation. We may extend this to other node types such as joins or aggregates over ForeignScans in the future. This also adds the support for postgres_fdw, which is enabled by the table-level/server-level option "async_capable". The default is false. Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit is mostly based on the patch proposed by Robert Haas, but also uses stuff from the patch proposed by Kyotaro Horiguchi and from the patch proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and others. Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 11:45:00 +02:00
RESET enable_mergejoin;
RESET enable_hashjoin;
-- Test that UPDATE/DELETE with inherited target works with async_capable enabled
EXPLAIN (VERBOSE, COSTS OFF)
UPDATE async_pt SET c = c || c WHERE b = 0 RETURNING *;
QUERY PLAN
----------------------------------------------------------------------------------------------------------
Update on public.async_pt
Output: async_pt_1.a, async_pt_1.b, async_pt_1.c
Foreign Update on public.async_p1 async_pt_1
Foreign Update on public.async_p2 async_pt_2
Update on public.async_p3 async_pt_3
-> Append
-> Foreign Update on public.async_p1 async_pt_1
Remote SQL: UPDATE public.base_tbl1 SET c = (c || c) WHERE ((b = 0)) RETURNING a, b, c
-> Foreign Update on public.async_p2 async_pt_2
Remote SQL: UPDATE public.base_tbl2 SET c = (c || c) WHERE ((b = 0)) RETURNING a, b, c
-> Seq Scan on public.async_p3 async_pt_3
Output: (async_pt_3.c || async_pt_3.c), async_pt_3.tableoid, async_pt_3.ctid, NULL::record
Filter: (async_pt_3.b = 0)
(13 rows)
UPDATE async_pt SET c = c || c WHERE b = 0 RETURNING *;
a | b | c
------+---+----------
1000 | 0 | 00000000
2000 | 0 | 00000000
3000 | 0 | 00000000
(3 rows)
EXPLAIN (VERBOSE, COSTS OFF)
DELETE FROM async_pt WHERE b = 0 RETURNING *;
QUERY PLAN
------------------------------------------------------------------------------------------
Delete on public.async_pt
Output: async_pt_1.a, async_pt_1.b, async_pt_1.c
Foreign Delete on public.async_p1 async_pt_1
Foreign Delete on public.async_p2 async_pt_2
Delete on public.async_p3 async_pt_3
-> Append
-> Foreign Delete on public.async_p1 async_pt_1
Remote SQL: DELETE FROM public.base_tbl1 WHERE ((b = 0)) RETURNING a, b, c
-> Foreign Delete on public.async_p2 async_pt_2
Remote SQL: DELETE FROM public.base_tbl2 WHERE ((b = 0)) RETURNING a, b, c
-> Seq Scan on public.async_p3 async_pt_3
Output: async_pt_3.tableoid, async_pt_3.ctid
Filter: (async_pt_3.b = 0)
(13 rows)
DELETE FROM async_pt WHERE b = 0 RETURNING *;
a | b | c
------+---+----------
1000 | 0 | 00000000
2000 | 0 | 00000000
3000 | 0 | 00000000
(3 rows)
-- Check EXPLAIN ANALYZE for a query that scans empty partitions asynchronously
DELETE FROM async_p1;
DELETE FROM async_p2;
DELETE FROM async_p3;
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
SELECT * FROM async_pt;
QUERY PLAN
-------------------------------------------------------------------------
Append (actual rows=0 loops=1)
-> Async Foreign Scan on async_p1 async_pt_1 (actual rows=0 loops=1)
-> Async Foreign Scan on async_p2 async_pt_2 (actual rows=0 loops=1)
-> Seq Scan on async_p3 async_pt_3 (actual rows=0 loops=1)
(4 rows)
Add support for asynchronous execution. This implements asynchronous execution, which runs multiple parts of a non-parallel-aware Append concurrently rather than serially to improve performance when possible. Currently, the only node type that can be run concurrently is a ForeignScan that is an immediate child of such an Append. In the case where such ForeignScans access data on different remote servers, this would run those ForeignScans concurrently, and overlap the remote operations to be performed simultaneously, so it'll improve the performance especially when the operations involve time-consuming ones such as remote join and remote aggregation. We may extend this to other node types such as joins or aggregates over ForeignScans in the future. This also adds the support for postgres_fdw, which is enabled by the table-level/server-level option "async_capable". The default is false. Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit is mostly based on the patch proposed by Robert Haas, but also uses stuff from the patch proposed by Kyotaro Horiguchi and from the patch proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and others. Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 11:45:00 +02:00
-- Clean up
DROP TABLE async_pt;
DROP TABLE base_tbl1;
DROP TABLE base_tbl2;
DROP TABLE result_tbl;
DROP TABLE join_tbl;
-- Test that an asynchronous fetch is processed before restarting the scan in
-- ReScanForeignScan
CREATE TABLE base_tbl (a int, b int);
INSERT INTO base_tbl VALUES (1, 11), (2, 22), (3, 33);
CREATE FOREIGN TABLE foreign_tbl (b int)
SERVER loopback OPTIONS (table_name 'base_tbl');
CREATE FOREIGN TABLE foreign_tbl2 () INHERITS (foreign_tbl)
SERVER loopback OPTIONS (table_name 'base_tbl');
EXPLAIN (VERBOSE, COSTS OFF)
SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl);
QUERY PLAN
-----------------------------------------------------------------------------
Seq Scan on public.base_tbl
Output: base_tbl.a
Filter: (SubPlan 1)
SubPlan 1
-> Result
Output: base_tbl.a
-> Append
-> Async Foreign Scan on public.foreign_tbl foreign_tbl_1
Remote SQL: SELECT NULL FROM public.base_tbl
-> Async Foreign Scan on public.foreign_tbl2 foreign_tbl_2
Remote SQL: SELECT NULL FROM public.base_tbl
(11 rows)
SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl);
a
---
1
2
3
(3 rows)
-- Clean up
DROP FOREIGN TABLE foreign_tbl CASCADE;
NOTICE: drop cascades to foreign table foreign_tbl2
DROP TABLE base_tbl;
Add support for asynchronous execution. This implements asynchronous execution, which runs multiple parts of a non-parallel-aware Append concurrently rather than serially to improve performance when possible. Currently, the only node type that can be run concurrently is a ForeignScan that is an immediate child of such an Append. In the case where such ForeignScans access data on different remote servers, this would run those ForeignScans concurrently, and overlap the remote operations to be performed simultaneously, so it'll improve the performance especially when the operations involve time-consuming ones such as remote join and remote aggregation. We may extend this to other node types such as joins or aggregates over ForeignScans in the future. This also adds the support for postgres_fdw, which is enabled by the table-level/server-level option "async_capable". The default is false. Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit is mostly based on the patch proposed by Robert Haas, but also uses stuff from the patch proposed by Kyotaro Horiguchi and from the patch proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and others. Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 11:45:00 +02:00
ALTER SERVER loopback OPTIONS (DROP async_capable);
ALTER SERVER loopback2 OPTIONS (DROP async_capable);
-- ===================================================================
-- test invalid server, foreign table and foreign data wrapper options
-- ===================================================================
-- Invalid fdw_startup_cost option
CREATE SERVER inv_scst FOREIGN DATA WRAPPER postgres_fdw
OPTIONS(fdw_startup_cost '100$%$#$#');
ERROR: invalid value for floating point option "fdw_startup_cost": 100$%$#$#
-- Invalid fdw_tuple_cost option
CREATE SERVER inv_scst FOREIGN DATA WRAPPER postgres_fdw
OPTIONS(fdw_tuple_cost '100$%$#$#');
ERROR: invalid value for floating point option "fdw_tuple_cost": 100$%$#$#
-- Invalid fetch_size option
CREATE FOREIGN TABLE inv_fsz (c1 int )
SERVER loopback OPTIONS (fetch_size '100$%$#$#');
ERROR: invalid value for integer option "fetch_size": 100$%$#$#
-- Invalid batch_size option
CREATE FOREIGN TABLE inv_bsz (c1 int )
SERVER loopback OPTIONS (batch_size '100$%$#$#');
ERROR: invalid value for integer option "batch_size": 100$%$#$#
-- No option is allowed to be specified at foreign data wrapper level
ALTER FOREIGN DATA WRAPPER postgres_fdw OPTIONS (nonexistent 'fdw');
ERROR: invalid option "nonexistent"
HINT: There are no valid options in this context.
postgres_fdw: Add regression test for postgres_fdw.application_name GUC. Commit 449ab63505 added postgres_fdw.application_name GUC that specifies a value for application_name configuration parameter used when postgres_fdw establishes a connection to a foreign server. Also commit 6e0cb3dec1 allowed it to include escape sequences. Both commits added the regression tests for the GUC, but those tests were reverted by commits 98dbef90eb and 5e64ad3697 because they were unstable and caused some buildfarm members to report the failure. This is the third try to add the regression test for postgres_fdw.application_name GUC. One of issues to make the test unstable was to have used postgres_fdw_disconnect_all() to close the existing remote connections. The test expected that the remote connection and its corresponding backend at the remote server disappeared just after postgres_fdw_disconnect_all() was executed, but it could take a bit time for them to disappear. To make sure that they exit, this commit makes the test use pg_terminate_backend() with the timeout at the remote server, instead. If the timeout is set to greater than zero, this function waits until they are actually terminated (or until the given time has passed). Another issue was that the test didn't take into consideration the case where postgres_fdw.application_name containing some escape sequences was converted to the string larger than NAMEDATALEN. In this case it was truncated to less than NAMEDATALEN when it's passed to the remote server, but the test expected wrongly that full string of application_name was always viewable. This commit changes the test so that it can handle that case. Author: Fujii Masao Reviewed-by: Masahiko Sawada, Hayato Kuroda, Kyotaro Horiguchi Discussion: https://postgr.es/m/3220909.1631054766@sss.pgh.pa.us Discussion: https://postgr.es/m/20211224.180006.2247635208768233073.horikyota.ntt@gmail.com Discussion: https://postgr.es/m/e7b61420-a97b-8246-77c4-a0d48fba5a45@oss.nttdata.com
2022-01-07 07:31:56 +01:00
-- ===================================================================
-- test postgres_fdw.application_name GUC
-- ===================================================================
Harden postgres_fdw tests against unexpected cache flushes. postgres_fdw will close its remote session if an sinval cache reset occurs, since it's possible that that means some FDW parameters changed. We had two tests that were trying to ensure that the session remains alive by setting debug_discard_caches = 0; but that's not sufficient. Even though the tests seem stable enough in the buildfarm, they flap a lot under CI. In the first test, which is checking the ability to recover from a lost connection, we can stabilize the results by just not caring whether pg_terminate_backend() finds a victim backend. If a reset did happen, there won't be a session to terminate anymore, but the test can proceed anyway. (Arguably, we are then not testing the unintentional-disconnect case, but as long as that scenario is exercised in most runs I think it's fine; testing the reset-driven case is of value too.) In the second test, which is trying to verify the application_name displayed in pg_stat_activity by a remote session, we had a race condition in that the remote session might go away before we can fetch its pg_stat_activity entry. We can close that race and make the test more certainly test what it intends to by arranging things so that the remote session itself fetches its pg_stat_activity entry (based on PID rather than a somewhat-circular assumption about the application name). Both tests now demonstrably pass under debug_discard_caches = 1, so we can remove that hack. Back-patch into relevant back branches. Discussion: https://postgr.es/m/20230226194340.u44bkfgyz64c67i6@awork3.anarazel.de
2023-02-27 22:29:51 +01:00
-- To avoid race conditions in checking the remote session's application_name,
-- use this view to make the remote session itself read its application_name.
CREATE VIEW my_application_name AS
SELECT application_name FROM pg_stat_activity WHERE pid = pg_backend_pid();
CREATE FOREIGN TABLE remote_application_name (application_name text)
SERVER loopback2
OPTIONS (schema_name 'public', table_name 'my_application_name');
SELECT count(*) FROM remote_application_name;
count
-------
1
(1 row)
postgres_fdw: Add regression test for postgres_fdw.application_name GUC. Commit 449ab63505 added postgres_fdw.application_name GUC that specifies a value for application_name configuration parameter used when postgres_fdw establishes a connection to a foreign server. Also commit 6e0cb3dec1 allowed it to include escape sequences. Both commits added the regression tests for the GUC, but those tests were reverted by commits 98dbef90eb and 5e64ad3697 because they were unstable and caused some buildfarm members to report the failure. This is the third try to add the regression test for postgres_fdw.application_name GUC. One of issues to make the test unstable was to have used postgres_fdw_disconnect_all() to close the existing remote connections. The test expected that the remote connection and its corresponding backend at the remote server disappeared just after postgres_fdw_disconnect_all() was executed, but it could take a bit time for them to disappear. To make sure that they exit, this commit makes the test use pg_terminate_backend() with the timeout at the remote server, instead. If the timeout is set to greater than zero, this function waits until they are actually terminated (or until the given time has passed). Another issue was that the test didn't take into consideration the case where postgres_fdw.application_name containing some escape sequences was converted to the string larger than NAMEDATALEN. In this case it was truncated to less than NAMEDATALEN when it's passed to the remote server, but the test expected wrongly that full string of application_name was always viewable. This commit changes the test so that it can handle that case. Author: Fujii Masao Reviewed-by: Masahiko Sawada, Hayato Kuroda, Kyotaro Horiguchi Discussion: https://postgr.es/m/3220909.1631054766@sss.pgh.pa.us Discussion: https://postgr.es/m/20211224.180006.2247635208768233073.horikyota.ntt@gmail.com Discussion: https://postgr.es/m/e7b61420-a97b-8246-77c4-a0d48fba5a45@oss.nttdata.com
2022-01-07 07:31:56 +01:00
-- Specify escape sequences in application_name option of a server
-- object so as to test that they are replaced with status information
Harden postgres_fdw tests against unexpected cache flushes. postgres_fdw will close its remote session if an sinval cache reset occurs, since it's possible that that means some FDW parameters changed. We had two tests that were trying to ensure that the session remains alive by setting debug_discard_caches = 0; but that's not sufficient. Even though the tests seem stable enough in the buildfarm, they flap a lot under CI. In the first test, which is checking the ability to recover from a lost connection, we can stabilize the results by just not caring whether pg_terminate_backend() finds a victim backend. If a reset did happen, there won't be a session to terminate anymore, but the test can proceed anyway. (Arguably, we are then not testing the unintentional-disconnect case, but as long as that scenario is exercised in most runs I think it's fine; testing the reset-driven case is of value too.) In the second test, which is trying to verify the application_name displayed in pg_stat_activity by a remote session, we had a race condition in that the remote session might go away before we can fetch its pg_stat_activity entry. We can close that race and make the test more certainly test what it intends to by arranging things so that the remote session itself fetches its pg_stat_activity entry (based on PID rather than a somewhat-circular assumption about the application name). Both tests now demonstrably pass under debug_discard_caches = 1, so we can remove that hack. Back-patch into relevant back branches. Discussion: https://postgr.es/m/20230226194340.u44bkfgyz64c67i6@awork3.anarazel.de
2023-02-27 22:29:51 +01:00
-- expectedly. Note that we are also relying on ALTER SERVER to force
-- the remote session to be restarted with its new application name.
postgres_fdw: Add regression test for postgres_fdw.application_name GUC. Commit 449ab63505 added postgres_fdw.application_name GUC that specifies a value for application_name configuration parameter used when postgres_fdw establishes a connection to a foreign server. Also commit 6e0cb3dec1 allowed it to include escape sequences. Both commits added the regression tests for the GUC, but those tests were reverted by commits 98dbef90eb and 5e64ad3697 because they were unstable and caused some buildfarm members to report the failure. This is the third try to add the regression test for postgres_fdw.application_name GUC. One of issues to make the test unstable was to have used postgres_fdw_disconnect_all() to close the existing remote connections. The test expected that the remote connection and its corresponding backend at the remote server disappeared just after postgres_fdw_disconnect_all() was executed, but it could take a bit time for them to disappear. To make sure that they exit, this commit makes the test use pg_terminate_backend() with the timeout at the remote server, instead. If the timeout is set to greater than zero, this function waits until they are actually terminated (or until the given time has passed). Another issue was that the test didn't take into consideration the case where postgres_fdw.application_name containing some escape sequences was converted to the string larger than NAMEDATALEN. In this case it was truncated to less than NAMEDATALEN when it's passed to the remote server, but the test expected wrongly that full string of application_name was always viewable. This commit changes the test so that it can handle that case. Author: Fujii Masao Reviewed-by: Masahiko Sawada, Hayato Kuroda, Kyotaro Horiguchi Discussion: https://postgr.es/m/3220909.1631054766@sss.pgh.pa.us Discussion: https://postgr.es/m/20211224.180006.2247635208768233073.horikyota.ntt@gmail.com Discussion: https://postgr.es/m/e7b61420-a97b-8246-77c4-a0d48fba5a45@oss.nttdata.com
2022-01-07 07:31:56 +01:00
--
-- Since pg_stat_activity.application_name may be truncated to less than
-- NAMEDATALEN characters, note that substring() needs to be used
-- at the condition of test query to make sure that the string consisting
-- of database name and process ID is also less than that.
ALTER SERVER loopback2 OPTIONS (application_name 'fdw_%d%p');
Harden postgres_fdw tests against unexpected cache flushes. postgres_fdw will close its remote session if an sinval cache reset occurs, since it's possible that that means some FDW parameters changed. We had two tests that were trying to ensure that the session remains alive by setting debug_discard_caches = 0; but that's not sufficient. Even though the tests seem stable enough in the buildfarm, they flap a lot under CI. In the first test, which is checking the ability to recover from a lost connection, we can stabilize the results by just not caring whether pg_terminate_backend() finds a victim backend. If a reset did happen, there won't be a session to terminate anymore, but the test can proceed anyway. (Arguably, we are then not testing the unintentional-disconnect case, but as long as that scenario is exercised in most runs I think it's fine; testing the reset-driven case is of value too.) In the second test, which is trying to verify the application_name displayed in pg_stat_activity by a remote session, we had a race condition in that the remote session might go away before we can fetch its pg_stat_activity entry. We can close that race and make the test more certainly test what it intends to by arranging things so that the remote session itself fetches its pg_stat_activity entry (based on PID rather than a somewhat-circular assumption about the application name). Both tests now demonstrably pass under debug_discard_caches = 1, so we can remove that hack. Back-patch into relevant back branches. Discussion: https://postgr.es/m/20230226194340.u44bkfgyz64c67i6@awork3.anarazel.de
2023-02-27 22:29:51 +01:00
SELECT count(*) FROM remote_application_name
postgres_fdw: Add regression test for postgres_fdw.application_name GUC. Commit 449ab63505 added postgres_fdw.application_name GUC that specifies a value for application_name configuration parameter used when postgres_fdw establishes a connection to a foreign server. Also commit 6e0cb3dec1 allowed it to include escape sequences. Both commits added the regression tests for the GUC, but those tests were reverted by commits 98dbef90eb and 5e64ad3697 because they were unstable and caused some buildfarm members to report the failure. This is the third try to add the regression test for postgres_fdw.application_name GUC. One of issues to make the test unstable was to have used postgres_fdw_disconnect_all() to close the existing remote connections. The test expected that the remote connection and its corresponding backend at the remote server disappeared just after postgres_fdw_disconnect_all() was executed, but it could take a bit time for them to disappear. To make sure that they exit, this commit makes the test use pg_terminate_backend() with the timeout at the remote server, instead. If the timeout is set to greater than zero, this function waits until they are actually terminated (or until the given time has passed). Another issue was that the test didn't take into consideration the case where postgres_fdw.application_name containing some escape sequences was converted to the string larger than NAMEDATALEN. In this case it was truncated to less than NAMEDATALEN when it's passed to the remote server, but the test expected wrongly that full string of application_name was always viewable. This commit changes the test so that it can handle that case. Author: Fujii Masao Reviewed-by: Masahiko Sawada, Hayato Kuroda, Kyotaro Horiguchi Discussion: https://postgr.es/m/3220909.1631054766@sss.pgh.pa.us Discussion: https://postgr.es/m/20211224.180006.2247635208768233073.horikyota.ntt@gmail.com Discussion: https://postgr.es/m/e7b61420-a97b-8246-77c4-a0d48fba5a45@oss.nttdata.com
2022-01-07 07:31:56 +01:00
WHERE application_name =
substring('fdw_' || current_database() || pg_backend_pid() for
current_setting('max_identifier_length')::int);
Harden postgres_fdw tests against unexpected cache flushes. postgres_fdw will close its remote session if an sinval cache reset occurs, since it's possible that that means some FDW parameters changed. We had two tests that were trying to ensure that the session remains alive by setting debug_discard_caches = 0; but that's not sufficient. Even though the tests seem stable enough in the buildfarm, they flap a lot under CI. In the first test, which is checking the ability to recover from a lost connection, we can stabilize the results by just not caring whether pg_terminate_backend() finds a victim backend. If a reset did happen, there won't be a session to terminate anymore, but the test can proceed anyway. (Arguably, we are then not testing the unintentional-disconnect case, but as long as that scenario is exercised in most runs I think it's fine; testing the reset-driven case is of value too.) In the second test, which is trying to verify the application_name displayed in pg_stat_activity by a remote session, we had a race condition in that the remote session might go away before we can fetch its pg_stat_activity entry. We can close that race and make the test more certainly test what it intends to by arranging things so that the remote session itself fetches its pg_stat_activity entry (based on PID rather than a somewhat-circular assumption about the application name). Both tests now demonstrably pass under debug_discard_caches = 1, so we can remove that hack. Back-patch into relevant back branches. Discussion: https://postgr.es/m/20230226194340.u44bkfgyz64c67i6@awork3.anarazel.de
2023-02-27 22:29:51 +01:00
count
-------
1
postgres_fdw: Add regression test for postgres_fdw.application_name GUC. Commit 449ab63505 added postgres_fdw.application_name GUC that specifies a value for application_name configuration parameter used when postgres_fdw establishes a connection to a foreign server. Also commit 6e0cb3dec1 allowed it to include escape sequences. Both commits added the regression tests for the GUC, but those tests were reverted by commits 98dbef90eb and 5e64ad3697 because they were unstable and caused some buildfarm members to report the failure. This is the third try to add the regression test for postgres_fdw.application_name GUC. One of issues to make the test unstable was to have used postgres_fdw_disconnect_all() to close the existing remote connections. The test expected that the remote connection and its corresponding backend at the remote server disappeared just after postgres_fdw_disconnect_all() was executed, but it could take a bit time for them to disappear. To make sure that they exit, this commit makes the test use pg_terminate_backend() with the timeout at the remote server, instead. If the timeout is set to greater than zero, this function waits until they are actually terminated (or until the given time has passed). Another issue was that the test didn't take into consideration the case where postgres_fdw.application_name containing some escape sequences was converted to the string larger than NAMEDATALEN. In this case it was truncated to less than NAMEDATALEN when it's passed to the remote server, but the test expected wrongly that full string of application_name was always viewable. This commit changes the test so that it can handle that case. Author: Fujii Masao Reviewed-by: Masahiko Sawada, Hayato Kuroda, Kyotaro Horiguchi Discussion: https://postgr.es/m/3220909.1631054766@sss.pgh.pa.us Discussion: https://postgr.es/m/20211224.180006.2247635208768233073.horikyota.ntt@gmail.com Discussion: https://postgr.es/m/e7b61420-a97b-8246-77c4-a0d48fba5a45@oss.nttdata.com
2022-01-07 07:31:56 +01:00
(1 row)
-- postgres_fdw.application_name overrides application_name option
-- of a server object if both settings are present.
Harden postgres_fdw tests against unexpected cache flushes. postgres_fdw will close its remote session if an sinval cache reset occurs, since it's possible that that means some FDW parameters changed. We had two tests that were trying to ensure that the session remains alive by setting debug_discard_caches = 0; but that's not sufficient. Even though the tests seem stable enough in the buildfarm, they flap a lot under CI. In the first test, which is checking the ability to recover from a lost connection, we can stabilize the results by just not caring whether pg_terminate_backend() finds a victim backend. If a reset did happen, there won't be a session to terminate anymore, but the test can proceed anyway. (Arguably, we are then not testing the unintentional-disconnect case, but as long as that scenario is exercised in most runs I think it's fine; testing the reset-driven case is of value too.) In the second test, which is trying to verify the application_name displayed in pg_stat_activity by a remote session, we had a race condition in that the remote session might go away before we can fetch its pg_stat_activity entry. We can close that race and make the test more certainly test what it intends to by arranging things so that the remote session itself fetches its pg_stat_activity entry (based on PID rather than a somewhat-circular assumption about the application name). Both tests now demonstrably pass under debug_discard_caches = 1, so we can remove that hack. Back-patch into relevant back branches. Discussion: https://postgr.es/m/20230226194340.u44bkfgyz64c67i6@awork3.anarazel.de
2023-02-27 22:29:51 +01:00
ALTER SERVER loopback2 OPTIONS (SET application_name 'fdw_wrong');
postgres_fdw: Add regression test for postgres_fdw.application_name GUC. Commit 449ab63505 added postgres_fdw.application_name GUC that specifies a value for application_name configuration parameter used when postgres_fdw establishes a connection to a foreign server. Also commit 6e0cb3dec1 allowed it to include escape sequences. Both commits added the regression tests for the GUC, but those tests were reverted by commits 98dbef90eb and 5e64ad3697 because they were unstable and caused some buildfarm members to report the failure. This is the third try to add the regression test for postgres_fdw.application_name GUC. One of issues to make the test unstable was to have used postgres_fdw_disconnect_all() to close the existing remote connections. The test expected that the remote connection and its corresponding backend at the remote server disappeared just after postgres_fdw_disconnect_all() was executed, but it could take a bit time for them to disappear. To make sure that they exit, this commit makes the test use pg_terminate_backend() with the timeout at the remote server, instead. If the timeout is set to greater than zero, this function waits until they are actually terminated (or until the given time has passed). Another issue was that the test didn't take into consideration the case where postgres_fdw.application_name containing some escape sequences was converted to the string larger than NAMEDATALEN. In this case it was truncated to less than NAMEDATALEN when it's passed to the remote server, but the test expected wrongly that full string of application_name was always viewable. This commit changes the test so that it can handle that case. Author: Fujii Masao Reviewed-by: Masahiko Sawada, Hayato Kuroda, Kyotaro Horiguchi Discussion: https://postgr.es/m/3220909.1631054766@sss.pgh.pa.us Discussion: https://postgr.es/m/20211224.180006.2247635208768233073.horikyota.ntt@gmail.com Discussion: https://postgr.es/m/e7b61420-a97b-8246-77c4-a0d48fba5a45@oss.nttdata.com
2022-01-07 07:31:56 +01:00
SET postgres_fdw.application_name TO 'fdw_%a%u%%';
Harden postgres_fdw tests against unexpected cache flushes. postgres_fdw will close its remote session if an sinval cache reset occurs, since it's possible that that means some FDW parameters changed. We had two tests that were trying to ensure that the session remains alive by setting debug_discard_caches = 0; but that's not sufficient. Even though the tests seem stable enough in the buildfarm, they flap a lot under CI. In the first test, which is checking the ability to recover from a lost connection, we can stabilize the results by just not caring whether pg_terminate_backend() finds a victim backend. If a reset did happen, there won't be a session to terminate anymore, but the test can proceed anyway. (Arguably, we are then not testing the unintentional-disconnect case, but as long as that scenario is exercised in most runs I think it's fine; testing the reset-driven case is of value too.) In the second test, which is trying to verify the application_name displayed in pg_stat_activity by a remote session, we had a race condition in that the remote session might go away before we can fetch its pg_stat_activity entry. We can close that race and make the test more certainly test what it intends to by arranging things so that the remote session itself fetches its pg_stat_activity entry (based on PID rather than a somewhat-circular assumption about the application name). Both tests now demonstrably pass under debug_discard_caches = 1, so we can remove that hack. Back-patch into relevant back branches. Discussion: https://postgr.es/m/20230226194340.u44bkfgyz64c67i6@awork3.anarazel.de
2023-02-27 22:29:51 +01:00
SELECT count(*) FROM remote_application_name
postgres_fdw: Add regression test for postgres_fdw.application_name GUC. Commit 449ab63505 added postgres_fdw.application_name GUC that specifies a value for application_name configuration parameter used when postgres_fdw establishes a connection to a foreign server. Also commit 6e0cb3dec1 allowed it to include escape sequences. Both commits added the regression tests for the GUC, but those tests were reverted by commits 98dbef90eb and 5e64ad3697 because they were unstable and caused some buildfarm members to report the failure. This is the third try to add the regression test for postgres_fdw.application_name GUC. One of issues to make the test unstable was to have used postgres_fdw_disconnect_all() to close the existing remote connections. The test expected that the remote connection and its corresponding backend at the remote server disappeared just after postgres_fdw_disconnect_all() was executed, but it could take a bit time for them to disappear. To make sure that they exit, this commit makes the test use pg_terminate_backend() with the timeout at the remote server, instead. If the timeout is set to greater than zero, this function waits until they are actually terminated (or until the given time has passed). Another issue was that the test didn't take into consideration the case where postgres_fdw.application_name containing some escape sequences was converted to the string larger than NAMEDATALEN. In this case it was truncated to less than NAMEDATALEN when it's passed to the remote server, but the test expected wrongly that full string of application_name was always viewable. This commit changes the test so that it can handle that case. Author: Fujii Masao Reviewed-by: Masahiko Sawada, Hayato Kuroda, Kyotaro Horiguchi Discussion: https://postgr.es/m/3220909.1631054766@sss.pgh.pa.us Discussion: https://postgr.es/m/20211224.180006.2247635208768233073.horikyota.ntt@gmail.com Discussion: https://postgr.es/m/e7b61420-a97b-8246-77c4-a0d48fba5a45@oss.nttdata.com
2022-01-07 07:31:56 +01:00
WHERE application_name =
substring('fdw_' || current_setting('application_name') ||
CURRENT_USER || '%' for current_setting('max_identifier_length')::int);
Harden postgres_fdw tests against unexpected cache flushes. postgres_fdw will close its remote session if an sinval cache reset occurs, since it's possible that that means some FDW parameters changed. We had two tests that were trying to ensure that the session remains alive by setting debug_discard_caches = 0; but that's not sufficient. Even though the tests seem stable enough in the buildfarm, they flap a lot under CI. In the first test, which is checking the ability to recover from a lost connection, we can stabilize the results by just not caring whether pg_terminate_backend() finds a victim backend. If a reset did happen, there won't be a session to terminate anymore, but the test can proceed anyway. (Arguably, we are then not testing the unintentional-disconnect case, but as long as that scenario is exercised in most runs I think it's fine; testing the reset-driven case is of value too.) In the second test, which is trying to verify the application_name displayed in pg_stat_activity by a remote session, we had a race condition in that the remote session might go away before we can fetch its pg_stat_activity entry. We can close that race and make the test more certainly test what it intends to by arranging things so that the remote session itself fetches its pg_stat_activity entry (based on PID rather than a somewhat-circular assumption about the application name). Both tests now demonstrably pass under debug_discard_caches = 1, so we can remove that hack. Back-patch into relevant back branches. Discussion: https://postgr.es/m/20230226194340.u44bkfgyz64c67i6@awork3.anarazel.de
2023-02-27 22:29:51 +01:00
count
-------
1
postgres_fdw: Add regression test for postgres_fdw.application_name GUC. Commit 449ab63505 added postgres_fdw.application_name GUC that specifies a value for application_name configuration parameter used when postgres_fdw establishes a connection to a foreign server. Also commit 6e0cb3dec1 allowed it to include escape sequences. Both commits added the regression tests for the GUC, but those tests were reverted by commits 98dbef90eb and 5e64ad3697 because they were unstable and caused some buildfarm members to report the failure. This is the third try to add the regression test for postgres_fdw.application_name GUC. One of issues to make the test unstable was to have used postgres_fdw_disconnect_all() to close the existing remote connections. The test expected that the remote connection and its corresponding backend at the remote server disappeared just after postgres_fdw_disconnect_all() was executed, but it could take a bit time for them to disappear. To make sure that they exit, this commit makes the test use pg_terminate_backend() with the timeout at the remote server, instead. If the timeout is set to greater than zero, this function waits until they are actually terminated (or until the given time has passed). Another issue was that the test didn't take into consideration the case where postgres_fdw.application_name containing some escape sequences was converted to the string larger than NAMEDATALEN. In this case it was truncated to less than NAMEDATALEN when it's passed to the remote server, but the test expected wrongly that full string of application_name was always viewable. This commit changes the test so that it can handle that case. Author: Fujii Masao Reviewed-by: Masahiko Sawada, Hayato Kuroda, Kyotaro Horiguchi Discussion: https://postgr.es/m/3220909.1631054766@sss.pgh.pa.us Discussion: https://postgr.es/m/20211224.180006.2247635208768233073.horikyota.ntt@gmail.com Discussion: https://postgr.es/m/e7b61420-a97b-8246-77c4-a0d48fba5a45@oss.nttdata.com
2022-01-07 07:31:56 +01:00
(1 row)
Harden postgres_fdw tests against unexpected cache flushes. postgres_fdw will close its remote session if an sinval cache reset occurs, since it's possible that that means some FDW parameters changed. We had two tests that were trying to ensure that the session remains alive by setting debug_discard_caches = 0; but that's not sufficient. Even though the tests seem stable enough in the buildfarm, they flap a lot under CI. In the first test, which is checking the ability to recover from a lost connection, we can stabilize the results by just not caring whether pg_terminate_backend() finds a victim backend. If a reset did happen, there won't be a session to terminate anymore, but the test can proceed anyway. (Arguably, we are then not testing the unintentional-disconnect case, but as long as that scenario is exercised in most runs I think it's fine; testing the reset-driven case is of value too.) In the second test, which is trying to verify the application_name displayed in pg_stat_activity by a remote session, we had a race condition in that the remote session might go away before we can fetch its pg_stat_activity entry. We can close that race and make the test more certainly test what it intends to by arranging things so that the remote session itself fetches its pg_stat_activity entry (based on PID rather than a somewhat-circular assumption about the application name). Both tests now demonstrably pass under debug_discard_caches = 1, so we can remove that hack. Back-patch into relevant back branches. Discussion: https://postgr.es/m/20230226194340.u44bkfgyz64c67i6@awork3.anarazel.de
2023-02-27 22:29:51 +01:00
RESET postgres_fdw.application_name;
-- Test %c (session ID) and %C (cluster name) escape sequences.
Harden postgres_fdw tests against unexpected cache flushes. postgres_fdw will close its remote session if an sinval cache reset occurs, since it's possible that that means some FDW parameters changed. We had two tests that were trying to ensure that the session remains alive by setting debug_discard_caches = 0; but that's not sufficient. Even though the tests seem stable enough in the buildfarm, they flap a lot under CI. In the first test, which is checking the ability to recover from a lost connection, we can stabilize the results by just not caring whether pg_terminate_backend() finds a victim backend. If a reset did happen, there won't be a session to terminate anymore, but the test can proceed anyway. (Arguably, we are then not testing the unintentional-disconnect case, but as long as that scenario is exercised in most runs I think it's fine; testing the reset-driven case is of value too.) In the second test, which is trying to verify the application_name displayed in pg_stat_activity by a remote session, we had a race condition in that the remote session might go away before we can fetch its pg_stat_activity entry. We can close that race and make the test more certainly test what it intends to by arranging things so that the remote session itself fetches its pg_stat_activity entry (based on PID rather than a somewhat-circular assumption about the application name). Both tests now demonstrably pass under debug_discard_caches = 1, so we can remove that hack. Back-patch into relevant back branches. Discussion: https://postgr.es/m/20230226194340.u44bkfgyz64c67i6@awork3.anarazel.de
2023-02-27 22:29:51 +01:00
ALTER SERVER loopback2 OPTIONS (SET application_name 'fdw_%C%c');
SELECT count(*) FROM remote_application_name
WHERE application_name =
substring('fdw_' || current_setting('cluster_name') ||
to_hex(trunc(EXTRACT(EPOCH FROM (SELECT backend_start FROM
pg_stat_get_activity(pg_backend_pid()))))::integer) || '.' ||
to_hex(pg_backend_pid())
for current_setting('max_identifier_length')::int);
Harden postgres_fdw tests against unexpected cache flushes. postgres_fdw will close its remote session if an sinval cache reset occurs, since it's possible that that means some FDW parameters changed. We had two tests that were trying to ensure that the session remains alive by setting debug_discard_caches = 0; but that's not sufficient. Even though the tests seem stable enough in the buildfarm, they flap a lot under CI. In the first test, which is checking the ability to recover from a lost connection, we can stabilize the results by just not caring whether pg_terminate_backend() finds a victim backend. If a reset did happen, there won't be a session to terminate anymore, but the test can proceed anyway. (Arguably, we are then not testing the unintentional-disconnect case, but as long as that scenario is exercised in most runs I think it's fine; testing the reset-driven case is of value too.) In the second test, which is trying to verify the application_name displayed in pg_stat_activity by a remote session, we had a race condition in that the remote session might go away before we can fetch its pg_stat_activity entry. We can close that race and make the test more certainly test what it intends to by arranging things so that the remote session itself fetches its pg_stat_activity entry (based on PID rather than a somewhat-circular assumption about the application name). Both tests now demonstrably pass under debug_discard_caches = 1, so we can remove that hack. Back-patch into relevant back branches. Discussion: https://postgr.es/m/20230226194340.u44bkfgyz64c67i6@awork3.anarazel.de
2023-02-27 22:29:51 +01:00
count
-------
1
(1 row)
-- Clean up.
DROP FOREIGN TABLE remote_application_name;
DROP VIEW my_application_name;
-- ===================================================================
-- test parallel commit and parallel abort
-- ===================================================================
ALTER SERVER loopback OPTIONS (ADD parallel_commit 'true');
ALTER SERVER loopback OPTIONS (ADD parallel_abort 'true');
ALTER SERVER loopback2 OPTIONS (ADD parallel_commit 'true');
ALTER SERVER loopback2 OPTIONS (ADD parallel_abort 'true');
CREATE TABLE ploc1 (f1 int, f2 text);
CREATE FOREIGN TABLE prem1 (f1 int, f2 text)
SERVER loopback OPTIONS (table_name 'ploc1');
CREATE TABLE ploc2 (f1 int, f2 text);
CREATE FOREIGN TABLE prem2 (f1 int, f2 text)
SERVER loopback2 OPTIONS (table_name 'ploc2');
BEGIN;
INSERT INTO prem1 VALUES (101, 'foo');
INSERT INTO prem2 VALUES (201, 'bar');
COMMIT;
SELECT * FROM prem1;
f1 | f2
-----+-----
101 | foo
(1 row)
SELECT * FROM prem2;
f1 | f2
-----+-----
201 | bar
(1 row)
BEGIN;
SAVEPOINT s;
INSERT INTO prem1 VALUES (102, 'foofoo');
INSERT INTO prem2 VALUES (202, 'barbar');
RELEASE SAVEPOINT s;
COMMIT;
SELECT * FROM prem1;
f1 | f2
-----+--------
101 | foo
102 | foofoo
(2 rows)
SELECT * FROM prem2;
f1 | f2
-----+--------
201 | bar
202 | barbar
(2 rows)
-- This tests executing DEALLOCATE ALL against foreign servers in parallel
-- during pre-commit
BEGIN;
SAVEPOINT s;
INSERT INTO prem1 VALUES (103, 'baz');
INSERT INTO prem2 VALUES (203, 'qux');
ROLLBACK TO SAVEPOINT s;
RELEASE SAVEPOINT s;
INSERT INTO prem1 VALUES (104, 'bazbaz');
INSERT INTO prem2 VALUES (204, 'quxqux');
COMMIT;
SELECT * FROM prem1;
f1 | f2
-----+--------
101 | foo
102 | foofoo
104 | bazbaz
(3 rows)
SELECT * FROM prem2;
f1 | f2
-----+--------
201 | bar
202 | barbar
204 | quxqux
(3 rows)
BEGIN;
INSERT INTO prem1 VALUES (105, 'test1');
INSERT INTO prem2 VALUES (205, 'test2');
ABORT;
SELECT * FROM prem1;
f1 | f2
-----+--------
101 | foo
102 | foofoo
104 | bazbaz
(3 rows)
SELECT * FROM prem2;
f1 | f2
-----+--------
201 | bar
202 | barbar
204 | quxqux
(3 rows)
-- This tests executing DEALLOCATE ALL against foreign servers in parallel
-- during post-abort
BEGIN;
SAVEPOINT s;
INSERT INTO prem1 VALUES (105, 'test1');
INSERT INTO prem2 VALUES (205, 'test2');
ROLLBACK TO SAVEPOINT s;
RELEASE SAVEPOINT s;
INSERT INTO prem1 VALUES (105, 'test1');
INSERT INTO prem2 VALUES (205, 'test2');
ABORT;
SELECT * FROM prem1;
f1 | f2
-----+--------
101 | foo
102 | foofoo
104 | bazbaz
(3 rows)
SELECT * FROM prem2;
f1 | f2
-----+--------
201 | bar
202 | barbar
204 | quxqux
(3 rows)
ALTER SERVER loopback OPTIONS (DROP parallel_commit);
ALTER SERVER loopback OPTIONS (DROP parallel_abort);
ALTER SERVER loopback2 OPTIONS (DROP parallel_commit);
ALTER SERVER loopback2 OPTIONS (DROP parallel_abort);
-- ===================================================================
-- test for ANALYZE sampling
-- ===================================================================
CREATE TABLE analyze_table (id int, a text, b bigint);
CREATE FOREIGN TABLE analyze_ftable (id int, a text, b bigint)
SERVER loopback OPTIONS (table_name 'analyze_rtable1');
INSERT INTO analyze_table (SELECT x FROM generate_series(1,1000) x);
ANALYZE analyze_table;
SET default_statistics_target = 10;
ANALYZE analyze_table;
ALTER SERVER loopback OPTIONS (analyze_sampling 'invalid');
ERROR: invalid value for string option "analyze_sampling": invalid
ALTER SERVER loopback OPTIONS (analyze_sampling 'auto');
ANALYZE analyze_table;
ALTER SERVER loopback OPTIONS (SET analyze_sampling 'system');
ANALYZE analyze_table;
ALTER SERVER loopback OPTIONS (SET analyze_sampling 'bernoulli');
ANALYZE analyze_table;
ALTER SERVER loopback OPTIONS (SET analyze_sampling 'random');
ANALYZE analyze_table;
ALTER SERVER loopback OPTIONS (SET analyze_sampling 'off');
ANALYZE analyze_table;
-- cleanup
DROP FOREIGN TABLE analyze_ftable;
DROP TABLE analyze_table;