From 43da837edac8fec352dad0f20f7ec56056b609e3 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 2 Dec 2007 19:20:32 +0000 Subject: [PATCH] Improve the manual's discussion of partitioning. Recommend using a trigger instead of a rule to redirect insertions, use NEW.* notation where appropriate, some other updates and adjustments. David Fetter and Tom Lane --- doc/src/sgml/ddl.sgml | 320 +++++++++++++++++++++++++----------------- 1 file changed, 193 insertions(+), 127 deletions(-) diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml index 2bb325a99b..dc60146001 100644 --- a/doc/src/sgml/ddl.sgml +++ b/doc/src/sgml/ddl.sgml @@ -1,4 +1,4 @@ - + Data Definition @@ -2383,8 +2383,8 @@ CHECK ( outletID BETWEEN 200 AND 300 ) - Optionally, define a rule or trigger to redirect modifications - of the master table to the appropriate partition. + Optionally, define a trigger or rule to redirect data inserted into + the master table to the appropriate partition. @@ -2443,16 +2443,16 @@ CREATE TABLE measurement ( Next we create one partition for each active month: -CREATE TABLE measurement_y2004m02 ( ) INHERITS (measurement); -CREATE TABLE measurement_y2004m03 ( ) INHERITS (measurement); +CREATE TABLE measurement_y2006m02 ( ) INHERITS (measurement); +CREATE TABLE measurement_y2006m03 ( ) INHERITS (measurement); ... -CREATE TABLE measurement_y2005m11 ( ) INHERITS (measurement); -CREATE TABLE measurement_y2005m12 ( ) INHERITS (measurement); -CREATE TABLE measurement_y2006m01 ( ) INHERITS (measurement); +CREATE TABLE measurement_y2007m11 ( ) INHERITS (measurement); +CREATE TABLE measurement_y2007m12 ( ) INHERITS (measurement); +CREATE TABLE measurement_y2008m01 ( ) INHERITS (measurement); Each of the partitions are complete tables in their own right, - but they inherit their definition from the + but they inherit their definitions from the measurement table. @@ -2470,21 +2470,21 @@ CREATE TABLE measurement_y2006m01 ( ) INHERITS (measurement); table creation script becomes: -CREATE TABLE measurement_y2004m02 ( - CHECK ( logdate >= DATE '2004-02-01' AND logdate < DATE '2004-03-01' ) +CREATE TABLE measurement_y2006m02 ( + CHECK ( logdate >= DATE '2006-02-01' AND logdate < DATE '2006-03-01' ) ) INHERITS (measurement); -CREATE TABLE measurement_y2004m03 ( - CHECK ( logdate >= DATE '2004-03-01' AND logdate < DATE '2004-04-01' ) +CREATE TABLE measurement_y2006m03 ( + CHECK ( logdate >= DATE '2006-03-01' AND logdate < DATE '2006-04-01' ) ) INHERITS (measurement); ... -CREATE TABLE measurement_y2005m11 ( - CHECK ( logdate >= DATE '2005-11-01' AND logdate < DATE '2005-12-01' ) +CREATE TABLE measurement_y2007m11 ( + CHECK ( logdate >= DATE '2007-11-01' AND logdate < DATE '2007-12-01' ) ) INHERITS (measurement); -CREATE TABLE measurement_y2005m12 ( - CHECK ( logdate >= DATE '2005-12-01' AND logdate < DATE '2006-01-01' ) +CREATE TABLE measurement_y2007m12 ( + CHECK ( logdate >= DATE '2007-12-01' AND logdate < DATE '2008-01-01' ) ) INHERITS (measurement); -CREATE TABLE measurement_y2006m01 ( - CHECK ( logdate >= DATE '2006-01-01' AND logdate < DATE '2006-02-01' ) +CREATE TABLE measurement_y2008m01 ( + CHECK ( logdate >= DATE '2008-01-01' AND logdate < DATE '2008-02-01' ) ) INHERITS (measurement); @@ -2495,12 +2495,12 @@ CREATE TABLE measurement_y2006m01 ( We probably need indexes on the key columns too: -CREATE INDEX measurement_y2004m02_logdate ON measurement_y2004m02 (logdate); -CREATE INDEX measurement_y2004m03_logdate ON measurement_y2004m03 (logdate); +CREATE INDEX measurement_y2006m02_logdate ON measurement_y2006m02 (logdate); +CREATE INDEX measurement_y2006m03_logdate ON measurement_y2006m03 (logdate); ... -CREATE INDEX measurement_y2005m11_logdate ON measurement_y2005m11 (logdate); -CREATE INDEX measurement_y2005m12_logdate ON measurement_y2005m12 (logdate); -CREATE INDEX measurement_y2006m01_logdate ON measurement_y2006m01 (logdate); +CREATE INDEX measurement_y2007m11_logdate ON measurement_y2007m11 (logdate); +CREATE INDEX measurement_y2007m12_logdate ON measurement_y2007m12 (logdate); +CREATE INDEX measurement_y2008m01_logdate ON measurement_y2008m01 (logdate); We choose not to add further indexes at this time. @@ -2509,56 +2509,72 @@ CREATE INDEX measurement_y2006m01_logdate ON measurement_y2006m01 (logdate); + We want our application to be able to say INSERT INTO + measurement ... and have the data be redirected into the + appropriate partition table. We can arrange that by attaching + a suitable trigger function to the master table. If data will be added only to the latest partition, we can - set up a very simple rule to insert data. We must - redefine this each month so that it always points to the - current partition: + use a very simple trigger function: -CREATE OR REPLACE RULE measurement_current_partition AS -ON INSERT TO measurement -DO INSTEAD - INSERT INTO measurement_y2006m01 VALUES ( NEW.city_id, - NEW.logdate, - NEW.peaktemp, - NEW.unitsales ); +CREATE OR REPLACE FUNCTION measurement_insert_trigger() +RETURNS TRIGGER AS $$ +BEGIN + INSERT INTO measurement_y2008m01 VALUES (NEW.*); + RETURN NULL; +END; +$$ +LANGUAGE plpgsql; + After creating the function, we create a trigger which + calls the trigger function: + + +CREATE TRIGGER insert_measurement_trigger + BEFORE INSERT ON measurement + FOR EACH ROW EXECUTE PROCEDURE measurement_insert_trigger(); + + + We must redefine the trigger function each month so that it always + points to the current partition. The trigger definition does + not need to be updated, however. + + + We might want to insert data and have the server automatically locate the partition into which the row should be added. We - could do this with a more complex set of rules as shown below: + could do this with a more complex trigger function, for example: -CREATE RULE measurement_insert_y2004m02 AS -ON INSERT TO measurement WHERE - ( logdate >= DATE '2004-02-01' AND logdate < DATE '2004-03-01' ) -DO INSTEAD - INSERT INTO measurement_y2004m02 VALUES ( NEW.city_id, - NEW.logdate, - NEW.peaktemp, - NEW.unitsales ); -... -CREATE RULE measurement_insert_y2005m12 AS -ON INSERT TO measurement WHERE - ( logdate >= DATE '2005-12-01' AND logdate < DATE '2006-01-01' ) -DO INSTEAD - INSERT INTO measurement_y2005m12 VALUES ( NEW.city_id, - NEW.logdate, - NEW.peaktemp, - NEW.unitsales ); -CREATE RULE measurement_insert_y2006m01 AS -ON INSERT TO measurement WHERE - ( logdate >= DATE '2006-01-01' AND logdate < DATE '2006-02-01' ) -DO INSTEAD - INSERT INTO measurement_y2006m01 VALUES ( NEW.city_id, - NEW.logdate, - NEW.peaktemp, - NEW.unitsales ); +CREATE OR REPLACE FUNCTION measurement_insert_trigger() +RETURNS TRIGGER AS $$ +BEGIN + IF ( logdate >= DATE '2006-02-01' AND logdate < DATE '2006-03-01' ) THEN + INSERT INTO measurement_y2006m02 VALUES (NEW.*); + ELSIF ( logdate >= DATE '2006-03-01' AND logdate < DATE '2006-04-01' ) THEN + INSERT INTO measurement_y2006m03 VALUES (NEW.*); + ... + ELSIF ( logdate >= DATE '2008-01-01' AND logdate < DATE '2008-02-01' ) THEN + INSERT INTO measurement_y2008m01 VALUES (NEW.*); + ELSE + RAISE EXCEPTION 'Date out of range. Fix the measurement_insert_trigger() function!'; + END IF; + RETURN NULL; +END; +$$ +LANGUAGE plpgsql; - Note that the WHERE clause in each rule - exactly matches the CHECK - constraint for its partition. + The trigger definition is the same as before. + Note that each IF test must exactly match the + CHECK constraint for its partition. + + + + While this function is more complex than the single-month case, + it doesn't need to be updated as often, since branches can be + added in advance of being needed. @@ -2571,24 +2587,6 @@ DO INSTEAD script that generates the required DDL automatically. - - Partitioning can also be arranged using a UNION ALL - view: - - -CREATE VIEW measurement AS - SELECT * FROM measurement_y2004m02 -UNION ALL SELECT * FROM measurement_y2004m03 -... -UNION ALL SELECT * FROM measurement_y2005m11 -UNION ALL SELECT * FROM measurement_y2005m12 -UNION ALL SELECT * FROM measurement_y2006m01; - - - However, the need to - recreate the view adds an extra step to adding and dropping - individual partitions of the data set. - @@ -2609,7 +2607,7 @@ UNION ALL SELECT * FROM measurement_y2006m01; The simplest option for removing old data is simply to drop the partition that is no longer necessary: -DROP TABLE measurement_y2003m02; +DROP TABLE measurement_y2006m02; This can very quickly delete millions of records because it doesn't have to individually delete every record. @@ -2620,7 +2618,7 @@ DROP TABLE measurement_y2003m02; the partitioned table but retain access to it as a table in its own right: -ALTER TABLE measurement_y2003m02 NO INHERIT measurement; +ALTER TABLE measurement_y2006m02 NO INHERIT measurement; This allows further operations to be performed on the data before it is dropped. For example, this is often a useful time to back up @@ -2636,8 +2634,8 @@ ALTER TABLE measurement_y2003m02 NO INHERIT measurement; were created above: -CREATE TABLE measurement_y2006m02 ( - CHECK ( logdate >= DATE '2006-02-01' AND logdate < DATE '2006-03-01' ) +CREATE TABLE measurement_y2008m02 ( + CHECK ( logdate >= DATE '2008-02-01' AND logdate < DATE '2008-03-01' ) ) INHERITS (measurement); @@ -2647,13 +2645,13 @@ CREATE TABLE measurement_y2006m02 ( transformed prior to it appearing in the partitioned table: -CREATE TABLE measurement_y2006m02 +CREATE TABLE measurement_y2008m02 (LIKE measurement INCLUDING DEFAULTS INCLUDING CONSTRAINTS); -ALTER TABLE measurement_y2006m02 ADD CONSTRAINT y2006m02 - CHECK ( logdate >= DATE '2006-02-01' AND logdate < DATE '2006-03-01' ); -\copy measurement_y2006m02 from 'measurement_y2006m02' +ALTER TABLE measurement_y2008m02 ADD CONSTRAINT y2008m02 + CHECK ( logdate >= DATE '2008-02-01' AND logdate < DATE '2008-03-01' ); +\copy measurement_y2008m02 from 'measurement_y2008m02' -- possibly some other data preparation work -ALTER TABLE measurement_y2006m02 INHERIT measurement; +ALTER TABLE measurement_y2008m02 INHERIT measurement; @@ -2672,7 +2670,7 @@ ALTER TABLE measurement_y2006m02 INHERIT measurement; SET constraint_exclusion = on; -SELECT count(*) FROM measurement WHERE logdate >= DATE '2006-01-01'; +SELECT count(*) FROM measurement WHERE logdate >= DATE '2008-01-01'; Without constraint exclusion, the above query would scan each of @@ -2691,23 +2689,23 @@ SELECT count(*) FROM measurement WHERE logdate >= DATE '2006-01-01'; SET constraint_exclusion = off; -EXPLAIN SELECT count(*) FROM measurement WHERE logdate >= DATE '2006-01-01'; +EXPLAIN SELECT count(*) FROM measurement WHERE logdate >= DATE '2008-01-01'; QUERY PLAN ----------------------------------------------------------------------------------------------- Aggregate (cost=158.66..158.68 rows=1 width=0) -> Append (cost=0.00..151.88 rows=2715 width=0) -> Seq Scan on measurement (cost=0.00..30.38 rows=543 width=0) - Filter: (logdate >= '2006-01-01'::date) - -> Seq Scan on measurement_y2004m02 measurement (cost=0.00..30.38 rows=543 width=0) - Filter: (logdate >= '2006-01-01'::date) - -> Seq Scan on measurement_y2004m03 measurement (cost=0.00..30.38 rows=543 width=0) - Filter: (logdate >= '2006-01-01'::date) + Filter: (logdate >= '2008-01-01'::date) + -> Seq Scan on measurement_y2006m02 measurement (cost=0.00..30.38 rows=543 width=0) + Filter: (logdate >= '2008-01-01'::date) + -> Seq Scan on measurement_y2006m03 measurement (cost=0.00..30.38 rows=543 width=0) + Filter: (logdate >= '2008-01-01'::date) ... - -> Seq Scan on measurement_y2005m12 measurement (cost=0.00..30.38 rows=543 width=0) - Filter: (logdate >= '2006-01-01'::date) - -> Seq Scan on measurement_y2006m01 measurement (cost=0.00..30.38 rows=543 width=0) - Filter: (logdate >= '2006-01-01'::date) + -> Seq Scan on measurement_y2007m12 measurement (cost=0.00..30.38 rows=543 width=0) + Filter: (logdate >= '2008-01-01'::date) + -> Seq Scan on measurement_y2008m01 measurement (cost=0.00..30.38 rows=543 width=0) + Filter: (logdate >= '2008-01-01'::date) Some or all of the partitions might use index scans instead of @@ -2718,15 +2716,15 @@ EXPLAIN SELECT count(*) FROM measurement WHERE logdate >= DATE '2006-01-01'; SET constraint_exclusion = on; -EXPLAIN SELECT count(*) FROM measurement WHERE logdate >= DATE '2006-01-01'; +EXPLAIN SELECT count(*) FROM measurement WHERE logdate >= DATE '2008-01-01'; QUERY PLAN ----------------------------------------------------------------------------------------------- Aggregate (cost=63.47..63.48 rows=1 width=0) -> Append (cost=0.00..60.75 rows=1086 width=0) -> Seq Scan on measurement (cost=0.00..30.38 rows=543 width=0) - Filter: (logdate >= '2006-01-01'::date) - -> Seq Scan on measurement_y2006m01 measurement (cost=0.00..30.38 rows=543 width=0) - Filter: (logdate >= '2006-01-01'::date) + Filter: (logdate >= '2008-01-01'::date) + -> Seq Scan on measurement_y2008m01 measurement (cost=0.00..30.38 rows=543 width=0) + Filter: (logdate >= '2008-01-01'::date) @@ -2739,6 +2737,69 @@ EXPLAIN SELECT count(*) FROM measurement WHERE logdate >= DATE '2006-01-01'; a large part of the partition or just a small part. An index will be helpful in the latter case but not the former. + + + + + Alternative Partitioning Methods + + + A different approach to redirecting inserts into the appropriate + partition table is to set up rules, instead of a trigger, on the + master table. For example: + + +CREATE RULE measurement_insert_y2006m02 AS +ON INSERT TO measurement WHERE + ( logdate >= DATE '2006-02-01' AND logdate < DATE '2006-03-01' ) +DO INSTEAD + INSERT INTO measurement_y2006m02 VALUES (NEW.*); +... +CREATE RULE measurement_insert_y2008m01 AS +ON INSERT TO measurement WHERE + ( logdate >= DATE '2008-01-01' AND logdate < DATE '2008-02-01' ) +DO INSTEAD + INSERT INTO measurement_y2008m01 VALUES (NEW.*); + + + A rule has significantly more overhead than a trigger, but the overhead + is paid once per query rather than once per row, so this method might be + advantageous for bulk-insert situations. In most cases, however, the + trigger method will offer better performance. + + + + Be aware that COPY ignores rules. If you want to + use COPY to insert data, you'll need to copy into the correct + partition table rather than into the master. COPY does fire + triggers, so you can use it normally if you use the trigger approach. + + + + Another disadvantage of the rule approach is that there is no simple + way to force an error if the set of rules doesn't cover the insertion + date; the data will silently go into the master table instead. + + + + Partitioning can also be arranged using a UNION ALL + view, instead of table inheritance. For example, + + +CREATE VIEW measurement AS + SELECT * FROM measurement_y2006m02 +UNION ALL SELECT * FROM measurement_y2006m03 +... +UNION ALL SELECT * FROM measurement_y2007m11 +UNION ALL SELECT * FROM measurement_y2007m12 +UNION ALL SELECT * FROM measurement_y2008m01; + + + However, the need to recreate the view adds an extra step to adding and + dropping individual partitions of the data set. In practice this + method has little to recommend it compared to using inheritance. + + @@ -2749,24 +2810,38 @@ EXPLAIN SELECT count(*) FROM measurement WHERE logdate >= DATE '2006-01-01'; - There is currently no way to verify that all of the + There is no automatic way to verify that all of the CHECK constraints are mutually - exclusive. Care is required by the database designer. + exclusive. It is safer to create code that generates + partitions and creates and/or modifies associated objects than + to write each by hand. - There is currently no simple way to specify that rows must not be - inserted into the master table. A CHECK (false) - constraint on the master table would be inherited by all child - tables, so that cannot be used for this purpose. One possibility is - to set up an ON INSERT trigger on the master table that - always raises an error. (Alternatively, such a trigger could be - used to redirect the data into the proper child table, instead of - using a set of rules as suggested above.) + The schemes shown here assume that the partition key column(s) + of a row never change, or at least do not change enough to require + it to move to another partition. An UPDATE that attempts + to do that will fail because of the CHECK constraints. + If you need to handle such cases, you can put suitable update triggers + on the partition tables, but it makes management of the structure + much more complicated. + + + + If you are using manual VACUUM or + ANALYZE commands, don't forget that + you need to run them on each partition individually. A command like + +ANALYZE measurement; + + will only process the master table. + + + @@ -2801,18 +2876,9 @@ EXPLAIN SELECT count(*) FROM measurement WHERE logdate >= DATE '2006-01-01'; All constraints on all partitions of the master table are examined during constraint exclusion, so large numbers of partitions are likely - to increase query planning time considerably. - - - - - - Don't forget that you still need to run ANALYZE - on each partition individually. A command like: - -ANALYZE measurement; - - will only process the master table. + to increase query planning time considerably. Partitioning using + these techniques will work well with up to perhaps a hundred partitions; + don't try to use many thousands of partitions.