From 2daeba6a4e49f2ac4cc202fc9352c2a59e928a6c Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 7 Apr 2024 16:26:47 -0400 Subject: [PATCH] Doc: show how to get the equivalent of LIMIT for UPDATE/DELETE. Add examples showing use of a CTE and a self-join to perform partial UPDATEs and DELETEs. Corey Huinker, reviewed by Laurenz Albe Discussion: https://postgr.es/m/CADkLM=caNEQsUwPWnfi2jR4ix99E0EJM_3jtcE-YjnEQC7Rssw@mail.gmail.com --- doc/src/sgml/ref/delete.sgml | 24 ++++++++++++++++++++-- doc/src/sgml/ref/update.sgml | 40 +++++++++++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/ref/delete.sgml b/doc/src/sgml/ref/delete.sgml index 1b81b4e7d7..0b6fa00512 100644 --- a/doc/src/sgml/ref/delete.sgml +++ b/doc/src/sgml/ref/delete.sgml @@ -260,12 +260,32 @@ DELETE FROM tasks WHERE status = 'DONE' RETURNING *; - + Delete the row of tasks on which the cursor c_tasks is currently positioned: DELETE FROM tasks WHERE CURRENT OF c_tasks; - + + + + + While there is no LIMIT clause + for DELETE, it is possible to get a similar effect + using the same method described in the + documentation of UPDATE: + +WITH delete_batch AS ( + SELECT l.ctid FROM user_logs AS l + WHERE l.status = 'archived' + ORDER BY l.creation_date + FOR UPDATE + LIMIT 10000 +) +DELETE FROM user_logs AS dl + USING delete_batch AS del + WHERE dl.ctid = del.ctid; + + diff --git a/doc/src/sgml/ref/update.sgml b/doc/src/sgml/ref/update.sgml index 2ab24b0523..babb34fa51 100644 --- a/doc/src/sgml/ref/update.sgml +++ b/doc/src/sgml/ref/update.sgml @@ -441,7 +441,45 @@ COMMIT; c_films is currently positioned: UPDATE films SET kind = 'Dramatic' WHERE CURRENT OF c_films; - + + + + + Updates affecting many rows can have negative effects on system + performance, such as table bloat, increased replica lag, and increased + lock contention. In such situations it can make sense to perform the + operation in smaller batches, possibly with a VACUUM + operation on the table between batches. While there is + no LIMIT clause for UPDATE, it is + possible to get a similar effect through the use of + a Common Table Expression and a + self-join. With the standard PostgreSQL + table access method, a self-join on the system + column ctid is very + efficient: + +WITH exceeded_max_retries AS ( + SELECT w.ctid FROM work_item AS w + WHERE w.status = 'active' AND w.num_retries > 10 + ORDER BY w.retry_timestamp + FOR UPDATE + LIMIT 5000 +) +UPDATE work_item SET status = 'failed' + FROM exceeded_max_retries AS emr + WHERE work_item.ctid = emr.ctid; + + This command will need to be repeated until no rows remain to be updated. + Use of an ORDER BY clause allows the command to + prioritize which rows will be updated; it can also prevent deadlock + with other update operations if they use the same ordering. + If lock contention is a concern, then SKIP LOCKED + can be added to the CTE to prevent multiple commands + from updating the same row. However, then a + final UPDATE without SKIP LOCKED + or LIMIT will be needed to ensure that no matching + rows were overlooked. +