From 2f0c19ce4eaf902f9ae5eaaa2e9a8544decf41e5 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Fri, 21 Sep 2018 19:55:07 -0400 Subject: [PATCH] docs: remove use of escape strings and use bytea hex output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit standard_conforming_strings defaulted to 'on' in PG 9.1. bytea_output defaulted to 'hex' in PG 9.0. Reported-by: André Hänsel Discussion: https://postgr.es/m/12e601d447ac$345994a0$9d0cbde0$@webkr.de Backpatch-through: 9.3 --- doc/src/sgml/array.sgml | 27 ++------------- doc/src/sgml/datatype.sgml | 71 ++++++++++++++++++++++---------------- doc/src/sgml/func.sgml | 48 +++++++++++++------------- doc/src/sgml/lobj.sgml | 4 +-- doc/src/sgml/rowtypes.sgml | 2 +- 5 files changed, 72 insertions(+), 80 deletions(-) diff --git a/doc/src/sgml/array.sgml b/doc/src/sgml/array.sgml index 58878451f0..7fd35d298a 100644 --- a/doc/src/sgml/array.sgml +++ b/doc/src/sgml/array.sgml @@ -767,9 +767,9 @@ SELECT f1[1][-2][3] AS e1, f1[1][-1][5] AS e2 For example, elements containing curly braces, commas (or the data type's delimiter character), double quotes, backslashes, or leading or trailing whitespace must be double-quoted. Empty strings and strings matching the - word NULL must be quoted, too. To put a double quote or - backslash in a quoted array element value, use escape string syntax - and precede it with a backslash. Alternatively, you can avoid quotes and use + word NULL must be quoted, too. To put a double + quote or backslash in a quoted array element value, precede it + with a backslash. Alternatively, you can avoid quotes and use backslash-escaping to protect all data characters that would otherwise be taken as array syntax. @@ -782,27 +782,6 @@ SELECT f1[1][-2][3] AS e1, f1[1][-1][5] AS e2 non-whitespace characters of an element, is not ignored. - - - Remember that what you write in an SQL command will first be interpreted - as a string literal, and then as an array. 
This doubles the number of - backslashes you need. For example, to insert a text array - value containing a backslash and a double quote, you'd need to write: - -INSERT ... VALUES (E'{"\\\\","\\""}'); - - The escape string processor removes one level of backslashes, so that - what arrives at the array-value parser looks like {"\\","\""}. - In turn, the strings fed to the text data type's input routine - become \ and " respectively. (If we were working - with a data type whose input routine also treated backslashes specially, - bytea for example, we might need as many as eight backslashes - in the command to get one backslash into the stored array element.) - Dollar quoting (see ) can be - used to avoid the need to double backslashes. - - - The ARRAY constructor syntax (see diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml index 5f45db2488..a7eafc471f 100644 --- a/doc/src/sgml/datatype.sgml +++ b/doc/src/sgml/datatype.sgml @@ -1282,7 +1282,7 @@ SELECT b, char_length(b) FROM test2; strings are distinguished from character strings in two ways. First, binary strings specifically allow storing octets of value zero and other non-printable - octets (usually, octets outside the range 32 to 126). + octets (usually, octets outside the decimal range 32 to 126). Character strings disallow zero octets, and also disallow any other octet values and sequences of octet values that are invalid according to the database's selected character set encoding. @@ -1294,9 +1294,10 @@ SELECT b, char_length(b) FROM test2; - The bytea type supports two external formats for - input and output: PostgreSQL's historical - escape format, and hex format. Both + The bytea type supports two + formats for input and output: hex format + and PostgreSQL's historical + escape format. Both of these are always accepted on input. The output format depends on the configuration parameter ; the default is hex. 
(Note that the hex format was introduced in @@ -1334,7 +1335,7 @@ SELECT b, char_length(b) FROM test2; Example: -SELECT E'\\xDEADBEEF'; +SELECT '\xDEADBEEF'; @@ -1354,7 +1355,7 @@ SELECT E'\\xDEADBEEF'; convenient. But in practice it is usually confusing because it fuzzes up the distinction between binary strings and character strings, and also the particular escape mechanism that was chosen is - somewhat unwieldy. So this format should probably be avoided + somewhat unwieldy. Therefore, this format should probably be avoided for most new applications. @@ -1367,7 +1368,7 @@ SELECT E'\\xDEADBEEF'; octal value and precede it by a backslash (or two backslashes, if writing the value as a literal using escape string syntax). - Backslash itself (octet value 92) can alternatively be represented by + Backslash itself (octet decimal value 92) can alternatively be represented by double backslashes. shows the characters that must be escaped, and gives the alternative @@ -1391,33 +1392,33 @@ SELECT E'\\xDEADBEEF'; 0 zero octet - E'\\000' - SELECT E'\\000'::bytea; - \000 + '\000' + SELECT '\000'::bytea; + \x00 39 single quote - '''' or E'\\047' - SELECT E'\''::bytea; - ' + '''' or '\047' + SELECT ''''::bytea; + \x27 92 backslash - E'\\\\' or E'\\134' - SELECT E'\\\\'::bytea; - \\ + '\\' or '\134' + SELECT '\\'::bytea; + \x5c 0 to 31 and 127 to 255 non-printable octets - E'\\xxx' (octal value) - SELECT E'\\001'::bytea; - \001 + '\xxx' (octal value) + SELECT '\001'::bytea; + \x01 @@ -1445,7 +1446,7 @@ SELECT E'\\xDEADBEEF'; of escaping.) The remaining backslash is then recognized by the bytea input function as starting either a three digit octal value or escaping another backslash. For example, - a string literal passed to the server as E'\\001' + a string literal passed to the server as '\001' becomes \001 after passing through the escape string parser. 
The \001 is then sent to the bytea input function, where it is converted @@ -1456,12 +1457,24 @@ SELECT E'\\xDEADBEEF'; - Bytea octets are sometimes escaped when output. In general, each - non-printable octet is converted into - its equivalent three-digit octal value and preceded by one backslash. - Most printable octets are represented by their standard - representation in the client character set. The octet with decimal - value 92 (backslash) is doubled in the output. + Bytea octets are output in hex + format by default. If you change + to escape, + non-printable octets are converted to + their equivalent three-digit octal value and preceded by one backslash. + Most printable octets are output by their standard + representation in the client character set, e.g.: + + +SET bytea_output = 'escape'; + +SELECT 'abc \153\154\155 \052\251\124'::bytea; + bytea +---------------- + abc klm *\251T + + + The octet with decimal value 92 (backslash) is doubled in the output. Details are in . @@ -1484,7 +1497,7 @@ SELECT E'\\xDEADBEEF'; 92 backslash \\ - SELECT E'\\134'::bytea; + SELECT '\134'::bytea; \\ @@ -1492,7 +1505,7 @@ SELECT E'\\xDEADBEEF'; 0 to 31 and 127 to 255 non-printable octets \xxx (octal value) - SELECT E'\\001'::bytea; + SELECT '\001'::bytea; \001 @@ -1500,7 +1513,7 @@ SELECT E'\\xDEADBEEF'; 32 to 126 printable octets client character set representation - SELECT E'\\176'::bytea; + SELECT '\176'::bytea; ~ diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 4f4083711b..92eb084d3c 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1776,7 +1776,7 @@ octal sequences (\nnn) and doubles backslashes. - encode(E'123\\000\\001', 'base64') + encode('123\000\001', 'base64') MTIzAAE= @@ -2083,7 +2083,7 @@ the delimiter. See for more information. - regexp_split_to_array('hello world', E'\\s+') + regexp_split_to_array('hello world', '\s+') {hello,world} @@ -2100,7 +2100,7 @@ the delimiter. See for more information. 
- regexp_split_to_table('hello world', E'\\s+') + regexp_split_to_table('hello world', '\s+') helloworld (2 rows) @@ -3269,8 +3269,8 @@ SELECT format('Testing %s, %s, %s, %%', 'one', 'two', 'three'); SELECT format('INSERT INTO %I VALUES(%L)', 'Foo bar', E'O\'Reilly'); Result: INSERT INTO "Foo bar" VALUES('O''Reilly') -SELECT format('INSERT INTO %I VALUES(%L)', 'locations', E'C:\\Program Files'); -Result: INSERT INTO locations VALUES(E'C:\\Program Files') +SELECT format('INSERT INTO %I VALUES(%L)', 'locations', 'C:\Program Files'); +Result: INSERT INTO locations VALUES('C:\Program Files') @@ -3397,7 +3397,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); concatenation - E'\\\\Post'::bytea || E'\\047gres\\000'::bytea + '\\Post'::bytea || '\047gres\000'::bytea \\Post'gres\000 @@ -3410,7 +3410,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); int Number of bytes in binary string - octet_length(E'jo\\000se'::bytea) + octet_length('jo\000se'::bytea) 5 @@ -3425,7 +3425,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); Replace substring - overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 2 for 3) + overlay('Th\000omas'::bytea placing '\002\003'::bytea from 2 for 3) T\\002\\003mas @@ -3438,7 +3438,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); int Location of specified substring - position(E'\\000om'::bytea in E'Th\\000omas'::bytea) + position('\000om'::bytea in 'Th\000omas'::bytea) 3 @@ -3453,7 +3453,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); Extract substring - substring(E'Th\\000omas'::bytea from 2 for 3) + substring('Th\000omas'::bytea from 2 for 3) h\000o @@ -3472,7 +3472,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); bytes from the start and end of string - trim(E'\\000\\001'::bytea from E'\\000Tom\\001'::bytea) + trim('\000\001'::bytea from '\000Tom\001'::bytea) Tom @@ -3515,7 +3515,7 @@ SELECT format('Testing %3$s, %2$s, %s', 
'one', 'two', 'three'); bytes from the start and end of string - btrim(E'\\000trim\\001'::bytea, E'\\000\\001'::bytea) + btrim('\000trim\001'::bytea, '\000\001'::bytea) trim @@ -3532,7 +3532,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); Decode binary data from textual representation in string. Options for format are same as in encode. - decode(E'123\\000456', 'escape') + decode('123\000456', 'escape') 123\000456 @@ -3552,7 +3552,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); octal sequences (\nnn) and doubles backslashes. - encode(E'123\\000456'::bytea, 'escape') + encode('123\000456'::bytea, 'escape') 123\000456 @@ -3567,7 +3567,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); Extract bit from string - get_bit(E'Th\\000omas'::bytea, 45) + get_bit('Th\000omas'::bytea, 45) 1 @@ -3582,7 +3582,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); Extract byte from string - get_byte(E'Th\\000omas'::bytea, 4) + get_byte('Th\000omas'::bytea, 4) 109 @@ -3606,7 +3606,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); binary strings, length - length(E'jo\\000se'::bytea) + length('jo\000se'::bytea) 5 @@ -3622,7 +3622,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); Calculates the MD5 hash of string, returning the result in hexadecimal - md5(E'Th\\000omas'::bytea) + md5('Th\000omas'::bytea) 8ab2d3c9689aaf18 b4958c334c82d8b1 @@ -3638,7 +3638,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); Set bit in string - set_bit(E'Th\\000omas'::bytea, 45, 0) + set_bit('Th\000omas'::bytea, 45, 0) Th\000omAs @@ -3654,7 +3654,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); Set byte in string - set_byte(E'Th\\000omas'::bytea, 4, 64) + set_byte('Th\000omas'::bytea, 4, 64) Th\000o@as @@ -4267,7 +4267,7 @@ regexp_replace('foobarbaz', 'b..', 'X') fooXbaz regexp_replace('foobarbaz', 'b..', 'X', 'g') fooXX -regexp_replace('foobarbaz', 
'b(..)', E'X\\1Y', 'g') +regexp_replace('foobarbaz', 'b(..)', 'X\1Y', 'g') fooXarYXazY @@ -4361,7 +4361,7 @@ SELECT col1, (SELECT regexp_matches(col2, '(bar)(beque)')) FROM tab; Some examples: -SELECT foo FROM regexp_split_to_table('the quick brown fox jumps over the lazy dog', E'\\s+') AS foo; +SELECT foo FROM regexp_split_to_table('the quick brown fox jumps over the lazy dog', '\s+') AS foo; foo ------- the @@ -4375,13 +4375,13 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox jumps over the lazy d dog (9 rows) -SELECT regexp_split_to_array('the quick brown fox jumps over the lazy dog', E'\\s+'); +SELECT regexp_split_to_array('the quick brown fox jumps over the lazy dog', '\s+'); regexp_split_to_array ----------------------------------------------- {the,quick,brown,fox,jumps,over,the,lazy,dog} (1 row) -SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo; +SELECT foo FROM regexp_split_to_table('the quick brown fox', '\s*') AS foo; foo ----- t diff --git a/doc/src/sgml/lobj.sgml b/doc/src/sgml/lobj.sgml index 152eb6d44a..10bd17bf1b 100644 --- a/doc/src/sgml/lobj.sgml +++ b/doc/src/sgml/lobj.sgml @@ -556,7 +556,7 @@ int lo_unlink(PGconn *conn, Oid lobjId); Create a large object and store data there, returning its OID. Pass 0 to have the system choose an OID. - lo_from_bytea(0, E'\\xffffff00') + lo_from_bytea(0, '\xffffff00') 24528 @@ -571,7 +571,7 @@ int lo_unlink(PGconn *conn, Oid lobjId); Write data at the given offset. - lo_put(24528, 1, E'\\xaa') + lo_put(24528, 1, '\xaa') diff --git a/doc/src/sgml/rowtypes.sgml b/doc/src/sgml/rowtypes.sgml index 9d6768e006..f96df693bf 100644 --- a/doc/src/sgml/rowtypes.sgml +++ b/doc/src/sgml/rowtypes.sgml @@ -501,7 +501,7 @@ SELECT c.somefunc FROM inventory_item c; containing a double quote and a backslash in a composite value, you'd need to write: -INSERT ... VALUES (E'("\\"\\\\")'); +INSERT ... 
VALUES ('("\"\\")'); The string-literal processor removes one level of backslashes, so that what arrives at the composite-value parser looks like