Add bytea equivalents of ltrim() and rtrim().

We had bytea btrim() already, but for some reason not the other two.

Joel Jacobson

Discussion: https://postgr.es/m/d10cd5cd-a901-42f1-b832-763ac6f7ff3a@www.fastmail.com
This commit is contained in:
Tom Lane 2021-01-18 15:11:32 -05:00
parent a3ed4d1efe
commit a6cf3df4eb
9 changed files with 203 additions and 57 deletions

View File

@ -3948,15 +3948,16 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
<indexterm>
<primary>trim</primary>
</indexterm>
<function>trim</function> ( <optional> <literal>BOTH</literal> </optional>
<function>trim</function> ( <optional> <literal>LEADING</literal> | <literal>TRAILING</literal> | <literal>BOTH</literal> </optional>
<parameter>bytesremoved</parameter> <type>bytea</type> <literal>FROM</literal>
<parameter>bytes</parameter> <type>bytea</type> )
<returnvalue>bytea</returnvalue>
</para>
<para>
Removes the longest string containing only bytes appearing in
<parameter>bytesremoved</parameter> from the start
and end of <parameter>bytes</parameter>.
<parameter>bytesremoved</parameter> from the start,
end, or both ends (<literal>BOTH</literal> is the default)
of <parameter>bytes</parameter>.
</para>
<para>
<literal>trim('\x9012'::bytea from '\x1234567890'::bytea)</literal>
@ -3966,7 +3967,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
<row>
<entry role="func_table_entry"><para role="func_signature">
<function>trim</function> ( <optional> <literal>BOTH</literal> </optional> <optional> <literal>FROM</literal> </optional>
<function>trim</function> ( <optional> <literal>LEADING</literal> | <literal>TRAILING</literal> | <literal>BOTH</literal> </optional> <optional> <literal>FROM</literal> </optional>
<parameter>bytes</parameter> <type>bytea</type>,
<parameter>bytesremoved</parameter> <type>bytea</type> )
<returnvalue>bytea</returnvalue>
@ -4109,6 +4110,26 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
</para></entry>
</row>
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm>
<primary>ltrim</primary>
</indexterm>
<function>ltrim</function> ( <parameter>bytes</parameter> <type>bytea</type>,
<parameter>bytesremoved</parameter> <type>bytea</type> )
<returnvalue>bytea</returnvalue>
</para>
<para>
Removes the longest string containing only bytes appearing in
<parameter>bytesremoved</parameter> from the start of
<parameter>bytes</parameter>.
</para>
<para>
<literal>ltrim('\x1234567890'::bytea, '\x9012'::bytea)</literal>
<returnvalue>\x34567890</returnvalue>
</para></entry>
</row>
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm>
@ -4127,6 +4148,26 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
</para></entry>
</row>
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm>
<primary>rtrim</primary>
</indexterm>
<function>rtrim</function> ( <parameter>bytes</parameter> <type>bytea</type>,
<parameter>bytesremoved</parameter> <type>bytea</type> )
<returnvalue>bytea</returnvalue>
</para>
<para>
Removes the longest string containing only bytes appearing in
<parameter>bytesremoved</parameter> from the end of
<parameter>bytes</parameter>.
</para>
<para>
<literal>rtrim('\x1234567890'::bytea, '\x9012'::bytea)</literal>
<returnvalue>\x12345678</returnvalue>
</para></entry>
</row>
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm>

View File

@ -24,6 +24,8 @@
/*
 * Prototypes for file-local trimming helpers.
 *
 * dotrim returns a text value built from (a portion of) the given string.
 * NOTE(review): its doltrim/dortrim flags presumably select start/end
 * trimming the same way as in dobyteatrim -- confirm against its body.
 *
 * dobyteatrim is the shared implementation behind the bytea versions of
 * btrim, ltrim and rtrim; doltrim enables trimming at the start of the
 * string and dortrim enables trimming at the end.
 */
static text *dotrim(const char *string, int stringlen,
const char *set, int setlen,
bool doltrim, bool dortrim);
static bytea *dobyteatrim(bytea *string, bytea *set,
bool doltrim, bool dortrim);
/********************************************************************
@ -521,6 +523,76 @@ dotrim(const char *string, int stringlen,
return cstring_to_text_with_len(string, stringlen);
}
/*
 * Common implementation for bytea versions of btrim, ltrim, rtrim
 *
 * Strips from "string" the longest run of bytes that all appear in "set":
 * at the start when doltrim is true, at the end when dortrim is true, or
 * at both ends when both flags are set.
 *
 * If either "string" or "set" is empty, "string" itself is returned
 * unchanged (no copy is made).  Otherwise the result is a newly palloc'd
 * bytea holding the remaining bytes, which may be zero-length.
 */
static bytea *
dobyteatrim(bytea *string, bytea *set, bool doltrim, bool dortrim)
{
	bytea	   *ret;
	char	   *ptr;
	char	   *setdata;
	bool		in_set[256] = {false};
	int			m,
				stringlen,
				setlen,
				i;

	stringlen = VARSIZE_ANY_EXHDR(string);
	setlen = VARSIZE_ANY_EXHDR(set);

	/* Nothing to trim, or nothing to trim with */
	if (stringlen <= 0 || setlen <= 0)
		return string;

	/*
	 * Record which byte values occur in "set", so that each membership test
	 * below is a single table lookup instead of a rescan of the whole set.
	 * Index through unsigned char, since plain char may be signed.
	 */
	setdata = VARDATA_ANY(set);
	for (i = 0; i < setlen; i++)
		in_set[(unsigned char) setdata[i]] = true;

	/* ptr/m describe the not-yet-trimmed portion of the input */
	ptr = VARDATA_ANY(string);
	m = stringlen;

	if (doltrim)
	{
		/* Advance past leading bytes that are members of the set */
		while (m > 0 && in_set[(unsigned char) *ptr])
		{
			ptr++;
			m--;
		}
	}

	if (dortrim)
	{
		/*
		 * Drop trailing bytes that are members of the set.  Addressing the
		 * last byte as ptr[m - 1] avoids ever stepping a pointer to before
		 * the start of the data.
		 */
		while (m > 0 && in_set[(unsigned char) ptr[m - 1]])
			m--;
	}

	/* Copy what is left into a fresh varlena */
	ret = (bytea *) palloc(VARHDRSZ + m);
	SET_VARSIZE(ret, VARHDRSZ + m);
	memcpy(VARDATA(ret), ptr, m);
	return ret;
}
/********************************************************************
*
* byteatrim
@ -543,60 +615,62 @@ byteatrim(PG_FUNCTION_ARGS)
bytea *string = PG_GETARG_BYTEA_PP(0);
bytea *set = PG_GETARG_BYTEA_PP(1);
bytea *ret;
char *ptr,
*end,
*ptr2,
*ptr2start,
*end2;
int m,
stringlen,
setlen;
stringlen = VARSIZE_ANY_EXHDR(string);
setlen = VARSIZE_ANY_EXHDR(set);
ret = dobyteatrim(string, set, true, true);
if (stringlen <= 0 || setlen <= 0)
PG_RETURN_BYTEA_P(string);
PG_RETURN_BYTEA_P(ret);
}
m = stringlen;
ptr = VARDATA_ANY(string);
end = ptr + stringlen - 1;
ptr2start = VARDATA_ANY(set);
end2 = ptr2start + setlen - 1;
/********************************************************************
*
* bytealtrim
*
* Syntax:
*
* bytea bytealtrim(bytea string, bytea set)
*
* Purpose:
*
* Returns string with initial bytes removed up to the first
* byte not in set.
*
********************************************************************/
while (m > 0)
{
ptr2 = ptr2start;
while (ptr2 <= end2)
{
if (*ptr == *ptr2)
break;
++ptr2;
}
if (ptr2 > end2)
break;
ptr++;
m--;
}
Datum
bytealtrim(PG_FUNCTION_ARGS)
{
bytea *string = PG_GETARG_BYTEA_PP(0);
bytea *set = PG_GETARG_BYTEA_PP(1);
bytea *ret;
while (m > 0)
{
ptr2 = ptr2start;
while (ptr2 <= end2)
{
if (*end == *ptr2)
break;
++ptr2;
}
if (ptr2 > end2)
break;
end--;
m--;
}
ret = dobyteatrim(string, set, true, false);
ret = (bytea *) palloc(VARHDRSZ + m);
SET_VARSIZE(ret, VARHDRSZ + m);
memcpy(VARDATA(ret), ptr, m);
PG_RETURN_BYTEA_P(ret);
}
/********************************************************************
 *
 * byteartrim
 *
 * Syntax:
 *
 *	 bytea byteartrim(bytea string, bytea set)
 *
 * Purpose:
 *
 *	 Right-trim for bytea: strips from the end of string the
 *	 trailing bytes that occur in set, stopping at the last byte
 *	 that is not in set.  The start of string is left untouched.
 *
 ********************************************************************/
Datum
byteartrim(PG_FUNCTION_ARGS)
{
	bytea	   *string = PG_GETARG_BYTEA_PP(0);
	bytea	   *set = PG_GETARG_BYTEA_PP(1);

	/* Trim the right end only: doltrim = false, dortrim = true */
	PG_RETURN_BYTEA_P(dobyteatrim(string, set, false, true));
}

View File

@ -9680,6 +9680,7 @@ get_func_sql_syntax(FuncExpr *expr, deparse_context *context)
appendStringInfoChar(buf, ')');
return true;
case F_LTRIM_BYTEA_BYTEA:
case F_LTRIM_TEXT:
case F_LTRIM_TEXT_TEXT:
/* TRIM() */
@ -9694,6 +9695,7 @@ get_func_sql_syntax(FuncExpr *expr, deparse_context *context)
appendStringInfoChar(buf, ')');
return true;
case F_RTRIM_BYTEA_BYTEA:
case F_RTRIM_TEXT:
case F_RTRIM_TEXT_TEXT:
/* TRIM() */

View File

@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 202101171
#define CATALOG_VERSION_NO 202101181
#endif

View File

@ -5779,9 +5779,15 @@
{ oid => '2014', descr => 'position of substring',
proname => 'position', prorettype => 'int4', proargtypes => 'bytea bytea',
prosrc => 'byteapos' },
{ oid => '2015', descr => 'trim both ends of string',
{ oid => '2015', descr => 'trim selected bytes from both ends of string',
proname => 'btrim', prorettype => 'bytea', proargtypes => 'bytea bytea',
prosrc => 'byteatrim' },
{ oid => '9612', descr => 'trim selected bytes from left end of string',
proname => 'ltrim', prorettype => 'bytea', proargtypes => 'bytea bytea',
prosrc => 'bytealtrim' },
{ oid => '9613', descr => 'trim selected bytes from right end of string',
proname => 'rtrim', prorettype => 'bytea', proargtypes => 'bytea bytea',
prosrc => 'byteartrim' },
{ oid => '2019', descr => 'convert timestamp with time zone to time',
proname => 'time', provolatile => 's', prorettype => 'time',

View File

@ -1735,7 +1735,10 @@ select
substring('foo' from 'oo') as ssf, -- historically-permitted abuse
trim(' ' from ' foo ') as bt,
trim(leading ' ' from ' foo ') as lt,
trim(trailing ' foo ') as rt;
trim(trailing ' foo ') as rt,
trim(E'\\000'::bytea from E'\\000Tom\\000'::bytea) as btb,
trim(leading E'\\000'::bytea from E'\\000Tom\\000'::bytea) as ltb,
trim(trailing E'\\000'::bytea from E'\\000Tom\\000'::bytea) as rtb;
select pg_get_viewdef('tt201v', true);
pg_get_viewdef
-----------------------------------------------------------------------------------------------
@ -1753,7 +1756,10 @@ select pg_get_viewdef('tt201v', true);
"substring"('foo'::text, 'oo'::text) AS ssf, +
TRIM(BOTH ' '::text FROM ' foo '::text) AS bt, +
TRIM(LEADING ' '::text FROM ' foo '::text) AS lt, +
TRIM(TRAILING FROM ' foo '::text) AS rt;
TRIM(TRAILING FROM ' foo '::text) AS rt, +
TRIM(BOTH '\x00'::bytea FROM '\x00546f6d00'::bytea) AS btb, +
TRIM(LEADING '\x00'::bytea FROM '\x00546f6d00'::bytea) AS ltb, +
TRIM(TRAILING '\x00'::bytea FROM '\x00546f6d00'::bytea) AS rtb;
(1 row)
-- corner cases with empty join conditions

View File

@ -2131,6 +2131,18 @@ SELECT trim(E'\\000'::bytea from E'\\000Tom\\000'::bytea);
Tom
(1 row)
SELECT trim(leading E'\\000'::bytea from E'\\000Tom\\000'::bytea);
ltrim
---------
Tom\000
(1 row)
SELECT trim(trailing E'\\000'::bytea from E'\\000Tom\\000'::bytea);
rtrim
---------
\000Tom
(1 row)
SELECT btrim(E'\\000trim\\000'::bytea, E'\\000'::bytea);
btrim
-------

View File

@ -605,7 +605,10 @@ select
substring('foo' from 'oo') as ssf, -- historically-permitted abuse
trim(' ' from ' foo ') as bt,
trim(leading ' ' from ' foo ') as lt,
trim(trailing ' foo ') as rt;
trim(trailing ' foo ') as rt,
trim(E'\\000'::bytea from E'\\000Tom\\000'::bytea) as btb,
trim(leading E'\\000'::bytea from E'\\000Tom\\000'::bytea) as ltb,
trim(trailing E'\\000'::bytea from E'\\000Tom\\000'::bytea) as rtb;
select pg_get_viewdef('tt201v', true);
-- corner cases with empty join conditions

View File

@ -722,6 +722,8 @@ SELECT SUBSTRING('string'::bytea FROM -10 FOR 2147483646) AS "string";
SELECT SUBSTRING('string'::bytea FROM -10 FOR -2147483646) AS "error";
SELECT trim(E'\\000'::bytea from E'\\000Tom\\000'::bytea);
SELECT trim(leading E'\\000'::bytea from E'\\000Tom\\000'::bytea);
SELECT trim(trailing E'\\000'::bytea from E'\\000Tom\\000'::bytea);
SELECT btrim(E'\\000trim\\000'::bytea, E'\\000'::bytea);
SELECT btrim(''::bytea, E'\\000'::bytea);
SELECT btrim(E'\\000trim\\000'::bytea, ''::bytea);