Support negative indexes in split_part().
This provides a handy way to get, say, the last field of the string. Use of a negative index in this way has precedent in the nearby left() and right() functions. The implementation scans the string twice when N < -1, but it seems likely that N = -1 will be the huge majority of actual use cases, so I'm not really excited about adding complexity to avoid that. Nikhil Benesch, reviewed by Jacob Champion; cosmetic tweakage by me Discussion: https://postgr.es/m/cbb7f861-6162-3a51-9823-97bc3aa0b638@gmail.com
This commit is contained in:
parent
3bf44303b9
commit
ec0294fb2c
|
@ -3356,11 +3356,17 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
|
||||||
<para>
|
<para>
|
||||||
Splits <parameter>string</parameter> at occurrences
|
Splits <parameter>string</parameter> at occurrences
|
||||||
of <parameter>delimiter</parameter> and returns
|
of <parameter>delimiter</parameter> and returns
|
||||||
the <parameter>n</parameter>'th field (counting from one).
|
the <parameter>n</parameter>'th field (counting from one),
|
||||||
|
or when <parameter>n</parameter> is negative, returns
|
||||||
|
the |<parameter>n</parameter>|'th-from-last field.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
<literal>split_part('abc~@~def~@~ghi', '~@~', 2)</literal>
|
<literal>split_part('abc~@~def~@~ghi', '~@~', 2)</literal>
|
||||||
<returnvalue>def</returnvalue>
|
<returnvalue>def</returnvalue>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
<literal>split_part('abc,def,ghi,jkl', ',', -2)</literal>
|
||||||
|
<returnvalue>ghi</returnvalue>
|
||||||
</para></entry>
|
</para></entry>
|
||||||
</row>
|
</row>
|
||||||
|
|
||||||
|
|
|
@ -51,7 +51,7 @@ typedef struct varlena VarString;
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
bool is_multibyte; /* T if multibyte encoding */
|
bool is_multibyte; /* T if multibyte encoding */
|
||||||
bool is_multibyte_char_in_char;
|
bool is_multibyte_char_in_char; /* need to check char boundaries? */
|
||||||
|
|
||||||
char *str1; /* haystack string */
|
char *str1; /* haystack string */
|
||||||
char *str2; /* needle string */
|
char *str2; /* needle string */
|
||||||
|
@ -1439,8 +1439,7 @@ text_position_next_internal(char *start_ptr, TextPositionState *state)
|
||||||
/*
|
/*
|
||||||
* Return a pointer to the current match.
|
* Return a pointer to the current match.
|
||||||
*
|
*
|
||||||
* The returned pointer points into correct position in the original
|
* The returned pointer points into the original haystack string.
|
||||||
* the haystack string.
|
|
||||||
*/
|
*/
|
||||||
static char *
|
static char *
|
||||||
text_position_get_match_ptr(TextPositionState *state)
|
text_position_get_match_ptr(TextPositionState *state)
|
||||||
|
@ -1471,12 +1470,27 @@ text_position_get_match_pos(TextPositionState *state)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Reset search state to the initial state installed by text_position_setup.
|
||||||
|
*
|
||||||
|
* The next call to text_position_next will search from the beginning
|
||||||
|
* of the string.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
text_position_reset(TextPositionState *state)
|
||||||
|
{
|
||||||
|
state->last_match = NULL;
|
||||||
|
state->refpoint = state->str1;
|
||||||
|
state->refpos = 0;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
text_position_cleanup(TextPositionState *state)
|
text_position_cleanup(TextPositionState *state)
|
||||||
{
|
{
|
||||||
/* no cleanup needed */
|
/* no cleanup needed */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
check_collation_set(Oid collid)
|
check_collation_set(Oid collid)
|
||||||
{
|
{
|
||||||
|
@ -4581,9 +4595,8 @@ replace_text_regexp(text *src_text, void *regexp,
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* split_part
|
* split_part
|
||||||
* parse input string
|
* parse input string based on provided field separator
|
||||||
* return ord item (1 based)
|
* return N'th item (1 based, negative counts from end)
|
||||||
* based on provided field separator
|
|
||||||
*/
|
*/
|
||||||
Datum
|
Datum
|
||||||
split_part(PG_FUNCTION_ARGS)
|
split_part(PG_FUNCTION_ARGS)
|
||||||
|
@ -4600,10 +4613,10 @@ split_part(PG_FUNCTION_ARGS)
|
||||||
bool found;
|
bool found;
|
||||||
|
|
||||||
/* field number is 1 based */
|
/* field number is 1 based */
|
||||||
if (fldnum < 1)
|
if (fldnum == 0)
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||||
errmsg("field position must be greater than zero")));
|
errmsg("field position must not be zero")));
|
||||||
|
|
||||||
inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
|
inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
|
||||||
fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
|
fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
|
||||||
|
@ -4612,32 +4625,72 @@ split_part(PG_FUNCTION_ARGS)
|
||||||
if (inputstring_len < 1)
|
if (inputstring_len < 1)
|
||||||
PG_RETURN_TEXT_P(cstring_to_text(""));
|
PG_RETURN_TEXT_P(cstring_to_text(""));
|
||||||
|
|
||||||
/* empty field separator */
|
/* handle empty field separator */
|
||||||
if (fldsep_len < 1)
|
if (fldsep_len < 1)
|
||||||
{
|
{
|
||||||
/* if first field, return input string, else empty string */
|
/* if first or last field, return input string, else empty string */
|
||||||
if (fldnum == 1)
|
if (fldnum == 1 || fldnum == -1)
|
||||||
PG_RETURN_TEXT_P(inputstring);
|
PG_RETURN_TEXT_P(inputstring);
|
||||||
else
|
else
|
||||||
PG_RETURN_TEXT_P(cstring_to_text(""));
|
PG_RETURN_TEXT_P(cstring_to_text(""));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* find the first field separator */
|
||||||
text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
|
text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
|
||||||
|
|
||||||
/* identify bounds of first field */
|
|
||||||
start_ptr = VARDATA_ANY(inputstring);
|
|
||||||
found = text_position_next(&state);
|
found = text_position_next(&state);
|
||||||
|
|
||||||
/* special case if fldsep not found at all */
|
/* special case if fldsep not found at all */
|
||||||
if (!found)
|
if (!found)
|
||||||
{
|
{
|
||||||
text_position_cleanup(&state);
|
text_position_cleanup(&state);
|
||||||
/* if field 1 requested, return input string, else empty string */
|
/* if first or last field, return input string, else empty string */
|
||||||
if (fldnum == 1)
|
if (fldnum == 1 || fldnum == -1)
|
||||||
PG_RETURN_TEXT_P(inputstring);
|
PG_RETURN_TEXT_P(inputstring);
|
||||||
else
|
else
|
||||||
PG_RETURN_TEXT_P(cstring_to_text(""));
|
PG_RETURN_TEXT_P(cstring_to_text(""));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* take care of a negative field number (i.e. count from the right) by
|
||||||
|
* converting to a positive field number; we need total number of fields
|
||||||
|
*/
|
||||||
|
if (fldnum < 0)
|
||||||
|
{
|
||||||
|
/* we found a fldsep, so there are at least two fields */
|
||||||
|
int numfields = 2;
|
||||||
|
|
||||||
|
while (text_position_next(&state))
|
||||||
|
numfields++;
|
||||||
|
|
||||||
|
/* special case of last field does not require an extra pass */
|
||||||
|
if (fldnum == -1)
|
||||||
|
{
|
||||||
|
start_ptr = text_position_get_match_ptr(&state) + fldsep_len;
|
||||||
|
end_ptr = VARDATA_ANY(inputstring) + inputstring_len;
|
||||||
|
text_position_cleanup(&state);
|
||||||
|
PG_RETURN_TEXT_P(cstring_to_text_with_len(start_ptr,
|
||||||
|
end_ptr - start_ptr));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* else, convert fldnum to positive notation */
|
||||||
|
fldnum += numfields + 1;
|
||||||
|
|
||||||
|
/* if nonexistent field, return empty string */
|
||||||
|
if (fldnum <= 0)
|
||||||
|
{
|
||||||
|
text_position_cleanup(&state);
|
||||||
|
PG_RETURN_TEXT_P(cstring_to_text(""));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* reset to pointing at first match, but now with positive fldnum */
|
||||||
|
text_position_reset(&state);
|
||||||
|
found = text_position_next(&state);
|
||||||
|
Assert(found);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* identify bounds of first field */
|
||||||
|
start_ptr = VARDATA_ANY(inputstring);
|
||||||
end_ptr = text_position_get_match_ptr(&state);
|
end_ptr = text_position_get_match_ptr(&state);
|
||||||
|
|
||||||
while (found && --fldnum > 0)
|
while (found && --fldnum > 0)
|
||||||
|
|
|
@ -1552,8 +1552,56 @@ SELECT replace('yabadoo', 'bad', '') AS "yaoo";
|
||||||
--
|
--
|
||||||
-- test split_part
|
-- test split_part
|
||||||
--
|
--
|
||||||
|
select split_part('','@',1) AS "empty string";
|
||||||
|
empty string
|
||||||
|
--------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
select split_part('','@',-1) AS "empty string";
|
||||||
|
empty string
|
||||||
|
--------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
select split_part('joeuser@mydatabase','',1) AS "joeuser@mydatabase";
|
||||||
|
joeuser@mydatabase
|
||||||
|
--------------------
|
||||||
|
joeuser@mydatabase
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
select split_part('joeuser@mydatabase','',2) AS "empty string";
|
||||||
|
empty string
|
||||||
|
--------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
select split_part('joeuser@mydatabase','',-1) AS "joeuser@mydatabase";
|
||||||
|
joeuser@mydatabase
|
||||||
|
--------------------
|
||||||
|
joeuser@mydatabase
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
select split_part('joeuser@mydatabase','',-2) AS "empty string";
|
||||||
|
empty string
|
||||||
|
--------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
select split_part('joeuser@mydatabase','@',0) AS "an error";
|
select split_part('joeuser@mydatabase','@',0) AS "an error";
|
||||||
ERROR: field position must be greater than zero
|
ERROR: field position must not be zero
|
||||||
|
select split_part('joeuser@mydatabase','@@',1) AS "joeuser@mydatabase";
|
||||||
|
joeuser@mydatabase
|
||||||
|
--------------------
|
||||||
|
joeuser@mydatabase
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
select split_part('joeuser@mydatabase','@@',2) AS "empty string";
|
||||||
|
empty string
|
||||||
|
--------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
select split_part('joeuser@mydatabase','@',1) AS "joeuser";
|
select split_part('joeuser@mydatabase','@',1) AS "joeuser";
|
||||||
joeuser
|
joeuser
|
||||||
---------
|
---------
|
||||||
|
@ -1578,6 +1626,30 @@ select split_part('@joeuser@mydatabase@','@',2) AS "joeuser";
|
||||||
joeuser
|
joeuser
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
select split_part('joeuser@mydatabase','@',-1) AS "mydatabase";
|
||||||
|
mydatabase
|
||||||
|
------------
|
||||||
|
mydatabase
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
select split_part('joeuser@mydatabase','@',-2) AS "joeuser";
|
||||||
|
joeuser
|
||||||
|
---------
|
||||||
|
joeuser
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
select split_part('joeuser@mydatabase','@',-3) AS "empty string";
|
||||||
|
empty string
|
||||||
|
--------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
select split_part('@joeuser@mydatabase@','@',-2) AS "mydatabase";
|
||||||
|
mydatabase
|
||||||
|
------------
|
||||||
|
mydatabase
|
||||||
|
(1 row)
|
||||||
|
|
||||||
--
|
--
|
||||||
-- test to_hex
|
-- test to_hex
|
||||||
--
|
--
|
||||||
|
|
|
@ -533,8 +533,24 @@ SELECT replace('yabadoo', 'bad', '') AS "yaoo";
|
||||||
--
|
--
|
||||||
-- test split_part
|
-- test split_part
|
||||||
--
|
--
|
||||||
|
select split_part('','@',1) AS "empty string";
|
||||||
|
|
||||||
|
select split_part('','@',-1) AS "empty string";
|
||||||
|
|
||||||
|
select split_part('joeuser@mydatabase','',1) AS "joeuser@mydatabase";
|
||||||
|
|
||||||
|
select split_part('joeuser@mydatabase','',2) AS "empty string";
|
||||||
|
|
||||||
|
select split_part('joeuser@mydatabase','',-1) AS "joeuser@mydatabase";
|
||||||
|
|
||||||
|
select split_part('joeuser@mydatabase','',-2) AS "empty string";
|
||||||
|
|
||||||
select split_part('joeuser@mydatabase','@',0) AS "an error";
|
select split_part('joeuser@mydatabase','@',0) AS "an error";
|
||||||
|
|
||||||
|
select split_part('joeuser@mydatabase','@@',1) AS "joeuser@mydatabase";
|
||||||
|
|
||||||
|
select split_part('joeuser@mydatabase','@@',2) AS "empty string";
|
||||||
|
|
||||||
select split_part('joeuser@mydatabase','@',1) AS "joeuser";
|
select split_part('joeuser@mydatabase','@',1) AS "joeuser";
|
||||||
|
|
||||||
select split_part('joeuser@mydatabase','@',2) AS "mydatabase";
|
select split_part('joeuser@mydatabase','@',2) AS "mydatabase";
|
||||||
|
@ -543,6 +559,14 @@ select split_part('joeuser@mydatabase','@',3) AS "empty string";
|
||||||
|
|
||||||
select split_part('@joeuser@mydatabase@','@',2) AS "joeuser";
|
select split_part('@joeuser@mydatabase@','@',2) AS "joeuser";
|
||||||
|
|
||||||
|
select split_part('joeuser@mydatabase','@',-1) AS "mydatabase";
|
||||||
|
|
||||||
|
select split_part('joeuser@mydatabase','@',-2) AS "joeuser";
|
||||||
|
|
||||||
|
select split_part('joeuser@mydatabase','@',-3) AS "empty string";
|
||||||
|
|
||||||
|
select split_part('@joeuser@mydatabase@','@',-2) AS "mydatabase";
|
||||||
|
|
||||||
--
|
--
|
||||||
-- test to_hex
|
-- test to_hex
|
||||||
--
|
--
|
||||||
|
|
Loading…
Reference in New Issue