From 13dbc7a824b3f905904cab51840d37f31a07a9ef Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Wed, 18 Mar 2015 16:01:34 -0300 Subject: [PATCH] array_offset() and array_offsets() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These functions return the offset position or positions of a value in an array. Author: Pavel Stěhule Reviewed by: Jim Nasby --- doc/src/sgml/array.sgml | 19 ++ doc/src/sgml/func.sgml | 49 ++++ src/backend/utils/adt/array_userfuncs.c | 296 ++++++++++++++++++++++++ src/backend/utils/adt/arrayfuncs.c | 20 +- src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_proc.h | 6 + src/include/utils/array.h | 6 +- src/pl/plpgsql/src/pl_exec.c | 2 +- src/test/regress/expected/arrays.out | 100 ++++++++ src/test/regress/sql/arrays.sql | 33 +++ 10 files changed, 525 insertions(+), 8 deletions(-) diff --git a/doc/src/sgml/array.sgml b/doc/src/sgml/array.sgml index 9ea10682a5..092013b83b 100644 --- a/doc/src/sgml/array.sgml +++ b/doc/src/sgml/array.sgml @@ -600,6 +600,25 @@ SELECT * FROM sal_emp WHERE pay_by_quarter && ARRAY[10000]; index, as described in . + + You can also search for specific values in an array using the array_offset + and array_offsets functions. The former returns the position of + the first occurrence of a value in an array; the latter returns an array with the + positions of all occurrences of the value in the array. For example: + + +SELECT array_offset(ARRAY['sun','mon','tue','wed','thu','fri','sat'], 'mon'); + array_offset +-------------- + 2 + +SELECT array_offsets(ARRAY[1, 4, 3, 1, 3, 4, 2, 1], 1); + array_offsets +--------------- + {1,4,8} + + + Arrays are not sets; searching for specific array elements diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index c198beaf39..5843eaa9ff 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -11479,6 +11479,12 @@ SELECT NULLIF(value, '(none)') ... array_lower + + array_offset + + + array_offsets + array_prepend @@ -11596,6 +11602,32 @@ SELECT NULLIF(value, '(none)') ... array_lower('[0:2]={1,2,3}'::int[], 1) 0 + + + + array_offset(anyarray, anyelement , int) + + + int + returns the offset of the first occurrence of the second + argument in the array, starting at the element indicated by the third + argument or at the first element (array must be one-dimensional) + array_offset(ARRAY['sun','mon','tue','wed','thu','fri','sat'], 'mon') + 2 + + + + + array_offsets(anyarray, anyelement) + + + int[] + returns an array of offsets of all occurrences of the second + argument in the array given as first argument (array must be + one-dimensional) + array_offsets(ARRAY['A','A','B','A'], 'A') + {1,2,4} + @@ -11707,6 +11739,23 @@ NULL baz(3 rows) + + In array_offset and array_offsets, + each array element is compared to the searched value using + IS NOT DISTINCT FROM semantics. + + + + In array_offset, NULL is returned + if the value is not found. + + + + In array_offsets, NULL is returned + only if the array is NULL; if the value is not found in + the array, an empty array is returned instead. + + In string_to_array, if the delimiter parameter is NULL, each character in the input string will become a separate element in diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c index 6679333522..57074e0f46 100644 --- a/src/backend/utils/adt/array_userfuncs.c +++ b/src/backend/utils/adt/array_userfuncs.c @@ -12,9 +12,14 @@ */ #include "postgres.h" +#include "catalog/pg_type.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/lsyscache.h" +#include "utils/typcache.h" + + +static Datum array_offset_common(FunctionCallInfo fcinfo); /* @@ -652,3 +657,294 @@ array_agg_array_finalfn(PG_FUNCTION_ARGS) PG_RETURN_DATUM(result); } + +/*----------------------------------------------------------------------------- + * array_offset, array_offset_start : + * return the offset of a value in an array. + * + * IS NOT DISTINCT FROM semantics are used for comparisons. Return NULL when + * the value is not found. + *----------------------------------------------------------------------------- + */ +Datum +array_offset(PG_FUNCTION_ARGS) +{ + return array_offset_common(fcinfo); +} + +Datum +array_offset_start(PG_FUNCTION_ARGS) +{ + return array_offset_common(fcinfo); +} + +/* + * array_offset_common + * Common code for array_offset and array_offset_start + * + * These are separate wrappers for the sake of opr_sanity regression test. + * They are not strict so we have to test for null inputs explicitly. + */ +static Datum +array_offset_common(FunctionCallInfo fcinfo) +{ + ArrayType *array; + Oid collation = PG_GET_COLLATION(); + Oid element_type; + Datum searched_element, + value; + bool isnull; + int offset = 0, + offset_min; + bool found = false; + TypeCacheEntry *typentry; + ArrayMetaState *my_extra; + bool null_search; + ArrayIterator array_iterator; + + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + + array = PG_GETARG_ARRAYTYPE_P(0); + element_type = ARR_ELEMTYPE(array); + + /* + * We refuse to search for elements in multi-dimensional arrays, since we + * have no good way to report the element's location in the array. + */ + if (ARR_NDIM(array) > 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("searching for elements in multidimensional arrays is not supported"))); + + if (PG_ARGISNULL(1)) + { + /* fast return when the array doesn't have have nulls */ + if (!array_contains_nulls(array)) + PG_RETURN_NULL(); + searched_element = (Datum) 0; + null_search = true; + } + else + { + searched_element = PG_GETARG_DATUM(1); + null_search = false; + } + + /* figure out where to start */ + if (PG_NARGS() == 3) + { + if (PG_ARGISNULL(2)) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("initial offset should not be NULL"))); + + offset_min = PG_GETARG_INT32(2); + } + else + offset_min = 1; + + /* + * We arrange to look up type info for array_create_iterator only once per + * series of calls, assuming the element type doesn't change underneath us. + */ + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL) + { + fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(ArrayMetaState)); + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + my_extra->element_type = ~element_type; + } + + if (my_extra->element_type != element_type) + { + get_typlenbyvalalign(element_type, + &my_extra->typlen, + &my_extra->typbyval, + &my_extra->typalign); + + typentry = lookup_type_cache(element_type, TYPECACHE_EQ_OPR_FINFO); + + if (!OidIsValid(typentry->eq_opr_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify an equality operator for type %s", + format_type_be(element_type)))); + + my_extra->element_type = element_type; + fmgr_info(typentry->eq_opr_finfo.fn_oid, &my_extra->proc); + } + + /* Examine each array element until we find a match. */ + array_iterator = array_create_iterator(array, 0, my_extra); + while (array_iterate(array_iterator, &value, &isnull)) + { + offset += 1; + + /* skip initial elements if caller requested so */ + if (offset < offset_min) + continue; + + /* + * Can't look at the array element's value if it's null; but if we + * search for null, we have a hit and are done. + */ + if (isnull || null_search) + { + if (isnull && null_search) + { + found = true; + break; + } + else + continue; + } + + /* not nulls, so run the operator */ + if (DatumGetBool(FunctionCall2Coll(&my_extra->proc, collation, + searched_element, value))) + { + found = true; + break; + } + } + + array_free_iterator(array_iterator); + + /* Avoid leaking memory when handed toasted input */ + PG_FREE_IF_COPY(array, 0); + + if (!found) + PG_RETURN_NULL(); + + PG_RETURN_INT32(offset); +} + +/*----------------------------------------------------------------------------- + * array_offsets : + * return an array of offsets of a value in an array. + * + * IS NOT DISTINCT FROM semantics are used for comparisons. Returns NULL when + * the input array is NULL. When the value is not found in the array, returns + * an empty array. + * + * This is not strict so we have to test for null inputs explicitly. + *----------------------------------------------------------------------------- + */ +Datum +array_offsets(PG_FUNCTION_ARGS) +{ + ArrayType *array; + Oid collation = PG_GET_COLLATION(); + Oid element_type; + Datum searched_element, + value; + bool isnull; + int offset = 0; + TypeCacheEntry *typentry; + ArrayMetaState *my_extra; + bool null_search; + ArrayIterator array_iterator; + ArrayBuildState *astate = NULL; + + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + + array = PG_GETARG_ARRAYTYPE_P(0); + element_type = ARR_ELEMTYPE(array); + + /* + * We refuse to search for elements in multi-dimensional arrays, since we + * have no good way to report the element's location in the array. + */ + if (ARR_NDIM(array) > 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("searching for elements in multidimensional arrays is not supported"))); + + astate = initArrayResult(INT4OID, CurrentMemoryContext, false); + + if (PG_ARGISNULL(1)) + { + /* fast return when the array doesn't have have nulls */ + if (!array_contains_nulls(array)) + PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext)); + searched_element = (Datum) 0; + null_search = true; + } + else + { + searched_element = PG_GETARG_DATUM(1); + null_search = false; + } + + /* + * We arrange to look up type info for array_create_iterator only once per + * series of calls, assuming the element type doesn't change underneath us. + */ + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL) + { + fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(ArrayMetaState)); + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + my_extra->element_type = ~element_type; + } + + if (my_extra->element_type != element_type) + { + get_typlenbyvalalign(element_type, + &my_extra->typlen, + &my_extra->typbyval, + &my_extra->typalign); + + typentry = lookup_type_cache(element_type, TYPECACHE_EQ_OPR_FINFO); + + if (!OidIsValid(typentry->eq_opr_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify an equality operator for type %s", + format_type_be(element_type)))); + + my_extra->element_type = element_type; + fmgr_info(typentry->eq_opr_finfo.fn_oid, &my_extra->proc); + } + + /* + * Accumulate each array offset iff the element matches the given element. + */ + array_iterator = array_create_iterator(array, 0, my_extra); + while (array_iterate(array_iterator, &value, &isnull)) + { + offset += 1; + + /* + * Can't look at the array element's value if it's null; but if we + * search for null, we have a hit. + */ + if (isnull || null_search) + { + if (isnull && null_search) + astate = + accumArrayResult(astate, Int32GetDatum(offset), false, + INT4OID, CurrentMemoryContext); + + continue; + } + + /* not nulls, so run the operator */ + if (DatumGetBool(FunctionCall2Coll(&my_extra->proc, collation, + searched_element, value))) + astate = + accumArrayResult(astate, Int32GetDatum(offset), false, + INT4OID, CurrentMemoryContext); + } + + array_free_iterator(array_iterator); + + /* Avoid leaking memory when handed toasted input */ + PG_FREE_IF_COPY(array, 0); + + PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext)); +} diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index 54979fa636..9117a5515a 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -3989,7 +3989,7 @@ arraycontained(PG_FUNCTION_ARGS) * The passed-in array must remain valid for the lifetime of the iterator. */ ArrayIterator -array_create_iterator(ArrayType *arr, int slice_ndim) +array_create_iterator(ArrayType *arr, int slice_ndim, ArrayMetaState *mstate) { ArrayIterator iterator = palloc0(sizeof(ArrayIteratorData)); @@ -4006,10 +4006,20 @@ array_create_iterator(ArrayType *arr, int slice_ndim) iterator->arr = arr; iterator->nullbitmap = ARR_NULLBITMAP(arr); iterator->nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); - get_typlenbyvalalign(ARR_ELEMTYPE(arr), - &iterator->typlen, - &iterator->typbyval, - &iterator->typalign); + + if (mstate != NULL) + { + Assert(mstate->element_type == ARR_ELEMTYPE(arr)); + + iterator->typlen = mstate->typlen; + iterator->typbyval = mstate->typbyval; + iterator->typalign = mstate->typalign; + } + else + get_typlenbyvalalign(ARR_ELEMTYPE(arr), + &iterator->typlen, + &iterator->typbyval, + &iterator->typalign); /* * Remember the slicing parameters. diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 479fc91d46..0c435c2ba6 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201503151 +#define CATALOG_VERSION_NO 201503181 #endif diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index b8a3660122..6a757f3d64 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -895,6 +895,12 @@ DATA(insert OID = 515 ( array_larger PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 DESCR("larger of two"); DATA(insert OID = 516 ( array_smaller PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 2277 "2277 2277" _null_ _null_ _null_ _null_ array_smaller _null_ _null_ _null_ )); DESCR("smaller of two"); +DATA(insert OID = 3277 ( array_offset PGNSP PGUID 12 1 0 0 0 f f f f f f i 2 0 23 "2277 2283" _null_ _null_ _null_ _null_ array_offset _null_ _null_ _null_ )); +DESCR("returns a offset of value in array"); +DATA(insert OID = 3278 ( array_offset PGNSP PGUID 12 1 0 0 0 f f f f f f i 3 0 23 "2277 2283 23" _null_ _null_ _null_ _null_ array_offset_start _null_ _null_ _null_ )); +DESCR("returns a offset of value in array with start index"); +DATA(insert OID = 3279 ( array_offsets PGNSP PGUID 12 1 0 0 0 f f f f f f i 2 0 1007 "2277 2283" _null_ _null_ _null_ _null_ array_offsets _null_ _null_ _null_ )); +DESCR("returns a array of offsets of some value in array"); DATA(insert OID = 1191 ( generate_subscripts PGNSP PGUID 12 1 1000 0 0 f f f f t t i 3 0 23 "2277 23 16" _null_ _null_ _null_ _null_ generate_subscripts _null_ _null_ _null_ )); DESCR("array subscripts generator"); DATA(insert OID = 1192 ( generate_subscripts PGNSP PGUID 12 1 1000 0 0 f f f f t t i 2 0 23 "2277 23" _null_ _null_ _null_ _null_ generate_subscripts_nodir _null_ _null_ _null_ )); diff --git a/src/include/utils/array.h b/src/include/utils/array.h index 649688ca0a..b78b42abdd 100644 --- a/src/include/utils/array.h +++ b/src/include/utils/array.h @@ -323,7 +323,7 @@ extern ArrayBuildStateAny *accumArrayResultAny(ArrayBuildStateAny *astate, extern Datum makeArrayResultAny(ArrayBuildStateAny *astate, MemoryContext rcontext, bool release); -extern ArrayIterator array_create_iterator(ArrayType *arr, int slice_ndim); +extern ArrayIterator array_create_iterator(ArrayType *arr, int slice_ndim, ArrayMetaState *mstate); extern bool array_iterate(ArrayIterator iterator, Datum *value, bool *isnull); extern void array_free_iterator(ArrayIterator iterator); @@ -358,6 +358,10 @@ extern Datum array_agg_finalfn(PG_FUNCTION_ARGS); extern Datum array_agg_array_transfn(PG_FUNCTION_ARGS); extern Datum array_agg_array_finalfn(PG_FUNCTION_ARGS); +extern Datum array_offset(PG_FUNCTION_ARGS); +extern Datum array_offset_start(PG_FUNCTION_ARGS); +extern Datum array_offsets(PG_FUNCTION_ARGS); + /* * prototypes for functions defined in array_typanalyze.c */ diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c index e332fa0727..6a9354092b 100644 --- a/src/pl/plpgsql/src/pl_exec.c +++ b/src/pl/plpgsql/src/pl_exec.c @@ -2315,7 +2315,7 @@ exec_stmt_foreach_a(PLpgSQL_execstate *estate, PLpgSQL_stmt_foreach_a *stmt) errmsg("FOREACH loop variable must not be of an array type"))); /* Create an iterator to step through the array */ - array_iterator = array_create_iterator(arr, stmt->slice); + array_iterator = array_create_iterator(arr, stmt->slice, NULL); /* Identify iterator result type */ if (stmt->slice > 0) diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out index d33c9b90ff..14d6d32990 100644 --- a/src/test/regress/expected/arrays.out +++ b/src/test/regress/expected/arrays.out @@ -366,6 +366,106 @@ SELECT array_cat(ARRAY[[3,4],[5,6]], ARRAY[1,2]) AS "{{3,4},{5,6},{1,2}}"; {{3,4},{5,6},{1,2}} (1 row) +SELECT array_offset(ARRAY[1,2,3,4,5], 4); + array_offset +-------------- + 4 +(1 row) + +SELECT array_offset(ARRAY[5,3,4,2,1], 4); + array_offset +-------------- + 3 +(1 row) + +SELECT array_offset(ARRAY[[1,2],[3,4]], 3); +ERROR: searching for elements in multidimensional arrays is not supported +SELECT array_offset(ARRAY['sun','mon','tue','wed','thu','fri','sat'], 'mon'); + array_offset +-------------- + 2 +(1 row) + +SELECT array_offset(ARRAY['sun','mon','tue','wed','thu','fri','sat'], 'sat'); + array_offset +-------------- + 7 +(1 row) + +SELECT array_offset(ARRAY['sun','mon','tue','wed','thu','fri','sat'], NULL); + array_offset +-------------- + +(1 row) + +SELECT array_offset(ARRAY['sun','mon','tue','wed','thu',NULL,'fri','sat'], NULL); + array_offset +-------------- + 6 +(1 row) + +SELECT array_offset(ARRAY['sun','mon','tue','wed','thu',NULL,'fri','sat'], 'sat'); + array_offset +-------------- + 8 +(1 row) + +SELECT array_offsets(NULL, 10); + array_offsets +--------------- + +(1 row) + +SELECT array_offsets(NULL, NULL::int); + array_offsets +--------------- + +(1 row) + +SELECT array_offsets(ARRAY[1,2,3,4,5,6,1,2,3,4,5,6], 4); + array_offsets +--------------- + {4,10} +(1 row) + +SELECT array_offsets(ARRAY[[1,2],[3,4]], 4); +ERROR: searching for elements in multidimensional arrays is not supported +SELECT array_offsets(ARRAY[1,2,3,4,5,6,1,2,3,4,5,6], NULL); + array_offsets +--------------- + {} +(1 row) + +SELECT array_offsets(ARRAY[1,2,3,NULL,5,6,1,2,3,NULL,5,6], NULL); + array_offsets +--------------- + {4,10} +(1 row) + +SELECT array_length(array_offsets(ARRAY(SELECT 'AAAAAAAAAAAAAAAAAAAAAAAAA'::text || i % 10 + FROM generate_series(1,100) g(i)), + 'AAAAAAAAAAAAAAAAAAAAAAAAA5'), 1); + array_length +-------------- + 10 +(1 row) + +DO $$ +DECLARE + o int; + a int[] := ARRAY[1,2,3,2,3,1,2]; +BEGIN + o := array_offset(a, 2); + WHILE o IS NOT NULL + LOOP + RAISE NOTICE '%', o; + o := array_offset(a, 2, o + 1); + END LOOP; +END +$$ LANGUAGE plpgsql; +NOTICE: 2 +NOTICE: 4 +NOTICE: 7 -- operators SELECT a FROM arrtest WHERE b = ARRAY[[[113,142],[1,147]]]; a diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql index 733c19bed8..40950a2c20 100644 --- a/src/test/regress/sql/arrays.sql +++ b/src/test/regress/sql/arrays.sql @@ -185,6 +185,39 @@ SELECT array_cat(ARRAY[1,2], ARRAY[3,4]) AS "{1,2,3,4}"; SELECT array_cat(ARRAY[1,2], ARRAY[[3,4],[5,6]]) AS "{{1,2},{3,4},{5,6}}"; SELECT array_cat(ARRAY[[3,4],[5,6]], ARRAY[1,2]) AS "{{3,4},{5,6},{1,2}}"; +SELECT array_offset(ARRAY[1,2,3,4,5], 4); +SELECT array_offset(ARRAY[5,3,4,2,1], 4); +SELECT array_offset(ARRAY[[1,2],[3,4]], 3); +SELECT array_offset(ARRAY['sun','mon','tue','wed','thu','fri','sat'], 'mon'); +SELECT array_offset(ARRAY['sun','mon','tue','wed','thu','fri','sat'], 'sat'); +SELECT array_offset(ARRAY['sun','mon','tue','wed','thu','fri','sat'], NULL); +SELECT array_offset(ARRAY['sun','mon','tue','wed','thu',NULL,'fri','sat'], NULL); +SELECT array_offset(ARRAY['sun','mon','tue','wed','thu',NULL,'fri','sat'], 'sat'); + +SELECT array_offsets(NULL, 10); +SELECT array_offsets(NULL, NULL::int); +SELECT array_offsets(ARRAY[1,2,3,4,5,6,1,2,3,4,5,6], 4); +SELECT array_offsets(ARRAY[[1,2],[3,4]], 4); +SELECT array_offsets(ARRAY[1,2,3,4,5,6,1,2,3,4,5,6], NULL); +SELECT array_offsets(ARRAY[1,2,3,NULL,5,6,1,2,3,NULL,5,6], NULL); +SELECT array_length(array_offsets(ARRAY(SELECT 'AAAAAAAAAAAAAAAAAAAAAAAAA'::text || i % 10 + FROM generate_series(1,100) g(i)), + 'AAAAAAAAAAAAAAAAAAAAAAAAA5'), 1); + +DO $$ +DECLARE + o int; + a int[] := ARRAY[1,2,3,2,3,1,2]; +BEGIN + o := array_offset(a, 2); + WHILE o IS NOT NULL + LOOP + RAISE NOTICE '%', o; + o := array_offset(a, 2, o + 1); + END LOOP; +END +$$ LANGUAGE plpgsql; + -- operators SELECT a FROM arrtest WHERE b = ARRAY[[[113,142],[1,147]]]; SELECT NOT ARRAY[1.1,1.2,1.3] = ARRAY[1.1,1.2,1.3] AS "FALSE";