postgresql/src/include/utils/arrayaccess.h

/*-------------------------------------------------------------------------
 *
 * arrayaccess.h
 *	  Declarations for element-by-element access to Postgres arrays.
 *
 *
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/utils/arrayaccess.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef ARRAYACCESS_H
#define ARRAYACCESS_H

#include "access/tupmacs.h"
#include "utils/array.h"


/*
 * Functions for iterating through elements of a flat or expanded array.
 * These require a state struct "array_iter iter".
 *
 * Use "array_iter_setup(&iter, arrayptr);" to prepare to iterate, and
 * "datumvar = array_iter_next(&iter, &isnullvar, index, ...);" to fetch
 * the next element into datumvar/isnullvar.
 * "index" must be the zero-origin element number; we make caller provide
 * this since caller is generally counting the elements anyway.  Despite
 * that, these functions can only fetch elements sequentially.
 */

/*
 * State for one sequential pass over an array's elements.
 *
 * array_iter_setup() fills in every field; array_iter_next() then reads
 * either the "expanded" pair (datumptr/isnullptr) or the "flat" group
 * (dataptr/bitmapptr/bitmask), never both.
 */
typedef struct array_iter
{
	/*
	 * datumptr is non-NULL only when we iterate over an expanded array's
	 * deconstructed Datum array; when it is NULL the flat-array fields below
	 * are used instead (this includes an expanded array that only carries a
	 * flat value).
	 */

	/* Fields used when we have an expanded array */
	Datum	   *datumptr;		/* Element values, indexed by element number */
	bool	   *isnullptr;		/* Per-element null flags; NULL means none
								 * of the elements is reported as null */

	/* Fields used when we have a flat array */
	char	   *dataptr;		/* Next unread element in the data area;
								 * advanced only past non-null elements */
	bits8	   *bitmapptr;		/* Current byte of the nulls bitmap, or NULL
								 * if there is no bitmap to consult */
	int			bitmask;		/* Bit within *bitmapptr for the current
								 * element; starts at 1 and wraps back to 1
								 * after reaching 0x100 */
} array_iter;


/*
 * array_iter_setup
 *		Prepare *it for fetching the elements of array "a" one after another.
 *
 * If "a" is an expanded array that has its Datum array available, the
 * iterator reads elements straight from that array (and its isnull array).
 * In every other case it walks a flat array: either the one embedded in the
 * expanded object, or "a" itself when it is not expanded at all.
 *
 * All fields of *it are assigned on every path.
 */
static inline void
array_iter_setup(array_iter *it, AnyArrayType *a)
{
	/* Until we learn otherwise, treat the input itself as the flat array */
	ArrayType  *flat = (ArrayType *) a;

	/* The nulls-bitmap scan always begins with the lowest bit */
	it->bitmask = 1;

	if (VARATT_IS_EXPANDED_HEADER(a))
	{
		if (a->xpn.dvalues != NULL)
		{
			/* Deconstructed form is available: index directly into it */
			it->datumptr = a->xpn.dvalues;
			it->isnullptr = a->xpn.dnulls;
			/* flat-array fields are unused, but set them anyway */
			it->dataptr = NULL;
			it->bitmapptr = NULL;
			return;
		}

		/* Expanded object holding only a flat array: iterate over that */
		flat = a->xpn.fvalue;
	}

	/* Flat-array iteration; a NULL datumptr is what signals this mode */
	it->datumptr = NULL;
	it->isnullptr = NULL;
	it->dataptr = ARR_DATA_PTR(flat);
	it->bitmapptr = ARR_NULLBITMAP(flat);
}

/*
 * array_iter_next
 *		Fetch the next element of the array that *it was set up for.
 *
 * it		iterator state prepared by array_iter_setup()
 * isnull	output: set to true if the element is null, else false
 * i		zero-origin number of the element being fetched; used only to
 *			index the Datum/isnull arrays of an expanded array
 * elmlen, elmbyval, elmalign
 *			storage properties of the element type; used only when walking
 *			a flat array
 *
 * Returns the element's Datum, or (Datum) 0 for a null element of a flat
 * array.  In the flat case the iterator's data pointer and bitmap position
 * are advanced, so elements must be requested in sequence.
 */
static inline Datum
array_iter_next(array_iter *it, bool *isnull, int i,
				int elmlen, bool elmbyval, char elmalign)
{
	Datum		result;
	bool		elem_is_null;

	/* Expanded array with Datum array: plain indexed lookup, no state */
	if (it->datumptr != NULL)
	{
		result = it->datumptr[i];
		*isnull = (it->isnullptr != NULL) ? it->isnullptr[i] : false;
		return result;
	}

	/* Flat array: a zero bit in the nulls bitmap marks a null element */
	elem_is_null = (it->bitmapptr != NULL &&
					(*(it->bitmapptr) & it->bitmask) == 0);
	*isnull = elem_is_null;

	if (elem_is_null)
	{
		/* Nulls occupy no space in the data area; dataptr stays put */
		result = (Datum) 0;
	}
	else
	{
		char	   *cur = it->dataptr;

		result = fetch_att(cur, elmbyval, elmlen);

		/* Step over this element, then realign for the following one */
		cur = att_addlength_pointer(cur, elmlen, cur);
		it->dataptr = (char *) att_align_nominal(cur, elmalign);
	}

	/* Move to the next bitmap bit, rolling over to the next byte after 8 */
	it->bitmask <<= 1;
	if (it->bitmask == 0x100)
	{
		it->bitmask = 1;
		if (it->bitmapptr != NULL)
			it->bitmapptr++;
	}

	return result;
}

#endif							/* ARRAYACCESS_H */
Support "expanded" objects, particularly arrays, for better performance. This patch introduces the ability for complex datatypes to have an in-memory representation that is different from their on-disk format. On-disk formats are typically optimized for minimal size, and in any case they can't contain pointers, so they are often not well-suited for computation. Now a datatype can invent an "expanded" in-memory format that is better suited for its operations, and then pass that around among the C functions that operate on the datatype. There are also provisions (rudimentary as yet) to allow an expanded object to be modified in-place under suitable conditions, so that operations like assignment to an element of an array need not involve copying the entire array. The initial application for this feature is arrays, but it is not hard to foresee using it for other container types like JSON, XML and hstore. I have hopes that it will be useful to PostGIS as well. In this initial implementation, a few heuristics have been hard-wired into plpgsql to improve performance for arrays that are stored in plpgsql variables. We would like to generalize those hacks so that other datatypes can obtain similar improvements, but figuring out some appropriate APIs is left as a task for future work. (The heuristics themselves are probably not optimal yet, either, as they sometimes force expansion of arrays that would be better left alone.) Preliminary performance testing shows impressive speed gains for plpgsql functions that do element-by-element access or update of large arrays. There are other cases that get a little slower, as a result of added array format conversions; but we can hope to improve anything that's annoyingly bad. In any case most applications should see a net win. Tom Lane, reviewed by Andres Freund 2015-05-14 18:08:40 +02:00			`/*-------------------------------------------------------------------------`
			`*`
			`* arrayaccess.h`
			`* Declarations for element-by-element access to Postgres arrays.`
			`*`
			`*`
Update copyright for 2021 Backpatch-through: 9.5 2021-01-02 19:06:25 +01:00			`* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group`
Support "expanded" objects, particularly arrays, for better performance. This patch introduces the ability for complex datatypes to have an in-memory representation that is different from their on-disk format. On-disk formats are typically optimized for minimal size, and in any case they can't contain pointers, so they are often not well-suited for computation. Now a datatype can invent an "expanded" in-memory format that is better suited for its operations, and then pass that around among the C functions that operate on the datatype. There are also provisions (rudimentary as yet) to allow an expanded object to be modified in-place under suitable conditions, so that operations like assignment to an element of an array need not involve copying the entire array. The initial application for this feature is arrays, but it is not hard to foresee using it for other container types like JSON, XML and hstore. I have hopes that it will be useful to PostGIS as well. In this initial implementation, a few heuristics have been hard-wired into plpgsql to improve performance for arrays that are stored in plpgsql variables. We would like to generalize those hacks so that other datatypes can obtain similar improvements, but figuring out some appropriate APIs is left as a task for future work. (The heuristics themselves are probably not optimal yet, either, as they sometimes force expansion of arrays that would be better left alone.) Preliminary performance testing shows impressive speed gains for plpgsql functions that do element-by-element access or update of large arrays. There are other cases that get a little slower, as a result of added array format conversions; but we can hope to improve anything that's annoyingly bad. In any case most applications should see a net win. Tom Lane, reviewed by Andres Freund 2015-05-14 18:08:40 +02:00			`* Portions Copyright (c) 1994, Regents of the University of California`
			`*`
			`* src/include/utils/arrayaccess.h`
			`*`
			`*-------------------------------------------------------------------------`
			`*/`
			`#ifndef ARRAYACCESS_H`
			`#define ARRAYACCESS_H`

			`#include "access/tupmacs.h"`
			`#include "utils/array.h"`


			`/*`
			`* Functions for iterating through elements of a flat or expanded array.`
			`* These require a state struct "array_iter iter".`
			`*`
			`* Use "array_iter_setup(&iter, arrayptr);" to prepare to iterate, and`
			`* "datumvar = array_iter_next(&iter, &isnullvar, index, ...);" to fetch`
			`* the next element into datumvar/isnullvar.`
			`* "index" must be the zero-origin element number; we make caller provide`
			`* this since caller is generally counting the elements anyway. Despite`
			`* that, these functions can only fetch elements sequentially.`
			`*/`

			`typedef struct array_iter`
			`{`
			`/* datumptr being NULL or not tells if we have flat or expanded array */`

			`/* Fields used when we have an expanded array */`
			`Datum *datumptr; /* Pointer to Datum array */`
			`bool *isnullptr; /* Pointer to isnull array */`

			`/* Fields used when we have a flat array */`
			`char *dataptr; /* Current spot in the data area */`
			`bits8 *bitmapptr; /* Current byte of the nulls bitmap, or NULL */`
			`int bitmask; /* mask for current bit in nulls bitmap */`
			`} array_iter;`


Rely on inline functions even if that causes warnings in older compilers. So far we have worked around the fact that some very old compilers do not support 'inline' functions by only using inline functions conditionally (or not at all). Since such compilers are very rare by now, we have decided to rely on inline functions from 9.6 onwards. To avoid breaking these old compilers inline is defined away when not supported. That'll cause "function x defined but not used" type of warnings, but since nobody develops on such compilers anymore that's ok. This change in policy will allow us to more easily employ inline functions. I chose to remove code previously conditional on PG_USE_INLINE as it seemed confusing to have code dependent on a define that's always defined. Blacklisting of compilers, like in c53f73879f, now has to be done differently. A platform template can define PG_FORCE_DISABLE_INLINE to force inline to be defined empty. Discussion: 20150701161447.GB30708@awork2.anarazel.de 2015-08-05 18:19:52 +02:00			`static inline void`
Support "expanded" objects, particularly arrays, for better performance. This patch introduces the ability for complex datatypes to have an in-memory representation that is different from their on-disk format. On-disk formats are typically optimized for minimal size, and in any case they can't contain pointers, so they are often not well-suited for computation. Now a datatype can invent an "expanded" in-memory format that is better suited for its operations, and then pass that around among the C functions that operate on the datatype. There are also provisions (rudimentary as yet) to allow an expanded object to be modified in-place under suitable conditions, so that operations like assignment to an element of an array need not involve copying the entire array. The initial application for this feature is arrays, but it is not hard to foresee using it for other container types like JSON, XML and hstore. I have hopes that it will be useful to PostGIS as well. In this initial implementation, a few heuristics have been hard-wired into plpgsql to improve performance for arrays that are stored in plpgsql variables. We would like to generalize those hacks so that other datatypes can obtain similar improvements, but figuring out some appropriate APIs is left as a task for future work. (The heuristics themselves are probably not optimal yet, either, as they sometimes force expansion of arrays that would be better left alone.) Preliminary performance testing shows impressive speed gains for plpgsql functions that do element-by-element access or update of large arrays. There are other cases that get a little slower, as a result of added array format conversions; but we can hope to improve anything that's annoyingly bad. In any case most applications should see a net win. Tom Lane, reviewed by Andres Freund 2015-05-14 18:08:40 +02:00			`array_iter_setup(array_iter *it, AnyArrayType *a)`
			`{`
			`if (VARATT_IS_EXPANDED_HEADER(a))`
			`{`
			`if (a->xpn.dvalues)`
			`{`
			`it->datumptr = a->xpn.dvalues;`
			`it->isnullptr = a->xpn.dnulls;`
			`/* we must fill all fields to prevent compiler warnings */`
			`it->dataptr = NULL;`
			`it->bitmapptr = NULL;`
			`}`
			`else`
			`{`
			`/* Work with flat array embedded in the expanded datum */`
			`it->datumptr = NULL;`
			`it->isnullptr = NULL;`
			`it->dataptr = ARR_DATA_PTR(a->xpn.fvalue);`
			`it->bitmapptr = ARR_NULLBITMAP(a->xpn.fvalue);`
			`}`
			`}`
			`else`
			`{`
			`it->datumptr = NULL;`
			`it->isnullptr = NULL;`
Don't read fields of a misaligned ExpandedObjectHeader or AnyArrayType. UBSan complains about this. Instead, cast to a suitable type requiring only 4-byte alignment. DatumGetAnyArrayP() already assumes one can cast between AnyArrayType and ArrayType, so this doesn't introduce a new assumption. Back-patch to 9.5, where AnyArrayType was introduced. Reviewed by Tom Lane. Discussion: https://postgr.es/m/20190629210334.GA1244217@rfd.leadboat.com 2019-07-01 02:34:17 +02:00			`it->dataptr = ARR_DATA_PTR((ArrayType *) a);`
			`it->bitmapptr = ARR_NULLBITMAP((ArrayType *) a);`
Support "expanded" objects, particularly arrays, for better performance. This patch introduces the ability for complex datatypes to have an in-memory representation that is different from their on-disk format. On-disk formats are typically optimized for minimal size, and in any case they can't contain pointers, so they are often not well-suited for computation. Now a datatype can invent an "expanded" in-memory format that is better suited for its operations, and then pass that around among the C functions that operate on the datatype. There are also provisions (rudimentary as yet) to allow an expanded object to be modified in-place under suitable conditions, so that operations like assignment to an element of an array need not involve copying the entire array. The initial application for this feature is arrays, but it is not hard to foresee using it for other container types like JSON, XML and hstore. I have hopes that it will be useful to PostGIS as well. In this initial implementation, a few heuristics have been hard-wired into plpgsql to improve performance for arrays that are stored in plpgsql variables. We would like to generalize those hacks so that other datatypes can obtain similar improvements, but figuring out some appropriate APIs is left as a task for future work. (The heuristics themselves are probably not optimal yet, either, as they sometimes force expansion of arrays that would be better left alone.) Preliminary performance testing shows impressive speed gains for plpgsql functions that do element-by-element access or update of large arrays. There are other cases that get a little slower, as a result of added array format conversions; but we can hope to improve anything that's annoyingly bad. In any case most applications should see a net win. Tom Lane, reviewed by Andres Freund 2015-05-14 18:08:40 +02:00			`}`
			`it->bitmask = 1;`
			`}`

Rely on inline functions even if that causes warnings in older compilers. So far we have worked around the fact that some very old compilers do not support 'inline' functions by only using inline functions conditionally (or not at all). Since such compilers are very rare by now, we have decided to rely on inline functions from 9.6 onwards. To avoid breaking these old compilers inline is defined away when not supported. That'll cause "function x defined but not used" type of warnings, but since nobody develops on such compilers anymore that's ok. This change in policy will allow us to more easily employ inline functions. I chose to remove code previously conditional on PG_USE_INLINE as it seemed confusing to have code dependent on a define that's always defined. Blacklisting of compilers, like in c53f73879f, now has to be done differently. A platform template can define PG_FORCE_DISABLE_INLINE to force inline to be defined empty. Discussion: 20150701161447.GB30708@awork2.anarazel.de 2015-08-05 18:19:52 +02:00			`static inline Datum`
Support "expanded" objects, particularly arrays, for better performance. This patch introduces the ability for complex datatypes to have an in-memory representation that is different from their on-disk format. On-disk formats are typically optimized for minimal size, and in any case they can't contain pointers, so they are often not well-suited for computation. Now a datatype can invent an "expanded" in-memory format that is better suited for its operations, and then pass that around among the C functions that operate on the datatype. There are also provisions (rudimentary as yet) to allow an expanded object to be modified in-place under suitable conditions, so that operations like assignment to an element of an array need not involve copying the entire array. The initial application for this feature is arrays, but it is not hard to foresee using it for other container types like JSON, XML and hstore. I have hopes that it will be useful to PostGIS as well. In this initial implementation, a few heuristics have been hard-wired into plpgsql to improve performance for arrays that are stored in plpgsql variables. We would like to generalize those hacks so that other datatypes can obtain similar improvements, but figuring out some appropriate APIs is left as a task for future work. (The heuristics themselves are probably not optimal yet, either, as they sometimes force expansion of arrays that would be better left alone.) Preliminary performance testing shows impressive speed gains for plpgsql functions that do element-by-element access or update of large arrays. There are other cases that get a little slower, as a result of added array format conversions; but we can hope to improve anything that's annoyingly bad. In any case most applications should see a net win. Tom Lane, reviewed by Andres Freund 2015-05-14 18:08:40 +02:00			`array_iter_next(array_iter *it, bool *isnull, int i,`
			`int elmlen, bool elmbyval, char elmalign)`
			`{`
			`Datum ret;`

			`if (it->datumptr)`
			`{`
			`ret = it->datumptr[i];`
			`*isnull = it->isnullptr ? it->isnullptr[i] : false;`
			`}`
			`else`
			`{`
			`if (it->bitmapptr && (*(it->bitmapptr) & it->bitmask) == 0)`
			`{`
			`*isnull = true;`
			`ret = (Datum) 0;`
			`}`
			`else`
			`{`
			`*isnull = false;`
			`ret = fetch_att(it->dataptr, elmbyval, elmlen);`
			`it->dataptr = att_addlength_pointer(it->dataptr, elmlen,`
			`it->dataptr);`
			`it->dataptr = (char *) att_align_nominal(it->dataptr, elmalign);`
			`}`
			`it->bitmask <<= 1;`
			`if (it->bitmask == 0x100)`
			`{`
			`if (it->bitmapptr)`
			`it->bitmapptr++;`
			`it->bitmask = 1;`
			`}`
			`}`

			`return ret;`
			`}`

Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us 2017-06-21 21:18:54 +02:00			`#endif /* ARRAYACCESS_H */`