postgresql/src/backend/access/common/tupconvert.c

391 lines
11 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
 *
 * tupconvert.c
 *	  Tuple conversion support.
 *
 * These functions provide conversion between rowtypes that are logically
 * equivalent but might have columns in a different order or different sets
 * of dropped columns.  There is some overlap of functionality with the
 * executor's "junkfilter" routines, but these functions work on bare
 * HeapTuples rather than TupleTableSlots.
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/access/common/tupconvert.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"
#include "access/htup_details.h"
#include "access/tupconvert.h"
#include "utils/builtins.h"
/*
* The conversion setup routines have the following common API:
*
* The setup routine checks whether the given source and destination tuple
* descriptors are logically compatible. If not, it throws an error.
* If so, it returns NULL if they are physically compatible (ie, no conversion
* is needed), else a TupleConversionMap that can be used by do_convert_tuple
* to perform the conversion.
*
* The TupleConversionMap, if needed, is palloc'd in the caller's memory
* context. Also, the given tuple descriptors are referenced by the map,
* so they must survive as long as the map is needed.
*
* The caller must supply a suitable primary error message to be used if
* a compatibility error is thrown. Recommended coding practice is to use
* gettext_noop() on this string, so that it is translatable but won't
* actually be translated unless the error gets thrown.
*
*
* Implementation notes:
*
* The key component of a TupleConversionMap is an attrMap[] array with
* one entry per output column. This entry contains the 1-based index of
* the corresponding input column, or zero to force a NULL value (for
* a dropped output column). The TupleConversionMap also contains workspace
* arrays.
*/
/*
 * Set up for tuple conversion, matching input and output columns by
 * position.  (Dropped columns are ignored in both input and output.)
 *
 * indesc describes the tuples that will be fed to the conversion, outdesc
 * describes the tuples that must come out of it, and msg is the primary
 * error message to use if the two rowtypes turn out to be incompatible.
 *
 * Returns NULL if no runtime conversion is needed, else a palloc'd
 * TupleConversionMap.  The map stores the given descriptor pointers as-is.
 * Reports ERRCODE_DATATYPE_MISMATCH if a matched column pair differs in
 * type (or in typmod, when the output column has a non-negative typmod),
 * or if the counts of non-dropped columns differ.
 *
 * Note: the errdetail messages speak of indesc as the "returned" rowtype,
 * outdesc as the "expected" rowtype.  This is okay for current uses but
 * might need generalization in future.
 */
TupleConversionMap *
convert_tuples_by_position(TupleDesc indesc,
						   TupleDesc outdesc,
						   const char *msg)
{
	TupleConversionMap *map;
	AttrNumber *attrMap;
	int			nincols;
	int			noutcols;
	int			n;
	int			i;
	int			j;
	bool		same;

	/* Verify compatibility and prepare attribute-number map */
	n = outdesc->natts;
	attrMap = (AttrNumber *) palloc0(n * sizeof(AttrNumber));
	j = 0;						/* j is next physical input attribute */
	nincols = noutcols = 0;		/* these count non-dropped attributes */
	same = true;
	for (i = 0; i < n; i++)
	{
		Form_pg_attribute att = outdesc->attrs[i];
		Oid			atttypid;
		int32		atttypmod;

		if (att->attisdropped)
			continue;			/* attrMap[i] is already 0 */
		noutcols++;
		atttypid = att->atttypid;
		atttypmod = att->atttypmod;

		/* Advance j to the next non-dropped input column, if any */
		for (; j < indesc->natts; j++)
		{
			att = indesc->attrs[j];
			if (att->attisdropped)
				continue;
			nincols++;

			/*
			 * Found matching column, check type.  A negative output typmod
			 * accepts any input typmod.
			 */
			if (atttypid != att->atttypid ||
				(atttypmod != att->atttypmod && atttypmod >= 0))
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg_internal("%s", _(msg)),
						 errdetail("Returned type %s does not match expected type %s in column %d.",
								   format_type_with_typemod(att->atttypid,
															att->atttypmod),
								   format_type_with_typemod(atttypid,
															atttypmod),
								   noutcols)));
			attrMap[i] = (AttrNumber) (j + 1);
			j++;
			break;
		}

		/* Ran out of input columns before satisfying this output column */
		if (attrMap[i] == 0)
			same = false;		/* we'll complain below */
	}

	/* Check for unused input columns */
	for (; j < indesc->natts; j++)
	{
		if (indesc->attrs[j]->attisdropped)
			continue;
		nincols++;
		same = false;			/* we'll complain below */
	}

	/* Report column count mismatch using the non-dropped-column counts */
	if (!same)
		ereport(ERROR,
				(errcode(ERRCODE_DATATYPE_MISMATCH),
				 errmsg_internal("%s", _(msg)),
				 errdetail("Number of returned columns (%d) does not match "
						   "expected column count (%d).",
						   nincols, noutcols)));

	/*
	 * Check to see if the map is one-to-one, in which case we need not do a
	 * tuple conversion.  We must also insist that both tupdescs either
	 * specify or don't specify an OID column, else we need a conversion to
	 * add/remove space for that.  (For some callers, presence or absence of
	 * an OID column perhaps would not really matter, but let's be safe.)
	 */
	if (indesc->natts == outdesc->natts &&
		indesc->tdhasoid == outdesc->tdhasoid)
	{
		for (i = 0; i < n; i++)
		{
			if (attrMap[i] == (i + 1))
				continue;

			/*
			 * If it's a dropped column and the corresponding input column is
			 * also dropped, we needn't convert.  However, attlen and attalign
			 * must agree.
			 */
			if (attrMap[i] == 0 &&
				indesc->attrs[i]->attisdropped &&
				indesc->attrs[i]->attlen == outdesc->attrs[i]->attlen &&
				indesc->attrs[i]->attalign == outdesc->attrs[i]->attalign)
				continue;

			same = false;
			break;
		}
	}
	else
		same = false;

	if (same)
	{
		/* Runtime conversion is not needed */
		pfree(attrMap);
		return NULL;
	}

	/* Prepare the map structure */
	map = (TupleConversionMap *) palloc(sizeof(TupleConversionMap));
	map->indesc = indesc;
	map->outdesc = outdesc;
	map->attrMap = attrMap;
	/* preallocate workspace for Datum arrays */
	map->outvalues = (Datum *) palloc(n * sizeof(Datum));
	map->outisnull = (bool *) palloc(n * sizeof(bool));
	n = indesc->natts + 1;		/* +1 for NULL */
	map->invalues = (Datum *) palloc(n * sizeof(Datum));
	map->inisnull = (bool *) palloc(n * sizeof(bool));
	map->invalues[0] = (Datum) 0;	/* set up the NULL entry */
	map->inisnull[0] = true;

	return map;
}
/*
 * Set up for tuple conversion, matching input and output columns by name.
 * (Dropped columns are ignored in both input and output.)  This is intended
 * for use when the rowtypes are related by inheritance, so we expect an exact
 * match of both type and typmod.  The error messages will be a bit unhelpful
 * unless both rowtypes are named composite types.
 *
 * indesc describes the tuples that will be fed to the conversion, outdesc
 * describes the tuples that must come out of it, and msg is the primary
 * error message handed to convert_tuples_by_name_map for its error reports.
 *
 * Returns NULL if no runtime conversion is needed, else a palloc'd
 * TupleConversionMap.  The map stores the given descriptor pointers as-is.
 */
TupleConversionMap *
convert_tuples_by_name(TupleDesc indesc,
					   TupleDesc outdesc,
					   const char *msg)
{
	TupleConversionMap *map;
	AttrNumber *attrMap;
	int			n = outdesc->natts;
	int			i;
	bool		same;

	/* Verify compatibility and prepare attribute-number map */
	attrMap = convert_tuples_by_name_map(indesc, outdesc, msg);

	/*
	 * Check to see if the map is one-to-one, in which case we need not do a
	 * tuple conversion.  We must also insist that both tupdescs either
	 * specify or don't specify an OID column, else we need a conversion to
	 * add/remove space for that.  (For some callers, presence or absence of
	 * an OID column perhaps would not really matter, but let's be safe.)
	 */
	if (indesc->natts == outdesc->natts &&
		indesc->tdhasoid == outdesc->tdhasoid)
	{
		same = true;
		for (i = 0; i < n; i++)
		{
			if (attrMap[i] == (i + 1))
				continue;

			/*
			 * If it's a dropped column and the corresponding input column is
			 * also dropped, we needn't convert.  However, attlen and attalign
			 * must agree.
			 */
			if (attrMap[i] == 0 &&
				indesc->attrs[i]->attisdropped &&
				indesc->attrs[i]->attlen == outdesc->attrs[i]->attlen &&
				indesc->attrs[i]->attalign == outdesc->attrs[i]->attalign)
				continue;

			same = false;
			break;
		}
	}
	else
		same = false;

	if (same)
	{
		/* Runtime conversion is not needed */
		pfree(attrMap);
		return NULL;
	}

	/* Prepare the map structure */
	map = (TupleConversionMap *) palloc(sizeof(TupleConversionMap));
	map->indesc = indesc;
	map->outdesc = outdesc;
	map->attrMap = attrMap;
	/* preallocate workspace for Datum arrays */
	map->outvalues = (Datum *) palloc(n * sizeof(Datum));
	map->outisnull = (bool *) palloc(n * sizeof(bool));
	n = indesc->natts + 1;		/* +1 for NULL */
	map->invalues = (Datum *) palloc(n * sizeof(Datum));
	map->inisnull = (bool *) palloc(n * sizeof(bool));
	map->invalues[0] = (Datum) 0;	/* set up the NULL entry */
	map->inisnull[0] = true;

	return map;
}
/*
 * Build and return a palloc'd attribute-number array for by-name tuple
 * conversion.  The array has one entry per column of outdesc: the 1-based
 * position of the same-named, non-dropped column in indesc, or 0 for a
 * dropped output column.
 *
 * An error (ERRCODE_DATATYPE_MISMATCH, with msg as the primary message) is
 * reported if a live output column has no same-named live input column, or
 * if the pair disagrees on type or typmod.
 *
 * This is normally a subroutine for convert_tuples_by_name, but can be
 * used standalone.
 */
AttrNumber *
convert_tuples_by_name_map(TupleDesc indesc,
						   TupleDesc outdesc,
						   const char *msg)
{
	int			noutatts = outdesc->natts;
	AttrNumber *result;
	int			outno;

	/* palloc0 leaves every entry at 0, meaning "no source column" */
	result = (AttrNumber *) palloc0(noutatts * sizeof(AttrNumber));

	for (outno = 0; outno < noutatts; outno++)
	{
		Form_pg_attribute outatt = outdesc->attrs[outno];
		char	   *outname;
		int			inno;

		/* Dropped output columns keep their zero entry */
		if (outatt->attisdropped)
			continue;

		outname = NameStr(outatt->attname);

		/* Look for the first live input column carrying the same name */
		for (inno = 0; inno < indesc->natts; inno++)
		{
			Form_pg_attribute inatt = indesc->attrs[inno];

			if (inatt->attisdropped ||
				strcmp(outname, NameStr(inatt->attname)) != 0)
				continue;

			/* Name matches; type and typmod must match exactly as well */
			if (outatt->atttypid != inatt->atttypid ||
				outatt->atttypmod != inatt->atttypmod)
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg_internal("%s", _(msg)),
						 errdetail("Attribute \"%s\" of type %s does not match corresponding attribute of type %s.",
								   outname,
								   format_type_be(outdesc->tdtypeid),
								   format_type_be(indesc->tdtypeid))));

			result[outno] = (AttrNumber) (inno + 1);
			break;
		}

		/* Still zero means the search came up empty */
		if (result[outno] == 0)
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg_internal("%s", _(msg)),
					 errdetail("Attribute \"%s\" of type %s does not exist in type %s.",
							   outname,
							   format_type_be(outdesc->tdtypeid),
							   format_type_be(indesc->tdtypeid))));
	}

	return result;
}
/*
 * Convert one tuple using a previously built conversion map.
 *
 * The source tuple is deformed into the map's input workspace, each output
 * column is filled from the input slot named by attrMap, and a new tuple is
 * formed with the map's output descriptor and returned.
 */
HeapTuple
do_convert_tuple(HeapTuple tuple, TupleConversionMap *map)
{
	int			ncols = map->outdesc->natts;
	int			col;

	/*
	 * Deform starting at slot 1 of the input workspace.  Slot 0 is thereby
	 * left untouched, so a zero entry in attrMap picks up whatever slot 0
	 * holds, and the 1-based numbers in attrMap index the arrays directly.
	 */
	heap_deform_tuple(tuple, map->indesc,
					  map->invalues + 1, map->inisnull + 1);

	/* Copy each output column's value and null flag from its source slot */
	for (col = 0; col < ncols; col++)
	{
		int			src = map->attrMap[col];

		map->outvalues[col] = map->invalues[src];
		map->outisnull[col] = map->inisnull[src];
	}

	/* Assemble the result tuple in the output rowtype */
	return heap_form_tuple(map->outdesc, map->outvalues, map->outisnull);
}
/*
 * Free a TupleConversionMap structure.
 *
 * Releases the attribute map, the four workspace arrays, and the map struct
 * itself.  A NULL map is accepted and ignored: the setup routines in this
 * file return NULL whenever no conversion is needed, so callers may hand
 * that result straight back here.
 */
void
free_conversion_map(TupleConversionMap *map)
{
	/* Nothing was allocated when the setup routine returned NULL */
	if (map == NULL)
		return;

	/* indesc and outdesc are not ours to free */
	pfree(map->attrMap);
	pfree(map->invalues);
	pfree(map->inisnull);
	pfree(map->outvalues);
	pfree(map->outisnull);
	pfree(map);
}