/*-------------------------------------------------------------------------
 *
 * sequence.c
 *    PostgreSQL sequences support code.
 *
 * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.171 2010/08/18 18:35:19 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/heapam.h"
#include "access/transam.h"
#include "access/xact.h"
#include "access/xlogutils.h"
#include "catalog/dependency.h"
#include "catalog/namespace.h"
#include "catalog/pg_type.h"
#include "commands/defrem.h"
#include "commands/sequence.h"
#include "commands/tablecmds.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/smgr.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/resowner.h"
#include "utils/syscache.h"


/*
 * We don't want to log each fetching of a value from a sequence,
 * so we pre-log a few fetches in advance. In the event of
 * crash we can lose as much as we pre-logged.
 *
 * nextval_internal() adds this many values to its fetch count each time
 * it decides that a WAL record has to be written.
 */
#define SEQ_LOG_VALS    32

/*
 * The "special area" of a sequence's buffer page looks like this.
 *
 * SEQ_MAGIC is the value stored in the "magic" field; read_info() raises
 * an error when the page's special area holds anything else.
 */
#define SEQ_MAGIC       0x1717

typedef struct sequence_magic
{
    uint32      magic;          /* always SEQ_MAGIC on a valid sequence page */
} sequence_magic;

/*
 * We store a SeqTable item for every sequence we have touched in the current
 * session.  This is needed to hold onto nextval/currval state.  (We can't
 * rely on the relcache, since it's only, well, a cache, and may decide to
 * discard entries.)
 *
 * XXX We use linear search to find pre-existing SeqTable entries.  This is
 * good when only a small number of sequences are touched in a session, but
 * would suck with many different sequences.  Perhaps use a hashtable someday.
*/
typedef struct SeqTableData
{
    struct SeqTableData *next;  /* link to next SeqTable object */
    Oid         relid;          /* pg_class OID of this sequence */
    LocalTransactionId lxid;    /* xact in which we last did a seq op */
    bool        last_valid;     /* do we have a valid "last" value? */
    int64       last;           /* value last returned by nextval */
    int64       cached;         /* last value already cached for nextval */
    /* if last != cached, we have not used up all the cached values */
    int64       increment;      /* copy of sequence's increment field */
    /* note that increment is zero until we first do read_info() */
} SeqTableData;

typedef SeqTableData *SeqTable;

static SeqTable seqtab = NULL;  /* Head of list of SeqTable items */

/*
 * last_used_seq is updated by nextval() to point to the last used
 * sequence.
 */
static SeqTableData *last_used_seq = NULL;

static int64 nextval_internal(Oid relid);
static Relation open_share_lock(SeqTable seq);
static void init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel);
static Form_pg_sequence read_info(SeqTable elm,
          Relation rel, Buffer *buf);
static void init_params(List *options, bool isInit,
            Form_pg_sequence new, List **owned_by);
static void do_setval(Oid relid, int64 next, bool iscalled);
static void process_owned_by(Relation seqrel, List *owned_by);


/*
 * DefineSequence
 *              Creates a new sequence relation
 *
 * The work is done in this order:
 *   1. init_params() validates seq->options and fills "new" with the
 *      values for the sequence tuple (isInit = true, so unspecified
 *      options get defaults).
 *   2. A CreateStmt with one ColumnDef per sequence column is built and
 *      handed to DefineRelation() with RELKIND_SEQUENCE.
 *   3. Block 0 of the new relation is initialized with a special area
 *      holding SEQ_MAGIC, and the single sequence tuple is inserted.
 *   4. The tuple's xmin is forced to FrozenTransactionId and, unless the
 *      relation is temporary, an XLOG_SEQ_LOG record is written.
 *   5. Any OWNED BY option is applied via process_owned_by().
 */
void
DefineSequence(CreateSeqStmt *seq)
{
    FormData_pg_sequence new;
    List       *owned_by;
    CreateStmt *stmt = makeNode(CreateStmt);
    Oid         seqoid;
    Relation    rel;
    Buffer      buf;
    Page        page;
    sequence_magic *sm;
    HeapTuple   tuple;
    TupleDesc   tupDesc;
    Datum       value[SEQ_COL_LASTCOL];
    bool        null[SEQ_COL_LASTCOL];
    int         i;
    NameData    name;

    /* Check and set all option values */
    init_params(seq->options, true, &new, &owned_by);

    /*
     * Create relation (and fill value[] and null[] for the tuple)
     *
     * The loop index i is the column number; value[] and null[] are
     * indexed by i - 1.
     */
    stmt->tableElts = NIL;
    for (i = SEQ_COL_FIRSTCOL; i <= SEQ_COL_LASTCOL; i++)
    {
        ColumnDef  *coldef = makeNode(ColumnDef);

        coldef->inhcount = 0;
        coldef->is_local = true;
        coldef->is_not_null = true;
        coldef->storage = 0;
        coldef->raw_default = NULL;
        coldef->cooked_default = NULL;
        coldef->constraints = NIL;

        /* no sequence column is ever NULL */
        null[i - 1] = false;

        switch (i)
        {
            case SEQ_COL_NAME:
                coldef->typeName = makeTypeNameFromOid(NAMEOID, -1);
                coldef->colname = "sequence_name";
                namestrcpy(&name, seq->sequence->relname);
                value[i - 1] = NameGetDatum(&name);
                break;
            case SEQ_COL_LASTVAL:
                coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
                coldef->colname = "last_value";
                value[i - 1] = Int64GetDatumFast(new.last_value);
                break;
            case SEQ_COL_STARTVAL:
                coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
                coldef->colname = "start_value";
                value[i - 1] = Int64GetDatumFast(new.start_value);
                break;
            case SEQ_COL_INCBY:
                coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
                coldef->colname = "increment_by";
                value[i - 1] = Int64GetDatumFast(new.increment_by);
                break;
            case SEQ_COL_MAXVALUE:
                coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
                coldef->colname = "max_value";
                value[i - 1] = Int64GetDatumFast(new.max_value);
                break;
            case SEQ_COL_MINVALUE:
                coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
                coldef->colname = "min_value";
                value[i - 1] = Int64GetDatumFast(new.min_value);
                break;
            case SEQ_COL_CACHE:
                coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
                coldef->colname = "cache_value";
                value[i - 1] = Int64GetDatumFast(new.cache_value);
                break;
            case SEQ_COL_LOG:
                coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
                coldef->colname = "log_cnt";
                value[i - 1] = Int64GetDatum((int64) 1);
                break;
            case SEQ_COL_CYCLE:
                coldef->typeName = makeTypeNameFromOid(BOOLOID, -1);
                coldef->colname = "is_cycled";
                value[i - 1] = BoolGetDatum(new.is_cycled);
                break;
            case SEQ_COL_CALLED:
                coldef->typeName = makeTypeNameFromOid(BOOLOID, -1);
                coldef->colname = "is_called";
                value[i - 1] = BoolGetDatum(false);
                break;
        }
        stmt->tableElts = lappend(stmt->tableElts, coldef);
    }

    stmt->relation = seq->sequence;
    stmt->inhRelations = NIL;
    stmt->constraints = NIL;
    stmt->options = list_make1(defWithOids(false));
    stmt->oncommit = ONCOMMIT_NOOP;
    stmt->tablespacename = NULL;
    stmt->if_not_exists = false;

    seqoid = DefineRelation(stmt, RELKIND_SEQUENCE, seq->ownerId);
    Assert(seqoid != InvalidOid);

    rel = heap_open(seqoid, AccessExclusiveLock);
    tupDesc = RelationGetDescr(rel);

    /* Initialize first page of relation with special magic number */

    buf = ReadBuffer(rel, P_NEW);
    Assert(BufferGetBlockNumber(buf) == 0);

    page = BufferGetPage(buf);

    PageInit(page, BufferGetPageSize(buf), sizeof(sequence_magic));
    sm = (sequence_magic *) PageGetSpecialPointer(page);
    sm->magic = SEQ_MAGIC;

    /* hack: ensure heap_insert will insert on the just-created page */
    RelationSetTargetBlock(rel, 0);

    /* Now form & insert sequence tuple */
    tuple = heap_form_tuple(tupDesc, value, null);
    simple_heap_insert(rel, tuple);

    Assert(ItemPointerGetOffsetNumber(&(tuple->t_self)) == FirstOffsetNumber);

    /*
     * Two special hacks here:
     *
     * 1. Since VACUUM does not process sequences, we have to force the tuple
     * to have xmin = FrozenTransactionId now.  Otherwise it would become
     * invisible to SELECTs after 2G transactions.  It is okay to do this
     * because if the current transaction aborts, no other xact will ever
     * examine the sequence tuple anyway.
     *
     * 2. Even though heap_insert emitted a WAL log record, we have to emit an
     * XLOG_SEQ_LOG record too, since (a) the heap_insert record will not have
     * the right xmin, and (b) REDO of the heap_insert record would re-init
     * page and sequence magic number would be lost.  This means two log
     * records instead of one :-(
     */
    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

    START_CRIT_SECTION();

    {
        /*
         * Note that the "tuple" structure is still just a local tuple record
         * created by heap_form_tuple; its t_data pointer doesn't point at the
         * disk buffer.  To scribble on the disk buffer we need to fetch the
         * item pointer.  But do the same to the local tuple, since that will
         * be the source for the WAL log record, below.
         */
        ItemId      itemId;
        Item        item;

        itemId = PageGetItemId((Page) page, FirstOffsetNumber);
        item = PageGetItem((Page) page, itemId);

        /* freeze the copy in the buffer ... */
        HeapTupleHeaderSetXmin((HeapTupleHeader) item, FrozenTransactionId);
        ((HeapTupleHeader) item)->t_infomask |= HEAP_XMIN_COMMITTED;

        /* ... and the local copy that feeds the WAL record */
        HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId);
        tuple->t_data->t_infomask |= HEAP_XMIN_COMMITTED;
    }

    MarkBufferDirty(buf);

    /* XLOG stuff */
    if (!rel->rd_istemp)
    {
        xl_seq_rec  xlrec;
        XLogRecPtr  recptr;
        XLogRecData rdata[2];
        Form_pg_sequence newseq = (Form_pg_sequence) GETSTRUCT(tuple);

        /* We do not log first nextval call, so "advance" sequence here */
        /* Note we are scribbling on local tuple, not the disk buffer */
        newseq->is_called = true;
        newseq->log_cnt = 0;

        /* rdata[0] carries the record header, rdata[1] the whole tuple */
        xlrec.node = rel->rd_node;
        rdata[0].data = (char *) &xlrec;
        rdata[0].len = sizeof(xl_seq_rec);
        rdata[0].buffer = InvalidBuffer;
        rdata[0].next = &(rdata[1]);

        rdata[1].data = (char *) tuple->t_data;
        rdata[1].len = tuple->t_len;
        rdata[1].buffer = InvalidBuffer;
        rdata[1].next = NULL;

        recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);

        PageSetLSN(page, recptr);
        PageSetTLI(page, ThisTimeLineID);
    }

    END_CRIT_SECTION();

    UnlockReleaseBuffer(buf);

    /* process OWNED BY if given */
    if (owned_by)
        process_owned_by(rel, owned_by);

    /* close the relation but keep the lock taken by heap_open */
    heap_close(rel, NoLock);
}

/*
 * AlterSequence
 *
 * Modify the definition of a sequence relation
 *
 * Looks up the sequence named in stmt->sequence, verifies that the current
 * user owns it, and then delegates to AlterSequenceInternal().
 */
void
AlterSequence(AlterSeqStmt *stmt)
{
    Oid         relid;

    /* find sequence */
    relid = RangeVarGetRelid(stmt->sequence, false);

    /* allow ALTER to sequence owner only */
    /* if you change this, see also callers of AlterSequenceInternal! */
    if (!pg_class_ownercheck(relid, GetUserId()))
        aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
                       stmt->sequence->relname);

    /* do the work */
    AlterSequenceInternal(relid, stmt->options);
}

/*
 * AlterSequenceInternal
 *
 * Same as AlterSequence except that the sequence is specified by OID
 * and we assume the caller already checked permissions.
*/
void
AlterSequenceInternal(Oid relid, List *options)
{
    FormData_pg_sequence updated;   /* workspace the options are applied to */
    Form_pg_sequence ondisk;        /* tuple data inside the locked buffer */
    List       *owner_spec;         /* OWNED BY name list, or NIL */
    SeqTable    entry;
    Relation    seqrel;
    Buffer      seqbuf;
    Page        seqpage;

    /* Open the sequence, taking AccessShareLock, and find its SeqTable entry */
    init_sequence(relid, &entry, &seqrel);

    /* Lock the page's buffer and locate the sequence tuple within it */
    ondisk = read_info(entry, seqrel, &seqbuf);
    seqpage = BufferGetPage(seqbuf);

    /*
     * Start from the current settings and let init_params() overlay and
     * validate whatever the caller asked to change.  Nothing in the buffer
     * is touched until validation has succeeded.
     */
    memcpy(&updated, ondisk, sizeof(FormData_pg_sequence));
    init_params(options, false, &updated, &owner_spec);

    /*
     * Forget any values this session had cached for nextval, so that we do
     * not hand out numbers fetched under the old definition.  The currval()
     * state (entry->last) is deliberately left alone.
     */
    entry->cached = entry->last;

    /* Validation passed: install the new settings in the buffer's tuple */
    memcpy(ondisk, &updated, sizeof(FormData_pg_sequence));

    START_CRIT_SECTION();

    MarkBufferDirty(seqbuf);

    /* Temporary sequences are not WAL-logged */
    if (!seqrel->rd_istemp)
    {
        PageHeader  hdr = (PageHeader) seqpage;
        XLogRecData chain[2];
        xl_seq_rec  xlrec;
        XLogRecPtr  recptr;

        xlrec.node = seqrel->rd_node;

        /* first chunk: the fixed-size record header */
        chain[0].data = (char *) &xlrec;
        chain[0].len = sizeof(xl_seq_rec);
        chain[0].buffer = InvalidBuffer;
        chain[0].next = &(chain[1]);

        /* second chunk: the page bytes between pd_upper and pd_special */
        chain[1].data = (char *) seqpage + hdr->pd_upper;
        chain[1].len = hdr->pd_special - hdr->pd_upper;
        chain[1].buffer = InvalidBuffer;
        chain[1].next = NULL;

        recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, chain);

        PageSetLSN(seqpage, recptr);
        PageSetTLI(seqpage, ThisTimeLineID);
    }

    END_CRIT_SECTION();

    UnlockReleaseBuffer(seqbuf);

    /* Apply OWNED BY, if one was supplied */
    if (owner_spec != NIL)
        process_owned_by(seqrel, owner_spec);

    /* Close the relation but keep its lock */
    relation_close(seqrel, NoLock);
}


/*
 * Note: nextval with a text argument is no longer exported as a pg_proc
 * entry, but we keep it around to ease porting of C code that may have
 * called the function directly.
*/ Datum nextval(PG_FUNCTION_ARGS) { text *seqin = PG_GETARG_TEXT_P(0); RangeVar *sequence; Oid relid; sequence = makeRangeVarFromNameList(textToQualifiedNameList(seqin)); relid = RangeVarGetRelid(sequence, false); PG_RETURN_INT64(nextval_internal(relid)); } Datum nextval_oid(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); PG_RETURN_INT64(nextval_internal(relid)); } static int64 nextval_internal(Oid relid) { SeqTable elm; Relation seqrel; Buffer buf; Page page; Form_pg_sequence seq; int64 incby, maxv, minv, cache, log, fetch, last; int64 result, next, rescnt = 0; bool logit = false; /* open and AccessShareLock sequence */ init_sequence(relid, &elm, &seqrel); if (pg_class_aclcheck(elm->relid, GetUserId(), ACL_USAGE) != ACLCHECK_OK && pg_class_aclcheck(elm->relid, GetUserId(), ACL_UPDATE) != ACLCHECK_OK) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("permission denied for sequence %s", RelationGetRelationName(seqrel)))); /* read-only transactions may only modify temp sequences */ if (seqrel->rd_backend != MyBackendId) PreventCommandIfReadOnly("nextval()"); if (elm->last != elm->cached) /* some numbers were cached */ { Assert(elm->last_valid); Assert(elm->increment != 0); elm->last += elm->increment; relation_close(seqrel, NoLock); last_used_seq = elm; return elm->last; } /* lock page' buffer and read tuple */ seq = read_info(elm, seqrel, &buf); page = BufferGetPage(buf); last = next = result = seq->last_value; incby = seq->increment_by; maxv = seq->max_value; minv = seq->min_value; fetch = cache = seq->cache_value; log = seq->log_cnt; if (!seq->is_called) { rescnt++; /* last_value if not called */ fetch--; log--; } /* * Decide whether we should emit a WAL log record. If so, force up the * fetch count to grab SEQ_LOG_VALS more values than we actually need to * cache. (These will then be usable without logging.) 
* * If this is the first nextval after a checkpoint, we must force a new * WAL record to be written anyway, else replay starting from the * checkpoint would fail to advance the sequence past the logged values. * In this case we may as well fetch extra values. */ if (log < fetch) { /* forced log to satisfy local demand for values */ fetch = log = fetch + SEQ_LOG_VALS; logit = true; } else { XLogRecPtr redoptr = GetRedoRecPtr(); if (XLByteLE(PageGetLSN(page), redoptr)) { /* last update of seq was before checkpoint */ fetch = log = fetch + SEQ_LOG_VALS; logit = true; } } while (fetch) /* try to fetch cache [+ log ] numbers */ { /* * Check MAXVALUE for ascending sequences and MINVALUE for descending * sequences */ if (incby > 0) { /* ascending sequence */ if ((maxv >= 0 && next > maxv - incby) || (maxv < 0 && next + incby > maxv)) { if (rescnt > 0) break; /* stop fetching */ if (!seq->is_cycled) { char buf[100]; snprintf(buf, sizeof(buf), INT64_FORMAT, maxv); ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("nextval: reached maximum value of sequence \"%s\" (%s)", RelationGetRelationName(seqrel), buf))); } next = minv; } else next += incby; } else { /* descending sequence */ if ((minv < 0 && next < minv - incby) || (minv >= 0 && next + incby < minv)) { if (rescnt > 0) break; /* stop fetching */ if (!seq->is_cycled) { char buf[100]; snprintf(buf, sizeof(buf), INT64_FORMAT, minv); ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("nextval: reached minimum value of sequence \"%s\" (%s)", RelationGetRelationName(seqrel), buf))); } next = maxv; } else next += incby; } fetch--; if (rescnt < cache) { log--; rescnt++; last = next; if (rescnt == 1) /* if it's first result - */ result = next; /* it's what to return */ } } log -= fetch; /* adjust for any unfetched numbers */ Assert(log >= 0); /* save info in local cache */ elm->last = result; /* last returned number */ elm->cached = last; /* last fetched number */ elm->last_valid = 
true; last_used_seq = elm; START_CRIT_SECTION(); MarkBufferDirty(buf); /* XLOG stuff */ if (logit && !seqrel->rd_istemp) { xl_seq_rec xlrec; XLogRecPtr recptr; XLogRecData rdata[2]; xlrec.node = seqrel->rd_node; rdata[0].data = (char *) &xlrec; rdata[0].len = sizeof(xl_seq_rec); rdata[0].buffer = InvalidBuffer; rdata[0].next = &(rdata[1]); /* set values that will be saved in xlog */ seq->last_value = next; seq->is_called = true; seq->log_cnt = 0; rdata[1].data = (char *) page + ((PageHeader) page)->pd_upper; rdata[1].len = ((PageHeader) page)->pd_special - ((PageHeader) page)->pd_upper; rdata[1].buffer = InvalidBuffer; rdata[1].next = NULL; recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } /* update on-disk data */ seq->last_value = last; /* last fetched number */ seq->is_called = true; seq->log_cnt = log; /* how much is logged */ END_CRIT_SECTION(); UnlockReleaseBuffer(buf); relation_close(seqrel, NoLock); return result; } Datum currval_oid(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); int64 result; SeqTable elm; Relation seqrel; /* open and AccessShareLock sequence */ init_sequence(relid, &elm, &seqrel); if (pg_class_aclcheck(elm->relid, GetUserId(), ACL_SELECT) != ACLCHECK_OK && pg_class_aclcheck(elm->relid, GetUserId(), ACL_USAGE) != ACLCHECK_OK) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("permission denied for sequence %s", RelationGetRelationName(seqrel)))); if (!elm->last_valid) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("currval of sequence \"%s\" is not yet defined in this session", RelationGetRelationName(seqrel)))); result = elm->last; relation_close(seqrel, NoLock); PG_RETURN_INT64(result); } Datum lastval(PG_FUNCTION_ARGS) { Relation seqrel; int64 result; if (last_used_seq == NULL) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("lastval is not yet defined in this session"))); /* Someone may have dropped the 
sequence since the last nextval() */ if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(last_used_seq->relid))) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("lastval is not yet defined in this session"))); seqrel = open_share_lock(last_used_seq); /* nextval() must have already been called for this sequence */ Assert(last_used_seq->last_valid); if (pg_class_aclcheck(last_used_seq->relid, GetUserId(), ACL_SELECT) != ACLCHECK_OK && pg_class_aclcheck(last_used_seq->relid, GetUserId(), ACL_USAGE) != ACLCHECK_OK) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("permission denied for sequence %s", RelationGetRelationName(seqrel)))); result = last_used_seq->last; relation_close(seqrel, NoLock); PG_RETURN_INT64(result); } /* * Main internal procedure that handles 2 & 3 arg forms of SETVAL. * * Note that the 3 arg version (which sets the is_called flag) is * only for use in pg_dump, and setting the is_called flag may not * work if multiple users are attached to the database and referencing * the sequence (unlikely if pg_dump is restoring it). * * It is necessary to have the 3 arg version so that pg_dump can * restore the state of a sequence exactly during data-only restores - * it is the only way to clear the is_called flag in an existing * sequence. 
*/ static void do_setval(Oid relid, int64 next, bool iscalled) { SeqTable elm; Relation seqrel; Buffer buf; Form_pg_sequence seq; /* open and AccessShareLock sequence */ init_sequence(relid, &elm, &seqrel); if (pg_class_aclcheck(elm->relid, GetUserId(), ACL_UPDATE) != ACLCHECK_OK) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("permission denied for sequence %s", RelationGetRelationName(seqrel)))); /* read-only transactions may only modify temp sequences */ if (seqrel->rd_backend != MyBackendId) PreventCommandIfReadOnly("setval()"); /* lock page' buffer and read tuple */ seq = read_info(elm, seqrel, &buf); if ((next < seq->min_value) || (next > seq->max_value)) { char bufv[100], bufm[100], bufx[100]; snprintf(bufv, sizeof(bufv), INT64_FORMAT, next); snprintf(bufm, sizeof(bufm), INT64_FORMAT, seq->min_value); snprintf(bufx, sizeof(bufx), INT64_FORMAT, seq->max_value); ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("setval: value %s is out of bounds for sequence \"%s\" (%s..%s)", bufv, RelationGetRelationName(seqrel), bufm, bufx))); } /* Set the currval() state only if iscalled = true */ if (iscalled) { elm->last = next; /* last returned number */ elm->last_valid = true; } /* In any case, forget any future cached numbers */ elm->cached = elm->last; START_CRIT_SECTION(); MarkBufferDirty(buf); /* XLOG stuff */ if (!seqrel->rd_istemp) { xl_seq_rec xlrec; XLogRecPtr recptr; XLogRecData rdata[2]; Page page = BufferGetPage(buf); xlrec.node = seqrel->rd_node; rdata[0].data = (char *) &xlrec; rdata[0].len = sizeof(xl_seq_rec); rdata[0].buffer = InvalidBuffer; rdata[0].next = &(rdata[1]); /* set values that will be saved in xlog */ seq->last_value = next; seq->is_called = true; seq->log_cnt = 0; rdata[1].data = (char *) page + ((PageHeader) page)->pd_upper; rdata[1].len = ((PageHeader) page)->pd_special - ((PageHeader) page)->pd_upper; rdata[1].buffer = InvalidBuffer; rdata[1].next = NULL; recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, 
rdata); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } /* save info in sequence relation */ seq->last_value = next; /* last fetched number */ seq->is_called = iscalled; seq->log_cnt = (iscalled) ? 0 : 1; END_CRIT_SECTION(); UnlockReleaseBuffer(buf); relation_close(seqrel, NoLock); } /* * Implement the 2 arg setval procedure. * See do_setval for discussion. */ Datum setval_oid(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); int64 next = PG_GETARG_INT64(1); do_setval(relid, next, true); PG_RETURN_INT64(next); } /* * Implement the 3 arg setval procedure. * See do_setval for discussion. */ Datum setval3_oid(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); int64 next = PG_GETARG_INT64(1); bool iscalled = PG_GETARG_BOOL(2); do_setval(relid, next, iscalled); PG_RETURN_INT64(next); } /* * Open the sequence and acquire AccessShareLock if needed * * If we haven't touched the sequence already in this transaction, * we need to acquire AccessShareLock. We arrange for the lock to * be owned by the top transaction, so that we don't need to do it * more than once per xact. */ static Relation open_share_lock(SeqTable seq) { LocalTransactionId thislxid = MyProc->lxid; /* Get the lock if not already held in this xact */ if (seq->lxid != thislxid) { ResourceOwner currentOwner; currentOwner = CurrentResourceOwner; PG_TRY(); { CurrentResourceOwner = TopTransactionResourceOwner; LockRelationOid(seq->relid, AccessShareLock); } PG_CATCH(); { /* Ensure CurrentResourceOwner is restored on error */ CurrentResourceOwner = currentOwner; PG_RE_THROW(); } PG_END_TRY(); CurrentResourceOwner = currentOwner; /* Flag that we have a lock in the current xact */ seq->lxid = thislxid; } /* We now know we have AccessShareLock, and can safely open the rel */ return relation_open(seq->relid, NoLock); } /* * Given a relation OID, open and lock the sequence. p_elm and p_rel are * output parameters. 
*/ static void init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel) { SeqTable elm; Relation seqrel; /* Look to see if we already have a seqtable entry for relation */ for (elm = seqtab; elm != NULL; elm = elm->next) { if (elm->relid == relid) break; } /* * Allocate new seqtable entry if we didn't find one. * * NOTE: seqtable entries remain in the list for the life of a backend. If * the sequence itself is deleted then the entry becomes wasted memory, * but it's small enough that this should not matter. */ if (elm == NULL) { /* * Time to make a new seqtable entry. These entries live as long as * the backend does, so we use plain malloc for them. */ elm = (SeqTable) malloc(sizeof(SeqTableData)); if (elm == NULL) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))); elm->relid = relid; elm->lxid = InvalidLocalTransactionId; elm->last_valid = false; elm->last = elm->cached = elm->increment = 0; elm->next = seqtab; seqtab = elm; } /* * Open the sequence relation. */ seqrel = open_share_lock(elm); if (seqrel->rd_rel->relkind != RELKIND_SEQUENCE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a sequence", RelationGetRelationName(seqrel)))); *p_elm = elm; *p_rel = seqrel; } /* Given an opened relation, lock the page buffer and find the tuple */ static Form_pg_sequence read_info(SeqTable elm, Relation rel, Buffer *buf) { Page page; ItemId lp; HeapTupleData tuple; sequence_magic *sm; Form_pg_sequence seq; *buf = ReadBuffer(rel, 0); LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE); page = BufferGetPage(*buf); sm = (sequence_magic *) PageGetSpecialPointer(page); if (sm->magic != SEQ_MAGIC) elog(ERROR, "bad magic number in sequence \"%s\": %08X", RelationGetRelationName(rel), sm->magic); lp = PageGetItemId(page, FirstOffsetNumber); Assert(ItemIdIsNormal(lp)); tuple.t_data = (HeapTupleHeader) PageGetItem(page, lp); seq = (Form_pg_sequence) GETSTRUCT(&tuple); /* this is a handy place to update our copy of the increment */ 
elm->increment = seq->increment_by; return seq; } /* * init_params: process the options list of CREATE or ALTER SEQUENCE, * and store the values into appropriate fields of *new. Also set * *owned_by to any OWNED BY option, or to NIL if there is none. * * If isInit is true, fill any unspecified options with default values; * otherwise, do not change existing options that aren't explicitly overridden. */ static void init_params(List *options, bool isInit, Form_pg_sequence new, List **owned_by) { DefElem *start_value = NULL; DefElem *restart_value = NULL; DefElem *increment_by = NULL; DefElem *max_value = NULL; DefElem *min_value = NULL; DefElem *cache_value = NULL; DefElem *is_cycled = NULL; ListCell *option; *owned_by = NIL; foreach(option, options) { DefElem *defel = (DefElem *) lfirst(option); if (strcmp(defel->defname, "increment") == 0) { if (increment_by) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options"))); increment_by = defel; } else if (strcmp(defel->defname, "start") == 0) { if (start_value) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options"))); start_value = defel; } else if (strcmp(defel->defname, "restart") == 0) { if (restart_value) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options"))); restart_value = defel; } else if (strcmp(defel->defname, "maxvalue") == 0) { if (max_value) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options"))); max_value = defel; } else if (strcmp(defel->defname, "minvalue") == 0) { if (min_value) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options"))); min_value = defel; } else if (strcmp(defel->defname, "cache") == 0) { if (cache_value) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options"))); cache_value = defel; } else if (strcmp(defel->defname, "cycle") == 0) { if (is_cycled) ereport(ERROR, 
(errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options"))); is_cycled = defel; } else if (strcmp(defel->defname, "owned_by") == 0) { if (*owned_by) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options"))); *owned_by = defGetQualifiedName(defel); } else elog(ERROR, "option \"%s\" not recognized", defel->defname); } /* INCREMENT BY */ if (increment_by != NULL) { new->increment_by = defGetInt64(increment_by); if (new->increment_by == 0) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("INCREMENT must not be zero"))); } else if (isInit) new->increment_by = 1; /* CYCLE */ if (is_cycled != NULL) { new->is_cycled = intVal(is_cycled->arg); Assert(new->is_cycled == false || new->is_cycled == true); } else if (isInit) new->is_cycled = false; /* MAXVALUE (null arg means NO MAXVALUE) */ if (max_value != NULL && max_value->arg) new->max_value = defGetInt64(max_value); else if (isInit || max_value != NULL) { if (new->increment_by > 0) new->max_value = SEQ_MAXVALUE; /* ascending seq */ else new->max_value = -1; /* descending seq */ } /* MINVALUE (null arg means NO MINVALUE) */ if (min_value != NULL && min_value->arg) new->min_value = defGetInt64(min_value); else if (isInit || min_value != NULL) { if (new->increment_by > 0) new->min_value = 1; /* ascending seq */ else new->min_value = SEQ_MINVALUE; /* descending seq */ } /* crosscheck min/max */ if (new->min_value >= new->max_value) { char bufm[100], bufx[100]; snprintf(bufm, sizeof(bufm), INT64_FORMAT, new->min_value); snprintf(bufx, sizeof(bufx), INT64_FORMAT, new->max_value); ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("MINVALUE (%s) must be less than MAXVALUE (%s)", bufm, bufx))); } /* START WITH */ if (start_value != NULL) new->start_value = defGetInt64(start_value); else if (isInit) { if (new->increment_by > 0) new->start_value = new->min_value; /* ascending seq */ else new->start_value = new->max_value; /* descending seq */ } /* 
crosscheck START */ if (new->start_value < new->min_value) { char bufs[100], bufm[100]; snprintf(bufs, sizeof(bufs), INT64_FORMAT, new->start_value); snprintf(bufm, sizeof(bufm), INT64_FORMAT, new->min_value); ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("START value (%s) cannot be less than MINVALUE (%s)", bufs, bufm))); } if (new->start_value > new->max_value) { char bufs[100], bufm[100]; snprintf(bufs, sizeof(bufs), INT64_FORMAT, new->start_value); snprintf(bufm, sizeof(bufm), INT64_FORMAT, new->max_value); ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("START value (%s) cannot be greater than MAXVALUE (%s)", bufs, bufm))); } /* RESTART [WITH] */ if (restart_value != NULL) { if (restart_value->arg != NULL) new->last_value = defGetInt64(restart_value); else new->last_value = new->start_value; new->is_called = false; new->log_cnt = 1; } else if (isInit) { new->last_value = new->start_value; new->is_called = false; new->log_cnt = 1; } /* crosscheck RESTART (or current value, if changing MIN/MAX) */ if (new->last_value < new->min_value) { char bufs[100], bufm[100]; snprintf(bufs, sizeof(bufs), INT64_FORMAT, new->last_value); snprintf(bufm, sizeof(bufm), INT64_FORMAT, new->min_value); ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("RESTART value (%s) cannot be less than MINVALUE (%s)", bufs, bufm))); } if (new->last_value > new->max_value) { char bufs[100], bufm[100]; snprintf(bufs, sizeof(bufs), INT64_FORMAT, new->last_value); snprintf(bufm, sizeof(bufm), INT64_FORMAT, new->max_value); ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("RESTART value (%s) cannot be greater than MAXVALUE (%s)", bufs, bufm))); } /* CACHE */ if (cache_value != NULL) { new->cache_value = defGetInt64(cache_value); if (new->cache_value <= 0) { char buf[100]; snprintf(buf, sizeof(buf), INT64_FORMAT, new->cache_value); ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("CACHE (%s) must be greater than zero", 
buf))); } } else if (isInit) new->cache_value = 1; } /* * Process an OWNED BY option for CREATE/ALTER SEQUENCE * * Ownership permissions on the sequence are already checked, * but if we are establishing a new owned-by dependency, we must * enforce that the referenced table has the same owner and namespace * as the sequence. */ static void process_owned_by(Relation seqrel, List *owned_by) { int nnames; Relation tablerel; AttrNumber attnum; nnames = list_length(owned_by); Assert(nnames > 0); if (nnames == 1) { /* Must be OWNED BY NONE */ if (strcmp(strVal(linitial(owned_by)), "none") != 0) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("invalid OWNED BY option"), errhint("Specify OWNED BY table.column or OWNED BY NONE."))); tablerel = NULL; attnum = 0; } else { List *relname; char *attrname; RangeVar *rel; /* Separate relname and attr name */ relname = list_truncate(list_copy(owned_by), nnames - 1); attrname = strVal(lfirst(list_tail(owned_by))); /* Open and lock rel to ensure it won't go away meanwhile */ rel = makeRangeVarFromNameList(relname); tablerel = relation_openrv(rel, AccessShareLock); /* Must be a regular table */ if (tablerel->rd_rel->relkind != RELKIND_RELATION) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("referenced relation \"%s\" is not a table", RelationGetRelationName(tablerel)))); /* We insist on same owner and schema */ if (seqrel->rd_rel->relowner != tablerel->rd_rel->relowner) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("sequence must have same owner as table it is linked to"))); if (RelationGetNamespace(seqrel) != RelationGetNamespace(tablerel)) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("sequence must be in same schema as table it is linked to"))); /* Now, fetch the attribute number from the system cache */ attnum = get_attnum(RelationGetRelid(tablerel), attrname); if (attnum == InvalidAttrNumber) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_COLUMN), errmsg("column 
\"%s\" of relation \"%s\" does not exist", attrname, RelationGetRelationName(tablerel)))); } /* * OK, we are ready to update pg_depend. First remove any existing AUTO * dependencies for the sequence, then optionally add a new one. */ markSequenceUnowned(RelationGetRelid(seqrel)); if (tablerel) { ObjectAddress refobject, depobject; refobject.classId = RelationRelationId; refobject.objectId = RelationGetRelid(tablerel); refobject.objectSubId = attnum; depobject.classId = RelationRelationId; depobject.objectId = RelationGetRelid(seqrel); depobject.objectSubId = 0; recordDependencyOn(&depobject, &refobject, DEPENDENCY_AUTO); } /* Done, but hold lock until commit */ if (tablerel) relation_close(tablerel, NoLock); } void seq_redo(XLogRecPtr lsn, XLogRecord *record) { uint8 info = record->xl_info & ~XLR_INFO_MASK; Buffer buffer; Page page; char *item; Size itemsz; xl_seq_rec *xlrec = (xl_seq_rec *) XLogRecGetData(record); sequence_magic *sm; /* Backup blocks are not used in seq records */ Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK)); if (info != XLOG_SEQ_LOG) elog(PANIC, "seq_redo: unknown op code %u", info); buffer = XLogReadBuffer(xlrec->node, 0, true); Assert(BufferIsValid(buffer)); page = (Page) BufferGetPage(buffer); /* Always reinit the page and reinstall the magic number */ /* See comments in DefineSequence */ PageInit((Page) page, BufferGetPageSize(buffer), sizeof(sequence_magic)); sm = (sequence_magic *) PageGetSpecialPointer(page); sm->magic = SEQ_MAGIC; item = (char *) xlrec + sizeof(xl_seq_rec); itemsz = record->xl_len - sizeof(xl_seq_rec); itemsz = MAXALIGN(itemsz); if (PageAddItem(page, (Item) item, itemsz, FirstOffsetNumber, false, false) == InvalidOffsetNumber) elog(PANIC, "seq_redo: failed to add item to page"); PageSetLSN(page, lsn); PageSetTLI(page, ThisTimeLineID); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); } void seq_desc(StringInfo buf, uint8 xl_info, char *rec) { uint8 info = xl_info & ~XLR_INFO_MASK; xl_seq_rec *xlrec = (xl_seq_rec *) 
rec; if (info == XLOG_SEQ_LOG) appendStringInfo(buf, "log: "); else { appendStringInfo(buf, "UNKNOWN"); return; } appendStringInfo(buf, "rel %u/%u/%u", xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode); }