/*------------------------------------------------------------------------- * * logtape.c * Management of "logical tapes" within temporary files. * * This module exists to support sorting via multiple merge passes (see * tuplesort.c). Merging is an ideal algorithm for tape devices, but if * we implement it on disk by creating a separate file for each "tape", * there is an annoying problem: the peak space usage is at least twice * the volume of actual data to be sorted. (This must be so because each * datum will appear in both the input and output tapes of the final * merge pass. For seven-tape polyphase merge, which is otherwise a * pretty good algorithm, peak usage is more like 4x actual data volume.) * * We can work around this problem by recognizing that any one tape * dataset (with the possible exception of the final output) is written * and read exactly once in a perfectly sequential manner. Therefore, * a datum once read will not be required again, and we can recycle its * space for use by the new tape dataset(s) being generated. In this way, * the total space usage is essentially just the actual data volume, plus * insignificant bookkeeping and start/stop overhead. * * Few OSes allow arbitrary parts of a file to be released back to the OS, * so we have to implement this space-recycling ourselves within a single * logical file. logtape.c exists to perform this bookkeeping and provide * the illusion of N independent tape devices to tuplesort.c. Note that * logtape.c itself depends on buffile.c to provide a "logical file" of * larger size than the underlying OS may support. * * For simplicity, we allocate and release space in the underlying file * in BLCKSZ-size blocks. Space allocation boils down to keeping track * of which blocks in the underlying file belong to which logical tape, * plus any blocks that are free (recycled and not yet reused). Normally * there are not very many free blocks, so we just keep those in a list. * The blocks in each logical tape are remembered using a method borrowed * from the Unix HFS filesystem: we store data block numbers in an * "indirect block". If an indirect block fills up, we write it out to * the underlying file and remember its location in a second-level indirect * block. In the same way second-level blocks are remembered in third- * level blocks, and so on if necessary (of course we're talking huge * amounts of data here). The topmost indirect block of a given logical * tape is never actually written out to the physical file, but all lower- * level indirect blocks will be. * * The initial write pass is guaranteed to fill the underlying file * perfectly sequentially, no matter how data is divided into logical tapes. * Once we begin merge passes, the access pattern becomes considerably * less predictable --- but the seeking involved should be comparable to * what would happen if we kept each logical tape in a separate file, * so there's no serious performance penalty paid to obtain the space * savings of recycling. We try to localize the write accesses by always * writing to the lowest-numbered free block when we have a choice; it's * not clear this helps much, but it can't hurt. (XXX perhaps a LIFO * policy for free blocks would be better?) * * Since all the bookkeeping and buffer memory is allocated with palloc(), * and the underlying file(s) are made with OpenTemporaryFile, all resources * for a logical tape set are certain to be cleaned up even if processing * is aborted by elog(ERROR). To avoid confusion, the caller should take * care that all calls for a single LogicalTapeSet are made in the same * palloc context. * * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION * $Header: /cvsroot/pgsql/src/backend/utils/sort/logtape.c,v 1.5 2000/04/12 17:16:11 momjian Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" #include "storage/buffile.h" #include "utils/logtape.h" /* * Block indexes are "long"s, so we can fit this many per indirect block. * NB: we assume this is an exact fit! */ #define BLOCKS_PER_INDIR_BLOCK ((int) (BLCKSZ / sizeof(long))) /* * We use a struct like this for each active indirection level of each * logical tape. If the indirect block is not the highest level of its * tape, the "nextup" link points to the next higher level. Only the * "ptrs" array is written out if we have to dump the indirect block to * disk. If "ptrs" is not completely full, we store -1L in the first * unused slot at completion of the write phase for the logical tape. */ typedef struct IndirectBlock { int nextSlot; /* next pointer slot to write or read */ struct IndirectBlock *nextup; /* parent indirect level, or NULL * if top */ long ptrs[BLOCKS_PER_INDIR_BLOCK]; /* indexes of contained * blocks */ } IndirectBlock; /* * This data structure represents a single "logical tape" within the set * of logical tapes stored in the same file. We must keep track of the * current partially-read-or-written data block as well as the active * indirect block level(s). */ typedef struct LogicalTape { IndirectBlock *indirect; /* bottom of my indirect-block hierarchy */ bool writing; /* T while in write phase */ bool frozen; /* T if blocks should not be freed when * read */ bool dirty; /* does buffer need to be written? */ /* * The total data volume in the logical tape is numFullBlocks * BLCKSZ * + lastBlockBytes. BUT: we do not update lastBlockBytes during * writing, only at completion of a write phase. */ long numFullBlocks; /* number of complete blocks in log tape */ int lastBlockBytes; /* valid bytes in last (incomplete) block */ /* * Buffer for current data block. Note we don't bother to store the * actual file block number of the data block (during the write phase * it hasn't been assigned yet, and during read we don't care * anymore). But we do need the relative block number so we can detect * end-of-tape while reading. */ long curBlockNumber; /* this block's logical blk# within tape */ int pos; /* next read/write position in buffer */ int nbytes; /* total # of valid bytes in buffer */ char buffer[BLCKSZ]; } LogicalTape; /* * This data structure represents a set of related "logical tapes" sharing * space in a single underlying file. (But that "file" may be multiple files * if needed to escape OS limits on file size; buffile.c handles that for us.) * The number of tapes is fixed at creation. */ struct LogicalTapeSet { BufFile *pfile; /* underlying file for whole tape set */ long nFileBlocks; /* # of blocks used in underlying file */ /* * We store the numbers of recycled-and-available blocks in * freeBlocks[]. When there are no such blocks, we extend the * underlying file. Note that the block numbers in freeBlocks are * always in *decreasing* order, so that removing the last entry gives * us the lowest free block. */ long *freeBlocks; /* resizable array */ int nFreeBlocks; /* # of currently free blocks */ int freeBlocksLen; /* current allocated length of * freeBlocks[] */ /* * tapes[] is declared size 1 since C wants a fixed size, but actually * it is of length nTapes. */ int nTapes; /* # of logical tapes in set */ LogicalTape *tapes[1]; /* must be last in struct! */ }; static void ltsWriteBlock(LogicalTapeSet *lts, long blocknum, void *buffer); static void ltsReadBlock(LogicalTapeSet *lts, long blocknum, void *buffer); static long ltsGetFreeBlock(LogicalTapeSet *lts); static void ltsReleaseBlock(LogicalTapeSet *lts, long blocknum); static void ltsRecordBlockNum(LogicalTapeSet *lts, IndirectBlock *indirect, long blocknum); static long ltsRewindIndirectBlock(LogicalTapeSet *lts, IndirectBlock *indirect, bool freezing); static long ltsRewindFrozenIndirectBlock(LogicalTapeSet *lts, IndirectBlock *indirect); static long ltsRecallNextBlockNum(LogicalTapeSet *lts, IndirectBlock *indirect, bool frozen); static long ltsRecallPrevBlockNum(LogicalTapeSet *lts, IndirectBlock *indirect); static void ltsDumpBuffer(LogicalTapeSet *lts, LogicalTape *lt); /* * Write a block-sized buffer to the specified block of the underlying file. * * NB: should not attempt to write beyond current end of file (ie, create * "holes" in file), since BufFile doesn't allow that. The first write pass * must write blocks sequentially. * * No need for an error return convention; we elog() on any error. */ static void ltsWriteBlock(LogicalTapeSet *lts, long blocknum, void *buffer) { if (BufFileSeekBlock(lts->pfile, blocknum) != 0 || BufFileWrite(lts->pfile, buffer, BLCKSZ) != BLCKSZ) elog(ERROR, "ltsWriteBlock: failed to write block %ld of temporary file\n\t\tPerhaps out of disk space?", blocknum); } /* * Read a block-sized buffer from the specified block of the underlying file. * * No need for an error return convention; we elog() on any error. This * module should never attempt to read a block it doesn't know is there. */ static void ltsReadBlock(LogicalTapeSet *lts, long blocknum, void *buffer) { if (BufFileSeekBlock(lts->pfile, blocknum) != 0 || BufFileRead(lts->pfile, buffer, BLCKSZ) != BLCKSZ) elog(ERROR, "ltsReadBlock: failed to read block %ld of temporary file", blocknum); } /* * Select a currently unused block for writing to. * * NB: should only be called when writer is ready to write immediately, * to ensure that first write pass is sequential. */ static long ltsGetFreeBlock(LogicalTapeSet *lts) { /* * If there are multiple free blocks, we select the one appearing last * in freeBlocks[]. If there are none, assign the next block at the * end of the file. */ if (lts->nFreeBlocks > 0) return lts->freeBlocks[--lts->nFreeBlocks]; else return lts->nFileBlocks++; } /* * Return a block# to the freelist. */ static void ltsReleaseBlock(LogicalTapeSet *lts, long blocknum) { int ndx; long *ptr; /* * Enlarge freeBlocks array if full. */ if (lts->nFreeBlocks >= lts->freeBlocksLen) { lts->freeBlocksLen *= 2; lts->freeBlocks = (long *) repalloc(lts->freeBlocks, lts->freeBlocksLen * sizeof(long)); } /* * Insert blocknum into array, preserving decreasing order (so that * ltsGetFreeBlock returns the lowest available block number). This * could get fairly slow if there were many free blocks, but we don't * expect there to be very many at one time. */ ndx = lts->nFreeBlocks++; ptr = lts->freeBlocks + ndx; while (ndx > 0 && ptr[-1] < blocknum) { ptr[0] = ptr[-1]; ndx--, ptr--; } ptr[0] = blocknum; } /* * These routines manipulate indirect-block hierarchies. All are recursive * so that they don't have any specific limit on the depth of hierarchy. */ /* * Record a data block number in a logical tape's lowest indirect block, * or record an indirect block's number in the next higher indirect level. */ static void ltsRecordBlockNum(LogicalTapeSet *lts, IndirectBlock *indirect, long blocknum) { if (indirect->nextSlot >= BLOCKS_PER_INDIR_BLOCK) { /* * This indirect block is full, so dump it out and recursively * save its address in the next indirection level. Create a new * indirection level if there wasn't one before. */ long indirblock = ltsGetFreeBlock(lts); ltsWriteBlock(lts, indirblock, (void *) indirect->ptrs); if (indirect->nextup == NULL) { indirect->nextup = (IndirectBlock *) palloc(sizeof(IndirectBlock)); indirect->nextup->nextSlot = 0; indirect->nextup->nextup = NULL; } ltsRecordBlockNum(lts, indirect->nextup, indirblock); /* * Reset to fill another indirect block at this level. */ indirect->nextSlot = 0; } indirect->ptrs[indirect->nextSlot++] = blocknum; } /* * Reset a logical tape's indirect-block hierarchy after a write pass * to prepare for reading. We dump out partly-filled blocks except * at the top of the hierarchy, and we rewind each level to the start. * This call returns the first data block number, or -1L if the tape * is empty. * * Unless 'freezing' is true, release indirect blocks to the free pool after * reading them. */ static long ltsRewindIndirectBlock(LogicalTapeSet *lts, IndirectBlock *indirect, bool freezing) { /* Insert sentinel if block is not full */ if (indirect->nextSlot < BLOCKS_PER_INDIR_BLOCK) indirect->ptrs[indirect->nextSlot] = -1L; /* * If block is not topmost, write it out, and recurse to obtain * address of first block in this hierarchy level. Read that one in. */ if (indirect->nextup != NULL) { long indirblock = ltsGetFreeBlock(lts); ltsWriteBlock(lts, indirblock, (void *) indirect->ptrs); ltsRecordBlockNum(lts, indirect->nextup, indirblock); indirblock = ltsRewindIndirectBlock(lts, indirect->nextup, freezing); Assert(indirblock != -1L); ltsReadBlock(lts, indirblock, (void *) indirect->ptrs); if (!freezing) ltsReleaseBlock(lts, indirblock); } /* * Reset my next-block pointer, and then fetch a block number if any. */ indirect->nextSlot = 0; if (indirect->ptrs[0] == -1L) return -1L; return indirect->ptrs[indirect->nextSlot++]; } /* * Rewind a previously-frozen indirect-block hierarchy for another read pass. * This call returns the first data block number, or -1L if the tape * is empty. */ static long ltsRewindFrozenIndirectBlock(LogicalTapeSet *lts, IndirectBlock *indirect) { /* * If block is not topmost, recurse to obtain address of first block * in this hierarchy level. Read that one in. */ if (indirect->nextup != NULL) { long indirblock; indirblock = ltsRewindFrozenIndirectBlock(lts, indirect->nextup); Assert(indirblock != -1L); ltsReadBlock(lts, indirblock, (void *) indirect->ptrs); } /* * Reset my next-block pointer, and then fetch a block number if any. */ indirect->nextSlot = 0; if (indirect->ptrs[0] == -1L) return -1L; return indirect->ptrs[indirect->nextSlot++]; } /* * Obtain next data block number in the forward direction, or -1L if no more. * * Unless 'frozen' is true, release indirect blocks to the free pool after * reading them. */ static long ltsRecallNextBlockNum(LogicalTapeSet *lts, IndirectBlock *indirect, bool frozen) { if (indirect->nextSlot >= BLOCKS_PER_INDIR_BLOCK || indirect->ptrs[indirect->nextSlot] == -1L) { long indirblock; if (indirect->nextup == NULL) return -1L; /* nothing left at this level */ indirblock = ltsRecallNextBlockNum(lts, indirect->nextup, frozen); if (indirblock == -1L) return -1L; /* nothing left at this level */ ltsReadBlock(lts, indirblock, (void *) indirect->ptrs); if (!frozen) ltsReleaseBlock(lts, indirblock); indirect->nextSlot = 0; } if (indirect->ptrs[indirect->nextSlot] == -1L) return -1L; return indirect->ptrs[indirect->nextSlot++]; } /* * Obtain next data block number in the reverse direction, or -1L if no more. * * Note this fetches the block# before the one last returned, no matter which * direction of call returned that one. If we fail, no change in state. * * This routine can only be used in 'frozen' state, so there's no need to * pass a parameter telling whether to release blocks ... we never do. */ static long ltsRecallPrevBlockNum(LogicalTapeSet *lts, IndirectBlock *indirect) { if (indirect->nextSlot <= 1) { long indirblock; if (indirect->nextup == NULL) return -1L; /* nothing left at this level */ indirblock = ltsRecallPrevBlockNum(lts, indirect->nextup); if (indirblock == -1L) return -1L; /* nothing left at this level */ ltsReadBlock(lts, indirblock, (void *) indirect->ptrs); /* * The previous block would only have been written out if full, so * we need not search it for a -1 sentinel. */ indirect->nextSlot = BLOCKS_PER_INDIR_BLOCK + 1; } indirect->nextSlot--; return indirect->ptrs[indirect->nextSlot - 1]; } /* * Create a set of logical tapes in a temporary underlying file. * * Each tape is initialized in write state. */ LogicalTapeSet * LogicalTapeSetCreate(int ntapes) { LogicalTapeSet *lts; LogicalTape *lt; int i; /* * Create top-level struct. First LogicalTape pointer is already * counted in sizeof(LogicalTapeSet). */ Assert(ntapes > 0); lts = (LogicalTapeSet *) palloc(sizeof(LogicalTapeSet) + (ntapes - 1) *sizeof(LogicalTape *)); lts->pfile = BufFileCreateTemp(); lts->nFileBlocks = 0L; lts->freeBlocksLen = 32; /* reasonable initial guess */ lts->freeBlocks = (long *) palloc(lts->freeBlocksLen * sizeof(long)); lts->nFreeBlocks = 0; lts->nTapes = ntapes; /* * Create per-tape structs, including first-level indirect blocks. */ for (i = 0; i < ntapes; i++) { lt = (LogicalTape *) palloc(sizeof(LogicalTape)); lts->tapes[i] = lt; lt->indirect = (IndirectBlock *) palloc(sizeof(IndirectBlock)); lt->indirect->nextSlot = 0; lt->indirect->nextup = NULL; lt->writing = true; lt->frozen = false; lt->dirty = false; lt->numFullBlocks = 0L; lt->lastBlockBytes = 0; lt->curBlockNumber = 0L; lt->pos = 0; lt->nbytes = 0; } return lts; } /* * Close a logical tape set and release all resources. */ void LogicalTapeSetClose(LogicalTapeSet *lts) { LogicalTape *lt; IndirectBlock *ib, *nextib; int i; BufFileClose(lts->pfile); for (i = 0; i < lts->nTapes; i++) { lt = lts->tapes[i]; for (ib = lt->indirect; ib != NULL; ib = nextib) { nextib = ib->nextup; pfree(ib); } pfree(lt); } pfree(lts->freeBlocks); pfree(lts); } /* * Dump the dirty buffer of a logical tape. */ static void ltsDumpBuffer(LogicalTapeSet *lts, LogicalTape *lt) { long datablock = ltsGetFreeBlock(lts); Assert(lt->dirty); ltsWriteBlock(lts, datablock, (void *) lt->buffer); ltsRecordBlockNum(lts, lt->indirect, datablock); lt->dirty = false; /* Caller must do other state update as needed */ } /* * Write to a logical tape. * * There are no error returns; we elog() on failure. */ void LogicalTapeWrite(LogicalTapeSet *lts, int tapenum, void *ptr, size_t size) { LogicalTape *lt; size_t nthistime; Assert(tapenum >= 0 && tapenum < lts->nTapes); lt = lts->tapes[tapenum]; Assert(lt->writing); while (size > 0) { if (lt->pos >= BLCKSZ) { /* Buffer full, dump it out */ if (lt->dirty) ltsDumpBuffer(lts, lt); else { /* Hmm, went directly from reading to writing? */ elog(ERROR, "LogicalTapeWrite: impossible state"); } lt->numFullBlocks++; lt->curBlockNumber++; lt->pos = 0; lt->nbytes = 0; } nthistime = BLCKSZ - lt->pos; if (nthistime > size) nthistime = size; Assert(nthistime > 0); memcpy(lt->buffer + lt->pos, ptr, nthistime); lt->dirty = true; lt->pos += nthistime; if (lt->nbytes < lt->pos) lt->nbytes = lt->pos; ptr = (void *) ((char *) ptr + nthistime); size -= nthistime; } } /* * Rewind logical tape and switch from writing to reading or vice versa. * * Unless the tape has been "frozen" in read state, forWrite must be the * opposite of the previous tape state. */ void LogicalTapeRewind(LogicalTapeSet *lts, int tapenum, bool forWrite) { LogicalTape *lt; long datablocknum; Assert(tapenum >= 0 && tapenum < lts->nTapes); lt = lts->tapes[tapenum]; if (!forWrite) { if (lt->writing) { /* * Completion of a write phase. Flush last partial data * block, flush any partial indirect blocks, rewind for normal * (destructive) read. */ if (lt->dirty) ltsDumpBuffer(lts, lt); lt->lastBlockBytes = lt->nbytes; lt->writing = false; datablocknum = ltsRewindIndirectBlock(lts, lt->indirect, false); } else { /* * This is only OK if tape is frozen; we rewind for (another) * read pass. */ Assert(lt->frozen); datablocknum = ltsRewindFrozenIndirectBlock(lts, lt->indirect); } /* Read the first block, or reset if tape is empty */ lt->curBlockNumber = 0L; lt->pos = 0; lt->nbytes = 0; if (datablocknum != -1L) { ltsReadBlock(lts, datablocknum, (void *) lt->buffer); if (!lt->frozen) ltsReleaseBlock(lts, datablocknum); lt->nbytes = (lt->curBlockNumber < lt->numFullBlocks) ? BLCKSZ : lt->lastBlockBytes; } } else { /* * Completion of a read phase. Rewind and prepare for write. * * NOTE: we assume the caller has read the tape to the end; otherwise * untouched data and indirect blocks will not have been freed. We * could add more code to free any unread blocks, but in current * usage of this module it'd be useless code. */ IndirectBlock *ib, *nextib; Assert(!lt->writing && !lt->frozen); /* Must truncate the indirect-block hierarchy down to one level. */ for (ib = lt->indirect->nextup; ib != NULL; ib = nextib) { nextib = ib->nextup; pfree(ib); } lt->indirect->nextSlot = 0; lt->indirect->nextup = NULL; lt->writing = true; lt->dirty = false; lt->numFullBlocks = 0L; lt->lastBlockBytes = 0; lt->curBlockNumber = 0L; lt->pos = 0; lt->nbytes = 0; } } /* * Read from a logical tape. * * Early EOF is indicated by return value less than #bytes requested. */ size_t LogicalTapeRead(LogicalTapeSet *lts, int tapenum, void *ptr, size_t size) { LogicalTape *lt; size_t nread = 0; size_t nthistime; Assert(tapenum >= 0 && tapenum < lts->nTapes); lt = lts->tapes[tapenum]; Assert(!lt->writing); while (size > 0) { if (lt->pos >= lt->nbytes) { /* Try to load more data into buffer. */ long datablocknum = ltsRecallNextBlockNum(lts, lt->indirect, lt->frozen); if (datablocknum == -1L) break; /* EOF */ lt->curBlockNumber++; lt->pos = 0; ltsReadBlock(lts, datablocknum, (void *) lt->buffer); if (!lt->frozen) ltsReleaseBlock(lts, datablocknum); lt->nbytes = (lt->curBlockNumber < lt->numFullBlocks) ? BLCKSZ : lt->lastBlockBytes; if (lt->nbytes <= 0) break; /* EOF (possible here?) */ } nthistime = lt->nbytes - lt->pos; if (nthistime > size) nthistime = size; Assert(nthistime > 0); memcpy(ptr, lt->buffer + lt->pos, nthistime); lt->pos += nthistime; ptr = (void *) ((char *) ptr + nthistime); size -= nthistime; nread += nthistime; } return nread; } /* * "Freeze" the contents of a tape so that it can be read multiple times * and/or read backwards. Once a tape is frozen, its contents will not * be released until the LogicalTapeSet is destroyed. This is expected * to be used only for the final output pass of a merge. * * This *must* be called just at the end of a write pass, before the * tape is rewound (after rewind is too late!). It performs a rewind * and switch to read mode "for free". An immediately following rewind- * for-read call is OK but not necessary. */ void LogicalTapeFreeze(LogicalTapeSet *lts, int tapenum) { LogicalTape *lt; long datablocknum; Assert(tapenum >= 0 && tapenum < lts->nTapes); lt = lts->tapes[tapenum]; Assert(lt->writing); /* * Completion of a write phase. Flush last partial data block, flush * any partial indirect blocks, rewind for nondestructive read. */ if (lt->dirty) ltsDumpBuffer(lts, lt); lt->lastBlockBytes = lt->nbytes; lt->writing = false; lt->frozen = true; datablocknum = ltsRewindIndirectBlock(lts, lt->indirect, true); /* Read the first block, or reset if tape is empty */ lt->curBlockNumber = 0L; lt->pos = 0; lt->nbytes = 0; if (datablocknum != -1L) { ltsReadBlock(lts, datablocknum, (void *) lt->buffer); lt->nbytes = (lt->curBlockNumber < lt->numFullBlocks) ? BLCKSZ : lt->lastBlockBytes; } } /* * Backspace the tape a given number of bytes. (We also support a more * general seek interface, see below.) * * *Only* a frozen-for-read tape can be backed up; we don't support * random access during write, and an unfrozen read tape may have * already discarded the desired data! * * Return value is TRUE if seek successful, FALSE if there isn't that much * data before the current point (in which case there's no state change). */ bool LogicalTapeBackspace(LogicalTapeSet *lts, int tapenum, size_t size) { LogicalTape *lt; long nblocks; int newpos; Assert(tapenum >= 0 && tapenum < lts->nTapes); lt = lts->tapes[tapenum]; Assert(lt->frozen); /* * Easy case for seek within current block. */ if (size <= (size_t) lt->pos) { lt->pos -= (int) size; return true; } /* * Not-so-easy case. Figure out whether it's possible at all. */ size -= (size_t) lt->pos; /* part within this block */ nblocks = size / BLCKSZ; size = size % BLCKSZ; if (size) { nblocks++; newpos = (int) (BLCKSZ - size); } else newpos = 0; if (nblocks > lt->curBlockNumber) return false; /* a seek too far... */ /* * OK, we need to back up nblocks blocks. This implementation would * be pretty inefficient for long seeks, but we really aren't * expecting that (a seek over one tuple is typical). */ while (nblocks-- > 0) { long datablocknum = ltsRecallPrevBlockNum(lts, lt->indirect); if (datablocknum == -1L) elog(ERROR, "LogicalTapeBackspace: unexpected end of tape"); lt->curBlockNumber--; if (nblocks == 0) { ltsReadBlock(lts, datablocknum, (void *) lt->buffer); lt->nbytes = BLCKSZ; } } lt->pos = newpos; return true; } /* * Seek to an arbitrary position in a logical tape. * * *Only* a frozen-for-read tape can be seeked. * * Return value is TRUE if seek successful, FALSE if there isn't that much * data in the tape (in which case there's no state change). */ bool LogicalTapeSeek(LogicalTapeSet *lts, int tapenum, long blocknum, int offset) { LogicalTape *lt; Assert(tapenum >= 0 && tapenum < lts->nTapes); lt = lts->tapes[tapenum]; Assert(lt->frozen); Assert(offset >= 0 && offset <= BLCKSZ); /* * Easy case for seek within current block. */ if (blocknum == lt->curBlockNumber && offset <= lt->nbytes) { lt->pos = offset; return true; } /* * Not-so-easy case. Figure out whether it's possible at all. */ if (blocknum < 0 || blocknum > lt->numFullBlocks || (blocknum == lt->numFullBlocks && offset > lt->lastBlockBytes)) return false; /* * OK, advance or back up to the target block. This implementation * would be pretty inefficient for long seeks, but we really aren't * expecting that (a seek over one tuple is typical). */ while (lt->curBlockNumber > blocknum) { long datablocknum = ltsRecallPrevBlockNum(lts, lt->indirect); if (datablocknum == -1L) elog(ERROR, "LogicalTapeSeek: unexpected end of tape"); if (--lt->curBlockNumber == blocknum) ltsReadBlock(lts, datablocknum, (void *) lt->buffer); } while (lt->curBlockNumber < blocknum) { long datablocknum = ltsRecallNextBlockNum(lts, lt->indirect, lt->frozen); if (datablocknum == -1L) elog(ERROR, "LogicalTapeSeek: unexpected end of tape"); if (++lt->curBlockNumber == blocknum) ltsReadBlock(lts, datablocknum, (void *) lt->buffer); } lt->nbytes = (lt->curBlockNumber < lt->numFullBlocks) ? BLCKSZ : lt->lastBlockBytes; lt->pos = offset; return true; } /* * Obtain current position in a form suitable for a later LogicalTapeSeek. * * NOTE: it'd be OK to do this during write phase with intention of using * the position for a seek after freezing. Not clear if anyone needs that. */ void LogicalTapeTell(LogicalTapeSet *lts, int tapenum, long *blocknum, int *offset) { LogicalTape *lt; Assert(tapenum >= 0 && tapenum < lts->nTapes); lt = lts->tapes[tapenum]; *blocknum = lt->curBlockNumber; *offset = lt->pos; }