postgresql/src/include/executor/hashjoin.h

/*-------------------------------------------------------------------------
 *
 * hashjoin.h
 *	  internal structures for hash joins
 *
 *
 * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * $Id: hashjoin.h,v 1.16 2000/01/26 05:58:05 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
#ifndef HASHJOIN_H
#define HASHJOIN_H

#include "access/htup.h"
#include "storage/buffile.h"

/* ----------------------------------------------------------------
 *				hash-join hash table structures
 *
 * Each active hashjoin has a HashJoinTable control block which is
 * palloc'd in the executor's context.	All other storage needed for
 * the hashjoin is kept in a private "named portal", one for each hashjoin.
 * This makes it easy and fast to release the storage when we don't need it
 * anymore.
 *
 * The portal manager guarantees that portals will be discarded at end of
 * transaction, so we have no problem with a memory leak if the join is
 * aborted early by an error.  (Likewise, any temporary files we make will
 * be cleaned up by the virtual file manager in event of an error.)
 *
 * Storage that should live through the entire join is allocated from the
 * portal's "variable context", while storage that is only wanted for the
 * current batch is allocated in the portal's "heap context".  By popping
 * the portal's heap at the end of a batch, we free all the per-batch storage
 * reliably and without tedium.
 * ----------------------------------------------------------------
 */

/*
 * HashJoinTupleData: one entry in a hash bucket's chain.
 *
 * Entries that fall into the same bucket are linked through "next" to form
 * a singly linked list; the head of each list is kept in the "buckets"
 * array of HashTableData.
 */
typedef struct HashJoinTupleData
{
	struct HashJoinTupleData *next;		/* link to next tuple in same
										 * bucket */
	HeapTupleData htup;			/* tuple header */
} HashJoinTupleData;

/* HashJoinTuple: pointer to a bucket-chain entry (HashJoinTupleData). */
typedef HashJoinTupleData *HashJoinTuple;

/*
 * HashTableData: control block for one hash join.
 *
 * Holds the bucket array for the batch currently being processed, the
 * bookkeeping for multi-batch joins (temp files and their tuple counts),
 * and the handles for the storage areas the join allocates from.
 */
typedef struct HashTableData
{
	/* ---- bucket array ---- */
	int			nbuckets;		/* buckets in use during this batch */
	int			totalbuckets;	/* total number of (virtual) buckets */
	HashJoinTuple *buckets;		/* buckets[i] is head of list of tuples */
	/* buckets array is per-batch storage, as are all the tuples */

	/* ---- batch bookkeeping ---- */
	int			nbatch;			/* number of batches; 0 means 1-pass join */
	int			curbatch;		/* current batch #, or 0 during 1st pass */

	/*
	 * all these arrays are allocated for the life of the hash join, but
	 * only if nbatch > 0:
	 */
	BufFile   **innerBatchFile; /* buffered virtual temp file per batch */
	BufFile   **outerBatchFile; /* buffered virtual temp file per batch */
	long	   *outerBatchSize; /* count of tuples in each outer batch
								 * file */
	long	   *innerBatchSize; /* count of tuples in each inner batch
								 * file */

	/*
	 * How the batch files are used:
	 *
	 * During the 1st scan of the inner relation, we get tuples from the
	 * executor.  If nbatch > 0 then tuples that don't belong in the first
	 * nbuckets logical buckets get dumped into inner-batch temp files.
	 *
	 * The same statements apply for the 1st scan of the outer relation,
	 * except we write tuples to outer-batch temp files.
	 *
	 * If nbatch > 0 then we do the following for each batch:
	 *	1. Read tuples from inner batch file, load into hash buckets.
	 *	2. Read tuples from outer batch file, match to hash buckets and
	 *	   output.
	 */

	/* ---- working storage ---- */

	/*
	 * Ugly kluge: myPortal ought to be declared as type Portal (ie,
	 * PortalD*) but if we try to include utils/portal.h here, we end up
	 * with a circular dependency of include files!  Until the various
	 * node.h files are restructured in a cleaner way, we have to fake it.
	 * The most reliable fake seems to be to declare myPortal as void *
	 * and then cast it to the right things in nodeHash.c.
	 */
	void	   *myPortal;		/* where to keep working storage */
	MemoryContext hashCxt;		/* context for whole-hash-join storage */
	MemoryContext batchCxt;		/* context for this-batch-only storage */
} HashTableData;

/* HashJoinTable: pointer to a hash join's control block (HashTableData). */
typedef HashTableData *HashJoinTable;

#endif	 /* HASHJOIN_H */
Postgres95 1.01 Distribution - Virgin Sources 1996-07-09 08:22:35 +02:00			`/*-------------------------------------------------------------------------`
			`*`
Change my-function-name-- to my_function_name, and optimizer renames. 1999-02-14 00:22:53 +01:00			`* hashjoin.h`
Rewrite hash join to use simple linked lists instead of a fixed-size hashtable. This should prevent 'hashtable out of memory' errors, unless you really do run out of memory. Note: target size for hashtable is now taken from -S postmaster switch, not -B, since it is local memory in the backend rather than shared memory. 1999-05-18 23:33:06 +02:00			`* internal structures for hash joins`
Postgres95 1.01 Distribution - Virgin Sources 1996-07-09 08:22:35 +02:00			`*`
			`*`
Add: * Portions Copyright (c) 1996-2000, PostgreSQL, Inc to all files copyright Regents of Berkeley. Man, that's a lot of files. 2000-01-26 06:58:53 +01:00			`* Portions Copyright (c) 1996-2000, PostgreSQL, Inc`
			`* Portions Copyright (c) 1994, Regents of the University of California`
Postgres95 1.01 Distribution - Virgin Sources 1996-07-09 08:22:35 +02:00			`*`
Add: * Portions Copyright (c) 1996-2000, PostgreSQL, Inc to all files copyright Regents of Berkeley. Man, that's a lot of files. 2000-01-26 06:58:53 +01:00			`* $Id: hashjoin.h,v 1.16 2000/01/26 05:58:05 momjian Exp $`
Postgres95 1.01 Distribution - Virgin Sources 1996-07-09 08:22:35 +02:00			`*`
			`*-------------------------------------------------------------------------`
			`*/`
Massive commit to run PGINDENT on all .c and .h files. 1997-09-07 07:04:48 +02:00			`#ifndef HASHJOIN_H`
Postgres95 1.01 Distribution - Virgin Sources 1996-07-09 08:22:35 +02:00			`#define HASHJOIN_H`

Rewrite hash join to use simple linked lists instead of a fixed-size hashtable. This should prevent 'hashtable out of memory' errors, unless you really do run out of memory. Note: target size for hashtable is now taken from -S postmaster switch, not -B, since it is local memory in the backend rather than shared memory. 1999-05-18 23:33:06 +02:00			`#include "access/htup.h"`
Split 'BufFile' routines out of fd.c into a new module, buffile.c. Extend BufFile so that it handles multi-segment temporary files transparently. This allows sorts and hashes to work with data exceeding 2Gig (or whatever the local limit on file size is). Change psort.c to use relative seeks instead of absolute seeks for backwards scanning, so that it won't fail when the data volume exceeds 2Gig. 1999-10-13 17:02:32 +02:00			`#include "storage/buffile.h"`
Postgres95 1.01 Distribution - Virgin Sources 1996-07-09 08:22:35 +02:00
Rewrite hash join to use simple linked lists instead of a fixed-size hashtable. This should prevent 'hashtable out of memory' errors, unless you really do run out of memory. Note: target size for hashtable is now taken from -S postmaster switch, not -B, since it is local memory in the backend rather than shared memory. 1999-05-18 23:33:06 +02:00			`/* ----------------------------------------------------------------`
			`* hash-join hash table structures`
			`*`
			`* Each active hashjoin has a HashJoinTable control block which is`
pgindent run over code. 1999-05-25 18:15:34 +02:00			`* palloc'd in the executor's context. All other storage needed for`
Rewrite hash join to use simple linked lists instead of a fixed-size hashtable. This should prevent 'hashtable out of memory' errors, unless you really do run out of memory. Note: target size for hashtable is now taken from -S postmaster switch, not -B, since it is local memory in the backend rather than shared memory. 1999-05-18 23:33:06 +02:00			`* the hashjoin is kept in a private "named portal", one for each hashjoin.`
			`* This makes it easy and fast to release the storage when we don't need it`
			`* anymore.`
			`*`
			`* The portal manager guarantees that portals will be discarded at end of`
			`* transaction, so we have no problem with a memory leak if the join is`
			`* aborted early by an error. (Likewise, any temporary files we make will`
			`* be cleaned up by the virtual file manager in event of an error.)`
Fix some nasty coredump bugs in hashjoin. This code was just about certain to fail anytime it decided the relation to be hashed was too big to fit in memory --- the code for 'batching' a series of hashjoins had multiple errors. I've fixed the easier problems. A remaining big problem is that you can get 'hashtable out of memory' if the code's guesstimate about how much overflow space it will need turns out wrong. That will require much more extensive revisions to fix, so I'm committing these fixes now before I start on that problem. 1999-05-06 02:30:47 +02:00			`*`
Rewrite hash join to use simple linked lists instead of a fixed-size hashtable. This should prevent 'hashtable out of memory' errors, unless you really do run out of memory. Note: target size for hashtable is now taken from -S postmaster switch, not -B, since it is local memory in the backend rather than shared memory. 1999-05-18 23:33:06 +02:00			`* Storage that should live through the entire join is allocated from the`
			`* portal's "variable context", while storage that is only wanted for the`
			`* current batch is allocated in the portal's "heap context". By popping`
			`* the portal's heap at the end of a batch, we free all the per-batch storage`
			`* reliably and without tedium.`
			`* ----------------------------------------------------------------`
Postgres95 1.01 Distribution - Virgin Sources 1996-07-09 08:22:35 +02:00			`*/`

Rewrite hash join to use simple linked lists instead of a fixed-size hashtable. This should prevent 'hashtable out of memory' errors, unless you really do run out of memory. Note: target size for hashtable is now taken from -S postmaster switch, not -B, since it is local memory in the backend rather than shared memory. 1999-05-18 23:33:06 +02:00			`typedef struct HashJoinTupleData`
			`{`
pgindent run over code. 1999-05-25 18:15:34 +02:00			`struct HashJoinTupleData *next; /* link to next tuple in same`
			`* bucket */`
			`HeapTupleData htup; /* tuple header */`
Another pgindent run. Sorry folks. 1999-05-26 00:43:53 +02:00			`} HashJoinTupleData;`
Postgres95 1.01 Distribution - Virgin Sources 1996-07-09 08:22:35 +02:00
Rewrite hash join to use simple linked lists instead of a fixed-size hashtable. This should prevent 'hashtable out of memory' errors, unless you really do run out of memory. Note: target size for hashtable is now taken from -S postmaster switch, not -B, since it is local memory in the backend rather than shared memory. 1999-05-18 23:33:06 +02:00			`typedef HashJoinTupleData *HashJoinTuple;`
Postgres95 1.01 Distribution - Virgin Sources 1996-07-09 08:22:35 +02:00
Massive commit to run PGINDENT on all .c and .h files. 1997-09-07 07:04:48 +02:00			`typedef struct HashTableData`
			`{`
Rewrite hash join to use simple linked lists instead of a fixed-size hashtable. This should prevent 'hashtable out of memory' errors, unless you really do run out of memory. Note: target size for hashtable is now taken from -S postmaster switch, not -B, since it is local memory in the backend rather than shared memory. 1999-05-18 23:33:06 +02:00			`int nbuckets; /* buckets in use during this batch */`
			`int totalbuckets; /* total number of (virtual) buckets */`
pgindent run over code. 1999-05-25 18:15:34 +02:00			`HashJoinTuple *buckets; /* buckets[i] is head of list of tuples */`
Rewrite hash join to use simple linked lists instead of a fixed-size hashtable. This should prevent 'hashtable out of memory' errors, unless you really do run out of memory. Note: target size for hashtable is now taken from -S postmaster switch, not -B, since it is local memory in the backend rather than shared memory. 1999-05-18 23:33:06 +02:00			`/* buckets array is per-batch storage, as are all the tuples */`
Postgres95 1.01 Distribution - Virgin Sources 1996-07-09 08:22:35 +02:00
Rewrite hash join to use simple linked lists instead of a fixed-size hashtable. This should prevent 'hashtable out of memory' errors, unless you really do run out of memory. Note: target size for hashtable is now taken from -S postmaster switch, not -B, since it is local memory in the backend rather than shared memory. 1999-05-18 23:33:06 +02:00			`int nbatch; /* number of batches; 0 means 1-pass join */`
			`int curbatch; /* current batch #, or 0 during 1st pass */`
Postgres95 1.01 Distribution - Virgin Sources 1996-07-09 08:22:35 +02:00
pgindent run over code. 1999-05-25 18:15:34 +02:00			`/*`
			`* all these arrays are allocated for the life of the hash join, but`
			`* only if nbatch > 0:`
Rewrite hash join to use simple linked lists instead of a fixed-size hashtable. This should prevent 'hashtable out of memory' errors, unless you really do run out of memory. Note: target size for hashtable is now taken from -S postmaster switch, not -B, since it is local memory in the backend rather than shared memory. 1999-05-18 23:33:06 +02:00			`*/`
pgindent run over code. 1999-05-25 18:15:34 +02:00			`BufFile **innerBatchFile; /* buffered virtual temp file per batch */`
			`BufFile **outerBatchFile; /* buffered virtual temp file per batch */`
			`long *outerBatchSize; /* count of tuples in each outer batch`
			`* file */`
			`long *innerBatchSize; /* count of tuples in each inner batch`
			`* file */`
Postgres95 1.01 Distribution - Virgin Sources 1996-07-09 08:22:35 +02:00
pgindent run over code. 1999-05-25 18:15:34 +02:00			`/*`
			`* During 1st scan of inner relation, we get tuples from executor. If`
			`* nbatch > 0 then tuples that don't belong in first nbuckets logical`
			`* buckets get dumped into inner-batch temp files. The same statements`
			`* apply for the 1st scan of the outer relation, except we write`
			`* tuples to outer-batch temp files. If nbatch > 0 then we do the`
			`* following for each batch: 1. Read tuples from inner batch file,`
			`* load into hash buckets. 2. Read tuples from outer batch file, match`
			`* to hash buckets and output.`
Rewrite hash join to use simple linked lists instead of a fixed-size hashtable. This should prevent 'hashtable out of memory' errors, unless you really do run out of memory. Note: target size for hashtable is now taken from -S postmaster switch, not -B, since it is local memory in the backend rather than shared memory. 1999-05-18 23:33:06 +02:00			`*/`
Postgres95 1.01 Distribution - Virgin Sources 1996-07-09 08:22:35 +02:00
pgindent run over code. 1999-05-25 18:15:34 +02:00			`/*`
			`* Ugly kluge: myPortal ought to be declared as type Portal (ie,`
			`* PortalD*) but if we try to include utils/portal.h here, we end up`
			`* with a circular dependency of include files! Until the various`
			`* node.h files are restructured in a cleaner way, we have to fake it.`
			`* The most reliable fake seems to be to declare myPortal as void *`
			`* and then cast it to the right things in nodeHash.c.`
Rewrite hash join to use simple linked lists instead of a fixed-size hashtable. This should prevent 'hashtable out of memory' errors, unless you really do run out of memory. Note: target size for hashtable is now taken from -S postmaster switch, not -B, since it is local memory in the backend rather than shared memory. 1999-05-18 23:33:06 +02:00			`*/`
pgindent run over code. 1999-05-25 18:15:34 +02:00			`void *myPortal; /* where to keep working storage */`
			`MemoryContext hashCxt; /* context for whole-hash-join storage */`
			`MemoryContext batchCxt; /* context for this-batch-only storage */`
Rewrite hash join to use simple linked lists instead of a fixed-size hashtable. This should prevent 'hashtable out of memory' errors, unless you really do run out of memory. Note: target size for hashtable is now taken from -S postmaster switch, not -B, since it is local memory in the backend rather than shared memory. 1999-05-18 23:33:06 +02:00			`} HashTableData;`
Postgres95 1.01 Distribution - Virgin Sources 1996-07-09 08:22:35 +02:00
Rewrite hash join to use simple linked lists instead of a fixed-size hashtable. This should prevent 'hashtable out of memory' errors, unless you really do run out of memory. Note: target size for hashtable is now taken from -S postmaster switch, not -B, since it is local memory in the backend rather than shared memory. 1999-05-18 23:33:06 +02:00			`typedef HashTableData *HashJoinTable;`
Postgres95 1.01 Distribution - Virgin Sources 1996-07-09 08:22:35 +02:00
OK, folks, here is the pgindent output. 1998-09-01 06:40:42 +02:00			`#endif /* HASHJOIN_H */`