Install a lookaside cache to speed up repeated lookups of the same operator

by short-circuiting schema search path and ambiguous-operator resolution
computations.  Remarkably, this buys as much as a 45% speedup of repetitive
simple queries that involve operators that are not an exact match to the
input datatypes.  It should be marginally faster even for exact-match
cases, though I've not had success in proving an improvement in benchmark
tests.  Per report from Guillaume Smet and subsequent discussion.
Tom Lane 2007-11-28 18:47:56 +00:00
parent a238bd146d
commit d54ca56743
3 changed files with 311 additions and 6 deletions

src/backend/catalog/namespace.c

@@ -13,7 +13,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/catalog/namespace.c,v 1.102 2007/11/25 02:09:46 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/catalog/namespace.c,v 1.103 2007/11/28 18:47:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -3006,6 +3006,40 @@ fetch_search_path(bool includeImplicit)
return result;
}
/*
* Fetch the active search path into a caller-allocated array of OIDs.
* Returns the number of path entries. (If this is more than sarray_len,
* then the data didn't fit and is not all stored.)
*
* The returned list always includes the implicitly-prepended namespaces,
* but never includes the temp namespace. (This is suitable for existing
* users, which would want to ignore the temp namespace anyway.) This
* definition allows us to not worry about initializing the temp namespace.
*/
int
fetch_search_path_array(Oid *sarray, int sarray_len)
{
int count = 0;
ListCell *l;
recomputeNamespacePath();
foreach(l, activeSearchPath)
{
Oid namespaceId = lfirst_oid(l);
if (namespaceId == myTempNamespace)
continue; /* do not include temp namespace */
if (count < sarray_len)
sarray[count] = namespaceId;
count++;
}
return count;
}
/*
* Export the FooIsVisible functions as SQL-callable functions.
*/
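The return convention above (the count can exceed sarray_len) leaves truncation handling to the caller. A minimal caller sketch, assuming only what the comment above specifies; the buffer length 16 is an arbitrary illustrative choice:

	Oid		path[16];		/* illustrative fixed-size buffer */
	int		npath;

	npath = fetch_search_path_array(path, 16);
	if (npath > 16)
	{
		/* didn't fit: fall back to an uncached / slow path */
	}
	else
	{
		/* path[0..npath-1] holds the effective namespace OIDs */
	}

This is exactly the pattern make_oper_cache_key() in parse_oper.c uses below.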

src/backend/parser/parse_oper.c

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/parser/parse_oper.c,v 1.98 2007/11/22 19:40:25 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/parser/parse_oper.c,v 1.99 2007/11/28 18:47:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -24,11 +24,47 @@
#include "parser/parse_oper.h"
#include "parser/parse_type.h"
#include "utils/builtins.h"
#include "utils/hsearch.h"
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
/*
* The lookup key for the operator lookaside hash table. Unused bits must be
* zeroes to ensure hashing works consistently --- in particular, oprname
* must be zero-padded and any unused entries in search_path must be zero.
*
* search_path contains the actual search_path with which the entry was
* derived (minus temp namespace if any), or else the single specified
* schema OID if we are looking up an explicitly-qualified operator name.
*
* search_path has to be fixed-length since the hashtable code insists on
* fixed-size keys. If your search path is longer than that, we just punt
* and don't cache anything.
*/
/* If your search_path is longer than this, sucks to be you ... */
#define MAX_CACHED_PATH_LEN 16
typedef struct OprCacheKey
{
char oprname[NAMEDATALEN];
Oid left_arg; /* Left input OID, or 0 if prefix op */
Oid right_arg; /* Right input OID, or 0 if postfix op */
Oid search_path[MAX_CACHED_PATH_LEN];
} OprCacheKey;
typedef struct OprCacheEntry
{
/* the hash lookup key MUST BE FIRST */
OprCacheKey key;
Oid opr_oid; /* OID of the resolved operator */
} OprCacheEntry;
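/*
 * Illustration (not part of the patch): dynahash's tag_hash hashes the
 * raw bytes of the whole key struct, so logically equal keys must be
 * byte-identical.  A key built as
 *
 *		MemSet(&key, 0, sizeof(key));
 *		strlcpy(key.oprname, "=", NAMEDATALEN);
 *		key.left_arg = key.right_arg = 23;		(23 = INT4OID)
 *
 * has fully deterministic contents; without the MemSet, stack garbage in
 * oprname[] beyond the terminating '\0' would make equal keys hash apart.
 */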
static Oid binary_oper_exact(List *opname, Oid arg1, Oid arg2);
static FuncDetailCode oper_select_candidate(int nargs,
Oid *input_typeids,
@@ -42,6 +78,11 @@ static void op_error(ParseState *pstate, List *op, char oprkind,
static Expr *make_op_expr(ParseState *pstate, Operator op,
Node *ltree, Node *rtree,
Oid ltypeId, Oid rtypeId);
static bool make_oper_cache_key(OprCacheKey *key, List *opname,
Oid ltypeId, Oid rtypeId);
static Oid find_oper_cache_entry(OprCacheKey *key);
static void make_oper_cache_entry(OprCacheKey *key, Oid opr_oid);
static void InvalidateOprCacheCallBack(Datum arg, Oid relid);
/*
@@ -496,9 +537,28 @@ oper(ParseState *pstate, List *opname, Oid ltypeId, Oid rtypeId,
bool noError, int location)
{
Oid operOid;
OprCacheKey key;
bool key_ok;
FuncDetailCode fdresult = FUNCDETAIL_NOTFOUND;
HeapTuple tup = NULL;
/*
* Try to find the mapping in the lookaside cache.
*/
key_ok = make_oper_cache_key(&key, opname, ltypeId, rtypeId);
if (key_ok)
{
operOid = find_oper_cache_entry(&key);
if (OidIsValid(operOid))
{
tup = SearchSysCache(OPEROID,
ObjectIdGetDatum(operOid),
0, 0, 0);
if (HeapTupleIsValid(tup))
return (Operator) tup;
}
}
/*
* First try for an "exact" match.
*/
@@ -537,7 +597,12 @@ oper(ParseState *pstate, List *opname, Oid ltypeId, Oid rtypeId,
ObjectIdGetDatum(operOid),
0, 0, 0);
- if (!HeapTupleIsValid(tup) && !noError)
+ if (HeapTupleIsValid(tup))
+ {
+     if (key_ok)
+         make_oper_cache_entry(&key, operOid);
+ }
+ else if (!noError)
op_error(pstate, opname, 'b', ltypeId, rtypeId, fdresult, location);
return (Operator) tup;
@@ -622,9 +687,28 @@ Operator
right_oper(ParseState *pstate, List *op, Oid arg, bool noError, int location)
{
Oid operOid;
OprCacheKey key;
bool key_ok;
FuncDetailCode fdresult = FUNCDETAIL_NOTFOUND;
HeapTuple tup = NULL;
/*
* Try to find the mapping in the lookaside cache.
*/
key_ok = make_oper_cache_key(&key, op, arg, InvalidOid);
if (key_ok)
{
operOid = find_oper_cache_entry(&key);
if (OidIsValid(operOid))
{
tup = SearchSysCache(OPEROID,
ObjectIdGetDatum(operOid),
0, 0, 0);
if (HeapTupleIsValid(tup))
return (Operator) tup;
}
}
/*
* First try for an "exact" match.
*/
@@ -655,7 +739,12 @@ right_oper(ParseState *pstate, List *op, Oid arg, bool noError, int location)
ObjectIdGetDatum(operOid),
0, 0, 0);
- if (!HeapTupleIsValid(tup) && !noError)
+ if (HeapTupleIsValid(tup))
+ {
+     if (key_ok)
+         make_oper_cache_entry(&key, operOid);
+ }
+ else if (!noError)
op_error(pstate, op, 'r', arg, InvalidOid, fdresult, location);
return (Operator) tup;
@@ -680,9 +769,28 @@ Operator
left_oper(ParseState *pstate, List *op, Oid arg, bool noError, int location)
{
Oid operOid;
OprCacheKey key;
bool key_ok;
FuncDetailCode fdresult = FUNCDETAIL_NOTFOUND;
HeapTuple tup = NULL;
/*
* Try to find the mapping in the lookaside cache.
*/
key_ok = make_oper_cache_key(&key, op, InvalidOid, arg);
if (key_ok)
{
operOid = find_oper_cache_entry(&key);
if (OidIsValid(operOid))
{
tup = SearchSysCache(OPEROID,
ObjectIdGetDatum(operOid),
0, 0, 0);
if (HeapTupleIsValid(tup))
return (Operator) tup;
}
}
/*
* First try for an "exact" match.
*/
@@ -725,7 +833,12 @@ left_oper(ParseState *pstate, List *op, Oid arg, bool noError, int location)
ObjectIdGetDatum(operOid),
0, 0, 0);
- if (!HeapTupleIsValid(tup) && !noError)
+ if (HeapTupleIsValid(tup))
+ {
+     if (key_ok)
+         make_oper_cache_entry(&key, operOid);
+ }
+ else if (!noError)
op_error(pstate, op, 'l', InvalidOid, arg, fdresult, location);
return (Operator) tup;
@@ -1018,3 +1131,160 @@ make_op_expr(ParseState *pstate, Operator op,
return (Expr *) result;
}
/*
* Lookaside cache to speed operator lookup. Possibly this should be in
* a separate module under utils/cache/ ?
*
* The idea here is that the mapping from operator name and given argument
* types is constant for a given search path (or single specified schema OID)
* so long as the contents of pg_operator and pg_cast don't change. And that
* mapping is pretty expensive to compute, especially for ambiguous operators;
* this is mainly because there are a *lot* of instances of popular operator
* names such as "=", and we have to check each one to see which is the
* best match. So once we have identified the correct mapping, we save it
* in a cache that need only be flushed on pg_operator or pg_cast change.
* (pg_cast must be considered because changes in the set of implicit casts
* affect the set of applicable operators for any given input datatype.)
*
* XXX in principle, ALTER TABLE ... INHERIT could affect the mapping as
* well, but we disregard that since there's no convenient way to find out
* about it, and it seems a pretty far-fetched corner-case anyway.
*
* Note: at some point it might be worth doing a similar cache for function
* lookups. However, the potential gain is a lot less since (a) function
* names are generally not overloaded as heavily as operator names, and
* (b) we'd have to flush on pg_proc updates, which are probably a good
* deal more common than pg_operator updates.
*/
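/*
 * Concrete example of the pg_cast dependency above (illustration, not part
 * of the patch): creating or dropping an implicit cast between two types
 * can change which candidate operator wins ambiguous resolution for those
 * types, so cached resolutions must be discarded whenever pg_cast changes.
 */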
/* The operator cache hashtable */
static HTAB *OprCacheHash = NULL;
/*
* make_oper_cache_key
* Fill the lookup key struct given operator name and arg types.
*
* Returns TRUE if successful, FALSE if the search_path overflowed
* (hence no caching is possible).
*/
static bool
make_oper_cache_key(OprCacheKey *key, List *opname, Oid ltypeId, Oid rtypeId)
{
char *schemaname;
char *opername;
/* deconstruct the name list */
DeconstructQualifiedName(opname, &schemaname, &opername);
/* ensure zero-fill for stable hashing */
MemSet(key, 0, sizeof(OprCacheKey));
/* save operator name and input types into key */
strlcpy(key->oprname, opername, NAMEDATALEN);
key->left_arg = ltypeId;
key->right_arg = rtypeId;
if (schemaname)
{
/* search only in exact schema given */
key->search_path[0] = LookupExplicitNamespace(schemaname);
}
else
{
/* get the active search path */
if (fetch_search_path_array(key->search_path,
MAX_CACHED_PATH_LEN) > MAX_CACHED_PATH_LEN)
return false; /* oops, didn't fit */
}
return true;
}
/*
* find_oper_cache_entry
*
* Look for a cache entry matching the given key. If found, return the
* contained operator OID, else return InvalidOid.
*/
static Oid
find_oper_cache_entry(OprCacheKey *key)
{
OprCacheEntry *oprentry;
if (OprCacheHash == NULL)
{
/* First time through: initialize the hash table */
HASHCTL ctl;
if (!CacheMemoryContext)
CreateCacheMemoryContext();
MemSet(&ctl, 0, sizeof(ctl));
ctl.keysize = sizeof(OprCacheKey);
ctl.entrysize = sizeof(OprCacheEntry);
ctl.hash = tag_hash;
OprCacheHash = hash_create("Operator lookup cache", 256,
&ctl, HASH_ELEM | HASH_FUNCTION);
/* Arrange to flush cache on pg_operator and pg_cast changes */
CacheRegisterSyscacheCallback(OPERNAMENSP,
InvalidateOprCacheCallBack,
(Datum) 0);
CacheRegisterSyscacheCallback(CASTSOURCETARGET,
InvalidateOprCacheCallBack,
(Datum) 0);
}
/* Look for an existing entry */
oprentry = (OprCacheEntry *) hash_search(OprCacheHash,
(void *) key,
HASH_FIND, NULL);
if (oprentry == NULL)
return InvalidOid;
return oprentry->opr_oid;
}
/*
* make_oper_cache_entry
*
* Insert a cache entry for the given key.
*/
static void
make_oper_cache_entry(OprCacheKey *key, Oid opr_oid)
{
OprCacheEntry *oprentry;
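	/*
	 * The hash table must already exist: an entry is recorded only after
	 * the caller has probed the cache via find_oper_cache_entry, which
	 * creates the table on first use.
	 */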
Assert(OprCacheHash != NULL);
oprentry = (OprCacheEntry *) hash_search(OprCacheHash,
(void *) key,
HASH_ENTER, NULL);
oprentry->opr_oid = opr_oid;
}
/*
* Callback for pg_operator and pg_cast inval events
*/
static void
InvalidateOprCacheCallBack(Datum arg, Oid relid)
{
HASH_SEQ_STATUS status;
OprCacheEntry *hentry;
Assert(OprCacheHash != NULL);
/* Currently we just flush all entries; hard to be smarter ... */
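	/*
	 * dynahash explicitly permits deleting the entry just returned by
	 * hash_seq_search, so removing entries mid-scan is safe.
	 */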
hash_seq_init(&status, OprCacheHash);
while ((hentry = (OprCacheEntry *) hash_seq_search(&status)) != NULL)
{
if (hash_search(OprCacheHash,
(void *) &hentry->key,
HASH_REMOVE, NULL) == NULL)
elog(ERROR, "hash table corrupted");
}
}

src/include/catalog/namespace.h

@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/catalog/namespace.h,v 1.51 2007/11/15 22:25:17 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/namespace.h,v 1.52 2007/11/28 18:47:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -115,5 +115,6 @@ extern void AtEOSubXact_Namespace(bool isCommit, SubTransactionId mySubid,
extern char *namespace_search_path;
extern List *fetch_search_path(bool includeImplicit);
extern int fetch_search_path_array(Oid *sarray, int sarray_len);
#endif /* NAMESPACE_H */