From 8f0ca623ffe71e3d7ae511b2c404b2eece085095 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Sun, 19 Jul 1998 18:26:41 +0000 Subject: [PATCH] I finally got the time to put together some stuff for fti for inclusion in pgsql. I have included a README which should be enough to start using it, plus a BENCH file that describes some timings I have done. Please have a look at it, and if you think everything is OK, I would like it seen included in the contrib-section of pgsql. I don't think I will do any more work in this, but maybe it inspires somebody else to improve on it. Maarten Boekhold --- contrib/fulltextindex/BENCH | 97 ++++++++ contrib/fulltextindex/Makefile | 24 ++ contrib/fulltextindex/README | 95 ++++++++ contrib/fulltextindex/fti.c | 381 +++++++++++++++++++++++++++++++ contrib/fulltextindex/timings.sh | 350 ++++++++++++++++++++++++++++ 5 files changed, 947 insertions(+) create mode 100644 contrib/fulltextindex/BENCH create mode 100644 contrib/fulltextindex/Makefile create mode 100644 contrib/fulltextindex/README create mode 100644 contrib/fulltextindex/fti.c create mode 100755 contrib/fulltextindex/timings.sh diff --git a/contrib/fulltextindex/BENCH b/contrib/fulltextindex/BENCH new file mode 100644 index 0000000000..d8375fe606 --- /dev/null +++ b/contrib/fulltextindex/BENCH @@ -0,0 +1,97 @@ +The following data was generated by the 'timings.sh' script included +in this directory. It uses a very large table with music-related +articles as a source for the fti-table. The tables used are: + +product : contains product information : 540.429 rows +artist_fti : fti table for product : 4.501.321 rows +clustered : same as above, only clustered : 4.501.321 rows + +A sequential scan of the artist_fti table (and thus also the clustered table) +takes around 6:16 minutes.... + +Unfortunately I cannot probide anybody else with this test-date, since I +am not allowed to redistribute the data (it's a database being sold by +a couple of wholesale companies). Anyways, it's megabytes, so you probably +wouldn't want it in this distribution anyways. + +I haven't tested this with less data. + +The test-machine is a Pentium 133, 64 MB, Linux 2.0.32 with the database +on a 'QUANTUM BIGFOOT_CY4320A, 4134MB w/67kB Cache, CHS=8960/15/63'. This +is a very slow disk. + +The postmaster was running with: + + postmaster -i -b /usr/local/pgsql/bin/postgres -S 1024 -B 256 \ + -o -o /usr/local/pgsql/debug-output -F -d 1 + +('trashing' means a 'select count(*) from artist_fti' to completely trash +any disk-caches and buffers....) + +TESTING ON UNCLUSTERED FTI +trashing +1: ^lapton and ^ric : 0.050u 0.000s 5m37.484s 0.01% +2: ^lapton and ^ric : 0.050u 0.030s 5m32.447s 0.02% +3: ^lapton and ^ric : 0.030u 0.020s 5m28.822s 0.01% +trashing +1: ^lling and ^tones : 0.020u 0.030s 0m54.313s 0.09% +2: ^lling and ^tones : 0.040u 0.030s 0m5.057s 1.38% +3: ^lling and ^tones : 0.010u 0.050s 0m2.072s 2.89% +trashing +1: ^aughan and ^evie : 0.020u 0.030s 0m26.241s 0.19% +2: ^aughan and ^evie : 0.050u 0.010s 0m1.316s 4.55% +3: ^aughan and ^evie : 0.030u 0.020s 0m1.029s 4.85% +trashing +1: ^lling : 0.040u 0.010s 0m55.104s 0.09% +2: ^lling : 0.030u 0.030s 0m4.716s 1.27% +3: ^lling : 0.040u 0.010s 0m2.157s 2.31% +trashing +1: ^stev and ^ray and ^vaugh : 0.040u 0.000s 1m5.630s 0.06% +2: ^stev and ^ray and ^vaugh : 0.050u 0.020s 1m3.561s 0.11% +3: ^stev and ^ray and ^vaugh : 0.050u 0.010s 1m5.923s 0.09% +trashing +1: ^lling (no join) : 0.050u 0.020s 0m24.139s 0.28% +2: ^lling (no join) : 0.040u 0.040s 0m1.087s 7.35% +3: ^lling (no join) : 0.020u 0.030s 0m0.772s 6.48% +trashing +1: ^vaughan (no join) : 0.040u 0.030s 0m9.075s 0.77% +2: ^vaughan (no join) : 0.030u 0.010s 0m0.609s 6.56% +3: ^vaughan (no join) : 0.040u 0.010s 0m0.503s 9.94% +trashing +1: ^rol (no join) : 0.020u 0.030s 0m49.898s 0.10% +2: ^rol (no join) : 0.030u 0.020s 0m3.136s 1.59% +3: ^rol (no join) : 0.030u 0.020s 0m1.231s 4.06% + +TESTING ON CLUSTERED FTI +trashing +1: ^lapton and ^ric : 0.020u 0.020s 2m17.120s 0.02% +2: ^lapton and ^ric : 0.030u 0.020s 2m11.767s 0.03% +3: ^lapton and ^ric : 0.040u 0.010s 2m8.128s 0.03% +trashing +1: ^lling and ^tones : 0.020u 0.030s 0m18.179s 0.27% +2: ^lling and ^tones : 0.030u 0.010s 0m1.897s 2.10% +3: ^lling and ^tones : 0.040u 0.010s 0m1.619s 3.08% +trashing +1: ^aughan and ^evie : 0.070u 0.010s 0m11.765s 0.67% +2: ^aughan and ^evie : 0.040u 0.010s 0m1.198s 4.17% +3: ^aughan and ^evie : 0.030u 0.020s 0m0.872s 5.73% +trashing +1: ^lling : 0.040u 0.000s 0m28.623s 0.13% +2: ^lling : 0.030u 0.010s 0m2.339s 1.70% +3: ^lling : 0.030u 0.010s 0m1.975s 2.02% +trashing +1: ^stev and ^ray and ^vaugh : 0.020u 0.010s 0m17.667s 0.16% +2: ^stev and ^ray and ^vaugh : 0.030u 0.010s 0m3.745s 1.06% +3: ^stev and ^ray and ^vaugh : 0.030u 0.020s 0m3.439s 1.45% +trashing +1: ^lling (no join) : 0.020u 0.040s 0m2.218s 2.70% +2: ^lling (no join) : 0.020u 0.020s 0m0.506s 7.90% +3: ^lling (no join) : 0.030u 0.030s 0m0.510s 11.76% +trashing +1: ^vaughan (no join) : 0.040u 0.050s 0m2.048s 4.39% +2: ^vaughan (no join) : 0.030u 0.020s 0m0.332s 15.04% +3: ^vaughan (no join) : 0.040u 0.010s 0m0.318s 15.72% +trashing +1: ^rol (no join) : 0.020u 0.030s 0m2.384s 2.09% +2: ^rol (no join) : 0.020u 0.030s 0m0.676s 7.39% +3: ^rol (no join) : 0.020u 0.030s 0m0.697s 7.17% diff --git a/contrib/fulltextindex/Makefile b/contrib/fulltextindex/Makefile new file mode 100644 index 0000000000..c5986b02f7 --- /dev/null +++ b/contrib/fulltextindex/Makefile @@ -0,0 +1,24 @@ +SRCDIR= ../../src + +include $(SRCDIR)/Makefile.global + +CONTRIBDIR=$(LIBDIR)/contrib + +CFLAGS+= $(CFLAGS_SL) -I$(SRCDIR)/include + +TARGETS= fti$(DLSUFFIX) +CLEANFILES+= $(TARGETS) +CURDIR=`pwd` + +all:: $(TARGETS) + +%.sql: %.source + rm -f $@; \ + sed -e "s:_CURRENTDIR_:$(CURDIR):g" \ + -e "s:_DLSUFFIX_:$(DLSUFFIX):g" < $< > $@ + +clean: + rm -f $(TARGETS) *.o + +dist: + tar cf fti.tar README BENCH Makefile fti.c timings.sh diff --git a/contrib/fulltextindex/README b/contrib/fulltextindex/README new file mode 100644 index 0000000000..06ea57f53e --- /dev/null +++ b/contrib/fulltextindex/README @@ -0,0 +1,95 @@ +An attempt at some sort of Full Text Indexing for PostgreSQL. + +The included software is an attempt to add some sort of Full Text Indexing +support to PostgreSQL. I mean by this that we can ask questions like: + + Give me all rows that have 'still' and 'nash' in the 'artist' field. + +Ofcourse we can write this as: + + select * from cds where artist ~* 'stills' and artist ~* 'nash'; + +But this does not use any indices, and therefore, if your database +gets very large, it will not have very high performance (the above query +requires at least one sequential scan, it probably takes 2 due to the +self-join). + +The approach used by this add-on is to define a trigger on the table and +column you want to do this queries on. On every insert in the table, it +takes the value in the specified column, breaks the text in this column +up into pieces, and stores all sub-strings into another table, together +with a reference to the row in the original table that contained this +sub-string (it uses the oid of that row). + +By now creating an index over the 'fti-table', we can search for +substrings that occur in the original table. By making a join between +the fti-table and the orig-table, we can get the actual rows we want +(this can also be done by using subselects, and maybe there're other +ways too). + +As an example we take the previous query, where we assume we have all +sub-strings in the table 'cds-fti': + + select c.* + from cds c, cds-fti f1, cds-fti f2 + where f1.string ~ '^stills' and + f2.string ~ '^nash' and + f1.id = c.oid and + f2.id = c.oid ; + +We can use the ~ (case-sensitive regular expression) here, because of +the way sub-strings are built: from right to left, ie. house -> 'se' + +'use' + 'ouse' + 'house'. If a ~ search starts with a ^ (match start of +string), btree indices can be used by PostgreSQL. + +Now, how do we create the trigger that maintains the fti-table? First: the +fti-table should have the following schema: + + create cds-fti ( string varchar(N), id oid ); + +Don't change the *names* of the columns, the varchar() can in fact also +be of text-type. If you do use varchar, make sure the largest possible +sub-string will fit. + +The create the function that contains the trigger:: + + create function fti() returns opaque as '/path/to/fti.so' language 'C'; + +And finally define the trigger on the 'cds' table: + + create trigger cds-fti-trigger after update or insert or delete on cds + for each row execute procedure fti(cds-fti, artist); + +Here, the trigger will be defined on table 'cds', it will create +sub-strings from the field 'artist', and it will place those sub-strings +in the table 'cds-fti'. + +Now populate the table 'cds'. This will also populate the table 'cds-fti'. +It's fastest to populate the table *before* you create the indices. + +Before you start using the system, you should at least have the following +indices: + + create index cds-fti-idx on cds-fti (string, id); + create index cds-oid-idx on cds (oid); + +To get the most performance out of this, you should have 'cds-fti' +clustered on disk, ie. all rows with the same sub-strings should be +close to each other. There are 3 ways of doing this: + +1. After you have created the indices, execute 'cluster cds-fti-idx on cds-fti'. +2. Do a 'select * into tmp-table from cds-fti order by string' *before* + you create the indices, then 'drop table cds-fti' and + 'alter table tmp-table rename to cds-fti' +3. *Before* creating indices, dump the contents of the cds-fti table using + 'pg_dump -a -t cds-fti dbase-name', remove the \connect + from the beginning and the \. from the end, and sort it using the + UNIX 'sort' program, and reload the data. + +Method 1 is very slow, 2 a lot faster, and for very large tables, 3 is +preferred. + + + + + diff --git a/contrib/fulltextindex/fti.c b/contrib/fulltextindex/fti.c new file mode 100644 index 0000000000..1131b6078b --- /dev/null +++ b/contrib/fulltextindex/fti.c @@ -0,0 +1,381 @@ +#include "executor/spi.h" +#include "commands/trigger.h" +#include "c.h" /* endof() macro */ +#include /* tolower */ +#include /* debugging */ + +/* + * Trigger function takes 2 arguments: + 1. relation in which to store the substrings + 2. field to extract substrings from + + The relation in which to insert *must* have the following layout: + + string varchar(#) + id oid + + Example: + +create function fti() returns opaque as +'/home/boekhold/src/postgresql-6.2/contrib/fti/fti.so' language 'c'; + +create table title_fti (string varchar(25), id oid); +create index title_fti_idx on title_fti (string); + +create trigger title_fti_trigger after update or insert or delete on product +for each row execute procedure fti(title_fti, title); + ^^^^^^^^^ + where to store index in + ^^^^^ + which column to index + +ofcourse don't forget to create an index on title_idx, column string, else +you won't notice much speedup :) + +After populating 'product', try something like: + +select p.* from product p, title_fti f1, title_fti f2 where + f1.string='slippery' and f2.string='wet' and f1.id=f2.id and p.oid=f1.id; +*/ + +/* + march 4 1998 Changed breakup() to return less substrings. Only breakup + in word parts which are in turn shortened from the start + of the word (ie. word, ord, rd) + Did allocation of substring buffer outside of breakup() + oct. 5 1997, fixed a bug in string breakup (where there are more nonalpha + characters between words then 1). + + oct 4-5 1997 implemented the thing, at least the basic functionallity + of it all.... +*/ + +/* IMPROVEMENTS: + + save a plan for deletes + create a function that will make the index *after* we have populated + the main table (probably first delete all contents to be sure there's + nothing in it, then re-populate the fti-table) + + can we do something with operator overloading or a seperate function + that can build the final query automatigally? + */ + +HeapTuple fti(void); +char *breakup(char*, char*); +bool is_stopword(char*); + +bool new_tuple = false; + + +/* THIS LIST MUST BE IN SORTED ORDER, A BINARY SEARCH IS USED!!!! */ +char *StopWords[] = { /* list of words to skip in indexing */ + "no" + "the", + "yes", +}; + +/* stuff for caching query-plans, stolen from contrib/spi/\*.c */ +typedef struct +{ + char *ident; + int nplans; + void **splan; +} EPlan; + +static EPlan *InsertPlans = NULL; +static EPlan *DeletePlans = NULL; +static int nInsertPlans = 0; +static int nDeletePlans = 0; + +static EPlan *find_plan(char *ident, EPlan ** eplan, int *nplans); + +/***********************************************************************/ +HeapTuple +fti() +{ + Trigger *trigger; /* to get trigger name */ + int nargs; /* # of arguments */ + char **args; /* arguments */ + char *relname; /* triggered relation name */ + Relation rel; /* triggered relation */ + char *indexname; /* name of table for substrings */ + HeapTuple rettuple = NULL; + TupleDesc tupdesc; /* tuple description */ + bool isinsert=false; + bool isdelete=false; + int ret; + char query[8192]; + Oid oid; + /* + FILE *debug; + */ + + /* + debug = fopen("/dev/xconsole", "w"); + fprintf(debug, "FTI: entered function\n"); + fflush(debug); + */ + + if (!CurrentTriggerData) + elog(ERROR, "Full Text Indexing: triggers are not initialized"); + if (TRIGGER_FIRED_FOR_STATEMENT(CurrentTriggerData->tg_event)) + elog(ERROR, "Full Text Indexing: can't process STATEMENT events"); + if (TRIGGER_FIRED_BEFORE(CurrentTriggerData->tg_event)) + elog(ERROR, "Full Text Indexing: must be fired AFTER event"); + + if (TRIGGER_FIRED_BY_INSERT(CurrentTriggerData->tg_event)) + isinsert=true; + if (TRIGGER_FIRED_BY_UPDATE(CurrentTriggerData->tg_event)) + { isdelete=true;isinsert=true;} + if (TRIGGER_FIRED_BY_DELETE(CurrentTriggerData->tg_event)) + isdelete=true; + + trigger = CurrentTriggerData->tg_trigger; + rel = CurrentTriggerData->tg_relation; + relname = SPI_getrelname(rel); + rettuple=CurrentTriggerData->tg_trigtuple; + if (isdelete&&isinsert) /* is an UPDATE */ + rettuple=CurrentTriggerData->tg_newtuple; + + CurrentTriggerData = NULL; /* invalidate 'normal' calls to this function */ + + if ((ret = SPI_connect()) <0) + elog(ERROR,"Full Text Indexing: SPI_connect failed, returned %d\n",ret); + + nargs = trigger->tgnargs; + if (nargs != 2) + elog(ERROR, "Full Text Indexing: trigger can only have 2 arguments"); + + args = trigger->tgargs; + indexname = args[0]; + tupdesc = rel->rd_att; /* what the tuple looks like (?) */ + + /* get oid of current tuple, needed by all, so place here */ + oid = rettuple->t_oid; + if (!OidIsValid(oid)) + elog(ERROR,"Full Text Indexing: oid of current tuple is NULL"); + + if (isdelete) { + void *pplan; + Oid *argtypes; + Datum values[1]; + EPlan *plan; + + sprintf(query, "D%s$%s", args[0], args[1]); + plan = find_plan(query, &DeletePlans, &nDeletePlans); + if (plan->nplans <= 0) { + argtypes = (Oid *)palloc(sizeof(Oid)); + + argtypes[0] = OIDOID; + + sprintf(query, "DELETE FROM %s WHERE id = $1", indexname); + pplan = SPI_prepare(query, 1, argtypes); + if (!pplan) + elog(ERROR, "Full Text Indexing: SPI_prepare returned NULL " + "in delete"); + pplan = SPI_saveplan(pplan); + if (pplan == NULL) + elog(ERROR, "Full Text Indexing: SPI_saveplan returned NULL " + "in delete"); + + plan->splan = (void **)malloc(sizeof(void*)); + *(plan->splan) = pplan; + plan->nplans = 1; + } + + values[0] = oid; + + ret = SPI_execp(*(plan->splan), values, NULL, 0); + if (ret != SPI_OK_DELETE) + elog(ERROR, "Full Text Indexing: error executing plan in delete"); + } + + if (isinsert) { + char *substring, *column; + void *pplan; + Oid *argtypes; + Datum values[2]; + int colnum; + struct varlena *data; + EPlan *plan; + + sprintf(query, "I%s$%s", args[0], args[1]); + plan = find_plan(query, &InsertPlans, &nInsertPlans); + + /* no plan yet, so allocate mem for argtypes */ + if (plan->nplans <= 0) { + argtypes = (Oid *)palloc(2*sizeof(Oid)); + + argtypes[0] = VARCHAROID; /*create table t_name + (string varchar, */ + argtypes[1] = OIDOID; /* id oid); */ + + /* prepare plan to gain speed */ + sprintf(query, "INSERT INTO %s (string, id) VALUES ($1, $2)", + indexname); + pplan = SPI_prepare(query, 2, argtypes); + if (!pplan) + elog(ERROR, "Full Text Indexing: SPI_prepare returned NULL " + "in insert"); + + pplan = SPI_saveplan(pplan); + if (pplan == NULL) + elog(ERROR, "Full Text Indexing: SPI_saveplan returned NULL" + " in insert"); + + plan->splan = (void **)malloc(sizeof(void*)); + *(plan->splan) = pplan; + plan->nplans = 1; + } + + + /* prepare plan for query */ + colnum=SPI_fnumber(tupdesc, args[1]); + if (colnum == SPI_ERROR_NOATTRIBUTE) + elog(ERROR, "Full Text Indexing: column '%s' of '%s' not found", + args[1], args[0]); + + /* Get the char* representation of the column with name args[1] */ + column = SPI_getvalue(rettuple, tupdesc, colnum); + + if (column) { /* make sure we don't try to index NULL's */ + char *buff; + char *string = column; + + while(*string != '\0') { /* placed 'really' inline. */ + *string = tolower(*string); /* some compilers will choke */ + string++; /* on 'inline' keyword */ + } + + data = (struct varlena*)palloc(sizeof(int32)+strlen(column)+1); + buff = palloc(strlen(column) + 1); + /* saves lots of calls in while-loop and in breakup()*/ + + new_tuple=true; + while ((substring = breakup(column, buff))) { + int l; + + l = strlen(substring); + + data->vl_len = l+sizeof(int32); + memcpy(VARDATA(data), substring, l); + values[0] = PointerGetDatum(data); + values[1] = oid; + + ret = SPI_execp(*(plan->splan), values, NULL, 0); + if (ret != SPI_OK_INSERT) + elog(ERROR, "Full Text Indexing: error executing plan " + "in insert"); + } + pfree(buff); + pfree(data); + } + } + + SPI_finish(); + return (rettuple); +} + +char *breakup(char *string, char *substring) +{ + static char *last_start; + static char *cur_pos; + + if (new_tuple) + { + cur_pos=last_start=&string[strlen(string)-1]; + new_tuple=false; /* don't initialize this next time */ + } + + while (cur_pos > string) /* don't read before start of 'string' */ + { + /* skip pieces at the end of a string that are not + alfa-numeric (ie. 'string$%^&', last_start first points to + '&', and after this to 'g' */ + if (!isalnum((int)*last_start)) { + while (!isalnum((int)*last_start) && + last_start > string) + last_start--; + cur_pos=last_start; + } + + cur_pos--; /* substrings are at minimum 2 characters long */ + + if (isalnum((int)*cur_pos)) + { + /* Houston, we have a substring! :) */ + memcpy(substring, cur_pos, last_start - cur_pos + 1); + substring[last_start-cur_pos+1]='\0'; + if (!is_stopword(substring)) return substring; + } + else + { + last_start=cur_pos-1; + cur_pos = last_start; + } + } + + return NULL; /* we've processed all of 'string' */ +} + +/* copied from src/backend/parser/keywords.c and adjusted for our situation*/ +bool +is_stopword(char *text) +{ + char **StopLow; /* for list of stop-words */ + char **StopHigh; + char **StopMiddle; + unsigned int difference; + + StopLow = &StopWords[0]; /* initialize stuff for binary search */ + StopHigh = endof(StopWords); + + while (StopLow <= StopHigh) + { + StopMiddle = StopLow + (StopHigh - StopLow) / 2; + difference = strcmp(*StopMiddle, text); + if (difference == 0) + return (true); + else if (difference < 0) + StopLow = StopMiddle + 1; + else + StopHigh = StopMiddle - 1; + } + + return (false); +} + +/* for caching of query plans, stolen from contrib/spi/\*.c */ +static EPlan * +find_plan(char *ident, EPlan ** eplan, int *nplans) +{ + EPlan *newp; + int i; + + if (*nplans > 0) + { + for (i = 0; i < *nplans; i++) + { + if (strcmp((*eplan)[i].ident, ident) == 0) + break; + } + if (i != *nplans) + return (*eplan + i); + *eplan = (EPlan *) realloc(*eplan, (i + 1) * sizeof(EPlan)); + newp = *eplan + i; + } + else + { + newp = *eplan = (EPlan *) malloc(sizeof(EPlan)); + (*nplans) = i = 0; + } + + newp->ident = (char *) malloc(strlen(ident) + 1); + strcpy(newp->ident, ident); + newp->nplans = 0; + newp->splan = NULL; + (*nplans)++; + + return (newp); +} diff --git a/contrib/fulltextindex/timings.sh b/contrib/fulltextindex/timings.sh new file mode 100755 index 0000000000..ad1495128c --- /dev/null +++ b/contrib/fulltextindex/timings.sh @@ -0,0 +1,350 @@ +#!/bin/sh + +PATH=${PATH}:/usr/local/pgsql/bin +TIMEFORMAT="%3Uu %3Ss %lR %P%%" +export PATH TIMEFORMAT + +case "$1" in + -n) + trashing=0 + ;; + *) + trashing=1 + ;; +esac + +echo "TESTING ON UNCLUSTERED FTI" + +# trash disk +if [ $trashing = 1 ] +then + echo "trashing" + psql -q -n -o /dev/null -c "select count(*) from product;" test +else + echo +fi + +Q="select count(p.oid) from product p, artist_fti f1, artist_fti f2 + where + f1.string ~ '^lapton' and f2.string ~ '^ric' and + f1.id=p.oid and f2.id=p.oid;" + +echo -n "1: ^lapton and ^ric : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "2: ^lapton and ^ric : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "3: ^lapton and ^ric : " +time psql -q -n -o /dev/null -c "$Q" test + +# trash disk +if [ $trashing = 1 ] +then + echo "trashing" + psql -q -n -o /dev/null -c "select count(*) from product;" test +else + echo +fi + +Q="select count(p.oid) from product p, artist_fti f1, artist_fti f2 + where + f1.string ~ '^lling' and f2.string ~ '^tones' and + f1.id=p.oid and f2.id=p.oid;" + +echo -n "1: ^lling and ^tones : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "2: ^lling and ^tones : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "3: ^lling and ^tones : " +time psql -q -n -o /dev/null -c "$Q" test + +# trash disk +if [ $trashing = 1 ] +then + echo "trashing" + psql -q -n -o /dev/null -c "select count(*) from product;" test +else + echo +fi + +Q="select count(p.oid) from product p, artist_fti f1, artist_fti f2 + where + f1.string ~ '^aughan' and f2.string ~ '^evie' and + f1.id=p.oid and f2.id=p.oid;" + +echo -n "1: ^aughan and ^evie : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "2: ^aughan and ^evie : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "3: ^aughan and ^evie : " +time psql -q -n -o /dev/null -c "$Q" test + +# trash disk +if [ $trashing = 1 ] +then + echo "trashing" + psql -q -n -o /dev/null -c "select count(*) from product;" test +else + echo +fi + +Q="select count(p.oid) from product p, artist_fti f1 + where + f1.string ~ '^lling' and + p.oid=f1.id;" + +echo -n "1: ^lling : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "2: ^lling : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "3: ^lling : " +time psql -q -n -o /dev/null -c "$Q" test + +# trash disk +if [ $trashing = 1 ] +then + echo "trashing" + psql -q -n -o /dev/null -c "select count(*) from product;" test +else + echo +fi + +Q="select count(p.oid) from product p, artist_fti f1, artist_fti f2, artist_fti f3 + where + f1.string ~ '^stev' and + f2.string ~ '^ray' and + f3.string ~ '^vaugh' and + p.oid=f1.id and p.oid=f2.id and p.oid=f3.id;" + +echo -n "1: ^stev and ^ray and ^vaugh : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "2: ^stev and ^ray and ^vaugh : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "3: ^stev and ^ray and ^vaugh : " +time psql -q -n -o /dev/null -c "$Q" test + +# trash disk +if [ $trashing = 1 ] +then + echo "trashing" + psql -q -n -o /dev/null -c "select count(*) from product;" test +else + echo +fi + +Q="select count(*) from artist_fti where string ~ '^lling';" + +echo -n "1: ^lling (no join) : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "2: ^lling (no join) : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "3: ^lling (no join) : " +time psql -q -n -o /dev/null -c "$Q" test + +# trash disk +if [ $trashing = 1 ] +then + echo "trashing" + psql -q -n -o /dev/null -c "select count(*) from product;" test +else + echo +fi + +Q="select count(*) from artist_fti where string ~ '^vaughan';" + +echo -n "1: ^vaughan (no join) : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "2: ^vaughan (no join) : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "3: ^vaughan (no join) : " +time psql -q -n -o /dev/null -c "$Q" test + +# trash disk +if [ $trashing = 1 ] +then + echo "trashing" + psql -q -n -o /dev/null -c "select count(*) from product;" test +else + echo +fi + +Q="select count(*) from artist_fti where string ~ '^rol';" + +echo -n "1: ^rol (no join) : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "2: ^rol (no join) : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "3: ^rol (no join) : " +time psql -q -n -o /dev/null -c "$Q" test + +echo +echo "TESTING ON CLUSTERED FTI" + +# trash disk +if [ $trashing = 1 ] +then + echo "trashing" + psql -q -n -o /dev/null -c "select count(*) from product;" test +else + echo +fi + +Q="select count(p.oid) from product p, clustered f1, clustered f2 + where + f1.string ~ '^lapton' and f2.string ~ '^ric' and + f1.id=p.oid and f2.id=p.oid;" + +echo -n "1: ^lapton and ^ric : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "2: ^lapton and ^ric : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "3: ^lapton and ^ric : " +time psql -q -n -o /dev/null -c "$Q" test + +# trash disk +if [ $trashing = 1 ] +then + echo "trashing" + psql -q -n -o /dev/null -c "select count(*) from product;" test +else + echo +fi + +Q="select count(p.oid) from product p, clustered f1, clustered f2 + where + f1.string ~ '^lling' and f2.string ~ '^tones' and + f1.id=p.oid and f2.id=p.oid;" + +echo -n "1: ^lling and ^tones : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "2: ^lling and ^tones : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "3: ^lling and ^tones : " +time psql -q -n -o /dev/null -c "$Q" test + +# trash disk +if [ $trashing = 1 ] +then + echo "trashing" + psql -q -n -o /dev/null -c "select count(*) from product;" test +else + echo +fi + +Q="select count(p.oid) from product p, clustered f1, clustered f2 + where + f1.string ~ '^aughan' and f2.string ~ '^evie' and + f1.id=p.oid and f2.id=p.oid;" + +echo -n "1: ^aughan and ^evie : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "2: ^aughan and ^evie : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "3: ^aughan and ^evie : " +time psql -q -n -o /dev/null -c "$Q" test + +# trash disk +if [ $trashing = 1 ] +then + echo "trashing" + psql -q -n -o /dev/null -c "select count(*) from product;" test +else + echo +fi + +Q="select count(p.oid) from product p, clustered f1 + where + f1.string ~ '^lling' and + p.oid=f1.id;" + +echo -n "1: ^lling : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "2: ^lling : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "3: ^lling : " +time psql -q -n -o /dev/null -c "$Q" test + +# trash disk +if [ $trashing = 1 ] +then + echo "trashing" + psql -q -n -o /dev/null -c "select count(*) from product;" test +else + echo +fi + +Q="select count(p.oid) from product p, clustered f1, clustered f2, clustered f3 + where + f1.string ~ '^stev' and + f2.string ~ '^ray' and + f3.string ~ '^vaugh' and + p.oid=f1.id and p.oid=f2.id and p.oid=f3.id;" + +echo -n "1: ^stev and ^ray and ^vaugh : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "2: ^stev and ^ray and ^vaugh : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "3: ^stev and ^ray and ^vaugh : " +time psql -q -n -o /dev/null -c "$Q" test + +# trash disk +if [ $trashing = 1 ] +then + echo "trashing" + psql -q -n -o /dev/null -c "select count(*) from product;" test +else + echo +fi + +Q="select count(*) from clustered where string ~ '^lling';" + +echo -n "1: ^lling (no join) : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "2: ^lling (no join) : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "3: ^lling (no join) : " +time psql -q -n -o /dev/null -c "$Q" test + +# trash disk +if [ $trashing = 1 ] +then + echo "trashing" + psql -q -n -o /dev/null -c "select count(*) from product;" test +else + echo +fi + +Q="select count(*) from clustered where string ~ '^vaughan';" + +echo -n "1: ^vaughan (no join) : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "2: ^vaughan (no join) : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "3: ^vaughan (no join) : " +time psql -q -n -o /dev/null -c "$Q" test + +# trash disk +if [ $trashing = 1 ] +then + echo "trashing" + psql -q -n -o /dev/null -c "select count(*) from product;" test +else + echo +fi + +Q="select count(*) from clustered where string ~ '^rol';" + +echo -n "1: ^rol (no join) : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "2: ^rol (no join) : " +time psql -q -n -o /dev/null -c "$Q" test +echo -n "3: ^rol (no join) : " +time psql -q -n -o /dev/null -c "$Q" test + + + + + + + + +