diff --git a/contrib/fulltextindex/fticopy b/contrib/fulltextindex/fticopy new file mode 100644 index 0000000000..02bf057e94 --- /dev/null +++ b/contrib/fulltextindex/fticopy @@ -0,0 +1,204 @@ +#!/usr/bin/perl +# +# This script substracts all substrings out of a specific column in a table +# and generates output that can be loaded into a new table with the +# psql '\copy' command. The new table should have the following structure: +# +# create table tab ( +# string text, +# id oid +# ); +# +# Note that you cannot use 'copy' (the SQL-command) directly, because +# there's no '\.' included at the end of the output. +# +# The output can be fed through the UNIX commands 'uniq' and 'sort' +# to generate the smallest and sorted output to populate the fti-table. +# +# Example: +# +# fti.pl -u -d mydb -t mytable -c mycolumn -f myfile +# sort -o myoutfile myfile +# uniq myoutfile sorted-file +# +# psql -u mydb +# +# \copy my_fti_table from myfile +# +# create index fti_idx on my_fti_table (string,id); +# +# create function fti() returns opaque as +# '/path/to/fti/file/fti.so' +# language 'C'; +# +# create trigger my_fti_trigger after update or insert or delete +# on mytable +# for each row execute procedure fti(my_fti_table, mycolumn); +# +# Make sure you have an index on mytable(oid) to be able to do somewhat +# efficient substring searches. + +#use lib '/usr/local/pgsql/lib/perl5/'; +use lib '/mnt/web/guide/postgres/lib/perl5/site_perl'; +use Pg; +use Getopt::Std; + +$PGRES_EMPTY_QUERY = 0 ; +$PGRES_COMMAND_OK = 1 ; +$PGRES_TUPLES_OK = 2 ; +$PGRES_COPY_OUT = 3 ; +$PGRES_COPY_IN = 4 ; +$PGRES_BAD_RESPONSE = 5 ; +$PGRES_NONFATAL_ERROR = 6 ; +$PGRES_FATAL_ERROR = 7 ; + +$[ = 0; # make sure string offsets start at 0 + +sub break_up { + my $string = pop @_; + + @strings = split(/\W+/, $string); + @subs = (); + + foreach $s (@strings) { + $len = length($s); + next if ($len < 4); + + $lpos = $len-1; + while ($lpos >= 3) { + $fpos = $lpos - 3; + while ($fpos >= 0) { + $sub = substr($s, $fpos, $lpos - $fpos + 1); + push(@subs, $sub); + $fpos = $fpos - 1; + } + $lpos = $lpos - 1; + } + } + + return @subs; +} + +sub connect_db { + my $dbname = shift @_; + my $user = shift @_; + my $passwd = shift @_; + + if (!defined($dbname) || $dbname eq "") { + return 1; + } + $connect_string = "dbname=$dbname"; + + if ($user ne "") { + if ($passwd eq "") { + return 0; + } + $connect_string = "$connect_string user=$user password=$passwd ". + "authtype=password"; + } + + $PG_CONN = PQconnectdb($connect_string); + + if (PQstatus($PG_CONN)) { + print STDERR "Couldn't make connection with database!\n"; + print STDERR PQerrorMessage($PG_CONN), "\n"; + return 0; + } + + return 1; +} + +sub quit_prog { + close(OUT); + unlink $opt_f; + if (defined($PG_CONN)) { + PQfinish($PG_CONN); + } + exit 1; +} + +sub get_username { + print "Username: "; + chop($n = ); + + return $n;; +} + +sub get_password { + print "Password: "; + + system("stty -echo < /dev/tty"); + chop($pwd = ); + print "\n"; + system("stty echo < /dev/tty"); + + return $pwd; +} + +sub main { + getopts('d:t:c:f:u'); + + if (!$opt_d || !$opt_t || !$opt_c || !$opt_f) { + print STDERR "usage: $0 [-u] -d database -t table -c column ". + "-f output-file\n"; + return 1; + } + + if (defined($opt_u)) { + $uname = get_username(); + $pwd = get_password(); + } else { + $uname = ""; + $pwd = ""; + } + + $SIG{'INT'} = 'quit_prog'; + if (!connect_db($opt_d, $uname, $pwd)) { + print STDERR "Connecting to database failed!\n"; + return 1; + } + + if (!open(OUT, ">$opt_f")) { + print STDERR "Couldnt' open file '$opt_f' for output!\n"; + return 1; + } + + PQexec($PG_CONN, "begin"); + + $query = "declare C cursor for select $opt_c, oid from $opt_t"; + $res = PQexec($PG_CONN, $query); + if (!$res || (PQresultStatus($res) != $PGRES_COMMAND_OK)) { + print STDERR "Error declaring cursor!\n"; + print STDERR PQerrorMessage($PG_CONN), "\n"; + PQfinish($PG_CONN); + return 1; + } + PQclear($res); + + $query = "fetch in C"; + while (($res = PQexec($PG_CONN, $query)) && + (PQresultStatus($res) == $PGRES_TUPLES_OK) && + (PQntuples($res) == 1)) { + $col = PQgetvalue($res, 0, 0); + $oid = PQgetvalue($res, 0, 1); + + @subs = break_up($col); + foreach $i (@subs) { + print OUT "$i\t$oid\n"; + } + } + + if (!$res || (PQresultStatus($res) != PGRES_TUPLES_OK)) { + print STDERR "Error retrieving data from backend!\n"; + print STDERR PQerrorMEssage($PG_CONN), "\n"; + PQfinish($PG_CONN); + return 1; + } + + PQclear($res); + PQfinish($PG_CONN); + + return 0; +} + +exit main();