2007-08-21 03:11:32 +02:00
|
|
|
--
|
|
|
|
-- Sanity checks for text search catalogs
|
|
|
|
--
|
|
|
|
-- NB: we assume the oidjoins test will have caught any dangling links,
|
|
|
|
-- that is OID or REGPROC fields that are not zero and do not match some
|
|
|
|
-- row in the linked-to table. However, if we want to enforce that a link
|
|
|
|
-- field can't be 0, we have to check it here.
|
|
|
|
-- Find unexpected zero link entries
|
|
|
|
SELECT oid, prsname
|
|
|
|
FROM pg_ts_parser
|
|
|
|
WHERE prsnamespace = 0 OR prsstart = 0 OR prstoken = 0 OR prsend = 0 OR
|
|
|
|
-- prsheadline is optional
|
|
|
|
prslextype = 0;
|
|
|
|
oid | prsname
|
|
|
|
-----+---------
|
|
|
|
(0 rows)
|
|
|
|
|
|
|
|
SELECT oid, dictname
|
|
|
|
FROM pg_ts_dict
|
|
|
|
WHERE dictnamespace = 0 OR dictowner = 0 OR dicttemplate = 0;
|
|
|
|
oid | dictname
|
|
|
|
-----+----------
|
|
|
|
(0 rows)
|
|
|
|
|
|
|
|
SELECT oid, tmplname
|
|
|
|
FROM pg_ts_template
|
|
|
|
WHERE tmplnamespace = 0 OR tmpllexize = 0; -- tmplinit is optional
|
|
|
|
oid | tmplname
|
|
|
|
-----+----------
|
|
|
|
(0 rows)
|
|
|
|
|
|
|
|
SELECT oid, cfgname
|
|
|
|
FROM pg_ts_config
|
|
|
|
WHERE cfgnamespace = 0 OR cfgowner = 0 OR cfgparser = 0;
|
|
|
|
oid | cfgname
|
|
|
|
-----+---------
|
|
|
|
(0 rows)
|
|
|
|
|
|
|
|
SELECT mapcfg, maptokentype, mapseqno
|
|
|
|
FROM pg_ts_config_map
|
|
|
|
WHERE mapcfg = 0 OR mapdict = 0;
|
|
|
|
mapcfg | maptokentype | mapseqno
|
|
|
|
--------+--------------+----------
|
|
|
|
(0 rows)
|
|
|
|
|
|
|
|
-- Look for pg_ts_config_map entries that aren't one of parser's token types
|
|
|
|
SELECT * FROM
|
|
|
|
( SELECT oid AS cfgid, (ts_token_type(cfgparser)).tokid AS tokid
|
2010-11-23 21:27:50 +01:00
|
|
|
FROM pg_ts_config ) AS tt
|
2007-08-21 03:11:32 +02:00
|
|
|
RIGHT JOIN pg_ts_config_map AS m
|
|
|
|
ON (tt.cfgid=m.mapcfg AND tt.tokid=m.maptokentype)
|
|
|
|
WHERE
|
|
|
|
tt.cfgid IS NULL OR tt.tokid IS NULL;
|
|
|
|
cfgid | tokid | mapcfg | maptokentype | mapseqno | mapdict
|
|
|
|
-------+-------+--------+--------------+----------+---------
|
|
|
|
(0 rows)
|
|
|
|
|
|
|
|
-- test basic text search behavior without indexes, then with
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
158
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
17
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
6
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
98
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
23
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
39
|
|
|
|
(1 row)
|
|
|
|
|
2008-05-16 18:31:02 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
494
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
create index wowidx on test_tsvector using gist (a);
|
|
|
|
SET enable_seqscan=OFF;
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
158
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
17
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
6
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
98
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
23
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
39
|
|
|
|
(1 row)
|
|
|
|
|
2008-05-16 18:31:02 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
494
|
|
|
|
(1 row)
|
|
|
|
|
2011-12-21 01:57:34 +01:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
158
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
RESET enable_seqscan;
|
2007-08-21 17:41:13 +02:00
|
|
|
DROP INDEX wowidx;
|
|
|
|
CREATE INDEX wowidx ON test_tsvector USING gin (a);
|
2007-08-21 03:11:32 +02:00
|
|
|
SET enable_seqscan=OFF;
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
158
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
17
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
6
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
98
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
23
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
39
|
|
|
|
(1 row)
|
|
|
|
|
2008-05-16 18:31:02 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
494
|
|
|
|
(1 row)
|
|
|
|
|
2011-12-21 01:57:34 +01:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
158
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
RESET enable_seqscan;
|
2007-08-21 17:41:13 +02:00
|
|
|
INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH');
|
|
|
|
SELECT * FROM ts_stat('SELECT a FROM test_tsvector') ORDER BY ndoc DESC, nentry DESC, word LIMIT 10;
|
2007-08-21 03:11:32 +02:00
|
|
|
word | ndoc | nentry
|
|
|
|
------+------+--------
|
|
|
|
qq | 108 | 108
|
|
|
|
qt | 102 | 102
|
|
|
|
qe | 100 | 100
|
|
|
|
qh | 98 | 98
|
|
|
|
qw | 98 | 98
|
|
|
|
qa | 97 | 97
|
|
|
|
ql | 94 | 94
|
|
|
|
qs | 94 | 94
|
|
|
|
qi | 92 | 92
|
|
|
|
qr | 92 | 92
|
|
|
|
(10 rows)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT * FROM ts_stat('SELECT a FROM test_tsvector', 'AB') ORDER BY ndoc DESC, nentry DESC, word;
|
2007-08-21 03:11:32 +02:00
|
|
|
word | ndoc | nentry
|
|
|
|
------+------+--------
|
|
|
|
DFG | 1 | 2
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
--dictionaries and to_tsvector
|
2007-08-25 03:06:25 +02:00
|
|
|
SELECT ts_lexize('english_stem', 'skies');
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{sky}
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-25 03:06:25 +02:00
|
|
|
SELECT ts_lexize('english_stem', 'identity');
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{ident}
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT * FROM ts_token_type('default');
|
2007-10-23 22:46:12 +02:00
|
|
|
tokid | alias | description
|
|
|
|
-------+-----------------+------------------------------------------
|
|
|
|
1 | asciiword | Word, all ASCII
|
|
|
|
2 | word | Word, all letters
|
|
|
|
3 | numword | Word, letters and digits
|
|
|
|
4 | email | Email address
|
|
|
|
5 | url | URL
|
|
|
|
6 | host | Host
|
|
|
|
7 | sfloat | Scientific notation
|
|
|
|
8 | version | Version number
|
|
|
|
9 | hword_numpart | Hyphenated word part, letters and digits
|
|
|
|
10 | hword_part | Hyphenated word part, all letters
|
|
|
|
11 | hword_asciipart | Hyphenated word part, all ASCII
|
|
|
|
12 | blank | Space symbols
|
2007-11-20 03:25:22 +01:00
|
|
|
13 | tag | XML tag
|
2007-10-23 22:46:12 +02:00
|
|
|
14 | protocol | Protocol head
|
|
|
|
15 | numhword | Hyphenated word, letters and digits
|
|
|
|
16 | asciihword | Hyphenated word, all ASCII
|
|
|
|
17 | hword | Hyphenated word, all letters
|
2007-10-27 18:01:09 +02:00
|
|
|
18 | url_path | URL path
|
2007-10-23 22:46:12 +02:00
|
|
|
19 | file | File or path name
|
|
|
|
20 | float | Decimal notation
|
|
|
|
21 | int | Signed integer
|
|
|
|
22 | uint | Unsigned integer
|
2007-11-20 03:25:22 +01:00
|
|
|
23 | entity | XML entity
|
2007-08-21 03:11:32 +02:00
|
|
|
(23 rows)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT * FROM ts_parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
2007-08-21 03:11:32 +02:00
|
|
|
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
|
|
|
|
<i <b> wow < jqw <> qwerty');
|
|
|
|
tokid | token
|
|
|
|
-------+--------------------------------------
|
|
|
|
22 | 345
|
|
|
|
12 |
|
|
|
|
1 | qwe
|
|
|
|
12 | @
|
|
|
|
19 | efd.r
|
|
|
|
12 | '
|
|
|
|
14 | http://
|
|
|
|
6 | www.com
|
|
|
|
12 | /
|
|
|
|
14 | http://
|
|
|
|
5 | aew.werc.ewr/?ad=qwe&dw
|
|
|
|
6 | aew.werc.ewr
|
|
|
|
18 | /?ad=qwe&dw
|
|
|
|
12 |
|
|
|
|
5 | 1aew.werc.ewr/?ad=qwe&dw
|
|
|
|
6 | 1aew.werc.ewr
|
|
|
|
18 | /?ad=qwe&dw
|
|
|
|
12 |
|
|
|
|
6 | 2aew.werc.ewr
|
|
|
|
12 |
|
|
|
|
14 | http://
|
|
|
|
5 | 3aew.werc.ewr/?ad=qwe&dw
|
|
|
|
6 | 3aew.werc.ewr
|
|
|
|
18 | /?ad=qwe&dw
|
|
|
|
12 |
|
|
|
|
14 | http://
|
|
|
|
6 | 4aew.werc.ewr
|
|
|
|
12 |
|
|
|
|
14 | http://
|
Modify the built-in text search parser to handle URLs more nearly according
to RFC 3986. In particular, these characters now terminate the path part
of a URL: '"', '<', '>', '\', '^', '`', '{', '|', '}'. The previous behavior
was inconsistent and depended on whether a "?" was present in the path.
Per gripe from Donald Fraser and spec research by Kevin Grittner.
This is a pre-existing bug, but not back-patching since the risks of
breaking existing applications seem to outweigh the benefits.
2010-04-28 04:04:16 +02:00
|
|
|
5 | 5aew.werc.ewr:8100/?
|
2007-08-21 03:11:32 +02:00
|
|
|
6 | 5aew.werc.ewr:8100
|
Modify the built-in text search parser to handle URLs more nearly according
to RFC 3986. In particular, these characters now terminate the path part
of a URL: '"', '<', '>', '\', '^', '`', '{', '|', '}'. The previous behavior
was inconsistent and depended on whether a "?" was present in the path.
Per gripe from Donald Fraser and spec research by Kevin Grittner.
This is a pre-existing bug, but not back-patching since the risks of
breaking existing applications seem to outweigh the benefits.
2010-04-28 04:04:16 +02:00
|
|
|
18 | /?
|
|
|
|
12 |
|
2007-08-21 03:11:32 +02:00
|
|
|
1 | ad
|
|
|
|
12 | =
|
|
|
|
1 | qwe
|
|
|
|
12 | &
|
|
|
|
1 | dw
|
|
|
|
12 |
|
|
|
|
5 | 6aew.werc.ewr:8100/?ad=qwe&dw
|
|
|
|
6 | 6aew.werc.ewr:8100
|
|
|
|
18 | /?ad=qwe&dw
|
|
|
|
12 |
|
|
|
|
5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32
|
|
|
|
6 | 7aew.werc.ewr:8100
|
|
|
|
18 | /?ad=qwe&dw=%20%32
|
|
|
|
12 |
|
|
|
|
7 | +4.0e-10
|
|
|
|
12 |
|
|
|
|
1 | qwe
|
|
|
|
12 |
|
|
|
|
1 | qwe
|
|
|
|
12 |
|
|
|
|
1 | qwqwe
|
|
|
|
12 |
|
|
|
|
20 | 234.435
|
|
|
|
12 |
|
|
|
|
22 | 455
|
|
|
|
12 |
|
|
|
|
20 | 5.005
|
|
|
|
12 |
|
|
|
|
4 | teodor@stack.net
|
|
|
|
12 |
|
|
|
|
16 | qwe-wer
|
|
|
|
11 | qwe
|
|
|
|
12 | -
|
|
|
|
11 | wer
|
|
|
|
12 |
|
|
|
|
1 | asdf
|
|
|
|
12 |
|
|
|
|
13 | <fr>
|
|
|
|
1 | qwer
|
|
|
|
12 |
|
|
|
|
1 | jf
|
|
|
|
12 |
|
|
|
|
1 | sdjk
|
|
|
|
12 | <
|
|
|
|
1 | we
|
|
|
|
12 |
|
|
|
|
1 | hjwer
|
|
|
|
12 |
|
|
|
|
13 | <werrwe>
|
|
|
|
12 |
|
|
|
|
3 | ewr1
|
|
|
|
12 | >
|
|
|
|
3 | ewri2
|
|
|
|
12 |
|
|
|
|
13 | <a href="qwe<qwe>">
|
2009-11-22 06:20:41 +01:00
|
|
|
12 | +
|
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
19 | /usr/local/fff
|
|
|
|
12 |
|
|
|
|
19 | /awdf/dwqe/4325
|
|
|
|
12 |
|
|
|
|
19 | rewt/ewr
|
|
|
|
12 |
|
|
|
|
1 | wefjn
|
|
|
|
12 |
|
|
|
|
19 | /wqe-324/ewr
|
|
|
|
12 |
|
|
|
|
19 | gist.h
|
|
|
|
12 |
|
|
|
|
19 | gist.h.c
|
|
|
|
12 |
|
|
|
|
19 | gist.c
|
|
|
|
12 | .
|
|
|
|
1 | readline
|
|
|
|
12 |
|
|
|
|
20 | 4.2
|
|
|
|
12 |
|
|
|
|
20 | 4.2
|
|
|
|
12 | .
|
|
|
|
20 | 4.2
|
|
|
|
12 | ,
|
2007-10-27 21:03:45 +02:00
|
|
|
1 | readline
|
|
|
|
20 | -4.2
|
2007-08-21 03:11:32 +02:00
|
|
|
12 |
|
2007-10-27 21:03:45 +02:00
|
|
|
1 | readline
|
|
|
|
20 | -4.2
|
2007-08-21 03:11:32 +02:00
|
|
|
12 | .
|
|
|
|
22 | 234
|
2009-11-22 06:20:41 +01:00
|
|
|
12 | +
|
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
12 | <
|
|
|
|
1 | i
|
|
|
|
12 |
|
|
|
|
13 | <b>
|
|
|
|
12 |
|
|
|
|
1 | wow
|
|
|
|
12 |
|
|
|
|
12 | <
|
|
|
|
1 | jqw
|
|
|
|
12 |
|
|
|
|
12 | <>
|
|
|
|
1 | qwerty
|
Modify the built-in text search parser to handle URLs more nearly according
to RFC 3986. In particular, these characters now terminate the path part
of a URL: '"', '<', '>', '\', '^', '`', '{', '|', '}'. The previous behavior
was inconsistent and depended on whether a "?" was present in the path.
Per gripe from Donald Fraser and spec research by Kevin Grittner.
This is a pre-existing bug, but not back-patching since the risks of
breaking existing applications seem to outweigh the benefits.
2010-04-28 04:04:16 +02:00
|
|
|
(133 rows)
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
2007-08-21 03:11:32 +02:00
|
|
|
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
|
|
|
|
<i <b> wow < jqw <> qwerty');
|
Modify the built-in text search parser to handle URLs more nearly according
to RFC 3986. In particular, these characters now terminate the path part
of a URL: '"', '<', '>', '\', '^', '`', '{', '|', '}'. The previous behavior
was inconsistent and depended on whether a "?" was present in the path.
Per gripe from Donald Fraser and spec research by Kevin Grittner.
This is a pre-existing bug, but not back-patching since the risks of
breaking existing applications seem to outweigh the benefits.
2010-04-28 04:04:16 +02:00
|
|
|
to_tsvector
|
|
|
|
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
|
|
|
'+4.0e-10':28 '-4.2':60,62 '/?':18 '/?ad=qwe&dw':7,10,14,24 '/?ad=qwe&dw=%20%32':27 '/awdf/dwqe/4325':48 '/usr/local/fff':47 '/wqe-324/ewr':51 '1aew.werc.ewr':9 '1aew.werc.ewr/?ad=qwe&dw':8 '234':63 '234.435':32 '2aew.werc.ewr':11 '345':1 '3aew.werc.ewr':13 '3aew.werc.ewr/?ad=qwe&dw':12 '4.2':56,57,58 '455':33 '4aew.werc.ewr':15 '5.005':34 '5aew.werc.ewr:8100':17 '5aew.werc.ewr:8100/?':16 '6aew.werc.ewr:8100':23 '6aew.werc.ewr:8100/?ad=qwe&dw':22 '7aew.werc.ewr:8100':26 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':25 'ad':19 'aew.werc.ewr':6 'aew.werc.ewr/?ad=qwe&dw':5 'asdf':39 'dw':21 'efd.r':3 'ewr1':45 'ewri2':46 'gist.c':54 'gist.h':52 'gist.h.c':53 'hjwer':44 'jf':41 'jqw':66 'qwe':2,20,29,30,37 'qwe-wer':36 'qwer':40 'qwerti':67 'qwqwe':31 'readlin':55,59,61 'rewt/ewr':49 'sdjk':42 'teodor@stack.net':35 'wefjn':50 'wer':38 'wow':65 'www.com':4
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT length(to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
2007-08-21 03:11:32 +02:00
|
|
|
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
|
|
|
|
<i <b> wow < jqw <> qwerty'));
|
|
|
|
length
|
|
|
|
--------
|
Modify the built-in text search parser to handle URLs more nearly according
to RFC 3986. In particular, these characters now terminate the path part
of a URL: '"', '<', '>', '\', '^', '`', '{', '|', '}'. The previous behavior
was inconsistent and depended on whether a "?" was present in the path.
Per gripe from Donald Fraser and spec research by Kevin Grittner.
This is a pre-existing bug, but not back-patching since the risks of
breaking existing applications seem to outweigh the benefits.
2010-04-28 04:04:16 +02:00
|
|
|
53
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2007-11-25 16:37:11 +01:00
|
|
|
-- ts_debug
|
2008-01-13 22:17:46 +01:00
|
|
|
SELECT * from ts_debug('english', '<myns:foo-bar_baz.blurfl>abc&nm1;def©ghiõjkl</myns:foo-bar_baz.blurfl>');
|
2007-11-25 16:37:11 +01:00
|
|
|
alias | description | token | dictionaries | dictionary | lexemes
|
|
|
|
-----------+-----------------+----------------------------+----------------+--------------+---------
|
|
|
|
tag | XML tag | <myns:foo-bar_baz.blurfl> | {} | |
|
|
|
|
asciiword | Word, all ASCII | abc | {english_stem} | english_stem | {abc}
|
|
|
|
entity | XML entity | &nm1; | {} | |
|
|
|
|
asciiword | Word, all ASCII | def | {english_stem} | english_stem | {def}
|
|
|
|
entity | XML entity | © | {} | |
|
|
|
|
asciiword | Word, all ASCII | ghi | {english_stem} | english_stem | {ghi}
|
|
|
|
entity | XML entity | õ | {} | |
|
|
|
|
asciiword | Word, all ASCII | jkl | {english_stem} | english_stem | {jkl}
|
|
|
|
tag | XML tag | </myns:foo-bar_baz.blurfl> | {} | |
|
|
|
|
(9 rows)
|
|
|
|
|
Modify the built-in text search parser to handle URLs more nearly according
to RFC 3986. In particular, these characters now terminate the path part
of a URL: '"', '<', '>', '\', '^', '`', '{', '|', '}'. The previous behavior
was inconsistent and depended on whether a "?" was present in the path.
Per gripe from Donald Fraser and spec research by Kevin Grittner.
This is a pre-existing bug, but not back-patching since the risks of
breaking existing applications seem to outweigh the benefits.
2010-04-28 04:04:16 +02:00
|
|
|
-- check parsing of URLs
|
|
|
|
SELECT * from ts_debug('english', 'http://www.harewoodsolutions.co.uk/press.aspx</span>');
|
|
|
|
alias | description | token | dictionaries | dictionary | lexemes
|
|
|
|
----------+---------------+----------------------------------------+--------------+------------+------------------------------------------
|
|
|
|
protocol | Protocol head | http:// | {} | |
|
|
|
|
url | URL | www.harewoodsolutions.co.uk/press.aspx | {simple} | simple | {www.harewoodsolutions.co.uk/press.aspx}
|
|
|
|
host | Host | www.harewoodsolutions.co.uk | {simple} | simple | {www.harewoodsolutions.co.uk}
|
|
|
|
url_path | URL path | /press.aspx | {simple} | simple | {/press.aspx}
|
|
|
|
tag | XML tag | </span> | {} | |
|
|
|
|
(5 rows)
|
|
|
|
|
|
|
|
SELECT * from ts_debug('english', 'http://aew.wer0c.ewr/id?ad=qwe&dw<span>');
|
|
|
|
alias | description | token | dictionaries | dictionary | lexemes
|
|
|
|
----------+---------------+----------------------------+--------------+------------+------------------------------
|
|
|
|
protocol | Protocol head | http:// | {} | |
|
|
|
|
url | URL | aew.wer0c.ewr/id?ad=qwe&dw | {simple} | simple | {aew.wer0c.ewr/id?ad=qwe&dw}
|
|
|
|
host | Host | aew.wer0c.ewr | {simple} | simple | {aew.wer0c.ewr}
|
|
|
|
url_path | URL path | /id?ad=qwe&dw | {simple} | simple | {/id?ad=qwe&dw}
|
|
|
|
tag | XML tag | <span> | {} | |
|
|
|
|
(5 rows)
|
|
|
|
|
|
|
|
SELECT * from ts_debug('english', 'http://5aew.werc.ewr:8100/?');
|
|
|
|
alias | description | token | dictionaries | dictionary | lexemes
|
|
|
|
----------+---------------+----------------------+--------------+------------+------------------------
|
|
|
|
protocol | Protocol head | http:// | {} | |
|
|
|
|
url | URL | 5aew.werc.ewr:8100/? | {simple} | simple | {5aew.werc.ewr:8100/?}
|
|
|
|
host | Host | 5aew.werc.ewr:8100 | {simple} | simple | {5aew.werc.ewr:8100}
|
|
|
|
url_path | URL path | /? | {simple} | simple | {/?}
|
|
|
|
(4 rows)
|
|
|
|
|
|
|
|
SELECT * from ts_debug('english', '5aew.werc.ewr:8100/?xx');
|
|
|
|
alias | description | token | dictionaries | dictionary | lexemes
|
|
|
|
----------+-------------+------------------------+--------------+------------+--------------------------
|
|
|
|
url | URL | 5aew.werc.ewr:8100/?xx | {simple} | simple | {5aew.werc.ewr:8100/?xx}
|
|
|
|
host | Host | 5aew.werc.ewr:8100 | {simple} | simple | {5aew.werc.ewr:8100}
|
|
|
|
url_path | URL path | /?xx | {simple} | simple | {/?xx}
|
|
|
|
(3 rows)
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
-- to_tsquery
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT to_tsquery('english', 'qwe & sKies ');
|
2007-08-21 03:11:32 +02:00
|
|
|
to_tsquery
|
|
|
|
---------------
|
|
|
|
'qwe' & 'sky'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT to_tsquery('simple', 'qwe & sKies ');
|
2007-08-21 03:11:32 +02:00
|
|
|
to_tsquery
|
|
|
|
-----------------
|
|
|
|
'qwe' & 'skies'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT to_tsquery('english', '''the wether'':dc & '' sKies '':BC ');
|
2007-08-21 03:11:32 +02:00
|
|
|
to_tsquery
|
|
|
|
------------------------
|
|
|
|
'wether':CD & 'sky':BC
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT to_tsquery('english', 'asd&(and|fghj)');
|
2007-08-21 03:11:32 +02:00
|
|
|
to_tsquery
|
|
|
|
----------------
|
|
|
|
'asd' & 'fghj'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT to_tsquery('english', '(asd&and)|fghj');
|
2007-08-21 03:11:32 +02:00
|
|
|
to_tsquery
|
|
|
|
----------------
|
|
|
|
'asd' | 'fghj'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT to_tsquery('english', '(asd&!and)|fghj');
|
2007-08-21 03:11:32 +02:00
|
|
|
to_tsquery
|
|
|
|
----------------
|
|
|
|
'asd' | 'fghj'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT to_tsquery('english', '(the|and&(i&1))&fghj');
|
2007-08-21 03:11:32 +02:00
|
|
|
to_tsquery
|
|
|
|
--------------
|
|
|
|
'1' & 'fghj'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT plainto_tsquery('english', 'the and z 1))& fghj');
|
2007-08-21 03:11:32 +02:00
|
|
|
plainto_tsquery
|
|
|
|
--------------------
|
|
|
|
'z' & '1' & 'fghj'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT plainto_tsquery('english', 'foo bar') && plainto_tsquery('english', 'asd');
|
2007-08-21 03:11:32 +02:00
|
|
|
?column?
|
|
|
|
-----------------------
|
|
|
|
'foo' & 'bar' & 'asd'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT plainto_tsquery('english', 'foo bar') || plainto_tsquery('english', 'asd fg');
|
2007-08-21 03:11:32 +02:00
|
|
|
?column?
|
|
|
|
------------------------------
|
|
|
|
'foo' & 'bar' | 'asd' & 'fg'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT plainto_tsquery('english', 'foo bar') || !!plainto_tsquery('english', 'asd fg');
|
2007-08-21 03:11:32 +02:00
|
|
|
?column?
|
|
|
|
-----------------------------------
|
|
|
|
'foo' & 'bar' | !( 'asd' & 'fg' )
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT plainto_tsquery('english', 'foo bar') && 'asd | fg';
|
2007-08-21 03:11:32 +02:00
|
|
|
?column?
|
|
|
|
----------------------------------
|
|
|
|
'foo' & 'bar' & ( 'asd' | 'fg' )
|
|
|
|
(1 row)
|
|
|
|
|
2007-12-09 22:01:18 +01:00
|
|
|
SELECT ts_rank_cd(to_tsvector('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
2007-12-10 01:12:31 +01:00
|
|
|
S. T. Coleridge (1772-1834)
|
2007-12-09 22:01:18 +01:00
|
|
|
'), to_tsquery('english', 'paint&water'));
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rank_cd
|
|
|
|
------------
|
2007-12-09 22:01:18 +01:00
|
|
|
0.05
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_rank_cd(to_tsvector('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
2007-12-10 01:12:31 +01:00
|
|
|
S. T. Coleridge (1772-1834)
|
2007-12-09 22:01:18 +01:00
|
|
|
'), to_tsquery('english', 'breath&motion&water'));
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rank_cd
|
|
|
|
------------
|
2007-12-09 22:01:18 +01:00
|
|
|
0.00833333
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_rank_cd(to_tsvector('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
2007-12-10 01:12:31 +01:00
|
|
|
S. T. Coleridge (1772-1834)
|
2007-12-09 22:01:18 +01:00
|
|
|
'), to_tsquery('english', 'ocean'));
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rank_cd
|
|
|
|
------------
|
2007-12-09 22:01:18 +01:00
|
|
|
0.1
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
--headline tests
|
2007-12-09 22:01:18 +01:00
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
2007-12-10 01:12:31 +01:00
|
|
|
S. T. Coleridge (1772-1834)
|
2007-12-09 22:01:18 +01:00
|
|
|
', to_tsquery('english', 'paint&water'));
|
|
|
|
ts_headline
|
|
|
|
-----------------------------------------
|
2009-11-22 06:20:41 +01:00
|
|
|
<b>painted</b> Ocean. +
|
|
|
|
<b>Water</b>, <b>water</b>, every where+
|
|
|
|
And all the boards did shrink; +
|
2007-12-09 22:01:18 +01:00
|
|
|
<b>Water</b>, <b>water</b>, every
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
2007-12-10 01:12:31 +01:00
|
|
|
S. T. Coleridge (1772-1834)
|
2007-12-09 22:01:18 +01:00
|
|
|
', to_tsquery('english', 'breath&motion&water'));
|
|
|
|
ts_headline
|
|
|
|
----------------------------------
|
2009-11-22 06:20:41 +01:00
|
|
|
<b>breath</b> nor <b>motion</b>,+
|
|
|
|
As idle as a painted Ship +
|
|
|
|
Upon a painted Ocean. +
|
2007-12-09 22:01:18 +01:00
|
|
|
<b>Water</b>, <b>water</b>
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
2007-12-10 01:12:31 +01:00
|
|
|
S. T. Coleridge (1772-1834)
|
2007-12-09 22:01:18 +01:00
|
|
|
', to_tsquery('english', 'ocean'));
|
|
|
|
ts_headline
|
|
|
|
----------------------------------
|
2009-11-22 06:20:41 +01:00
|
|
|
<b>Ocean</b>. +
|
|
|
|
Water, water, every where +
|
|
|
|
And all the boards did shrink;+
|
2007-12-09 22:01:18 +01:00
|
|
|
Water, water, every where
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT ts_headline('english', '
|
2007-08-21 03:11:32 +02:00
|
|
|
<html>
|
|
|
|
<!-- some comment -->
|
|
|
|
<body>
|
|
|
|
Sea view wow <u>foo bar</u> <i>qq</i>
|
|
|
|
<a href="http://www.google.com/foo.bar.html" target="_blank">YES </a>
|
|
|
|
ff-bg
|
|
|
|
<script>
|
|
|
|
document.write(15);
|
|
|
|
</script>
|
|
|
|
</body>
|
|
|
|
</html>',
|
|
|
|
to_tsquery('english', 'sea&foo'), 'HighlightAll=true');
|
|
|
|
ts_headline
|
|
|
|
-----------------------------------------------------------------------------
|
2009-11-22 06:20:41 +01:00
|
|
|
+
|
|
|
|
<html> +
|
|
|
|
<!-- some comment --> +
|
|
|
|
<body> +
|
|
|
|
<b>Sea</b> view wow <u><b>foo</b> bar</u> <i>qq</i> +
|
|
|
|
<a href="http://www.google.com/foo.bar.html" target="_blank">YES </a>+
|
|
|
|
ff-bg +
|
|
|
|
<script> +
|
|
|
|
document.write(15); +
|
|
|
|
</script> +
|
|
|
|
</body> +
|
2007-08-21 03:11:32 +02:00
|
|
|
</html>
|
|
|
|
(1 row)
|
|
|
|
|
2010-11-23 21:27:50 +01:00
|
|
|
--Check if headline fragments work
|
2008-10-17 20:05:19 +02:00
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
', to_tsquery('english', 'ocean'), 'MaxFragments=1');
|
|
|
|
ts_headline
|
|
|
|
------------------------------------
|
2009-11-22 06:20:41 +01:00
|
|
|
after day, +
|
|
|
|
We stuck, nor breath nor motion,+
|
|
|
|
As idle as a painted Ship +
|
|
|
|
Upon a painted <b>Ocean</b>. +
|
|
|
|
Water, water, every where +
|
|
|
|
And all the boards did shrink; +
|
|
|
|
Water, water, every where, +
|
2008-10-17 20:05:19 +02:00
|
|
|
Nor any drop
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
--Check if more than one fragments are displayed
|
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
', to_tsquery('english', 'Coleridge & stuck'), 'MaxFragments=2');
|
|
|
|
ts_headline
|
|
|
|
----------------------------------------------
|
2009-11-22 06:20:41 +01:00
|
|
|
after day, day after day, +
|
|
|
|
We <b>stuck</b>, nor breath nor motion, +
|
|
|
|
As idle as a painted Ship +
|
|
|
|
Upon a painted Ocean. +
|
|
|
|
Water, water, every where +
|
|
|
|
And all the boards did shrink; +
|
|
|
|
Water, water, every where ... drop to drink.+
|
2008-10-17 20:05:19 +02:00
|
|
|
S. T. <b>Coleridge</b>
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
--Fragments when there all query words are not in the document
|
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
', to_tsquery('english', 'ocean & seahorse'), 'MaxFragments=1');
|
|
|
|
ts_headline
|
|
|
|
------------------------------------
|
2009-11-22 06:20:41 +01:00
|
|
|
+
|
|
|
|
Day after day, day after day, +
|
|
|
|
We stuck, nor breath nor motion,+
|
2008-10-17 20:05:19 +02:00
|
|
|
As idle as
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
--FragmentDelimiter option
|
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
', to_tsquery('english', 'Coleridge & stuck'), 'MaxFragments=2,FragmentDelimiter=***');
|
|
|
|
ts_headline
|
|
|
|
--------------------------------------------
|
2009-11-22 06:20:41 +01:00
|
|
|
after day, day after day, +
|
|
|
|
We <b>stuck</b>, nor breath nor motion, +
|
|
|
|
As idle as a painted Ship +
|
|
|
|
Upon a painted Ocean. +
|
|
|
|
Water, water, every where +
|
|
|
|
And all the boards did shrink; +
|
|
|
|
Water, water, every where***drop to drink.+
|
2008-10-17 20:05:19 +02:00
|
|
|
S. T. <b>Coleridge</b>
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
--Rewrite subsystem
|
2007-08-21 17:41:13 +02:00
|
|
|
CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT);
|
2007-08-21 03:11:32 +02:00
|
|
|
\set ECHO none
|
2007-08-21 17:41:13 +02:00
|
|
|
ALTER TABLE test_tsquery ADD COLUMN keyword tsquery;
|
|
|
|
UPDATE test_tsquery SET keyword = to_tsquery('english', txtkeyword);
|
|
|
|
ALTER TABLE test_tsquery ADD COLUMN sample tsquery;
|
|
|
|
UPDATE test_tsquery SET sample = to_tsquery('english', txtsample::text);
|
|
|
|
SELECT COUNT(*) FROM test_tsquery WHERE keyword < 'new & york';
|
2007-08-21 03:11:32 +02:00
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT COUNT(*) FROM test_tsquery WHERE keyword <= 'new & york';
|
2007-08-21 03:11:32 +02:00
|
|
|
count
|
|
|
|
-------
|
|
|
|
2
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new & york';
|
2007-08-21 03:11:32 +02:00
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT COUNT(*) FROM test_tsquery WHERE keyword >= 'new & york';
|
2007-08-21 03:11:32 +02:00
|
|
|
count
|
|
|
|
-------
|
|
|
|
3
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT COUNT(*) FROM test_tsquery WHERE keyword > 'new & york';
|
2007-08-21 03:11:32 +02:00
|
|
|
count
|
|
|
|
-------
|
|
|
|
2
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
CREATE UNIQUE INDEX bt_tsq ON test_tsquery (keyword);
|
2007-08-21 03:11:32 +02:00
|
|
|
SET enable_seqscan=OFF;
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT COUNT(*) FROM test_tsquery WHERE keyword < 'new & york';
|
2007-08-21 03:11:32 +02:00
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT COUNT(*) FROM test_tsquery WHERE keyword <= 'new & york';
|
2007-08-21 03:11:32 +02:00
|
|
|
count
|
|
|
|
-------
|
|
|
|
2
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new & york';
|
2007-08-21 03:11:32 +02:00
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT COUNT(*) FROM test_tsquery WHERE keyword >= 'new & york';
|
2007-08-21 03:11:32 +02:00
|
|
|
count
|
|
|
|
-------
|
|
|
|
3
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT COUNT(*) FROM test_tsquery WHERE keyword > 'new & york';
|
2007-08-21 03:11:32 +02:00
|
|
|
count
|
|
|
|
-------
|
|
|
|
2
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
RESET enable_seqscan;
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT ts_rewrite('foo & bar & qq & new & york', 'new & york'::tsquery, 'big & apple | nyc | new & york & city');
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
----------------------------------------------------------------------------------
|
|
|
|
'foo' & 'bar' & 'qq' & ( 'city' & 'new' & 'york' | ( 'nyc' | 'big' & 'apple' ) )
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT ts_rewrite('moscow', 'SELECT keyword, sample FROM test_tsquery'::text );
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
---------------------
|
|
|
|
'moskva' | 'moscow'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT ts_rewrite('moscow & hotel', 'SELECT keyword, sample FROM test_tsquery'::text );
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
-----------------------------------
|
2007-10-23 03:44:40 +02:00
|
|
|
'hotel' & ( 'moskva' | 'moscow' )
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite('bar & new & qq & foo & york', 'SELECT keyword, sample FROM test_tsquery'::text );
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
-------------------------------------------------------------------------------------
|
|
|
|
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
|
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( 'moscow', 'SELECT keyword, sample FROM test_tsquery');
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
---------------------
|
|
|
|
'moskva' | 'moscow'
|
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( 'moscow & hotel', 'SELECT keyword, sample FROM test_tsquery');
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
-----------------------------------
|
2007-10-24 04:24:49 +02:00
|
|
|
'hotel' & ( 'moskva' | 'moscow' )
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( 'bar & new & qq & foo & york', 'SELECT keyword, sample FROM test_tsquery');
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
-------------------------------------------------------------------------------------
|
|
|
|
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT keyword FROM test_tsquery WHERE keyword @> 'new';
|
2007-08-21 03:11:32 +02:00
|
|
|
keyword
|
|
|
|
----------------
|
|
|
|
'new' & 'york'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT keyword FROM test_tsquery WHERE keyword @> 'moscow';
|
2007-08-21 03:11:32 +02:00
|
|
|
keyword
|
|
|
|
----------
|
|
|
|
'moscow'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT keyword FROM test_tsquery WHERE keyword <@ 'new';
|
2007-08-21 03:11:32 +02:00
|
|
|
keyword
|
|
|
|
---------
|
|
|
|
(0 rows)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT keyword FROM test_tsquery WHERE keyword <@ 'moscow';
|
2007-08-21 03:11:32 +02:00
|
|
|
keyword
|
|
|
|
----------
|
|
|
|
'moscow'
|
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query;
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
---------------------
|
|
|
|
'moskva' | 'moscow'
|
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query;
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
-----------------------------------
|
2007-10-24 04:24:49 +02:00
|
|
|
'hotel' & ( 'moskva' | 'moscow' )
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new & qq & foo & york') AS query;
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
-------------------------------------------------------------------------------------
|
|
|
|
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
|
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query;
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
---------------------
|
|
|
|
'moskva' | 'moscow'
|
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query;
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
-----------------------------------
|
2007-10-24 04:24:49 +02:00
|
|
|
'hotel' & ( 'moskva' | 'moscow' )
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new & qq & foo & york') AS query;
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
-------------------------------------------------------------------------------------
|
|
|
|
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
CREATE INDEX qq ON test_tsquery USING gist (keyword tsquery_ops);
|
2007-08-21 03:11:32 +02:00
|
|
|
SET enable_seqscan=OFF;
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT keyword FROM test_tsquery WHERE keyword @> 'new';
|
2007-08-21 03:11:32 +02:00
|
|
|
keyword
|
|
|
|
----------------
|
|
|
|
'new' & 'york'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT keyword FROM test_tsquery WHERE keyword @> 'moscow';
|
2007-08-21 03:11:32 +02:00
|
|
|
keyword
|
|
|
|
----------
|
|
|
|
'moscow'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT keyword FROM test_tsquery WHERE keyword <@ 'new';
|
2007-08-21 03:11:32 +02:00
|
|
|
keyword
|
|
|
|
---------
|
|
|
|
(0 rows)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT keyword FROM test_tsquery WHERE keyword <@ 'moscow';
|
2007-08-21 03:11:32 +02:00
|
|
|
keyword
|
|
|
|
----------
|
|
|
|
'moscow'
|
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query;
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
---------------------
|
|
|
|
'moskva' | 'moscow'
|
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query;
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
-----------------------------------
|
2007-10-24 04:24:49 +02:00
|
|
|
'hotel' & ( 'moskva' | 'moscow' )
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new & qq & foo & york') AS query;
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
-------------------------------------------------------------------------------------
|
|
|
|
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
|
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query;
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
---------------------
|
|
|
|
'moskva' | 'moscow'
|
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query;
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
-----------------------------------
|
2007-10-24 04:24:49 +02:00
|
|
|
'hotel' & ( 'moskva' | 'moscow' )
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new & qq & foo & york') AS query;
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
-------------------------------------------------------------------------------------
|
|
|
|
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
RESET enable_seqscan;
|
|
|
|
--test GUC
|
2007-08-21 17:41:13 +02:00
|
|
|
SET default_text_search_config=simple;
|
|
|
|
SELECT to_tsvector('SKIES My booKs');
|
2007-08-21 03:11:32 +02:00
|
|
|
to_tsvector
|
|
|
|
----------------------------
|
2008-05-16 18:31:02 +02:00
|
|
|
'books':3 'my':2 'skies':1
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT plainto_tsquery('SKIES My booKs');
|
2007-08-21 03:11:32 +02:00
|
|
|
plainto_tsquery
|
|
|
|
--------------------------
|
|
|
|
'skies' & 'my' & 'books'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT to_tsquery('SKIES & My | booKs');
|
2007-08-21 03:11:32 +02:00
|
|
|
to_tsquery
|
|
|
|
--------------------------
|
|
|
|
'skies' & 'my' | 'books'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SET default_text_search_config=english;
|
|
|
|
SELECT to_tsvector('SKIES My booKs');
|
2007-08-21 03:11:32 +02:00
|
|
|
to_tsvector
|
|
|
|
------------------
|
2008-05-16 18:31:02 +02:00
|
|
|
'book':3 'sky':1
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT plainto_tsquery('SKIES My booKs');
|
2007-08-21 03:11:32 +02:00
|
|
|
plainto_tsquery
|
|
|
|
-----------------
|
|
|
|
'sky' & 'book'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT to_tsquery('SKIES & My | booKs');
|
2007-08-21 03:11:32 +02:00
|
|
|
to_tsquery
|
|
|
|
----------------
|
|
|
|
'sky' | 'book'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
--trigger
|
|
|
|
CREATE TRIGGER tsvectorupdate
|
|
|
|
BEFORE UPDATE OR INSERT ON test_tsvector
|
|
|
|
FOR EACH ROW EXECUTE PROCEDURE tsvector_update_trigger(a, 'pg_catalog.english', t);
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
0
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
0
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
|
2007-08-21 03:11:32 +02:00
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
2009-05-19 04:48:26 +02:00
|
|
|
-- test finding items in GIN's pending list
|
|
|
|
create temp table pendtest (ts tsvector);
|
|
|
|
create index pendtest_idx on pendtest using gin(ts);
|
|
|
|
insert into pendtest values (to_tsvector('Lore ipsam'));
|
|
|
|
insert into pendtest values (to_tsvector('Lore ipsum'));
|
|
|
|
select * from pendtest where 'ipsu:*'::tsquery @@ ts;
|
|
|
|
ts
|
|
|
|
--------------------
|
|
|
|
'ipsum':2 'lore':1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select * from pendtest where 'ipsa:*'::tsquery @@ ts;
|
|
|
|
ts
|
|
|
|
--------------------
|
|
|
|
'ipsam':2 'lore':1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select * from pendtest where 'ips:*'::tsquery @@ ts;
|
|
|
|
ts
|
|
|
|
--------------------
|
|
|
|
'ipsam':2 'lore':1
|
|
|
|
'ipsum':2 'lore':1
|
|
|
|
(2 rows)
|
|
|
|
|
|
|
|
select * from pendtest where 'ipt:*'::tsquery @@ ts;
|
|
|
|
ts
|
|
|
|
----
|
|
|
|
(0 rows)
|
|
|
|
|
|
|
|
select * from pendtest where 'ipi:*'::tsquery @@ ts;
|
|
|
|
ts
|
|
|
|
----
|
|
|
|
(0 rows)
|
|
|
|
|