From 4069d48aa763bae9fad9b642276959e4749370fd Mon Sep 17 00:00:00 2001 From: Jan Wieck Date: Thu, 25 Nov 1999 01:28:07 +0000 Subject: [PATCH] Added another single byte oriented decompressor, useful for comparison functions. Added all lztext comparison functions, operators and a default operator class for nbtree on lztext. Jan --- src/backend/utils/adt/lztext.c | 145 +++++++++++++++++++++- src/backend/utils/adt/pg_lzcompress.c | 166 +++++++++++++++++++++++++- src/include/catalog/pg_amop.h | 12 +- src/include/catalog/pg_amproc.h | 3 +- src/include/catalog/pg_opclass.h | 4 +- src/include/catalog/pg_operator.h | 10 +- src/include/catalog/pg_proc.h | 16 ++- src/include/utils/builtins.h | 9 +- src/include/utils/pg_lzcompress.h | 79 +++++++++++- 9 files changed, 435 insertions(+), 9 deletions(-) diff --git a/src/backend/utils/adt/lztext.c b/src/backend/utils/adt/lztext.c index 6ff128515b..ef31094fb5 100644 --- a/src/backend/utils/adt/lztext.c +++ b/src/backend/utils/adt/lztext.c @@ -1,7 +1,7 @@ /* ---------- * lztext.c - * - * $Header: /cvsroot/pgsql/src/backend/utils/adt/Attic/lztext.c,v 1.3 1999/11/24 03:45:12 ishii Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/Attic/lztext.c,v 1.4 1999/11/25 01:28:04 wieck Exp $ * * Text type with internal LZ compressed representation. Uses the * standard PostgreSQL compression method. @@ -290,3 +290,146 @@ lztext_text(lztext *lz) } +/* ---------- + * lztext_cmp - + * + * Comparison function for two lztext datums. + * + * Returns -1, 0 or 1. 
+ *
+ *      Only the sign of the result matters to callers.  Both build
+ *      variants normalize their result, so the value is always
+ *      exactly one of -1, 0 or 1.  If either argument is NULL the
+ *      function returns 0.
+ * ----------
+ */
+int32
+lztext_cmp(lztext *lz1, lztext *lz2)
+{
+#ifdef USE_LOCALE
+
+    char       *cp1;
+    char       *cp2;
+    int         result;
+
+    if (lz1 == NULL || lz2 == NULL)
+        return (int32)0;
+
+    /*
+     * Locale aware comparison needs both values as complete C
+     * strings, so get them from lztextout() and release them again
+     * once strcoll() is done.
+     */
+    cp1 = lztextout(lz1);
+    cp2 = lztextout(lz2);
+
+    result = strcoll(cp1, cp2);
+
+    pfree(cp1);
+    pfree(cp2);
+
+    /* strcoll() only promises the sign of its result - normalize it */
+    return (int32)((result > 0) - (result < 0));
+
+#else       /* !USE_LOCALE */
+
+    PGLZ_DecompState    ds1;
+    PGLZ_DecompState    ds2;
+    int                 c1;
+    int                 c2;
+    int32               result = (int32)0;
+
+    if (lz1 == NULL || lz2 == NULL)
+        return (int32)0;
+
+    pglz_decomp_init(&ds1, lz1);
+    pglz_decomp_init(&ds2, lz2);
+
+    /*
+     * Walk both values byte by byte and stop at the first difference,
+     * so only as much as needed gets decompressed.
+     */
+    for(;;)
+    {
+        c1 = pglz_decomp_getchar(&ds1);
+        c2 = pglz_decomp_getchar(&ds2);
+
+        if (c1 != c2)
+        {
+            /*
+             * Data bytes arrive as 0..255 and EOF is negative, so
+             * the value that ends first (a true prefix of the other)
+             * sorts lower without any special casing.
+             */
+            result = (int32)((c1 < c2) ? -1 : 1);
+            break;
+        }
+
+        /* identical so far; if both hit EOF together they are equal */
+        if (c1 == EOF)
+            break;
+    }
+
+    pglz_decomp_end(&ds1);
+    pglz_decomp_end(&ds2);
+
+    return result;
+
+#endif      /* USE_LOCALE */
+}
+
+
+/* ----------
+ * lztext_eq ... -
+ *
+ * =, !=, >, >=, < and <= operator functions for two
+ * lztext datums.
+ *
+ *      Every operator yields false as soon as one of its arguments
+ *      is NULL; otherwise the answer is taken from the sign of
+ *      lztext_cmp().
+ * ----------
+ */
+bool
+lztext_eq(lztext *lz1, lztext *lz2)
+{
+    return (bool)(lz1 != NULL && lz2 != NULL &&
+                  lztext_cmp(lz1, lz2) == 0);
+}
+
+
+bool
+lztext_ne(lztext *lz1, lztext *lz2)
+{
+    return (bool)(lz1 != NULL && lz2 != NULL &&
+                  lztext_cmp(lz1, lz2) != 0);
+}
+
+
+bool
+lztext_gt(lztext *lz1, lztext *lz2)
+{
+    return (bool)(lz1 != NULL && lz2 != NULL &&
+                  lztext_cmp(lz1, lz2) > 0);
+}
+
+
+bool
+lztext_ge(lztext *lz1, lztext *lz2)
+{
+    return (bool)(lz1 != NULL && lz2 != NULL &&
+                  lztext_cmp(lz1, lz2) >= 0);
+}
+
+
+bool
+lztext_lt(lztext *lz1, lztext *lz2)
+{
+    return (bool)(lz1 != NULL && lz2 != NULL &&
+                  lztext_cmp(lz1, lz2) < 0);
+}
+
+
+bool
+lztext_le(lztext *lz1, lztext *lz2)
+{
+    return (bool)(lz1 != NULL && lz2 != NULL &&
+                  lztext_cmp(lz1, lz2) <= 0);
+}
+
+
diff --git a/src/backend/utils/adt/pg_lzcompress.c b/src/backend/utils/adt/pg_lzcompress.c
index b3eb5e2a01..c35568e959 100644
--- a/src/backend/utils/adt/pg_lzcompress.c
+++ b/src/backend/utils/adt/pg_lzcompress.c
@@ -1,7 +1,7 @@
 /* ----------
  * pg_lzcompress.c -
  *
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/pg_lzcompress.c,v 1.2 1999/11/17 22:18:45 wieck Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/pg_lzcompress.c,v 1.3 1999/11/25 01:28:04 wieck Exp $
  *
  * This is an implementation of LZ compression for PostgreSQL.
  * It uses a simple history table and generates 2-3 byte tags
@@ -671,3 +671,167 @@ pglz_decompress (PGLZ_Header *source, char *dest)
 }
 
 
+/* ----------
+ * pglz_get_next_decomp_char_from_lzdata -
+ *
+ * Reads the next character from a decompression state if the
+ * input data to pglz_decomp_init() was in compressed format.
+ *
+ *      Returns the byte as a value in 0..255, or EOF once the
+ *      compressed input is used up.  Every byte handed out is also
+ *      stored at cp_out, because later tags copy from output that
+ *      was already produced.
+ * ----------
+ */
+int
+pglz_get_next_decomp_char_from_lzdata(PGLZ_DecompState *dstate)
+{
+    unsigned char       retval;
+
+    if (dstate->tocopy > 0)
+    {
+        /* ----------
+         * Copy one byte from output to output until we did it
+         * for the length specified by the last tag. Return that
+         * byte.
+         * ----------
+         */
+        dstate->tocopy--;
+        return (*(dstate->cp_out++) = *(dstate->cp_copy++));
+    }
+
+    if (dstate->ctrl_count == 0)
+    {
+        /* ----------
+         * Get the next control byte if we need to, but check
+         * for EOF before.
+         * ----------
+         */
+        if (dstate->cp_in == dstate->cp_end)
+        {
+            return EOF;
+        }
+
+        /* ----------
+         * This decompression method saves time only, if we stop near
+         * the beginning of the data (maybe because we're called by a
+         * comparison function and a difference occurs early). Otherwise,
+         * all the checks, needed here, cause too much overhead.
+         *
+         * Thus, once 256 or more bytes have been produced, we
+         * decompress the entire rest at once into the temporary
+         * buffer and change the decomp state to return the prepared
+         * data from the buffer by the more simple calls to
+         * pglz_get_next_decomp_char_from_plain().
+         * ----------
+         */
+        if (dstate->cp_out - dstate->temp_buf >= 256)
+        {
+            unsigned char      *cp_in   = dstate->cp_in;
+            unsigned char      *cp_out  = dstate->cp_out;
+            unsigned char      *cp_end  = dstate->cp_end;
+            unsigned char      *cp_copy;
+            unsigned char       ctrl;
+            int                 off;
+            int                 len;
+            int                 i;
+
+            while (cp_in < cp_end)
+            {
+                ctrl = *cp_in++;
+
+                for (i = 0; i < 8; i++)
+                {
+                    if (cp_in == cp_end)
+                        break;
+
+                    if (ctrl & 0x01)
+                    {
+                        len = (cp_in[0] & 0x0f) + 3;
+                        off = ((cp_in[0] & 0xf0) << 4) | cp_in[1];
+                        cp_in += 2;
+                        if (len == 18)
+                            len += *cp_in++;
+
+                        cp_copy = cp_out - off;
+                        while(len--)
+                            *cp_out++ = *cp_copy++;
+                    } else {
+                        *cp_out++ = *cp_in++;
+                    }
+                    ctrl >>= 1;
+                }
+            }
+
+            /* ----------
+             * From now on the "input" is the freshly decompressed
+             * part of the temporary buffer: it starts where the
+             * byte-wise output stopped and ends where the bulk
+             * loop stopped.
+             * ----------
+             */
+            dstate->cp_in       = dstate->cp_out;
+            dstate->cp_end      = cp_out;
+            dstate->next_char   = pglz_get_next_decomp_char_from_plain;
+
+            /* ----------
+             * Let the plain reader hand out the first byte. It checks
+             * the end pointer, so we return EOF instead of reading
+             * behind the data if the rest turned out to be empty.
+             * ----------
+             */
+            return pglz_get_next_decomp_char_from_plain(dstate);
+        }
+
+        /* ----------
+         * Not yet, get next control byte into decomp state.
+         * ----------
+         */
+        dstate->ctrl = (unsigned char)(*(dstate->cp_in++));
+        dstate->ctrl_count = 8;
+    }
+
+    /* ----------
+     * Check for EOF in tag/literal byte data.
+     * ----------
+     */
+    if (dstate->cp_in == dstate->cp_end)
+    {
+        return EOF;
+    }
+
+    /* ----------
+     * Handle next control bit.
+     * ----------
+     */
+    dstate->ctrl_count--;
+    if (dstate->ctrl & 0x01)
+    {
+        /* ----------
+         * Bit is set, so tag is following. Setup copy information
+         * and do the copy for the first byte as above.
+         * ----------
+         */
+        int         off;
+
+        dstate->tocopy = (dstate->cp_in[0] & 0x0f) + 3;
+        off = ((dstate->cp_in[0] & 0xf0) << 4) | dstate->cp_in[1];
+        dstate->cp_in += 2;
+        if (dstate->tocopy == 18)
+            dstate->tocopy += *(dstate->cp_in++);
+        dstate->cp_copy = dstate->cp_out - off;
+
+        dstate->tocopy--;
+        retval = (*(dstate->cp_out++) = *(dstate->cp_copy++));
+    } else {
+        /* ----------
+         * Bit is unset, so literal byte follows.
+         * ----------
+         */
+        retval = (*(dstate->cp_out++) = *(dstate->cp_in++));
+    }
+    dstate->ctrl >>= 1;
+
+    return (int)retval;
+}
+
+
+/* ----------
+ * pglz_get_next_decomp_char_from_plain -
+ *
+ * The input data to pglz_decomp_init() was stored in uncompressed
+ * format. So we don't have a temporary output buffer and simply
+ * return bytes from the input until EOF.
+ *
+ * The compressed reader also switches to this function after it
+ * has decompressed the rest of its input in one go; cp_in and
+ * cp_end then point into the temporary buffer.
+ * ----------
+ */
+int
+pglz_get_next_decomp_char_from_plain(PGLZ_DecompState *dstate)
+{
+    if (dstate->cp_in >= dstate->cp_end)
+        return EOF;
+
+    return (int)(*(dstate->cp_in++));
+}
+
+
diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h
index 0cbaac5322..ef499d7afd 100644
--- a/src/include/catalog/pg_amop.h
+++ b/src/include/catalog/pg_amop.h
@@ -7,7 +7,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_amop.h,v 1.24 1999/09/29 21:13:30 wieck Exp $
+ * $Id: pg_amop.h,v 1.25 1999/11/25 01:28:05 wieck Exp $
  *
  * NOTES
  * the genbki.sh script reads this file and generates .bki
@@ -348,6 +348,16 @@ DATA(insert OID = 0 ( 403 1768 1752 3 btreesel btreenpage ));
 DATA(insert OID = 0 ( 403 1768 1757 4 btreesel btreenpage ));
 DATA(insert OID = 0 ( 403 1768 1756 5 btreesel btreenpage ));
 
+/*
+ * nbtree lztext
+ */
+
+DATA(insert OID = 0 ( 403 1663 1659 1 btreesel btreenpage ));
+DATA(insert OID = 0 ( 403 1663 1660 2 btreesel btreenpage ));
+DATA(insert OID = 0 ( 403 1663 1657 3 btreesel btreenpage ));
+DATA(insert OID = 0 ( 403 1663 1662 4 btreesel btreenpage ));
+DATA(insert OID = 0 ( 403 1663 1661 5 btreesel btreenpage ));
+
 /*
  * hash table _ops
  */
diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h
index 45d1b28587..8adee475b0 100644
--- a/src/include/catalog/pg_amproc.h
+++ b/src/include/catalog/pg_amproc.h
@@ -9,7 +9,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_amproc.h,v 1.15 1999/09/29 21:13:30 wieck Exp $
+ * $Id: pg_amproc.h,v 1.16 1999/11/25 01:28:05 wieck Exp $
  *
  * NOTES
  *
the genbki.sh script reads this file and generates .bki @@ -97,6 +97,7 @@ DATA(insert OID = 0 (403 1313 1315 1)); DATA(insert OID = 0 (403 810 836 1)); DATA(insert OID = 0 (403 935 926 1)); DATA(insert OID = 0 (403 1768 1769 1)); +DATA(insert OID = 0 (403 1663 1636 1)); /* hash */ diff --git a/src/include/catalog/pg_opclass.h b/src/include/catalog/pg_opclass.h index 8b670e3e07..0284d08335 100644 --- a/src/include/catalog/pg_opclass.h +++ b/src/include/catalog/pg_opclass.h @@ -7,7 +7,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: pg_opclass.h,v 1.22 1999/11/23 04:47:39 momjian Exp $ + * $Id: pg_opclass.h,v 1.23 1999/11/25 01:28:05 wieck Exp $ * * NOTES * the genbki.sh script reads this file and generates .bki @@ -117,5 +117,7 @@ DATA(insert OID = 652 ( cidr_ops 650 )); DESCR(""); DATA(insert OID = 1768 ( numeric_ops 1700 )); DESCR(""); +DATA(insert OID = 1663 ( lztext_ops 1625 )); +DESCR(""); #endif /* PG_OPCLASS_H */ diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h index 00d19560b9..b572b0b01a 100644 --- a/src/include/catalog/pg_operator.h +++ b/src/include/catalog/pg_operator.h @@ -7,7 +7,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: pg_operator.h,v 1.61 1999/11/22 17:56:37 momjian Exp $ + * $Id: pg_operator.h,v 1.62 1999/11/25 01:28:05 wieck Exp $ * * NOTES * the genbki.sh script reads this file and generates .bki @@ -687,6 +687,14 @@ DATA(insert OID = 1761 ( "/" PGUID 0 b t f 1700 1700 1700 0 0 0 0 numeric DATA(insert OID = 1762 ( "%" PGUID 0 b t f 1700 1700 1700 0 0 0 0 numeric_mod - - )); DATA(insert OID = 1763 ( "@" PGUID 0 l t f 0 1700 1700 0 0 0 0 numeric_abs - - )); +/* LZTEXT type */ +DATA(insert OID = 1657 ( "=" PGUID 0 b t f 1625 1625 16 1657 1658 1659 1659 lztext_eq eqsel eqjoinsel )); +DATA(insert OID = 1658 ( "<>" PGUID 0 b t f 1625 1625 16 1658 1657 0 0 lztext_ne neqsel neqjoinsel )); +DATA(insert OID = 1659 ( "<" PGUID 0 b t f 1625 1625 16 1661 1662 
0 0 lztext_lt intltsel intltjoinsel )); +DATA(insert OID = 1660 ( "<=" PGUID 0 b t f 1625 1625 16 1662 1661 0 0 lztext_le intltsel intltjoinsel )); +DATA(insert OID = 1661 ( ">" PGUID 0 b t f 1625 1625 16 1659 1660 0 0 lztext_gt intgtsel intgtjoinsel )); +DATA(insert OID = 1662 ( ">=" PGUID 0 b t f 1625 1625 16 1660 1659 0 0 lztext_ge intgtsel intgtjoinsel )); + /* diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index bb2a5b6dd7..2cf19d5788 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: pg_proc.h,v 1.106 1999/11/17 21:21:50 wieck Exp $ + * $Id: pg_proc.h,v 1.107 1999/11/25 01:28:05 wieck Exp $ * * NOTES * The script catalog/genbki.sh reads this file and generates .bki @@ -2359,6 +2359,20 @@ DATA(insert OID = 1634 ( lztextoctetlen PGUID 11 f t t 1 f 23 "1625" 100 0 DESCR("octet length"); DATA(insert OID = 1635 ( octet_length PGUID 11 f t t 1 f 23 "1625" 100 0 1 0 lztextoctetlen - )); DESCR("octet length"); +DATA(insert OID = 1636 ( lztext_cmp PGUID 11 f t t 2 f 23 "1625 1625" 100 0 1 0 lztext_cmp - )); +DESCR("compare lztext vs. 
lztext"); +DATA(insert OID = 1637 ( lztext_eq PGUID 11 f t t 2 f 16 "1625 1625" 100 0 1 0 lztext_eq - )); +DESCR("equal"); +DATA(insert OID = 1638 ( lztext_ne PGUID 11 f t t 2 f 16 "1625 1625" 100 0 1 0 lztext_ne - )); +DESCR("not equal"); +DATA(insert OID = 1639 ( lztext_gt PGUID 11 f t t 2 f 16 "1625 1625" 100 0 1 0 lztext_gt - )); +DESCR("greater-than"); +DATA(insert OID = 1654 ( lztext_ge PGUID 11 f t t 2 f 16 "1625 1625" 100 0 1 0 lztext_ge - )); +DESCR("greater-than-or-equal"); +DATA(insert OID = 1655 ( lztext_lt PGUID 11 f t t 2 f 16 "1625 1625" 100 0 1 0 lztext_lt - )); +DESCR("lower-than"); +DATA(insert OID = 1656 ( lztext_le PGUID 11 f t t 2 f 16 "1625 1625" 100 0 1 0 lztext_le - )); +DESCR("lower-than-or-equal"); /* diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 1bf3273ca1..0b24dbab3f 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: builtins.h,v 1.90 1999/11/17 21:21:51 wieck Exp $ + * $Id: builtins.h,v 1.91 1999/11/25 01:28:07 wieck Exp $ * * NOTES * This should normally only be included by fmgr.h. 
@@ -635,5 +635,12 @@ text *lztext_text(lztext *lz); lztext *text_lztext(text *txt); int32 lztextlen(lztext *lz); int32 lztextoctetlen(lztext *lz); +int32 lztext_cmp(lztext *lz1, lztext *lz2); +bool lztext_eq(lztext *lz1, lztext *lz2); +bool lztext_ne(lztext *lz1, lztext *lz2); +bool lztext_gt(lztext *lz1, lztext *lz2); +bool lztext_ge(lztext *lz1, lztext *lz2); +bool lztext_lt(lztext *lz1, lztext *lz2); +bool lztext_le(lztext *lz1, lztext *lz2); #endif /* BUILTINS_H */ diff --git a/src/include/utils/pg_lzcompress.h b/src/include/utils/pg_lzcompress.h index 481fd24fab..9e3d3f3294 100644 --- a/src/include/utils/pg_lzcompress.h +++ b/src/include/utils/pg_lzcompress.h @@ -1,7 +1,7 @@ /* ---------- * pg_lzcompress.h - * - * $Header: /cvsroot/pgsql/src/include/utils/pg_lzcompress.h,v 1.2 1999/11/17 22:18:46 wieck Exp $ + * $Header: /cvsroot/pgsql/src/include/utils/pg_lzcompress.h,v 1.3 1999/11/25 01:28:07 wieck Exp $ * * Definitions for the builtin LZ compressor * ---------- @@ -110,6 +110,26 @@ typedef struct PGLZ_Strategy { } PGLZ_Strategy; +/* ---------- + * PGLZ_DecompState - + * + * Decompression state variable for byte-per-byte decompression + * using pglz_decomp_getchar() macro. + * Filled in by pglz_decomp_init() and released by pglz_decomp_end(). + * ---------- + */ +typedef struct PGLZ_DecompState { + unsigned char *temp_buf; /* output buffer palloc'd by pglz_decomp_init() for compressed input, NULL for plain input */ + unsigned char *cp_in; /* next input byte to read */ + unsigned char *cp_end; /* end of input; reaching it means EOF */ + unsigned char *cp_out; /* next write position in temp_buf */ + unsigned char *cp_copy; /* read position in the output for the tag copy in progress */ + int (*next_char)(struct PGLZ_DecompState *dstate); /* reader called by pglz_decomp_getchar() */ + int tocopy; /* bytes still to copy for the current tag */ + int ctrl_count; /* control bits left in ctrl */ + unsigned char ctrl; /* current control byte, shifted right per item */ +} PGLZ_DecompState; + + /* ---------- * The standard strategies * @@ -139,6 +159,55 @@ extern PGLZ_Strategy *PGLZ_strategy_allways; extern PGLZ_Strategy *PGLZ_strategy_never; +/* ---------- + * pglz_decomp_getchar - + * + * Get next character (or EOF) from decompressor. + * The status variable must be initialized before and deinitialized + * after decompression with the next two macros below. 
+ * ----------
+ */
+#define pglz_decomp_getchar(_ds)                                            \
+    ((*((_ds)->next_char))((_ds)))
+
+
+/* ----------
+ * pglz_decomp_init -
+ *
+ *      Initialize a decomp state from a stored input value.
+ *
+ *      _ds is a pointer to the PGLZ_DecompState to set up, _lz a
+ *      pointer to the input value.  For compressed input a temporary
+ *      buffer of PGLZ_RAW_SIZE() bytes is allocated with palloc() and
+ *      the compressed reader is installed.  Otherwise no buffer is
+ *      allocated and bytes are returned directly from the input.
+ *
+ *      Both arguments are evaluated more than once, so they must be
+ *      free of side effects.  The body is wrapped into do/while(0)
+ *      to make the macro behave like one single statement, also in
+ *      an unbraced if/else.  It must be followed by a semicolon.
+ * ----------
+ */
+#define pglz_decomp_init(_ds,_lz)   do {                                    \
+        (_ds)->cp_in = ((unsigned char *)(_lz))                             \
+                                        + sizeof(PGLZ_Header);              \
+        (_ds)->cp_end = (_ds)->cp_in + (_lz)->varsize                       \
+                                        - sizeof(PGLZ_Header);              \
+        if (PGLZ_IS_COMPRESSED((_lz))) {                                    \
+            (_ds)->temp_buf = (unsigned char *)                             \
+                                        palloc(PGLZ_RAW_SIZE((_lz)));       \
+            (_ds)->cp_out = (_ds)->temp_buf;                                \
+            (_ds)->next_char = pglz_get_next_decomp_char_from_lzdata;       \
+            (_ds)->tocopy = 0;                                              \
+            (_ds)->ctrl_count = 0;                                          \
+        } else {                                                            \
+            (_ds)->temp_buf = NULL;                                         \
+            (_ds)->next_char = pglz_get_next_decomp_char_from_plain;        \
+        }                                                                   \
+    } while (0)
+
+
+/* ----------
+ * pglz_decomp_end -
+ *
+ *      Deallocate resources after decompression.
+ *
+ *      Frees the temporary buffer if pglz_decomp_init() allocated
+ *      one.  Like pglz_decomp_init() this is a do/while(0) statement
+ *      macro and must be followed by a semicolon.
+ * ----------
+ */
+#define pglz_decomp_end(_ds)        do {                                    \
+        if ((_ds)->temp_buf != NULL)                                        \
+            pfree((void *)((_ds)->temp_buf));                               \
+    } while (0)
+
+
 /* ----------
  * Global function declarations
  * ----------
@@ -148,5 +217,13 @@ int pglz_compress (char *source, int32 slen, PGLZ_Header *dest,
 int pglz_decompress (PGLZ_Header *source, char *dest);
 
 
+/* ----------
+ * Functions used by pglz_decomp_getchar().
+ * Internal use only.
+ * ----------
+ */
+extern int pglz_get_next_decomp_char_from_lzdata(PGLZ_DecompState *dstate);
+extern int pglz_get_next_decomp_char_from_plain(PGLZ_DecompState *dstate);
+
 #endif /* _PG_LZCOMPRESS_H_ */