From 2b6a35f7cdc045bf01a0539cf76dcd34adb0ccbf Mon Sep 17 00:00:00 2001 From: "Thomas G. Lockhart" Date: Wed, 23 Aug 2000 05:59:11 +0000 Subject: [PATCH] Fix several tags which refer to e-mail addresses but were missing the "mailto:" prefix. Fix typo. Thanks to Neil Conway for the heads-up. --- doc/src/sgml/datatype.sgml | 4 +- doc/src/sgml/datetime.sgml | 4 +- doc/src/sgml/ecpg.sgml | 6 +- doc/src/sgml/geqo.sgml | 602 +++++++++++++++++---------------- doc/src/sgml/indices.sgml | 4 +- doc/src/sgml/jdbc.sgml | 4 +- doc/src/sgml/keys.sgml | 314 +++++++++-------- doc/src/sgml/ref/ecpg-ref.sgml | 8 +- doc/src/sgml/ref/insert.sgml | 22 +- 9 files changed, 506 insertions(+), 462 deletions(-) diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml index 07d7cec29b..2b03cdeffc 100644 --- a/doc/src/sgml/datatype.sgml +++ b/doc/src/sgml/datatype.sgml @@ -1,5 +1,5 @@ @@ -594,7 +594,7 @@ CREATE TABLE tablename (12 bytes -178000000 years 178000000 years - 1 mircosecond + 1 microsecond date diff --git a/doc/src/sgml/datetime.sgml b/doc/src/sgml/datetime.sgml index 0ebdd14d91..97952aa839 100644 --- a/doc/src/sgml/datetime.sgml +++ b/doc/src/sgml/datetime.sgml @@ -1,5 +1,5 @@ @@ -629,7 +629,7 @@ Date/time details Contributed by - José Soares. + José Soares. diff --git a/doc/src/sgml/ecpg.sgml b/doc/src/sgml/ecpg.sgml index 917493f2a8..fabf9bee39 100644 --- a/doc/src/sgml/ecpg.sgml +++ b/doc/src/sgml/ecpg.sgml @@ -1,5 +1,5 @@ @@ -32,8 +32,8 @@ $Header: /cvsroot/pgsql/doc/src/sgml/ecpg.sgml,v 1.14 2000/05/02 20:01:51 thomas This describes an embedded SQL in C package for Postgres. - It is written by Linus Tolke - and Michael Meskes. + It is written by Linus Tolke + and Michael Meskes. diff --git a/doc/src/sgml/geqo.sgml b/doc/src/sgml/geqo.sgml index 4f2f80e97a..04b8def4ed 100644 --- a/doc/src/sgml/geqo.sgml +++ b/doc/src/sgml/geqo.sgml @@ -1,119 +1,125 @@ - - - -Martin -Utesch - - -University of Mining and Technology - - -Institute of Automatic Control - -
- -Freiberg - - -Germany - -
-
-
-1997-10-02 -
+ + + + Martin + Utesch + + + University of Mining and Technology + + + Institute of Automatic Control + +
+ + Freiberg + + + Germany + +
+
+
+ 1997-10-02 +
-Genetic Query Optimization in Database Systems + Genetic Query Optimization in Database Systems - - -Author - -Written by Martin Utesch -for the Institute of Automatic Control at the University of Mining and Technology in Freiberg, Germany. - - - + + + Author + + Written by Martin Utesch + for the Institute of Automatic Control at the University of Mining and Technology in Freiberg, Germany. + + + - -Query Handling as a Complex Optimization Problem + + Query Handling as a Complex Optimization Problem - - Among all relational operators the most difficult one to process and -optimize is the join. The number of alternative plans to answer a query -grows exponentially with the number of joins included in it. Further -optimization effort is caused by the support of a variety of join methods - (e.g., nested loop, index scan, merge join in Postgres) to -process individual joins and a diversity of indices (e.g., r-tree, -b-tree, hash in Postgres) as access paths for relations. - + + Among all relational operators the most difficult one to process and + optimize is the join. The number of alternative plans to answer a query + grows exponentially with the number of joins included in it. Further + optimization effort is caused by the support of a variety of + join methods + (e.g., nested loop, index scan, merge join in Postgres) to + process individual joins and a diversity of + indices (e.g., r-tree, + b-tree, hash in Postgres) as access paths for relations. + - - The current Postgres optimizer implementation performs a near- -exhaustive search over the space of alternative strategies. This query -optimization technique is inadequate to support database application -domains that involve the need for extensive queries, such as artificial -intelligence. - + + The current Postgres optimizer + implementation performs a near- + exhaustive search over the space of alternative strategies. 
This query + optimization technique is inadequate to support database application + domains that involve the need for extensive queries, such as artificial + intelligence. + - - The Institute of Automatic Control at the University of Mining and -Technology, in Freiberg, Germany, encountered the described problems as its -folks wanted to take the Postgres DBMS as the backend for a decision -support knowledge based system for the maintenance of an electrical -power grid. The DBMS needed to handle large join queries for the -inference machine of the knowledge based system. - + + The Institute of Automatic Control at the University of Mining and + Technology, in Freiberg, Germany, encountered the described problems as its + folks wanted to take the Postgres DBMS as the backend for a decision + support knowledge based system for the maintenance of an electrical + power grid. The DBMS needed to handle large join queries for the + inference machine of the knowledge based system. + - - Performance difficulties within exploring the space of possible query -plans arose the demand for a new optimization technique being developed. - + + Performance difficulties within exploring the space of possible query + plans arose the demand for a new optimization technique being developed. + - - In the following we propose the implementation of a Genetic Algorithm - as an option for the database query optimization problem. - - + + In the following we propose the implementation of a Genetic Algorithm + as an option for the database query optimization problem. + + - -Genetic Algorithms (<Acronym>GA</Acronym>) + + Genetic Algorithms (<acronym>GA</acronym>) - - The GA is a heuristic optimization method which operates through -determined, randomized search. The set of possible solutions for the -optimization problem is considered as a population of individuals. -The degree of adaption of an individual to its environment is specified -by its fitness. 
- + + The GA is a heuristic optimization method which operates through + determined, randomized search. The set of possible solutions for the + optimization problem is considered as a + population of individuals. + The degree of adaption of an individual to its environment is specified + by its fitness. + - - The coordinates of an individual in the search space are represented -by chromosomes, in essence a set of character strings. A gene is a -subsection of a chromosome which encodes the value of a single parameter -being optimized. Typical encodings for a gene could be binary or -integer. - + + The coordinates of an individual in the search space are represented + by chromosomes, in essence a set of character + strings. A gene is a + subsection of a chromosome which encodes the value of a single parameter + being optimized. Typical encodings for a gene could be binary or + integer. + - - Through simulation of the evolutionary operations recombination, -mutation, and selection new generations of search points are found -that show a higher average fitness than their ancestors. - + + Through simulation of the evolutionary operations recombination, + mutation, and + selection new generations of search points are found + that show a higher average fitness than their ancestors. + - - According to the "comp.ai.genetic" FAQ it cannot be stressed too -strongly that a GA is not a pure random search for a solution to a -problem. A GA uses stochastic processes, but the result is distinctly -non-random (better than random). + + According to the "comp.ai.genetic" FAQ it cannot be stressed too + strongly that a GA is not a pure random search for a solution to a + problem. A GA uses stochastic processes, but the result is distinctly + non-random (better than random). 
- -Structured Diagram of a GA: + +Structured Diagram of a GA: --------------------------- P(t) generation of ancestors at a time t @@ -140,229 +146,235 @@ P''(t) generation of descendants at a time t | +-------------------------------------+ | | t := t + 1 | +===+=====================================+ - - - + +
+ - -Genetic Query Optimization (<Acronym>GEQO</Acronym>) in Postgres + + Genetic Query Optimization (<acronym>GEQO</acronym>) in Postgres - - The GEQO module is intended for the solution of the query -optimization problem similar to a traveling salesman problem (TSP). -Possible query plans are encoded as integer strings. Each string -represents the join order from one relation of the query to the next. -E. g., the query tree - - /\ - /\ 2 - /\ 3 - 4 1 - -is encoded by the integer string '4-1-3-2', -which means, first join relation '4' and '1', then '3', and -then '2', where 1, 2, 3, 4 are relids in Postgres. - + + The GEQO module is intended for the solution of the query + optimization problem similar to a traveling salesman problem (TSP). + Possible query plans are encoded as integer strings. Each string + represents the join order from one relation of the query to the next. + E. g., the query tree + + /\ + /\ 2 + /\ 3 +4 1 + + is encoded by the integer string '4-1-3-2', + which means, first join relation '4' and '1', then '3', and + then '2', where 1, 2, 3, 4 are relids in Postgres. + - - Parts of the GEQO module are adapted from D. Whitley's Genitor -algorithm. - + + Parts of the GEQO module are adapted from D. Whitley's Genitor + algorithm. + - - Specific characteristics of the GEQO implementation in Postgres -are: + + Specific characteristics of the GEQO + implementation in Postgres + are: - - - -Usage of a steady state GA (replacement of the least fit - individuals in a population, not whole-generational replacement) - allows fast convergence towards improved query plans. This is - essential for query handling with reasonable time; - - + + + + Usage of a steady state GA (replacement of the least fit + individuals in a population, not whole-generational replacement) + allows fast convergence towards improved query plans. 
This is + essential for query handling with reasonable time; + + - - -Usage of edge recombination crossover which is especially suited - to keep edge losses low for the solution of the TSP by means of a GA; - - + + + Usage of edge recombination crossover which is especially suited + to keep edge losses low for the solution of the + TSP by means of a GA; + + - - -Mutation as genetic operator is deprecated so that no repair - mechanisms are needed to generate legal TSP tours. - - - - + + + Mutation as genetic operator is deprecated so that no repair + mechanisms are needed to generate legal TSP tours. + + + + - - The GEQO module gives the following benefits to the Postgres DBMS -compared to the Postgres query optimizer implementation: + + The GEQO module gives the following benefits to + the Postgres DBMS + compared to the Postgres query optimizer implementation: - - - -Handling of large join queries through non-exhaustive search; - - + + + + Handling of large join queries through non-exhaustive search; + + - - -Improved cost size approximation of query plans since no longer - plan merging is needed (the GEQO module evaluates the cost for a - query plan as an individual). - - - - + + + Improved cost size approximation of query plans since no longer + plan merging is needed (the GEQO module evaluates the cost for a + query plan as an individual). 
+ + + + - + - -Future Implementation Tasks for <ProductName>Postgres</ProductName> <Acronym>GEQO</Acronym> + + Future Implementation Tasks for + <productname>Postgres</productname> <acronym>GEQO</acronym> - -Basic Improvements + + Basic Improvements - -Improve genetic algorithm parameter settings + + Improve genetic algorithm parameter settings - -In file backend/optimizer/geqo/geqo_params.c, routines -gimme_pool_size and gimme_number_generations, -we have to find a compromise for the parameter settings -to satisfy two competing demands: - - - -Optimality of the query plan - - - - -Computing time - - - - - + + In file backend/optimizer/geqo/geqo_params.c, routines + gimme_pool_size and gimme_number_generations, + we have to find a compromise for the parameter settings + to satisfy two competing demands: + + + + Optimality of the query plan + + + + + Computing time + + + + + - -Find better solution for integer overflow + + Find better solution for integer overflow - -In file backend/optimizer/geqo/geqo_eval.c, routine -geqo_joinrel_size, -the present hack for MAXINT overflow is to set the Postgres integer -value of rel->size to its logarithm. -Modifications of Rel in backend/nodes/relation.h will -surely have severe impacts on the whole Postgres implementation. - - + + In file backend/optimizer/geqo/geqo_eval.c, routine + geqo_joinrel_size, + the present hack for MAXINT overflow is to set the Postgres integer + value of rel->size to its logarithm. + Modifications of Rel in backend/nodes/relation.h will + surely have severe impacts on the whole Postgres implementation. + + - -Find solution for exhausted memory + + Find solution for exhausted memory - -Memory exhaustion may occur with more than 10 relations involved in a query. -In file backend/optimizer/geqo/geqo_eval.c, routine -gimme_tree is recursively called. -Maybe I forgot something to be freed correctly, but I dunno what. 
-Of course the rel data structure of the join keeps growing and -growing the more relations are packed into it. -Suggestions are welcome :-( - - - + + Memory exhaustion may occur with more than 10 relations involved in a query. + In file backend/optimizer/geqo/geqo_eval.c, routine + gimme_tree is recursively called. + Maybe I forgot something to be freed correctly, but I dunno what. + Of course the rel data structure of the + join keeps growing and + growing the more relations are packed into it. + Suggestions are welcome :-( + + + - - -References - -Reference information for GEQ algorithms. - - + + + References + + Reference information for GEQ algorithms. + + - - -The Hitch-Hiker's Guide to Evolutionary Computation - - - -Jörg -Heitkötter - - -David -Beasley - - - - -InterNet resource - - - - -FAQ in comp.ai.genetic -is available at Encore. - - - + + + The Hitch-Hiker's Guide to Evolutionary Computation + + + + Jörg + Heitkötter + + + David + Beasley + + + + + InterNet resource + + + + + FAQ in comp.ai.genetic + is available at Encore. + + + - - -The Design and Implementation of the Postgres Query Optimizer - - - -Z. -Fong - - - - -University of California, Berkeley Computer Science Department - - - - -File planner/Report.ps in the 'postgres-papers' distribution. - - - + + + The Design and Implementation of the Postgres Query Optimizer + + + + Z. + Fong + + + + + University of California, Berkeley Computer Science Department + + + + + File planner/Report.ps in the 'postgres-papers' distribution. + + + - - -Fundamentals of Database Systems - - - -R. -Elmasri - - -S. -Navathe - - - - -The Benjamin/Cummings Pub., Inc. - - - + + + Fundamentals of Database Systems + + + + R. + Elmasri + + + S. + Navathe + + + + + The Benjamin/Cummings Pub., Inc. + + + - - + + - -
+ +
@@ -9,7 +9,7 @@ $Header: /cvsroot/pgsql/doc/src/sgml/Attic/jdbc.sgml,v 1.10 2000/03/31 03:27:40 Author - Written by Peter T. Mount, the + Written by Peter T. Mount, the author of the JDBC driver. diff --git a/doc/src/sgml/keys.sgml b/doc/src/sgml/keys.sgml index 11e421dddb..29a6248965 100644 --- a/doc/src/sgml/keys.sgml +++ b/doc/src/sgml/keys.sgml @@ -1,8 +1,14 @@ - - - - -Herouth -Maoz - - -1998-03-02 - + + + + + Herouth + Maoz + + + 1998-03-02 + -Indices and Keys + Indices and Keys - -Author - -Written by -Herouth Maoz - - + + Author + + Written by + Herouth Maoz + + - -Editor's Note - -This originally appeared on the mailing list - in response to the question: - "What is the difference between PRIMARY KEY and UNIQUE constraints?". - - + + Editor's Note + + This originally appeared on the mailing list + in response to the question: + "What is the difference between PRIMARY KEY and UNIQUE constraints?". + + - + Subject: Re: [QUESTIONS] PRIMARY KEY | UNIQUE What's the difference between: @@ -59,125 +65,143 @@ Subject: Re: [QUESTIONS] PRIMARY KEY | UNIQUE - Is this an alias? - If PRIMARY KEY is already unique, then why is there another kind of key named UNIQUE? - + - -A primary key is the field(s) used to identify a specific row. For example, -Social Security numbers identifying a person. - - -A simply UNIQUE combination of fields has nothing to do with identifying -the row. It's simply an integrity constraint. For example, I have -collections of links. Each collection is identified by a unique number, -which is the primary key. This key is used in relations. - - -However, my application requires that each collection will also have a -unique name. Why? So that a human being who wants to modify a collection -will be able to identify it. It's much harder to know, if you have two -collections named "Life Science", the the one tagged 24433 is the one you -need, and the one tagged 29882 is not. - - -So, the user selects the collection by its name. 
We therefore make sure, -withing the database, that names are unique. However, no other table in the -database relates to the collections table by the collection Name. That -would be very inefficient. - - -Moreover, despite being unique, the collection name does not actually -define the collection! For example, if somebody decided to change the name -of the collection from "Life Science" to "Biology", it will still be the -same collection, only with a different name. As long as the name is unique, -that's OK. - - -So: + + A primary key is the field(s) used to identify a specific row. For example, + Social Security numbers identifying a person. + + + A simply UNIQUE combination of fields has nothing to do with identifying + the row. It's simply an integrity constraint. For example, I have + collections of links. Each collection is identified by a unique number, + which is the primary key. This key is used in relations. + + + However, my application requires that each collection will also have a + unique name. Why? So that a human being who wants to modify a collection + will be able to identify it. It's much harder to know, if you have two + collections named "Life Science", the the one tagged 24433 is the one you + need, and the one tagged 29882 is not. + + + So, the user selects the collection by its name. We therefore make sure, + withing the database, that names are unique. However, no other table in the + database relates to the collections table by the collection Name. That + would be very inefficient. + + + Moreover, despite being unique, the collection name does not actually + define the collection! For example, if somebody decided to change the name + of the collection from "Life Science" to "Biology", it will still be the + same collection, only with a different name. As long as the name is unique, + that's OK. + + + So: - - - -Primary key: - - - -Is used for identifying the row and relating to it. - - - - -Is impossible (or hard) to update. 
- - - - -Should not allow NULLs. - - - - - + + + + Primary key: + + + + Is used for identifying the row and relating to it. + + + + + Is impossible (or hard) to update. + + + + + Should not allow NULLs. + + + + + - - -Unique field(s): - - - -Are used as an alternative access to the row. - - - - -Are updateable, so long as they are kept unique. - - - - -NULLs are acceptable. - - - - - - - + + + Unique field(s): + + + + Are used as an alternative access to the row. + + + + + Are updateable, so long as they are kept unique. + + + + + NULLs are acceptable. + + + + + + + - -As for why no non-unique keys are defined explicitly in standard SQL syntax? -Well, you -must understand that indices are implementation-dependent. SQL does not -define the implementation, merely the relations between data in the -database. Postgres does allow non-unique indices, but indices -used to enforce SQL keys are always unique. - - -Thus, you may query a table by any combination of its columns, despite the -fact that you don't have an index on these columns. The indexes are merely -an implementational aid which each RDBMS offers you, in order to cause -commonly used queries to be done more efficiently. Some RDBMS may give you -additional measures, such as keeping a key stored in main memory. They will -have a special command, for example - -CREATE MEMSTORE ON <table> COLUMNS <cols> - -(this is not an existing command, just an example). - - -In fact, when you create a primary key or a unique combination of fields, -nowhere in the SQL specification does it say that an index is created, nor that -the retrieval of data by the key is going to be more efficient than a -sequential scan! - - -So, if you want to use a combination of fields which is not unique as a -secondary key, you really don't have to specify anything - just start -retrieving by that combination! 
However, if you want to make the retrieval -efficient, you'll have to resort to the means your RDBMS provider gives you -- be it an index, my imaginary MEMSTORE command, or an intelligent RDBMS -which creates indices without your knowledge based on the fact that you have -sent it many queries based on a specific combination of keys... (It learns -from experience). - - + + As for why no non-unique keys are defined explicitly in standard + SQL syntax? + Well, you + must understand that indices are implementation-dependent. SQL does not + define the implementation, merely the relations between data in the + database. Postgres does allow non-unique indices, but indices + used to enforce SQL keys are always unique. + + + Thus, you may query a table by any combination of its columns, despite the + fact that you don't have an index on these columns. The indexes are merely + an implementational aid which each RDBMS offers you, in order to cause + commonly used queries to be done more efficiently. Some RDBMS may give you + additional measures, such as keeping a key stored in main memory. They will + have a special command, for example + + CREATE MEMSTORE ON <table> COLUMNS <cols> + + (this is not an existing command, just an example). + + + In fact, when you create a primary key or a unique combination of fields, + nowhere in the SQL specification does it say that an index is created, nor that + the retrieval of data by the key is going to be more efficient than a + sequential scan! + + + So, if you want to use a combination of fields which is not unique as a + secondary key, you really don't have to specify anything - just start + retrieving by that combination! 
However, if you want to make the retrieval + efficient, you'll have to resort to the means your RDBMS provider gives you + - be it an index, my imaginary MEMSTORE command, or an intelligent + RDBMS + which creates indices without your knowledge based on the fact that you have + sent it many queries based on a specific combination of keys... (It learns + from experience). + + + diff --git a/doc/src/sgml/ref/ecpg-ref.sgml b/doc/src/sgml/ref/ecpg-ref.sgml index 8bb1dd27f1..818c7d59ae 100644 --- a/doc/src/sgml/ref/ecpg-ref.sgml +++ b/doc/src/sgml/ref/ecpg-ref.sgml @@ -1,5 +1,5 @@ @@ -135,11 +135,11 @@ ecpg [ -v ] [ -t ] [ -I include-path ] [ -o outfile ] file1 [ file2 ] [ ... ] - Linus Tolke was the + Linus Tolke was the original author of ecpg (up to version 0.2). - Michael Meskes + Michael Meskes is the current author and maintainer of ecpg. - Thomas Good + Thomas Good is the author of the last revision of the ecpg man page, on which this document is based. diff --git a/doc/src/sgml/ref/insert.sgml b/doc/src/sgml/ref/insert.sgml index 27dedecd60..84abd7c629 100644 --- a/doc/src/sgml/ref/insert.sgml +++ b/doc/src/sgml/ref/insert.sgml @@ -1,5 +1,5 @@ @@ -20,20 +20,20 @@ Postgres documentation - 1999-07-20 + 2000-08-08 INSERT INTO table [ ( column [, ...] ) ] - { VALUES ( expression [, ...] ) | SELECT query } + { DEFAULT VALUES | VALUES ( expression [, ...] ) | SELECT query } - 1998-09-23 Inputs + @@ -45,6 +45,7 @@ INSERT INTO table [ ( + column @@ -54,6 +55,16 @@ INSERT INTO table [ ( + + DEFAULT VALUES + + + All columns will be filled by NULLs or by values specified + when the table was created using DEFAULT clauses. 
+ + + + expression @@ -79,7 +90,6 @@ INSERT INTO table [ ( - 1998-09-23 Outputs @@ -118,7 +128,6 @@ INSERT 0 <replaceable>#</replaceable> <refsect1 id="R1-SQL-INSERT-1"> <refsect1info> - <date>1998-09-02</date> </refsect1info> <title> Description @@ -217,7 +226,6 @@ INSERT INTO tictactoe (game, board) <refsect2 id="R2-SQL-INSERT-4"> <refsect2info> - <date>1998-09-23</date> </refsect2info> <title> SQL92