From e90d4ddc639aac7a7217ebc670ad6e49eaeddbf9 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 17 Apr 2018 18:10:16 -0400 Subject: [PATCH] Simplify genbki.pl's data quoting rules. During the bootstrap data format conversion, it seemed important for verifiability's sake that the generated postgres.bki file stayed the same as before. That resulted in adding a bunch of ad-hoc rules about when to quote emitted data values, to match previous manual decisions that had often quoted values unnecessarily. Now that the conversion is complete, it seems fine to remove all those ad-hoc rules. The net actual effect on the current contents of postgres.bki is that some fields that had been quoted despite containing only digits or only "-" lose their unnecessary quotes. Also, now that genbki.pl will always quote values containing a backslash, there's no need for bootscanner.l to allow unquoted octal escapes; so simplify its production for "id" by removing that possibility. John Naylor, slightly modified by me Discussion: https://postgr.es/m/CAJVSVGUNao=-Q2-vAN3PYcdF5tnL5JAHwGwzZGuYHtq+Mk_9ng@mail.gmail.com --- src/backend/bootstrap/bootscanner.l | 3 +-- src/backend/catalog/genbki.pl | 28 ++++++---------------------- 2 files changed, 7 insertions(+), 24 deletions(-) diff --git a/src/backend/bootstrap/bootscanner.l b/src/backend/bootstrap/bootscanner.l index 2ce6e524db..91d73d713f 100644 --- a/src/backend/bootstrap/bootscanner.l +++ b/src/backend/bootstrap/bootscanner.l @@ -66,8 +66,7 @@ static int yyline = 1; /* line number for error reporting */ D [0-9] -oct \\{D}{D}{D} -id ([A-Za-z0-9_]|{oct}|\-)+ +id [-A-Za-z0-9_]+ sid \"([^\"])*\" arrayid [A-Za-z0-9_]+\[{D}*\] diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl index cd68fc3108..6597c6a21f 100644 --- a/src/backend/catalog/genbki.pl +++ b/src/backend/catalog/genbki.pl @@ -645,9 +645,7 @@ sub morph_row_for_pgattr Catalog::AddDefaultValues($row, $pgattr_schema, 'pg_attribute'); } -# Write an entry to postgres.bki. Adding quotes here allows us to keep -# most double quotes out of the catalog data files for readability. See -# bootscanner.l for what tokens need quoting. +# Write an entry to postgres.bki. sub print_bki_insert { my $row = shift; @@ -666,26 +664,12 @@ sub print_bki_insert # since that represents a NUL char in C code. $bki_value = '' if $bki_value eq '\0'; + # Quote value if needed. We need not quote values that satisfy + # the "id" pattern in bootscanner.l, currently "[-A-Za-z0-9_]+". $bki_value = sprintf(qq'"%s"', $bki_value) - if $bki_value ne '_null_' - and $bki_value !~ /^"[^"]+"$/ - and ( length($bki_value) == 0 # Empty string - or $bki_value =~ /\s/ # Contains whitespace - - # To preserve historical formatting, operator names are - # always quoted. Likewise for values of multi-element types, - # even if they only contain a single element. - or $attname eq 'oprname' - or $atttype eq 'oidvector' - or $atttype eq 'int2vector' - or $atttype =~ /\[\]$/ - - # Quote strings that have non-word characters. We make - # exceptions for values that are octals or negative numbers, - # for the same historical reason as above. - or ( $bki_value =~ /\W/ - and $bki_value !~ /^\\\d{3}$/ - and $bki_value !~ /^-\d*$/)); + if $bki_value !~ /^"[^"]+"$/ + and ( length($bki_value) == 0 + or $bki_value =~ /[^-A-Za-z0-9_]/); push @bki_values, $bki_value; }