#----------------------------------------------------------------------
#
# gen_keywordlist.pl
#	Perl script that transforms a list of keywords into a ScanKeywordList
#	data structure that can be passed to ScanKeywordLookup().
#
# The input is a C header file containing a series of macro calls
#	PG_KEYWORD("keyword", ...)
# Lines not starting with PG_KEYWORD are ignored.  The keywords are
# implicitly numbered 0..N-1 in order of appearance in the header file.
# Currently, the keywords are required to appear in ASCII order.
#
# The output is a C header file that defines a "const ScanKeywordList"
# variable named according to the -v switch ("ScanKeywords" by default).
# The variable is marked "static" unless the -e switch is given.
#
# ScanKeywordList uses hash-based lookup, so this script also selects
# a minimal perfect hash function for the keyword set, and emits a
# static hash function that is referenced in the ScanKeywordList struct.
# The hash function is case-insensitive unless --no-case-fold is specified.
# Note that case folding works correctly only for all-ASCII keywords!
#
#
# Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
# Portions Copyright (c) 1994, Regents of the University of California
#
# src/tools/gen_keywordlist.pl
#
#----------------------------------------------------------------------

use strict;
use warnings FATAL => 'all';
use Getopt::Long;

use FindBin;
use lib $FindBin::RealBin;

use PerfectHash;

# Command-line settings and their defaults.
my $output_path = '';
my $extern = 0;
my $case_fold = 1;
my $varname = 'ScanKeywords';

GetOptions(
	'output:s' => \$output_path,
	'extern' => \$extern,
	'case-fold!' => \$case_fold,
	'varname:s' => \$varname) || usage();

my $kw_input_file = shift @ARGV || die "No input file.\n";

# Make sure output_path ends in a slash if needed.
if ($output_path ne '' && substr($output_path, -1) ne '/')
{
	$output_path .= '/';
}

# The output file name is the input's base name with "_d" appended.
$kw_input_file =~ /(\w+)\.h$/
  || die "Input file must be named something.h.\n";
my $base_filename = $1 . '_d';
my $kw_def_file = $output_path . $base_filename . '.h';

open(my $kif, '<', $kw_input_file) || die "$kw_input_file: $!\n";
open(my $kwdef, '>', $kw_def_file) || die "$kw_def_file: $!\n";

# Opening boilerplate for keyword definition header.
#
# The three %s slots are the output base name (for the file banner) and
# its upper-cased form twice (for the include guard, which is closed by the
# "#endif" emitted at the bottom of this script).
#
# NOTE(review): the literal text of this banner was reconstructed; confirm
# it against the upstream copy of this script before committing.
printf $kwdef <<EOM, $base_filename, uc $base_filename, uc $base_filename;
/*-------------------------------------------------------------------------
 *
 * %s.h
 *    List of keywords represented as a ScanKeywordList.
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * NOTES
 *  ******************************
 *  *** DO NOT EDIT THIS FILE! ***
 *  ******************************
 *
 *  It has been GENERATED by src/tools/gen_keywordlist.pl
 *
 *-------------------------------------------------------------------------
 */

#ifndef %s_H
#define %s_H

#include "common/kwlookup.h"

EOM

# Parse input file for keyword names.  Only lines that begin with
# PG_KEYWORD("...") contribute; everything else is ignored.
my @keywords;
while (<$kif>)
{
	if (/^PG_KEYWORD\("(\w+)"/)
	{
		push @keywords, $1;
	}
}
close($kif);

# When being case-insensitive, insist that the input be all-lower-case.
if ($case_fold)
{
	foreach my $kw (@keywords)
	{
		die qq|The keyword "$kw" is not lower-case in $kw_input_file\n|
		  if ($kw ne lc $kw);
	}
}

# Error out if the keyword names are not in ASCII order.
#
# While this isn't really necessary with hash-based lookup, it's still
# helpful because it provides a cheap way to reject duplicate keywords.
# Also, insisting on sorted order ensures that code that scans the keyword
# table linearly will see the keywords in a canonical order.
for my $i (0 .. $#keywords - 1)
{
	die
	  qq|The keyword "$keywords[$i + 1]" is out of order in $kw_input_file\n|
	  if ($keywords[$i] cmp $keywords[ $i + 1 ]) >= 0;
}

# Emit the string containing all the keywords.  Each keyword becomes its
# own C string literal ending in an explicit \0; adjacent literals are
# concatenated by the C compiler, and the last keyword is terminated by the
# literal's implicit trailing NUL.

printf $kwdef qq|static const char %s_kw_string[] =\n\t"|, $varname;
print $kwdef join qq|\\0"\n\t"|, @keywords;
print $kwdef qq|";\n\n|;

# Emit an array of numerical offsets which will be used to index into the
# keyword string.  Also determine max keyword length.

printf $kwdef "static const uint16 %s_kw_offsets[] = {\n", $varname;

my $offset = 0;
my $max_len = 0;
foreach my $name (@keywords)
{
	my $this_length = length($name);

	# The offsets array is emitted as uint16, so refuse to generate a table
	# whose offsets would not fit rather than emit silently truncated data.
	die qq|Keyword "$name" starts beyond uint16 range in $kw_input_file\n|
	  if $offset > 0xFFFF;

	print $kwdef "\t$offset,\n";

	# Calculate the cumulative offset of the next keyword,
	# taking into account the null terminator.
	$offset += $this_length + 1;

	# Update max keyword length.
	$max_len = $this_length if $max_len < $this_length;
}

print $kwdef "};\n\n";

# Emit a macro defining the number of keywords.
# (In some places it's useful to have access to that as a constant.)

printf $kwdef "#define %s_NUM_KEYWORDS %d\n\n", uc $varname,
  scalar @keywords;

# Emit the definition of the hash function.

my $funcname = $varname . "_hash_func";

my $f = PerfectHash::generate_hash_function(\@keywords, $funcname,
	case_fold => $case_fold);

printf $kwdef qq|static %s\n|, $f;

# Emit the struct that wraps all this lookup info into one variable.

printf $kwdef "static " if !$extern;
printf $kwdef "const ScanKeywordList %s = {\n", $varname;
printf $kwdef qq|\t%s_kw_string,\n|, $varname;
printf $kwdef qq|\t%s_kw_offsets,\n|, $varname;
printf $kwdef qq|\t%s,\n|, $funcname;
printf $kwdef qq|\t%s_NUM_KEYWORDS,\n|, uc $varname;
printf $kwdef qq|\t%d\n|, $max_len;
printf $kwdef "};\n\n";

printf $kwdef "#endif\t\t\t\t\t\t\t/* %s_H */\n", uc $base_filename;

# Output is buffered, so a failed write (e.g. disk full) may only be
# reported when the handle is closed; check it so we don't exit
# successfully after leaving a truncated header behind.
close($kwdef) || die "$kw_def_file: $!\n";


# Print a usage summary and exit with an error status.
# Called when GetOptions() rejects the command line.
sub usage
{
	die <<EOM;
Usage: gen_keywordlist.pl [--output/-o <path>] [--varname/-v <varname>] [--extern/-e] [--[no-]case-fold] input_file
    --output        Output directory (default '.')
    --varname       Name for ScanKeywordList variable (default 'ScanKeywords')
    --extern        Allow the ScanKeywordList variable to be globally visible
    --no-case-fold  Keyword matching is to be case-sensitive

gen_keywordlist.pl transforms a list of keywords into a ScanKeywordList.
The output filename is derived from the input file by inserting _d,
for example kwlist_d.h is produced from kwlist.h.
EOM
}