mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-09-28 00:01:51 +02:00
c8e1ba736b
Backpatch-through: 11
126 lines
3.0 KiB
Perl
126 lines
3.0 KiB
Perl
#!/usr/bin/perl
|
|
#
|
|
# Generate table of Unicode normalization "quick check" properties
|
|
# (see UAX #15). Pass DerivedNormalizationProps.txt as argument. The
|
|
# output is on stdout.
|
|
#
|
|
# Copyright (c) 2020-2023, PostgreSQL Global Development Group
|
|
|
|
use strict;
|
|
use warnings;
|
|
|
|
use FindBin;
|
|
use lib "$FindBin::RealBin/../../tools/";
|
|
use PerfectHash;
|
|
|
|
my %data;
|
|
|
|
print
|
|
"/* generated by src/common/unicode/generate-unicode_normprops_table.pl, do not edit */\n\n";
|
|
|
|
print <<EOS;
|
|
#include "common/unicode_norm.h"
|
|
|
|
/*
|
|
* Normalization quick check entry for codepoint. We use a bit field
|
|
* here to save space.
|
|
*/
|
|
typedef struct
|
|
{
|
|
unsigned int codepoint:21;
|
|
signed int quickcheck:4; /* really UnicodeNormalizationQC */
|
|
} pg_unicode_normprops;
|
|
|
|
/* Typedef for hash function on quick check table */
|
|
typedef int (*qc_hash_func) (const void *key);
|
|
|
|
/* Information for quick check lookup with perfect hash function */
|
|
typedef struct
|
|
{
|
|
const pg_unicode_normprops *normprops;
|
|
qc_hash_func hash;
|
|
int num_normprops;
|
|
} pg_unicode_norminfo;
|
|
EOS
|
|
|
|
foreach my $line (<ARGV>)
|
|
{
|
|
chomp $line;
|
|
$line =~ s/\s*#.*$//;
|
|
next if $line eq '';
|
|
my ($codepoint, $prop, $value) = split /\s*;\s*/, $line;
|
|
next if $prop !~ /_QC/;
|
|
|
|
my ($first, $last);
|
|
if ($codepoint =~ /\.\./)
|
|
{
|
|
($first, $last) = split /\.\./, $codepoint;
|
|
}
|
|
else
|
|
{
|
|
$first = $last = $codepoint;
|
|
}
|
|
|
|
foreach my $cp (hex($first) .. hex($last))
|
|
{
|
|
$data{$prop}{$cp} = $value;
|
|
}
|
|
}
|
|
|
|
# We create a separate array for each normalization form rather than,
|
|
# say, a two-dimensional array, because that array would be very
|
|
# sparse and would create unnecessary overhead especially for the NFC
|
|
# lookup.
|
|
foreach my $prop (sort keys %data)
|
|
{
|
|
# Don't build the tables for the "D" forms because they are too
|
|
# big. See also unicode_is_normalized_quickcheck().
|
|
next if $prop eq "NFD_QC" || $prop eq "NFKD_QC";
|
|
|
|
print "\n";
|
|
print
|
|
"static const pg_unicode_normprops UnicodeNormProps_${prop}[] = {\n";
|
|
|
|
my %subdata = %{ $data{$prop} };
|
|
my @cp_packed;
|
|
foreach my $cp (sort { $a <=> $b } keys %subdata)
|
|
{
|
|
my $qc;
|
|
if ($subdata{$cp} eq 'N')
|
|
{
|
|
$qc = 'UNICODE_NORM_QC_NO';
|
|
}
|
|
elsif ($subdata{$cp} eq 'M')
|
|
{
|
|
$qc = 'UNICODE_NORM_QC_MAYBE';
|
|
}
|
|
else
|
|
{
|
|
die;
|
|
}
|
|
printf "\t{0x%04X, %s},\n", $cp, $qc;
|
|
|
|
# Save the bytes as a string in network order.
|
|
push @cp_packed, pack('N', $cp);
|
|
}
|
|
|
|
print "};\n";
|
|
|
|
# Emit the definition of the perfect hash function.
|
|
my $funcname = $prop . '_hash_func';
|
|
my $f = PerfectHash::generate_hash_function(\@cp_packed, $funcname,
|
|
fixed_key_length => 4);
|
|
printf "\n/* Perfect hash function for %s */", $prop;
|
|
print "\nstatic $f\n";
|
|
|
|
# Emit the structure that wraps the hash lookup information into
|
|
# one variable.
|
|
printf "/* Hash lookup information for %s */", $prop;
|
|
printf "\nstatic const pg_unicode_norminfo ";
|
|
printf "UnicodeNormInfo_%s = {\n", $prop;
|
|
printf "\tUnicodeNormProps_%s,\n", $prop;
|
|
printf "\t%s,\n", $funcname;
|
|
printf "\t%d\n", scalar @cp_packed;
|
|
printf "};\n";
|
|
}
|