#! /usr/bin/perl
#
# Copyright (c) 2007-2020, PostgreSQL Global Development Group
#
# src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
#
# Generate UTF-8 <--> SHIFT_JIS_2004 code conversion tables from
# "sjis-0213-2004-std.txt" (http://x0213.org)

use strict;
use warnings;

use convutils;

my $this_script = 'src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl';

my $in_file = "sjis-0213-2004-std.txt";

open(my $in, '<', $in_file) or die("cannot open $in_file: $!");

# Read the mapping file and collect one hash per mapped character.  Lines
# that match neither pattern below (e.g. comment lines) are ignored.
#
# The numeric fields are captured as hex digits only, so that hex() is never
# handed trailing whitespace; the whitespace that precedes '#' is captured
# separately so the "comment" string keeps exactly the text it had when the
# fields were matched greedily.
my $hex = qr/[0-9A-Fa-f]+/;

my @mapping;

while (my $line = <$in>)
{
	if ($line =~ /^0x($hex)[ \t]*U\+($hex)\+($hex)([ \t]*)#(.*)$/)
	{

		# combined characters: one SJIS code maps to a pair of code points
		my ($c, $u1, $u2, $gap, $text) = ($1, $2, $3, $4, $5);
		my $rest = "U+" . $u1 . "+" . $u2 . $gap . $text;
		my $code = hex($c);
		my $ucs1 = hex($u1);
		my $ucs2 = hex($u2);

		push @mapping,
		  {
			code       => $code,
			ucs        => $ucs1,
			ucs_second => $ucs2,
			comment    => $rest,
			direction  => BOTH,
			f          => $in_file,    # source file name
			l          => $.           # line number within the source file
		  };
	}
	elsif ($line =~ /^0x($hex)[ \t]*U\+($hex)([ \t]*)#(.*)$/)
	{

		# non-combined characters
		my ($c, $u, $gap, $text) = ($1, $2, $3, $4);
		my $rest = "U+" . $u . $gap . $text;
		my $ucs  = hex($u);
		my $code = hex($c);
		my $direction;

		# Entries where both sides are below 0x80 are dropped entirely.
		# When only one side is below 0x80 the entry is restricted to a
		# single direction.  (BOTH / FROM_UNICODE / TO_UNICODE are
		# presumably constants exported by convutils -- confirm there.)
		if ($code < 0x80 && $ucs < 0x80)
		{
			next;
		}
		elsif ($code < 0x80)
		{
			$direction = FROM_UNICODE;
		}
		elsif ($ucs < 0x80)
		{
			$direction = TO_UNICODE;
		}
		else
		{
			$direction = BOTH;
		}

		push @mapping,
		  {
			code      => $code,
			ucs       => $ucs,
			comment   => $rest,
			direction => $direction,
			f         => $in_file,    # source file name
			l         => $.           # line number within the source file
		  };
	}
}
close($in);

# Hand the collected entries to convutils to emit the tables.
print_conversion_tables($this_script, "SHIFT_JIS_2004", \@mapping);