#----------------------------------------------------------------------
#
# Catalog.pm
#    Perl module that extracts info from catalog files into Perl
#    data structures
#
# Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
# Portions Copyright (c) 1994, Regents of the University of California
#
# src/backend/catalog/Catalog.pm
#
#----------------------------------------------------------------------

package Catalog;

use strict;
use warnings;

# Parses a catalog header file into a data structure describing the schema
# of the catalog.
sub ParseHeader
{
	my $input_file = shift;

	# There are a few types which are given one name in the C source, but a
	# different name at the SQL level.  These are enumerated here.
	my %RENAME_ATTTYPE = (
		'int16'         => 'int2',
		'int32'         => 'int4',
		'int64'         => 'int8',
		'Oid'           => 'oid',
		'NameData'      => 'name',
		'TransactionId' => 'xid',
		'XLogRecPtr'    => 'pg_lsn');

	my %catalog;
	my $declaring_attributes = 0;
	my $is_varlen            = 0;
	my $is_client_code       = 0;

	$catalog{columns}     = [];
	$catalog{toasting}    = [];
	$catalog{indexing}    = [];
	$catalog{client_code} = [];

	open(my $ifh, '<', $input_file) || die "$input_file: $!";

	# Scan the input file.
	while (<$ifh>)
	{

		# Set appropriate flag when we're in certain code sections.
		if (/^#/)
		{
			$is_varlen = 1 if /^#ifdef\s+CATALOG_VARLEN/;
			if (/^#ifdef\s+EXPOSE_TO_CLIENT_CODE/)
			{
				$is_client_code = 1;
				next;
			}
			next if !$is_client_code;
		}

		if (!$is_client_code)
		{
			# Strip C-style comments.
			s;/\*(.|\n)*\*/;;g;
			if (m;/\*;)
			{

				# handle multi-line comments properly.
				my $next_line = <$ifh>;
				die "$input_file: ends within C-style comment\n"
				  if !defined $next_line;
				$_ .= $next_line;
				redo;
			}

			# Strip useless whitespace and trailing semicolons.
			chomp;
			s/^\s+//;
			s/;\s*$//;
			s/\s+/ /g;
		}

		# Push the data into the appropriate data structure.
		if (/^DECLARE_TOAST\(\s*(\w+),\s*(\d+),\s*(\d+)\)/)
		{
			my ($toast_name, $toast_oid, $index_oid) = ($1, $2, $3);
			push @{ $catalog{toasting} },
			  "declare toast $toast_oid $index_oid on $toast_name\n";
		}
		elsif (/^DECLARE_(UNIQUE_)?INDEX\(\s*(\w+),\s*(\d+),\s*(.+)\)/)
		{
			my ($is_unique, $index_name, $index_oid, $using) =
			  ($1, $2, $3, $4);
			push @{ $catalog{indexing} },
			  sprintf(
				"declare %sindex %s %s %s\n",
				$is_unique ? 'unique ' : '',
				$index_name, $index_oid, $using);
		}
		elsif (/^BUILD_INDICES/)
		{
			push @{ $catalog{indexing} }, "build indices\n";
		}
		elsif (/^CATALOG\((\w+),(\d+),(\w+)\)/)
		{
			$catalog{catname}            = $1;
			$catalog{relation_oid}       = $2;
			$catalog{relation_oid_macro} = $3;

			$catalog{bootstrap} = /BKI_BOOTSTRAP/ ? ' bootstrap' : '';
			$catalog{shared_relation} =
			  /BKI_SHARED_RELATION/ ? ' shared_relation' : '';
			$catalog{without_oids} =
			  /BKI_WITHOUT_OIDS/ ? ' without_oids' : '';
			if (/BKI_ROWTYPE_OID\((\d+),(\w+)\)/)
			{
				$catalog{rowtype_oid}        = $1;
				$catalog{rowtype_oid_clause} = " rowtype_oid $1";
				$catalog{rowtype_oid_macro}  = $2;
			}
			else
			{
				$catalog{rowtype_oid}        = '';
				$catalog{rowtype_oid_clause} = '';
				$catalog{rowtype_oid_macro}  = '';
			}
			$catalog{schema_macro} = /BKI_SCHEMA_MACRO/ ? 1 : 0;
			$declaring_attributes = 1;
		}
		elsif ($is_client_code)
		{
			if (/^#endif/)
			{
				$is_client_code = 0;
			}
			else
			{
				push @{ $catalog{client_code} }, $_;
			}
		}
		elsif ($declaring_attributes)
		{
			next if (/^{|^$/);
			if (/^}/)
			{
				$declaring_attributes = 0;
			}
			else
			{
				my %column;
				my @attopts = split /\s+/, $_;
				my $atttype = shift @attopts;
				my $attname = shift @attopts;
				die "parse error ($input_file)"
				  unless ($attname and $atttype);

				if (exists $RENAME_ATTTYPE{$atttype})
				{
					$atttype = $RENAME_ATTTYPE{$atttype};
				}

				# If the C name ends with '[]' or '[digits]', we have
				# an array type, so we discard that from the name and
				# prepend '_' to the type.
				if ($attname =~ /(\w+)\[\d*\]/)
				{
					$attname = $1;
					$atttype = '_' . $atttype;
				}

				$column{type}      = $atttype;
				$column{name}      = $attname;
				$column{is_varlen} = 1 if $is_varlen;

				foreach my $attopt (@attopts)
				{
					if ($attopt eq 'BKI_FORCE_NULL')
					{
						$column{forcenull} = 1;
					}
					elsif ($attopt eq 'BKI_FORCE_NOT_NULL')
					{
						$column{forcenotnull} = 1;
					}

					# We use quotes for values like \0 and \054, to
					# make sure all compilers and syntax highlighters
					# can recognize them properly.
					elsif ($attopt =~ /BKI_DEFAULT\(['"]?([^'"]+)['"]?\)/)
					{
						$column{default} = $1;
					}
					elsif ($attopt =~ /BKI_LOOKUP\((\w+)\)/)
					{
						$column{lookup} = $1;
					}
					else
					{
						die
						  "unknown column option $attopt on column $attname";
					}

					if ($column{forcenull} and $column{forcenotnull})
					{
						die "$attname is forced both null and not null";
					}
				}
				push @{ $catalog{columns} }, \%column;
			}
		}
	}
	close $ifh;
	return \%catalog;
}

# Parses a file containing Perl data structure literals, returning live data.
#
# The parameter $preserve_formatting needs to be set for callers that want
# to work with non-data lines in the data files, such as comments and blank
# lines. If a caller just wants to consume the data, leave it unset.
sub ParseData
{
	my ($input_file, $schema, $preserve_formatting) = @_;

	open(my $ifd, '<', $input_file) || die "$input_file: $!";
	$input_file =~ /(\w+)\.dat$/
	  or die "Input file $input_file needs to be a .dat file.\n";
	my $catname = $1;
	my $data    = [];

	# Scan the input file.
	while (<$ifd>)
	{
		my $hash_ref;

		if (/{/)
		{
			# Capture the hash ref
			# NB: Assumes that the next hash ref can't start on the
			# same line where the present one ended.
			# Not foolproof, but we shouldn't need a full parser,
			# since we expect relatively well-behaved input.

			# Quick hack to detect when we have a full hash ref to
			# parse. We can't just use a regex because of values in
			# pg_aggregate and pg_proc like '{0,0}'.  This will need
			# work if we ever need to allow unbalanced braces within
			# a field value.
			my $lcnt = tr/{//;
			my $rcnt = tr/}//;

			if ($lcnt == $rcnt)
			{
				eval '$hash_ref = ' . $_;
				if (!ref $hash_ref)
				{
					die "$input_file: error parsing line $.:\n$_\n";
				}

				# Annotate each hash with the source line number.
				$hash_ref->{line_number} = $.;

				# Expand tuples to their full representation.
				AddDefaultValues($hash_ref, $schema, $catname);
			}
			else
			{
				my $next_line = <$ifd>;
				die "$input_file: file ends within Perl hash\n"
				  if !defined $next_line;
				$_ .= $next_line;
				redo;
			}
		}

		# If we found a hash reference, keep it.
		# Only keep non-data strings if we
		# are told to preserve formatting.
		if (defined $hash_ref)
		{
			push @$data, $hash_ref;
		}
		elsif ($preserve_formatting)
		{
			push @$data, $_;
		}
	}
	close $ifd;
	return $data;
}

# Fill in default values of a record using the given schema.
# It's the caller's responsibility to specify other values beforehand.
sub AddDefaultValues
{
	my ($row, $schema, $catname) = @_;
	my @missing_fields;

	foreach my $column (@$schema)
	{
		my $attname = $column->{name};
		my $atttype = $column->{type};

		if (defined $row->{$attname})
		{
			;
		}
		elsif (defined $column->{default})
		{
			$row->{$attname} = $column->{default};
		}
		elsif ($catname eq 'pg_proc'
			&& $attname eq 'pronargs'
			&& defined($row->{proargtypes}))
		{
			# pg_proc.pronargs can be derived from proargtypes.
			my @proargtypes = split /\s+/, $row->{proargtypes};
			$row->{$attname} = scalar(@proargtypes);
		}
		else
		{
			# Failed to find a value.
			push @missing_fields, $attname;
		}
	}

	if (@missing_fields)
	{
		die sprintf "missing values for field(s) %s in %s.dat line %s\n",
		  join(', ', @missing_fields), $catname, $row->{line_number};
	}
}

# Rename temporary files to final names.
# Call this function with the final file name and the .tmp extension
# Note: recommended extension is ".tmp$$", so that parallel make steps
# can't use the same temp files
sub RenameTempFile
{
	my $final_name = shift;
	my $extension  = shift;
	my $temp_name  = $final_name . $extension;
	rename($temp_name, $final_name) || die "rename: $temp_name: $!";
}

# Find a symbol defined in a particular header file and extract the value.
#
# The include path has to be passed as a reference to an array.
sub FindDefinedSymbol
{
	my ($catalog_header, $include_path, $symbol) = @_;

	for my $path (@$include_path)
	{

		# Make sure include path ends in a slash.
		if (substr($path, -1) ne '/')
		{
			$path .= '/';
		}
		my $file = $path . $catalog_header;
		next if !-f $file;
		open(my $find_defined_symbol, '<', $file) || die "$file: $!";
		while (<$find_defined_symbol>)
		{
			if (/^#define\s+\Q$symbol\E\s+(\S+)/)
			{
				return $1;
			}
		}
		close $find_defined_symbol;
		die "$file: no definition found for $symbol\n";
	}
	die "$catalog_header: not found in any include directory\n";
}

# Similar to FindDefinedSymbol, but looks in the bootstrap metadata.
sub FindDefinedSymbolFromData
{
	my ($data, $symbol) = @_;
	foreach my $row (@{$data})
	{
		if ($row->{oid_symbol} eq $symbol)
		{
			return $row->{oid};
		}
	}
	die "no definition found for $symbol\n";
}

1;