Add dbase conversion utility to /contrib.

This commit is contained in:
Bruce Momjian 2001-05-10 14:41:23 +00:00
parent 72c8af51fd
commit c2a062b7fe
8 changed files with 1751 additions and 0 deletions

View File

@ -34,6 +34,10 @@ cube -
Multidimensional-cube datatype (GiST indexing example)
by Gene Selkov, Jr. <selkovjr@mcs.anl.gov>
dbase -
Converts from dbase/xbase to PostgreSQL
by Ivan Baldo, lubaldo@adinet.com.uy
earthdistance -
Operator for computing earth distance for two points
by Hal Snyder <hal@vailsys.com>

36
contrib/dbase/Makefile Normal file
View File

@ -0,0 +1,36 @@
#
# $Header: /cvsroot/pgsql/contrib/dbase/Attic/Makefile,v 1.1 2001/05/10 14:41:23 momjian Exp $
#
# Builds and installs dbf2pg, the dbase/xbase -> PostgreSQL loader.
#
subdir = contrib/dbase
top_builddir = ../..
include $(top_builddir)/src/Makefile.global

# dbf2pg.c includes <libpq-fe.h>, which lives in the libpq source directory.
override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS)

OBJS = dbf.o dbf2pg.o endian.o

# These names are commands, not files; declaring them phony keeps a stray
# file called e.g. "clean" or "install" from silently disabling the target.
.PHONY: all install installdirs uninstall clean distclean maintainer-clean

all: dbf2pg

# NOTE(review): -liconv is hard-coded; on systems where iconv() is part of
# libc this library may not exist -- confirm against the target platforms.
dbf2pg: $(OBJS) $(libpq_builddir)/libpq.a
	$(CC) $(CFLAGS) $(OBJS) $(libpq) $(LDFLAGS) $(LIBS) -liconv -o $@

install: all installdirs
	$(INSTALL_PROGRAM) dbf2pg$(X) $(bindir)
	$(INSTALL_DATA) README.dbf2pg $(docdir)/contrib

installdirs:
	$(mkinstalldirs) $(bindir) $(docdir)/contrib

uninstall:
	rm -f $(bindir)/dbf2pg$(X) $(docdir)/contrib/README.dbf2pg

clean distclean maintainer-clean:
	rm -f dbf2pg$(X) $(OBJS)

# Regenerate header dependencies.  CPPFLAGS must be passed so that
# <libpq-fe.h> is found via -I; otherwise -MG would record it as a
# (non-existent) file in this directory.
depend dep:
	$(CC) -MM -MG $(CPPFLAGS) $(CFLAGS) *.c > depend

# Pull in the generated dependencies only once they exist.
ifeq (depend,$(wildcard depend))
include depend
endif

132
contrib/dbase/README.dbf2pg Normal file
View File

@ -0,0 +1,132 @@
dbf2sql(1L) dbf2sql(1L)
NAME
dbf2sql - Insert xBase-style .dbf-files into a Post-
greSQL-table
SYNOPSIS
"dbf2pg [options] dbf-file"
Options:
[-v[v]] [-f] [-u | -l] [-c | -D] [-d database] [-t table]
[-h host] [-s oldname=newname[,oldname=newname]] [-b
start] [-e end] [-W] [-U username] [-B transaction_size]
[-F charset_from [-T charset_to]]
DESCRIPTION
This manual page documents the program dbf2pg. It takes
an xBase-style .dbf-file, and inserts it into the speci-
fied database and table.
OPTIONS
-v Display some status-messages.
-vv Also display progress.
-f Convert all field-names from the .dbf-file to low-
ercase.
-u Convert the contents of all fields to uppercase.
-l Convert the contents of all fields to lowercase.
-c Create the table specified with -t. If this table
already exists, first DROP it.
-D Delete the contents of the table specified with -t.
Note that this table has to exist. An error is
returned if this is not the case.
-W Ask for password.
-d database
Specify the database to use. An error is returned
if this database does not exist. Default is
"test".
-t table
Specify the table to insert into. An error is
returned if this table does not exist. Default is
"test".
-h host
Specify the host to which to connect. Default is
"localhost".
1
dbf2sql(1L) dbf2sql(1L)
-s oldname=newname[,oldname=newname]
Change the name of a field from oldname to newname.
This is mainly used to avoid using reserved SQL-
keywords. Example:
-s SELECT=SEL,COMMIT=doit
This is done before the -f operator has taken
effect!
-b start
Specify the first record-number in the xBase-file
we will insert.
-e end Specify the last record-number in the xBase-file we
will insert.
-B transaction_size
Specify the number of records per transaction,
default is all records.
-U username
Log in to the database as the specified user.
-F charset_from
If specified, it converts the data from the speci-
fied charset. Example:
-F IBM437
Consult your system documentation to see the con-
versions available.
-T charset_to
Together with -F charset_from , it converts the
data to the specified charset. Default is
"ISO-8859-1".
ENVIRONMENT
This program is affected by the environment-variables as
used by "PostgreSQL." See the documentation of Post-
greSQL for more info.
BUGS
Fields larger than 8192 characters are not supported and
could break the program.
Some charset conversions could cause the output to be
larger than the input and could break the program.
2

474
contrib/dbase/dbf.c Normal file
View File

@ -0,0 +1,474 @@
/* Routines to read and write xBase-files (.dbf)
By Maarten Boekhold, 29th of oktober 1995
Modified by Frank Koormann (fkoorman@usf.uni-osnabrueck.de), Jun 10 1996
prepare dataarea with memset
get systemtime and set filedate
set formatstring for real numbers
*/
#include <sys/types.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <ctype.h>
#include <time.h>
#include "dbf.h"
/*
 * dbf_open
 *
 * Open an existing dbf-file, read its header and field descriptions and
 * store this information in a freshly allocated dbhead.
 *
 * file:  path of the .dbf file
 * flags: flags handed to open(2)
 *
 * Returns the new dbhead, or (dbhead *)DBF_ERROR on any failure (allocation,
 * open, short header, unknown file type).  On failure nothing is leaked and
 * the file descriptor is closed.
 */
dbhead *dbf_open(u_char *file, int flags) {
	int		file_no = -1;
	dbhead		*dbh = NULL;
	f_descr		*fields = NULL;
	dbf_header	*head = NULL;
	dbf_field	*fieldc = NULL;
	int		t;

	if ((dbh = (dbhead *)malloc(sizeof(dbhead))) == NULL)
		goto fail;
	if ((head = (dbf_header *)malloc(sizeof(dbf_header))) == NULL)
		goto fail;
	if ((fieldc = (dbf_field *)malloc(sizeof(dbf_field))) == NULL)
		goto fail;

	if ((file_no = open((char *)file, flags)) == -1)
		goto fail;

	/* read in the disk-header; a short read would leave parts of it
	   uninitialised, so insist on the complete structure */
	if (read(file_no, head, sizeof(dbf_header)) != (ssize_t)sizeof(dbf_header))
		goto fail;

	if (!(head->dbh_dbt & DBH_NORMAL))
		goto fail;

	dbh->db_fd = file_no;
	/* DBH_MEMO (0x83) shares its low bits with DBH_NORMAL (0x03), so all
	   of its bits have to be present, not just any of them */
	dbh->db_memo = ((head->dbh_dbt & DBH_MEMO) == DBH_MEMO) ? 1 : 0;
	dbh->db_year = head->dbh_year;
	dbh->db_month = head->dbh_month;
	dbh->db_day = head->dbh_day;
	dbh->db_hlen = get_short((u_char *)&head->dbh_hlen);
	dbh->db_records = get_long((u_char *)&head->dbh_records);
	dbh->db_currec = 0;
	dbh->db_rlen = get_short((u_char *)&head->dbh_rlen);

	/* a header shorter than the fixed part cannot be valid and would make
	   the unsigned subtraction below wrap around */
	if (dbh->db_hlen < sizeof(dbf_header))
		goto fail;

	dbh->db_nfields = (dbh->db_hlen - sizeof(dbf_header)) / sizeof(dbf_field);

	/* dbh->db_hlen - sizeof(dbf_header) isn't the
		correct size, cos dbh->hlen is in fact
		a little more cos of the 0x0D (and
		possibly another byte, 0x4E, I have
		seen this somewhere). Because of rounding
		everything turns out right :) */

	/* allocate at least one slot so that a zero-field file is not
	   mistaken for an allocation failure */
	if ((fields = (f_descr *)calloc(dbh->db_nfields ? dbh->db_nfields : 1,
					sizeof(f_descr))) == NULL)
		goto fail;

	for (t = 0; t < dbh->db_nfields; t++) {
		/* Maybe the number of fields was calculated incorrectly. This can
		   happen when programs reserve lots of space at the end of the
		   header for future expansion.  The field list ends at the 0x0D
		   terminator, which shows up as the first byte of the descriptor
		   just read; a short read ends the list as well. */
		if (read(file_no, fieldc, sizeof(dbf_field)) != (ssize_t)sizeof(dbf_field)
			|| fieldc->dbf_name[0] == 0x0D) {
			dbh->db_nfields = t;
			break;
		}

		/* the slot is zero-filled by calloc(); copying one byte less than
		   its size guarantees a terminated name even for a corrupt file */
		strncpy((char *)fields[t].db_name, (char *)fieldc->dbf_name,
			DBF_NAMELEN - 1);
		fields[t].db_name[DBF_NAMELEN - 1] = '\0';
		fields[t].db_type = fieldc->dbf_type;
		fields[t].db_flen = fieldc->dbf_flen;
		fields[t].db_dec = fieldc->dbf_dec;
	}

	dbh->db_offset = dbh->db_hlen;
	dbh->db_fields = fields;

	/* record buffer, reused by dbf_get_record()/dbf_put_record() */
	if ((dbh->db_buff = (u_char *)malloc(dbh->db_rlen ? dbh->db_rlen : 1)) == NULL)
		goto fail;

	free(fieldc);
	free(head);

	return dbh;

fail:
	/* common cleanup: free(NULL) is a no-op, so partially built state is fine */
	if (file_no != -1)
		close(file_no);
	free(fields);
	free(fieldc);
	free(head);
	free(dbh);
	return (dbhead *)DBF_ERROR;
}
/*
 * dbf_write_head
 *
 * Rewrite the disk header at the start of the file from the values held in
 * dbh, stamping it with today's local date.
 *
 * Returns 0 on success, DBF_ERROR when the seek or the write fails.
 */
int dbf_write_head(dbhead *dbh) {
	dbf_header	disk_head;
	time_t		stamp;
	struct tm	*today;

	/* the header always lives at offset 0 */
	if (lseek(dbh->db_fd, 0, SEEK_SET) == -1)
		return DBF_ERROR;

	/* start from an all-zero header so the unused bytes are defined */
	memset(&disk_head, '\0', sizeof(dbf_header));

	disk_head.dbh_dbt = dbh->db_memo ? DBH_MEMO : DBH_NORMAL;

	stamp = time((time_t *)NULL);
	today = localtime(&stamp);
	disk_head.dbh_year = today->tm_year;
	disk_head.dbh_month = today->tm_mon + 1;	/* tm_mon counts from 0 */
	disk_head.dbh_day = today->tm_mday;

	put_long(disk_head.dbh_records, dbh->db_records);
	put_short(disk_head.dbh_hlen, dbh->db_hlen);
	put_short(disk_head.dbh_rlen, dbh->db_rlen);

	return (write(dbh->db_fd, &disk_head, sizeof(dbf_header)) == -1)
		? DBF_ERROR : 0;
}
/*
 * dbf_put_fields
 *
 * Write every in-memory field description to disk, directly behind the
 * disk header, followed by the 0x0D byte that ends the field list.
 *
 * Returns 0 on success, DBF_ERROR when the seek or any write fails.
 */
int dbf_put_fields(dbhead *dbh) {
	dbf_field	disk_field;
	u_long		n;
	u_char		terminator = 0x0D;

	if (lseek(dbh->db_fd, sizeof(dbf_header), SEEK_SET) == -1)
		return DBF_ERROR;

	/* zero the descriptor once; the reserved parts are never touched below */
	memset(&disk_field, '\0', sizeof(dbf_field));

	n = 0;
	while (n < dbh->db_nfields) {
		f_descr *src = &dbh->db_fields[n];

		strncpy(disk_field.dbf_name, src->db_name, DBF_NAMELEN - 1);
		disk_field.dbf_type = src->db_type;
		disk_field.dbf_flen = src->db_flen;
		disk_field.dbf_dec = src->db_dec;

		if (write(dbh->db_fd, &disk_field, sizeof(dbf_field)) == -1)
			return DBF_ERROR;
		n++;
	}

	return (write(dbh->db_fd, &terminator, 1) == -1) ? DBF_ERROR : 0;
}
/*
 * dbf_add_field
 *
 * Append a field description to dbh and grow the record buffer by the
 * field's length.
 *
 * name:   field name; at most DBF_NAMELEN - 1 characters are kept
 * type:   field type character
 * length: field length in bytes
 * dec:    number of decimal positions
 *
 * Returns 0 on success, DBF_ERROR when memory cannot be obtained or when
 * the field counter or record length would no longer fit their types.
 * On failure the field count and the header/record lengths are unchanged.
 */
int dbf_add_field(dbhead *dbh, u_char *name, u_char type,
								u_char length, u_char dec) {
	f_descr	*ptr;
	u_char	*foo;
	u_long	size, field_no;

	/* db_nfields is an u_char and db_rlen an u_short; refuse to wrap them */
	if (dbh->db_nfields == (u_char)~0)
		return DBF_ERROR;
	if ((u_long)dbh->db_rlen + length > 0xFFFF)
		return DBF_ERROR;

	size = (dbh->db_nfields + 1) * sizeof(f_descr);
	if (!(ptr = (f_descr *) realloc(dbh->db_fields, size))) {
		return DBF_ERROR;
	}
	/* realloc may have moved the array; the old pointer is gone either way */
	dbh->db_fields = ptr;

	/* grow the record buffer before committing any counters, so a failure
	   here leaves dbh consistent */
	if (!(foo = (u_char *) realloc(dbh->db_buff, dbh->db_rlen + length))) {
		return DBF_ERROR;
	}
	dbh->db_buff = foo;

	field_no = dbh->db_nfields;
	/* the new slot is uninitialised memory: copy at most DBF_NAMELEN - 1
	   bytes and terminate explicitly */
	strncpy((char *)dbh->db_fields[field_no].db_name, (char *)name,
		DBF_NAMELEN - 1);
	dbh->db_fields[field_no].db_name[DBF_NAMELEN - 1] = '\0';
	dbh->db_fields[field_no].db_type = type;
	dbh->db_fields[field_no].db_flen = length;
	dbh->db_fields[field_no].db_dec = dec;

	dbh->db_nfields++;
	dbh->db_hlen += sizeof(dbf_field);
	dbh->db_rlen += length;

	return 0;
}
/*
 * dbf_open_new
 *
 * Open (or, with O_CREAT in flags, create with mode DBF_FILE_MODE) the file
 * `name` and return an empty dbhead for it: no fields, no records, a header
 * length of the fixed header plus one byte and a record length of one byte.
 *
 * Returns (dbhead *)DBF_ERROR when allocation or open() fails.
 */
dbhead *dbf_open_new(u_char *name, int flags) {
	dbhead	*fresh;
	int	fd;

	fresh = (dbhead *)malloc(sizeof(dbhead));
	if (fresh == NULL)
		return (dbhead *)DBF_ERROR;

	/* the mode argument is only passed when a file may be created */
	fd = (flags & O_CREAT)
		? open(name, flags, DBF_FILE_MODE)
		: open(name, flags);
	if (fd == -1) {
		free(fresh);
		return (dbhead *)DBF_ERROR;
	}

	fresh->db_fd = fd;
	fresh->db_offset = 0;
	fresh->db_memo = 0;
	fresh->db_year = 0;
	fresh->db_month = 0;
	fresh->db_day = 0;
	fresh->db_hlen = sizeof(dbf_header) + 1;
	fresh->db_records = 0;
	fresh->db_currec = 0;
	fresh->db_rlen = 1;
	fresh->db_nfields = 0;
	fresh->db_buff = NULL;
	fresh->db_fields = (f_descr *)NULL;

	return fresh;
}
/*
 * dbf_close
 *
 * Close the file behind dbh and release the dbhead together with its
 * field array and record buffer.  dbh must not be used afterwards.
 */
void dbf_close(dbhead *dbh) {
	close(dbh->db_fd);

	/* db_fields is ONE allocation (calloc in dbf_open, realloc in
	   dbf_add_field); its elements must not be freed individually */
	if (dbh->db_fields != NULL) {
		free(dbh->db_fields);
	}

	if (dbh->db_buff != NULL) {
		free(dbh->db_buff);
	}

	free(dbh);
}
/*
 * dbf_get_record
 *
 * Read record number `rec` into dbh's record buffer and split it into
 * `fields` (as prepared by dbf_build_record()).  Character fields lose
 * their trailing, all other fields their leading bytes outside the range
 * 0x21..0x7E; the result is stored NUL-terminated in db_contents.
 *
 * Returns DBF_VALID for a live record, DBF_DELETED when the record carries
 * the deletion mark (fields are then left untouched), and DBF_ERROR when
 * the record cannot be positioned on or read completely.
 */
int dbf_get_record(dbhead *dbh, field *fields, u_long rec) {
	u_char	*data;
	int	t, i;
	off_t	offset;
	ssize_t	got;
	u_char	*dbffield, *end;

	/* calculate at which offset we have to read. *DON'T* forget the
	   0x0D which seperates field-descriptions from records!

	   Note (april 5 1996): This turns out to be included in db_hlen
	*/
	/* computed in off_t so large files do not overflow an int */
	offset = (off_t)dbh->db_hlen + ((off_t)rec * dbh->db_rlen);

	if (lseek(dbh->db_fd, offset, SEEK_SET) == -1) {
		lseek(dbh->db_fd, 0, SEEK_SET);
		dbh->db_offset = 0;
		return DBF_ERROR;
	}

	dbh->db_offset = offset;
	dbh->db_currec = rec;
	data = dbh->db_buff;

	/* a failed or short read (e.g. rec beyond the end of the file) would
	   leave stale data from the previous record in the buffer */
	got = read(dbh->db_fd, data, dbh->db_rlen);
	if (got <= 0 || got != (ssize_t)dbh->db_rlen) {
		return DBF_ERROR;
	}

	/* the first byte of every record is the deletion flag */
	if (data[0] == DBF_DELETED) {
		return DBF_DELETED;
	}

	dbffield = &data[1];
	for (t = 0; t < dbh->db_nfields; t++) {
		strncpy((char *)fields[t].db_name, (char *)dbh->db_fields[t].db_name,
			DBF_NAMELEN);
		fields[t].db_type = dbh->db_fields[t].db_type;
		fields[t].db_flen = dbh->db_fields[t].db_flen;
		fields[t].db_dec = dbh->db_fields[t].db_dec;

		if (fields[t].db_type == 'C') {
			/* text: walk backwards from the last byte, dropping padding */
			end = &dbffield[fields[t].db_flen - 1];
			i = fields[t].db_flen;
			while ((i > 0) && ((*end < 0x21) || (*end > 0x7E))) {
				end--;
				i--;
			}
			strncpy((char *)fields[t].db_contents, (char *)dbffield, i);
			fields[t].db_contents[i] = '\0';
		} else {
			/* everything else: skip the leading padding instead */
			end = dbffield;
			i = fields[t].db_flen;
			while ((i > 0) && ((*end < 0x21) || (*end > 0x7E))) {
				end++;
				i--;
			}
			strncpy((char *)fields[t].db_contents, (char *)end, i);
			fields[t].db_contents[i] = '\0';
		}

		dbffield += fields[t].db_flen;
	}

	dbh->db_offset += dbh->db_rlen;

	return DBF_VALID;
}
/*
 * dbf_build_record
 *
 * Allocate an array with one `field` per field of dbh, each with a
 * contents buffer of db_flen + 1 bytes, and copy name, type, length and
 * decimals from dbh's field descriptions.
 *
 * Returns the array (release it with dbf_free_record()), or
 * (field *)DBF_ERROR when memory runs out; nothing is leaked in that case.
 */
field *dbf_build_record(dbhead *dbh) {
	int	t, k;
	field	*fields;

	if (!(fields = (field *)calloc(dbh->db_nfields, sizeof(field)))) {
		return (field *)DBF_ERROR;
	}

	for (t = 0; t < dbh->db_nfields; t++) {
		if (!(fields[t].db_contents =
			(u_char *)malloc(dbh->db_fields[t].db_flen + 1))) {
			/* undo: only the entries before t own a buffer; use a
			   separate index and free the array exactly once */
			for (k = 0; k < t; k++) {
				free(fields[k].db_contents);
			}
			free(fields);
			return (field *)DBF_ERROR;
		}
		strncpy((char *)fields[t].db_name, (char *)dbh->db_fields[t].db_name,
			DBF_NAMELEN);
		fields[t].db_type = dbh->db_fields[t].db_type;
		fields[t].db_flen = dbh->db_fields[t].db_flen;
		fields[t].db_dec = dbh->db_fields[t].db_dec;
	}

	return fields;
}
/*
 * dbf_free_record
 *
 * Release a record array obtained from dbf_build_record(): the contents
 * buffer of each of dbh's db_nfields entries, then the array itself.
 */
void dbf_free_record(dbhead *dbh, field *rec) {
	field	*cur = rec;
	field	*stop = rec + dbh->db_nfields;

	while (cur < stop) {
		free(cur->db_contents);
		cur++;
	}

	free(rec);
}
/*
 * dbf_put_record
 *
 * Write `rec` to the file as record number `where`.  If `where` is larger
 * than the current record count the record is appended instead (over the
 * 0x1A end-of-file byte) and the record count is incremented.
 *
 * Text fields ('C') are stored left-aligned, all other fields
 * right-aligned; numeric fields with decimals are reformatted with db_dec
 * decimal positions.  Values longer than the field are cut to db_flen.
 *
 * Returns 0 on success, DBF_ERROR when seeking or writing fails.
 */
int dbf_put_record(dbhead *dbh, field *rec, u_long where) {
	u_long	offset, new, idx, t, h, length;
	u_char	*data, end = 0x1a;
	double	fl;
	char	foo[512];

/*	offset:	offset in file for this record
	new:	real offset after lseek
	idx:	index to which place we are inside the 'hardcore'-data for this
			record
	t:		field-counter
	data:	the hardcore-data that is put on disk
	h:		index into the field-part in the hardcore-data
	length:	length of the data to copy
	fl:		a float used to get the right precision with real numbers
	foo:	copy of db_contents when field is not 'C'; every write into it
			is bounded, so over-long contents are truncated instead of
			overflowing the buffer.  db_flen is an u_char, so anything
			that can actually be stored fits.
*/
/*	REMEMBER THAT THERE'S A 0x1A AT THE END OF THE FILE, SO DON'T
	DO A SEEK_END WITH 0!!!!!! USE -1 !!!!!!!!!!
*/

	if (where > dbh->db_records) {
		if ((new = lseek(dbh->db_fd, -1, SEEK_END)) == -1) {
			return DBF_ERROR;
		}
		dbh->db_records++;
	} else {
		offset = dbh->db_hlen + (where * dbh->db_rlen);
		if ((new = lseek(dbh->db_fd, offset, SEEK_SET)) == -1) {
			return DBF_ERROR;
		}
	}

	dbh->db_offset = new;

	data = dbh->db_buff;

	/* Set dataarea of data to ' ' (space) */
	memset(data, ' ', dbh->db_rlen);

/*	data[0] = DBF_VALID; */

	/* byte 0 is the deletion flag; field data starts behind it */
	idx = 1;
	for (t = 0; t < dbh->db_nfields; t++) {
		/* if field is empty, don't do a thing */
		if (rec[t].db_contents[0] != '\0') {
			/* Handle text */
			if (rec[t].db_type == 'C') {
				if (strlen((char *)rec[t].db_contents) > rec[t].db_flen) {
					length = rec[t].db_flen;
				} else {
					length = strlen((char *)rec[t].db_contents);
				}
				strncpy((char *)data + idx, (char *)rec[t].db_contents, length);
			} else {
				/* Handle the rest */
				/* Numeric is special, because of real numbers */
				if ((rec[t].db_type == 'N') && (rec[t].db_dec != 0)) {
					fl = atof((char *)rec[t].db_contents);
					/* "%.*f" takes the precision as an argument, and
					   snprintf never writes past foo */
					snprintf(foo, sizeof(foo), "%.*f", (int)rec[t].db_dec, fl);
				} else {
					strncpy(foo, (char *)rec[t].db_contents, sizeof(foo) - 1);
					foo[sizeof(foo) - 1] = '\0';
				}
				if (strlen(foo) > rec[t].db_flen) {
					length = rec[t].db_flen;
				} else {
					length = strlen(foo);
				}
				/* right-align inside the field */
				h = rec[t].db_flen - length;
				strncpy((char *)data + idx + h, foo, length);
			}
		}
		idx += rec[t].db_flen;
	}

	if (write(dbh->db_fd, data, dbh->db_rlen) == -1) {
		return DBF_ERROR;
	}

	/* There's a 0x1A at the end of a dbf-file */
	if (where == dbh->db_records) {
		if (write(dbh->db_fd, &end, 1) == -1) {
			return DBF_ERROR;
		}
	}

	dbh->db_offset += dbh->db_rlen;

	return 0;
}

135
contrib/dbase/dbf.h Normal file
View File

@ -0,0 +1,135 @@
/* header-file for dbf.c
declares routines for reading and writing xBase-files (.dbf), and
associated structures
Maarten Boekhold (boekhold@cindy.et.tudelft.nl) 29 oktober 1995
*/
#ifndef _DBF_H
#define _DBF_H
#include <sys/types.h>
/**********************************************************************
The DBF-part
***********************************************************************/
/* permission bits handed to open() by dbf_open_new() when it may create
   the file (O_CREAT) */
#define DBF_FILE_MODE 0644
/* byte offsets for date in dbh_date */
#define DBH_DATE_YEAR 0
#define DBH_DATE_MONTH 1
#define DBH_DATE_DAY 2
/* maximum fieldname-length, including the terminating \0 */
#define DBF_NAMELEN 11
/* magic-cookies for the file (first byte of the disk header).
   Note that DBH_MEMO contains all bits of DBH_NORMAL. */
#define DBH_NORMAL 0x03
#define DBH_MEMO 0x83
/* DBF_ERROR is the generic failure result; pointer-returning functions
   return it cast to their pointer type.
   DBF_VALID and DBF_DELETED are the results of dbf_get_record();
   DBF_DELETED is also the value of a record's first byte that marks it
   as deleted. */
#define DBF_ERROR -1
#define DBF_VALID 0x20
#define DBF_DELETED 0x2A
/* diskheader: the fixed 32 bytes at the start of a dbf-file, exactly as
   they are stored on disk.  Multi-byte numbers are kept as byte arrays and
   converted with get_short()/get_long() and put_short()/put_long(). */
typedef struct {
u_char dbh_dbt; /* identification field */
u_char dbh_year; /* last modification-date */
u_char dbh_month;
u_char dbh_day;
u_char dbh_records[4]; /* number of records */
u_char dbh_hlen[2]; /* length of this header */
u_char dbh_rlen[2]; /* length of a record */
u_char dbh_stub[20]; /* misc stuff we don't need */
} dbf_header;
/* disk field-description: one 32-byte entry per field, stored directly
   behind the diskheader */
typedef struct {
u_char dbf_name[DBF_NAMELEN]; /* field-name terminated with \0 */
u_char dbf_type; /* field-type */
u_char dbf_reserved[4]; /* some reserved stuff */
u_char dbf_flen; /* field-length */
u_char dbf_dec; /* number of decimal positions if
type is 'N' */
u_char dbf_stub[14]; /* stuff we don't need */
} dbf_field;
/* memory field-description: the parts of a dbf_field that are kept in
   memory for every field of an open file (see dbhead.db_fields) */
typedef struct {
u_char db_name[DBF_NAMELEN]; /* field-name terminated with \0 */
u_char db_type; /* field-type */
u_char db_flen; /* field-length */
u_char db_dec; /* number of decimal positions */
} f_descr;
/* memory dbf-header: everything known about one open dbf-file.  Created by
   dbf_open() or dbf_open_new() and released by dbf_close(). */
typedef struct {
int db_fd; /* file-descriptor */
u_long db_offset; /* current offset in file */
u_char db_memo; /* memo-file present */
u_char db_year; /* last update, one byte each for
year, month and day */
u_char db_month;
u_char db_day;
u_long db_hlen; /* length of the diskheader, for
calculating the offsets */
u_long db_records; /* number of records */
u_long db_currec; /* current record-number starting
at 0 */
u_short db_rlen; /* length of the record */
u_char db_nfields; /* number of fields */
u_char *db_buff; /* record-buffer to save malloc()'s */
f_descr *db_fields; /* pointer to an array of field-
descriptions */
} dbhead;
/* structure that contains everything a user wants from a field, including
the contents (in ASCII). Warning! db_flen may be bigger than the actual
length of db_contents! This is because a field doesn't have to be completely
filled. Arrays of this structure are created by dbf_build_record() and
released by dbf_free_record(). */
typedef struct {
u_char db_name[DBF_NAMELEN]; /* field-name terminated with \0 */
u_char db_type; /* field-type */
u_char db_flen; /* field-length */
u_char db_dec; /* number of decimal positions */
u_char* db_contents; /* contents of the field in ASCII,
terminated with \0 */
} field;
/* prototypes for functions.
   Functions returning int return DBF_ERROR on failure; functions returning
   a pointer return DBF_ERROR cast to that pointer type. */
/* open an existing file and read its header and field descriptions */
extern dbhead* dbf_open(u_char *file ,int flags);
/* write the disk header / the field descriptions back to the file */
extern int dbf_write_head(dbhead *dbh);
extern int dbf_put_fields(dbhead *dbh);
/* append a field description to the in-memory header */
extern int dbf_add_field(dbhead *dbh, u_char *name, u_char type,
u_char length, u_char dec);
/* open (or create) a file and return an empty in-memory header for it */
extern dbhead * dbf_open_new(u_char *name, int flags);
/* close the file and free the in-memory header */
extern void dbf_close(dbhead *dbh);
/* read record `rec` into `fields`; returns DBF_VALID or DBF_DELETED */
extern int dbf_get_record(dbhead *dbh, field *fields, u_long rec);
/* allocate / free an array of `field` matching the fields of dbh */
extern field* dbf_build_record(dbhead *dbh);
extern void dbf_free_record(dbhead *dbh, field* fields);
/* write `rec` as record number `where` */
extern int dbf_put_record(dbhead *dbh, field *rec, u_long where);
/*********************************************************************
The endian-part
***********************************************************************/
/* conversion between the byte arrays of the disk structures and native
   integers; implemented outside dbf.c */
extern long get_long(u_char *cp);
extern void put_long(u_char *cp, long lval);
extern short get_short(u_char *cp);
extern void put_short(u_char *cp, short lval);
#endif /* _DBF_H */

116
contrib/dbase/dbf2pg.1 Normal file
View File

@ -0,0 +1,116 @@
.TH dbf2sql 1L \" -*- nroff -*-
.SH NAME
dbf2sql \- Insert xBase\-style .dbf\-files into a PostgreSQL\-table
.SH SYNOPSIS
.B dbf2pg [options] dbf-file
.br
.br
Options:
.br
[-v[v]] [-f] [-u | -l] [-c | -D] [-d database] [-t table]
[-h host] [-s oldname=newname[,oldname=newname]]
[-b start] [-e end] [-W] [-U username] [-B transaction_size]
[-F charset_from [-T charset_to]]
.SH DESCRIPTION
This manual page documents the program
.BR dbf2pg.
It takes an xBase-style .dbf-file, and inserts it into the specified
database and table.
.SS OPTIONS
.TP
.I "\-v"
Display some status-messages.
.TP
.I "-vv"
Also display progress.
.TP
.I "-f"
Convert all field-names from the .dbf-file to lowercase.
.TP
.I "-u"
Convert the contents of all fields to uppercase.
.TP
.I "-l"
Convert the contents of all fields to lowercase.
.TP
.I "-c"
Create the table specified with
.IR \-t .
If this table already exists, first
.BR DROP
it.
.TP
.I "-D"
Delete the contents of the table specified with
.IR \-t .
Note that this table has to exist. An error is returned if this is not the
case.
.TP
.I "-W"
Ask for password.
.TP
.I "-d database"
Specify the database to use. An error is returned if this database does not
exist. Default is "test".
.TP
.I "-t table"
Specify the table to insert into. An error is returned if this table does not
exist. Default is "test".
.TP
.I "-h host"
Specify the host to which to connect. Default is "localhost".
.TP
.I "-s oldname=newname[,oldname=newname]"
Change the name of a field from
.BR oldname
to
.BR newname .
This is mainly used to avoid using reserved SQL-keywords. Example:
.br
.br
-s SELECT=SEL,COMMIT=doit
.br
.br
This is done
.BR before
the
.IR -f
operator has taken effect!
.TP
.I "-b start"
Specify the first record-number in the xBase-file we will insert.
.TP
.I "-e end"
Specify the last record-number in the xBase-file we will insert.
.TP
.I "-B transaction_size"
Specify the number of records per transaction, default is all records.
.TP
.I "-U username"
Log in to the database as the specified user.
.TP
.I "-F charset_from"
If specified, it converts the data from the specified charset. Example:
.br
.br
-F IBM437
.br
.br
Consult your system documentation to see the conversions available.
.TP
.I "-T charset_to"
Together with
.I "-F charset_from"
, it converts the data to the specified charset. Default is "ISO-8859-1".
.SH ENVIRONMENT
This program is affected by the environment-variables as used
by
.B PostgreSQL.
See the documentation of PostgreSQL for more info.
.SH BUGS
Fields larger than 8192 characters are not supported and could break the
program.
.br
Some charset conversions could cause the output to be larger than the input
and could break the program.

809
contrib/dbase/dbf2pg.c Normal file
View File

@ -0,0 +1,809 @@
/* This program reads in an xbase-dbf file and sends 'inserts' to an
PostgreSQL-server with the records in the xbase-file
M. Boekhold (boekhold@cindy.et.tudelft.nl) okt. 1995
oktober 1996: merged sources of dbf2msql.c and dbf2pg.c
oktober 1997: removed msql support
*/
#define HAVE_TERMIOS_H
#define HAVE_ICONV_H
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#ifdef HAVE_TERMIOS_H
#include <termios.h>
#endif
#ifdef HAVE_ICONV_H
#include <iconv.h>
#endif
#include <libpq-fe.h>
#include "dbf.h"
/* Settings filled in from the command line in main(): */
/* verbose: count of -v; upper/lower: -u/-l; create: -c; fieldlow: -f */
int verbose = 0, upper = 0, lower = 0, create = 0, fieldlow = 0;
/* del: -D */
int del = 0;
/* first and last record-number to process (-b, -e); end == 0 means
   "up to the last record" (see do_inserts) */
unsigned int begin = 0, end = 0;
/* records per transaction (-B); 0 means "everything in one transaction"
   (see do_inserts) */
unsigned int t_block = 0;
#ifdef HAVE_ICONV_H
/* source (-F) and target (-T) charset; conversion is only done when
   charset_from is set */
char *charset_from=NULL;
char *charset_to="ISO-8859-1";
/* conversion descriptor used by convert_charset() */
iconv_t iconv_d;
/* static result buffer of convert_charset() */
char convert_charset_buff[8192];
#endif
/* connection parameters (-h, -d, -t, -U, and -W for the password) */
char *host = NULL;
char *dbase = "test";
char *table = "test";
char *username = NULL;
char *password = NULL;
/* argument of -s (oldname=newname[,...]); modified by do_substitute() */
char *subarg = NULL;
/* static result buffer of Escape() */
char escape_buff[8192];
/* prototypes */
void do_substitute(char *subarg, dbhead *dbh);
inline void strtoupper(char *string);
inline void strtolower(char *string);
void do_create(PGconn *, char*, dbhead*);
void do_inserts(PGconn *, char*, dbhead*);
int check_table(PGconn *, char*);
char *Escape(char*);
#ifdef HAVE_ICONV_H
char *convert_charset(char *string);
#endif
void usage(void);
unsigned int isinteger(char *);
char *simple_prompt(const char *prompt, int maxlen, int echo);
/*
 * isinteger
 *
 * Returns 1 when buff consists of an optional leading '+' or '-' followed
 * by decimal digits only, 0 otherwise.  Note that the empty string and a
 * lone sign are accepted as well (there is no character that disqualifies
 * them).
 */
unsigned int isinteger(char *buff) {
	char *i = buff;

	while (*i != '\0') {
		/* a sign is allowed in the very first position only */
		if (i == buff && (*i == '-' || *i == '+')) {
			i++;
			continue;
		}
		/* isdigit() is only defined for unsigned char values and EOF;
		   plain char may be negative for bytes above 0x7F */
		if (!isdigit((unsigned char)*i))
			return 0;
		i++;
	}
	return 1;
}
/*
 * strtoupper
 *
 * Convert string to uppercase in place.
 *
 * The definition deliberately carries no `inline': with C99 semantics an
 * `inline' definition without `extern' or `static' provides no external
 * definition, so a call that is not inlined fails to link.
 */
void strtoupper(char *string) {
	while (*string != '\0') {
		/* toupper() is only defined for unsigned char values and EOF */
		*string = toupper((unsigned char)*string);
		string++;
	}
}
/*
 * strtolower
 *
 * Convert string to lowercase in place.
 *
 * The definition deliberately carries no `inline': with C99 semantics an
 * `inline' definition without `extern' or `static' provides no external
 * definition, so a call that is not inlined fails to link.
 */
void strtolower(char *string) {
	while (*string != '\0') {
		/* tolower() is only defined for unsigned char values and EOF */
		*string = tolower((unsigned char)*string);
		string++;
	}
}
/*
 * Escape
 *
 * Copy string into the static escape_buff, putting a backslash in front of
 * every tab, newline and backslash.
 *
 * Returns escape_buff; the result is overwritten by the next call.  Input
 * whose escaped form does not fit into escape_buff is truncated (never in
 * the middle of an escape sequence) instead of overflowing the buffer.
 */
char *Escape(char *string) {
	char	*out = escape_buff;
	/* last position that may hold a character; one byte stays free for '\0' */
	char	*limit = escape_buff + sizeof(escape_buff) - 1;
	char	*in;

	for (in = string; *in != '\0'; in++) {
		int need = ((*in == '\t') || (*in == '\n') || (*in == '\\')) ? 2 : 1;

		if (out + need > limit)
			break;
		if (need == 2)
			*out++ = '\\';
		*out++ = *in;
	}
	*out = '\0';

	return escape_buff;
}
#ifdef HAVE_ICONV_H
/*
 * convert_charset
 *
 * Convert string with the global conversion descriptor iconv_d into the
 * static convert_charset_buff.
 *
 * Returns convert_charset_buff; the result is overwritten by the next call.
 * When the conversion fails (including output that does not fit) a warning
 * is printed and the unconverted input, cut to the buffer size if
 * necessary, is returned instead.
 */
char *convert_charset(char *string) {
	size_t	in_size, out_size, nconv;
	char	*in_ptr, *out_ptr;

	in_size = strlen(string) + 1;
	/* keep one byte back so the final terminator below always fits */
	out_size = sizeof(convert_charset_buff) - 1;
	in_ptr = string;
	out_ptr = convert_charset_buff;

	iconv(iconv_d, NULL, &in_size, &out_ptr, &out_size); /* necessary to reset state information */
	while (in_size > 0)
	{
		nconv = iconv(iconv_d, &in_ptr, &in_size, &out_ptr, &out_size);
		if (nconv == (size_t) -1)
		{
			printf("WARNING: cannot convert charset of string \"%s\".\n",
				string);
			/* fall back to the raw input, bounded to the buffer */
			strncpy(convert_charset_buff, string,
				sizeof(convert_charset_buff) - 1);
			convert_charset_buff[sizeof(convert_charset_buff) - 1] = '\0';
			return convert_charset_buff;
		}
	}
	*out_ptr = 0; /* terminate output string */
	return convert_charset_buff;
}
#endif
/*
 * check_table
 *
 * Look `table` up among the relations of kind 'r' in pg_class whose name
 * does not start with "pg" (case-insensitively).
 *
 * Returns 1 when a relation with exactly that name exists, 0 when it does
 * not or when the lookup fails (the connection's error message is printed
 * in that case).
 */
int check_table(PGconn *conn, char *table) {
	char		*q = "select relname from pg_class where "
					"relkind='r' and relname !~* '^pg'";
	PGresult	*res;
	int		i, col;
	int		found = 0;

	res = PQexec(conn, q);
	/* PQexec returns a result object for failed queries too; only its
	   status tells whether rows are available */
	if (res == NULL || PQresultStatus(res) != PGRES_TUPLES_OK) {
		printf("%s\n", PQerrorMessage(conn));
		if (res != NULL)
			PQclear(res);
		return 0;
	}

	/* the column number is the same for every row */
	col = PQfnumber(res, "relname");
	for (i = 0; i < PQntuples(res); i++) {
		if (!strcmp(table, PQgetvalue(res, i, col))) {
			found = 1;
			break;
		}
	}

	PQclear(res);
	return found;
}
/*
 * usage
 *
 * Print a summary of the command-line syntax on stdout.
 *
 * The text is built from adjacent string literals with explicit "\n";
 * a literal newline inside a string literal is not valid C.
 */
void usage(void){
	printf("dbf2pg\n"
		"usage: dbf2pg [-u | -l] [-h hostname] [-W] [-U username]\n"
		"[-B transaction_size] [-F charset_from [-T charset_to]]\n"
		"[-s oldname=newname[,oldname=newname[...]]] [-d dbase]\n"
		"[-b start] [-e end]\n"
		"[-t table] [-c | -D] [-f] [-v[v]] dbf-file\n");
}
/* patch submitted by Jeffrey Y. Sue <jysue@aloha.net> */
/* Provides functionallity for substituting dBase-fieldnames for others */
/* Mainly for avoiding conflicts between fieldnames and SQL-reserved */
/* keywords */
/*
 * subarg has the form oldname=newname[,oldname=newname[...]].  For every
 * pair the field of dbh called oldname is renamed to newname (cut to
 * DBF_NAMELEN - 1 characters); a warning is printed for old names that do
 * not exist.  A NULL subarg does nothing.
 */
void do_substitute(char *subarg, dbhead *dbh)
{
	/* NOTE: subarg is modified in this function */
	char	*cursor, *sep, *from, *to;
	int	idx, found;

	if (subarg == NULL) {
		return;
	}
	if (verbose > 1) {
		printf("Substituting new field names\n");
	}

	/* split by hand instead of strtok because of possible empty tokens */
	cursor = subarg;
	for (;;) {
		if (cursor == NULL || *cursor == '\0') {
			break;
		}
		sep = strchr(cursor, '=');
		if (sep == NULL) {
			break;
		}

		from = cursor;
		*sep = '\0';		/* mark end of old name */
		to = sep + 1;		/* new name starts behind the '=' */

		/* find where the next pair starts; an empty new name leaves the
		   cursor on the empty string, which ends the loop */
		cursor = to;
		if (*to != '\0') {
			cursor = strchr(to, ',');
			if (cursor != NULL) {
				*cursor = '\0';	/* mark end of new name */
				cursor++;	/* point past where the comma was */
			}
		}

		if (strlen(to) >= DBF_NAMELEN) {
			printf("Truncating new field name %s to %d chars\n",
				to,DBF_NAMELEN-1);
			to[DBF_NAMELEN-1] = '\0';
		}

		/* rename the first field carrying the old name */
		found = 0;
		for (idx = 0; idx < dbh->db_nfields && !found; idx++) {
			if (strcmp(dbh->db_fields[idx].db_name, from) != 0) {
				continue;
			}
			found = 1;
			strcpy(dbh->db_fields[idx].db_name, to);
			if (verbose > 1) {
				printf("Substitute old:%s new:%s\n",
					from,to);
			}
		}
		if (!found) {
			printf("Warning: old field name %s not found\n",
				from);
		}
	}
} /* do_substitute */
/*
 * do_create
 *
 * Build a CREATE TABLE statement for `table` from the field descriptions of
 * dbh and send it to the server.  Fields with an empty name are skipped.
 * Type mapping: 'D' -> date, 'C' -> varchar(len) (char when len <= 1),
 * 'N' -> real when it has decimals, else int, 'L' -> char.  Any other type
 * gets no SQL type appended.
 *
 * With -f (fieldlow) the field names stored in dbh are lowercased in place.
 *
 * Exits the program when memory runs out or the statement fails.
 */
void do_create(PGconn *conn, char *table, dbhead *dbh) {
	char		*query;
	char		t[20];
	int		i, length;
	PGresult	*res;

	if (verbose > 1) {
		printf("Building CREATE-clause\n");
	}

	/* 40 bytes per field cover name, separator and the longest type text */
	if (!(query = (char *)malloc(
			(dbh->db_nfields * 40) + 29 + strlen(table)))) {
		fprintf(stderr, "Memory allocation error in function do_create\n");
		PQfinish(conn);
		close(dbh->db_fd);
		free(dbh);
		exit(1);
	}

	sprintf(query, "CREATE TABLE %s (", table);
	/* remember the length of the prefix: anything longer means that at
	   least one column has been added and a comma is needed */
	length = strlen(query);
	for (i = 0; i < dbh->db_nfields; i++) {
		if (!strlen(dbh->db_fields[i].db_name)) {
			continue;
			/* skip field if length of name == 0 */
		}
		if ((strlen(query) != length)) {
			strcat(query, ",");
		}

		if (fieldlow)
			strtolower(dbh->db_fields[i].db_name);

		strcat(query, dbh->db_fields[i].db_name);
		switch (dbh->db_fields[i].db_type) {
			case 'D':
				strcat(query, " date");
				break;
			case 'C':
				if (dbh->db_fields[i].db_flen > 1) {
					strcat(query, " varchar");
					sprintf(t, "(%d)",
						dbh->db_fields[i].db_flen);
					strcat(query, t);
				} else {
					strcat(query, " char");
				}
				break;
			case 'N':
				if (dbh->db_fields[i].db_dec != 0) {
					strcat(query, " real");
				} else {
					strcat(query, " int");
				}
				break;
			case 'L':
				strcat(query, " char");
				break;
		}
	}

	strcat(query, ")");

	if (verbose > 1) {
		printf("Sending create-clause\n");
		printf("%s\n", query);
	}

	/* PQexec returns a result object for failed statements too, so the
	   status has to be checked as well as the pointer */
	res = PQexec(conn, query);
	if (res == NULL || PQresultStatus(res) != PGRES_COMMAND_OK) {
		fprintf(stderr, "Error creating table!\n");
		fprintf(stderr, "Detailed report: %s\n", PQerrorMessage(conn));
		if (res != NULL)
			PQclear(res);
		close(dbh->db_fd);
		free(dbh);
		free(query);
		PQfinish(conn);
		exit(1);
	}

	PQclear(res);
	free(query);
}
/* FIXME: can be optimized to not use strcat, but it is worth the effort? */
/*
 * do_inserts
 *
 * Copy the records begin .. end-1 of dbh into `table` with COPY ... FROM
 * stdin, committing after every t_block records.  end == 0 selects all
 * records, t_block == 0 a single transaction.  Deleted records are skipped.
 *
 * Per field: contents are optionally case-converted (-u/-l), charset
 * converted (-F) and escaped; 'D' fields are rewritten as YYYY-MM-DD, and
 * dates or integers that do not look valid are loaded as NULL (\N).
 *
 * Exits the program on allocation failure or when a statement fails.
 */
void do_inserts(PGconn *conn, char *table, dbhead *dbh) {
	PGresult	*res;
	field		*fields;
	int		i, h, result;
	int		emitted;	/* columns written to the current line */
	char		*query, *foo;
	char		pgdate[11];	/* "YYYY-MM-DD" plus the terminating \0 */

	if (verbose > 1) {
		printf("Inserting records\n");
	}

	h = 2; /* 2 because of terminating \n\0 */

	for (i = 0; i < dbh->db_nfields; i++) {
		/* Escape() can double the length of a value */
		int width = dbh->db_fields[i].db_flen * 2;

		if (dbh->db_fields[i].db_type == 'D' && width < 10)
			width = 10;	/* reformatted dates take 10 characters */
		if (width < 2)
			width = 2;	/* account for possible NULL values (\N) */
		h += width;
		h += 1;			/* the delimiter */
	}

	/* make sure we can build the COPY query, note that we don't need to just
	   add this value, since the COPY query is a separate query (see below) */
	if ((size_t)h < 17 + strlen(table))
		h = 17 + strlen(table);

	if (!(query = (char *)malloc(h))) {
		PQfinish(conn);
		fprintf(stderr,
			"Memory allocation error in function do_inserts (query)\n");
		close(dbh->db_fd);
		free(dbh);
		exit(1);
	}

	if ((fields = dbf_build_record(dbh)) == (field *)DBF_ERROR) {
		fprintf(stderr,
			"Couldn't allocate memory for record in do_insert\n");
		PQfinish(conn);
		free(query);
		dbf_close(dbh);
		exit(1);
	}

	if (end == 0)			/* "end" is a user option, if not specified, */
		end = dbh->db_records;	/* then all records are processed. */

	if (t_block == 0)		/* user not specified transaction block size */
		t_block = end - begin;	/* then we set it to be the full data */

	for (i = begin; i < end; i++) {
		/* we need to start a new transaction and COPY statement */
		if (((i - begin) % t_block) == 0) {
			if (verbose > 1)
				fprintf(stderr, "Transaction: START\n");
			res = PQexec(conn, "BEGIN");
			if (res == NULL || PQresultStatus(res) != PGRES_COMMAND_OK) {
				fprintf(stderr, "Error starting transaction!\n");
				fprintf(stderr, "Detailed report: %s\n", PQerrorMessage(conn));
				exit(1);
			}
			PQclear(res);
			sprintf(query, "COPY %s FROM stdin", table);
			res = PQexec(conn, query);
			/* only PGRES_COPY_IN means the server is waiting for data */
			if (res == NULL || PQresultStatus(res) != PGRES_COPY_IN) {
				fprintf(stderr, "Error starting COPY!\n");
				fprintf(stderr, "Detailed report: %s\n", PQerrorMessage(conn));
				exit(1);
			}
			PQclear(res);
		}

		/* build line and submit */
		result = dbf_get_record(dbh, fields, i);
		if (result == DBF_VALID) {
			query[0] = '\0';
			emitted = 0;
			for (h = 0; h < dbh->db_nfields; h++) {
				if (!strlen(fields[h].db_name)) {
					continue;
				}

				/* separate from the previous column; counting written
				   columns (not field indexes) keeps the line correct
				   when a leading field is skipped */
				if (emitted != 0)
					strcat(query, "\t");	/* COPY statement field separator */
				emitted++;

				if (upper) {
					strtoupper(fields[h].db_contents);
				}
				if (lower) {
					strtolower(fields[h].db_contents);
				}

				foo = fields[h].db_contents;
#ifdef HAVE_ICONV_H
				if (charset_from)
					foo = convert_charset(foo);
#endif
				foo = Escape(foo);

				/* handle the date first - liuk */
				if (fields[h].db_type == 'D') {
					if ((strlen(foo) == 8) && isinteger(foo)) {
						sprintf(pgdate, "%c%c%c%c-%c%c-%c%c",
							foo[0],foo[1],foo[2],foo[3],
							foo[4],foo[5],foo[6],foo[7]);
						strcat(query, pgdate);
					} else {
						/* empty field must be inserted as NULL value in this
						   way */
						strcat(query, "\\N");
					}
				}
				else if ((fields[h].db_type == 'N') &&
					(fields[h].db_dec == 0)) {
					if (isinteger(foo)) {
						strcat(query, foo);
					} else {
						strcat(query, "\\N");
						if (verbose)
							fprintf(stderr, "Illegal numeric value found "
								"in record %d, field \"%s\"\n",
								i, fields[h].db_name);
					}
				} else {
					strcat(query, foo); /* must be character */
				}
			}
			strcat(query, "\n");

			if ((verbose > 1) && ((i % 100) == 0)) {/* Only show every 100 */
				printf("Inserting record %d\n", i);	/* records. */
			}
			PQputline(conn, query);
		}

		/* we need to end this copy and transaction */
		if (((i - begin) % t_block) == t_block - 1) {
			if (verbose > 1)
				fprintf(stderr, "Transaction: END\n");
			PQputline(conn, "\\.\n");
			if (PQendcopy(conn) != 0) {
				fprintf(stderr, "Something went wrong while copying. Check "
					"your tables!\n");
				exit(1);
			}
			res = PQexec(conn, "END");
			if (res == NULL || PQresultStatus(res) != PGRES_COMMAND_OK) {
				fprintf(stderr, "Error committing work!\n");
				fprintf(stderr, "Detailed report: %s\n", PQerrorMessage(conn));
				exit(1);
			}
			PQclear(res);
		}
	}

	/* last row copied in, end copy and transaction */
	/* remember, i is now 1 greater then when we left the loop */
	/* t_block is still 0 when no records were selected (end == begin);
	   nothing was started then, and the modulo must not be evaluated */
	if (t_block != 0 && ((i - begin) % t_block) != 0) {
		if (verbose > 1)
			fprintf(stderr, "Transaction: END\n");
		PQputline(conn, "\\.\n");

		if (PQendcopy(conn) != 0) {
			fprintf(stderr, "Something went wrong while copying. Check "
				"your tables!\n");
		}
		res = PQexec(conn, "END");
		if (res == NULL || PQresultStatus(res) != PGRES_COMMAND_OK) {
			fprintf(stderr, "Error committing work!\n");
			fprintf(stderr, "Detailed report: %s\n", PQerrorMessage(conn));
			exit(1);
		}
		PQclear(res);
	}
	dbf_free_record(dbh, fields);

	free(query);
}
/*
 * This is from Postgres 7.0.3 source tarball, utility program PSQL.
 *
 * simple_prompt
 *
 * Generalized function especially intended for reading in usernames and
 * password interactively. Reads from stdin.
 *
 * prompt:		The prompt to print
 * maxlen:		How many characters to accept
 * echo:		Set to false if you want to hide what is entered (for passwords)
 *
 * Returns a malloc()'ed string with the input (w/o trailing newline), or
 * NULL when memory cannot be allocated.  End of input or a read error
 * yields an empty string.
 */
static int	prompt_state;

char *
simple_prompt(const char *prompt, int maxlen, int echo)
{
	int			length;
	char	   *destination;

#ifdef HAVE_TERMIOS_H
	struct termios t_orig,
				t;
#endif

	destination = (char *) malloc(maxlen + 2);
	if (!destination)
		return NULL;
	if (prompt)
		fputs(prompt, stderr);

	prompt_state = 1;

#ifdef HAVE_TERMIOS_H
	if (!echo)
	{
		/* switch terminal echo off while the secret is typed */
		tcgetattr(0, &t);
		t_orig = t;
		t.c_lflag &= ~ECHO;
		tcsetattr(0, TCSADRAIN, &t);
	}
#endif

	/* on EOF or error fgets() leaves the buffer untouched; make it a
	   valid empty string so the code below can inspect it */
	if (fgets(destination, maxlen, stdin) == NULL)
		destination[0] = '\0';

#ifdef HAVE_TERMIOS_H
	if (!echo)
	{
		tcsetattr(0, TCSADRAIN, &t_orig);
		puts("");
	}
#endif

	prompt_state = 0;

	length = strlen(destination);
	if (length > 0 && destination[length - 1] != '\n')
	{
		/* eat rest of the line */
		char		buf[512];
		size_t		got;

		do
		{
			/* stop at end of input instead of re-testing stale data */
			if (fgets(buf, sizeof(buf), stdin) == NULL)
				break;
			got = strlen(buf);
		} while (got == 0 || buf[got - 1] != '\n');
	}

	if (length > 0 && destination[length - 1] == '\n')
		/* remove trailing newline */
		destination[length - 1] = '\0';

	return destination;
}
/*
 * dbf2pg_exit
 *
 * Release whatever main() has acquired so far and terminate the process.
 *
 * conn:			libpq connection to finish, or NULL if none was made
 * dbh:			open dbf handle to close and free, or NULL
 * iconv_opened:	non-zero only if iconv_open() succeeded on iconv_d
 * status:			process exit status
 */
static void
dbf2pg_exit(PGconn *conn, dbhead *dbh, int iconv_opened, int status)
{
	if (dbh) {
		close(dbh->db_fd);
		free(dbh);
	}
	if (conn)
		PQfinish(conn);
	if (username)
		free(username);
	if (password)
		free(password);
#ifdef HAVE_ICONV_H
	/* only close a descriptor that was really opened */
	if (iconv_opened)
		iconv_close(iconv_d);
#else
	(void) iconv_opened;
#endif
	exit(status);
}

/*
 * main
 *
 * Parses the command line, opens the dbf file named by the single
 * non-option argument, connects to the PostgreSQL server, optionally
 * drops/creates or empties the target table, and then copies the records
 * in via do_inserts().  Exits with status 0 on success and 1 on any
 * failure reported here.
 */
int main(int argc, char **argv)
{
	PGconn		*conn;
	int			i;
	int			iconv_opened = 0;
	extern int	optind;
	extern char	*optarg;
	char		*query;
	dbhead		*dbh;

	/* getopt() signals the end of the options with -1 */
	while ((i = getopt(argc, argv, "DWflucvh:b:e:d:t:s:B:U:F:T:")) != -1) {
		switch (i) {
			case 'D':
				if (create) {
					usage();
					printf("Can't use -c and -D at the same time!\n");
					exit(1);
				}
				del = 1;
				break;
			case 'W':
				password = simple_prompt("Password: ", 100, 0);
				break;
			case 'f':
				fieldlow = 1;
				break;
			case 'v':
				verbose++;
				break;
			case 'c':
				if (del) {
					usage();
					printf("Can't use -c and -D at the same time!\n");
					exit(1);
				}
				create = 1;
				break;
			case 'l':
				/* mirror the check done for -u so the order does not matter */
				if (upper) {
					usage();
					printf("Can't use -u and -l at the same time!\n");
					exit(1);
				}
				lower = 1;
				break;
			case 'u':
				if (lower) {
					usage();
					printf("Can't use -u and -l at the same time!\n");
					exit(1);
				}
				upper = 1;
				break;
			case 'b':
				begin = atoi(optarg);
				break;
			case 'e':
				end = atoi(optarg);
				break;
			case 'h':
				host = (char *) strdup(optarg);
				break;
			case 'd':
				dbase = (char *) strdup(optarg);
				break;
			case 't':
				table = (char *) strdup(optarg);
				break;
			case 's':
				subarg = (char *) strdup(optarg);
				break;
			case 'B':
				t_block = atoi(optarg);
				break;
			case 'U':
				username = (char *) strdup(optarg);
				break;
			case 'F':
				charset_from = (char *) strdup(optarg);
				break;
			case 'T':
				charset_to = (char *) strdup(optarg);
				break;
			case ':':
				usage();
				printf("missing argument!\n");
				exit(1);
				break;
			case '?':
				usage();
				/* FIXME: Ivan thinks this is bad: printf("unknown argument: %s\n", argv[0]); */
				exit(1);
				break;
			default:
				break;
		}
	}

	argc -= optind;
	argv = &argv[optind];

	/* exactly one non-option argument is expected: the dbf file */
	if (argc != 1) {
		usage();
		dbf2pg_exit(NULL, NULL, iconv_opened, 1);
	}

#ifdef HAVE_ICONV_H
	if (charset_from) {
		/*
		 * NOTE(review): charset_to is used as-is here; confirm it has a
		 * non-NULL default when -F is given without -T.
		 */
		if (verbose > 1)
			printf("Setting conversion from charset \"%s\" to \"%s\".\n",
				   charset_from, charset_to);
		iconv_d = iconv_open(charset_to, charset_from);
		if (iconv_d == (iconv_t) -1) {
			printf("Cannot convert from charset \"%s\" to charset \"%s\".\n",
				   charset_from, charset_to);
			dbf2pg_exit(NULL, NULL, iconv_opened, 1);
		}
		iconv_opened = 1;
	}
#endif

	if (verbose > 1) {
		printf("Opening dbf-file\n");
	}

	if ((dbh = dbf_open(argv[0], O_RDONLY)) == (dbhead *) -1) {
		fprintf(stderr, "Couldn't open xbase-file %s\n", argv[0]);
		dbf2pg_exit(NULL, NULL, iconv_opened, 1);
	}

	if (fieldlow)
		for (i = 0; i < dbh->db_nfields; i++)
			strtolower(dbh->db_fields[i].db_name);

	if (verbose) {
		printf("dbf-file: %s, PG-dbase: %s, PG-table: %s\n", argv[0],
			   dbase,
			   table);
		printf("Number of records: %ld\n", dbh->db_records);
		printf("NAME:\t\tLENGTH:\t\tTYPE:\n");
		printf("-------------------------------------\n");
		for (i = 0; i < dbh->db_nfields; i++) {
			printf("%-12s\t%7d\t\t%5c\n", dbh->db_fields[i].db_name,
				   dbh->db_fields[i].db_flen,
				   dbh->db_fields[i].db_type);
		}
	}

	if (verbose > 1) {
		printf("Making connection to PG-server\n");
	}

	conn = PQsetdbLogin(host, NULL, NULL, NULL, dbase, username, password);
	if (PQstatus(conn) != CONNECTION_OK) {
		fprintf(stderr, "Couldn't get a connection with the ");
		fprintf(stderr, "designated host!\n");
		fprintf(stderr, "Detailed report: %s\n", PQerrorMessage(conn));
		dbf2pg_exit(conn, dbh, iconv_opened, 1);
	}

	/* Substitute field names */
	do_substitute(subarg, dbh);

	/* create table if specified, else check if target table exists */
	if (!create) {
		if (!check_table(conn, table)) {
			printf("Table does not exist!\n");
			dbf2pg_exit(conn, dbh, iconv_opened, 1);
		}
		if (del) {
			/* 12 characters of "DELETE FROM " plus the terminating NUL */
			if (!(query = (char *) malloc(13 + strlen(table)))) {
				printf("Memory-allocation error in main (delete)!\n");
				dbf2pg_exit(conn, dbh, iconv_opened, 1);
			}
			if (verbose > 1) {
				printf("Deleting from original table\n");
			}
			sprintf(query, "DELETE FROM %s", table);
			/* NOTE(review): result is neither checked nor released here */
			PQexec(conn, query);
			free(query);
		}
	} else {
		/* 11 characters of "DROP TABLE " plus the terminating NUL */
		if (!(query = (char *) malloc(12 + strlen(table)))) {
			printf("Memory-allocation error in main (drop)!\n");
			dbf2pg_exit(conn, dbh, iconv_opened, 1);
		}
		if (verbose > 1) {
			printf("Dropping original table (if one exists)\n");
		}
		sprintf(query, "DROP TABLE %s", table);
		/* best effort: the table may legitimately not exist yet */
		PQexec(conn, query);
		free(query);

		/* Build a CREATE-clause */
		do_create(conn, table, dbh);
	}

	/* Select ISO date style for the session, then copy the records in */
	PQexec(conn, "SET DATESTYLE TO 'ISO';");
	do_inserts(conn, table, dbh);

	if (verbose > 1) {
		printf("Closing up....\n");
	}

	dbf2pg_exit(conn, dbh, iconv_opened, 0);
	return 0;					/* not reached */
}

45
contrib/dbase/endian.c Normal file
View File

@ -0,0 +1,45 @@
/* Maarten Boekhold (boekhold@cindy.et.tudelft.nl) oktober 1995 */
#include <sys/types.h>
#include "dbf.h"
/*
 * routine to change little endian long to host long
 *
 * Reads the four bytes cp[0..3], least significant byte first, and
 * returns them interpreted as a 32-bit two's-complement value.
 *
 * The value is assembled in an unsigned type: shifting a byte >= 0x80
 * left by 24 in a (promoted) signed int would overflow, which is
 * undefined behaviour.  The sign is then applied arithmetically so the
 * result is the same whether long is 32 or 64 bits wide.
 */
long get_long(u_char *cp)
{
	unsigned long bits;

	bits = (unsigned long) cp[0]
		| ((unsigned long) cp[1] << 8)
		| ((unsigned long) cp[2] << 16)
		| ((unsigned long) cp[3] << 24);

	/* top bit set: negative value, i.e. -(~bits + 1) over 32 bits */
	if (bits & 0x80000000UL)
		return -(long) (~bits & 0x7fffffffUL) - 1;

	return (long) bits;
}
/*
 * Store the low 32 bits of lval into cp[0..3], least significant byte
 * first.
 */
void put_long(u_char *cp, long lval)
{
	int			idx;

	for (idx = 0; idx < 4; idx++)
		cp[idx] = (lval >> (idx * 8)) & 0xff;
}
/*
 * routine to change little endian short to host short
 *
 * Combines cp[0] (low byte) and cp[1] (high byte) and returns the
 * result converted to short.
 */
short get_short(u_char *cp)
{
	const u_char low = cp[0];
	const u_char high = cp[1];

	return (short) (low + (high << 8));
}
/*
 * Store sval into cp[0..1], least significant byte first.
 */
void put_short(u_char *cp, short sval)
{
	const int	widened = sval;

	cp[1] = (widened >> 8) & 0xff;
	cp[0] = widened & 0xff;
}