From c2a062b7fe55a0d29a8b2f7dffd4f6dd1202af31 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Thu, 10 May 2001 14:41:23 +0000 Subject: [PATCH] Add dbase conversion utility to /contrib. --- contrib/README | 4 + contrib/dbase/Makefile | 36 ++ contrib/dbase/README.dbf2pg | 132 ++++++ contrib/dbase/dbf.c | 474 +++++++++++++++++++++ contrib/dbase/dbf.h | 135 ++++++ contrib/dbase/dbf2pg.1 | 116 ++++++ contrib/dbase/dbf2pg.c | 809 ++++++++++++++++++++++++++++++++++++ contrib/dbase/endian.c | 45 ++ 8 files changed, 1751 insertions(+) create mode 100644 contrib/dbase/Makefile create mode 100644 contrib/dbase/README.dbf2pg create mode 100644 contrib/dbase/dbf.c create mode 100644 contrib/dbase/dbf.h create mode 100644 contrib/dbase/dbf2pg.1 create mode 100644 contrib/dbase/dbf2pg.c create mode 100644 contrib/dbase/endian.c diff --git a/contrib/README b/contrib/README index fca74d32cd..4ad2760f66 100644 --- a/contrib/README +++ b/contrib/README @@ -34,6 +34,10 @@ cube - Multidimensional-cube datatype (GiST indexing example) by Gene Selkov, Jr. +dbase - + Converts from dbase/xbase to PostgreSQL + by Ivan Baldo, lubaldo@adinet.com.uy + earthdistance - Operator for computing earth distance for two points by Hal Snyder diff --git a/contrib/dbase/Makefile b/contrib/dbase/Makefile new file mode 100644 index 0000000000..8302e77702 --- /dev/null +++ b/contrib/dbase/Makefile @@ -0,0 +1,36 @@ +# +# $Header: /cvsroot/pgsql/contrib/dbase/Attic/Makefile,v 1.1 2001/05/10 14:41:23 momjian Exp $ +# + +subdir = contrib/dbase +top_builddir = ../.. 
+include $(top_builddir)/src/Makefile.global + +override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) + +OBJS = dbf.o dbf2pg.o endian.o + +all: dbf2pg + +dbf2pg: $(OBJS) $(libpq_builddir)/libpq.a + $(CC) $(CFLAGS) $(OBJS) $(libpq) $(LDFLAGS) $(LIBS) -liconv -o $@ + +install: all installdirs + $(INSTALL_PROGRAM) dbf2pg$(X) $(bindir) + $(INSTALL_DATA) README.dbf2pg $(docdir)/contrib + +installdirs: + $(mkinstalldirs) $(bindir) $(docdir)/contrib + +uninstall: + rm -f $(bindir)/dbf2pg$(X) $(docdir)/contrib/README.dbf2pg + +clean distclean maintainer-clean: + rm -f dbf2pg$(X) $(OBJS) + +depend dep: + $(CC) -MM -MG $(CFLAGS) *.c > depend + +ifeq (depend,$(wildcard depend)) +include depend +endif diff --git a/contrib/dbase/README.dbf2pg b/contrib/dbase/README.dbf2pg new file mode 100644 index 0000000000..3dfa40ad31 --- /dev/null +++ b/contrib/dbase/README.dbf2pg @@ -0,0 +1,132 @@ + + + +dbf2sql(1L) dbf2sql(1L) + + +NAME + dbf2sql - Insert xBase-style .dbf-files into a Post- + greSQL-table + +SYNOPSIS + "dbf2pg [options] dbf-file" + Options: + [-v[v]] [-f] [-u | -l] [-c | -D] [-d database] [-t table] + [-h host] [-s oldname=newname[,oldname=newname]] [-s + start] [-e end] [-W] [-U username] [-B transaction_size] + [-F charset_from [-T charset_to]] + + +DESCRIPTION + This manual page documents the program dbf2pg. It takes + an xBase-style .dbf-file, and inserts it into the speci- + fied database and table. + + OPTIONS + -v Display some status-messages. + + -vv Also display progress. + + -f Convert all field-names from the .dbf-file to low- + ercase. + + -u Convert the contents of all fields to uppercase. + + -l Convert the contents of all fields to lowercase. + + -c Create the table specified with -t. If this table + already exists, first DROP it. + + -D Delete the contents of the table specified with -t. + Note that this table has to exists. An error is + returned if this is not the case. + + -W Ask for password. + + -d database + Specify the database to use. 
An error is returned + if this database does not exist. Default is + "test". + + -t table + Specify the table to insert in. An error is + returned if this table does not exist. Default is + "test". + + -h host + Specify the host to which to connect. Default is + "localhost". + + + + + + 1 + + + + + +dbf2sql(1L) dbf2sql(1L) + + + -s oldname=newname[,oldname=newname] + Change the name of a field from oldname to newname. + This is mainly used to avoid using reserved SQL- + keywords. Example: + -s SELECT=SEL,COMMIT=doit + This is done before the -f operator has taken + effect! + + -s start + Specify the first record-number in the xBase-file + we will insert. + + -e end Specify the last record-number in the xBase-file we + will insert. + + -B transaction_size + Specify the number of records per transaction, + default is all records. + + -U username + Log in to the database as the specified user. + + -F charset_from + If specified, it converts the data from the speci- + fied charset. Example: + -F IBM437 + Consult your system documentation to see the con- + versions available. + + -T charset_to + Together with -F charset_from, it converts the + data to the specified charset. Default is + "ISO-8859-1". + +ENVIRONMENT + This program is affected by the environment-variables as + used by "PostgreSQL." See the documentation of Post- + greSQL for more info. + +BUGS + Fields larger than 8192 characters are not supported and + could break the program. + Some charset conversions could cause the output to be + larger than the input and could break the program.
+ + + + + + + + + + + + + + 2 + + diff --git a/contrib/dbase/dbf.c b/contrib/dbase/dbf.c new file mode 100644 index 0000000000..94f0b165ca --- /dev/null +++ b/contrib/dbase/dbf.c @@ -0,0 +1,474 @@ +/* Routines to read and write xBase-files (.dbf) + + By Maarten Boekhold, 29th of oktober 1995 + + Modified by Frank Koormann (fkoorman@usf.uni-osnabrueck.de), Jun 10 1996 + prepare dataarea with memset + get systemtime and set filedate + set formatstring for real numbers +*/ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dbf.h" + +/* open a dbf-file, get it's field-info and store this information */ + +dbhead *dbf_open(u_char *file, int flags) { + int file_no; + dbhead *dbh; + f_descr *fields; + dbf_header *head; + dbf_field *fieldc; + int t; + + if ((dbh = (dbhead *)malloc(sizeof(dbhead))) == NULL) { + return (dbhead *)DBF_ERROR; + } + + if ((head = (dbf_header *)malloc(sizeof(dbf_header))) == NULL) { + free(dbh); + return (dbhead *)DBF_ERROR; + } + + if ((fieldc = (dbf_field *)malloc(sizeof(dbf_field))) == NULL) { + free(head); + free(dbh); + return (dbhead *)DBF_ERROR; + } + + if ((file_no = open(file, flags)) == -1) { + free(fieldc); + free(head); + free(dbh); + return (dbhead *)DBF_ERROR; + } + +/* read in the disk-header */ + + if (read(file_no, head, sizeof(dbf_header)) == -1) { + close(file_no); + free(fieldc); + free(head); + free(dbh); + return (dbhead *)DBF_ERROR; + } + + if (!(head->dbh_dbt & DBH_NORMAL)) { + close(file_no); + free(fieldc); + free(head); + free(dbh); + return (dbhead *)DBF_ERROR; + } + + dbh->db_fd = file_no; + if (head->dbh_dbt & DBH_MEMO) { + dbh->db_memo = 1; + } else { + dbh->db_memo = 0; + } + dbh->db_year = head->dbh_year; + dbh->db_month = head->dbh_month; + dbh->db_day = head->dbh_day; + dbh->db_hlen = get_short((u_char *)&head->dbh_hlen); + dbh->db_records = get_long((u_char *)&head->dbh_records); + dbh->db_currec = 0; + dbh->db_rlen = get_short((u_char *)&head->dbh_rlen); + 
dbh->db_nfields = (dbh->db_hlen - sizeof(dbf_header)) / sizeof(dbf_field); + + /* dbh->db_hlen - sizeof(dbf_header) isn't the + correct size, cos dbh->hlen is in fact + a little more cos of the 0x0D (and + possibly another byte, 0x4E, I have + seen this somewhere). Because of rounding + everything turns out right :) */ + + if ((fields = (f_descr *)calloc(dbh->db_nfields, sizeof(f_descr))) + == NULL) { + close(file_no); + free(fieldc); + free(head); + free(dbh); + return (dbhead *)DBF_ERROR; + } + + for (t = 0; t < dbh->db_nfields; t++) { +/* Maybe I have calculated the number of fields incorrectly. This can happen + when programs reserve lots of space at the end of the header for future + expansion. The 0x0D terminator must be looked for in the descriptor just + read from disk: fields[] comes from calloc() and is still all zeroes at + this point. A short or failed read ends the field list as well. */ + if (read(file_no, fieldc, sizeof(dbf_field)) != (ssize_t)sizeof(dbf_field) + || fieldc->dbf_name[0] == 0x0D) { + dbh->db_nfields = t; + break; + } + strncpy(fields[t].db_name, fieldc->dbf_name, DBF_NAMELEN); + fields[t].db_type = fieldc->dbf_type; + fields[t].db_flen = fieldc->dbf_flen; + fields[t].db_dec = fieldc->dbf_dec; + } + + dbh->db_offset = dbh->db_hlen; + dbh->db_fields = fields; + + if ((dbh->db_buff = (u_char *)malloc(dbh->db_rlen)) == NULL) { + /* release everything acquired so far, like the earlier error paths */ + close(file_no); + free(fields); + free(fieldc); + free(head); + free(dbh); + return (dbhead *)DBF_ERROR; + } + + free(fieldc); + free(head); + + return dbh; +} + +/* Rewrite the disk header at offset 0 from the in-memory dbhead, stamping it + with the current local date. Returns 0 on success, DBF_ERROR if the seek + or the write fails. */ +int dbf_write_head(dbhead *dbh) { + dbf_header head; + time_t now; + struct tm *dbf_time; + + if (lseek(dbh->db_fd, 0, SEEK_SET) == -1) { + return DBF_ERROR; + } + +/* fill up the diskheader */ + +/* Set dataarea of head to '\0' */ + memset(&head,'\0',sizeof(dbf_header)); + + head.dbh_dbt = DBH_NORMAL; + if (dbh->db_memo) head.dbh_dbt = DBH_MEMO; + + now = time((time_t *)NULL); + dbf_time = localtime(&now); + head.dbh_year = dbf_time->tm_year; + head.dbh_month = dbf_time->tm_mon + 1; /* Months since January + 1 */ + head.dbh_day = dbf_time->tm_mday; + + put_long(head.dbh_records, dbh->db_records); + put_short(head.dbh_hlen, dbh->db_hlen); + put_short(head.dbh_rlen, dbh->db_rlen); + + if (write(dbh->db_fd, &head, 
sizeof(dbf_header)) == -1 ) { + return DBF_ERROR; + } + + return 0; +} + +int dbf_put_fields(dbhead *dbh) { + dbf_field field; + u_long t; + u_char end = 0x0D; + + if (lseek(dbh->db_fd, sizeof(dbf_header), SEEK_SET) == -1) { + return DBF_ERROR; + } + +/* Set dataarea of field to '\0' */ + memset(&field,'\0',sizeof(dbf_field)); + + for (t = 0; t < dbh->db_nfields; t++) { + strncpy(field.dbf_name, dbh->db_fields[t].db_name, DBF_NAMELEN - 1); + field.dbf_type = dbh->db_fields[t].db_type; + field.dbf_flen = dbh->db_fields[t].db_flen; + field.dbf_dec = dbh->db_fields[t].db_dec; + + if (write(dbh->db_fd, &field, sizeof(dbf_field)) == -1) { + return DBF_ERROR; + } + } + + if (write(dbh->db_fd, &end, 1) == -1) { + return DBF_ERROR; + } + + return 0; +} + +int dbf_add_field(dbhead *dbh, u_char *name, u_char type, + u_char length, u_char dec) { +f_descr *ptr; +u_char *foo; +u_long size, field_no; + + size = (dbh->db_nfields + 1) * sizeof(f_descr); + if (!(ptr = (f_descr *) realloc(dbh->db_fields, size))) { + return DBF_ERROR; + } + dbh->db_fields = ptr; + + field_no = dbh->db_nfields; + strncpy(dbh->db_fields[field_no].db_name, name, DBF_NAMELEN); + dbh->db_fields[field_no].db_type = type; + dbh->db_fields[field_no].db_flen = length; + dbh->db_fields[field_no].db_dec = dec; + + dbh->db_nfields++; + dbh->db_hlen += sizeof(dbf_field); + dbh->db_rlen += length; + + if (!(foo = (u_char *) realloc(dbh->db_buff, dbh->db_rlen))) { + return DBF_ERROR; + } + + dbh->db_buff = foo; + + return 0; +} + +dbhead *dbf_open_new(u_char *name, int flags) { +dbhead *dbh; + + if (!(dbh = (dbhead *)malloc(sizeof(dbhead)))) { + return (dbhead *)DBF_ERROR; + } + + if (flags & O_CREAT) { + if ((dbh->db_fd = open(name, flags, DBF_FILE_MODE)) == -1) { + free(dbh); + return (dbhead *)DBF_ERROR; + } + } else { + if ((dbh->db_fd = open(name, flags)) == -1) { + free(dbh); + return (dbhead *)DBF_ERROR; + } + } + + + dbh->db_offset = 0; + dbh->db_memo = 0; + dbh->db_year = 0; + dbh->db_month = 0; + 
dbh->db_day = 0; + dbh->db_hlen = sizeof(dbf_header) + 1; + dbh->db_records = 0; + dbh->db_currec = 0; + dbh->db_rlen = 1; + dbh->db_nfields = 0; + dbh->db_buff = NULL; + dbh->db_fields = (f_descr *)NULL; + + return dbh; +} + +/* Close the file and release everything owned by dbh. db_fields is a single + block obtained from calloc()/realloc(), so it is freed once as a whole and + never element by element. free(NULL) is a no-op, which covers handles from + dbf_open_new() that never received fields or a record buffer. */ +void dbf_close(dbhead *dbh) { + close(dbh->db_fd); + + free(dbh->db_fields); + free(dbh->db_buff); + free(dbh); +} + +/* Read record number rec into the record buffer and copy every field into + fields[] (an array as returned by dbf_build_record). 'C' fields lose their + trailing bytes outside 0x21..0x7E, all other fields lose the leading ones. + Returns DBF_VALID, DBF_DELETED for a record flagged as deleted, or + DBF_ERROR when the seek or the read fails. */ +int dbf_get_record(dbhead *dbh, field *fields, u_long rec) { + u_char *data; + int t, i, offset; + u_char *dbffield, *end; + +/* calculate at which offset we have to read. *DON'T* forget the + 0x0D which separates field-descriptions from records! + + Note (april 5 1996): This turns out to be included in db_hlen +*/ + offset = dbh->db_hlen + (rec * dbh->db_rlen); + + if (lseek(dbh->db_fd, offset, SEEK_SET) == -1) { + lseek(dbh->db_fd, 0, SEEK_SET); + dbh->db_offset = 0; + return DBF_ERROR; + } + + dbh->db_offset = offset; + dbh->db_currec = rec; + data = dbh->db_buff; + + /* a failed or short read leaves stale data in the buffer: do not parse it */ + if (read(dbh->db_fd, data, dbh->db_rlen) != dbh->db_rlen) { + return DBF_ERROR; + } + + if (data[0] == DBF_DELETED) { + return DBF_DELETED; + } + + dbffield = &data[1]; + for (t = 0; t < dbh->db_nfields; t++) { + strncpy(fields[t].db_name, dbh->db_fields[t].db_name, DBF_NAMELEN); + fields[t].db_type = dbh->db_fields[t].db_type; + fields[t].db_flen = dbh->db_fields[t].db_flen; + fields[t].db_dec = dbh->db_fields[t].db_dec; + + if (fields[t].db_type == 'C') { + end = &dbffield[fields[t].db_flen - 1 ]; + i = fields[t].db_flen; + while (( i > 0) && ((*end < 0x21) || (*end > 0x7E))) { + end--; + i--; + } + strncpy(fields[t].db_contents, dbffield, i); + fields[t].db_contents[i] = '\0'; + } else { + end = dbffield; + i = fields[t].db_flen; + while (( i > 0) && ((*end < 0x21) || (*end > 0x7E))) { + end++; + i--; + } + strncpy(fields[t].db_contents, end, i); + fields[t].db_contents[i] = '\0'; + } + + dbffield += fields[t].db_flen; + } + + dbh->db_offset += dbh->db_rlen; + + return DBF_VALID; +} + +field 
*dbf_build_record(dbhead *dbh) { + /* Allocate one field per column of dbh, each with a contents buffer of + db_flen + 1 bytes, and copy name/type/length/decimals from the + in-memory descriptions. Returns the array (release it with + dbf_free_record) or (field *)DBF_ERROR when out of memory. */ + int t; + field *fields; + + if (!(fields = (field *)calloc(dbh->db_nfields, sizeof(field)))) { + return (field *)DBF_ERROR; + } + + for ( t = 0; t < dbh->db_nfields; t++) { + if (!(fields[t].db_contents = + (u_char *)malloc(dbh->db_fields[t].db_flen + 1))) { + /* Undo every buffer handed out so far, then the array itself. + Slots not reached yet are still zero from calloc(), and + free() ignores null pointers. */ + for (t = 0; t < dbh->db_nfields; t++) { + free(fields[t].db_contents); + } + free(fields); + return (field *)DBF_ERROR; + } + strncpy(fields[t].db_name, dbh->db_fields[t].db_name, DBF_NAMELEN); + fields[t].db_type = dbh->db_fields[t].db_type; + fields[t].db_flen = dbh->db_fields[t].db_flen; + fields[t].db_dec = dbh->db_fields[t].db_dec; + } + + return fields; +} + +/* Release a record array from dbf_build_record: the contents buffer of each + of the dbh->db_nfields entries first, then the array. */ +void dbf_free_record(dbhead *dbh, field *rec) { + int t; + + for ( t = 0; t < dbh->db_nfields; t++) { + free(rec[t].db_contents); + } + + free(rec); +} + +int dbf_put_record(dbhead *dbh, field *rec, u_long where) { + u_long offset, new, idx, t, h, length; + u_char *data, end = 0x1a; + double fl; + u_char foo[128], format[32]; + +/* offset: offset in file for this record + new: real offset after lseek + idx: index to which place we are inside the 'hardcore'-data for this + record + t: field-counter + data: the hardcore-data that is put on disk + h: index into the field-part in the hardcore-data + length: length of the data to copy + fl: a float used to get the right precision with real numbers + foo: copy of db_contents when field is not 'C' + format: sprintf format-string to get the right precision with real numbers + + NOTE: this declaration of 'foo' can cause overflow when the contents-field + is longer the 127 chars (which is highly unlikely, cos it is not used + in text-fields). +*/ +/* REMEMBER THAT THERE'S A 0x1A AT THE END OF THE FILE, SO DON'T + DO A SEEK_END WITH 0!!!!!! USE -1 !!!!!!!!!! 
+*/ + + if (where > dbh->db_records) { + if ((new = lseek(dbh->db_fd, -1, SEEK_END)) == -1) { + return DBF_ERROR; + } + dbh->db_records++; + } else { + offset = dbh->db_hlen + (where * dbh->db_rlen); + if ((new = lseek(dbh->db_fd, offset, SEEK_SET)) == -1) { + return DBF_ERROR; + } + } + + dbh->db_offset = new; + + data = dbh->db_buff; + +/* Set dataarea of data to ' ' (space) */ + memset(data,' ',dbh->db_rlen); + +/* data[0] = DBF_VALID; */ + + idx = 1; + for (t = 0; t < dbh->db_nfields; t++) { +/* if field is empty, don't do a thing */ + if (rec[t].db_contents[0] != '\0') { +/* Handle text */ + if (rec[t].db_type == 'C') { + if (strlen(rec[t].db_contents) > rec[t].db_flen) { + length = rec[t].db_flen; + } else { + length = strlen(rec[t].db_contents); + } + strncpy(data+idx, rec[t].db_contents, length); + } else { +/* Handle the rest */ +/* Numeric is special, because of real numbers */ + if ((rec[t].db_type == 'N') && (rec[t].db_dec != 0)) { + fl = atof(rec[t].db_contents); + sprintf(format, "%%.%df", rec[t].db_dec); + sprintf(foo, format, fl); + } else { + strcpy(foo, rec[t].db_contents); + } + if (strlen(foo) > rec[t].db_flen) { + length = rec[t].db_flen; + } else { + length = strlen(foo); + } + h = rec[t].db_flen - length; + strncpy(data+idx+h, foo, length); + } + } + idx += rec[t].db_flen; + } + + if (write(dbh->db_fd, data, dbh->db_rlen) == -1) { + return DBF_ERROR; + } + +/* There's a 0x1A at the end of a dbf-file */ + if (where == dbh->db_records) { + if (write(dbh->db_fd, &end, 1) == -1) { + return DBF_ERROR; + } + } + + dbh->db_offset += dbh->db_rlen; + + return 0; +} diff --git a/contrib/dbase/dbf.h b/contrib/dbase/dbf.h new file mode 100644 index 0000000000..a6e92517ea --- /dev/null +++ b/contrib/dbase/dbf.h @@ -0,0 +1,135 @@ +/* header-file for dbf.c + declares routines for reading and writing xBase-files (.dbf), and + associated structures + + Maarten Boekhold (boekhold@cindy.et.tudelft.nl) 29 oktober 1995 +*/ + +#ifndef _DBF_H +#define _DBF_H + 
+#include + +/********************************************************************** + + The DBF-part + +***********************************************************************/ + +#define DBF_FILE_MODE 0644 + +/* byte offsets for date in dbh_date */ + +#define DBH_DATE_YEAR 0 +#define DBH_DATE_MONTH 1 +#define DBH_DATE_DAY 2 + +/* maximum fieldname-length */ + +#define DBF_NAMELEN 11 + +/* magic-cookies for the file */ + +#define DBH_NORMAL 0x03 +#define DBH_MEMO 0x83 + +/* magic-cookies for the fields */ + +#define DBF_ERROR -1 +#define DBF_VALID 0x20 +#define DBF_DELETED 0x2A + +/* diskheader */ + +typedef struct { + u_char dbh_dbt; /* indentification field */ + u_char dbh_year; /* last modification-date */ + u_char dbh_month; + u_char dbh_day; + u_char dbh_records[4]; /* number of records */ + u_char dbh_hlen[2]; /* length of this header */ + u_char dbh_rlen[2]; /* length of a record */ + u_char dbh_stub[20]; /* misc stuff we don't need */ +} dbf_header; + +/* disk field-description */ + +typedef struct { + u_char dbf_name[DBF_NAMELEN]; /* field-name terminated with \0 */ + u_char dbf_type; /* field-type */ + u_char dbf_reserved[4]; /* some reserved stuff */ + u_char dbf_flen; /* field-length */ + u_char dbf_dec; /* number of decimal positions if + type is 'N' */ + u_char dbf_stub[14]; /* stuff we don't need */ +} dbf_field; + +/* memory field-description */ + +typedef struct { + u_char db_name[DBF_NAMELEN]; /* field-name terminated with \0 */ + u_char db_type; /* field-type */ + u_char db_flen; /* field-length */ + u_char db_dec; /* number of decimal positions */ +} f_descr; + +/* memory dfb-header */ + +typedef struct { + int db_fd; /* file-descriptor */ + u_long db_offset; /* current offset in file */ + u_char db_memo; /* memo-file present */ + u_char db_year; /* last update as YYMMDD */ + u_char db_month; + u_char db_day; + u_long db_hlen; /* length of the diskheader, for + calculating the offsets */ + u_long db_records; /* number of records */ + u_long 
db_currec; /* current record-number starting + at 0 */ + u_short db_rlen; /* length of the record */ + u_char db_nfields; /* number of fields */ + u_char *db_buff; /* record-buffer to save malloc()'s */ + f_descr *db_fields; /* pointer to an array of field- + descriptions */ +} dbhead; + +/* structure that contains everything a user wants from a field, including + the contents (in ASCII). Warning! db_flen may be bigger than the actual + length of db_name! This is because a field doesn't have to be completely + filled */ + +typedef struct { + u_char db_name[DBF_NAMELEN]; /* field-name terminated with \0 */ + u_char db_type; /* field-type */ + u_char db_flen; /* field-length */ + u_char db_dec; /* number of decimal positions */ + u_char* db_contents; /* contents of the field in ASCII */ +} field; + +/* prototypes for functions */ + +extern dbhead* dbf_open(u_char *file ,int flags); +extern int dbf_write_head(dbhead *dbh); +extern int dbf_put_fields(dbhead *dbh); +extern int dbf_add_field(dbhead *dbh, u_char *name, u_char type, + u_char length, u_char dec); +extern dbhead * dbf_open_new(u_char *name, int flags); +extern void dbf_close(dbhead *dbh); +extern int dbf_get_record(dbhead *dbh, field *fields, u_long rec); +extern field* dbf_build_record(dbhead *dbh); +extern void dbf_free_record(dbhead *dbh, field* fields); +extern int dbf_put_record(dbhead *dbh, field *rec, u_long where); + +/********************************************************************* + + The endian-part + +***********************************************************************/ + +extern long get_long(u_char *cp); +extern void put_long(u_char *cp, long lval); +extern short get_short(u_char *cp); +extern void put_short(u_char *cp, short lval); + +#endif /* _DBF_H */ diff --git a/contrib/dbase/dbf2pg.1 b/contrib/dbase/dbf2pg.1 new file mode 100644 index 0000000000..a377e489c8 --- /dev/null +++ b/contrib/dbase/dbf2pg.1 @@ -0,0 +1,116 @@ +.TH dbf2sql 1L \" -*- nroff -*- +.SH NAME +dbf2sql \- Insert 
xBase\-style .dbf\-files into a PostgreSQL\-table +.SH SYNOPSIS +.B dbf2pg [options] dbf-file +.br +.br +Options: +.br +[-v[v]] [-f] [-u | -l] [-c | -D] [-d database] [-t table] +[-h host] [-s oldname=newname[,oldname=newname]] +[-s start] [-e end] [-W] [-U username] [-B transaction_size] +[-F charset_from [-T charset_to]] + +.SH DESCRIPTION +This manual page documents the program +.BR dbf2pg. +It takes an xBase-style .dbf-file, and inserts it into the specified +database and table. +.SS OPTIONS +.TP +.I "\-v" +Display some status-messages. +.TP +.I "-vv" +Also display progress. +.TP +.I "-f" +Convert all field-names from the .dbf-file to lowercase. +.TP +.I "-u" +Convert the contents of all fields to uppercase. +.TP +.I "-l" +Convert the contents of all fields to lowercase. +.TP +.I "-c" +Create the table specified with +.IR \-t . +If this table already exists, first +.BR DROP +it. +.TP +.I "-D" +Delete the contents of the table specified with +.IR \-t . +Note that this table has to exist. An error is returned if this is not the +case. +.TP +.I "-W" +Ask for password. +.TP +.I "-d database" +Specify the database to use. An error is returned if this database does not +exist. Default is "test". +.TP +.I "-t table" +Specify the table to insert in. An error is returned if this table does not +exist. Default is "test". +.TP +.I "-h host" +Specify the host to which to connect. Default is "localhost". +.TP +.I "-s oldname=newname[,oldname=newname]" +Change the name of a field from +.BR oldname +to +.BR newname . +This is mainly used to avoid using reserved SQL-keywords. Example: +.br +.br +-s SELECT=SEL,COMMIT=doit +.br +.br +This is done +.BR before +the +.IR -f +operator has taken effect! +.TP +.I "-s start" +Specify the first record-number in the xBase-file we will insert. +.TP +.I "-e end" +Specify the last record-number in the xBase-file we will insert. +.TP +.I "-B transaction_size" +Specify the number of records per transaction, default is all records.
+.TP +.I "-U username" +Log as the specified user in the database. +.TP +.I "-F charset_from" +If specified, it converts the data from the specified charset. Example: +.br +.br +-F IBM437 +.br +.br +Consult your system documentation to see the convertions available. +.TP +.I "-T charset_to" +Together with +.I "-F charset_from" +, it converts the data to the specified charset. Default is "ISO-8859-1". +.SH ENVIRONMENT +This program is affected by the environment-variables as used +by +.B PostgresSQL. +See the documentation of PostgresSQL for more info. +.SH BUGS +Fields larger than 8192 characters are not supported and could break the +program. +.br +Some charset convertions could cause the output to be larger than the input +and could break the program. diff --git a/contrib/dbase/dbf2pg.c b/contrib/dbase/dbf2pg.c new file mode 100644 index 0000000000..53c0314afb --- /dev/null +++ b/contrib/dbase/dbf2pg.c @@ -0,0 +1,809 @@ +/* This program reads in an xbase-dbf file and sends 'inserts' to an + PostgreSQL-server with the records in the xbase-file + + M. Boekhold (boekhold@cindy.et.tudelft.nl) okt. 
1995 + oktober 1996: merged sources of dbf2msql.c and dbf2pg.c + oktober 1997: removed msql support +*/ +#define HAVE_TERMIOS_H +#define HAVE_ICONV_H + +#include +#include +#include +#include +#include +#include +#ifdef HAVE_TERMIOS_H +#include +#endif +#ifdef HAVE_ICONV_H +#include +#endif + +#include +#include "dbf.h" + +int verbose = 0, upper = 0, lower = 0, create = 0, fieldlow = 0; +int del = 0; +unsigned int begin = 0, end = 0; +unsigned int t_block = 0; +#ifdef HAVE_ICONV_H +char *charset_from=NULL; +char *charset_to="ISO-8859-1"; +iconv_t iconv_d; +char convert_charset_buff[8192]; +#endif + +char *host = NULL; +char *dbase = "test"; +char *table = "test"; +char *username = NULL; +char *password = NULL; +char *subarg = NULL; +char escape_buff[8192]; + +void do_substitute(char *subarg, dbhead *dbh); +inline void strtoupper(char *string); + +inline void strtolower(char *string); +void do_create(PGconn *, char*, dbhead*); +void do_inserts(PGconn *, char*, dbhead*); +int check_table(PGconn *, char*); + +char *Escape(char*); +#ifdef HAVE_ICONV_H +char *convert_charset(char *string); +#endif +void usage(void); +unsigned int isinteger(char *); + +char *simple_prompt(const char *prompt, int maxlen, int echo); + + +unsigned int isinteger(char *buff) { + char *i=buff; + + while (*i != '\0') { + if (i==buff) + if ((*i == '-') || + (*i == '+')) { + i++; continue; + } + if (!isdigit((int)*i)) return 0; + i++; + } + return 1; +} + +inline void strtoupper(char *string) { + while(*string != '\0') { + *string = toupper(*string); + string++; + } +} + +inline void strtolower(char *string) { + while(*string != '\0') { + *string = tolower(*string); + string++; + } +} + +/* FIXME: should this check for overflow? 
*/ +char *Escape(char *string) { + char *foo, *bar; + + foo = escape_buff; + + bar = string; + while (*bar != '\0') { + if ((*bar == '\t') || + (*bar == '\n') || + (*bar == '\\')) { + *foo++ = '\\'; + } + *foo++ = *bar++; + } + *foo = '\0'; + + return escape_buff; +} + +#ifdef HAVE_ICONV_H +char *convert_charset(char *string) { + size_t in_size, out_size, nconv; + char *in_ptr,*out_ptr; + + in_size=strlen(string)+1; + out_size=sizeof(convert_charset_buff); + in_ptr=string; + out_ptr=convert_charset_buff; + + iconv(iconv_d, NULL, &in_size, &out_ptr, &out_size); /* necessary to reset state information */ + while(in_size>0) + { + nconv = iconv(iconv_d, &in_ptr, &in_size, &out_ptr, &out_size); + if(nconv == (size_t) -1) + { + printf("WARNING: cannot convert charset of string \"%s\".\n", + string); + strcpy(convert_charset_buff,string); + return convert_charset_buff; + } + } + *out_ptr = 0; /* terminate output string */ + return convert_charset_buff; +} +#endif + +int check_table(PGconn *conn, char *table) { + char *q = "select relname from pg_class where " + "relkind='r' and relname !~* '^pg'"; + PGresult *res; + int i = 0; + + if (!(res = PQexec(conn, q))) { + printf("%s\n", PQerrorMessage(conn)); + return 0; + } + + for (i = 0; i < PQntuples(res); i++) { + if (!strcmp(table, PQgetvalue(res, i, PQfnumber(res, "relname")))) { + return 1; + } + } + + return 0; +} + +void usage(void){ + printf("\ +dbf2pg +usage: dbf2pg [-u | -l] [-h hostname] [-W] [-U username] + [-B transaction_size] [-F charset_from [-T charset_to]] + [-s oldname=newname[,oldname=newname[...]]] [-d dbase] + [-t table] [-c | -D] [-f] [-v[v]] dbf-file\n"); +} + +/* patch submitted by Jeffrey Y. 
Sue */ +/* Provides functionality for substituting dBase-fieldnames for others */ +/* Mainly for avoiding conflicts between fieldnames and SQL-reserved */ +/* keywords */ +/* subarg is a list "old=new[,old=new...]"; every field of dbh whose name */ +/* equals "old" is renamed to "new" (cut to DBF_NAMELEN-1 characters). */ +/* Unknown old names only produce a warning on stdout. */ + +void do_substitute(char *subarg, dbhead *dbh) +{ + /* NOTE: subarg is modified in this function */ + int i,bad; + char *p,*oldname,*newname; + if (!subarg) { + return; + } + if (verbose>1) { + printf("Substituting new field names\n"); + } + /* use strstr instead of strtok because of possible empty tokens */ + oldname = subarg; + while (oldname && strlen(oldname) && (p=strstr(oldname,"=")) ) { + *p = '\0'; /* mark end of oldname */ + newname = ++p; /* point past \0 of oldname */ + if (strlen(newname)) { /* if not an empty string */ + p = strstr(newname,","); + if (p) { + *p = '\0'; /* mark end of newname */ + p++; /* point past where the comma was */ + } + } + if (strlen(newname)>=DBF_NAMELEN) { + printf("Truncating new field name %s to %d chars\n", + newname,DBF_NAMELEN-1); + newname[DBF_NAMELEN-1] = '\0'; + } + bad = 1; + /* look the old name up among the fields of the open file */ + for (i=0;i<dbh->db_nfields;i++) { + if (strcmp(dbh->db_fields[i].db_name,oldname)==0) { + bad = 0; + strcpy(dbh->db_fields[i].db_name,newname); + if (verbose>1) { + printf("Substitute old:%s new:%s\n", + oldname,newname); + } + break; + } + } + if (bad) { + printf("Warning: old field name %s not found\n", + oldname); + } + oldname = p; + } +} /* do_substitute */ + +void do_create(PGconn *conn, char *table, dbhead *dbh) { + char *query; + char t[20]; + int i, length; + PGresult *res; + + if (verbose > 1) { + printf("Building CREATE-clause\n"); + } + + if (!(query = (char *)malloc( + (dbh->db_nfields * 40) + 29 + strlen(table)))) { + fprintf(stderr, "Memory allocation error in function do_create\n"); + PQfinish(conn); + close(dbh->db_fd); + free(dbh); + exit(1); + } + + sprintf(query, "CREATE TABLE %s (", table); + length = strlen(query); + for ( i = 0; i < dbh->db_nfields; i++) { + if (!strlen(dbh->db_fields[i].db_name)) { + continue; + /* skip field if length of name == 0 */ + } + 
if ((strlen(query) != length)) { + strcat(query, ","); + } + + if (fieldlow) + strtolower(dbh->db_fields[i].db_name); + + strcat(query, dbh->db_fields[i].db_name); + switch(dbh->db_fields[i].db_type) { + case 'D': + strcat(query, " date"); + break; + case 'C': + if (dbh->db_fields[i].db_flen > 1) { + strcat(query, " varchar"); + sprintf(t, "(%d)", + dbh->db_fields[i].db_flen); + strcat(query, t); + } else { + strcat(query, " char"); + } + break; + case 'N': + if (dbh->db_fields[i].db_dec != 0) { + strcat(query, " real"); + } else { + strcat(query, " int"); + } + break; + case 'L': + strcat(query, " char"); + break; + } + } + + strcat(query, ")"); + + if (verbose > 1) { + printf("Sending create-clause\n"); + printf("%s\n", query); + } + + if ((res = PQexec(conn, query)) == NULL) { + fprintf(stderr, "Error creating table!\n"); + fprintf(stderr, "Detailed report: %s\n", PQerrorMessage(conn)); + close(dbh->db_fd); + free(dbh); + free(query); + PQfinish(conn); + exit(1); + } + + PQclear(res); + free(query); +} + +/* FIXME: can be optimized to not use strcat, but it is worth the effort? */ +/* Send the records of dbh in the range [begin, end) to "table" through + COPY ... FROM stdin, wrapping every t_block records in one transaction. */ +void do_inserts(PGconn *conn, char *table, dbhead *dbh) { + PGresult *res; + field *fields; + int i, h, result; + char *query, *foo; + char pgdate[11]; /* "YYYY-MM-DD" is 10 characters plus the '\0' */ + + if (verbose > 1) { + printf("Inserting records\n"); + } + + h = 2; /* 2 because of terminating \n\0 */ + + for ( i = 0 ; i < dbh->db_nfields ; i++ ) { + h += dbh->db_fields[i].db_flen > 2 ? 
+ 2 * dbh->db_fields[i].db_flen : + 4; /* Escape() may put a backslash in front of every byte; this also leaves room for NULL values (\N) */ + h += 1; /* the delimiter */ + } + + /* make sure we can build the COPY query, note that we don't need to just + add this value, since the COPY query is a separate query (see below) */ + if (h < 17+strlen(table)) h = 17+strlen(table); + + if (!(query = (char *)malloc(h))) { + PQfinish(conn); + fprintf(stderr, + "Memory allocation error in function do_inserts (query)\n"); + close(dbh->db_fd); + free(dbh); + exit(1); + } + + if ((fields = dbf_build_record(dbh)) == (field *)DBF_ERROR) { + fprintf(stderr, + "Couldn't allocate memory for record in do_insert\n"); + PQfinish(conn); + free(query); + dbf_close(dbh); + exit(1); + } + + if (end == 0) /* "end" is a user option, if not specified, */ + end = dbh->db_records; /* then all records are processed. */ + + if (t_block == 0) /* user not specified transaction block size */ + t_block = end-begin; /* then we set it to be the full data */ + + if (t_block == 0) /* empty record range: t_block is a divisor below, */ + t_block = 1; /* so it must never stay at zero */ + + for (i = begin; i < end; i++) { + /* we need to start a new transaction and COPY statement */ + if (((i-begin) % t_block) == 0) { + if (verbose > 1) + fprintf(stderr, "Transaction: START\n"); + res = PQexec(conn, "BEGIN"); + if (res == NULL) { + fprintf(stderr, "Error starting transaction!\n"); + fprintf(stderr, "Detailed report: %s\n", PQerrorMessage(conn)); + exit(1); + } + sprintf(query, "COPY %s FROM stdin", table); + res = PQexec(conn, query); + if (res == NULL) { + fprintf(stderr, "Error starting COPY!\n"); + fprintf(stderr, "Detailed report: %s\n", PQerrorMessage(conn)); + exit(1); + } + } + + /* build line and submit */ + result = dbf_get_record(dbh, fields, i); + if (result == DBF_VALID) { + query[0] = '\0'; + for (h = 0; h < dbh->db_nfields; h++) { + if (!strlen(fields[h].db_name)) { + continue; + } + + if (h!=0) /* not for the first field! 
*/ + strcat(query, "\t"); /* COPY statement field separator */ + + if (upper) { + strtoupper(fields[h].db_contents); + } + if (lower) { + strtolower(fields[h].db_contents); + } + + foo = fields[h].db_contents; +#ifdef HAVE_ICONV_H + if(charset_from) + foo = convert_charset(foo); +#endif + foo = Escape(foo); + + /* handle the date first - liuk */ + if(fields[h].db_type=='D') { + if((strlen(foo)==8) && isinteger(foo)) { + sprintf(pgdate,"%c%c%c%c-%c%c-%c%c", + foo[0],foo[1],foo[2],foo[3], + foo[4],foo[5],foo[6],foo[7]); + strcat(query,pgdate); + } else { + /* empty field must be inserted as NULL value in this + way */ + strcat(query,"\\N"); + } + } + else if ((fields[h].db_type == 'N') && + (fields[h].db_dec == 0)){ + if (isinteger(foo)) { + strcat(query, foo); + } else { + strcat(query, "\\N"); + if (verbose) + fprintf(stderr, "Illegal numeric value found " + "in record %d, field \"%s\"\n", + i, fields[h].db_name); + } + } else { + strcat(query, foo); /* must be character */ + } + } + strcat(query, "\n"); + + if ((verbose > 1) && (( i % 100) == 0)) {/* Only show every 100 */ + printf("Inserting record %d\n", i); /* records. */ + } + PQputline(conn, query); + + } + /* we need to end this copy and transaction */ + if (((i-begin) % t_block) == t_block-1) { + if (verbose > 1) + fprintf(stderr, "Transaction: END\n"); + PQputline(conn, "\\.\n"); + if (PQendcopy(conn) != 0) { + fprintf(stderr, "Something went wrong while copying. 
Check " + "your tables!\n"); + exit(1); + } + res = PQexec(conn, "END"); + if (res == NULL) { + fprintf(stderr, "Error committing work!\n"); + fprintf(stderr, "Detailed report: %s\n", PQerrorMessage(conn)); + exit(1); + } + } + } + + /* last row copied in, end copy and transaction */ + /* remember, i is now 1 greater then when we left the loop */ + if (((i-begin) % t_block) != 0) { + if (verbose > 1) + fprintf(stderr, "Transaction: END\n"); + PQputline(conn, "\\.\n"); + + if (PQendcopy(conn) != 0) { + fprintf(stderr, "Something went wrong while copying. Check " + "your tables!\n"); + } + res = PQexec(conn, "END"); + if (res == NULL) { + fprintf(stderr, "Error committing work!\n"); + fprintf(stderr, "Detailed report: %s\n", PQerrorMessage(conn)); + exit(1); + } + } + dbf_free_record(dbh, fields); + + free(query); +} + +/* + * This is from Postgres 7.0.3 source tarball, utility program PSQL. + * + * simple_prompt + * + * Generalized function especially intended for reading in usernames and + * password interactively. Reads from stdin. + * + * prompt: The prompt to print + * maxlen: How many characters to accept + * echo: Set to false if you want to hide what is entered (for passwords) + * + * Returns a malloc()'ed string with the input (w/o trailing newline). 
+ */ +static int prompt_state; + +char * +simple_prompt(const char *prompt, int maxlen, int echo) +{ + int length; + char *destination; + +#ifdef HAVE_TERMIOS_H + struct termios t_orig, + t; + +#endif + + destination = (char *) malloc(maxlen + 2); + if (!destination) + return NULL; + if (prompt) + fputs(prompt, stderr); + + prompt_state = 1; + +#ifdef HAVE_TERMIOS_H + if (!echo) + { + tcgetattr(0, &t); + t_orig = t; + t.c_lflag &= ~ECHO; + tcsetattr(0, TCSADRAIN, &t); + } +#endif + + fgets(destination, maxlen, stdin); + +#ifdef HAVE_TERMIOS_H + if (!echo) + { + tcsetattr(0, TCSADRAIN, &t_orig); + puts(""); + } +#endif + + prompt_state = 0; + + length = strlen(destination); + if (length > 0 && destination[length - 1] != '\n') + { + /* eat rest of the line */ + char buf[512]; + + do + { + fgets(buf, 512, stdin); + } while (buf[strlen(buf) - 1] != '\n'); + } + + if (length > 0 && destination[length - 1] == '\n') + /* remove trailing newline */ + destination[length - 1] = '\0'; + + return destination; +} + + +int main(int argc, char **argv) +{ + PGconn *conn; + int i; + extern int optind; + extern char *optarg; + char *query; + dbhead *dbh; + + while ((i = getopt(argc, argv, "DWflucvh:b:e:d:t:s:B:U:F:T:")) != EOF) { + switch (i) { + case 'D': + if (create) { + usage(); + printf("Can't use -c and -D at the same time!\n"); + exit(1); + } + del = 1; + break; + case 'W': + password=simple_prompt("Password: ",100,0); + break; + case 'f': + fieldlow=1; + break; + case 'v': + verbose++; + break; + case 'c': + if (del) { + usage(); + printf("Can't use -c and -D at the same time!\n"); + exit(1); + } + create=1; + break; + case 'l': + lower=1; + break; + case 'u': + if (lower) { + usage(); + printf("Can't use -u and -l at the same time!\n"); + exit(1); + } + upper=1; + break; + case 'b': + begin = atoi(optarg); + break; + case 'e': + end = atoi(optarg); + break; + case 'h': + host = (char *)strdup(optarg); + break; + case 'd': + dbase = (char *)strdup(optarg); + break; + case 
't': + table = (char *)strdup(optarg); + break; + case 's': + subarg = (char *)strdup(optarg); + break; + case 'B': + t_block = atoi(optarg); + break; + case 'U': + username = (char *)strdup(optarg); + break; + case 'F': + charset_from = (char *)strdup(optarg); + break; + case 'T': + charset_to = (char *)strdup(optarg); + break; + case ':': + usage(); + printf("missing argument!\n"); + exit(1); + break; + case '?': + usage(); + /* FIXME: Ivan thinks this is bad: printf("unknown argument: %s\n", argv[0]); */ + exit(1); + break; + default: + break; + } + } + + argc -= optind; + argv = &argv[optind]; + + if (argc != 1) { + usage(); + if(username) + free(username); + if(password) + free(password); + exit(1); + } + +#ifdef HAVE_ICONV_H + if(charset_from) + { + if(verbose>1) + printf("Setting conversion from charset \"%s\" to \"%s\".\n", + charset_from,charset_to); + iconv_d = iconv_open(charset_to,charset_from); + if(iconv_d == (iconv_t) -1) + { + printf("Cannot convert from charset \"%s\" to charset \"%s\".\n", + charset_from,charset_to); + exit(1); + } + } +#endif + + if (verbose > 1) { + printf("Opening dbf-file\n"); + } + + if ((dbh = dbf_open(argv[0], O_RDONLY)) == (dbhead *)-1) { + fprintf(stderr, "Couldn't open xbase-file %s\n", argv[0]); + if(username) + free(username); + if(password) + free(password); + if(charset_from) + iconv_close(iconv_d); + exit(1); + } + + if (fieldlow) + for ( i = 0 ; i < dbh->db_nfields ; i++ ) + strtolower(dbh->db_fields[i].db_name); + + if (verbose) { + printf("dbf-file: %s, PG-dbase: %s, PG-table: %s\n", argv[0], + dbase, + table); + printf("Number of records: %ld\n", dbh->db_records); + printf("NAME:\t\tLENGTH:\t\tTYPE:\n"); + printf("-------------------------------------\n"); + for (i = 0; i < dbh->db_nfields ; i++) { + printf("%-12s\t%7d\t\t%5c\n",dbh->db_fields[i].db_name, + dbh->db_fields[i].db_flen, + dbh->db_fields[i].db_type); + } + } + + if (verbose > 1) { + printf("Making connection to PG-server\n"); + } + + conn = 
PQsetdbLogin(host,NULL,NULL,NULL, dbase, username, password); + if (PQstatus(conn) != CONNECTION_OK) { + fprintf(stderr, "Couldn't get a connection with the "); + fprintf(stderr, "designated host!\n"); + fprintf(stderr, "Detailed report: %s\n", PQerrorMessage(conn)); + close(dbh->db_fd); + free(dbh); + if(username) + free(username); + if(password) + free(password); + if(charset_from) + iconv_close(iconv_d); + exit(1); + } + +/* Substitute field names */ + do_substitute(subarg, dbh); + +/* create table if specified, else check if target table exists */ + if (!create) { + if (!check_table(conn, table)) { + printf("Table does not exist!\n"); + if(username) + free(username); + if(password) + free(password); + if(charset_from) + iconv_close(iconv_d); + exit(1); + } + if (del) { + if (!(query = (char *)malloc(13 + strlen(table)))) { + printf("Memory-allocation error in main (delete)!\n"); + close(dbh->db_fd); + free(dbh); + PQfinish(conn); + if(username) + free(username); + if(password) + free(password); + if(charset_from) + iconv_close(iconv_d); + exit(1); + } + if (verbose > 1) { + printf("Deleting from original table\n"); + } + sprintf(query, "DELETE FROM %s", table); + PQexec(conn, query); + free(query); + } + } else { + if (!(query = (char *)malloc(12 + strlen(table)))) { + printf("Memory-allocation error in main (drop)!\n"); + close(dbh->db_fd); + free(dbh); + PQfinish(conn); + if(username) + free(username); + if(password) + free(password); + if(charset_from) + iconv_close(iconv_d); + exit(1); + } + if (verbose > 1) { + printf("Dropping original table (if one exists)\n"); + } + sprintf(query, "DROP TABLE %s", table); + PQexec(conn, query); + free(query); + +/* Build a CREATE-clause +*/ + do_create(conn, table, dbh); + } + +/* Build an INSERT-clause +*/ + PQexec(conn, "SET DATESTYLE TO 'ISO';"); + do_inserts(conn, table, dbh); + + if (verbose > 1) { + printf("Closing up....\n"); + } + + close(dbh->db_fd); + free(dbh); + PQfinish(conn); + if(username) + 
free(username); + if(password) + free(password); + if(charset_from) + iconv_close(iconv_d); + exit(0); +} diff --git a/contrib/dbase/endian.c b/contrib/dbase/endian.c new file mode 100644 index 0000000000..55c53d8980 --- /dev/null +++ b/contrib/dbase/endian.c @@ -0,0 +1,45 @@ +/* Maarten Boekhold (boekhold@cindy.et.tudelft.nl) oktober 1995 */ + +#include +#include "dbf.h" +/* + * routine to change little endian long to host long + */ +long get_long(u_char *cp) +{ + long ret; + + ret = *cp++; + ret += ((*cp++)<<8); + ret += ((*cp++)<<16); + ret += ((*cp++)<<24); + + return ret; +} + +void put_long(u_char *cp, long lval) +{ + cp[0] = lval & 0xff; + cp[1] = (lval >> 8) & 0xff; + cp[2] = (lval >> 16) & 0xff; + cp[3] = (lval >> 24) & 0xff; +} + +/* + * routine to change little endian short to host short + */ +short get_short(u_char *cp) +{ + short ret; + + ret = *cp++; + ret += ((*cp++)<<8); + + return ret; +} + +void put_short(u_char *cp, short sval) +{ + cp[0] = sval & 0xff; + cp[1] = (sval >> 8) & 0xff; +}