// Copyright 2016 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package bigquery import ( "io" bq "google.golang.org/api/bigquery/v2" ) // A ReaderSource is a source for a load operation that gets // data from an io.Reader. type ReaderSource struct { r io.Reader FileConfig } // NewReaderSource creates a ReaderSource from an io.Reader. You may // optionally configure properties on the ReaderSource that describe the // data being read, before passing it to Table.LoaderFrom. func NewReaderSource(r io.Reader) *ReaderSource { return &ReaderSource{r: r} } func (r *ReaderSource) populateInsertJobConfForLoad(conf *insertJobConf) { conf.media = r.r r.FileConfig.populateLoadConfig(conf.job.Configuration.Load) } // FileConfig contains configuration options that pertain to files, typically // text files that require interpretation to be used as a BigQuery table. A // file may live in Google Cloud Storage (see GCSReference), or it may be // loaded into a table via the Table.LoaderFromReader. type FileConfig struct { // SourceFormat is the format of the GCS data to be read. // Allowed values are: CSV, Avro, JSON, DatastoreBackup. The default is CSV. SourceFormat DataFormat // FieldDelimiter is the separator for fields in a CSV file, used when // reading or exporting data. The default is ",". FieldDelimiter string // The number of rows at the top of a CSV file that BigQuery will skip when // reading data. SkipLeadingRows int64 // AllowJaggedRows causes missing trailing optional columns to be tolerated // when reading CSV data. Missing values are treated as nulls. AllowJaggedRows bool // AllowQuotedNewlines sets whether quoted data sections containing // newlines are allowed when reading CSV data. AllowQuotedNewlines bool // Indicates if we should automatically infer the options and // schema for CSV and JSON sources. AutoDetect bool // Encoding is the character encoding of data to be read. Encoding Encoding // MaxBadRecords is the maximum number of bad records that will be ignored // when reading data. MaxBadRecords int64 // IgnoreUnknownValues causes values not matching the schema to be // tolerated. Unknown values are ignored. For CSV this ignores extra values // at the end of a line. For JSON this ignores named values that do not // match any column name. If this field is not set, records containing // unknown values are treated as bad records. The MaxBadRecords field can // be used to customize how bad records are handled. IgnoreUnknownValues bool // Schema describes the data. It is required when reading CSV or JSON data, // unless the data is being loaded into a table that already exists. Schema Schema // Quote is the value used to quote data sections in a CSV file. The // default quotation character is the double quote ("), which is used if // both Quote and ForceZeroQuote are unset. // To specify that no character should be interpreted as a quotation // character, set ForceZeroQuote to true. // Only used when reading data. Quote string ForceZeroQuote bool } // quote returns the CSV quote character, or nil if unset. func (fc *FileConfig) quote() *string { if fc.ForceZeroQuote { quote := "" return "e } if fc.Quote == "" { return nil } return &fc.Quote } func (fc *FileConfig) populateLoadConfig(conf *bq.JobConfigurationLoad) { conf.SkipLeadingRows = fc.SkipLeadingRows conf.SourceFormat = string(fc.SourceFormat) conf.Autodetect = fc.AutoDetect conf.AllowJaggedRows = fc.AllowJaggedRows conf.AllowQuotedNewlines = fc.AllowQuotedNewlines conf.Encoding = string(fc.Encoding) conf.FieldDelimiter = fc.FieldDelimiter conf.IgnoreUnknownValues = fc.IgnoreUnknownValues conf.MaxBadRecords = fc.MaxBadRecords if fc.Schema != nil { conf.Schema = fc.Schema.asTableSchema() } conf.Quote = fc.quote() } func (fc *FileConfig) populateExternalDataConfig(conf *bq.ExternalDataConfiguration) { format := fc.SourceFormat if format == "" { // Format must be explicitly set for external data sources. format = CSV } // TODO(jba): support AutoDetect. conf.IgnoreUnknownValues = fc.IgnoreUnknownValues conf.MaxBadRecords = fc.MaxBadRecords conf.SourceFormat = string(format) if fc.Schema != nil { conf.Schema = fc.Schema.asTableSchema() } if format == CSV { conf.CsvOptions = &bq.CsvOptions{ AllowJaggedRows: fc.AllowJaggedRows, AllowQuotedNewlines: fc.AllowQuotedNewlines, Encoding: string(fc.Encoding), FieldDelimiter: fc.FieldDelimiter, SkipLeadingRows: fc.SkipLeadingRows, Quote: fc.quote(), } } } // DataFormat describes the format of BigQuery table data. type DataFormat string // Constants describing the format of BigQuery table data. const ( CSV DataFormat = "CSV" Avro DataFormat = "AVRO" JSON DataFormat = "NEWLINE_DELIMITED_JSON" DatastoreBackup DataFormat = "DATASTORE_BACKUP" ) // Encoding specifies the character encoding of data to be loaded into BigQuery. // See https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.encoding // for more details about how this is used. type Encoding string const ( UTF_8 Encoding = "UTF-8" ISO_8859_1 Encoding = "ISO-8859-1" )