// Copyright 2015 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package bigquery import ( "errors" "fmt" "reflect" "cloud.google.com/go/internal/atomiccache" bq "google.golang.org/api/bigquery/v2" ) // Schema describes the fields in a table or query result. type Schema []*FieldSchema type FieldSchema struct { // The field name. // Must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_), // and must start with a letter or underscore. // The maximum length is 128 characters. Name string // A description of the field. The maximum length is 16,384 characters. Description string // Whether the field may contain multiple values. Repeated bool // Whether the field is required. Ignored if Repeated is true. Required bool // The field data type. If Type is Record, then this field contains a nested schema, // which is described by Schema. Type FieldType // Describes the nested schema if Type is set to Record. Schema Schema } func (fs *FieldSchema) asTableFieldSchema() *bq.TableFieldSchema { tfs := &bq.TableFieldSchema{ Description: fs.Description, Name: fs.Name, Type: string(fs.Type), } if fs.Repeated { tfs.Mode = "REPEATED" } else if fs.Required { tfs.Mode = "REQUIRED" } // else leave as default, which is interpreted as NULLABLE. for _, f := range fs.Schema { tfs.Fields = append(tfs.Fields, f.asTableFieldSchema()) } return tfs } func (s Schema) asTableSchema() *bq.TableSchema { var fields []*bq.TableFieldSchema for _, f := range s { fields = append(fields, f.asTableFieldSchema()) } return &bq.TableSchema{Fields: fields} } func convertTableFieldSchema(tfs *bq.TableFieldSchema) *FieldSchema { fs := &FieldSchema{ Description: tfs.Description, Name: tfs.Name, Repeated: tfs.Mode == "REPEATED", Required: tfs.Mode == "REQUIRED", Type: FieldType(tfs.Type), } for _, f := range tfs.Fields { fs.Schema = append(fs.Schema, convertTableFieldSchema(f)) } return fs } func convertTableSchema(ts *bq.TableSchema) Schema { if ts == nil { return nil } var s Schema for _, f := range ts.Fields { s = append(s, convertTableFieldSchema(f)) } return s } type FieldType string const ( StringFieldType FieldType = "STRING" BytesFieldType FieldType = "BYTES" IntegerFieldType FieldType = "INTEGER" FloatFieldType FieldType = "FLOAT" BooleanFieldType FieldType = "BOOLEAN" TimestampFieldType FieldType = "TIMESTAMP" RecordFieldType FieldType = "RECORD" DateFieldType FieldType = "DATE" TimeFieldType FieldType = "TIME" DateTimeFieldType FieldType = "DATETIME" ) var ( errNoStruct = errors.New("bigquery: can only infer schema from struct or pointer to struct") errUnsupportedFieldType = errors.New("bigquery: unsupported type of field in struct") errInvalidFieldName = errors.New("bigquery: invalid name of field in struct") ) var typeOfByteSlice = reflect.TypeOf([]byte{}) // InferSchema tries to derive a BigQuery schema from the supplied struct value. // NOTE: All fields in the returned Schema are configured to be required, // unless the corresponding field in the supplied struct is a slice or array. // // It is considered an error if the struct (including nested structs) contains // any exported fields that are pointers or one of the following types: // uint, uint64, uintptr, map, interface, complex64, complex128, func, chan. // In these cases, an error will be returned. // Future versions may handle these cases without error. // // Recursively defined structs are also disallowed. func InferSchema(st interface{}) (Schema, error) { return inferSchemaReflectCached(reflect.TypeOf(st)) } var schemaCache atomiccache.Cache type cacheVal struct { schema Schema err error } func inferSchemaReflectCached(t reflect.Type) (Schema, error) { cv := schemaCache.Get(t, func() interface{} { s, err := inferSchemaReflect(t) return cacheVal{s, err} }).(cacheVal) return cv.schema, cv.err } func inferSchemaReflect(t reflect.Type) (Schema, error) { rec, err := hasRecursiveType(t, nil) if err != nil { return nil, err } if rec { return nil, fmt.Errorf("bigquery: schema inference for recursive type %s", t) } return inferStruct(t) } func inferStruct(t reflect.Type) (Schema, error) { switch t.Kind() { case reflect.Ptr: if t.Elem().Kind() != reflect.Struct { return nil, errNoStruct } t = t.Elem() fallthrough case reflect.Struct: return inferFields(t) default: return nil, errNoStruct } } // inferFieldSchema infers the FieldSchema for a Go type func inferFieldSchema(rt reflect.Type) (*FieldSchema, error) { switch rt { case typeOfByteSlice: return &FieldSchema{Required: true, Type: BytesFieldType}, nil case typeOfGoTime: return &FieldSchema{Required: true, Type: TimestampFieldType}, nil case typeOfDate: return &FieldSchema{Required: true, Type: DateFieldType}, nil case typeOfTime: return &FieldSchema{Required: true, Type: TimeFieldType}, nil case typeOfDateTime: return &FieldSchema{Required: true, Type: DateTimeFieldType}, nil } if isSupportedIntType(rt) { return &FieldSchema{Required: true, Type: IntegerFieldType}, nil } switch rt.Kind() { case reflect.Slice, reflect.Array: et := rt.Elem() if et != typeOfByteSlice && (et.Kind() == reflect.Slice || et.Kind() == reflect.Array) { // Multi dimensional slices/arrays are not supported by BigQuery return nil, errUnsupportedFieldType } f, err := inferFieldSchema(et) if err != nil { return nil, err } f.Repeated = true f.Required = false return f, nil case reflect.Struct, reflect.Ptr: nested, err := inferStruct(rt) if err != nil { return nil, err } return &FieldSchema{Required: true, Type: RecordFieldType, Schema: nested}, nil case reflect.String: return &FieldSchema{Required: true, Type: StringFieldType}, nil case reflect.Bool: return &FieldSchema{Required: true, Type: BooleanFieldType}, nil case reflect.Float32, reflect.Float64: return &FieldSchema{Required: true, Type: FloatFieldType}, nil default: return nil, errUnsupportedFieldType } } // inferFields extracts all exported field types from struct type. func inferFields(rt reflect.Type) (Schema, error) { var s Schema fields, err := fieldCache.Fields(rt) if err != nil { return nil, err } for _, field := range fields { f, err := inferFieldSchema(field.Type) if err != nil { return nil, err } f.Name = field.Name s = append(s, f) } return s, nil } // isSupportedIntType reports whether t can be properly represented by the // BigQuery INTEGER/INT64 type. func isSupportedIntType(t reflect.Type) bool { switch t.Kind() { case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int, reflect.Uint8, reflect.Uint16, reflect.Uint32: return true default: return false } } // typeList is a linked list of reflect.Types. type typeList struct { t reflect.Type next *typeList } func (l *typeList) has(t reflect.Type) bool { for l != nil { if l.t == t { return true } l = l.next } return false } // hasRecursiveType reports whether t or any type inside t refers to itself, directly or indirectly, // via exported fields. (Schema inference ignores unexported fields.) func hasRecursiveType(t reflect.Type, seen *typeList) (bool, error) { for t.Kind() == reflect.Ptr || t.Kind() == reflect.Slice || t.Kind() == reflect.Array { t = t.Elem() } if t.Kind() != reflect.Struct { return false, nil } if seen.has(t) { return true, nil } fields, err := fieldCache.Fields(t) if err != nil { return false, err } seen = &typeList{t, seen} // Because seen is a linked list, additions to it from one field's // recursive call will not affect the value for subsequent fields' calls. for _, field := range fields { ok, err := hasRecursiveType(field.Type, seen) if err != nil { return false, err } if ok { return true, nil } } return false, nil }