1// Copyright 2015 Google LLC 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package bigquery 16 17import ( 18 "encoding/json" 19 "errors" 20 "fmt" 21 "reflect" 22 "sync" 23 24 bq "google.golang.org/api/bigquery/v2" 25) 26 27// Schema describes the fields in a table or query result. 28type Schema []*FieldSchema 29 30// Relax returns a version of the schema where no fields are marked 31// as Required. 32func (s Schema) Relax() Schema { 33 var out Schema 34 for _, v := range s { 35 relaxed := &FieldSchema{ 36 Name: v.Name, 37 Description: v.Description, 38 Repeated: v.Repeated, 39 Required: false, 40 Type: v.Type, 41 Schema: v.Schema.Relax(), 42 } 43 out = append(out, relaxed) 44 } 45 return out 46} 47 48// FieldSchema describes a single field. 49type FieldSchema struct { 50 // The field name. 51 // Must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_), 52 // and must start with a letter or underscore. 53 // The maximum length is 128 characters. 54 Name string 55 56 // A description of the field. The maximum length is 16,384 characters. 57 Description string 58 59 // Whether the field may contain multiple values. 60 Repeated bool 61 // Whether the field is required. Ignored if Repeated is true. 62 Required bool 63 64 // The field data type. If Type is Record, then this field contains a nested schema, 65 // which is described by Schema. 66 Type FieldType 67 // Describes the nested schema if Type is set to Record. 
68 Schema Schema 69} 70 71func (fs *FieldSchema) toBQ() *bq.TableFieldSchema { 72 tfs := &bq.TableFieldSchema{ 73 Description: fs.Description, 74 Name: fs.Name, 75 Type: string(fs.Type), 76 } 77 78 if fs.Repeated { 79 tfs.Mode = "REPEATED" 80 } else if fs.Required { 81 tfs.Mode = "REQUIRED" 82 } // else leave as default, which is interpreted as NULLABLE. 83 84 for _, f := range fs.Schema { 85 tfs.Fields = append(tfs.Fields, f.toBQ()) 86 } 87 88 return tfs 89} 90 91func (s Schema) toBQ() *bq.TableSchema { 92 var fields []*bq.TableFieldSchema 93 for _, f := range s { 94 fields = append(fields, f.toBQ()) 95 } 96 return &bq.TableSchema{Fields: fields} 97} 98 99func bqToFieldSchema(tfs *bq.TableFieldSchema) *FieldSchema { 100 fs := &FieldSchema{ 101 Description: tfs.Description, 102 Name: tfs.Name, 103 Repeated: tfs.Mode == "REPEATED", 104 Required: tfs.Mode == "REQUIRED", 105 Type: FieldType(tfs.Type), 106 } 107 108 for _, f := range tfs.Fields { 109 fs.Schema = append(fs.Schema, bqToFieldSchema(f)) 110 } 111 return fs 112} 113 114func bqToSchema(ts *bq.TableSchema) Schema { 115 if ts == nil { 116 return nil 117 } 118 var s Schema 119 for _, f := range ts.Fields { 120 s = append(s, bqToFieldSchema(f)) 121 } 122 return s 123} 124 125// FieldType is the type of field. 126type FieldType string 127 128const ( 129 // StringFieldType is a string field type. 130 StringFieldType FieldType = "STRING" 131 // BytesFieldType is a bytes field type. 132 BytesFieldType FieldType = "BYTES" 133 // IntegerFieldType is a integer field type. 134 IntegerFieldType FieldType = "INTEGER" 135 // FloatFieldType is a float field type. 136 FloatFieldType FieldType = "FLOAT" 137 // BooleanFieldType is a boolean field type. 138 BooleanFieldType FieldType = "BOOLEAN" 139 // TimestampFieldType is a timestamp field type. 140 TimestampFieldType FieldType = "TIMESTAMP" 141 // RecordFieldType is a record field type. It is typically used to create columns with repeated or nested data. 
142 RecordFieldType FieldType = "RECORD" 143 // DateFieldType is a date field type. 144 DateFieldType FieldType = "DATE" 145 // TimeFieldType is a time field type. 146 TimeFieldType FieldType = "TIME" 147 // DateTimeFieldType is a datetime field type. 148 DateTimeFieldType FieldType = "DATETIME" 149 // NumericFieldType is a numeric field type. Numeric types include integer types, floating point types and the 150 // NUMERIC data type. 151 NumericFieldType FieldType = "NUMERIC" 152 // GeographyFieldType is a string field type. Geography types represent a set of points 153 // on the Earth's surface, represented in Well Known Text (WKT) format. 154 GeographyFieldType FieldType = "GEOGRAPHY" 155) 156 157var ( 158 errEmptyJSONSchema = errors.New("bigquery: empty JSON schema") 159 fieldTypes = map[FieldType]bool{ 160 StringFieldType: true, 161 BytesFieldType: true, 162 IntegerFieldType: true, 163 FloatFieldType: true, 164 BooleanFieldType: true, 165 TimestampFieldType: true, 166 RecordFieldType: true, 167 DateFieldType: true, 168 TimeFieldType: true, 169 DateTimeFieldType: true, 170 NumericFieldType: true, 171 GeographyFieldType: true, 172 } 173) 174 175var typeOfByteSlice = reflect.TypeOf([]byte{}) 176 177// InferSchema tries to derive a BigQuery schema from the supplied struct value. 178// Each exported struct field is mapped to a field in the schema. 179// 180// The following BigQuery types are inferred from the corresponding Go types. 181// (This is the same mapping as that used for RowIterator.Next.) Fields inferred 182// from these types are marked required (non-nullable). 183// 184// STRING string 185// BOOL bool 186// INTEGER int, int8, int16, int32, int64, uint8, uint16, uint32 187// FLOAT float32, float64 188// BYTES []byte 189// TIMESTAMP time.Time 190// DATE civil.Date 191// TIME civil.Time 192// DATETIME civil.DateTime 193// NUMERIC *big.Rat 194// 195// The big.Rat type supports numbers of arbitrary size and precision. 
// Values will be rounded to 9 digits after the decimal point before being
// transmitted to BigQuery. See
// https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#numeric-type
// for more on NUMERIC.
//
// A Go slice or array type is inferred to be a BigQuery repeated field of the
// element type. The element type must be one of the above listed types.
//
// Due to the lack of a unique native Go type for GEOGRAPHY, there is no schema
// inference to GEOGRAPHY at this time.
//
// Nullable fields are inferred from the NullXXX types, declared in this package:
//
//   STRING      NullString
//   BOOL        NullBool
//   INTEGER     NullInt64
//   FLOAT       NullFloat64
//   TIMESTAMP   NullTimestamp
//   DATE        NullDate
//   TIME        NullTime
//   DATETIME    NullDateTime
//   GEOGRAPHY   NullGeography
//
// For a nullable BYTES field, use the type []byte and tag the field "nullable" (see below).
// For a nullable NUMERIC field, use the type *big.Rat and tag the field "nullable".
//
// A struct field that is of struct type is inferred to be a required field of type
// RECORD with a schema inferred recursively. For backwards compatibility, a field of
// type pointer to struct is also inferred to be required. To get a nullable RECORD
// field, use the "nullable" tag (see below).
//
// InferSchema returns an error if any of the examined fields is of type uint,
// uint64, uintptr, map, interface, complex64, complex128, func, or chan. Future
// versions may handle these cases without error.
//
// Recursively defined structs are also disallowed.
//
// Struct fields may be tagged in a way similar to the encoding/json package.
// A tag of the form
//     bigquery:"name"
// uses "name" instead of the struct field name as the BigQuery field name.
// A tag of the form
//     bigquery:"-"
// omits the field from the inferred schema.
239// The "nullable" option marks the field as nullable (not required). It is only 240// needed for []byte, *big.Rat and pointer-to-struct fields, and cannot appear on other 241// fields. In this example, the Go name of the field is retained: 242// bigquery:",nullable" 243func InferSchema(st interface{}) (Schema, error) { 244 return inferSchemaReflectCached(reflect.TypeOf(st)) 245} 246 247var schemaCache sync.Map 248 249type cacheVal struct { 250 schema Schema 251 err error 252} 253 254func inferSchemaReflectCached(t reflect.Type) (Schema, error) { 255 var cv cacheVal 256 v, ok := schemaCache.Load(t) 257 if ok { 258 cv = v.(cacheVal) 259 } else { 260 s, err := inferSchemaReflect(t) 261 cv = cacheVal{s, err} 262 schemaCache.Store(t, cv) 263 } 264 return cv.schema, cv.err 265} 266 267func inferSchemaReflect(t reflect.Type) (Schema, error) { 268 rec, err := hasRecursiveType(t, nil) 269 if err != nil { 270 return nil, err 271 } 272 if rec { 273 return nil, fmt.Errorf("bigquery: schema inference for recursive type %s", t) 274 } 275 return inferStruct(t) 276} 277 278func inferStruct(t reflect.Type) (Schema, error) { 279 switch t.Kind() { 280 case reflect.Ptr: 281 if t.Elem().Kind() != reflect.Struct { 282 return nil, noStructError{t} 283 } 284 t = t.Elem() 285 fallthrough 286 287 case reflect.Struct: 288 return inferFields(t) 289 default: 290 return nil, noStructError{t} 291 } 292} 293 294// inferFieldSchema infers the FieldSchema for a Go type 295func inferFieldSchema(fieldName string, rt reflect.Type, nullable bool) (*FieldSchema, error) { 296 // Only []byte and struct pointers can be tagged nullable. 
297 if nullable && !(rt == typeOfByteSlice || rt.Kind() == reflect.Ptr && rt.Elem().Kind() == reflect.Struct) { 298 return nil, badNullableError{fieldName, rt} 299 } 300 switch rt { 301 case typeOfByteSlice: 302 return &FieldSchema{Required: !nullable, Type: BytesFieldType}, nil 303 case typeOfGoTime: 304 return &FieldSchema{Required: true, Type: TimestampFieldType}, nil 305 case typeOfDate: 306 return &FieldSchema{Required: true, Type: DateFieldType}, nil 307 case typeOfTime: 308 return &FieldSchema{Required: true, Type: TimeFieldType}, nil 309 case typeOfDateTime: 310 return &FieldSchema{Required: true, Type: DateTimeFieldType}, nil 311 case typeOfRat: 312 return &FieldSchema{Required: !nullable, Type: NumericFieldType}, nil 313 } 314 if ft := nullableFieldType(rt); ft != "" { 315 return &FieldSchema{Required: false, Type: ft}, nil 316 } 317 if isSupportedIntType(rt) || isSupportedUintType(rt) { 318 return &FieldSchema{Required: true, Type: IntegerFieldType}, nil 319 } 320 switch rt.Kind() { 321 case reflect.Slice, reflect.Array: 322 et := rt.Elem() 323 if et != typeOfByteSlice && (et.Kind() == reflect.Slice || et.Kind() == reflect.Array) { 324 // Multi dimensional slices/arrays are not supported by BigQuery 325 return nil, unsupportedFieldTypeError{fieldName, rt} 326 } 327 if nullableFieldType(et) != "" { 328 // Repeated nullable types are not supported by BigQuery. 
329 return nil, unsupportedFieldTypeError{fieldName, rt} 330 } 331 f, err := inferFieldSchema(fieldName, et, false) 332 if err != nil { 333 return nil, err 334 } 335 f.Repeated = true 336 f.Required = false 337 return f, nil 338 case reflect.Ptr: 339 if rt.Elem().Kind() != reflect.Struct { 340 return nil, unsupportedFieldTypeError{fieldName, rt} 341 } 342 fallthrough 343 case reflect.Struct: 344 nested, err := inferStruct(rt) 345 if err != nil { 346 return nil, err 347 } 348 return &FieldSchema{Required: !nullable, Type: RecordFieldType, Schema: nested}, nil 349 case reflect.String: 350 return &FieldSchema{Required: !nullable, Type: StringFieldType}, nil 351 case reflect.Bool: 352 return &FieldSchema{Required: !nullable, Type: BooleanFieldType}, nil 353 case reflect.Float32, reflect.Float64: 354 return &FieldSchema{Required: !nullable, Type: FloatFieldType}, nil 355 default: 356 return nil, unsupportedFieldTypeError{fieldName, rt} 357 } 358} 359 360// inferFields extracts all exported field types from struct type. 361func inferFields(rt reflect.Type) (Schema, error) { 362 var s Schema 363 fields, err := fieldCache.Fields(rt) 364 if err != nil { 365 return nil, err 366 } 367 for _, field := range fields { 368 var nullable bool 369 for _, opt := range field.ParsedTag.([]string) { 370 if opt == nullableTagOption { 371 nullable = true 372 break 373 } 374 } 375 f, err := inferFieldSchema(field.Name, field.Type, nullable) 376 if err != nil { 377 return nil, err 378 } 379 f.Name = field.Name 380 s = append(s, f) 381 } 382 return s, nil 383} 384 385// isSupportedIntType reports whether t is an int type that can be properly 386// represented by the BigQuery INTEGER/INT64 type. 
387func isSupportedIntType(t reflect.Type) bool { 388 switch t.Kind() { 389 case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int: 390 return true 391 default: 392 return false 393 } 394} 395 396// isSupportedIntType reports whether t is a uint type that can be properly 397// represented by the BigQuery INTEGER/INT64 type. 398func isSupportedUintType(t reflect.Type) bool { 399 switch t.Kind() { 400 case reflect.Uint8, reflect.Uint16, reflect.Uint32: 401 return true 402 default: 403 return false 404 } 405} 406 407// typeList is a linked list of reflect.Types. 408type typeList struct { 409 t reflect.Type 410 next *typeList 411} 412 413func (l *typeList) has(t reflect.Type) bool { 414 for l != nil { 415 if l.t == t { 416 return true 417 } 418 l = l.next 419 } 420 return false 421} 422 423// hasRecursiveType reports whether t or any type inside t refers to itself, directly or indirectly, 424// via exported fields. (Schema inference ignores unexported fields.) 425func hasRecursiveType(t reflect.Type, seen *typeList) (bool, error) { 426 for t.Kind() == reflect.Ptr || t.Kind() == reflect.Slice || t.Kind() == reflect.Array { 427 t = t.Elem() 428 } 429 if t.Kind() != reflect.Struct { 430 return false, nil 431 } 432 if seen.has(t) { 433 return true, nil 434 } 435 fields, err := fieldCache.Fields(t) 436 if err != nil { 437 return false, err 438 } 439 seen = &typeList{t, seen} 440 // Because seen is a linked list, additions to it from one field's 441 // recursive call will not affect the value for subsequent fields' calls. 442 for _, field := range fields { 443 ok, err := hasRecursiveType(field.Type, seen) 444 if err != nil { 445 return false, err 446 } 447 if ok { 448 return true, nil 449 } 450 } 451 return false, nil 452} 453 454// bigQuerySchemaJSONField is an individual field in a JSON BigQuery table schema definition 455// (as generated by https://github.com/GoogleCloudPlatform/protoc-gen-bq-schema). 
456type bigQueryJSONField struct { 457 Description string `json:"description"` 458 Fields []bigQueryJSONField `json:"fields"` 459 Mode string `json:"mode"` 460 Name string `json:"name"` 461 Type string `json:"type"` 462} 463 464// convertSchemaFromJSON generates a Schema: 465func convertSchemaFromJSON(fs []bigQueryJSONField) (Schema, error) { 466 convertedSchema := Schema{} 467 for _, f := range fs { 468 convertedFieldSchema := &FieldSchema{ 469 Description: f.Description, 470 Name: f.Name, 471 Required: f.Mode == "REQUIRED", 472 Repeated: f.Mode == "REPEATED", 473 } 474 if len(f.Fields) > 0 { 475 convertedNestedFieldSchema, err := convertSchemaFromJSON(f.Fields) 476 if err != nil { 477 return nil, err 478 } 479 convertedFieldSchema.Schema = convertedNestedFieldSchema 480 } 481 482 // Check that the field-type (string) maps to a known FieldType: 483 if _, ok := fieldTypes[FieldType(f.Type)]; !ok { 484 return nil, fmt.Errorf("unknown field type (%v)", f.Type) 485 } 486 convertedFieldSchema.Type = FieldType(f.Type) 487 488 convertedSchema = append(convertedSchema, convertedFieldSchema) 489 } 490 return convertedSchema, nil 491} 492 493// SchemaFromJSON takes a JSON BigQuery table schema definition 494// (as generated by https://github.com/GoogleCloudPlatform/protoc-gen-bq-schema) 495// and returns a fully-populated Schema. 
496func SchemaFromJSON(schemaJSON []byte) (Schema, error) { 497 498 var bigQuerySchema []bigQueryJSONField 499 500 // Make sure we actually have some content: 501 if len(schemaJSON) == 0 { 502 return nil, errEmptyJSONSchema 503 } 504 505 if err := json.Unmarshal(schemaJSON, &bigQuerySchema); err != nil { 506 return nil, err 507 } 508 509 return convertSchemaFromJSON(bigQuerySchema) 510} 511 512type noStructError struct { 513 typ reflect.Type 514} 515 516func (e noStructError) Error() string { 517 return fmt.Sprintf("bigquery: can only infer schema from struct or pointer to struct, not %s", e.typ) 518} 519 520type badNullableError struct { 521 name string 522 typ reflect.Type 523} 524 525func (e badNullableError) Error() string { 526 return fmt.Sprintf(`bigquery: field %q of type %s: use "nullable" only for []byte and struct pointers; for all other types, use a NullXXX type`, e.name, e.typ) 527} 528 529type unsupportedFieldTypeError struct { 530 name string 531 typ reflect.Type 532} 533 534func (e unsupportedFieldTypeError) Error() string { 535 return fmt.Sprintf("bigquery: field %q: type %s is not supported", e.name, e.typ) 536} 537