1// Copyright 2016 Google LLC 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package bigquery 16 17import ( 18 "io" 19 20 bq "google.golang.org/api/bigquery/v2" 21) 22 23// A ReaderSource is a source for a load operation that gets 24// data from an io.Reader. 25// 26// When a ReaderSource is part of a LoadConfig obtained via Job.Config, 27// its internal io.Reader will be nil, so it cannot be used for a 28// subsequent load operation. 29type ReaderSource struct { 30 r io.Reader 31 FileConfig 32} 33 34// NewReaderSource creates a ReaderSource from an io.Reader. You may 35// optionally configure properties on the ReaderSource that describe the 36// data being read, before passing it to Table.LoaderFrom. 37func NewReaderSource(r io.Reader) *ReaderSource { 38 return &ReaderSource{r: r} 39} 40 41func (r *ReaderSource) populateLoadConfig(lc *bq.JobConfigurationLoad) io.Reader { 42 r.FileConfig.populateLoadConfig(lc) 43 return r.r 44} 45 46// FileConfig contains configuration options that pertain to files, typically 47// text files that require interpretation to be used as a BigQuery table. A 48// file may live in Google Cloud Storage (see GCSReference), or it may be 49// loaded into a table via the Table.LoaderFromReader. 50type FileConfig struct { 51 // SourceFormat is the format of the data to be read. 52 // Allowed values are: Avro, CSV, DatastoreBackup, JSON, ORC, and Parquet. The default is CSV. 53 SourceFormat DataFormat 54 55 // Indicates if we should automatically infer the options and 56 // schema for CSV and JSON sources. 57 AutoDetect bool 58 59 // MaxBadRecords is the maximum number of bad records that will be ignored 60 // when reading data. 61 MaxBadRecords int64 62 63 // IgnoreUnknownValues causes values not matching the schema to be 64 // tolerated. Unknown values are ignored. For CSV this ignores extra values 65 // at the end of a line. For JSON this ignores named values that do not 66 // match any column name. If this field is not set, records containing 67 // unknown values are treated as bad records. The MaxBadRecords field can 68 // be used to customize how bad records are handled. 69 IgnoreUnknownValues bool 70 71 // Schema describes the data. It is required when reading CSV or JSON data, 72 // unless the data is being loaded into a table that already exists. 73 Schema Schema 74 75 // Additional options for CSV files. 76 CSVOptions 77} 78 79func (fc *FileConfig) populateLoadConfig(conf *bq.JobConfigurationLoad) { 80 conf.SkipLeadingRows = fc.SkipLeadingRows 81 conf.SourceFormat = string(fc.SourceFormat) 82 conf.Autodetect = fc.AutoDetect 83 conf.AllowJaggedRows = fc.AllowJaggedRows 84 conf.AllowQuotedNewlines = fc.AllowQuotedNewlines 85 conf.Encoding = string(fc.Encoding) 86 conf.FieldDelimiter = fc.FieldDelimiter 87 conf.IgnoreUnknownValues = fc.IgnoreUnknownValues 88 conf.MaxBadRecords = fc.MaxBadRecords 89 if fc.Schema != nil { 90 conf.Schema = fc.Schema.toBQ() 91 } 92 conf.Quote = fc.quote() 93} 94 95func bqPopulateFileConfig(conf *bq.JobConfigurationLoad, fc *FileConfig) { 96 fc.SourceFormat = DataFormat(conf.SourceFormat) 97 fc.AutoDetect = conf.Autodetect 98 fc.MaxBadRecords = conf.MaxBadRecords 99 fc.IgnoreUnknownValues = conf.IgnoreUnknownValues 100 fc.Schema = bqToSchema(conf.Schema) 101 fc.SkipLeadingRows = conf.SkipLeadingRows 102 fc.AllowJaggedRows = conf.AllowJaggedRows 103 fc.AllowQuotedNewlines = conf.AllowQuotedNewlines 104 fc.Encoding = Encoding(conf.Encoding) 105 fc.FieldDelimiter = conf.FieldDelimiter 106 fc.CSVOptions.setQuote(conf.Quote) 107} 108 109func (fc *FileConfig) populateExternalDataConfig(conf *bq.ExternalDataConfiguration) { 110 format := fc.SourceFormat 111 if format == "" { 112 // Format must be explicitly set for external data sources. 113 format = CSV 114 } 115 conf.Autodetect = fc.AutoDetect 116 conf.IgnoreUnknownValues = fc.IgnoreUnknownValues 117 conf.MaxBadRecords = fc.MaxBadRecords 118 conf.SourceFormat = string(format) 119 if fc.Schema != nil { 120 conf.Schema = fc.Schema.toBQ() 121 } 122 if format == CSV { 123 fc.CSVOptions.populateExternalDataConfig(conf) 124 } 125} 126 127// Encoding specifies the character encoding of data to be loaded into BigQuery. 128// See https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.encoding 129// for more details about how this is used. 130type Encoding string 131 132const ( 133 // UTF_8 specifies the UTF-8 encoding type. 134 UTF_8 Encoding = "UTF-8" 135 // ISO_8859_1 specifies the ISO-8859-1 encoding type. 136 ISO_8859_1 Encoding = "ISO-8859-1" 137) 138