1// Copyright 2016 Google LLC 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package bigquery 16 17import ( 18 "io" 19 20 bq "google.golang.org/api/bigquery/v2" 21) 22 23// A ReaderSource is a source for a load operation that gets 24// data from an io.Reader. 25// 26// When a ReaderSource is part of a LoadConfig obtained via Job.Config, 27// its internal io.Reader will be nil, so it cannot be used for a 28// subsequent load operation. 29type ReaderSource struct { 30 r io.Reader 31 FileConfig 32} 33 34// NewReaderSource creates a ReaderSource from an io.Reader. You may 35// optionally configure properties on the ReaderSource that describe the 36// data being read, before passing it to Table.LoaderFrom. 37func NewReaderSource(r io.Reader) *ReaderSource { 38 return &ReaderSource{r: r} 39} 40 41func (r *ReaderSource) populateLoadConfig(lc *bq.JobConfigurationLoad) io.Reader { 42 r.FileConfig.populateLoadConfig(lc) 43 return r.r 44} 45 46// FileConfig contains configuration options that pertain to files, typically 47// text files that require interpretation to be used as a BigQuery table. A 48// file may live in Google Cloud Storage (see GCSReference), or it may be 49// loaded into a table via the Table.LoaderFromReader. 50type FileConfig struct { 51 // SourceFormat is the format of the data to be read. 52 // Allowed values are: Avro, CSV, DatastoreBackup, JSON, ORC, and Parquet. The default is CSV. 53 SourceFormat DataFormat 54 55 // Indicates if we should automatically infer the options and 56 // schema for CSV and JSON sources. 57 AutoDetect bool 58 59 // MaxBadRecords is the maximum number of bad records that will be ignored 60 // when reading data. 61 MaxBadRecords int64 62 63 // IgnoreUnknownValues causes values not matching the schema to be 64 // tolerated. Unknown values are ignored. For CSV this ignores extra values 65 // at the end of a line. For JSON this ignores named values that do not 66 // match any column name. If this field is not set, records containing 67 // unknown values are treated as bad records. The MaxBadRecords field can 68 // be used to customize how bad records are handled. 69 IgnoreUnknownValues bool 70 71 // Schema describes the data. It is required when reading CSV or JSON data, 72 // unless the data is being loaded into a table that already exists. 73 Schema Schema 74 75 // Additional options for CSV files. 76 CSVOptions 77 78 // Additional options for Parquet files. 79 ParquetOptions *ParquetOptions 80} 81 82func (fc *FileConfig) populateLoadConfig(conf *bq.JobConfigurationLoad) { 83 conf.SkipLeadingRows = fc.SkipLeadingRows 84 conf.SourceFormat = string(fc.SourceFormat) 85 conf.Autodetect = fc.AutoDetect 86 conf.AllowJaggedRows = fc.AllowJaggedRows 87 conf.AllowQuotedNewlines = fc.AllowQuotedNewlines 88 conf.Encoding = string(fc.Encoding) 89 conf.FieldDelimiter = fc.FieldDelimiter 90 conf.IgnoreUnknownValues = fc.IgnoreUnknownValues 91 conf.MaxBadRecords = fc.MaxBadRecords 92 if fc.Schema != nil { 93 conf.Schema = fc.Schema.toBQ() 94 } 95 if fc.ParquetOptions != nil { 96 conf.ParquetOptions = &bq.ParquetOptions{ 97 EnumAsString: fc.ParquetOptions.EnumAsString, 98 EnableListInference: fc.ParquetOptions.EnableListInference, 99 } 100 } 101 conf.Quote = fc.quote() 102} 103 104func bqPopulateFileConfig(conf *bq.JobConfigurationLoad, fc *FileConfig) { 105 fc.SourceFormat = DataFormat(conf.SourceFormat) 106 fc.AutoDetect = conf.Autodetect 107 fc.MaxBadRecords = conf.MaxBadRecords 108 fc.IgnoreUnknownValues = conf.IgnoreUnknownValues 109 fc.Schema = bqToSchema(conf.Schema) 110 fc.SkipLeadingRows = conf.SkipLeadingRows 111 fc.AllowJaggedRows = conf.AllowJaggedRows 112 fc.AllowQuotedNewlines = conf.AllowQuotedNewlines 113 fc.Encoding = Encoding(conf.Encoding) 114 fc.FieldDelimiter = conf.FieldDelimiter 115 fc.CSVOptions.setQuote(conf.Quote) 116} 117 118func (fc *FileConfig) populateExternalDataConfig(conf *bq.ExternalDataConfiguration) { 119 format := fc.SourceFormat 120 if format == "" { 121 // Format must be explicitly set for external data sources. 122 format = CSV 123 } 124 conf.Autodetect = fc.AutoDetect 125 conf.IgnoreUnknownValues = fc.IgnoreUnknownValues 126 conf.MaxBadRecords = fc.MaxBadRecords 127 conf.SourceFormat = string(format) 128 if fc.Schema != nil { 129 conf.Schema = fc.Schema.toBQ() 130 } 131 if format == CSV { 132 fc.CSVOptions.populateExternalDataConfig(conf) 133 } 134 if fc.ParquetOptions != nil { 135 conf.ParquetOptions = &bq.ParquetOptions{ 136 EnumAsString: fc.ParquetOptions.EnumAsString, 137 EnableListInference: fc.ParquetOptions.EnableListInference, 138 } 139 } 140} 141 142// Encoding specifies the character encoding of data to be loaded into BigQuery. 143// See https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.encoding 144// for more details about how this is used. 145type Encoding string 146 147const ( 148 // UTF_8 specifies the UTF-8 encoding type. 149 UTF_8 Encoding = "UTF-8" 150 // ISO_8859_1 specifies the ISO-8859-1 encoding type. 151 ISO_8859_1 Encoding = "ISO-8859-1" 152) 153