1// Copyright 2016 Google LLC 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package bigquery 16 17import ( 18 "io" 19 20 bq "google.golang.org/api/bigquery/v2" 21) 22 23// A ReaderSource is a source for a load operation that gets 24// data from an io.Reader. 25// 26// When a ReaderSource is part of a LoadConfig obtained via Job.Config, 27// its internal io.Reader will be nil, so it cannot be used for a 28// subsequent load operation. 29type ReaderSource struct { 30 r io.Reader 31 FileConfig 32} 33 34// NewReaderSource creates a ReaderSource from an io.Reader. You may 35// optionally configure properties on the ReaderSource that describe the 36// data being read, before passing it to Table.LoaderFrom. 37func NewReaderSource(r io.Reader) *ReaderSource { 38 return &ReaderSource{r: r} 39} 40 41func (r *ReaderSource) populateLoadConfig(lc *bq.JobConfigurationLoad) io.Reader { 42 r.FileConfig.populateLoadConfig(lc) 43 return r.r 44} 45 46// FileConfig contains configuration options that pertain to files, typically 47// text files that require interpretation to be used as a BigQuery table. A 48// file may live in Google Cloud Storage (see GCSReference), or it may be 49// loaded into a table via the Table.LoaderFromReader. 50type FileConfig struct { 51 // SourceFormat is the format of the data to be read. 52 // Allowed values are: Avro, CSV, DatastoreBackup, JSON, ORC, and Parquet. The default is CSV. 53 SourceFormat DataFormat 54 55 // Indicates if we should automatically infer the options and 56 // schema for CSV and JSON sources. 57 AutoDetect bool 58 59 // MaxBadRecords is the maximum number of bad records that will be ignored 60 // when reading data. 61 MaxBadRecords int64 62 63 // IgnoreUnknownValues causes values not matching the schema to be 64 // tolerated. Unknown values are ignored. For CSV this ignores extra values 65 // at the end of a line. For JSON this ignores named values that do not 66 // match any column name. If this field is not set, records containing 67 // unknown values are treated as bad records. The MaxBadRecords field can 68 // be used to customize how bad records are handled. 69 IgnoreUnknownValues bool 70 71 // Schema describes the data. It is required when reading CSV or JSON data, 72 // unless the data is being loaded into a table that already exists. 73 Schema Schema 74 75 // Additional options for CSV files. 76 CSVOptions 77 78 // Additional options for Parquet files. 79 ParquetOptions *ParquetOptions 80 81 // Additional options for Avro files. 82 AvroOptions *AvroOptions 83} 84 85func (fc *FileConfig) populateLoadConfig(conf *bq.JobConfigurationLoad) { 86 conf.SkipLeadingRows = fc.SkipLeadingRows 87 conf.SourceFormat = string(fc.SourceFormat) 88 conf.Autodetect = fc.AutoDetect 89 conf.AllowJaggedRows = fc.AllowJaggedRows 90 conf.AllowQuotedNewlines = fc.AllowQuotedNewlines 91 conf.Encoding = string(fc.Encoding) 92 conf.FieldDelimiter = fc.FieldDelimiter 93 conf.IgnoreUnknownValues = fc.IgnoreUnknownValues 94 conf.MaxBadRecords = fc.MaxBadRecords 95 if fc.Schema != nil { 96 conf.Schema = fc.Schema.toBQ() 97 } 98 if fc.ParquetOptions != nil { 99 conf.ParquetOptions = &bq.ParquetOptions{ 100 EnumAsString: fc.ParquetOptions.EnumAsString, 101 EnableListInference: fc.ParquetOptions.EnableListInference, 102 } 103 } 104 if fc.AvroOptions != nil { 105 conf.UseAvroLogicalTypes = fc.AvroOptions.UseAvroLogicalTypes 106 } 107 conf.Quote = fc.quote() 108} 109 110func bqPopulateFileConfig(conf *bq.JobConfigurationLoad, fc *FileConfig) { 111 fc.SourceFormat = DataFormat(conf.SourceFormat) 112 fc.AutoDetect = conf.Autodetect 113 fc.MaxBadRecords = conf.MaxBadRecords 114 fc.IgnoreUnknownValues = conf.IgnoreUnknownValues 115 fc.Schema = bqToSchema(conf.Schema) 116 fc.SkipLeadingRows = conf.SkipLeadingRows 117 fc.AllowJaggedRows = conf.AllowJaggedRows 118 fc.AllowQuotedNewlines = conf.AllowQuotedNewlines 119 fc.Encoding = Encoding(conf.Encoding) 120 fc.FieldDelimiter = conf.FieldDelimiter 121 fc.CSVOptions.setQuote(conf.Quote) 122} 123 124func (fc *FileConfig) populateExternalDataConfig(conf *bq.ExternalDataConfiguration) { 125 format := fc.SourceFormat 126 if format == "" { 127 // Format must be explicitly set for external data sources. 128 format = CSV 129 } 130 conf.Autodetect = fc.AutoDetect 131 conf.IgnoreUnknownValues = fc.IgnoreUnknownValues 132 conf.MaxBadRecords = fc.MaxBadRecords 133 conf.SourceFormat = string(format) 134 if fc.Schema != nil { 135 conf.Schema = fc.Schema.toBQ() 136 } 137 if format == CSV { 138 fc.CSVOptions.populateExternalDataConfig(conf) 139 } 140 if fc.AvroOptions != nil { 141 conf.AvroOptions = &bq.AvroOptions{ 142 UseAvroLogicalTypes: fc.AvroOptions.UseAvroLogicalTypes, 143 } 144 } 145 if fc.ParquetOptions != nil { 146 conf.ParquetOptions = &bq.ParquetOptions{ 147 EnumAsString: fc.ParquetOptions.EnumAsString, 148 EnableListInference: fc.ParquetOptions.EnableListInference, 149 } 150 } 151} 152 153// Encoding specifies the character encoding of data to be loaded into BigQuery. 154// See https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.encoding 155// for more details about how this is used. 156type Encoding string 157 158const ( 159 // UTF_8 specifies the UTF-8 encoding type. 160 UTF_8 Encoding = "UTF-8" 161 // ISO_8859_1 specifies the ISO-8859-1 encoding type. 162 ISO_8859_1 Encoding = "ISO-8859-1" 163) 164