// Copyright 2016 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package bigquery

import (
	"io"

	bq "google.golang.org/api/bigquery/v2"
)

// A ReaderSource is a source for a load operation that gets
// data from an io.Reader.
//
// When a ReaderSource is part of a LoadConfig obtained via Job.Config,
// its internal io.Reader will be nil, so it cannot be used for a
// subsequent load operation.
type ReaderSource struct {
	r io.Reader
	FileConfig
}

// NewReaderSource creates a ReaderSource from an io.Reader. You may
// optionally configure properties on the ReaderSource that describe the
// data being read, before passing it to Table.LoaderFrom.
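//
// For example, here is a rough sketch of loading newline-delimited JSON from
// a local file. The client value, ctx, and the dataset and table names are
// assumptions for illustration, not part of this package:
//
//	f, err := os.Open("data.json")
//	if err != nil {
//		// TODO: handle error.
//	}
//	rs := NewReaderSource(f)
//	rs.SourceFormat = JSON
//	loader := client.Dataset("my_dataset").Table("my_table").LoaderFrom(rs)
//	job, err := loader.Run(ctx)
//	if err != nil {
//		// TODO: handle error.
//	}
//	// Wait for the load to finish, then check both the call error and the
//	// job's final status.
//	status, err := job.Wait(ctx)
//	if err != nil || status.Err() != nil {
//		// TODO: handle error.
//	}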
func NewReaderSource(r io.Reader) *ReaderSource {
	return &ReaderSource{r: r}
}

// populateLoadConfig applies the ReaderSource's FileConfig to lc and returns
// the reader whose contents will be uploaded by the load job.
func (r *ReaderSource) populateLoadConfig(lc *bq.JobConfigurationLoad) io.Reader {
	r.FileConfig.populateLoadConfig(lc)
	return r.r
}

// FileConfig contains configuration options that pertain to files, typically
// text files that require interpretation to be used as a BigQuery table. A
// file may live in Google Cloud Storage (see GCSReference), or it may be
// loaded into a table from a local source via Table.LoaderFrom with a
// ReaderSource (see NewReaderSource).
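//
// FileConfig is embedded in GCSReference and ReaderSource, so its fields are
// set directly on those types. Here is a rough sketch for a CSV file in Cloud
// Storage; the bucket and object names are assumptions for illustration:
//
//	gcsRef := NewGCSReference("gs://my-bucket/data.csv")
//	gcsRef.SourceFormat = CSV
//	gcsRef.SkipLeadingRows = 1
//	gcsRef.AllowJaggedRows = true
//	gcsRef.MaxBadRecords = 5
//	gcsRef.Schema = Schema{
//		{Name: "name", Type: StringFieldType},
//		{Name: "age", Type: IntegerFieldType},
//	}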
type FileConfig struct {
	// SourceFormat is the format of the data to be read.
	// Allowed values are: Avro, CSV, DatastoreBackup, JSON, ORC, and Parquet.
	// The default is CSV.
	SourceFormat DataFormat

	// AutoDetect indicates whether the options and schema for CSV and JSON
	// sources should be automatically inferred.
	AutoDetect bool

	// MaxBadRecords is the maximum number of bad records that will be ignored
	// when reading data.
	MaxBadRecords int64

	// IgnoreUnknownValues causes values not matching the schema to be
	// tolerated. Unknown values are ignored. For CSV this ignores extra values
	// at the end of a line. For JSON this ignores named values that do not
	// match any column name. If this field is not set, records containing
	// unknown values are treated as bad records. The MaxBadRecords field can
	// be used to customize how bad records are handled.
	IgnoreUnknownValues bool

	// Schema describes the data. It is required when reading CSV or JSON data,
	// unless the data is being loaded into a table that already exists.
	Schema Schema

	// CSVOptions holds additional options for CSV files.
	CSVOptions

	// ParquetOptions holds additional options for Parquet files.
	ParquetOptions *ParquetOptions

	// AvroOptions holds additional options for Avro files.
	AvroOptions *AvroOptions
}

// populateLoadConfig applies the FileConfig's settings to a load job
// configuration.
func (fc *FileConfig) populateLoadConfig(conf *bq.JobConfigurationLoad) {
	conf.SkipLeadingRows = fc.SkipLeadingRows
	conf.SourceFormat = string(fc.SourceFormat)
	conf.Autodetect = fc.AutoDetect
	conf.AllowJaggedRows = fc.AllowJaggedRows
	conf.AllowQuotedNewlines = fc.AllowQuotedNewlines
	conf.Encoding = string(fc.Encoding)
	conf.FieldDelimiter = fc.FieldDelimiter
	conf.IgnoreUnknownValues = fc.IgnoreUnknownValues
	conf.MaxBadRecords = fc.MaxBadRecords
	if fc.Schema != nil {
		conf.Schema = fc.Schema.toBQ()
	}
	if fc.ParquetOptions != nil {
		conf.ParquetOptions = &bq.ParquetOptions{
			EnumAsString:        fc.ParquetOptions.EnumAsString,
			EnableListInference: fc.ParquetOptions.EnableListInference,
		}
	}
	if fc.AvroOptions != nil {
		conf.UseAvroLogicalTypes = fc.AvroOptions.UseAvroLogicalTypes
	}
	conf.Quote = fc.quote()
}

// bqPopulateFileConfig is the inverse of populateLoadConfig: it fills fc from
// a BigQuery load job configuration.
func bqPopulateFileConfig(conf *bq.JobConfigurationLoad, fc *FileConfig) {
	fc.SourceFormat = DataFormat(conf.SourceFormat)
	fc.AutoDetect = conf.Autodetect
	fc.MaxBadRecords = conf.MaxBadRecords
	fc.IgnoreUnknownValues = conf.IgnoreUnknownValues
	fc.Schema = bqToSchema(conf.Schema)
	fc.SkipLeadingRows = conf.SkipLeadingRows
	fc.AllowJaggedRows = conf.AllowJaggedRows
	fc.AllowQuotedNewlines = conf.AllowQuotedNewlines
	fc.Encoding = Encoding(conf.Encoding)
	fc.FieldDelimiter = conf.FieldDelimiter
	fc.CSVOptions.setQuote(conf.Quote)
}

// populateExternalDataConfig applies the FileConfig's settings to the
// configuration of an external data source.
func (fc *FileConfig) populateExternalDataConfig(conf *bq.ExternalDataConfiguration) {
	format := fc.SourceFormat
	if format == "" {
		// The format must be explicitly set for external data sources,
		// so fall back to the default, CSV.
		format = CSV
	}
	conf.Autodetect = fc.AutoDetect
	conf.IgnoreUnknownValues = fc.IgnoreUnknownValues
	conf.MaxBadRecords = fc.MaxBadRecords
	conf.SourceFormat = string(format)
	if fc.Schema != nil {
		conf.Schema = fc.Schema.toBQ()
	}
	if format == CSV {
		fc.CSVOptions.populateExternalDataConfig(conf)
	}
	if fc.AvroOptions != nil {
		conf.AvroOptions = &bq.AvroOptions{
			UseAvroLogicalTypes: fc.AvroOptions.UseAvroLogicalTypes,
		}
	}
	if fc.ParquetOptions != nil {
		conf.ParquetOptions = &bq.ParquetOptions{
			EnumAsString:        fc.ParquetOptions.EnumAsString,
			EnableListInference: fc.ParquetOptions.EnableListInference,
		}
	}
}

// Encoding specifies the character encoding of data to be loaded into BigQuery.
// See https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.encoding
// for more details about how this is used.
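//
// Encoding is set through the CSVOptions embedded in FileConfig. A minimal
// sketch, assuming a Latin-1 CSV file in Cloud Storage:
//
//	gcsRef := NewGCSReference("gs://my-bucket/latin1.csv")
//	gcsRef.Encoding = ISO_8859_1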
type Encoding string

const (
	// UTF_8 specifies the UTF-8 encoding type.
	UTF_8 Encoding = "UTF-8"
	// ISO_8859_1 specifies the ISO-8859-1 encoding type.
	ISO_8859_1 Encoding = "ISO-8859-1"
)