// Copyright 2015 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/*
Package bigquery provides a client for the BigQuery service.

The following assumes a basic familiarity with BigQuery concepts.
See https://cloud.google.com/bigquery/docs.

See https://godoc.org/cloud.google.com/go for authentication, timeouts,
connection pooling and similar aspects of this package.

Creating a Client

To start working with this package, create a client:

    ctx := context.Background()
    client, err := bigquery.NewClient(ctx, projectID)
    if err != nil {
        // TODO: Handle error.
    }

Querying

To query existing tables, create a Query and call its Read method:

    q := client.Query(`
        SELECT year, SUM(number) as num
        FROM ` + "`bigquery-public-data.usa_names.usa_1910_2013`" + `
        WHERE name = "William"
        GROUP BY year
        ORDER BY year
    `)
    it, err := q.Read(ctx)
    if err != nil {
        // TODO: Handle error.
    }

Then iterate through the resulting rows. You can store a row using
anything that implements the ValueLoader interface, or with a slice or map of bigquery.Value.
A slice is simplest:

    for {
        var values []bigquery.Value
        err := it.Next(&values)
        if err == iterator.Done {
            break
        }
        if err != nil {
            // TODO: Handle error.
        }
        fmt.Println(values)
    }

You can also use a struct whose exported fields match the query:

    type Count struct {
        Year int
        Num  int
    }
    for {
        var c Count
        err := it.Next(&c)
        if err == iterator.Done {
            break
        }
        if err != nil {
            // TODO: Handle error.
        }
        fmt.Println(c)
    }

You can also start the query running and get the results later.
Create the query as above, but call Run instead of Read. This returns a Job,
which represents an asynchronous operation.

    job, err := q.Run(ctx)
    if err != nil {
        // TODO: Handle error.
    }

Get the job's ID, a printable string. You can save this string to retrieve
the results at a later time, even in another process.

    jobID := job.ID()
    fmt.Printf("The job ID is %s\n", jobID)

To retrieve the job's results from the ID, first look up the Job:

    job, err = client.JobFromID(ctx, jobID)
    if err != nil {
        // TODO: Handle error.
    }

Use the Job.Read method to obtain an iterator, and loop over the rows.
Query.Read is just a convenience method that combines Query.Run and Job.Read.

    it, err = job.Read(ctx)
    if err != nil {
        // TODO: Handle error.
    }
    // Proceed with iteration as above.

Datasets and Tables

You can refer to datasets in the client's project with the Dataset method, and
in other projects with the DatasetInProject method:

    myDataset := client.Dataset("my_dataset")
    yourDataset := client.DatasetInProject("your-project-id", "your_dataset")

These methods create references to datasets, not the datasets themselves. You can have
a dataset reference even if the dataset doesn't exist yet.
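A reference alone doesn't tell you whether the dataset exists. One way to check,
reusing the reference above, is to ask for its metadata and inspect the error; treating
a *googleapi.Error with code 404 as "not found" is an assumption about how the service
reports a missing dataset (see the Errors section below):

    // Sketch: probe for existence by fetching the dataset's metadata.
    if _, err := myDataset.Metadata(ctx); err != nil {
        if e, ok := err.(*googleapi.Error); ok && e.Code == 404 {
            // The dataset does not exist yet.
        }
    }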
Use Dataset.Create to
create a dataset from a reference:

    if err := myDataset.Create(ctx, nil); err != nil {
        // TODO: Handle error.
    }

You can refer to tables with Dataset.Table. Like bigquery.Dataset, bigquery.Table is a reference
to an object in BigQuery that may or may not exist.

    table := myDataset.Table("my_table")

You can create, delete and update the metadata of tables with methods on Table.
For instance, you could create a temporary table with:

    err = myDataset.Table("temp").Create(ctx, &bigquery.TableMetadata{
        ExpirationTime: time.Now().Add(1 * time.Hour)})
    if err != nil {
        // TODO: Handle error.
    }

We'll see how to create a table with a schema in the next section.

Schemas

There are two ways to construct schemas with this package.
You can build a schema by hand, like so:

    schema1 := bigquery.Schema{
        {Name: "Name", Required: true, Type: bigquery.StringFieldType},
        {Name: "Grades", Repeated: true, Type: bigquery.IntegerFieldType},
        {Name: "Optional", Required: false, Type: bigquery.IntegerFieldType},
    }

Or you can infer the schema from a struct:

    type student struct {
        Name     string
        Grades   []int
        Optional bigquery.NullInt64
    }
    schema2, err := bigquery.InferSchema(student{})
    if err != nil {
        // TODO: Handle error.
    }
    // schema1 and schema2 are identical.

Struct inference supports tags like those of the encoding/json package, so you can
change names, ignore fields, or mark a field as nullable (non-required). Fields
declared as one of the Null types (NullInt64, NullFloat64, NullString, NullBool,
NullTimestamp, NullDate, NullTime, NullDateTime, and NullGeography) are
automatically inferred as nullable, so the "nullable" tag is only needed for []byte,
*big.Rat and pointer-to-struct fields.

    type student2 struct {
        Name     string `bigquery:"full_name"`
        Grades   []int
        Secret   string `bigquery:"-"`
        Optional []byte `bigquery:",nullable"`
    }
    schema3, err := bigquery.InferSchema(student2{})
    if err != nil {
        // TODO: Handle error.
    }
    // schema3 has required fields "full_name" and "Grades", and nullable BYTES field "Optional".

Having constructed a schema, you can create a table with it like so:

    if err := table.Create(ctx, &bigquery.TableMetadata{Schema: schema1}); err != nil {
        // TODO: Handle error.
    }

Copying

You can copy one or more tables to another table. Begin by constructing a Copier
describing the copy. Then set any desired copy options, and finally call Run to get a Job:

    copier := myDataset.Table("dest").CopierFrom(myDataset.Table("src"))
    copier.WriteDisposition = bigquery.WriteTruncate
    job, err = copier.Run(ctx)
    if err != nil {
        // TODO: Handle error.
    }

You can chain the call to Run if you don't want to set options:

    job, err = myDataset.Table("dest").CopierFrom(myDataset.Table("src")).Run(ctx)
    if err != nil {
        // TODO: Handle error.
    }

You can wait for your job to complete:

    status, err := job.Wait(ctx)
    if err != nil {
        // TODO: Handle error.
    }

Job.Wait polls with exponential backoff. You can also poll yourself, if you
wish:

    for {
        status, err := job.Status(ctx)
        if err != nil {
            // TODO: Handle error.
        }
        if status.Done() {
            if status.Err() != nil {
                log.Fatalf("Job failed with error %v", status.Err())
            }
            break
        }
        time.Sleep(pollInterval)
    }
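In both the Wait and the polling examples, the returned error only reports a problem
retrieving the job's status; a failure of the job itself is reported by JobStatus.Err.
A minimal sketch of checking both, reusing the job from above:

    status, err := job.Wait(ctx)
    if err != nil {
        // TODO: Handle error (the status could not be retrieved).
    }
    if status.Err() != nil {
        // TODO: Handle error (the job completed, but failed).
    }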
Loading and Uploading

There are two ways to populate a table with this package: load the data from a Google Cloud Storage
object, or upload rows directly from your program.

For loading, first create a GCSReference, configuring it if desired. Then make a Loader, optionally configure
it as well, and call its Run method.

    gcsRef := bigquery.NewGCSReference("gs://my-bucket/my-object")
    gcsRef.AllowJaggedRows = true
    loader := myDataset.Table("dest").LoaderFrom(gcsRef)
    loader.CreateDisposition = bigquery.CreateNever
    job, err = loader.Run(ctx)
    // Poll the job for completion if desired, as above.

To upload, first define a type that implements the ValueSaver interface, which has a single method named Save.
Then create an Inserter, and call its Put method with a slice of values.

    u := table.Inserter()
    // Item implements the ValueSaver interface.
    items := []*Item{
        {Name: "n1", Size: 32.6, Count: 7},
        {Name: "n2", Size: 4, Count: 2},
        {Name: "n3", Size: 101.5, Count: 1},
    }
    if err := u.Put(ctx, items); err != nil {
        // TODO: Handle error.
    }

You can also upload a struct that doesn't implement ValueSaver. Use the StructSaver type
to specify the schema and insert ID by hand, or just supply the struct or struct pointer
directly and the schema will be inferred:

    type Item2 struct {
        Name  string
        Size  float64
        Count int
    }
    // Item2 does not implement ValueSaver; its schema is inferred.
    items2 := []*Item2{
        {Name: "n1", Size: 32.6, Count: 7},
        {Name: "n2", Size: 4, Count: 2},
        {Name: "n3", Size: 101.5, Count: 1},
    }
    if err := u.Put(ctx, items2); err != nil {
        // TODO: Handle error.
    }

BigQuery allows for higher throughput when omitting insertion IDs. To enable this,
specify the sentinel `NoDedupeID` value for the insertion ID when implementing a ValueSaver.

Extracting

If you've been following so far, extracting data from a BigQuery table
into a Google Cloud Storage object will feel familiar. First create an
Extractor, then optionally configure it, and lastly call its Run method.

    extractor := table.ExtractorTo(gcsRef)
    extractor.DisableHeader = true
    job, err = extractor.Run(ctx)
    // Poll the job for completion if desired, as above.

Errors

Errors returned by this client are often of the type googleapi.Error: https://godoc.org/google.golang.org/api/googleapi#Error

These errors can be introspected for more information by type-asserting to the richer *googleapi.Error type. For example:

    if e, ok := err.(*googleapi.Error); ok {
        if e.Code == 409 { ... }
    }

In some cases, your client may receive unstructured googleapi.Error error responses. In such cases, it is likely that
you have exceeded BigQuery request limits, documented at: https://cloud.google.com/bigquery/quotas

*/
package bigquery // import "cloud.google.com/go/bigquery"