1/* 2Copyright 2014 The Perkeep Authors 3 4Licensed under the Apache License, Version 2.0 (the "License"); 5you may not use this file except in compliance with the License. 6You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10Unless required by applicable law or agreed to in writing, software 11distributed under the License is distributed on an "AS IS" BASIS, 12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13See the License for the specific language governing permissions and 14limitations under the License. 15*/ 16 17// Package picasa implements an importer for picasa.com accounts. 18package picasa // import "perkeep.org/pkg/importer/picasa" 19 20// TODO: removing camliPath from gallery permanode when pic deleted from gallery 21 22import ( 23 "context" 24 "errors" 25 "fmt" 26 "io" 27 "log" 28 "net/http" 29 "net/url" 30 "os" 31 "strconv" 32 "strings" 33 "time" 34 35 "github.com/tgulacsi/picago" 36 "go4.org/ctxutil" 37 "go4.org/syncutil" 38 "golang.org/x/oauth2" 39 "golang.org/x/oauth2/google" 40 "perkeep.org/internal/httputil" 41 "perkeep.org/pkg/blob" 42 "perkeep.org/pkg/importer" 43 "perkeep.org/pkg/schema" 44 "perkeep.org/pkg/schema/nodeattr" 45 "perkeep.org/pkg/search" 46) 47 48const ( 49 scopeURL = "https://picasaweb.google.com/data/" 50 51 // runCompleteVersion is a cache-busting version number of the 52 // importer code. It should be incremented whenever the 53 // behavior of this importer is updated enough to warrant a 54 // complete run. Otherwise, if the importer runs to 55 // completion, this version number is recorded on the account 56 // permanode and subsequent importers can stop early. 57 runCompleteVersion = "4" 58 59 // attrPicasaId is used for both picasa photo IDs and gallery IDs. 60 attrPicasaId = "picasaId" 61 62 // acctAttrOAuthToken stores access + " " + refresh + " " + expiry 63 // See encodeToken and decodeToken. 64 acctAttrOAuthToken = "oauthToken" 65 66 // AttrMediaURL is an attribute set on each picasa photo permanode. It 67 // is the public URL for fetching the contents of the photo file. 68 AttrMediaURL = "picasaMediaURL" 69) 70 71var ( 72 _ importer.Importer = imp{} 73 _ importer.ImporterSetupHTMLer = imp{} 74) 75 76func init() { 77 importer.Register("picasa", imp{}) 78} 79 80// imp is the implementation of the Picasa importer. 81type imp struct { 82 importer.OAuth2 83} 84 85func (imp) Properties() importer.Properties { 86 return importer.Properties{ 87 Title: "Google Photos (via Picasa API)", 88 Description: "import your photos from Google Photos. (limited to 10,000 photos per Google Photos API bug for now)", 89 SupportsIncremental: true, 90 NeedsAPIKey: true, 91 } 92} 93 94type userInfo struct { 95 ID string // numeric picasa user ID ("11583474931002155675") 96 Name string // "Jane Smith" 97} 98 99func (imp) getUserInfo(ctx context.Context) (*userInfo, error) { 100 u, err := picago.GetUser(ctxutil.Client(ctx), "default") 101 if err != nil { 102 return nil, err 103 } 104 return &userInfo{ID: u.ID, Name: u.Name}, nil 105} 106 107func (imp) IsAccountReady(acctNode *importer.Object) (ok bool, err error) { 108 if acctNode.Attr(importer.AcctAttrUserID) != "" && acctNode.Attr(acctAttrOAuthToken) != "" { 109 return true, nil 110 } 111 return false, nil 112} 113 114func (im imp) SummarizeAccount(acct *importer.Object) string { 115 ok, err := im.IsAccountReady(acct) 116 if err != nil || !ok { 117 return "" 118 } 119 if acct.Attr(importer.AcctAttrGivenName) == "" && acct.Attr(importer.AcctAttrFamilyName) == "" { 120 return fmt.Sprintf("userid %s", acct.Attr(importer.AcctAttrUserID)) 121 } 122 return fmt.Sprintf("userid %s (%s %s)", 123 acct.Attr(importer.AcctAttrUserID), 124 acct.Attr(importer.AcctAttrGivenName), 125 acct.Attr(importer.AcctAttrFamilyName)) 126} 127 128func (im imp) ServeSetup(w http.ResponseWriter, r *http.Request, ctx *importer.SetupContext) error { 129 oauthConfig, err := im.auth(ctx) 130 if err == nil { 131 // we will get back this with the token, so use it for preserving account info 132 state := "acct:" + ctx.AccountNode.PermanodeRef().String() 133 // AccessType needs to be "offline", as the user is not here all the time; 134 // ApprovalPrompt needs to be "force" to be able to get a RefreshToken 135 // everytime, even for Re-logins, too. 136 // 137 // Source: https://developers.google.com/youtube/v3/guides/authentication#server-side-apps 138 http.Redirect(w, r, oauthConfig.AuthCodeURL(state, oauth2.AccessTypeOffline, oauth2.ApprovalForce), 302) 139 } 140 return err 141} 142 143// CallbackURLParameters returns the needed callback parameters - empty for Google Picasa. 144func (im imp) CallbackURLParameters(acctRef blob.Ref) url.Values { 145 return url.Values{} 146} 147 148func (im imp) ServeCallback(w http.ResponseWriter, r *http.Request, ctx *importer.SetupContext) { 149 oauthConfig, err := im.auth(ctx) 150 if err != nil { 151 httputil.ServeError(w, r, fmt.Errorf("Error getting oauth config: %v", err)) 152 return 153 } 154 155 if r.Method != "GET" { 156 http.Error(w, "Expected a GET", 400) 157 return 158 } 159 code := r.FormValue("code") 160 if code == "" { 161 http.Error(w, "Expected a code", 400) 162 return 163 } 164 165 token, err := oauthConfig.Exchange(ctx, code) 166 if err != nil { 167 log.Printf("importer/picasa: token exchange error: %v", err) 168 httputil.ServeError(w, r, fmt.Errorf("token exchange error: %v", err)) 169 return 170 } 171 172 log.Printf("importer/picasa: got exhanged token.") 173 picagoCtx := context.WithValue(ctx, ctxutil.HTTPClient, oauthConfig.Client(ctx, token)) 174 175 userInfo, err := im.getUserInfo(picagoCtx) 176 if err != nil { 177 log.Printf("Couldn't get username: %v", err) 178 httputil.ServeError(w, r, fmt.Errorf("can't get username: %v", err)) 179 return 180 } 181 182 if err := ctx.AccountNode.SetAttrs( 183 importer.AcctAttrUserID, userInfo.ID, 184 importer.AcctAttrName, userInfo.Name, 185 acctAttrOAuthToken, encodeToken(token), 186 ); err != nil { 187 httputil.ServeError(w, r, fmt.Errorf("Error setting attribute: %v", err)) 188 return 189 } 190 http.Redirect(w, r, ctx.AccountURL(), http.StatusFound) 191} 192 193// encodeToken encodes the oauth2.Token as 194// AccessToken + " " + RefreshToken + " " + Expiry.Unix() 195func encodeToken(token *oauth2.Token) string { 196 if token == nil { 197 return "" 198 } 199 var seconds int64 200 if !token.Expiry.IsZero() { 201 seconds = token.Expiry.Unix() 202 } 203 return token.AccessToken + " " + token.RefreshToken + " " + strconv.FormatInt(seconds, 10) 204} 205 206// decodeToken parses an access token, refresh token, and optional 207// expiry unix timestamp separated by spaces into an oauth2.Token. 208// It returns as much as it can. 209func decodeToken(encoded string) *oauth2.Token { 210 t := new(oauth2.Token) 211 f := strings.Fields(encoded) 212 if len(f) > 0 { 213 t.AccessToken = f[0] 214 } 215 if len(f) > 1 { 216 t.RefreshToken = f[1] 217 } 218 if len(f) > 2 && f[2] != "0" { 219 sec, err := strconv.ParseInt(f[2], 10, 64) 220 if err == nil { 221 t.Expiry = time.Unix(sec, 0) 222 } 223 } 224 return t 225} 226 227func (im imp) auth(ctx *importer.SetupContext) (*oauth2.Config, error) { 228 clientID, secret, err := ctx.Credentials() 229 if err != nil { 230 return nil, err 231 } 232 conf := &oauth2.Config{ 233 Endpoint: google.Endpoint, 234 RedirectURL: ctx.CallbackURL(), 235 ClientID: clientID, 236 ClientSecret: secret, 237 Scopes: []string{scopeURL}, 238 } 239 return conf, nil 240} 241 242func (imp) AccountSetupHTML(host *importer.Host) string { 243 // Picasa doesn't allow a path in the origin. Remove it. 244 origin := host.ImporterBaseURL() 245 if u, err := url.Parse(origin); err == nil { 246 u.Path = "" 247 origin = u.String() 248 } 249 250 callback := host.ImporterBaseURL() + "picasa/callback" 251 gphotosURL := host.ImporterBaseURL() + "gphotos" 252 return fmt.Sprintf(` 253<h1>Configuring Picasa</h1> 254<p>Please note that because of a bug in the Picasa API, you cannot retrieve more than 10000 photos. If you have more than 10000 photos, you should use the <a href='%s'>Google Photos importer</a> instead.</p> 255<p>Visit <a href='https://console.developers.google.com/'>https://console.developers.google.com/</a> 256and click <b>"Create Project"</b>.</p> 257<p>Then under "APIs & Auth" in the left sidebar, click on "Credentials", then click the button <b>"Create new Client ID"</b>.</p> 258<p>Use the following settings:</p> 259<ul> 260 <li>Web application</li> 261 <li>Authorized JavaScript origins: <b>%s</b></li> 262 <li>Authorized Redirect URI: <b>%s</b></li> 263</ul> 264<p>Click "Create Client ID". Copy the "Client ID" and "Client Secret" into the boxes above.</p> 265`, gphotosURL, origin, callback) 266} 267 268// A run is our state for a given run of the importer. 269type run struct { 270 *importer.RunContext 271 incremental bool // whether we've completed a run in the past 272 photoGate *syncutil.Gate 273} 274 275var forceFullImport, _ = strconv.ParseBool(os.Getenv("CAMLI_PICASA_FULL_IMPORT")) 276 277func (imp) Run(rctx *importer.RunContext) error { 278 clientID, secret, err := rctx.Credentials() 279 if err != nil { 280 return err 281 } 282 acctNode := rctx.AccountNode() 283 284 ocfg := &oauth2.Config{ 285 Endpoint: google.Endpoint, 286 ClientID: clientID, 287 ClientSecret: secret, 288 Scopes: []string{scopeURL}, 289 } 290 291 token := decodeToken(acctNode.Attr(acctAttrOAuthToken)) 292 baseCtx := rctx.Context() 293 ctx := context.WithValue(baseCtx, ctxutil.HTTPClient, ocfg.Client(baseCtx, token)) 294 295 root := rctx.RootNode() 296 if root.Attr(nodeattr.Title) == "" { 297 if err := root.SetAttr( 298 nodeattr.Title, 299 fmt.Sprintf("%s - Google Photos", acctNode.Attr(importer.AcctAttrName)), 300 ); err != nil { 301 return err 302 } 303 } 304 305 r := &run{ 306 RunContext: rctx, 307 incremental: !forceFullImport && acctNode.Attr(importer.AcctAttrCompletedVersion) == runCompleteVersion, 308 photoGate: syncutil.NewGate(3), 309 } 310 if err := r.importAlbums(ctx); err != nil { 311 return err 312 } 313 314 if err := acctNode.SetAttrs(importer.AcctAttrCompletedVersion, runCompleteVersion); err != nil { 315 return err 316 } 317 318 return nil 319} 320 321func (r *run) importAlbums(ctx context.Context) error { 322 albums, err := picago.GetAlbums(ctxutil.Client(ctx), "default") 323 if err != nil { 324 return fmt.Errorf("importAlbums: error listing albums: %v", err) 325 } 326 albumsNode, err := r.getTopLevelNode("albums", "Albums") 327 for _, album := range albums { 328 select { 329 case <-ctx.Done(): 330 return ctx.Err() 331 default: 332 } 333 if err := r.importAlbum(ctx, albumsNode, album); err != nil { 334 return fmt.Errorf("picasa importer: error importing album %s: %v", album, err) 335 } 336 } 337 return nil 338} 339 340func (r *run) importAlbum(ctx context.Context, albumsNode *importer.Object, album picago.Album) (ret error) { 341 if album.ID == "" { 342 return errors.New("album has no ID") 343 } 344 albumNode, err := albumsNode.ChildPathObject(album.ID) 345 if err != nil { 346 return fmt.Errorf("importAlbum: error listing album: %v", err) 347 } 348 349 dateMod := schema.RFC3339FromTime(album.Updated) 350 351 // Data reference: https://developers.google.com/picasa-web/docs/2.0/reference 352 // TODO(tgulacsi): add more album info 353 changes, err := albumNode.SetAttrs2( 354 attrPicasaId, album.ID, 355 nodeattr.Type, "picasaweb.google.com:album", 356 nodeattr.Title, album.Title, 357 nodeattr.DatePublished, schema.RFC3339FromTime(album.Published), 358 nodeattr.LocationText, album.Location, 359 nodeattr.Description, album.Description, 360 nodeattr.URL, album.URL, 361 ) 362 if err != nil { 363 return fmt.Errorf("error setting album attributes: %v", err) 364 } 365 if !changes && r.incremental && albumNode.Attr(nodeattr.DateModified) == dateMod { 366 return nil 367 } 368 defer func() { 369 // Don't update DateModified on the album node until 370 // we've successfully imported all the photos. 371 if ret == nil { 372 ret = albumNode.SetAttr(nodeattr.DateModified, dateMod) 373 } 374 }() 375 376 log.Printf("Importing album %v: %v/%v (published %v, updated %v)", album.ID, album.Name, album.Title, album.Published, album.Updated) 377 378 // TODO(bradfitz): GetPhotos does multiple HTTP requests to 379 // return a slice of all photos. My "InstantUpload/Auto 380 // Backup" album has 6678 photos (and growing) and this 381 // currently takes like 40 seconds. Fix. 382 photos, err := picago.GetPhotos(ctxutil.Client(ctx), "default", album.ID) 383 if err != nil { 384 return err 385 } 386 387 log.Printf("Importing %d photos from album %q (%s)", len(photos), albumNode.Attr(nodeattr.Title), 388 albumNode.PermanodeRef()) 389 390 var grp syncutil.Group 391 for i := range photos { 392 select { 393 case <-ctx.Done(): 394 return ctx.Err() 395 default: 396 } 397 photo := photos[i] 398 r.photoGate.Start() 399 grp.Go(func() error { 400 defer r.photoGate.Done() 401 return r.updatePhotoInAlbum(ctx, albumNode, photo) 402 }) 403 } 404 return grp.Err() 405} 406 407func (r *run) updatePhotoInAlbum(ctx context.Context, albumNode *importer.Object, photo picago.Photo) (ret error) { 408 if photo.ID == "" { 409 return errors.New("photo has no ID") 410 } 411 412 getMediaBytes := func() (io.ReadCloser, error) { 413 log.Printf("Importing media from %v", photo.URL) 414 resp, err := ctxutil.Client(ctx).Get(photo.URL) 415 if err != nil { 416 return nil, fmt.Errorf("importing photo %s: %v", photo.ID, err) 417 } 418 if resp.StatusCode != http.StatusOK { 419 resp.Body.Close() 420 return nil, fmt.Errorf("importing photo %s: status code = %d", photo.ID, resp.StatusCode) 421 } 422 return resp.Body, nil 423 } 424 425 var fileRefStr string 426 idFilename := photo.ID + "-" + photo.Filename 427 photoNode, err := albumNode.ChildPathObjectOrFunc(idFilename, func() (*importer.Object, error) { 428 h := blob.NewHash() 429 rc, err := getMediaBytes() 430 if err != nil { 431 return nil, err 432 } 433 fileRef, err := schema.WriteFileFromReader(r.Context(), r.Host.Target(), photo.Filename, io.TeeReader(rc, h)) 434 if err != nil { 435 return nil, err 436 } 437 fileRefStr = fileRef.String() 438 wholeRef := blob.RefFromHash(h) 439 if pn, err := findExistingPermanode(r.Context(), r.Host.Searcher(), wholeRef); err == nil { 440 return r.Host.ObjectFromRef(pn) 441 } 442 return r.Host.NewObject() 443 }) 444 if err != nil { 445 return err 446 } 447 448 if fileRefStr == "" { 449 fileRefStr = photoNode.Attr(nodeattr.CamliContent) 450 // Only re-download the source photo if its URL has changed. 451 // Empirically this seems to work: cropping a photo in the 452 // photos.google.com UI causes its URL to change. And it makes 453 // sense, looking at the ugliness of the URLs with all their 454 // encoded/signed state. 455 if !mediaURLsEqual(photoNode.Attr(AttrMediaURL), photo.URL) { 456 rc, err := getMediaBytes() 457 if err != nil { 458 return err 459 } 460 fileRef, err := schema.WriteFileFromReader(r.Context(), r.Host.Target(), photo.Filename, rc) 461 rc.Close() 462 if err != nil { 463 return err 464 } 465 fileRefStr = fileRef.String() 466 } 467 } 468 469 title := strings.TrimSpace(photo.Description) 470 if strings.Contains(title, "\n") { 471 title = title[:strings.Index(title, "\n")] 472 } 473 if title == "" && schema.IsInterestingTitle(photo.Filename) { 474 title = photo.Filename 475 } 476 477 // TODO(tgulacsi): add more attrs (comments ?) 478 // for names, see http://schema.org/ImageObject and http://schema.org/CreativeWork 479 attrs := []string{ 480 nodeattr.CamliContent, fileRefStr, 481 attrPicasaId, photo.ID, 482 nodeattr.Title, title, 483 nodeattr.Description, photo.Description, 484 nodeattr.LocationText, photo.Location, 485 nodeattr.DateModified, schema.RFC3339FromTime(photo.Updated), 486 nodeattr.DatePublished, schema.RFC3339FromTime(photo.Published), 487 nodeattr.URL, photo.PageURL, 488 } 489 if photo.Latitude != 0 || photo.Longitude != 0 { 490 attrs = append(attrs, 491 nodeattr.Latitude, fmt.Sprintf("%f", photo.Latitude), 492 nodeattr.Longitude, fmt.Sprintf("%f", photo.Longitude), 493 ) 494 } 495 if err := photoNode.SetAttrs(attrs...); err != nil { 496 return err 497 } 498 if err := photoNode.SetAttrValues("tag", photo.Keywords); err != nil { 499 return err 500 } 501 if photo.Position > 0 { 502 if err := albumNode.SetAttr( 503 nodeattr.CamliPathOrderColon+strconv.Itoa(photo.Position-1), 504 photoNode.PermanodeRef().String()); err != nil { 505 return err 506 } 507 } 508 509 // Do this last, after we're sure the "camliContent" attribute 510 // has been saved successfully, because this is the one that 511 // causes us to do it again in the future or not. 512 if err := photoNode.SetAttrs(AttrMediaURL, photo.URL); err != nil { 513 return err 514 } 515 return nil 516} 517 518var testTopLevelNode *importer.Object 519 520func (r *run) getTopLevelNode(path string, title string) (*importer.Object, error) { 521 if testTopLevelNode != nil { 522 return testTopLevelNode, nil 523 } 524 childObject, err := r.RootNode().ChildPathObject(path) 525 if err != nil { 526 return nil, err 527 } 528 529 if err := childObject.SetAttr(nodeattr.Title, title); err != nil { 530 return nil, err 531 } 532 return childObject, nil 533} 534 535var sensitiveAttrs = []string{ 536 nodeattr.Type, 537 attrPicasaId, 538 nodeattr.Title, 539 nodeattr.DateModified, 540 nodeattr.DatePublished, 541 nodeattr.Latitude, 542 nodeattr.Longitude, 543 nodeattr.Description, 544} 545 546// findExistingPermanode finds an existing permanode that has a 547// camliContent pointing to a file with the provided wholeRef and 548// doesn't have any conflicting attributes that would prevent the 549// picasa importer from re-using that permanode for its own use. 550func findExistingPermanode(ctx context.Context, qs search.QueryDescriber, wholeRef blob.Ref) (pn blob.Ref, err error) { 551 res, err := qs.Query(ctx, &search.SearchQuery{ 552 Constraint: &search.Constraint{ 553 Permanode: &search.PermanodeConstraint{ 554 Attr: "camliContent", 555 ValueInSet: &search.Constraint{ 556 File: &search.FileConstraint{ 557 WholeRef: wholeRef, 558 }, 559 }, 560 }, 561 }, 562 Describe: &search.DescribeRequest{ 563 Depth: 1, 564 }, 565 }) 566 if err != nil { 567 return 568 } 569 if res.Describe == nil { 570 return pn, os.ErrNotExist 571 } 572Res: 573 for _, resBlob := range res.Blobs { 574 br := resBlob.Blob 575 desBlob, ok := res.Describe.Meta[br.String()] 576 if !ok || desBlob.Permanode == nil { 577 continue 578 } 579 attrs := desBlob.Permanode.Attr 580 for _, attr := range sensitiveAttrs { 581 if attrs.Get(attr) != "" { 582 continue Res 583 } 584 } 585 return br, nil 586 } 587 return pn, os.ErrNotExist 588} 589 590func mediaURLsEqual(a, b string) bool { 591 const sub = ".googleusercontent.com/" 592 ai := strings.Index(a, sub) 593 bi := strings.Index(b, sub) 594 if ai >= 0 && bi >= 0 { 595 return a[ai:] == b[bi:] 596 } 597 return a == b 598} 599