1/*
2Copyright 2014 The Perkeep Authors
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8     http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15*/
16
17// Package picasa implements an importer for picasa.com accounts.
18package picasa // import "perkeep.org/pkg/importer/picasa"
19
20// TODO: removing camliPath from gallery permanode when pic deleted from gallery
21
22import (
23	"context"
24	"errors"
25	"fmt"
26	"io"
27	"log"
28	"net/http"
29	"net/url"
30	"os"
31	"strconv"
32	"strings"
33	"time"
34
35	"github.com/tgulacsi/picago"
36	"go4.org/ctxutil"
37	"go4.org/syncutil"
38	"golang.org/x/oauth2"
39	"golang.org/x/oauth2/google"
40	"perkeep.org/internal/httputil"
41	"perkeep.org/pkg/blob"
42	"perkeep.org/pkg/importer"
43	"perkeep.org/pkg/schema"
44	"perkeep.org/pkg/schema/nodeattr"
45	"perkeep.org/pkg/search"
46)
47
// scopeURL is the OAuth2 scope placed in the oauth2.Config built by this
// importer.
const scopeURL = "https://picasaweb.google.com/data/"

// runCompleteVersion is a cache-busting version number of the importer
// code. Bump it whenever the importer's behavior changes enough that a
// complete run is warranted. When a run finishes, this value is recorded
// on the account permanode so that later runs can stop early.
const runCompleteVersion = "4"

const (
	// attrPicasaId holds the picasa ID of a photo or of a gallery; the
	// same attribute name is used for both.
	attrPicasaId = "picasaId"

	// acctAttrOAuthToken stores access + " " + refresh + " " + expiry.
	// See encodeToken and decodeToken.
	acctAttrOAuthToken = "oauthToken"
)

// AttrMediaURL is an attribute set on each picasa photo permanode. It
// is the public URL for fetching the contents of the photo file.
const AttrMediaURL = "picasaMediaURL"
70
71var (
72	_ importer.Importer            = imp{}
73	_ importer.ImporterSetupHTMLer = imp{}
74)
75
76func init() {
77	importer.Register("picasa", imp{})
78}
79
80// imp is the implementation of the Picasa importer.
81type imp struct {
82	importer.OAuth2
83}
84
85func (imp) Properties() importer.Properties {
86	return importer.Properties{
87		Title:               "Google Photos (via Picasa API)",
88		Description:         "import your photos from Google Photos. (limited to 10,000 photos per Google Photos API bug for now)",
89		SupportsIncremental: true,
90		NeedsAPIKey:         true,
91	}
92}
93
94type userInfo struct {
95	ID   string // numeric picasa user ID ("11583474931002155675")
96	Name string // "Jane Smith"
97}
98
99func (imp) getUserInfo(ctx context.Context) (*userInfo, error) {
100	u, err := picago.GetUser(ctxutil.Client(ctx), "default")
101	if err != nil {
102		return nil, err
103	}
104	return &userInfo{ID: u.ID, Name: u.Name}, nil
105}
106
107func (imp) IsAccountReady(acctNode *importer.Object) (ok bool, err error) {
108	if acctNode.Attr(importer.AcctAttrUserID) != "" && acctNode.Attr(acctAttrOAuthToken) != "" {
109		return true, nil
110	}
111	return false, nil
112}
113
114func (im imp) SummarizeAccount(acct *importer.Object) string {
115	ok, err := im.IsAccountReady(acct)
116	if err != nil || !ok {
117		return ""
118	}
119	if acct.Attr(importer.AcctAttrGivenName) == "" && acct.Attr(importer.AcctAttrFamilyName) == "" {
120		return fmt.Sprintf("userid %s", acct.Attr(importer.AcctAttrUserID))
121	}
122	return fmt.Sprintf("userid %s (%s %s)",
123		acct.Attr(importer.AcctAttrUserID),
124		acct.Attr(importer.AcctAttrGivenName),
125		acct.Attr(importer.AcctAttrFamilyName))
126}
127
128func (im imp) ServeSetup(w http.ResponseWriter, r *http.Request, ctx *importer.SetupContext) error {
129	oauthConfig, err := im.auth(ctx)
130	if err == nil {
131		// we will get back this with the token, so use it for preserving account info
132		state := "acct:" + ctx.AccountNode.PermanodeRef().String()
133		// AccessType needs to be "offline", as the user is not here all the time;
134		// ApprovalPrompt needs to be "force" to be able to get a RefreshToken
135		// everytime, even for Re-logins, too.
136		//
137		// Source: https://developers.google.com/youtube/v3/guides/authentication#server-side-apps
138		http.Redirect(w, r, oauthConfig.AuthCodeURL(state, oauth2.AccessTypeOffline, oauth2.ApprovalForce), 302)
139	}
140	return err
141}
142
143// CallbackURLParameters returns the needed callback parameters - empty for Google Picasa.
144func (im imp) CallbackURLParameters(acctRef blob.Ref) url.Values {
145	return url.Values{}
146}
147
148func (im imp) ServeCallback(w http.ResponseWriter, r *http.Request, ctx *importer.SetupContext) {
149	oauthConfig, err := im.auth(ctx)
150	if err != nil {
151		httputil.ServeError(w, r, fmt.Errorf("Error getting oauth config: %v", err))
152		return
153	}
154
155	if r.Method != "GET" {
156		http.Error(w, "Expected a GET", 400)
157		return
158	}
159	code := r.FormValue("code")
160	if code == "" {
161		http.Error(w, "Expected a code", 400)
162		return
163	}
164
165	token, err := oauthConfig.Exchange(ctx, code)
166	if err != nil {
167		log.Printf("importer/picasa: token exchange error: %v", err)
168		httputil.ServeError(w, r, fmt.Errorf("token exchange error: %v", err))
169		return
170	}
171
172	log.Printf("importer/picasa: got exhanged token.")
173	picagoCtx := context.WithValue(ctx, ctxutil.HTTPClient, oauthConfig.Client(ctx, token))
174
175	userInfo, err := im.getUserInfo(picagoCtx)
176	if err != nil {
177		log.Printf("Couldn't get username: %v", err)
178		httputil.ServeError(w, r, fmt.Errorf("can't get username: %v", err))
179		return
180	}
181
182	if err := ctx.AccountNode.SetAttrs(
183		importer.AcctAttrUserID, userInfo.ID,
184		importer.AcctAttrName, userInfo.Name,
185		acctAttrOAuthToken, encodeToken(token),
186	); err != nil {
187		httputil.ServeError(w, r, fmt.Errorf("Error setting attribute: %v", err))
188		return
189	}
190	http.Redirect(w, r, ctx.AccountURL(), http.StatusFound)
191}
192
193// encodeToken encodes the oauth2.Token as
194// AccessToken + " " + RefreshToken + " " + Expiry.Unix()
195func encodeToken(token *oauth2.Token) string {
196	if token == nil {
197		return ""
198	}
199	var seconds int64
200	if !token.Expiry.IsZero() {
201		seconds = token.Expiry.Unix()
202	}
203	return token.AccessToken + " " + token.RefreshToken + " " + strconv.FormatInt(seconds, 10)
204}
205
206// decodeToken parses an access token, refresh token, and optional
207// expiry unix timestamp separated by spaces into an oauth2.Token.
208// It returns as much as it can.
209func decodeToken(encoded string) *oauth2.Token {
210	t := new(oauth2.Token)
211	f := strings.Fields(encoded)
212	if len(f) > 0 {
213		t.AccessToken = f[0]
214	}
215	if len(f) > 1 {
216		t.RefreshToken = f[1]
217	}
218	if len(f) > 2 && f[2] != "0" {
219		sec, err := strconv.ParseInt(f[2], 10, 64)
220		if err == nil {
221			t.Expiry = time.Unix(sec, 0)
222		}
223	}
224	return t
225}
226
227func (im imp) auth(ctx *importer.SetupContext) (*oauth2.Config, error) {
228	clientID, secret, err := ctx.Credentials()
229	if err != nil {
230		return nil, err
231	}
232	conf := &oauth2.Config{
233		Endpoint:     google.Endpoint,
234		RedirectURL:  ctx.CallbackURL(),
235		ClientID:     clientID,
236		ClientSecret: secret,
237		Scopes:       []string{scopeURL},
238	}
239	return conf, nil
240}
241
242func (imp) AccountSetupHTML(host *importer.Host) string {
243	// Picasa doesn't allow a path in the origin. Remove it.
244	origin := host.ImporterBaseURL()
245	if u, err := url.Parse(origin); err == nil {
246		u.Path = ""
247		origin = u.String()
248	}
249
250	callback := host.ImporterBaseURL() + "picasa/callback"
251	gphotosURL := host.ImporterBaseURL() + "gphotos"
252	return fmt.Sprintf(`
253<h1>Configuring Picasa</h1>
254<p>Please note that because of a bug in the Picasa API, you cannot retrieve more than 10000 photos. If you have more than 10000 photos, you should use the <a href='%s'>Google Photos importer</a> instead.</p>
255<p>Visit <a href='https://console.developers.google.com/'>https://console.developers.google.com/</a>
256and click <b>"Create Project"</b>.</p>
257<p>Then under "APIs & Auth" in the left sidebar, click on "Credentials", then click the button <b>"Create new Client ID"</b>.</p>
258<p>Use the following settings:</p>
259<ul>
260  <li>Web application</li>
261  <li>Authorized JavaScript origins: <b>%s</b></li>
262  <li>Authorized Redirect URI: <b>%s</b></li>
263</ul>
264<p>Click "Create Client ID".  Copy the "Client ID" and "Client Secret" into the boxes above.</p>
265`, gphotosURL, origin, callback)
266}
267
268// A run is our state for a given run of the importer.
269type run struct {
270	*importer.RunContext
271	incremental bool // whether we've completed a run in the past
272	photoGate   *syncutil.Gate
273}
274
275var forceFullImport, _ = strconv.ParseBool(os.Getenv("CAMLI_PICASA_FULL_IMPORT"))
276
277func (imp) Run(rctx *importer.RunContext) error {
278	clientID, secret, err := rctx.Credentials()
279	if err != nil {
280		return err
281	}
282	acctNode := rctx.AccountNode()
283
284	ocfg := &oauth2.Config{
285		Endpoint:     google.Endpoint,
286		ClientID:     clientID,
287		ClientSecret: secret,
288		Scopes:       []string{scopeURL},
289	}
290
291	token := decodeToken(acctNode.Attr(acctAttrOAuthToken))
292	baseCtx := rctx.Context()
293	ctx := context.WithValue(baseCtx, ctxutil.HTTPClient, ocfg.Client(baseCtx, token))
294
295	root := rctx.RootNode()
296	if root.Attr(nodeattr.Title) == "" {
297		if err := root.SetAttr(
298			nodeattr.Title,
299			fmt.Sprintf("%s - Google Photos", acctNode.Attr(importer.AcctAttrName)),
300		); err != nil {
301			return err
302		}
303	}
304
305	r := &run{
306		RunContext:  rctx,
307		incremental: !forceFullImport && acctNode.Attr(importer.AcctAttrCompletedVersion) == runCompleteVersion,
308		photoGate:   syncutil.NewGate(3),
309	}
310	if err := r.importAlbums(ctx); err != nil {
311		return err
312	}
313
314	if err := acctNode.SetAttrs(importer.AcctAttrCompletedVersion, runCompleteVersion); err != nil {
315		return err
316	}
317
318	return nil
319}
320
321func (r *run) importAlbums(ctx context.Context) error {
322	albums, err := picago.GetAlbums(ctxutil.Client(ctx), "default")
323	if err != nil {
324		return fmt.Errorf("importAlbums: error listing albums: %v", err)
325	}
326	albumsNode, err := r.getTopLevelNode("albums", "Albums")
327	for _, album := range albums {
328		select {
329		case <-ctx.Done():
330			return ctx.Err()
331		default:
332		}
333		if err := r.importAlbum(ctx, albumsNode, album); err != nil {
334			return fmt.Errorf("picasa importer: error importing album %s: %v", album, err)
335		}
336	}
337	return nil
338}
339
340func (r *run) importAlbum(ctx context.Context, albumsNode *importer.Object, album picago.Album) (ret error) {
341	if album.ID == "" {
342		return errors.New("album has no ID")
343	}
344	albumNode, err := albumsNode.ChildPathObject(album.ID)
345	if err != nil {
346		return fmt.Errorf("importAlbum: error listing album: %v", err)
347	}
348
349	dateMod := schema.RFC3339FromTime(album.Updated)
350
351	// Data reference: https://developers.google.com/picasa-web/docs/2.0/reference
352	// TODO(tgulacsi): add more album info
353	changes, err := albumNode.SetAttrs2(
354		attrPicasaId, album.ID,
355		nodeattr.Type, "picasaweb.google.com:album",
356		nodeattr.Title, album.Title,
357		nodeattr.DatePublished, schema.RFC3339FromTime(album.Published),
358		nodeattr.LocationText, album.Location,
359		nodeattr.Description, album.Description,
360		nodeattr.URL, album.URL,
361	)
362	if err != nil {
363		return fmt.Errorf("error setting album attributes: %v", err)
364	}
365	if !changes && r.incremental && albumNode.Attr(nodeattr.DateModified) == dateMod {
366		return nil
367	}
368	defer func() {
369		// Don't update DateModified on the album node until
370		// we've successfully imported all the photos.
371		if ret == nil {
372			ret = albumNode.SetAttr(nodeattr.DateModified, dateMod)
373		}
374	}()
375
376	log.Printf("Importing album %v: %v/%v (published %v, updated %v)", album.ID, album.Name, album.Title, album.Published, album.Updated)
377
378	// TODO(bradfitz): GetPhotos does multiple HTTP requests to
379	// return a slice of all photos. My "InstantUpload/Auto
380	// Backup" album has 6678 photos (and growing) and this
381	// currently takes like 40 seconds. Fix.
382	photos, err := picago.GetPhotos(ctxutil.Client(ctx), "default", album.ID)
383	if err != nil {
384		return err
385	}
386
387	log.Printf("Importing %d photos from album %q (%s)", len(photos), albumNode.Attr(nodeattr.Title),
388		albumNode.PermanodeRef())
389
390	var grp syncutil.Group
391	for i := range photos {
392		select {
393		case <-ctx.Done():
394			return ctx.Err()
395		default:
396		}
397		photo := photos[i]
398		r.photoGate.Start()
399		grp.Go(func() error {
400			defer r.photoGate.Done()
401			return r.updatePhotoInAlbum(ctx, albumNode, photo)
402		})
403	}
404	return grp.Err()
405}
406
407func (r *run) updatePhotoInAlbum(ctx context.Context, albumNode *importer.Object, photo picago.Photo) (ret error) {
408	if photo.ID == "" {
409		return errors.New("photo has no ID")
410	}
411
412	getMediaBytes := func() (io.ReadCloser, error) {
413		log.Printf("Importing media from %v", photo.URL)
414		resp, err := ctxutil.Client(ctx).Get(photo.URL)
415		if err != nil {
416			return nil, fmt.Errorf("importing photo %s: %v", photo.ID, err)
417		}
418		if resp.StatusCode != http.StatusOK {
419			resp.Body.Close()
420			return nil, fmt.Errorf("importing photo %s: status code = %d", photo.ID, resp.StatusCode)
421		}
422		return resp.Body, nil
423	}
424
425	var fileRefStr string
426	idFilename := photo.ID + "-" + photo.Filename
427	photoNode, err := albumNode.ChildPathObjectOrFunc(idFilename, func() (*importer.Object, error) {
428		h := blob.NewHash()
429		rc, err := getMediaBytes()
430		if err != nil {
431			return nil, err
432		}
433		fileRef, err := schema.WriteFileFromReader(r.Context(), r.Host.Target(), photo.Filename, io.TeeReader(rc, h))
434		if err != nil {
435			return nil, err
436		}
437		fileRefStr = fileRef.String()
438		wholeRef := blob.RefFromHash(h)
439		if pn, err := findExistingPermanode(r.Context(), r.Host.Searcher(), wholeRef); err == nil {
440			return r.Host.ObjectFromRef(pn)
441		}
442		return r.Host.NewObject()
443	})
444	if err != nil {
445		return err
446	}
447
448	if fileRefStr == "" {
449		fileRefStr = photoNode.Attr(nodeattr.CamliContent)
450		// Only re-download the source photo if its URL has changed.
451		// Empirically this seems to work: cropping a photo in the
452		// photos.google.com UI causes its URL to change. And it makes
453		// sense, looking at the ugliness of the URLs with all their
454		// encoded/signed state.
455		if !mediaURLsEqual(photoNode.Attr(AttrMediaURL), photo.URL) {
456			rc, err := getMediaBytes()
457			if err != nil {
458				return err
459			}
460			fileRef, err := schema.WriteFileFromReader(r.Context(), r.Host.Target(), photo.Filename, rc)
461			rc.Close()
462			if err != nil {
463				return err
464			}
465			fileRefStr = fileRef.String()
466		}
467	}
468
469	title := strings.TrimSpace(photo.Description)
470	if strings.Contains(title, "\n") {
471		title = title[:strings.Index(title, "\n")]
472	}
473	if title == "" && schema.IsInterestingTitle(photo.Filename) {
474		title = photo.Filename
475	}
476
477	// TODO(tgulacsi): add more attrs (comments ?)
478	// for names, see http://schema.org/ImageObject and http://schema.org/CreativeWork
479	attrs := []string{
480		nodeattr.CamliContent, fileRefStr,
481		attrPicasaId, photo.ID,
482		nodeattr.Title, title,
483		nodeattr.Description, photo.Description,
484		nodeattr.LocationText, photo.Location,
485		nodeattr.DateModified, schema.RFC3339FromTime(photo.Updated),
486		nodeattr.DatePublished, schema.RFC3339FromTime(photo.Published),
487		nodeattr.URL, photo.PageURL,
488	}
489	if photo.Latitude != 0 || photo.Longitude != 0 {
490		attrs = append(attrs,
491			nodeattr.Latitude, fmt.Sprintf("%f", photo.Latitude),
492			nodeattr.Longitude, fmt.Sprintf("%f", photo.Longitude),
493		)
494	}
495	if err := photoNode.SetAttrs(attrs...); err != nil {
496		return err
497	}
498	if err := photoNode.SetAttrValues("tag", photo.Keywords); err != nil {
499		return err
500	}
501	if photo.Position > 0 {
502		if err := albumNode.SetAttr(
503			nodeattr.CamliPathOrderColon+strconv.Itoa(photo.Position-1),
504			photoNode.PermanodeRef().String()); err != nil {
505			return err
506		}
507	}
508
509	// Do this last, after we're sure the "camliContent" attribute
510	// has been saved successfully, because this is the one that
511	// causes us to do it again in the future or not.
512	if err := photoNode.SetAttrs(AttrMediaURL, photo.URL); err != nil {
513		return err
514	}
515	return nil
516}
517
518var testTopLevelNode *importer.Object
519
520func (r *run) getTopLevelNode(path string, title string) (*importer.Object, error) {
521	if testTopLevelNode != nil {
522		return testTopLevelNode, nil
523	}
524	childObject, err := r.RootNode().ChildPathObject(path)
525	if err != nil {
526		return nil, err
527	}
528
529	if err := childObject.SetAttr(nodeattr.Title, title); err != nil {
530		return nil, err
531	}
532	return childObject, nil
533}
534
535var sensitiveAttrs = []string{
536	nodeattr.Type,
537	attrPicasaId,
538	nodeattr.Title,
539	nodeattr.DateModified,
540	nodeattr.DatePublished,
541	nodeattr.Latitude,
542	nodeattr.Longitude,
543	nodeattr.Description,
544}
545
546// findExistingPermanode finds an existing permanode that has a
547// camliContent pointing to a file with the provided wholeRef and
548// doesn't have any conflicting attributes that would prevent the
549// picasa importer from re-using that permanode for its own use.
550func findExistingPermanode(ctx context.Context, qs search.QueryDescriber, wholeRef blob.Ref) (pn blob.Ref, err error) {
551	res, err := qs.Query(ctx, &search.SearchQuery{
552		Constraint: &search.Constraint{
553			Permanode: &search.PermanodeConstraint{
554				Attr: "camliContent",
555				ValueInSet: &search.Constraint{
556					File: &search.FileConstraint{
557						WholeRef: wholeRef,
558					},
559				},
560			},
561		},
562		Describe: &search.DescribeRequest{
563			Depth: 1,
564		},
565	})
566	if err != nil {
567		return
568	}
569	if res.Describe == nil {
570		return pn, os.ErrNotExist
571	}
572Res:
573	for _, resBlob := range res.Blobs {
574		br := resBlob.Blob
575		desBlob, ok := res.Describe.Meta[br.String()]
576		if !ok || desBlob.Permanode == nil {
577			continue
578		}
579		attrs := desBlob.Permanode.Attr
580		for _, attr := range sensitiveAttrs {
581			if attrs.Get(attr) != "" {
582				continue Res
583			}
584		}
585		return br, nil
586	}
587	return pn, os.ErrNotExist
588}
589
// mediaURLsEqual reports whether a and b refer to the same media.
//
// When both URLs contain ".googleusercontent.com/", only the parts from
// the first occurrence of that substring onward are compared, so whatever
// precedes it in either URL is ignored. Otherwise the two strings are
// compared verbatim.
func mediaURLsEqual(a, b string) bool {
	const sub = ".googleusercontent.com/"
	ai, bi := strings.Index(a, sub), strings.Index(b, sub)
	if ai < 0 || bi < 0 {
		return a == b
	}
	return a[ai:] == b[bi:]
}
599