1/*
2Copyright 2013 The Perkeep Authors.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8     http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15*/
16
17package main
18
19import (
20	"archive/zip"
21	"context"
22	"crypto/sha1"
23	"fmt"
24	"io"
25	"log"
26	"mime"
27	"net/http"
28	"path"
29	"sort"
30
31	"perkeep.org/internal/httputil"
32	"perkeep.org/pkg/blob"
33	"perkeep.org/pkg/schema"
34	"perkeep.org/pkg/search"
35	"perkeep.org/pkg/types/camtypes"
36)
37
// zipHandler is an HTTP handler (see ServeHTTP) that streams a zip archive
// of all the files found "under" the root permanode.
type zipHandler struct {
	// fetcher is used to read the directory and file schema blobs
	// that end up in the zip.
	fetcher blob.Fetcher
	cl      client // Used for search and describe requests.
	// root is the "parent" permanode of everything to zip.
	// Either a directory permanode, or a permanode with members.
	root blob.Ref
	// filename is the optional name to use in the Content-Disposition
	// response header. If empty, "download.zip" is used.
	filename string
}
47
// blobFile contains all the information we need about
// a file blob to add the corresponding file to a zip.
//
// NOTE: some callers build blobFile values with positional (unkeyed)
// literals, so the order of the fields below must not change.
type blobFile struct {
	// blobRef is the file blob, i.e. the ref handed to
	// schema.NewFileReader when the file is written to the zip.
	blobRef blob.Ref
	// path is the full path of the file from the root of the zip.
	// Slashes are always forward slashes, per the zip spec.
	path string
}
56
57type sortedFiles []*blobFile
58
59func (s sortedFiles) Less(i, j int) bool { return s[i].path < s[j].path }
60func (s sortedFiles) Len() int           { return len(s) }
61func (s sortedFiles) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
62
63func (zh *zipHandler) describeMembers(br blob.Ref) (*search.DescribeResponse, error) {
64	res, err := zh.cl.Query(context.TODO(), &search.SearchQuery{
65		Constraint: &search.Constraint{
66			BlobRefPrefix: br.String(),
67			CamliType:     "permanode",
68		},
69		Describe: &search.DescribeRequest{
70			Depth: 1,
71			Rules: []*search.DescribeRule{
72				{
73					Attrs: []string{"camliContent", "camliContentImage", "camliMember"},
74				},
75			},
76		},
77		Limit: -1,
78	})
79	if err != nil {
80		return nil, fmt.Errorf("Could not describe %v: %v", br, err)
81	}
82	if res == nil || res.Describe == nil {
83		return nil, fmt.Errorf("no describe result for %v", br)
84	}
85	return res.Describe, nil
86}
87
88// blobList returns the list of file blobs "under" dirBlob.
89// It traverses permanode directories and permanode with members (collections).
90func (zh *zipHandler) blobList(dirPath string, dirBlob blob.Ref) ([]*blobFile, error) {
91	//	dr := zh.search.NewDescribeRequest()
92	//	dr.Describe(dirBlob, 3)
93	//	res, err := dr.Result()
94	//	if err != nil {
95	//		return nil, fmt.Errorf("Could not describe %v: %v", dirBlob, err)
96	//	}
97	res, err := zh.describeMembers(dirBlob)
98	if err != nil {
99		return nil, err
100	}
101
102	described := res.Meta[dirBlob.String()]
103	members := described.Members()
104	dirBlobPath, _, isDir := described.PermanodeDir()
105	if len(members) == 0 && !isDir {
106		return nil, nil
107	}
108	var list []*blobFile
109	if isDir {
110		dirRoot := dirBlobPath[1]
111		children, err := zh.blobsFromDir("", dirRoot)
112		if err != nil {
113			return nil, fmt.Errorf("Could not get list of blobs from %v: %v", dirRoot, err)
114		}
115		list = append(list, children...)
116		return list, nil
117	}
118	for _, member := range members {
119		if fileBlobPath, fileInfo, ok := getFileInfo(member.BlobRef, res.Meta); ok {
120			// file
121			list = append(list,
122				&blobFile{fileBlobPath[1], path.Join(dirPath, fileInfo.FileName)})
123			continue
124		}
125		if dirBlobPath, dirInfo, ok := getDirInfo(member.BlobRef, res.Meta); ok {
126			// directory
127			newZipRoot := dirBlobPath[1]
128			children, err := zh.blobsFromDir(
129				path.Join(dirPath, dirInfo.FileName), newZipRoot)
130			if err != nil {
131				return nil, fmt.Errorf("Could not get list of blobs from %v: %v", newZipRoot, err)
132			}
133			list = append(list, children...)
134			// TODO(mpl): we assume a directory permanode does not also have members.
135			// I know there is nothing preventing it, but does it make any sense?
136			continue
137		}
138		// it might have members, so recurse
139		// If it does have members, we must consider it as a pseudo dir,
140		// so we can build a fullpath for each of its members.
141		// As a dir name, we're using its title if it has one, its (shortened)
142		// blobref otherwise.
143		pseudoDirName := member.Title()
144		if pseudoDirName == "" {
145			pseudoDirName = member.BlobRef.DigestPrefix(10)
146		}
147		fullpath := path.Join(dirPath, pseudoDirName)
148		moreMembers, err := zh.blobList(fullpath, member.BlobRef)
149		if err != nil {
150			return nil, fmt.Errorf("Could not get list of blobs from %v: %v", member.BlobRef, err)
151		}
152		list = append(list, moreMembers...)
153	}
154	return list, nil
155}
156
157// blobsFromDir returns the list of file blobs in directory dirBlob.
158// It only traverses permanode directories.
159func (zh *zipHandler) blobsFromDir(dirPath string, dirBlob blob.Ref) ([]*blobFile, error) {
160	var list []*blobFile
161	dr, err := schema.NewDirReader(context.TODO(), zh.fetcher, dirBlob)
162	if err != nil {
163		return nil, fmt.Errorf("Could not read dir blob %v: %v", dirBlob, err)
164	}
165	ent, err := dr.Readdir(context.TODO(), -1)
166	if err != nil {
167		return nil, fmt.Errorf("Could not read dir entries: %v", err)
168	}
169	for _, v := range ent {
170		fullpath := path.Join(dirPath, v.FileName())
171		switch v.CamliType() {
172		case "file":
173			list = append(list, &blobFile{v.BlobRef(), fullpath})
174		case "directory":
175			children, err := zh.blobsFromDir(fullpath, v.BlobRef())
176			if err != nil {
177				return nil, fmt.Errorf("Could not get list of blobs from %v: %v", v.BlobRef(), err)
178			}
179			list = append(list, children...)
180		}
181	}
182	return list, nil
183}
184
185// renameDuplicates goes through bf to check for duplicate filepaths.
186// It renames duplicate filepaths and returns a new slice, sorted by
187// file path.
188func renameDuplicates(bf []*blobFile) sortedFiles {
189	noDup := make(map[string]blob.Ref)
190	// use a map to detect duplicates and rename them
191	for _, file := range bf {
192		if _, ok := noDup[file.path]; ok {
193			// path already exists, so rename
194			suffix := 0
195			var newname string
196			for {
197				suffix++
198				ext := path.Ext(file.path)
199				newname = fmt.Sprintf("%s(%d)%s",
200					file.path[:len(file.path)-len(ext)], suffix, ext)
201				if _, ok := noDup[newname]; !ok {
202					break
203				}
204			}
205			noDup[newname] = file.blobRef
206		} else {
207			noDup[file.path] = file.blobRef
208		}
209	}
210
211	// reinsert in a slice and sort it
212	var sorted sortedFiles
213	for p, b := range noDup {
214		sorted = append(sorted, &blobFile{path: p, blobRef: b})
215	}
216	sort.Sort(sorted)
217	return sorted
218}
219
220// ServeHTTP streams a zip archive of all the files "under"
221// zh.root. That is, all the files pointed by file permanodes,
222// which are directly members of zh.root or recursively down
223// directory permanodes and permanodes members.
224// To build the fullpath of a file in a collection, it uses
225// the collection title if present, its blobRef otherwise, as
226// a directory name.
227func (zh *zipHandler) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
228	// TODO: use http.ServeContent, so Range requests work and downloads can be resumed.
229	// Will require calculating the zip length once first (ideally as cheaply as possible,
230	// with dummy counting writer and dummy all-zero-byte-files of a fixed size),
231	// and then making a dummy ReadSeeker for ServeContent that can seek to the end,
232	// and then seek back to the beginning, but then seeks forward make it remember
233	// to skip that many bytes from the archive/zip writer when answering Reads.
234	if !httputil.IsGet(req) {
235		http.Error(rw, "Invalid method", http.StatusMethodNotAllowed)
236		return
237	}
238	bf, err := zh.blobList("", zh.root)
239	if err != nil {
240		log.Printf("Could not serve zip for %v: %v", zh.root, err)
241		http.Error(rw, "Server error", http.StatusInternalServerError)
242		return
243	}
244	blobFiles := renameDuplicates(bf)
245
246	// TODO(mpl): streaming directly won't work on appengine if the size goes
247	// over 32 MB. Deal with that.
248	h := rw.Header()
249	h.Set("Content-Type", "application/zip")
250	filename := zh.filename
251	if filename == "" {
252		filename = "download.zip"
253	}
254	h.Set("Content-Disposition", mime.FormatMediaType("attachment", map[string]string{"filename": filename}))
255	zw := zip.NewWriter(rw)
256	etag := sha1.New()
257	for _, file := range blobFiles {
258		etag.Write([]byte(file.blobRef.String()))
259	}
260	h.Set("Etag", fmt.Sprintf(`"%x"`, etag.Sum(nil)))
261
262	for _, file := range blobFiles {
263		fr, err := schema.NewFileReader(context.TODO(), zh.fetcher, file.blobRef)
264		if err != nil {
265			log.Printf("Can not add %v in zip, not a file: %v", file.blobRef, err)
266			http.Error(rw, "Server error", http.StatusInternalServerError)
267			return
268		}
269		zh := zip.FileHeader{
270			Name:   file.path,
271			Method: zip.Store,
272		}
273		zh.SetModTime(fr.ModTime())
274		f, err := zw.CreateHeader(&zh)
275		if err != nil {
276			log.Printf("Could not create %q in zip: %v", file.path, err)
277			http.Error(rw, "Server error", http.StatusInternalServerError)
278			return
279		}
280		_, err = io.Copy(f, fr)
281		fr.Close()
282		if err != nil {
283			log.Printf("Could not zip %q: %v", file.path, err)
284			return
285		}
286	}
287	err = zw.Close()
288	if err != nil {
289		log.Printf("Could not close zipwriter: %v", err)
290		return
291	}
292}
293
294// TODO(mpl): refactor with getFileInfo
295func getDirInfo(item blob.Ref, peers map[string]*search.DescribedBlob) (path []blob.Ref, di *camtypes.FileInfo, ok bool) {
296	described := peers[item.String()]
297	if described == nil ||
298		described.Permanode == nil ||
299		described.Permanode.Attr == nil {
300		return
301	}
302	contentRef := described.Permanode.Attr.Get("camliContent")
303	if contentRef == "" {
304		return
305	}
306	if cdes := peers[contentRef]; cdes != nil && cdes.Dir != nil {
307		return []blob.Ref{described.BlobRef, cdes.BlobRef}, cdes.Dir, true
308	}
309	return
310}
311