1/* 2Copyright 2013 The Perkeep Authors. 3 4Licensed under the Apache License, Version 2.0 (the "License"); 5you may not use this file except in compliance with the License. 6You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10Unless required by applicable law or agreed to in writing, software 11distributed under the License is distributed on an "AS IS" BASIS, 12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13See the License for the specific language governing permissions and 14limitations under the License. 15*/ 16 17package main 18 19import ( 20 "archive/zip" 21 "context" 22 "crypto/sha1" 23 "fmt" 24 "io" 25 "log" 26 "mime" 27 "net/http" 28 "path" 29 "sort" 30 31 "perkeep.org/internal/httputil" 32 "perkeep.org/pkg/blob" 33 "perkeep.org/pkg/schema" 34 "perkeep.org/pkg/search" 35 "perkeep.org/pkg/types/camtypes" 36) 37 38type zipHandler struct { 39 fetcher blob.Fetcher 40 cl client // Used for search and describe requests. 41 // root is the "parent" permanode of everything to zip. 42 // Either a directory permanode, or a permanode with members. 43 root blob.Ref 44 // Optional name to use in the response header 45 filename string 46} 47 48// blobFile contains all the information we need about 49// a file blob to add the corresponding file to a zip. 50type blobFile struct { 51 blobRef blob.Ref 52 // path is the full path of the file from the root of the zip. 53 // slashes are always forward slashes, per the zip spec. 54 path string 55} 56 57type sortedFiles []*blobFile 58 59func (s sortedFiles) Less(i, j int) bool { return s[i].path < s[j].path } 60func (s sortedFiles) Len() int { return len(s) } 61func (s sortedFiles) Swap(i, j int) { s[i], s[j] = s[j], s[i] } 62 63func (zh *zipHandler) describeMembers(br blob.Ref) (*search.DescribeResponse, error) { 64 res, err := zh.cl.Query(context.TODO(), &search.SearchQuery{ 65 Constraint: &search.Constraint{ 66 BlobRefPrefix: br.String(), 67 CamliType: "permanode", 68 }, 69 Describe: &search.DescribeRequest{ 70 Depth: 1, 71 Rules: []*search.DescribeRule{ 72 { 73 Attrs: []string{"camliContent", "camliContentImage", "camliMember"}, 74 }, 75 }, 76 }, 77 Limit: -1, 78 }) 79 if err != nil { 80 return nil, fmt.Errorf("Could not describe %v: %v", br, err) 81 } 82 if res == nil || res.Describe == nil { 83 return nil, fmt.Errorf("no describe result for %v", br) 84 } 85 return res.Describe, nil 86} 87 88// blobList returns the list of file blobs "under" dirBlob. 89// It traverses permanode directories and permanode with members (collections). 90func (zh *zipHandler) blobList(dirPath string, dirBlob blob.Ref) ([]*blobFile, error) { 91 // dr := zh.search.NewDescribeRequest() 92 // dr.Describe(dirBlob, 3) 93 // res, err := dr.Result() 94 // if err != nil { 95 // return nil, fmt.Errorf("Could not describe %v: %v", dirBlob, err) 96 // } 97 res, err := zh.describeMembers(dirBlob) 98 if err != nil { 99 return nil, err 100 } 101 102 described := res.Meta[dirBlob.String()] 103 members := described.Members() 104 dirBlobPath, _, isDir := described.PermanodeDir() 105 if len(members) == 0 && !isDir { 106 return nil, nil 107 } 108 var list []*blobFile 109 if isDir { 110 dirRoot := dirBlobPath[1] 111 children, err := zh.blobsFromDir("", dirRoot) 112 if err != nil { 113 return nil, fmt.Errorf("Could not get list of blobs from %v: %v", dirRoot, err) 114 } 115 list = append(list, children...) 116 return list, nil 117 } 118 for _, member := range members { 119 if fileBlobPath, fileInfo, ok := getFileInfo(member.BlobRef, res.Meta); ok { 120 // file 121 list = append(list, 122 &blobFile{fileBlobPath[1], path.Join(dirPath, fileInfo.FileName)}) 123 continue 124 } 125 if dirBlobPath, dirInfo, ok := getDirInfo(member.BlobRef, res.Meta); ok { 126 // directory 127 newZipRoot := dirBlobPath[1] 128 children, err := zh.blobsFromDir( 129 path.Join(dirPath, dirInfo.FileName), newZipRoot) 130 if err != nil { 131 return nil, fmt.Errorf("Could not get list of blobs from %v: %v", newZipRoot, err) 132 } 133 list = append(list, children...) 134 // TODO(mpl): we assume a directory permanode does not also have members. 135 // I know there is nothing preventing it, but does it make any sense? 136 continue 137 } 138 // it might have members, so recurse 139 // If it does have members, we must consider it as a pseudo dir, 140 // so we can build a fullpath for each of its members. 141 // As a dir name, we're using its title if it has one, its (shortened) 142 // blobref otherwise. 143 pseudoDirName := member.Title() 144 if pseudoDirName == "" { 145 pseudoDirName = member.BlobRef.DigestPrefix(10) 146 } 147 fullpath := path.Join(dirPath, pseudoDirName) 148 moreMembers, err := zh.blobList(fullpath, member.BlobRef) 149 if err != nil { 150 return nil, fmt.Errorf("Could not get list of blobs from %v: %v", member.BlobRef, err) 151 } 152 list = append(list, moreMembers...) 153 } 154 return list, nil 155} 156 157// blobsFromDir returns the list of file blobs in directory dirBlob. 158// It only traverses permanode directories. 159func (zh *zipHandler) blobsFromDir(dirPath string, dirBlob blob.Ref) ([]*blobFile, error) { 160 var list []*blobFile 161 dr, err := schema.NewDirReader(context.TODO(), zh.fetcher, dirBlob) 162 if err != nil { 163 return nil, fmt.Errorf("Could not read dir blob %v: %v", dirBlob, err) 164 } 165 ent, err := dr.Readdir(context.TODO(), -1) 166 if err != nil { 167 return nil, fmt.Errorf("Could not read dir entries: %v", err) 168 } 169 for _, v := range ent { 170 fullpath := path.Join(dirPath, v.FileName()) 171 switch v.CamliType() { 172 case "file": 173 list = append(list, &blobFile{v.BlobRef(), fullpath}) 174 case "directory": 175 children, err := zh.blobsFromDir(fullpath, v.BlobRef()) 176 if err != nil { 177 return nil, fmt.Errorf("Could not get list of blobs from %v: %v", v.BlobRef(), err) 178 } 179 list = append(list, children...) 180 } 181 } 182 return list, nil 183} 184 185// renameDuplicates goes through bf to check for duplicate filepaths. 186// It renames duplicate filepaths and returns a new slice, sorted by 187// file path. 188func renameDuplicates(bf []*blobFile) sortedFiles { 189 noDup := make(map[string]blob.Ref) 190 // use a map to detect duplicates and rename them 191 for _, file := range bf { 192 if _, ok := noDup[file.path]; ok { 193 // path already exists, so rename 194 suffix := 0 195 var newname string 196 for { 197 suffix++ 198 ext := path.Ext(file.path) 199 newname = fmt.Sprintf("%s(%d)%s", 200 file.path[:len(file.path)-len(ext)], suffix, ext) 201 if _, ok := noDup[newname]; !ok { 202 break 203 } 204 } 205 noDup[newname] = file.blobRef 206 } else { 207 noDup[file.path] = file.blobRef 208 } 209 } 210 211 // reinsert in a slice and sort it 212 var sorted sortedFiles 213 for p, b := range noDup { 214 sorted = append(sorted, &blobFile{path: p, blobRef: b}) 215 } 216 sort.Sort(sorted) 217 return sorted 218} 219 220// ServeHTTP streams a zip archive of all the files "under" 221// zh.root. That is, all the files pointed by file permanodes, 222// which are directly members of zh.root or recursively down 223// directory permanodes and permanodes members. 224// To build the fullpath of a file in a collection, it uses 225// the collection title if present, its blobRef otherwise, as 226// a directory name. 227func (zh *zipHandler) ServeHTTP(rw http.ResponseWriter, req *http.Request) { 228 // TODO: use http.ServeContent, so Range requests work and downloads can be resumed. 229 // Will require calculating the zip length once first (ideally as cheaply as possible, 230 // with dummy counting writer and dummy all-zero-byte-files of a fixed size), 231 // and then making a dummy ReadSeeker for ServeContent that can seek to the end, 232 // and then seek back to the beginning, but then seeks forward make it remember 233 // to skip that many bytes from the archive/zip writer when answering Reads. 234 if !httputil.IsGet(req) { 235 http.Error(rw, "Invalid method", http.StatusMethodNotAllowed) 236 return 237 } 238 bf, err := zh.blobList("", zh.root) 239 if err != nil { 240 log.Printf("Could not serve zip for %v: %v", zh.root, err) 241 http.Error(rw, "Server error", http.StatusInternalServerError) 242 return 243 } 244 blobFiles := renameDuplicates(bf) 245 246 // TODO(mpl): streaming directly won't work on appengine if the size goes 247 // over 32 MB. Deal with that. 248 h := rw.Header() 249 h.Set("Content-Type", "application/zip") 250 filename := zh.filename 251 if filename == "" { 252 filename = "download.zip" 253 } 254 h.Set("Content-Disposition", mime.FormatMediaType("attachment", map[string]string{"filename": filename})) 255 zw := zip.NewWriter(rw) 256 etag := sha1.New() 257 for _, file := range blobFiles { 258 etag.Write([]byte(file.blobRef.String())) 259 } 260 h.Set("Etag", fmt.Sprintf(`"%x"`, etag.Sum(nil))) 261 262 for _, file := range blobFiles { 263 fr, err := schema.NewFileReader(context.TODO(), zh.fetcher, file.blobRef) 264 if err != nil { 265 log.Printf("Can not add %v in zip, not a file: %v", file.blobRef, err) 266 http.Error(rw, "Server error", http.StatusInternalServerError) 267 return 268 } 269 zh := zip.FileHeader{ 270 Name: file.path, 271 Method: zip.Store, 272 } 273 zh.SetModTime(fr.ModTime()) 274 f, err := zw.CreateHeader(&zh) 275 if err != nil { 276 log.Printf("Could not create %q in zip: %v", file.path, err) 277 http.Error(rw, "Server error", http.StatusInternalServerError) 278 return 279 } 280 _, err = io.Copy(f, fr) 281 fr.Close() 282 if err != nil { 283 log.Printf("Could not zip %q: %v", file.path, err) 284 return 285 } 286 } 287 err = zw.Close() 288 if err != nil { 289 log.Printf("Could not close zipwriter: %v", err) 290 return 291 } 292} 293 294// TODO(mpl): refactor with getFileInfo 295func getDirInfo(item blob.Ref, peers map[string]*search.DescribedBlob) (path []blob.Ref, di *camtypes.FileInfo, ok bool) { 296 described := peers[item.String()] 297 if described == nil || 298 described.Permanode == nil || 299 described.Permanode.Attr == nil { 300 return 301 } 302 contentRef := described.Permanode.Attr.Get("camliContent") 303 if contentRef == "" { 304 return 305 } 306 if cdes := peers[contentRef]; cdes != nil && cdes.Dir != nil { 307 return []blob.Ref{described.BlobRef, cdes.BlobRef}, cdes.Dir, true 308 } 309 return 310} 311