/*
Copyright 2011 The Perkeep Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package schema manipulates Camlistore schema blobs.
//
// A schema blob is a JSON-encoded blob that describes other blobs.
// See documentation in Perkeep's doc/schema/ directory.
package schema // import "perkeep.org/pkg/schema"

import (
	"bytes"
	"context"
	"crypto/rand"
	"encoding/base64"
	"encoding/json"
	"errors"
	"fmt"
	"hash"
	"io"
	"log"
	"os"
	"regexp"
	"strconv"
	"strings"
	"sync"
	"time"
	"unicode/utf8"

	"github.com/bradfitz/latlong"
	"perkeep.org/pkg/blob"

	"github.com/rwcarlsen/goexif/exif"
	"github.com/rwcarlsen/goexif/tiff"
	"go4.org/strutil"
	"go4.org/types"
)

func init() {
	// Intern common strings as used by schema blobs (camliType values), to reduce
	// index memory usage, which uses strutil.StringFromBytes.
	strutil.RegisterCommonString(
		"bytes",
		"claim",
		"directory",
		"file",
		"permanode",
		"share",
		"static-set",
		"symlink",
	)
}

// MaxSchemaBlobSize represents the upper bound for how large
// a schema blob may be.
const MaxSchemaBlobSize = 1 << 20

var (
	// ErrNoCamliVersion is returned when a schema map lacks the
	// required "camliVersion" key.
	ErrNoCamliVersion = errors.New("schema: no camliVersion key in map")
)

// clockNow is time.Now, except in tests, which can replace it to get
// deterministic claim dates.
var clockNow = time.Now

// StatHasher abstracts stat'ing and content-hashing a local file, so
// tests can substitute fakes for the real filesystem.
type StatHasher interface {
	Lstat(fileName string) (os.FileInfo, error)
	Hash(fileName string) (blob.Ref, error)
}

// File is the interface returned when opening a DirectoryEntry that
// is a regular file.
type File interface {
	io.Closer
	io.ReaderAt
	io.Reader
	Size() int64
}

// Directory is a read-only interface to a "directory" schema blob.
type Directory interface {
	// Readdir reads the contents of the directory associated with dr
	// and returns an array of up to n DirectoryEntries structures.
	// Subsequent calls on the same file will yield further
	// DirectoryEntries.
	// If n > 0, Readdir returns at most n DirectoryEntry structures. In
	// this case, if Readdir returns an empty slice, it will return
	// a non-nil error explaining why. At the end of a directory,
	// the error is io.EOF.
	// If n <= 0, Readdir returns all the DirectoryEntries from the
	// directory in a single slice. In this case, if Readdir succeeds
	// (reads all the way to the end of the directory), it returns the
	// slice and a nil error. If it encounters an error before the
	// end of the directory, Readdir returns the DirectoryEntry read
	// until that point and a non-nil error.
	Readdir(ctx context.Context, n int) ([]DirectoryEntry, error)
}

// Symlink is the read-only interface to a "symlink" schema blob.
type Symlink interface {
	// .. TODO
}

// FIFO is the read-only interface to a "fifo" schema blob.
type FIFO interface {
	// .. TODO
}

// Socket is the read-only interface to a "socket" schema blob.
type Socket interface {
	// .. TODO
}

// DirectoryEntry is a read-only interface to an entry in a (static)
// directory.
type DirectoryEntry interface {
	// CamliType returns the schema blob's "camliType" field.
	// This may be "file", "directory", "symlink", or other more
	// obscure types added in the future.
	CamliType() string

	FileName() string
	BlobRef() blob.Ref

	File(ctx context.Context) (File, error)           // if camliType is "file"
	Directory(ctx context.Context) (Directory, error) // if camliType is "directory"
	Symlink() (Symlink, error)                        // if camliType is "symlink"
	FIFO() (FIFO, error)                              // if camliType is "fifo"
	Socket() (Socket, error)                          // if camliType is "socket"
}

// dirEntry is the default implementation of DirectoryEntry
type dirEntry struct {
	ss      superset
	fetcher blob.Fetcher
	fr      *FileReader // or nil if not a file
	dr      *DirReader  // or nil if not a directory
}

// A SearchQuery must be of type *search.SearchQuery.
// This type breaks an otherwise-circular dependency.
type SearchQuery interface{}

func (de *dirEntry) CamliType() string {
	return de.ss.Type
}

func (de *dirEntry) FileName() string {
	return de.ss.FileNameString()
}

func (de *dirEntry) BlobRef() blob.Ref {
	return de.ss.BlobRef
}

// File returns a reader for the entry's file contents, lazily creating
// (and caching) a FileReader on first use.
func (de *dirEntry) File(ctx context.Context) (File, error) {
	if de.fr == nil {
		if de.ss.Type != "file" {
			return nil, fmt.Errorf("DirectoryEntry is camliType %q, not %q", de.ss.Type, "file")
		}
		fr, err := NewFileReader(ctx, de.fetcher, de.ss.BlobRef)
		if err != nil {
			return nil, err
		}
		de.fr = fr
	}
	return de.fr, nil
}

// Directory returns a reader for the entry's directory contents, lazily
// creating (and caching) a DirReader on first use.
func (de *dirEntry) Directory(ctx context.Context) (Directory, error) {
	if de.dr == nil {
		if de.ss.Type != "directory" {
			return nil, fmt.Errorf("DirectoryEntry is camliType %q, not %q", de.ss.Type, "directory")
		}
		dr, err := NewDirReader(ctx, de.fetcher, de.ss.BlobRef)
		if err != nil {
			return nil, err
		}
		de.dr = dr
	}
	return de.dr, nil
}

func (de *dirEntry) Symlink() (Symlink, error) {
	return 0, errors.New("TODO: Symlink not implemented")
}

func (de *dirEntry) FIFO() (FIFO, error) {
	return 0, errors.New("TODO: FIFO not implemented")
}

func (de *dirEntry) Socket() (Socket, error) {
	return 0, errors.New("TODO: Socket not implemented")
}

// newDirectoryEntry takes a superset and returns a DirectoryEntry if
// the superset is valid and represents an entry in a directory. It
// must be of type "file", "directory", "symlink", "fifo" or "socket".
// TODO: "char", "block", probably. later.
func newDirectoryEntry(fetcher blob.Fetcher, ss *superset) (DirectoryEntry, error) {
	if ss == nil {
		return nil, errors.New("ss was nil")
	}
	if !ss.BlobRef.Valid() {
		return nil, errors.New("ss.BlobRef was invalid")
	}
	switch ss.Type {
	case "file", "directory", "symlink", "fifo", "socket":
		// Okay
	default:
		return nil, fmt.Errorf("invalid DirectoryEntry camliType of %q", ss.Type)
	}
	de := &dirEntry{ss: *ss, fetcher: fetcher} // defensive copy
	return de, nil
}

// NewDirectoryEntryFromBlobRef takes a BlobRef and returns a
// DirectoryEntry if the BlobRef contains a type "file", "directory",
// "symlink", "fifo" or "socket".
// TODO: "char", "block", probably. later.
func NewDirectoryEntryFromBlobRef(ctx context.Context, fetcher blob.Fetcher, blobRef blob.Ref) (DirectoryEntry, error) {
	ss := new(superset)
	err := ss.setFromBlobRef(ctx, fetcher, blobRef)
	if err != nil {
		return nil, fmt.Errorf("schema/filereader: can't fill superset: %v", err)
	}
	return newDirectoryEntry(fetcher, ss)
}

// superset represents the superset of common Perkeep JSON schema
// keys as a convenient json.Unmarshal target.
// TODO(bradfitz): unexport this type. Getting too gross. Move to schema.Blob
type superset struct {
	// BlobRef isn't for a particular metadata blob field, but included
	// for convenience.
	BlobRef blob.Ref

	Version int    `json:"camliVersion"`
	Type    string `json:"camliType"`

	Signer blob.Ref `json:"camliSigner"`
	Sig    string   `json:"camliSig"`

	ClaimType string         `json:"claimType"`
	ClaimDate types.Time3339 `json:"claimDate"`

	Permanode blob.Ref `json:"permaNode"`
	Attribute string   `json:"attribute"`
	Value     string   `json:"value"`

	// FileName and FileNameBytes represent one of the two
	// representations of file names in schema blobs. They should
	// not be accessed directly. Use the FileNameString accessor
	// instead, which also sanitizes malicious values.
	FileName      string        `json:"fileName"`
	FileNameBytes []interface{} `json:"fileNameBytes"`

	SymlinkTarget      string        `json:"symlinkTarget"`
	SymlinkTargetBytes []interface{} `json:"symlinkTargetBytes"`

	UnixPermission string `json:"unixPermission"`
	UnixOwnerId    int    `json:"unixOwnerId"`
	UnixOwner      string `json:"unixOwner"`
	UnixGroupId    int    `json:"unixGroupId"`
	UnixGroup      string `json:"unixGroup"`
	UnixMtime      string `json:"unixMtime"`
	UnixCtime      string `json:"unixCtime"`
	UnixAtime      string `json:"unixAtime"`

	// Parts are references to the data chunks of a regular file (or a "bytes" schema blob).
	// See doc/schema/bytes.txt and doc/schema/files/file.txt.
	Parts []*BytesPart `json:"parts"`

	Entries   blob.Ref   `json:"entries"`   // for directories, a blobref to a static-set
	Members   []blob.Ref `json:"members"`   // for static sets (for directory static-sets: blobrefs to child dirs/files)
	MergeSets []blob.Ref `json:"mergeSets"` // each is a "sub static-set", that has either Members or MergeSets. For large dirs.

	// Search allows a "share" blob to share an entire search. Contrast with "target".
	Search SearchQuery `json:"search"`
	// Target is a "share" blob's target (the thing being shared)
	// Or it is the object being deleted in a DeleteClaim claim.
	Target blob.Ref `json:"target"`
	// Transitive is a property of a "share" blob.
	Transitive bool `json:"transitive"`
	// AuthType is a "share" blob's authentication type that is required.
	// Currently (2013-01-02) just "haveref" (if you know the share's blobref,
	// you get access: the secret URL model)
	AuthType string         `json:"authType"`
	Expires  types.Time3339 `json:"expires"` // or zero for no expiration
}

// parseSuperset JSON-decodes a schema blob from r into a superset,
// reading at most MaxSchemaBlobSize bytes.
func parseSuperset(r io.Reader) (*superset, error) {
	var ss superset
	if err := json.NewDecoder(io.LimitReader(r, MaxSchemaBlobSize)).Decode(&ss); err != nil {
		return nil, err
	}
	return &ss, nil
}

// BlobFromReader returns a new Blob from the provided Reader r,
// which should be the body of the provided blobref.
// Note: the hash checksum is not verified.
func BlobFromReader(ref blob.Ref, r io.Reader) (*Blob, error) {
	if !ref.Valid() {
		return nil, errors.New("schema.BlobFromReader: invalid blobref")
	}
	var buf bytes.Buffer
	tee := io.TeeReader(r, &buf)
	ss, err := parseSuperset(tee)
	if err != nil {
		return nil, err
	}
	// The JSON decoder above stops at the end of the JSON object;
	// drain the rest of r (still tee'd into buf) and verify that
	// only ASCII whitespace follows the object.
	var wb [16]byte
	afterObj := 0
	for {
		n, err := tee.Read(wb[:])
		afterObj += n
		for i := 0; i < n; i++ {
			if !isASCIIWhite(wb[i]) {
				return nil, fmt.Errorf("invalid bytes after JSON schema blob in %v", ref)
			}
		}
		if afterObj > MaxSchemaBlobSize {
			break
		}
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
	}
	json := buf.String()
	if len(json) > MaxSchemaBlobSize {
		return nil, fmt.Errorf("schema: metadata blob %v is over expected limit; size=%d", ref, len(json))
	}
	return &Blob{ref, json, ss}, nil
}

// isASCIIWhite reports whether b is an ASCII space, tab, CR, or LF.
func isASCIIWhite(b byte) bool {
	switch b {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}

// BytesPart is the type representing one of the "parts" in a "file"
// or "bytes" JSON schema.
//
// See doc/schema/bytes.txt and doc/schema/files/file.txt.
type BytesPart struct {
	// Size is the number of bytes that this part contributes to the overall segment.
	Size uint64 `json:"size"`

	// At most one of BlobRef or BytesRef must be non-zero
	// (Valid), but it's illegal for both.
	// If neither are set, this BytesPart represents Size zero bytes.
	// BlobRef refers to raw bytes. BytesRef references a "bytes" schema blob.
	BlobRef  blob.Ref `json:"blobRef,omitempty"`
	BytesRef blob.Ref `json:"bytesRef,omitempty"`

	// Offset optionally specifies the offset into BlobRef to skip
	// when reading Size bytes.
	Offset uint64 `json:"offset,omitempty"`
}

// stringFromMixedArray joins a slice of either strings or float64
// values (as retrieved from JSON decoding) into a string. These are
// used for non-UTF8 filenames in "fileNameBytes" fields. The strings
// are UTF-8 segments and the float64s (actually uint8 values) are
// byte values.
func stringFromMixedArray(parts []interface{}) string {
	var buf bytes.Buffer
	for _, part := range parts {
		if s, ok := part.(string); ok {
			buf.WriteString(s)
			continue
		}
		if num, ok := part.(float64); ok {
			buf.WriteByte(byte(num))
			continue
		}
	}
	return buf.String()
}

// mixedArrayFromString is the inverse of stringFromMixedArray. It
// splits a string to a series of either UTF-8 strings and non-UTF-8
// bytes.
func mixedArrayFromString(s string) (parts []interface{}) {
	for len(s) > 0 {
		if n := utf8StrLen(s); n > 0 {
			parts = append(parts, s[:n])
			s = s[n:]
		} else {
			parts = append(parts, s[0])
			s = s[1:]
		}
	}
	return parts
}

// utf8StrLen returns how many prefix bytes of s are valid UTF-8.
414func utf8StrLen(s string) int { 415 for i, r := range s { 416 for r == utf8.RuneError { 417 // The RuneError value can be an error 418 // sentinel value (if it's size 1) or the same 419 // value encoded properly. Decode it to see if 420 // it's the 1 byte sentinel value. 421 _, size := utf8.DecodeRuneInString(s[i:]) 422 if size == 1 { 423 return i 424 } 425 } 426 } 427 return len(s) 428} 429 430func (ss *superset) SumPartsSize() (size uint64) { 431 for _, part := range ss.Parts { 432 size += uint64(part.Size) 433 } 434 return size 435} 436 437func (ss *superset) SymlinkTargetString() string { 438 if ss.SymlinkTarget != "" { 439 return ss.SymlinkTarget 440 } 441 return stringFromMixedArray(ss.SymlinkTargetBytes) 442} 443 444// FileNameString returns the schema blob's base filename. 445// 446// If the fileName field of the blob accidentally or maliciously 447// contains a slash, this function returns an empty string instead. 448func (ss *superset) FileNameString() string { 449 v := ss.FileName 450 if v == "" { 451 v = stringFromMixedArray(ss.FileNameBytes) 452 } 453 if v != "" { 454 if strings.Contains(v, "/") { 455 // Bogus schema blob; ignore. 456 return "" 457 } 458 if strings.Contains(v, "\\") { 459 // Bogus schema blob; ignore. 460 return "" 461 } 462 } 463 return v 464} 465 466func (ss *superset) HasFilename(name string) bool { 467 return ss.FileNameString() == name 468} 469 470func (b *Blob) FileMode() os.FileMode { 471 // TODO: move this to a different type, off *Blob 472 return b.ss.FileMode() 473} 474 475func (ss *superset) FileMode() os.FileMode { 476 var mode os.FileMode 477 hasPerm := ss.UnixPermission != "" 478 if hasPerm { 479 m64, err := strconv.ParseUint(ss.UnixPermission, 8, 64) 480 if err == nil { 481 mode = mode | os.FileMode(m64) 482 } 483 } 484 485 // TODO: add other types (block, char, etc) 486 switch ss.Type { 487 case "directory": 488 mode = mode | os.ModeDir 489 case "file": 490 // No extra bit. 
491 case "symlink": 492 mode = mode | os.ModeSymlink 493 case "fifo": 494 mode = mode | os.ModeNamedPipe 495 case "socket": 496 mode = mode | os.ModeSocket 497 } 498 if !hasPerm { 499 switch ss.Type { 500 case "directory": 501 mode |= 0755 502 default: 503 mode |= 0644 504 } 505 } 506 return mode 507} 508 509// MapUid returns the most appropriate mapping from this file's owner 510// to the local machine's owner, trying first a match by name, 511// followed by just mapping the number through directly. 512func (b *Blob) MapUid() int { return b.ss.MapUid() } 513 514// MapGid returns the most appropriate mapping from this file's group 515// to the local machine's group, trying first a match by name, 516// followed by just mapping the number through directly. 517func (b *Blob) MapGid() int { return b.ss.MapGid() } 518 519func (ss *superset) MapUid() int { 520 if ss.UnixOwner != "" { 521 uid, ok := getUidFromName(ss.UnixOwner) 522 if ok { 523 return uid 524 } 525 } 526 return ss.UnixOwnerId // TODO: will be 0 if unset, which isn't ideal 527} 528 529func (ss *superset) MapGid() int { 530 if ss.UnixGroup != "" { 531 gid, ok := getGidFromName(ss.UnixGroup) 532 if ok { 533 return gid 534 } 535 } 536 return ss.UnixGroupId // TODO: will be 0 if unset, which isn't ideal 537} 538 539func (ss *superset) ModTime() time.Time { 540 if ss.UnixMtime == "" { 541 return time.Time{} 542 } 543 t, err := time.Parse(time.RFC3339, ss.UnixMtime) 544 if err != nil { 545 return time.Time{} 546 } 547 return t 548} 549 550var DefaultStatHasher = &defaultStatHasher{} 551 552type defaultStatHasher struct{} 553 554func (d *defaultStatHasher) Lstat(fileName string) (os.FileInfo, error) { 555 return os.Lstat(fileName) 556} 557 558func (d *defaultStatHasher) Hash(fileName string) (blob.Ref, error) { 559 h := blob.NewHash() 560 file, err := os.Open(fileName) 561 if err != nil { 562 return blob.Ref{}, err 563 } 564 defer file.Close() 565 _, err = io.Copy(h, file) 566 if err != nil { 567 return 
blob.Ref{}, err 568 } 569 return blob.RefFromHash(h), nil 570} 571 572// maximum number of static-set members in a static-set schema. As noted in 573// https://github.com/camlistore/camlistore/issues/924 , 33k members result in a 574// 1.7MB blob, so 10k members seems reasonable to stay under the MaxSchemaBlobSize (1MB) 575// limit. This is not a const, so we can lower it during tests and test the logic 576// without having to create thousands of blobs. 577var maxStaticSetMembers = 10000 578 579// NewStaticSet returns the "static-set" schema for a directory. Its members 580// should be populated with SetStaticSetMembers. 581func NewStaticSet() *Builder { 582 return base(1, "static-set") 583} 584 585// SetStaticSetMembers sets the given members as the static-set members of this 586// builder. If the members are so numerous that they would not fit on a schema 587// blob, they are spread (recursively, if needed) onto sub static-sets. In which 588// case, these subsets are set as "mergeSets" of this builder. All the created 589// subsets are returned, so the caller can upload them along with the top 590// static-set created from this builder. 591// SetStaticSetMembers panics if bb isn't a "static-set" claim type. 592func (bb *Builder) SetStaticSetMembers(members []blob.Ref) []*Blob { 593 if bb.Type() != "static-set" { 594 panic("called SetStaticSetMembers on non static-set") 595 } 596 597 if len(members) <= maxStaticSetMembers { 598 ms := make([]string, len(members)) 599 for i := range members { 600 ms[i] = members[i].String() 601 } 602 bb.m["members"] = ms 603 return nil 604 } 605 606 // too many members to fit in one static-set, so we spread them in 607 // several sub static-sets. 608 subsetsNumber := len(members) / maxStaticSetMembers 609 var perSubset int 610 if subsetsNumber < maxStaticSetMembers { 611 // this means we can fill each subset up to maxStaticSetMembers, 612 // and stash the rest in one last subset. 
613 perSubset = maxStaticSetMembers 614 } else { 615 // otherwise we need to divide the members evenly in 616 // (maxStaticSetMembers - 1) subsets, and each of these subsets 617 // will also (recursively) have subsets of its own. There might 618 // also be a rest in one last subset, as above. 619 subsetsNumber = maxStaticSetMembers - 1 620 perSubset = len(members) / subsetsNumber 621 } 622 // only the subsets at this level 623 subsets := make([]*Blob, 0, subsetsNumber) 624 // subsets at this level, plus all the children subsets. 625 allSubsets := make([]*Blob, 0, subsetsNumber) 626 for i := 0; i < subsetsNumber; i++ { 627 ss := NewStaticSet() 628 subss := ss.SetStaticSetMembers(members[i*perSubset : (i+1)*perSubset]) 629 subsets = append(subsets, ss.Blob()) 630 allSubsets = append(allSubsets, ss.Blob()) 631 for _, v := range subss { 632 allSubsets = append(allSubsets, v) 633 } 634 } 635 636 // Deal with the rest (of the euclidian division) 637 if perSubset*subsetsNumber < len(members) { 638 ss := NewStaticSet() 639 ss.SetStaticSetMembers(members[perSubset*subsetsNumber:]) 640 allSubsets = append(allSubsets, ss.Blob()) 641 subsets = append(subsets, ss.Blob()) 642 } 643 644 mss := make([]string, len(subsets)) 645 for i := range subsets { 646 mss[i] = subsets[i].BlobRef().String() 647 } 648 bb.m["mergeSets"] = mss 649 return allSubsets 650} 651 652func base(version int, ctype string) *Builder { 653 return &Builder{map[string]interface{}{ 654 "camliVersion": version, 655 "camliType": ctype, 656 }} 657} 658 659// NewUnsignedPermanode returns a new random permanode, not yet signed. 660func NewUnsignedPermanode() *Builder { 661 bb := base(1, "permanode") 662 chars := make([]byte, 20) 663 _, err := io.ReadFull(rand.Reader, chars) 664 if err != nil { 665 panic("error reading random bytes: " + err.Error()) 666 } 667 bb.m["random"] = base64.StdEncoding.EncodeToString(chars) 668 return bb 669} 670 671// NewPlannedPermanode returns a permanode with a fixed key. 
Like 672// NewUnsignedPermanode, this builder is also not yet signed. Callers of 673// NewPlannedPermanode must sign the map with a fixed claimDate and 674// GPG date to create consistent JSON encodings of the Map (its 675// blobref), between runs. 676func NewPlannedPermanode(key string) *Builder { 677 bb := base(1, "permanode") 678 bb.m["key"] = key 679 return bb 680} 681 682// NewHashPlannedPermanode returns a planned permanode with the sum 683// of the hash, prefixed with "sha1-", as the key. 684func NewHashPlannedPermanode(h hash.Hash) *Builder { 685 return NewPlannedPermanode(blob.RefFromHash(h).String()) 686} 687 688// JSON returns the map m encoded as JSON in its 689// recommended canonical form. The canonical form is readable with newlines and indentation, 690// and always starts with the header bytes: 691// 692// {"camliVersion": 693// 694func mapJSON(m map[string]interface{}) (string, error) { 695 version, hasVersion := m["camliVersion"] 696 if !hasVersion { 697 return "", ErrNoCamliVersion 698 } 699 delete(m, "camliVersion") 700 jsonBytes, err := json.MarshalIndent(m, "", " ") 701 if err != nil { 702 return "", err 703 } 704 m["camliVersion"] = version 705 var buf bytes.Buffer 706 fmt.Fprintf(&buf, "{\"camliVersion\": %v,\n", version) 707 buf.Write(jsonBytes[2:]) 708 return buf.String(), nil 709} 710 711// NewFileMap returns a new builder of a type "file" schema for the provided fileName. 712// The chunk parts of the file are not populated. 713func NewFileMap(fileName string) *Builder { 714 return newCommonFilenameMap(fileName).SetType("file") 715} 716 717// NewDirMap returns a new builder of a type "directory" schema for the provided fileName. 
func NewDirMap(fileName string) *Builder {
	return newCommonFilenameMap(fileName).SetType("directory")
}

// newCommonFilenameMap returns a new builder with no camliType set yet,
// and with fileName recorded (when non-empty).
func newCommonFilenameMap(fileName string) *Builder {
	bb := base(1, "" /* no type yet */)
	if fileName != "" {
		bb.SetFileName(fileName)
	}
	return bb
}

// populateSchemaStat holds OS-specific hooks that copy stat fields
// (uid, gid, mtime, etc.) into a schema map; registered from
// schema_posix.go and friends (not on App Engine).
var populateSchemaStat []func(schemaMap map[string]interface{}, fi os.FileInfo)

// NewCommonFileMap returns a builder populated with the metadata
// common to all file types (permission bits, OS-specific stat fields,
// and modification time) for the given file.
func NewCommonFileMap(fileName string, fi os.FileInfo) *Builder {
	bb := newCommonFilenameMap(fileName)
	// Common elements (from file-common.txt)
	if fi.Mode()&os.ModeSymlink == 0 {
		bb.m["unixPermission"] = fmt.Sprintf("0%o", fi.Mode().Perm())
	}

	// OS-specific population; defined in schema_posix.go, etc. (not on App Engine)
	for _, f := range populateSchemaStat {
		f(bb.m, fi)
	}

	if mtime := fi.ModTime(); !mtime.IsZero() {
		bb.m["unixMtime"] = RFC3339FromTime(mtime)
	}
	return bb
}

// PopulateParts sets the "parts" field of the blob with the provided
// parts. The sum of the sizes of parts must match the provided size
// or an error is returned. Also, each BytesPart may only contain either
// a BytesRef or a BlobRef, but not both.
func (bb *Builder) PopulateParts(size int64, parts []BytesPart) error {
	return populateParts(bb.m, size, parts)
}

// populateParts validates parts (exactly one of BlobRef/BytesRef per
// part, sizes summing to size) and stores them under m["parts"].
func populateParts(m map[string]interface{}, size int64, parts []BytesPart) error {
	sumSize := int64(0)
	mparts := make([]map[string]interface{}, len(parts))
	for idx, part := range parts {
		mpart := make(map[string]interface{})
		mparts[idx] = mpart
		switch {
		case part.BlobRef.Valid() && part.BytesRef.Valid():
			return errors.New("schema: part contains both BlobRef and BytesRef")
		case part.BlobRef.Valid():
			mpart["blobRef"] = part.BlobRef.String()
		case part.BytesRef.Valid():
			mpart["bytesRef"] = part.BytesRef.String()
		default:
			return errors.New("schema: part must contain either a BlobRef or BytesRef")
		}
		mpart["size"] = part.Size
		sumSize += int64(part.Size)
		if part.Offset != 0 {
			mpart["offset"] = part.Offset
		}
	}
	if sumSize != size {
		return fmt.Errorf("schema: declared size %d doesn't match sum of parts size %d", size, sumSize)
	}
	m["parts"] = mparts
	return nil
}

// newBytes returns a new builder for a "bytes" schema blob.
func newBytes() *Builder {
	return base(1, "bytes")
}

// ClaimType is one of the valid "claimType" fields in a "claim" schema blob. See doc/schema/claims/.
type ClaimType string

const (
	SetAttributeClaim ClaimType = "set-attribute"
	AddAttributeClaim ClaimType = "add-attribute"
	DelAttributeClaim ClaimType = "del-attribute"
	ShareClaim        ClaimType = "share"
	// DeleteClaim deletes a permanode or another claim.
	// A delete claim can itself be deleted, and so on.
	DeleteClaim ClaimType = "delete"
)

// claimParam is used to populate a claim map when building a new claim
type claimParam struct {
	claimType ClaimType

	// Params specific to *Attribute claims:
	permanode blob.Ref // modified permanode
	attribute string   // required
	value     string   // optional if Type == DelAttributeClaim

	// Params specific to ShareClaim claims:
	authType   string
	transitive bool

	// Params specific to ShareClaim and DeleteClaim claims.
	target blob.Ref
}

// newClaim returns a "claim" builder for the given claim(s), stamped
// with the current clock time. Multiple claims are wrapped in a
// "multi" claim carrying the individual claims in a "claims" list.
func newClaim(claims ...*claimParam) *Builder {
	bb := base(1, "claim")
	bb.SetClaimDate(clockNow())
	if len(claims) == 1 {
		cp := claims[0]
		populateClaimMap(bb.m, cp)
		return bb
	}
	var claimList []interface{}
	for _, cp := range claims {
		m := map[string]interface{}{}
		populateClaimMap(m, cp)
		claimList = append(claimList, m)
	}
	bb.m["claimType"] = "multi"
	bb.m["claims"] = claimList
	return bb
}

// populateClaimMap fills m with the schema fields appropriate for
// cp's claim type.
func populateClaimMap(m map[string]interface{}, cp *claimParam) {
	m["claimType"] = string(cp.claimType)
	switch cp.claimType {
	case ShareClaim:
		m["authType"] = cp.authType
		m["transitive"] = cp.transitive
	case DeleteClaim:
		m["target"] = cp.target.String()
	default:
		m["permaNode"] = cp.permanode.String()
		m["attribute"] = cp.attribute
		// For del-attribute with an empty value, omit "value" entirely:
		// that form means "clear all values of the attribute".
		if !(cp.claimType == DelAttributeClaim && cp.value == "") {
			m["value"] = cp.value
		}
	}
}

// NewShareRef creates a *Builder for a "share" claim.
func NewShareRef(authType string, transitive bool) *Builder {
	return newClaim(&claimParam{
		claimType:  ShareClaim,
		authType:   authType,
		transitive: transitive,
	})
}

// NewSetAttributeClaim creates a claim setting attr of permaNode to
// exactly value (replacing any previous values).
func NewSetAttributeClaim(permaNode blob.Ref, attr, value string) *Builder {
	return newClaim(&claimParam{
		permanode: permaNode,
		claimType: SetAttributeClaim,
		attribute: attr,
		value:     value,
	})
}

// NewAddAttributeClaim creates a claim adding value to the set of
// values for attr of permaNode.
func NewAddAttributeClaim(permaNode blob.Ref, attr, value string) *Builder {
	return newClaim(&claimParam{
		permanode: permaNode,
		claimType: AddAttributeClaim,
		attribute: attr,
		value:     value,
	})
}

// NewDelAttributeClaim creates a new claim to remove value from the
// values set for the attribute attr of permaNode. If value is empty then
// all the values for attribute are cleared.
func NewDelAttributeClaim(permaNode blob.Ref, attr, value string) *Builder {
	return newClaim(&claimParam{
		permanode: permaNode,
		claimType: DelAttributeClaim,
		attribute: attr,
		value:     value,
	})
}

// NewDeleteClaim creates a new claim to delete a target claim or permanode.
func NewDeleteClaim(target blob.Ref) *Builder {
	return newClaim(&claimParam{
		target:    target,
		claimType: DeleteClaim,
	})
}

// ShareHaveRef is the auth type specifying that if you "have the
// reference" (know the blobref to the haveref share blob), then you
// have access to the referenced object from that share blob.
// This is the "send a link to a friend" access model.
const ShareHaveRef = "haveref"

// UnknownLocation is a magic timezone value used when the actual location
// of a time is unknown. For instance, EXIF files commonly have a time without
// a corresponding location or timezone offset.
var UnknownLocation = time.FixedZone("Unknown", -60) // 1 minute west

// IsZoneKnown reports whether t is in a known timezone.
// Perkeep uses the magic timezone offset of 1 minute west of UTC
// to mean that the timezone wasn't known.
func IsZoneKnown(t time.Time) bool {
	if t.Location() == UnknownLocation {
		return false
	}
	if _, off := t.Zone(); off == -60 {
		return false
	}
	return true
}

// RFC3339FromTime returns an RFC3339-formatted time.
//
// If the timezone is known, the time will be converted to UTC and
// returned with a "Z" suffix. For unknown zones, the timezone will be
// "-00:01" (1 minute west of UTC).
//
// Fractional seconds are only included if the time has fractional
// seconds.
func RFC3339FromTime(t time.Time) string {
	if IsZoneKnown(t) {
		t = t.UTC()
	}
	if t.UnixNano()%1e9 == 0 {
		return t.Format(time.RFC3339)
	}
	return t.Format(time.RFC3339Nano)
}

var bytesCamliVersion = []byte("camliVersion")

// LikelySchemaBlob returns quickly whether buf likely contains (or is
// the prefix of) a schema blob.
func LikelySchemaBlob(buf []byte) bool {
	if len(buf) == 0 || buf[0] != '{' {
		return false
	}
	return bytes.Contains(buf, bytesCamliVersion)
}

// findSize checks if v is an *os.File or if it has
// a Size() int64 method, to find its size.
// It returns 0, false otherwise.
func findSize(v interface{}) (size int64, ok bool) {
	if fi, ok := v.(*os.File); ok {
		v, _ = fi.Stat()
	}
	if sz, ok := v.(interface {
		Size() int64
	}); ok {
		return sz.Size(), true
	}
	// For bytes.Reader, strings.Reader, etc:
	if li, ok := v.(interface {
		Len() int
	}); ok {
		ln := int64(li.Len()) // unread portion, typically
		// If it's also a seeker, add any seek offset:
		if sk, ok := v.(io.Seeker); ok {
			if cur, err := sk.Seek(0, 1); err == nil {
				ln += cur
			}
		}
		return ln, true
	}
	return 0, false
}

// FileTime returns the best guess of the file's creation time (or modtime).
// If the file doesn't have its own metadata indicating the creation time (such as in EXIF),
// FileTime uses the modification time from the file system.
// If there was a valid EXIF but an error while trying to get a date from it,
// it logs the error and tries the other methods.
func FileTime(f io.ReaderAt) (time.Time, error) {
	var ct time.Time
	// defaultTime falls back to the filesystem's modtime, when available.
	defaultTime := func() (time.Time, error) {
		if osf, ok := f.(*os.File); ok {
			fi, err := osf.Stat()
			if err != nil {
				return ct, fmt.Errorf("Failed to find a modtime: stat: %v", err)
			}
			return fi.ModTime(), nil
		}
		return ct, errors.New("all methods failed to find a creation time or modtime")
	}

	size, ok := findSize(f)
	if !ok {
		size = 256 << 10 // enough to get the EXIF
	}
	r := io.NewSectionReader(f, 0, size)
	var tiffErr error
	ex, err := exif.Decode(r)
	if err != nil {
		tiffErr = err
		if exif.IsShortReadTagValueError(err) {
			return ct, io.ErrUnexpectedEOF
		}
		if exif.IsCriticalError(err) || exif.IsExifError(err) {
			return defaultTime()
		}
	}
	ct, err = ex.DateTime()
	if err != nil {
		return defaultTime()
	}
	// If the EXIF file only had local timezone, but it did have
	// GPS, then lookup the timezone and correct the time.
	if ct.Location() == time.Local {
		if exif.IsGPSError(tiffErr) {
			log.Printf("Invalid EXIF GPS data: %v", tiffErr)
			return ct, nil
		}
		if lat, long, err := ex.LatLong(); err == nil {
			if loc := lookupLocation(latlong.LookupZoneName(lat, long)); loc != nil {
				if t, err := exifDateTimeInLocation(ex, loc); err == nil {
					return t, nil
				}
			}
		} else if !exif.IsTagNotPresentError(err) {
			log.Printf("Invalid EXIF GPS data: %v", err)
		}
	}
	return ct, nil
}

// This is basically a copy of the exif.Exif.DateTime() method, except:
// * it takes a *time.Location to assume
// * the caller already assumes there's no timezone offset or GPS time
// in the EXIF, so any of that code can be ignored.
func exifDateTimeInLocation(x *exif.Exif, loc *time.Location) (time.Time, error) {
	tag, err := x.Get(exif.DateTimeOriginal)
	if err != nil {
		tag, err = x.Get(exif.DateTime)
		if err != nil {
			return time.Time{}, err
		}
	}
	if tag.Format() != tiff.StringVal {
		return time.Time{}, errors.New("DateTime[Original] not in string format")
	}
	const exifTimeLayout = "2006:01:02 15:04:05"
	dateStr := strings.TrimRight(string(tag.Val), "\x00")
	return time.ParseInLocation(exifTimeLayout, dateStr, loc)
}

// zoneCache caches time.LoadLocation results (including failures,
// stored as nil) keyed by zone name.
var zoneCache struct {
	sync.RWMutex
	m map[string]*time.Location
}

// lookupLocation returns the *time.Location for the named zone, or
// nil if zone is empty or cannot be loaded. Results are cached.
func lookupLocation(zone string) *time.Location {
	if zone == "" {
		return nil
	}
	zoneCache.RLock()
	l, ok := zoneCache.m[zone]
	zoneCache.RUnlock()
	if ok {
		return l
	}
	// could use singleflight here, but doesn't really
	// matter if two callers both do this.
	loc, err := time.LoadLocation(zone)

	zoneCache.Lock()
	if zoneCache.m == nil {
		zoneCache.m = make(map[string]*time.Location)
	}
	zoneCache.m[zone] = loc // even if nil
	zoneCache.Unlock()

	if err != nil {
		log.Printf("failed to lookup timezone %q: %v", zone, err)
		return nil
	}
	return loc
}

var boringTitlePattern = regexp.MustCompile(`^(?:IMG_|DSC|PANO_|ESR_).*$`)

// IsInterestingTitle returns whether title would be interesting information as
// a title for a permanode. For example, filenames automatically created by
// cameras, such as IMG_XXXX.JPG, do not add any interesting value.
func IsInterestingTitle(title string) bool {
	return !boringTitlePattern.MatchString(title)
}