/*
Copyright 2011 The Perkeep Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package schema manipulates Camlistore schema blobs.
//
// A schema blob is a JSON-encoded blob that describes other blobs.
// See documentation in Perkeep's doc/schema/ directory.
package schema // import "perkeep.org/pkg/schema"

import (
	"bytes"
	"context"
	"crypto/rand"
	"encoding/base64"
	"encoding/json"
	"errors"
	"fmt"
	"hash"
	"io"
	"log"
	"os"
	"regexp"
	"strconv"
	"strings"
	"sync"
	"time"
	"unicode/utf8"

	"github.com/bradfitz/latlong"
	"perkeep.org/pkg/blob"

	"github.com/rwcarlsen/goexif/exif"
	"github.com/rwcarlsen/goexif/tiff"
	"go4.org/strutil"
	"go4.org/types"
)

func init() {
	// Intern common strings as used by schema blobs (camliType values), to reduce
	// index memory usage, which uses strutil.StringFromBytes.
	strutil.RegisterCommonString(
		"bytes",
		"claim",
		"directory",
		"file",
		"permanode",
		"share",
		"static-set",
		"symlink",
	)
}

// MaxSchemaBlobSize represents the upper bound for how large
// a schema blob may be.
const MaxSchemaBlobSize = 1 << 20

var (
	// ErrNoCamliVersion is returned when a schema map lacks the
	// required "camliVersion" key.
	ErrNoCamliVersion = errors.New("schema: no camliVersion key in map")
)

// clockNow is time.Now, except in tests, which can replace it to get
// deterministic claim dates.
var clockNow = time.Now

// StatHasher abstracts stat'ing and content-hashing a local file, so
// tests can substitute fakes for the real filesystem.
type StatHasher interface {
	Lstat(fileName string) (os.FileInfo, error)
	Hash(fileName string) (blob.Ref, error)
}

// File is the interface returned when opening a DirectoryEntry that
// is a regular file.
type File interface {
	io.Closer
	io.ReaderAt
	io.Reader
	Size() int64
}

// Directory is a read-only interface to a "directory" schema blob.
type Directory interface {
	// Readdir reads the contents of the directory associated with dr
	// and returns an array of up to n DirectoryEntries structures.
	// Subsequent calls on the same file will yield further
	// DirectoryEntries.
	// If n > 0, Readdir returns at most n DirectoryEntry structures. In
	// this case, if Readdir returns an empty slice, it will return
	// a non-nil error explaining why. At the end of a directory,
	// the error is io.EOF.
	// If n <= 0, Readdir returns all the DirectoryEntries from the
	// directory in a single slice. In this case, if Readdir succeeds
	// (reads all the way to the end of the directory), it returns the
	// slice and a nil error. If it encounters an error before the
	// end of the directory, Readdir returns the DirectoryEntry read
	// until that point and a non-nil error.
	Readdir(ctx context.Context, n int) ([]DirectoryEntry, error)
}

// Symlink is the read-only interface to a "symlink" schema blob.
type Symlink interface {
	// .. TODO
}

// FIFO is the read-only interface to a "fifo" schema blob.
type FIFO interface {
	// .. TODO
}

// Socket is the read-only interface to a "socket" schema blob.
type Socket interface {
	// .. TODO
}

// DirectoryEntry is a read-only interface to an entry in a (static)
// directory.
type DirectoryEntry interface {
	// CamliType returns the schema blob's "camliType" field.
	// This may be "file", "directory", "symlink", or other more
	// obscure types added in the future.
	CamliType() string

	FileName() string
	BlobRef() blob.Ref

	File(ctx context.Context) (File, error)           // if camliType is "file"
	Directory(ctx context.Context) (Directory, error) // if camliType is "directory"
	Symlink() (Symlink, error)                        // if camliType is "symlink"
	FIFO() (FIFO, error)                              // if camliType is "fifo"
	Socket() (Socket, error)                          // if camliType is "socket"
}

// dirEntry is the default implementation of DirectoryEntry
type dirEntry struct {
	ss      superset
	fetcher blob.Fetcher
	fr      *FileReader // or nil if not a file
	dr      *DirReader  // or nil if not a directory
}

// A SearchQuery must be of type *search.SearchQuery.
// This type breaks an otherwise-circular dependency.
type SearchQuery interface{}

func (de *dirEntry) CamliType() string {
	return de.ss.Type
}

func (de *dirEntry) FileName() string {
	return de.ss.FileNameString()
}

func (de *dirEntry) BlobRef() blob.Ref {
	return de.ss.BlobRef
}

// File returns a reader for the entry's file contents, lazily creating
// (and caching) a FileReader on first use.
func (de *dirEntry) File(ctx context.Context) (File, error) {
	if de.fr == nil {
		if de.ss.Type != "file" {
			return nil, fmt.Errorf("DirectoryEntry is camliType %q, not %q", de.ss.Type, "file")
		}
		fr, err := NewFileReader(ctx, de.fetcher, de.ss.BlobRef)
		if err != nil {
			return nil, err
		}
		de.fr = fr
	}
	return de.fr, nil
}

// Directory returns a reader for the entry's directory contents, lazily
// creating (and caching) a DirReader on first use.
func (de *dirEntry) Directory(ctx context.Context) (Directory, error) {
	if de.dr == nil {
		if de.ss.Type != "directory" {
			return nil, fmt.Errorf("DirectoryEntry is camliType %q, not %q", de.ss.Type, "directory")
		}
		dr, err := NewDirReader(ctx, de.fetcher, de.ss.BlobRef)
		if err != nil {
			return nil, err
		}
		de.dr = dr
	}
	return de.dr, nil
}

func (de *dirEntry) Symlink() (Symlink, error) {
	return 0, errors.New("TODO: Symlink not implemented")
}

func (de *dirEntry) FIFO() (FIFO, error) {
	return 0, errors.New("TODO: FIFO not implemented")
}

func (de *dirEntry) Socket() (Socket, error) {
	return 0, errors.New("TODO: Socket not implemented")
}

// newDirectoryEntry takes a superset and returns a DirectoryEntry if
// the superset is valid and represents an entry in a directory. It
// must be of type "file", "directory", "symlink", "fifo" or "socket".
// TODO: "char", "block", probably. later.
func newDirectoryEntry(fetcher blob.Fetcher, ss *superset) (DirectoryEntry, error) {
	if ss == nil {
		return nil, errors.New("ss was nil")
	}
	if !ss.BlobRef.Valid() {
		return nil, errors.New("ss.BlobRef was invalid")
	}
	switch ss.Type {
	case "file", "directory", "symlink", "fifo", "socket":
		// Okay
	default:
		return nil, fmt.Errorf("invalid DirectoryEntry camliType of %q", ss.Type)
	}
	de := &dirEntry{ss: *ss, fetcher: fetcher} // defensive copy
	return de, nil
}

// NewDirectoryEntryFromBlobRef takes a BlobRef and returns a
// DirectoryEntry if the BlobRef contains a type "file", "directory",
// "symlink", "fifo" or "socket".
// TODO: "char", "block", probably. later.
func NewDirectoryEntryFromBlobRef(ctx context.Context, fetcher blob.Fetcher, blobRef blob.Ref) (DirectoryEntry, error) {
	ss := new(superset)
	err := ss.setFromBlobRef(ctx, fetcher, blobRef)
	if err != nil {
		return nil, fmt.Errorf("schema/filereader: can't fill superset: %v", err)
	}
	return newDirectoryEntry(fetcher, ss)
}

// superset represents the superset of common Perkeep JSON schema
// keys as a convenient json.Unmarshal target.
// TODO(bradfitz): unexport this type. Getting too gross. Move to schema.Blob
type superset struct {
	// BlobRef isn't for a particular metadata blob field, but included
	// for convenience.
	BlobRef blob.Ref

	Version int    `json:"camliVersion"`
	Type    string `json:"camliType"`

	Signer blob.Ref `json:"camliSigner"`
	Sig    string   `json:"camliSig"`

	ClaimType string         `json:"claimType"`
	ClaimDate types.Time3339 `json:"claimDate"`

	Permanode blob.Ref `json:"permaNode"`
	Attribute string   `json:"attribute"`
	Value     string   `json:"value"`

	// FileName and FileNameBytes represent one of the two
	// representations of file names in schema blobs. They should
	// not be accessed directly. Use the FileNameString accessor
	// instead, which also sanitizes malicious values.
	FileName      string        `json:"fileName"`
	FileNameBytes []interface{} `json:"fileNameBytes"`

	SymlinkTarget      string        `json:"symlinkTarget"`
	SymlinkTargetBytes []interface{} `json:"symlinkTargetBytes"`

	UnixPermission string `json:"unixPermission"`
	UnixOwnerId    int    `json:"unixOwnerId"`
	UnixOwner      string `json:"unixOwner"`
	UnixGroupId    int    `json:"unixGroupId"`
	UnixGroup      string `json:"unixGroup"`
	UnixMtime      string `json:"unixMtime"`
	UnixCtime      string `json:"unixCtime"`
	UnixAtime      string `json:"unixAtime"`

	// Parts are references to the data chunks of a regular file (or a "bytes" schema blob).
	// See doc/schema/bytes.txt and doc/schema/files/file.txt.
	Parts []*BytesPart `json:"parts"`

	Entries   blob.Ref   `json:"entries"`   // for directories, a blobref to a static-set
	Members   []blob.Ref `json:"members"`   // for static sets (for directory static-sets: blobrefs to child dirs/files)
	MergeSets []blob.Ref `json:"mergeSets"` // each is a "sub static-set", that has either Members or MergeSets. For large dirs.

	// Search allows a "share" blob to share an entire search. Contrast with "target".
	Search SearchQuery `json:"search"`
	// Target is a "share" blob's target (the thing being shared)
	// Or it is the object being deleted in a DeleteClaim claim.
	Target blob.Ref `json:"target"`
	// Transitive is a property of a "share" blob.
	Transitive bool `json:"transitive"`
	// AuthType is a "share" blob's authentication type that is required.
	// Currently (2013-01-02) just "haveref" (if you know the share's blobref,
	// you get access: the secret URL model)
	AuthType string         `json:"authType"`
	Expires  types.Time3339 `json:"expires"` // or zero for no expiration
}

// parseSuperset JSON-decodes a schema blob from r into a superset,
// reading at most MaxSchemaBlobSize bytes.
func parseSuperset(r io.Reader) (*superset, error) {
	var ss superset
	if err := json.NewDecoder(io.LimitReader(r, MaxSchemaBlobSize)).Decode(&ss); err != nil {
		return nil, err
	}
	return &ss, nil
}

// BlobFromReader returns a new Blob from the provided Reader r,
// which should be the body of the provided blobref.
// Note: the hash checksum is not verified.
func BlobFromReader(ref blob.Ref, r io.Reader) (*Blob, error) {
	if !ref.Valid() {
		return nil, errors.New("schema.BlobFromReader: invalid blobref")
	}
	var buf bytes.Buffer
	tee := io.TeeReader(r, &buf)
	ss, err := parseSuperset(tee)
	if err != nil {
		return nil, err
	}
	// The JSON decoder above stops at the end of the JSON object;
	// drain the rest of r (still tee'd into buf) and verify that
	// only ASCII whitespace follows the object.
	var wb [16]byte
	afterObj := 0
	for {
		n, err := tee.Read(wb[:])
		afterObj += n
		for i := 0; i < n; i++ {
			if !isASCIIWhite(wb[i]) {
				return nil, fmt.Errorf("invalid bytes after JSON schema blob in %v", ref)
			}
		}
		if afterObj > MaxSchemaBlobSize {
			break
		}
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
	}
	json := buf.String()
	if len(json) > MaxSchemaBlobSize {
		return nil, fmt.Errorf("schema: metadata blob %v is over expected limit; size=%d", ref, len(json))
	}
	return &Blob{ref, json, ss}, nil
}

// isASCIIWhite reports whether b is an ASCII space, tab, CR, or LF.
func isASCIIWhite(b byte) bool {
	switch b {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}

// BytesPart is the type representing one of the "parts" in a "file"
// or "bytes" JSON schema.
//
// See doc/schema/bytes.txt and doc/schema/files/file.txt.
type BytesPart struct {
	// Size is the number of bytes that this part contributes to the overall segment.
	Size uint64 `json:"size"`

	// At most one of BlobRef or BytesRef must be non-zero
	// (Valid), but it's illegal for both.
	// If neither are set, this BytesPart represents Size zero bytes.
	// BlobRef refers to raw bytes. BytesRef references a "bytes" schema blob.
	BlobRef  blob.Ref `json:"blobRef,omitempty"`
	BytesRef blob.Ref `json:"bytesRef,omitempty"`

	// Offset optionally specifies the offset into BlobRef to skip
	// when reading Size bytes.
	Offset uint64 `json:"offset,omitempty"`
}

// stringFromMixedArray joins a slice of either strings or float64
// values (as retrieved from JSON decoding) into a string. These are
// used for non-UTF8 filenames in "fileNameBytes" fields. The strings
// are UTF-8 segments and the float64s (actually uint8 values) are
// byte values.
func stringFromMixedArray(parts []interface{}) string {
	var buf bytes.Buffer
	for _, part := range parts {
		if s, ok := part.(string); ok {
			buf.WriteString(s)
			continue
		}
		if num, ok := part.(float64); ok {
			buf.WriteByte(byte(num))
			continue
		}
	}
	return buf.String()
}

// mixedArrayFromString is the inverse of stringFromMixedArray. It
// splits a string to a series of either UTF-8 strings and non-UTF-8
// bytes.
func mixedArrayFromString(s string) (parts []interface{}) {
	for len(s) > 0 {
		if n := utf8StrLen(s); n > 0 {
			parts = append(parts, s[:n])
			s = s[n:]
		} else {
			parts = append(parts, s[0])
			s = s[1:]
		}
	}
	return parts
}

// utf8StrLen returns how many prefix bytes of s are valid UTF-8.
414func utf8StrLen(s string) int { 415 for i, r := range s { 416 for r == utf8.RuneError { 417 // The RuneError value can be an error 418 // sentinel value (if it's size 1) or the same 419 // value encoded properly. Decode it to see if 420 // it's the 1 byte sentinel value. 421 _, size := utf8.DecodeRuneInString(s[i:]) 422 if size == 1 { 423 return i 424 } 425 } 426 } 427 return len(s) 428} 429 430func (ss *superset) SumPartsSize() (size uint64) { 431 for _, part := range ss.Parts { 432 size += uint64(part.Size) 433 } 434 return size 435} 436 437func (ss *superset) SymlinkTargetString() string { 438 if ss.SymlinkTarget != "" { 439 return ss.SymlinkTarget 440 } 441 return stringFromMixedArray(ss.SymlinkTargetBytes) 442} 443 444// FileNameString returns the schema blob's base filename. 445// 446// If the fileName field of the blob accidentally or maliciously 447// contains a slash, this function returns an empty string instead. 448func (ss *superset) FileNameString() string { 449 v := ss.FileName 450 if v == "" { 451 v = stringFromMixedArray(ss.FileNameBytes) 452 } 453 if v != "" { 454 if strings.Contains(v, "/") { 455 // Bogus schema blob; ignore. 456 return "" 457 } 458 if strings.Contains(v, "\\") { 459 // Bogus schema blob; ignore. 460 return "" 461 } 462 } 463 return v 464} 465 466func (ss *superset) HasFilename(name string) bool { 467 return ss.FileNameString() == name 468} 469 470func (b *Blob) FileMode() os.FileMode { 471 // TODO: move this to a different type, off *Blob 472 return b.ss.FileMode() 473} 474 475func (ss *superset) FileMode() os.FileMode { 476 var mode os.FileMode 477 hasPerm := ss.UnixPermission != "" 478 if hasPerm { 479 m64, err := strconv.ParseUint(ss.UnixPermission, 8, 64) 480 if err == nil { 481 mode = mode | os.FileMode(m64) 482 } 483 } 484 485 // TODO: add other types (block, char, etc) 486 switch ss.Type { 487 case "directory": 488 mode = mode | os.ModeDir 489 case "file": 490 // No extra bit. 
491 case "symlink": 492 mode = mode | os.ModeSymlink 493 case "fifo": 494 mode = mode | os.ModeNamedPipe 495 case "socket": 496 mode = mode | os.ModeSocket 497 } 498 if !hasPerm { 499 switch ss.Type { 500 case "directory": 501 mode |= 0755 502 default: 503 mode |= 0644 504 } 505 } 506 return mode 507} 508 509// MapUid returns the most appropriate mapping from this file's owner 510// to the local machine's owner, trying first a match by name, 511// followed by just mapping the number through directly. 512func (b *Blob) MapUid() int { return b.ss.MapUid() } 513 514// MapGid returns the most appropriate mapping from this file's group 515// to the local machine's group, trying first a match by name, 516// followed by just mapping the number through directly. 517func (b *Blob) MapGid() int { return b.ss.MapGid() } 518 519func (ss *superset) MapUid() int { 520 if ss.UnixOwner != "" { 521 uid, ok := getUidFromName(ss.UnixOwner) 522 if ok { 523 return uid 524 } 525 } 526 return ss.UnixOwnerId // TODO: will be 0 if unset, which isn't ideal 527} 528 529func (ss *superset) MapGid() int { 530 if ss.UnixGroup != "" { 531 gid, ok := getGidFromName(ss.UnixGroup) 532 if ok { 533 return gid 534 } 535 } 536 return ss.UnixGroupId // TODO: will be 0 if unset, which isn't ideal 537} 538 539func (ss *superset) ModTime() time.Time { 540 if ss.UnixMtime == "" { 541 return time.Time{} 542 } 543 t, err := time.Parse(time.RFC3339, ss.UnixMtime) 544 if err != nil { 545 return time.Time{} 546 } 547 return t 548} 549 550var DefaultStatHasher = &defaultStatHasher{} 551 552type defaultStatHasher struct{} 553 554func (d *defaultStatHasher) Lstat(fileName string) (os.FileInfo, error) { 555 return os.Lstat(fileName) 556} 557 558func (d *defaultStatHasher) Hash(fileName string) (blob.Ref, error) { 559 h := blob.NewHash() 560 file, err := os.Open(fileName) 561 if err != nil { 562 return blob.Ref{}, err 563 } 564 defer file.Close() 565 _, err = io.Copy(h, file) 566 if err != nil { 567 return 
blob.Ref{}, err 568 } 569 return blob.RefFromHash(h), nil 570} 571 572// maximum number of static-set members in a static-set schema. As noted in 573// https://github.com/camlistore/camlistore/issues/924 , 33k members result in a 574// 1.7MB blob, so 10k members seems reasonable to stay under the MaxSchemaBlobSize (1MB) 575// limit. This is not a const, so we can lower it during tests and test the logic 576// without having to create thousands of blobs. 577var maxStaticSetMembers = 10000 578 579// NewStaticSet returns the "static-set" schema for a directory. Its members 580// should be populated with SetStaticSetMembers. 581func NewStaticSet() *Builder { 582 return base(1, "static-set") 583} 584 585// SetStaticSetMembers sets the given members as the static-set members of this 586// builder. If the members are so numerous that they would not fit on a schema 587// blob, they are spread (recursively, if needed) onto sub static-sets. In which 588// case, these subsets are set as "mergeSets" of this builder. All the created 589// subsets are returned, so the caller can upload them along with the top 590// static-set created from this builder. 591// SetStaticSetMembers panics if bb isn't a "static-set" claim type. 592func (bb *Builder) SetStaticSetMembers(members []blob.Ref) []*Blob { 593 if bb.Type() != "static-set" { 594 panic("called SetStaticSetMembers on non static-set") 595 } 596 597 if len(members) <= maxStaticSetMembers { 598 ms := make([]string, len(members)) 599 for i := range members { 600 ms[i] = members[i].String() 601 } 602 bb.m["members"] = ms 603 return nil 604 } 605 606 // too many members to fit in one static-set, so we spread them in 607 // several sub static-sets. 608 subsetsNumber := len(members) / maxStaticSetMembers 609 var perSubset int 610 if subsetsNumber < maxStaticSetMembers { 611 // this means we can fill each subset up to maxStaticSetMembers, 612 // and stash the rest in one last subset. 
613 perSubset = maxStaticSetMembers 614 } else { 615 // otherwise we need to divide the members evenly in 616 // (maxStaticSetMembers - 1) subsets, and each of these subsets 617 // will also (recursively) have subsets of its own. There might 618 // also be a rest in one last subset, as above. 619 subsetsNumber = maxStaticSetMembers - 1 620 perSubset = len(members) / subsetsNumber 621 } 622 // only the subsets at this level 623 subsets := make([]*Blob, 0, subsetsNumber) 624 // subsets at this level, plus all the children subsets. 625 allSubsets := make([]*Blob, 0, subsetsNumber) 626 for i := 0; i < subsetsNumber; i++ { 627 ss := NewStaticSet() 628 subss := ss.SetStaticSetMembers(members[i*perSubset : (i+1)*perSubset]) 629 subsets = append(subsets, ss.Blob()) 630 allSubsets = append(allSubsets, ss.Blob()) 631 for _, v := range subss { 632 allSubsets = append(allSubsets, v) 633 } 634 } 635 636 // Deal with the rest (of the euclidian division) 637 if perSubset*subsetsNumber < len(members) { 638 ss := NewStaticSet() 639 ss.SetStaticSetMembers(members[perSubset*subsetsNumber:]) 640 allSubsets = append(allSubsets, ss.Blob()) 641 subsets = append(subsets, ss.Blob()) 642 } 643 644 mss := make([]string, len(subsets)) 645 for i := range subsets { 646 mss[i] = subsets[i].BlobRef().String() 647 } 648 bb.m["mergeSets"] = mss 649 return allSubsets 650} 651 652func base(version int, ctype string) *Builder { 653 return &Builder{map[string]interface{}{ 654 "camliVersion": version, 655 "camliType": ctype, 656 }} 657} 658 659// NewUnsignedPermanode returns a new random permanode, not yet signed. 660func NewUnsignedPermanode() *Builder { 661 bb := base(1, "permanode") 662 chars := make([]byte, 20) 663 _, err := io.ReadFull(rand.Reader, chars) 664 if err != nil { 665 panic("error reading random bytes: " + err.Error()) 666 } 667 bb.m["random"] = base64.StdEncoding.EncodeToString(chars) 668 return bb 669} 670 671// NewPlannedPermanode returns a permanode with a fixed key. 
Like 672// NewUnsignedPermanode, this builder is also not yet signed. Callers of 673// NewPlannedPermanode must sign the map with a fixed claimDate and 674// GPG date to create consistent JSON encodings of the Map (its 675// blobref), between runs. 676func NewPlannedPermanode(key string) *Builder { 677 bb := base(1, "permanode") 678 bb.m["key"] = key 679 return bb 680} 681 682// NewHashPlannedPermanode returns a planned permanode with the sum 683// of the hash, prefixed with "sha1-", as the key. 684func NewHashPlannedPermanode(h hash.Hash) *Builder { 685 return NewPlannedPermanode(blob.RefFromHash(h).String()) 686} 687 688// JSON returns the map m encoded as JSON in its 689// recommended canonical form. The canonical form is readable with newlines and indentation, 690// and always starts with the header bytes: 691// 692// {"camliVersion": 693// 694func mapJSON(m map[string]interface{}) (string, error) { 695 version, hasVersion := m["camliVersion"] 696 if !hasVersion { 697 return "", ErrNoCamliVersion 698 } 699 delete(m, "camliVersion") 700 jsonBytes, err := json.MarshalIndent(m, "", " ") 701 if err != nil { 702 return "", err 703 } 704 m["camliVersion"] = version 705 var buf bytes.Buffer 706 fmt.Fprintf(&buf, "{\"camliVersion\": %v,\n", version) 707 buf.Write(jsonBytes[2:]) 708 return buf.String(), nil 709} 710 711// NewFileMap returns a new builder of a type "file" schema for the provided fileName. 712// The chunk parts of the file are not populated. 713func NewFileMap(fileName string) *Builder { 714 return newCommonFilenameMap(fileName).SetType("file") 715} 716 717// NewDirMap returns a new builder of a type "directory" schema for the provided fileName. 
func NewDirMap(fileName string) *Builder {
	return newCommonFilenameMap(fileName).SetType("directory")
}

// newCommonFilenameMap returns a new builder with no camliType set yet,
// and with fileName recorded (when non-empty).
func newCommonFilenameMap(fileName string) *Builder {
	bb := base(1, "" /* no type yet */)
	if fileName != "" {
		bb.SetFileName(fileName)
	}
	return bb
}

// populateSchemaStat holds OS-specific hooks that copy stat fields
// (uid, gid, mtime, etc.) into a schema map; registered from
// schema_posix.go and friends (not on App Engine).
var populateSchemaStat []func(schemaMap map[string]interface{}, fi os.FileInfo)

// NewCommonFileMap returns a builder populated with the metadata
// common to all file types (permission bits, OS-specific stat fields,
// and modification time) for the given file.
func NewCommonFileMap(fileName string, fi os.FileInfo) *Builder {
	bb := newCommonFilenameMap(fileName)
	// Common elements (from file-common.txt)
	if fi.Mode()&os.ModeSymlink == 0 {
		bb.m["unixPermission"] = fmt.Sprintf("0%o", fi.Mode().Perm())
	}

	// OS-specific population; defined in schema_posix.go, etc. (not on App Engine)
	for _, f := range populateSchemaStat {
		f(bb.m, fi)
	}

	if mtime := fi.ModTime(); !mtime.IsZero() {
		bb.m["unixMtime"] = RFC3339FromTime(mtime)
	}
	return bb
}

// PopulateParts sets the "parts" field of the blob with the provided
// parts. The sum of the sizes of parts must match the provided size
// or an error is returned. Also, each BytesPart may only contain either
// a BytesRef or a BlobRef, but not both.
func (bb *Builder) PopulateParts(size int64, parts []BytesPart) error {
	return populateParts(bb.m, size, parts)
}

// populateParts validates parts (exactly one of BlobRef/BytesRef per
// part, sizes summing to size) and stores them under m["parts"].
func populateParts(m map[string]interface{}, size int64, parts []BytesPart) error {
	sumSize := int64(0)
	mparts := make([]map[string]interface{}, len(parts))
	for idx, part := range parts {
		mpart := make(map[string]interface{})
		mparts[idx] = mpart
		switch {
		case part.BlobRef.Valid() && part.BytesRef.Valid():
			return errors.New("schema: part contains both BlobRef and BytesRef")
		case part.BlobRef.Valid():
			mpart["blobRef"] = part.BlobRef.String()
		case part.BytesRef.Valid():
			mpart["bytesRef"] = part.BytesRef.String()
		default:
			return errors.New("schema: part must contain either a BlobRef or BytesRef")
		}
		mpart["size"] = part.Size
		sumSize += int64(part.Size)
		if part.Offset != 0 {
			mpart["offset"] = part.Offset
		}
	}
	if sumSize != size {
		return fmt.Errorf("schema: declared size %d doesn't match sum of parts size %d", size, sumSize)
	}
	m["parts"] = mparts
	return nil
}

// newBytes returns a new builder for a "bytes" schema blob.
func newBytes() *Builder {
	return base(1, "bytes")
}

// ClaimType is one of the valid "claimType" fields in a "claim" schema blob. See doc/schema/claims/.
type ClaimType string

const (
	SetAttributeClaim ClaimType = "set-attribute"
	AddAttributeClaim ClaimType = "add-attribute"
	DelAttributeClaim ClaimType = "del-attribute"
	ShareClaim        ClaimType = "share"
	// DeleteClaim deletes a permanode or another claim.
	// A delete claim can itself be deleted, and so on.
	DeleteClaim ClaimType = "delete"
)

// claimParam is used to populate a claim map when building a new claim
type claimParam struct {
	claimType ClaimType

	// Params specific to *Attribute claims:
	permanode blob.Ref // modified permanode
	attribute string   // required
	value     string   // optional if Type == DelAttributeClaim

	// Params specific to ShareClaim claims:
	authType   string
	transitive bool

	// Params specific to ShareClaim and DeleteClaim claims.
	target blob.Ref
}

// newClaim returns a "claim" builder for the given claim(s), stamped
// with the current clock time. Multiple claims are wrapped in a
// "multi" claim carrying the individual claims in a "claims" list.
func newClaim(claims ...*claimParam) *Builder {
	bb := base(1, "claim")
	bb.SetClaimDate(clockNow())
	if len(claims) == 1 {
		cp := claims[0]
		populateClaimMap(bb.m, cp)
		return bb
	}
	var claimList []interface{}
	for _, cp := range claims {
		m := map[string]interface{}{}
		populateClaimMap(m, cp)
		claimList = append(claimList, m)
	}
	bb.m["claimType"] = "multi"
	bb.m["claims"] = claimList
	return bb
}

// populateClaimMap fills m with the schema fields appropriate for
// cp's claim type.
func populateClaimMap(m map[string]interface{}, cp *claimParam) {
	m["claimType"] = string(cp.claimType)
	switch cp.claimType {
	case ShareClaim:
		m["authType"] = cp.authType
		m["transitive"] = cp.transitive
	case DeleteClaim:
		m["target"] = cp.target.String()
	default:
		m["permaNode"] = cp.permanode.String()
		m["attribute"] = cp.attribute
		// For del-attribute with an empty value, omit "value" entirely:
		// that form means "clear all values of the attribute".
		if !(cp.claimType == DelAttributeClaim && cp.value == "") {
			m["value"] = cp.value
		}
	}
}

// NewShareRef creates a *Builder for a "share" claim.
func NewShareRef(authType string, transitive bool) *Builder {
	return newClaim(&claimParam{
		claimType:  ShareClaim,
		authType:   authType,
		transitive: transitive,
	})
}

// NewSetAttributeClaim creates a claim setting attr of permaNode to
// exactly value (replacing any previous values).
func NewSetAttributeClaim(permaNode blob.Ref, attr, value string) *Builder {
	return newClaim(&claimParam{
		permanode: permaNode,
		claimType: SetAttributeClaim,
		attribute: attr,
		value:     value,
	})
}

// NewAddAttributeClaim creates a claim adding value to the set of
// values for attr of permaNode.
func NewAddAttributeClaim(permaNode blob.Ref, attr, value string) *Builder {
	return newClaim(&claimParam{
		permanode: permaNode,
		claimType: AddAttributeClaim,
		attribute: attr,
		value:     value,
	})
}

// NewDelAttributeClaim creates a new claim to remove value from the
// values set for the attribute attr of permaNode. If value is empty then
// all the values for attribute are cleared.
func NewDelAttributeClaim(permaNode blob.Ref, attr, value string) *Builder {
	return newClaim(&claimParam{
		permanode: permaNode,
		claimType: DelAttributeClaim,
		attribute: attr,
		value:     value,
	})
}

// NewDeleteClaim creates a new claim to delete a target claim or permanode.
func NewDeleteClaim(target blob.Ref) *Builder {
	return newClaim(&claimParam{
		target:    target,
		claimType: DeleteClaim,
	})
}

// ShareHaveRef is the auth type specifying that if you "have the
// reference" (know the blobref to the haveref share blob), then you
// have access to the referenced object from that share blob.
// This is the "send a link to a friend" access model.
const ShareHaveRef = "haveref"

// UnknownLocation is a magic timezone value used when the actual location
// of a time is unknown. For instance, EXIF files commonly have a time without
// a corresponding location or timezone offset.
var UnknownLocation = time.FixedZone("Unknown", -60) // 1 minute west

// IsZoneKnown reports whether t is in a known timezone.
// Perkeep uses the magic timezone offset of 1 minute west of UTC
// to mean that the timezone wasn't known.
func IsZoneKnown(t time.Time) bool {
	if t.Location() == UnknownLocation {
		return false
	}
	if _, off := t.Zone(); off == -60 {
		return false
	}
	return true
}

// RFC3339FromTime returns an RFC3339-formatted time.
//
// If the timezone is known, the time will be converted to UTC and
// returned with a "Z" suffix. For unknown zones, the timezone will be
// "-00:01" (1 minute west of UTC).
//
// Fractional seconds are only included if the time has fractional
// seconds.
func RFC3339FromTime(t time.Time) string {
	if IsZoneKnown(t) {
		t = t.UTC()
	}
	if t.UnixNano()%1e9 == 0 {
		return t.Format(time.RFC3339)
	}
	return t.Format(time.RFC3339Nano)
}

var bytesCamliVersion = []byte("camliVersion")

// LikelySchemaBlob returns quickly whether buf likely contains (or is
// the prefix of) a schema blob.
func LikelySchemaBlob(buf []byte) bool {
	if len(buf) == 0 || buf[0] != '{' {
		return false
	}
	return bytes.Contains(buf, bytesCamliVersion)
}

// findSize checks if v is an *os.File or if it has
// a Size() int64 method, to find its size.
// It returns 0, false otherwise.
func findSize(v interface{}) (size int64, ok bool) {
	if fi, ok := v.(*os.File); ok {
		v, _ = fi.Stat()
	}
	if sz, ok := v.(interface {
		Size() int64
	}); ok {
		return sz.Size(), true
	}
	// For bytes.Reader, strings.Reader, etc:
	if li, ok := v.(interface {
		Len() int
	}); ok {
		ln := int64(li.Len()) // unread portion, typically
		// If it's also a seeker, add any seek offset:
		if sk, ok := v.(io.Seeker); ok {
			if cur, err := sk.Seek(0, 1); err == nil {
				ln += cur
			}
		}
		return ln, true
	}
	return 0, false
}

// FileTime returns the best guess of the file's creation time (or modtime).
// If the file doesn't have its own metadata indicating the creation time (such as in EXIF),
// FileTime uses the modification time from the file system.
// If there was a valid EXIF but an error while trying to get a date from it,
// it logs the error and tries the other methods.
func FileTime(f io.ReaderAt) (time.Time, error) {
	var ct time.Time
	// defaultTime falls back to the filesystem's modtime, when available.
	defaultTime := func() (time.Time, error) {
		if osf, ok := f.(*os.File); ok {
			fi, err := osf.Stat()
			if err != nil {
				return ct, fmt.Errorf("Failed to find a modtime: stat: %v", err)
			}
			return fi.ModTime(), nil
		}
		return ct, errors.New("all methods failed to find a creation time or modtime")
	}

	size, ok := findSize(f)
	if !ok {
		size = 256 << 10 // enough to get the EXIF
	}
	r := io.NewSectionReader(f, 0, size)
	var tiffErr error
	ex, err := exif.Decode(r)
	if err != nil {
		tiffErr = err
		if exif.IsShortReadTagValueError(err) {
			return ct, io.ErrUnexpectedEOF
		}
		if exif.IsCriticalError(err) || exif.IsExifError(err) {
			return defaultTime()
		}
	}
	ct, err = ex.DateTime()
	if err != nil {
		return defaultTime()
	}
	// If the EXIF file only had local timezone, but it did have
	// GPS, then lookup the timezone and correct the time.
	if ct.Location() == time.Local {
		if exif.IsGPSError(tiffErr) {
			log.Printf("Invalid EXIF GPS data: %v", tiffErr)
			return ct, nil
		}
		if lat, long, err := ex.LatLong(); err == nil {
			if loc := lookupLocation(latlong.LookupZoneName(lat, long)); loc != nil {
				if t, err := exifDateTimeInLocation(ex, loc); err == nil {
					return t, nil
				}
			}
		} else if !exif.IsTagNotPresentError(err) {
			log.Printf("Invalid EXIF GPS data: %v", err)
		}
	}
	return ct, nil
}

// This is basically a copy of the exif.Exif.DateTime() method, except:
// * it takes a *time.Location to assume
// * the caller already assumes there's no timezone offset or GPS time
// in the EXIF, so any of that code can be ignored.
func exifDateTimeInLocation(x *exif.Exif, loc *time.Location) (time.Time, error) {
	tag, err := x.Get(exif.DateTimeOriginal)
	if err != nil {
		tag, err = x.Get(exif.DateTime)
		if err != nil {
			return time.Time{}, err
		}
	}
	if tag.Format() != tiff.StringVal {
		return time.Time{}, errors.New("DateTime[Original] not in string format")
	}
	const exifTimeLayout = "2006:01:02 15:04:05"
	dateStr := strings.TrimRight(string(tag.Val), "\x00")
	return time.ParseInLocation(exifTimeLayout, dateStr, loc)
}

// zoneCache caches time.LoadLocation results (including failures,
// stored as nil) keyed by zone name.
var zoneCache struct {
	sync.RWMutex
	m map[string]*time.Location
}

// lookupLocation returns the *time.Location for the named zone, or
// nil if zone is empty or cannot be loaded. Results are cached.
func lookupLocation(zone string) *time.Location {
	if zone == "" {
		return nil
	}
	zoneCache.RLock()
	l, ok := zoneCache.m[zone]
	zoneCache.RUnlock()
	if ok {
		return l
	}
	// could use singleflight here, but doesn't really
	// matter if two callers both do this.
	loc, err := time.LoadLocation(zone)

	zoneCache.Lock()
	if zoneCache.m == nil {
		zoneCache.m = make(map[string]*time.Location)
	}
	zoneCache.m[zone] = loc // even if nil
	zoneCache.Unlock()

	if err != nil {
		log.Printf("failed to lookup timezone %q: %v", zone, err)
		return nil
	}
	return loc
}

var boringTitlePattern = regexp.MustCompile(`^(?:IMG_|DSC|PANO_|ESR_).*$`)

// IsInterestingTitle returns whether title would be interesting information as
// a title for a permanode. For example, filenames automatically created by
// cameras, such as IMG_XXXX.JPG, do not add any interesting value.
func IsInterestingTitle(title string) bool {
	return !boringTitlePattern.MatchString(title)
}