1package main
2
3import (
4	"context"
5	"encoding/json"
6	"sort"
7	"strings"
8	"time"
9
10	"github.com/spf13/cobra"
11
12	"github.com/restic/restic/internal/debug"
13	"github.com/restic/restic/internal/errors"
14	"github.com/restic/restic/internal/filter"
15	"github.com/restic/restic/internal/restic"
16	"github.com/restic/restic/internal/walker"
17)
18
// cmdFind is the cobra definition of the "find" subcommand. Its RunE handler
// passes the package-level findOptions and globalOptions, together with the
// positional arguments, on to runFind.
var cmdFind = &cobra.Command{
	Use:   "find [flags] PATTERN...",
	Short: "Find a file, a directory or restic IDs",
	Long: `
The "find" command searches for files or directories in snapshots stored in the
repo.
It can also be used to search for restic blobs or trees for troubleshooting.`,
	Example: `restic find config.json
restic find --json "*.yml" "*.json"
restic find --json --blob 420f620f b46ebe8a ddd38656
restic find --show-pack-id --blob 420f620f
restic find --tree 577c2bc9 f81f2e22 a62827a9
restic find --pack 025c1d06

EXIT STATUS
===========

Exit status is 0 if the command was successful, and non-zero if there was any error.
`,
	DisableAutoGenTag: true,
	RunE: func(cmd *cobra.Command, args []string) error {
		// The cobra command itself is not needed; all state comes from the
		// flag-bound option structs.
		return runFind(findOptions, globalOptions, args)
	},
}
43
44// FindOptions bundles all options for the find command.
45type FindOptions struct {
46	Oldest             string
47	Newest             string
48	Snapshots          []string
49	BlobID, TreeID     bool
50	PackID, ShowPackID bool
51	CaseInsensitive    bool
52	ListLong           bool
53	Hosts              []string
54	Paths              []string
55	Tags               restic.TagLists
56}
57
58var findOptions FindOptions
59
60func init() {
61	cmdRoot.AddCommand(cmdFind)
62
63	f := cmdFind.Flags()
64	f.StringVarP(&findOptions.Oldest, "oldest", "O", "", "oldest modification date/time")
65	f.StringVarP(&findOptions.Newest, "newest", "N", "", "newest modification date/time")
66	f.StringArrayVarP(&findOptions.Snapshots, "snapshot", "s", nil, "snapshot `id` to search in (can be given multiple times)")
67	f.BoolVar(&findOptions.BlobID, "blob", false, "pattern is a blob-ID")
68	f.BoolVar(&findOptions.TreeID, "tree", false, "pattern is a tree-ID")
69	f.BoolVar(&findOptions.PackID, "pack", false, "pattern is a pack-ID")
70	f.BoolVar(&findOptions.ShowPackID, "show-pack-id", false, "display the pack-ID the blobs belong to (with --blob or --tree)")
71	f.BoolVarP(&findOptions.CaseInsensitive, "ignore-case", "i", false, "ignore case for pattern")
72	f.BoolVarP(&findOptions.ListLong, "long", "l", false, "use a long listing format showing size and mode")
73
74	f.StringArrayVarP(&findOptions.Hosts, "host", "H", nil, "only consider snapshots for this `host`, when no snapshot ID is given (can be specified multiple times)")
75	f.Var(&findOptions.Tags, "tag", "only consider snapshots which include this `taglist`, when no snapshot-ID is given")
76	f.StringArrayVar(&findOptions.Paths, "path", nil, "only consider snapshots which include this (absolute) `path`, when no snapshot-ID is given")
77}
78
// findPattern describes what to look for: the glob patterns plus an optional
// modification-time window.
type findPattern struct {
	// oldest is the lower modification-time bound; the zero value disables it.
	oldest time.Time
	// newest is the upper modification-time bound; the zero value disables it.
	newest time.Time
	// pattern holds the patterns a node path is matched against.
	pattern []string
	// ignoreCase makes the search lower-case node paths before matching.
	ignoreCase bool
}
84
// timeFormats lists the layouts accepted for time arguments, in the order in
// which they are tried.
var timeFormats = []string{
	// ISO-style dates (year-month-day).
	"2006-01-02",
	"2006-01-02 15:04",
	"2006-01-02 15:04:05",
	"2006-01-02 15:04:05 -0700",
	"2006-01-02 15:04:05 MST",

	// Dotted dates (day.month.year).
	"02.01.2006",
	"02.01.2006 15:04",
	"02.01.2006 15:04:05",
	"02.01.2006 15:04:05 -0700",
	"02.01.2006 15:04:05 MST",

	// Weekday-prefixed long form.
	"Mon Jan 2 15:04:05 -0700 MST 2006",
}
98
99func parseTime(str string) (time.Time, error) {
100	for _, fmt := range timeFormats {
101		if t, err := time.ParseInLocation(fmt, str, time.Local); err == nil {
102			return t, nil
103		}
104	}
105
106	return time.Time{}, errors.Fatalf("unable to parse time: %q", str)
107}
108
109type statefulOutput struct {
110	ListLong bool
111	JSON     bool
112	inuse    bool
113	newsn    *restic.Snapshot
114	oldsn    *restic.Snapshot
115	hits     int
116}
117
118func (s *statefulOutput) PrintPatternJSON(path string, node *restic.Node) {
119	type findNode restic.Node
120	b, err := json.Marshal(struct {
121		// Add these attributes
122		Path        string `json:"path,omitempty"`
123		Permissions string `json:"permissions,omitempty"`
124
125		*findNode
126
127		// Make the following attributes disappear
128		Name               byte `json:"name,omitempty"`
129		Inode              byte `json:"inode,omitempty"`
130		ExtendedAttributes byte `json:"extended_attributes,omitempty"`
131		Device             byte `json:"device,omitempty"`
132		Content            byte `json:"content,omitempty"`
133		Subtree            byte `json:"subtree,omitempty"`
134	}{
135		Path:        path,
136		Permissions: node.Mode.String(),
137		findNode:    (*findNode)(node),
138	})
139	if err != nil {
140		Warnf("Marshall failed: %v\n", err)
141		return
142	}
143	if !s.inuse {
144		Printf("[")
145		s.inuse = true
146	}
147	if s.newsn != s.oldsn {
148		if s.oldsn != nil {
149			Printf("],\"hits\":%d,\"snapshot\":%q},", s.hits, s.oldsn.ID())
150		}
151		Printf(`{"matches":[`)
152		s.oldsn = s.newsn
153		s.hits = 0
154	}
155	if s.hits > 0 {
156		Printf(",")
157	}
158	Print(string(b))
159	s.hits++
160}
161
162func (s *statefulOutput) PrintPatternNormal(path string, node *restic.Node) {
163	if s.newsn != s.oldsn {
164		if s.oldsn != nil {
165			Verbosef("\n")
166		}
167		s.oldsn = s.newsn
168		Verbosef("Found matching entries in snapshot %s from %s\n", s.oldsn.ID().Str(), s.oldsn.Time.Local().Format(TimeFormat))
169	}
170	Println(formatNode(path, node, s.ListLong))
171}
172
173func (s *statefulOutput) PrintPattern(path string, node *restic.Node) {
174	if s.JSON {
175		s.PrintPatternJSON(path, node)
176	} else {
177		s.PrintPatternNormal(path, node)
178	}
179}
180
181func (s *statefulOutput) PrintObjectJSON(kind, id, nodepath, treeID string, sn *restic.Snapshot) {
182	b, err := json.Marshal(struct {
183		// Add these attributes
184		ObjectType string    `json:"object_type"`
185		ID         string    `json:"id"`
186		Path       string    `json:"path"`
187		ParentTree string    `json:"parent_tree,omitempty"`
188		SnapshotID string    `json:"snapshot"`
189		Time       time.Time `json:"time,omitempty"`
190	}{
191		ObjectType: kind,
192		ID:         id,
193		Path:       nodepath,
194		SnapshotID: sn.ID().String(),
195		ParentTree: treeID,
196		Time:       sn.Time,
197	})
198	if err != nil {
199		Warnf("Marshall failed: %v\n", err)
200		return
201	}
202	if !s.inuse {
203		Printf("[")
204		s.inuse = true
205	}
206	if s.hits > 0 {
207		Printf(",")
208	}
209	Print(string(b))
210	s.hits++
211}
212
213func (s *statefulOutput) PrintObjectNormal(kind, id, nodepath, treeID string, sn *restic.Snapshot) {
214	Printf("Found %s %s\n", kind, id)
215	if kind == "blob" {
216		Printf(" ... in file %s\n", nodepath)
217		Printf("     (tree %s)\n", treeID)
218	} else {
219		Printf(" ... path %s\n", nodepath)
220	}
221	Printf(" ... in snapshot %s (%s)\n", sn.ID().Str(), sn.Time.Local().Format(TimeFormat))
222}
223
224func (s *statefulOutput) PrintObject(kind, id, nodepath, treeID string, sn *restic.Snapshot) {
225	if s.JSON {
226		s.PrintObjectJSON(kind, id, nodepath, treeID, sn)
227	} else {
228		s.PrintObjectNormal(kind, id, nodepath, treeID, sn)
229	}
230}
231
232func (s *statefulOutput) Finish() {
233	if s.JSON {
234		// do some finishing up
235		if s.oldsn != nil {
236			Printf("],\"hits\":%d,\"snapshot\":%q}", s.hits, s.oldsn.ID())
237		}
238		if s.inuse {
239			Printf("]\n")
240		} else {
241			Printf("[]\n")
242		}
243		return
244	}
245}
246
247// Finder bundles information needed to find a file or directory.
248type Finder struct {
249	repo        restic.Repository
250	pat         findPattern
251	out         statefulOutput
252	ignoreTrees restic.IDSet
253	blobIDs     map[string]struct{}
254	treeIDs     map[string]struct{}
255	itemsFound  int
256}
257
258func (f *Finder) findInSnapshot(ctx context.Context, sn *restic.Snapshot) error {
259	debug.Log("searching in snapshot %s\n  for entries within [%s %s]", sn.ID(), f.pat.oldest, f.pat.newest)
260
261	if sn.Tree == nil {
262		return errors.Errorf("snapshot %v has no tree", sn.ID().Str())
263	}
264
265	f.out.newsn = sn
266	return walker.Walk(ctx, f.repo, *sn.Tree, f.ignoreTrees, func(parentTreeID restic.ID, nodepath string, node *restic.Node, err error) (bool, error) {
267		if err != nil {
268			debug.Log("Error loading tree %v: %v", parentTreeID, err)
269
270			Printf("Unable to load tree %s\n ... which belongs to snapshot %s.\n", parentTreeID, sn.ID())
271
272			return false, walker.ErrSkipNode
273		}
274
275		if node == nil {
276			return false, nil
277		}
278
279		normalizedNodepath := nodepath
280		if f.pat.ignoreCase {
281			normalizedNodepath = strings.ToLower(nodepath)
282		}
283
284		var foundMatch bool
285
286		for _, pat := range f.pat.pattern {
287			found, err := filter.Match(pat, normalizedNodepath)
288			if err != nil {
289				return false, err
290			}
291			if found {
292				foundMatch = true
293				break
294			}
295		}
296
297		var (
298			ignoreIfNoMatch = true
299			errIfNoMatch    error
300		)
301		if node.Type == "dir" {
302			var childMayMatch bool
303			for _, pat := range f.pat.pattern {
304				mayMatch, err := filter.ChildMatch(pat, normalizedNodepath)
305				if err != nil {
306					return false, err
307				}
308				if mayMatch {
309					childMayMatch = true
310					break
311				}
312			}
313
314			if !childMayMatch {
315				ignoreIfNoMatch = true
316				errIfNoMatch = walker.ErrSkipNode
317			} else {
318				ignoreIfNoMatch = false
319			}
320		}
321
322		if !foundMatch {
323			return ignoreIfNoMatch, errIfNoMatch
324		}
325
326		if !f.pat.oldest.IsZero() && node.ModTime.Before(f.pat.oldest) {
327			debug.Log("    ModTime is older than %s\n", f.pat.oldest)
328			return ignoreIfNoMatch, errIfNoMatch
329		}
330
331		if !f.pat.newest.IsZero() && node.ModTime.After(f.pat.newest) {
332			debug.Log("    ModTime is newer than %s\n", f.pat.newest)
333			return ignoreIfNoMatch, errIfNoMatch
334		}
335
336		debug.Log("    found match\n")
337		f.out.PrintPattern(nodepath, node)
338		return false, nil
339	})
340}
341
342func (f *Finder) findIDs(ctx context.Context, sn *restic.Snapshot) error {
343	debug.Log("searching IDs in snapshot %s", sn.ID())
344
345	if sn.Tree == nil {
346		return errors.Errorf("snapshot %v has no tree", sn.ID().Str())
347	}
348
349	f.out.newsn = sn
350	return walker.Walk(ctx, f.repo, *sn.Tree, f.ignoreTrees, func(parentTreeID restic.ID, nodepath string, node *restic.Node, err error) (bool, error) {
351		if err != nil {
352			debug.Log("Error loading tree %v: %v", parentTreeID, err)
353
354			Printf("Unable to load tree %s\n ... which belongs to snapshot %s.\n", parentTreeID, sn.ID())
355
356			return false, walker.ErrSkipNode
357		}
358
359		if node == nil {
360			return false, nil
361		}
362
363		if node.Type == "dir" && f.treeIDs != nil {
364			treeID := node.Subtree
365			found := false
366			if _, ok := f.treeIDs[treeID.Str()]; ok {
367				found = true
368			} else if _, ok := f.treeIDs[treeID.String()]; ok {
369				found = true
370			}
371			if found {
372				f.out.PrintObject("tree", treeID.String(), nodepath, "", sn)
373				f.itemsFound++
374				// Terminate if we have found all trees (and we are not
375				// looking for blobs)
376				if f.itemsFound >= len(f.treeIDs) && f.blobIDs == nil {
377					// Return an error to terminate the Walk
378					return true, errors.New("OK")
379				}
380			}
381		}
382
383		if node.Type == "file" && f.blobIDs != nil {
384			for _, id := range node.Content {
385				idStr := id.String()
386				if _, ok := f.blobIDs[idStr]; !ok {
387					// Look for short ID form
388					if _, ok := f.blobIDs[id.Str()]; !ok {
389						continue
390					}
391					// Replace the short ID with the long one
392					f.blobIDs[idStr] = struct{}{}
393					delete(f.blobIDs, id.Str())
394				}
395				f.out.PrintObject("blob", idStr, nodepath, parentTreeID.String(), sn)
396			}
397		}
398
399		return false, nil
400	})
401}
402
403var errAllPacksFound = errors.New("all packs found")
404
405// packsToBlobs converts the list of pack IDs to a list of blob IDs that
406// belong to those packs.
407func (f *Finder) packsToBlobs(ctx context.Context, packs []string) error {
408	packIDs := make(map[string]struct{})
409	for _, p := range packs {
410		packIDs[p] = struct{}{}
411	}
412	if f.blobIDs == nil {
413		f.blobIDs = make(map[string]struct{})
414	}
415
416	debug.Log("Looking for packs...")
417	err := f.repo.List(ctx, restic.PackFile, func(id restic.ID, size int64) error {
418		idStr := id.String()
419		if _, ok := packIDs[idStr]; !ok {
420			// Look for short ID form
421			if _, ok := packIDs[id.Str()]; !ok {
422				return nil
423			}
424			delete(packIDs, id.Str())
425		} else {
426			// forget found id
427			delete(packIDs, idStr)
428		}
429		debug.Log("Found pack %s", idStr)
430		blobs, _, err := f.repo.ListPack(ctx, id, size)
431		if err != nil {
432			return err
433		}
434		for _, b := range blobs {
435			f.blobIDs[b.ID.String()] = struct{}{}
436		}
437		// Stop searching when all packs have been found
438		if len(packIDs) == 0 {
439			return errAllPacksFound
440		}
441		return nil
442	})
443
444	if err != nil && err != errAllPacksFound {
445		return err
446	}
447
448	if err != errAllPacksFound {
449		// try to resolve unknown pack ids from the index
450		packIDs = f.indexPacksToBlobs(ctx, packIDs)
451	}
452
453	if len(packIDs) > 0 {
454		list := make([]string, 0, len(packIDs))
455		for h := range packIDs {
456			list = append(list, h)
457		}
458
459		sort.Strings(list)
460		return errors.Fatalf("unable to find pack(s): %v", list)
461	}
462
463	debug.Log("%d blobs found", len(f.blobIDs))
464	return nil
465}
466
467func (f *Finder) indexPacksToBlobs(ctx context.Context, packIDs map[string]struct{}) map[string]struct{} {
468	wctx, cancel := context.WithCancel(ctx)
469	defer cancel()
470
471	// remember which packs were found in the index
472	indexPackIDs := make(map[string]struct{})
473	for pb := range f.repo.Index().Each(wctx) {
474		idStr := pb.PackID.String()
475		// keep entry in packIDs as Each() returns individual index entries
476		matchingID := false
477		if _, ok := packIDs[idStr]; ok {
478			matchingID = true
479		} else {
480			if _, ok := packIDs[pb.PackID.Str()]; ok {
481				// expand id
482				delete(packIDs, pb.PackID.Str())
483				packIDs[idStr] = struct{}{}
484				matchingID = true
485			}
486		}
487		if matchingID {
488			f.blobIDs[pb.ID.String()] = struct{}{}
489			indexPackIDs[idStr] = struct{}{}
490		}
491	}
492
493	for id := range indexPackIDs {
494		delete(packIDs, id)
495	}
496
497	if len(indexPackIDs) > 0 {
498		list := make([]string, 0, len(indexPackIDs))
499		for h := range indexPackIDs {
500			list = append(list, h)
501		}
502		Warnf("some pack files are missing from the repository, getting their blobs from the repository index: %v\n\n", list)
503	}
504	return packIDs
505}
506
507func (f *Finder) findObjectPack(ctx context.Context, id string, t restic.BlobType) {
508	idx := f.repo.Index()
509
510	rid, err := restic.ParseID(id)
511	if err != nil {
512		Printf("Note: cannot find pack for object '%s', unable to parse ID: %v\n", id, err)
513		return
514	}
515
516	blobs := idx.Lookup(restic.BlobHandle{ID: rid, Type: t})
517	if len(blobs) == 0 {
518		Printf("Object %s not found in the index\n", rid.Str())
519		return
520	}
521
522	for _, b := range blobs {
523		if b.ID.Equal(rid) {
524			Printf("Object belongs to pack %s\n ... Pack %s: %s\n", b.PackID, b.PackID.Str(), b.String())
525			break
526		}
527	}
528}
529
530func (f *Finder) findObjectsPacks(ctx context.Context) {
531	for i := range f.blobIDs {
532		f.findObjectPack(ctx, i, restic.DataBlob)
533	}
534
535	for i := range f.treeIDs {
536		f.findObjectPack(ctx, i, restic.TreeBlob)
537	}
538}
539
540func runFind(opts FindOptions, gopts GlobalOptions, args []string) error {
541	if len(args) == 0 {
542		return errors.Fatal("wrong number of arguments")
543	}
544
545	var err error
546	pat := findPattern{pattern: args}
547	if opts.CaseInsensitive {
548		for i := range pat.pattern {
549			pat.pattern[i] = strings.ToLower(pat.pattern[i])
550		}
551		pat.ignoreCase = true
552	}
553
554	if opts.Oldest != "" {
555		if pat.oldest, err = parseTime(opts.Oldest); err != nil {
556			return err
557		}
558	}
559
560	if opts.Newest != "" {
561		if pat.newest, err = parseTime(opts.Newest); err != nil {
562			return err
563		}
564	}
565
566	// Check at most only one kind of IDs is provided: currently we
567	// can't mix types
568	if (opts.BlobID && opts.TreeID) ||
569		(opts.BlobID && opts.PackID) ||
570		(opts.TreeID && opts.PackID) {
571		return errors.Fatal("cannot have several ID types")
572	}
573
574	repo, err := OpenRepository(gopts)
575	if err != nil {
576		return err
577	}
578
579	if !gopts.NoLock {
580		lock, err := lockRepo(gopts.ctx, repo)
581		defer unlockRepo(lock)
582		if err != nil {
583			return err
584		}
585	}
586
587	if err = repo.LoadIndex(gopts.ctx); err != nil {
588		return err
589	}
590
591	ctx, cancel := context.WithCancel(gopts.ctx)
592	defer cancel()
593
594	f := &Finder{
595		repo:        repo,
596		pat:         pat,
597		out:         statefulOutput{ListLong: opts.ListLong, JSON: globalOptions.JSON},
598		ignoreTrees: restic.NewIDSet(),
599	}
600
601	if opts.BlobID {
602		f.blobIDs = make(map[string]struct{})
603		for _, pat := range f.pat.pattern {
604			f.blobIDs[pat] = struct{}{}
605		}
606	}
607	if opts.TreeID {
608		f.treeIDs = make(map[string]struct{})
609		for _, pat := range f.pat.pattern {
610			f.treeIDs[pat] = struct{}{}
611		}
612	}
613
614	if opts.PackID {
615		err := f.packsToBlobs(ctx, f.pat.pattern)
616		if err != nil {
617			return err
618		}
619	}
620
621	for sn := range FindFilteredSnapshots(ctx, repo, opts.Hosts, opts.Tags, opts.Paths, opts.Snapshots) {
622		if f.blobIDs != nil || f.treeIDs != nil {
623			if err = f.findIDs(ctx, sn); err != nil && err.Error() != "OK" {
624				return err
625			}
626			continue
627		}
628		if err = f.findInSnapshot(ctx, sn); err != nil {
629			return err
630		}
631	}
632	f.out.Finish()
633
634	if opts.ShowPackID && (f.blobIDs != nil || f.treeIDs != nil) {
635		f.findObjectsPacks(ctx)
636	}
637
638	return nil
639}
640