1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package fastwalk provides a faster version of filepath.Walk for file system
6// scanning tools.
7package fastwalk
8
9import (
10	"errors"
11	"os"
12	"path/filepath"
13	"runtime"
14	"sync"
15)
16
17// TraverseLink is used as a return value from WalkFuncs to indicate that the
18// symlink named in the call may be traversed.
19var TraverseLink = errors.New("fastwalk: traverse symlink, assuming target is a directory")
20
21// SkipFiles is a used as a return value from WalkFuncs to indicate that the
22// callback should not be called for any other files in the current directory.
23// Child directories will still be traversed.
24var SkipFiles = errors.New("fastwalk: skip remaining files in directory")
25
26// Walk is a faster implementation of filepath.Walk.
27//
28// filepath.Walk's design necessarily calls os.Lstat on each file,
29// even if the caller needs less info.
30// Many tools need only the type of each file.
31// On some platforms, this information is provided directly by the readdir
32// system call, avoiding the need to stat each file individually.
33// fastwalk_unix.go contains a fork of the syscall routines.
34//
35// See golang.org/issue/16399
36//
37// Walk walks the file tree rooted at root, calling walkFn for
38// each file or directory in the tree, including root.
39//
40// If fastWalk returns filepath.SkipDir, the directory is skipped.
41//
42// Unlike filepath.Walk:
43//   * file stat calls must be done by the user.
44//     The only provided metadata is the file type, which does not include
45//     any permission bits.
46//   * multiple goroutines stat the filesystem concurrently. The provided
47//     walkFn must be safe for concurrent use.
48//   * fastWalk can follow symlinks if walkFn returns the TraverseLink
49//     sentinel error. It is the walkFn's responsibility to prevent
50//     fastWalk from going into symlink cycles.
51func Walk(root string, walkFn func(path string, typ os.FileMode) error) error {
52	// TODO(bradfitz): make numWorkers configurable? We used a
53	// minimum of 4 to give the kernel more info about multiple
54	// things we want, in hopes its I/O scheduling can take
55	// advantage of that. Hopefully most are in cache. Maybe 4 is
56	// even too low of a minimum. Profile more.
57	numWorkers := 4
58	if n := runtime.NumCPU(); n > numWorkers {
59		numWorkers = n
60	}
61
62	// Make sure to wait for all workers to finish, otherwise
63	// walkFn could still be called after returning. This Wait call
64	// runs after close(e.donec) below.
65	var wg sync.WaitGroup
66	defer wg.Wait()
67
68	w := &walker{
69		fn:       walkFn,
70		enqueuec: make(chan walkItem, numWorkers), // buffered for performance
71		workc:    make(chan walkItem, numWorkers), // buffered for performance
72		donec:    make(chan struct{}),
73
74		// buffered for correctness & not leaking goroutines:
75		resc: make(chan error, numWorkers),
76	}
77	defer close(w.donec)
78
79	for i := 0; i < numWorkers; i++ {
80		wg.Add(1)
81		go w.doWork(&wg)
82	}
83	todo := []walkItem{{dir: root}}
84	out := 0
85	for {
86		workc := w.workc
87		var workItem walkItem
88		if len(todo) == 0 {
89			workc = nil
90		} else {
91			workItem = todo[len(todo)-1]
92		}
93		select {
94		case workc <- workItem:
95			todo = todo[:len(todo)-1]
96			out++
97		case it := <-w.enqueuec:
98			todo = append(todo, it)
99		case err := <-w.resc:
100			out--
101			if err != nil {
102				return err
103			}
104			if out == 0 && len(todo) == 0 {
105				// It's safe to quit here, as long as the buffered
106				// enqueue channel isn't also readable, which might
107				// happen if the worker sends both another unit of
108				// work and its result before the other select was
109				// scheduled and both w.resc and w.enqueuec were
110				// readable.
111				select {
112				case it := <-w.enqueuec:
113					todo = append(todo, it)
114				default:
115					return nil
116				}
117			}
118		}
119	}
120}
121
122// doWork reads directories as instructed (via workc) and runs the
123// user's callback function.
124func (w *walker) doWork(wg *sync.WaitGroup) {
125	defer wg.Done()
126	for {
127		select {
128		case <-w.donec:
129			return
130		case it := <-w.workc:
131			select {
132			case <-w.donec:
133				return
134			case w.resc <- w.walk(it.dir, !it.callbackDone):
135			}
136		}
137	}
138}
139
140type walker struct {
141	fn func(path string, typ os.FileMode) error
142
143	donec    chan struct{} // closed on fastWalk's return
144	workc    chan walkItem // to workers
145	enqueuec chan walkItem // from workers
146	resc     chan error    // from workers
147}
148
149type walkItem struct {
150	dir          string
151	callbackDone bool // callback already called; don't do it again
152}
153
154func (w *walker) enqueue(it walkItem) {
155	select {
156	case w.enqueuec <- it:
157	case <-w.donec:
158	}
159}
160
161func (w *walker) onDirEnt(dirName, baseName string, typ os.FileMode) error {
162	joined := dirName + string(os.PathSeparator) + baseName
163	if typ == os.ModeDir {
164		w.enqueue(walkItem{dir: joined})
165		return nil
166	}
167
168	err := w.fn(joined, typ)
169	if typ == os.ModeSymlink {
170		if err == TraverseLink {
171			// Set callbackDone so we don't call it twice for both the
172			// symlink-as-symlink and the symlink-as-directory later:
173			w.enqueue(walkItem{dir: joined, callbackDone: true})
174			return nil
175		}
176		if err == filepath.SkipDir {
177			// Permit SkipDir on symlinks too.
178			return nil
179		}
180	}
181	return err
182}
183
184func (w *walker) walk(root string, runUserCallback bool) error {
185	if runUserCallback {
186		err := w.fn(root, os.ModeDir)
187		if err == filepath.SkipDir {
188			return nil
189		}
190		if err != nil {
191			return err
192		}
193	}
194
195	return readDir(root, w.onDirEnt)
196}
197