1// Copyright 2016 the Go-FUSE Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package nodefs
6
7// This file contains the internal logic of the
8// FileSystemConnector. The functions for satisfying the raw interface
9// are in fsops.go
10
11import (
12	"log"
13	"path/filepath"
14	"strings"
15	"sync"
16	"time"
17	"unsafe"
18
19	"github.com/hanwen/go-fuse/v2/fuse"
20)
21
// paranoia turns on the invariant checks done by
// FileSystemConnector.verify. It defaults to false; tests should set
// it to true.
var paranoia bool
24
25// FileSystemConnector translates the raw FUSE protocol (serialized
26// structs of uint32/uint64) to operations on Go objects representing
27// files and directories.
28type FileSystemConnector struct {
29	debug bool
30
31	// Callbacks for talking back to the kernel.
32	server *fuse.Server
33
34	// Translate between uint64 handles and *Inode.
35	inodeMap handleMap
36
37	// The root of the FUSE file system.
38	rootNode *Inode
39
40	// This lock prevents Lookup() and Forget() from running concurrently.
41	// Locking at this level is a big hammer, but makes sure we don't return
42	// forgotten nodes to the kernel. Problems solved by this lock:
43	// https://github.com/hanwen/go-fuse/issues/168
44	// https://github.com/rfjakob/gocryptfs/issues/322
45	//
46	// The lock is shared: several concurrent Lookups are allowed to be
47	// run simultaneously, while Forget is exclusive.
48	lookupLock sync.RWMutex
49}
50
51// NewOptions generates FUSE options that correspond to libfuse's
52// defaults.
53func NewOptions() *Options {
54	return &Options{
55		NegativeTimeout: 0,
56		AttrTimeout:     time.Second,
57		EntryTimeout:    time.Second,
58		Owner:           fuse.CurrentOwner(),
59	}
60}
61
62// NewFileSystemConnector creates a FileSystemConnector with the given
63// options.
64func NewFileSystemConnector(root Node, opts *Options) (c *FileSystemConnector) {
65	c = new(FileSystemConnector)
66	if opts == nil {
67		opts = NewOptions()
68	}
69	c.inodeMap = newPortableHandleMap()
70	c.rootNode = newInode(true, root)
71
72	c.verify()
73	c.mountRoot(opts)
74
75	// FUSE does not issue a LOOKUP for 1 (obviously), but it does
76	// issue a forget.  This lookupUpdate is to make the counts match.
77	c.lookupUpdate(c.rootNode)
78	c.debug = opts.Debug
79
80	return c
81}
82
83// Server returns the fuse.Server that talking to the kernel.
84func (c *FileSystemConnector) Server() *fuse.Server {
85	return c.server
86}
87
88// SetDebug toggles printing of debug information. This function is
89// deprecated. Set the Debug option in the Options struct instead.
90func (c *FileSystemConnector) SetDebug(debug bool) {
91	c.debug = debug
92}
93
94// This verifies invariants of the data structure.  This routine
95// acquires tree locks as it walks the inode tree.
96func (c *FileSystemConnector) verify() {
97	if !paranoia {
98		return
99	}
100	root := c.rootNode
101	root.verify(c.rootNode.mountPoint)
102}
103
104// childLookup fills entry information for a newly created child inode
105func (c *rawBridge) childLookup(out *fuse.EntryOut, n *Inode, context *fuse.Context) {
106	n.Node().GetAttr(&out.Attr, nil, context)
107	n.mount.fillEntry(out)
108	out.NodeId, out.Generation = c.fsConn().lookupUpdate(n)
109	if out.Ino == 0 {
110		out.Ino = out.NodeId
111	}
112	if out.Nlink == 0 {
113		// With Nlink == 0, newer kernels will refuse link
114		// operations.
115		out.Nlink = 1
116	}
117}
118
119func (c *rawBridge) toInode(nodeid uint64) *Inode {
120	if nodeid == fuse.FUSE_ROOT_ID {
121		return c.rootNode
122	}
123	i := (*Inode)(unsafe.Pointer(c.inodeMap.Decode(nodeid)))
124	return i
125}
126
127// Must run outside treeLock.  Returns the nodeId and generation.
128func (c *FileSystemConnector) lookupUpdate(node *Inode) (id, generation uint64) {
129	id, generation = c.inodeMap.Register(&node.handled)
130	c.verify()
131	return
132}
133
134// forgetUpdate decrements the reference counter for "nodeID" by "forgetCount".
135// Must run outside treeLock.
136func (c *FileSystemConnector) forgetUpdate(nodeID uint64, forgetCount int) {
137	if nodeID == fuse.FUSE_ROOT_ID {
138		c.rootNode.Node().OnUnmount()
139
140		// We never got a lookup for root, so don't try to
141		// forget root.
142		return
143	}
144
145	// Prevent concurrent modification of the tree while we are processing
146	// the FORGET
147	node := (*Inode)(unsafe.Pointer(c.inodeMap.Decode(nodeID)))
148	node.mount.treeLock.Lock()
149	defer node.mount.treeLock.Unlock()
150
151	if forgotten, _ := c.inodeMap.Forget(nodeID, forgetCount); forgotten {
152		if len(node.children) > 0 || !node.Node().Deletable() ||
153			node == c.rootNode || node.mountPoint != nil {
154			// We cannot forget a directory that still has children as these
155			// would become unreachable.
156			return
157		}
158		// We have to remove ourself from all parents.
159		// Create a copy of node.parents so we can safely iterate over it
160		// while modifying the original.
161		parents := make(map[parentData]struct{}, len(node.parents))
162		for k, v := range node.parents {
163			parents[k] = v
164		}
165
166		for p := range parents {
167			// This also modifies node.parents
168			p.parent.rmChild(p.name)
169		}
170
171		node.fsInode.OnForget()
172	}
173	// TODO - try to drop children even forget was not successful.
174	c.verify()
175}
176
177// InodeCount returns the number of inodes registered with the kernel.
178func (c *FileSystemConnector) InodeHandleCount() int {
179	return c.inodeMap.Count()
180}
181
182// Finds a node within the currently known inodes, returns the last
183// known node and the remaining unknown path components.  If parent is
184// nil, start from FUSE mountpoint.
185func (c *FileSystemConnector) Node(parent *Inode, fullPath string) (*Inode, []string) {
186	if parent == nil {
187		parent = c.rootNode
188	}
189	if fullPath == "" {
190		return parent, nil
191	}
192
193	sep := string(filepath.Separator)
194	fullPath = strings.TrimLeft(filepath.Clean(fullPath), sep)
195	comps := strings.Split(fullPath, sep)
196
197	node := parent
198	if node.mountPoint == nil {
199		node.mount.treeLock.RLock()
200		defer node.mount.treeLock.RUnlock()
201	}
202
203	for i, component := range comps {
204		if len(component) == 0 {
205			continue
206		}
207
208		if node.mountPoint != nil {
209			node.mount.treeLock.RLock()
210			defer node.mount.treeLock.RUnlock()
211		}
212
213		next := node.children[component]
214		if next == nil {
215			return node, comps[i:]
216		}
217		node = next
218	}
219
220	return node, nil
221}
222
223// Follows the path from the given parent, doing lookups as
224// necessary. The path should be '/' separated without leading slash.
225func (c *FileSystemConnector) LookupNode(parent *Inode, path string) *Inode {
226	if path == "" {
227		return parent
228	}
229
230	components := strings.Split(path, "/")
231	for _, r := range components {
232		var a fuse.Attr
233		// This will not affect inode ID lookup counts, which
234		// are only update in response to kernel requests.
235		var dummy fuse.InHeader
236		child, _ := c.internalLookup(nil, &a, parent, r, &dummy)
237		if child == nil {
238			return nil
239		}
240
241		parent = child
242	}
243
244	return parent
245}
246
247func (c *FileSystemConnector) mountRoot(opts *Options) {
248	c.rootNode.mountFs(opts)
249	c.rootNode.mount.connector = c
250	c.verify()
251}
252
253// Mount() generates a synthetic directory node, and mounts the file
254// system there.  If opts is nil, the mount options of the root file
255// system are inherited.  The encompassing filesystem should pretend
256// the mount point does not exist.
257//
258// It returns ENOENT if the directory containing the mount point does
259// not exist, and EBUSY if the intended mount point already exists.
260func (c *FileSystemConnector) Mount(parent *Inode, name string, root Node, opts *Options) fuse.Status {
261	node, code := c.lockMount(parent, name, root, opts)
262	if !code.Ok() {
263		return code
264	}
265
266	node.Node().OnMount(c)
267	return code
268}
269
270func (c *FileSystemConnector) lockMount(parent *Inode, name string, root Node, opts *Options) (*Inode, fuse.Status) {
271	defer c.verify()
272	parent.mount.treeLock.Lock()
273	defer parent.mount.treeLock.Unlock()
274	node := parent.children[name]
275	if node != nil {
276		return nil, fuse.EBUSY
277	}
278
279	node = newInode(true, root)
280	if opts == nil {
281		opts = c.rootNode.mountPoint.options
282	}
283
284	node.mountFs(opts)
285	node.mount.connector = c
286	parent.addChild(name, node)
287
288	node.mountPoint.parentInode = parent
289	if c.debug {
290		log.Printf("Mount %T on subdir %s, parent i%d", node,
291			name, c.inodeMap.Handle(&parent.handled))
292	}
293	return node, fuse.OK
294}
295
296// Unmount() tries to unmount the given inode.  It returns EINVAL if the
297// path does not exist, or is not a mount point, and EBUSY if there
298// are open files or submounts below this node.
299func (c *FileSystemConnector) Unmount(node *Inode) fuse.Status {
300	// TODO - racy.
301	if node.mountPoint == nil {
302		log.Println("not a mountpoint:", c.inodeMap.Handle(&node.handled))
303		return fuse.EINVAL
304	}
305
306	nodeID := c.inodeMap.Handle(&node.handled)
307
308	// Must lock parent to update tree structure.
309	parentNode := node.mountPoint.parentInode
310	parentNode.mount.treeLock.Lock()
311	defer parentNode.mount.treeLock.Unlock()
312
313	mount := node.mountPoint
314	name := node.mountPoint.mountName()
315	if mount.openFiles.Count() > 0 {
316		return fuse.EBUSY
317	}
318
319	node.mount.treeLock.Lock()
320	defer node.mount.treeLock.Unlock()
321
322	if mount.mountInode != node {
323		log.Panicf("got two different mount inodes %v vs %v",
324			c.inodeMap.Handle(&mount.mountInode.handled),
325			c.inodeMap.Handle(&node.handled))
326	}
327
328	if !node.canUnmount() {
329		return fuse.EBUSY
330	}
331
332	delete(parentNode.children, name)
333	node.Node().OnUnmount()
334
335	parentId := c.inodeMap.Handle(&parentNode.handled)
336	if parentNode == c.rootNode {
337		// TODO - test coverage. Currently covered by zipfs/multizip_test.go
338		parentId = fuse.FUSE_ROOT_ID
339	}
340
341	// We have to wait until the kernel has forgotten the
342	// mountpoint, so the write to node.mountPoint is no longer
343	// racy.
344	mount.treeLock.Unlock()
345	parentNode.mount.treeLock.Unlock()
346	code := c.server.DeleteNotify(parentId, nodeID, name)
347
348	if code.Ok() {
349		delay := 100 * time.Microsecond
350
351		for {
352			// This operation is rare, so we kludge it to avoid
353			// contention.
354			time.Sleep(delay)
355			delay = delay * 2
356			if !c.inodeMap.Has(nodeID) {
357				break
358			}
359
360			if delay >= time.Second {
361				// We limit the wait at one second. If
362				// it takes longer, something else is
363				// amiss, and we would be waiting forever.
364				log.Println("kernel did not issue FORGET for node on Unmount.")
365				break
366			}
367		}
368
369	}
370
371	parentNode.mount.treeLock.Lock()
372	mount.treeLock.Lock()
373	mount.mountInode = nil
374	node.mountPoint = nil
375
376	return fuse.OK
377}
378
379// FileNotify notifies the kernel that data and metadata of this inode
380// has changed.  After this call completes, the kernel will issue a
381// new GetAttr requests for metadata and new Read calls for content.
382// Use negative offset for metadata-only invalidation, and zero-length
383// for invalidating all content.
384func (c *FileSystemConnector) FileNotify(node *Inode, off int64, length int64) fuse.Status {
385	var nID uint64
386	if node == c.rootNode {
387		nID = fuse.FUSE_ROOT_ID
388	} else {
389		nID = c.inodeMap.Handle(&node.handled)
390	}
391
392	if nID == 0 {
393		return fuse.OK
394	}
395	return c.server.InodeNotify(nID, off, length)
396}
397
398// FileNotifyStoreCache notifies the kernel about changed data of the inode.
399//
400// This call is similar to FileNotify, but instead of only invalidating a data
401// region, it puts updated data directly to the kernel cache:
402//
403// After this call completes, the kernel has put updated data into the inode's cache,
404// and will use data from that cache for non direct-IO reads from the inode
405// in corresponding data region. After kernel's cache data is evicted, the kernel
406// will have to issue new Read calls on user request to get data content.
407//
408// ENOENT is returned if the kernel does not currently have entry for this
409// inode in its dentry cache.
410func (c *FileSystemConnector) FileNotifyStoreCache(node *Inode, off int64, data []byte) fuse.Status {
411	var nID uint64
412	if node == c.rootNode {
413		nID = fuse.FUSE_ROOT_ID
414	} else {
415		nID = c.inodeMap.Handle(&node.handled)
416	}
417
418	if nID == 0 {
419		// the kernel does not currently know about this inode.
420		return fuse.ENOENT
421	}
422	return c.server.InodeNotifyStoreCache(nID, off, data)
423}
424
425// FileRetrieveCache retrieves data from kernel's inode cache.
426//
427// This call retrieves data from kernel's inode cache @ offset and up to
428// len(dest) bytes. If kernel cache has fewer consecutive data starting at
429// offset, that fewer amount is returned. In particular if inode data at offset
430// is not cached (0, OK) is returned.
431//
432// If the kernel does not currently have entry for this inode in its dentry
433// cache (0, OK) is still returned, pretending that the inode could be known to
434// the kernel, but kernel's inode cache is empty.
435func (c *FileSystemConnector) FileRetrieveCache(node *Inode, off int64, dest []byte) (n int, st fuse.Status) {
436	var nID uint64
437	if node == c.rootNode {
438		nID = fuse.FUSE_ROOT_ID
439	} else {
440		nID = c.inodeMap.Handle(&node.handled)
441	}
442
443	if nID == 0 {
444		// the kernel does not currently know about this inode.
445		// -> we can pretend that its cache for the inode is empty.
446		return 0, fuse.OK
447	}
448	return c.server.InodeRetrieveCache(nID, off, dest)
449}
450
451// EntryNotify makes the kernel forget the entry data from the given
452// name from a directory.  After this call, the kernel will issue a
453// new lookup request for the given name when necessary. No filesystem
454// related locks should be held when calling this.
455func (c *FileSystemConnector) EntryNotify(node *Inode, name string) fuse.Status {
456	var nID uint64
457	if node == c.rootNode {
458		nID = fuse.FUSE_ROOT_ID
459	} else {
460		nID = c.inodeMap.Handle(&node.handled)
461	}
462
463	if nID == 0 {
464		return fuse.OK
465	}
466	return c.server.EntryNotify(nID, name)
467}
468
469// DeleteNotify signals to the kernel that the named entry in dir for
470// the child disappeared. No filesystem related locks should be held
471// when calling this.
472func (c *FileSystemConnector) DeleteNotify(dir *Inode, child *Inode, name string) fuse.Status {
473	var nID uint64
474
475	if dir == c.rootNode {
476		nID = fuse.FUSE_ROOT_ID
477	} else {
478		nID = c.inodeMap.Handle(&dir.handled)
479	}
480
481	if nID == 0 {
482		return fuse.OK
483	}
484
485	chId := c.inodeMap.Handle(&child.handled)
486
487	return c.server.DeleteNotify(nID, chId, name)
488}
489