// Copyright 2016 Keybase Inc. All rights reserved.
// Use of this source code is governed by a BSD
// license that can be found in the LICENSE file.

package libkbfs

import (
	"context"
	"path/filepath"
	"strings"
	"sync"

	"github.com/keybase/client/go/kbfs/ioutil"
	"github.com/keybase/client/go/kbfs/kbfsblock"
	"github.com/keybase/client/go/kbfs/kbfscodec"
	"github.com/keybase/client/go/kbfs/kbfscrypto"
	"github.com/keybase/go-codec/codec"
	"github.com/pkg/errors"
)

// blockDiskStore stores block data in flat files on disk.
//
// The directory layout looks like:
//
// dir/0100/0...01/data
// dir/0100/0...01/id
// dir/0100/0...01/ksh
// dir/0100/0...01/refs
// ...
// dir/01cc/5...55/id
// dir/01cc/5...55/refs
// ...
// dir/01dd/6...66/data
// dir/01dd/6...66/id
// dir/01dd/6...66/ksh
// ...
// dir/01ff/f...ff/data
// dir/01ff/f...ff/id
// dir/01ff/f...ff/ksh
// dir/01ff/f...ff/refs
//
// Each block has its own subdirectory with its ID truncated to 17
// bytes (34 characters) as a name. The block subdirectories are
// splayed over (# of possible hash types) * 256 subdirectories -- one
// byte for the hash type (currently only one) plus the first byte of
// the hash data -- using the first four characters of the name to
// keep the number of directories in dir itself to a manageable
// number, similar to git.
//
// Each block directory has the following files:
//
//   - id:   The full block ID in binary format. Always present.
//   - data: The raw block data that should hash to the block ID.
//           May be missing.
//   - ksh:  The raw data for the associated key server half.
//           May be missing, but should be present when data is.
//   - refs: The list of references to the block, along with other
//           block-specific info, encoded as a serialized
//           blockJournalInfo. May be missing.  TODO: rename this to
//           something more generic if we ever upgrade the journal
//           version.
//
// Future versions of the disk store might add more files to this
// directory; if any code is written to move blocks around, it should
// be careful to preserve any unknown files in a block directory.
//
// The maximum number of characters added to the root dir by a block
// disk store is 44:
//
//   /01ff/f...(30 characters total)...ff/data
//
// blockDiskStore is goroutine-safe on a per-operation, per-block-ID
// basis, so any code that uses it can make concurrent calls.
// However, it's likely that much of the time, the caller will require
// consistency across multiple calls (e.g., a call to `getDataSize()`
// followed by a call to `remove()`), so higher-level locking is also
// recommended.  For performance reasons, though, that locking should
// still allow `put()` calls for different blocks to proceed
// concurrently.
type blockDiskStore struct {
	codec kbfscodec.Codec
	dir   string

	lock sync.Mutex
	puts map[kbfsblock.ID]<-chan struct{}
}

// filesPerBlockMax is an upper bound for the number of files
// (including directories) to store one block: 4 for the regular
// files, 2 for the (splayed) directories, and 1 for the journal
// entry.
const filesPerBlockMax = 7

// makeBlockDiskStore returns a new blockDiskStore for the given
// directory.
func makeBlockDiskStore(codec kbfscodec.Codec, dir string) *blockDiskStore {
	return &blockDiskStore{
		codec: codec,
		dir:   dir,
		puts:  make(map[kbfsblock.ID]<-chan struct{}),
	}
}

// The functions below are for building various paths.

func (s *blockDiskStore) blockPath(id kbfsblock.ID) string {
	// Truncate to 34 characters, which corresponds to 16 random
	// bytes (since the first byte is a hash type) or 128 random
	// bits, which means that the expected number of blocks
	// generated before getting a path collision is 2^64 (see
	// https://en.wikipedia.org/wiki/Birthday_problem#Cast_as_a_collision_problem
	// ).
	idStr := id.String()
	return filepath.Join(s.dir, idStr[:4], idStr[4:34])
}

func (s *blockDiskStore) dataPath(id kbfsblock.ID) string {
	return filepath.Join(s.blockPath(id), "data")
}

const idFilename = "id"

func (s *blockDiskStore) idPath(id kbfsblock.ID) string {
	return filepath.Join(s.blockPath(id), idFilename)
}

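// keyServerHalfPath returns the path of the file storing the
// serialized key server half ("ksh") for the given block ID.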
func (s *blockDiskStore) keyServerHalfPath(id kbfsblock.ID) string {
	return filepath.Join(s.blockPath(id), "ksh")
}

func (s *blockDiskStore) infoPath(id kbfsblock.ID) string {
	// TODO: change the file name to "info" the next time we change
	// the journal layout.
	return filepath.Join(s.blockPath(id), "refs")
}

// makeDir makes the directory for the given block ID and writes the
// ID file, if necessary.
func (s *blockDiskStore) makeDir(id kbfsblock.ID) error {
	err := ioutil.MkdirAll(s.blockPath(id), 0700)
	if err != nil {
		return err
	}

	// TODO: Only write if the file doesn't exist.

	return ioutil.WriteFile(s.idPath(id), []byte(id.String()), 0600)
}

// blockJournalInfo contains info about a particular block in the
// journal, such as the set of references to it.
type blockJournalInfo struct {
	Refs    blockRefMap
	Flushed bool `codec:"f,omitempty"`

	codec.UnknownFieldSetHandler
}

// TODO: Add caching for refs

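// startOpOrWait attempts to claim exclusive access to the given block
// ID.  If it succeeds, it returns a non-nil closeCh that must
// eventually be passed to finishOp; otherwise it returns a non-nil
// waitCh that will be closed when the current holder finishes, after
// which the caller may try again.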
func (s *blockDiskStore) startOpOrWait(id kbfsblock.ID) (
	closeCh chan<- struct{}, waitCh <-chan struct{}) {
	s.lock.Lock()
	defer s.lock.Unlock()

	waitCh = s.puts[id]
	if waitCh == nil {
		// If this caller is getting exclusive access to this `id`,
		// make a channel, store it as receive-only in `puts`, and
		// return it as send-only to the caller, to ensure that only
		// this caller can finish the exclusive access.
		ch := make(chan struct{})
		s.puts[id] = ch
		closeCh = ch
	}

	return closeCh, waitCh
}

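// finishOp releases exclusive access to the given block ID and wakes
// up any goroutines waiting for it in exclusify.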
func (s *blockDiskStore) finishOp(id kbfsblock.ID, closeCh chan<- struct{}) {
	s.lock.Lock()
	defer s.lock.Unlock()
	delete(s.puts, id)
	close(closeCh)
}

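// exclusify waits until this call has exclusive access to the given
// block ID (or until the context is canceled), and returns a cleanup
// function that must be called to release that access.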
func (s *blockDiskStore) exclusify(ctx context.Context, id kbfsblock.ID) (
	cleanupFn func(), err error) {
	// Get a guarantee that we're acting exclusively on this
	// particular block ID.
	for {
		// Repeatedly request exclusive access until there's no
		// channel left to wait on.
		closeCh, waitCh := s.startOpOrWait(id)
		if waitCh == nil {
			// If there's nothing to wait on, then we have exclusive
			// access, so return.
			return func() { s.finishOp(id, closeCh) }, nil
		}
		select {
		case <-waitCh:
		case <-ctx.Done():
			return nil, errors.WithStack(ctx.Err())
		}
	}
}

// getInfo returns the journal info (including the set of references)
// for the given ID.  exclusify must have been called by the caller.
func (s *blockDiskStore) getInfo(id kbfsblock.ID) (blockJournalInfo, error) {
	var info blockJournalInfo
	err := kbfscodec.DeserializeFromFile(s.codec, s.infoPath(id), &info)
	if !ioutil.IsNotExist(err) && err != nil {
		return blockJournalInfo{}, err
	}

	if info.Refs == nil {
		info.Refs = make(blockRefMap)
	}

	return info, nil
}

// putInfo stores the given journal info for the given ID.  exclusify
// must have been called by the caller.
func (s *blockDiskStore) putInfo(
	id kbfsblock.ID, info blockJournalInfo) error {
	return kbfscodec.SerializeToFile(s.codec, info, s.infoPath(id))
}

// addRefsExclusive adds references for the given contexts to the
// given ID, all with the same status and tag.  exclusify must have
// been called by the caller.
func (s *blockDiskStore) addRefsExclusive(
	id kbfsblock.ID, contexts []kbfsblock.Context, status blockRefStatus,
	tag string) error {
	info, err := s.getInfo(id)
	if err != nil {
		return err
	}

	if len(info.Refs) > 0 {
		// Check existing contexts, if any.
		for _, context := range contexts {
			_, _, err := info.Refs.checkExists(context)
			if err != nil {
				return err
			}
		}
	}

	for _, context := range contexts {
		err = info.Refs.put(context, status, tag)
		if err != nil {
			return err
		}
	}

	return s.putInfo(id, info)
}

// getDataExclusive returns the data and server half for the given
// ID, if present.  exclusify must have been called by the caller.
func (s *blockDiskStore) getDataExclusive(id kbfsblock.ID) (
	[]byte, kbfscrypto.BlockCryptKeyServerHalf, error) {
	data, err := ioutil.ReadFile(s.dataPath(id))
	if ioutil.IsNotExist(err) {
		return nil, kbfscrypto.BlockCryptKeyServerHalf{},
			blockNonExistentError{id}
	} else if err != nil {
		return nil, kbfscrypto.BlockCryptKeyServerHalf{}, err
	}

	keyServerHalfPath := s.keyServerHalfPath(id)
	buf, err := ioutil.ReadFile(keyServerHalfPath)
	if ioutil.IsNotExist(err) {
		return nil, kbfscrypto.BlockCryptKeyServerHalf{},
			blockNonExistentError{id}
	} else if err != nil {
		return nil, kbfscrypto.BlockCryptKeyServerHalf{}, err
	}

	// Check integrity.

	err = verifyLocalBlockIDMaybe(data, id)
	if err != nil {
		return nil, kbfscrypto.BlockCryptKeyServerHalf{}, err
	}

	var serverHalf kbfscrypto.BlockCryptKeyServerHalf
	err = serverHalf.UnmarshalBinary(buf)
	if err != nil {
		return nil, kbfscrypto.BlockCryptKeyServerHalf{}, err
	}

	return data, serverHalf, nil
}

// getData returns the data and server half for the given ID, if
// present.
func (s *blockDiskStore) getData(ctx context.Context, id kbfsblock.ID) (
	[]byte, kbfscrypto.BlockCryptKeyServerHalf, error) {
	cleanup, err := s.exclusify(ctx, id)
	if err != nil {
		return nil, kbfscrypto.BlockCryptKeyServerHalf{}, err
	}
	defer cleanup()

	return s.getDataExclusive(id)
}

// The functions below are the store's main entry points; unless their
// names end in "Exclusive", they acquire exclusive per-block access
// themselves via exclusify.

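// hasAnyRefExclusive returns whether the given ID has any references
// (live or archived).  exclusify must have been called by the caller.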
func (s *blockDiskStore) hasAnyRefExclusive(
	id kbfsblock.ID) (bool, error) {
	info, err := s.getInfo(id)
	if err != nil {
		return false, err
	}

	return len(info.Refs) > 0, nil
}

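// hasAnyRef returns whether the given ID has any references (live or
// archived).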
func (s *blockDiskStore) hasAnyRef(
	ctx context.Context, id kbfsblock.ID) (bool, error) {
	cleanup, err := s.exclusify(ctx, id)
	if err != nil {
		return false, err
	}
	defer cleanup()

	return s.hasAnyRefExclusive(id)
}

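// hasNonArchivedRef returns whether the given ID has at least one
// non-archived reference.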
func (s *blockDiskStore) hasNonArchivedRef(
	ctx context.Context, id kbfsblock.ID) (bool, error) {
	cleanup, err := s.exclusify(ctx, id)
	if err != nil {
		return false, err
	}
	defer cleanup()

	info, err := s.getInfo(id)
	if err != nil {
		return false, err
	}

	return info.Refs.hasNonArchivedRef(), nil
}

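// getLiveCount returns the number of live references to the given ID.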
func (s *blockDiskStore) getLiveCount(
	ctx context.Context, id kbfsblock.ID) (int, error) {
	cleanup, err := s.exclusify(ctx, id)
	if err != nil {
		return 0, err
	}
	defer cleanup()

	info, err := s.getInfo(id)
	if err != nil {
		return 0, err
	}

	return info.Refs.getLiveCount(), nil
}

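// hasContextExclusive returns whether the given context is a known
// reference to the given ID, along with its current status.
// exclusify must have been called by the caller.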
func (s *blockDiskStore) hasContextExclusive(
	id kbfsblock.ID, context kbfsblock.Context) (
	bool, blockRefStatus, error) {
	info, err := s.getInfo(id)
	if err != nil {
		return false, unknownBlockRef, err
	}

	return info.Refs.checkExists(context)
}

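// hasContext returns whether the given context is a known reference
// to the given ID, along with its current status.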
func (s *blockDiskStore) hasContext(
	ctx context.Context, id kbfsblock.ID, context kbfsblock.Context) (
	bool, blockRefStatus, error) {
	cleanup, err := s.exclusify(ctx, id)
	if err != nil {
		return false, unknownBlockRef, err
	}
	defer cleanup()

	return s.hasContextExclusive(id, context)
}

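// hasDataExclusive returns whether the data file for the given ID
// exists on disk.  exclusify must have been called by the caller.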
func (s *blockDiskStore) hasDataExclusive(id kbfsblock.ID) (bool, error) {
	_, err := ioutil.Stat(s.dataPath(id))
	if ioutil.IsNotExist(err) {
		return false, nil
	} else if err != nil {
		return false, err
	}
	return true, nil
}

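// hasData returns whether the data file for the given ID exists on
// disk.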
func (s *blockDiskStore) hasData(
	ctx context.Context, id kbfsblock.ID) (bool, error) {
	cleanup, err := s.exclusify(ctx, id)
	if err != nil {
		return false, err
	}
	defer cleanup()

	return s.hasDataExclusive(id)
}

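// isUnflushed returns whether the given ID has data on disk that
// hasn't yet been marked as flushed.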
func (s *blockDiskStore) isUnflushed(
	ctx context.Context, id kbfsblock.ID) (bool, error) {
	cleanup, err := s.exclusify(ctx, id)
	if err != nil {
		return false, err
	}
	defer cleanup()

	ok, err := s.hasDataExclusive(id)
	if err != nil {
		return false, err
	}

	if !ok {
		return false, nil
	}

	// The data is there; has it been flushed?
	info, err := s.getInfo(id)
	if err != nil {
		return false, err
	}

	return !info.Flushed, nil
}

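// markFlushed marks the given ID as having been flushed.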
func (s *blockDiskStore) markFlushed(
	ctx context.Context, id kbfsblock.ID) error {
	cleanup, err := s.exclusify(ctx, id)
	if err != nil {
		return err
	}
	defer cleanup()

	info, err := s.getInfo(id)
	if err != nil {
		return err
	}

	info.Flushed = true
	return s.putInfo(id, info)
}

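// getDataSize returns the size in bytes of the data file for the
// given ID, or 0 if no data is present.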
func (s *blockDiskStore) getDataSize(
	ctx context.Context, id kbfsblock.ID) (int64, error) {
	cleanup, err := s.exclusify(ctx, id)
	if err != nil {
		return 0, err
	}
	defer cleanup()

	fi, err := ioutil.Stat(s.dataPath(id))
	if ioutil.IsNotExist(err) {
		return 0, nil
	} else if err != nil {
		return 0, err
	}
	return fi.Size(), nil
}

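// getDataWithContextExclusive returns the data and server half for
// the given ID, but only if the given context is a known reference to
// it.  exclusify must have been called by the caller.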
func (s *blockDiskStore) getDataWithContextExclusive(
	id kbfsblock.ID, context kbfsblock.Context) (
	[]byte, kbfscrypto.BlockCryptKeyServerHalf, error) {
	hasContext, _, err := s.hasContextExclusive(id, context)
	if err != nil {
		return nil, kbfscrypto.BlockCryptKeyServerHalf{}, err
	}
	if !hasContext {
		return nil, kbfscrypto.BlockCryptKeyServerHalf{},
			blockNonExistentError{id}
	}

	return s.getDataExclusive(id)
}

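// getDataWithContext returns the data and server half for the given
// ID, but only if the given context is a known reference to it.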
func (s *blockDiskStore) getDataWithContext(
	ctx context.Context, id kbfsblock.ID, context kbfsblock.Context) (
	[]byte, kbfscrypto.BlockCryptKeyServerHalf, error) {
	cleanup, err := s.exclusify(ctx, id)
	if err != nil {
		return nil, kbfscrypto.BlockCryptKeyServerHalf{}, err
	}
	defer cleanup()

	return s.getDataWithContextExclusive(id, context)
}

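// getAllRefsForTest walks the entire store and returns the reference
// map of every block that has references.  It does not exclusify
// individual block IDs, so it should only be called (in tests) when
// no other operations are in flight.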
func (s *blockDiskStore) getAllRefsForTest() (map[kbfsblock.ID]blockRefMap, error) {
	res := make(map[kbfsblock.ID]blockRefMap)

	fileInfos, err := ioutil.ReadDir(s.dir)
	if ioutil.IsNotExist(err) {
		return res, nil
	} else if err != nil {
		return nil, err
	}

	for _, fi := range fileInfos {
		name := fi.Name()
		if !fi.IsDir() {
			return nil, errors.Errorf("Unexpected non-dir %q", name)
		}

		subFileInfos, err := ioutil.ReadDir(filepath.Join(s.dir, name))
		if err != nil {
			return nil, err
		}

		for _, sfi := range subFileInfos {
			subName := sfi.Name()
			if !sfi.IsDir() {
				return nil, errors.Errorf("Unexpected non-dir %q",
					subName)
			}

			idPath := filepath.Join(
				s.dir, name, subName, idFilename)
			idBytes, err := ioutil.ReadFile(idPath)
			if err != nil {
				return nil, err
			}

			id, err := kbfsblock.IDFromString(string(idBytes))
			if err != nil {
				return nil, errors.WithStack(err)
			}

			if !strings.HasPrefix(id.String(), name+subName) {
				return nil, errors.Errorf(
					"%q unexpectedly not a prefix of %q",
					name+subName, id.String())
			}

			info, err := s.getInfo(id)
			if err != nil {
				return nil, err
			}

			if len(info.Refs) > 0 {
				res[id] = info.Refs
			}
		}
	}

	return res, nil
}

// put puts the given data for the block, which may already exist.  If
// isRegularPut is true, additional validity checks are performed.  If
// err is nil, putData indicates whether the data didn't already exist
// and was put; if false, the data already existed and nothing new was
// written.
//
// For performance reasons, this method can be called concurrently by
// many goroutines for different blocks.
//
// Note that the block won't be get-able until `addReference`
// explicitly adds a tag for it.
func (s *blockDiskStore) put(
	ctx context.Context, isRegularPut bool, id kbfsblock.ID,
	context kbfsblock.Context, buf []byte,
	serverHalf kbfscrypto.BlockCryptKeyServerHalf) (
	putData bool, err error) {
	cleanup, err := s.exclusify(ctx, id)
	if err != nil {
		return false, err
	}
	defer cleanup()

	err = validateBlockPut(isRegularPut, id, context, buf)
	if err != nil {
		return false, err
	}

	// Check the data and retrieve the server half, if they exist.
	_, existingServerHalf, err := s.getDataWithContextExclusive(id, context)
	var exists bool
	switch err.(type) {
	case blockNonExistentError:
		exists = false
	case nil:
		exists = true
	default:
		return false, err
	}

	if exists {
		// If the entry already exists, everything should be
		// the same, except for possibly additional
		// references.

		// We checked that both buf and the existing data hash
		// to id, so no need to check that they're both equal.

		if isRegularPut && existingServerHalf != serverHalf {
			return false, errors.Errorf(
				"key server half mismatch: expected %s, got %s",
				existingServerHalf, serverHalf)
		}
	} else {
		err = s.makeDir(id)
		if err != nil {
			return false, err
		}

		err = ioutil.WriteFile(s.dataPath(id), buf, 0600)
		if err != nil {
			return false, err
		}

		// TODO: Add integrity-checking for key server half?

		data, err := serverHalf.MarshalBinary()
		if err != nil {
			return false, err
		}
		err = ioutil.WriteFile(s.keyServerHalfPath(id), data, 0600)
		if err != nil {
			return false, err
		}
	}

	return !exists, nil
}

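// For illustration only: a typical sequence for storing a brand-new
// block and making it get-able via its context is a `put` followed by
// an `addReference`.  (The identifiers below -- `store`, `ctx`, `id`,
// `bctx`, `buf`, `serverHalf`, and `tag` -- are hypothetical
// placeholders, not part of this file.)
//
//	if _, err := store.put(ctx, true, id, bctx, buf, serverHalf); err != nil {
//		return err
//	}
//	if err := store.addReference(ctx, id, bctx, tag); err != nil {
//		return err
//	}

// addReference adds a live reference for the given context to the
// given ID, tagged with the given tag, creating the block's directory
// if necessary.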
func (s *blockDiskStore) addReference(
	ctx context.Context, id kbfsblock.ID, context kbfsblock.Context,
	tag string) error {
	cleanup, err := s.exclusify(ctx, id)
	if err != nil {
		return err
	}
	defer cleanup()

	err = s.makeDir(id)
	if err != nil {
		return err
	}

	return s.addRefsExclusive(
		id, []kbfsblock.Context{context}, liveBlockRef, tag)
}

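// archiveReference marks the given contexts as archived references to
// the given ID, tagged with the given tag.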
func (s *blockDiskStore) archiveReference(
	ctx context.Context, id kbfsblock.ID, idContexts []kbfsblock.Context,
	tag string) error {
	cleanup, err := s.exclusify(ctx, id)
	if err != nil {
		return err
	}
	defer cleanup()

	err = s.makeDir(id)
	if err != nil {
		return err
	}

	return s.addRefsExclusive(id, idContexts, archivedBlockRef, tag)
}

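// archiveReferences archives the references in the given map, one
// block ID at a time.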
func (s *blockDiskStore) archiveReferences(
	ctx context.Context, contexts kbfsblock.ContextMap, tag string) error {
	for id, idContexts := range contexts {
		err := s.archiveReference(ctx, id, idContexts, tag)
		if err != nil {
			return err
		}
	}

	return nil
}

// removeReferences removes references for the given contexts from the
// given ID. If tag is non-empty, then a reference will be removed
// only if its most recent tag (passed in to addRefsExclusive) matches
// the given one.  It returns the number of references remaining
// afterward.
func (s *blockDiskStore) removeReferences(
	ctx context.Context, id kbfsblock.ID, contexts []kbfsblock.Context,
	tag string) (liveCount int, err error) {
	cleanup, err := s.exclusify(ctx, id)
	if err != nil {
		return 0, err
	}
	defer cleanup()

	info, err := s.getInfo(id)
	if err != nil {
		return 0, err
	}
	if len(info.Refs) == 0 {
		return 0, nil
	}

	for _, context := range contexts {
		err := info.Refs.remove(context, tag)
		if err != nil {
			return 0, err
		}
		if len(info.Refs) == 0 {
			break
		}
	}

	err = s.putInfo(id, info)
	if err != nil {
		return 0, err
	}

	return len(info.Refs), nil
}

// remove removes any existing data for the given ID, which must not
// have any references left.
func (s *blockDiskStore) remove(ctx context.Context, id kbfsblock.ID) error {
	cleanup, err := s.exclusify(ctx, id)
	if err != nil {
		return err
	}
	defer cleanup()

	hasAnyRef, err := s.hasAnyRefExclusive(id)
	if err != nil {
		return err
	}
	if hasAnyRef {
		return errors.Errorf(
			"Trying to remove data for referenced block %s", id)
	}
	path := s.blockPath(id)

	err = ioutil.RemoveAll(path)
	if err != nil {
		return err
	}

	// Remove the parent (splayed) directory if it exists and is
	// empty.
	err = ioutil.Remove(filepath.Dir(path))
	if ioutil.IsNotExist(err) || ioutil.IsExist(err) {
		err = nil
	}
	return err
}

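// clear removes the store directory and everything in it.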
func (s *blockDiskStore) clear() error {
	return ioutil.RemoveAll(s.dir)
}