1// Copyright 2017 Keybase Inc. All rights reserved.
2// Use of this source code is governed by a BSD
3// license that can be found in the LICENSE file.
4
5package libkbfs
6
7import (
8	"context"
9	"fmt"
10	"sync"
11	"time"
12
13	"github.com/keybase/client/go/kbfs/data"
14	"github.com/keybase/client/go/kbfs/kbfssync"
15	"github.com/keybase/client/go/kbfs/tlf"
16	"github.com/keybase/client/go/logger"
17)
18
19/*
20
21 This file defines a finite state machine (FSM) for rekey operation scheduling.
22 The state chart is described in following dot graph:
23
24digraph rekeyFSM {
25  graph [rankdir=LR]
26  start [shape=plaintext]
27
28  Idle -> Idle [label="*"]
29  Scheduled -> Scheduled [label="*"]
30  Started -> Started [label="*"]
31
32  start -> Idle
33  Idle -> Scheduled [label=Request]
34  Scheduled -> Scheduled [label="Request,RekeyNotNeeded"]
35  Scheduled -> Started [label=Timeup]
36  Started -> Scheduled [label="Finished(TTL valid && (rekey done || needs paper))"]
37  Started -> Idle [label="Finished (*)"]
38}
39
40*/
41
42// CtxRekeyTagKey is the type used for unique context tags within an
43// enqueued Rekey.
44type CtxRekeyTagKey int
45
46const (
47	// CtxRekeyIDKey is the type of the tag for unique operation IDs
48	// within an enqueued Rekey.
49	CtxRekeyIDKey CtxRekeyTagKey = iota
50)
51
52// CtxRekeyOpID is the display name for the unique operation
53// enqueued rekey ID tag.
54const CtxRekeyOpID = "REKEYID"
55
56type rekeyEventType int
57
58const (
59	_ rekeyEventType = iota
60	rekeyRequestEvent
61	rekeyFinishedEvent
62	rekeyTimeupEvent
63	rekeyNotNeededEvent
64	rekeyKickoffEvent
65
66	rekeyShutdownEvent
67
68	rekeyCancelEventForTest
69)
70
71func (e rekeyEventType) String() string {
72	switch e {
73	case rekeyRequestEvent:
74		return "rekeyRequestEvent"
75	case rekeyFinishedEvent:
76		return "rekeyFinishedEvent"
77	case rekeyTimeupEvent:
78		return "rekeyTimeupEvent"
79	case rekeyNotNeededEvent:
80		return "rekeyNotNeededEvent"
81	case rekeyShutdownEvent:
82		return "rekeyShutdownEvent"
83	case rekeyKickoffEvent:
84		return "rekeyKickoffEvent"
85	case rekeyCancelEventForTest:
86		return "rekeyCancelEventForTest"
87	default:
88		return "unknown"
89	}
90}
91
92// rekeyTask describes a rekey task.
93type rekeyTask struct {
94	// timeout, if non-nil, causes rekey to fail if it takes more than this
95	// duration since it enters rekeyStateStarted.
96	timeout     *time.Duration
97	ttl         int
98	promptPaper bool
99
100	ctx *protectedContext
101}
102
103// rekeyRequest describes a rekey request.
104type rekeyRequest struct {
105	// delay is the duration to wait for since the request enters the FSM until
106	// starting the rekey.
107	delay time.Duration
108	rekeyTask
109}
110
111// rekeyFinished describes a rekeyFinishedEvent. It contains results from an
112// actual rekey operation.
113type rekeyFinished struct {
114	RekeyResult
115	err error
116}
117
118// RekeyEvent describes an event to send into the RekeyFSM. A function, e.g.,
119// NewRekeyRequestEvent, should be used to construct one.
120type RekeyEvent struct {
121	eventType rekeyEventType
122	request   *rekeyRequest
123	finished  *rekeyFinished
124}
125
126func (e RekeyEvent) String() string {
127	switch e.eventType {
128	case rekeyRequestEvent:
129		return fmt.Sprintf("%s [%#+v]", e.eventType, e.request)
130	case rekeyFinishedEvent:
131		return fmt.Sprintf("%s [%#+v]", e.eventType, e.finished)
132	default:
133		return e.eventType.String()
134	}
135}
136
137func newRekeyRequestEvent(req rekeyRequest) RekeyEvent {
138	return RekeyEvent{
139		eventType: rekeyRequestEvent,
140		request:   &req,
141	}
142}
143
144func newRekeyRequestEventWithContext(ctx context.Context) RekeyEvent {
145	return newRekeyRequestEvent(rekeyRequest{
146		delay: 0,
147		rekeyTask: rekeyTask{
148			timeout:     nil,
149			promptPaper: false,
150			ttl:         rekeyInitialTTL,
151			ctx:         newProtectedContext(ctx, nil),
152		},
153	})
154}
155
156// NewRekeyRequestWithPaperPromptEvent creates a non-delayed rekey request
157// Event that causes a paper prompt.
158func NewRekeyRequestWithPaperPromptEvent() RekeyEvent {
159	e := NewRekeyRequestEvent()
160	d := rekeyWithPromptWaitTimeDefault
161	e.request.promptPaper = true
162	e.request.timeout = &d
163	return e
164}
165
166// NewRekeyRequestEvent creates a non-delayed rekey request Event.
167func NewRekeyRequestEvent() RekeyEvent {
168	return newRekeyRequestEventWithContext(CtxWithRandomIDReplayable(
169		context.Background(), CtxRekeyIDKey, CtxRekeyOpID, nil))
170}
171
172// NewRekeyNotNeededEvent creates a rekeyNotNeededEvent typed event. If the FSM
173// is in rekeyStateScheduled, this causes FSM to unset paperkey prompt. In
174// other states nothing happens. This event is sent to the FSM when we see a MD
175// update with rekey flag unset. It can be an indication that an old
176// outstanding rekey request has been served by another device, or just a
177// regular rekey updates.
178func NewRekeyNotNeededEvent() RekeyEvent {
179	return RekeyEvent{
180		eventType: rekeyNotNeededEvent,
181	}
182}
183
184func newRekeyFinishedEvent(res RekeyResult, err error) RekeyEvent {
185	return RekeyEvent{
186		eventType: rekeyFinishedEvent,
187		finished: &rekeyFinished{
188			RekeyResult: res,
189			err:         err,
190		},
191	}
192}
193
194func newRekeyTimeupEvent() RekeyEvent {
195	return RekeyEvent{
196		eventType: rekeyTimeupEvent,
197	}
198}
199
200func newRekeyShutdownEvent() RekeyEvent {
201	return RekeyEvent{
202		eventType: rekeyShutdownEvent,
203	}
204}
205
206func newRekeyKickoffEvent() RekeyEvent {
207	return RekeyEvent{
208		eventType: rekeyKickoffEvent,
209	}
210}
211
212func newRekeyCancelEventForTest() RekeyEvent {
213	return RekeyEvent{
214		eventType: rekeyCancelEventForTest,
215	}
216}
217
218// rekeyState models a state in the FSM. rekeyFSM keeps exactly one instance of
219// rekeyState at any given time.
220type rekeyState interface {
221	// reactToEvent defines how this state reacts to an event. Implementations of
222	// rekeyState should handle necessary transition actions in reactToEvent(),
223	// and return a new rekeyState instance after transition is finished.
224	// rekeyFSM sends event to the rekeyState instance it holds whenever it
225	// receives an event, and use the returned rekeyState instance as new state.
226	// It's OK to return the receiver itself as "new" state.
227	//
228	// rekeyFSM runs an event loop in a dedicated goroutine that calls
229	// reactToEvent and updates states. In other words, it's safe to assume
230	// reactToEvent is only called within the same goroutine, and that it's
231	// impossible that multiple reactToEvent calls are issued concurrently.
232	reactToEvent(event RekeyEvent) rekeyState
233}
234
235type rekeyStateIdle struct {
236	fsm *rekeyFSM
237}
238
239func newRekeyStateIdle(fsm *rekeyFSM) *rekeyStateIdle {
240	return &rekeyStateIdle{fsm: fsm}
241}
242
243func (r *rekeyStateIdle) reactToEvent(event RekeyEvent) rekeyState {
244	switch event.eventType {
245	case rekeyRequestEvent:
246		return newRekeyStateScheduled(r.fsm,
247			event.request.delay, event.request.rekeyTask)
248	default:
249		return r
250	}
251}
252
253type rekeyStateScheduled struct {
254	fsm *rekeyFSM
255
256	timer    *time.Timer
257	deadline time.Time
258
259	task rekeyTask
260}
261
262func newRekeyStateScheduled(
263	fsm *rekeyFSM, delay time.Duration, task rekeyTask) *rekeyStateScheduled {
264	task.ctx.setLogger(fsm.log)
265	return &rekeyStateScheduled{
266		fsm: fsm,
267		timer: time.AfterFunc(delay, func() {
268			fsm.Event(newRekeyTimeupEvent())
269		}),
270		deadline: time.Now().Add(delay),
271		task:     task,
272	}
273}
274
275func (r *rekeyStateScheduled) reactToEvent(event RekeyEvent) rekeyState {
276	switch event.eventType {
277	case rekeyTimeupEvent:
278		// This blocks (which inheritently blocks the entire FSM) if too many
279		// are active.
280		if r.fsm.fbo.config.GetRekeyFSMLimiter().WaitToStart(
281			r.task.ctx.context()) != nil {
282			return r
283		}
284		return newRekeyStateStarted(r.fsm, r.task)
285	case rekeyRequestEvent:
286		if r.task.promptPaper && !event.request.promptPaper {
287			// KBFS-2251: If fbo concludes that paper key would be needed in
288			// order for rekey to proceed, it writes a MD to mdserver with
289			// rekey set at the same time. To prevent the FSM from being kicked
290			// of to rekeyStateStarted right away after receiving this update
291			// (through FoldersNeedRekey) from mdserver, we just reuse the same
292			// timer if r.task.promptPaper is set.
293			//
294			// If the request has promptPaper set, then it's from the KBFS
295			// client, likely due to a read request. In this case, we should
296			// shorten the wait timer according the the request.
297			r.fsm.log.CDebugf(r.task.ctx.context(), "Reusing existing timer "+
298				"without possibly shortening due to r.task.promptPaper==true")
299			return r
300		}
301
302		task := r.task
303		task.promptPaper = task.promptPaper || event.request.promptPaper
304		if task.timeout == nil {
305			task.timeout = event.request.timeout
306		}
307		task.ttl = event.request.ttl
308		task.ctx.maybeReplaceContext(event.request.ctx.context())
309		if !r.deadline.After(time.Now().Add(event.request.delay)) {
310			r.fsm.log.CDebugf(task.ctx.context(), "Reusing existing timer")
311			r.task = task
312			return r
313		}
314		r.timer.Stop()
315		return newRekeyStateScheduled(r.fsm, event.request.delay, task)
316	case rekeyNotNeededEvent:
317		// KBFS-2254: if another device finished rekey, we should unset the
318		// paperkey prompt so that if this other device goes offline before a
319		// third device triggers a rekey request, the timer can be preempted.
320		// What if the FoldersNeedRekey call comes in before this and we still
321		// miss the rekey request? Well now we also send a rekey request into
322		// the FSM on MD updates with rekey flag set. Since the MD updates are
323		// applied in order, and that FSM's state transition is
324		// single-goroutined, we are safe here.
325		r.task.promptPaper = false
326		return r
327	case rekeyKickoffEvent:
328		r.timer.Reset(time.Millisecond)
329		return r
330	case rekeyCancelEventForTest:
331		r.timer.Stop()
332		return newRekeyStateIdle(r.fsm)
333	case rekeyShutdownEvent:
334		r.timer.Stop()
335		return r
336	default:
337		return r
338	}
339}
340
341type rekeyStateStarted struct {
342	fsm  *rekeyFSM
343	task rekeyTask
344}
345
346func newRekeyStateStarted(fsm *rekeyFSM, task rekeyTask) *rekeyStateStarted {
347	ctx := task.ctx.context()
348	var cancel context.CancelFunc
349	if task.timeout != nil {
350		ctx, cancel = context.WithTimeout(task.ctx.context(), *task.timeout)
351	}
352	go func() {
353		defer fsm.fbo.config.GetRekeyFSMLimiter().Done()
354		if cancel != nil {
355			defer cancel()
356		}
357		fsm.log.CDebugf(ctx, "Processing rekey for %s", fsm.fbo.folderBranch.Tlf)
358		var res RekeyResult
359		err := fsm.fbo.doMDWriteWithRetryUnlessCanceled(ctx,
360			func(lState *kbfssync.LockState) (err error) {
361				res, err = fsm.fbo.rekeyLocked(ctx, lState, task.promptPaper)
362				return err
363			})
364		fsm.log.CDebugf(ctx, "Rekey finished with res=%#+v, error=%v", res, err)
365		fsm.Event(newRekeyFinishedEvent(res, err))
366	}()
367	return &rekeyStateStarted{
368		fsm:  fsm,
369		task: task,
370	}
371}
372
373func (r *rekeyStateStarted) reactToEvent(event RekeyEvent) rekeyState {
374	switch event.eventType {
375	case rekeyFinishedEvent:
376		ttl := r.task.ttl - 1
377		r.fsm.log.CDebugf(r.task.ctx.context(),
378			"Rekey finished, ttl: %d -> %d", r.task.ttl, ttl)
379
380		if ttl <= 0 {
381			r.fsm.log.CDebugf(r.task.ctx.context(),
382				"Not scheduling new rekey because TTL expired")
383			return newRekeyStateIdle(r.fsm)
384		}
385
386		switch event.finished.err {
387		case nil:
388		default:
389			r.fsm.log.CDebugf(r.task.ctx.context(),
390				"Rekey errored; scheduling new rekey in %s", rekeyRecheckInterval)
391			return newRekeyStateScheduled(r.fsm, rekeyRecheckInterval, rekeyTask{
392				timeout:     r.task.timeout,
393				promptPaper: r.task.promptPaper,
394				ttl:         ttl,
395				ctx:         r.task.ctx,
396			})
397		}
398
399		d := r.fsm.fbo.config.RekeyWithPromptWaitTime()
400		if event.finished.NeedsPaperKey {
401			r.fsm.log.CDebugf(r.task.ctx.context(),
402				"Scheduling rekey due to NeedsPaperKey==true")
403			return newRekeyStateScheduled(r.fsm, d, rekeyTask{
404				timeout:     &d,
405				promptPaper: true,
406				ttl:         ttl,
407				ctx:         r.task.ctx,
408			})
409		}
410
411		if event.finished.DidRekey {
412			// We enqueue the rekey here again, in case we missed a device due to a
413			// race condition. This is specifically for the situation where user
414			// provisions two devices in a row, and the key update for the 2nd device
415			// only comes in after rekey for a TLF is done, which didn't include the
416			// second device. At this point, there wouldn't be a new MD with rekey
417			// bit set since it's already set. As a result, the TLF won't get rekeyed
418			// for the second device until the next 1-hour timer triggers another
419			// scan.
420			r.fsm.log.CDebugf(r.task.ctx.context(),
421				"Scheduling rekey (recheck) due to DidRekey==true")
422			return newRekeyStateScheduled(r.fsm, rekeyRecheckInterval, rekeyTask{
423				timeout:     nil,
424				promptPaper: false,
425				ttl:         ttl,
426				ctx:         r.task.ctx,
427			})
428		}
429
430		r.fsm.log.CDebugf(r.task.ctx.context(),
431			"Not scheduling rekey because no more rekeys or rechecks are needed")
432		return newRekeyStateIdle(r.fsm)
433	default:
434		return r
435	}
436}
437
438type rekeyFSMListener struct {
439	repeatedly bool
440	onEvent    func(RekeyEvent)
441}
442
443type rekeyFSM struct {
444	shutdownCh chan struct{}
445	reqs       chan RekeyEvent
446
447	fbo *folderBranchOps
448	log logger.Logger
449
450	current rekeyState
451
452	muListeners sync.Mutex
453	listeners   map[rekeyEventType][]rekeyFSMListener
454}
455
456// NewRekeyFSM creates a new rekey FSM.
457func NewRekeyFSM(fbo *folderBranchOps) RekeyFSM {
458	fsm := &rekeyFSM{
459		reqs:       make(chan RekeyEvent, fbo.config.Mode().RekeyQueueSize()),
460		shutdownCh: make(chan struct{}),
461		fbo:        fbo,
462		log:        fbo.config.MakeLogger("RekeyFSM"),
463
464		listeners: make(map[rekeyEventType][]rekeyFSMListener),
465	}
466	fsm.current = newRekeyStateIdle(fsm)
467	if fbo.bType == standard {
468		go fsm.loop()
469	}
470	return fsm
471}
472
473func (m *rekeyFSM) loop() {
474	reqs := m.reqs
475	for {
476		select {
477		case e := <-reqs:
478			next := m.current.reactToEvent(e)
479			if e.eventType == rekeyShutdownEvent {
480				// Set reqs to nil so on next iteration, we will skip any
481				// content in reqs. So if there are multiple
482				// rekeyShutdownEvent, we won't close m.shutdownCh multiple
483				// times.
484				reqs = nil
485				close(m.shutdownCh)
486			} else {
487				// Only log if we're not shutting down, otherwise `go vet`
488				// yells at us in tests.
489				m.log.Debug("RekeyFSM transition: %T + %s -> %T",
490					m.current, e, next)
491			}
492			m.current = next
493			m.triggerCallbacksForTest(e)
494
495		case <-m.shutdownCh:
496			return
497		}
498	}
499}
500
501// Event implements RekeyFSM interface for rekeyFSM.
502func (m *rekeyFSM) Event(event RekeyEvent) {
503	select {
504	case m.reqs <- event:
505	case <-m.shutdownCh:
506	}
507}
508
509// Shutdown implements RekeyFSM interface for rekeyFSM.
510func (m *rekeyFSM) Shutdown() {
511	m.Event(newRekeyShutdownEvent())
512}
513
514func (m *rekeyFSM) triggerCallbacksForTest(e RekeyEvent) {
515	var cbs []rekeyFSMListener
516	func() {
517		m.muListeners.Lock()
518		defer m.muListeners.Unlock()
519		cbs = m.listeners[e.eventType]
520		m.listeners[e.eventType] = nil
521		for _, cb := range cbs {
522			if cb.repeatedly {
523				m.listeners[e.eventType] = append(
524					m.listeners[e.eventType], cb)
525			}
526		}
527	}()
528	for _, cb := range cbs {
529		cb.onEvent(e)
530	}
531}
532
533// listenOnEvent implements RekeyFSM interface for rekeyFSM.
534func (m *rekeyFSM) listenOnEvent(
535	event rekeyEventType, callback func(RekeyEvent), repeatedly bool) {
536	m.muListeners.Lock()
537	defer m.muListeners.Unlock()
538	m.listeners[event] = append(m.listeners[event], rekeyFSMListener{
539		onEvent:    callback,
540		repeatedly: repeatedly,
541	})
542}
543
544func getRekeyFSM(ctx context.Context, ops KBFSOps, tlfID tlf.ID) RekeyFSM {
545	switch o := ops.(type) {
546	case *KBFSOpsStandard:
547		return o.getOpsNoAdd(
548			ctx, data.FolderBranch{
549				Tlf:    tlfID,
550				Branch: data.MasterBranch,
551			}).rekeyFSM
552	default:
553		panic("unknown KBFSOps")
554	}
555}
556
557// RequestRekeyAndWaitForOneFinishEvent sends a rekey request to the FSM
558// associated with tlfID, and wait for exact one rekeyFinished event. This can
559// be useful for waiting for a rekey result in tests.
560//
561// Note that the supplied ctx is injected to the rekey task, so canceling ctx
562// would actually cancel the rekey.
563//
564// Currently this is only used in tests and RekeyFile. Normal rekey activities
565// should go through the FSM asychronously.
566func RequestRekeyAndWaitForOneFinishEvent(ctx context.Context,
567	ops KBFSOps, tlfID tlf.ID) (res RekeyResult, err error) {
568	fsm := getRekeyFSM(ctx, ops, tlfID)
569	rekeyWaiter := make(chan struct{})
570	fsm.listenOnEvent(rekeyFinishedEvent, func(e RekeyEvent) {
571		res = e.finished.RekeyResult
572		err = e.finished.err
573		close(rekeyWaiter)
574	}, false)
575	fsm.Event(newRekeyRequestEventWithContext(ctx))
576	<-rekeyWaiter
577	return res, err
578}
579