1// Copyright 2017 Keybase Inc. All rights reserved. 2// Use of this source code is governed by a BSD 3// license that can be found in the LICENSE file. 4 5package libkbfs 6 7import ( 8 "context" 9 "fmt" 10 "sync" 11 "time" 12 13 "github.com/keybase/client/go/kbfs/data" 14 "github.com/keybase/client/go/kbfs/kbfssync" 15 "github.com/keybase/client/go/kbfs/tlf" 16 "github.com/keybase/client/go/logger" 17) 18 19/* 20 21 This file defines a finite state machine (FSM) for rekey operation scheduling. 22 The state chart is described in following dot graph: 23 24digraph rekeyFSM { 25 graph [rankdir=LR] 26 start [shape=plaintext] 27 28 Idle -> Idle [label="*"] 29 Scheduled -> Scheduled [label="*"] 30 Started -> Started [label="*"] 31 32 start -> Idle 33 Idle -> Scheduled [label=Request] 34 Scheduled -> Scheduled [label="Request,RekeyNotNeeded"] 35 Scheduled -> Started [label=Timeup] 36 Started -> Scheduled [label="Finished(TTL valid && (rekey done || needs paper))"] 37 Started -> Idle [label="Finished (*)"] 38} 39 40*/ 41 42// CtxRekeyTagKey is the type used for unique context tags within an 43// enqueued Rekey. 44type CtxRekeyTagKey int 45 46const ( 47 // CtxRekeyIDKey is the type of the tag for unique operation IDs 48 // within an enqueued Rekey. 49 CtxRekeyIDKey CtxRekeyTagKey = iota 50) 51 52// CtxRekeyOpID is the display name for the unique operation 53// enqueued rekey ID tag. 54const CtxRekeyOpID = "REKEYID" 55 56type rekeyEventType int 57 58const ( 59 _ rekeyEventType = iota 60 rekeyRequestEvent 61 rekeyFinishedEvent 62 rekeyTimeupEvent 63 rekeyNotNeededEvent 64 rekeyKickoffEvent 65 66 rekeyShutdownEvent 67 68 rekeyCancelEventForTest 69) 70 71func (e rekeyEventType) String() string { 72 switch e { 73 case rekeyRequestEvent: 74 return "rekeyRequestEvent" 75 case rekeyFinishedEvent: 76 return "rekeyFinishedEvent" 77 case rekeyTimeupEvent: 78 return "rekeyTimeupEvent" 79 case rekeyNotNeededEvent: 80 return "rekeyNotNeededEvent" 81 case rekeyShutdownEvent: 82 return "rekeyShutdownEvent" 83 case rekeyKickoffEvent: 84 return "rekeyKickoffEvent" 85 case rekeyCancelEventForTest: 86 return "rekeyCancelEventForTest" 87 default: 88 return "unknown" 89 } 90} 91 92// rekeyTask describes a rekey task. 93type rekeyTask struct { 94 // timeout, if non-nil, causes rekey to fail if it takes more than this 95 // duration since it enters rekeyStateStarted. 96 timeout *time.Duration 97 ttl int 98 promptPaper bool 99 100 ctx *protectedContext 101} 102 103// rekeyRequest describes a rekey request. 104type rekeyRequest struct { 105 // delay is the duration to wait for since the request enters the FSM until 106 // starting the rekey. 107 delay time.Duration 108 rekeyTask 109} 110 111// rekeyFinished describes a rekeyFinishedEvent. It contains results from an 112// actual rekey operation. 113type rekeyFinished struct { 114 RekeyResult 115 err error 116} 117 118// RekeyEvent describes an event to send into the RekeyFSM. A function, e.g., 119// NewRekeyRequestEvent, should be used to construct one. 120type RekeyEvent struct { 121 eventType rekeyEventType 122 request *rekeyRequest 123 finished *rekeyFinished 124} 125 126func (e RekeyEvent) String() string { 127 switch e.eventType { 128 case rekeyRequestEvent: 129 return fmt.Sprintf("%s [%#+v]", e.eventType, e.request) 130 case rekeyFinishedEvent: 131 return fmt.Sprintf("%s [%#+v]", e.eventType, e.finished) 132 default: 133 return e.eventType.String() 134 } 135} 136 137func newRekeyRequestEvent(req rekeyRequest) RekeyEvent { 138 return RekeyEvent{ 139 eventType: rekeyRequestEvent, 140 request: &req, 141 } 142} 143 144func newRekeyRequestEventWithContext(ctx context.Context) RekeyEvent { 145 return newRekeyRequestEvent(rekeyRequest{ 146 delay: 0, 147 rekeyTask: rekeyTask{ 148 timeout: nil, 149 promptPaper: false, 150 ttl: rekeyInitialTTL, 151 ctx: newProtectedContext(ctx, nil), 152 }, 153 }) 154} 155 156// NewRekeyRequestWithPaperPromptEvent creates a non-delayed rekey request 157// Event that causes a paper prompt. 158func NewRekeyRequestWithPaperPromptEvent() RekeyEvent { 159 e := NewRekeyRequestEvent() 160 d := rekeyWithPromptWaitTimeDefault 161 e.request.promptPaper = true 162 e.request.timeout = &d 163 return e 164} 165 166// NewRekeyRequestEvent creates a non-delayed rekey request Event. 167func NewRekeyRequestEvent() RekeyEvent { 168 return newRekeyRequestEventWithContext(CtxWithRandomIDReplayable( 169 context.Background(), CtxRekeyIDKey, CtxRekeyOpID, nil)) 170} 171 172// NewRekeyNotNeededEvent creates a rekeyNotNeededEvent typed event. If the FSM 173// is in rekeyStateScheduled, this causes FSM to unset paperkey prompt. In 174// other states nothing happens. This event is sent to the FSM when we see a MD 175// update with rekey flag unset. It can be an indication that an old 176// outstanding rekey request has been served by another device, or just a 177// regular rekey updates. 178func NewRekeyNotNeededEvent() RekeyEvent { 179 return RekeyEvent{ 180 eventType: rekeyNotNeededEvent, 181 } 182} 183 184func newRekeyFinishedEvent(res RekeyResult, err error) RekeyEvent { 185 return RekeyEvent{ 186 eventType: rekeyFinishedEvent, 187 finished: &rekeyFinished{ 188 RekeyResult: res, 189 err: err, 190 }, 191 } 192} 193 194func newRekeyTimeupEvent() RekeyEvent { 195 return RekeyEvent{ 196 eventType: rekeyTimeupEvent, 197 } 198} 199 200func newRekeyShutdownEvent() RekeyEvent { 201 return RekeyEvent{ 202 eventType: rekeyShutdownEvent, 203 } 204} 205 206func newRekeyKickoffEvent() RekeyEvent { 207 return RekeyEvent{ 208 eventType: rekeyKickoffEvent, 209 } 210} 211 212func newRekeyCancelEventForTest() RekeyEvent { 213 return RekeyEvent{ 214 eventType: rekeyCancelEventForTest, 215 } 216} 217 218// rekeyState models a state in the FSM. rekeyFSM keeps exactly one instance of 219// rekeyState at any given time. 220type rekeyState interface { 221 // reactToEvent defines how this state reacts to an event. Implementations of 222 // rekeyState should handle necessary transition actions in reactToEvent(), 223 // and return a new rekeyState instance after transition is finished. 224 // rekeyFSM sends event to the rekeyState instance it holds whenever it 225 // receives an event, and use the returned rekeyState instance as new state. 226 // It's OK to return the receiver itself as "new" state. 227 // 228 // rekeyFSM runs an event loop in a dedicated goroutine that calls 229 // reactToEvent and updates states. In other words, it's safe to assume 230 // reactToEvent is only called within the same goroutine, and that it's 231 // impossible that multiple reactToEvent calls are issued concurrently. 232 reactToEvent(event RekeyEvent) rekeyState 233} 234 235type rekeyStateIdle struct { 236 fsm *rekeyFSM 237} 238 239func newRekeyStateIdle(fsm *rekeyFSM) *rekeyStateIdle { 240 return &rekeyStateIdle{fsm: fsm} 241} 242 243func (r *rekeyStateIdle) reactToEvent(event RekeyEvent) rekeyState { 244 switch event.eventType { 245 case rekeyRequestEvent: 246 return newRekeyStateScheduled(r.fsm, 247 event.request.delay, event.request.rekeyTask) 248 default: 249 return r 250 } 251} 252 253type rekeyStateScheduled struct { 254 fsm *rekeyFSM 255 256 timer *time.Timer 257 deadline time.Time 258 259 task rekeyTask 260} 261 262func newRekeyStateScheduled( 263 fsm *rekeyFSM, delay time.Duration, task rekeyTask) *rekeyStateScheduled { 264 task.ctx.setLogger(fsm.log) 265 return &rekeyStateScheduled{ 266 fsm: fsm, 267 timer: time.AfterFunc(delay, func() { 268 fsm.Event(newRekeyTimeupEvent()) 269 }), 270 deadline: time.Now().Add(delay), 271 task: task, 272 } 273} 274 275func (r *rekeyStateScheduled) reactToEvent(event RekeyEvent) rekeyState { 276 switch event.eventType { 277 case rekeyTimeupEvent: 278 // This blocks (which inheritently blocks the entire FSM) if too many 279 // are active. 280 if r.fsm.fbo.config.GetRekeyFSMLimiter().WaitToStart( 281 r.task.ctx.context()) != nil { 282 return r 283 } 284 return newRekeyStateStarted(r.fsm, r.task) 285 case rekeyRequestEvent: 286 if r.task.promptPaper && !event.request.promptPaper { 287 // KBFS-2251: If fbo concludes that paper key would be needed in 288 // order for rekey to proceed, it writes a MD to mdserver with 289 // rekey set at the same time. To prevent the FSM from being kicked 290 // of to rekeyStateStarted right away after receiving this update 291 // (through FoldersNeedRekey) from mdserver, we just reuse the same 292 // timer if r.task.promptPaper is set. 293 // 294 // If the request has promptPaper set, then it's from the KBFS 295 // client, likely due to a read request. In this case, we should 296 // shorten the wait timer according the the request. 297 r.fsm.log.CDebugf(r.task.ctx.context(), "Reusing existing timer "+ 298 "without possibly shortening due to r.task.promptPaper==true") 299 return r 300 } 301 302 task := r.task 303 task.promptPaper = task.promptPaper || event.request.promptPaper 304 if task.timeout == nil { 305 task.timeout = event.request.timeout 306 } 307 task.ttl = event.request.ttl 308 task.ctx.maybeReplaceContext(event.request.ctx.context()) 309 if !r.deadline.After(time.Now().Add(event.request.delay)) { 310 r.fsm.log.CDebugf(task.ctx.context(), "Reusing existing timer") 311 r.task = task 312 return r 313 } 314 r.timer.Stop() 315 return newRekeyStateScheduled(r.fsm, event.request.delay, task) 316 case rekeyNotNeededEvent: 317 // KBFS-2254: if another device finished rekey, we should unset the 318 // paperkey prompt so that if this other device goes offline before a 319 // third device triggers a rekey request, the timer can be preempted. 320 // What if the FoldersNeedRekey call comes in before this and we still 321 // miss the rekey request? Well now we also send a rekey request into 322 // the FSM on MD updates with rekey flag set. Since the MD updates are 323 // applied in order, and that FSM's state transition is 324 // single-goroutined, we are safe here. 325 r.task.promptPaper = false 326 return r 327 case rekeyKickoffEvent: 328 r.timer.Reset(time.Millisecond) 329 return r 330 case rekeyCancelEventForTest: 331 r.timer.Stop() 332 return newRekeyStateIdle(r.fsm) 333 case rekeyShutdownEvent: 334 r.timer.Stop() 335 return r 336 default: 337 return r 338 } 339} 340 341type rekeyStateStarted struct { 342 fsm *rekeyFSM 343 task rekeyTask 344} 345 346func newRekeyStateStarted(fsm *rekeyFSM, task rekeyTask) *rekeyStateStarted { 347 ctx := task.ctx.context() 348 var cancel context.CancelFunc 349 if task.timeout != nil { 350 ctx, cancel = context.WithTimeout(task.ctx.context(), *task.timeout) 351 } 352 go func() { 353 defer fsm.fbo.config.GetRekeyFSMLimiter().Done() 354 if cancel != nil { 355 defer cancel() 356 } 357 fsm.log.CDebugf(ctx, "Processing rekey for %s", fsm.fbo.folderBranch.Tlf) 358 var res RekeyResult 359 err := fsm.fbo.doMDWriteWithRetryUnlessCanceled(ctx, 360 func(lState *kbfssync.LockState) (err error) { 361 res, err = fsm.fbo.rekeyLocked(ctx, lState, task.promptPaper) 362 return err 363 }) 364 fsm.log.CDebugf(ctx, "Rekey finished with res=%#+v, error=%v", res, err) 365 fsm.Event(newRekeyFinishedEvent(res, err)) 366 }() 367 return &rekeyStateStarted{ 368 fsm: fsm, 369 task: task, 370 } 371} 372 373func (r *rekeyStateStarted) reactToEvent(event RekeyEvent) rekeyState { 374 switch event.eventType { 375 case rekeyFinishedEvent: 376 ttl := r.task.ttl - 1 377 r.fsm.log.CDebugf(r.task.ctx.context(), 378 "Rekey finished, ttl: %d -> %d", r.task.ttl, ttl) 379 380 if ttl <= 0 { 381 r.fsm.log.CDebugf(r.task.ctx.context(), 382 "Not scheduling new rekey because TTL expired") 383 return newRekeyStateIdle(r.fsm) 384 } 385 386 switch event.finished.err { 387 case nil: 388 default: 389 r.fsm.log.CDebugf(r.task.ctx.context(), 390 "Rekey errored; scheduling new rekey in %s", rekeyRecheckInterval) 391 return newRekeyStateScheduled(r.fsm, rekeyRecheckInterval, rekeyTask{ 392 timeout: r.task.timeout, 393 promptPaper: r.task.promptPaper, 394 ttl: ttl, 395 ctx: r.task.ctx, 396 }) 397 } 398 399 d := r.fsm.fbo.config.RekeyWithPromptWaitTime() 400 if event.finished.NeedsPaperKey { 401 r.fsm.log.CDebugf(r.task.ctx.context(), 402 "Scheduling rekey due to NeedsPaperKey==true") 403 return newRekeyStateScheduled(r.fsm, d, rekeyTask{ 404 timeout: &d, 405 promptPaper: true, 406 ttl: ttl, 407 ctx: r.task.ctx, 408 }) 409 } 410 411 if event.finished.DidRekey { 412 // We enqueue the rekey here again, in case we missed a device due to a 413 // race condition. This is specifically for the situation where user 414 // provisions two devices in a row, and the key update for the 2nd device 415 // only comes in after rekey for a TLF is done, which didn't include the 416 // second device. At this point, there wouldn't be a new MD with rekey 417 // bit set since it's already set. As a result, the TLF won't get rekeyed 418 // for the second device until the next 1-hour timer triggers another 419 // scan. 420 r.fsm.log.CDebugf(r.task.ctx.context(), 421 "Scheduling rekey (recheck) due to DidRekey==true") 422 return newRekeyStateScheduled(r.fsm, rekeyRecheckInterval, rekeyTask{ 423 timeout: nil, 424 promptPaper: false, 425 ttl: ttl, 426 ctx: r.task.ctx, 427 }) 428 } 429 430 r.fsm.log.CDebugf(r.task.ctx.context(), 431 "Not scheduling rekey because no more rekeys or rechecks are needed") 432 return newRekeyStateIdle(r.fsm) 433 default: 434 return r 435 } 436} 437 438type rekeyFSMListener struct { 439 repeatedly bool 440 onEvent func(RekeyEvent) 441} 442 443type rekeyFSM struct { 444 shutdownCh chan struct{} 445 reqs chan RekeyEvent 446 447 fbo *folderBranchOps 448 log logger.Logger 449 450 current rekeyState 451 452 muListeners sync.Mutex 453 listeners map[rekeyEventType][]rekeyFSMListener 454} 455 456// NewRekeyFSM creates a new rekey FSM. 457func NewRekeyFSM(fbo *folderBranchOps) RekeyFSM { 458 fsm := &rekeyFSM{ 459 reqs: make(chan RekeyEvent, fbo.config.Mode().RekeyQueueSize()), 460 shutdownCh: make(chan struct{}), 461 fbo: fbo, 462 log: fbo.config.MakeLogger("RekeyFSM"), 463 464 listeners: make(map[rekeyEventType][]rekeyFSMListener), 465 } 466 fsm.current = newRekeyStateIdle(fsm) 467 if fbo.bType == standard { 468 go fsm.loop() 469 } 470 return fsm 471} 472 473func (m *rekeyFSM) loop() { 474 reqs := m.reqs 475 for { 476 select { 477 case e := <-reqs: 478 next := m.current.reactToEvent(e) 479 if e.eventType == rekeyShutdownEvent { 480 // Set reqs to nil so on next iteration, we will skip any 481 // content in reqs. So if there are multiple 482 // rekeyShutdownEvent, we won't close m.shutdownCh multiple 483 // times. 484 reqs = nil 485 close(m.shutdownCh) 486 } else { 487 // Only log if we're not shutting down, otherwise `go vet` 488 // yells at us in tests. 489 m.log.Debug("RekeyFSM transition: %T + %s -> %T", 490 m.current, e, next) 491 } 492 m.current = next 493 m.triggerCallbacksForTest(e) 494 495 case <-m.shutdownCh: 496 return 497 } 498 } 499} 500 501// Event implements RekeyFSM interface for rekeyFSM. 502func (m *rekeyFSM) Event(event RekeyEvent) { 503 select { 504 case m.reqs <- event: 505 case <-m.shutdownCh: 506 } 507} 508 509// Shutdown implements RekeyFSM interface for rekeyFSM. 510func (m *rekeyFSM) Shutdown() { 511 m.Event(newRekeyShutdownEvent()) 512} 513 514func (m *rekeyFSM) triggerCallbacksForTest(e RekeyEvent) { 515 var cbs []rekeyFSMListener 516 func() { 517 m.muListeners.Lock() 518 defer m.muListeners.Unlock() 519 cbs = m.listeners[e.eventType] 520 m.listeners[e.eventType] = nil 521 for _, cb := range cbs { 522 if cb.repeatedly { 523 m.listeners[e.eventType] = append( 524 m.listeners[e.eventType], cb) 525 } 526 } 527 }() 528 for _, cb := range cbs { 529 cb.onEvent(e) 530 } 531} 532 533// listenOnEvent implements RekeyFSM interface for rekeyFSM. 534func (m *rekeyFSM) listenOnEvent( 535 event rekeyEventType, callback func(RekeyEvent), repeatedly bool) { 536 m.muListeners.Lock() 537 defer m.muListeners.Unlock() 538 m.listeners[event] = append(m.listeners[event], rekeyFSMListener{ 539 onEvent: callback, 540 repeatedly: repeatedly, 541 }) 542} 543 544func getRekeyFSM(ctx context.Context, ops KBFSOps, tlfID tlf.ID) RekeyFSM { 545 switch o := ops.(type) { 546 case *KBFSOpsStandard: 547 return o.getOpsNoAdd( 548 ctx, data.FolderBranch{ 549 Tlf: tlfID, 550 Branch: data.MasterBranch, 551 }).rekeyFSM 552 default: 553 panic("unknown KBFSOps") 554 } 555} 556 557// RequestRekeyAndWaitForOneFinishEvent sends a rekey request to the FSM 558// associated with tlfID, and wait for exact one rekeyFinished event. This can 559// be useful for waiting for a rekey result in tests. 560// 561// Note that the supplied ctx is injected to the rekey task, so canceling ctx 562// would actually cancel the rekey. 563// 564// Currently this is only used in tests and RekeyFile. Normal rekey activities 565// should go through the FSM asychronously. 566func RequestRekeyAndWaitForOneFinishEvent(ctx context.Context, 567 ops KBFSOps, tlfID tlf.ID) (res RekeyResult, err error) { 568 fsm := getRekeyFSM(ctx, ops, tlfID) 569 rekeyWaiter := make(chan struct{}) 570 fsm.listenOnEvent(rekeyFinishedEvent, func(e RekeyEvent) { 571 res = e.finished.RekeyResult 572 err = e.finished.err 573 close(rekeyWaiter) 574 }, false) 575 fsm.Event(newRekeyRequestEventWithContext(ctx)) 576 <-rekeyWaiter 577 return res, err 578} 579