1 /*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 2002, 2014 Oracle and/or its affiliates. All rights reserved. 5 * 6 */ 7 package com.sleepycat.je.rep.stream; 8 9 import static com.sleepycat.je.utilint.VLSN.NULL_VLSN; 10 11 import java.io.IOException; 12 import java.util.Arrays; 13 import java.util.HashSet; 14 import java.util.logging.Level; 15 import java.util.logging.Logger; 16 17 import com.sleepycat.je.DatabaseException; 18 import com.sleepycat.je.EnvironmentFailureException; 19 import com.sleepycat.je.config.EnvironmentParams; 20 import com.sleepycat.je.dbi.EnvironmentImpl; 21 import com.sleepycat.je.rep.InsufficientLogException; 22 import com.sleepycat.je.rep.ReplicationNode; 23 import com.sleepycat.je.rep.RollbackException; 24 import com.sleepycat.je.rep.RollbackProhibitedException; 25 import com.sleepycat.je.rep.SyncupProgress; 26 import com.sleepycat.je.rep.impl.RepImpl; 27 import com.sleepycat.je.rep.impl.RepParams; 28 import com.sleepycat.je.rep.impl.node.LocalCBVLSNTracker; 29 import com.sleepycat.je.rep.impl.node.RepNode; 30 import com.sleepycat.je.rep.impl.node.Replay; 31 import com.sleepycat.je.rep.impl.node.Replica; 32 import com.sleepycat.je.rep.impl.node.Replica.HardRecoveryElectionException; 33 import com.sleepycat.je.rep.stream.Protocol.AlternateMatchpoint; 34 import com.sleepycat.je.rep.stream.Protocol.Entry; 35 import com.sleepycat.je.rep.stream.Protocol.EntryNotFound; 36 import com.sleepycat.je.rep.stream.Protocol.RestoreResponse; 37 import com.sleepycat.je.rep.stream.ReplicaSyncupReader.SkipGapException; 38 import com.sleepycat.je.rep.utilint.BinaryProtocol.Message; 39 import com.sleepycat.je.rep.utilint.NamedChannel; 40 import com.sleepycat.je.rep.vlsn.VLSNIndex; 41 import com.sleepycat.je.rep.vlsn.VLSNRange; 42 import com.sleepycat.je.utilint.DbLsn; 43 import com.sleepycat.je.utilint.LoggerUtils; 44 import com.sleepycat.je.utilint.VLSN; 45 46 /** 47 * Establish where the replication stream should start for a replica and feeder 48 * pair. The replica compares what is in its log with what the feeder has, to 49 * determine the latest common log entry matchpoint 50 * 51 * - If the replica has applied log entries after that matchpoint, roll them 52 * back 53 * - If a common matchpoint can't be found, the replica will need to do 54 * a network restore. 55 */ 56 public class ReplicaFeederSyncup { 57 58 private final Logger logger; 59 60 private final NamedChannel namedChannel; 61 private final Protocol protocol; 62 private final RepNode repNode; 63 private final VLSNIndex vlsnIndex; 64 private final Replay replay; 65 private final RepImpl repImpl; 66 private ReplicaSyncupReader backwardsReader; 67 68 /* The VLSN, lsn and log entry at which a match was made. */ 69 private VLSN matchpointVLSN = NULL_VLSN; 70 private Long matchedVLSNTime = 0L; 71 72 private final boolean hardRecoveryNeedsElection; 73 74 /* 75 * searchResults are the bundled outputs from the backwards scan by the 76 * ReplicaSyncReader during its search for a matchpoint. 77 */ 78 private final MatchpointSearchResults searchResults; 79 80 /** 81 * For unit tests only. 82 */ 83 private static TestHook<Object> globalSyncupEndHook; 84 private final TestHook<Object> syncupEndHook; 85 ReplicaFeederSyncup(RepNode repNode, Replay replay, NamedChannel namedChannel, Protocol protocol, boolean hardRecoveryNeedsElection)86 public ReplicaFeederSyncup(RepNode repNode, 87 Replay replay, 88 NamedChannel namedChannel, 89 Protocol protocol, 90 boolean hardRecoveryNeedsElection) { 91 this.replay = replay; 92 logger = LoggerUtils.getLogger(getClass()); 93 this.repNode = repNode; 94 this.vlsnIndex = repNode.getVLSNIndex(); 95 this.namedChannel = namedChannel; 96 this.protocol = protocol; 97 this.repImpl = repNode.getRepImpl(); 98 this.hardRecoveryNeedsElection = hardRecoveryNeedsElection; 99 searchResults = new MatchpointSearchResults(repNode.getRepImpl()); 100 syncupEndHook = repNode.replica().getReplicaFeederSyncupHook(); 101 } 102 getMatchedVLSNTime()103 public long getMatchedVLSNTime() { 104 return matchedVLSNTime; 105 } 106 getMatchedVLSN()107 public VLSN getMatchedVLSN() { 108 return matchpointVLSN; 109 } 110 111 /** 112 * The replica's side of the protocol. 113 * @throws InterruptedException 114 * @throws InsufficientLogException 115 * @throws HardRecoveryElectionException 116 */ execute(LocalCBVLSNTracker cbvlsnTracker)117 public void execute(LocalCBVLSNTracker cbvlsnTracker) 118 throws IOException, 119 DatabaseException, 120 InterruptedException, 121 InsufficientLogException, 122 HardRecoveryElectionException { 123 124 final long startTime = System.currentTimeMillis(); 125 String feederName = namedChannel.getNameIdPair().getName(); 126 LoggerUtils.info(logger, repImpl, 127 "Replica-feeder " + feederName + 128 " syncup started. Replica range: " + 129 repNode.getVLSNIndex().getRange()); 130 131 /* Prohibit global CBVLSN update. */ 132 repNode.syncupStarted(); 133 try { 134 135 /* 136 * Find a replication stream matchpoint and a place to start 137 * the replication stream. If the feeder cannot service this 138 * protocol because it has run out of replication stream, 139 * findMatchpoint will throw a InsufficientLogException. If the 140 */ 141 VLSNRange range = vlsnIndex.getRange(); 142 findMatchpoint(range); 143 144 /* 145 * If we can't rollback to the found matchpoint, verifyRollback 146 * will throw the appropriate exception. 147 */ 148 verifyRollback(range); 149 150 replay.rollback(matchpointVLSN, searchResults.getMatchpointLSN()); 151 152 /* Update the vlsnIndex, it will commit synchronously. */ 153 VLSN startVLSN = matchpointVLSN.getNext(); 154 vlsnIndex.truncateFromTail(startVLSN, 155 searchResults.getMatchpointLSN()); 156 157 protocol.write(protocol.new StartStream(startVLSN), namedChannel); 158 LoggerUtils.info(logger, repImpl, 159 "Replica-feeder " + feederName + 160 " start stream at VLSN: " + startVLSN); 161 162 /* 163 * Initialize this node's local CBVLSN while global CBVLSN updates 164 * are prohibited. Hang onto the vlsn at the matchpoint -- don't 165 * let that be cleaned, because it may be of use for other replicas 166 * who need to sync up. Right now, this seems to be the best 167 * matchpoint in the group. 168 */ 169 cbvlsnTracker.registerMatchpoint(matchpointVLSN); 170 } finally { 171 172 /* For unit test support only. */ 173 assert runHook(); 174 175 repNode.syncupEnded(); 176 LoggerUtils.info 177 (logger, repImpl, 178 String.format 179 ("Replica-feeder " + feederName + 180 " syncup ended. Elapsed time: %,dms", 181 (System.currentTimeMillis() - startTime))); 182 repImpl.setSyncupProgress(SyncupProgress.END); 183 } 184 } 185 186 /** 187 * A matchpoint has been found. What happens next depends on the position 188 * of the matchpoint and its relationship to the last transaction end 189 * record. 190 * 191 * In following table, 192 * M = some non-null matchpoint VLSN value, 193 * T = some non-null last txn end value 194 * S = some non-null last sync value 195 * 196 * txn end last sync found action 197 * VLSN VLSN matchpoint 198 * ---------- --------- --------- ------------------------ 199 * NULL_VLSN NULL_VLSN NULL_VLSN rollback everything 200 * NULL_VLSN NULL_VLSN M can't occur 201 * NULL_VLSN S NULL_VLSN rollback everything 202 * NULL_VLSN S M rollback to M 203 * T NULL_VLSN NULL_VLSN can't occur 204 * T NULL_VLSN M can't occur 205 * T S NULL_VLSN network restore, though 206 * could also do hard recov 207 * T <= M S M rollback to matchpoint 208 * T > M, truncate not ok S M network restore 209 * T > M, truncation limit 210 * exceeded S M throw RollbackProhibited 211 * T > M, truncate ok S M hard recovery 212 * @throws IOException 213 * @throws HardRecoveryElectionException 214 */ verifyRollback(VLSNRange range)215 private void verifyRollback(VLSNRange range) 216 throws RollbackException, InsufficientLogException, 217 HardRecoveryElectionException, IOException { 218 repImpl.setSyncupProgress(SyncupProgress.CHECK_FOR_ROLLBACK); 219 VLSN lastTxnEnd = range.getLastTxnEnd(); 220 VLSN lastSync = range.getLastSync(); 221 222 LoggerUtils.finest(logger, repImpl, "verify rollback" + 223 " vlsn range=" + range + 224 " searchResults=" + searchResults); 225 /* 226 * If the lastTxnEnd VLSN is null, we don't have to worry about hard 227 * recovery. See truth table above. 228 */ 229 if (lastTxnEnd.isNull()) { 230 if (range.getLastSync().isNull() && !matchpointVLSN.isNull()) { 231 throw EnvironmentFailureException.unexpectedState 232 (repNode.getRepImpl(), "Shouldn't be possible to find a "+ 233 "matchpoint of " + matchpointVLSN + 234 " when the sync VLSN is null. Range=" + range); 235 } 236 237 /* We'll be doing a normal rollback. */ 238 LoggerUtils.fine(logger, repImpl, "normal rollback, no txn end"); 239 return; 240 } 241 242 if (lastSync.isNull()) { 243 throw EnvironmentFailureException.unexpectedState 244 (repNode.getRepImpl(), 245 "Shouldn't be possible to have a null sync VLSN when the " 246 + " lastTxnVLSN " + lastTxnEnd + " is not null. Range=" + 247 range); 248 } 249 250 /* 251 * There is a non-null lastTxnEnd VLSN, so check if the found 252 * matchpoint precedes it. If it doesn't, we can't rollback. 253 */ 254 if (matchpointVLSN.isNull()) { 255 256 /* 257 * We could actually also try to do a hard recovery and truncate 258 * all committed txns, but for now, let's assume that it will cost 259 * less to copy the log files. 260 */ 261 LoggerUtils.info(logger, repImpl, 262 "This node had a txn end at vlsn = " + lastTxnEnd + 263 "but no matchpoint found."); 264 throw setupLogRefresh(matchpointVLSN); 265 } 266 267 /* 268 * The matchpoint is after or equal to the last txn end, no problem 269 * with doing a normal rollback. 270 */ 271 if ((lastTxnEnd.compareTo(matchpointVLSN) <= 0) && 272 (searchResults.getNumPassedCommits() == 0)) { 273 LoggerUtils.fine(logger, repImpl, "txn end vlsn of " + lastTxnEnd + 274 "<= matchpointVLSN of " + matchpointVLSN + 275 ", normal rollback"); 276 return; 277 } 278 279 /* Rolling back past a commit or abort. */ 280 281 if (hardRecoveryNeedsElection) { 282 throw new Replica.HardRecoveryElectionException 283 (repNode.getMasterStatus().getNodeMasterNameId(), 284 lastTxnEnd, matchpointVLSN); 285 } 286 287 /* 288 * We're planning on rolling back past a commit or abort. The more 289 * optimal course of action is to truncate the log and run a hard 290 * recovery, but if the matchpoint precedes a checkpoint which deleted 291 * log files, the truncation is not permissible because the resulting 292 * log might be missing needed files. Instead, we have to do a network 293 * restore. 294 */ 295 if (searchResults.getPassedCheckpointEnd()) { 296 LoggerUtils.info(logger, repImpl, "matchpointVLSN of " + 297 matchpointVLSN + " precedes a checkpoint end, " + 298 "needs network restore."); 299 throw setupLogRefresh(matchpointVLSN); 300 } 301 302 /* 303 * Likewise, if we skipped over a gap in the log files, we can't be 304 * sure if we passed a ckpt with deleted log files. Do a network 305 * restore rather than a hard recovery. 306 */ 307 if (searchResults.getSkippedGap()) { 308 LoggerUtils.info(logger, repImpl, "matchpointVLSN of " + 309 matchpointVLSN + " was found in a replica log " + 310 "with gaps. Since we can't be sure if it " + 311 "preceeds a checkpoint end, do network restore."); 312 throw setupLogRefresh(matchpointVLSN); 313 } 314 315 /* 316 * We're planning on rolling back past a commit or abort, and we know 317 * that we have not passed a barrier checkpoint. See if we have 318 * exceeded the number of rolledback commits limit. 319 */ 320 EnvironmentImpl envImpl = repNode.getRepImpl(); 321 int rollbackTxnLimit = 322 envImpl.getConfigManager().getInt(RepParams.TXN_ROLLBACK_LIMIT); 323 324 if (searchResults.getNumPassedCommits() > rollbackTxnLimit) { 325 326 LoggerUtils.severe(logger, repImpl, 327 "Limited list of transactions that would " + 328 " be truncated for hard recovery:\n" + 329 searchResults.dumpPassedTxns()); 330 331 throw new RollbackProhibitedException(repNode.getRepImpl(), 332 rollbackTxnLimit, 333 matchpointVLSN, 334 searchResults); 335 } 336 337 /* 338 * After passing all the earlier qualifications, do a truncation and 339 * hard recovery. 340 */ 341 throw setupHardRecovery(range); 342 } 343 344 /** 345 * Find a matchpoint, which is a log entry in the replication stream which 346 * is the same on feeder and replica. Assign the matchpointVLSN field. The 347 * matchpoint log entry must be be tagged with an environment id. If no 348 * matching entry is found, the matchpoint is effectively the NULL_VLSN. 349 * 350 * To determine the matchpoint, exchange messages with the feeder and 351 * compare log entries. If the feeder does not have enough log entries, 352 * throw InsufficientLogException. 353 * @throws InsufficientLogException 354 */ findMatchpoint(VLSNRange range)355 private void findMatchpoint(VLSNRange range) 356 throws IOException, 357 InsufficientLogException { 358 359 int matchCounter = 0; 360 repImpl.setSyncupProgress(SyncupProgress.FIND_MATCHPOINT, 361 matchCounter++, -1); 362 VLSN candidateMatchpoint = range.getLastSync(); 363 if (candidateMatchpoint.equals(NULL_VLSN)) { 364 365 /* 366 * If the replica has no sync-able log entries at all, the 367 * matchpoint is the NULL_VLSN, and we should start the replication 368 * stream at VLSN 1. Check if the feeder has the VLSN 1. If it 369 * doesn't, getFeederRecord() will throw a 370 * InsufficientLogException. We can assume that a non-cleaned 371 * feeder always has VLSN 1, because a ReplicatedEnvironment always 372 * creates a few replicated vlsns, such as the name db, at 373 * initial startup. 374 */ 375 getFeederRecord(range, VLSN.FIRST_VLSN, 376 false /*acceptAlternative*/); 377 return; 378 } 379 380 /* 381 * CandidateMatchpoint is not null, so ask the feeder for the log 382 * record at that vlsn. 383 */ 384 InputWireRecord feederRecord = 385 getFeederRecord(range, candidateMatchpoint, 386 true /*acceptAlternative*/); 387 388 /* 389 * The feeder may have suggested an alternative matchpoint, so reset 390 * candidate matchpoint. 391 */ 392 candidateMatchpoint = feederRecord.getVLSN(); 393 if (logger.isLoggable(Level.FINE)) { 394 LoggerUtils.fine(logger, repImpl, 395 "first candidate matchpoint: " + 396 candidateMatchpoint); 397 } 398 /* 399 * Start comparing feeder records to replica records. Instead of using 400 * the VLSNIndex to direct our search, we must scan from the end of the 401 * log, recording entries that have an impact on our ability to 402 * rollback, like checkpoints. 403 * 404 * Start by finding the candidate matchpoint in the Replica. 405 */ 406 backwardsReader = setupBackwardsReader 407 (candidateMatchpoint, 408 repNode.getRepImpl().getFileManager().getLastUsedLsn()); 409 OutputWireRecord replicaRecord = getReplicaRecord(candidateMatchpoint); 410 411 while (!replicaRecord.match(feederRecord)) { 412 repImpl.setSyncupProgress(SyncupProgress.FIND_MATCHPOINT, 413 matchCounter++, -1); 414 415 /* 416 * That first bid didn't match, now just keep looking at all 417 * potential matchpoints. 418 */ 419 replicaRecord = scanMatchpointEntries(); 420 421 if (replicaRecord == null) { 422 423 /* 424 * The search for the previous sync log entry went past our 425 * available contiguous VLSN range, so there is no 426 * matchpoint. 427 */ 428 LoggerUtils.info(logger, repImpl, 429 "Looking at candidate matchpoint vlsn " + 430 candidateMatchpoint + 431 " but this node went past its available" + 432 " contiguous VLSN range, need network" + 433 " restore."); 434 throw setupLogRefresh(candidateMatchpoint); 435 } 436 437 /* 438 * Ask the feeder for the record. If the feeder doesn't have 439 * it, we'll throw out and do a network restore. 440 */ 441 candidateMatchpoint = replicaRecord.getVLSN(); 442 if (logger.isLoggable(Level.FINE)) { 443 LoggerUtils.fine(logger, repImpl, 444 "Next candidate matchpoint: " + 445 candidateMatchpoint); 446 } 447 feederRecord = getFeederRecord(range, candidateMatchpoint, 448 false); 449 } 450 451 /* We've found the matchpoint. */ 452 matchedVLSNTime = replicaRecord.getTimeStamp(); 453 matchpointVLSN = candidateMatchpoint; 454 searchResults.setMatchpoint(backwardsReader.getLastLsn()); 455 LoggerUtils.finest(logger, repImpl, 456 "after setting matchpoint, searchResults=" + 457 searchResults); 458 } 459 setupBackwardsReader(VLSN startScanVLSN, long startScanLsn)460 private ReplicaSyncupReader setupBackwardsReader(VLSN startScanVLSN, 461 long startScanLsn) { 462 463 EnvironmentImpl envImpl = repNode.getRepImpl(); 464 int readBufferSize = envImpl.getConfigManager(). 465 getInt(EnvironmentParams.LOG_ITERATOR_READ_SIZE); 466 467 return new ReplicaSyncupReader 468 (envImpl, 469 repNode.getVLSNIndex(), 470 startScanLsn, 471 readBufferSize, 472 repNode.getNameIdPair(), 473 startScanVLSN, 474 DbLsn.makeLsn(repNode.getCleanerBarrierFile(), 0), 475 searchResults); 476 } 477 478 /** 479 * Search backwards for the replica's log record at this target VLSN. The 480 * target record is either the replica's first suggestion for a matchpoint, 481 * or feeder's counter offer. We have checked earlier that the counter 482 * offer is within the replica's vlsn range. 483 */ getReplicaRecord(VLSN candidateMatchpoint)484 private OutputWireRecord getReplicaRecord(VLSN candidateMatchpoint) { 485 486 OutputWireRecord replicaRecord = null; 487 do { 488 try { 489 replicaRecord = 490 backwardsReader.scanBackwards(candidateMatchpoint); 491 492 /* 493 * We're hunting for a VLSN that should be in the VLSN range, 494 * and it should exist. 495 */ 496 if (replicaRecord == null) { 497 throw EnvironmentFailureException.unexpectedState 498 (repImpl, 499 "Searching for candidate matchpoint " + 500 candidateMatchpoint + 501 " but got null record back "); 502 } 503 504 /* We've found the record at candidateMatchpoint */ 505 return replicaRecord; 506 } catch (SkipGapException e) { 507 /* 508 * The ReplicaSyncupReader will throw a SkipGapException if it 509 * encounters a cleaned files gap in the log. There can be 510 * multiple gaps on its way toward finding the candidate 511 * vlsn. The ReplicaSyncupReader is obliged to traverse the 512 * log, in order to note checkpoints, rather than simply using 513 * the vlsn index. When a gap is detected, the vlsn on the left 514 * side of the gap is used to re-init a new reader. For 515 * example, suppose the log looks like this: 516 * 517 * file 100 has vlsns 41-50 518 * file 200 has vlsns 51-60 519 * file 300 has vlsns 61-70 520 * 521 * and the candidate matchpoint is 45, the search will start at 522 * vlsn 70. 523 * t1: SkipGapException thrown at gap between file 200 & 300, 524 * create new reader positioned at vlsn 60 525 * t2: SkipGapException thrown at gap between file 100 & 200, 526 * create new reader positioned at vlsn 50 527 */ 528 VLSN gapRepositionVLSN = e.getVLSN(); 529 if (gapRepositionVLSN.compareTo(candidateMatchpoint) < 0) { 530 throw EnvironmentFailureException.unexpectedState 531 ("Gap reposition point of " + gapRepositionVLSN + 532 " should always be >= candidate matchpoint VLSN of " + 533 candidateMatchpoint); 534 } 535 536 long startScanLsn = vlsnIndex.getGTELsn(gapRepositionVLSN); 537 backwardsReader = setupBackwardsReader(candidateMatchpoint, 538 startScanLsn); 539 /* 540 * If we skip a gap, there is a chance that we will have passed 541 * a checkpoint which had deleted log files. This has no impact 542 * if we are doing a soft rollback, but if we do a hard 543 * recovery, it would prevent us from truncating the log. It 544 * would require doing a network restore if we need to rollback 545 * committed txns. 546 */ 547 searchResults.noteSkippedGap(); 548 } 549 } while (true); 550 } 551 552 /** 553 * Search backwards for potential matchpoints in the replica log, 554 * accounting for potential gaps. 555 */ scanMatchpointEntries()556 private OutputWireRecord scanMatchpointEntries() { 557 OutputWireRecord replicaRecord = null; 558 boolean firstAttempt = true; 559 do { 560 try { 561 /* 562 * The first time around, when firstAttempt is true, ask the 563 * reader to search for the vlsn before the currentVLSN, 564 * because we entered this method having searched to a given 565 * target matchpoint. All subsequent times, we are in search of 566 * the reader's currentVLSN, but haven't found it yet, because 567 * we hit a gap, so leave the currentVLSN alone. 568 */ 569 replicaRecord = 570 backwardsReader.findPrevSyncEntry(firstAttempt); 571 572 /* 573 * Either se've found a possible matchpoint, or we've come to 574 * the end and the replicaRecord is null. One way or another, 575 * return the results of the scan. 576 */ 577 return replicaRecord; 578 } catch (SkipGapException e) { 579 /* 580 * The ReplicaSyncupReader will throw a SkipGapException if it 581 * encounters a cleaned files gap in the log. There can be 582 * multiple gaps on its way toward finding the next potential 583 * matchpoint. The ReplicaSyncupReader is obliged to traverse 584 * the log, in order to note checkpoints, rather than simply 585 * using the vlsn index. When a gap is detected, the vlsn on 586 * the left side of the gap is used to re-init a new 587 * reader. For example, suppose the log looks like this and the 588 * search starts at vlsn 70 589 * 590 * file 100 has vlsns 51-60 591 * file 200 has no vlsns 592 * file 300 has no vlsns 593 * file 400 has vlsns 61-70 594 * 595 * SkipGapException thrown at gap between file 300 & 400, 596 * when the reader's currentVLSN is 60. Create a new reader, 597 * positioned at vlsn 60, skipping over files 200 and 300. 598 */ 599 600 VLSN gapRepositionVLSN = e.getVLSN(); 601 backwardsReader = setupBackwardsReader 602 (gapRepositionVLSN, 603 vlsnIndex.getGTELsn(gapRepositionVLSN)); 604 firstAttempt = false; 605 searchResults.noteSkippedGap(); 606 } 607 } while(true); 608 } 609 610 /** 611 * Ask the feeder for information to add to InsufficientLogException, 612 * and then throw the exception. 613 * 614 * The endVLSN marks the last VLSN that this node will want from 615 * the network restore. That information helps ensure that the restore 616 * source has enough vlsns to satisfy this replica. 617 * 618 * The replication node list identifies possible log provider members. 619 * @throws IOException 620 */ setupLogRefresh(VLSN failedMatchpoint)621 private InsufficientLogException setupLogRefresh(VLSN failedMatchpoint) 622 throws IOException { 623 624 protocol.write(protocol.new RestoreRequest(failedMatchpoint), 625 namedChannel); 626 RestoreResponse response = 627 (RestoreResponse) protocol.read(namedChannel); 628 629 return new InsufficientLogException 630 (repNode, 631 response.getCBVLSN(), 632 new HashSet<ReplicationNode>(Arrays.asList 633 (response.getLogProviders()))); 634 } 635 636 637 /** 638 * Hard recovery: truncate the files, repeat recovery. 639 * If this hard recovery came about before the ReplicatedEnvironment was 640 * fully instantiated, we will recreate the environment under the 641 * covers. If this came while the replica was up and supporting existing 642 * Environment handles, we must invalidate the environment, and ask the 643 * application to reopen. 644 * @throws IOException 645 */ setupHardRecovery(VLSNRange range)646 public RollbackException setupHardRecovery(VLSNRange range) 647 throws IOException { 648 649 /* Creating the exception invalidates the environment. */ 650 RollbackException r = new RollbackException(repImpl, 651 matchpointVLSN, 652 searchResults); 653 LoggerUtils.severe(logger, repImpl, 654 "Limited list of transactions truncated for " + 655 "hard recovery:\n" + 656 searchResults.dumpPassedTxns()); 657 658 /* 659 * Truncate after the environment is invalidated, which happens 660 * when we instantiate RollbackException. 661 */ 662 long matchpointLSN = searchResults.getMatchpointLSN(); 663 repImpl.getFileManager().truncateLog 664 (DbLsn.getFileNumber(matchpointLSN), 665 DbLsn.getFileOffset(matchpointLSN)); 666 667 return r; 668 } 669 670 /** 671 * Request a log entry from the feeder at this VLSN. The Feeder will only 672 * return the log record or say that it isn't available. 673 * 674 * @throws InsufficientLogException 675 */ getFeederRecord(VLSNRange range, VLSN requestVLSN, boolean acceptAlternative)676 private InputWireRecord getFeederRecord(VLSNRange range, 677 VLSN requestVLSN, 678 boolean acceptAlternative) 679 throws IOException, InsufficientLogException { 680 681 /* Ask the feeder for the matchpoint log record. */ 682 protocol.write(protocol.new EntryRequest(requestVLSN), namedChannel); 683 684 /* 685 * Expect 686 * a) the requested log record 687 * b) message that says this feeder doesn't have RequestVLSN 688 * c) if acceptAlternative == true and the feeder didn't have 689 * requestVLSN, but had an earlier entry, the feeder may send an 690 * earlier, alternative matchpoint 691 */ 692 Message message = protocol.read(namedChannel); 693 if (message instanceof Entry) { 694 Entry entry = (Entry) message; 695 return entry.getWireRecord(); 696 } 697 698 if (message instanceof EntryNotFound) { 699 LoggerUtils.info(logger, repImpl, "Requested " + requestVLSN + 700 " from " + namedChannel.getNameIdPair() + 701 " but that node did not have that vlsn."); 702 throw setupLogRefresh(requestVLSN); 703 } 704 705 if ((acceptAlternative) && 706 (message instanceof AlternateMatchpoint)) { 707 708 AlternateMatchpoint alt = (AlternateMatchpoint) message; 709 InputWireRecord feederRecord = alt.getAlternateWireRecord(); 710 VLSN altMatchpoint = feederRecord.getVLSN(); 711 if (range.getFirst().compareTo(altMatchpoint) > 0) { 712 713 /* 714 * The feeder suggest a different matchpoint, but it's outside 715 * the replica's range. Give up and do a network restore. 716 */ 717 throw setupLogRefresh(altMatchpoint); 718 } 719 return feederRecord; 720 } 721 722 throw EnvironmentFailureException.unexpectedState 723 (repNode.getRepImpl(), 724 "Sent EntryRequest, got unexpected response of " + message); 725 } 726 727 728 setGlobalSyncupEndHook(TestHook<Object> syncupEndHook)729 public static void setGlobalSyncupEndHook(TestHook<Object> syncupEndHook) { 730 ReplicaFeederSyncup.globalSyncupEndHook = syncupEndHook; 731 } 732 runHook()733 private boolean runHook() 734 throws InterruptedException { 735 736 if (syncupEndHook != null) { 737 syncupEndHook.doHook(); 738 } 739 740 if (globalSyncupEndHook != null) { 741 globalSyncupEndHook.doHook(); 742 } 743 return true; 744 } 745 746 /** 747 * This interface is used instead of com.sleepycat.je.utilint.TestHook 748 * because the doHook method needs to throw InterruptedException. 749 */ 750 public interface TestHook<T> { doHook()751 public void doHook() throws InterruptedException; 752 } 753 } 754