1 /*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 2002, 2014 Oracle and/or its affiliates. All rights reserved. 5 * 6 */ 7 8 package com.sleepycat.je.log; 9 10 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_BYTES_READ_FROM_WRITEQUEUE; 11 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_BYTES_WRITTEN_FROM_WRITEQUEUE; 12 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_FILE_OPENS; 13 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_LOG_FSYNCS; 14 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_OPEN_FILES; 15 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_RANDOM_READS; 16 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_RANDOM_READ_BYTES; 17 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_RANDOM_WRITES; 18 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_RANDOM_WRITE_BYTES; 19 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_READS_FROM_WRITEQUEUE; 20 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_SEQUENTIAL_READS; 21 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_SEQUENTIAL_READ_BYTES; 22 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_SEQUENTIAL_WRITES; 23 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_SEQUENTIAL_WRITE_BYTES; 24 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_WRITEQUEUE_OVERFLOW; 25 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_WRITEQUEUE_OVERFLOW_FAILURES; 26 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_WRITES_FROM_WRITEQUEUE; 27 28 import java.io.File; 29 import java.io.FileNotFoundException; 30 import java.io.FilenameFilter; 31 import java.io.IOException; 32 import java.io.RandomAccessFile; 33 import java.nio.ByteBuffer; 34 import java.nio.channels.ClosedChannelException; 35 import java.nio.channels.FileChannel; 36 import java.nio.channels.FileLock; 37 import java.nio.channels.OverlappingFileLockException; 38 import java.util.ArrayList; 39 import java.util.Arrays; 40 import java.util.Collections; 41 import java.util.Comparator; 42 import java.util.HashMap; 43 import java.util.Hashtable; 44 import java.util.Iterator; 45 import java.util.LinkedList; 46 import java.util.List; 47 import java.util.Map; 48 import java.util.Random; 49 import java.util.Set; 50 import java.util.concurrent.locks.ReentrantLock; 51 52 import com.sleepycat.je.DatabaseException; 53 import com.sleepycat.je.EnvironmentFailureException; 54 import com.sleepycat.je.EnvironmentLockedException; 55 import com.sleepycat.je.LogWriteException; 56 import com.sleepycat.je.StatsConfig; 57 import com.sleepycat.je.ThreadInterruptedException; 58 import com.sleepycat.je.config.EnvironmentParams; 59 import com.sleepycat.je.dbi.DbConfigManager; 60 import com.sleepycat.je.dbi.EnvironmentFailureReason; 61 import com.sleepycat.je.dbi.EnvironmentImpl; 62 import com.sleepycat.je.log.entry.FileHeaderEntry; 63 import com.sleepycat.je.log.entry.LogEntry; 64 import com.sleepycat.je.utilint.DbLsn; 65 import com.sleepycat.je.utilint.HexFormatter; 66 import com.sleepycat.je.utilint.IntStat; 67 import com.sleepycat.je.utilint.LongStat; 68 import com.sleepycat.je.utilint.RelatchRequiredException; 69 import com.sleepycat.je.utilint.StatGroup; 70 71 /** 72 * The FileManager presents the abstraction of one contiguous file. It doles 73 * out LSNs. 74 */ 75 public class FileManager { 76 77 public enum FileMode { 78 READ_MODE("r", false), 79 READWRITE_MODE("rw", true), 80 READWRITE_ODSYNC_MODE("rwd", true), 81 READWRITE_OSYNC_MODE("rws", true); 82 83 private String fileModeValue; 84 private boolean isWritable; 85 FileMode(String fileModeValue, boolean isWritable)86 private FileMode(String fileModeValue, boolean isWritable) { 87 this.fileModeValue = fileModeValue; 88 this.isWritable = isWritable; 89 } 90 getModeValue()91 public String getModeValue() { 92 return fileModeValue; 93 } 94 isWritable()95 public boolean isWritable() { 96 return isWritable; 97 } 98 } 99 100 static boolean IO_EXCEPTION_TESTING_ON_WRITE = false; 101 static boolean IO_EXCEPTION_TESTING_ON_READ = false; 102 static boolean THROW_RRE_FOR_UNIT_TESTS = false; 103 private static final String DEBUG_NAME = FileManager.class.getName(); 104 private static final boolean DEBUG = false; 105 106 /** 107 * Returns whether we are in a test mode where we attempt to continue 108 * after a write IOException. This method will never return true in 109 * production use. 110 */ continueAfterWriteException()111 public static boolean continueAfterWriteException() { 112 return IO_EXCEPTION_TESTING_ON_WRITE && !THROW_RRE_FOR_UNIT_TESTS; 113 } 114 115 /* 116 * The number of writes that have been performed. 117 * 118 * public so that unit tests can diddle them. 119 */ 120 public static long WRITE_COUNT = 0; 121 122 /* 123 * The write count value where we should stop or throw. 124 */ 125 public static long STOP_ON_WRITE_COUNT = Long.MAX_VALUE; 126 127 /* 128 * If we're throwing, then throw on write #'s WRITE_COUNT through 129 * WRITE_COUNT + N_BAD_WRITES - 1 (inclusive). 130 */ 131 public static long N_BAD_WRITES = Long.MAX_VALUE; 132 133 /* 134 * If true, then throw an IOException on write #'s WRITE_COUNT through 135 * WRITE_COUNT + N_BAD_WRITES - 1 (inclusive). 136 */ 137 public static boolean THROW_ON_WRITE = false; 138 139 public static final String JE_SUFFIX = ".jdb"; // regular log files 140 public static final String DEL_SUFFIX = ".del"; // cleaned files 141 public static final String BAD_SUFFIX = ".bad"; // corrupt files 142 private static final String LOCK_FILE = "je.lck";// lock file 143 static final String[] DEL_SUFFIXES = { DEL_SUFFIX }; 144 static final String[] JE_SUFFIXES = { JE_SUFFIX }; 145 private static final String[] JE_AND_DEL_SUFFIXES = 146 { JE_SUFFIX, DEL_SUFFIX }; 147 148 /* 149 * The suffix used to denote a file that is in the process of being 150 * transferred during a network backup. The file may not have been 151 * completely transferred, or its digest verified. 152 */ 153 public static final String TMP_SUFFIX = ".tmp"; 154 155 /* 156 * The suffix used to rename files out of the way, if they are being 157 * retained during a backup. Note that the suffix is used in conjunction 158 * with a backup number as described in <code>NetworkBackup</code> 159 */ 160 public static final String BUP_SUFFIX = ".bup"; 161 162 /* May be set to false to speed unit tests. */ 163 private boolean syncAtFileEnd = true; 164 165 private final EnvironmentImpl envImpl; 166 private final long maxFileSize; 167 private final File dbEnvHome; 168 private final File[] dbEnvDataDirs; 169 170 /* True if .del files should be included in the list of log files. */ 171 private boolean includeDeletedFiles = false; 172 173 /* File cache */ 174 private final FileCache fileCache; 175 176 private FileCacheWarmer fileCacheWarmer; 177 178 /* The channel and lock for the je.lck file. */ 179 private RandomAccessFile lockFile; 180 private FileChannel channel; 181 private FileLock envLock; 182 private FileLock exclLock; 183 184 /* True if all files should be opened readonly. */ 185 private final boolean readOnly; 186 187 /* Handles onto log position */ 188 private long currentFileNum; // number of the current file 189 private long nextAvailableLsn; // nextLSN is the next one available 190 private long lastUsedLsn; // last LSN used in the current log file 191 private long prevOffset; // Offset to use for the previous pointer 192 private boolean forceNewFile; // Force new file on next write 193 194 /* 195 * Saved versions of above. Save this in case a write causes an 196 * IOException, we can back the log up to the last known good LSN. 197 */ 198 private long savedCurrentFileNum; 199 private long savedNextAvailableLsn; // nextLSN is the next one available 200 private long savedLastUsedLsn; // last LSN used in the current log file 201 private long savedPrevOffset; // Offset to use for the previous pointer 202 private boolean savedForceNewFile; 203 204 /* endOfLog is used for writes and fsyncs to the end of the log. */ 205 private final LogEndFileDescriptor endOfLog; 206 207 /* 208 * When we bump the LSNs over to a new file, we must remember the last LSN 209 * of the previous file so we can set the prevOffset field of the file 210 * header appropriately. We have to save it in a map because there's a time 211 * lag between when we know what the last LSN is and when we actually do 212 * the file write, because LSN bumping is done before we get a write 213 * buffer. This map is keyed by file num->last LSN. 214 */ 215 private final Map<Long, Long> perFileLastUsedLsn; 216 217 /* 218 * True if we should use the Write Queue. This queue is enabled by default 219 * and contains any write() operations which were attempted but would have 220 * blocked because an fsync() or another write() was in progress at the 221 * time. The operations on the Write Queue are later executed by the next 222 * operation that is able to grab the fsync latch. File systems like ext3 223 * need this queue in order to achieve reasonable throughput since it 224 * acquires an exclusive mutex on the inode during any IO operation 225 * (seek/read/write/fsync). OS's like Windows and Solaris do not since 226 * they are able to handle concurrent IO operations on a single file. 227 */ 228 private final boolean useWriteQueue; 229 230 /* The starting size of the Write Queue. */ 231 private final int writeQueueSize; 232 233 /* 234 * Use O_DSYNC to open JE log files. 235 */ 236 private final boolean useODSYNC; 237 238 /* public for unit tests. */ 239 public boolean VERIFY_CHECKSUMS = false; 240 241 /* 242 * Non-0 means to use envHome/data001 through envHome/data00N for the 243 * environment directories, where N is nDataDirs. Distribute *.jdb files 244 * through dataNNN directories round-robin. 245 */ 246 private final int nDataDirs; 247 248 /* 249 * Last file to which any IO was done. 250 */ 251 long lastFileNumberTouched = -1; 252 253 /* 254 * Current file offset of lastFile. 255 */ 256 long lastFileTouchedOffset = 0; 257 258 /* 259 * For IO stats, this is a measure of what is "close enough" to constitute 260 * a sequential IO vs a random IO. 1MB for now. Generally a seek within a 261 * few tracks of the current disk track is "fast" and only requires a 262 * single rotational latency. 263 */ 264 private static final long ADJACENT_TRACK_SEEK_DELTA = 1 << 20; 265 266 /* 267 * Stats 268 */ 269 final StatGroup stats; 270 final LongStat nRandomReads; 271 final LongStat nRandomWrites; 272 final LongStat nSequentialReads; 273 final LongStat nSequentialWrites; 274 final LongStat nRandomReadBytes; 275 final LongStat nRandomWriteBytes; 276 final LongStat nSequentialReadBytes; 277 final LongStat nSequentialWriteBytes; 278 final IntStat nFileOpens; 279 final IntStat nOpenFiles; 280 final LongStat nBytesReadFromWriteQueue; 281 final LongStat nBytesWrittenFromWriteQueue; 282 final LongStat nReadsFromWriteQueue; 283 final LongStat nWritesFromWriteQueue; 284 final LongStat nWriteQueueOverflow; 285 final LongStat nWriteQueueOverflowFailures; 286 /* all fsyncs, includes those issued for group commit */ 287 final LongStat nLogFSyncs; 288 final LongStat nFSyncTime; 289 290 /** 291 * Set up the file cache and initialize the file manager to point to the 292 * beginning of the log. 293 * 294 * @param dbEnvHome environment home directory 295 * 296 * @throws IllegalArgumentException via Environment ctor 297 * 298 * @throws EnvironmentLockedException via Environment ctor 299 */ FileManager(EnvironmentImpl envImpl, File dbEnvHome, boolean readOnly)300 public FileManager(EnvironmentImpl envImpl, 301 File dbEnvHome, 302 boolean readOnly) 303 throws EnvironmentLockedException { 304 305 this.envImpl = envImpl; 306 this.dbEnvHome = dbEnvHome; 307 this.readOnly = readOnly; 308 309 boolean success = false; 310 311 stats = new StatGroup(LogStatDefinition.FILEMGR_GROUP_NAME, 312 LogStatDefinition.FILEMGR_GROUP_DESC); 313 nRandomReads = new LongStat(stats, FILEMGR_RANDOM_READS); 314 nRandomWrites = new LongStat(stats, FILEMGR_RANDOM_WRITES); 315 nSequentialReads = new LongStat(stats, FILEMGR_SEQUENTIAL_READS); 316 nSequentialWrites = new LongStat(stats, FILEMGR_SEQUENTIAL_WRITES); 317 nRandomReadBytes = new LongStat(stats, FILEMGR_RANDOM_READ_BYTES); 318 nRandomWriteBytes = new LongStat(stats, FILEMGR_RANDOM_WRITE_BYTES); 319 nSequentialReadBytes = 320 new LongStat(stats, FILEMGR_SEQUENTIAL_READ_BYTES); 321 nSequentialWriteBytes = 322 new LongStat(stats, FILEMGR_SEQUENTIAL_WRITE_BYTES); 323 nFileOpens = new IntStat(stats, FILEMGR_FILE_OPENS); 324 nOpenFiles = new IntStat(stats, FILEMGR_OPEN_FILES); 325 nBytesReadFromWriteQueue = 326 new LongStat(stats, FILEMGR_BYTES_READ_FROM_WRITEQUEUE); 327 nBytesWrittenFromWriteQueue = 328 new LongStat(stats, FILEMGR_BYTES_WRITTEN_FROM_WRITEQUEUE); 329 nReadsFromWriteQueue = 330 new LongStat(stats, FILEMGR_READS_FROM_WRITEQUEUE); 331 nWritesFromWriteQueue = 332 new LongStat(stats, FILEMGR_WRITES_FROM_WRITEQUEUE); 333 nWriteQueueOverflow = new LongStat(stats, FILEMGR_WRITEQUEUE_OVERFLOW); 334 nWriteQueueOverflowFailures = 335 new LongStat(stats, FILEMGR_WRITEQUEUE_OVERFLOW_FAILURES); 336 nLogFSyncs = new LongStat(stats, FILEMGR_LOG_FSYNCS); 337 nFSyncTime = new LongStat(stats, LogStatDefinition.GRPCMGR_FSYNC_TIME); 338 339 try { 340 /* Read configurations. */ 341 DbConfigManager configManager = envImpl.getConfigManager(); 342 maxFileSize = 343 configManager.getLong(EnvironmentParams.LOG_FILE_MAX); 344 345 useWriteQueue = configManager.getBoolean 346 (EnvironmentParams.LOG_USE_WRITE_QUEUE); 347 writeQueueSize = configManager.getInt 348 (EnvironmentParams.LOG_WRITE_QUEUE_SIZE); 349 useODSYNC = configManager.getBoolean 350 (EnvironmentParams.LOG_USE_ODSYNC); 351 VERIFY_CHECKSUMS = configManager.getBoolean 352 (EnvironmentParams.LOG_VERIFY_CHECKSUMS); 353 nDataDirs = 354 configManager.getInt(EnvironmentParams.LOG_N_DATA_DIRECTORIES); 355 if (nDataDirs != 0) { 356 dbEnvDataDirs = gatherDataDirs(); 357 } else { 358 checkNoDataDirs(); 359 dbEnvDataDirs = null; 360 } 361 362 if (!envImpl.isMemOnly()) { 363 if (!dbEnvHome.exists()) { 364 throw new IllegalArgumentException 365 ("Environment home " + dbEnvHome + " doesn't exist"); 366 } 367 if (!lockEnvironment(readOnly, false)) { 368 throw new EnvironmentLockedException 369 (envImpl, 370 "The environment cannot be locked for " + 371 (readOnly ? "shared" : "single writer") + " access."); 372 } 373 } 374 375 /* Cache of files. */ 376 fileCache = new FileCache(configManager); 377 378 /* Start out as if no log existed. */ 379 currentFileNum = 0L; 380 nextAvailableLsn = 381 DbLsn.makeLsn(currentFileNum, firstLogEntryOffset()); 382 lastUsedLsn = DbLsn.NULL_LSN; 383 perFileLastUsedLsn = 384 Collections.synchronizedMap(new HashMap<Long, Long>()); 385 prevOffset = 0L; 386 endOfLog = new LogEndFileDescriptor(); 387 forceNewFile = false; 388 saveLastPosition(); 389 390 final String stopOnWriteCountName = "je.debug.stopOnWriteCount"; 391 final String stopOnWriteCountProp = 392 System.getProperty(stopOnWriteCountName); 393 if (stopOnWriteCountProp != null) { 394 try { 395 STOP_ON_WRITE_COUNT = Long.parseLong(stopOnWriteCountProp); 396 } catch (NumberFormatException e) { 397 throw new IllegalArgumentException 398 ("Could not parse: " + stopOnWriteCountName, e); 399 } 400 } 401 402 final String stopOnWriteActionName = "je.debug.stopOnWriteAction"; 403 final String stopOnWriteActionProp = 404 System.getProperty(stopOnWriteActionName); 405 if (stopOnWriteActionProp != null) { 406 if (stopOnWriteActionProp.compareToIgnoreCase("throw") == 0) { 407 THROW_ON_WRITE = true; 408 } else if (stopOnWriteActionProp. 409 compareToIgnoreCase("stop") == 0) { 410 THROW_ON_WRITE = false; 411 } else { 412 throw new IllegalArgumentException 413 ("Unknown value for: " + stopOnWriteActionName + 414 stopOnWriteActionProp); 415 } 416 } 417 418 success = true; 419 } finally { 420 if (!success) { 421 try { 422 close(); 423 } catch (IOException e) { 424 425 /* 426 * Klockwork - ok 427 * Eat it, we want to throw the original exception. 428 */ 429 } 430 } 431 } 432 } 433 434 /** 435 * Set the file manager's "end of log". 436 * 437 * @param nextAvailableLsn LSN to be used for the next log entry 438 * @param lastUsedLsn last LSN to have a valid entry, may be null 439 * @param prevOffset value to use for the prevOffset of the next entry. 440 * If the beginning of the file, this is 0. 441 */ setLastPosition(long nextAvailableLsn, long lastUsedLsn, long prevOffset)442 public void setLastPosition(long nextAvailableLsn, 443 long lastUsedLsn, 444 long prevOffset) { 445 this.lastUsedLsn = lastUsedLsn; 446 perFileLastUsedLsn.put(Long.valueOf(DbLsn.getFileNumber(lastUsedLsn)), 447 Long.valueOf(lastUsedLsn)); 448 this.nextAvailableLsn = nextAvailableLsn; 449 currentFileNum = DbLsn.getFileNumber(this.nextAvailableLsn); 450 this.prevOffset = prevOffset; 451 saveLastPosition(); 452 } 453 454 /* 455 * Cause the current LSN state to be saved in case we fail after we have 456 * bumped the LSN pointer but before we've successfully marshalled into the 457 * log buffer. 458 */ saveLastPosition()459 void saveLastPosition() { 460 savedNextAvailableLsn = nextAvailableLsn; 461 savedLastUsedLsn = lastUsedLsn; 462 savedPrevOffset = prevOffset; 463 savedForceNewFile = forceNewFile; 464 savedCurrentFileNum = currentFileNum; 465 } 466 restoreLastPosition()467 void restoreLastPosition() { 468 nextAvailableLsn = savedNextAvailableLsn; 469 lastUsedLsn = savedLastUsedLsn; 470 prevOffset = savedPrevOffset; 471 forceNewFile = savedForceNewFile; 472 currentFileNum = savedCurrentFileNum; 473 } 474 475 /** 476 * May be used to disable sync at file end to speed unit tests. 477 * Must only be used for unit testing, since log corruption may result. 478 */ setSyncAtFileEnd(boolean sync)479 public void setSyncAtFileEnd(boolean sync) { 480 syncAtFileEnd = sync; 481 } 482 483 /* 484 * File management 485 */ 486 487 /** 488 * public for cleaner. 489 * 490 * @return the number of the first file in this environment. 491 */ getFirstFileNum()492 public Long getFirstFileNum() { 493 return getFileNum(true); 494 } 495 getReadOnly()496 public boolean getReadOnly() { 497 return readOnly; 498 } 499 500 /** 501 * @return the number of the last file in this environment. 502 */ getLastFileNum()503 public Long getLastFileNum() { 504 return getFileNum(false); 505 } 506 507 /** 508 * Returns the highest (current) file number. Because a long value cannot 509 * be read atomically without synchronization, this method should be called 510 * while holding the log write latch. 511 */ getCurrentFileNum()512 public long getCurrentFileNum() { 513 return currentFileNum; 514 } 515 516 /** 517 * For unit tests. 518 */ getUseWriteQueue()519 boolean getUseWriteQueue() { 520 return useWriteQueue; 521 } 522 523 /** 524 * For assertions that check whether a file is valid or has been deleted 525 * via log cleaning. 526 */ isFileValid(long fileNum)527 public boolean isFileValid(long fileNum) { 528 529 /* 530 * If the file is the current file, it may be buffered and not yet 531 * created. If the env is memory-only, we will never create or delete 532 * log files. 533 */ 534 if (fileNum == currentFileNum || envImpl.isMemOnly()) { 535 return true; 536 } 537 538 /* Check for file existence. */ 539 String fileName = getFullFileName(fileNum, FileManager.JE_SUFFIX); 540 File file = new File(fileName); 541 return file.exists(); 542 } 543 setIncludeDeletedFiles(boolean includeDeletedFiles)544 public void setIncludeDeletedFiles(boolean includeDeletedFiles) { 545 this.includeDeletedFiles = includeDeletedFiles; 546 } 547 548 /** 549 * Get all JE file numbers. 550 * @return an array of all JE file numbers. 551 */ getAllFileNumbers()552 public Long[] getAllFileNumbers() { 553 /* Get all the names in sorted order. */ 554 String[] names = listFileNames(JE_SUFFIXES); 555 Long[] nums = new Long[names.length]; 556 for (int i = 0; i < nums.length; i += 1) { 557 String name = names[i]; 558 long num = nums[i] = getNumFromName(name); 559 if (nDataDirs != 0) { 560 int dbEnvDataDirsIdx = getDataDirIndexFromName(name) - 1; 561 if (dbEnvDataDirsIdx != (num % nDataDirs)) { 562 throw EnvironmentFailureException.unexpectedState 563 ("Found file " + name + " but it should have been in " + 564 "data directory " + (dbEnvDataDirsIdx + 1) + 565 ". Perhaps it was moved or restored incorrectly?"); 566 } 567 } 568 } 569 return nums; 570 } 571 572 /** 573 * Get the next file number before/after currentFileNum. 574 * @param currentFileNum1 the file we're at right now. Note that 575 * it may not exist, if it's been cleaned and renamed. 576 * @param forward if true, we want the next larger file, if false 577 * we want the previous file 578 * @return null if there is no following file, or if filenum doesn't exist 579 */ getFollowingFileNum(long currentFileNum1, boolean forward)580 public Long getFollowingFileNum(long currentFileNum1, boolean forward) { 581 /* Get all the names in sorted order. */ 582 String[] names = listFileNames(JE_SUFFIXES); 583 584 /* Search for the current file. */ 585 String searchName = getFileName(currentFileNum1, JE_SUFFIX); 586 int foundIdx = Arrays.binarySearch(names, searchName, stringComparator); 587 588 boolean foundTarget = false; 589 if (foundIdx >= 0) { 590 if (forward) { 591 foundIdx++; 592 } else { 593 foundIdx--; 594 } 595 } else { 596 597 /* 598 * currentFileNum not found (might have been cleaned). FoundIdx 599 * will be (-insertionPoint - 1). 600 */ 601 foundIdx = Math.abs(foundIdx + 1); 602 if (!forward) { 603 foundIdx--; 604 } 605 } 606 607 /* The current fileNum is found, return the next or prev file. */ 608 if (forward && (foundIdx < names.length)) { 609 foundTarget = true; 610 } else if (!forward && (foundIdx > -1)) { 611 foundTarget = true; 612 } 613 614 if (foundTarget) { 615 return getNumFromName(names[foundIdx]); 616 } 617 return null; 618 } 619 620 /** 621 * @return true if there are any files at all. 622 */ filesExist()623 public boolean filesExist() { 624 String[] names = listFileNames(JE_SUFFIXES); 625 return (names.length != 0); 626 } 627 628 /** 629 * Get the first or last file number in the set of JE files. 630 * 631 * @param first if true, get the first file, else get the last file 632 * @return the file number or null if no files exist 633 */ getFileNum(boolean first)634 private Long getFileNum(boolean first) { 635 String[] names = listFileNames(JE_SUFFIXES); 636 if (names.length == 0) { 637 return null; 638 } 639 int index = 0; 640 if (!first) { 641 index = names.length - 1; 642 } 643 return getNumFromName(names[index]); 644 } 645 646 /** 647 * Get the data dir index from a file name. 648 * 649 * @return index into dbEnvDataDirs of this fileName's data directory. 650 * -1 if multiple data directories are not being used. 651 */ getDataDirIndexFromName(String fileName)652 private int getDataDirIndexFromName(String fileName) { 653 if (nDataDirs == 0) { 654 return -1; 655 } 656 657 int dataDirEnd = fileName.lastIndexOf(File.separator); 658 String dataDir = fileName.substring(0, dataDirEnd); 659 return Integer.valueOf 660 (Integer.parseInt(dataDir.substring("data".length()))); 661 } 662 663 /** 664 * Get the file number from a file name. 665 * 666 * @param fileName the file name 667 * @return the file number 668 */ getNumFromName(String fileName)669 public Long getNumFromName(String fileName) { 670 String name = fileName; 671 if (nDataDirs != 0) { 672 name = name.substring(name.lastIndexOf(File.separator) + 1); 673 } 674 String fileNumber = name.substring(0, name.indexOf(".")); 675 return Long.valueOf(Long.parseLong(fileNumber, 16)); 676 } 677 678 /** 679 * Find JE files. Return names sorted in ascending fashion. 680 * @param suffixes which type of file we're looking for 681 * @return array of file names 682 * 683 * Used by unit tests so package protection. 684 */ listFileNames(String[] suffixes)685 String[] listFileNames(String[] suffixes) { 686 JEFileFilter fileFilter = new JEFileFilter(suffixes); 687 return listFileNamesInternal(fileFilter); 688 } 689 690 /** 691 * Find .jdb files which are >= the minimimum file number and 692 * <= the maximum file number. 693 * Return names sorted in ascending fashion. 694 * 695 * @return array of file names 696 */ listFileNames(long minFileNumber, long maxFileNumber)697 public String[] listFileNames(long minFileNumber, long maxFileNumber) { 698 JEFileFilter fileFilter = 699 new JEFileFilter(JE_SUFFIXES, minFileNumber, maxFileNumber); 700 return listFileNamesInternal(fileFilter); 701 } 702 703 private static Comparator<File> fileComparator = 704 new Comparator<File>() { 705 706 private String getFileNum(File file) { 707 String fname = file.toString(); 708 return fname.substring(fname.indexOf(File.separator) + 1); 709 } 710 711 public int compare(File o1, File o2) { 712 String fnum1 = getFileNum(o1); 713 String fnum2 = getFileNum(o2); 714 return o1.compareTo(o2); 715 } 716 }; 717 718 private static Comparator<String> stringComparator = 719 new Comparator<String>() { 720 721 private String getFileNum(String fname) { 722 return fname.substring(fname.indexOf(File.separator) + 1); 723 } 724 725 public int compare(String o1, String o2) { 726 String fnum1 = getFileNum(o1); 727 String fnum2 = getFileNum(o2); 728 return fnum1.compareTo(fnum2); 729 } 730 }; 731 732 /** 733 * Find JE files, flavor for unit test support. 734 * 735 * @param suffixes which type of file we're looking for 736 * @return array of file names 737 */ listFiles(File envDirFile, String[] suffixes, boolean envMultiSubDir)738 public static String[] listFiles(File envDirFile, 739 String[] suffixes, 740 boolean envMultiSubDir) { 741 String[] names = envDirFile.list(new JEFileFilter(suffixes)); 742 743 ArrayList<String> subFileNames = new ArrayList<String>(); 744 if (envMultiSubDir) { 745 for (File file : envDirFile.listFiles()) { 746 if (file.isDirectory() && file.getName().startsWith("data")) { 747 File[] subFiles = 748 file.listFiles(new JEFileFilter(suffixes)); 749 for (File subFile : subFiles) { 750 subFileNames.add(file.getName() + 751 File.separator + subFile.getName()); 752 } 753 } 754 } 755 756 String[] totalFileNames = 757 new String[names.length + subFileNames.size()]; 758 for (int i = 0; i < totalFileNames.length; i++) { 759 if (i < names.length) { 760 totalFileNames[i] = names[i]; 761 } else { 762 totalFileNames[i] = subFileNames.get(i - names.length); 763 } 764 } 765 names = totalFileNames; 766 } 767 768 if (names != null) { 769 Arrays.sort(names, stringComparator); 770 } else { 771 names = new String[0]; 772 } 773 774 return names; 775 } 776 listJDBFiles()777 public File[] listJDBFiles() { 778 if (nDataDirs == 0) { 779 return listJDBFilesInternalSingleDir(new JEFileFilter(JE_SUFFIXES)); 780 } else { 781 return listJDBFilesInternalMultiDir(new JEFileFilter(JE_SUFFIXES)); 782 } 783 } 784 listJDBFilesInternalSingleDir(JEFileFilter fileFilter)785 public File[] listJDBFilesInternalSingleDir(JEFileFilter fileFilter) { 786 File[] files = dbEnvHome.listFiles(fileFilter); 787 if (files != null) { 788 Arrays.sort(files); 789 } else { 790 files = new File[0]; 791 } 792 793 return files; 794 } 795 listJDBFilesInternalMultiDir(JEFileFilter fileFilter)796 public File[] listJDBFilesInternalMultiDir(JEFileFilter fileFilter) { 797 File[][] files = new File[nDataDirs][]; 798 int nTotalFiles = 0; 799 int i = 0; 800 for (File envDir : dbEnvDataDirs) { 801 files[i] = envDir.listFiles(fileFilter); 802 nTotalFiles += files[i].length; 803 i++; 804 } 805 806 if (nTotalFiles == 0) { 807 return new File[0]; 808 } 809 810 File[] ret = new File[nTotalFiles]; 811 i = 0; 812 for (File[] envFiles : files) { 813 for (File envFile : envFiles) { 814 ret[i++] = envFile; 815 } 816 } 817 818 Arrays.sort(ret, fileComparator); 819 return ret; 820 } 821 listFileNamesInternal(JEFileFilter fileFilter)822 private String[] listFileNamesInternal(JEFileFilter fileFilter) { 823 if (nDataDirs == 0) { 824 return listFileNamesInternalSingleDir(fileFilter); 825 } else { 826 return listFileNamesInternalMultiDirs(fileFilter); 827 } 828 } 829 listFileNamesInternalSingleDir(JEFileFilter fileFilter)830 private String[] listFileNamesInternalSingleDir(JEFileFilter fileFilter) { 831 String[] fileNames = dbEnvHome.list(fileFilter); 832 if (fileNames != null) { 833 Arrays.sort(fileNames); 834 } else { 835 fileNames = new String[0]; 836 } 837 return fileNames; 838 } 839 listFileNamesInternalMultiDirs(JEFileFilter filter)840 private String[] listFileNamesInternalMultiDirs(JEFileFilter filter) { 841 String[][] files = new String[nDataDirs][]; 842 int nTotalFiles = 0; 843 int i = 0; 844 for (File envDir : dbEnvDataDirs) { 845 files[i] = envDir.list(filter); 846 847 String envDirName = envDir.toString(); 848 String dataDirName = envDirName. 849 substring(envDirName.lastIndexOf(File.separator) + 1); 850 851 for (int j = 0; j < files[i].length; j += 1) { 852 files[i][j] = dataDirName + File.separator + files[i][j]; 853 } 854 855 nTotalFiles += files[i].length; 856 i++; 857 } 858 859 if (nTotalFiles == 0) { 860 return new String[0]; 861 } 862 863 String[] ret = new String[nTotalFiles]; 864 i = 0; 865 for (String[] envFiles : files) { 866 for (String envFile : envFiles) { 867 ret[i++] = envFile; 868 } 869 } 870 871 Arrays.sort(ret, stringComparator); 872 return ret; 873 } 874 checkNoDataDirs()875 private void checkNoDataDirs() { 876 String[] dataDirNames = 877 dbEnvHome.list(new FilenameFilter() { 878 public boolean accept(File dir, String name) { 879 /* We'll validate the subdirNum later. */ 880 return name != null && 881 name.length() == "dataNNN".length() && 882 name.startsWith("data"); 883 } 884 } 885 ); 886 if (dataDirNames != null && dataDirNames.length != 0) { 887 throw EnvironmentFailureException.unexpectedState 888 (EnvironmentParams.LOG_N_DATA_DIRECTORIES.getName() + 889 " was not set and expected to find no" + 890 " data directories, but found " + 891 dataDirNames.length + " data directories instead."); 892 } 893 } 894 gatherDataDirs()895 public File[] gatherDataDirs() { 896 String[] dataDirNames = 897 dbEnvHome.list(new FilenameFilter() { 898 public boolean accept(File dir, String name) { 899 /* We'll validate the subdirNum later. */ 900 return name != null && 901 name.length() == "dataNNN".length() && 902 name.startsWith("data"); 903 } 904 } 905 ); 906 if (dataDirNames != null) { 907 Arrays.sort(dataDirNames); 908 } else { 909 dataDirNames = new String[0]; 910 } 911 912 if (dataDirNames.length != nDataDirs) { 913 throw EnvironmentFailureException.unexpectedState 914 (EnvironmentParams.LOG_N_DATA_DIRECTORIES.getName() + 915 " was set and expected to find " + nDataDirs + 916 " data directories, but found " + 917 dataDirNames.length + " instead."); 918 } 919 920 int ddNum = 1; 921 File[] dataDirs = new File[nDataDirs]; 922 for (String fn : dataDirNames) { 923 String subdirNumStr = fn.substring(4); 924 try { 925 int subdirNum = Integer.parseInt(subdirNumStr); 926 if (subdirNum != ddNum) { 927 throw EnvironmentFailureException.unexpectedState 928 ("Expected to find data subdir: data" + 929 paddedDirNum(ddNum) + 930 " but found data" + 931 subdirNumStr + " instead."); 932 933 } 934 935 File dataDir = new File(dbEnvHome, fn); 936 if (!dataDir.exists()) { 937 throw EnvironmentFailureException.unexpectedState 938 ("Data dir: " + dataDir + " doesn't exist."); 939 } 940 if (!dataDir.isDirectory()) { 941 throw EnvironmentFailureException.unexpectedState 942 ("Data dir: " + dataDir + " is not a directory."); 943 } 944 dataDirs[ddNum - 1] = dataDir; 945 } catch (NumberFormatException E) { 946 throw EnvironmentFailureException.unexpectedState 947 ("Illegal data subdir: data" + subdirNumStr); 948 } 949 ddNum++; 950 } 951 return dataDirs; 952 } 953 paddedDirNum(int dirNum)954 private String paddedDirNum(int dirNum) { 955 String paddedStr = "000" + dirNum; 956 int len = paddedStr.length(); 957 return paddedStr.substring(len - 3); 958 } 959 960 /** 961 * @return the full file name and path for the nth JE file. 962 */ getFullFileNames(long fileNum)963 String[] getFullFileNames(long fileNum) { 964 if (includeDeletedFiles) { 965 int nSuffixes = JE_AND_DEL_SUFFIXES.length; 966 String[] ret = new String[nSuffixes]; 967 for (int i = 0; i < nSuffixes; i++) { 968 ret[i] = getFullFileName(fileNum, JE_AND_DEL_SUFFIXES[i]); 969 } 970 return ret; 971 } 972 return new String[] { getFullFileName(fileNum, JE_SUFFIX) }; 973 } 974 getDataDir(long fileNum)975 private File getDataDir(long fileNum) { 976 return (nDataDirs == 0) ? 977 dbEnvHome : 978 dbEnvDataDirs[((int) (fileNum % nDataDirs))]; 979 } 980 getFullFileName(long fileNum)981 public String getFullFileName(long fileNum) { 982 return getFullFileName(fileNum, JE_SUFFIX); 983 } 984 985 /** 986 * @return the full file name and path for this file name. 987 */ getFullFileName(long fileNum, String suffix)988 public String getFullFileName(long fileNum, String suffix) { 989 File dbEnvDataDir = getDataDir(fileNum); 990 return dbEnvDataDir + File.separator + getFileName(fileNum, suffix); 991 } 992 993 /* 994 * Return the full file name of a specified log file name, including the 995 * sub directories names if needed. 996 */ getFullFileName(String fileName)997 public String getFullFileName(String fileName) { 998 final int suffixStartPos = fileName.indexOf("."); 999 String suffix = fileName.substring(suffixStartPos, fileName.length()); 1000 assert suffix != null; 1001 String fileNum = fileName.substring(0, suffixStartPos); 1002 1003 return getFullFileName 1004 (Long.valueOf(Long.parseLong(fileNum, 16)), suffix); 1005 } 1006 1007 /** 1008 * @return the file name for the nth file. 1009 */ getFileName(long fileNum, String suffix)1010 public static String getFileName(long fileNum, String suffix) { 1011 return (getFileNumberString(fileNum) + suffix); 1012 } 1013 1014 /** @return the file name for the nth log (*.jdb) file. */ getFileName(long fileNum)1015 public static String getFileName(long fileNum) { 1016 return getFileName(fileNum, JE_SUFFIX); 1017 } 1018 1019 /** 1020 * HexFormatter generates a 0 padded string starting with 0x. We want 1021 * the right most 8 digits, so start at 10. 1022 */ getFileNumberString(long fileNum)1023 private static String getFileNumberString(long fileNum) { 1024 return HexFormatter.formatLong(fileNum).substring(10); 1025 } 1026 1027 /** 1028 * @return true if successful, false if File.renameTo returns false, which 1029 * can occur on Windows if the file was recently closed. 1030 */ renameFile(final long fileNum, final String newSuffix)1031 public boolean renameFile(final long fileNum, final String newSuffix) 1032 throws IOException, DatabaseException { 1033 1034 return renameFile(fileNum, newSuffix, null) != null; 1035 } 1036 1037 /** 1038 * Rename this file to NNNNNNNN.suffix. If that file already exists, try 1039 * NNNNNNNN.suffix.1, etc. Used for deleting files or moving corrupt files 1040 * aside. 1041 * 1042 * @param fileNum the file we want to move 1043 * 1044 * @param newSuffix the new file suffix 1045 * 1046 * @param subDir the data directory sub-directory to rename the file into. 1047 * The subDir must already exist. May be null to leave the file in its 1048 * current data directory. 1049 * 1050 * @return renamed File if successful, or null if File.renameTo returns 1051 * false, which can occur on Windows if the file was recently closed. 1052 */ renameFile(final long fileNum, final String newSuffix, final String subDir)1053 public File renameFile(final long fileNum, 1054 final String newSuffix, 1055 final String subDir) 1056 throws IOException { 1057 1058 final File oldDir = getDataDir(fileNum); 1059 final String oldName = getFileName(fileNum); 1060 final File oldFile = new File(oldDir, oldName); 1061 1062 final File newDir = 1063 (subDir != null) ? (new File(oldDir, subDir)) : oldDir; 1064 1065 final String newName = getFileName(fileNum, newSuffix); 1066 1067 String generation = ""; 1068 int repeatNum = 0; 1069 1070 while (true) { 1071 final File newFile = new File(newDir, newName + generation); 1072 1073 if (newFile.exists()) { 1074 repeatNum++; 1075 generation = "." + repeatNum; 1076 continue; 1077 } 1078 1079 clearFileCache(fileNum); 1080 1081 final boolean success = oldFile.renameTo(newFile); 1082 return success ? newFile : null; 1083 } 1084 } 1085 1086 /** 1087 * Delete log file NNNNNNNN. 1088 * 1089 * @param fileNum the file we want to move 1090 * 1091 * @return true if successful, false if File.delete returns false, which 1092 * can occur on Windows if the file was recently closed. 1093 */ deleteFile(final long fileNum)1094 public boolean deleteFile(final long fileNum) 1095 throws IOException, DatabaseException { 1096 1097 final String fileName = getFullFileNames(fileNum)[0]; 1098 clearFileCache(fileNum); 1099 final File file = new File(fileName); 1100 return file.delete(); 1101 } 1102 1103 /** 1104 * Returns the log version for the given file. 1105 */ getFileLogVersion(long fileNum)1106 public int getFileLogVersion(long fileNum) 1107 throws DatabaseException { 1108 1109 try { 1110 FileHandle handle = getFileHandle(fileNum); 1111 int logVersion = handle.getLogVersion(); 1112 handle.release(); 1113 return logVersion; 1114 } catch (FileNotFoundException e) { 1115 throw new EnvironmentFailureException 1116 (envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND, e); 1117 } catch (ChecksumException e) { 1118 throw new EnvironmentFailureException 1119 (envImpl, EnvironmentFailureReason.LOG_CHECKSUM, e); 1120 } 1121 } 1122 1123 /** 1124 * Return a read only file handle that corresponds to this file number. 1125 * Retrieve it from the cache or open it anew and validate the file header. 1126 * This method takes a latch on this file, so that the file descriptor will 1127 * be held in the cache as long as it's in use. When the user is done with 1128 * the file, the latch must be released. 1129 * 1130 * @param fileNum which file 1131 * @return the file handle for the existing or newly created file 1132 */ getFileHandle(long fileNum)1133 public FileHandle getFileHandle(long fileNum) 1134 throws FileNotFoundException, ChecksumException, DatabaseException { 1135 1136 /* Check the file cache for this file. */ 1137 Long fileId = Long.valueOf(fileNum); 1138 FileHandle fileHandle = null; 1139 1140 /** 1141 * Loop until we get an open FileHandle. 1142 */ 1143 try { 1144 while (true) { 1145 1146 /* 1147 * The file cache is intentionally not latched here so that 1148 * it's not a bottleneck in the fast path. We check that the 1149 * file handle that we get back is really still open after we 1150 * latch it down below. 1151 */ 1152 fileHandle = fileCache.get(fileId); 1153 1154 /* 1155 * If the file isn't in the cache, latch the cache and check 1156 * again. Under the latch, if the file is not in the cache we 1157 * add it to the cache but do not open the file yet. We latch 1158 * the handle here, and open the file further below after 1159 * releasing the cache latch. This prevents blocking other 1160 * threads that are opening other files while we open this 1161 * file. The latch on the handle blocks other threads waiting 1162 * to open the same file, which is necessary. 1163 */ 1164 boolean newHandle = false; 1165 if (fileHandle == null) { 1166 synchronized (fileCache) { 1167 fileHandle = fileCache.get(fileId); 1168 if (fileHandle == null) { 1169 newHandle = true; 1170 fileHandle = addFileHandle(fileId); 1171 } 1172 } 1173 } 1174 1175 if (newHandle) { 1176 1177 /* 1178 * Open the file with the fileHandle latched. It was 1179 * latched by addFileHandle above. 1180 */ 1181 boolean success = false; 1182 try { 1183 openFileHandle(fileHandle, FileMode.READ_MODE, 1184 null /*existingHandle*/); 1185 success = true; 1186 } finally { 1187 if (!success) { 1188 /* An exception is in flight -- clean up. */ 1189 fileHandle.release(); 1190 clearFileCache(fileNum); 1191 } 1192 } 1193 } else { 1194 /* 1195 * The handle was found in the cache. Latch the fileHandle 1196 * before checking getFile below and returning. 1197 */ 1198 if (!fileHandle.latchNoWait()) { 1199 1200 /* 1201 * But the handle was latched. Rather than wait, let's 1202 * just make a new transient handle. It doesn't need 1203 * to be latched, but it does need to be closed. 1204 */ 1205 final FileHandle existingHandle = fileHandle; 1206 fileHandle = new FileHandle( 1207 envImpl, fileId, getFileNumberString(fileId)) { 1208 @Override 1209 public void release() 1210 throws DatabaseException { 1211 1212 try { 1213 close(); 1214 } catch (IOException E) { 1215 // Ignore 1216 } 1217 } 1218 }; 1219 1220 openFileHandle(fileHandle, FileMode.READ_MODE, 1221 existingHandle); 1222 } 1223 } 1224 1225 /* 1226 * We may have obtained this file handle outside the file cache 1227 * latch, so we have to test that the handle is still valid. 1228 * If it's not, then loop back and try again. 1229 */ 1230 if (fileHandle.getFile() == null) { 1231 fileHandle.release(); 1232 } else { 1233 break; 1234 } 1235 } 1236 } catch (FileNotFoundException e) { 1237 /* Handle at higher levels. */ 1238 throw e; 1239 } catch (IOException e) { 1240 throw new EnvironmentFailureException 1241 (envImpl, EnvironmentFailureReason.LOG_READ, e); 1242 } 1243 1244 return fileHandle; 1245 } 1246 1247 /** 1248 * Creates a new FileHandle and adds it to the cache, but does not open 1249 * the file. 1250 * @return the latched FileHandle. 1251 */ addFileHandle(Long fileNum)1252 private FileHandle addFileHandle(Long fileNum) 1253 throws IOException, DatabaseException { 1254 1255 FileHandle fileHandle = 1256 new FileHandle(envImpl, fileNum, getFileNumberString(fileNum)); 1257 fileCache.add(fileNum, fileHandle); 1258 fileHandle.latch(); 1259 return fileHandle; 1260 } 1261 getAppropriateReadWriteMode()1262 private FileMode getAppropriateReadWriteMode() { 1263 if (useODSYNC) { 1264 return FileMode.READWRITE_ODSYNC_MODE; 1265 } 1266 return FileMode.READWRITE_MODE; 1267 } 1268 1269 /** 1270 * Creates a new handle and opens it. Does not add the handle to the 1271 * cache. 1272 */ makeFileHandle(long fileNum, FileMode mode)1273 private FileHandle makeFileHandle(long fileNum, FileMode mode) 1274 throws FileNotFoundException, ChecksumException { 1275 1276 FileHandle fileHandle = 1277 new FileHandle(envImpl, fileNum, getFileNumberString(fileNum)); 1278 openFileHandle(fileHandle, mode, null /*existingHandle*/); 1279 return fileHandle; 1280 } 1281 1282 /** 1283 * Opens the file for the given handle and initializes it. 1284 * 1285 * @param existingHandle is an already open handle for the same file or 1286 * null. If non-null it is used to avoid the cost of reading the file 1287 * header. 1288 */ openFileHandle(FileHandle fileHandle, FileMode mode, FileHandle existingHandle)1289 private void openFileHandle(FileHandle fileHandle, 1290 FileMode mode, 1291 FileHandle existingHandle) 1292 throws FileNotFoundException, ChecksumException { 1293 1294 nFileOpens.increment(); 1295 long fileNum = fileHandle.getFileNum(); 1296 String[] fileNames = getFullFileNames(fileNum); 1297 RandomAccessFile newFile = null; 1298 String fileName = null; 1299 boolean success = false; 1300 try { 1301 1302 /* 1303 * Open the file. Note that we are going to try a few names to open 1304 * this file -- we'll try for N.jdb, and if that doesn't exist and 1305 * we're configured to look for all types, we'll look for N.del. 1306 */ 1307 FileNotFoundException FNFE = null; 1308 for (String fileName2 : fileNames) { 1309 fileName = fileName2; 1310 try { 1311 newFile = fileFactory.createFile(dbEnvHome, fileName, 1312 mode.getModeValue()); 1313 break; 1314 } catch (FileNotFoundException e) { 1315 /* Save the first exception thrown. */ 1316 if (FNFE == null) { 1317 FNFE = e; 1318 } 1319 } 1320 } 1321 1322 /* 1323 * If we didn't find the file or couldn't create it, rethrow the 1324 * exception. 1325 */ 1326 if (newFile == null) { 1327 assert FNFE != null; 1328 throw FNFE; 1329 } 1330 1331 /* 1332 * If there is an existing open handle, there is no need to read or 1333 * validate the header. Note that the log version is zero if the 1334 * existing handle is not fully initialized. 1335 */ 1336 if (existingHandle != null) { 1337 final int logVersion = existingHandle.getLogVersion(); 1338 if (logVersion > 0) { 1339 fileHandle.init(newFile, logVersion); 1340 success = true; 1341 return; 1342 } 1343 } 1344 1345 int logVersion = LogEntryType.LOG_VERSION; 1346 1347 if (newFile.length() == 0) { 1348 1349 /* 1350 * If the file is empty, reinitialize it if we can. If not, 1351 * send the file handle back up; the calling code will deal 1352 * with the fact that there's nothing there. 1353 */ 1354 if (mode.isWritable()) { 1355 /* An empty file, write a header. */ 1356 long lastLsn = DbLsn.longToLsn(perFileLastUsedLsn.remove 1357 (Long.valueOf(fileNum - 1))); 1358 long headerPrevOffset = 0; 1359 if (lastLsn != DbLsn.NULL_LSN) { 1360 headerPrevOffset = DbLsn.getFileOffset(lastLsn); 1361 } 1362 if ((headerPrevOffset == 0) && 1363 (fileNum > 1) && 1364 syncAtFileEnd) { 1365 /* Get more info if this happens again. [#20732] */ 1366 throw EnvironmentFailureException.unexpectedState 1367 (envImpl, 1368 "Zero prevOffset fileNum=0x" + 1369 Long.toHexString(fileNum) + 1370 " lastLsn=" + DbLsn.getNoFormatString(lastLsn) + 1371 " perFileLastUsedLsn=" + perFileLastUsedLsn + 1372 " fileLen=" + newFile.length()); 1373 } 1374 FileHeader fileHeader = 1375 new FileHeader(fileNum, headerPrevOffset); 1376 writeFileHeader(newFile, fileName, fileHeader, fileNum); 1377 } 1378 } else { 1379 /* A non-empty file, check the header */ 1380 logVersion = 1381 readAndValidateFileHeader(newFile, fileName, fileNum); 1382 } 1383 fileHandle.init(newFile, logVersion); 1384 success = true; 1385 } catch (FileNotFoundException e) { 1386 /* Handle at higher levels. */ 1387 throw e; 1388 } catch (IOException e) { 1389 throw new EnvironmentFailureException 1390 (envImpl, EnvironmentFailureReason.LOG_READ, 1391 "Couldn't open file " + fileName, e); 1392 } catch (DatabaseException e) { 1393 1394 /* 1395 * Let this exception go as a checksum exception, so it sets the 1396 * run recovery state correctly. 1397 */ 1398 closeFileInErrorCase(newFile); 1399 e.addErrorMessage("Couldn't open file " + fileName); 1400 throw e; 1401 } finally { 1402 if (!success) { 1403 closeFileInErrorCase(newFile); 1404 } 1405 } 1406 } 1407 1408 /** 1409 * Close this file and eat any exceptions. Used in catch clauses. 1410 */ closeFileInErrorCase(RandomAccessFile file)1411 private void closeFileInErrorCase(RandomAccessFile file) { 1412 try { 1413 if (file != null) { 1414 file.close(); 1415 } 1416 } catch (Exception e) { 1417 } 1418 } 1419 1420 /** 1421 * Read the given JE log file and validate the header. 1422 * 1423 * @throws DatabaseException if the file header isn't valid 1424 * 1425 * @return file header log version. 1426 */ readAndValidateFileHeader(RandomAccessFile file, String fileName, long fileNum)1427 private int readAndValidateFileHeader(RandomAccessFile file, 1428 String fileName, 1429 long fileNum) 1430 throws ChecksumException, DatabaseException { 1431 1432 /* 1433 * Read the file header from this file. It's always the first log 1434 * entry. 1435 * 1436 * The special UNKNOWN_FILE_HEADER_VERSION value is passed for reading 1437 * the entry header. The actual log version is read as part of the 1438 * FileHeader entry. [#16939] 1439 */ 1440 LogManager logManager = envImpl.getLogManager(); 1441 LogEntry headerEntry = logManager.getLogEntryAllowChecksumException 1442 (DbLsn.makeLsn(fileNum, 0), file, 1443 LogEntryType.UNKNOWN_FILE_HEADER_VERSION); 1444 FileHeader header = (FileHeader) headerEntry.getMainItem(); 1445 return header.validate(envImpl, fileName, fileNum); 1446 } 1447 1448 /** 1449 * Write a proper file header to the given file. 1450 */ writeFileHeader(RandomAccessFile file, String fileName, FileHeader header, long fileNum)1451 private void writeFileHeader(RandomAccessFile file, 1452 String fileName, 1453 FileHeader header, 1454 long fileNum) 1455 throws DatabaseException { 1456 1457 /* Fail loudly if the environment is invalid. */ 1458 envImpl.checkIfInvalid(); 1459 1460 /* 1461 * Fail silent if the environment is not open. 1462 */ 1463 if (envImpl.mayNotWrite()) { 1464 return; 1465 } 1466 1467 /* Write file header into this buffer in the usual log entry format. */ 1468 LogEntry headerLogEntry = 1469 new FileHeaderEntry(LogEntryType.LOG_FILE_HEADER, header); 1470 ByteBuffer headerBuf = envImpl.getLogManager(). 1471 putIntoBuffer(headerLogEntry, 1472 0); // prevLogEntryOffset 1473 1474 /* Write the buffer into the channel. */ 1475 int bytesWritten; 1476 try { 1477 if (LOGWRITE_EXCEPTION_TESTING) { 1478 generateLogWriteException(file, headerBuf, 0, fileNum); 1479 } 1480 1481 /* 1482 * Always flush header so that file.length() will be non-zero when 1483 * this method returns and two threads won't attempt to create the 1484 * header. [#20732] 1485 */ 1486 bytesWritten = writeToFile(file, headerBuf, 0, fileNum, 1487 true /*flushRequired*/); 1488 1489 if (fileNum > savedCurrentFileNum) { 1490 1491 /* 1492 * Writing the new file header succeeded without an IOE. This 1493 * can not be undone in the event of another IOE (Out Of Disk 1494 * Space) on the next write so update the saved LSN state with 1495 * the new info. Do not update the nextAvailableLsn with a 1496 * smaller (earlier) LSN in case there's already something in a 1497 * buffer that is after the new header. [#15754] 1498 */ 1499 long lsnAfterHeader = DbLsn.makeLsn(fileNum, bytesWritten); 1500 if (DbLsn.compareTo(nextAvailableLsn, lsnAfterHeader) < 0) { 1501 nextAvailableLsn = lsnAfterHeader; 1502 } 1503 1504 lastUsedLsn = DbLsn.makeLsn(fileNum, bytesWritten); 1505 prevOffset = bytesWritten; 1506 forceNewFile = false; 1507 currentFileNum = fileNum; 1508 saveLastPosition(); 1509 } 1510 } catch (ClosedChannelException e) { 1511 1512 /* 1513 * The channel should never be closed. It may be closed because 1514 * of an interrupt received by another thread. See SR [#10463] 1515 */ 1516 throw new ThreadInterruptedException 1517 (envImpl, "Channel closed, may be due to thread interrupt", e); 1518 } catch (IOException e) { 1519 /* Possibly an out of disk exception. */ 1520 throw new LogWriteException(envImpl, e); 1521 } 1522 1523 if (bytesWritten != headerLogEntry.getSize() + 1524 LogEntryHeader.MIN_HEADER_SIZE) { 1525 throw new EnvironmentFailureException 1526 (envImpl, EnvironmentFailureReason.LOG_INTEGRITY, 1527 "File " + fileName + 1528 " was created with an incomplete header. Only " + 1529 bytesWritten + " bytes were written."); 1530 } 1531 } 1532 1533 /** 1534 * @return the prevOffset field stored in the file header. 1535 */ getFileHeaderPrevOffset(long fileNum)1536 long getFileHeaderPrevOffset(long fileNum) 1537 throws ChecksumException, DatabaseException { 1538 1539 try { 1540 LogEntry headerEntry = 1541 envImpl.getLogManager().getLogEntryAllowChecksumException 1542 (DbLsn.makeLsn(fileNum, 0)); 1543 FileHeader header = (FileHeader) headerEntry.getMainItem(); 1544 return header.getLastEntryInPrevFileOffset(); 1545 } catch (FileNotFoundException e) { 1546 throw new EnvironmentFailureException 1547 (envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND, e); 1548 } 1549 } 1550 1551 /* 1552 * Support for writing new log entries 1553 */ 1554 1555 /** 1556 * @return the file offset of the last LSN that was used. For constructing 1557 * the headers of log entries. If the last LSN that was used was in a 1558 * previous file, or this is the very first LSN of the whole system, return 1559 * 0. 1560 */ getPrevEntryOffset()1561 long getPrevEntryOffset() { 1562 return prevOffset; 1563 } 1564 1565 /** 1566 * Increase the current log position by "size" bytes. Move the prevOffset 1567 * pointer along. 1568 * 1569 * @param size is an unsigned int 1570 * @return true if we flipped to the next log file. 1571 */ bumpLsn(long size)1572 boolean bumpLsn(long size) { 1573 1574 /* Save copy of initial LSN state. */ 1575 saveLastPosition(); 1576 1577 boolean flippedFiles = false; 1578 1579 if (forceNewFile || 1580 (DbLsn.getFileOffset(nextAvailableLsn) + size) > maxFileSize) { 1581 1582 forceNewFile = false; 1583 1584 /* Move to another file. */ 1585 currentFileNum++; 1586 1587 /* Remember the last used LSN of the previous file. */ 1588 if (lastUsedLsn != DbLsn.NULL_LSN) { 1589 perFileLastUsedLsn.put 1590 (Long.valueOf(DbLsn.getFileNumber(lastUsedLsn)), 1591 Long.valueOf(lastUsedLsn)); 1592 } 1593 prevOffset = 0; 1594 lastUsedLsn = 1595 DbLsn.makeLsn(currentFileNum, firstLogEntryOffset()); 1596 flippedFiles = true; 1597 } else { 1598 if (lastUsedLsn == DbLsn.NULL_LSN) { 1599 prevOffset = 0; 1600 } else { 1601 prevOffset = DbLsn.getFileOffset(lastUsedLsn); 1602 } 1603 lastUsedLsn = nextAvailableLsn; 1604 } 1605 nextAvailableLsn = 1606 DbLsn.makeLsn(DbLsn.getFileNumber(lastUsedLsn), 1607 (DbLsn.getFileOffset(lastUsedLsn) + size)); 1608 1609 return flippedFiles; 1610 } 1611 1612 /** 1613 * Write out a log buffer to the file. 1614 * @param fullBuffer buffer to write 1615 * @param flushRequired true if this write can not be queued on the 1616 * Write Queue. 1617 */ writeLogBuffer(LogBuffer fullBuffer, boolean flushRequired)1618 void writeLogBuffer(LogBuffer fullBuffer, boolean flushRequired) 1619 throws DatabaseException { 1620 1621 /* Fail loudly if the environment is invalid. */ 1622 envImpl.checkIfInvalid(); 1623 1624 /* 1625 * Fail silent if the environment is not open. 1626 */ 1627 if (envImpl.mayNotWrite()) { 1628 return; 1629 } 1630 1631 /* Use the LSN to figure out what file to write this buffer to. */ 1632 long firstLsn = fullBuffer.getFirstLsn(); 1633 1634 /* 1635 * Is there anything in this write buffer? We could have been called by 1636 * the environment shutdown, and nothing is actually in the buffer. 1637 */ 1638 if (firstLsn != DbLsn.NULL_LSN) { 1639 1640 RandomAccessFile file = 1641 endOfLog.getWritableFile(DbLsn.getFileNumber(firstLsn), true); 1642 ByteBuffer data = fullBuffer.getDataBuffer(); 1643 1644 try { 1645 1646 /* 1647 * Check that we do not overwrite unless the file only contains 1648 * a header [#11915] [#12616]. 1649 */ 1650 assert fullBuffer.getRewriteAllowed() || 1651 (DbLsn.getFileOffset(firstLsn) >= file.length() || 1652 file.length() == firstLogEntryOffset()) : 1653 "FileManager would overwrite non-empty file 0x" + 1654 Long.toHexString(DbLsn.getFileNumber(firstLsn)) + 1655 " lsnOffset=0x" + 1656 Long.toHexString(DbLsn.getFileOffset(firstLsn)) + 1657 " fileLength=0x" + 1658 Long.toHexString(file.length()); 1659 1660 if (IO_EXCEPTION_TESTING_ON_WRITE) { 1661 throw new IOException("generated for testing (write)"); 1662 } 1663 if (LOGWRITE_EXCEPTION_TESTING) { 1664 generateLogWriteException 1665 (file, data, DbLsn.getFileOffset(firstLsn), 1666 DbLsn.getFileNumber(firstLsn)); 1667 } 1668 writeToFile(file, data, DbLsn.getFileOffset(firstLsn), 1669 DbLsn.getFileNumber(firstLsn), 1670 flushRequired); 1671 } catch (ClosedChannelException e) { 1672 1673 /* 1674 * The file should never be closed. It may be closed because 1675 * of an interrupt received by another thread. See SR [#10463]. 1676 */ 1677 throw new ThreadInterruptedException 1678 (envImpl, "File closed, may be due to thread interrupt", 1679 e); 1680 } catch (IOException e) { 1681 1682 if (!continueAfterWriteException()) { 1683 throw new LogWriteException(envImpl, e); 1684 } 1685 1686 /* 1687 * Possibly an out of disk exception, but java.io will only 1688 * tell us IOException with no indication of whether it's out 1689 * of disk or something else. Better support may exist in 1690 * Java6. 1691 * 1692 * Since we can't tell what sectors were actually written to 1693 * disk, we need to change any commit records that might have 1694 * made it out to disk to abort records. If they made it to 1695 * disk on the write, then rewriting should allow them to be 1696 * rewritten. See [11271]. 1697 * 1698 * Rewriting committed transactions in replication is highly 1699 * problematic, and can lead to divergence between the replica 1700 * and master. If this path is re-enabled, we must assess its 1701 * impact in replication, since the log entries may already 1702 * be sent to other nodes. 1703 */ 1704 abortCommittedTxns(data); 1705 try { 1706 if (IO_EXCEPTION_TESTING_ON_WRITE) { 1707 throw new IOException 1708 ("generated for testing (write)"); 1709 } 1710 writeToFile(file, data, DbLsn.getFileOffset(firstLsn), 1711 DbLsn.getFileNumber(firstLsn), flushRequired); 1712 } catch (IOException e2) { 1713 fullBuffer.setRewriteAllowed(); 1714 /* Use an exception that does not invalidate the env. */ 1715 throw EnvironmentFailureException.unexpectedException(e2); 1716 } 1717 } 1718 1719 assert EnvironmentImpl.maybeForceYield(); 1720 } 1721 } 1722 1723 /** 1724 * Write a buffer to a file at a given offset. 1725 */ writeToFile(RandomAccessFile file, ByteBuffer data, long destOffset, long fileNum, boolean flushRequired)1726 private int writeToFile(RandomAccessFile file, 1727 ByteBuffer data, 1728 long destOffset, 1729 long fileNum, 1730 boolean flushRequired) 1731 throws IOException, DatabaseException { 1732 1733 int totalBytesWritten = 0; 1734 1735 bumpWriteCount("write"); 1736 1737 int pos = data.position(); 1738 int size = data.limit() - pos; 1739 1740 if (lastFileNumberTouched == fileNum && 1741 (Math.abs(destOffset - lastFileTouchedOffset) < 1742 ADJACENT_TRACK_SEEK_DELTA)) { 1743 nSequentialWrites.increment(); 1744 nSequentialWriteBytes.add(size); 1745 } else { 1746 nRandomWrites.increment(); 1747 nRandomWriteBytes.add(size); 1748 } 1749 1750 if (VERIFY_CHECKSUMS) { 1751 verifyChecksums(data, destOffset, "pre-write"); 1752 } 1753 1754 /* 1755 * Perform a RandomAccessFile write and update the buffer position. 1756 * ByteBuffer.array() is safe to use since all non-direct ByteBuffers 1757 * have a backing array. 1758 * 1759 * Synchronization on the file object is needed because two threads may 1760 * call seek() on the same file object. 1761 * 1762 * If the Write Queue is enabled, attempt to get the fsync latch. If 1763 * we can't get it, then an fsync or write is in progress and we'd 1764 * block anyway. In that case, queue the write operation. 1765 */ 1766 boolean fsyncLatchAcquired = 1767 endOfLog.fsyncFileSynchronizer.tryLock(); 1768 boolean enqueueSuccess = false; 1769 if (!fsyncLatchAcquired && 1770 useWriteQueue && 1771 !flushRequired) { 1772 enqueueSuccess = 1773 endOfLog.enqueueWrite(fileNum, data.array(), destOffset, 1774 pos + data.arrayOffset(), size); 1775 } 1776 1777 if (!enqueueSuccess) { 1778 if (!fsyncLatchAcquired) { 1779 endOfLog.fsyncFileSynchronizer.lock(); 1780 } 1781 try { 1782 if (useWriteQueue) { 1783 endOfLog.dequeuePendingWrites1(); 1784 } 1785 1786 synchronized (file) { 1787 file.seek(destOffset); 1788 file.write 1789 (data.array(), pos + data.arrayOffset(), size); 1790 if (VERIFY_CHECKSUMS) { 1791 file.seek(destOffset); 1792 file.read 1793 (data.array(), pos + data.arrayOffset(), size); 1794 verifyChecksums(data, destOffset, "post-write"); 1795 } 1796 } 1797 } finally { 1798 endOfLog.fsyncFileSynchronizer.unlock(); 1799 } 1800 } 1801 data.position(pos + size); 1802 totalBytesWritten = size; 1803 1804 lastFileNumberTouched = fileNum; 1805 lastFileTouchedOffset = destOffset + size; 1806 return totalBytesWritten; 1807 } 1808 bumpWriteCount(final String debugMsg)1809 private void bumpWriteCount(final String debugMsg) 1810 throws IOException { 1811 1812 if (DEBUG) { 1813 System.out.println("Write: " + WRITE_COUNT + " " + debugMsg); 1814 } 1815 1816 if (++WRITE_COUNT >= STOP_ON_WRITE_COUNT && 1817 WRITE_COUNT < (STOP_ON_WRITE_COUNT + N_BAD_WRITES)) { 1818 if (THROW_ON_WRITE) { 1819 throw new IOException 1820 ("IOException generated for testing: " + WRITE_COUNT + 1821 " " + debugMsg); 1822 } 1823 Runtime.getRuntime().halt(0xff); 1824 } 1825 } 1826 1827 /** 1828 * Read a buffer from a file at a given offset. We know that the desired 1829 * data exists in this file. There's no need to incur extra costs 1830 * such as checks of the file length, nor to return status as to whether 1831 * this file contains the data. 1832 */ readFromFile(RandomAccessFile file, ByteBuffer readBuffer, long offset, long fileNo)1833 void readFromFile(RandomAccessFile file, 1834 ByteBuffer readBuffer, 1835 long offset, 1836 long fileNo) 1837 throws DatabaseException { 1838 readFromFile(file, readBuffer, offset, fileNo, 1839 true /* dataKnownToBeInFile */); 1840 } 1841 1842 /** 1843 * Read a buffer from a file at a given offset. 1844 * 1845 * @return true if the read buffer is filled, false, if there is nothing 1846 * left in the file to read 1847 */ readFromFile(RandomAccessFile file, ByteBuffer readBuffer, long offset, long fileNo, boolean dataKnownToBeInFile)1848 boolean readFromFile(RandomAccessFile file, 1849 ByteBuffer readBuffer, 1850 long offset, 1851 long fileNo, 1852 boolean dataKnownToBeInFile) 1853 throws DatabaseException { 1854 1855 /* 1856 * All IOExceptions on read turn into EnvironmentFailureExceptions 1857 * [#15768]. 1858 */ 1859 try { 1860 1861 /* 1862 * Check if there's a pending write(s) in the write queue for this 1863 * fileNo/offset and if so, use it to fulfill this read request. 1864 */ 1865 if (useWriteQueue && 1866 endOfLog.checkWriteCache(readBuffer, offset, fileNo)) { 1867 return true; 1868 } 1869 1870 /* 1871 * Nothing queued, all data for this file must be in the file. 1872 * Note that there's no synchronization between the check of the 1873 * write queue above, and this check of file length. It's possible 1874 * that a newly written log entry could show up between the 1875 * statements, and enter the write queue just after we finish the 1876 * check. 1877 * 1878 * Because of this, callers of this method must abide by one of 1879 * three conditions: 1880 * 1. They guarantee that the attempt to read a chunk of new data 1881 * comes after the new data has been logged by the LogManager. 1882 * 2. The files are quiescent when the read is going on. 1883 * 3. The caller is sure the data is in this file. 1884 * 1885 * The replication feeder reader abides by (1) while all other file 1886 * readers abide by (2). Callers which are fetching specific log 1887 * entries fall under (3). 1888 */ 1889 boolean readThisFile = true; 1890 if (!dataKnownToBeInFile) { 1891 /* 1892 * Callers who are not sure whether the desired data is in this 1893 * file or the next incur the cost of a check of file.length(), 1894 * which is a system call. 1895 */ 1896 readThisFile = (offset < file.length()); 1897 } 1898 1899 if (readThisFile) { 1900 readFromFileInternal(file, readBuffer, offset, fileNo); 1901 return true; 1902 } 1903 1904 return false; 1905 } catch (ClosedChannelException e) { 1906 1907 /* 1908 * The channel should never be closed. It may be closed because 1909 * of an interrupt received by another thread. See SR [#10463] 1910 */ 1911 throw new ThreadInterruptedException 1912 (envImpl, "Channel closed, may be due to thread interrupt", e); 1913 } catch (IOException e) { 1914 throw new EnvironmentFailureException 1915 (envImpl, EnvironmentFailureReason.LOG_READ, e); 1916 } 1917 } 1918 readFromFileInternal(RandomAccessFile file, ByteBuffer readBuffer, long offset, long fileNum)1919 private void readFromFileInternal(RandomAccessFile file, 1920 ByteBuffer readBuffer, 1921 long offset, 1922 long fileNum) 1923 throws IOException { 1924 1925 /* 1926 * Perform a RandomAccessFile read and update the buffer position. 1927 * ByteBuffer.array() is safe to use since all non-direct ByteBuffers 1928 * have a backing array. Synchronization on the file object is needed 1929 * because two threads may call seek() on the same file object. 1930 */ 1931 synchronized (file) { 1932 int pos = readBuffer.position(); 1933 int size = readBuffer.limit() - pos; 1934 1935 if (lastFileNumberTouched == fileNum && 1936 (Math.abs(offset - lastFileTouchedOffset) < 1937 ADJACENT_TRACK_SEEK_DELTA)) { 1938 nSequentialReads.increment(); 1939 nSequentialReadBytes.add(size); 1940 } else { 1941 nRandomReads.increment(); 1942 nRandomReadBytes.add(size); 1943 } 1944 1945 file.seek(offset); 1946 if (IO_EXCEPTION_TESTING_ON_READ) { 1947 throw new IOException("generated for testing (read)"); 1948 } 1949 int bytesRead = file.read(readBuffer.array(), 1950 pos + readBuffer.arrayOffset(), 1951 size); 1952 if (bytesRead > 0) { 1953 readBuffer.position(pos + bytesRead); 1954 } 1955 1956 lastFileNumberTouched = fileNum; 1957 lastFileTouchedOffset = offset + bytesRead; 1958 } 1959 } 1960 verifyChecksums(ByteBuffer entryBuffer, long lsn, String comment)1961 private void verifyChecksums(ByteBuffer entryBuffer, 1962 long lsn, 1963 String comment) { 1964 int curPos = entryBuffer.position(); 1965 try { 1966 while (entryBuffer.remaining() > 0) { 1967 int recStartPos = entryBuffer.position(); 1968 /* Write buffer contains current log version entries. */ 1969 LogEntryHeader header = 1970 new LogEntryHeader(entryBuffer, LogEntryType.LOG_VERSION); 1971 verifyChecksum(entryBuffer, header, lsn, comment); 1972 entryBuffer.position(recStartPos + header.getSize() + 1973 header.getItemSize()); 1974 } 1975 } catch (ChecksumException e) { 1976 System.err.println("ChecksumException: (" + comment + ") " + e); 1977 System.err.println("start stack trace"); 1978 e.printStackTrace(System.err); 1979 System.err.println("end stack trace"); 1980 } 1981 entryBuffer.position(curPos); 1982 } 1983 verifyChecksum(ByteBuffer entryBuffer, LogEntryHeader header, long lsn, String comment)1984 private void verifyChecksum(ByteBuffer entryBuffer, 1985 LogEntryHeader header, 1986 long lsn, 1987 String comment) 1988 throws ChecksumException { 1989 1990 ChecksumValidator validator = null; 1991 /* Add header to checksum bytes */ 1992 validator = new ChecksumValidator(); 1993 int headerSizeMinusChecksum = header.getSizeMinusChecksum(); 1994 int itemStart = entryBuffer.position(); 1995 entryBuffer.position(itemStart - headerSizeMinusChecksum); 1996 validator.update(entryBuffer, headerSizeMinusChecksum); 1997 entryBuffer.position(itemStart); 1998 1999 /* 2000 * Now that we know the size, read the rest of the entry if the first 2001 * read didn't get enough. 2002 */ 2003 int itemSize = header.getItemSize(); 2004 if (entryBuffer.remaining() < itemSize) { 2005 System.err.println("Couldn't verify checksum (" + comment + ")"); 2006 return; 2007 } 2008 2009 /* 2010 * Do entry validation. Run checksum before checking the entry 2011 * type, it will be the more encompassing error. 2012 */ 2013 validator.update(entryBuffer, itemSize); 2014 validator.validate(header.getChecksum(), lsn); 2015 } 2016 2017 /* 2018 * Iterate through a buffer looking for commit records. Change all commit 2019 * records to abort records. 2020 */ abortCommittedTxns(ByteBuffer data)2021 private void abortCommittedTxns(ByteBuffer data) 2022 throws DatabaseException { 2023 2024 final byte commitType = LogEntryType.LOG_TXN_COMMIT.getTypeNum(); 2025 data.position(0); 2026 2027 while (data.remaining() > 0) { 2028 int recStartPos = data.position(); 2029 LogEntryHeader header; 2030 try { 2031 /* Write buffer contains current log version entries. */ 2032 header = new LogEntryHeader(data, LogEntryType.LOG_VERSION); 2033 } catch (ChecksumException e) { 2034 throw EnvironmentFailureException.unexpectedException(e); 2035 } 2036 if (header.getType() == commitType) { 2037 /* Change the log entry type, and recalculate the checksum. */ 2038 header.convertCommitToAbort(data); 2039 } 2040 data.position(recStartPos + header.getSize() + 2041 header.getItemSize()); 2042 } 2043 data.position(0); 2044 } 2045 2046 /** 2047 * FSync the end of the log. 2048 */ syncLogEnd()2049 void syncLogEnd() 2050 throws DatabaseException { 2051 2052 try { 2053 endOfLog.force(); 2054 } catch (IOException e) { 2055 throw new LogWriteException 2056 (envImpl, "IOException during fsync", e); 2057 } 2058 } 2059 2060 /** 2061 * Sync the end of the log, close off this log file. Should only be called 2062 * under the log write latch. 2063 */ syncLogEndAndFinishFile()2064 void syncLogEndAndFinishFile() 2065 throws DatabaseException, IOException { 2066 2067 if (syncAtFileEnd) { 2068 syncLogEnd(); 2069 } 2070 endOfLog.close(); 2071 } 2072 2073 /** 2074 * Returns whether anything is in the write queue. 2075 */ hasQueuedWrites()2076 public boolean hasQueuedWrites() { 2077 return endOfLog.hasQueuedWrites(); 2078 } 2079 2080 /** 2081 * For unit testing only. 2082 */ testWriteQueueLock()2083 public void testWriteQueueLock() { 2084 endOfLog.fsyncFileSynchronizer.lock(); 2085 } 2086 2087 /** 2088 * For unit testing only. 2089 */ testWriteQueueUnlock()2090 public void testWriteQueueUnlock() { 2091 endOfLog.fsyncFileSynchronizer.unlock(); 2092 } 2093 startFileCacheWarmer(final long recoveryStartLsn)2094 public void startFileCacheWarmer(final long recoveryStartLsn){ 2095 assert fileCacheWarmer == null; 2096 2097 final DbConfigManager cm = envImpl.getConfigManager(); 2098 2099 final int warmUpSize = cm.getInt( 2100 EnvironmentParams.LOG_FILE_WARM_UP_SIZE); 2101 2102 if (warmUpSize == 0) { 2103 return; 2104 } 2105 2106 final int bufSize = cm.getInt( 2107 EnvironmentParams.LOG_FILE_WARM_UP_BUF_SIZE); 2108 2109 fileCacheWarmer = new FileCacheWarmer( 2110 envImpl, recoveryStartLsn, lastUsedLsn, warmUpSize, bufSize); 2111 2112 fileCacheWarmer.start(); 2113 } 2114 stopFileCacheWarmer()2115 private void stopFileCacheWarmer(){ 2116 2117 /* 2118 * Use fcw local var because fileCacheWarmer can be set to null by 2119 * other threads calling clearFileCacheWarmer, namely the cache warmer 2120 * thread. 2121 */ 2122 final FileCacheWarmer fcw = fileCacheWarmer; 2123 2124 if (fcw == null) { 2125 return; 2126 } 2127 2128 fcw.shutdown(); 2129 2130 clearFileCacheWarmer(); 2131 } 2132 2133 /* Allow cache warmer thread to be GC'd. */ clearFileCacheWarmer()2134 void clearFileCacheWarmer() { 2135 fileCacheWarmer = null; 2136 } 2137 2138 /** 2139 * Close all file handles and empty the cache. 2140 */ clear()2141 public void clear() 2142 throws IOException, DatabaseException { 2143 2144 synchronized (fileCache) { 2145 fileCache.clear(); 2146 } 2147 2148 endOfLog.close(); 2149 } 2150 2151 /** 2152 * Clear the file lock. 2153 */ close()2154 public void close() 2155 throws IOException { 2156 2157 stopFileCacheWarmer(); 2158 2159 if (envLock != null) { 2160 envLock.release(); 2161 envLock = null; 2162 } 2163 2164 if (exclLock != null) { 2165 exclLock.release(); 2166 exclLock = null; 2167 } 2168 2169 if (channel != null) { 2170 channel.close(); 2171 channel = null; 2172 } 2173 2174 if (lockFile != null) { 2175 lockFile.close(); 2176 lockFile = null; 2177 } 2178 } 2179 2180 /** 2181 * Lock the environment. Return true if the lock was acquired. If 2182 * exclusive is false, then this implements a single writer, multiple 2183 * reader lock. If exclusive is true, then implement an exclusive lock. 2184 * 2185 * There is a lock file and there are two regions of the lock file: byte 0, 2186 * and byte 1. Byte 0 is the exclusive writer process area of the lock 2187 * file. If an environment is opened for write, then it attempts to take 2188 * an exclusive write lock on byte 0. Byte 1 is the shared reader process 2189 * area of the lock file. If an environment is opened for read-only, then 2190 * it attempts to take a shared lock on byte 1. This is how we implement 2191 * single writer, multi reader semantics. 2192 * 2193 * The cleaner, each time it is invoked, attempts to take an exclusive lock 2194 * on byte 1. The owning process already either has an exclusive lock on 2195 * byte 0, or a shared lock on byte 1. This will necessarily conflict with 2196 * any shared locks on byte 1, even if it's in the same process and there 2197 * are no other holders of that shared lock. So if there is only one 2198 * read-only process, it will have byte 1 for shared access, and the 2199 * cleaner can not run in it because it will attempt to get an exclusive 2200 * lock on byte 1 (which is already locked for shared access by itself). 2201 * If a write process comes along and tries to run the cleaner, it will 2202 * attempt to get an exclusive lock on byte 1. If there are no other 2203 * reader processes (with shared locks on byte 1), and no other writers 2204 * (which are running cleaners on with exclusive locks on byte 1), then the 2205 * cleaner will run. 2206 */ lockEnvironment(boolean rdOnly, boolean exclusive)2207 public boolean lockEnvironment(boolean rdOnly, boolean exclusive) { 2208 try { 2209 if (checkEnvHomePermissions(rdOnly)) { 2210 return true; 2211 } 2212 2213 if (lockFile == null) { 2214 lockFile = 2215 new RandomAccessFile 2216 (new File(dbEnvHome, LOCK_FILE), 2217 FileMode.READWRITE_MODE.getModeValue()); 2218 } 2219 2220 channel = lockFile.getChannel(); 2221 2222 try { 2223 if (exclusive) { 2224 2225 /* 2226 * To lock exclusive, must have exclusive on 2227 * shared reader area (byte 1). 2228 */ 2229 exclLock = channel.tryLock(1, 1, false); 2230 if (exclLock == null) { 2231 return false; 2232 } 2233 return true; 2234 } 2235 if (rdOnly) { 2236 envLock = channel.tryLock(1, 1, true); 2237 } else { 2238 envLock = channel.tryLock(0, 1, false); 2239 } 2240 if (envLock == null) { 2241 return false; 2242 } 2243 return true; 2244 } catch (OverlappingFileLockException e) { 2245 return false; 2246 } 2247 } catch (IOException e) { 2248 throw new EnvironmentFailureException 2249 (envImpl, EnvironmentFailureReason.LOG_INTEGRITY, e); 2250 } 2251 } 2252 releaseExclusiveLock()2253 public void releaseExclusiveLock() 2254 throws DatabaseException { 2255 2256 try { 2257 if (exclLock != null) { 2258 exclLock.release(); 2259 } 2260 } catch (IOException e) { 2261 throw new EnvironmentFailureException 2262 (envImpl, EnvironmentFailureReason.LOG_INTEGRITY, e); 2263 } 2264 } 2265 2266 /** 2267 * Ensure that if the environment home dir is on readonly media or in a 2268 * readonly directory that the environment has been opened for readonly 2269 * access. 2270 * 2271 * @return true if the environment home dir is readonly. 2272 * 2273 * @throws IllegalArgumentException via Environment ctor 2274 */ checkEnvHomePermissions(boolean rdOnly)2275 public boolean checkEnvHomePermissions(boolean rdOnly) 2276 throws DatabaseException { 2277 2278 if (nDataDirs == 0) { 2279 return checkEnvHomePermissionsSingleEnvDir(dbEnvHome, rdOnly); 2280 } else { 2281 return checkEnvHomePermissionsMultiEnvDir(rdOnly); 2282 } 2283 } 2284 checkEnvHomePermissionsSingleEnvDir(File dbEnvHome, boolean rdOnly)2285 private boolean checkEnvHomePermissionsSingleEnvDir(File dbEnvHome, 2286 boolean rdOnly) 2287 throws DatabaseException { 2288 2289 boolean envDirIsReadOnly = !dbEnvHome.canWrite(); 2290 if (envDirIsReadOnly && !rdOnly) { 2291 2292 /* 2293 * Use the absolute path in the exception message, to 2294 * make a mis-specified relative path problem more obvious. 2295 */ 2296 throw new IllegalArgumentException 2297 ("The Environment directory " + 2298 dbEnvHome.getAbsolutePath() + 2299 " is not writable, but the " + 2300 "Environment was opened for read-write access."); 2301 } 2302 2303 return envDirIsReadOnly; 2304 } 2305 checkEnvHomePermissionsMultiEnvDir(boolean rdOnly)2306 private boolean checkEnvHomePermissionsMultiEnvDir(boolean rdOnly) 2307 throws DatabaseException { 2308 2309 for (File dbEnvDir : dbEnvDataDirs) { 2310 if (!checkEnvHomePermissionsSingleEnvDir(dbEnvDir, rdOnly)) { 2311 return false; 2312 } 2313 } 2314 2315 return true; 2316 } 2317 2318 /** 2319 * Truncate a log at this position. Used by recovery to a timestamp 2320 * utilities and by recovery to set the end-of-log position, see 2321 * LastFileReader.setEndOfFile(). 2322 * 2323 * <p>This method forces a new log file to be written next, if the last 2324 * file (the file truncated to) has an old version in its header. This 2325 * ensures that when the log is opened by an old version of JE, a version 2326 * incompatibility will be detected. [#11243]</p> 2327 */ truncateSingleFile(long fileNum, long offset)2328 public void truncateSingleFile(long fileNum, long offset) 2329 throws IOException, DatabaseException { 2330 2331 try { 2332 FileHandle handle = 2333 makeFileHandle(fileNum, getAppropriateReadWriteMode()); 2334 RandomAccessFile file = handle.getFile(); 2335 2336 try { 2337 file.getChannel().truncate(offset); 2338 } finally { 2339 file.close(); 2340 } 2341 2342 if (handle.isOldHeaderVersion()) { 2343 forceNewFile = true; 2344 } 2345 } catch (ChecksumException e) { 2346 throw new EnvironmentFailureException 2347 (envImpl, EnvironmentFailureReason.LOG_CHECKSUM, e); 2348 } 2349 } 2350 2351 /* 2352 * Truncate all log entries after a specified log entry, the position of 2353 * that entry is specified by the fileNum and offset, we do this to avoid 2354 * the log file gap. Used by replication hard recovery and the 2355 * DbTruncateLog utility, see SR [#19463]. 2356 */ truncateLog(long fileNum, long offset)2357 public void truncateLog(long fileNum, long offset) 2358 throws IOException, DatabaseException { 2359 2360 /* 2361 * Truncate the log files following by this log file in descending 2362 * order to avoid the log entry gap, see SR [#19463]. 2363 */ 2364 for (long i = getLastFileNum(); i >= fileNum; i--) { 2365 /* Do nothing if this file doesn't exist. */ 2366 if (!isFileValid(i)) { 2367 continue; 2368 } 2369 2370 /* 2371 * If this is the file that truncation starts, invoke 2372 * truncateSingleFile. If the offset is 0, which means the 2373 * FileHeader is also deleted, delete the whole file to avoid a log 2374 * file gap. 2375 */ 2376 if (i == fileNum) { 2377 truncateSingleFile(fileNum, offset); 2378 if (offset != 0) { 2379 continue; 2380 } 2381 } 2382 2383 boolean deleted = deleteFile(i); 2384 assert deleted : "File " + getFullFileName(i, JE_SUFFIX) + 2385 " not deleted during truncateLog"; 2386 } 2387 } 2388 2389 /** 2390 * Mark the specified log entries as invisible and obsolete. The entries 2391 * are written here, but are fsync'ed later. If there is any problem or 2392 * exception during the setting, the method will throw an 2393 * EnvironmentFailureException. 2394 * 2395 * These changes are made directly to the file, but recently logged log 2396 * entries may also be resident in the log buffers. The caller must take 2397 * care to call LogManager.flush() before this method, to ensure that all 2398 * entries are on disk. 2399 * 2400 * In addition, we must ensure that after this step, the affected log 2401 * entries will only be read via a FileReader, and will not be faulted in 2402 * by the LogManager. Entries may be present in the log and in the log 2403 * buffers, but only the on disk version is modified by this method. The 2404 * LogManager can read directly from the log buffers and may read the 2405 * incorrect, non-invisible version of the log entry, rather than the 2406 * invisible version from the file. This should not be an issue, because 2407 * invisible log entries should be detached from the in-memory tree before 2408 * they are made invisible. 2409 * 2410 * @param fileNum target file. 2411 * @param lsns The list of LSNs to make invisible, must be sorted in 2412 * ascending order. 2413 */ makeInvisible(long fileNum, List<Long> lsns)2414 public void makeInvisible(long fileNum, List<Long> lsns) { 2415 if (lsns.size() == 0) { 2416 return; 2417 } 2418 2419 /* Open this file. */ 2420 FileHandle handle = null; 2421 try { 2422 2423 /* 2424 * Note that we are getting a new, non-cached file handle for 2425 * specific use by this method. 2426 */ 2427 handle = makeFileHandle(fileNum, getAppropriateReadWriteMode()); 2428 } catch (ChecksumException e) { 2429 throw new EnvironmentFailureException 2430 (envImpl, EnvironmentFailureReason.LOG_CHECKSUM, 2431 "Opening file " + fileNum + " for invisible marking ", e); 2432 } catch (FileNotFoundException e) { 2433 throw new EnvironmentFailureException 2434 (envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND, 2435 "Opening file " + fileNum + " for invisible marking ", e); 2436 } 2437 RandomAccessFile file = handle.getFile(); 2438 2439 /* Set the invisible bit for each entry. */ 2440 try { 2441 for (Long lsn : lsns) { 2442 if (DbLsn.getFileNumber(lsn) != fileNum) { 2443 2444 /* 2445 * This failure will not invalidate the environment right 2446 * away. But since it causes replication syncup to fail, 2447 * the environment will shutdown, which is the effect we 2448 * want. 2449 */ 2450 throw new EnvironmentFailureException 2451 (envImpl, EnvironmentFailureReason.UNEXPECTED_STATE, 2452 "LSN of " + DbLsn.getNoFormatString(lsn) + 2453 " did not match file number" + fileNum); 2454 } 2455 2456 int entryFlagsOffset = (int) 2457 (DbLsn.getFileOffset(lsn) + LogEntryHeader.FLAGS_OFFSET); 2458 file.seek(entryFlagsOffset); 2459 byte flags = file.readByte(); 2460 byte newFlags = LogEntryHeader.makeInvisible(flags); 2461 file.seek(entryFlagsOffset); 2462 file.writeByte(newFlags); 2463 } 2464 } catch (IOException e) { 2465 throw new EnvironmentFailureException 2466 (envImpl, EnvironmentFailureReason.LOG_WRITE, 2467 "Flipping invisibility in file " + fileNum, e); 2468 } finally { 2469 /* 2470 * Just close the file. Fsyncs will be done later on, in the hope 2471 * that the OS has already synced asynchronously. 2472 */ 2473 try { 2474 file.close(); 2475 } catch (IOException e) { 2476 throw new EnvironmentFailureException 2477 (envImpl, EnvironmentFailureReason.LOG_WRITE, 2478 "Closing after invisibility cloaking: file " + fileNum, e); 2479 } 2480 } 2481 } 2482 2483 /** 2484 * Fsync this set of log files. Used for replication syncup rollback. 2485 */ force(Set<Long> fileNums)2486 public void force(Set<Long> fileNums) { 2487 for (long fileNum : fileNums) { 2488 RandomAccessFile file = null; 2489 try { 2490 FileHandle handle = 2491 makeFileHandle(fileNum, getAppropriateReadWriteMode()); 2492 file = handle.getFile(); 2493 file.getChannel().force(false); 2494 nLogFSyncs.increment(); 2495 } catch (FileNotFoundException e) { 2496 throw new EnvironmentFailureException 2497 (envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND, 2498 "Invisible fsyncing file " + fileNum, e); 2499 } catch (ChecksumException e) { 2500 throw new EnvironmentFailureException 2501 (envImpl, EnvironmentFailureReason.LOG_CHECKSUM, 2502 "Invisible fsyncing file " + fileNum, e); 2503 } catch (IOException e) { 2504 throw new EnvironmentFailureException 2505 (envImpl, EnvironmentFailureReason.LOG_WRITE, 2506 "Invisible fsyncing file " + fileNum, e); 2507 } finally { 2508 if (file != null) { 2509 try { 2510 file.close(); 2511 } catch (IOException e) { 2512 throw new EnvironmentFailureException 2513 (envImpl, EnvironmentFailureReason.LOG_WRITE, 2514 "Invisible fsyncing file " + fileNum, e); 2515 } 2516 } 2517 } 2518 } 2519 } 2520 2521 /** 2522 * Set the flag that causes a new file to be written before the next write. 2523 */ forceNewLogFile()2524 public void forceNewLogFile() { 2525 forceNewFile = true; 2526 } 2527 2528 /** 2529 * Return the offset of the first log entry after the file header. 2530 */ 2531 2532 /** 2533 * @return the size in bytes of the file header log entry. 2534 */ firstLogEntryOffset()2535 public static int firstLogEntryOffset() { 2536 return FileHeader.entrySize() + LogEntryHeader.MIN_HEADER_SIZE; 2537 } 2538 2539 /** 2540 * Return the next available LSN in the log. Note that this is 2541 * unsynchronized, so is only valid as an approximation of log size. 2542 */ getNextLsn()2543 public long getNextLsn() { 2544 return nextAvailableLsn; 2545 } 2546 2547 /** 2548 * Return the last allocated LSN in the log. Note that this is 2549 * unsynchronized, so if it is called outside the log write latch it is 2550 * only valid as an approximation of log size. 2551 */ getLastUsedLsn()2552 public long getLastUsedLsn() { 2553 return lastUsedLsn; 2554 } 2555 loadStats(StatsConfig config)2556 StatGroup loadStats(StatsConfig config) { 2557 nOpenFiles.set(fileCache.size()); 2558 StatGroup copyStats = stats.cloneGroup(config.getClear()); 2559 2560 return copyStats; 2561 } 2562 2563 /* 2564 * Unit test support 2565 */ 2566 2567 /* 2568 * @return ids of files in cache 2569 */ getCacheKeys()2570 Set<Long> getCacheKeys() { 2571 return fileCache.getCacheKeys(); 2572 } 2573 2574 /** 2575 * Clear a file out of the file cache regardless of mode type. 2576 */ clearFileCache(long fileNum)2577 private void clearFileCache(long fileNum) 2578 throws IOException, DatabaseException { 2579 2580 synchronized (fileCache) { 2581 fileCache.remove(fileNum); 2582 } 2583 } 2584 2585 /* 2586 * The file cache keeps N RandomAccessFile objects cached for file 2587 * access. The cache consists of two parts: a Hashtable that doesn't 2588 * require extra synchronization, for the most common access, and a linked 2589 * list of files to support cache administration. Looking up a file from 2590 * the hash table doesn't require extra latching, but adding or deleting a 2591 * file does. 2592 */ 2593 private static class FileCache { 2594 private final Map<Long, FileHandle> fileMap; // Long->file 2595 private final List<Long> fileList; // list of file numbers 2596 private final int fileCacheSize; 2597 FileCache(DbConfigManager configManager)2598 FileCache(DbConfigManager configManager) { 2599 2600 /* 2601 * A fileMap maps the file number to FileHandles (RandomAccessFile, 2602 * latch). The fileList is a list of Longs to determine which files 2603 * to eject out of the file cache if it's too small. 2604 */ 2605 fileMap = new Hashtable<Long, FileHandle>(); 2606 fileList = new LinkedList<Long>(); 2607 fileCacheSize = 2608 configManager.getInt(EnvironmentParams.LOG_FILE_CACHE_SIZE); 2609 } 2610 get(Long fileId)2611 private FileHandle get(Long fileId) { 2612 return fileMap.get(fileId); 2613 } 2614 add(Long fileId, FileHandle fileHandle)2615 private void add(Long fileId, FileHandle fileHandle) 2616 throws IOException, DatabaseException { 2617 2618 /* 2619 * Does the cache have any room or do we have to evict? Hunt down 2620 * the file list for an unused file. Note that the file cache might 2621 * actually grow past the prescribed size if there is nothing 2622 * evictable. Should we try to shrink the file cache? Presently if 2623 * it grows, it doesn't shrink. 2624 */ 2625 if (fileList.size() >= fileCacheSize) { 2626 Iterator<Long> iter = fileList.iterator(); 2627 while (iter.hasNext()) { 2628 Long evictId = iter.next(); 2629 FileHandle evictTarget = fileMap.get(evictId); 2630 2631 /* 2632 * Try to latch. If latchNoWait returns false, then another 2633 * thread owns this latch. Note that a thread that's trying 2634 * to get a new file handle should never already own the 2635 * latch on another file handle, because these latches are 2636 * meant to be short lived and only held over the i/o out 2637 * of the file. 2638 */ 2639 if (evictTarget.latchNoWait()) { 2640 try { 2641 fileMap.remove(evictId); 2642 iter.remove(); 2643 evictTarget.close(); 2644 } finally { 2645 evictTarget.release(); 2646 } 2647 break; 2648 } 2649 } 2650 } 2651 2652 /* 2653 * We've done our best to evict. Add the file the the cache now 2654 * whether or not we did evict. 2655 */ 2656 fileList.add(fileId); 2657 fileMap.put(fileId, fileHandle); 2658 } 2659 2660 /** 2661 * Take any file handles corresponding to this file name out of the 2662 * cache. A file handle could be there twice, in rd only and in r/w 2663 * mode. 2664 */ remove(long fileNum)2665 private void remove(long fileNum) 2666 throws IOException, DatabaseException { 2667 2668 Iterator<Long> iter = fileList.iterator(); 2669 while (iter.hasNext()) { 2670 Long evictId = iter.next(); 2671 if (evictId.longValue() == fileNum) { 2672 FileHandle evictTarget = fileMap.get(evictId); 2673 try { 2674 evictTarget.latch(); 2675 fileMap.remove(evictId); 2676 iter.remove(); 2677 evictTarget.close(); 2678 } finally { 2679 evictTarget.release(); 2680 } 2681 } 2682 } 2683 } 2684 clear()2685 private void clear() 2686 throws IOException, DatabaseException { 2687 2688 Iterator<FileHandle> iter = fileMap.values().iterator(); 2689 while (iter.hasNext()) { 2690 FileHandle fileHandle = iter.next(); 2691 try { 2692 fileHandle.latch(); 2693 fileHandle.close(); 2694 iter.remove(); 2695 } finally { 2696 fileHandle.release(); 2697 } 2698 } 2699 fileMap.clear(); 2700 fileList.clear(); 2701 } 2702 getCacheKeys()2703 private Set<Long> getCacheKeys() { 2704 return fileMap.keySet(); 2705 } 2706 size()2707 private int size() { 2708 return fileMap.size(); 2709 } 2710 } 2711 2712 /** 2713 * The LogEndFileDescriptor is used to write and fsync the end of the log. 2714 * Because the JE log is append only, there is only one logical R/W file 2715 * descriptor for the whole environment. This class actually implements two 2716 * RandomAccessFile instances, one for writing and one for fsyncing, so the 2717 * two types of operations don't block each other. 2718 * 2719 * The write file descriptor is considered the master. Manipulation of 2720 * this class is done under the log write latch. Here's an explanation of 2721 * why the log write latch is sufficient to safeguard all operations. 2722 * 2723 * There are two types of callers who may use this file descriptor: the 2724 * thread that is currently writing to the end of the log and any threads 2725 * that are fsyncing on behalf of the FSyncManager. 2726 * 2727 * The writing thread appends data to the file and fsyncs the file when we 2728 * flip over to a new log file. The file is only instantiated at the point 2729 * that it must do so -- which is either when the first fsync is required 2730 * by JE or when the log file is full and we flip files. Therefore, the 2731 * writing thread has two actions that change this descriptor -- we 2732 * initialize the file descriptor for the given log file at the first write 2733 * to the file, and we close the file descriptor when the log file is full. 2734 * Therefore is a period when there is no log descriptor -- when we have 2735 * not yet written a log buffer into a given log file. 2736 * 2737 * The fsyncing threads ask for the log end file descriptor asynchronously, 2738 * but will never modify it. These threads may arrive at the point when 2739 * the file descriptor is null, and therefore skip their fysnc, but that is 2740 * fine because it means a writing thread already flipped that target file 2741 * and has moved on to the next file. 2742 * 2743 * Time Activity 2744 * 10 thread 1 writes log entry A into file 0x0, issues fsync 2745 * outside of log write latch, yields the processor 2746 * 20 thread 2 writes log entry B, piggybacks off thread 1 2747 * 30 thread 3 writes log entry C, but no room left in that file, 2748 * so it flips the log, and fsyncs file 0x0, all under the log 2749 * write latch. It nulls out endOfLogRWFile, moves onto file 2750 * 0x1, but doesn't create the file yet. 2751 * 40 thread 1 finally comes along, but endOfLogRWFile is null-- 2752 * no need to fsync in that case, 0x0 got fsynced. 2753 * 2754 * If a write is attempted and an fsync is already in progress, then the 2755 * information pertaining to the data to be written (data, offset, length) 2756 * is saved away in the "queuedWrites" array. When the fsync completes, 2757 * the queuedWrites buffer is emptied. This ensures that writes continue 2758 * to execute on file systems which block all IO calls during an fsync() 2759 * call (e.g. ext3). 2760 */ 2761 class LogEndFileDescriptor { 2762 private RandomAccessFile endOfLogRWFile = null; 2763 private RandomAccessFile endOfLogSyncFile = null; 2764 private final ReentrantLock fsyncFileSynchronizer = new ReentrantLock(); 2765 2766 /* 2767 * Holds all data for writes which have been queued due to their 2768 * being blocked by an fsync when the original write was attempted. 2769 * The next thread to execute an fsync or write will execute any 2770 * queued writes in this buffer. 2771 * Latch order is fsyncFileSynchronizer, followed by the queuedWrites 2772 * mutex [ synchronized (queuedWrites) {} ]. 2773 * 2774 * Default protection for unit tests. 2775 */ 2776 private final byte[] queuedWrites = 2777 useWriteQueue ? new byte[writeQueueSize] : null; 2778 2779 /* Current position in the queuedWrites array. */ 2780 private int queuedWritesPosition = 0; 2781 2782 /* The starting offset on disk of the first byte in queuedWrites. */ 2783 private long qwStartingOffset; 2784 2785 /* The file number that the queuedWrites are destined for. */ 2786 private long qwFileNum = -1; 2787 2788 /* For unit tests. */ setQueueFileNum(final long qwFileNum)2789 void setQueueFileNum(final long qwFileNum) { 2790 this.qwFileNum = qwFileNum; 2791 } 2792 2793 /* 2794 * Check if fileNo/offset is present in queuedWrites, and if so, fill 2795 * readBuffer with those bytes. We theorize that this is needed 2796 * because HA will be reading at the very end of the log and those 2797 * writes, if enqueued, may no longer be in LogBuffers in the 2798 * LogBufferPool. This might happen in the case of lots of concurrent 2799 * non-synchronous writes (with synchronous commits) which become 2800 * enqueued in the queuedWrites cache, but cycle out of the LBP. In 2801 * general, using synchronous commits with HA is a bad idea. 2802 * 2803 * Default protection for unit tests. 2804 * @return true if more data was available. If so, the read buffer 2805 * will be filled up. 2806 */ 2807 /* private */ checkWriteCache(final ByteBuffer readBuffer, final long requestedOffset, final long fileNum)2808 boolean checkWriteCache(final ByteBuffer readBuffer, 2809 final long requestedOffset, 2810 final long fileNum) { 2811 2812 int pos = readBuffer.position(); 2813 int targetBufSize = readBuffer.limit() - pos; 2814 synchronized (queuedWrites) { 2815 if (qwFileNum != fileNum) { 2816 return false; 2817 } 2818 2819 if (queuedWritesPosition == 0) { 2820 return false; 2821 } 2822 2823 if (requestedOffset < qwStartingOffset || 2824 (qwStartingOffset + queuedWritesPosition) <= 2825 requestedOffset) { 2826 return false; 2827 } 2828 2829 /* We have the bytes available. */ 2830 int nBytesToCopy = (int) 2831 (queuedWritesPosition - 2832 (requestedOffset - qwStartingOffset)); 2833 nBytesToCopy = Math.min(nBytesToCopy, targetBufSize); 2834 readBuffer.put(queuedWrites, 2835 (int) (requestedOffset - qwStartingOffset), 2836 nBytesToCopy); 2837 nBytesReadFromWriteQueue.add(nBytesToCopy); 2838 nReadsFromWriteQueue.increment(); 2839 return true; 2840 } 2841 } 2842 2843 /* 2844 * Enqueue a blocked write call for later execution by the next thread 2845 * to do either an fsync or write call. fsyncFileSynchronizer is not 2846 * held when this is called. 2847 * 2848 * Default protection for unit tests. 2849 */ 2850 /* private */ enqueueWrite(final long fileNum, final byte[] data, final long destOffset, final int arrayOffset, final int size)2851 boolean enqueueWrite(final long fileNum, 2852 final byte[] data, 2853 final long destOffset, 2854 final int arrayOffset, 2855 final int size) 2856 throws DatabaseException { 2857 2858 assert !fsyncFileSynchronizer.isHeldByCurrentThread(); 2859 2860 for (int i = 0; i < 2; i++) { 2861 try { 2862 enqueueWrite1(fileNum, data, destOffset, 2863 arrayOffset, size); 2864 return true; 2865 } catch (RelatchRequiredException RE) { 2866 dequeuePendingWrites(); 2867 } 2868 } 2869 2870 /* Give up after two tries. */ 2871 nWriteQueueOverflowFailures.increment(); 2872 return false; 2873 } 2874 enqueueWrite1(final long fileNum, final byte[] data, final long destOffset, final int arrayOffset, final int size)2875 private void enqueueWrite1(final long fileNum, 2876 final byte[] data, 2877 final long destOffset, 2878 final int arrayOffset, 2879 final int size) 2880 throws RelatchRequiredException, DatabaseException { 2881 2882 /* 2883 * The queuedWrites queue only ever holds writes for a single file. 2884 * 2885 * This check is safe because qwFileNum can only ever change inside 2886 * enqueueWrite which can only ever be called while the Log Write 2887 * Latch is held. 2888 * 2889 * NOTE: We believe the commented out second condition is safe 2890 * to add to the code if we ever see contention with this call to 2891 * dequeuePendingWrites against an fsync. Here is the reasoning: 2892 * 2893 * queuedWritesPosition is changed in two places: (1) enqueueWrite1 2894 * where it is incremented, and (2) dequeuePendingWrites1 where it 2895 * is zeroed. Both of these places are proected by the queuedWrites 2896 * mutex. The zero'ing (2) will only make the dequeue unnecessary 2897 * so the extra commented out check below is safe since it will 2898 * only result in eliminating an unnecessary dequeuePendingWrites 2899 * call. 2900 */ 2901 if (qwFileNum < fileNum /* && queuedWritesPosition > 0 */) { 2902 dequeuePendingWrites(); 2903 qwFileNum = fileNum; 2904 } 2905 2906 synchronized (queuedWrites) { 2907 boolean overflow = 2908 (writeQueueSize - queuedWritesPosition) < size; 2909 if (overflow) { 2910 nWriteQueueOverflow.increment(); 2911 2912 /* 2913 * Since we can't write this "write call" into the 2914 * ByteBuffer without overflowing, we will try to dequeue 2915 * all current writes in the buffer. But that requires 2916 * holding the fsyncFileSynchronizer latch first which 2917 * would be latching out of order relative to the 2918 * queuedWrites mutex. 2919 */ 2920 throw RelatchRequiredException.relatchRequiredException; 2921 } 2922 2923 assert qwFileNum == fileNum; 2924 int curPos = queuedWritesPosition; 2925 if (curPos == 0) { 2926 2927 /* 2928 * This is the first entry in queue. Set qwStartingOffset. 2929 */ 2930 qwStartingOffset = destOffset; 2931 } 2932 2933 if (curPos + qwStartingOffset != destOffset) { 2934 throw new EnvironmentFailureException 2935 (envImpl, EnvironmentFailureReason.LOG_INTEGRITY, 2936 "non-consecutive writes queued. " + 2937 "qwPos=" + queuedWritesPosition + 2938 " write destOffset=" + destOffset); 2939 } 2940 2941 System.arraycopy(data, arrayOffset, 2942 queuedWrites, queuedWritesPosition, 2943 size); 2944 queuedWritesPosition += size; 2945 } 2946 } 2947 2948 /** 2949 * Returns whether anything is in the write queue. 2950 */ 2951 boolean hasQueuedWrites() { 2952 return queuedWritesPosition > 0; 2953 } 2954 2955 /* 2956 * Execute pending writes. Assumes fsyncFileSynchronizer is not held. 2957 */ dequeuePendingWrites()2958 private void dequeuePendingWrites() 2959 throws DatabaseException { 2960 2961 assert !fsyncFileSynchronizer.isHeldByCurrentThread(); 2962 2963 fsyncFileSynchronizer.lock(); 2964 try { 2965 dequeuePendingWrites1(); 2966 } finally { 2967 fsyncFileSynchronizer.unlock(); 2968 } 2969 } 2970 2971 /* 2972 * Execute pending writes. Assumes fsyncFileSynchronizer is held. 2973 */ dequeuePendingWrites1()2974 private void dequeuePendingWrites1() 2975 throws DatabaseException { 2976 2977 assert fsyncFileSynchronizer.isHeldByCurrentThread(); 2978 2979 try { 2980 synchronized (queuedWrites) { 2981 /* Nothing to see here. Move along. */ 2982 if (queuedWritesPosition == 0) { 2983 return; 2984 } 2985 2986 RandomAccessFile file = getWritableFile(qwFileNum, false); 2987 synchronized (file) { 2988 file.seek(qwStartingOffset); 2989 file.write(queuedWrites, 0, queuedWritesPosition); 2990 nBytesWrittenFromWriteQueue.add(queuedWritesPosition); 2991 nWritesFromWriteQueue.increment(); 2992 if (VERIFY_CHECKSUMS) { 2993 file.seek(qwStartingOffset); 2994 file.read(queuedWrites, 0, queuedWritesPosition); 2995 ByteBuffer bb = 2996 ByteBuffer.allocate(queuedWritesPosition); 2997 bb.put(queuedWrites, 0, queuedWritesPosition); 2998 bb.position(0); 2999 verifyChecksums 3000 (bb, qwStartingOffset, "post-write"); 3001 } 3002 } 3003 3004 /* We flushed the queue. Reset the buffer. */ 3005 queuedWritesPosition = 0; 3006 } 3007 } catch (IOException e) { 3008 throw new LogWriteException 3009 (envImpl, "IOException during fsync", e); 3010 } 3011 } 3012 3013 /** 3014 * getWritableFile must be called under the log write latch. 3015 * 3016 * Typically, endOfLogRWFile is not null. Hence the 3017 * fsyncFileSynchronizer does not need to be locked (which would 3018 * block the write queue from operating. 3019 */ getWritableFile(final long fileNumber, final boolean doLock)3020 private RandomAccessFile getWritableFile(final long fileNumber, 3021 final boolean doLock) { 3022 try { 3023 if (endOfLogRWFile == null) { 3024 3025 /* 3026 * We need to make a file descriptor for the end of the 3027 * log. This is guaranteed to be called under the log 3028 * write latch. 3029 * 3030 * Protect both the RWFile and SyncFile under this lock, 3031 * to avoid a race for creating the file and writing the 3032 * header. [#20732] 3033 */ 3034 if (doLock) { 3035 fsyncFileSynchronizer.lock(); 3036 } 3037 try { 3038 endOfLogRWFile = 3039 makeFileHandle(fileNumber, 3040 getAppropriateReadWriteMode()). 3041 getFile(); 3042 endOfLogSyncFile = 3043 makeFileHandle(fileNumber, 3044 getAppropriateReadWriteMode()). 3045 getFile(); 3046 } finally { 3047 if (doLock) { 3048 fsyncFileSynchronizer.unlock(); 3049 } 3050 } 3051 } 3052 3053 return endOfLogRWFile; 3054 } catch (Exception e) { 3055 3056 /* 3057 * If we can't get a write channel, we need to invalidate the 3058 * environment. 3059 */ 3060 throw new EnvironmentFailureException 3061 (envImpl, EnvironmentFailureReason.LOG_INTEGRITY, e); 3062 } 3063 } 3064 3065 /** 3066 * FSync the log file that makes up the end of the log. 3067 */ force()3068 private void force() 3069 throws DatabaseException, IOException { 3070 3071 /* 3072 * Get a local copy of the end of the log file descriptor, it could 3073 * change. No need to latch, no harm done if we get an old file 3074 * descriptor, because we forcibly fsync under the log write latch 3075 * when we switch files. 3076 * 3077 * If there is no current end file descriptor, we know that the log 3078 * file has flipped to a new file since the fsync was issued. 3079 */ 3080 fsyncFileSynchronizer.lock(); 3081 try { 3082 3083 /* Flush any queued writes. */ 3084 if (useWriteQueue) { 3085 dequeuePendingWrites1(); 3086 } 3087 3088 RandomAccessFile file = endOfLogSyncFile; 3089 if (file != null) { 3090 bumpWriteCount("fsync"); 3091 FileChannel ch = file.getChannel(); 3092 try { 3093 long start = System.currentTimeMillis(); 3094 ch.force(false); 3095 nLogFSyncs.increment(); 3096 final long fsyncMs = System.currentTimeMillis() - start; 3097 nFSyncTime.add(fsyncMs); 3098 } catch (ClosedChannelException e) { 3099 3100 /* 3101 * The channel should never be closed. It may be closed 3102 * because of an interrupt received by another thread. 3103 * See SR [#10463]. 3104 */ 3105 throw new ThreadInterruptedException 3106 (envImpl, 3107 "Channel closed, may be due to thread interrupt", 3108 e); 3109 } 3110 3111 assert EnvironmentImpl.maybeForceYield(); 3112 } 3113 3114 /* Flush any writes which were queued while fsync'ing. */ 3115 if (useWriteQueue) { 3116 dequeuePendingWrites1(); 3117 } 3118 } finally { 3119 fsyncFileSynchronizer.unlock(); 3120 } 3121 } 3122 3123 /** 3124 * Close the end of the log file descriptor. Use atomic assignment to 3125 * ensure that we won't force and close on the same descriptor. 3126 */ close()3127 void close() 3128 throws IOException { 3129 3130 /* 3131 * Protect both the RWFile and SyncFile under this lock out of 3132 * paranoia, although we don't expect two threads to call close 3133 * concurrently. [#20732] 3134 */ 3135 fsyncFileSynchronizer.lock(); 3136 try { 3137 IOException firstException = null; 3138 if (endOfLogRWFile != null) { 3139 RandomAccessFile file = endOfLogRWFile; 3140 3141 /* 3142 * Null out so that other threads know endOfLogRWFile is no 3143 * longer available. 3144 */ 3145 endOfLogRWFile = null; 3146 try { 3147 file.close(); 3148 } catch (IOException e) { 3149 /* Save this exception, so we can try second close. */ 3150 firstException = e; 3151 } 3152 } 3153 if (endOfLogSyncFile != null) { 3154 RandomAccessFile file = endOfLogSyncFile; 3155 3156 /* 3157 * Null out so that other threads know endOfLogSyncFile is 3158 * no longer available. 3159 */ 3160 endOfLogSyncFile = null; 3161 file.close(); 3162 } 3163 3164 if (firstException != null) { 3165 throw firstException; 3166 } 3167 } finally { 3168 fsyncFileSynchronizer.unlock(); 3169 } 3170 } 3171 } 3172 3173 /* 3174 * Generate IOExceptions for testing. 3175 */ 3176 3177 /* Testing switch. public so others can read the value. */ 3178 public static final boolean LOGWRITE_EXCEPTION_TESTING; 3179 private static String RRET_PROPERTY_NAME = "je.logwrite.exception.testing"; 3180 3181 static { 3182 LOGWRITE_EXCEPTION_TESTING = 3183 (System.getProperty(RRET_PROPERTY_NAME) != null); 3184 } 3185 3186 /* Max write counter value. */ 3187 private static final int LOGWRITE_EXCEPTION_MAX = 100; 3188 /* Current write counter value. */ 3189 private int logWriteExceptionCounter = 0; 3190 /* Whether an exception has been thrown. */ 3191 private boolean logWriteExceptionThrown = false; 3192 /* Random number generator. */ 3193 private Random logWriteExceptionRandom = null; 3194 generateLogWriteException(RandomAccessFile file, ByteBuffer data, long destOffset, long fileNum)3195 private void generateLogWriteException(RandomAccessFile file, 3196 ByteBuffer data, 3197 long destOffset, 3198 long fileNum) 3199 throws DatabaseException, IOException { 3200 3201 if (logWriteExceptionThrown) { 3202 (new Exception("Write after LogWriteException")). 3203 printStackTrace(); 3204 } 3205 logWriteExceptionCounter += 1; 3206 if (logWriteExceptionCounter >= LOGWRITE_EXCEPTION_MAX) { 3207 logWriteExceptionCounter = 0; 3208 } 3209 if (logWriteExceptionRandom == null) { 3210 logWriteExceptionRandom = new Random(System.currentTimeMillis()); 3211 } 3212 if (logWriteExceptionCounter == 3213 logWriteExceptionRandom.nextInt(LOGWRITE_EXCEPTION_MAX)) { 3214 int len = logWriteExceptionRandom.nextInt(data.remaining()); 3215 if (len > 0) { 3216 byte[] a = new byte[len]; 3217 data.get(a, 0, len); 3218 ByteBuffer buf = ByteBuffer.wrap(a); 3219 writeToFile(file, buf, destOffset, fileNum, 3220 false /*flushRequired*/); 3221 } 3222 logWriteExceptionThrown = true; 3223 throw new IOException("Randomly generated for testing"); 3224 } 3225 } 3226 3227 /** 3228 * The factory interface for creating RandomAccessFiles. For production 3229 * use, the default factory is always used and a DefaultRandomAccessFile is 3230 * always created. For testing, the factory can be overridden to return a 3231 * subclass of DefaultRandomAccessFile that overrides methods and injects 3232 * faults, for example. 3233 */ 3234 public interface FileFactory { 3235 3236 /** 3237 * @param envHome can be used to distinguish environments in a test 3238 * program that opens multiple environments. Not for production use. 3239 * 3240 * @param fullName the full file name to be passed to the 3241 * RandomAccessFile constructor. 3242 * 3243 * @param mode the file mode to be passed to the RandomAccessFile 3244 * constructor. 3245 */ createFile(File envHome, String fullName, String mode)3246 RandomAccessFile createFile(File envHome, String fullName, String mode) 3247 throws FileNotFoundException; 3248 } 3249 3250 /** 3251 * The RandomAccessFile for production use. Tests that override the 3252 * default FileFactory should return a RandomAccessFile that subclasses 3253 * this class to inherit workarounds such as the overridden length method. 3254 */ 3255 public static class DefaultRandomAccessFile extends RandomAccessFile { 3256 DefaultRandomAccessFile(String fullName, String mode)3257 public DefaultRandomAccessFile(String fullName, String mode) 3258 throws FileNotFoundException { 3259 3260 super(fullName, mode); 3261 } 3262 3263 /** 3264 * RandomAccessFile.length() is not thread safe and side-effects the 3265 * file pointer if interrupted in the middle. It is synchronized here 3266 * to work around that problem. 3267 */ 3268 @Override length()3269 public synchronized long length() 3270 throws IOException { 3271 3272 return super.length(); 3273 } 3274 } 3275 3276 /** 3277 * The factory instance used to create RandomAccessFiles. This field is 3278 * intentionally public and non-static so it may be set by tests. See 3279 * FileFactory. 3280 */ 3281 public static FileFactory fileFactory = new FileFactory() { 3282 3283 public RandomAccessFile createFile(File envHome, 3284 String fullName, 3285 String mode) 3286 throws FileNotFoundException { 3287 3288 return new DefaultRandomAccessFile(fullName, mode); 3289 } 3290 }; 3291 } 3292