/*- * See the file LICENSE for redistribution information. * * Copyright (c) 2002, 2014 Oracle and/or its affiliates. All rights reserved. * */ package com.sleepycat.je.log; import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_BYTES_READ_FROM_WRITEQUEUE; import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_BYTES_WRITTEN_FROM_WRITEQUEUE; import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_FILE_OPENS; import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_LOG_FSYNCS; import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_OPEN_FILES; import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_RANDOM_READS; import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_RANDOM_READ_BYTES; import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_RANDOM_WRITES; import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_RANDOM_WRITE_BYTES; import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_READS_FROM_WRITEQUEUE; import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_SEQUENTIAL_READS; import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_SEQUENTIAL_READ_BYTES; import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_SEQUENTIAL_WRITES; import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_SEQUENTIAL_WRITE_BYTES; import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_WRITEQUEUE_OVERFLOW; import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_WRITEQUEUE_OVERFLOW_FAILURES; import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_WRITES_FROM_WRITEQUEUE; import java.io.File; import java.io.FileNotFoundException; import java.io.FilenameFilter; import java.io.IOException; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.ClosedChannelException; import java.nio.channels.FileChannel; import java.nio.channels.FileLock; import java.nio.channels.OverlappingFileLockException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.Hashtable; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Random; import java.util.Set; import java.util.concurrent.locks.ReentrantLock; import com.sleepycat.je.DatabaseException; import com.sleepycat.je.EnvironmentFailureException; import com.sleepycat.je.EnvironmentLockedException; import com.sleepycat.je.LogWriteException; import com.sleepycat.je.StatsConfig; import com.sleepycat.je.ThreadInterruptedException; import com.sleepycat.je.config.EnvironmentParams; import com.sleepycat.je.dbi.DbConfigManager; import com.sleepycat.je.dbi.EnvironmentFailureReason; import com.sleepycat.je.dbi.EnvironmentImpl; import com.sleepycat.je.log.entry.FileHeaderEntry; import com.sleepycat.je.log.entry.LogEntry; import com.sleepycat.je.utilint.DbLsn; import com.sleepycat.je.utilint.HexFormatter; import com.sleepycat.je.utilint.IntStat; import com.sleepycat.je.utilint.LongStat; import com.sleepycat.je.utilint.RelatchRequiredException; import com.sleepycat.je.utilint.StatGroup; /** * The FileManager presents the abstraction of one contiguous file. It doles * out LSNs. 
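 *
 * <p>Illustrative sketch (using the DbLsn utilities seen throughout this
 * class): an LSN is a single long that packs a file number and an
 * intra-file offset:
 * <pre>
 *     long lsn = DbLsn.makeLsn(3L, 512L);      // file 0x00000003, offset 512
 *     long file = DbLsn.getFileNumber(lsn);    // 3
 *     long offset = DbLsn.getFileOffset(lsn);  // 512
 * </pre>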
 */
public class FileManager {

    public enum FileMode {
        READ_MODE("r", false),
        READWRITE_MODE("rw", true),
        READWRITE_ODSYNC_MODE("rwd", true),
        READWRITE_OSYNC_MODE("rws", true);

        private String fileModeValue;
        private boolean isWritable;

        private FileMode(String fileModeValue, boolean isWritable) {
            this.fileModeValue = fileModeValue;
            this.isWritable = isWritable;
        }

        public String getModeValue() {
            return fileModeValue;
        }

        public boolean isWritable() {
            return isWritable;
        }
    }

    static boolean IO_EXCEPTION_TESTING_ON_WRITE = false;
    static boolean IO_EXCEPTION_TESTING_ON_READ = false;
    static boolean THROW_RRE_FOR_UNIT_TESTS = false;
    private static final String DEBUG_NAME = FileManager.class.getName();
    private static final boolean DEBUG = false;

    /**
     * Returns whether we are in a test mode where we attempt to continue
     * after a write IOException. This method will never return true in
     * production use.
     */
    public static boolean continueAfterWriteException() {
        return IO_EXCEPTION_TESTING_ON_WRITE && !THROW_RRE_FOR_UNIT_TESTS;
    }

    /*
     * The number of writes that have been performed.
     *
     * public so that unit tests can diddle them.
     */
    public static long WRITE_COUNT = 0;

    /*
     * The write count value where we should stop or throw.
     */
    public static long STOP_ON_WRITE_COUNT = Long.MAX_VALUE;

    /*
     * If we're throwing, then throw on write #'s STOP_ON_WRITE_COUNT through
     * STOP_ON_WRITE_COUNT + N_BAD_WRITES - 1 (inclusive); see
     * bumpWriteCount.
     */
    public static long N_BAD_WRITES = Long.MAX_VALUE;

    /*
     * If true, throw an IOException on write #'s STOP_ON_WRITE_COUNT through
     * STOP_ON_WRITE_COUNT + N_BAD_WRITES - 1 (inclusive); if false, halt the
     * process on those writes.
     */
    public static boolean THROW_ON_WRITE = false;

    public static final String JE_SUFFIX = ".jdb";   // regular log files
    public static final String DEL_SUFFIX = ".del";  // cleaned files
    public static final String BAD_SUFFIX = ".bad";  // corrupt files
    private static final String LOCK_FILE = "je.lck"; // lock file
    static final String[] DEL_SUFFIXES = { DEL_SUFFIX };
    static final String[] JE_SUFFIXES = { JE_SUFFIX };
    private static final String[] JE_AND_DEL_SUFFIXES =
        { JE_SUFFIX, DEL_SUFFIX };

    /*
     * The suffix used to denote a file that is in the process of being
     * transferred during a network backup. The file may not have been
     * completely transferred, or its digest verified.
     */
    public static final String TMP_SUFFIX = ".tmp";

    /*
     * The suffix used to rename files out of the way, if they are being
     * retained during a backup. Note that the suffix is used in conjunction
     * with a backup number as described in NetworkBackup.
     */
    public static final String BUP_SUFFIX = ".bup";

    /* May be set to false to speed unit tests. */
    private boolean syncAtFileEnd = true;

    private final EnvironmentImpl envImpl;
    private final long maxFileSize;
    private final File dbEnvHome;
    private final File[] dbEnvDataDirs;

    /* True if .del files should be included in the list of log files. */
    private boolean includeDeletedFiles = false;

    /* File cache */
    private final FileCache fileCache;
    private FileCacheWarmer fileCacheWarmer;

    /* The channel and lock for the je.lck file. */
    private RandomAccessFile lockFile;
    private FileChannel channel;
    private FileLock envLock;
    private FileLock exclLock;

    /* True if all files should be opened readonly.
     */
    private final boolean readOnly;

    /* Handles onto log position */
    private long currentFileNum;     // number of the current file
    private long nextAvailableLsn;   // nextLSN is the next one available
    private long lastUsedLsn;        // last LSN used in the current log file
    private long prevOffset;         // Offset to use for the previous pointer
    private boolean forceNewFile;    // Force new file on next write

    /*
     * Saved versions of the above. Save these so that if a write causes an
     * IOException, we can back the log up to the last known good LSN.
     */
    private long savedCurrentFileNum;
    private long savedNextAvailableLsn; // nextLSN is the next one available
    private long savedLastUsedLsn;      // last LSN used in the current file
    private long savedPrevOffset;       // Offset for the previous pointer
    private boolean savedForceNewFile;

    /* endOfLog is used for writes and fsyncs to the end of the log. */
    private final LogEndFileDescriptor endOfLog;

    /*
     * When we bump the LSNs over to a new file, we must remember the last
     * LSN of the previous file so we can set the prevOffset field of the
     * file header appropriately. We have to save it in a map because there
     * is a time lag between when we know what the last LSN is and when we
     * actually do the file write, because LSN bumping is done before we get
     * a write buffer. This map is keyed by file num->last LSN.
     */
    private final Map<Long, Long> perFileLastUsedLsn;

    /*
     * True if we should use the Write Queue. This queue is enabled by
     * default and contains any write() operations which were attempted but
     * would have blocked because an fsync() or another write() was in
     * progress at the time. The operations on the Write Queue are later
     * executed by the next operation that is able to grab the fsync latch.
     * File systems like ext3 need this queue in order to achieve reasonable
     * throughput, since they acquire an exclusive mutex on the inode during
     * any IO operation (seek/read/write/fsync). OSes like Windows and
     * Solaris do not need it, since they are able to handle concurrent IO
     * operations on a single file.
     */
    private final boolean useWriteQueue;

    /* The starting size of the Write Queue. */
    private final int writeQueueSize;

    /*
     * Use O_DSYNC to open JE log files.
     */
    private final boolean useODSYNC;

    /* public for unit tests. */
    public boolean VERIFY_CHECKSUMS = false;

    /*
     * Non-0 means to use envHome/data001 through envHome/data00N for the
     * environment directories, where N is nDataDirs. Distribute *.jdb files
     * through dataNNN directories round-robin.
     */
    private final int nDataDirs;

    /*
     * Last file to which any IO was done.
     */
    long lastFileNumberTouched = -1;

    /*
     * Current file offset of lastFile.
     */
    long lastFileTouchedOffset = 0;

    /*
     * For IO stats, this is a measure of what is "close enough" to
     * constitute a sequential IO vs a random IO. 1MB for now. Generally a
     * seek within a few tracks of the current disk track is "fast" and only
     * requires a single rotational latency.
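     *
     * Worked example (illustrative): with the 1MB delta defined below, a
     * write at offset 0x200000 followed by one at offset 0x280000 (512KB
     * apart) counts as sequential, while a jump to offset 0x400000 (2MB
     * away) counts as random.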
*/ private static final long ADJACENT_TRACK_SEEK_DELTA = 1 << 20; /* * Stats */ final StatGroup stats; final LongStat nRandomReads; final LongStat nRandomWrites; final LongStat nSequentialReads; final LongStat nSequentialWrites; final LongStat nRandomReadBytes; final LongStat nRandomWriteBytes; final LongStat nSequentialReadBytes; final LongStat nSequentialWriteBytes; final IntStat nFileOpens; final IntStat nOpenFiles; final LongStat nBytesReadFromWriteQueue; final LongStat nBytesWrittenFromWriteQueue; final LongStat nReadsFromWriteQueue; final LongStat nWritesFromWriteQueue; final LongStat nWriteQueueOverflow; final LongStat nWriteQueueOverflowFailures; /* all fsyncs, includes those issued for group commit */ final LongStat nLogFSyncs; final LongStat nFSyncTime; /** * Set up the file cache and initialize the file manager to point to the * beginning of the log. * * @param dbEnvHome environment home directory * * @throws IllegalArgumentException via Environment ctor * * @throws EnvironmentLockedException via Environment ctor */ public FileManager(EnvironmentImpl envImpl, File dbEnvHome, boolean readOnly) throws EnvironmentLockedException { this.envImpl = envImpl; this.dbEnvHome = dbEnvHome; this.readOnly = readOnly; boolean success = false; stats = new StatGroup(LogStatDefinition.FILEMGR_GROUP_NAME, LogStatDefinition.FILEMGR_GROUP_DESC); nRandomReads = new LongStat(stats, FILEMGR_RANDOM_READS); nRandomWrites = new LongStat(stats, FILEMGR_RANDOM_WRITES); nSequentialReads = new LongStat(stats, FILEMGR_SEQUENTIAL_READS); nSequentialWrites = new LongStat(stats, FILEMGR_SEQUENTIAL_WRITES); nRandomReadBytes = new LongStat(stats, FILEMGR_RANDOM_READ_BYTES); nRandomWriteBytes = new LongStat(stats, FILEMGR_RANDOM_WRITE_BYTES); nSequentialReadBytes = new LongStat(stats, FILEMGR_SEQUENTIAL_READ_BYTES); nSequentialWriteBytes = new LongStat(stats, FILEMGR_SEQUENTIAL_WRITE_BYTES); nFileOpens = new IntStat(stats, FILEMGR_FILE_OPENS); nOpenFiles = new IntStat(stats, FILEMGR_OPEN_FILES); nBytesReadFromWriteQueue = new LongStat(stats, FILEMGR_BYTES_READ_FROM_WRITEQUEUE); nBytesWrittenFromWriteQueue = new LongStat(stats, FILEMGR_BYTES_WRITTEN_FROM_WRITEQUEUE); nReadsFromWriteQueue = new LongStat(stats, FILEMGR_READS_FROM_WRITEQUEUE); nWritesFromWriteQueue = new LongStat(stats, FILEMGR_WRITES_FROM_WRITEQUEUE); nWriteQueueOverflow = new LongStat(stats, FILEMGR_WRITEQUEUE_OVERFLOW); nWriteQueueOverflowFailures = new LongStat(stats, FILEMGR_WRITEQUEUE_OVERFLOW_FAILURES); nLogFSyncs = new LongStat(stats, FILEMGR_LOG_FSYNCS); nFSyncTime = new LongStat(stats, LogStatDefinition.GRPCMGR_FSYNC_TIME); try { /* Read configurations. 
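             *
             * A minimal je.properties sketch of the knobs read below (names
             * follow the je.* convention; values are illustrative, not
             * recommendations):
             *
             *     je.log.fileMax=100000000
             *     je.log.verifyChecksums=false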
             */
            DbConfigManager configManager = envImpl.getConfigManager();
            maxFileSize = configManager.getLong(EnvironmentParams.LOG_FILE_MAX);

            useWriteQueue = configManager.getBoolean
                (EnvironmentParams.LOG_USE_WRITE_QUEUE);
            writeQueueSize = configManager.getInt
                (EnvironmentParams.LOG_WRITE_QUEUE_SIZE);
            useODSYNC = configManager.getBoolean
                (EnvironmentParams.LOG_USE_ODSYNC);
            VERIFY_CHECKSUMS = configManager.getBoolean
                (EnvironmentParams.LOG_VERIFY_CHECKSUMS);

            nDataDirs =
                configManager.getInt(EnvironmentParams.LOG_N_DATA_DIRECTORIES);
            if (nDataDirs != 0) {
                dbEnvDataDirs = gatherDataDirs();
            } else {
                checkNoDataDirs();
                dbEnvDataDirs = null;
            }

            if (!envImpl.isMemOnly()) {
                if (!dbEnvHome.exists()) {
                    throw new IllegalArgumentException
                        ("Environment home " + dbEnvHome + " doesn't exist");
                }
                if (!lockEnvironment(readOnly, false)) {
                    throw new EnvironmentLockedException
                        (envImpl,
                         "The environment cannot be locked for " +
                         (readOnly ? "shared" : "single writer") +
                         " access.");
                }
            }

            /* Cache of files. */
            fileCache = new FileCache(configManager);

            /* Start out as if no log existed. */
            currentFileNum = 0L;
            nextAvailableLsn =
                DbLsn.makeLsn(currentFileNum, firstLogEntryOffset());
            lastUsedLsn = DbLsn.NULL_LSN;
            perFileLastUsedLsn =
                Collections.synchronizedMap(new HashMap<Long, Long>());
            prevOffset = 0L;
            endOfLog = new LogEndFileDescriptor();
            forceNewFile = false;
            saveLastPosition();

            final String stopOnWriteCountName = "je.debug.stopOnWriteCount";
            final String stopOnWriteCountProp =
                System.getProperty(stopOnWriteCountName);
            if (stopOnWriteCountProp != null) {
                try {
                    STOP_ON_WRITE_COUNT = Long.parseLong(stopOnWriteCountProp);
                } catch (NumberFormatException e) {
                    throw new IllegalArgumentException
                        ("Could not parse: " + stopOnWriteCountName, e);
                }
            }

            final String stopOnWriteActionName = "je.debug.stopOnWriteAction";
            final String stopOnWriteActionProp =
                System.getProperty(stopOnWriteActionName);
            if (stopOnWriteActionProp != null) {
                if (stopOnWriteActionProp.compareToIgnoreCase("throw") == 0) {
                    THROW_ON_WRITE = true;
                } else if (stopOnWriteActionProp.
                           compareToIgnoreCase("stop") == 0) {
                    THROW_ON_WRITE = false;
                } else {
                    throw new IllegalArgumentException
                        ("Unknown value for " + stopOnWriteActionName +
                         ": " + stopOnWriteActionProp);
                }
            }

            success = true;
        } finally {
            if (!success) {
                try {
                    close();
                } catch (IOException e) {
                    /*
                     * Klockwork - ok
                     * Eat it, we want to throw the original exception.
                     */
                }
            }
        }
    }

    /**
     * Set the file manager's "end of log".
     *
     * @param nextAvailableLsn LSN to be used for the next log entry
     * @param lastUsedLsn last LSN to have a valid entry, may be
     * DbLsn.NULL_LSN
     * @param prevOffset value to use for the prevOffset of the next entry.
     * If the beginning of the file, this is 0.
     */
    public void setLastPosition(long nextAvailableLsn,
                                long lastUsedLsn,
                                long prevOffset) {
        this.lastUsedLsn = lastUsedLsn;
        perFileLastUsedLsn.put(Long.valueOf(DbLsn.getFileNumber(lastUsedLsn)),
                               Long.valueOf(lastUsedLsn));
        this.nextAvailableLsn = nextAvailableLsn;
        currentFileNum = DbLsn.getFileNumber(this.nextAvailableLsn);
        this.prevOffset = prevOffset;
        saveLastPosition();
    }

    /*
     * Cause the current LSN state to be saved in case we fail after we have
     * bumped the LSN pointer but before we've successfully marshalled into
     * the log buffer.
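     *
     * Sketch of the intended call pattern (illustrative):
     *
     *     bumpLsn(entrySize);        // calls saveLastPosition() first
     *     try {
     *         // ... marshal the entry into the log buffer and write ...
     *     } catch (IOException e) {
     *         restoreLastPosition(); // roll LSN state back
     *     }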
     */
    void saveLastPosition() {
        savedNextAvailableLsn = nextAvailableLsn;
        savedLastUsedLsn = lastUsedLsn;
        savedPrevOffset = prevOffset;
        savedForceNewFile = forceNewFile;
        savedCurrentFileNum = currentFileNum;
    }

    void restoreLastPosition() {
        nextAvailableLsn = savedNextAvailableLsn;
        lastUsedLsn = savedLastUsedLsn;
        prevOffset = savedPrevOffset;
        forceNewFile = savedForceNewFile;
        currentFileNum = savedCurrentFileNum;
    }

    /**
     * May be used to disable sync at file end to speed unit tests.
     * Must only be used for unit testing, since log corruption may result.
     */
    public void setSyncAtFileEnd(boolean sync) {
        syncAtFileEnd = sync;
    }

    /*
     * File management
     */

    /**
     * public for cleaner.
     *
     * @return the number of the first file in this environment.
     */
    public Long getFirstFileNum() {
        return getFileNum(true);
    }

    public boolean getReadOnly() {
        return readOnly;
    }

    /**
     * @return the number of the last file in this environment.
     */
    public Long getLastFileNum() {
        return getFileNum(false);
    }

    /**
     * Returns the highest (current) file number. Because a long value cannot
     * be read atomically without synchronization, this method should be
     * called while holding the log write latch.
     */
    public long getCurrentFileNum() {
        return currentFileNum;
    }

    /**
     * For unit tests.
     */
    boolean getUseWriteQueue() {
        return useWriteQueue;
    }

    /**
     * For assertions that check whether a file is valid or has been deleted
     * via log cleaning.
     */
    public boolean isFileValid(long fileNum) {

        /*
         * If the file is the current file, it may be buffered and not yet
         * created. If the env is memory-only, we will never create or
         * delete log files.
         */
        if (fileNum == currentFileNum || envImpl.isMemOnly()) {
            return true;
        }

        /* Check for file existence. */
        String fileName = getFullFileName(fileNum, FileManager.JE_SUFFIX);
        File file = new File(fileName);
        return file.exists();
    }

    public void setIncludeDeletedFiles(boolean includeDeletedFiles) {
        this.includeDeletedFiles = includeDeletedFiles;
    }

    /**
     * Get all JE file numbers.
     * @return an array of all JE file numbers.
     */
    public Long[] getAllFileNumbers() {
        /* Get all the names in sorted order. */
        String[] names = listFileNames(JE_SUFFIXES);
        Long[] nums = new Long[names.length];
        for (int i = 0; i < nums.length; i += 1) {
            String name = names[i];
            long num = nums[i] = getNumFromName(name);
            if (nDataDirs != 0) {
                int dbEnvDataDirsIdx = getDataDirIndexFromName(name) - 1;
                if (dbEnvDataDirsIdx != (num % nDataDirs)) {
                    throw EnvironmentFailureException.unexpectedState
                        ("Found file " + name +
                         " but it should have been in data directory data" +
                         paddedDirNum((int) (num % nDataDirs) + 1) +
                         ". Perhaps it was moved or restored incorrectly?");
                }
            }
        }
        return nums;
    }

    /**
     * Get the next file number before/after currentFileNum.
     * @param currentFileNum1 the file we're at right now. Note that
     * it may not exist, if it's been cleaned and renamed.
     * @param forward if true, we want the next larger file, if false
     * we want the previous file
     * @return null if there is no following file, or if fileNum doesn't
     * exist
     */
    public Long getFollowingFileNum(long currentFileNum1, boolean forward) {
        /* Get all the names in sorted order. */
        String[] names = listFileNames(JE_SUFFIXES);

        /* Search for the current file. */
        String searchName = getFileName(currentFileNum1, JE_SUFFIX);
        int foundIdx =
            Arrays.binarySearch(names, searchName, stringComparator);

        boolean foundTarget = false;
        if (foundIdx >= 0) {
            if (forward) {
                foundIdx++;
            } else {
                foundIdx--;
            }
        } else {

            /*
             * currentFileNum not found (might have been cleaned). foundIdx
             * will be (-insertionPoint - 1).
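             *
             * For example, searching for "00000002.jdb" in
             * {"00000001.jdb", "00000003.jdb"} returns -2
             * (insertionPoint == 1), and Math.abs(-2 + 1) == 1 recovers
             * the insertion point.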
             */
            foundIdx = Math.abs(foundIdx + 1);
            if (!forward) {
                foundIdx--;
            }
        }

        /* The current fileNum is found, return the next or prev file. */
        if (forward && (foundIdx < names.length)) {
            foundTarget = true;
        } else if (!forward && (foundIdx > -1)) {
            foundTarget = true;
        }

        if (foundTarget) {
            return getNumFromName(names[foundIdx]);
        }
        return null;
    }

    /**
     * @return true if there are any files at all.
     */
    public boolean filesExist() {
        String[] names = listFileNames(JE_SUFFIXES);
        return (names.length != 0);
    }

    /**
     * Get the first or last file number in the set of JE files.
     *
     * @param first if true, get the first file, else get the last file
     * @return the file number or null if no files exist
     */
    private Long getFileNum(boolean first) {
        String[] names = listFileNames(JE_SUFFIXES);
        if (names.length == 0) {
            return null;
        }
        int index = 0;
        if (!first) {
            index = names.length - 1;
        }
        return getNumFromName(names[index]);
    }

    /**
     * Get the data directory number from a file name.
     *
     * @return the one-based NNN from this fileName's dataNNN directory
     * (callers subtract one for the index into dbEnvDataDirs), or -1 if
     * multiple data directories are not being used.
     */
    private int getDataDirIndexFromName(String fileName) {
        if (nDataDirs == 0) {
            return -1;
        }

        int dataDirEnd = fileName.lastIndexOf(File.separator);
        String dataDir = fileName.substring(0, dataDirEnd);
        return Integer.parseInt(dataDir.substring("data".length()));
    }

    /**
     * Get the file number from a file name.
     *
     * @param fileName the file name
     * @return the file number
     */
    public Long getNumFromName(String fileName) {
        String name = fileName;
        if (nDataDirs != 0) {
            name = name.substring(name.lastIndexOf(File.separator) + 1);
        }
        String fileNumber = name.substring(0, name.indexOf("."));
        return Long.valueOf(Long.parseLong(fileNumber, 16));
    }

    /**
     * Find JE files. Return names sorted in ascending fashion.
     * @param suffixes which type of file we're looking for
     * @return array of file names
     *
     * Used by unit tests, so package protection.
     */
    String[] listFileNames(String[] suffixes) {
        JEFileFilter fileFilter = new JEFileFilter(suffixes);
        return listFileNamesInternal(fileFilter);
    }

    /**
     * Find .jdb files which are >= the minimum file number and
     * <= the maximum file number.
     * Return names sorted in ascending fashion.
     *
     * @return array of file names
     */
    public String[] listFileNames(long minFileNumber, long maxFileNumber) {
        JEFileFilter fileFilter =
            new JEFileFilter(JE_SUFFIXES, minFileNumber, maxFileNumber);
        return listFileNamesInternal(fileFilter);
    }

    private static Comparator<File> fileComparator =
        new Comparator<File>() {

        /* Strip the directory prefix so files sort by file number. */
        private String getFileNum(File file) {
            String fname = file.toString();
            return fname.substring(fname.lastIndexOf(File.separator) + 1);
        }

        public int compare(File o1, File o2) {
            String fnum1 = getFileNum(o1);
            String fnum2 = getFileNum(o2);
            return fnum1.compareTo(fnum2);
        }
    };

    private static Comparator<String> stringComparator =
        new Comparator<String>() {

        /* Strip any dataNNN prefix so names sort by file number. */
        private String getFileNum(String fname) {
            return fname.substring(fname.indexOf(File.separator) + 1);
        }

        public int compare(String o1, String o2) {
            String fnum1 = getFileNum(o1);
            String fnum2 = getFileNum(o2);
            return fnum1.compareTo(fnum2);
        }
    };

    /**
     * Find JE files, flavor for unit test support.
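     * For example (illustrative), listFiles(envHome, new String[]
     * { ".jdb" }, false) returns the sorted *.jdb names directly under
     * envHome.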
     *
     * @param suffixes which type of file we're looking for
     * @return array of file names
     */
    public static String[] listFiles(File envDirFile,
                                     String[] suffixes,
                                     boolean envMultiSubDir) {

        String[] names = envDirFile.list(new JEFileFilter(suffixes));
        if (names == null) {
            /* list() returns null for a missing or unreadable directory. */
            names = new String[0];
        }

        List<String> subFileNames = new ArrayList<String>();
        if (envMultiSubDir) {
            for (File file : envDirFile.listFiles()) {
                if (file.isDirectory() && file.getName().startsWith("data")) {
                    File[] subFiles =
                        file.listFiles(new JEFileFilter(suffixes));
                    for (File subFile : subFiles) {
                        subFileNames.add(file.getName() + File.separator +
                                         subFile.getName());
                    }
                }
            }

            String[] totalFileNames =
                new String[names.length + subFileNames.size()];
            for (int i = 0; i < totalFileNames.length; i++) {
                if (i < names.length) {
                    totalFileNames[i] = names[i];
                } else {
                    totalFileNames[i] = subFileNames.get(i - names.length);
                }
            }
            names = totalFileNames;
        }

        Arrays.sort(names, stringComparator);

        return names;
    }

    public File[] listJDBFiles() {
        if (nDataDirs == 0) {
            return listJDBFilesInternalSingleDir
                (new JEFileFilter(JE_SUFFIXES));
        } else {
            return listJDBFilesInternalMultiDir
                (new JEFileFilter(JE_SUFFIXES));
        }
    }

    public File[] listJDBFilesInternalSingleDir(JEFileFilter fileFilter) {
        File[] files = dbEnvHome.listFiles(fileFilter);
        if (files != null) {
            Arrays.sort(files);
        } else {
            files = new File[0];
        }

        return files;
    }

    public File[] listJDBFilesInternalMultiDir(JEFileFilter fileFilter) {
        File[][] files = new File[nDataDirs][];
        int nTotalFiles = 0;
        int i = 0;
        for (File envDir : dbEnvDataDirs) {
            files[i] = envDir.listFiles(fileFilter);
            nTotalFiles += files[i].length;
            i++;
        }

        if (nTotalFiles == 0) {
            return new File[0];
        }

        File[] ret = new File[nTotalFiles];
        i = 0;
        for (File[] envFiles : files) {
            for (File envFile : envFiles) {
                ret[i++] = envFile;
            }
        }

        Arrays.sort(ret, fileComparator);
        return ret;
    }

    private String[] listFileNamesInternal(JEFileFilter fileFilter) {
        if (nDataDirs == 0) {
            return listFileNamesInternalSingleDir(fileFilter);
        } else {
            return listFileNamesInternalMultiDirs(fileFilter);
        }
    }

    private String[] listFileNamesInternalSingleDir(JEFileFilter fileFilter) {
        String[] fileNames = dbEnvHome.list(fileFilter);
        if (fileNames != null) {
            Arrays.sort(fileNames);
        } else {
            fileNames = new String[0];
        }
        return fileNames;
    }

    private String[] listFileNamesInternalMultiDirs(JEFileFilter filter) {
        String[][] files = new String[nDataDirs][];
        int nTotalFiles = 0;
        int i = 0;
        for (File envDir : dbEnvDataDirs) {
            files[i] = envDir.list(filter);

            String envDirName = envDir.toString();
            String dataDirName = envDirName.
                substring(envDirName.lastIndexOf(File.separator) + 1);

            for (int j = 0; j < files[i].length; j += 1) {
                files[i][j] = dataDirName + File.separator + files[i][j];
            }

            nTotalFiles += files[i].length;
            i++;
        }

        if (nTotalFiles == 0) {
            return new String[0];
        }

        String[] ret = new String[nTotalFiles];
        i = 0;
        for (String[] envFiles : files) {
            for (String envFile : envFiles) {
                ret[i++] = envFile;
            }
        }

        Arrays.sort(ret, stringComparator);
        return ret;
    }

    private void checkNoDataDirs() {
        String[] dataDirNames =
            dbEnvHome.list(new FilenameFilter() {
                    public boolean accept(File dir, String name) {
                        /* We'll validate the subdirNum later.
*/ return name != null && name.length() == "dataNNN".length() && name.startsWith("data"); } } ); if (dataDirNames != null && dataDirNames.length != 0) { throw EnvironmentFailureException.unexpectedState (EnvironmentParams.LOG_N_DATA_DIRECTORIES.getName() + " was not set and expected to find no" + " data directories, but found " + dataDirNames.length + " data directories instead."); } } public File[] gatherDataDirs() { String[] dataDirNames = dbEnvHome.list(new FilenameFilter() { public boolean accept(File dir, String name) { /* We'll validate the subdirNum later. */ return name != null && name.length() == "dataNNN".length() && name.startsWith("data"); } } ); if (dataDirNames != null) { Arrays.sort(dataDirNames); } else { dataDirNames = new String[0]; } if (dataDirNames.length != nDataDirs) { throw EnvironmentFailureException.unexpectedState (EnvironmentParams.LOG_N_DATA_DIRECTORIES.getName() + " was set and expected to find " + nDataDirs + " data directories, but found " + dataDirNames.length + " instead."); } int ddNum = 1; File[] dataDirs = new File[nDataDirs]; for (String fn : dataDirNames) { String subdirNumStr = fn.substring(4); try { int subdirNum = Integer.parseInt(subdirNumStr); if (subdirNum != ddNum) { throw EnvironmentFailureException.unexpectedState ("Expected to find data subdir: data" + paddedDirNum(ddNum) + " but found data" + subdirNumStr + " instead."); } File dataDir = new File(dbEnvHome, fn); if (!dataDir.exists()) { throw EnvironmentFailureException.unexpectedState ("Data dir: " + dataDir + " doesn't exist."); } if (!dataDir.isDirectory()) { throw EnvironmentFailureException.unexpectedState ("Data dir: " + dataDir + " is not a directory."); } dataDirs[ddNum - 1] = dataDir; } catch (NumberFormatException E) { throw EnvironmentFailureException.unexpectedState ("Illegal data subdir: data" + subdirNumStr); } ddNum++; } return dataDirs; } private String paddedDirNum(int dirNum) { String paddedStr = "000" + dirNum; int len = paddedStr.length(); return paddedStr.substring(len - 3); } /** * @return the full file name and path for the nth JE file. */ String[] getFullFileNames(long fileNum) { if (includeDeletedFiles) { int nSuffixes = JE_AND_DEL_SUFFIXES.length; String[] ret = new String[nSuffixes]; for (int i = 0; i < nSuffixes; i++) { ret[i] = getFullFileName(fileNum, JE_AND_DEL_SUFFIXES[i]); } return ret; } return new String[] { getFullFileName(fileNum, JE_SUFFIX) }; } private File getDataDir(long fileNum) { return (nDataDirs == 0) ? dbEnvHome : dbEnvDataDirs[((int) (fileNum % nDataDirs))]; } public String getFullFileName(long fileNum) { return getFullFileName(fileNum, JE_SUFFIX); } /** * @return the full file name and path for this file name. */ public String getFullFileName(long fileNum, String suffix) { File dbEnvDataDir = getDataDir(fileNum); return dbEnvDataDir + File.separator + getFileName(fileNum, suffix); } /* * Return the full file name of a specified log file name, including the * sub directories names if needed. */ public String getFullFileName(String fileName) { final int suffixStartPos = fileName.indexOf("."); String suffix = fileName.substring(suffixStartPos, fileName.length()); assert suffix != null; String fileNum = fileName.substring(0, suffixStartPos); return getFullFileName (Long.valueOf(Long.parseLong(fileNum, 16)), suffix); } /** * @return the file name for the nth file. */ public static String getFileName(long fileNum, String suffix) { return (getFileNumberString(fileNum) + suffix); } /** @return the file name for the nth log (*.jdb) file. 
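     * For example, getFileName(3L) returns "00000003.jdb": file numbers are
     * formatted as eight hex digits (see getFileNumberString below).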
*/ public static String getFileName(long fileNum) { return getFileName(fileNum, JE_SUFFIX); } /** * HexFormatter generates a 0 padded string starting with 0x. We want * the right most 8 digits, so start at 10. */ private static String getFileNumberString(long fileNum) { return HexFormatter.formatLong(fileNum).substring(10); } /** * @return true if successful, false if File.renameTo returns false, which * can occur on Windows if the file was recently closed. */ public boolean renameFile(final long fileNum, final String newSuffix) throws IOException, DatabaseException { return renameFile(fileNum, newSuffix, null) != null; } /** * Rename this file to NNNNNNNN.suffix. If that file already exists, try * NNNNNNNN.suffix.1, etc. Used for deleting files or moving corrupt files * aside. * * @param fileNum the file we want to move * * @param newSuffix the new file suffix * * @param subDir the data directory sub-directory to rename the file into. * The subDir must already exist. May be null to leave the file in its * current data directory. * * @return renamed File if successful, or null if File.renameTo returns * false, which can occur on Windows if the file was recently closed. */ public File renameFile(final long fileNum, final String newSuffix, final String subDir) throws IOException { final File oldDir = getDataDir(fileNum); final String oldName = getFileName(fileNum); final File oldFile = new File(oldDir, oldName); final File newDir = (subDir != null) ? (new File(oldDir, subDir)) : oldDir; final String newName = getFileName(fileNum, newSuffix); String generation = ""; int repeatNum = 0; while (true) { final File newFile = new File(newDir, newName + generation); if (newFile.exists()) { repeatNum++; generation = "." + repeatNum; continue; } clearFileCache(fileNum); final boolean success = oldFile.renameTo(newFile); return success ? newFile : null; } } /** * Delete log file NNNNNNNN. * * @param fileNum the file we want to move * * @return true if successful, false if File.delete returns false, which * can occur on Windows if the file was recently closed. */ public boolean deleteFile(final long fileNum) throws IOException, DatabaseException { final String fileName = getFullFileNames(fileNum)[0]; clearFileCache(fileNum); final File file = new File(fileName); return file.delete(); } /** * Returns the log version for the given file. */ public int getFileLogVersion(long fileNum) throws DatabaseException { try { FileHandle handle = getFileHandle(fileNum); int logVersion = handle.getLogVersion(); handle.release(); return logVersion; } catch (FileNotFoundException e) { throw new EnvironmentFailureException (envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND, e); } catch (ChecksumException e) { throw new EnvironmentFailureException (envImpl, EnvironmentFailureReason.LOG_CHECKSUM, e); } } /** * Return a read only file handle that corresponds to this file number. * Retrieve it from the cache or open it anew and validate the file header. * This method takes a latch on this file, so that the file descriptor will * be held in the cache as long as it's in use. When the user is done with * the file, the latch must be released. * * @param fileNum which file * @return the file handle for the existing or newly created file */ public FileHandle getFileHandle(long fileNum) throws FileNotFoundException, ChecksumException, DatabaseException { /* Check the file cache for this file. */ Long fileId = Long.valueOf(fileNum); FileHandle fileHandle = null; /** * Loop until we get an open FileHandle. 
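     * Sketch of the protocol (restating the comments inside the loop
     * below): check the cache without latching; on a miss, latch the cache,
     * re-check, and insert an unopened latched handle; then open the file
     * outside the cache latch so opens of other files are not blocked.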
*/ try { while (true) { /* * The file cache is intentionally not latched here so that * it's not a bottleneck in the fast path. We check that the * file handle that we get back is really still open after we * latch it down below. */ fileHandle = fileCache.get(fileId); /* * If the file isn't in the cache, latch the cache and check * again. Under the latch, if the file is not in the cache we * add it to the cache but do not open the file yet. We latch * the handle here, and open the file further below after * releasing the cache latch. This prevents blocking other * threads that are opening other files while we open this * file. The latch on the handle blocks other threads waiting * to open the same file, which is necessary. */ boolean newHandle = false; if (fileHandle == null) { synchronized (fileCache) { fileHandle = fileCache.get(fileId); if (fileHandle == null) { newHandle = true; fileHandle = addFileHandle(fileId); } } } if (newHandle) { /* * Open the file with the fileHandle latched. It was * latched by addFileHandle above. */ boolean success = false; try { openFileHandle(fileHandle, FileMode.READ_MODE, null /*existingHandle*/); success = true; } finally { if (!success) { /* An exception is in flight -- clean up. */ fileHandle.release(); clearFileCache(fileNum); } } } else { /* * The handle was found in the cache. Latch the fileHandle * before checking getFile below and returning. */ if (!fileHandle.latchNoWait()) { /* * But the handle was latched. Rather than wait, let's * just make a new transient handle. It doesn't need * to be latched, but it does need to be closed. */ final FileHandle existingHandle = fileHandle; fileHandle = new FileHandle( envImpl, fileId, getFileNumberString(fileId)) { @Override public void release() throws DatabaseException { try { close(); } catch (IOException E) { // Ignore } } }; openFileHandle(fileHandle, FileMode.READ_MODE, existingHandle); } } /* * We may have obtained this file handle outside the file cache * latch, so we have to test that the handle is still valid. * If it's not, then loop back and try again. */ if (fileHandle.getFile() == null) { fileHandle.release(); } else { break; } } } catch (FileNotFoundException e) { /* Handle at higher levels. */ throw e; } catch (IOException e) { throw new EnvironmentFailureException (envImpl, EnvironmentFailureReason.LOG_READ, e); } return fileHandle; } /** * Creates a new FileHandle and adds it to the cache, but does not open * the file. * @return the latched FileHandle. */ private FileHandle addFileHandle(Long fileNum) throws IOException, DatabaseException { FileHandle fileHandle = new FileHandle(envImpl, fileNum, getFileNumberString(fileNum)); fileCache.add(fileNum, fileHandle); fileHandle.latch(); return fileHandle; } private FileMode getAppropriateReadWriteMode() { if (useODSYNC) { return FileMode.READWRITE_ODSYNC_MODE; } return FileMode.READWRITE_MODE; } /** * Creates a new handle and opens it. Does not add the handle to the * cache. */ private FileHandle makeFileHandle(long fileNum, FileMode mode) throws FileNotFoundException, ChecksumException { FileHandle fileHandle = new FileHandle(envImpl, fileNum, getFileNumberString(fileNum)); openFileHandle(fileHandle, mode, null /*existingHandle*/); return fileHandle; } /** * Opens the file for the given handle and initializes it. * * @param existingHandle is an already open handle for the same file or * null. If non-null it is used to avoid the cost of reading the file * header. 
*/ private void openFileHandle(FileHandle fileHandle, FileMode mode, FileHandle existingHandle) throws FileNotFoundException, ChecksumException { nFileOpens.increment(); long fileNum = fileHandle.getFileNum(); String[] fileNames = getFullFileNames(fileNum); RandomAccessFile newFile = null; String fileName = null; boolean success = false; try { /* * Open the file. Note that we are going to try a few names to open * this file -- we'll try for N.jdb, and if that doesn't exist and * we're configured to look for all types, we'll look for N.del. */ FileNotFoundException FNFE = null; for (String fileName2 : fileNames) { fileName = fileName2; try { newFile = fileFactory.createFile(dbEnvHome, fileName, mode.getModeValue()); break; } catch (FileNotFoundException e) { /* Save the first exception thrown. */ if (FNFE == null) { FNFE = e; } } } /* * If we didn't find the file or couldn't create it, rethrow the * exception. */ if (newFile == null) { assert FNFE != null; throw FNFE; } /* * If there is an existing open handle, there is no need to read or * validate the header. Note that the log version is zero if the * existing handle is not fully initialized. */ if (existingHandle != null) { final int logVersion = existingHandle.getLogVersion(); if (logVersion > 0) { fileHandle.init(newFile, logVersion); success = true; return; } } int logVersion = LogEntryType.LOG_VERSION; if (newFile.length() == 0) { /* * If the file is empty, reinitialize it if we can. If not, * send the file handle back up; the calling code will deal * with the fact that there's nothing there. */ if (mode.isWritable()) { /* An empty file, write a header. */ long lastLsn = DbLsn.longToLsn(perFileLastUsedLsn.remove (Long.valueOf(fileNum - 1))); long headerPrevOffset = 0; if (lastLsn != DbLsn.NULL_LSN) { headerPrevOffset = DbLsn.getFileOffset(lastLsn); } if ((headerPrevOffset == 0) && (fileNum > 1) && syncAtFileEnd) { /* Get more info if this happens again. [#20732] */ throw EnvironmentFailureException.unexpectedState (envImpl, "Zero prevOffset fileNum=0x" + Long.toHexString(fileNum) + " lastLsn=" + DbLsn.getNoFormatString(lastLsn) + " perFileLastUsedLsn=" + perFileLastUsedLsn + " fileLen=" + newFile.length()); } FileHeader fileHeader = new FileHeader(fileNum, headerPrevOffset); writeFileHeader(newFile, fileName, fileHeader, fileNum); } } else { /* A non-empty file, check the header */ logVersion = readAndValidateFileHeader(newFile, fileName, fileNum); } fileHandle.init(newFile, logVersion); success = true; } catch (FileNotFoundException e) { /* Handle at higher levels. */ throw e; } catch (IOException e) { throw new EnvironmentFailureException (envImpl, EnvironmentFailureReason.LOG_READ, "Couldn't open file " + fileName, e); } catch (DatabaseException e) { /* * Let this exception go as a checksum exception, so it sets the * run recovery state correctly. */ closeFileInErrorCase(newFile); e.addErrorMessage("Couldn't open file " + fileName); throw e; } finally { if (!success) { closeFileInErrorCase(newFile); } } } /** * Close this file and eat any exceptions. Used in catch clauses. */ private void closeFileInErrorCase(RandomAccessFile file) { try { if (file != null) { file.close(); } } catch (Exception e) { } } /** * Read the given JE log file and validate the header. * * @throws DatabaseException if the file header isn't valid * * @return file header log version. 
*/ private int readAndValidateFileHeader(RandomAccessFile file, String fileName, long fileNum) throws ChecksumException, DatabaseException { /* * Read the file header from this file. It's always the first log * entry. * * The special UNKNOWN_FILE_HEADER_VERSION value is passed for reading * the entry header. The actual log version is read as part of the * FileHeader entry. [#16939] */ LogManager logManager = envImpl.getLogManager(); LogEntry headerEntry = logManager.getLogEntryAllowChecksumException (DbLsn.makeLsn(fileNum, 0), file, LogEntryType.UNKNOWN_FILE_HEADER_VERSION); FileHeader header = (FileHeader) headerEntry.getMainItem(); return header.validate(envImpl, fileName, fileNum); } /** * Write a proper file header to the given file. */ private void writeFileHeader(RandomAccessFile file, String fileName, FileHeader header, long fileNum) throws DatabaseException { /* Fail loudly if the environment is invalid. */ envImpl.checkIfInvalid(); /* * Fail silent if the environment is not open. */ if (envImpl.mayNotWrite()) { return; } /* Write file header into this buffer in the usual log entry format. */ LogEntry headerLogEntry = new FileHeaderEntry(LogEntryType.LOG_FILE_HEADER, header); ByteBuffer headerBuf = envImpl.getLogManager(). putIntoBuffer(headerLogEntry, 0); // prevLogEntryOffset /* Write the buffer into the channel. */ int bytesWritten; try { if (LOGWRITE_EXCEPTION_TESTING) { generateLogWriteException(file, headerBuf, 0, fileNum); } /* * Always flush header so that file.length() will be non-zero when * this method returns and two threads won't attempt to create the * header. [#20732] */ bytesWritten = writeToFile(file, headerBuf, 0, fileNum, true /*flushRequired*/); if (fileNum > savedCurrentFileNum) { /* * Writing the new file header succeeded without an IOE. This * can not be undone in the event of another IOE (Out Of Disk * Space) on the next write so update the saved LSN state with * the new info. Do not update the nextAvailableLsn with a * smaller (earlier) LSN in case there's already something in a * buffer that is after the new header. [#15754] */ long lsnAfterHeader = DbLsn.makeLsn(fileNum, bytesWritten); if (DbLsn.compareTo(nextAvailableLsn, lsnAfterHeader) < 0) { nextAvailableLsn = lsnAfterHeader; } lastUsedLsn = DbLsn.makeLsn(fileNum, bytesWritten); prevOffset = bytesWritten; forceNewFile = false; currentFileNum = fileNum; saveLastPosition(); } } catch (ClosedChannelException e) { /* * The channel should never be closed. It may be closed because * of an interrupt received by another thread. See SR [#10463] */ throw new ThreadInterruptedException (envImpl, "Channel closed, may be due to thread interrupt", e); } catch (IOException e) { /* Possibly an out of disk exception. */ throw new LogWriteException(envImpl, e); } if (bytesWritten != headerLogEntry.getSize() + LogEntryHeader.MIN_HEADER_SIZE) { throw new EnvironmentFailureException (envImpl, EnvironmentFailureReason.LOG_INTEGRITY, "File " + fileName + " was created with an incomplete header. Only " + bytesWritten + " bytes were written."); } } /** * @return the prevOffset field stored in the file header. 
*/ long getFileHeaderPrevOffset(long fileNum) throws ChecksumException, DatabaseException { try { LogEntry headerEntry = envImpl.getLogManager().getLogEntryAllowChecksumException (DbLsn.makeLsn(fileNum, 0)); FileHeader header = (FileHeader) headerEntry.getMainItem(); return header.getLastEntryInPrevFileOffset(); } catch (FileNotFoundException e) { throw new EnvironmentFailureException (envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND, e); } } /* * Support for writing new log entries */ /** * @return the file offset of the last LSN that was used. For constructing * the headers of log entries. If the last LSN that was used was in a * previous file, or this is the very first LSN of the whole system, return * 0. */ long getPrevEntryOffset() { return prevOffset; } /** * Increase the current log position by "size" bytes. Move the prevOffset * pointer along. * * @param size is an unsigned int * @return true if we flipped to the next log file. */ boolean bumpLsn(long size) { /* Save copy of initial LSN state. */ saveLastPosition(); boolean flippedFiles = false; if (forceNewFile || (DbLsn.getFileOffset(nextAvailableLsn) + size) > maxFileSize) { forceNewFile = false; /* Move to another file. */ currentFileNum++; /* Remember the last used LSN of the previous file. */ if (lastUsedLsn != DbLsn.NULL_LSN) { perFileLastUsedLsn.put (Long.valueOf(DbLsn.getFileNumber(lastUsedLsn)), Long.valueOf(lastUsedLsn)); } prevOffset = 0; lastUsedLsn = DbLsn.makeLsn(currentFileNum, firstLogEntryOffset()); flippedFiles = true; } else { if (lastUsedLsn == DbLsn.NULL_LSN) { prevOffset = 0; } else { prevOffset = DbLsn.getFileOffset(lastUsedLsn); } lastUsedLsn = nextAvailableLsn; } nextAvailableLsn = DbLsn.makeLsn(DbLsn.getFileNumber(lastUsedLsn), (DbLsn.getFileOffset(lastUsedLsn) + size)); return flippedFiles; } /** * Write out a log buffer to the file. * @param fullBuffer buffer to write * @param flushRequired true if this write can not be queued on the * Write Queue. */ void writeLogBuffer(LogBuffer fullBuffer, boolean flushRequired) throws DatabaseException { /* Fail loudly if the environment is invalid. */ envImpl.checkIfInvalid(); /* * Fail silent if the environment is not open. */ if (envImpl.mayNotWrite()) { return; } /* Use the LSN to figure out what file to write this buffer to. */ long firstLsn = fullBuffer.getFirstLsn(); /* * Is there anything in this write buffer? We could have been called by * the environment shutdown, and nothing is actually in the buffer. */ if (firstLsn != DbLsn.NULL_LSN) { RandomAccessFile file = endOfLog.getWritableFile(DbLsn.getFileNumber(firstLsn), true); ByteBuffer data = fullBuffer.getDataBuffer(); try { /* * Check that we do not overwrite unless the file only contains * a header [#11915] [#12616]. */ assert fullBuffer.getRewriteAllowed() || (DbLsn.getFileOffset(firstLsn) >= file.length() || file.length() == firstLogEntryOffset()) : "FileManager would overwrite non-empty file 0x" + Long.toHexString(DbLsn.getFileNumber(firstLsn)) + " lsnOffset=0x" + Long.toHexString(DbLsn.getFileOffset(firstLsn)) + " fileLength=0x" + Long.toHexString(file.length()); if (IO_EXCEPTION_TESTING_ON_WRITE) { throw new IOException("generated for testing (write)"); } if (LOGWRITE_EXCEPTION_TESTING) { generateLogWriteException (file, data, DbLsn.getFileOffset(firstLsn), DbLsn.getFileNumber(firstLsn)); } writeToFile(file, data, DbLsn.getFileOffset(firstLsn), DbLsn.getFileNumber(firstLsn), flushRequired); } catch (ClosedChannelException e) { /* * The file should never be closed. 
It may be closed because * of an interrupt received by another thread. See SR [#10463]. */ throw new ThreadInterruptedException (envImpl, "File closed, may be due to thread interrupt", e); } catch (IOException e) { if (!continueAfterWriteException()) { throw new LogWriteException(envImpl, e); } /* * Possibly an out of disk exception, but java.io will only * tell us IOException with no indication of whether it's out * of disk or something else. Better support may exist in * Java6. * * Since we can't tell what sectors were actually written to * disk, we need to change any commit records that might have * made it out to disk to abort records. If they made it to * disk on the write, then rewriting should allow them to be * rewritten. See [11271]. * * Rewriting committed transactions in replication is highly * problematic, and can lead to divergence between the replica * and master. If this path is re-enabled, we must assess its * impact in replication, since the log entries may already * be sent to other nodes. */ abortCommittedTxns(data); try { if (IO_EXCEPTION_TESTING_ON_WRITE) { throw new IOException ("generated for testing (write)"); } writeToFile(file, data, DbLsn.getFileOffset(firstLsn), DbLsn.getFileNumber(firstLsn), flushRequired); } catch (IOException e2) { fullBuffer.setRewriteAllowed(); /* Use an exception that does not invalidate the env. */ throw EnvironmentFailureException.unexpectedException(e2); } } assert EnvironmentImpl.maybeForceYield(); } } /** * Write a buffer to a file at a given offset. */ private int writeToFile(RandomAccessFile file, ByteBuffer data, long destOffset, long fileNum, boolean flushRequired) throws IOException, DatabaseException { int totalBytesWritten = 0; bumpWriteCount("write"); int pos = data.position(); int size = data.limit() - pos; if (lastFileNumberTouched == fileNum && (Math.abs(destOffset - lastFileTouchedOffset) < ADJACENT_TRACK_SEEK_DELTA)) { nSequentialWrites.increment(); nSequentialWriteBytes.add(size); } else { nRandomWrites.increment(); nRandomWriteBytes.add(size); } if (VERIFY_CHECKSUMS) { verifyChecksums(data, destOffset, "pre-write"); } /* * Perform a RandomAccessFile write and update the buffer position. * ByteBuffer.array() is safe to use since all non-direct ByteBuffers * have a backing array. * * Synchronization on the file object is needed because two threads may * call seek() on the same file object. * * If the Write Queue is enabled, attempt to get the fsync latch. If * we can't get it, then an fsync or write is in progress and we'd * block anyway. In that case, queue the write operation. 
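         *
         * Decision sketch (restating the code below):
         *
         *     tryLock() succeeded          -> drain queue, write directly
         *     tryLock() failed, queueable  -> enqueueWrite(); done if it fits
         *     tryLock() failed and flush required,
         *       or the enqueue overflowed  -> block on lock(), then write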
*/ boolean fsyncLatchAcquired = endOfLog.fsyncFileSynchronizer.tryLock(); boolean enqueueSuccess = false; if (!fsyncLatchAcquired && useWriteQueue && !flushRequired) { enqueueSuccess = endOfLog.enqueueWrite(fileNum, data.array(), destOffset, pos + data.arrayOffset(), size); } if (!enqueueSuccess) { if (!fsyncLatchAcquired) { endOfLog.fsyncFileSynchronizer.lock(); } try { if (useWriteQueue) { endOfLog.dequeuePendingWrites1(); } synchronized (file) { file.seek(destOffset); file.write (data.array(), pos + data.arrayOffset(), size); if (VERIFY_CHECKSUMS) { file.seek(destOffset); file.read (data.array(), pos + data.arrayOffset(), size); verifyChecksums(data, destOffset, "post-write"); } } } finally { endOfLog.fsyncFileSynchronizer.unlock(); } } data.position(pos + size); totalBytesWritten = size; lastFileNumberTouched = fileNum; lastFileTouchedOffset = destOffset + size; return totalBytesWritten; } private void bumpWriteCount(final String debugMsg) throws IOException { if (DEBUG) { System.out.println("Write: " + WRITE_COUNT + " " + debugMsg); } if (++WRITE_COUNT >= STOP_ON_WRITE_COUNT && WRITE_COUNT < (STOP_ON_WRITE_COUNT + N_BAD_WRITES)) { if (THROW_ON_WRITE) { throw new IOException ("IOException generated for testing: " + WRITE_COUNT + " " + debugMsg); } Runtime.getRuntime().halt(0xff); } } /** * Read a buffer from a file at a given offset. We know that the desired * data exists in this file. There's no need to incur extra costs * such as checks of the file length, nor to return status as to whether * this file contains the data. */ void readFromFile(RandomAccessFile file, ByteBuffer readBuffer, long offset, long fileNo) throws DatabaseException { readFromFile(file, readBuffer, offset, fileNo, true /* dataKnownToBeInFile */); } /** * Read a buffer from a file at a given offset. * * @return true if the read buffer is filled, false, if there is nothing * left in the file to read */ boolean readFromFile(RandomAccessFile file, ByteBuffer readBuffer, long offset, long fileNo, boolean dataKnownToBeInFile) throws DatabaseException { /* * All IOExceptions on read turn into EnvironmentFailureExceptions * [#15768]. */ try { /* * Check if there's a pending write(s) in the write queue for this * fileNo/offset and if so, use it to fulfill this read request. */ if (useWriteQueue && endOfLog.checkWriteCache(readBuffer, offset, fileNo)) { return true; } /* * Nothing queued, all data for this file must be in the file. * Note that there's no synchronization between the check of the * write queue above, and this check of file length. It's possible * that a newly written log entry could show up between the * statements, and enter the write queue just after we finish the * check. * * Because of this, callers of this method must abide by one of * three conditions: * 1. They guarantee that the attempt to read a chunk of new data * comes after the new data has been logged by the LogManager. * 2. The files are quiescent when the read is going on. * 3. The caller is sure the data is in this file. * * The replication feeder reader abides by (1) while all other file * readers abide by (2). Callers which are fetching specific log * entries fall under (3). */ boolean readThisFile = true; if (!dataKnownToBeInFile) { /* * Callers who are not sure whether the desired data is in this * file or the next incur the cost of a check of file.length(), * which is a system call. 
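                 *
                 * (RandomAccessFile.length() typically maps to an
                 * fstat-style query on the open file descriptor, hence the
                 * "system call" cost noted above.)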
*/ readThisFile = (offset < file.length()); } if (readThisFile) { readFromFileInternal(file, readBuffer, offset, fileNo); return true; } return false; } catch (ClosedChannelException e) { /* * The channel should never be closed. It may be closed because * of an interrupt received by another thread. See SR [#10463] */ throw new ThreadInterruptedException (envImpl, "Channel closed, may be due to thread interrupt", e); } catch (IOException e) { throw new EnvironmentFailureException (envImpl, EnvironmentFailureReason.LOG_READ, e); } } private void readFromFileInternal(RandomAccessFile file, ByteBuffer readBuffer, long offset, long fileNum) throws IOException { /* * Perform a RandomAccessFile read and update the buffer position. * ByteBuffer.array() is safe to use since all non-direct ByteBuffers * have a backing array. Synchronization on the file object is needed * because two threads may call seek() on the same file object. */ synchronized (file) { int pos = readBuffer.position(); int size = readBuffer.limit() - pos; if (lastFileNumberTouched == fileNum && (Math.abs(offset - lastFileTouchedOffset) < ADJACENT_TRACK_SEEK_DELTA)) { nSequentialReads.increment(); nSequentialReadBytes.add(size); } else { nRandomReads.increment(); nRandomReadBytes.add(size); } file.seek(offset); if (IO_EXCEPTION_TESTING_ON_READ) { throw new IOException("generated for testing (read)"); } int bytesRead = file.read(readBuffer.array(), pos + readBuffer.arrayOffset(), size); if (bytesRead > 0) { readBuffer.position(pos + bytesRead); } lastFileNumberTouched = fileNum; lastFileTouchedOffset = offset + bytesRead; } } private void verifyChecksums(ByteBuffer entryBuffer, long lsn, String comment) { int curPos = entryBuffer.position(); try { while (entryBuffer.remaining() > 0) { int recStartPos = entryBuffer.position(); /* Write buffer contains current log version entries. */ LogEntryHeader header = new LogEntryHeader(entryBuffer, LogEntryType.LOG_VERSION); verifyChecksum(entryBuffer, header, lsn, comment); entryBuffer.position(recStartPos + header.getSize() + header.getItemSize()); } } catch (ChecksumException e) { System.err.println("ChecksumException: (" + comment + ") " + e); System.err.println("start stack trace"); e.printStackTrace(System.err); System.err.println("end stack trace"); } entryBuffer.position(curPos); } private void verifyChecksum(ByteBuffer entryBuffer, LogEntryHeader header, long lsn, String comment) throws ChecksumException { ChecksumValidator validator = null; /* Add header to checksum bytes */ validator = new ChecksumValidator(); int headerSizeMinusChecksum = header.getSizeMinusChecksum(); int itemStart = entryBuffer.position(); entryBuffer.position(itemStart - headerSizeMinusChecksum); validator.update(entryBuffer, headerSizeMinusChecksum); entryBuffer.position(itemStart); /* * Now that we know the size, read the rest of the entry if the first * read didn't get enough. */ int itemSize = header.getItemSize(); if (entryBuffer.remaining() < itemSize) { System.err.println("Couldn't verify checksum (" + comment + ")"); return; } /* * Do entry validation. Run checksum before checking the entry * type, it will be the more encompassing error. */ validator.update(entryBuffer, itemSize); validator.validate(header.getChecksum(), lsn); } /* * Iterate through a buffer looking for commit records. Change all commit * records to abort records. 
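     *
     * Per-entry sketch (restating the loop below): read a LogEntryHeader at
     * the current position; for LOG_TXN_COMMIT entries, call
     * header.convertCommitToAbort(data), which rewrites the type byte and
     * recomputes the checksum in place; then advance past header plus item
     * to the next entry.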
*/ private void abortCommittedTxns(ByteBuffer data) throws DatabaseException { final byte commitType = LogEntryType.LOG_TXN_COMMIT.getTypeNum(); data.position(0); while (data.remaining() > 0) { int recStartPos = data.position(); LogEntryHeader header; try { /* Write buffer contains current log version entries. */ header = new LogEntryHeader(data, LogEntryType.LOG_VERSION); } catch (ChecksumException e) { throw EnvironmentFailureException.unexpectedException(e); } if (header.getType() == commitType) { /* Change the log entry type, and recalculate the checksum. */ header.convertCommitToAbort(data); } data.position(recStartPos + header.getSize() + header.getItemSize()); } data.position(0); } /** * FSync the end of the log. */ void syncLogEnd() throws DatabaseException { try { endOfLog.force(); } catch (IOException e) { throw new LogWriteException (envImpl, "IOException during fsync", e); } } /** * Sync the end of the log, close off this log file. Should only be called * under the log write latch. */ void syncLogEndAndFinishFile() throws DatabaseException, IOException { if (syncAtFileEnd) { syncLogEnd(); } endOfLog.close(); } /** * Returns whether anything is in the write queue. */ public boolean hasQueuedWrites() { return endOfLog.hasQueuedWrites(); } /** * For unit testing only. */ public void testWriteQueueLock() { endOfLog.fsyncFileSynchronizer.lock(); } /** * For unit testing only. */ public void testWriteQueueUnlock() { endOfLog.fsyncFileSynchronizer.unlock(); } public void startFileCacheWarmer(final long recoveryStartLsn){ assert fileCacheWarmer == null; final DbConfigManager cm = envImpl.getConfigManager(); final int warmUpSize = cm.getInt( EnvironmentParams.LOG_FILE_WARM_UP_SIZE); if (warmUpSize == 0) { return; } final int bufSize = cm.getInt( EnvironmentParams.LOG_FILE_WARM_UP_BUF_SIZE); fileCacheWarmer = new FileCacheWarmer( envImpl, recoveryStartLsn, lastUsedLsn, warmUpSize, bufSize); fileCacheWarmer.start(); } private void stopFileCacheWarmer(){ /* * Use fcw local var because fileCacheWarmer can be set to null by * other threads calling clearFileCacheWarmer, namely the cache warmer * thread. */ final FileCacheWarmer fcw = fileCacheWarmer; if (fcw == null) { return; } fcw.shutdown(); clearFileCacheWarmer(); } /* Allow cache warmer thread to be GC'd. */ void clearFileCacheWarmer() { fileCacheWarmer = null; } /** * Close all file handles and empty the cache. */ public void clear() throws IOException, DatabaseException { synchronized (fileCache) { fileCache.clear(); } endOfLog.close(); } /** * Clear the file lock. */ public void close() throws IOException { stopFileCacheWarmer(); if (envLock != null) { envLock.release(); envLock = null; } if (exclLock != null) { exclLock.release(); exclLock = null; } if (channel != null) { channel.close(); channel = null; } if (lockFile != null) { lockFile.close(); lockFile = null; } } /** * Lock the environment. Return true if the lock was acquired. If * exclusive is false, then this implements a single writer, multiple * reader lock. If exclusive is true, then implement an exclusive lock. * * There is a lock file and there are two regions of the lock file: byte 0, * and byte 1. Byte 0 is the exclusive writer process area of the lock * file. If an environment is opened for write, then it attempts to take * an exclusive write lock on byte 0. Byte 1 is the shared reader process * area of the lock file. If an environment is opened for read-only, then * it attempts to take a shared lock on byte 1. 
This is how we implement * single writer, multi reader semantics. * * The cleaner, each time it is invoked, attempts to take an exclusive lock * on byte 1. The owning process already either has an exclusive lock on * byte 0, or a shared lock on byte 1. This will necessarily conflict with * any shared locks on byte 1, even if it's in the same process and there * are no other holders of that shared lock. So if there is only one * read-only process, it will have byte 1 for shared access, and the * cleaner cannot run in it because it will attempt to get an exclusive * lock on byte 1 (which is already locked for shared access by itself). * If a write process comes along and tries to run the cleaner, it will * attempt to get an exclusive lock on byte 1. If there are no other * reader processes (with shared locks on byte 1), and no other writers * (which are running cleaners with exclusive locks on byte 1), then the * cleaner will run. */ public boolean lockEnvironment(boolean rdOnly, boolean exclusive) { try { if (checkEnvHomePermissions(rdOnly)) { return true; } if (lockFile == null) { lockFile = new RandomAccessFile (new File(dbEnvHome, LOCK_FILE), FileMode.READWRITE_MODE.getModeValue()); } channel = lockFile.getChannel(); try { if (exclusive) { /* * To lock exclusive, must have exclusive on * shared reader area (byte 1). */ exclLock = channel.tryLock(1, 1, false); if (exclLock == null) { return false; } return true; } if (rdOnly) { envLock = channel.tryLock(1, 1, true); } else { envLock = channel.tryLock(0, 1, false); } if (envLock == null) { return false; } return true; } catch (OverlappingFileLockException e) { return false; } } catch (IOException e) { throw new EnvironmentFailureException (envImpl, EnvironmentFailureReason.LOG_INTEGRITY, e); } } public void releaseExclusiveLock() throws DatabaseException { try { if (exclLock != null) { exclLock.release(); } } catch (IOException e) { throw new EnvironmentFailureException (envImpl, EnvironmentFailureReason.LOG_INTEGRITY, e); } } /** * Ensure that if the environment home dir is on readonly media or in a * readonly directory, the environment has been opened for readonly * access. * * @return true if the environment home dir is readonly. * * @throws IllegalArgumentException via Environment ctor */ public boolean checkEnvHomePermissions(boolean rdOnly) throws DatabaseException { if (nDataDirs == 0) { return checkEnvHomePermissionsSingleEnvDir(dbEnvHome, rdOnly); } else { return checkEnvHomePermissionsMultiEnvDir(rdOnly); } } private boolean checkEnvHomePermissionsSingleEnvDir(File dbEnvHome, boolean rdOnly) throws DatabaseException { boolean envDirIsReadOnly = !dbEnvHome.canWrite(); if (envDirIsReadOnly && !rdOnly) { /* * Use the absolute path in the exception message, to * make a mis-specified relative path problem more obvious. */ throw new IllegalArgumentException ("The Environment directory " + dbEnvHome.getAbsolutePath() + " is not writable, but the " + "Environment was opened for read-write access."); } return envDirIsReadOnly; } private boolean checkEnvHomePermissionsMultiEnvDir(boolean rdOnly) throws DatabaseException { for (File dbEnvDir : dbEnvDataDirs) { if (!checkEnvHomePermissionsSingleEnvDir(dbEnvDir, rdOnly)) { return false; } } return true; } /** * Truncate a log at this position. Used by the recovery-to-a-timestamp * utilities and by recovery to set the end-of-log position; see * LastFileReader.setEndOfFile(). * *

This method forces a new log file to be written next, if the last * file (the file truncated to) has an old version in its header. This * ensures that when the log is opened by an old version of JE, a version * incompatibility will be detected. [#11243]
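*
* For example (hypothetical numbers): truncating file 0x3 at offset
* 0x1000 shrinks 00000003.jdb to 0x1000 bytes via FileChannel.truncate();
* if that file's header records an old log version, forceNewFile is set
* so that the next write starts a new file with a current-version header.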

*/ public void truncateSingleFile(long fileNum, long offset) throws IOException, DatabaseException { try { FileHandle handle = makeFileHandle(fileNum, getAppropriateReadWriteMode()); RandomAccessFile file = handle.getFile(); try { file.getChannel().truncate(offset); } finally { file.close(); } if (handle.isOldHeaderVersion()) { forceNewFile = true; } } catch (ChecksumException e) { throw new EnvironmentFailureException (envImpl, EnvironmentFailureReason.LOG_CHECKSUM, e); } } /* * Truncate all log entries after a specified log entry. The position of * that entry is specified by fileNum and offset; we do this to avoid * leaving a gap in the log. Used by replication hard recovery and the * DbTruncateLog utility; see SR [#19463]. */ public void truncateLog(long fileNum, long offset) throws IOException, DatabaseException { /* * Truncate the log files that follow this log file, in descending * order, to avoid a log entry gap; see SR [#19463]. */ for (long i = getLastFileNum(); i >= fileNum; i--) { /* Do nothing if this file doesn't exist. */ if (!isFileValid(i)) { continue; } /* * If this is the file where truncation starts, invoke * truncateSingleFile. If the offset is 0, the FileHeader would * also be truncated away, so delete the whole file instead to * avoid a log file gap. */ if (i == fileNum) { truncateSingleFile(fileNum, offset); if (offset != 0) { continue; } } boolean deleted = deleteFile(i); assert deleted : "File " + getFullFileName(i, JE_SUFFIX) + " not deleted during truncateLog"; } } /** * Mark the specified log entries as invisible and obsolete. The entries * are written here, but are fsync'ed later. If there is any problem or * exception during the setting, the method will throw an * EnvironmentFailureException. * * These changes are made directly to the file, but recently logged log * entries may also be resident in the log buffers. The caller must take * care to call LogManager.flush() before this method, to ensure that all * entries are on disk. * * In addition, we must ensure that after this step, the affected log * entries will only be read via a FileReader, and will not be faulted in * by the LogManager. Entries may be present in the log and in the log * buffers, but only the on disk version is modified by this method. The * LogManager can read directly from the log buffers and may read the * incorrect, non-invisible version of the log entry, rather than the * invisible version from the file. This should not be an issue, because * invisible log entries should be detached from the in-memory tree before * they are made invisible. * * @param fileNum target file. * @param lsns The list of LSNs to make invisible, must be sorted in * ascending order. */ public void makeInvisible(long fileNum, List<Long> lsns) { if (lsns.size() == 0) { return; } /* Open this file. */ FileHandle handle = null; try { /* * Note that we are getting a new, non-cached file handle for * specific use by this method. */ handle = makeFileHandle(fileNum, getAppropriateReadWriteMode()); } catch (ChecksumException e) { throw new EnvironmentFailureException (envImpl, EnvironmentFailureReason.LOG_CHECKSUM, "Opening file " + fileNum + " for invisible marking ", e); } catch (FileNotFoundException e) { throw new EnvironmentFailureException (envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND, "Opening file " + fileNum + " for invisible marking ", e); } RandomAccessFile file = handle.getFile(); /* Set the invisible bit for each entry.
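*
* A worked example (hypothetical numbers): for an LSN with file offset
* 0x200, the flags byte rewritten below lives at file position
* 0x200 + LogEntryHeader.FLAGS_OFFSET. The loop seeks there, reads the
* byte, sets the invisible bit via LogEntryHeader.makeInvisible(), seeks
* back, and writes the new byte in place.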
*/ try { for (Long lsn : lsns) { if (DbLsn.getFileNumber(lsn) != fileNum) { /* * This failure will not invalidate the environment right * away. But since it causes replication syncup to fail, * the environment will shut down, which is the effect we * want. */ throw new EnvironmentFailureException (envImpl, EnvironmentFailureReason.UNEXPECTED_STATE, "LSN of " + DbLsn.getNoFormatString(lsn) + " did not match file number " + fileNum); } int entryFlagsOffset = (int) (DbLsn.getFileOffset(lsn) + LogEntryHeader.FLAGS_OFFSET); file.seek(entryFlagsOffset); byte flags = file.readByte(); byte newFlags = LogEntryHeader.makeInvisible(flags); file.seek(entryFlagsOffset); file.writeByte(newFlags); } } catch (IOException e) { throw new EnvironmentFailureException (envImpl, EnvironmentFailureReason.LOG_WRITE, "Flipping invisibility in file " + fileNum, e); } finally { /* * Just close the file. Fsyncs will be done later on, in the hope * that the OS has already synced asynchronously. */ try { file.close(); } catch (IOException e) { throw new EnvironmentFailureException (envImpl, EnvironmentFailureReason.LOG_WRITE, "Closing after invisibility cloaking: file " + fileNum, e); } } } /** * Fsync this set of log files. Used for replication syncup rollback. */ public void force(Set<Long> fileNums) { for (long fileNum : fileNums) { RandomAccessFile file = null; try { FileHandle handle = makeFileHandle(fileNum, getAppropriateReadWriteMode()); file = handle.getFile(); file.getChannel().force(false); nLogFSyncs.increment(); } catch (FileNotFoundException e) { throw new EnvironmentFailureException (envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND, "Invisible fsyncing file " + fileNum, e); } catch (ChecksumException e) { throw new EnvironmentFailureException (envImpl, EnvironmentFailureReason.LOG_CHECKSUM, "Invisible fsyncing file " + fileNum, e); } catch (IOException e) { throw new EnvironmentFailureException (envImpl, EnvironmentFailureReason.LOG_WRITE, "Invisible fsyncing file " + fileNum, e); } finally { if (file != null) { try { file.close(); } catch (IOException e) { throw new EnvironmentFailureException (envImpl, EnvironmentFailureReason.LOG_WRITE, "Invisible fsyncing file " + fileNum, e); } } } } } /** * Set the flag that causes a new file to be written before the next write. */ public void forceNewLogFile() { forceNewFile = true; } /** * Return the offset of the first log entry after the file header, i.e., * the size in bytes of the file header log entry. */ public static int firstLogEntryOffset() { return FileHeader.entrySize() + LogEntryHeader.MIN_HEADER_SIZE; } /** * Return the next available LSN in the log. Note that this is * unsynchronized, so is only valid as an approximation of log size. */ public long getNextLsn() { return nextAvailableLsn; } /** * Return the last allocated LSN in the log. Note that this is * unsynchronized, so if it is called outside the log write latch it is * only valid as an approximation of log size. */ public long getLastUsedLsn() { return lastUsedLsn; } StatGroup loadStats(StatsConfig config) { nOpenFiles.set(fileCache.size()); StatGroup copyStats = stats.cloneGroup(config.getClear()); return copyStats; } /* * Unit test support */ /* * @return ids of files in cache */ Set<Long> getCacheKeys() { return fileCache.getCacheKeys(); } /** * Clear a file out of the file cache regardless of mode type.
*/ private void clearFileCache(long fileNum) throws IOException, DatabaseException { synchronized (fileCache) { fileCache.remove(fileNum); } } /* * The file cache keeps N RandomAccessFile objects cached for file * access. The cache consists of two parts: a Hashtable that doesn't * require extra synchronization, for the most common access, and a linked * list of files to support cache administration. Looking up a file from * the hash table doesn't require extra latching, but adding or deleting a * file does. */ private static class FileCache { private final Map<Long, FileHandle> fileMap; private final List<Long> fileList; // list of file numbers private final int fileCacheSize; FileCache(DbConfigManager configManager) { /* * A fileMap maps the file number to FileHandles (RandomAccessFile, * latch). The fileList is a list of Longs used to determine which * files to eject when the cache grows too large. */ fileMap = new Hashtable<Long, FileHandle>(); fileList = new LinkedList<Long>(); fileCacheSize = configManager.getInt(EnvironmentParams.LOG_FILE_CACHE_SIZE); } private FileHandle get(Long fileId) { return fileMap.get(fileId); } private void add(Long fileId, FileHandle fileHandle) throws IOException, DatabaseException { /* * Does the cache have any room or do we have to evict? Hunt down * the file list for an unused file. Note that the file cache might * actually grow past the prescribed size if there is nothing * evictable. Should we try to shrink the file cache? Presently if * it grows, it doesn't shrink. */ if (fileList.size() >= fileCacheSize) { Iterator<Long> iter = fileList.iterator(); while (iter.hasNext()) { Long evictId = iter.next(); FileHandle evictTarget = fileMap.get(evictId); /* * Try to latch. If latchNoWait returns false, then another * thread owns this latch. Note that a thread that's trying * to get a new file handle should never already own the * latch on another file handle, because these latches are * meant to be short lived and only held over the i/o out * of the file. */ if (evictTarget.latchNoWait()) { try { fileMap.remove(evictId); iter.remove(); evictTarget.close(); } finally { evictTarget.release(); } break; } } } /* * We've done our best to evict. Add the file to the cache now, * whether or not we did evict. */ fileList.add(fileId); fileMap.put(fileId, fileHandle); } /** * Take any file handles corresponding to this file name out of the * cache. A file handle could be there twice, in rd only and in r/w * mode. */ private void remove(long fileNum) throws IOException, DatabaseException { Iterator<Long> iter = fileList.iterator(); while (iter.hasNext()) { Long evictId = iter.next(); if (evictId.longValue() == fileNum) { FileHandle evictTarget = fileMap.get(evictId); try { evictTarget.latch(); fileMap.remove(evictId); iter.remove(); evictTarget.close(); } finally { evictTarget.release(); } } } } private void clear() throws IOException, DatabaseException { Iterator<FileHandle> iter = fileMap.values().iterator(); while (iter.hasNext()) { FileHandle fileHandle = iter.next(); try { fileHandle.latch(); fileHandle.close(); iter.remove(); } finally { fileHandle.release(); } } fileMap.clear(); fileList.clear(); } private Set<Long> getCacheKeys() { return fileMap.keySet(); } private int size() { return fileMap.size(); } } /** * The LogEndFileDescriptor is used to write and fsync the end of the log. * Because the JE log is append only, there is only one logical R/W file * descriptor for the whole environment.
This class actually maintains two * RandomAccessFile instances, one for writing and one for fsyncing, so the * two types of operations don't block each other. * * The write file descriptor is considered the master. Manipulation of * this class is done under the log write latch. Here's an explanation of * why the log write latch is sufficient to safeguard all operations. * * There are two types of callers who may use this file descriptor: the * thread that is currently writing to the end of the log and any threads * that are fsyncing on behalf of the FSyncManager. * * The writing thread appends data to the file and fsyncs the file when we * flip over to a new log file. The file is only instantiated at the point * that it is first needed -- which is either when the first fsync is * required by JE or when the log file is full and we flip files. * Therefore, the writing thread has two actions that change this * descriptor -- we initialize the file descriptor for the given log file * at the first write to the file, and we close the file descriptor when * the log file is full. There is therefore a period when there is no log * descriptor -- when we have not yet written a log buffer into a given * log file. * * The fsyncing threads ask for the log end file descriptor asynchronously, * but will never modify it. These threads may arrive at the point when * the file descriptor is null, and therefore skip their fsync, but that is * fine because it means a writing thread already flipped that target file * and has moved on to the next file. * * Time Activity * 10 thread 1 writes log entry A into file 0x0, issues fsync * outside of log write latch, yields the processor * 20 thread 2 writes log entry B, piggybacks off thread 1 * 30 thread 3 writes log entry C, but no room left in that file, * so it flips the log, and fsyncs file 0x0, all under the log * write latch. It nulls out endOfLogRWFile, moves on to file * 0x1, but doesn't create the file yet. * 40 thread 1 finally comes along, but endOfLogRWFile is null -- * no need to fsync in that case, 0x0 got fsynced. * * If a write is attempted and an fsync is already in progress, then the * information pertaining to the data to be written (data, offset, length) * is saved away in the "queuedWrites" array. When the fsync completes, * the queuedWrites buffer is emptied. This ensures that writes continue * to execute on file systems which block all IO calls during an fsync() * call (e.g. ext3). */ class LogEndFileDescriptor { private RandomAccessFile endOfLogRWFile = null; private RandomAccessFile endOfLogSyncFile = null; private final ReentrantLock fsyncFileSynchronizer = new ReentrantLock(); /* * Holds all data for writes which have been queued due to their * being blocked by an fsync when the original write was attempted. * The next thread to execute an fsync or write will execute any * queued writes in this buffer. * Latch order is fsyncFileSynchronizer, followed by the queuedWrites * mutex [ synchronized (queuedWrites) {} ]. * * Default protection for unit tests. */ private final byte[] queuedWrites = useWriteQueue ? new byte[writeQueueSize] : null; /* Current position in the queuedWrites array. */ private int queuedWritesPosition = 0; /* The starting offset on disk of the first byte in queuedWrites. */ private long qwStartingOffset; /* The file number that the queuedWrites are destined for. */ private long qwFileNum = -1; /* For unit tests.
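* For example, a test might prime the queue's target file with
* endOfLog.setQueueFileNum(3) before exercising checkWriteCache below
* (hypothetical usage, not from the source).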
*/ void setQueueFileNum(final long qwFileNum) { this.qwFileNum = qwFileNum; } /* * Check if fileNo/offset is present in queuedWrites, and if so, fill * readBuffer with those bytes. We theorize that this is needed * because HA will be reading at the very end of the log and those * writes, if enqueued, may no longer be in LogBuffers in the * LogBufferPool. This might happen in the case of lots of concurrent * non-synchronous writes (with synchronous commits) which become * enqueued in the queuedWrites cache, but cycle out of the LBP. In * general, using synchronous commits with HA is a bad idea. * * Default protection for unit tests. * @return true if more data was available. If so, the read buffer * will be filled up. */ /* private */ boolean checkWriteCache(final ByteBuffer readBuffer, final long requestedOffset, final long fileNum) { int pos = readBuffer.position(); int targetBufSize = readBuffer.limit() - pos; synchronized (queuedWrites) { if (qwFileNum != fileNum) { return false; } if (queuedWritesPosition == 0) { return false; } if (requestedOffset < qwStartingOffset || (qwStartingOffset + queuedWritesPosition) <= requestedOffset) { return false; } /* We have the bytes available. */ int nBytesToCopy = (int) (queuedWritesPosition - (requestedOffset - qwStartingOffset)); nBytesToCopy = Math.min(nBytesToCopy, targetBufSize); readBuffer.put(queuedWrites, (int) (requestedOffset - qwStartingOffset), nBytesToCopy); nBytesReadFromWriteQueue.add(nBytesToCopy); nReadsFromWriteQueue.increment(); return true; } } /* * Enqueue a blocked write call for later execution by the next thread * to do either an fsync or write call. fsyncFileSynchronizer is not * held when this is called. * * Default protection for unit tests. */ /* private */ boolean enqueueWrite(final long fileNum, final byte[] data, final long destOffset, final int arrayOffset, final int size) throws DatabaseException { assert !fsyncFileSynchronizer.isHeldByCurrentThread(); for (int i = 0; i < 2; i++) { try { enqueueWrite1(fileNum, data, destOffset, arrayOffset, size); return true; } catch (RelatchRequiredException RE) { dequeuePendingWrites(); } } /* Give up after two tries. */ nWriteQueueOverflowFailures.increment(); return false; } private void enqueueWrite1(final long fileNum, final byte[] data, final long destOffset, final int arrayOffset, final int size) throws RelatchRequiredException, DatabaseException { /* * The queuedWrites queue only ever holds writes for a single file. * * This check is safe because qwFileNum can only ever change inside * enqueueWrite which can only ever be called while the Log Write * Latch is held. * * NOTE: We believe the commented out second condition is safe * to add to the code if we ever see contention with this call to * dequeuePendingWrites against an fsync. Here is the reasoning: * * queuedWritesPosition is changed in two places: (1) enqueueWrite1 * where it is incremented, and (2) dequeuePendingWrites1 where it * is zeroed. Both of these places are protected by the queuedWrites * mutex. The zeroing (2) will only make the dequeue unnecessary, * so the extra commented out check below is safe since it will * only result in eliminating an unnecessary dequeuePendingWrites * call.
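*
* A worked example of the bookkeeping below (hypothetical numbers):
* with qwStartingOffset=1000 and queuedWritesPosition=200, the queued
* bytes cover file offsets [1000, 1200), so the only write that may be
* appended next is one with destOffset=1200. Any other offset trips the
* non-consecutive check and invalidates the environment, and a write
* that doesn't fit in the remaining writeQueueSize - 200 bytes throws
* RelatchRequiredException so the queue can first be drained.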
*/ if (qwFileNum < fileNum /* && queuedWritesPosition > 0 */) { dequeuePendingWrites(); qwFileNum = fileNum; } synchronized (queuedWrites) { boolean overflow = (writeQueueSize - queuedWritesPosition) < size; if (overflow) { nWriteQueueOverflow.increment(); /* * Since we can't copy this "write call" into the * queuedWrites buffer without overflowing, we will try to * dequeue all current writes in the buffer. But that * requires holding the fsyncFileSynchronizer latch first, * which would be latching out of order relative to the * queuedWrites mutex. */ throw RelatchRequiredException.relatchRequiredException; } assert qwFileNum == fileNum; int curPos = queuedWritesPosition; if (curPos == 0) { /* * This is the first entry in queue. Set qwStartingOffset. */ qwStartingOffset = destOffset; } if (curPos + qwStartingOffset != destOffset) { throw new EnvironmentFailureException (envImpl, EnvironmentFailureReason.LOG_INTEGRITY, "non-consecutive writes queued. " + "qwPos=" + queuedWritesPosition + " write destOffset=" + destOffset); } System.arraycopy(data, arrayOffset, queuedWrites, queuedWritesPosition, size); queuedWritesPosition += size; } } /** * Returns whether anything is in the write queue. */ boolean hasQueuedWrites() { return queuedWritesPosition > 0; } /* * Execute pending writes. Assumes fsyncFileSynchronizer is not held. */ private void dequeuePendingWrites() throws DatabaseException { assert !fsyncFileSynchronizer.isHeldByCurrentThread(); fsyncFileSynchronizer.lock(); try { dequeuePendingWrites1(); } finally { fsyncFileSynchronizer.unlock(); } } /* * Execute pending writes. Assumes fsyncFileSynchronizer is held. */ private void dequeuePendingWrites1() throws DatabaseException { assert fsyncFileSynchronizer.isHeldByCurrentThread(); try { synchronized (queuedWrites) { /* Nothing to see here. Move along. */ if (queuedWritesPosition == 0) { return; } RandomAccessFile file = getWritableFile(qwFileNum, false); synchronized (file) { file.seek(qwStartingOffset); file.write(queuedWrites, 0, queuedWritesPosition); nBytesWrittenFromWriteQueue.add(queuedWritesPosition); nWritesFromWriteQueue.increment(); if (VERIFY_CHECKSUMS) { file.seek(qwStartingOffset); file.read(queuedWrites, 0, queuedWritesPosition); ByteBuffer bb = ByteBuffer.allocate(queuedWritesPosition); bb.put(queuedWrites, 0, queuedWritesPosition); bb.position(0); verifyChecksums (bb, qwStartingOffset, "post-write"); } } /* We flushed the queue. Reset the buffer. */ queuedWritesPosition = 0; } } catch (IOException e) { throw new LogWriteException (envImpl, "IOException during fsync", e); } } /** * getWritableFile must be called under the log write latch. * * Typically, endOfLogRWFile is not null. Hence the * fsyncFileSynchronizer does not need to be locked (which would * block the write queue from operating). */ private RandomAccessFile getWritableFile(final long fileNumber, final boolean doLock) { try { if (endOfLogRWFile == null) { /* * We need to make a file descriptor for the end of the * log. This is guaranteed to be called under the log * write latch. * * Protect both the RWFile and SyncFile under this lock, * to avoid a race for creating the file and writing the * header. [#20732] */ if (doLock) { fsyncFileSynchronizer.lock(); } try { endOfLogRWFile = makeFileHandle(fileNumber, getAppropriateReadWriteMode()). getFile(); endOfLogSyncFile = makeFileHandle(fileNumber, getAppropriateReadWriteMode()).
getFile(); } finally { if (doLock) { fsyncFileSynchronizer.unlock(); } } } return endOfLogRWFile; } catch (Exception e) { /* * If we can't get a write channel, we need to invalidate the * environment. */ throw new EnvironmentFailureException (envImpl, EnvironmentFailureReason.LOG_INTEGRITY, e); } } /** * FSync the log file that makes up the end of the log. */ private void force() throws DatabaseException, IOException { /* * Get a local copy of the end of the log file descriptor, it could * change. No need to latch, no harm done if we get an old file * descriptor, because we forcibly fsync under the log write latch * when we switch files. * * If there is no current end file descriptor, we know that the log * file has flipped to a new file since the fsync was issued. */ fsyncFileSynchronizer.lock(); try { /* Flush any queued writes. */ if (useWriteQueue) { dequeuePendingWrites1(); } RandomAccessFile file = endOfLogSyncFile; if (file != null) { bumpWriteCount("fsync"); FileChannel ch = file.getChannel(); try { long start = System.currentTimeMillis(); ch.force(false); nLogFSyncs.increment(); final long fsyncMs = System.currentTimeMillis() - start; nFSyncTime.add(fsyncMs); } catch (ClosedChannelException e) { /* * The channel should never be closed. It may be closed * because of an interrupt received by another thread. * See SR [#10463]. */ throw new ThreadInterruptedException (envImpl, "Channel closed, may be due to thread interrupt", e); } assert EnvironmentImpl.maybeForceYield(); } /* Flush any writes which were queued while fsync'ing. */ if (useWriteQueue) { dequeuePendingWrites1(); } } finally { fsyncFileSynchronizer.unlock(); } } /** * Close the end of the log file descriptor. Use atomic assignment to * ensure that we won't force and close on the same descriptor. */ void close() throws IOException { /* * Protect both the RWFile and SyncFile under this lock out of * paranoia, although we don't expect two threads to call close * concurrently. [#20732] */ fsyncFileSynchronizer.lock(); try { IOException firstException = null; if (endOfLogRWFile != null) { RandomAccessFile file = endOfLogRWFile; /* * Null out so that other threads know endOfLogRWFile is no * longer available. */ endOfLogRWFile = null; try { file.close(); } catch (IOException e) { /* Save this exception, so we can try second close. */ firstException = e; } } if (endOfLogSyncFile != null) { RandomAccessFile file = endOfLogSyncFile; /* * Null out so that other threads know endOfLogSyncFile is * no longer available. */ endOfLogSyncFile = null; file.close(); } if (firstException != null) { throw firstException; } } finally { fsyncFileSynchronizer.unlock(); } } } /* * Generate IOExceptions for testing. */ /* Testing switch. public so others can read the value. */ public static final boolean LOGWRITE_EXCEPTION_TESTING; private static String RRET_PROPERTY_NAME = "je.logwrite.exception.testing"; static { LOGWRITE_EXCEPTION_TESTING = (System.getProperty(RRET_PROPERTY_NAME) != null); } /* Max write counter value. */ private static final int LOGWRITE_EXCEPTION_MAX = 100; /* Current write counter value. */ private int logWriteExceptionCounter = 0; /* Whether an exception has been thrown. */ private boolean logWriteExceptionThrown = false; /* Random number generator. 
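* (Created lazily and seeded from the clock; used only by the
* generateLogWriteException test path below.)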
*/ private Random logWriteExceptionRandom = null; private void generateLogWriteException(RandomAccessFile file, ByteBuffer data, long destOffset, long fileNum) throws DatabaseException, IOException { if (logWriteExceptionThrown) { (new Exception("Write after LogWriteException")). printStackTrace(); } logWriteExceptionCounter += 1; if (logWriteExceptionCounter >= LOGWRITE_EXCEPTION_MAX) { logWriteExceptionCounter = 0; } if (logWriteExceptionRandom == null) { logWriteExceptionRandom = new Random(System.currentTimeMillis()); } if (logWriteExceptionCounter == logWriteExceptionRandom.nextInt(LOGWRITE_EXCEPTION_MAX)) { int len = logWriteExceptionRandom.nextInt(data.remaining()); if (len > 0) { byte[] a = new byte[len]; data.get(a, 0, len); ByteBuffer buf = ByteBuffer.wrap(a); writeToFile(file, buf, destOffset, fileNum, false /*flushRequired*/); } logWriteExceptionThrown = true; throw new IOException("Randomly generated for testing"); } } /** * The factory interface for creating RandomAccessFiles. For production * use, the default factory is always used and a DefaultRandomAccessFile is * always created. For testing, the factory can be overridden to return a * subclass of DefaultRandomAccessFile that overrides methods and injects * faults, for example. */ public interface FileFactory { /** * @param envHome can be used to distinguish environments in a test * program that opens multiple environments. Not for production use. * * @param fullName the full file name to be passed to the * RandomAccessFile constructor. * * @param mode the file mode to be passed to the RandomAccessFile * constructor. */ RandomAccessFile createFile(File envHome, String fullName, String mode) throws FileNotFoundException; } /** * The RandomAccessFile for production use. Tests that override the * default FileFactory should return a RandomAccessFile that subclasses * this class to inherit workarounds such as the overridden length method. */ public static class DefaultRandomAccessFile extends RandomAccessFile { public DefaultRandomAccessFile(String fullName, String mode) throws FileNotFoundException { super(fullName, mode); } /** * RandomAccessFile.length() is not thread safe and side-effects the * file pointer if interrupted in the middle. It is synchronized here * to work around that problem. */ @Override public synchronized long length() throws IOException { return super.length(); } } /** * The factory instance used to create RandomAccessFiles. This field is * intentionally public and non-final so it may be set by tests. See * FileFactory. */ public static FileFactory fileFactory = new FileFactory() { public RandomAccessFile createFile(File envHome, String fullName, String mode) throws FileNotFoundException { return new DefaultRandomAccessFile(fullName, mode); } }; }
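/*
 * Usage sketch (test code only; this exact override is illustrative, not
 * part of JE): a test can install a custom factory before opening an
 * environment so that every log file handle flows through it, e.g. to
 * inject read/write faults by subclassing DefaultRandomAccessFile:
 *
 *   FileManager.fileFactory = new FileManager.FileFactory() {
 *       public RandomAccessFile createFile(File envHome,
 *                                          String fullName,
 *                                          String mode)
 *           throws FileNotFoundException {
 *           return new FileManager.DefaultRandomAccessFile(fullName, mode) {
 *               // Override read/write methods here to inject faults.
 *           };
 *       }
 *   };
 */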