1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 2002, 2014 Oracle and/or its affiliates.  All rights reserved.
5  *
6  */
7 
8 package com.sleepycat.je.log;
9 
10 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_BYTES_READ_FROM_WRITEQUEUE;
11 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_BYTES_WRITTEN_FROM_WRITEQUEUE;
12 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_FILE_OPENS;
13 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_LOG_FSYNCS;
14 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_OPEN_FILES;
15 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_RANDOM_READS;
16 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_RANDOM_READ_BYTES;
17 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_RANDOM_WRITES;
18 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_RANDOM_WRITE_BYTES;
19 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_READS_FROM_WRITEQUEUE;
20 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_SEQUENTIAL_READS;
21 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_SEQUENTIAL_READ_BYTES;
22 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_SEQUENTIAL_WRITES;
23 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_SEQUENTIAL_WRITE_BYTES;
24 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_WRITEQUEUE_OVERFLOW;
25 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_WRITEQUEUE_OVERFLOW_FAILURES;
26 import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_WRITES_FROM_WRITEQUEUE;
27 
28 import java.io.File;
29 import java.io.FileNotFoundException;
30 import java.io.FilenameFilter;
31 import java.io.IOException;
32 import java.io.RandomAccessFile;
33 import java.nio.ByteBuffer;
34 import java.nio.channels.ClosedChannelException;
35 import java.nio.channels.FileChannel;
36 import java.nio.channels.FileLock;
37 import java.nio.channels.OverlappingFileLockException;
38 import java.util.ArrayList;
39 import java.util.Arrays;
40 import java.util.Collections;
41 import java.util.Comparator;
42 import java.util.HashMap;
43 import java.util.Hashtable;
44 import java.util.Iterator;
45 import java.util.LinkedList;
46 import java.util.List;
47 import java.util.Map;
48 import java.util.Random;
49 import java.util.Set;
50 import java.util.concurrent.locks.ReentrantLock;
51 
52 import com.sleepycat.je.DatabaseException;
53 import com.sleepycat.je.EnvironmentFailureException;
54 import com.sleepycat.je.EnvironmentLockedException;
55 import com.sleepycat.je.LogWriteException;
56 import com.sleepycat.je.StatsConfig;
57 import com.sleepycat.je.ThreadInterruptedException;
58 import com.sleepycat.je.config.EnvironmentParams;
59 import com.sleepycat.je.dbi.DbConfigManager;
60 import com.sleepycat.je.dbi.EnvironmentFailureReason;
61 import com.sleepycat.je.dbi.EnvironmentImpl;
62 import com.sleepycat.je.log.entry.FileHeaderEntry;
63 import com.sleepycat.je.log.entry.LogEntry;
64 import com.sleepycat.je.utilint.DbLsn;
65 import com.sleepycat.je.utilint.HexFormatter;
66 import com.sleepycat.je.utilint.IntStat;
67 import com.sleepycat.je.utilint.LongStat;
68 import com.sleepycat.je.utilint.RelatchRequiredException;
69 import com.sleepycat.je.utilint.StatGroup;
70 
71 /**
72  * The FileManager presents the abstraction of one contiguous file.  It doles
73  * out LSNs.
74  */
75 public class FileManager {
76 
77     public enum FileMode {
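        /*
         * The mode strings below are the open modes accepted by
         * java.io.RandomAccessFile: "r" is read-only, "rw" is read-write,
         * "rwd" additionally writes content updates synchronously to the
         * storage device, and "rws" writes content and metadata updates
         * synchronously.
         */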
78         READ_MODE("r", false),
79         READWRITE_MODE("rw", true),
80         READWRITE_ODSYNC_MODE("rwd", true),
81         READWRITE_OSYNC_MODE("rws", true);
82 
83         private String fileModeValue;
84         private boolean isWritable;
85 
86         private FileMode(String fileModeValue, boolean isWritable) {
87             this.fileModeValue = fileModeValue;
88             this.isWritable = isWritable;
89         }
90 
91         public String getModeValue() {
92             return fileModeValue;
93         }
94 
95         public boolean isWritable() {
96             return isWritable;
97         }
98     }
99 
100     static boolean IO_EXCEPTION_TESTING_ON_WRITE = false;
101     static boolean IO_EXCEPTION_TESTING_ON_READ = false;
102     static boolean THROW_RRE_FOR_UNIT_TESTS = false;
103     private static final String DEBUG_NAME = FileManager.class.getName();
104     private static final boolean DEBUG = false;
105 
106     /**
107      * Returns whether we are in a test mode where we attempt to continue
108      * after a write IOException.  This method will never return true in
109      * production use.
110      */
111     public static boolean continueAfterWriteException() {
112         return IO_EXCEPTION_TESTING_ON_WRITE && !THROW_RRE_FOR_UNIT_TESTS;
113     }
114 
115     /*
116      * The number of writes that have been performed.
117      *
118      * public so that unit tests can diddle them.
119      */
120     public static long WRITE_COUNT = 0;
121 
122     /*
123      * The write count value where we should stop or throw.
124      */
125     public static long STOP_ON_WRITE_COUNT = Long.MAX_VALUE;
126 
127     /*
128      * If we're throwing, then throw on write #'s STOP_ON_WRITE_COUNT
129      * through STOP_ON_WRITE_COUNT + N_BAD_WRITES - 1 (inclusive).
130      */
131     public static long N_BAD_WRITES = Long.MAX_VALUE;
132 
133     /*
134      * If true, then throw an IOException on write #'s STOP_ON_WRITE_COUNT
135      * through STOP_ON_WRITE_COUNT + N_BAD_WRITES - 1 (inclusive).
136      */
137     public static boolean THROW_ON_WRITE = false;
138 
139     public static final String JE_SUFFIX = ".jdb";  // regular log files
140     public static final String DEL_SUFFIX = ".del";  // cleaned files
141     public static final String BAD_SUFFIX = ".bad";  // corrupt files
142     private static final String LOCK_FILE = "je.lck";// lock file
143     static final String[] DEL_SUFFIXES = { DEL_SUFFIX };
144     static final String[] JE_SUFFIXES = { JE_SUFFIX };
145     private static final String[] JE_AND_DEL_SUFFIXES =
146     { JE_SUFFIX, DEL_SUFFIX };
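
    /*
     * Log file names are the file number zero-padded to eight hex digits
     * plus a suffix; for example, file number 0x1a is stored as
     * "0000001a.jdb" (see getFileNumberString below).
     */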
147 
148     /*
149      * The suffix used to denote a file that is in the process of being
150      * transferred during a network backup. The file may not have been
151      * completely transferred, or its digest verified.
152      */
153     public static final String TMP_SUFFIX = ".tmp";
154 
155     /*
156      * The suffix used to rename files out of the way, if they are being
157      * retained during a backup. Note that the suffix is used in conjunction
158      * with a backup number as described in <code>NetworkBackup</code>
159      */
160     public static final String BUP_SUFFIX = ".bup";
161 
162     /* May be set to false to speed unit tests. */
163     private boolean syncAtFileEnd = true;
164 
165     private final EnvironmentImpl envImpl;
166     private final long maxFileSize;
167     private final File dbEnvHome;
168     private final File[] dbEnvDataDirs;
169 
170     /* True if .del files should be included in the list of log files. */
171     private boolean includeDeletedFiles = false;
172 
173     /* File cache */
174     private final FileCache fileCache;
175 
176     private FileCacheWarmer fileCacheWarmer;
177 
178     /* The channel and lock for the je.lck file. */
179     private RandomAccessFile lockFile;
180     private FileChannel channel;
181     private FileLock envLock;
182     private FileLock exclLock;
183 
184     /* True if all files should be opened readonly. */
185     private final boolean readOnly;
186 
187     /* Handles onto log position */
188     private long currentFileNum;     // number of the current file
189     private long nextAvailableLsn;   // nextLSN is the next one available
190     private long lastUsedLsn;        // last LSN used in the current log file
191     private long prevOffset;         // Offset to use for the previous pointer
192     private boolean forceNewFile;    // Force new file on next write
193 
194     /*
195      * Saved versions of the above.  If a write causes an IOException, we
196      * can use these to back the log up to the last known good LSN.
197      */
198     private long savedCurrentFileNum;
199     private long savedNextAvailableLsn; // nextLSN is the next one available
200     private long savedLastUsedLsn;   // last LSN used in the current log file
201     private long savedPrevOffset;    // Offset to use for the previous pointer
202     private boolean savedForceNewFile;
203 
204     /* endOfLog is used for writes and fsyncs to the end of the log. */
205     private final LogEndFileDescriptor endOfLog;
206 
207     /*
208      * When we bump the LSNs over to a new file, we must remember the last LSN
209      * of the previous file so we can set the prevOffset field of the file
210      * header appropriately. We have to save it in a map because there's a time
211      * lag between when we know what the last LSN is and when we actually do
212      * the file write, because LSN bumping is done before we get a write
213      * buffer.  The map is keyed by file number; the value is that file's last LSN.
214      */
215     private final Map<Long, Long> perFileLastUsedLsn;
216 
217     /*
218      * True if we should use the Write Queue.  This queue is enabled by default
219      * and contains any write() operations which were attempted but would have
220      * blocked because an fsync() or another write() was in progress at the
221      * time.  The operations on the Write Queue are later executed by the next
222      * operation that is able to grab the fsync latch.  File systems like ext3
223      * need this queue in order to achieve reasonable throughput since it
224      * acquires an exclusive mutex on the inode during any IO operation
225      * (seek/read/write/fsync).  OS's like Windows and Solaris do not since
226      * they are able to handle concurrent IO operations on a single file.
227      */
228     private final boolean useWriteQueue;
229 
230     /* The starting size of the Write Queue. */
231     private final int writeQueueSize;
232 
233     /*
234      * Use O_DSYNC to open JE log files.
235      */
236     private final boolean useODSYNC;
237 
238     /* public for unit tests. */
239     public boolean VERIFY_CHECKSUMS = false;
240 
241     /*
242      * Non-0 means to use envHome/data001 through envHome/data00N for the
243      * environment directories, where N is nDataDirs.  Distribute *.jdb files
244      * through dataNNN directories round-robin.
245      */
246     private final int nDataDirs;
247 
248     /*
249      * Last file to which any IO was done.
250      */
251     long lastFileNumberTouched = -1;
252 
253     /*
254      * Current file offset of lastFile.
255      */
256     long lastFileTouchedOffset = 0;
257 
258     /*
259      * For IO stats, this is a measure of what is "close enough" to constitute
260      * a sequential IO vs a random IO. 1MB for now.  Generally a seek within a
261      * few tracks of the current disk track is "fast" and only requires a
262      * single rotational latency.
263      */
264     private static final long ADJACENT_TRACK_SEEK_DELTA = 1 << 20;
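
    /*
     * Presumably an I/O is counted as sequential when it goes to
     * lastFileNumberTouched at an offset within ADJACENT_TRACK_SEEK_DELTA
     * of lastFileTouchedOffset, and as random otherwise.
     */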
265 
266     /*
267      * Stats
268      */
269     final StatGroup stats;
270     final LongStat nRandomReads;
271     final LongStat nRandomWrites;
272     final LongStat nSequentialReads;
273     final LongStat nSequentialWrites;
274     final LongStat nRandomReadBytes;
275     final LongStat nRandomWriteBytes;
276     final LongStat nSequentialReadBytes;
277     final LongStat nSequentialWriteBytes;
278     final IntStat nFileOpens;
279     final IntStat nOpenFiles;
280     final LongStat nBytesReadFromWriteQueue;
281     final LongStat nBytesWrittenFromWriteQueue;
282     final LongStat nReadsFromWriteQueue;
283     final LongStat nWritesFromWriteQueue;
284     final LongStat nWriteQueueOverflow;
285     final LongStat nWriteQueueOverflowFailures;
286     /* all fsyncs, includes those issued for group commit */
287     final LongStat nLogFSyncs;
288     final LongStat nFSyncTime;
289 
290     /**
291      * Set up the file cache and initialize the file manager to point to the
292      * beginning of the log.
293      *
294      * @param dbEnvHome environment home directory
295      *
296      * @throws IllegalArgumentException via Environment ctor
297      *
298      * @throws EnvironmentLockedException via Environment ctor
299      */
300     public FileManager(EnvironmentImpl envImpl,
301                        File dbEnvHome,
302                        boolean readOnly)
303         throws EnvironmentLockedException {
304 
305         this.envImpl = envImpl;
306         this.dbEnvHome = dbEnvHome;
307         this.readOnly = readOnly;
308 
309         boolean success = false;
310 
311         stats = new StatGroup(LogStatDefinition.FILEMGR_GROUP_NAME,
312                               LogStatDefinition.FILEMGR_GROUP_DESC);
313         nRandomReads = new LongStat(stats, FILEMGR_RANDOM_READS);
314         nRandomWrites = new LongStat(stats, FILEMGR_RANDOM_WRITES);
315         nSequentialReads = new LongStat(stats, FILEMGR_SEQUENTIAL_READS);
316         nSequentialWrites = new LongStat(stats, FILEMGR_SEQUENTIAL_WRITES);
317         nRandomReadBytes = new LongStat(stats, FILEMGR_RANDOM_READ_BYTES);
318         nRandomWriteBytes = new LongStat(stats, FILEMGR_RANDOM_WRITE_BYTES);
319         nSequentialReadBytes =
320             new LongStat(stats, FILEMGR_SEQUENTIAL_READ_BYTES);
321         nSequentialWriteBytes =
322             new LongStat(stats, FILEMGR_SEQUENTIAL_WRITE_BYTES);
323         nFileOpens = new IntStat(stats, FILEMGR_FILE_OPENS);
324         nOpenFiles = new IntStat(stats, FILEMGR_OPEN_FILES);
325         nBytesReadFromWriteQueue =
326             new LongStat(stats, FILEMGR_BYTES_READ_FROM_WRITEQUEUE);
327         nBytesWrittenFromWriteQueue =
328             new LongStat(stats, FILEMGR_BYTES_WRITTEN_FROM_WRITEQUEUE);
329         nReadsFromWriteQueue =
330             new LongStat(stats, FILEMGR_READS_FROM_WRITEQUEUE);
331         nWritesFromWriteQueue =
332             new LongStat(stats, FILEMGR_WRITES_FROM_WRITEQUEUE);
333         nWriteQueueOverflow = new LongStat(stats, FILEMGR_WRITEQUEUE_OVERFLOW);
334         nWriteQueueOverflowFailures =
335             new LongStat(stats, FILEMGR_WRITEQUEUE_OVERFLOW_FAILURES);
336         nLogFSyncs = new LongStat(stats, FILEMGR_LOG_FSYNCS);
337         nFSyncTime = new LongStat(stats, LogStatDefinition.GRPCMGR_FSYNC_TIME);
338 
339         try {
340             /* Read configurations. */
341             DbConfigManager configManager = envImpl.getConfigManager();
342             maxFileSize =
343                 configManager.getLong(EnvironmentParams.LOG_FILE_MAX);
344 
345             useWriteQueue = configManager.getBoolean
346                 (EnvironmentParams.LOG_USE_WRITE_QUEUE);
347             writeQueueSize = configManager.getInt
348                 (EnvironmentParams.LOG_WRITE_QUEUE_SIZE);
349             useODSYNC = configManager.getBoolean
350                 (EnvironmentParams.LOG_USE_ODSYNC);
351             VERIFY_CHECKSUMS = configManager.getBoolean
352                 (EnvironmentParams.LOG_VERIFY_CHECKSUMS);
353             nDataDirs =
354                 configManager.getInt(EnvironmentParams.LOG_N_DATA_DIRECTORIES);
355             if (nDataDirs != 0) {
356                 dbEnvDataDirs = gatherDataDirs();
357             } else {
358                 checkNoDataDirs();
359                 dbEnvDataDirs = null;
360             }
361 
362             if (!envImpl.isMemOnly()) {
363                 if (!dbEnvHome.exists()) {
364                     throw new IllegalArgumentException
365                         ("Environment home " + dbEnvHome + " doesn't exist");
366                 }
367                 if (!lockEnvironment(readOnly, false)) {
368                     throw new EnvironmentLockedException
369                         (envImpl,
370                          "The environment cannot be locked for " +
371                          (readOnly ? "shared" : "single writer") + " access.");
372                 }
373             }
374 
375             /* Cache of files. */
376             fileCache = new FileCache(configManager);
377 
378             /* Start out as if no log existed. */
379             currentFileNum = 0L;
380             nextAvailableLsn =
381                 DbLsn.makeLsn(currentFileNum, firstLogEntryOffset());
382             lastUsedLsn = DbLsn.NULL_LSN;
383             perFileLastUsedLsn =
384                 Collections.synchronizedMap(new HashMap<Long, Long>());
385             prevOffset = 0L;
386             endOfLog = new LogEndFileDescriptor();
387             forceNewFile = false;
388             saveLastPosition();
389 
390             final String stopOnWriteCountName = "je.debug.stopOnWriteCount";
391             final String stopOnWriteCountProp =
392                 System.getProperty(stopOnWriteCountName);
393             if (stopOnWriteCountProp != null) {
394                 try {
395                     STOP_ON_WRITE_COUNT = Long.parseLong(stopOnWriteCountProp);
396                 } catch (NumberFormatException e) {
397                     throw new IllegalArgumentException
398                         ("Could not parse: " + stopOnWriteCountName, e);
399                 }
400             }
401 
402             final String stopOnWriteActionName = "je.debug.stopOnWriteAction";
403             final String stopOnWriteActionProp =
404                 System.getProperty(stopOnWriteActionName);
405             if (stopOnWriteActionProp != null) {
406                 if (stopOnWriteActionProp.compareToIgnoreCase("throw") == 0) {
407                     THROW_ON_WRITE = true;
408                 } else if (stopOnWriteActionProp.
409                            compareToIgnoreCase("stop") == 0) {
410                     THROW_ON_WRITE = false;
411                 } else {
412                     throw new IllegalArgumentException
413                         ("Unknown value for: " + stopOnWriteActionName  +
414                          stopOnWriteActionProp);
415                 }
416             }
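
            /*
             * For example, running unit tests with
             * -Dje.debug.stopOnWriteCount=1000 and
             * -Dje.debug.stopOnWriteAction=throw is intended to make writes
             * fail with an IOException starting at write count 1000.
             */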
417 
418             success = true;
419         } finally {
420             if (!success) {
421                 try {
422                     close();
423                 } catch (IOException e) {
424 
425                     /*
426                      * Klockwork - ok
427                      * Eat it, we want to throw the original exception.
428                      */
429                 }
430             }
431         }
432     }
433 
434     /**
435      * Set the file manager's "end of log".
436      *
437      * @param nextAvailableLsn LSN to be used for the next log entry
438      * @param lastUsedLsn last LSN to have a valid entry, may be DbLsn.NULL_LSN
439      * @param prevOffset value to use for the prevOffset of the next entry.
440      *  If the beginning of the file, this is 0.
441      */
442     public void setLastPosition(long nextAvailableLsn,
443                                 long lastUsedLsn,
444                                 long prevOffset) {
445         this.lastUsedLsn = lastUsedLsn;
446         perFileLastUsedLsn.put(Long.valueOf(DbLsn.getFileNumber(lastUsedLsn)),
447                                Long.valueOf(lastUsedLsn));
448         this.nextAvailableLsn = nextAvailableLsn;
449         currentFileNum = DbLsn.getFileNumber(this.nextAvailableLsn);
450         this.prevOffset = prevOffset;
451         saveLastPosition();
452     }
453 
454     /*
455      * Cause the current LSN state to be saved in case we fail after we have
456      * bumped the LSN pointer but before we've successfully marshalled into the
457      * log buffer.
458      */
459     void saveLastPosition() {
460         savedNextAvailableLsn = nextAvailableLsn;
461         savedLastUsedLsn = lastUsedLsn;
462         savedPrevOffset = prevOffset;
463         savedForceNewFile = forceNewFile;
464         savedCurrentFileNum = currentFileNum;
465     }
466 
467     void restoreLastPosition() {
468         nextAvailableLsn = savedNextAvailableLsn;
469         lastUsedLsn = savedLastUsedLsn;
470         prevOffset = savedPrevOffset;
471         forceNewFile = savedForceNewFile;
472         currentFileNum = savedCurrentFileNum;
473     }
474 
475     /**
476      * May be used to disable sync at file end to speed unit tests.
477      * Must only be used for unit testing, since log corruption may result.
478      */
479     public void setSyncAtFileEnd(boolean sync) {
480         syncAtFileEnd = sync;
481     }
482 
483     /*
484      * File management
485      */
486 
487     /**
488      * public for cleaner.
489      *
490      * @return the number of the first file in this environment.
491      */
492     public Long getFirstFileNum() {
493         return getFileNum(true);
494     }
495 
496     public boolean getReadOnly() {
497         return readOnly;
498     }
499 
500     /**
501      * @return the number of the last file in this environment.
502      */
503     public Long getLastFileNum() {
504         return getFileNum(false);
505     }
506 
507     /**
508      * Returns the highest (current) file number.  Because a long value cannot
509      * be read atomically without synchronization, this method should be called
510      * while holding the log write latch.
511      */
512     public long getCurrentFileNum() {
513         return currentFileNum;
514     }
515 
516     /**
517      * For unit tests.
518      */
519     boolean getUseWriteQueue() {
520         return useWriteQueue;
521     }
522 
523     /**
524      * For assertions that check whether a file is valid or has been deleted
525      * via log cleaning.
526      */
527     public boolean isFileValid(long fileNum) {
528 
529         /*
530          * If the file is the current file, it may be buffered and not yet
531          * created.  If the env is memory-only, we will never create or delete
532          * log files.
533          */
534         if (fileNum == currentFileNum || envImpl.isMemOnly()) {
535             return true;
536         }
537 
538         /* Check for file existence. */
539         String fileName = getFullFileName(fileNum, FileManager.JE_SUFFIX);
540         File file = new File(fileName);
541         return file.exists();
542     }
543 
544     public void setIncludeDeletedFiles(boolean includeDeletedFiles) {
545         this.includeDeletedFiles = includeDeletedFiles;
546     }
547 
548     /**
549      * Get all JE file numbers.
550      * @return an array of all JE file numbers.
551      */
552     public Long[] getAllFileNumbers() {
553         /* Get all the names in sorted order. */
554         String[] names = listFileNames(JE_SUFFIXES);
555         Long[] nums = new Long[names.length];
556         for (int i = 0; i < nums.length; i += 1) {
557             String name = names[i];
558             long num = nums[i] = getNumFromName(name);
559             if (nDataDirs != 0) {
560                 int dbEnvDataDirsIdx = getDataDirIndexFromName(name) - 1;
561                 if (dbEnvDataDirsIdx != (num % nDataDirs)) {
562                     throw EnvironmentFailureException.unexpectedState
563                         ("Found file " + name + " but it should have been in " +
564                          "data directory " + (dbEnvDataDirsIdx + 1) +
565                          ". Perhaps it was moved or restored incorrectly?");
566                 }
567             }
568         }
569         return nums;
570     }
571 
572     /**
573      * Get the next file number before/after currentFileNum.
574      * @param currentFileNum1 the file we're at right now. Note that
575      * it may not exist, if it's been cleaned and renamed.
576      * @param forward if true, we want the next larger file, if false
577      * we want the previous file
578      * @return null if there is no following file, or if filenum doesn't exist
579      */
580     public Long getFollowingFileNum(long currentFileNum1, boolean forward) {
581         /* Get all the names in sorted order. */
582         String[] names = listFileNames(JE_SUFFIXES);
583 
584         /* Search for the current file. */
585         String searchName = getFileName(currentFileNum1, JE_SUFFIX);
586         int foundIdx = Arrays.binarySearch(names, searchName, stringComparator);
587 
588         boolean foundTarget = false;
589         if (foundIdx >= 0) {
590             if (forward) {
591                 foundIdx++;
592             } else {
593                 foundIdx--;
594             }
595         } else {
596 
597             /*
598              * currentFileNum not found (might have been cleaned). FoundIdx
599              * will be (-insertionPoint - 1).
600              */
601             foundIdx = Math.abs(foundIdx + 1);
602             if (!forward) {
603                 foundIdx--;
604             }
605         }
606 
607         /* The current fileNum is found, return the next or prev file. */
608         if (forward && (foundIdx < names.length)) {
609             foundTarget = true;
610         } else if (!forward && (foundIdx > -1)) {
611             foundTarget = true;
612         }
613 
614         if (foundTarget) {
615             return getNumFromName(names[foundIdx]);
616         }
617         return null;
618     }
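
    /*
     * For example, if the only log files are 00000002.jdb and
     * 00000005.jdb, getFollowingFileNum(3, true) returns 5 and
     * getFollowingFileNum(3, false) returns 2.
     */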
619 
620     /**
621      * @return true if there are any files at all.
622      */
623     public boolean filesExist() {
624         String[] names = listFileNames(JE_SUFFIXES);
625         return (names.length != 0);
626     }
627 
628     /**
629      * Get the first or last file number in the set of JE files.
630      *
631      * @param first if true, get the first file, else get the last file
632      * @return the file number or null if no files exist
633      */
634     private Long getFileNum(boolean first) {
635         String[] names = listFileNames(JE_SUFFIXES);
636         if (names.length == 0) {
637             return null;
638         }
639         int index = 0;
640         if (!first) {
641             index = names.length - 1;
642         }
643         return getNumFromName(names[index]);
644     }
645 
646     /**
647      * Get the data dir index from a file name.
648      *
649      * @return the one-based data directory number (the NNN in dataNNN) of
650      * this fileName; -1 if multiple data directories are not being used.
651      */
652     private int getDataDirIndexFromName(String fileName) {
653         if (nDataDirs == 0) {
654             return -1;
655         }
656 
657         int dataDirEnd = fileName.lastIndexOf(File.separator);
658         String dataDir = fileName.substring(0, dataDirEnd);
659         return Integer.valueOf
660             (Integer.parseInt(dataDir.substring("data".length())));
661     }
662 
663     /**
664      * Get the file number from a file name.
665      *
666      * @param fileName the file name
667      * @return the file number
668      */
669     public Long getNumFromName(String fileName) {
670         String name = fileName;
671         if (nDataDirs != 0) {
672             name = name.substring(name.lastIndexOf(File.separator) + 1);
673         }
674         String fileNumber = name.substring(0, name.indexOf("."));
675         return Long.valueOf(Long.parseLong(fileNumber, 16));
676     }
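
    /*
     * For example, getNumFromName("0000001a.jdb") returns 0x1a (26); with
     * multiple data directories, a name such as "data002/0000001a.jdb" is
     * handled by first stripping the directory prefix.
     */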
677 
678     /**
679      * Find JE files. Return names sorted in ascending fashion.
680      * @param suffixes which type of file we're looking for
681      * @return array of file names
682      *
683      * Used by unit tests so package protection.
684      */
685     String[] listFileNames(String[] suffixes) {
686         JEFileFilter fileFilter = new JEFileFilter(suffixes);
687         return listFileNamesInternal(fileFilter);
688     }
689 
690     /**
691      * Find .jdb files which are >= the minimum file number and
692      * <= the maximum file number.
693      * Return names sorted in ascending fashion.
694      *
695      * @return array of file names
696      */
697     public String[] listFileNames(long minFileNumber, long maxFileNumber) {
698         JEFileFilter fileFilter =
699             new JEFileFilter(JE_SUFFIXES, minFileNumber, maxFileNumber);
700         return listFileNamesInternal(fileFilter);
701     }
702 
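    /*
     * These comparators order entries by the file-name portion after any
     * dataNNN directory prefix, so files from multiple data directories
     * sort in file-number order.
     */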
703     private static Comparator<File> fileComparator =
704         new Comparator<File>() {
705 
706         private String getFileNum(File file) {
707             String fname = file.toString();
708             return fname.substring(fname.lastIndexOf(File.separator) + 1);
709         }
710 
711         public int compare(File o1, File o2) {
712             String fnum1 = getFileNum(o1);
713             String fnum2 = getFileNum(o2);
714             return fnum1.compareTo(fnum2);
715         }
716     };
717 
718     private static Comparator<String> stringComparator =
719         new Comparator<String>() {
720 
721         private String getFileNum(String fname) {
722             return fname.substring(fname.indexOf(File.separator) + 1);
723         }
724 
725         public int compare(String o1, String o2) {
726             String fnum1 = getFileNum(o1);
727             String fnum2 = getFileNum(o2);
728             return fnum1.compareTo(fnum2);
729         }
730     };
731 
732     /**
733      * Find JE files, flavor for unit test support.
734      *
735      * @param suffixes which type of file we're looking for
736      * @return array of file names
737      */
738     public static String[] listFiles(File envDirFile,
739                                      String[] suffixes,
740                                      boolean envMultiSubDir) {
741         String[] names = envDirFile.list(new JEFileFilter(suffixes));
742 
743         ArrayList<String> subFileNames = new ArrayList<String>();
744         if (envMultiSubDir) {
745             for (File file : envDirFile.listFiles()) {
746                 if (file.isDirectory() && file.getName().startsWith("data")) {
747                     File[] subFiles =
748                         file.listFiles(new JEFileFilter(suffixes));
749                     for (File subFile : subFiles) {
750                         subFileNames.add(file.getName() +
751                                          File.separator + subFile.getName());
752                     }
753                 }
754             }
755 
756             String[] totalFileNames =
757                 new String[names.length + subFileNames.size()];
758             for (int i = 0; i < totalFileNames.length; i++) {
759                 if (i < names.length) {
760                     totalFileNames[i] = names[i];
761                 } else {
762                     totalFileNames[i] = subFileNames.get(i - names.length);
763                 }
764             }
765             names = totalFileNames;
766         }
767 
768         if (names != null) {
769             Arrays.sort(names, stringComparator);
770         } else {
771             names = new String[0];
772         }
773 
774         return names;
775     }
776 
777     public File[] listJDBFiles() {
778         if (nDataDirs == 0) {
779             return listJDBFilesInternalSingleDir(new JEFileFilter(JE_SUFFIXES));
780         } else {
781             return listJDBFilesInternalMultiDir(new JEFileFilter(JE_SUFFIXES));
782         }
783     }
784 
785     public File[] listJDBFilesInternalSingleDir(JEFileFilter fileFilter) {
786         File[] files = dbEnvHome.listFiles(fileFilter);
787         if (files != null) {
788             Arrays.sort(files);
789         } else {
790             files = new File[0];
791         }
792 
793         return files;
794     }
795 
796     public File[] listJDBFilesInternalMultiDir(JEFileFilter fileFilter) {
797         File[][] files = new File[nDataDirs][];
798         int nTotalFiles = 0;
799         int i = 0;
800         for (File envDir : dbEnvDataDirs) {
801             files[i] = envDir.listFiles(fileFilter);
802             nTotalFiles += files[i].length;
803             i++;
804         }
805 
806         if (nTotalFiles == 0) {
807             return new File[0];
808         }
809 
810         File[] ret = new File[nTotalFiles];
811         i = 0;
812         for (File[] envFiles : files) {
813             for (File envFile : envFiles) {
814                 ret[i++] = envFile;
815             }
816         }
817 
818         Arrays.sort(ret, fileComparator);
819         return ret;
820     }
821 
822     private String[] listFileNamesInternal(JEFileFilter fileFilter) {
823         if (nDataDirs == 0) {
824             return listFileNamesInternalSingleDir(fileFilter);
825         } else {
826             return listFileNamesInternalMultiDirs(fileFilter);
827         }
828     }
829 
830     private String[] listFileNamesInternalSingleDir(JEFileFilter fileFilter) {
831         String[] fileNames = dbEnvHome.list(fileFilter);
832         if (fileNames != null) {
833             Arrays.sort(fileNames);
834         } else {
835             fileNames = new String[0];
836         }
837         return fileNames;
838     }
839 
840     private String[] listFileNamesInternalMultiDirs(JEFileFilter filter) {
841         String[][] files = new String[nDataDirs][];
842         int nTotalFiles = 0;
843         int i = 0;
844         for (File envDir : dbEnvDataDirs) {
845             files[i] = envDir.list(filter);
846 
847             String envDirName = envDir.toString();
848             String dataDirName = envDirName.
849                 substring(envDirName.lastIndexOf(File.separator) + 1);
850 
851             for (int j = 0; j < files[i].length; j += 1) {
852                 files[i][j] = dataDirName + File.separator + files[i][j];
853             }
854 
855             nTotalFiles += files[i].length;
856             i++;
857         }
858 
859         if (nTotalFiles == 0) {
860             return new String[0];
861         }
862 
863         String[] ret = new String[nTotalFiles];
864         i = 0;
865         for (String[] envFiles : files) {
866             for (String envFile : envFiles) {
867                 ret[i++] = envFile;
868             }
869         }
870 
871         Arrays.sort(ret, stringComparator);
872         return ret;
873     }
874 
875     private void checkNoDataDirs() {
876         String[] dataDirNames =
877             dbEnvHome.list(new FilenameFilter() {
878                     public boolean accept(File dir, String name) {
879                         /* We'll validate the subdirNum later. */
880                         return name != null &&
881                             name.length() == "dataNNN".length() &&
882                             name.startsWith("data");
883                     }
884                 }
885                 );
886         if (dataDirNames != null && dataDirNames.length != 0) {
887             throw EnvironmentFailureException.unexpectedState
888                 (EnvironmentParams.LOG_N_DATA_DIRECTORIES.getName() +
889                  " was not set and expected to find no" +
890                  " data directories, but found " +
891                  dataDirNames.length + " data directories instead.");
892         }
893     }
894 
895     public File[] gatherDataDirs() {
896         String[] dataDirNames =
897             dbEnvHome.list(new FilenameFilter() {
898                     public boolean accept(File dir, String name) {
899                         /* We'll validate the subdirNum later. */
900                         return name != null &&
901                             name.length() == "dataNNN".length() &&
902                             name.startsWith("data");
903                     }
904                 }
905                 );
906         if (dataDirNames != null) {
907             Arrays.sort(dataDirNames);
908         } else {
909             dataDirNames = new String[0];
910         }
911 
912         if (dataDirNames.length != nDataDirs) {
913             throw EnvironmentFailureException.unexpectedState
914                 (EnvironmentParams.LOG_N_DATA_DIRECTORIES.getName() +
915                  " was set and expected to find " + nDataDirs +
916                  " data directories, but found " +
917                  dataDirNames.length + " instead.");
918         }
919 
920         int ddNum = 1;
921         File[] dataDirs = new File[nDataDirs];
922         for (String fn : dataDirNames) {
923             String subdirNumStr = fn.substring(4);
924             try {
925                 int subdirNum = Integer.parseInt(subdirNumStr);
926                 if (subdirNum != ddNum) {
927                     throw EnvironmentFailureException.unexpectedState
928                         ("Expected to find data subdir: data" +
929                          paddedDirNum(ddNum) +
930                          " but found data" +
931                          subdirNumStr + " instead.");
932 
933                 }
934 
935                 File dataDir = new File(dbEnvHome, fn);
936                 if (!dataDir.exists()) {
937                     throw EnvironmentFailureException.unexpectedState
938                         ("Data dir: " + dataDir + " doesn't exist.");
939                 }
940                 if (!dataDir.isDirectory()) {
941                     throw EnvironmentFailureException.unexpectedState
942                         ("Data dir: " + dataDir + " is not a directory.");
943                 }
944                 dataDirs[ddNum - 1] = dataDir;
945             } catch (NumberFormatException E) {
946                     throw EnvironmentFailureException.unexpectedState
947                         ("Illegal data subdir: data" + subdirNumStr);
948             }
949             ddNum++;
950         }
951         return dataDirs;
952     }
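
    /*
     * For example, with nDataDirs == 3 the environment home must contain
     * exactly the subdirectories data001, data002 and data003.
     */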
953 
954     private String paddedDirNum(int dirNum) {
955         String paddedStr = "000" + dirNum;
956         int len = paddedStr.length();
957         return paddedStr.substring(len - 3);
958     }
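
    /* For example, paddedDirNum(7) returns "007". */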
959 
960     /**
961      * @return the full file name and path for the nth JE file.
962      */
963     String[] getFullFileNames(long fileNum) {
964         if (includeDeletedFiles) {
965             int nSuffixes = JE_AND_DEL_SUFFIXES.length;
966             String[] ret = new String[nSuffixes];
967             for (int i = 0; i < nSuffixes; i++) {
968                 ret[i] = getFullFileName(fileNum, JE_AND_DEL_SUFFIXES[i]);
969             }
970             return ret;
971         }
972         return new String[] { getFullFileName(fileNum, JE_SUFFIX) };
973     }
974 
975     private File getDataDir(long fileNum) {
976         return (nDataDirs == 0) ?
977             dbEnvHome :
978             dbEnvDataDirs[((int) (fileNum % nDataDirs))];
979     }
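
    /*
     * Files are distributed across the data directories round-robin by
     * file number; for example, with three data directories, file 0x5 maps
     * to index 5 % 3 == 2, which is data003.
     */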
980 
981     public String getFullFileName(long fileNum) {
982         return getFullFileName(fileNum, JE_SUFFIX);
983     }
984 
985     /**
986      * @return the full file name and path for this file name.
987      */
988     public String getFullFileName(long fileNum, String suffix) {
989         File dbEnvDataDir = getDataDir(fileNum);
990         return dbEnvDataDir + File.separator + getFileName(fileNum, suffix);
991     }
992 
993     /*
994      * Return the full file name of a specified log file name, including the
995      * sub directories names if needed.
996      */
997     public String getFullFileName(String fileName) {
998         final int suffixStartPos = fileName.indexOf(".");
999         String suffix = fileName.substring(suffixStartPos, fileName.length());
1000         assert suffix != null;
1001         String fileNum = fileName.substring(0, suffixStartPos);
1002 
1003         return getFullFileName
1004             (Long.valueOf(Long.parseLong(fileNum, 16)), suffix);
1005     }
1006 
1007     /**
1008      * @return the file name for the nth file.
1009      */
1010     public static String getFileName(long fileNum, String suffix) {
1011         return (getFileNumberString(fileNum) + suffix);
1012     }
1013 
1014     /** @return the file name for the nth log (*.jdb) file. */
1015     public static String getFileName(long fileNum) {
1016         return getFileName(fileNum, JE_SUFFIX);
1017     }
1018 
1019     /**
1020      * HexFormatter generates a 0 padded string starting with 0x.  We want
1021      * the right most 8 digits, so start at 10.
1022      */
1023     private static String getFileNumberString(long fileNum) {
1024         return HexFormatter.formatLong(fileNum).substring(10);
1025     }
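
    /* For example, getFileNumberString(0x1a) returns "0000001a". */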
1026 
1027     /**
1028      * @return true if successful, false if File.renameTo returns false, which
1029      * can occur on Windows if the file was recently closed.
1030      */
1031     public boolean renameFile(final long fileNum, final String newSuffix)
1032         throws IOException, DatabaseException {
1033 
1034         return renameFile(fileNum, newSuffix, null) != null;
1035     }
1036 
1037     /**
1038      * Rename this file to NNNNNNNN.suffix. If that file already exists, try
1039      * NNNNNNNN.suffix.1, etc. Used for deleting files or moving corrupt files
1040      * aside.
1041      *
1042      * @param fileNum the file we want to move
1043      *
1044      * @param newSuffix the new file suffix
1045      *
1046      * @param subDir the data directory sub-directory to rename the file into.
1047      * The subDir must already exist. May be null to leave the file in its
1048      * current data directory.
1049      *
1050      * @return renamed File if successful, or null if File.renameTo returns
1051      * false, which can occur on Windows if the file was recently closed.
1052      */
1053     public File renameFile(final long fileNum,
1054                            final String newSuffix,
1055                            final String subDir)
1056         throws IOException {
1057 
1058         final File oldDir = getDataDir(fileNum);
1059         final String oldName = getFileName(fileNum);
1060         final File oldFile = new File(oldDir, oldName);
1061 
1062         final File newDir =
1063             (subDir != null) ? (new File(oldDir, subDir)) : oldDir;
1064 
1065         final String newName = getFileName(fileNum, newSuffix);
1066 
1067         String generation = "";
1068         int repeatNum = 0;
1069 
1070         while (true) {
1071             final File newFile = new File(newDir, newName + generation);
1072 
1073             if (newFile.exists()) {
1074                 repeatNum++;
1075                 generation = "." + repeatNum;
1076                 continue;
1077             }
1078 
1079             clearFileCache(fileNum);
1080 
1081             final boolean success = oldFile.renameTo(newFile);
1082             return success ? newFile : null;
1083         }
1084     }
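
    /*
     * For example, renameFile(0x1a, ".del") renames 0000001a.jdb to
     * 0000001a.del, or to 0000001a.del.1 (and so on) if that name already
     * exists.
     */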
1085 
1086     /**
1087      * Delete log file NNNNNNNN.
1088      *
1089      * @param fileNum the file we want to move
1090      *
1091      * @return true if successful, false if File.delete returns false, which
1092      * can occur on Windows if the file was recently closed.
1093      */
1094     public boolean deleteFile(final long fileNum)
1095         throws IOException, DatabaseException {
1096 
1097         final String fileName = getFullFileNames(fileNum)[0];
1098         clearFileCache(fileNum);
1099         final File file = new File(fileName);
1100         return file.delete();
1101     }
1102 
1103     /**
1104      * Returns the log version for the given file.
1105      */
1106     public int getFileLogVersion(long fileNum)
1107         throws DatabaseException  {
1108 
1109         try {
1110             FileHandle handle = getFileHandle(fileNum);
1111             int logVersion = handle.getLogVersion();
1112             handle.release();
1113             return logVersion;
1114         } catch (FileNotFoundException e) {
1115             throw new EnvironmentFailureException
1116                 (envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND, e);
1117         } catch (ChecksumException e) {
1118             throw new EnvironmentFailureException
1119                 (envImpl, EnvironmentFailureReason.LOG_CHECKSUM, e);
1120         }
1121     }
1122 
1123     /**
1124      * Return a read only file handle that corresponds to this file number.
1125      * Retrieve it from the cache or open it anew and validate the file header.
1126      * This method takes a latch on this file, so that the file descriptor will
1127      * be held in the cache as long as it's in use.  When the user is done with
1128      * the file, the latch must be released.
1129      *
1130      * @param fileNum which file
1131      * @return the file handle for the existing or newly created file
1132      */
1133     public FileHandle getFileHandle(long fileNum)
1134         throws FileNotFoundException, ChecksumException, DatabaseException  {
1135 
1136         /* Check the file cache for this file. */
1137         Long fileId = Long.valueOf(fileNum);
1138         FileHandle fileHandle = null;
1139 
1140         /**
1141          * Loop until we get an open FileHandle.
1142          */
1143         try {
1144             while (true) {
1145 
1146                 /*
1147                  * The file cache is intentionally not latched here so that
1148                  * it's not a bottleneck in the fast path.  We check that the
1149                  * file handle that we get back is really still open after we
1150                  * latch it down below.
1151                  */
1152                 fileHandle = fileCache.get(fileId);
1153 
1154                 /*
1155                  * If the file isn't in the cache, latch the cache and check
1156                  * again.  Under the latch, if the file is not in the cache we
1157                  * add it to the cache but do not open the file yet.  We latch
1158                  * the handle here, and open the file further below after
1159                  * releasing the cache latch.  This prevents blocking other
1160                  * threads that are opening other files while we open this
1161                  * file.  The latch on the handle blocks other threads waiting
1162                  * to open the same file, which is necessary.
1163                  */
1164                 boolean newHandle = false;
1165                 if (fileHandle == null) {
1166                     synchronized (fileCache) {
1167                         fileHandle = fileCache.get(fileId);
1168                         if (fileHandle == null) {
1169                             newHandle = true;
1170                             fileHandle = addFileHandle(fileId);
1171                         }
1172                     }
1173                 }
1174 
1175                 if (newHandle) {
1176 
1177                     /*
1178                      * Open the file with the fileHandle latched.  It was
1179                      * latched by addFileHandle above.
1180                      */
1181                     boolean success = false;
1182                     try {
1183                         openFileHandle(fileHandle, FileMode.READ_MODE,
1184                                        null /*existingHandle*/);
1185                         success = true;
1186                     } finally {
1187                         if (!success) {
1188                             /* An exception is in flight -- clean up. */
1189                             fileHandle.release();
1190                             clearFileCache(fileNum);
1191                         }
1192                     }
1193                 } else {
1194                     /*
1195                      * The handle was found in the cache.  Latch the fileHandle
1196                      * before checking getFile below and returning.
1197                      */
1198                     if (!fileHandle.latchNoWait()) {
1199 
1200                         /*
1201                          * The handle is already latched by another thread.  Rather
1202                          * than wait, we make a new transient handle.  It doesn't need
1203                          * to be latched, but it does need to be closed.
1204                          */
1205                         final FileHandle existingHandle = fileHandle;
1206                         fileHandle = new FileHandle(
1207                             envImpl, fileId, getFileNumberString(fileId)) {
1208                                 @Override
1209                                 public void release()
1210                                     throws DatabaseException {
1211 
1212                                     try {
1213                                         close();
1214                                     } catch (IOException E) {
1215                                         // Ignore
1216                                     }
1217                                 }
1218                             };
1219 
1220                         openFileHandle(fileHandle, FileMode.READ_MODE,
1221                                        existingHandle);
1222                     }
1223                 }
1224 
1225                 /*
1226                  * We may have obtained this file handle outside the file cache
1227                  * latch, so we have to test that the handle is still valid.
1228                  * If it's not, then loop back and try again.
1229                  */
1230                 if (fileHandle.getFile() == null) {
1231                     fileHandle.release();
1232                 } else {
1233                     break;
1234                 }
1235             }
1236         } catch (FileNotFoundException e) {
1237             /* Handle at higher levels. */
1238             throw e;
1239         } catch (IOException e) {
1240             throw new EnvironmentFailureException
1241                 (envImpl, EnvironmentFailureReason.LOG_READ, e);
1242         }
1243 
1244         return fileHandle;
1245     }
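
    /*
     * Illustrative use, following the contract above and the pattern in
     * getFileLogVersion: obtain the handle, use it, and release the latch
     * when done, e.g.
     *
     *     FileHandle handle = getFileHandle(fileNum);
     *     try {
     *         // read via handle.getFile() ...
     *     } finally {
     *         handle.release();
     *     }
     */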
1246 
1247     /**
1248      * Creates a new FileHandle and adds it to the cache, but does not open
1249      * the file.
1250      * @return the latched FileHandle.
1251      */
1252     private FileHandle addFileHandle(Long fileNum)
1253         throws IOException, DatabaseException {
1254 
1255         FileHandle fileHandle =
1256             new FileHandle(envImpl, fileNum, getFileNumberString(fileNum));
1257         fileCache.add(fileNum, fileHandle);
1258         fileHandle.latch();
1259         return fileHandle;
1260     }
1261 
1262     private FileMode getAppropriateReadWriteMode() {
1263         if (useODSYNC) {
1264             return FileMode.READWRITE_ODSYNC_MODE;
1265         }
1266         return FileMode.READWRITE_MODE;
1267     }
1268 
1269     /**
1270      * Creates a new handle and opens it.  Does not add the handle to the
1271      * cache.
1272      */
1273     private FileHandle makeFileHandle(long fileNum, FileMode mode)
1274         throws FileNotFoundException, ChecksumException {
1275 
1276         FileHandle fileHandle =
1277             new FileHandle(envImpl, fileNum, getFileNumberString(fileNum));
1278         openFileHandle(fileHandle, mode, null /*existingHandle*/);
1279         return fileHandle;
1280     }
1281 
1282     /**
1283      * Opens the file for the given handle and initializes it.
1284      *
1285      * @param existingHandle is an already open handle for the same file or
1286      * null.  If non-null it is used to avoid the cost of reading the file
1287      * header.
1288      */
1289     private void openFileHandle(FileHandle fileHandle,
1290                                 FileMode mode,
1291                                 FileHandle existingHandle)
1292         throws FileNotFoundException, ChecksumException {
1293 
1294         nFileOpens.increment();
1295         long fileNum = fileHandle.getFileNum();
1296         String[] fileNames = getFullFileNames(fileNum);
1297         RandomAccessFile newFile = null;
1298         String fileName = null;
1299         boolean success = false;
1300         try {
1301 
1302             /*
1303              * Open the file. Note that we are going to try a few names to open
1304              * this file -- we'll try for N.jdb, and if that doesn't exist and
1305              * we're configured to look for all types, we'll look for N.del.
1306              */
1307             FileNotFoundException FNFE = null;
1308             for (String fileName2 : fileNames) {
1309                 fileName = fileName2;
1310                 try {
1311                     newFile = fileFactory.createFile(dbEnvHome, fileName,
1312                                                      mode.getModeValue());
1313                     break;
1314                 } catch (FileNotFoundException e) {
1315                     /* Save the first exception thrown. */
1316                     if (FNFE == null) {
1317                         FNFE = e;
1318                     }
1319                 }
1320             }
1321 
1322             /*
1323              * If we didn't find the file or couldn't create it, rethrow the
1324              * exception.
1325              */
1326             if (newFile == null) {
1327                 assert FNFE != null;
1328                 throw FNFE;
1329             }
1330 
1331             /*
1332              * If there is an existing open handle, there is no need to read or
1333              * validate the header.  Note that the log version is zero if the
1334              * existing handle is not fully initialized.
1335              */
1336             if (existingHandle != null) {
1337                 final int logVersion = existingHandle.getLogVersion();
1338                 if (logVersion > 0) {
1339                     fileHandle.init(newFile, logVersion);
1340                     success = true;
1341                     return;
1342                 }
1343             }
1344 
1345             int logVersion = LogEntryType.LOG_VERSION;
1346 
1347             if (newFile.length() == 0) {
1348 
1349                 /*
1350                  * If the file is empty, reinitialize it if we can. If not,
1351                  * send the file handle back up; the calling code will deal
1352                  * with the fact that there's nothing there.
1353                  */
1354                 if (mode.isWritable()) {
1355                     /* An empty file, write a header. */
1356                     long lastLsn = DbLsn.longToLsn(perFileLastUsedLsn.remove
1357                        (Long.valueOf(fileNum - 1)));
1358                     long headerPrevOffset = 0;
1359                     if (lastLsn != DbLsn.NULL_LSN) {
1360                         headerPrevOffset = DbLsn.getFileOffset(lastLsn);
1361                     }
1362                     if ((headerPrevOffset == 0) &&
1363                         (fileNum > 1) &&
1364                         syncAtFileEnd) {
1365                         /* Get more info if this happens again. [#20732] */
1366                         throw EnvironmentFailureException.unexpectedState
1367                             (envImpl,
1368                              "Zero prevOffset fileNum=0x" +
1369                              Long.toHexString(fileNum) +
1370                              " lastLsn=" + DbLsn.getNoFormatString(lastLsn) +
1371                              " perFileLastUsedLsn=" + perFileLastUsedLsn +
1372                              " fileLen=" + newFile.length());
1373                     }
1374                     FileHeader fileHeader =
1375                         new FileHeader(fileNum, headerPrevOffset);
1376                     writeFileHeader(newFile, fileName, fileHeader, fileNum);
1377                 }
1378             } else {
1379                 /* A non-empty file, check the header */
1380                 logVersion =
1381                     readAndValidateFileHeader(newFile, fileName, fileNum);
1382             }
1383             fileHandle.init(newFile, logVersion);
1384             success = true;
1385         } catch (FileNotFoundException e) {
1386             /* Handle at higher levels. */
1387             throw e;
1388         } catch (IOException e) {
1389             throw new EnvironmentFailureException
1390                 (envImpl, EnvironmentFailureReason.LOG_READ,
1391                  "Couldn't open file " + fileName, e);
1392         } catch (DatabaseException e) {
1393 
1394             /*
1395              * Let this exception go as a checksum exception, so it sets the
1396              * run recovery state correctly.
1397              */
1398             closeFileInErrorCase(newFile);
1399             e.addErrorMessage("Couldn't open file " + fileName);
1400             throw e;
1401         } finally {
1402             if (!success) {
1403                 closeFileInErrorCase(newFile);
1404             }
1405         }
1406     }
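
    /*
     * Added commentary (not in the original source): a sketch of the name
     * resolution above. For file number 0x1a, getFullFileNames() typically
     * yields "0000001a.jdb" first and, when the environment is configured to
     * look for all file types, "0000001a.del" as a fallback; the first
     * FileNotFoundException is saved and rethrown only if no candidate name
     * can be opened.
     */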
1407 
1408     /**
1409      * Close this file and eat any exceptions. Used in catch clauses.
1410      */
1411     private void closeFileInErrorCase(RandomAccessFile file) {
1412         try {
1413             if (file != null) {
1414                 file.close();
1415             }
1416         } catch (Exception e) {
1417         }
1418     }
1419 
1420     /**
1421      * Read the given JE log file and validate the header.
1422      *
1423      * @throws DatabaseException if the file header isn't valid
1424      *
1425      * @return file header log version.
1426      */
1427     private int readAndValidateFileHeader(RandomAccessFile file,
1428                                           String fileName,
1429                                           long fileNum)
1430         throws ChecksumException, DatabaseException {
1431 
1432         /*
1433          * Read the file header from this file. It's always the first log
1434          * entry.
1435          *
1436          * The special UNKNOWN_FILE_HEADER_VERSION value is passed for reading
1437          * the entry header.  The actual log version is read as part of the
1438          * FileHeader entry.  [#16939]
1439          */
1440         LogManager logManager = envImpl.getLogManager();
1441         LogEntry headerEntry = logManager.getLogEntryAllowChecksumException
1442             (DbLsn.makeLsn(fileNum, 0), file,
1443              LogEntryType.UNKNOWN_FILE_HEADER_VERSION);
1444         FileHeader header = (FileHeader) headerEntry.getMainItem();
1445         return header.validate(envImpl, fileName, fileNum);
1446     }
1447 
1448     /**
1449      * Write a proper file header to the given file.
1450      */
1451     private void writeFileHeader(RandomAccessFile file,
1452                                  String fileName,
1453                                  FileHeader header,
1454                                  long fileNum)
1455         throws DatabaseException {
1456 
1457         /* Fail loudly if the environment is invalid. */
1458         envImpl.checkIfInvalid();
1459 
1460         /*
1461          * Fail silently if the environment is not open.
1462          */
1463         if (envImpl.mayNotWrite()) {
1464             return;
1465         }
1466 
1467         /* Write file header into this buffer in the usual log entry format. */
1468         LogEntry headerLogEntry =
1469             new FileHeaderEntry(LogEntryType.LOG_FILE_HEADER, header);
1470         ByteBuffer headerBuf = envImpl.getLogManager().
1471             putIntoBuffer(headerLogEntry,
1472                           0); // prevLogEntryOffset
1473 
1474         /* Write the buffer into the channel. */
1475         int bytesWritten;
1476         try {
1477             if (LOGWRITE_EXCEPTION_TESTING) {
1478                 generateLogWriteException(file, headerBuf, 0, fileNum);
1479             }
1480 
1481             /*
1482              * Always flush header so that file.length() will be non-zero when
1483              * this method returns and two threads won't attempt to create the
1484              * header. [#20732]
1485              */
1486             bytesWritten = writeToFile(file, headerBuf, 0, fileNum,
1487                                        true /*flushRequired*/);
1488 
1489             if (fileNum > savedCurrentFileNum) {
1490 
1491                 /*
1492                  * Writing the new file header succeeded without an IOE.  This
1493                  * can not be undone in the event of another IOE (Out Of Disk
1494                  * Space) on the next write so update the saved LSN state with
1495                  * the new info. Do not update the nextAvailableLsn with a
1496                  * smaller (earlier) LSN in case there's already something in a
1497                  * buffer that is after the new header. [#15754]
1498                  */
1499                 long lsnAfterHeader = DbLsn.makeLsn(fileNum, bytesWritten);
1500                 if (DbLsn.compareTo(nextAvailableLsn, lsnAfterHeader) < 0) {
1501                     nextAvailableLsn = lsnAfterHeader;
1502                 }
1503 
1504                 lastUsedLsn = DbLsn.makeLsn(fileNum, bytesWritten);
1505                 prevOffset = bytesWritten;
1506                 forceNewFile = false;
1507                 currentFileNum = fileNum;
1508                 saveLastPosition();
1509             }
1510         } catch (ClosedChannelException e) {
1511 
1512             /*
1513              * The channel should never be closed. It may be closed because
1514              * of an interrupt received by another thread. See SR [#10463]
1515              */
1516             throw new ThreadInterruptedException
1517                 (envImpl, "Channel closed, may be due to thread interrupt", e);
1518         } catch (IOException e) {
1519             /* Possibly an out of disk exception. */
1520             throw new LogWriteException(envImpl, e);
1521         }
1522 
1523         if (bytesWritten != headerLogEntry.getSize() +
1524             LogEntryHeader.MIN_HEADER_SIZE) {
1525             throw new EnvironmentFailureException
1526                 (envImpl, EnvironmentFailureReason.LOG_INTEGRITY,
1527                  "File " + fileName +
1528                  " was created with an incomplete header. Only " +
1529                  bytesWritten + " bytes were written.");
1530         }
1531     }
1532 
1533     /**
1534      * @return the prevOffset field stored in the file header.
1535      */
1536     long getFileHeaderPrevOffset(long fileNum)
1537         throws ChecksumException, DatabaseException {
1538 
1539         try {
1540             LogEntry headerEntry =
1541                 envImpl.getLogManager().getLogEntryAllowChecksumException
1542                     (DbLsn.makeLsn(fileNum, 0));
1543             FileHeader header = (FileHeader) headerEntry.getMainItem();
1544             return header.getLastEntryInPrevFileOffset();
1545         } catch (FileNotFoundException e) {
1546             throw new EnvironmentFailureException
1547                 (envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND, e);
1548         }
1549     }
1550 
1551     /*
1552      * Support for writing new log entries
1553      */
1554 
1555     /**
1556      * @return the file offset of the last LSN that was used. For constructing
1557      * the headers of log entries. If the last LSN that was used was in a
1558      * previous file, or this is the very first LSN of the whole system, return
1559      * 0.
1560      */
1561     long getPrevEntryOffset() {
1562         return prevOffset;
1563     }
1564 
1565     /**
1566      * Increase the current log position by "size" bytes. Move the prevOffset
1567      * pointer along.
1568      *
1569      * @param size is an unsigned int
1570      * @return true if we flipped to the next log file.
1571      */
1572     boolean bumpLsn(long size) {
1573 
1574         /* Save copy of initial LSN state. */
1575         saveLastPosition();
1576 
1577         boolean flippedFiles = false;
1578 
1579         if (forceNewFile ||
1580             (DbLsn.getFileOffset(nextAvailableLsn) + size) > maxFileSize) {
1581 
1582             forceNewFile = false;
1583 
1584             /* Move to another file. */
1585             currentFileNum++;
1586 
1587             /* Remember the last used LSN of the previous file. */
1588             if (lastUsedLsn != DbLsn.NULL_LSN) {
1589                 perFileLastUsedLsn.put
1590                     (Long.valueOf(DbLsn.getFileNumber(lastUsedLsn)),
1591                      Long.valueOf(lastUsedLsn));
1592             }
1593             prevOffset = 0;
1594             lastUsedLsn =
1595                 DbLsn.makeLsn(currentFileNum, firstLogEntryOffset());
1596             flippedFiles = true;
1597         } else {
1598             if (lastUsedLsn == DbLsn.NULL_LSN) {
1599                 prevOffset = 0;
1600             } else {
1601                 prevOffset = DbLsn.getFileOffset(lastUsedLsn);
1602             }
1603             lastUsedLsn = nextAvailableLsn;
1604         }
1605         nextAvailableLsn =
1606             DbLsn.makeLsn(DbLsn.getFileNumber(lastUsedLsn),
1607                           (DbLsn.getFileOffset(lastUsedLsn) + size));
1608 
1609         return flippedFiles;
1610     }
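
    /*
     * Added commentary (not in the original source): a worked example of the
     * no-flip path above, assuming maxFileSize is not exceeded.
     *
     *   before: lastUsedLsn      = (file 0x3, offset 100)
     *           nextAvailableLsn = (file 0x3, offset 130)
     *   call:   bumpLsn(50)
     *   after:  prevOffset       = 100   (offset of the previous entry)
     *           lastUsedLsn      = (0x3, 130)
     *           nextAvailableLsn = (0x3, 180)
     *           returns false (no file flip)
     *
     * If forceNewFile is set, or offset + size would exceed maxFileSize,
     * currentFileNum is incremented, prevOffset resets to 0, and the new
     * entry starts at firstLogEntryOffset() of the next file.
     */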
1611 
1612     /**
1613      * Write out a log buffer to the file.
1614      * @param fullBuffer buffer to write
1615      * @param flushRequired true if this write can not be queued on the
1616      * Write Queue.
1617      */
1618     void writeLogBuffer(LogBuffer fullBuffer, boolean flushRequired)
1619         throws DatabaseException {
1620 
1621         /* Fail loudly if the environment is invalid. */
1622         envImpl.checkIfInvalid();
1623 
1624         /*
1625          * Fail silently if the environment is not open.
1626          */
1627         if (envImpl.mayNotWrite()) {
1628             return;
1629         }
1630 
1631         /* Use the LSN to figure out what file to write this buffer to. */
1632         long firstLsn = fullBuffer.getFirstLsn();
1633 
1634         /*
1635          * Is there anything in this write buffer? We could have been called by
1636          * the environment shutdown, and nothing is actually in the buffer.
1637          */
1638         if (firstLsn != DbLsn.NULL_LSN) {
1639 
1640             RandomAccessFile file =
1641                 endOfLog.getWritableFile(DbLsn.getFileNumber(firstLsn), true);
1642             ByteBuffer data = fullBuffer.getDataBuffer();
1643 
1644             try {
1645 
1646                 /*
1647                  * Check that we do not overwrite unless the file only contains
1648                  * a header [#11915] [#12616].
1649                  */
1650                 assert fullBuffer.getRewriteAllowed() ||
1651                     (DbLsn.getFileOffset(firstLsn) >= file.length() ||
1652                      file.length() == firstLogEntryOffset()) :
1653                         "FileManager would overwrite non-empty file 0x" +
1654                         Long.toHexString(DbLsn.getFileNumber(firstLsn)) +
1655                         " lsnOffset=0x" +
1656                         Long.toHexString(DbLsn.getFileOffset(firstLsn)) +
1657                         " fileLength=0x" +
1658                         Long.toHexString(file.length());
1659 
1660                 if (IO_EXCEPTION_TESTING_ON_WRITE) {
1661                     throw new IOException("generated for testing (write)");
1662                 }
1663                 if (LOGWRITE_EXCEPTION_TESTING) {
1664                     generateLogWriteException
1665                         (file, data, DbLsn.getFileOffset(firstLsn),
1666                          DbLsn.getFileNumber(firstLsn));
1667                 }
1668                 writeToFile(file, data, DbLsn.getFileOffset(firstLsn),
1669                             DbLsn.getFileNumber(firstLsn),
1670                             flushRequired);
1671             } catch (ClosedChannelException e) {
1672 
1673                 /*
1674                  * The file should never be closed. It may be closed because
1675                  * of an interrupt received by another thread. See SR [#10463].
1676                  */
1677                 throw new ThreadInterruptedException
1678                     (envImpl, "File closed, may be due to thread interrupt",
1679                      e);
1680             } catch (IOException e) {
1681 
1682                 if (!continueAfterWriteException()) {
1683                     throw new LogWriteException(envImpl, e);
1684                 }
1685 
1686                 /*
1687                  * Possibly an out of disk exception, but java.io will only
1688                  * tell us IOException with no indication of whether it's out
1689                  * of disk or something else. Better support may exist in
1690                  * Java6.
1691                  *
1692                  * Since we can't tell what sectors were actually written to
1693                  * disk, we need to change any commit records that might have
1694                  * made it out to disk to abort records. If they made it to
1695                  * disk on the write, then rewriting should allow them to be
1696                  * rewritten. See [11271].
1697                  *
1698                  * Rewriting committed transactions in replication is highly
1699                  * problematic, and can lead to divergence between the replica
1700                  * and master. If this path is re-enabled, we must assess its
1701                  * impact in replication, since the log entries may already
1702              * have been sent to other nodes.
1703                  */
1704                 abortCommittedTxns(data);
1705                 try {
1706                     if (IO_EXCEPTION_TESTING_ON_WRITE) {
1707                         throw new IOException
1708                             ("generated for testing (write)");
1709                     }
1710                     writeToFile(file, data, DbLsn.getFileOffset(firstLsn),
1711                                 DbLsn.getFileNumber(firstLsn), flushRequired);
1712                 } catch (IOException e2) {
1713                     fullBuffer.setRewriteAllowed();
1714                     /* Use an exception that does not invalidate the env. */
1715                     throw EnvironmentFailureException.unexpectedException(e2);
1716                 }
1717             }
1718 
1719             assert EnvironmentImpl.maybeForceYield();
1720         }
1721     }
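
    /*
     * Added commentary (not in the original source): the IOException handling
     * above, in brief.
     *
     *   write fails with IOException
     *     - continueAfterWriteException() == false: throw LogWriteException
     *     - otherwise: convert any commit records in the buffer to aborts and
     *       retry the write once; if the retry also fails, mark the buffer
     *       rewriteAllowed and throw an exception that does not invalidate
     *       the environment
     */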
1722 
1723     /**
1724      * Write a buffer to a file at a given offset.
1725      */
1726     private int writeToFile(RandomAccessFile file,
1727                             ByteBuffer data,
1728                             long destOffset,
1729                             long fileNum,
1730                             boolean flushRequired)
1731         throws IOException, DatabaseException {
1732 
1733         int totalBytesWritten = 0;
1734 
1735         bumpWriteCount("write");
1736 
1737         int pos = data.position();
1738         int size = data.limit() - pos;
1739 
1740         if (lastFileNumberTouched == fileNum &&
1741             (Math.abs(destOffset - lastFileTouchedOffset) <
1742              ADJACENT_TRACK_SEEK_DELTA)) {
1743             nSequentialWrites.increment();
1744             nSequentialWriteBytes.add(size);
1745         } else {
1746             nRandomWrites.increment();
1747             nRandomWriteBytes.add(size);
1748         }
1749 
1750         if (VERIFY_CHECKSUMS) {
1751             verifyChecksums(data, destOffset, "pre-write");
1752         }
1753 
1754         /*
1755          * Perform a RandomAccessFile write and update the buffer position.
1756          * ByteBuffer.array() is safe to use since all non-direct ByteBuffers
1757          * have a backing array.
1758          *
1759          * Synchronization on the file object is needed because two threads may
1760          * call seek() on the same file object.
1761          *
1762          * If the Write Queue is enabled, attempt to get the fsync latch.  If
1763          * we can't get it, then an fsync or write is in progress and we'd
1764          * block anyway.  In that case, queue the write operation.
1765          */
1766         boolean fsyncLatchAcquired =
1767             endOfLog.fsyncFileSynchronizer.tryLock();
1768         boolean enqueueSuccess = false;
1769         if (!fsyncLatchAcquired &&
1770             useWriteQueue &&
1771             !flushRequired) {
1772             enqueueSuccess =
1773                 endOfLog.enqueueWrite(fileNum, data.array(), destOffset,
1774                                       pos + data.arrayOffset(), size);
1775         }
1776 
1777         if (!enqueueSuccess) {
1778             if (!fsyncLatchAcquired) {
1779                 endOfLog.fsyncFileSynchronizer.lock();
1780             }
1781             try {
1782                 if (useWriteQueue) {
1783                     endOfLog.dequeuePendingWrites1();
1784                 }
1785 
1786                 synchronized (file) {
1787                     file.seek(destOffset);
1788                     file.write
1789                         (data.array(), pos + data.arrayOffset(), size);
1790                     if (VERIFY_CHECKSUMS) {
1791                         file.seek(destOffset);
1792                         file.read
1793                             (data.array(), pos + data.arrayOffset(), size);
1794                         verifyChecksums(data, destOffset, "post-write");
1795                     }
1796                 }
1797             } finally {
1798                 endOfLog.fsyncFileSynchronizer.unlock();
1799             }
1800         }
1801         data.position(pos + size);
1802         totalBytesWritten = size;
1803 
1804         lastFileNumberTouched = fileNum;
1805         lastFileTouchedOffset = destOffset + size;
1806         return totalBytesWritten;
1807     }
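
    /*
     * Added commentary (not in the original source): the write-path decision
     * above, in brief.
     *
     *   tryLock on fsyncFileSynchronizer
     *     - acquired: drain the Write Queue (if enabled) and write directly
     *     - not acquired, useWriteQueue && !flushRequired:
     *         enqueueWrite() succeeded -> done, no file I/O on this thread
     *         enqueueWrite() failed    -> block on lock(), drain, then write
     *     - not acquired otherwise: block on lock(), drain, then write
     */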
1808 
1809     private void bumpWriteCount(final String debugMsg)
1810         throws IOException {
1811 
1812         if (DEBUG) {
1813             System.out.println("Write: " + WRITE_COUNT + " " + debugMsg);
1814         }
1815 
1816         if (++WRITE_COUNT >= STOP_ON_WRITE_COUNT &&
1817             WRITE_COUNT < (STOP_ON_WRITE_COUNT + N_BAD_WRITES)) {
1818             if (THROW_ON_WRITE) {
1819                 throw new IOException
1820                     ("IOException generated for testing: " + WRITE_COUNT +
1821                      " " + debugMsg);
1822             }
1823             Runtime.getRuntime().halt(0xff);
1824         }
1825     }
1826 
1827     /**
1828      * Read a buffer from a file at a given offset. We know that the desired
1829      * data exists in this file. There's no need to incur extra costs
1830      * such as checks of the file length, nor to return status as to whether
1831      * this file contains the data.
1832      */
1833     void readFromFile(RandomAccessFile file,
1834                       ByteBuffer readBuffer,
1835                       long offset,
1836                       long fileNo)
1837         throws DatabaseException {
1838         readFromFile(file, readBuffer, offset, fileNo,
1839                      true /* dataKnownToBeInFile */);
1840     }
1841 
1842     /**
1843      * Read a buffer from a file at a given offset.
1844      *
1845      * @return true if the read buffer is filled, false, if there is nothing
1846      * left in the file to read
1847      */
1848     boolean readFromFile(RandomAccessFile file,
1849                          ByteBuffer readBuffer,
1850                          long offset,
1851                          long fileNo,
1852                          boolean dataKnownToBeInFile)
1853         throws DatabaseException {
1854 
1855         /*
1856          * All IOExceptions on read turn into EnvironmentFailureExceptions
1857          * [#15768].
1858          */
1859         try {
1860 
1861             /*
1862              * Check if there's a pending write(s) in the write queue for this
1863              * fileNo/offset and if so, use it to fulfill this read request.
1864              */
1865             if (useWriteQueue &&
1866                 endOfLog.checkWriteCache(readBuffer, offset, fileNo)) {
1867                 return true;
1868             }
1869 
1870             /*
1871              * Nothing queued, all data for this file must be in the file.
1872              * Note that there's no synchronization between the check of the
1873              * write queue above, and this check of file length. It's possible
1874              * that a newly written log entry could show up between the
1875              * statements, and enter the write queue just after we finish the
1876              * check.
1877              *
1878              * Because of this, callers of this method must abide by one of
1879              * three conditions:
1880              * 1. They guarantee that the attempt to read a chunk of new data
1881              *   comes after the new data has been logged by the LogManager.
1882              * 2. The files are quiescent when the read is going on.
1883              * 3. The caller is sure the data is in this file.
1884              *
1885              * The replication feeder reader abides by (1) while all other file
1886              * readers abide by (2). Callers which are fetching specific log
1887              * entries fall under (3).
1888              */
1889             boolean readThisFile = true;
1890             if (!dataKnownToBeInFile) {
1891                 /*
1892                  * Callers who are not sure whether the desired data is in this
1893                  * file or the next incur the cost of a check of file.length(),
1894                  * which is a system call.
1895                  */
1896                 readThisFile = (offset < file.length());
1897             }
1898 
1899             if (readThisFile) {
1900                 readFromFileInternal(file, readBuffer, offset, fileNo);
1901                 return true;
1902             }
1903 
1904             return false;
1905         } catch (ClosedChannelException e) {
1906 
1907             /*
1908              * The channel should never be closed. It may be closed because
1909              * of an interrupt received by another thread. See SR [#10463]
1910              */
1911             throw new ThreadInterruptedException
1912                 (envImpl, "Channel closed, may be due to thread interrupt", e);
1913         } catch (IOException e) {
1914             throw new EnvironmentFailureException
1915                 (envImpl, EnvironmentFailureReason.LOG_READ, e);
1916         }
1917     }
1918 
1919     private void readFromFileInternal(RandomAccessFile file,
1920                                       ByteBuffer readBuffer,
1921                                       long offset,
1922                                       long fileNum)
1923         throws IOException {
1924 
1925         /*
1926          * Perform a RandomAccessFile read and update the buffer position.
1927          * ByteBuffer.array() is safe to use since all non-direct ByteBuffers
1928          * have a backing array.  Synchronization on the file object is needed
1929          * because two threads may call seek() on the same file object.
1930          */
1931         synchronized (file) {
1932             int pos = readBuffer.position();
1933             int size = readBuffer.limit() - pos;
1934 
1935             if (lastFileNumberTouched == fileNum &&
1936                 (Math.abs(offset - lastFileTouchedOffset) <
1937                  ADJACENT_TRACK_SEEK_DELTA)) {
1938                 nSequentialReads.increment();
1939                 nSequentialReadBytes.add(size);
1940             } else {
1941                 nRandomReads.increment();
1942                 nRandomReadBytes.add(size);
1943             }
1944 
1945             file.seek(offset);
1946             if (IO_EXCEPTION_TESTING_ON_READ) {
1947                 throw new IOException("generated for testing (read)");
1948             }
1949             int bytesRead = file.read(readBuffer.array(),
1950                                       pos + readBuffer.arrayOffset(),
1951                                       size);
1952             if (bytesRead > 0) {
1953                 readBuffer.position(pos + bytesRead);
1954             }
1955 
1956             lastFileNumberTouched = fileNum;
1957             lastFileTouchedOffset = offset + bytesRead;
1958         }
1959     }
1960 
1961     private void verifyChecksums(ByteBuffer entryBuffer,
1962                                  long lsn,
1963                                  String comment) {
1964         int curPos = entryBuffer.position();
1965         try {
1966             while (entryBuffer.remaining() > 0) {
1967                 int recStartPos = entryBuffer.position();
1968                 /* Write buffer contains current log version entries. */
1969                 LogEntryHeader header =
1970                     new LogEntryHeader(entryBuffer, LogEntryType.LOG_VERSION);
1971                 verifyChecksum(entryBuffer, header, lsn, comment);
1972                 entryBuffer.position(recStartPos + header.getSize() +
1973                                      header.getItemSize());
1974             }
1975         } catch (ChecksumException e) {
1976             System.err.println("ChecksumException: (" + comment + ") " + e);
1977             System.err.println("start stack trace");
1978             e.printStackTrace(System.err);
1979             System.err.println("end stack trace");
1980         }
1981         entryBuffer.position(curPos);
1982     }
1983 
1984     private void verifyChecksum(ByteBuffer entryBuffer,
1985                                 LogEntryHeader header,
1986                                 long lsn,
1987                                 String comment)
1988         throws ChecksumException {
1989 
1990         ChecksumValidator validator = null;
1991         /* Add header to checksum bytes */
1992         validator = new ChecksumValidator();
1993         int headerSizeMinusChecksum = header.getSizeMinusChecksum();
1994         int itemStart = entryBuffer.position();
1995         entryBuffer.position(itemStart - headerSizeMinusChecksum);
1996         validator.update(entryBuffer, headerSizeMinusChecksum);
1997         entryBuffer.position(itemStart);
1998 
1999         /*
2000          * Now that we know the size, read the rest of the entry if the first
2001          * read didn't get enough.
2002          */
2003         int itemSize = header.getItemSize();
2004         if (entryBuffer.remaining() < itemSize) {
2005             System.err.println("Couldn't verify checksum (" + comment + ")");
2006             return;
2007         }
2008 
2009         /*
2010          * Do entry validation. Run the checksum before checking the entry
2011          * type, since that will be the more encompassing error.
2012          */
2013         validator.update(entryBuffer, itemSize);
2014         validator.validate(header.getChecksum(), lsn);
2015     }
2016 
2017     /*
2018      * Iterate through a buffer looking for commit records.  Change all commit
2019      * records to abort records.
2020      */
2021     private void abortCommittedTxns(ByteBuffer data)
2022         throws DatabaseException {
2023 
2024         final byte commitType = LogEntryType.LOG_TXN_COMMIT.getTypeNum();
2025         data.position(0);
2026 
2027         while (data.remaining() > 0) {
2028             int recStartPos = data.position();
2029             LogEntryHeader header;
2030             try {
2031                 /* Write buffer contains current log version entries. */
2032                 header = new LogEntryHeader(data, LogEntryType.LOG_VERSION);
2033             } catch (ChecksumException e) {
2034                 throw EnvironmentFailureException.unexpectedException(e);
2035             }
2036             if (header.getType() == commitType) {
2037                 /* Change the log entry type, and recalculate the checksum. */
2038                 header.convertCommitToAbort(data);
2039             }
2040             data.position(recStartPos + header.getSize() +
2041                           header.getItemSize());
2042         }
2043         data.position(0);
2044     }
2045 
2046     /**
2047      * FSync the end of the log.
2048      */
2049     void syncLogEnd()
2050         throws DatabaseException {
2051 
2052         try {
2053             endOfLog.force();
2054         } catch (IOException e) {
2055             throw new LogWriteException
2056                 (envImpl, "IOException during fsync", e);
2057         }
2058     }
2059 
2060     /**
2061      * Sync the end of the log, close off this log file. Should only be called
2062      * under the log write latch.
2063      */
2064     void syncLogEndAndFinishFile()
2065         throws DatabaseException, IOException {
2066 
2067         if (syncAtFileEnd) {
2068             syncLogEnd();
2069         }
2070         endOfLog.close();
2071     }
2072 
2073     /**
2074      * Returns whether anything is in the write queue.
2075      */
2076     public boolean hasQueuedWrites() {
2077         return endOfLog.hasQueuedWrites();
2078     }
2079 
2080     /**
2081      * For unit testing only.
2082      */
2083     public void testWriteQueueLock() {
2084         endOfLog.fsyncFileSynchronizer.lock();
2085     }
2086 
2087     /**
2088      * For unit testing only.
2089      */
2090     public void testWriteQueueUnlock() {
2091         endOfLog.fsyncFileSynchronizer.unlock();
2092     }
2093 
2094     public void startFileCacheWarmer(final long recoveryStartLsn){
2095         assert fileCacheWarmer == null;
2096 
2097         final DbConfigManager cm = envImpl.getConfigManager();
2098 
2099         final int warmUpSize = cm.getInt(
2100             EnvironmentParams.LOG_FILE_WARM_UP_SIZE);
2101 
2102         if (warmUpSize == 0) {
2103             return;
2104         }
2105 
2106         final int bufSize = cm.getInt(
2107             EnvironmentParams.LOG_FILE_WARM_UP_BUF_SIZE);
2108 
2109         fileCacheWarmer = new FileCacheWarmer(
2110             envImpl, recoveryStartLsn, lastUsedLsn, warmUpSize, bufSize);
2111 
2112         fileCacheWarmer.start();
2113     }
2114 
2115     private void stopFileCacheWarmer(){
2116 
2117         /*
2118          * Use the fcw local var because fileCacheWarmer can be set to null
2119          * by another thread calling clearFileCacheWarmer, namely the cache
2120          * warmer thread.
2121          */
2122         final FileCacheWarmer fcw = fileCacheWarmer;
2123 
2124         if (fcw == null) {
2125             return;
2126         }
2127 
2128         fcw.shutdown();
2129 
2130         clearFileCacheWarmer();
2131     }
2132 
2133     /* Allow cache warmer thread to be GC'd. */
2134     void clearFileCacheWarmer() {
2135         fileCacheWarmer = null;
2136     }
2137 
2138     /**
2139      * Close all file handles and empty the cache.
2140      */
2141     public void clear()
2142         throws IOException, DatabaseException {
2143 
2144         synchronized (fileCache) {
2145             fileCache.clear();
2146         }
2147 
2148         endOfLog.close();
2149     }
2150 
2151     /**
2152      * Clear the file lock.
2153      */
2154     public void close()
2155         throws IOException {
2156 
2157         stopFileCacheWarmer();
2158 
2159         if (envLock != null) {
2160             envLock.release();
2161             envLock = null;
2162         }
2163 
2164         if (exclLock != null) {
2165             exclLock.release();
2166             exclLock = null;
2167         }
2168 
2169         if (channel != null) {
2170             channel.close();
2171             channel = null;
2172         }
2173 
2174         if (lockFile != null) {
2175             lockFile.close();
2176             lockFile = null;
2177         }
2178     }
2179 
2180     /**
2181      * Lock the environment.  Return true if the lock was acquired.  If
2182      * exclusive is false, then this implements a single writer, multiple
2183      * reader lock.  If exclusive is true, then implement an exclusive lock.
2184      *
2185      * There is a lock file and there are two regions of the lock file: byte 0,
2186      * and byte 1.  Byte 0 is the exclusive writer process area of the lock
2187      * file.  If an environment is opened for write, then it attempts to take
2188      * an exclusive write lock on byte 0.  Byte 1 is the shared reader process
2189      * area of the lock file.  If an environment is opened for read-only, then
2190      * it attempts to take a shared lock on byte 1.  This is how we implement
2191      * single writer, multi reader semantics.
2192      *
2193      * The cleaner, each time it is invoked, attempts to take an exclusive lock
2194      * on byte 1.  The owning process already either has an exclusive lock on
2195      * byte 0, or a shared lock on byte 1.  This will necessarily conflict with
2196      * any shared locks on byte 1, even if it's in the same process and there
2197      * are no other holders of that shared lock.  So if there is only one
2198      * read-only process, it will have byte 1 for shared access, and the
2199      * cleaner can not run in it because it will attempt to get an exclusive
2200      * lock on byte 1 (which is already locked for shared access by itself).
2201      * If a write process comes along and tries to run the cleaner, it will
2202      * attempt to get an exclusive lock on byte 1.  If there are no other
2203      * reader processes (with shared locks on byte 1), and no other writers
2204      * (which are running cleaners with exclusive locks on byte 1), then the
2205      * cleaner will run.
2206      */
2207     public boolean lockEnvironment(boolean rdOnly, boolean exclusive) {
2208         try {
2209             if (checkEnvHomePermissions(rdOnly)) {
2210                 return true;
2211             }
2212 
2213             if (lockFile == null) {
2214                 lockFile =
2215                     new RandomAccessFile
2216                     (new File(dbEnvHome, LOCK_FILE),
2217                      FileMode.READWRITE_MODE.getModeValue());
2218             }
2219 
2220             channel = lockFile.getChannel();
2221 
2222             try {
2223                 if (exclusive) {
2224 
2225                     /*
2226                      * To lock exclusive, must have exclusive on
2227                      * shared reader area (byte 1).
2228                      */
2229                     exclLock = channel.tryLock(1, 1, false);
2230                     if (exclLock == null) {
2231                         return false;
2232                     }
2233                     return true;
2234                 }
2235                 if (rdOnly) {
2236                     envLock = channel.tryLock(1, 1, true);
2237                 } else {
2238                     envLock = channel.tryLock(0, 1, false);
2239                 }
2240                 if (envLock == null) {
2241                     return false;
2242                 }
2243                 return true;
2244             } catch (OverlappingFileLockException e) {
2245                 return false;
2246             }
2247         } catch (IOException e) {
2248             throw new EnvironmentFailureException
2249                 (envImpl, EnvironmentFailureReason.LOG_INTEGRITY, e);
2250         }
2251     }
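
    /*
     * Added commentary (not in the original source): the lock-file regions
     * used above.
     *
     *   byte 0, exclusive - read-write environment (rdOnly == false)
     *   byte 1, shared    - read-only environment  (rdOnly == true)
     *   byte 1, exclusive - cleaner/exclusive open (exclusive == true)
     */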
2252 
2253     public void releaseExclusiveLock()
2254         throws DatabaseException {
2255 
2256         try {
2257             if (exclLock != null) {
2258                 exclLock.release();
2259             }
2260         } catch (IOException e) {
2261             throw new EnvironmentFailureException
2262                 (envImpl, EnvironmentFailureReason.LOG_INTEGRITY, e);
2263         }
2264     }
2265 
2266     /**
2267      * Ensure that if the environment home dir is on readonly media or in a
2268      * readonly directory that the environment has been opened for readonly
2269      * access.
2270      *
2271      * @return true if the environment home dir is readonly.
2272      *
2273      * @throws IllegalArgumentException via Environment ctor
2274      */
2275     public boolean checkEnvHomePermissions(boolean rdOnly)
2276         throws DatabaseException {
2277 
2278         if (nDataDirs == 0) {
2279             return checkEnvHomePermissionsSingleEnvDir(dbEnvHome, rdOnly);
2280         } else {
2281             return checkEnvHomePermissionsMultiEnvDir(rdOnly);
2282         }
2283     }
2284 
2285     private boolean checkEnvHomePermissionsSingleEnvDir(File dbEnvHome,
2286                                                         boolean rdOnly)
2287         throws DatabaseException {
2288 
2289         boolean envDirIsReadOnly = !dbEnvHome.canWrite();
2290         if (envDirIsReadOnly && !rdOnly) {
2291 
2292             /*
2293              * Use the absolute path in the exception message, to
2294              * make a mis-specified relative path problem more obvious.
2295              */
2296             throw new IllegalArgumentException
2297                 ("The Environment directory " +
2298                  dbEnvHome.getAbsolutePath() +
2299                  " is not writable, but the " +
2300                  "Environment was opened for read-write access.");
2301         }
2302 
2303         return envDirIsReadOnly;
2304     }
2305 
2306     private boolean checkEnvHomePermissionsMultiEnvDir(boolean rdOnly)
2307         throws DatabaseException {
2308 
2309         for (File dbEnvDir : dbEnvDataDirs) {
2310             if (!checkEnvHomePermissionsSingleEnvDir(dbEnvDir, rdOnly)) {
2311                 return false;
2312             }
2313         }
2314 
2315         return true;
2316     }
2317 
2318     /**
2319      * Truncate a log at this position. Used by recovery-to-a-timestamp
2320      * utilities and by recovery to set the end-of-log position; see
2321      * LastFileReader.setEndOfFile().
2322      *
2323      * <p>This method forces a new log file to be written next, if the last
2324      * file (the file truncated to) has an old version in its header. This
2325      * ensures that when the log is opened by an old version of JE, a version
2326      * incompatibility will be detected.  [#11243]</p>
2327      */
2328     public void truncateSingleFile(long fileNum, long offset)
2329         throws IOException, DatabaseException {
2330 
2331         try {
2332             FileHandle handle =
2333                 makeFileHandle(fileNum, getAppropriateReadWriteMode());
2334             RandomAccessFile file = handle.getFile();
2335 
2336             try {
2337                 file.getChannel().truncate(offset);
2338             } finally {
2339                 file.close();
2340             }
2341 
2342             if (handle.isOldHeaderVersion()) {
2343                 forceNewFile = true;
2344             }
2345         } catch (ChecksumException e) {
2346             throw new EnvironmentFailureException
2347                 (envImpl, EnvironmentFailureReason.LOG_CHECKSUM, e);
2348         }
2349     }
2350 
2351     /*
2352      * Truncate all log entries after a specified log entry. The position of
2353      * that entry is specified by the fileNum and offset; we do this to avoid
2354      * a log file gap. Used by replication hard recovery and the
2355      * DbTruncateLog utility; see SR [#19463].
2356      */
2357     public void truncateLog(long fileNum, long offset)
2358         throws IOException, DatabaseException {
2359 
2360         /*
2361          * Truncate the log files following this log file, in descending
2362          * order, to avoid a log entry gap; see SR [#19463].
2363          */
2364         for (long i = getLastFileNum(); i >= fileNum; i--) {
2365             /* Do nothing if this file doesn't exist. */
2366             if (!isFileValid(i)) {
2367                 continue;
2368             }
2369 
2370             /*
2371              * If this is the file where truncation starts, invoke
2372              * truncateSingleFile. If the offset is 0, which means the
2373              * FileHeader is also deleted, delete the whole file to avoid a log
2374              * file gap.
2375              */
2376             if (i == fileNum) {
2377                 truncateSingleFile(fileNum, offset);
2378                 if (offset != 0) {
2379                     continue;
2380                 }
2381             }
2382 
2383             boolean deleted = deleteFile(i);
2384             assert deleted : "File " + getFullFileName(i, JE_SUFFIX) +
2385                              " not deleted during truncateLog";
2386         }
2387     }
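
    /*
     * Added example (not in the original source): with log files 5..9
     * present,
     *
     *   truncateLog(7, 0)    deletes files 7, 8 and 9 entirely;
     *   truncateLog(7, 1000) truncates file 7 at offset 1000 and deletes
     *                        files 8 and 9.
     */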
2388 
2389     /**
2390      * Mark the specified log entries as invisible and obsolete. The entries
2391      * are written here, but are fsync'ed later. If there is any problem or
2392      * exception during the setting, the method will throw an
2393      * EnvironmentFailureException.
2394      *
2395      * These changes are made directly to the file, but recently logged log
2396      * entries may also be resident in the log buffers. The caller must take
2397      * care to call LogManager.flush() before this method, to ensure that all
2398      * entries are on disk.
2399      *
2400      * In addition, we must ensure that after this step, the affected log
2401      * entries will only be read via a FileReader, and will not be faulted in
2402      * by the LogManager. Entries may be present in the log and in the log
2403      * buffers, but only the on disk version is modified by this method. The
2404      * LogManager can read directly from the log buffers and may read the
2405      * incorrect, non-invisible version of the log entry, rather than the
2406      * invisible version from the file. This should not be an issue, because
2407      * invisible log entries should be detached from the in-memory tree before
2408      * they are made invisible.
2409      *
2410      * @param fileNum target file.
2411      * @param lsns The list of LSNs to make invisible, must be sorted in
2412      * ascending order.
2413      */
2414     public void makeInvisible(long fileNum, List<Long> lsns) {
2415         if (lsns.size() == 0) {
2416             return;
2417         }
2418 
2419         /* Open this file. */
2420         FileHandle handle = null;
2421         try {
2422 
2423             /*
2424              * Note that we are getting a new, non-cached file handle for
2425              * specific use by this method.
2426              */
2427             handle = makeFileHandle(fileNum, getAppropriateReadWriteMode());
2428         } catch (ChecksumException e) {
2429             throw new EnvironmentFailureException
2430                 (envImpl, EnvironmentFailureReason.LOG_CHECKSUM,
2431                  "Opening file " + fileNum +  " for invisible marking ", e);
2432         } catch (FileNotFoundException e) {
2433             throw new EnvironmentFailureException
2434                 (envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND,
2435                  "Opening file " + fileNum +  " for invisible marking ", e);
2436         }
2437         RandomAccessFile file = handle.getFile();
2438 
2439         /* Set the invisible bit for each entry. */
2440         try {
2441             for (Long lsn : lsns) {
2442                 if (DbLsn.getFileNumber(lsn) != fileNum) {
2443 
2444                     /*
2445                      * This failure will not invalidate the environment right
2446                      * away. But since it causes replication syncup to fail,
2447                      * the environment will shut down, which is the effect we
2448                      * want.
2449                      */
2450                     throw new EnvironmentFailureException
2451                         (envImpl, EnvironmentFailureReason.UNEXPECTED_STATE,
2452                          "LSN of " + DbLsn.getNoFormatString(lsn) +
2453                          " did not match file number" + fileNum);
2454                 }
2455 
2456                 int entryFlagsOffset = (int)
2457                     (DbLsn.getFileOffset(lsn) + LogEntryHeader.FLAGS_OFFSET);
2458                 file.seek(entryFlagsOffset);
2459                 byte flags = file.readByte();
2460                 byte newFlags = LogEntryHeader.makeInvisible(flags);
2461                 file.seek(entryFlagsOffset);
2462                 file.writeByte(newFlags);
2463             }
2464         } catch (IOException e) {
2465             throw new EnvironmentFailureException
2466                 (envImpl, EnvironmentFailureReason.LOG_WRITE,
2467                  "Flipping invisibility in file " + fileNum, e);
2468         } finally {
2469             /*
2470              * Just close the file. Fsyncs will be done later on, in the hope
2471              * that the OS has already synced asynchronously.
2472              */
2473             try {
2474                 file.close();
2475             } catch (IOException e) {
2476                 throw new EnvironmentFailureException
2477                     (envImpl, EnvironmentFailureReason.LOG_WRITE,
2478                      "Closing after invisibility cloaking: file " + fileNum, e);
2479             }
2480         }
2481     }
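
    /*
     * Added commentary (not in the original source): each LSN above is
     * patched by rewriting a single byte in place. For an entry at file
     * offset 0x2000, the flags byte lives at 0x2000 +
     * LogEntryHeader.FLAGS_OFFSET; only that byte is read, passed through
     * LogEntryHeader.makeInvisible(), and written back.
     */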
2482 
2483     /**
2484      * Fsync this set of log files. Used for replication syncup rollback.
2485      */
2486     public void force(Set<Long> fileNums) {
2487         for (long fileNum : fileNums) {
2488             RandomAccessFile file = null;
2489             try {
2490                 FileHandle handle =
2491                     makeFileHandle(fileNum, getAppropriateReadWriteMode());
2492                 file = handle.getFile();
2493                 file.getChannel().force(false);
2494                 nLogFSyncs.increment();
2495             } catch (FileNotFoundException e) {
2496                 throw new EnvironmentFailureException
2497                     (envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND,
2498                      "Invisible fsyncing file " + fileNum, e);
2499             } catch (ChecksumException e) {
2500                 throw new EnvironmentFailureException
2501                     (envImpl, EnvironmentFailureReason.LOG_CHECKSUM,
2502                      "Invisible fsyncing file " + fileNum, e);
2503             } catch (IOException e) {
2504                 throw new EnvironmentFailureException
2505                     (envImpl, EnvironmentFailureReason.LOG_WRITE,
2506                      "Invisible fsyncing file " + fileNum, e);
2507             } finally {
2508                 if (file != null) {
2509                     try {
2510                         file.close();
2511                     } catch (IOException e) {
2512                         throw new EnvironmentFailureException
2513                             (envImpl, EnvironmentFailureReason.LOG_WRITE,
2514                              "Invisible fsyncing file " + fileNum, e);
2515                     }
2516                 }
2517             }
2518         }
2519     }
2520 
2521     /**
2522      * Set the flag that causes a new file to be written before the next write.
2523      */
2524     public void forceNewLogFile() {
2525         forceNewFile = true;
2526     }
2527 
2528     /**
2529      * Return the offset of the first log entry after the file header.
2530      *
2531      * @return the size in bytes of the file header log entry.
2532      */
2535     public static int firstLogEntryOffset() {
2536         return FileHeader.entrySize() + LogEntryHeader.MIN_HEADER_SIZE;
2537     }
2538 
2539     /**
2540      * Return the next available LSN in the log. Note that this is
2541      * unsynchronized, so is only valid as an approximation of log size.
2542      */
2543     public long getNextLsn() {
2544         return nextAvailableLsn;
2545     }
2546 
2547     /**
2548      * Return the last allocated LSN in the log. Note that this is
2549      * unsynchronized, so if it is called outside the log write latch it is
2550      * only valid as an approximation of log size.
2551      */
2552     public long getLastUsedLsn() {
2553         return lastUsedLsn;
2554     }
2555 
2556     StatGroup loadStats(StatsConfig config) {
2557         nOpenFiles.set(fileCache.size());
2558         StatGroup copyStats = stats.cloneGroup(config.getClear());
2559 
2560         return copyStats;
2561     }
2562 
2563     /*
2564      * Unit test support
2565      */
2566 
2567     /*
2568      * @return ids of files in cache
2569      */
2570     Set<Long> getCacheKeys() {
2571         return fileCache.getCacheKeys();
2572     }
2573 
2574     /**
2575      * Clear a file out of the file cache regardless of mode type.
2576      */
2577     private void clearFileCache(long fileNum)
2578         throws IOException, DatabaseException {
2579 
2580         synchronized (fileCache) {
2581             fileCache.remove(fileNum);
2582         }
2583     }
2584 
2585     /*
2586      * The file cache keeps N RandomAccessFile objects cached for file
2587      * access. The cache consists of two parts: a Hashtable that doesn't
2588      * require extra synchronization, for the most common access, and a linked
2589      * list of files to support cache administration. Looking up a file from
2590      * the hash table doesn't require extra latching, but adding or deleting a
2591      * file does.
2592      */
2593     private static class FileCache {
2594         private final Map<Long, FileHandle> fileMap;            // Long->file
2595         private final List<Long> fileList;    // list of file numbers
2596         private final int fileCacheSize;
2597 
2598         FileCache(DbConfigManager configManager) {
2599 
2600             /*
2601              * A fileMap maps the file number to FileHandles (RandomAccessFile,
2602              * latch). The fileList is a list of Longs to determine which files
2603              * to eject out of the file cache if it's too small.
2604              */
2605             fileMap = new Hashtable<Long, FileHandle>();
2606             fileList = new LinkedList<Long>();
2607             fileCacheSize =
2608                 configManager.getInt(EnvironmentParams.LOG_FILE_CACHE_SIZE);
2609         }
2610 
2611         private FileHandle get(Long fileId) {
2612             return fileMap.get(fileId);
2613         }
2614 
2615         private void add(Long fileId, FileHandle fileHandle)
2616             throws IOException, DatabaseException {
2617 
2618             /*
2619              * Does the cache have any room or do we have to evict?  Hunt down
2620              * the file list for an unused file. Note that the file cache might
2621              * actually grow past the prescribed size if there is nothing
2622              * evictable. Should we try to shrink the file cache? Presently if
2623              * it grows, it doesn't shrink.
2624              */
2625             if (fileList.size() >= fileCacheSize) {
2626                 Iterator<Long> iter = fileList.iterator();
2627                 while (iter.hasNext()) {
2628                     Long evictId = iter.next();
2629                     FileHandle evictTarget = fileMap.get(evictId);
2630 
2631                     /*
2632                      * Try to latch. If latchNoWait returns false, then another
2633                      * thread owns this latch. Note that a thread that's trying
2634                      * to get a new file handle should never already own the
2635                      * latch on another file handle, because these latches are
2636                      * meant to be short lived and only held over the i/o out
2637                      * of the file.
2638                      */
2639                     if (evictTarget.latchNoWait()) {
2640                         try {
2641                             fileMap.remove(evictId);
2642                             iter.remove();
2643                             evictTarget.close();
2644                         } finally {
2645                             evictTarget.release();
2646                         }
2647                         break;
2648                     }
2649                 }
2650             }
2651 
2652             /*
2653              * We've done our best to evict. Add the file to the cache now
2654              * whether or not we did evict.
2655              */
2656             fileList.add(fileId);
2657             fileMap.put(fileId, fileHandle);
2658         }
2659 
2660         /**
2661          * Take any file handles corresponding to this file name out of the
2662          * cache. A file handle could be there twice, in rd only and in r/w
2663          * mode.
2664          */
2665         private void remove(long fileNum)
2666             throws IOException, DatabaseException {
2667 
2668             Iterator<Long> iter = fileList.iterator();
2669             while (iter.hasNext()) {
2670                 Long evictId = iter.next();
2671                 if (evictId.longValue() == fileNum) {
2672                     FileHandle evictTarget = fileMap.get(evictId);
2673                     try {
2674                         evictTarget.latch();
2675                         fileMap.remove(evictId);
2676                         iter.remove();
2677                         evictTarget.close();
2678                     } finally {
2679                         evictTarget.release();
2680                     }
2681                 }
2682             }
2683         }
2684 
2685         private void clear()
2686             throws IOException, DatabaseException {
2687 
2688             Iterator<FileHandle> iter = fileMap.values().iterator();
2689             while (iter.hasNext()) {
2690                 FileHandle fileHandle = iter.next();
2691                 try {
2692                     fileHandle.latch();
2693                     fileHandle.close();
2694                     iter.remove();
2695                 } finally {
2696                     fileHandle.release();
2697                 }
2698             }
2699             fileMap.clear();
2700             fileList.clear();
2701         }
2702 
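        /**
         * Returns the file numbers of all handles currently in the cache.
         */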
        private Set<Long> getCacheKeys() {
            return fileMap.keySet();
        }

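        /**
         * Returns the number of file handles currently in the cache.
         */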
        private int size() {
            return fileMap.size();
        }
    }

    /**
     * The LogEndFileDescriptor is used to write and fsync the end of the log.
     * Because the JE log is append only, there is only one logical R/W file
     * descriptor for the whole environment. This class actually maintains two
     * RandomAccessFile instances, one for writing and one for fsyncing, so the
     * two types of operations don't block each other.
     *
     * The write file descriptor is considered the master.  Manipulation of
     * this class is done under the log write latch. Here's an explanation of
     * why the log write latch is sufficient to safeguard all operations.
     *
     * There are two types of callers who may use this file descriptor: the
     * thread that is currently writing to the end of the log and any threads
     * that are fsyncing on behalf of the FSyncManager.
     *
     * The writing thread appends data to the file and fsyncs the file when we
     * flip over to a new log file.  The file is only instantiated at the point
     * when it must be -- which is either when the first fsync is required by
     * JE or when the log file is full and we flip files.  Therefore, the
     * writing thread has two actions that change this descriptor -- we
     * initialize the file descriptor for the given log file at the first write
     * to the file, and we close the file descriptor when the log file is full.
     * There is therefore a period when there is no file descriptor -- when we
     * have not yet written a log buffer into a given log file.
     *
     * The fsyncing threads ask for the log end file descriptor asynchronously,
     * but will never modify it.  These threads may arrive at the point when
     * the file descriptor is null, and therefore skip their fsync, but that is
     * fine because it means a writing thread already flipped that target file
     * and has moved on to the next file.
     *
     * Time     Activity
     * 10       thread 1 writes log entry A into file 0x0, issues fsync
     *          outside of log write latch, yields the processor
     * 20       thread 2 writes log entry B, piggybacks off thread 1
     * 30       thread 3 writes log entry C, but no room left in that file,
     *          so it flips the log, and fsyncs file 0x0, all under the log
     *          write latch. It nulls out endOfLogRWFile, moves onto file
     *          0x1, but doesn't create the file yet.
     * 40       thread 1 finally comes along, but endOfLogRWFile is null--
     *          no need to fsync in that case, 0x0 got fsynced.
     *
     * If a write is attempted and an fsync is already in progress, then the
     * information pertaining to the data to be written (data, offset, length)
     * is saved away in the "queuedWrites" array.  When the fsync completes,
     * the queuedWrites buffer is emptied.  This ensures that writes continue
     * to execute on file systems which block all IO calls during an fsync()
     * call (e.g. ext3).
     */
    class LogEndFileDescriptor {
        private RandomAccessFile endOfLogRWFile = null;
        private RandomAccessFile endOfLogSyncFile = null;
        private final ReentrantLock fsyncFileSynchronizer = new ReentrantLock();

        /*
         * Holds all data for writes which have been queued due to their
         * being blocked by an fsync when the original write was attempted.
         * The next thread to execute an fsync or write will execute any
         * queued writes in this buffer.
         * Latch order is fsyncFileSynchronizer, followed by the queuedWrites
         * mutex [ synchronized (queuedWrites) {} ].
         *
         * Default protection for unit tests.
         */
        private final byte[] queuedWrites =
            useWriteQueue ? new byte[writeQueueSize] : null;

        /* Current position in the queuedWrites array. */
        private int queuedWritesPosition = 0;

        /* The starting offset on disk of the first byte in queuedWrites. */
        private long qwStartingOffset;

        /* The file number that the queuedWrites are destined for. */
        private long qwFileNum = -1;

        /* For unit tests. */
        void setQueueFileNum(final long qwFileNum) {
            this.qwFileNum = qwFileNum;
        }

        /*
         * Check if fileNo/offset is present in queuedWrites, and if so, fill
         * readBuffer with those bytes.  We theorize that this is needed
         * because HA will be reading at the very end of the log and those
         * writes, if enqueued, may no longer be in LogBuffers in the
         * LogBufferPool.  This might happen in the case of lots of concurrent
         * non-synchronous writes (with synchronous commits) which become
         * enqueued in the queuedWrites cache, but cycle out of the LBP.  In
         * general, using synchronous commits with HA is a bad idea.
         *
         * Default protection for unit tests.
         * @return true if more data was available. If so, the read buffer
         * will be filled up.
         */
        /* private */
        boolean checkWriteCache(final ByteBuffer readBuffer,
                                final long requestedOffset,
                                final long fileNum) {

            int pos = readBuffer.position();
            int targetBufSize = readBuffer.limit() - pos;
            synchronized (queuedWrites) {
                if (qwFileNum != fileNum) {
                    return false;
                }

                if (queuedWritesPosition == 0) {
                    return false;
                }

                if (requestedOffset < qwStartingOffset ||
                    (qwStartingOffset + queuedWritesPosition) <=
                    requestedOffset) {
                    return false;
                }

                /* We have the bytes available. */
                int nBytesToCopy = (int)
                    (queuedWritesPosition -
                     (requestedOffset - qwStartingOffset));
                nBytesToCopy = Math.min(nBytesToCopy, targetBufSize);
                readBuffer.put(queuedWrites,
                               (int) (requestedOffset - qwStartingOffset),
                               nBytesToCopy);
                nBytesReadFromWriteQueue.add(nBytesToCopy);
                nReadsFromWriteQueue.increment();
                return true;
            }
        }

        /*
         * Enqueue a blocked write call for later execution by the next thread
         * to do either an fsync or write call. fsyncFileSynchronizer is not
         * held when this is called.
         *
         * Default protection for unit tests.
         */
        /* private */
        boolean enqueueWrite(final long fileNum,
                             final byte[] data,
                             final long destOffset,
                             final int arrayOffset,
                             final int size)
            throws DatabaseException {

            assert !fsyncFileSynchronizer.isHeldByCurrentThread();

            for (int i = 0; i < 2; i++) {
                try {
                    enqueueWrite1(fileNum, data, destOffset,
                                  arrayOffset, size);
                    return true;
                } catch (RelatchRequiredException RE) {
                    dequeuePendingWrites();
                }
            }

            /* Give up after two tries. */
            nWriteQueueOverflowFailures.increment();
            return false;
        }

        private void enqueueWrite1(final long fileNum,
                                   final byte[] data,
                                   final long destOffset,
                                   final int arrayOffset,
                                   final int size)
            throws RelatchRequiredException, DatabaseException {

            /*
             * The queuedWrites queue only ever holds writes for a single file.
             *
             * This check is safe because qwFileNum can only ever change inside
             * enqueueWrite, which can only ever be called while the Log Write
             * Latch is held.
             *
             * NOTE: We believe the commented out second condition is safe
             * to add to the code if we ever see contention with this call to
             * dequeuePendingWrites against an fsync.  Here is the reasoning:
             *
             * queuedWritesPosition is changed in two places: (1) enqueueWrite1
             * where it is incremented, and (2) dequeuePendingWrites1 where it
             * is zeroed. Both of these places are protected by the
             * queuedWrites mutex.  The zeroing (2) will only make the dequeue
             * unnecessary, so the extra commented out check below is safe
             * since it will only result in eliminating an unnecessary
             * dequeuePendingWrites call.
             */
            if (qwFileNum < fileNum /* && queuedWritesPosition > 0 */) {
                dequeuePendingWrites();
                qwFileNum = fileNum;
            }

            synchronized (queuedWrites) {
                boolean overflow =
                    (writeQueueSize - queuedWritesPosition) < size;
                if (overflow) {
                    nWriteQueueOverflow.increment();

                    /*
                     * Since we can't copy this write into the queuedWrites
                     * array without overflowing, we will try to dequeue all
                     * current writes in the buffer.  But that requires holding
                     * the fsyncFileSynchronizer latch first, which would be
                     * latching out of order relative to the queuedWrites
                     * mutex.
                     */
                    throw RelatchRequiredException.relatchRequiredException;
                }

                assert qwFileNum == fileNum;
                int curPos = queuedWritesPosition;
                if (curPos == 0) {

                    /*
                     * This is the first entry in queue.  Set qwStartingOffset.
                     */
                    qwStartingOffset = destOffset;
                }

                if (curPos + qwStartingOffset != destOffset) {
                    throw new EnvironmentFailureException
                        (envImpl, EnvironmentFailureReason.LOG_INTEGRITY,
                         "non-consecutive writes queued. " +
                         "qwPos=" + queuedWritesPosition +
                         " write destOffset=" + destOffset);
                }

                System.arraycopy(data, arrayOffset,
                                 queuedWrites, queuedWritesPosition,
                                 size);
                queuedWritesPosition += size;
            }
        }

        /**
         * Returns whether anything is in the write queue.
         */
        boolean hasQueuedWrites() {
            return queuedWritesPosition > 0;
        }

        /*
         * Execute pending writes.  Assumes fsyncFileSynchronizer is not held.
         */
        private void dequeuePendingWrites()
            throws DatabaseException {

            assert !fsyncFileSynchronizer.isHeldByCurrentThread();

            fsyncFileSynchronizer.lock();
            try {
                dequeuePendingWrites1();
            } finally {
                fsyncFileSynchronizer.unlock();
            }
        }

        /*
         * Execute pending writes.  Assumes fsyncFileSynchronizer is held.
         */
        private void dequeuePendingWrites1()
            throws DatabaseException {

            assert fsyncFileSynchronizer.isHeldByCurrentThread();

            try {
                synchronized (queuedWrites) {
                    /* Nothing to see here.  Move along. */
                    if (queuedWritesPosition == 0) {
                        return;
                    }

                    RandomAccessFile file = getWritableFile(qwFileNum, false);
                    synchronized (file) {
                        file.seek(qwStartingOffset);
                        file.write(queuedWrites, 0, queuedWritesPosition);
                        nBytesWrittenFromWriteQueue.add(queuedWritesPosition);
                        nWritesFromWriteQueue.increment();
                        if (VERIFY_CHECKSUMS) {
                            file.seek(qwStartingOffset);
                            file.read(queuedWrites, 0, queuedWritesPosition);
                            ByteBuffer bb =
                                ByteBuffer.allocate(queuedWritesPosition);
                            bb.put(queuedWrites, 0, queuedWritesPosition);
                            bb.position(0);
                            verifyChecksums
                                (bb, qwStartingOffset, "post-write");
                        }
                    }

                    /* We flushed the queue.  Reset the buffer. */
                    queuedWritesPosition = 0;
                }
            } catch (IOException e) {
                throw new LogWriteException
                    (envImpl, "IOException during fsync", e);
            }
        }

        /**
         * getWritableFile must be called under the log write latch.
         *
         * Typically, endOfLogRWFile is not null.  Hence the
         * fsyncFileSynchronizer does not need to be locked (which would
         * block the write queue from operating).
         */
        private RandomAccessFile getWritableFile(final long fileNumber,
                                                 final boolean doLock) {
            try {
                if (endOfLogRWFile == null) {

                    /*
                     * We need to make a file descriptor for the end of the
                     * log.  This is guaranteed to be called under the log
                     * write latch.
                     *
                     * Protect both the RWFile and SyncFile under this lock,
                     * to avoid a race for creating the file and writing the
                     * header.  [#20732]
                     */
                    if (doLock) {
                        fsyncFileSynchronizer.lock();
                    }
                    try {
                        endOfLogRWFile =
                            makeFileHandle(fileNumber,
                                           getAppropriateReadWriteMode()).
                            getFile();
                        endOfLogSyncFile =
                            makeFileHandle(fileNumber,
                                           getAppropriateReadWriteMode()).
                            getFile();
                    } finally {
                        if (doLock) {
                            fsyncFileSynchronizer.unlock();
                        }
                    }
                }

                return endOfLogRWFile;
            } catch (Exception e) {

                /*
                 * If we can't get a write channel, we need to invalidate the
                 * environment.
                 */
                throw new EnvironmentFailureException
                    (envImpl, EnvironmentFailureReason.LOG_INTEGRITY, e);
            }
        }

        /**
         * FSync the log file that makes up the end of the log.
         */
        private void force()
            throws DatabaseException, IOException {

            /*
             * Get a local copy of the end-of-log file descriptor, since it
             * could change. No need to latch; no harm done if we get an old
             * file descriptor, because we forcibly fsync under the log write
             * latch when we switch files.
             *
             * If there is no current end file descriptor, we know that the log
             * file has flipped to a new file since the fsync was issued.
             */
            fsyncFileSynchronizer.lock();
            try {

                /* Flush any queued writes. */
                if (useWriteQueue) {
                    dequeuePendingWrites1();
                }

                RandomAccessFile file = endOfLogSyncFile;
                if (file != null) {
                    bumpWriteCount("fsync");
                    FileChannel ch = file.getChannel();
                    try {
                        long start = System.currentTimeMillis();
                        ch.force(false);
                        nLogFSyncs.increment();
                        final long fsyncMs = System.currentTimeMillis() - start;
                        nFSyncTime.add(fsyncMs);
                    } catch (ClosedChannelException e) {

                        /*
                         * The channel should never be closed. It may be closed
                         * because of an interrupt received by another thread.
                         * See SR [#10463].
                         */
                        throw new ThreadInterruptedException
                            (envImpl,
                             "Channel closed, may be due to thread interrupt",
                             e);
                    }

                    assert EnvironmentImpl.maybeForceYield();
                }

                /* Flush any writes which were queued while fsync'ing. */
                if (useWriteQueue) {
                    dequeuePendingWrites1();
                }
            } finally {
                fsyncFileSynchronizer.unlock();
            }
        }

        /**
         * Close the end of the log file descriptor. Use atomic assignment to
         * ensure that we won't force and close on the same descriptor.
         */
        void close()
            throws IOException {

            /*
             * Protect both the RWFile and SyncFile under this lock out of
             * paranoia, although we don't expect two threads to call close
             * concurrently.  [#20732]
             */
            fsyncFileSynchronizer.lock();
            try {
                IOException firstException = null;
                if (endOfLogRWFile != null) {
                    RandomAccessFile file = endOfLogRWFile;

                    /*
                     * Null out so that other threads know endOfLogRWFile is no
                     * longer available.
                     */
                    endOfLogRWFile = null;
                    try {
                        file.close();
                    } catch (IOException e) {
                        /* Save this exception, so we can try second close. */
                        firstException = e;
                    }
                }
                if (endOfLogSyncFile != null) {
                    RandomAccessFile file = endOfLogSyncFile;

                    /*
                     * Null out so that other threads know endOfLogSyncFile is
                     * no longer available.
                     */
                    endOfLogSyncFile = null;
                    file.close();
                }

                if (firstException != null) {
                    throw firstException;
                }
            } finally {
                fsyncFileSynchronizer.unlock();
            }
        }
    }

    /*
     * Generate IOExceptions for testing.
     */

    /* Testing switch. Public so others can read the value. */
    public static final boolean LOGWRITE_EXCEPTION_TESTING;
    private static String RRET_PROPERTY_NAME = "je.logwrite.exception.testing";

    static {
        LOGWRITE_EXCEPTION_TESTING =
            (System.getProperty(RRET_PROPERTY_NAME) != null);
    }
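
    /*
     * The switch is enabled by defining the system property on the JVM
     * command line, for example -Dje.logwrite.exception.testing=true; any
     * non-null value turns it on.
     */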

    /* Max write counter value. */
    private static final int LOGWRITE_EXCEPTION_MAX = 100;
    /* Current write counter value. */
    private int logWriteExceptionCounter = 0;
    /* Whether an exception has been thrown. */
    private boolean logWriteExceptionThrown = false;
    /* Random number generator. */
    private Random logWriteExceptionRandom = null;

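    /**
     * Randomly throws an IOException, on average about once per
     * LOGWRITE_EXCEPTION_MAX writes, to simulate a log write failure.  Before
     * throwing, a random prefix of the data may be written so that the log
     * ends with a partial entry, as it might after a real failure.  Intended
     * to be called only when LOGWRITE_EXCEPTION_TESTING is enabled.
     */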
    private void generateLogWriteException(RandomAccessFile file,
                                           ByteBuffer data,
                                           long destOffset,
                                           long fileNum)
        throws DatabaseException, IOException {

        if (logWriteExceptionThrown) {
            (new Exception("Write after LogWriteException")).
                printStackTrace();
        }
        logWriteExceptionCounter += 1;
        if (logWriteExceptionCounter >= LOGWRITE_EXCEPTION_MAX) {
            logWriteExceptionCounter = 0;
        }
        if (logWriteExceptionRandom == null) {
            logWriteExceptionRandom = new Random(System.currentTimeMillis());
        }
        if (logWriteExceptionCounter ==
            logWriteExceptionRandom.nextInt(LOGWRITE_EXCEPTION_MAX)) {
            int len = logWriteExceptionRandom.nextInt(data.remaining());
            if (len > 0) {
                byte[] a = new byte[len];
                data.get(a, 0, len);
                ByteBuffer buf = ByteBuffer.wrap(a);
                writeToFile(file, buf, destOffset, fileNum,
                            false /*flushRequired*/);
            }
            logWriteExceptionThrown = true;
            throw new IOException("Randomly generated for testing");
        }
    }

    /**
     * The factory interface for creating RandomAccessFiles.  For production
     * use, the default factory is always used and a DefaultRandomAccessFile is
     * always created.  For testing, the factory can be overridden to return a
     * subclass of DefaultRandomAccessFile that overrides methods and injects
     * faults, for example.
     */
    public interface FileFactory {

        /**
         * @param envHome can be used to distinguish environments in a test
         * program that opens multiple environments.  Not for production use.
         *
         * @param fullName the full file name to be passed to the
         * RandomAccessFile constructor.
         *
         * @param mode the file mode to be passed to the RandomAccessFile
         * constructor.
         */
        RandomAccessFile createFile(File envHome, String fullName, String mode)
            throws FileNotFoundException;
    }

    /**
     * The RandomAccessFile for production use.  Tests that override the
     * default FileFactory should return a RandomAccessFile that subclasses
     * this class to inherit workarounds such as the overridden length method.
     */
    public static class DefaultRandomAccessFile extends RandomAccessFile {

        public DefaultRandomAccessFile(String fullName, String mode)
            throws FileNotFoundException {

            super(fullName, mode);
        }

        /**
         * RandomAccessFile.length() is not thread safe and side-effects the
         * file pointer if interrupted in the middle.  It is synchronized here
         * to work around that problem.
         */
        @Override
        public synchronized long length()
            throws IOException {

            return super.length();
        }
    }

    /**
     * The factory instance used to create RandomAccessFiles.  This field is
     * intentionally public and non-final so it may be set by tests.  See
     * FileFactory.
     */
    public static FileFactory fileFactory = new FileFactory() {

        public RandomAccessFile createFile(File envHome,
                                           String fullName,
                                           String mode)
            throws FileNotFoundException {

            return new DefaultRandomAccessFile(fullName, mode);
        }
    };
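
    /*
     * For example, a test could install a fault-injecting factory along
     * these lines, where FaultInjectingRandomAccessFile is a hypothetical
     * test-only subclass of DefaultRandomAccessFile:
     *
     *   FileManager.fileFactory = new FileManager.FileFactory() {
     *       public RandomAccessFile createFile(File envHome,
     *                                          String fullName,
     *                                          String mode)
     *           throws FileNotFoundException {
     *           return new FaultInjectingRandomAccessFile(fullName, mode);
     *       }
     *   };
     */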
}