1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 2002, 2012 Oracle and/or its affiliates.  All rights reserved.
5  *
6  */
7 
8 package com.sleepycat.je.log;
9 
10 import java.nio.ByteBuffer;
11 import java.util.zip.Checksum;
12 
13 import com.sleepycat.je.log.entry.LogEntry;
14 import com.sleepycat.je.utilint.Adler32;
15 import com.sleepycat.je.utilint.VLSN;
16 
/**
 * A LogEntryHeader embodies the header information at the beginning of each
 * log entry.
 */
21 public class LogEntryHeader {
22 
23     /**
24      * Persistent fields. Layout on disk is
25      * (invariant) checksum - 4 bytes
26      * (invariant) entry type - 1 byte
27      * (invariant) entry flags - 1 byte
28      * (invariant) offset of previous log entry - 4 bytes
29      * (invariant) item size (not counting header size) - 4 bytes
30      * (optional) vlsn - 8 bytes
31      *
32      * Flags:
33      * The provisional bit can be set for any log type in the log. It's an
34      * indication to recovery that the entry shouldn't be processed when
35      * rebuilding the tree. See com.sleepycat.je.log.Provisional.java for
36      * the reasons why it's set.
37      *
38      * The replicated bit is set when this particular log entry is
39      * part of the replication stream and contains a VLSN in the header.
40      *
41      * The invisible bit is set when this log entry has been rolled back as
42      * part of replication syncup. The ensuing log entry has not been
43      * checksum-corrected, and to read it, the invisible bit must be cloaked.
44      *
45      * The VLSN_PRESENT bit is set when a VLSN is present for log version 8+,
46      * and is set when the replicated bit is *not* set in the case of a cleaner
47      * migrated LN.  Prior to version 8, the replicated bit alone indicates
48      * that a VLSN is present.  For all versions, if the replicated bit is set
49      * then a VLSN is always present. [#19476]
50      *
51      *                    first version of        migrated LN
52      *                    a replicated LN
53      *                    ---------------         -----------
54      * log version 7-     replicated = true       replicated = false
55      * (JE 4.1 and        vlsn present = false    vlsn present = false
56      *  earlier)          vlsn exists in header   no vlsn in header
57      *
58      * log version 8+     replicated = true       replicated = false
59      * preserve record    vlsn present = true     vlsn present = false
60      * version = false    vlsn exists in header   no vlsn in header
61      *
62      * log version 8+     replicated = true       replicated = false
63      * preserve record    vlsn present = true     vlsn present = true
64      * version = true     vlsn exists in header   vlsn exists in header
65      */
66 
    /*
     * The invariant size of the log entry header: checksum (4) + entry type
     * (1) + flags (1) + prev offset (4) + item size (4), per the layout
     * described above.
     */
    public static final int MIN_HEADER_SIZE = 14;

    /*
     * Header size when the optional VLSN is present.
     * Only used for tests and asserts.
     */
    public static final int MAX_HEADER_SIZE = MIN_HEADER_SIZE + VLSN.LOG_SIZE;

    /* Number of bytes occupied by the checksum at the start of the header. */
    public static final int CHECKSUM_BYTES = 4;

    /*
     * Byte offsets of the individual header fields, relative to the start of
     * the header.
     */
    static final int ENTRYTYPE_OFFSET = 4;
    static final int FLAGS_OFFSET = 5;
    private static final int PREV_OFFSET = 6;
    private static final int ITEMSIZE_OFFSET = 10;
    /* The optional VLSN immediately follows the invariant portion. */
    public static final int VLSN_OFFSET = MIN_HEADER_SIZE;
80 
    /*
     * Flags defined in the entry header.
     *
     * WARNING: Flags may not be defined or used in the entry header of the
     * FileHeader.  All flags defined here may only be used in log entries
     * other than the FileHeader. [#16939]
     *
     * Note that these masks are bytes; when combined with an int operand they
     * are sign-extended first, so PROVISIONAL_ALWAYS_MASK (0x80) becomes a
     * negative int.  The tests in initFlags are written against flag values
     * that originate from a single (sign-extended) byte.
     */
    private static final byte PROVISIONAL_ALWAYS_MASK = (byte) 0x80;
    private static final byte PROVISIONAL_BEFORE_CKPT_END_MASK = (byte) 0x40;
    private static final byte REPLICATED_MASK = (byte) 0x20;
    private static final byte INVISIBLE = (byte) 0x10;
    /* Complement of INVISIBLE; AND-ing with it clears the invisible bit. */
    private static final byte IGNORE_INVISIBLE = ~INVISIBLE;
    private static final byte VLSN_PRESENT = (byte) 0x08;
    /* Flags stored in version byte for logVersion 6 and below.*/
    private static final byte VERSION_6_FLAGS =
        PROVISIONAL_ALWAYS_MASK |
        PROVISIONAL_BEFORE_CKPT_END_MASK |
        REPLICATED_MASK;
    /* Complement of VERSION_6_FLAGS; AND-ing with it leaves the version. */
    private static final byte IGNORE_VERSION_6_FLAGS = ~VERSION_6_FLAGS;

    /* Type number of the file header entry, which gets special handling. */
    private static final byte FILE_HEADER_TYPE_NUM =
        LogEntryType.LOG_FILE_HEADER.getTypeNum();
103 
    private long checksumVal;   // stored in 4 bytes as an unsigned int
    /* Type number of the entry, as stored in the header. */
    private final byte entryType;
    /* Offset of the previous log entry; stored in 4 bytes as an unsigned int. */
    private long prevOffset;
    /* Size of the item that follows the header, not counting the header. */
    private final int itemSize;
    /* Null until read by readVariablePortion or supplied by a caller. */
    private VLSN vlsn;

    /*
     * Prior to log version 6, a type-specific version was stored in each
     * entry, and was packed together with the flags in a single byte.
     *
     * For version 6, we changed to use a global version (not type specific),
     * but it was stored in each entry, packed with the flags as in earlier
     * versions, as well as being stored redundantly in the FileHeader.  The
     * entry header and file header versions are always the same for all
     * entries in a file.  We flip the log file to guarantee this, when running
     * for the first time with an upgraded JE with a new log version.
     *
     * For version 7 and above, the version is stored only in the FileHeader,
     * freeing the space formerly taken by the version in each entry for use
     * by flag bits.  The version is not stored in each entry; however, the
     * version is still maintained in this in-memory object for two reasons:
     *
     * 1. When reading log files prior to version 6, each entry potentially has
     *    a different version.
     * 2. Convenience of access to the version when processing log entries.
     *
     * [#16939]
     */
    private int entryVersion;

    /*
     * Version flag fields.  These are the in-memory form of the bits in the
     * flags byte; see initFlags (decoding) and writeToLog (encoding).
     */
    private Provisional provisional;
    private boolean replicated;
    private boolean invisible;
    /* True if the header includes the optional VLSN; see getSize. */
    private boolean vlsnPresent;
139 
140     /**
141      * For reading a log entry.
142      *
143      * @param entryBuffer the buffer containing at least the first
144      * MIN_HEADER_SIZE bytes of the entry header.
145      *
146      * @param logVersion is the log version of the file that contains the given
147      * buffer, and is obtained from the file header.  Note that for the file
148      * header entry itself, UNKNOWN_FILE_HEADER_VERSION may be passed.
149      */
LogEntryHeader(ByteBuffer entryBuffer, int logVersion)150     public LogEntryHeader(ByteBuffer entryBuffer, int logVersion)
151         throws ChecksumException {
152 
153         assert logVersion == LogEntryType.UNKNOWN_FILE_HEADER_VERSION ||
154             (logVersion >= LogEntryType.FIRST_LOG_VERSION &&
155              logVersion <= LogEntryType.LOG_VERSION) : logVersion;
156 
157         checksumVal = LogUtils.readUnsignedInt(entryBuffer);
158         entryType = entryBuffer.get();
159         if (!LogEntryType.isValidType(entryType)) {
160             throw new ChecksumException("Read invalid log entry type: " +
161                                         entryType);
162         }
163 
164         if (entryType == FILE_HEADER_TYPE_NUM) {
165             /* Actual version will be set by setFileHeaderVersion. */
166             entryVersion = LogEntryType.UNKNOWN_FILE_HEADER_VERSION;
167             /* Discard flags byte: none are allowed for the file header. */
168             entryBuffer.get();
169             initFlags(0);
170         } else {
171             if ( logVersion == LogEntryType.UNKNOWN_FILE_HEADER_VERSION ) {
172                 /*
173                  * If we are reading a log header the type should be
174                  * FILE_HEADER_TYPE_NUM.
175                  */
176                 throw new ChecksumException("Read invalid log header entry type: " +
177                         entryType);
178             } else if (logVersion <= 6) {
179                 /* Before version 7, flags and version were packed together. */
180                 entryVersion = entryBuffer.get();
181                 initFlags(entryVersion & VERSION_6_FLAGS);
182                 entryVersion &= IGNORE_VERSION_6_FLAGS;
183                 /* For log version 6, the entry version should always match. */
184                 assert (logVersion == 6) ? (entryVersion == 6) : true;
185             } else {
186                 /* For log version 7+, only flags are stored in the entry. */
187                 entryVersion = logVersion;
188                 initFlags(entryBuffer.get());
189             }
190         }
191         prevOffset = LogUtils.readUnsignedInt(entryBuffer);
192         itemSize = LogUtils.readInt(entryBuffer);
193         if (itemSize < 0) {
194             throw new ChecksumException("Read invalid item size: " + itemSize);
195         }
196     }
197 
198     /**
199      * For writing a log header. public for unit tests.
200      */
LogEntryHeader(LogEntry entry, Provisional provisional, ReplicationContext repContext)201     public LogEntryHeader(LogEntry entry,
202                           Provisional provisional,
203                           ReplicationContext repContext) {
204 
205         LogEntryType logEntryType = entry.getLogType();
206         entryType = logEntryType.getTypeNum();
207         entryVersion = LogEntryType.LOG_VERSION;
208         this.itemSize = entry.getSize();
209         this.provisional = provisional;
210 
211         assert (!((!logEntryType.isReplicationPossible()) &&
212                   repContext.inReplicationStream())) :
213                logEntryType + " should never be replicated.";
214 
215         if (logEntryType.isReplicationPossible()) {
216             this.replicated = repContext.inReplicationStream();
217         } else {
218             this.replicated = false;
219         }
220         invisible = false;
221 
222         /*
223          * If we about to write a new replicated entry, the VLSN will be null
224          * and mustGenerateVLSN will return true.  For a cleaner migrated LN
225          * that was replicated, the VLSN will be non-null and mustGenerateVLSN
226          * will return false.  [#19476]
227          */
228         vlsnPresent = repContext.getClientVLSN() != null ||
229             repContext.mustGenerateVLSN();
230     }
231 
232     /**
233      * For reading a replication message. The node-specific parts of the header
234      * are not needed.
235      */
LogEntryHeader(byte entryType, int entryVersion, int itemSize, VLSN vlsn)236     public LogEntryHeader(byte entryType,
237                           int entryVersion,
238                           int itemSize,
239                           VLSN vlsn) {
240 
241         assert ((vlsn != null) && !vlsn.isNull()) :
242                "vlsn = " + vlsn;
243 
244         this.entryType = entryType;
245         this.entryVersion = entryVersion;
246         this.itemSize = itemSize;
247         this.vlsn = vlsn;
248         replicated = true;
249         vlsnPresent = true;
250         provisional = Provisional.NO;
251     }
252 
initFlags(int entryFlags)253     private void initFlags(int entryFlags) {
254         if ((entryFlags & PROVISIONAL_ALWAYS_MASK) != 0) {
255             provisional = Provisional.YES;
256         } else if ((entryFlags & PROVISIONAL_BEFORE_CKPT_END_MASK) != 0) {
257             provisional = Provisional.BEFORE_CKPT_END;
258         } else {
259             provisional = Provisional.NO;
260         }
261         replicated = ((entryFlags & REPLICATED_MASK) != 0);
262         invisible = ((entryFlags & INVISIBLE) != 0);
263         vlsnPresent = ((entryFlags & VLSN_PRESENT) != 0) || replicated;
264     }
265 
266     /**
267      * Called to set the version for a file header entry after reading the
268      * version from the item data.  See FileHeaderEntry.readEntry.  [#16939]
269      */
setFileHeaderVersion(final int logVersion)270     public void setFileHeaderVersion(final int logVersion) {
271         entryVersion = logVersion;
272     }
273 
getChecksum()274     public long getChecksum() {
275         return checksumVal;
276     }
277 
getType()278     public byte getType() {
279         return entryType;
280     }
281 
getVersion()282     public int getVersion() {
283         return entryVersion;
284     }
285 
getPrevOffset()286     public long getPrevOffset() {
287         return prevOffset;
288     }
289 
getItemSize()290     public int getItemSize() {
291         return itemSize;
292     }
293 
getEntrySize()294     public int getEntrySize() {
295         return getSize() + getItemSize();
296     }
297 
getVLSN()298     public VLSN getVLSN() {
299         return vlsn;
300     }
301 
getReplicated()302     public boolean getReplicated() {
303         return replicated;
304     }
305 
getProvisional()306     public Provisional getProvisional() {
307         return provisional;
308     }
309 
isInvisible()310     public boolean isInvisible() {
311         return invisible;
312     }
313 
getVariablePortionSize()314     public int getVariablePortionSize() {
315         return VLSN.LOG_SIZE;
316     }
317 
318     /**
319      * @return number of bytes used to store this header
320      */
getSize()321     public int getSize() {
322         if (vlsnPresent) {
323             return MIN_HEADER_SIZE + VLSN.LOG_SIZE;
324         }
325         return MIN_HEADER_SIZE;
326     }
327 
328     /**
329      * @return the number of bytes used to store the header, excepting
330      * the checksum field.
331      */
getSizeMinusChecksum()332     int getSizeMinusChecksum() {
333         return getSize()- CHECKSUM_BYTES;
334     }
335 
336     /**
337      * @return the number of bytes used to store the header, excepting
338      * the checksum field.
339      */
getInvariantSizeMinusChecksum()340     int getInvariantSizeMinusChecksum() {
341         return MIN_HEADER_SIZE - CHECKSUM_BYTES;
342     }
343 
344     /**
345      * Assumes this is called directly after the constructor, and that the
346      * entryBuffer is positioned right before the VLSN.
347      */
readVariablePortion(ByteBuffer entryBuffer)348     public void readVariablePortion(ByteBuffer entryBuffer) {
349         if (vlsnPresent) {
350             vlsn = new VLSN();
351             vlsn.readFromLog(entryBuffer, entryVersion);
352         }
353     }
354 
355     /**
356      * Serialize this object into the buffer and leave the buffer positioned in
357      * the right place to write the following item.  The checksum, prevEntry,
358      * and vlsn values will filled in later on.
359      *
360      * public for unit tests.
361      */
writeToLog(ByteBuffer entryBuffer)362     public void writeToLog(ByteBuffer entryBuffer) {
363 
364         /* Skip over the checksumVal, proceed to the entry type. */
365         entryBuffer.position(ENTRYTYPE_OFFSET);
366         entryBuffer.put(entryType);
367 
368         /* Flags */
369         byte flags = 0;
370         if (provisional == Provisional.YES) {
371             flags |= PROVISIONAL_ALWAYS_MASK;
372         } else if (provisional == Provisional.BEFORE_CKPT_END) {
373             flags |= PROVISIONAL_BEFORE_CKPT_END_MASK;
374         }
375         if (replicated) {
376             flags |= REPLICATED_MASK;
377         }
378         if (vlsnPresent) {
379             flags |= VLSN_PRESENT;
380         }
381         entryBuffer.put(flags);
382 
383         /*
384          * Leave room for the prev offset, which must be added under
385          * the log write latch. Proceed to write the item size.
386          */
387         entryBuffer.position(ITEMSIZE_OFFSET);
388         LogUtils.writeInt(entryBuffer, itemSize);
389 
390         /*
391          * Leave room for a VLSN if needed, must also be generated
392          * under the log write latch.
393          */
394         if (vlsnPresent) {
395             entryBuffer.position(entryBuffer.position() + VLSN.LOG_SIZE);
396         }
397     }
398 
399     /**
400      * Add those parts of the header that must be calculated later to the
401      * entryBuffer, and also assign the fields in this class.
402      * That's
403      * - the prev offset, which must be done within the log write latch to
404      *   be sure what that lsn is
405      * - the VLSN, for the same reason
406      * - the checksumVal, which must be added last, after all other
407      *   fields are marshalled.
408      * (public for unit tests)
409      */
addPostMarshallingInfo(ByteBuffer entryBuffer, long lastOffset, VLSN vlsn)410     public ByteBuffer addPostMarshallingInfo(ByteBuffer entryBuffer,
411                                              long lastOffset,
412                                              VLSN vlsn) {
413 
414         /* Add the prev pointer */
415         prevOffset = lastOffset;
416         entryBuffer.position(PREV_OFFSET);
417         LogUtils.writeUnsignedInt(entryBuffer, prevOffset);
418 
419         if (vlsn != null) {
420             this.vlsn = vlsn;
421             entryBuffer.position(VLSN_OFFSET);
422 
423             vlsn.writeToLog(entryBuffer);
424         }
425 
426         /*
427          * Now calculate the checksumVal and write it into the buffer.  Be sure
428          * to set the field in this instance, for use later when printing or
429          * debugging the header.
430          */
431         Checksum checksum = Adler32.makeChecksum();
432         checksum.update(entryBuffer.array(),
433                         entryBuffer.arrayOffset() + CHECKSUM_BYTES,
434                         entryBuffer.limit() - CHECKSUM_BYTES);
435         entryBuffer.position(0);
436         checksumVal = checksum.getValue();
437         LogUtils.writeUnsignedInt(entryBuffer, checksumVal);
438 
439         /* Leave this buffer ready for copying into another buffer. */
440         entryBuffer.position(0);
441 
442         return entryBuffer;
443     }
444 
445     /**
446      * @param sb destination string buffer
447      * @param verbose if true, dump the full, verbose version
448      */
dumpLog(StringBuilder sb, boolean verbose)449     public void dumpLog(StringBuilder sb, boolean verbose) {
450         sb.append("<hdr ");
451         dumpLogNoTag(sb, verbose);
452         sb.append("\"/>");
453     }
454 
455     /**
456      * Dump the header without enclosing <header> tags. Used for
457      * DbPrintLog, to make the header attributes in the <entry> tag, for
458      * a more compact rendering.
459      * @param sb destination string buffer
460      * @param verbose if true, dump the full, verbose version
461      */
dumpLogNoTag(StringBuilder sb, boolean verbose)462     void dumpLogNoTag(StringBuilder sb, boolean verbose) {
463         LogEntryType lastEntryType = LogEntryType.findType(entryType);
464 
465         sb.append("type=\"").append(lastEntryType.toStringNoVersion()).
466             append("/").append(entryVersion);
467         if (provisional != Provisional.NO) {
468             sb.append("\" prov=\"");
469             sb.append(provisional);
470         }
471 
472         if (vlsn != null) {
473             sb.append("\" ");
474             vlsn.dumpLog(sb, verbose);
475         } else {
476             sb.append("\"");
477         }
478 
479         if (getReplicated()) {
480             sb.append(" isReplicated=\"1\"");
481         }
482 
483         if (isInvisible()) {
484             sb.append(" isInvisible=\"1\"");
485         }
486 
487         sb.append(" prev=\"0x").append(Long.toHexString(prevOffset));
488         if (verbose) {
489             sb.append("\" size=\"").append(itemSize);
490             sb.append("\" cksum=\"").append(checksumVal);
491         }
492     }
493 
    /**
     * For use in special case where commits are transformed to aborts because
     * of i/o errors during a logBuffer flush. See [11271].
     *
     * Rewrites the entry type byte in place from LOG_TXN_COMMIT to
     * LOG_TXN_ABORT, then recomputes the checksum over the header (minus the
     * checksum field) plus the item, and stores it both in the buffer and in
     * this object.
     *
     * Assumes that the entryBuffer is positioned at the start of the item,
     * and that the buffer is backed by an accessible array (the checksum is
     * computed directly on entryBuffer.array()).
     *
     * Return with the entryBuffer positioned to the end of the log entry.
     * NOTE(review): the last operation below writes the checksum at the
     * start of the entry, so the buffer appears to be left just after the
     * checksum field rather than at the end of the entry -- confirm against
     * the caller.
     *
     * @param entryBuffer the buffer holding the commit entry.
     */
    void convertCommitToAbort(ByteBuffer entryBuffer) {
        assert (entryType == LogEntryType.LOG_TXN_COMMIT.getTypeNum());

        /* Remember the start of the entry item. */
        int itemStart = entryBuffer.position();

        /*
         * Back up to where the type is stored and change the type.  The
         * header occupies the getSize() bytes just before the item, and the
         * type sits ENTRYTYPE_OFFSET bytes into the header.
         */
        int entryTypePosition =
            itemStart - (getSize() - ENTRYTYPE_OFFSET);
        entryBuffer.position(entryTypePosition);
        entryBuffer.put(LogEntryType.LOG_TXN_ABORT.getTypeNum());

        /*
         * Recalculate the checksum. This byte buffer could be large,
         * so don't just turn the whole buffer into an array to pass
         * into the checksum object.
         *
         * The checksummed range starts at the entry type (the first byte
         * after the checksum field, since ENTRYTYPE_OFFSET equals
         * CHECKSUM_BYTES) and covers the rest of the header plus the item.
         */
        Checksum checksum = Adler32.makeChecksum();
        int checksumSize = itemSize + (getSize() - CHECKSUM_BYTES);
        checksum.update(entryBuffer.array(),
                        entryTypePosition + entryBuffer.arrayOffset(),
                        checksumSize);
        /* Move to the start of the header, where the checksum is stored. */
        entryBuffer.position(itemStart - getSize());
        checksumVal = checksum.getValue();
        LogUtils.writeUnsignedInt(entryBuffer, checksumVal);
    }
526 
527     @Override
toString()528     public String toString() {
529         StringBuilder sb = new StringBuilder();
530         dumpLog(sb, true /* verbose */);
531         return sb.toString();
532     }
533 
534     /*
535      * Dump only the parts of the header that apply for replicated entries.
536      */
dumpRep(StringBuilder sb)537     public void dumpRep(StringBuilder sb) {
538 
539         LogEntryType lastEntryType = LogEntryType.findType(entryType);
540 
541         sb.append(lastEntryType.toStringNoVersion()).
542             append("/").append(entryVersion);
543 
544         if (vlsn != null) {
545             sb.append(" vlsn=" ).append(vlsn);
546         } else {
547             sb.append("\"");
548         }
549 
550         if (getReplicated()) {
551             sb.append(" isReplicated=\"1\"");
552         }
553 
554         if (isInvisible()) {
555             sb.append(" isInvisible=\"1\"");
556         }
557     }
558 
559     /**
560      * @return true if two log headers are logically the same. This check will
561      * ignore the log version.
562      *
563      * Used by replication.
564      */
logicalEqualsIgnoreVersion(LogEntryHeader other)565     public boolean logicalEqualsIgnoreVersion(LogEntryHeader other) {
566 
567         /*
568          * Note that item size is not part of the logical equality, because
569          * on-disk compression can make itemSize vary if the entry has VLSNs
570          * that were packed differently.
571          */
572         return ((getVLSN().equals(other.getVLSN())) &&
573                 (getReplicated() == other.getReplicated()) &&
574                 (isInvisible() == other.isInvisible()) &&
575                 (LogEntryType.compareTypeAndVersion(getVersion(), getType(),
576                                                     other.getVersion(),
577                                                     other.getType())));
578     }
579 
580     /**
581      * May be called after reading MIN_HEADER_SIZE bytes to determine
582      * whether more bytes (getVariablePortionSize) should be read.
583      */
isVariableLength()584     public boolean isVariableLength() {
585         /* Currently only entries with VLSNs are variable length. */
586         return vlsnPresent;
587     }
588 
589     /**
590      * Set the invisible bit in the given log entry flags.
591      */
makeInvisible(byte flags)592     static byte makeInvisible(byte flags) {
593         return flags |= INVISIBLE;
594     }
595 
596     /**
597      * Turn off the invisible bit in the byte buffer which backs this log entry
598      * header.
599      * @param logHeaderStartPosition the byte position of the start of the log
600      * entry header.
601      */
turnOffInvisible(ByteBuffer buffer, int logHeaderStartPosition)602     public static void turnOffInvisible(ByteBuffer buffer,
603                                         int logHeaderStartPosition) {
604 
605         int flagsPosition = logHeaderStartPosition + FLAGS_OFFSET;
606         byte flags = buffer.get(flagsPosition);
607         flags &= IGNORE_INVISIBLE;
608         buffer.put(flagsPosition, flags);
609     }
610 }
611