/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 2002, 2012 Oracle and/or its affiliates.  All rights reserved.
 *
 */

package com.sleepycat.je.log;

import java.nio.ByteBuffer;
import java.util.zip.Checksum;

import com.sleepycat.je.log.entry.LogEntry;
import com.sleepycat.je.utilint.Adler32;
import com.sleepycat.je.utilint.VLSN;

/**
 * A LogEntryHeader embodies the header information at the beginning of each
 * log entry.
 */
public class LogEntryHeader {

    /**
     * Persistent fields. Layout on disk is
     * (invariant) checksum - 4 bytes
     * (invariant) entry type - 1 byte
     * (invariant) entry flags - 1 byte
     * (invariant) offset of previous log entry - 4 bytes
     * (invariant) item size (not counting header size) - 4 bytes
     * (optional) vlsn - 8 bytes
     *
     * Flags:
     * The provisional bit can be set for any log type in the log. It's an
     * indication to recovery that the entry shouldn't be processed when
     * rebuilding the tree. See com.sleepycat.je.log.Provisional.java for
     * the reasons why it's set.
     *
     * The replicated bit is set when this particular log entry is
     * part of the replication stream and contains a VLSN in the header.
     *
     * The invisible bit is set when this log entry has been rolled back as
     * part of replication syncup. The ensuing log entry has not been
     * checksum-corrected, and to read it, the invisible bit must be cloaked.
     *
     * The VLSN_PRESENT bit is set when a VLSN is present for log version 8+,
     * and is set when the replicated bit is *not* set in the case of a cleaner
     * migrated LN. Prior to version 8, the replicated bit alone indicates
     * that a VLSN is present. For all versions, if the replicated bit is set
     * then a VLSN is always present. [#19476]
     *
     *                  first version of         migrated LN
     *                  a replicated LN
     *                  ---------------          -----------
     * log version 7-   replicated = true        replicated = false
     * (JE 4.1 and      vlsn present = false     vlsn present = false
     *  earlier)        vlsn exists in header    no vlsn in header
     *
     * log version 8+   replicated = true        replicated = false
     * preserve record  vlsn present = true      vlsn present = false
     * version = false  vlsn exists in header    no vlsn in header
     *
     * log version 8+   replicated = true        replicated = false
     * preserve record  vlsn present = true      vlsn present = true
     * version = true   vlsn exists in header    vlsn exists in header
     */

    /* The invariant size of the log entry header. */
    public static final int MIN_HEADER_SIZE = 14;

    /* Only used for tests and asserts. */
    public static final int MAX_HEADER_SIZE = MIN_HEADER_SIZE + VLSN.LOG_SIZE;

    public static final int CHECKSUM_BYTES = 4;

    static final int ENTRYTYPE_OFFSET = 4;
    static final int FLAGS_OFFSET = 5;
    private static final int PREV_OFFSET = 6;
    private static final int ITEMSIZE_OFFSET = 10;
    public static final int VLSN_OFFSET = MIN_HEADER_SIZE;

    /*
     * Flags defined in the entry header.
     *
     * WARNING: Flags may not be defined or used in the entry header of the
     * FileHeader.  All flags defined here may only be used in log entries
     * other than the FileHeader. [#16939]
     */
    private static final byte PROVISIONAL_ALWAYS_MASK = (byte) 0x80;
    private static final byte PROVISIONAL_BEFORE_CKPT_END_MASK = (byte) 0x40;
    private static final byte REPLICATED_MASK = (byte) 0x20;
    private static final byte INVISIBLE = (byte) 0x10;
    private static final byte IGNORE_INVISIBLE = ~INVISIBLE;
    private static final byte VLSN_PRESENT = (byte) 0x08;
    /* Flags stored in version byte for logVersion 6 and below. */
    private static final byte VERSION_6_FLAGS =
        PROVISIONAL_ALWAYS_MASK |
        PROVISIONAL_BEFORE_CKPT_END_MASK |
        REPLICATED_MASK;
    private static final byte IGNORE_VERSION_6_FLAGS = ~VERSION_6_FLAGS;

    private static final byte FILE_HEADER_TYPE_NUM =
        LogEntryType.LOG_FILE_HEADER.getTypeNum();

    private long checksumVal;   // stored in 4 bytes as an unsigned int
    private final byte entryType;
    private long prevOffset;
    private final int itemSize;
    private VLSN vlsn;

    /*
     * Prior to log version 6, a type-specific version was stored in each
     * entry, and was packed together with the flags in a single byte.
     *
     * For version 6, we changed to use a global version (not type specific),
     * but it was stored in each entry, packed with the flags as in earlier
     * versions, as well as being stored redundantly in the FileHeader.  The
     * entry header and file header versions are always the same for all
     * entries in a file.  We flip the log file to guarantee this, when running
     * for the first time with an upgraded JE with a new log version.
     *
     * For version 7 and above, the version is stored only in the FileHeader,
     * freeing the space formerly taken by the version in each entry for use
     * by flag bits.  The version is not stored in each entry; however, the
     * version is still maintained in this in-memory object for two reasons:
     *
     * 1. When reading log files prior to version 6, each entry potentially has
     *    a different version.
     * 2. Convenience of access to the version when processing log entries.
     *
     * [#16939]
     */
    private int entryVersion;

    /* Version flag fields */
    private Provisional provisional;
    private boolean replicated;
    private boolean invisible;
    private boolean vlsnPresent;

    /**
     * For reading a log entry.
     *
     * @param entryBuffer the buffer containing at least the first
     * MIN_HEADER_SIZE bytes of the entry header.
     *
     * @param logVersion is the log version of the file that contains the given
     * buffer, and is obtained from the file header.  Note that for the file
     * header entry itself, UNKNOWN_FILE_HEADER_VERSION may be passed.
     *
     * @throws ChecksumException if the entry type is not a valid type, if
     * UNKNOWN_FILE_HEADER_VERSION is passed for an entry that is not a file
     * header, or if the item size read from the buffer is negative.
     */
    public LogEntryHeader(ByteBuffer entryBuffer, int logVersion)
        throws ChecksumException {

        assert logVersion == LogEntryType.UNKNOWN_FILE_HEADER_VERSION ||
            (logVersion >= LogEntryType.FIRST_LOG_VERSION &&
             logVersion <= LogEntryType.LOG_VERSION) : logVersion;

        checksumVal = LogUtils.readUnsignedInt(entryBuffer);
        entryType = entryBuffer.get();
        if (!LogEntryType.isValidType(entryType)) {
            throw new ChecksumException("Read invalid log entry type: " +
                                        entryType);
        }

        if (entryType == FILE_HEADER_TYPE_NUM) {
            /* Actual version will be set by setFileHeaderVersion. */
            entryVersion = LogEntryType.UNKNOWN_FILE_HEADER_VERSION;
            /* Discard flags byte: none are allowed for the file header. */
            entryBuffer.get();
            initFlags(0);
        } else {
            if (logVersion == LogEntryType.UNKNOWN_FILE_HEADER_VERSION) {
                /*
                 * If we are reading a log header the type should be
                 * FILE_HEADER_TYPE_NUM.
                 */
                throw new ChecksumException("Read invalid log header entry type: " +
                                            entryType);
            } else if (logVersion <= 6) {
                /* Before version 7, flags and version were packed together. */
                entryVersion = entryBuffer.get();
                initFlags(entryVersion & VERSION_6_FLAGS);
                entryVersion &= IGNORE_VERSION_6_FLAGS;
                /* For log version 6, the entry version should always match. */
                assert (logVersion != 6) || (entryVersion == 6);
            } else {
                /* For log version 7+, only flags are stored in the entry. */
                entryVersion = logVersion;
                initFlags(entryBuffer.get());
            }
        }
        prevOffset = LogUtils.readUnsignedInt(entryBuffer);
        itemSize = LogUtils.readInt(entryBuffer);
        if (itemSize < 0) {
            throw new ChecksumException("Read invalid item size: " + itemSize);
        }
    }

    /**
     * For writing a log header.  public for unit tests.
     *
     * @param entry the entry being logged; supplies the type and item size.
     * @param provisional the provisional status to record in the flags.
     * @param repContext determines the replicated and vlsn-present flags.
     */
    public LogEntryHeader(LogEntry entry,
                          Provisional provisional,
                          ReplicationContext repContext) {

        LogEntryType logEntryType = entry.getLogType();
        entryType = logEntryType.getTypeNum();
        entryVersion = LogEntryType.LOG_VERSION;
        this.itemSize = entry.getSize();
        this.provisional = provisional;

        /* A type that cannot be replicated must not be in the stream. */
        assert (logEntryType.isReplicationPossible() ||
                !repContext.inReplicationStream()) :
            logEntryType + " should never be replicated.";

        this.replicated = logEntryType.isReplicationPossible() &&
            repContext.inReplicationStream();
        invisible = false;

        /*
         * If we are about to write a new replicated entry, the VLSN will be
         * null and mustGenerateVLSN will return true.  For a cleaner migrated
         * LN that was replicated, the VLSN will be non-null and
         * mustGenerateVLSN will return false.  [#19476]
         */
        vlsnPresent = repContext.getClientVLSN() != null ||
            repContext.mustGenerateVLSN();
    }

    /**
     * For reading a replication message. The node-specific parts of the header
     * are not needed.
     *
     * @param entryType the log entry type number.
     * @param entryVersion the log version of the entry.
     * @param itemSize the size of the item, not counting the header.
     * @param vlsn the VLSN of the entry; asserted to be non-null and not a
     * null VLSN.
     */
    public LogEntryHeader(byte entryType,
                          int entryVersion,
                          int itemSize,
                          VLSN vlsn) {

        assert ((vlsn != null) && !vlsn.isNull()) :
            "vlsn = " + vlsn;

        this.entryType = entryType;
        this.entryVersion = entryVersion;
        this.itemSize = itemSize;
        this.vlsn = vlsn;
        replicated = true;
        vlsnPresent = true;
        provisional = Provisional.NO;
    }

    /**
     * Decodes the flag bits into the provisional, replicated, invisible and
     * vlsnPresent fields.  A VLSN is considered present if either the
     * VLSN_PRESENT bit or the replicated bit is set.
     */
    private void initFlags(int entryFlags) {
        if ((entryFlags & PROVISIONAL_ALWAYS_MASK) != 0) {
            provisional = Provisional.YES;
        } else if ((entryFlags & PROVISIONAL_BEFORE_CKPT_END_MASK) != 0) {
            provisional = Provisional.BEFORE_CKPT_END;
        } else {
            provisional = Provisional.NO;
        }
        replicated = ((entryFlags & REPLICATED_MASK) != 0);
        invisible = ((entryFlags & INVISIBLE) != 0);
        vlsnPresent = ((entryFlags & VLSN_PRESENT) != 0) || replicated;
    }

    /**
     * Called to set the version for a file header entry after reading the
     * version from the item data.  See FileHeaderEntry.readEntry.  [#16939]
     */
    public void setFileHeaderVersion(final int logVersion) {
        entryVersion = logVersion;
    }

    public long getChecksum() {
        return checksumVal;
    }

    public byte getType() {
        return entryType;
    }

    public int getVersion() {
        return entryVersion;
    }

    public long getPrevOffset() {
        return prevOffset;
    }

    public int getItemSize() {
        return itemSize;
    }

    /**
     * @return the header size plus the item size.
     */
    public int getEntrySize() {
        return getSize() + getItemSize();
    }

    public VLSN getVLSN() {
        return vlsn;
    }

    public boolean getReplicated() {
        return replicated;
    }

    public Provisional getProvisional() {
        return provisional;
    }

    public boolean isInvisible() {
        return invisible;
    }

    /**
     * @return the size of the optional portion of the header, which is the
     * size of a VLSN.
     */
    public int getVariablePortionSize() {
        return VLSN.LOG_SIZE;
    }

    /**
     * @return number of bytes used to store this header
     */
    public int getSize() {
        if (vlsnPresent) {
            return MIN_HEADER_SIZE + VLSN.LOG_SIZE;
        }
        return MIN_HEADER_SIZE;
    }

    /**
     * @return the number of bytes used to store the header, excepting
     * the checksum field.
     */
    int getSizeMinusChecksum() {
        return getSize() - CHECKSUM_BYTES;
    }

    /**
     * @return the number of bytes used to store the invariant portion of
     * the header, excepting the checksum field.
     */
    int getInvariantSizeMinusChecksum() {
        return MIN_HEADER_SIZE - CHECKSUM_BYTES;
    }

    /**
     * Assumes this is called directly after the constructor, and that the
     * entryBuffer is positioned right before the VLSN.  Does nothing if this
     * header has no VLSN.
     */
    public void readVariablePortion(ByteBuffer entryBuffer) {
        if (vlsnPresent) {
            vlsn = new VLSN();
            vlsn.readFromLog(entryBuffer, entryVersion);
        }
    }

    /**
     * Serialize this object into the buffer and leave the buffer positioned in
     * the right place to write the following item.  The checksum, prevEntry,
     * and vlsn values will be filled in later on.
     *
     * public for unit tests.
     */
    public void writeToLog(ByteBuffer entryBuffer) {

        /* Skip over the checksumVal, proceed to the entry type. */
        entryBuffer.position(ENTRYTYPE_OFFSET);
        entryBuffer.put(entryType);

        /* Flags */
        byte flags = 0;
        if (provisional == Provisional.YES) {
            flags |= PROVISIONAL_ALWAYS_MASK;
        } else if (provisional == Provisional.BEFORE_CKPT_END) {
            flags |= PROVISIONAL_BEFORE_CKPT_END_MASK;
        }
        if (replicated) {
            flags |= REPLICATED_MASK;
        }
        if (vlsnPresent) {
            flags |= VLSN_PRESENT;
        }
        entryBuffer.put(flags);

        /*
         * Leave room for the prev offset, which must be added under
         * the log write latch. Proceed to write the item size.
         */
        entryBuffer.position(ITEMSIZE_OFFSET);
        LogUtils.writeInt(entryBuffer, itemSize);

        /*
         * Leave room for a VLSN if needed, must also be generated
         * under the log write latch.
         */
        if (vlsnPresent) {
            entryBuffer.position(entryBuffer.position() + VLSN.LOG_SIZE);
        }
    }

    /**
     * Add those parts of the header that must be calculated later to the
     * entryBuffer, and also assign the fields in this class.
     * That's
     * - the prev offset, which must be done within the log write latch to
     *   be sure what that lsn is
     * - the VLSN, for the same reason
     * - the checksumVal, which must be added last, after all other
     *   fields are marshalled.
     * (public for unit tests)
     *
     * @param entryBuffer the array-backed buffer holding the marshalled
     * entry; the checksum covers everything after the checksum field up to
     * the buffer's limit.
     * @param lastOffset the offset of the previous log entry.
     * @param vlsn the VLSN to write, or null to leave the VLSN untouched.
     * @return the same entryBuffer, positioned at zero.
     */
    public ByteBuffer addPostMarshallingInfo(ByteBuffer entryBuffer,
                                             long lastOffset,
                                             VLSN vlsn) {

        /* Add the prev pointer */
        prevOffset = lastOffset;
        entryBuffer.position(PREV_OFFSET);
        LogUtils.writeUnsignedInt(entryBuffer, prevOffset);

        if (vlsn != null) {
            this.vlsn = vlsn;
            entryBuffer.position(VLSN_OFFSET);

            vlsn.writeToLog(entryBuffer);
        }

        /*
         * Now calculate the checksumVal and write it into the buffer.  Be sure
         * to set the field in this instance, for use later when printing or
         * debugging the header.
         */
        Checksum checksum = Adler32.makeChecksum();
        checksum.update(entryBuffer.array(),
                        entryBuffer.arrayOffset() + CHECKSUM_BYTES,
                        entryBuffer.limit() - CHECKSUM_BYTES);
        entryBuffer.position(0);
        checksumVal = checksum.getValue();
        LogUtils.writeUnsignedInt(entryBuffer, checksumVal);

        /* Leave this buffer ready for copying into another buffer. */
        entryBuffer.position(0);

        return entryBuffer;
    }

    /**
     * @param sb destination string buffer
     * @param verbose if true, dump the full, verbose version
     */
    public void dumpLog(StringBuilder sb, boolean verbose) {
        sb.append("<hdr ");
        dumpLogNoTag(sb, verbose);
        /* dumpLogNoTag leaves its last attribute value unterminated. */
        sb.append("\"/>");
    }

    /**
     * Dump the header without enclosing <header> tags. Used for
     * DbPrintLog, to make the header attributes in the <entry> tag, for
     * a more compact rendering.  The final attribute value is left without
     * its closing quote; the caller is expected to append it.
     * @param sb destination string buffer
     * @param verbose if true, dump the full, verbose version
     */
    void dumpLogNoTag(StringBuilder sb, boolean verbose) {
        LogEntryType lastEntryType = LogEntryType.findType(entryType);

        sb.append("type=\"").append(lastEntryType.toStringNoVersion()).
            append("/").append(entryVersion);
        if (provisional != Provisional.NO) {
            sb.append("\" prov=\"");
            sb.append(provisional);
        }

        /* Close whichever attribute value is still open. */
        if (vlsn != null) {
            sb.append("\" ");
            vlsn.dumpLog(sb, verbose);
        } else {
            sb.append("\"");
        }

        if (getReplicated()) {
            sb.append(" isReplicated=\"1\"");
        }

        if (isInvisible()) {
            sb.append(" isInvisible=\"1\"");
        }

        sb.append(" prev=\"0x").append(Long.toHexString(prevOffset));
        if (verbose) {
            sb.append("\" size=\"").append(itemSize);
            sb.append("\" cksum=\"").append(checksumVal);
        }
    }

    /**
     * For use in special case where commits are transformed to aborts because
     * of i/o errors during a logBuffer flush. See [11271].
     * Assumes that the entryBuffer is positioned at the start of the item.
     * Return with the entryBuffer positioned to the end of the log entry.
     *
     * NOTE(review): the last thing this method does is reposition to the
     * start of the header and write the checksum there, so the final position
     * appears to be just past the checksum field rather than the end of the
     * entry -- confirm against callers.
     */
    void convertCommitToAbort(ByteBuffer entryBuffer) {
        assert (entryType == LogEntryType.LOG_TXN_COMMIT.getTypeNum());

        /* Remember the start of the entry item. */
        int itemStart = entryBuffer.position();

        /* Back up to where the type is stored and change the type. */
        int entryTypePosition =
            itemStart - (getSize() - ENTRYTYPE_OFFSET);
        entryBuffer.position(entryTypePosition);
        entryBuffer.put(LogEntryType.LOG_TXN_ABORT.getTypeNum());

        /*
         * Recalculate the checksum. This byte buffer could be large,
         * so don't just turn the whole buffer into an array to pass
         * into the checksum object.
         */
        Checksum checksum = Adler32.makeChecksum();
        int checksumSize = itemSize + (getSize() - CHECKSUM_BYTES);
        checksum.update(entryBuffer.array(),
                        entryTypePosition + entryBuffer.arrayOffset(),
                        checksumSize);
        entryBuffer.position(itemStart - getSize());
        checksumVal = checksum.getValue();
        LogUtils.writeUnsignedInt(entryBuffer, checksumVal);
    }

    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        dumpLog(sb, true /* verbose */);
        return sb.toString();
    }

    /*
     * Dump only the parts of the header that apply for replicated entries.
     * The type/version is written unquoted, so nothing needs to be closed
     * when there is no VLSN.
     */
    public void dumpRep(StringBuilder sb) {

        LogEntryType lastEntryType = LogEntryType.findType(entryType);

        sb.append(lastEntryType.toStringNoVersion()).
            append("/").append(entryVersion);

        if (vlsn != null) {
            sb.append(" vlsn=").append(vlsn);
        }

        if (getReplicated()) {
            sb.append(" isReplicated=\"1\"");
        }

        if (isInvisible()) {
            sb.append(" isInvisible=\"1\"");
        }
    }

    /**
     * @return true if two log headers are logically the same. This check will
     * ignore the log version.
     *
     * Used by replication.  This header's VLSN is dereferenced, so it must
     * be non-null when this method is called.
     */
    public boolean logicalEqualsIgnoreVersion(LogEntryHeader other) {

        /*
         * Note that item size is not part of the logical equality, because
         * on-disk compression can make itemSize vary if the entry has VLSNs
         * that were packed differently.
         */
        return ((getVLSN().equals(other.getVLSN())) &&
                (getReplicated() == other.getReplicated()) &&
                (isInvisible() == other.isInvisible()) &&
                (LogEntryType.compareTypeAndVersion(getVersion(), getType(),
                                                    other.getVersion(),
                                                    other.getType())));
    }

    /**
     * May be called after reading MIN_HEADER_SIZE bytes to determine
     * whether more bytes (getVariablePortionSize) should be read.
     */
    public boolean isVariableLength() {
        /* Currently only entries with VLSNs are variable length. */
        return vlsnPresent;
    }

    /**
     * Set the invisible bit in the given log entry flags.
     *
     * @return the given flags with the invisible bit turned on.
     */
    static byte makeInvisible(byte flags) {
        return (byte) (flags | INVISIBLE);
    }

    /**
     * Turn off the invisible bit in the byte buffer which backs this log entry
     * header.  Uses absolute get/put, so the buffer's position is unchanged.
     * @param logHeaderStartPosition the byte position of the start of the log
     * entry header.
     */
    public static void turnOffInvisible(ByteBuffer buffer,
                                        int logHeaderStartPosition) {

        int flagsPosition = logHeaderStartPosition + FLAGS_OFFSET;
        byte flags = buffer.get(flagsPosition);
        flags &= IGNORE_INVISIBLE;
        buffer.put(flagsPosition, flags);
    }
}