1 /* 2 ** 2011-08-18 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ************************************************************************* 12 ** Internal structure definitions for the LSM module. 13 */ 14 #ifndef _LSM_INT_H 15 #define _LSM_INT_H 16 17 #include "lsm.h" 18 #include <assert.h> 19 #include <string.h> 20 21 #include <stdarg.h> 22 #include <stdlib.h> 23 #include <stdio.h> 24 #include <ctype.h> 25 26 #ifdef _WIN32 27 # ifdef _MSC_VER 28 # define snprintf _snprintf 29 # endif 30 #else 31 # include <unistd.h> 32 #endif 33 34 #ifdef NDEBUG 35 # ifdef LSM_DEBUG_EXPENSIVE 36 # undef LSM_DEBUG_EXPENSIVE 37 # endif 38 # ifdef LSM_DEBUG 39 # undef LSM_DEBUG 40 # endif 41 #else 42 # ifndef LSM_DEBUG 43 # define LSM_DEBUG 44 # endif 45 #endif 46 47 /* 48 ** Default values for various data structure parameters. These may be 49 ** overridden by calls to lsm_config(). 50 */ 51 #define LSM_DFLT_PAGE_SIZE (4 * 1024) 52 #define LSM_DFLT_BLOCK_SIZE (1 * 1024 * 1024) 53 #define LSM_DFLT_AUTOFLUSH (1 * 1024 * 1024) 54 #define LSM_DFLT_AUTOCHECKPOINT (i64)(2 * 1024 * 1024) 55 #define LSM_DFLT_AUTOWORK 1 56 #define LSM_DFLT_LOG_SIZE (128*1024) 57 #define LSM_DFLT_AUTOMERGE 4 58 #define LSM_DFLT_SAFETY LSM_SAFETY_NORMAL 59 #define LSM_DFLT_MMAP (LSM_IS_64_BIT ? 1 : 32768) 60 #define LSM_DFLT_MULTIPLE_PROCESSES 1 61 #define LSM_DFLT_USE_LOG 1 62 63 /* Initial values for log file checksums. These are only used if the 64 ** database file does not contain a valid checkpoint. */ 65 #define LSM_CKSUM0_INIT 42 66 #define LSM_CKSUM1_INIT 42 67 68 /* "mmap" mode is currently only used in environments with 64-bit address 69 ** spaces. The following macro is used to test for this. */ 70 #define LSM_IS_64_BIT (sizeof(void*)==8) 71 72 #define LSM_AUTOWORK_QUANT 32 73 74 typedef struct Database Database; 75 typedef struct DbLog DbLog; 76 typedef struct FileSystem FileSystem; 77 typedef struct Freelist Freelist; 78 typedef struct FreelistEntry FreelistEntry; 79 typedef struct Level Level; 80 typedef struct LogMark LogMark; 81 typedef struct LogRegion LogRegion; 82 typedef struct LogWriter LogWriter; 83 typedef struct LsmString LsmString; 84 typedef struct Mempool Mempool; 85 typedef struct Merge Merge; 86 typedef struct MergeInput MergeInput; 87 typedef struct MetaPage MetaPage; 88 typedef struct MultiCursor MultiCursor; 89 typedef struct Page Page; 90 typedef struct Redirect Redirect; 91 typedef struct Segment Segment; 92 typedef struct SegmentMerger SegmentMerger; 93 typedef struct ShmChunk ShmChunk; 94 typedef struct ShmHeader ShmHeader; 95 typedef struct ShmReader ShmReader; 96 typedef struct Snapshot Snapshot; 97 typedef struct TransMark TransMark; 98 typedef struct Tree Tree; 99 typedef struct TreeCursor TreeCursor; 100 typedef struct TreeHeader TreeHeader; 101 typedef struct TreeMark TreeMark; 102 typedef struct TreeRoot TreeRoot; 103 104 #ifndef _SQLITEINT_H_ 105 typedef unsigned char u8; 106 typedef unsigned short int u16; 107 typedef unsigned int u32; 108 typedef lsm_i64 i64; 109 typedef unsigned long long int u64; 110 #endif 111 112 /* A page number is a 64-bit integer. */ 113 typedef i64 Pgno; 114 115 #ifdef LSM_DEBUG 116 int lsmErrorBkpt(int); 117 #else 118 # define lsmErrorBkpt(x) (x) 119 #endif 120 121 #define LSM_PROTOCOL_BKPT lsmErrorBkpt(LSM_PROTOCOL) 122 #define LSM_IOERR_BKPT lsmErrorBkpt(LSM_IOERR) 123 #define LSM_NOMEM_BKPT lsmErrorBkpt(LSM_NOMEM) 124 #define LSM_CORRUPT_BKPT lsmErrorBkpt(LSM_CORRUPT) 125 #define LSM_MISUSE_BKPT lsmErrorBkpt(LSM_MISUSE) 126 127 #define unused_parameter(x) (void)(x) 128 #define array_size(x) (sizeof(x)/sizeof(x[0])) 129 130 131 /* The size of each shared-memory chunk */ 132 #define LSM_SHM_CHUNK_SIZE (32*1024) 133 134 /* The number of bytes reserved at the start of each shm chunk for MM. */ 135 #define LSM_SHM_CHUNK_HDR (sizeof(ShmChunk)) 136 137 /* The number of available read locks. */ 138 #define LSM_LOCK_NREADER 6 139 140 /* The number of available read-write client locks. */ 141 #define LSM_LOCK_NRWCLIENT 16 142 143 /* Lock definitions. 144 */ 145 #define LSM_LOCK_DMS1 1 /* Serialize connect/disconnect ops */ 146 #define LSM_LOCK_DMS2 2 /* Read-write connections */ 147 #define LSM_LOCK_DMS3 3 /* Read-only connections */ 148 #define LSM_LOCK_WRITER 4 149 #define LSM_LOCK_WORKER 5 150 #define LSM_LOCK_CHECKPOINTER 6 151 #define LSM_LOCK_ROTRANS 7 152 #define LSM_LOCK_READER(i) ((i) + LSM_LOCK_ROTRANS + 1) 153 #define LSM_LOCK_RWCLIENT(i) ((i) + LSM_LOCK_READER(LSM_LOCK_NREADER)) 154 155 #define LSM_N_LOCK LSM_LOCK_RWCLIENT(LSM_LOCK_NRWCLIENT) 156 157 /* 158 ** Meta-page size and usable size. 159 */ 160 #define LSM_META_PAGE_SIZE 4096 161 162 #define LSM_META_RW_PAGE_SIZE (LSM_META_PAGE_SIZE - LSM_N_LOCK) 163 164 /* 165 ** Hard limit on the number of free-list entries that may be stored in 166 ** a checkpoint (the remainder are stored as a system record in the LSM). 167 ** See also LSM_CONFIG_MAX_FREELIST. 168 */ 169 #define LSM_MAX_FREELIST_ENTRIES 24 170 171 #define LSM_MAX_BLOCK_REDIRECTS 16 172 173 #define LSM_ATTEMPTS_BEFORE_PROTOCOL 10000 174 175 176 /* 177 ** Each entry stored in the LSM (or in-memory tree structure) has an 178 ** associated mask of the following flags. 179 */ 180 #define LSM_START_DELETE 0x01 /* Start of open-ended delete range */ 181 #define LSM_END_DELETE 0x02 /* End of open-ended delete range */ 182 #define LSM_POINT_DELETE 0x04 /* Delete this key */ 183 #define LSM_INSERT 0x08 /* Insert this key and value */ 184 #define LSM_SEPARATOR 0x10 /* True if entry is separator key only */ 185 #define LSM_SYSTEMKEY 0x20 /* True if entry is a system key (FREELIST) */ 186 187 #define LSM_CONTIGUOUS 0x40 /* Used in lsm_tree.c */ 188 189 /* 190 ** A string that can grow by appending. 191 */ 192 struct LsmString { 193 lsm_env *pEnv; /* Run-time environment */ 194 int n; /* Size of string. -1 indicates error */ 195 int nAlloc; /* Space allocated for z[] */ 196 char *z; /* The string content */ 197 }; 198 199 typedef struct LsmFile LsmFile; 200 struct LsmFile { 201 lsm_file *pFile; 202 LsmFile *pNext; 203 }; 204 205 /* 206 ** An instance of the following type is used to store an ordered list of 207 ** u32 values. 208 ** 209 ** Note: This is a place-holder implementation. It should be replaced by 210 ** a version that avoids making a single large allocation when the array 211 ** contains a large number of values. For this reason, the internals of 212 ** this object should only manipulated by the intArrayXXX() functions in 213 ** lsm_tree.c. 214 */ 215 typedef struct IntArray IntArray; 216 struct IntArray { 217 int nAlloc; 218 int nArray; 219 u32 *aArray; 220 }; 221 222 struct Redirect { 223 int n; /* Number of redirects */ 224 struct RedirectEntry { 225 int iFrom; 226 int iTo; 227 } *a; 228 }; 229 230 /* 231 ** An instance of this structure represents a point in the history of the 232 ** tree structure to roll back to. Refer to comments in lsm_tree.c for 233 ** details. 234 */ 235 struct TreeMark { 236 u32 iRoot; /* Offset of root node in shm file */ 237 u32 nHeight; /* Current height of tree structure */ 238 u32 iWrite; /* Write offset in shm file */ 239 u32 nChunk; /* Number of chunks in shared-memory file */ 240 u32 iFirst; /* First chunk in linked list */ 241 u32 iNextShmid; /* Next id to allocate */ 242 int iRollback; /* Index in lsm->rollback to revert to */ 243 }; 244 245 /* 246 ** An instance of this structure represents a point in the database log. 247 */ 248 struct LogMark { 249 i64 iOff; /* Offset into log (see lsm_log.c) */ 250 int nBuf; /* Size of in-memory buffer here */ 251 u8 aBuf[8]; /* Bytes of content in aBuf[] */ 252 u32 cksum0; /* Checksum 0 at offset (iOff-nBuf) */ 253 u32 cksum1; /* Checksum 1 at offset (iOff-nBuf) */ 254 }; 255 256 struct TransMark { 257 TreeMark tree; 258 LogMark log; 259 }; 260 261 /* 262 ** A structure that defines the start and end offsets of a region in the 263 ** log file. The size of the region in bytes is (iEnd - iStart), so if 264 ** iEnd==iStart the region is zero bytes in size. 265 */ 266 struct LogRegion { 267 i64 iStart; /* Start of region in log file */ 268 i64 iEnd; /* End of region in log file */ 269 }; 270 271 struct DbLog { 272 u32 cksum0; /* Checksum 0 at offset iOff */ 273 u32 cksum1; /* Checksum 1 at offset iOff */ 274 i64 iSnapshotId; /* Log space has been reclaimed to this ss */ 275 LogRegion aRegion[3]; /* Log file regions (see docs in lsm_log.c) */ 276 }; 277 278 struct TreeRoot { 279 u32 iRoot; 280 u32 nHeight; 281 u32 nByte; /* Total size of this tree in bytes */ 282 u32 iTransId; 283 }; 284 285 /* 286 ** Tree header structure. 287 */ 288 struct TreeHeader { 289 u32 iUsedShmid; /* Id of first shm chunk used by this tree */ 290 u32 iNextShmid; /* Shm-id of next chunk allocated */ 291 u32 iFirst; /* Chunk number of smallest shm-id */ 292 u32 nChunk; /* Number of chunks in shared-memory file */ 293 TreeRoot root; /* Root and height of current tree */ 294 u32 iWrite; /* Write offset in shm file */ 295 TreeRoot oldroot; /* Root and height of the previous tree */ 296 u32 iOldShmid; /* Last shm-id used by previous tree */ 297 u32 iUsrVersion; /* get/set_user_version() value */ 298 i64 iOldLog; /* Log offset associated with old tree */ 299 u32 oldcksum0; 300 u32 oldcksum1; 301 DbLog log; /* Current layout of log file */ 302 u32 aCksum[2]; /* Checksums 1 and 2. */ 303 }; 304 305 /* 306 ** Database handle structure. 307 ** 308 ** mLock: 309 ** A bitmask representing the locks currently held by the connection. 310 ** An LSM database supports N distinct locks, where N is some number less 311 ** than or equal to 32. Locks are numbered starting from 1 (see the 312 ** definitions for LSM_LOCK_WRITER and co.). 313 ** 314 ** The least significant 32-bits in mLock represent EXCLUSIVE locks. The 315 ** most significant are SHARED locks. So, if a connection holds a SHARED 316 ** lock on lock region iLock, then the following is true: 317 ** 318 ** (mLock & ((iLock+32-1) << 1)) 319 ** 320 ** Or for an EXCLUSIVE lock: 321 ** 322 ** (mLock & ((iLock-1) << 1)) 323 ** 324 ** pCsr: 325 ** Points to the head of a linked list that contains all currently open 326 ** cursors. Once this list becomes empty, the user has no outstanding 327 ** cursors and the database handle can be successfully closed. 328 ** 329 ** pCsrCache: 330 ** This list contains cursor objects that have been closed using 331 ** lsm_csr_close(). Each time a cursor is closed, it is shifted from 332 ** the pCsr list to this list. When a new cursor is opened, this list 333 ** is inspected to see if there exists a cursor object that can be 334 ** reused. This is an optimization only. 335 */ 336 struct lsm_db { 337 338 /* Database handle configuration */ 339 lsm_env *pEnv; /* runtime environment */ 340 int (*xCmp)(void *, int, void *, int); /* Compare function */ 341 342 /* Values configured by calls to lsm_config */ 343 int eSafety; /* LSM_SAFETY_OFF, NORMAL or FULL */ 344 int bAutowork; /* Configured by LSM_CONFIG_AUTOWORK */ 345 int nTreeLimit; /* Configured by LSM_CONFIG_AUTOFLUSH */ 346 int nMerge; /* Configured by LSM_CONFIG_AUTOMERGE */ 347 int bUseLog; /* Configured by LSM_CONFIG_USE_LOG */ 348 int nDfltPgsz; /* Configured by LSM_CONFIG_PAGE_SIZE */ 349 int nDfltBlksz; /* Configured by LSM_CONFIG_BLOCK_SIZE */ 350 int nMaxFreelist; /* Configured by LSM_CONFIG_MAX_FREELIST */ 351 int iMmap; /* Configured by LSM_CONFIG_MMAP */ 352 i64 nAutockpt; /* Configured by LSM_CONFIG_AUTOCHECKPOINT */ 353 int bMultiProc; /* Configured by L_C_MULTIPLE_PROCESSES */ 354 int bReadonly; /* Configured by LSM_CONFIG_READONLY */ 355 lsm_compress compress; /* Compression callbacks */ 356 lsm_compress_factory factory; /* Compression callback factory */ 357 358 /* Sub-system handles */ 359 FileSystem *pFS; /* On-disk portion of database */ 360 Database *pDatabase; /* Database shared data */ 361 362 int iRwclient; /* Read-write client lock held (-1 == none) */ 363 364 /* Client transaction context */ 365 Snapshot *pClient; /* Client snapshot */ 366 int iReader; /* Read lock held (-1 == unlocked) */ 367 int bRoTrans; /* True if a read-only db trans is open */ 368 MultiCursor *pCsr; /* List of all open cursors */ 369 LogWriter *pLogWriter; /* Context for writing to the log file */ 370 int nTransOpen; /* Number of opened write transactions */ 371 int nTransAlloc; /* Allocated size of aTrans[] array */ 372 TransMark *aTrans; /* Array of marks for transaction rollback */ 373 IntArray rollback; /* List of tree-nodes to roll back */ 374 int bDiscardOld; /* True if lsmTreeDiscardOld() was called */ 375 376 MultiCursor *pCsrCache; /* List of all closed cursors */ 377 378 /* Worker context */ 379 Snapshot *pWorker; /* Worker snapshot (or NULL) */ 380 Freelist *pFreelist; /* See sortedNewToplevel() */ 381 int bUseFreelist; /* True to use pFreelist */ 382 int bIncrMerge; /* True if currently doing a merge */ 383 384 int bInFactory; /* True if within factory.xFactory() */ 385 386 /* Debugging message callback */ 387 void (*xLog)(void *, int, const char *); 388 void *pLogCtx; 389 390 /* Work done notification callback */ 391 void (*xWork)(lsm_db *, void *); 392 void *pWorkCtx; 393 394 u64 mLock; /* Mask of current locks. See lsmShmLock(). */ 395 lsm_db *pNext; /* Next connection to same database */ 396 397 int nShm; /* Size of apShm[] array */ 398 void **apShm; /* Shared memory chunks */ 399 ShmHeader *pShmhdr; /* Live shared-memory header */ 400 TreeHeader treehdr; /* Local copy of tree-header */ 401 u32 aSnapshot[LSM_META_PAGE_SIZE / sizeof(u32)]; 402 }; 403 404 struct Segment { 405 Pgno iFirst; /* First page of this run */ 406 Pgno iLastPg; /* Last page of this run */ 407 Pgno iRoot; /* Root page number (if any) */ 408 int nSize; /* Size of this run in pages */ 409 410 Redirect *pRedirect; /* Block redirects (or NULL) */ 411 }; 412 413 /* 414 ** iSplitTopic/pSplitKey/nSplitKey: 415 ** If nRight>0, this buffer contains a copy of the largest key that has 416 ** already been written to the left-hand-side of the level. 417 */ 418 struct Level { 419 Segment lhs; /* Left-hand (main) segment */ 420 int nRight; /* Size of apRight[] array */ 421 Segment *aRhs; /* Old segments being merged into this */ 422 int iSplitTopic; /* Split key topic (if nRight>0) */ 423 void *pSplitKey; /* Pointer to split-key (if nRight>0) */ 424 int nSplitKey; /* Number of bytes in split-key */ 425 426 u16 iAge; /* Number of times data has been written */ 427 u16 flags; /* Mask of LEVEL_XXX bits */ 428 Merge *pMerge; /* Merge operation currently underway */ 429 Level *pNext; /* Next level in tree */ 430 }; 431 432 /* 433 ** The Level.flags field is set to a combination of the following bits. 434 ** 435 ** LEVEL_FREELIST_ONLY: 436 ** Set if the level consists entirely of free-list entries. 437 ** 438 ** LEVEL_INCOMPLETE: 439 ** This is set while a new toplevel level is being constructed. It is 440 ** never set for any level other than a new toplevel. 441 */ 442 #define LEVEL_FREELIST_ONLY 0x0001 443 #define LEVEL_INCOMPLETE 0x0002 444 445 446 /* 447 ** A structure describing an ongoing merge. There is an instance of this 448 ** structure for every Level currently undergoing a merge in the worker 449 ** snapshot. 450 ** 451 ** It is assumed that code that uses an instance of this structure has 452 ** access to the associated Level struct. 453 ** 454 ** iOutputOff: 455 ** The byte offset to write to next within the last page of the 456 ** output segment. 457 */ 458 struct MergeInput { 459 Pgno iPg; /* Page on which next input is stored */ 460 int iCell; /* Cell containing next input to merge */ 461 }; 462 struct Merge { 463 int nInput; /* Number of input runs being merged */ 464 MergeInput *aInput; /* Array nInput entries in size */ 465 MergeInput splitkey; /* Location in file of current splitkey */ 466 int nSkip; /* Number of separators entries to skip */ 467 int iOutputOff; /* Write offset on output page */ 468 Pgno iCurrentPtr; /* Current pointer value */ 469 }; 470 471 /* 472 ** The first argument to this macro is a pointer to a Segment structure. 473 ** Returns true if the structure instance indicates that the separators 474 ** array is valid. 475 */ 476 #define segmentHasSeparators(pSegment) ((pSegment)->sep.iFirst>0) 477 478 /* 479 ** The values that accompany the lock held by a database reader. 480 */ 481 struct ShmReader { 482 u32 iTreeId; 483 i64 iLsmId; 484 }; 485 486 /* 487 ** An instance of this structure is stored in the first shared-memory 488 ** page. The shared-memory header. 489 ** 490 ** bWriter: 491 ** Immediately after opening a write transaction taking the WRITER lock, 492 ** each writer client sets this flag. It is cleared right before the 493 ** WRITER lock is relinquished. If a subsequent writer finds that this 494 ** flag is already set when a write transaction is opened, this indicates 495 ** that a previous writer failed mid-transaction. 496 ** 497 ** iMetaPage: 498 ** If the database file does not contain a valid, synced, checkpoint, this 499 ** value is set to 0. Otherwise, it is set to the meta-page number that 500 ** contains the most recently written checkpoint (either 1 or 2). 501 ** 502 ** hdr1, hdr2: 503 ** The two copies of the in-memory tree header. Two copies are required 504 ** in case a writer fails while updating one of them. 505 */ 506 struct ShmHeader { 507 u32 aSnap1[LSM_META_PAGE_SIZE / 4]; 508 u32 aSnap2[LSM_META_PAGE_SIZE / 4]; 509 u32 bWriter; 510 u32 iMetaPage; 511 TreeHeader hdr1; 512 TreeHeader hdr2; 513 ShmReader aReader[LSM_LOCK_NREADER]; 514 }; 515 516 /* 517 ** An instance of this structure is stored at the start of each shared-memory 518 ** chunk except the first (which is the header chunk - see above). 519 */ 520 struct ShmChunk { 521 u32 iShmid; 522 u32 iNext; 523 }; 524 525 /* 526 ** Maximum number of shared-memory chunks allowed in the *-shm file. Since 527 ** each shared-memory chunk is 32KB in size, this is a theoretical limit only. 528 */ 529 #define LSM_MAX_SHMCHUNKS (1<<30) 530 531 /* Return true if shm-sequence "a" is larger than or equal to "b" */ 532 #define shm_sequence_ge(a, b) (((u32)a-(u32)b) < LSM_MAX_SHMCHUNKS) 533 534 #define LSM_APPLIST_SZ 4 535 536 /* 537 ** An instance of the following structure stores the in-memory part of 538 ** the current free block list. This structure is to the free block list 539 ** as the in-memory tree is to the users database content. The contents 540 ** of the free block list is found by merging the in-memory components 541 ** with those stored in the LSM, just as the contents of the database is 542 ** found by merging the in-memory tree with the user data entries in the 543 ** LSM. 544 ** 545 ** Each FreelistEntry structure in the array represents either an insert 546 ** or delete operation on the free-list. For deletes, the FreelistEntry.iId 547 ** field is set to -1. For inserts, it is set to zero or greater. 548 ** 549 ** The array of FreelistEntry structures is always sorted in order of 550 ** block number (ascending). 551 ** 552 ** When the in-memory free block list is written into the LSM, each insert 553 ** operation is written separately. The entry key is the bitwise inverse 554 ** of the block number as a 32-bit big-endian integer. This is done so that 555 ** the entries in the LSM are sorted in descending order of block id. 556 ** The associated value is the snapshot id, formated as a varint. 557 */ 558 struct Freelist { 559 FreelistEntry *aEntry; /* Free list entries */ 560 int nEntry; /* Number of valid slots in aEntry[] */ 561 int nAlloc; /* Allocated size of aEntry[] */ 562 }; 563 struct FreelistEntry { 564 u32 iBlk; /* Block number */ 565 i64 iId; /* Largest snapshot id to use this block */ 566 }; 567 568 /* 569 ** A snapshot of a database. A snapshot contains all the information required 570 ** to read or write a database file on disk. See the description of struct 571 ** Database below for futher details. 572 */ 573 struct Snapshot { 574 Database *pDatabase; /* Database this snapshot belongs to */ 575 u32 iCmpId; /* Id of compression scheme */ 576 Level *pLevel; /* Pointer to level 0 of snapshot (or NULL) */ 577 i64 iId; /* Snapshot id */ 578 i64 iLogOff; /* Log file offset */ 579 Redirect redirect; /* Block redirection array */ 580 581 /* Used by worker snapshots only */ 582 int nBlock; /* Number of blocks in database file */ 583 Pgno aiAppend[LSM_APPLIST_SZ]; /* Append point list */ 584 Freelist freelist; /* Free block list */ 585 u32 nWrite; /* Total number of pages written to disk */ 586 }; 587 #define LSM_INITIAL_SNAPSHOT_ID 11 588 589 /* 590 ** Functions from file "lsm_ckpt.c". 591 */ 592 int lsmCheckpointWrite(lsm_db *, u32 *); 593 int lsmCheckpointLevels(lsm_db *, int, void **, int *); 594 int lsmCheckpointLoadLevels(lsm_db *pDb, void *pVal, int nVal); 595 596 int lsmCheckpointRecover(lsm_db *); 597 int lsmCheckpointDeserialize(lsm_db *, int, u32 *, Snapshot **); 598 599 int lsmCheckpointLoadWorker(lsm_db *pDb); 600 int lsmCheckpointStore(lsm_db *pDb, int); 601 602 int lsmCheckpointLoad(lsm_db *pDb, int *); 603 int lsmCheckpointLoadOk(lsm_db *pDb, int); 604 int lsmCheckpointClientCacheOk(lsm_db *); 605 606 u32 lsmCheckpointNBlock(u32 *); 607 i64 lsmCheckpointId(u32 *, int); 608 u32 lsmCheckpointNWrite(u32 *, int); 609 i64 lsmCheckpointLogOffset(u32 *); 610 int lsmCheckpointPgsz(u32 *); 611 int lsmCheckpointBlksz(u32 *); 612 void lsmCheckpointLogoffset(u32 *aCkpt, DbLog *pLog); 613 void lsmCheckpointZeroLogoffset(lsm_db *); 614 615 int lsmCheckpointSaveWorker(lsm_db *pDb, int); 616 int lsmDatabaseFull(lsm_db *pDb); 617 int lsmCheckpointSynced(lsm_db *pDb, i64 *piId, i64 *piLog, u32 *pnWrite); 618 619 int lsmCheckpointSize(lsm_db *db, int *pnByte); 620 621 int lsmInfoCompressionId(lsm_db *db, u32 *piCmpId); 622 623 /* 624 ** Functions from file "lsm_tree.c". 625 */ 626 int lsmTreeNew(lsm_env *, int (*)(void *, int, void *, int), Tree **ppTree); 627 void lsmTreeRelease(lsm_env *, Tree *); 628 int lsmTreeInit(lsm_db *); 629 int lsmTreeRepair(lsm_db *); 630 631 void lsmTreeMakeOld(lsm_db *pDb); 632 void lsmTreeDiscardOld(lsm_db *pDb); 633 int lsmTreeHasOld(lsm_db *pDb); 634 635 int lsmTreeSize(lsm_db *); 636 int lsmTreeEndTransaction(lsm_db *pDb, int bCommit); 637 int lsmTreeLoadHeader(lsm_db *pDb, int *); 638 int lsmTreeLoadHeaderOk(lsm_db *, int); 639 640 int lsmTreeInsert(lsm_db *pDb, void *pKey, int nKey, void *pVal, int nVal); 641 int lsmTreeDelete(lsm_db *db, void *pKey1, int nKey1, void *pKey2, int nKey2); 642 void lsmTreeRollback(lsm_db *pDb, TreeMark *pMark); 643 void lsmTreeMark(lsm_db *pDb, TreeMark *pMark); 644 645 int lsmTreeCursorNew(lsm_db *pDb, int, TreeCursor **); 646 void lsmTreeCursorDestroy(TreeCursor *); 647 648 int lsmTreeCursorSeek(TreeCursor *pCsr, void *pKey, int nKey, int *pRes); 649 int lsmTreeCursorNext(TreeCursor *pCsr); 650 int lsmTreeCursorPrev(TreeCursor *pCsr); 651 int lsmTreeCursorEnd(TreeCursor *pCsr, int bLast); 652 void lsmTreeCursorReset(TreeCursor *pCsr); 653 int lsmTreeCursorKey(TreeCursor *pCsr, int *pFlags, void **ppKey, int *pnKey); 654 int lsmTreeCursorFlags(TreeCursor *pCsr); 655 int lsmTreeCursorValue(TreeCursor *pCsr, void **ppVal, int *pnVal); 656 int lsmTreeCursorValid(TreeCursor *pCsr); 657 int lsmTreeCursorSave(TreeCursor *pCsr); 658 659 void lsmFlagsToString(int flags, char *zFlags); 660 661 /* 662 ** Functions from file "mem.c". 663 */ 664 void *lsmMalloc(lsm_env*, size_t); 665 void lsmFree(lsm_env*, void *); 666 void *lsmRealloc(lsm_env*, void *, size_t); 667 void *lsmReallocOrFree(lsm_env*, void *, size_t); 668 void *lsmReallocOrFreeRc(lsm_env *, void *, size_t, int *); 669 670 void *lsmMallocZeroRc(lsm_env*, size_t, int *); 671 void *lsmMallocRc(lsm_env*, size_t, int *); 672 673 void *lsmMallocZero(lsm_env *pEnv, size_t); 674 char *lsmMallocStrdup(lsm_env *pEnv, const char *); 675 676 /* 677 ** Functions from file "lsm_mutex.c". 678 */ 679 int lsmMutexStatic(lsm_env*, int, lsm_mutex **); 680 int lsmMutexNew(lsm_env*, lsm_mutex **); 681 void lsmMutexDel(lsm_env*, lsm_mutex *); 682 void lsmMutexEnter(lsm_env*, lsm_mutex *); 683 int lsmMutexTry(lsm_env*, lsm_mutex *); 684 void lsmMutexLeave(lsm_env*, lsm_mutex *); 685 686 #ifndef NDEBUG 687 int lsmMutexHeld(lsm_env *, lsm_mutex *); 688 int lsmMutexNotHeld(lsm_env *, lsm_mutex *); 689 #endif 690 691 /************************************************************************** 692 ** Start of functions from "lsm_file.c". 693 */ 694 int lsmFsOpen(lsm_db *, const char *, int); 695 int lsmFsOpenLog(lsm_db *, int *); 696 void lsmFsCloseLog(lsm_db *); 697 void lsmFsClose(FileSystem *); 698 699 int lsmFsUnmap(FileSystem *); 700 701 int lsmFsConfigure(lsm_db *db); 702 703 int lsmFsBlockSize(FileSystem *); 704 void lsmFsSetBlockSize(FileSystem *, int); 705 int lsmFsMoveBlock(FileSystem *pFS, Segment *pSeg, int iTo, int iFrom); 706 707 int lsmFsPageSize(FileSystem *); 708 void lsmFsSetPageSize(FileSystem *, int); 709 710 int lsmFsFileid(lsm_db *pDb, void **ppId, int *pnId); 711 712 /* Creating, populating, gobbling and deleting sorted runs. */ 713 void lsmFsGobble(lsm_db *, Segment *, Pgno *, int); 714 int lsmFsSortedDelete(FileSystem *, Snapshot *, int, Segment *); 715 int lsmFsSortedFinish(FileSystem *, Segment *); 716 int lsmFsSortedAppend(FileSystem *, Snapshot *, Level *, int, Page **); 717 int lsmFsSortedPadding(FileSystem *, Snapshot *, Segment *); 718 719 /* Functions to retrieve the lsm_env pointer from a FileSystem or Page object */ 720 lsm_env *lsmFsEnv(FileSystem *); 721 lsm_env *lsmPageEnv(Page *); 722 FileSystem *lsmPageFS(Page *); 723 724 int lsmFsSectorSize(FileSystem *); 725 726 void lsmSortedSplitkey(lsm_db *, Level *, int *); 727 728 /* Reading sorted run content. */ 729 int lsmFsDbPageLast(FileSystem *pFS, Segment *pSeg, Page **ppPg); 730 int lsmFsDbPageGet(FileSystem *, Segment *, Pgno, Page **); 731 int lsmFsDbPageNext(Segment *, Page *, int eDir, Page **); 732 733 u8 *lsmFsPageData(Page *, int *); 734 int lsmFsPageRelease(Page *); 735 int lsmFsPagePersist(Page *); 736 void lsmFsPageRef(Page *); 737 Pgno lsmFsPageNumber(Page *); 738 739 int lsmFsNRead(FileSystem *); 740 int lsmFsNWrite(FileSystem *); 741 742 int lsmFsMetaPageGet(FileSystem *, int, int, MetaPage **); 743 int lsmFsMetaPageRelease(MetaPage *); 744 u8 *lsmFsMetaPageData(MetaPage *, int *); 745 746 #ifdef LSM_DEBUG 747 int lsmFsDbPageIsLast(Segment *pSeg, Page *pPg); 748 int lsmFsIntegrityCheck(lsm_db *); 749 #endif 750 751 Pgno lsmFsRedirectPage(FileSystem *, Redirect *, Pgno); 752 753 int lsmFsPageWritable(Page *); 754 755 /* Functions to read, write and sync the log file. */ 756 int lsmFsWriteLog(FileSystem *pFS, i64 iOff, LsmString *pStr); 757 int lsmFsSyncLog(FileSystem *pFS); 758 int lsmFsReadLog(FileSystem *pFS, i64 iOff, int nRead, LsmString *pStr); 759 int lsmFsTruncateLog(FileSystem *pFS, i64 nByte); 760 int lsmFsTruncateDb(FileSystem *pFS, i64 nByte); 761 int lsmFsCloseAndDeleteLog(FileSystem *pFS); 762 763 LsmFile *lsmFsDeferClose(FileSystem *pFS); 764 765 /* And to sync the db file */ 766 int lsmFsSyncDb(FileSystem *, int); 767 768 void lsmFsFlushWaiting(FileSystem *, int *); 769 770 /* Used by lsm_info(ARRAY_STRUCTURE) and lsm_config(MMAP) */ 771 int lsmInfoArrayStructure(lsm_db *pDb, int bBlock, Pgno iFirst, char **pzOut); 772 int lsmInfoArrayPages(lsm_db *pDb, Pgno iFirst, char **pzOut); 773 int lsmConfigMmap(lsm_db *pDb, int *piParam); 774 775 int lsmEnvOpen(lsm_env *, const char *, int, lsm_file **); 776 int lsmEnvClose(lsm_env *pEnv, lsm_file *pFile); 777 int lsmEnvLock(lsm_env *pEnv, lsm_file *pFile, int iLock, int eLock); 778 int lsmEnvTestLock(lsm_env *pEnv, lsm_file *pFile, int iLock, int nLock, int); 779 780 int lsmEnvShmMap(lsm_env *, lsm_file *, int, int, void **); 781 void lsmEnvShmBarrier(lsm_env *); 782 void lsmEnvShmUnmap(lsm_env *, lsm_file *, int); 783 784 void lsmEnvSleep(lsm_env *, int); 785 786 int lsmFsReadSyncedId(lsm_db *db, int, i64 *piVal); 787 788 int lsmFsSegmentContainsPg(FileSystem *pFS, Segment *, Pgno, int *); 789 790 void lsmFsPurgeCache(FileSystem *); 791 792 /* 793 ** End of functions from "lsm_file.c". 794 **************************************************************************/ 795 796 /* 797 ** Functions from file "lsm_sorted.c". 798 */ 799 int lsmInfoPageDump(lsm_db *, Pgno, int, char **); 800 void lsmSortedCleanup(lsm_db *); 801 int lsmSortedAutoWork(lsm_db *, int nUnit); 802 803 int lsmSortedWalkFreelist(lsm_db *, int, int (*)(void *, int, i64), void *); 804 805 int lsmSaveWorker(lsm_db *, int); 806 807 int lsmFlushTreeToDisk(lsm_db *pDb); 808 809 void lsmSortedRemap(lsm_db *pDb); 810 811 void lsmSortedFreeLevel(lsm_env *pEnv, Level *); 812 813 int lsmSortedAdvanceAll(lsm_db *pDb); 814 815 int lsmSortedLoadMerge(lsm_db *, Level *, u32 *, int *); 816 int lsmSortedLoadFreelist(lsm_db *pDb, void **, int *); 817 818 void *lsmSortedSplitKey(Level *pLevel, int *pnByte); 819 820 void lsmSortedSaveTreeCursors(lsm_db *); 821 822 int lsmMCursorNew(lsm_db *, MultiCursor **); 823 void lsmMCursorClose(MultiCursor *, int); 824 int lsmMCursorSeek(MultiCursor *, int, void *, int , int); 825 int lsmMCursorFirst(MultiCursor *); 826 int lsmMCursorPrev(MultiCursor *); 827 int lsmMCursorLast(MultiCursor *); 828 int lsmMCursorValid(MultiCursor *); 829 int lsmMCursorNext(MultiCursor *); 830 int lsmMCursorKey(MultiCursor *, void **, int *); 831 int lsmMCursorValue(MultiCursor *, void **, int *); 832 int lsmMCursorType(MultiCursor *, int *); 833 lsm_db *lsmMCursorDb(MultiCursor *); 834 void lsmMCursorFreeCache(lsm_db *); 835 836 int lsmSaveCursors(lsm_db *pDb); 837 int lsmRestoreCursors(lsm_db *pDb); 838 839 void lsmSortedDumpStructure(lsm_db *pDb, Snapshot *, int, int, const char *); 840 void lsmFsDumpBlocklists(lsm_db *); 841 842 void lsmSortedExpandBtreePage(Page *pPg, int nOrig); 843 844 void lsmPutU32(u8 *, u32); 845 u32 lsmGetU32(u8 *); 846 u64 lsmGetU64(u8 *); 847 848 /* 849 ** Functions from "lsm_varint.c". 850 */ 851 int lsmVarintPut32(u8 *, int); 852 int lsmVarintGet32(u8 *, int *); 853 int lsmVarintPut64(u8 *aData, i64 iVal); 854 int lsmVarintGet64(const u8 *aData, i64 *piVal); 855 856 int lsmVarintLen32(int); 857 int lsmVarintSize(u8 c); 858 859 /* 860 ** Functions from file "main.c". 861 */ 862 void lsmLogMessage(lsm_db *, int, const char *, ...); 863 int lsmInfoFreelist(lsm_db *pDb, char **pzOut); 864 865 /* 866 ** Functions from file "lsm_log.c". 867 */ 868 int lsmLogBegin(lsm_db *pDb); 869 int lsmLogWrite(lsm_db *, int, void *, int, void *, int); 870 int lsmLogCommit(lsm_db *); 871 void lsmLogEnd(lsm_db *pDb, int bCommit); 872 void lsmLogTell(lsm_db *, LogMark *); 873 void lsmLogSeek(lsm_db *, LogMark *); 874 void lsmLogClose(lsm_db *); 875 876 int lsmLogRecover(lsm_db *); 877 int lsmInfoLogStructure(lsm_db *pDb, char **pzVal); 878 879 /* Valid values for the second argument to lsmLogWrite(). */ 880 #define LSM_WRITE 0x06 881 #define LSM_DELETE 0x08 882 #define LSM_DRANGE 0x0A 883 884 /************************************************************************** 885 ** Functions from file "lsm_shared.c". 886 */ 887 888 int lsmDbDatabaseConnect(lsm_db*, const char *); 889 void lsmDbDatabaseRelease(lsm_db *); 890 891 int lsmBeginReadTrans(lsm_db *); 892 int lsmBeginWriteTrans(lsm_db *); 893 int lsmBeginFlush(lsm_db *); 894 895 int lsmDetectRoTrans(lsm_db *db, int *); 896 int lsmBeginRoTrans(lsm_db *db); 897 898 int lsmBeginWork(lsm_db *); 899 void lsmFinishWork(lsm_db *, int, int *); 900 901 int lsmFinishRecovery(lsm_db *); 902 void lsmFinishReadTrans(lsm_db *); 903 int lsmFinishWriteTrans(lsm_db *, int); 904 int lsmFinishFlush(lsm_db *, int); 905 906 int lsmSnapshotSetFreelist(lsm_db *, int *, int); 907 908 Snapshot *lsmDbSnapshotClient(lsm_db *); 909 Snapshot *lsmDbSnapshotWorker(lsm_db *); 910 911 void lsmSnapshotSetCkptid(Snapshot *, i64); 912 913 Level *lsmDbSnapshotLevel(Snapshot *); 914 void lsmDbSnapshotSetLevel(Snapshot *, Level *); 915 916 void lsmDbRecoveryComplete(lsm_db *, int); 917 918 int lsmBlockAllocate(lsm_db *, int, int *); 919 int lsmBlockFree(lsm_db *, int); 920 int lsmBlockRefree(lsm_db *, int); 921 922 void lsmFreelistDeltaBegin(lsm_db *); 923 void lsmFreelistDeltaEnd(lsm_db *); 924 int lsmFreelistDelta(lsm_db *pDb); 925 926 DbLog *lsmDatabaseLog(lsm_db *pDb); 927 928 #ifdef LSM_DEBUG 929 int lsmHoldingClientMutex(lsm_db *pDb); 930 int lsmShmAssertLock(lsm_db *db, int iLock, int eOp); 931 int lsmShmAssertWorker(lsm_db *db); 932 #endif 933 934 void lsmFreeSnapshot(lsm_env *, Snapshot *); 935 936 937 /* Candidate values for the 3rd argument to lsmShmLock() */ 938 #define LSM_LOCK_UNLOCK 0 939 #define LSM_LOCK_SHARED 1 940 #define LSM_LOCK_EXCL 2 941 942 int lsmShmCacheChunks(lsm_db *db, int nChunk); 943 int lsmShmLock(lsm_db *db, int iLock, int eOp, int bBlock); 944 int lsmShmTestLock(lsm_db *db, int iLock, int nLock, int eOp); 945 void lsmShmBarrier(lsm_db *db); 946 947 #ifdef LSM_DEBUG 948 void lsmShmHasLock(lsm_db *db, int iLock, int eOp); 949 #else 950 # define lsmShmHasLock(x,y,z) 951 #endif 952 953 int lsmReadlock(lsm_db *, i64 iLsm, u32 iShmMin, u32 iShmMax); 954 955 int lsmLsmInUse(lsm_db *db, i64 iLsmId, int *pbInUse); 956 int lsmTreeInUse(lsm_db *db, u32 iLsmId, int *pbInUse); 957 int lsmFreelistAppend(lsm_env *pEnv, Freelist *p, int iBlk, i64 iId); 958 959 int lsmDbMultiProc(lsm_db *); 960 void lsmDbDeferredClose(lsm_db *, lsm_file *, LsmFile *); 961 LsmFile *lsmDbRecycleFd(lsm_db *); 962 963 int lsmWalkFreelist(lsm_db *, int, int (*)(void *, int, i64), void *); 964 965 int lsmCheckCompressionId(lsm_db *, u32); 966 967 968 /************************************************************************** 969 ** functions in lsm_str.c 970 */ 971 void lsmStringInit(LsmString*, lsm_env *pEnv); 972 int lsmStringExtend(LsmString*, int); 973 int lsmStringAppend(LsmString*, const char *, int); 974 void lsmStringVAppendf(LsmString*, const char *zFormat, va_list, va_list); 975 void lsmStringAppendf(LsmString*, const char *zFormat, ...); 976 void lsmStringClear(LsmString*); 977 char *lsmMallocPrintf(lsm_env*, const char*, ...); 978 int lsmStringBinAppend(LsmString *pStr, const u8 *a, int n); 979 980 int lsmStrlen(const char *zName); 981 982 983 984 /* 985 ** Round up a number to the next larger multiple of 8. This is used 986 ** to force 8-byte alignment on 64-bit architectures. 987 */ 988 #define ROUND8(x) (((x)+7)&~7) 989 990 #define LSM_MIN(x,y) ((x)>(y) ? (y) : (x)) 991 #define LSM_MAX(x,y) ((x)>(y) ? (x) : (y)) 992 993 #endif 994