1 /*
2 ** 2011-08-18
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 ** Internal structure definitions for the LSM module.
13 */
14 #ifndef _LSM_INT_H
15 #define _LSM_INT_H
16 
17 #include "lsm.h"
18 #include <assert.h>
19 #include <string.h>
20 
21 #include <stdarg.h>
22 #include <stdlib.h>
23 #include <stdio.h>
24 #include <ctype.h>
25 
26 #ifdef _WIN32
27 # ifdef _MSC_VER
28 #  define snprintf _snprintf
29 # endif
30 #else
31 # include <unistd.h>
32 #endif
33 
34 #ifdef NDEBUG
35 # ifdef LSM_DEBUG_EXPENSIVE
36 #  undef LSM_DEBUG_EXPENSIVE
37 # endif
38 # ifdef LSM_DEBUG
39 #  undef LSM_DEBUG
40 # endif
41 #else
42 # ifndef LSM_DEBUG
43 #  define LSM_DEBUG
44 # endif
45 #endif
46 
47 /*
48 ** Default values for various data structure parameters. These may be
49 ** overridden by calls to lsm_config().
50 */
51 #define LSM_DFLT_PAGE_SIZE          (4 * 1024)
52 #define LSM_DFLT_BLOCK_SIZE         (1 * 1024 * 1024)
53 #define LSM_DFLT_AUTOFLUSH          (1 * 1024 * 1024)
54 #define LSM_DFLT_AUTOCHECKPOINT     (i64)(2 * 1024 * 1024)
55 #define LSM_DFLT_AUTOWORK           1
56 #define LSM_DFLT_LOG_SIZE           (128*1024)
57 #define LSM_DFLT_AUTOMERGE          4
58 #define LSM_DFLT_SAFETY             LSM_SAFETY_NORMAL
59 #define LSM_DFLT_MMAP               (LSM_IS_64_BIT ? 1 : 32768)
60 #define LSM_DFLT_MULTIPLE_PROCESSES 1
61 #define LSM_DFLT_USE_LOG            1
62 
63 /* Initial values for log file checksums. These are only used if the
64 ** database file does not contain a valid checkpoint.  */
65 #define LSM_CKSUM0_INIT 42
66 #define LSM_CKSUM1_INIT 42
67 
68 /* "mmap" mode is currently only used in environments with 64-bit address
69 ** spaces. The following macro is used to test for this.  */
70 #define LSM_IS_64_BIT (sizeof(void*)==8)
71 
72 #define LSM_AUTOWORK_QUANT 32
73 
74 typedef struct Database Database;
75 typedef struct DbLog DbLog;
76 typedef struct FileSystem FileSystem;
77 typedef struct Freelist Freelist;
78 typedef struct FreelistEntry FreelistEntry;
79 typedef struct Level Level;
80 typedef struct LogMark LogMark;
81 typedef struct LogRegion LogRegion;
82 typedef struct LogWriter LogWriter;
83 typedef struct LsmString LsmString;
84 typedef struct Mempool Mempool;
85 typedef struct Merge Merge;
86 typedef struct MergeInput MergeInput;
87 typedef struct MetaPage MetaPage;
88 typedef struct MultiCursor MultiCursor;
89 typedef struct Page Page;
90 typedef struct Redirect Redirect;
91 typedef struct Segment Segment;
92 typedef struct SegmentMerger SegmentMerger;
93 typedef struct ShmChunk ShmChunk;
94 typedef struct ShmHeader ShmHeader;
95 typedef struct ShmReader ShmReader;
96 typedef struct Snapshot Snapshot;
97 typedef struct TransMark TransMark;
98 typedef struct Tree Tree;
99 typedef struct TreeCursor TreeCursor;
100 typedef struct TreeHeader TreeHeader;
101 typedef struct TreeMark TreeMark;
102 typedef struct TreeRoot TreeRoot;
103 
104 #ifndef _SQLITEINT_H_
105 typedef unsigned char u8;
106 typedef unsigned short int u16;
107 typedef unsigned int u32;
108 typedef lsm_i64 i64;
109 typedef unsigned long long int u64;
110 #endif
111 
112 /* A page number is a 64-bit integer. */
113 typedef i64 Pgno;
114 
115 #ifdef LSM_DEBUG
116 int lsmErrorBkpt(int);
117 #else
118 # define lsmErrorBkpt(x) (x)
119 #endif
120 
121 #define LSM_PROTOCOL_BKPT lsmErrorBkpt(LSM_PROTOCOL)
122 #define LSM_IOERR_BKPT    lsmErrorBkpt(LSM_IOERR)
123 #define LSM_NOMEM_BKPT    lsmErrorBkpt(LSM_NOMEM)
124 #define LSM_CORRUPT_BKPT  lsmErrorBkpt(LSM_CORRUPT)
125 #define LSM_MISUSE_BKPT   lsmErrorBkpt(LSM_MISUSE)
126 
127 #define unused_parameter(x) (void)(x)
128 #define array_size(x) (sizeof(x)/sizeof(x[0]))
129 
130 
131 /* The size of each shared-memory chunk */
132 #define LSM_SHM_CHUNK_SIZE (32*1024)
133 
134 /* The number of bytes reserved at the start of each shm chunk for MM. */
135 #define LSM_SHM_CHUNK_HDR  (sizeof(ShmChunk))
136 
137 /* The number of available read locks. */
138 #define LSM_LOCK_NREADER   6
139 
140 /* The number of available read-write client locks. */
141 #define LSM_LOCK_NRWCLIENT   16
142 
143 /* Lock definitions.
144 */
145 #define LSM_LOCK_DMS1         1   /* Serialize connect/disconnect ops */
146 #define LSM_LOCK_DMS2         2   /* Read-write connections */
147 #define LSM_LOCK_DMS3         3   /* Read-only connections */
148 #define LSM_LOCK_WRITER       4
149 #define LSM_LOCK_WORKER       5
150 #define LSM_LOCK_CHECKPOINTER 6
151 #define LSM_LOCK_ROTRANS      7
152 #define LSM_LOCK_READER(i)    ((i) + LSM_LOCK_ROTRANS + 1)
153 #define LSM_LOCK_RWCLIENT(i)  ((i) + LSM_LOCK_READER(LSM_LOCK_NREADER))
154 
155 #define LSM_N_LOCK LSM_LOCK_RWCLIENT(LSM_LOCK_NRWCLIENT)
156 
157 /*
158 ** Meta-page size and usable size.
159 */
160 #define LSM_META_PAGE_SIZE 4096
161 
162 #define LSM_META_RW_PAGE_SIZE (LSM_META_PAGE_SIZE - LSM_N_LOCK)
163 
164 /*
165 ** Hard limit on the number of free-list entries that may be stored in
166 ** a checkpoint (the remainder are stored as a system record in the LSM).
167 ** See also LSM_CONFIG_MAX_FREELIST.
168 */
169 #define LSM_MAX_FREELIST_ENTRIES 24
170 
171 #define LSM_MAX_BLOCK_REDIRECTS 16
172 
173 #define LSM_ATTEMPTS_BEFORE_PROTOCOL 10000
174 
175 
176 /*
177 ** Each entry stored in the LSM (or in-memory tree structure) has an
178 ** associated mask of the following flags.
179 */
180 #define LSM_START_DELETE 0x01     /* Start of open-ended delete range */
181 #define LSM_END_DELETE   0x02     /* End of open-ended delete range */
182 #define LSM_POINT_DELETE 0x04     /* Delete this key */
183 #define LSM_INSERT       0x08     /* Insert this key and value */
184 #define LSM_SEPARATOR    0x10     /* True if entry is separator key only */
185 #define LSM_SYSTEMKEY    0x20     /* True if entry is a system key (FREELIST) */
186 
187 #define LSM_CONTIGUOUS   0x40     /* Used in lsm_tree.c */
188 
189 /*
190 ** A string that can grow by appending.
191 */
192 struct LsmString {
193   lsm_env *pEnv;              /* Run-time environment */
194   int n;                      /* Size of string.  -1 indicates error */
195   int nAlloc;                 /* Space allocated for z[] */
196   char *z;                    /* The string content */
197 };
198 
199 typedef struct LsmFile LsmFile;
200 struct LsmFile {
201   lsm_file *pFile;
202   LsmFile *pNext;
203 };
204 
205 /*
206 ** An instance of the following type is used to store an ordered list of
207 ** u32 values.
208 **
209 ** Note: This is a place-holder implementation. It should be replaced by
210 ** a version that avoids making a single large allocation when the array
211 ** contains a large number of values. For this reason, the internals of
212 ** this object should only manipulated by the intArrayXXX() functions in
213 ** lsm_tree.c.
214 */
215 typedef struct IntArray IntArray;
216 struct IntArray {
217   int nAlloc;
218   int nArray;
219   u32 *aArray;
220 };
221 
222 struct Redirect {
223   int n;                          /* Number of redirects */
224   struct RedirectEntry {
225     int iFrom;
226     int iTo;
227   } *a;
228 };
229 
230 /*
231 ** An instance of this structure represents a point in the history of the
232 ** tree structure to roll back to. Refer to comments in lsm_tree.c for
233 ** details.
234 */
235 struct TreeMark {
236   u32 iRoot;                      /* Offset of root node in shm file */
237   u32 nHeight;                    /* Current height of tree structure */
238   u32 iWrite;                     /* Write offset in shm file */
239   u32 nChunk;                     /* Number of chunks in shared-memory file */
240   u32 iFirst;                     /* First chunk in linked list */
241   u32 iNextShmid;                 /* Next id to allocate */
242   int iRollback;                  /* Index in lsm->rollback to revert to */
243 };
244 
245 /*
246 ** An instance of this structure represents a point in the database log.
247 */
248 struct LogMark {
249   i64 iOff;                       /* Offset into log (see lsm_log.c) */
250   int nBuf;                       /* Size of in-memory buffer here */
251   u8 aBuf[8];                     /* Bytes of content in aBuf[] */
252   u32 cksum0;                     /* Checksum 0 at offset (iOff-nBuf) */
253   u32 cksum1;                     /* Checksum 1 at offset (iOff-nBuf) */
254 };
255 
256 struct TransMark {
257   TreeMark tree;
258   LogMark log;
259 };
260 
261 /*
262 ** A structure that defines the start and end offsets of a region in the
263 ** log file. The size of the region in bytes is (iEnd - iStart), so if
264 ** iEnd==iStart the region is zero bytes in size.
265 */
266 struct LogRegion {
267   i64 iStart;                     /* Start of region in log file */
268   i64 iEnd;                       /* End of region in log file */
269 };
270 
271 struct DbLog {
272   u32 cksum0;                     /* Checksum 0 at offset iOff */
273   u32 cksum1;                     /* Checksum 1 at offset iOff */
274   i64 iSnapshotId;                /* Log space has been reclaimed to this ss */
275   LogRegion aRegion[3];           /* Log file regions (see docs in lsm_log.c) */
276 };
277 
278 struct TreeRoot {
279   u32 iRoot;
280   u32 nHeight;
281   u32 nByte;                      /* Total size of this tree in bytes */
282   u32 iTransId;
283 };
284 
285 /*
286 ** Tree header structure.
287 */
288 struct TreeHeader {
289   u32 iUsedShmid;                 /* Id of first shm chunk used by this tree */
290   u32 iNextShmid;                 /* Shm-id of next chunk allocated */
291   u32 iFirst;                     /* Chunk number of smallest shm-id */
292   u32 nChunk;                     /* Number of chunks in shared-memory file */
293   TreeRoot root;                  /* Root and height of current tree */
294   u32 iWrite;                     /* Write offset in shm file */
295   TreeRoot oldroot;               /* Root and height of the previous tree */
296   u32 iOldShmid;                  /* Last shm-id used by previous tree */
297   u32 iUsrVersion;                /* get/set_user_version() value */
298   i64 iOldLog;                    /* Log offset associated with old tree */
299   u32 oldcksum0;
300   u32 oldcksum1;
301   DbLog log;                      /* Current layout of log file */
302   u32 aCksum[2];                  /* Checksums 1 and 2. */
303 };
304 
305 /*
306 ** Database handle structure.
307 **
308 ** mLock:
309 **   A bitmask representing the locks currently held by the connection.
310 **   An LSM database supports N distinct locks, where N is some number less
311 **   than or equal to 32. Locks are numbered starting from 1 (see the
312 **   definitions for LSM_LOCK_WRITER and co.).
313 **
314 **   The least significant 32-bits in mLock represent EXCLUSIVE locks. The
315 **   most significant are SHARED locks. So, if a connection holds a SHARED
316 **   lock on lock region iLock, then the following is true:
317 **
318 **       (mLock & ((iLock+32-1) << 1))
319 **
320 **   Or for an EXCLUSIVE lock:
321 **
322 **       (mLock & ((iLock-1) << 1))
323 **
324 ** pCsr:
325 **   Points to the head of a linked list that contains all currently open
326 **   cursors. Once this list becomes empty, the user has no outstanding
327 **   cursors and the database handle can be successfully closed.
328 **
329 ** pCsrCache:
330 **   This list contains cursor objects that have been closed using
331 **   lsm_csr_close(). Each time a cursor is closed, it is shifted from
332 **   the pCsr list to this list. When a new cursor is opened, this list
333 **   is inspected to see if there exists a cursor object that can be
334 **   reused. This is an optimization only.
335 */
336 struct lsm_db {
337 
338   /* Database handle configuration */
339   lsm_env *pEnv;                            /* runtime environment */
340   int (*xCmp)(void *, int, void *, int);    /* Compare function */
341 
342   /* Values configured by calls to lsm_config */
343   int eSafety;                    /* LSM_SAFETY_OFF, NORMAL or FULL */
344   int bAutowork;                  /* Configured by LSM_CONFIG_AUTOWORK */
345   int nTreeLimit;                 /* Configured by LSM_CONFIG_AUTOFLUSH */
346   int nMerge;                     /* Configured by LSM_CONFIG_AUTOMERGE */
347   int bUseLog;                    /* Configured by LSM_CONFIG_USE_LOG */
348   int nDfltPgsz;                  /* Configured by LSM_CONFIG_PAGE_SIZE */
349   int nDfltBlksz;                 /* Configured by LSM_CONFIG_BLOCK_SIZE */
350   int nMaxFreelist;               /* Configured by LSM_CONFIG_MAX_FREELIST */
351   int iMmap;                      /* Configured by LSM_CONFIG_MMAP */
352   i64 nAutockpt;                  /* Configured by LSM_CONFIG_AUTOCHECKPOINT */
353   int bMultiProc;                 /* Configured by L_C_MULTIPLE_PROCESSES */
354   int bReadonly;                  /* Configured by LSM_CONFIG_READONLY */
355   lsm_compress compress;          /* Compression callbacks */
356   lsm_compress_factory factory;   /* Compression callback factory */
357 
358   /* Sub-system handles */
359   FileSystem *pFS;                /* On-disk portion of database */
360   Database *pDatabase;            /* Database shared data */
361 
362   int iRwclient;                  /* Read-write client lock held (-1 == none) */
363 
364   /* Client transaction context */
365   Snapshot *pClient;              /* Client snapshot */
366   int iReader;                    /* Read lock held (-1 == unlocked) */
367   int bRoTrans;                   /* True if a read-only db trans is open */
368   MultiCursor *pCsr;              /* List of all open cursors */
369   LogWriter *pLogWriter;          /* Context for writing to the log file */
370   int nTransOpen;                 /* Number of opened write transactions */
371   int nTransAlloc;                /* Allocated size of aTrans[] array */
372   TransMark *aTrans;              /* Array of marks for transaction rollback */
373   IntArray rollback;              /* List of tree-nodes to roll back */
374   int bDiscardOld;                /* True if lsmTreeDiscardOld() was called */
375 
376   MultiCursor *pCsrCache;         /* List of all closed cursors */
377 
378   /* Worker context */
379   Snapshot *pWorker;              /* Worker snapshot (or NULL) */
380   Freelist *pFreelist;            /* See sortedNewToplevel() */
381   int bUseFreelist;               /* True to use pFreelist */
382   int bIncrMerge;                 /* True if currently doing a merge */
383 
384   int bInFactory;                 /* True if within factory.xFactory() */
385 
386   /* Debugging message callback */
387   void (*xLog)(void *, int, const char *);
388   void *pLogCtx;
389 
390   /* Work done notification callback */
391   void (*xWork)(lsm_db *, void *);
392   void *pWorkCtx;
393 
394   u64 mLock;                      /* Mask of current locks. See lsmShmLock(). */
395   lsm_db *pNext;                  /* Next connection to same database */
396 
397   int nShm;                       /* Size of apShm[] array */
398   void **apShm;                   /* Shared memory chunks */
399   ShmHeader *pShmhdr;             /* Live shared-memory header */
400   TreeHeader treehdr;             /* Local copy of tree-header */
401   u32 aSnapshot[LSM_META_PAGE_SIZE / sizeof(u32)];
402 };
403 
404 struct Segment {
405   Pgno iFirst;                     /* First page of this run */
406   Pgno iLastPg;                    /* Last page of this run */
407   Pgno iRoot;                      /* Root page number (if any) */
408   int nSize;                       /* Size of this run in pages */
409 
410   Redirect *pRedirect;             /* Block redirects (or NULL) */
411 };
412 
413 /*
414 ** iSplitTopic/pSplitKey/nSplitKey:
415 **   If nRight>0, this buffer contains a copy of the largest key that has
416 **   already been written to the left-hand-side of the level.
417 */
418 struct Level {
419   Segment lhs;                    /* Left-hand (main) segment */
420   int nRight;                     /* Size of apRight[] array */
421   Segment *aRhs;                  /* Old segments being merged into this */
422   int iSplitTopic;                /* Split key topic (if nRight>0) */
423   void *pSplitKey;                /* Pointer to split-key (if nRight>0) */
424   int nSplitKey;                  /* Number of bytes in split-key */
425 
426   u16 iAge;                       /* Number of times data has been written */
427   u16 flags;                      /* Mask of LEVEL_XXX bits */
428   Merge *pMerge;                  /* Merge operation currently underway */
429   Level *pNext;                   /* Next level in tree */
430 };
431 
432 /*
433 ** The Level.flags field is set to a combination of the following bits.
434 **
435 ** LEVEL_FREELIST_ONLY:
436 **   Set if the level consists entirely of free-list entries.
437 **
438 ** LEVEL_INCOMPLETE:
439 **   This is set while a new toplevel level is being constructed. It is
440 **   never set for any level other than a new toplevel.
441 */
442 #define LEVEL_FREELIST_ONLY      0x0001
443 #define LEVEL_INCOMPLETE         0x0002
444 
445 
446 /*
447 ** A structure describing an ongoing merge. There is an instance of this
448 ** structure for every Level currently undergoing a merge in the worker
449 ** snapshot.
450 **
451 ** It is assumed that code that uses an instance of this structure has
452 ** access to the associated Level struct.
453 **
454 ** iOutputOff:
455 **   The byte offset to write to next within the last page of the
456 **   output segment.
457 */
458 struct MergeInput {
459   Pgno iPg;                       /* Page on which next input is stored */
460   int iCell;                      /* Cell containing next input to merge */
461 };
462 struct Merge {
463   int nInput;                     /* Number of input runs being merged */
464   MergeInput *aInput;             /* Array nInput entries in size */
465   MergeInput splitkey;            /* Location in file of current splitkey */
466   int nSkip;                      /* Number of separators entries to skip */
467   int iOutputOff;                 /* Write offset on output page */
468   Pgno iCurrentPtr;               /* Current pointer value */
469 };
470 
471 /*
472 ** The first argument to this macro is a pointer to a Segment structure.
473 ** Returns true if the structure instance indicates that the separators
474 ** array is valid.
475 */
476 #define segmentHasSeparators(pSegment) ((pSegment)->sep.iFirst>0)
477 
478 /*
479 ** The values that accompany the lock held by a database reader.
480 */
481 struct ShmReader {
482   u32 iTreeId;
483   i64 iLsmId;
484 };
485 
486 /*
487 ** An instance of this structure is stored in the first shared-memory
488 ** page. The shared-memory header.
489 **
490 ** bWriter:
491 **   Immediately after opening a write transaction taking the WRITER lock,
492 **   each writer client sets this flag. It is cleared right before the
493 **   WRITER lock is relinquished. If a subsequent writer finds that this
494 **   flag is already set when a write transaction is opened, this indicates
495 **   that a previous writer failed mid-transaction.
496 **
497 ** iMetaPage:
498 **   If the database file does not contain a valid, synced, checkpoint, this
499 **   value is set to 0. Otherwise, it is set to the meta-page number that
500 **   contains the most recently written checkpoint (either 1 or 2).
501 **
502 ** hdr1, hdr2:
503 **   The two copies of the in-memory tree header. Two copies are required
504 **   in case a writer fails while updating one of them.
505 */
506 struct ShmHeader {
507   u32 aSnap1[LSM_META_PAGE_SIZE / 4];
508   u32 aSnap2[LSM_META_PAGE_SIZE / 4];
509   u32 bWriter;
510   u32 iMetaPage;
511   TreeHeader hdr1;
512   TreeHeader hdr2;
513   ShmReader aReader[LSM_LOCK_NREADER];
514 };
515 
516 /*
517 ** An instance of this structure is stored at the start of each shared-memory
518 ** chunk except the first (which is the header chunk - see above).
519 */
520 struct ShmChunk {
521   u32 iShmid;
522   u32 iNext;
523 };
524 
525 /*
526 ** Maximum number of shared-memory chunks allowed in the *-shm file. Since
527 ** each shared-memory chunk is 32KB in size, this is a theoretical limit only.
528 */
529 #define LSM_MAX_SHMCHUNKS  (1<<30)
530 
531 /* Return true if shm-sequence "a" is larger than or equal to "b" */
532 #define shm_sequence_ge(a, b) (((u32)a-(u32)b) < LSM_MAX_SHMCHUNKS)
533 
534 #define LSM_APPLIST_SZ 4
535 
536 /*
537 ** An instance of the following structure stores the in-memory part of
538 ** the current free block list. This structure is to the free block list
539 ** as the in-memory tree is to the users database content. The contents
540 ** of the free block list is found by merging the in-memory components
541 ** with those stored in the LSM, just as the contents of the database is
542 ** found by merging the in-memory tree with the user data entries in the
543 ** LSM.
544 **
545 ** Each FreelistEntry structure in the array represents either an insert
546 ** or delete operation on the free-list. For deletes, the FreelistEntry.iId
547 ** field is set to -1. For inserts, it is set to zero or greater.
548 **
549 ** The array of FreelistEntry structures is always sorted in order of
550 ** block number (ascending).
551 **
552 ** When the in-memory free block list is written into the LSM, each insert
553 ** operation is written separately. The entry key is the bitwise inverse
554 ** of the block number as a 32-bit big-endian integer. This is done so that
555 ** the entries in the LSM are sorted in descending order of block id.
556 ** The associated value is the snapshot id, formated as a varint.
557 */
558 struct Freelist {
559   FreelistEntry *aEntry;          /* Free list entries */
560   int nEntry;                     /* Number of valid slots in aEntry[] */
561   int nAlloc;                     /* Allocated size of aEntry[] */
562 };
563 struct FreelistEntry {
564   u32 iBlk;                       /* Block number */
565   i64 iId;                        /* Largest snapshot id to use this block */
566 };
567 
568 /*
569 ** A snapshot of a database. A snapshot contains all the information required
570 ** to read or write a database file on disk. See the description of struct
571 ** Database below for futher details.
572 */
573 struct Snapshot {
574   Database *pDatabase;            /* Database this snapshot belongs to */
575   u32 iCmpId;                     /* Id of compression scheme */
576   Level *pLevel;                  /* Pointer to level 0 of snapshot (or NULL) */
577   i64 iId;                        /* Snapshot id */
578   i64 iLogOff;                    /* Log file offset */
579   Redirect redirect;              /* Block redirection array */
580 
581   /* Used by worker snapshots only */
582   int nBlock;                     /* Number of blocks in database file */
583   Pgno aiAppend[LSM_APPLIST_SZ];  /* Append point list */
584   Freelist freelist;              /* Free block list */
585   u32 nWrite;                     /* Total number of pages written to disk */
586 };
587 #define LSM_INITIAL_SNAPSHOT_ID 11
588 
589 /*
590 ** Functions from file "lsm_ckpt.c".
591 */
592 int lsmCheckpointWrite(lsm_db *, u32 *);
593 int lsmCheckpointLevels(lsm_db *, int, void **, int *);
594 int lsmCheckpointLoadLevels(lsm_db *pDb, void *pVal, int nVal);
595 
596 int lsmCheckpointRecover(lsm_db *);
597 int lsmCheckpointDeserialize(lsm_db *, int, u32 *, Snapshot **);
598 
599 int lsmCheckpointLoadWorker(lsm_db *pDb);
600 int lsmCheckpointStore(lsm_db *pDb, int);
601 
602 int lsmCheckpointLoad(lsm_db *pDb, int *);
603 int lsmCheckpointLoadOk(lsm_db *pDb, int);
604 int lsmCheckpointClientCacheOk(lsm_db *);
605 
606 u32 lsmCheckpointNBlock(u32 *);
607 i64 lsmCheckpointId(u32 *, int);
608 u32 lsmCheckpointNWrite(u32 *, int);
609 i64 lsmCheckpointLogOffset(u32 *);
610 int lsmCheckpointPgsz(u32 *);
611 int lsmCheckpointBlksz(u32 *);
612 void lsmCheckpointLogoffset(u32 *aCkpt, DbLog *pLog);
613 void lsmCheckpointZeroLogoffset(lsm_db *);
614 
615 int lsmCheckpointSaveWorker(lsm_db *pDb, int);
616 int lsmDatabaseFull(lsm_db *pDb);
617 int lsmCheckpointSynced(lsm_db *pDb, i64 *piId, i64 *piLog, u32 *pnWrite);
618 
619 int lsmCheckpointSize(lsm_db *db, int *pnByte);
620 
621 int lsmInfoCompressionId(lsm_db *db, u32 *piCmpId);
622 
623 /*
624 ** Functions from file "lsm_tree.c".
625 */
626 int lsmTreeNew(lsm_env *, int (*)(void *, int, void *, int), Tree **ppTree);
627 void lsmTreeRelease(lsm_env *, Tree *);
628 int lsmTreeInit(lsm_db *);
629 int lsmTreeRepair(lsm_db *);
630 
631 void lsmTreeMakeOld(lsm_db *pDb);
632 void lsmTreeDiscardOld(lsm_db *pDb);
633 int lsmTreeHasOld(lsm_db *pDb);
634 
635 int lsmTreeSize(lsm_db *);
636 int lsmTreeEndTransaction(lsm_db *pDb, int bCommit);
637 int lsmTreeLoadHeader(lsm_db *pDb, int *);
638 int lsmTreeLoadHeaderOk(lsm_db *, int);
639 
640 int lsmTreeInsert(lsm_db *pDb, void *pKey, int nKey, void *pVal, int nVal);
641 int lsmTreeDelete(lsm_db *db, void *pKey1, int nKey1, void *pKey2, int nKey2);
642 void lsmTreeRollback(lsm_db *pDb, TreeMark *pMark);
643 void lsmTreeMark(lsm_db *pDb, TreeMark *pMark);
644 
645 int lsmTreeCursorNew(lsm_db *pDb, int, TreeCursor **);
646 void lsmTreeCursorDestroy(TreeCursor *);
647 
648 int lsmTreeCursorSeek(TreeCursor *pCsr, void *pKey, int nKey, int *pRes);
649 int lsmTreeCursorNext(TreeCursor *pCsr);
650 int lsmTreeCursorPrev(TreeCursor *pCsr);
651 int lsmTreeCursorEnd(TreeCursor *pCsr, int bLast);
652 void lsmTreeCursorReset(TreeCursor *pCsr);
653 int lsmTreeCursorKey(TreeCursor *pCsr, int *pFlags, void **ppKey, int *pnKey);
654 int lsmTreeCursorFlags(TreeCursor *pCsr);
655 int lsmTreeCursorValue(TreeCursor *pCsr, void **ppVal, int *pnVal);
656 int lsmTreeCursorValid(TreeCursor *pCsr);
657 int lsmTreeCursorSave(TreeCursor *pCsr);
658 
659 void lsmFlagsToString(int flags, char *zFlags);
660 
661 /*
662 ** Functions from file "mem.c".
663 */
664 void *lsmMalloc(lsm_env*, size_t);
665 void lsmFree(lsm_env*, void *);
666 void *lsmRealloc(lsm_env*, void *, size_t);
667 void *lsmReallocOrFree(lsm_env*, void *, size_t);
668 void *lsmReallocOrFreeRc(lsm_env *, void *, size_t, int *);
669 
670 void *lsmMallocZeroRc(lsm_env*, size_t, int *);
671 void *lsmMallocRc(lsm_env*, size_t, int *);
672 
673 void *lsmMallocZero(lsm_env *pEnv, size_t);
674 char *lsmMallocStrdup(lsm_env *pEnv, const char *);
675 
676 /*
677 ** Functions from file "lsm_mutex.c".
678 */
679 int lsmMutexStatic(lsm_env*, int, lsm_mutex **);
680 int lsmMutexNew(lsm_env*, lsm_mutex **);
681 void lsmMutexDel(lsm_env*, lsm_mutex *);
682 void lsmMutexEnter(lsm_env*, lsm_mutex *);
683 int lsmMutexTry(lsm_env*, lsm_mutex *);
684 void lsmMutexLeave(lsm_env*, lsm_mutex *);
685 
686 #ifndef NDEBUG
687 int lsmMutexHeld(lsm_env *, lsm_mutex *);
688 int lsmMutexNotHeld(lsm_env *, lsm_mutex *);
689 #endif
690 
691 /**************************************************************************
692 ** Start of functions from "lsm_file.c".
693 */
694 int lsmFsOpen(lsm_db *, const char *, int);
695 int lsmFsOpenLog(lsm_db *, int *);
696 void lsmFsCloseLog(lsm_db *);
697 void lsmFsClose(FileSystem *);
698 
699 int lsmFsUnmap(FileSystem *);
700 
701 int lsmFsConfigure(lsm_db *db);
702 
703 int lsmFsBlockSize(FileSystem *);
704 void lsmFsSetBlockSize(FileSystem *, int);
705 int lsmFsMoveBlock(FileSystem *pFS, Segment *pSeg, int iTo, int iFrom);
706 
707 int lsmFsPageSize(FileSystem *);
708 void lsmFsSetPageSize(FileSystem *, int);
709 
710 int lsmFsFileid(lsm_db *pDb, void **ppId, int *pnId);
711 
712 /* Creating, populating, gobbling and deleting sorted runs. */
713 void lsmFsGobble(lsm_db *, Segment *, Pgno *, int);
714 int lsmFsSortedDelete(FileSystem *, Snapshot *, int, Segment *);
715 int lsmFsSortedFinish(FileSystem *, Segment *);
716 int lsmFsSortedAppend(FileSystem *, Snapshot *, Level *, int, Page **);
717 int lsmFsSortedPadding(FileSystem *, Snapshot *, Segment *);
718 
719 /* Functions to retrieve the lsm_env pointer from a FileSystem or Page object */
720 lsm_env *lsmFsEnv(FileSystem *);
721 lsm_env *lsmPageEnv(Page *);
722 FileSystem *lsmPageFS(Page *);
723 
724 int lsmFsSectorSize(FileSystem *);
725 
726 void lsmSortedSplitkey(lsm_db *, Level *, int *);
727 
728 /* Reading sorted run content. */
729 int lsmFsDbPageLast(FileSystem *pFS, Segment *pSeg, Page **ppPg);
730 int lsmFsDbPageGet(FileSystem *, Segment *, Pgno, Page **);
731 int lsmFsDbPageNext(Segment *, Page *, int eDir, Page **);
732 
733 u8 *lsmFsPageData(Page *, int *);
734 int lsmFsPageRelease(Page *);
735 int lsmFsPagePersist(Page *);
736 void lsmFsPageRef(Page *);
737 Pgno lsmFsPageNumber(Page *);
738 
739 int lsmFsNRead(FileSystem *);
740 int lsmFsNWrite(FileSystem *);
741 
742 int lsmFsMetaPageGet(FileSystem *, int, int, MetaPage **);
743 int lsmFsMetaPageRelease(MetaPage *);
744 u8 *lsmFsMetaPageData(MetaPage *, int *);
745 
746 #ifdef LSM_DEBUG
747 int lsmFsDbPageIsLast(Segment *pSeg, Page *pPg);
748 int lsmFsIntegrityCheck(lsm_db *);
749 #endif
750 
751 Pgno lsmFsRedirectPage(FileSystem *, Redirect *, Pgno);
752 
753 int lsmFsPageWritable(Page *);
754 
755 /* Functions to read, write and sync the log file. */
756 int lsmFsWriteLog(FileSystem *pFS, i64 iOff, LsmString *pStr);
757 int lsmFsSyncLog(FileSystem *pFS);
758 int lsmFsReadLog(FileSystem *pFS, i64 iOff, int nRead, LsmString *pStr);
759 int lsmFsTruncateLog(FileSystem *pFS, i64 nByte);
760 int lsmFsTruncateDb(FileSystem *pFS, i64 nByte);
761 int lsmFsCloseAndDeleteLog(FileSystem *pFS);
762 
763 LsmFile *lsmFsDeferClose(FileSystem *pFS);
764 
765 /* And to sync the db file */
766 int lsmFsSyncDb(FileSystem *, int);
767 
768 void lsmFsFlushWaiting(FileSystem *, int *);
769 
770 /* Used by lsm_info(ARRAY_STRUCTURE) and lsm_config(MMAP) */
771 int lsmInfoArrayStructure(lsm_db *pDb, int bBlock, Pgno iFirst, char **pzOut);
772 int lsmInfoArrayPages(lsm_db *pDb, Pgno iFirst, char **pzOut);
773 int lsmConfigMmap(lsm_db *pDb, int *piParam);
774 
775 int lsmEnvOpen(lsm_env *, const char *, int, lsm_file **);
776 int lsmEnvClose(lsm_env *pEnv, lsm_file *pFile);
777 int lsmEnvLock(lsm_env *pEnv, lsm_file *pFile, int iLock, int eLock);
778 int lsmEnvTestLock(lsm_env *pEnv, lsm_file *pFile, int iLock, int nLock, int);
779 
780 int lsmEnvShmMap(lsm_env *, lsm_file *, int, int, void **);
781 void lsmEnvShmBarrier(lsm_env *);
782 void lsmEnvShmUnmap(lsm_env *, lsm_file *, int);
783 
784 void lsmEnvSleep(lsm_env *, int);
785 
786 int lsmFsReadSyncedId(lsm_db *db, int, i64 *piVal);
787 
788 int lsmFsSegmentContainsPg(FileSystem *pFS, Segment *, Pgno, int *);
789 
790 void lsmFsPurgeCache(FileSystem *);
791 
792 /*
793 ** End of functions from "lsm_file.c".
794 **************************************************************************/
795 
796 /*
797 ** Functions from file "lsm_sorted.c".
798 */
799 int lsmInfoPageDump(lsm_db *, Pgno, int, char **);
800 void lsmSortedCleanup(lsm_db *);
801 int lsmSortedAutoWork(lsm_db *, int nUnit);
802 
803 int lsmSortedWalkFreelist(lsm_db *, int, int (*)(void *, int, i64), void *);
804 
805 int lsmSaveWorker(lsm_db *, int);
806 
807 int lsmFlushTreeToDisk(lsm_db *pDb);
808 
809 void lsmSortedRemap(lsm_db *pDb);
810 
811 void lsmSortedFreeLevel(lsm_env *pEnv, Level *);
812 
813 int lsmSortedAdvanceAll(lsm_db *pDb);
814 
815 int lsmSortedLoadMerge(lsm_db *, Level *, u32 *, int *);
816 int lsmSortedLoadFreelist(lsm_db *pDb, void **, int *);
817 
818 void *lsmSortedSplitKey(Level *pLevel, int *pnByte);
819 
820 void lsmSortedSaveTreeCursors(lsm_db *);
821 
822 int lsmMCursorNew(lsm_db *, MultiCursor **);
823 void lsmMCursorClose(MultiCursor *, int);
824 int lsmMCursorSeek(MultiCursor *, int, void *, int , int);
825 int lsmMCursorFirst(MultiCursor *);
826 int lsmMCursorPrev(MultiCursor *);
827 int lsmMCursorLast(MultiCursor *);
828 int lsmMCursorValid(MultiCursor *);
829 int lsmMCursorNext(MultiCursor *);
830 int lsmMCursorKey(MultiCursor *, void **, int *);
831 int lsmMCursorValue(MultiCursor *, void **, int *);
832 int lsmMCursorType(MultiCursor *, int *);
833 lsm_db *lsmMCursorDb(MultiCursor *);
834 void lsmMCursorFreeCache(lsm_db *);
835 
836 int lsmSaveCursors(lsm_db *pDb);
837 int lsmRestoreCursors(lsm_db *pDb);
838 
839 void lsmSortedDumpStructure(lsm_db *pDb, Snapshot *, int, int, const char *);
840 void lsmFsDumpBlocklists(lsm_db *);
841 
842 void lsmSortedExpandBtreePage(Page *pPg, int nOrig);
843 
844 void lsmPutU32(u8 *, u32);
845 u32 lsmGetU32(u8 *);
846 u64 lsmGetU64(u8 *);
847 
848 /*
849 ** Functions from "lsm_varint.c".
850 */
851 int lsmVarintPut32(u8 *, int);
852 int lsmVarintGet32(u8 *, int *);
853 int lsmVarintPut64(u8 *aData, i64 iVal);
854 int lsmVarintGet64(const u8 *aData, i64 *piVal);
855 
856 int lsmVarintLen32(int);
857 int lsmVarintSize(u8 c);
858 
859 /*
860 ** Functions from file "main.c".
861 */
862 void lsmLogMessage(lsm_db *, int, const char *, ...);
863 int lsmInfoFreelist(lsm_db *pDb, char **pzOut);
864 
865 /*
866 ** Functions from file "lsm_log.c".
867 */
868 int lsmLogBegin(lsm_db *pDb);
869 int lsmLogWrite(lsm_db *, int, void *, int, void *, int);
870 int lsmLogCommit(lsm_db *);
871 void lsmLogEnd(lsm_db *pDb, int bCommit);
872 void lsmLogTell(lsm_db *, LogMark *);
873 void lsmLogSeek(lsm_db *, LogMark *);
874 void lsmLogClose(lsm_db *);
875 
876 int lsmLogRecover(lsm_db *);
877 int lsmInfoLogStructure(lsm_db *pDb, char **pzVal);
878 
879 /* Valid values for the second argument to lsmLogWrite(). */
880 #define LSM_WRITE        0x06
881 #define LSM_DELETE       0x08
882 #define LSM_DRANGE       0x0A
883 
884 /**************************************************************************
885 ** Functions from file "lsm_shared.c".
886 */
887 
888 int lsmDbDatabaseConnect(lsm_db*, const char *);
889 void lsmDbDatabaseRelease(lsm_db *);
890 
891 int lsmBeginReadTrans(lsm_db *);
892 int lsmBeginWriteTrans(lsm_db *);
893 int lsmBeginFlush(lsm_db *);
894 
895 int lsmDetectRoTrans(lsm_db *db, int *);
896 int lsmBeginRoTrans(lsm_db *db);
897 
898 int lsmBeginWork(lsm_db *);
899 void lsmFinishWork(lsm_db *, int, int *);
900 
901 int lsmFinishRecovery(lsm_db *);
902 void lsmFinishReadTrans(lsm_db *);
903 int lsmFinishWriteTrans(lsm_db *, int);
904 int lsmFinishFlush(lsm_db *, int);
905 
906 int lsmSnapshotSetFreelist(lsm_db *, int *, int);
907 
908 Snapshot *lsmDbSnapshotClient(lsm_db *);
909 Snapshot *lsmDbSnapshotWorker(lsm_db *);
910 
911 void lsmSnapshotSetCkptid(Snapshot *, i64);
912 
913 Level *lsmDbSnapshotLevel(Snapshot *);
914 void lsmDbSnapshotSetLevel(Snapshot *, Level *);
915 
916 void lsmDbRecoveryComplete(lsm_db *, int);
917 
918 int lsmBlockAllocate(lsm_db *, int, int *);
919 int lsmBlockFree(lsm_db *, int);
920 int lsmBlockRefree(lsm_db *, int);
921 
922 void lsmFreelistDeltaBegin(lsm_db *);
923 void lsmFreelistDeltaEnd(lsm_db *);
924 int lsmFreelistDelta(lsm_db *pDb);
925 
926 DbLog *lsmDatabaseLog(lsm_db *pDb);
927 
928 #ifdef LSM_DEBUG
929   int lsmHoldingClientMutex(lsm_db *pDb);
930   int lsmShmAssertLock(lsm_db *db, int iLock, int eOp);
931   int lsmShmAssertWorker(lsm_db *db);
932 #endif
933 
934 void lsmFreeSnapshot(lsm_env *, Snapshot *);
935 
936 
937 /* Candidate values for the 3rd argument to lsmShmLock() */
938 #define LSM_LOCK_UNLOCK 0
939 #define LSM_LOCK_SHARED 1
940 #define LSM_LOCK_EXCL   2
941 
942 int lsmShmCacheChunks(lsm_db *db, int nChunk);
943 int lsmShmLock(lsm_db *db, int iLock, int eOp, int bBlock);
944 int lsmShmTestLock(lsm_db *db, int iLock, int nLock, int eOp);
945 void lsmShmBarrier(lsm_db *db);
946 
947 #ifdef LSM_DEBUG
948 void lsmShmHasLock(lsm_db *db, int iLock, int eOp);
949 #else
950 # define lsmShmHasLock(x,y,z)
951 #endif
952 
953 int lsmReadlock(lsm_db *, i64 iLsm, u32 iShmMin, u32 iShmMax);
954 
955 int lsmLsmInUse(lsm_db *db, i64 iLsmId, int *pbInUse);
956 int lsmTreeInUse(lsm_db *db, u32 iLsmId, int *pbInUse);
957 int lsmFreelistAppend(lsm_env *pEnv, Freelist *p, int iBlk, i64 iId);
958 
959 int lsmDbMultiProc(lsm_db *);
960 void lsmDbDeferredClose(lsm_db *, lsm_file *, LsmFile *);
961 LsmFile *lsmDbRecycleFd(lsm_db *);
962 
963 int lsmWalkFreelist(lsm_db *, int, int (*)(void *, int, i64), void *);
964 
965 int lsmCheckCompressionId(lsm_db *, u32);
966 
967 
968 /**************************************************************************
969 ** functions in lsm_str.c
970 */
971 void lsmStringInit(LsmString*, lsm_env *pEnv);
972 int lsmStringExtend(LsmString*, int);
973 int lsmStringAppend(LsmString*, const char *, int);
974 void lsmStringVAppendf(LsmString*, const char *zFormat, va_list, va_list);
975 void lsmStringAppendf(LsmString*, const char *zFormat, ...);
976 void lsmStringClear(LsmString*);
977 char *lsmMallocPrintf(lsm_env*, const char*, ...);
978 int lsmStringBinAppend(LsmString *pStr, const u8 *a, int n);
979 
980 int lsmStrlen(const char *zName);
981 
982 
983 
984 /*
985 ** Round up a number to the next larger multiple of 8.  This is used
986 ** to force 8-byte alignment on 64-bit architectures.
987 */
988 #define ROUND8(x)     (((x)+7)&~7)
989 
990 #define LSM_MIN(x,y) ((x)>(y) ? (y) : (x))
991 #define LSM_MAX(x,y) ((x)>(y) ? (x) : (y))
992 
993 #endif
994