1 /*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 2012, 2013 Oracle and/or its affiliates. All rights reserved. 5 */ 6 7 #include <errno.h> 8 9 #include "sqliteInt.h" 10 #include <db.h> 11 12 #ifdef BDBSQL_SHARE_PRIVATE 13 /* BDBSQL_SHARE_PRIVATE implies BDBSQL_SINGLE_PROCESS */ 14 #define BDBSQL_SINGLE_PROCESS 15 #endif 16 17 #define INTKEY_BUFSIZE (sizeof(i64) + 2) /* We add 2 bytes to negatives. */ 18 #define MULTI_BUFSIZE 8 * SQLITE_DEFAULT_PAGE_SIZE 19 #define DBNAME_SIZE 20 20 #define NUMMETA 16 21 #define NUM_DB_PRAGMA 30 22 #define CURSOR_BUFSIZE 32 /* For holding index keys. */ 23 /* This should match SQLite VFS.mxPathname */ 24 #define BT_MAX_PATH 512 25 26 #define BT_MAX_SEQ_NAME 128 27 28 /* 29 * The default size of the Berkeley DB environment's logging area, in 30 * bytes. 31 */ 32 #ifndef BDBSQL_LOG_REGIONMAX 33 # define BDBSQL_LOG_REGIONMAX (300 * 1024) 34 #endif 35 36 /* 37 * The default policy for enabling the transactional bulk insert 38 * optimization. 39 */ 40 #ifndef BDBSQL_TXN_BULK_DEFAULT 41 # define BDBSQL_TXN_BULK_DEFAULT 0 42 #endif 43 44 /* 45 * The default pages number for incremental vacuum 46 */ 47 #ifndef BDBSQL_INCR_VACUUM_PAGES 48 # define BDBSQL_INCR_VACUUM_PAGES 128 49 #endif 50 51 /* 52 * The default fill percent for vacuum 53 */ 54 #ifndef BDBSQL_VACUUM_FILLPERCENT 55 # define BDBSQL_VACUUM_FILLPERCENT 85 56 #endif 57 58 #ifndef UINT32_MAX /* Maximum 32-bit unsigned. */ 59 #define UINT32_MAX 4294967295U 60 #endif 61 #ifndef INT64_MAX 62 #define INT64_MAX ((((i64)0x7fffffff) << 32) | 0xffffffff) 63 #endif 64 #ifndef GIGABYTE 65 #define GIGABYTE 1073741824 66 #endif 67 68 #define MAP_ERR(rc, ret, p) \ 69 ((rc != SQLITE_OK) ? rc : (ret == 0) ? SQLITE_OK : \ 70 dberr2sqlite(ret, p)) 71 72 #define MAP_ERR_LOCKED(rc, ret, p) \ 73 ((rc != SQLITE_OK) ? rc : (ret == 0) ? SQLITE_OK : \ 74 dberr2sqlitelocked(ret, p)) 75 76 /* Declare custom functions added by Berkeley DB to SQL. */ 77 int add_sequence_functions(sqlite3 *db); 78 79 typedef int (*compareFunc)(void*,int,const void*,int,const void*); 80 81 typedef struct { 82 /* 83 * There are two types of tables stored in this cache: 84 * * Normal tables created by SQLite. These have 8 char names. 85 * * Tables used to handle sequences, which can have arbitrary names. 86 */ 87 #define CACHE_KEY_SIZE 9 /* 8 hex characters + NUL */ 88 char key[BT_MAX_SEQ_NAME]; 89 DB *dbp; 90 int is_sequence; 91 db_lockmode_t lock_mode; 92 int created; 93 void *cookie; 94 } CACHED_DB; 95 96 typedef struct { 97 int32_t cache; 98 int64_t min_val; 99 int64_t max_val; 100 int64_t start_val; 101 int32_t incr; 102 u8 decrementing; 103 u8 used; 104 int64_t val; /* If not using a cache - this is the last value. */ 105 char name[BT_MAX_SEQ_NAME]; 106 int32_t name_len; 107 DB_SEQUENCE *handle; /* Never used directly from the DB cache key. */ 108 } SEQ_COOKIE; 109 110 typedef struct { 111 u32 value; 112 u8 cached; 113 } CACHED_META; 114 115 typedef struct { 116 char *value; 117 u32 size; 118 u32 offset; 119 } CACHED_PRAGMA; 120 121 typedef struct DELETED_TABLE DELETED_TABLE; 122 struct DELETED_TABLE { 123 int iTable; 124 DB_TXN *txn; 125 #ifdef BDBSQL_FILE_PER_TABLE 126 int flag; 127 #define DTF_DELETE 0x00 128 #define DTF_DROP 0x01 129 #endif 130 DELETED_TABLE *next; 131 }; 132 133 #ifndef BDBSQL_SINGLE_THREAD 134 typedef struct { 135 BtShared *pBt; 136 KeyInfo *pKeyInfo; 137 int iTable; 138 } TableInfo; 139 #endif 140 141 #ifdef BDBSQL_SHARE_PRIVATE 142 typedef struct { 143 int fd; 144 void *mapAddr; 145 int generation; 146 int readlock_count; 147 int writelock_count; 148 int write_waiting; 149 int in_env_open; 150 sqlite3_mutex *mutex; 151 } LockFileInfo; 152 #endif 153 154 typedef enum { CLEANUP_COMMIT, CLEANUP_ABORT, CLEANUP_CLOSE, 155 CLEANUP_DROP_LOCKS, CLEANUP_GET_LOCKS } cleanup_mode_t; 156 /* There are three possible table types in SQLite. */ 157 typedef enum { DB_STORE_NAMED, DB_STORE_TMP, DB_STORE_INMEM } storage_mode_t; 158 typedef enum { TRANS_NONE, TRANS_READ, TRANS_WRITE } txn_mode_t; 159 typedef enum { LOCKMODE_NONE, LOCKMODE_READ, LOCKMODE_WRITE } lock_mode_t; 160 typedef enum { NO_LSN_RESET, LSN_RESET_FILE } lsn_reset_t; 161 162 /* Declarations for functions that are shared by adapter source files. */ 163 int btreeBeginTransInternal(Btree *p, int wrflag); 164 void *btreeCreateIndexKey(BtCursor *pCur); 165 void btreeGetErrorFile(const BtShared *pBt, char *fname); 166 Index *btreeGetIndex(Btree *p, int iTable); 167 int btreeGetPageCount(Btree *p, int **tables, u32 *pageCount, DB_TXN *txn); 168 int btreeGetUserTable(Btree *p, DB_TXN *pTxn, DB **pDb, int iTable); 169 int btreeGetTables(Btree *, int **, DB_TXN *); 170 int btreeLockSchema(Btree *p, lock_mode_t lockMode); 171 int btreeOpenEnvironment(Btree *p, int needLock); 172 int btreeOpenMetaTables(Btree *p, int *pCreating); 173 int btreeReopenEnvironment(Btree *p, int removingRep); 174 int btreeUpdateBtShared(Btree *p, int needLock); 175 #ifndef SQLITE_OMIT_VACUUM 176 int btreeIncrVacuum(Btree *p, u_int32_t *truncatedPages); 177 int btreeVacuum(Btree *p, char **pzErrMsg); 178 void btreeFreeVacuumInfo(Btree *p); 179 #endif 180 int dberr2sqlite(int, Btree *p); 181 int closeDB(Btree *p, DB *dbp, u_int32_t flags); 182 void *allocateCursorIndex(BtCursor *pCur, u_int32_t amount); 183 int splitIndexKey(BtCursor *pCur); 184 int isDupIndex(int flags, int storage, KeyInfo *keyInfo, DB *db); 185 #ifdef BDBSQL_SHARE_PRIVATE 186 int btreeScopedFileLock(Btree *p, int iswrite, int dontreopen); 187 int btreeScopedFileUnlock(Btree *p, int iswrite); 188 int btreeHasFileLock(Btree *p, int iswrite); 189 #endif 190 #ifdef SQLITE_HAS_CODEC 191 int sqlite3CodecAttach(sqlite3*, int, const void*, int); 192 #endif 193 int getPersistentPragma(Btree *p, const char *pragma_name, char **value, 194 Parse *pParse); 195 int setPersistentPragma(Btree *p, const char *pragma_name, const char *value, 196 Parse *pParse); 197 int encodeI64(u_int8_t *buf, i64 num); 198 int cleanPragmaCache(Btree *p); 199 int getHostPort(const char *hpstr, char **host, u_int *port); 200 int setRepVerboseFile(BtShared *pBt, DB_ENV *dbenv, const char *fname, 201 char *msg); 202 int unsetRepVerboseFile(BtShared *pBt, DB_ENV *dbenv, char **msg); 203 /* Returns the thread id as a void *, which needs to be freed. */ 204 void *getThreadID(sqlite3 *db); 205 /* Checks if the thread id item identifies the current thread. */ 206 int isCurrentThread(void *tid); 207 208 #define CLEAR_PWD(pBt) do { \ 209 memset((pBt)->encrypt_pwd, 0xff, (pBt)->encrypt_pwd_len); \ 210 free((pBt)->encrypt_pwd); \ 211 (pBt)->encrypt_pwd_len = 0; \ 212 (pBt)->encrypt_pwd = NULL; \ 213 } while (0) 214 215 /* 216 * There is some subtlety about which mutex to use: for shared handles, we 217 * update some structures that are protected by the open mutex. In-memory 218 * databases all share the same g_tmp_env handle, so we need to make sure they 219 * get it single-threaded (so the initial open is done once). 220 * 221 * However, we can't use the open mutex to protect transient database opens and 222 * closes: we might already be holding locks in a shared environment when we 223 * try to open the temporary env, which would lead to a lock/mutex deadlock. 224 * We take a different static mutex from SQLite, previously used in the pager. 225 */ 226 #define OPEN_MUTEX(store) ((store == DB_STORE_NAMED) ? \ 227 SQLITE_MUTEX_STATIC_OPEN : SQLITE_MUTEX_STATIC_LRU) 228 229 #ifdef BDBSQL_FILE_PER_TABLE 230 /* Name of the metadata table in BDBSQL_FILE_PER_TABLE */ 231 #define BDBSQL_META_DATA_TABLE "metadata" 232 int getMetaDataFileName(const char *full_name, char **filename); 233 #endif 234 235 struct BtShared { 236 char *dir_name; 237 char *full_name; 238 char *short_name; /* A pointer into orig_name memory. */ 239 char *orig_name; 240 char *err_file; 241 char *err_msg; 242 u_int8_t fileid[DB_FILE_ID_LEN]; 243 char *encrypt_pwd; 244 lsn_reset_t lsn_reset; 245 storage_mode_t dbStorage; 246 u_int32_t env_oflags; 247 DB_ENV *dbenv; 248 int env_opened, encrypted, encrypt_pwd_len, last_table, need_open; 249 /* 250 * Handles for the metadata DB, which holds the SQLite metadata for a 251 * file, and the tables DB, which is the Berkeley DB-internal database 252 * of sub-databases in a file. 253 */ 254 DB *metadb, *tablesdb; 255 /* Caches persistent pragma values. */ 256 CACHED_PRAGMA pragma[NUM_DB_PRAGMA]; 257 sqlite3_mutex *pragma_cache_mutex; 258 u8 cache_loaded; 259 CACHED_META meta[NUMMETA]; 260 Hash db_cache; 261 #ifdef BDBSQL_SHARE_PRIVATE 262 LockFileInfo lockfile; 263 u_int32_t mp_mutex_count; 264 #endif 265 /* 266 * A unique name is assigned to each in memory table. This value is 267 * used to ensure that each BtShared object gets a unique identifier. 268 * NOTE: For DB_STORE_INMEM tables, despite sharing the same environment 269 * handle, the internal table name is unique because it comprises of 270 * both the uid and iTable. 271 */ 272 u_int32_t uid; 273 u_int32_t flags; 274 u_int32_t panic; /* If the environment is not in a usable state. */ 275 u_int32_t db_oflags; 276 u_int32_t transactional; 277 u_int32_t pageSize; 278 u_int32_t pageCount; 279 u_int32_t pageSizeFixed; 280 u_int32_t cacheSize; 281 u_int32_t logFileSize; /* In bytes */ 282 u_int32_t database_existed; /* Did the database file exist on open. */ 283 u_int32_t read_txn_flags; /* Flags passed to the read transaction. */ 284 u8 autoVacuum; /* Is auto-vacuum enabled? */ 285 u8 incrVacuum; /* Is incremental vacuum enabled? */ 286 u8 resultsBuffer; /* Query results are stored in a in-memory buffer */ 287 u8 secureDelete; /* Overwrite deleted data */ 288 /* Non-recursive mutex required to access this struct */ 289 sqlite3_mutex *mutex; 290 BtCursor *first_cursor; 291 292 /* Fields used to maintain the linked list of shared objects. */ 293 BtShared *pNextDb; 294 BtShared *pPrevDb; 295 Btree *btrees; /* A linked list of btrees that have been opened in this BtShared. */ 296 int nRef; 297 int readonly; 298 int repStartMaster; /* Start replication site as initial master? */ 299 FILE *repVerbFile; /* File for replication verbose output. */ 300 int repStarted; /* Replication is configured and started. */ 301 int repForceRecover; /* Force recovery on next open environment. */ 302 int single_process; /* If non-zero, keep all environment on the heap. */ 303 }; 304 305 struct BtCursor { 306 Btree *pBtree; 307 int tableIndex; 308 u_int32_t flags; 309 u8 isDupIndex, isFirst, isIncrblobHandle, wrFlag; 310 CACHED_DB *cached_db; 311 DBC *dbc; 312 DB_TXN *txn; 313 struct KeyInfo *keyInfo; 314 enum { 315 CURSOR_INVALID, CURSOR_VALID, CURSOR_REQUIRESEEK, CURSOR_FAULT 316 } eState; 317 int error, lastRes; 318 i64 cachedRowid, savedIntKey, lastKey; 319 DBT key, data, index; 320 i64 nKey; 321 u8 indexKeyBuf[CURSOR_BUFSIZE]; 322 DBT multiData; 323 void *multiGetPtr, *multiPutPtr; 324 void *threadID; 325 int skipMulti; 326 BtCursor *next; 327 }; 328 329 struct Btree { 330 struct BtShared *pBt; 331 sqlite3 *db; 332 333 int connected; /* Set up with an open environment */ 334 DB_TXN *family_txn; /* Makes txns and cursors lock-compatible. */ 335 DB_TXN *main_txn; /* Base transaction for read and savepoint. */ 336 DB_TXN *read_txn; 337 DB_TXN *savepoint_txn; 338 int nSavepoint; /* The number of open savepoints. */ 339 #ifdef BDBSQL_SHARE_PRIVATE 340 int maintxn_is_write; 341 #endif 342 int vfsFlags; 343 344 void* schema; /* Opaque schema handle used by SQLite */ 345 void (*free_schema)(void*); /* Destructor for schema */ 346 347 DELETED_TABLE *deleted_tables; 348 349 struct VacuumInfo { 350 DBT start; 351 int iTable; 352 struct VacuumInfo* next; 353 } *vacuumInfo; /* Keep incremental vacuum infomation */ 354 u8 inVacuum; /* True if vacuum is in progress */ 355 u8 needVacuum; /* True if the Btree needs vacuum in txn commit */ 356 u32 vacuumPages; /* Num of pages for AutoVacuum/IncrVacuum */ 357 u32 fillPercent; /* fillPercent for Vacuum */ 358 DBC *compact_cursor; /* Walks over table names during vacuum. */ 359 360 txn_mode_t inTrans; 361 lock_mode_t schemaLockMode; 362 DBC *schemaLock; 363 u8 sharable; /* True if we can share pBt with another db */ 364 u8 locked; /* True if db currently has pBt locked */ 365 u8 txn_excl; /* True if in an exclusive transaction */ 366 u8 txn_bulk; /* True to enable the bulk loading optimization */ 367 u32 txn_priority; /* Transaction priority. */ 368 int wantToLock; /* Number of nested calls to sqlite3BtreeEnter() */ 369 int nBackup; /* Number of backup operations reading this btree */ 370 u32 updateDuringBackup; /* An update was performed during a backup. */ 371 int readonly; 372 Btree *pNext; 373 Btree *pPrev; 374 }; 375 376 /* Shared by btree.c and btmutex.c */ 377 typedef enum { 378 LOG_VERBOSE, LOG_DEBUG, LOG_NORMAL, LOG_RELEASE, LOG_NONE 379 } loglevel_t; 380 381 #define CURRENT_LOG_LEVEL LOG_RELEASE 382 383 #ifdef NDEBUG 384 #define log_msg(...) 385 #else 386 /* Utility functions. */ 387 void log_msg(loglevel_t level, const char *fmt, ...); 388 #endif 389 390 /* 391 * Common functions for internal DBSQL btree components (btree.c, vacuum.c, etc) 392 */ 393 int btreeFindOrCreateDataTable(Btree *, int *, CACHED_DB **, int); 394 int btreeGetKeyInfo(Btree *p, int iTable, KeyInfo **pKeyInfo); 395 int btreeTableNameToId(const char *subdb, int len, int *pid); 396 397 /* 398 * Common macros for internal DBSQL btree components (btree.c, vacuum.c, etc) 399 */ 400 #define pDbEnv (pBt->dbenv) 401 #define pMetaDb (pBt->metadb) 402 #define pTablesDb (pBt->tablesdb) 403 #define pFamilyTxn (p->family_txn) 404 #define pReadTxn (p->read_txn) 405 #define pMainTxn (p->main_txn) 406 #define pSavepointTxn (p->savepoint_txn) 407 408 #ifdef BDBSQL_FILE_PER_TABLE 409 #define FIX_TABLENAME(pBt, fileName, tableName) do { \ 410 if (pBt->dbStorage == DB_STORE_NAMED) { \ 411 fileName = tableName; \ 412 } else \ 413 fileName = pBt->short_name; \ 414 } while (0) 415 #else 416 #define FIX_TABLENAME(pBt, fileName, tableName) do { \ 417 fileName = pBt->short_name; \ 418 } while (0) 419 #endif 420 421 #define GET_AUTO_COMMIT(pBt, txn) (((pBt)->transactional && \ 422 (!(txn) || (txn) == pFamilyTxn)) ? DB_AUTO_COMMIT : 0) 423 424 /* 425 * If an update occurs while this Btree is also performing backup then 426 * increase the updateDuringBackup counter. This value is checked before 427 * and after each backup step, and if it has increase then the backup 428 * process is reset. 429 */ 430 #define UPDATE_DURING_BACKUP(p) \ 431 if (p->nBackup > 0) \ 432 p->updateDuringBackup++; 433 434