1 /*
2 ** 2008 October 7
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 **
** This file contains code used to implement an in-memory rollback journal.
14 ** The in-memory rollback journal is used to journal transactions for
15 ** ":memory:" databases and when the journal_mode=MEMORY pragma is used.
16 **
17 ** Update: The in-memory journal is also used to temporarily cache
18 ** smaller journals that are not critical for power-loss recovery.
19 ** For example, statement journals that are not too big will be held
20 ** entirely in memory, thus reducing the number of file I/O calls, and
21 ** more importantly, reducing temporary file creation events. If these
22 ** journals become too large for memory, they are spilled to disk. But
23 ** in the common case, they are usually small and no file I/O needs to
24 ** occur.
25 */
26 #include "sqliteInt.h"
27
/*
** Forward references to internal structures.  Each typedef allows the
** corresponding struct to be named without the "struct" keyword, including
** inside the structure definitions that follow.
*/
typedef struct MemJournal MemJournal;   /* An open in-memory journal file */
typedef struct FilePoint FilePoint;     /* Read or write cursor into a journal */
typedef struct FileChunk FileChunk;     /* One link in the list of data chunks */
32
/*
** The rollback journal is composed of a linked list of these structures.
**
** The zChunk array is always at least 8 bytes in size - usually much more.
** Its actual size is stored in the MemJournal.nChunkSize variable.
**
** zChunk is declared with only 8 elements, but each FileChunk is allocated
** with fileChunkSize(nChunkSize) bytes so that nChunkSize bytes of content
** can be addressed through zChunk.  For that to work zChunk has to remain
** the last member of the structure.
*/
struct FileChunk {
  FileChunk *pNext;               /* Next chunk in the journal */
  u8 zChunk[8];                   /* Content of this chunk */
};
43
/*
** By default, allocate this many bytes of memory for each FileChunk object.
**
** This is the size of the whole FileChunk allocation, not of the content
** area alone: when sqlite3JournalOpen() is called with a negative nSpill
** it derives the per-chunk content size from this value.
*/
#define MEMJOURNAL_DFLT_FILECHUNKSIZE 1024
48
/*
** For chunk size nChunkSize, return the number of bytes that should
** be allocated for each FileChunk structure.
**
** sizeof(FileChunk) already includes the 8 bytes declared for
** FileChunk.zChunk, which is why 8 is subtracted from nChunkSize.
** The macro argument is evaluated exactly once.
*/
#define fileChunkSize(nChunkSize) (sizeof(FileChunk) + ((nChunkSize)-8))
54
/*
** An instance of this object serves as a cursor into the rollback journal.
** The cursor can be either for reading or writing.
**
** Each MemJournal embeds two of these: "endpoint", which marks the end of
** the journal (where data is appended), and "readpoint", which marks the
** end of the most recent read.
*/
struct FilePoint {
  sqlite3_int64 iOffset;          /* Offset from the beginning of the file */
  FileChunk *pChunk;              /* Specific chunk into which cursor points */
};
63
/*
** This structure is a subclass of sqlite3_file. Each open memory-journal
** is an instance of this class.
**
** The functions in this file cast freely between sqlite3_file* and
** MemJournal*, so the method-table pointer has to be the first member.
**
** The flags, pVfs and zJournal members are not used while the journal
** lives in memory.  They are saved so that memjrnlCreateFile() can pass
** them to sqlite3OsOpen() if the journal is later moved to a real file.
*/
struct MemJournal {
  const sqlite3_io_methods *pMethod; /* Parent class. MUST BE FIRST */
  int nChunkSize;                 /* In-memory chunk-size */

  int nSpill;                     /* Bytes of data before flushing */
  int nSize;                      /* Bytes of data currently in memory */
  FileChunk *pFirst;              /* Head of in-memory chunk-list */
  FilePoint endpoint;             /* Pointer to the end of the file */
  FilePoint readpoint;            /* Pointer to the end of the last xRead() */

  int flags;                      /* xOpen flags */
  sqlite3_vfs *pVfs;              /* The "real" underlying VFS */
  const char *zJournal;           /* Name of the journal file */
};
82
83 /*
84 ** Read data from the in-memory journal file. This is the implementation
85 ** of the sqlite3_vfs.xRead method.
86 */
memjrnlRead(sqlite3_file * pJfd,void * zBuf,int iAmt,sqlite_int64 iOfst)87 static int memjrnlRead(
88 sqlite3_file *pJfd, /* The journal file from which to read */
89 void *zBuf, /* Put the results here */
90 int iAmt, /* Number of bytes to read */
91 sqlite_int64 iOfst /* Begin reading at this offset */
92 ){
93 MemJournal *p = (MemJournal *)pJfd;
94 u8 *zOut = zBuf;
95 int nRead = iAmt;
96 int iChunkOffset;
97 FileChunk *pChunk;
98
99 if( (iAmt+iOfst)>p->endpoint.iOffset ){
100 return SQLITE_IOERR_SHORT_READ;
101 }
102 assert( p->readpoint.iOffset==0 || p->readpoint.pChunk!=0 );
103 if( p->readpoint.iOffset!=iOfst || iOfst==0 ){
104 sqlite3_int64 iOff = 0;
105 for(pChunk=p->pFirst;
106 ALWAYS(pChunk) && (iOff+p->nChunkSize)<=iOfst;
107 pChunk=pChunk->pNext
108 ){
109 iOff += p->nChunkSize;
110 }
111 }else{
112 pChunk = p->readpoint.pChunk;
113 assert( pChunk!=0 );
114 }
115
116 iChunkOffset = (int)(iOfst%p->nChunkSize);
117 do {
118 int iSpace = p->nChunkSize - iChunkOffset;
119 int nCopy = MIN(nRead, (p->nChunkSize - iChunkOffset));
120 memcpy(zOut, (u8*)pChunk->zChunk + iChunkOffset, nCopy);
121 zOut += nCopy;
122 nRead -= iSpace;
123 iChunkOffset = 0;
124 } while( nRead>=0 && (pChunk=pChunk->pNext)!=0 && nRead>0 );
125 p->readpoint.iOffset = pChunk ? iOfst+iAmt : 0;
126 p->readpoint.pChunk = pChunk;
127
128 return SQLITE_OK;
129 }
130
131 /*
132 ** Free the list of FileChunk structures headed at MemJournal.pFirst.
133 */
memjrnlFreeChunks(MemJournal * p)134 static void memjrnlFreeChunks(MemJournal *p){
135 FileChunk *pIter;
136 FileChunk *pNext;
137 for(pIter=p->pFirst; pIter; pIter=pNext){
138 pNext = pIter->pNext;
139 sqlite3_free(pIter);
140 }
141 p->pFirst = 0;
142 }
143
144 /*
145 ** Flush the contents of memory to a real file on disk.
146 */
memjrnlCreateFile(MemJournal * p)147 static int memjrnlCreateFile(MemJournal *p){
148 int rc;
149 sqlite3_file *pReal = (sqlite3_file*)p;
150 MemJournal copy = *p;
151
152 memset(p, 0, sizeof(MemJournal));
153 rc = sqlite3OsOpen(copy.pVfs, copy.zJournal, pReal, copy.flags, 0);
154 if( rc==SQLITE_OK ){
155 int nChunk = copy.nChunkSize;
156 i64 iOff = 0;
157 FileChunk *pIter;
158 for(pIter=copy.pFirst; pIter; pIter=pIter->pNext){
159 if( iOff + nChunk > copy.endpoint.iOffset ){
160 nChunk = copy.endpoint.iOffset - iOff;
161 }
162 rc = sqlite3OsWrite(pReal, (u8*)pIter->zChunk, nChunk, iOff);
163 if( rc ) break;
164 iOff += nChunk;
165 }
166 if( rc==SQLITE_OK ){
167 /* No error has occurred. Free the in-memory buffers. */
168 memjrnlFreeChunks(©);
169 }
170 }
171 if( rc!=SQLITE_OK ){
172 /* If an error occurred while creating or writing to the file, restore
173 ** the original before returning. This way, SQLite uses the in-memory
174 ** journal data to roll back changes made to the internal page-cache
175 ** before this function was called. */
176 sqlite3OsClose(pReal);
177 *p = copy;
178 }
179 return rc;
180 }
181
182
183 /*
184 ** Write data to the file.
185 */
memjrnlWrite(sqlite3_file * pJfd,const void * zBuf,int iAmt,sqlite_int64 iOfst)186 static int memjrnlWrite(
187 sqlite3_file *pJfd, /* The journal file into which to write */
188 const void *zBuf, /* Take data to be written from here */
189 int iAmt, /* Number of bytes to write */
190 sqlite_int64 iOfst /* Begin writing at this offset into the file */
191 ){
192 MemJournal *p = (MemJournal *)pJfd;
193 int nWrite = iAmt;
194 u8 *zWrite = (u8 *)zBuf;
195
196 /* If the file should be created now, create it and write the new data
197 ** into the file on disk. */
198 if( p->nSpill>0 && (iAmt+iOfst)>p->nSpill ){
199 int rc = memjrnlCreateFile(p);
200 if( rc==SQLITE_OK ){
201 rc = sqlite3OsWrite(pJfd, zBuf, iAmt, iOfst);
202 }
203 return rc;
204 }
205
206 /* If the contents of this write should be stored in memory */
207 else{
208 /* An in-memory journal file should only ever be appended to. Random
209 ** access writes are not required. The only exception to this is when
210 ** the in-memory journal is being used by a connection using the
211 ** atomic-write optimization. In this case the first 28 bytes of the
212 ** journal file may be written as part of committing the transaction. */
213 assert( iOfst==p->endpoint.iOffset || iOfst==0 );
214 #if defined(SQLITE_ENABLE_ATOMIC_WRITE) \
215 || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE)
216 if( iOfst==0 && p->pFirst ){
217 assert( p->nChunkSize>iAmt );
218 memcpy((u8*)p->pFirst->zChunk, zBuf, iAmt);
219 }else
220 #else
221 assert( iOfst>0 || p->pFirst==0 );
222 #endif
223 {
224 while( nWrite>0 ){
225 FileChunk *pChunk = p->endpoint.pChunk;
226 int iChunkOffset = (int)(p->endpoint.iOffset%p->nChunkSize);
227 int iSpace = MIN(nWrite, p->nChunkSize - iChunkOffset);
228
229 if( iChunkOffset==0 ){
230 /* New chunk is required to extend the file. */
231 FileChunk *pNew = sqlite3_malloc(fileChunkSize(p->nChunkSize));
232 if( !pNew ){
233 return SQLITE_IOERR_NOMEM_BKPT;
234 }
235 pNew->pNext = 0;
236 if( pChunk ){
237 assert( p->pFirst );
238 pChunk->pNext = pNew;
239 }else{
240 assert( !p->pFirst );
241 p->pFirst = pNew;
242 }
243 p->endpoint.pChunk = pNew;
244 }
245
246 memcpy((u8*)p->endpoint.pChunk->zChunk + iChunkOffset, zWrite, iSpace);
247 zWrite += iSpace;
248 nWrite -= iSpace;
249 p->endpoint.iOffset += iSpace;
250 }
251 p->nSize = iAmt + iOfst;
252 }
253 }
254
255 return SQLITE_OK;
256 }
257
258 /*
259 ** Truncate the file.
260 **
261 ** If the journal file is already on disk, truncate it there. Or, if it
262 ** is still in main memory but is being truncated to zero bytes in size,
263 ** ignore
264 */
memjrnlTruncate(sqlite3_file * pJfd,sqlite_int64 size)265 static int memjrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size){
266 MemJournal *p = (MemJournal *)pJfd;
267 if( ALWAYS(size==0) ){
268 memjrnlFreeChunks(p);
269 p->nSize = 0;
270 p->endpoint.pChunk = 0;
271 p->endpoint.iOffset = 0;
272 p->readpoint.pChunk = 0;
273 p->readpoint.iOffset = 0;
274 }
275 return SQLITE_OK;
276 }
277
278 /*
279 ** Close the file.
280 */
memjrnlClose(sqlite3_file * pJfd)281 static int memjrnlClose(sqlite3_file *pJfd){
282 MemJournal *p = (MemJournal *)pJfd;
283 memjrnlFreeChunks(p);
284 return SQLITE_OK;
285 }
286
287 /*
288 ** Sync the file.
289 **
290 ** If the real file has been created, call its xSync method. Otherwise,
291 ** syncing an in-memory journal is a no-op.
292 */
memjrnlSync(sqlite3_file * pJfd,int flags)293 static int memjrnlSync(sqlite3_file *pJfd, int flags){
294 UNUSED_PARAMETER2(pJfd, flags);
295 return SQLITE_OK;
296 }
297
298 /*
299 ** Query the size of the file in bytes.
300 */
memjrnlFileSize(sqlite3_file * pJfd,sqlite_int64 * pSize)301 static int memjrnlFileSize(sqlite3_file *pJfd, sqlite_int64 *pSize){
302 MemJournal *p = (MemJournal *)pJfd;
303 *pSize = (sqlite_int64) p->endpoint.iOffset;
304 return SQLITE_OK;
305 }
306
/*
** Table of methods for MemJournal sqlite3_file object.
**
** Only close, read, write, truncate, sync and file-size are implemented.
** Every other slot is left as 0.  The address of this table is also used
** as an identity test: a file handle whose pMethods pointer equals
** &MemJournalMethods is a journal that is still held in memory.
*/
static const struct sqlite3_io_methods MemJournalMethods = {
  1,                /* iVersion */
  memjrnlClose,     /* xClose */
  memjrnlRead,      /* xRead */
  memjrnlWrite,     /* xWrite */
  memjrnlTruncate,  /* xTruncate */
  memjrnlSync,      /* xSync */
  memjrnlFileSize,  /* xFileSize */
  0,                /* xLock */
  0,                /* xUnlock */
  0,                /* xCheckReservedLock */
  0,                /* xFileControl */
  0,                /* xSectorSize */
  0,                /* xDeviceCharacteristics */
  0,                /* xShmMap */
  0,                /* xShmLock */
  0,                /* xShmBarrier */
  0,                /* xShmUnmap */
  0,                /* xFetch */
  0                 /* xUnfetch */
};
331
332 /*
333 ** Open a journal file.
334 **
335 ** The behaviour of the journal file depends on the value of parameter
336 ** nSpill. If nSpill is 0, then the journal file is always create and
337 ** accessed using the underlying VFS. If nSpill is less than zero, then
338 ** all content is always stored in main-memory. Finally, if nSpill is a
339 ** positive value, then the journal file is initially created in-memory
340 ** but may be flushed to disk later on. In this case the journal file is
341 ** flushed to disk either when it grows larger than nSpill bytes in size,
342 ** or when sqlite3JournalCreate() is called.
343 */
sqlite3JournalOpen(sqlite3_vfs * pVfs,const char * zName,sqlite3_file * pJfd,int flags,int nSpill)344 int sqlite3JournalOpen(
345 sqlite3_vfs *pVfs, /* The VFS to use for actual file I/O */
346 const char *zName, /* Name of the journal file */
347 sqlite3_file *pJfd, /* Preallocated, blank file handle */
348 int flags, /* Opening flags */
349 int nSpill /* Bytes buffered before opening the file */
350 ){
351 MemJournal *p = (MemJournal*)pJfd;
352
353 /* Zero the file-handle object. If nSpill was passed zero, initialize
354 ** it using the sqlite3OsOpen() function of the underlying VFS. In this
355 ** case none of the code in this module is executed as a result of calls
356 ** made on the journal file-handle. */
357 memset(p, 0, sizeof(MemJournal));
358 if( nSpill==0 ){
359 return sqlite3OsOpen(pVfs, zName, pJfd, flags, 0);
360 }
361
362 if( nSpill>0 ){
363 p->nChunkSize = nSpill;
364 }else{
365 p->nChunkSize = 8 + MEMJOURNAL_DFLT_FILECHUNKSIZE - sizeof(FileChunk);
366 assert( MEMJOURNAL_DFLT_FILECHUNKSIZE==fileChunkSize(p->nChunkSize) );
367 }
368
369 pJfd->pMethods = (const sqlite3_io_methods*)&MemJournalMethods;
370 p->nSpill = nSpill;
371 p->flags = flags;
372 p->zJournal = zName;
373 p->pVfs = pVfs;
374 return SQLITE_OK;
375 }
376
377 /*
378 ** Open an in-memory journal file.
379 */
sqlite3MemJournalOpen(sqlite3_file * pJfd)380 void sqlite3MemJournalOpen(sqlite3_file *pJfd){
381 sqlite3JournalOpen(0, 0, pJfd, 0, -1);
382 }
383
#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \
 || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE)
/*
** If pJfd is a journal that is still held in memory, but is not a
** memory-only journal (that is, it was opened with a positive nSpill
** or, in SQLITE_ENABLE_BATCH_ATOMIC_WRITE builds, with the
** SQLITE_OPEN_MAIN_JOURNAL flag), create the underlying file now and
** copy the journal content into it.
**
** Returns SQLITE_OK if there was nothing to do.  Otherwise returns the
** result of memjrnlCreateFile().
*/
int sqlite3JournalCreate(sqlite3_file *pJfd){
  MemJournal *pJrnl = (MemJournal*)pJfd;

  /* Handles that do not use the in-memory method table are left alone. */
  if( pJfd->pMethods!=&MemJournalMethods ){
    return SQLITE_OK;
  }

  if(
#ifdef SQLITE_ENABLE_ATOMIC_WRITE
      pJrnl->nSpill>0
#else
      /* While this appears to not be possible without ATOMIC_WRITE, the
      ** paths are complex, so it seems prudent to leave the test in as
      ** a NEVER(), in case our analysis is subtly flawed. */
      NEVER(pJrnl->nSpill>0)
#endif
#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE
      || (pJrnl->flags & SQLITE_OPEN_MAIN_JOURNAL)
#endif
  ){
    return memjrnlCreateFile(pJrnl);
  }
  return SQLITE_OK;
}
#endif
413
414 /*
415 ** The file-handle passed as the only argument is open on a journal file.
416 ** Return true if this "journal file" is currently stored in heap memory,
417 ** or false otherwise.
418 */
sqlite3JournalIsInMemory(sqlite3_file * p)419 int sqlite3JournalIsInMemory(sqlite3_file *p){
420 return p->pMethods==&MemJournalMethods;
421 }
422
423 /*
424 ** Return the number of bytes required to store a JournalFile that uses vfs
425 ** pVfs to create the underlying on-disk files.
426 */
sqlite3JournalSize(sqlite3_vfs * pVfs)427 int sqlite3JournalSize(sqlite3_vfs *pVfs){
428 return MAX(pVfs->szOsFile, (int)sizeof(MemJournal));
429 }
430