1 /*
2 ** 2008 October 7
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 **
** This file contains code used to implement an in-memory rollback journal.
14 ** The in-memory rollback journal is used to journal transactions for
15 ** ":memory:" databases and when the journal_mode=MEMORY pragma is used.
16 **
17 ** Update:  The in-memory journal is also used to temporarily cache
18 ** smaller journals that are not critical for power-loss recovery.
19 ** For example, statement journals that are not too big will be held
20 ** entirely in memory, thus reducing the number of file I/O calls, and
21 ** more importantly, reducing temporary file creation events.  If these
22 ** journals become too large for memory, they are spilled to disk.  But
23 ** in the common case, they are usually small and no file I/O needs to
24 ** occur.
25 */
26 #include "sqliteInt.h"
27 
/*
** Forward references to the internal structures of this file, so that
** the structure definitions can refer to one another by their short names.
*/
typedef struct MemJournal MemJournal;
typedef struct FilePoint FilePoint;
typedef struct FileChunk FileChunk;
32 
/*
** The rollback journal is composed of a linked list of these structures.
**
** The zChunk array is declared with 8 bytes but is always at least that
** large - usually much more.  Chunks are allocated with
** fileChunkSize(nChunkSize) bytes, so zChunk[] really holds
** MemJournal.nChunkSize bytes of journal content.
*/
struct FileChunk {
  FileChunk *pNext;               /* Next chunk in the journal, 0 for the last */
  u8 zChunk[8];                   /* Content of this chunk (over-allocated) */
};
43 
/*
** By default, allocate this many bytes of memory for each FileChunk object.
** This is the size of the whole allocation, not just of the zChunk[]
** payload: sqlite3JournalOpen() derives the payload size from it when
** it is called with a negative nSpill.
*/
#define MEMJOURNAL_DFLT_FILECHUNKSIZE 1024
48 
/*
** For chunk size nChunkSize, return the number of bytes that should
** be allocated for each FileChunk structure.
**
** The 8 that is subtracted is the declared length of FileChunk.zChunk[],
** which sizeof(FileChunk) has already counted once.
*/
#define fileChunkSize(nChunkSize) (sizeof(FileChunk) + ((nChunkSize)-8))
54 
/*
** An instance of this object serves as a cursor into the rollback journal.
** The cursor can be either for reading or writing.  MemJournal holds one
** of each: "endpoint" marks the end of the journal, where appended data
** goes, and "readpoint" records where the previous read finished.
*/
struct FilePoint {
  sqlite3_int64 iOffset;          /* Offset from the beginning of the file */
  FileChunk *pChunk;              /* Specific chunk into which cursor points */
};
63 
/*
** This structure is a subclass of sqlite3_file. Each open memory-journal
** is an instance of this class.
**
** The first group of fields describes the in-memory content.  The last
** three fields (flags, pVfs, zJournal) are the arguments that were passed
** to sqlite3JournalOpen(); they are kept so that memjrnlCreateFile() can
** open the real file later on.
*/
struct MemJournal {
  const sqlite3_io_methods *pMethod; /* Parent class. MUST BE FIRST */
  int nChunkSize;                 /* Bytes of content held by each FileChunk */

  int nSpill;                     /* Spill to disk when a write would end
                                  ** beyond this many bytes.  Values <=0
                                  ** never trigger a spill on write. */
  int nSize;                      /* End offset of the most recent append */
  FileChunk *pFirst;              /* Head of in-memory chunk-list */
  FilePoint endpoint;             /* Pointer to the end of the file */
  FilePoint readpoint;            /* Pointer to the end of the last xRead() */

  int flags;                      /* xOpen flags */
  sqlite3_vfs *pVfs;              /* The "real" underlying VFS */
  const char *zJournal;           /* Name of the journal file */
};
82 
83 /*
84 ** Read data from the in-memory journal file.  This is the implementation
85 ** of the sqlite3_vfs.xRead method.
86 */
memjrnlRead(sqlite3_file * pJfd,void * zBuf,int iAmt,sqlite_int64 iOfst)87 static int memjrnlRead(
88   sqlite3_file *pJfd,    /* The journal file from which to read */
89   void *zBuf,            /* Put the results here */
90   int iAmt,              /* Number of bytes to read */
91   sqlite_int64 iOfst     /* Begin reading at this offset */
92 ){
93   MemJournal *p = (MemJournal *)pJfd;
94   u8 *zOut = zBuf;
95   int nRead = iAmt;
96   int iChunkOffset;
97   FileChunk *pChunk;
98 
99 #ifdef SQLITE_ENABLE_ATOMIC_WRITE
100   if( (iAmt+iOfst)>p->endpoint.iOffset ){
101     return SQLITE_IOERR_SHORT_READ;
102   }
103 #endif
104 
105   assert( (iAmt+iOfst)<=p->endpoint.iOffset );
106   assert( p->readpoint.iOffset==0 || p->readpoint.pChunk!=0 );
107   if( p->readpoint.iOffset!=iOfst || iOfst==0 ){
108     sqlite3_int64 iOff = 0;
109     for(pChunk=p->pFirst;
110         ALWAYS(pChunk) && (iOff+p->nChunkSize)<=iOfst;
111         pChunk=pChunk->pNext
112     ){
113       iOff += p->nChunkSize;
114     }
115   }else{
116     pChunk = p->readpoint.pChunk;
117     assert( pChunk!=0 );
118   }
119 
120   iChunkOffset = (int)(iOfst%p->nChunkSize);
121   do {
122     int iSpace = p->nChunkSize - iChunkOffset;
123     int nCopy = MIN(nRead, (p->nChunkSize - iChunkOffset));
124     memcpy(zOut, (u8*)pChunk->zChunk + iChunkOffset, nCopy);
125     zOut += nCopy;
126     nRead -= iSpace;
127     iChunkOffset = 0;
128   } while( nRead>=0 && (pChunk=pChunk->pNext)!=0 && nRead>0 );
129   p->readpoint.iOffset = pChunk ? iOfst+iAmt : 0;
130   p->readpoint.pChunk = pChunk;
131 
132   return SQLITE_OK;
133 }
134 
135 /*
136 ** Free the list of FileChunk structures headed at MemJournal.pFirst.
137 */
memjrnlFreeChunks(MemJournal * p)138 static void memjrnlFreeChunks(MemJournal *p){
139   FileChunk *pIter;
140   FileChunk *pNext;
141   for(pIter=p->pFirst; pIter; pIter=pNext){
142     pNext = pIter->pNext;
143     sqlite3_free(pIter);
144   }
145   p->pFirst = 0;
146 }
147 
148 /*
149 ** Flush the contents of memory to a real file on disk.
150 */
memjrnlCreateFile(MemJournal * p)151 static int memjrnlCreateFile(MemJournal *p){
152   int rc;
153   sqlite3_file *pReal = (sqlite3_file*)p;
154   MemJournal copy = *p;
155 
156   memset(p, 0, sizeof(MemJournal));
157   rc = sqlite3OsOpen(copy.pVfs, copy.zJournal, pReal, copy.flags, 0);
158   if( rc==SQLITE_OK ){
159     int nChunk = copy.nChunkSize;
160     i64 iOff = 0;
161     FileChunk *pIter;
162     for(pIter=copy.pFirst; pIter; pIter=pIter->pNext){
163       if( iOff + nChunk > copy.endpoint.iOffset ){
164         nChunk = copy.endpoint.iOffset - iOff;
165       }
166       rc = sqlite3OsWrite(pReal, (u8*)pIter->zChunk, nChunk, iOff);
167       if( rc ) break;
168       iOff += nChunk;
169     }
170     if( rc==SQLITE_OK ){
171       /* No error has occurred. Free the in-memory buffers. */
172       memjrnlFreeChunks(&copy);
173     }
174   }
175   if( rc!=SQLITE_OK ){
176     /* If an error occurred while creating or writing to the file, restore
177     ** the original before returning. This way, SQLite uses the in-memory
178     ** journal data to roll back changes made to the internal page-cache
179     ** before this function was called.  */
180     sqlite3OsClose(pReal);
181     *p = copy;
182   }
183   return rc;
184 }
185 
186 
187 /*
188 ** Write data to the file.
189 */
memjrnlWrite(sqlite3_file * pJfd,const void * zBuf,int iAmt,sqlite_int64 iOfst)190 static int memjrnlWrite(
191   sqlite3_file *pJfd,    /* The journal file into which to write */
192   const void *zBuf,      /* Take data to be written from here */
193   int iAmt,              /* Number of bytes to write */
194   sqlite_int64 iOfst     /* Begin writing at this offset into the file */
195 ){
196   MemJournal *p = (MemJournal *)pJfd;
197   int nWrite = iAmt;
198   u8 *zWrite = (u8 *)zBuf;
199 
200   /* If the file should be created now, create it and write the new data
201   ** into the file on disk. */
202   if( p->nSpill>0 && (iAmt+iOfst)>p->nSpill ){
203     int rc = memjrnlCreateFile(p);
204     if( rc==SQLITE_OK ){
205       rc = sqlite3OsWrite(pJfd, zBuf, iAmt, iOfst);
206     }
207     return rc;
208   }
209 
210   /* If the contents of this write should be stored in memory */
211   else{
212     /* An in-memory journal file should only ever be appended to. Random
213     ** access writes are not required. The only exception to this is when
214     ** the in-memory journal is being used by a connection using the
215     ** atomic-write optimization. In this case the first 28 bytes of the
216     ** journal file may be written as part of committing the transaction. */
217     assert( iOfst==p->endpoint.iOffset || iOfst==0 );
218 #ifdef SQLITE_ENABLE_ATOMIC_WRITE
219     if( iOfst==0 && p->pFirst ){
220       assert( p->nChunkSize>iAmt );
221       memcpy((u8*)p->pFirst->zChunk, zBuf, iAmt);
222     }else
223 #else
224     assert( iOfst>0 || p->pFirst==0 );
225 #endif
226     {
227       while( nWrite>0 ){
228         FileChunk *pChunk = p->endpoint.pChunk;
229         int iChunkOffset = (int)(p->endpoint.iOffset%p->nChunkSize);
230         int iSpace = MIN(nWrite, p->nChunkSize - iChunkOffset);
231 
232         if( iChunkOffset==0 ){
233           /* New chunk is required to extend the file. */
234           FileChunk *pNew = sqlite3_malloc(fileChunkSize(p->nChunkSize));
235           if( !pNew ){
236             return SQLITE_IOERR_NOMEM_BKPT;
237           }
238           pNew->pNext = 0;
239           if( pChunk ){
240             assert( p->pFirst );
241             pChunk->pNext = pNew;
242           }else{
243             assert( !p->pFirst );
244             p->pFirst = pNew;
245           }
246           p->endpoint.pChunk = pNew;
247         }
248 
249         memcpy((u8*)p->endpoint.pChunk->zChunk + iChunkOffset, zWrite, iSpace);
250         zWrite += iSpace;
251         nWrite -= iSpace;
252         p->endpoint.iOffset += iSpace;
253       }
254       p->nSize = iAmt + iOfst;
255     }
256   }
257 
258   return SQLITE_OK;
259 }
260 
261 /*
262 ** Truncate the file.
263 **
264 ** If the journal file is already on disk, truncate it there. Or, if it
265 ** is still in main memory but is being truncated to zero bytes in size,
266 ** ignore
267 */
memjrnlTruncate(sqlite3_file * pJfd,sqlite_int64 size)268 static int memjrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size){
269   MemJournal *p = (MemJournal *)pJfd;
270   if( ALWAYS(size==0) ){
271     memjrnlFreeChunks(p);
272     p->nSize = 0;
273     p->endpoint.pChunk = 0;
274     p->endpoint.iOffset = 0;
275     p->readpoint.pChunk = 0;
276     p->readpoint.iOffset = 0;
277   }
278   return SQLITE_OK;
279 }
280 
281 /*
282 ** Close the file.
283 */
memjrnlClose(sqlite3_file * pJfd)284 static int memjrnlClose(sqlite3_file *pJfd){
285   MemJournal *p = (MemJournal *)pJfd;
286   memjrnlFreeChunks(p);
287   return SQLITE_OK;
288 }
289 
290 /*
291 ** Sync the file.
292 **
293 ** If the real file has been created, call its xSync method. Otherwise,
294 ** syncing an in-memory journal is a no-op.
295 */
memjrnlSync(sqlite3_file * pJfd,int flags)296 static int memjrnlSync(sqlite3_file *pJfd, int flags){
297   UNUSED_PARAMETER2(pJfd, flags);
298   return SQLITE_OK;
299 }
300 
301 /*
302 ** Query the size of the file in bytes.
303 */
memjrnlFileSize(sqlite3_file * pJfd,sqlite_int64 * pSize)304 static int memjrnlFileSize(sqlite3_file *pJfd, sqlite_int64 *pSize){
305   MemJournal *p = (MemJournal *)pJfd;
306   *pSize = (sqlite_int64) p->endpoint.iOffset;
307   return SQLITE_OK;
308 }
309 
/*
** Table of methods for MemJournal sqlite3_file object.
**
** Only close, read, write, truncate, sync and file-size are implemented.
** The locking, file-control, sector-size, device-characteristics,
** shared-memory and fetch slots are all left as 0.
**
** The address of this table also identifies a file handle as an in-memory
** journal: see sqlite3JournalIsInMemory().
*/
static const struct sqlite3_io_methods MemJournalMethods = {
  1,                /* iVersion */
  memjrnlClose,     /* xClose */
  memjrnlRead,      /* xRead */
  memjrnlWrite,     /* xWrite */
  memjrnlTruncate,  /* xTruncate */
  memjrnlSync,      /* xSync */
  memjrnlFileSize,  /* xFileSize */
  0,                /* xLock */
  0,                /* xUnlock */
  0,                /* xCheckReservedLock */
  0,                /* xFileControl */
  0,                /* xSectorSize */
  0,                /* xDeviceCharacteristics */
  0,                /* xShmMap */
  0,                /* xShmLock */
  0,                /* xShmBarrier */
  0,                /* xShmUnmap */
  0,                /* xFetch */
  0                 /* xUnfetch */
};
334 
335 /*
336 ** Open a journal file.
337 **
338 ** The behaviour of the journal file depends on the value of parameter
339 ** nSpill. If nSpill is 0, then the journal file is always create and
340 ** accessed using the underlying VFS. If nSpill is less than zero, then
341 ** all content is always stored in main-memory. Finally, if nSpill is a
342 ** positive value, then the journal file is initially created in-memory
343 ** but may be flushed to disk later on. In this case the journal file is
344 ** flushed to disk either when it grows larger than nSpill bytes in size,
345 ** or when sqlite3JournalCreate() is called.
346 */
sqlite3JournalOpen(sqlite3_vfs * pVfs,const char * zName,sqlite3_file * pJfd,int flags,int nSpill)347 int sqlite3JournalOpen(
348   sqlite3_vfs *pVfs,         /* The VFS to use for actual file I/O */
349   const char *zName,         /* Name of the journal file */
350   sqlite3_file *pJfd,        /* Preallocated, blank file handle */
351   int flags,                 /* Opening flags */
352   int nSpill                 /* Bytes buffered before opening the file */
353 ){
354   MemJournal *p = (MemJournal*)pJfd;
355 
356   /* Zero the file-handle object. If nSpill was passed zero, initialize
357   ** it using the sqlite3OsOpen() function of the underlying VFS. In this
358   ** case none of the code in this module is executed as a result of calls
359   ** made on the journal file-handle.  */
360   memset(p, 0, sizeof(MemJournal));
361   if( nSpill==0 ){
362     return sqlite3OsOpen(pVfs, zName, pJfd, flags, 0);
363   }
364 
365   if( nSpill>0 ){
366     p->nChunkSize = nSpill;
367   }else{
368     p->nChunkSize = 8 + MEMJOURNAL_DFLT_FILECHUNKSIZE - sizeof(FileChunk);
369     assert( MEMJOURNAL_DFLT_FILECHUNKSIZE==fileChunkSize(p->nChunkSize) );
370   }
371 
372   p->pMethod = (const sqlite3_io_methods*)&MemJournalMethods;
373   p->nSpill = nSpill;
374   p->flags = flags;
375   p->zJournal = zName;
376   p->pVfs = pVfs;
377   return SQLITE_OK;
378 }
379 
380 /*
381 ** Open an in-memory journal file.
382 */
sqlite3MemJournalOpen(sqlite3_file * pJfd)383 void sqlite3MemJournalOpen(sqlite3_file *pJfd){
384   sqlite3JournalOpen(0, 0, pJfd, 0, -1);
385 }
386 
387 #ifdef SQLITE_ENABLE_ATOMIC_WRITE
388 /*
389 ** If the argument p points to a MemJournal structure that is not an
390 ** in-memory-only journal file (i.e. is one that was opened with a +ve
391 ** nSpill parameter), and the underlying file has not yet been created,
392 ** create it now.
393 */
sqlite3JournalCreate(sqlite3_file * p)394 int sqlite3JournalCreate(sqlite3_file *p){
395   int rc = SQLITE_OK;
396   if( p->pMethods==&MemJournalMethods && ((MemJournal*)p)->nSpill>0 ){
397     rc = memjrnlCreateFile((MemJournal*)p);
398   }
399   return rc;
400 }
401 #endif
402 
403 /*
404 ** The file-handle passed as the only argument is open on a journal file.
405 ** Return true if this "journal file" is currently stored in heap memory,
406 ** or false otherwise.
407 */
sqlite3JournalIsInMemory(sqlite3_file * p)408 int sqlite3JournalIsInMemory(sqlite3_file *p){
409   return p->pMethods==&MemJournalMethods;
410 }
411 
412 /*
413 ** Return the number of bytes required to store a JournalFile that uses vfs
414 ** pVfs to create the underlying on-disk files.
415 */
sqlite3JournalSize(sqlite3_vfs * pVfs)416 int sqlite3JournalSize(sqlite3_vfs *pVfs){
417   return MAX(pVfs->szOsFile, (int)sizeof(MemJournal));
418 }
419