1 /*
2  * $Id: cache.c,v 1.5 2009-05-22 04:02:26 dhmunro Exp $
3  * Define caching (disk buffering) scheme used for random access
4  * binary I/O.
5  */
6 /* Copyright (c) 2005, The Regents of the University of California.
7  * All rights reserved.
8  * This file is part of yorick (http://yorick.sourceforge.net).
9  * Read the accompanying LICENSE file for details.
10  */
11 
12 #include "binio.h"
13 #include "pstdlib.h"
14 #include "pstdio.h"
15 #include <string.h>
16 
17 /*--------------------------------------------------------------------------*/
18 
19 /* Cache block addresses always rounded to 4 kbytes (BLOCK_SIZE) */
20 long yMaxBlockSize= 0x080000; /* 512k is default maximum cache block size */
21 long yCacheSize=    0x140000;  /* total of all cache blocks < 1.25 Mbytes */
22 int yCacheNumber= 16;    /* don't try to manage more than 16 cache blocks */
23 
24 long yCacheTotal= 0;  /* sum of sizes of all existing cache blocks */
25 int yCacheN= 0;       /* total number of existing cache blocks */
26 
27 /* YcRead and YcWrite loop on RawRead and RawWrite, respectively.  The
28    raw routines grab just the portion of the request within a single cache
29    block -- creating the block if necessary.  Multiple calls are
30    required if and only if the request spans several existing cache
31    blocks.  */
32 static long RawRead(IOStream *file, void *buffer, long address, long nbytes);
33 static long RawWrite(IOStream *file, const void *buffer,
34                      long address, long nbytes);
35 
36 static int ReadBlock(CacheBlock *block);   /* returns non-0 if EOF hit */
37 static void UseCacheBlock(CacheBlock *block);    /* makes block mostRU */
38 static void PruneCache(long newSize);  /* makes space for newSize, maybe
39                                           deleting existing blocks */
40 
41 static CacheBlock *NewBlock(IOStream *file, long addr, long last);
42 static void FreeCacheBlock(CacheBlock *block);
43 static void FlushCacheBlock(CacheBlock *block);
44 
45 extern void YErrorIO(const char *msg);
46 
47 /*--------------------------------------------------------------------------*/
48 
YcRead(IOStream * file,void * buffer,long address,long nbytes)49 long YcRead(IOStream *file, void *buffer, long address, long nbytes)
50 {
51   long nio= nbytes>0? RawRead(file, buffer, address, nbytes) : 0;
52   long ntotal= nio;
53   while (nbytes>nio) {
54     nbytes-= nio;
55     address+= nio;
56     nio= RawRead(file, (void *)0, address, nbytes);
57     if (nio==0) break;
58     ntotal+= nio;
59   }
60   file->seqAddress= address+nio;
61   return ntotal;
62 }
63 
YcWrite(IOStream * file,const void * buffer,long address,long nbytes)64 void YcWrite(IOStream *file, const void *buffer, long address, long nbytes)
65 {
66   long nio;
67   HistoryInfo *history= file->history;
68 
69   if (history) {
70     /* Enforce restrictions on writing to files with history records.  */
71     if (history->fileNumber!=history->nFamily-1)
72       YError("illegal to write except to last file in a record family");
73     if (history->nRecords>0 && file==history->parent)
74       YError("illegal to write into non-record variable after 1st record");
75   }
76 
77   nio= nbytes>0? RawWrite(file, buffer, address, nbytes) : 0;
78   while (nbytes>nio) {
79     nbytes-= nio;
80     address+= nio;
81     nio= RawWrite(file, (void *)0, address, nbytes);
82     if (nio==0) break;
83   }
84   file->seqAddress= address+nio;
85 }
86 
87 /*--------------------------------------------------------------------------*/
88 /* Implement caching system for binary files.  */
89 
90 /* global most and least recently used cache blocks */
91 CacheBlock *yMostRU, *yLeastRU;
92 
93 struct CacheBlock {
94   IOStream *file;    /* origin of contents of this block */
95   long address;      /* byte address of 1st byte of block in file */
96   long nextAddress;  /* byte immediately following this block in file */
97   long validBefore;  /* byte immediately following valid data in block */
98   long dirtyAfter;   /* 1st byte which has not yet been written to file */
99   CacheBlock *prev, *next;  /* prev is block with lower address, next has
100                                higher address -- blocks do not overlap,
101                                and file->blockList has largest address */
102   CacheBlock *moreRU, *lessRU;  /* links into most and least recently used
103                                    list -- note that CacheBlock is
104                                    quadruply linked, a personal record... */
105 
106   /* The data associated with the CacheBlock follows it in memory, with
107      an offset of sizeof(MemCacheBlock), or CACHE_HEADER.  */
108 };
109 
110 /* sizeof(MemCacheBlock) must be a multiple of the most restrictive
111    alignment of any of the basic data types */
112 union MemCacheBlock {
113   CacheBlock cb;
114   char c; short s; int i; long l; float f; double d; char *q; void *p;
115 };
116 
117 #define CACHE_HEADER sizeof(union MemCacheBlock)
118 
119 /*--------------------------------------------------------------------------*/
120 
121 static char *prevBuffer;
122 static CacheBlock *prevBlock;
123 static int prevEOF;
124 
RawRead(IOStream * file,void * buf,long addr,long len)125 static long RawRead(IOStream *file, void *buf, long addr, long len)
126 {
127   long last= addr+len;
128   if (!(file->permissions & 1))
129     YErrorIO("attempt to read from binary file opened in w or a mode");
130 
131   if (y_vopen_file(file->stream)) {
132     file->ioOps->Seek(file, addr);
133     return file->ioOps->Read(file, buf, sizeof(char), len);
134   }
135 
136   /* similar to strtok -- buf==0 identifies a follow-up call
137      when the first call did not meet the entire request */
138   if (buf) {
139     if (len > yMaxBlockSize-file->blockSize) {
140       /* Request might require a cache block longer than yMaxBlockSize,
141          so read it directly.  This simple calculation assumes that
142          yMaxBlockSize is a multiple of file->blockSize+1, which will
143          be true if both are powers of two, as intended.
144          Note that all reads will be direct if the two are equal.  */
145       FlushFile(file, 0);  /* Be sure any pending writes are done.  */
146       prevEOF= 1; /* Force immediate return if called again with buf==0.  */
147       file->ioOps->Seek(file, addr);
148       return file->ioOps->Read(file, buf, sizeof(char), len);
149 
150     } else {
151       CacheBlock *block= file->blockList;
152       /* Note- By making file->blockList point to the LAST existing
153          cache block, this search will immediately find the proper block
154          if the reads happen to be sequential.  (Conversely, the search
155          will be slowest for a series of reads in reverse order.)  */
156       prevBlock= 0;
157       while (block && block->nextAddress>addr) {
158         prevBlock= block;
159         block= block->prev;
160       }
161       prevBuffer= buf;
162       prevEOF= 0;
163     }
164 
165   } else if (prevEOF) {
166     return 0;
167   }
168 
169   /* prevBlock is 1st cache block with addr < prevBlock->nextAddress */
170 
171   if (!prevBlock) {
172     /* addr is after all existing cache blocks, just get a new one */
173     prevBlock= NewBlock(file, addr, last);
174 
175   } else if (addr<prevBlock->address) {
176     /* at least part of the request precedes prevBlock */
177     if (last>prevBlock->address) {
178       last= prevBlock->address;
179       UseCacheBlock(prevBlock);
180     }
181     /* If request overlaps prevBlock, it has just been made MRU and
182        the call to NewBlock cannot release it -- this assures next
183        pass will be able to copy from cache without reading a new
184        cache block.  */
185     prevBlock= NewBlock(file, addr, last);
186 
187   } else {
188     /* at least part of the request is in prevBlock */
189     UseCacheBlock(prevBlock);
190     if (last>prevBlock->nextAddress) last= prevBlock->nextAddress;
191   }
192 
193   /* prevBlock now contains addr */
194 
195   if (last>prevBlock->validBefore) {
196     /* This request extends beyond the part of the block containing
197        valid data.  Attempt to read the entire block.  */
198     if (ReadBlock(prevBlock)) prevEOF= 1;
199     if (last>prevBlock->validBefore) last= prevBlock->validBefore;
200   }
201 
202   /* Move the data from the cache block into the result buffer.  */
203   len= last-addr;
204   if (len > 0) {
205     memcpy(prevBuffer,
206            &((char *)prevBlock+CACHE_HEADER)[addr-prevBlock->address],
207            len);
208   } else {
209     len= 0;
210   }
211 
212   /* Set up for a subsequent call with buf==0.  */
213   prevBuffer+= len;
214   prevBlock= prevBlock->next;
215   return len;
216 }
217 
218 static const char *wrtBuffer;
219 
RawWrite(IOStream * file,const void * buf,long addr,long len)220 static long RawWrite(IOStream *file, const void *buf, long addr, long len)
221 {
222   long last= addr+len;
223 
224   if (y_vopen_file(file->stream)) {
225     file->ioOps->Seek(file, addr);
226     file->ioOps->Write(file, buf, sizeof(char), len);
227     if (last > file->nextAddress) file->nextAddress = last;
228     return len;
229   }
230 
231   /* similar to strtok -- buf==0 identifies a follow-up call
232      when the first call did not meet the entire request */
233   if (buf) {
234     CacheBlock *block= file->blockList;
235     long blockSize= file->blockSize;  /* power of 2 minus 1 */
236     wrtBuffer= buf;
237     if (len>blockSize+1 && len>yMaxBlockSize-blockSize) {
238       /* Request might require a cache block longer than yMaxBlockSize.
239          Write block fragment at beginning, followed by direct write
240          of center, followed by fragment at end.  This procedure assures
241          that writes always occur in multiples of the block size.  */
242       long first= addr&(~blockSize);
243       long final= last&(~blockSize);
244       const char *bufc= buf;
245       if (first<addr) {
246         first= blockSize+1+first-addr;
247         YcWrite(file, bufc, addr, first); /* clobbers wrtBuffer! */
248         addr+= first;
249         bufc+= first;
250         block = file->blockList;   /* YcWrite may clobber blockList! */
251       }
252       /* Must release any cache blocks in this region before this write
253          (or update and mark them as not dirty).  */
254       while (block && block->address>=final) block= block->prev;
255       while (block && block->nextAddress>addr) {
256         prevBlock= block->prev;
257         FreeCacheBlock(block);  /* take easy way out for now... */
258         block = prevBlock;
259       }
260       prevBlock = 0;
261       file->ioOps->Seek(file, addr);
262       file->ioOps->Write(file, bufc, sizeof(char), final-addr);
263       if (last>final) {
264         first= final-addr;
265         addr+= first;
266         bufc+= first;
267         YcWrite(file, bufc, addr, last-final);
268       } else if (last>file->nextAddress) {
269         file->nextAddress= last;
270       }
271       return len;
272 
273     } else {
274       /* Note- By making file->blockList point to the LAST existing
275          cache block, this search will immediately find the proper block
276          if the writes happen to be sequential.  (Conversely, the search
277          will be slowest for a series of writes in reverse order.)  */
278       prevBlock= 0;
279       while (block && block->nextAddress>addr) {
280         prevBlock= block;
281         block= block->prev;
282       }
283     }
284   }
285 
286   /* prevBlock is 1st cache block with addr < prevBlock->nextAddress */
287 
288   if (!prevBlock) {
289     /* addr is after all existing cache blocks, just get a new one */
290     prevBlock= NewBlock(file, addr, last);
291 
292   } else if (addr<prevBlock->address) {
293     /* at least part of the request precedes prevBlock */
294     if (last>prevBlock->address) {
295       last= prevBlock->address;
296       UseCacheBlock(prevBlock);
297     }
298     /* If request overlaps prevBlock, it has just been made MRU and
299        the call to NewBlock cannot release it -- this assures next
300        pass will be able to copy from cache without reading a new
301        cache block.  */
302     prevBlock= NewBlock(file, addr, last);
303 
304   } else {
305     /* at least part of the request is in prevBlock */
306     UseCacheBlock(prevBlock);
307     if (last>prevBlock->nextAddress) last= prevBlock->nextAddress;
308   }
309 
310   /* prevBlock now contains addr */
311 
312   /* Be sure any data in the block before addr is valid.  */
313   if (addr > prevBlock->validBefore) {
314     ReadBlock(prevBlock);
315     if (addr > prevBlock->validBefore) {
316       /* When writing beyond end-of-file, zero portion of cache block
317          between end-of-file and write address.  This does not guarantee
318          that all bytes not explicitly referenced will be written as
319          zeroes, since the end of a previous cache block might never be
320          written.  It does guarantee that all bytes actually written by
321          the cache package will have been initialized.  Perhaps a
322          "coarseness" parameter should be added to guarantee that things
323          like XDR format will work correctly?  */
324       memset(&((char *)prevBlock+CACHE_HEADER)[prevBlock->validBefore-
325                                                prevBlock->address],
326              0, addr-prevBlock->validBefore);
327       prevBlock->validBefore= addr;
328     }
329   }
330 
331   /* Move the data from the source buffer into the cache block.  */
332   len= last-addr;
333   memcpy(&((char *)prevBlock+CACHE_HEADER)[addr-prevBlock->address],
334          wrtBuffer, len);
335   if (last > prevBlock->validBefore) prevBlock->validBefore= last;
336 
337   /* Mark the block as dirty, but if its last word is being written,
338      go ahead and flush it.  */
339   if (addr < prevBlock->dirtyAfter) prevBlock->dirtyAfter= addr;
340   if (last==prevBlock->nextAddress) FlushCacheBlock(prevBlock);
341 
342   /* Set up for a subsequent call with buf==0.  */
343   wrtBuffer+= len;
344   prevBlock= prevBlock->next;
345   if (last>file->nextAddress) file->nextAddress= last;
346   return len;
347 }
348 
349 /*--------------------------------------------------------------------------*/
350 
351 /* Be sure pending writes are done, optionally discarding all cache
352    blocks associated with file.  */
FlushFile(IOStream * file,int discardCache)353 void FlushFile(IOStream *file, int discardCache)
354 {
355   if (discardCache) {
356     while (file->blockList) FreeCacheBlock(file->blockList);
357   } else {
358     CacheBlock *block= file->blockList;
359     while (block) {
360       FlushCacheBlock(block);
361       block= block->prev;
362     }
363   }
364   p_fflush((p_file *)file->stream);
365   if (file->contentsLog) {
366     HistoryInfo *history= file->history;
367     if (history) {
368       if (file==history->parent) {
369         if (history->nRecords<=0) CLupdate(file);
370       } else {
371         CLupdate(file);
372       }
373     } else if (file->dataTable.nItems) {
374       CLupdate(file);
375     }
376   }
377 }
378 
YCopyFile(IOStream * dst,IOStream * src)379 int YCopyFile(IOStream *dst, IOStream *src)
380 {
381   long address;
382   CacheBlock *srcBlock;
383   long last= src->nextAddress;
384   long blockSize= src->blockSize>dst->blockSize? dst->blockSize :
385                                                  src->blockSize;
386   long chunkSize= 16*(blockSize+1);
387   if (chunkSize >= yMaxBlockSize-blockSize) {
388     chunkSize= yMaxBlockSize-blockSize-1;
389     chunkSize&= blockSize;
390   }
391   if (chunkSize > last) chunkSize= last;
392 
393   /* discard all cache blocks for these files */
394   FlushFile(dst, 1);
395   FlushFile(src, 1);
396 
397   address= 0;
398   while (address < last) {
399     if (address+chunkSize > last) chunkSize= last-address;
400     srcBlock= NewBlock(src, address, address+chunkSize);
401     ReadBlock(srcBlock);
402     if (srcBlock->validBefore<address+chunkSize) {
403       chunkSize= srcBlock->validBefore-address;
404       last= address+chunkSize;
405     }
406     if (chunkSize>0) {
407       dst->ioOps->Seek(dst, address);
408       dst->ioOps->Write(dst,
409                         &((char *)srcBlock+CACHE_HEADER)[address-
410                                                          srcBlock->address],
411                         sizeof(char), chunkSize);
412     }
413     FreeCacheBlock(srcBlock);
414     address+= chunkSize;
415   }
416 
417   if (dst->nextAddress<last) dst->nextAddress= last;
418   dst->seqAddress= src->seqAddress= last;
419 
420   return last<src->nextAddress;
421 }
422 
423 /*--------------------------------------------------------------------------*/
424 
ReadBlock(CacheBlock * block)425 static int ReadBlock(CacheBlock *block)
426 {
427   IOStream *file= block->file;
428   long address= block->validBefore;
429   long length= block->nextAddress - address;
430   long nio= address-block->address;
431   file->ioOps->Seek(file, address);
432   nio= file->ioOps->Read(file, (char *)block+CACHE_HEADER+nio,
433                          sizeof(char), length);
434   block->validBefore= address+nio;
435   return nio<length;    /* returns non-0 on EOF, else 0 */
436 }
437 
FlushCacheBlock(CacheBlock * block)438 static void FlushCacheBlock(CacheBlock *block)
439 {
440   IOStream *file;
441   long address= block->dirtyAfter;
442   long length= block->validBefore - address;
443   if (length<=0) return;
444   file= block->file;
445   file->ioOps->Seek(file, address);
446   file->ioOps->Write(file, (char *)block+CACHE_HEADER+address-block->address,
447                      sizeof(char), length);
448   block->dirtyAfter= block->nextAddress;
449 }
450 
UseCacheBlock(CacheBlock * block)451 static void UseCacheBlock(CacheBlock *block)
452 {
453   CacheBlock *moreRU= block->moreRU;
454   if (moreRU) {
455     /* This is not already the most recently used block, so relink list.  */
456     block->moreRU= 0;            /* there are no more recently used */
457     if (!block->lessRU) yLeastRU= moreRU;      /* no longer the LRU */
458     else block->lessRU->moreRU= moreRU;
459     moreRU->lessRU= block->lessRU;  /* close up "hole" where it was */
460     block->lessRU= yMostRU;                   /* old MRU now second */
461     yMostRU->moreRU= block;
462     yMostRU= block;
463   }
464 }
465 
NewBlock(IOStream * file,long addr,long last)466 static CacheBlock *NewBlock(IOStream *file, long addr, long last)
467 {
468   CacheBlock *block, *next, *prev;
469   long mask= ~file->blockSize;  /* blockSize must be a power of 2 minus 1 */
470 
471   /* round to multiple of file->blockSize+1 and get new cache block */
472   addr&= mask;
473   last= (file->blockSize+1) + ((last-1)&mask);
474   PruneCache(last-addr);
475   block= p_malloc(CACHE_HEADER+last-addr);
476 
477   /* fill in file and address range */
478   block->file= file;
479   block->address= addr;
480   block->nextAddress= last;
481   block->validBefore= addr;  /* no data yet valid */
482   block->dirtyAfter= last;   /* no writes yet pending */
483 
484   /* link into most/least recently used list */
485   block->prev= block->next= 0;  /* temporary protection */
486   block->moreRU= 0;
487   block->lessRU= yMostRU;
488   if (yMostRU) yMostRU->moreRU= block;
489   yMostRU= block;
490   if (!yLeastRU) yLeastRU= block;
491 
492   /* link into file's blockList */
493   prev= file->blockList;
494   next= 0;
495   while (prev && prev->nextAddress>last) {
496     next= prev;
497     prev= prev->prev;
498   }
499   if ((block->prev= prev)) prev->next= block;
500   if ((block->next= next)) next->prev= block;
501   else file->blockList= block;
502 
503   /* increment total space and total number of cache blocks */
504   yCacheTotal+= last-addr;
505   yCacheN++;
506 
507   return block;
508 }
509 
FreeCacheBlock(CacheBlock * block)510 static void FreeCacheBlock(CacheBlock *block)
511 {
512   IOStream *file;
513   if (!block) return;
514   file= block->file;
515 
516   FlushCacheBlock(block);
517 
518   /* unlink from global most/least rectently used list */
519   if (block->lessRU) block->lessRU->moreRU= block->moreRU;
520   if (block->moreRU) block->moreRU->lessRU= block->lessRU;
521   if (block==yMostRU) yMostRU= block->lessRU;
522   if (block==yLeastRU) yLeastRU= block->moreRU;
523   block->lessRU= block->moreRU= 0;
524   yCacheTotal-= block->nextAddress-block->address;
525   yCacheN--;
526 
527   /* unlink from file blockList */
528   if (block->next) block->next->prev= block->prev;
529   else if (file) file->blockList= block->prev;
530   if (block->prev) block->prev->next= block->next;
531 
532   /* probably impossible, but best to be safe */
533   if (block == prevBlock) prevBlock = 0;
534 
535   p_free(block);
536 }
537 
PruneCache(long newSize)538 static void PruneCache(long newSize)
539 {
540   long total= yCacheTotal+newSize;
541   int n= yCacheN+1;
542   CacheBlock *lessRU, *block= 0;
543   long size;
544 
545   /* Starting from the least recently used, work forward until enough
546      cumulative space would be freed.  */
547   while (n>yCacheNumber || total>yCacheSize) {
548     if (!block) block= yLeastRU;
549     else block= block->moreRU;
550     if (!block) {
551       yCacheTotal= 0;
552       yCacheN= 0;
553       return;
554     }
555     n--;
556     total-= block->nextAddress-block->address;
557   }
558 
559   if (!block) return;
560   if (block==yMostRU) {
561     block= block->lessRU;
562     if (!block) return;
563   }
564 
565   size= block->nextAddress-block->address;
566   for (;;) {
567     lessRU= block->lessRU;
568     FreeCacheBlock(block);
569     if (!lessRU) return;
570     block= lessRU;
571     size= block->nextAddress-block->address;
572     while (n+1<=yCacheNumber && total+size<=yCacheSize) {
573       n++;
574       total+= size;
575       block= block->lessRU;
576       if (!block) return;
577       size= block->nextAddress-block->address;
578     }
579   }
580 }
581 
582 /*--------------------------------------------------------------------------*/
583