1 /* Copyright(C) 2005 Takuo Kitame <kitame @ valinux. co. jp>
2 
3   This library is free software; you can redistribute it and/or
4   modify it under the terms of the GNU Lesser General Public
5   License as published by the Free Software Foundation; either
6   version 2.1 of the License, or (at your option) any later version.
7 
8   This library is distributed in the hope that it will be useful,
9   but WITHOUT ANY WARRANTY; without even the implied warranty of
10   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11   Lesser General Public License for more details.
12 
13   You should have received a copy of the GNU Lesser General Public
14   License along with this library; if not, write to the Free Software
15   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 */
17 
18 #ifdef USE_AIO
19 
20 #include "senna_in.h"
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <unistd.h>
25 
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 
29 #include <sys/param.h>
30 
31 #include <sys/shm.h>
32 #include <sys/mman.h>
33 #include <fcntl.h>
34 
35 #include "cache.h"
36 
37 #define SEN_ATOMIC_DEC(x) { uint32_t _n; SEN_ATOMIC_ADD_EX((x), -1, _n); }
38 
39 /* feature switch */
40 int sen_aio_enabled;
41 int sen_debug_print;
42 
43 /* cache structure  */
44 int sen_cache_size;  /* size of cache (MB) */
45 int sen_cache_block; /* alignment */
46 int sen_hash_size;   /* size of hash array */
47 
48 #define DEFAULT_CACHE_SIZE   256
49 #define DEFAULT_CACHE_BLOCK  4096
50 #define DEFAULT_HASH_SIZE    1024
51 
52 #define MEM_ALIGN    sen_cache_block
53 #define CACHE_SIZE   (sen_cache_size * 1024 * 1024)
54 #define CACHE_NUM    (CACHE_SIZE / MEM_ALIGN) /* number of cache data */
55 #define HASH_SIZE    sen_hash_size
56 
57 /* shm namespaces */
58 #define CACHE_DATA_SHM_NAME    "senna_cache_data"
59 #define CACHE_BIN_SHM_NAME     "senna_cache_bin"
60 #define CACHE_COUNTER_SHM_NAME "senna_cache_counter"
61 #define CACHE_HASH_SHM_NAME    "senna_cache_hash"
62 
63 /* hash block */
64 typedef struct _CacheHash CacheHash;
65 struct _CacheHash {
66     int num; /* first CacheData number */
67     int lock;
68 };
69 
70 /* for debug  */
71 
72 /* mmaped to shm */
73 unsigned int *ccount; /* global counter */
74 void *cache_data; /* CacheData array */
75 void *cache_bin;  /* Cache binary */
76 void *cache_hash; /* Hash array */
77 
78 /* misc macro */
79 #define HASH_VAL (1024 * 128)
80 //#define CALC_HASH(x,y,z)  ((x + y + (z/sen_cache_block)) % HASH_SIZE)
81 #define CALC_HASH(x,y,z)  ((x + y + (z/HASH_VAL)) % HASH_SIZE)
82 #define GET_CD_NUM(num)   ((CacheData*)(cache_data + sizeof(CacheData)*num))
83 #define GET_HASH_NUM(num) ((CacheHash*)(cache_hash + sizeof(CacheHash)*num))
84 #define GET_HASH_CD(cd)   (GET_HASH_NUM(CALC_HASH(cd->dev,cd->inode,cd->offset)))
85 
86 /*
87  * Initialize shm region.
88  */
89 static void
sen_cache_init(void)90 sen_cache_init (void)
91 {
92     int shmfd;
93     int ret;
94     char shm_name[PATH_MAX];
95 
96     if (getenv("SEN_CACHE_SIZE")) {
97         sen_cache_size = atoi(getenv("SEN_CACHE_SIZE"));
98     } else {
99         sen_cache_size = DEFAULT_CACHE_SIZE;
100     }
101     if (getenv("SEN_CACHE_BLOCK")) {
102         sen_cache_block = atoi(getenv("SEN_CACHE_BLOCK"));
103     } else {
104         sen_cache_block = DEFAULT_CACHE_BLOCK;
105     }
106     if (getenv("SEN_HASH_SIZE")) {
107         sen_hash_size = atoi(getenv("SEN_HASH_SIZE"));
108     } else {
109         sen_hash_size = DEFAULT_HASH_SIZE;
110     }
111     dp ("cache size: %d MB, cache block: %d, hash size: %d\n", sen_cache_size, sen_cache_block, sen_hash_size);
112 
113     /* Array of CacheData */
114     snprintf (shm_name, PATH_MAX, "%s_%d", CACHE_DATA_SHM_NAME, MEM_ALIGN);
115     shmfd = shm_open (shm_name, O_CREAT|O_RDWR|O_EXCL, 0666);
116     if (shmfd < 0 && errno == EEXIST) {
117         /* exist : no op */
118     } else {
119         int h;
120         int num;
121         size_t s = sizeof(CacheData) * CACHE_NUM;
122         void *p = malloc (s);
123 
124         for (h = 0; h < HASH_SIZE; h++) {
125             /* Initialize with INVALID */
126             for (num = 0; num < CACHE_NUM / HASH_SIZE; num++) {
127                 int number = num + (h * (CACHE_NUM / HASH_SIZE));
128                 CacheData *cd = p + sizeof(CacheData) * number;
129 
130                 cd->num = number; /* unique number */
131                 cd->ref = 0;
132 
133                 /* vector list */
134                 if (cd->num + 1 < (h+1) * (CACHE_NUM / HASH_SIZE)) {
135                     cd->next = cd->num + 1;
136                 } else {
137                     cd->next = -1; /* end of list */
138                 }
139                 cd->flag = CACHE_INVALID;
140             }
141         }
142 
143         ret = write (shmfd, p, s);
144         free (p);
145         if (ret < 0) {
146             perror ("sen_cache_init: write");
147             exit (1);
148         }
149     }
150 
151     /* Cache binary */
152     snprintf (shm_name, PATH_MAX, "%s_%d", CACHE_BIN_SHM_NAME, MEM_ALIGN);
153     shmfd = shm_open (shm_name, O_CREAT|O_RDWR|O_EXCL, 0666);
154     if (shmfd < 0 && errno == EEXIST) {
155         /* exist : no op */
156     } else {
157         size_t s = CACHE_SIZE;
158         void *p = malloc (s);
159         ret = write (shmfd, p, s);
160         free (p);
161 	if (ret < 0) {
162             perror ("sen_cache_init: write");
163             exit (1);
164 	}
165     }
166 
167     /* global counter */
168     snprintf (shm_name, PATH_MAX, "%s_%d", CACHE_COUNTER_SHM_NAME, MEM_ALIGN);
169     shmfd = shm_open (shm_name, O_CREAT|O_RDWR|O_EXCL, 0666);
170     if (shmfd < 0 && errno == EEXIST) {
171         /* exist : no op */
172     } else {
173         unsigned int i = 1;
174         ret = write (shmfd, &i, sizeof(unsigned int));
175 	if (ret < 0) {
176 	  perror ("sen_cache_init: write");
177 	  exit (1);
178 	}
179     }
180 
181     /* Array of CacheHash */
182     snprintf (shm_name, PATH_MAX, "%s_%d", CACHE_HASH_SHM_NAME, MEM_ALIGN);
183     shmfd = shm_open (shm_name, O_CREAT|O_RDWR|O_EXCL, 0666);
184     if (shmfd < 0 && errno == EEXIST) {
185         /* exist : no op */
186     } else {
187         int i;
188         size_t s = sizeof(CacheHash) * HASH_SIZE;
189         void *p = malloc(s);
190         for (i = 0; i < HASH_SIZE; i++) {
191             CacheHash *hash = p + sizeof(CacheHash) * i;
192 
193             /* first CacheData */
194             hash->num = i * (CACHE_NUM / HASH_SIZE);
195 
196             hash->lock = 0;
197         }
198         write (shmfd, p, s);
199         free (p);
200     }
201 }
202 
203 /*
204  * open shm region, and mmap().
205  */
206 void
sen_cache_open(void)207 sen_cache_open (void)
208 {
209     int shmfd;
210     char shm_name[PATH_MAX];
211 
212     /* no op when disabled */
213     if (!sen_aio_enabled) return;
214 
215     sen_cache_init ();
216 
217     /* global counter */
218     snprintf (shm_name, PATH_MAX, "%s_%d", CACHE_COUNTER_SHM_NAME, MEM_ALIGN);
219     shmfd = shm_open (shm_name, O_CREAT|O_RDWR, 0666);
220     if ( (ccount = (unsigned int *)mmap(NULL, sizeof(unsigned int),
221                                 PROT_READ|PROT_WRITE, MAP_SHARED, shmfd, 0)) == (unsigned int *)-1) {
222         perror("mmap (counter)");
223         exit (1);
224     }
225 
226     /* Cache binary */
227     snprintf (shm_name, PATH_MAX, "%s_%d", CACHE_BIN_SHM_NAME, MEM_ALIGN);
228     shmfd = shm_open (shm_name, O_CREAT|O_RDWR, 0666);
229     if ( (cache_bin = mmap (NULL, CACHE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED,
230 			    shmfd, 0)) == (void *)-1) {
231         perror("mmap (cache)");
232         exit (1);
233     }
234 
235     /* CacheHash */
236     snprintf (shm_name, PATH_MAX, "%s_%d", CACHE_HASH_SHM_NAME, MEM_ALIGN);
237     shmfd = shm_open (shm_name, O_CREAT|O_RDWR, 0666);
238     cache_hash = mmap (NULL, sizeof(CacheHash) * HASH_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED,
239                        shmfd, 0);
240 
241     /* CacheData */
242     snprintf (shm_name, PATH_MAX, "%s_%d", CACHE_DATA_SHM_NAME, MEM_ALIGN);
243     shmfd = shm_open (shm_name, O_CREAT|O_RDWR, 0666);
244     if ( (cache_data = mmap(NULL, sizeof(CacheData) * CACHE_NUM,
245                             PROT_READ|PROT_WRITE, MAP_SHARED, shmfd, 0)) == (void *)-1) {
246         perror("mmap (CacheData)");
247         exit (1);
248     }
249 }
250 
251 /*
252  * search Hit, Free or LRU node.
253  *
254  * @dev: device number
255  * @inode: inode number
256  * @offset: file offset
257  *
258  * return: CacheData or NULL (Cache region is full of referenced)
259  */
260 static CacheData *
sen_cache_hash_search(dev_t dev,ino_t inode,size_t offset,size_t size)261 sen_cache_hash_search (dev_t dev, ino_t inode, size_t offset, size_t size)
262 {
263     int h = CALC_HASH(dev,inode,offset);
264     CacheData *cd = NULL;   /* expire CacheData */
265     CacheData *fcd = NULL;  /* free CacheData */
266     CacheHash *hash = GET_HASH_NUM(h);
267     unsigned int c = *ccount; /* current stamp */
268     unsigned int l = 0; /* oldest stamp */
269     int i;
270     int r = 0; /* retry count */
271 
272 LRU_retry:
273     SEN_ATOMIC_ADD_EX(&(hash->lock), 1, i);
274     if (i > 0) {
275         SEN_ATOMIC_DEC(&(hash->lock));
276         usleep (1);
277         goto LRU_retry;
278     }
279 
280     l = 0;
281     for (i = 0; i < CACHE_NUM / HASH_SIZE; i++) {
282         int number = i + (h * (CACHE_NUM / HASH_SIZE));
283         CacheData *tmp = GET_CD_NUM(number);
284 
285         if (tmp->offset == offset &&
286             tmp->inode == inode &&
287             tmp->dev == dev &&
288             tmp->flag != CACHE_INVALID) {
289 
290             if (tmp->flag == CACHE_LOCKED) {
291                 /* still in locked */
292                 SEN_ATOMIC_DEC(&(hash->lock));
293                 usleep(1);
294                 goto LRU_retry;
295             }
296 
297             tmp->stamp = (*ccount)++;
298 
299             /*
300              *  should ignore CACHE_READ reference,
301              *  because it will be upped by other process.
302              */
303             if (tmp->flag == CACHE_VALID) {
304                 tmp->ref++;
305                 dp ("CacheData[%d]: ref: %d\n", tmp->num, tmp->ref);
306             }
307 
308             SEN_ATOMIC_DEC(&(hash->lock));
309             return tmp;
310         }
311 
312         /* LRU or INVALID search */
313         if (fcd) continue; /* already found free area */
314 
315         if (tmp->flag == CACHE_INVALID && tmp->ref == 0) {
316             fcd = tmp;
317         } else if (!fcd && (unsigned int)(c - tmp->stamp) > l) {
318             l = (unsigned int)(c - tmp->stamp);
319             cd = tmp;
320         }
321     }
322 
323     if (fcd || (cd && cd->flag == CACHE_VALID && cd->ref == 0)) {
324 
325         if (!fcd) {
326             /* Expire LRU and unreferenced CacheData */
327             dp ("Expire[%d] CacheData[%d] stamp=%u dev=%lu inode=%lu offset=%u size=%u\n",
328                 h, cd->num, cd->stamp, (long)cd->dev, (long)cd->inode, cd->offset, cd->size);
329 
330             fcd = cd;
331         }
332 
333         /* free CacheData is now available */
334         fcd->ref++;
335         fcd->stamp = (*ccount)++;
336 
337         fcd->flag = CACHE_LOCKED;
338 
339         fcd->dev = dev;
340         fcd->inode = inode;
341         fcd->offset = offset;
342         fcd->size = size;
343 
344         dp ("CacheData[%d]: ref: %d\n", fcd->num, fcd->ref);
345 
346         SEN_ATOMIC_DEC(&(hash->lock));
347         return fcd;
348     } else {
349         /* Not found Free CacheDate &&  LRU CacheData is used, cannot expire. */
350         SEN_ATOMIC_DEC(&(hash->lock));
351         usleep(2);
352         cd->stamp = c; /* update counter stamp with current */
353         if (++r > CACHE_NUM / HASH_SIZE) {
354             /* no space for new cache in this hash block */
355             dp ("No room for new CacheData: hash[%d] is full of used\n", h);
356             return NULL;
357         }
358         goto LRU_retry;
359     }
360 
361     /* should not be reached */
362     return NULL;
363 }
364 
365 /*
366  * read via shm cache.
367  *
368  * @oper: CacheIOOper, structure of CacheData, aiocb and real-read flag.
369  * @dev: device number
370  * @inode: inode number
371  * @offset: file offset
372  *
373  * return: pointer to buffer  or NULL (no cache room, cannot use cache)
374  *
375  */
376 void *
sen_cache_read(CacheIOOper * oper,dev_t dev,ino_t inode,size_t offset,size_t size)377 sen_cache_read (CacheIOOper *oper, dev_t dev, ino_t inode, size_t offset, size_t size)
378 {
379     void *p = NULL;
380     CacheData *cd = NULL;
381 
382     cd = sen_cache_hash_search (dev, inode, offset, size);
383 
384     if (!cd) return NULL; /* no room */
385 
386     switch (cd->flag) {
387     case CACHE_INVALID:
388     case CACHE_LOCKED:
389         /* Cache Miss */
390 
391         /* Initialize CacheData */
392         cd->flag = CACHE_READ;
393 
394         /* set aiocb */
395         oper->iocb->aio_buf = p = cache_bin + (cd->num * MEM_ALIGN);
396         oper->iocb->aio_nbytes = size;
397         oper->iocb->aio_offset = offset;
398 
399         /* real read() is required */
400         oper->read = 1;
401 
402         dp ("Miss CacheData[%d] stamp=%u dev=%lu inode=%lu offset=%d size=%d flag=CACHE_INVALID ref=%d\n",
403             cd->num, cd->stamp, (long)dev, (long)inode, offset, size, cd->ref);
404         break;
405     case CACHE_VALID:
406         /* Hit complete Cache */
407         dp ("Hit CacheData[%d] stamp=%u dev=%lu inode=%lu offset=%d size=%d flag=CACHE_VALID ref=%d\n",
408             cd->num, cd->stamp, (long)dev, (long)inode, offset, size, cd->ref);
409         p = cache_bin + (cd->num * MEM_ALIGN);
410         break;
411     case CACHE_READ:
412         /* Hit but reading by other operation */
413 
414         dp ("Hit CacheData[%d] stamp=%u dev=%lu inode=%lu offset=%d size=%d flag=CACHE_READ ref=%d\n",
415             cd->num, cd->stamp, (long)dev, (long)inode, offset, size, cd->ref);
416 
417         /* will be ignored, but should not return NULL */
418         p = cache_bin + (cd->num * MEM_ALIGN);
419         break;
420     default:
421         break;
422     }
423 
424     /* IOOper : CacheData */
425     oper->cd = cd;
426 
427     return p;
428 }
429 
430 /*
431  * count up reference
432  *
433  * @number: CacheData number
434  */
435 void
sen_cache_data_unref(int number)436 sen_cache_data_unref (int number)
437 {
438     CacheData *cd = GET_CD_NUM(number);
439     CacheHash *hash = GET_HASH_CD(cd);
440     int c;
441 
442     /*
443      * FIXME: assert cd and hash.
444      */
445 
446 unref_retry:
447     SEN_ATOMIC_ADD_EX(&(hash->lock), 1, c);
448     if (c > 0) {
449         // printf ("in unref_retry: hash=%d pid=%d\n",
450         //          hash->num, getpid());
451         SEN_ATOMIC_DEC(&(hash->lock));
452         usleep (1);
453         goto unref_retry;
454     }
455     cd->ref--;
456     SEN_ATOMIC_DEC(&(hash->lock));
457     dp ("CacheData[%d]: unref: %d\n", cd->num, cd->ref);
458 }
459 
460 /*
461  * count up reference
462  *
463  * @number: CacheData number
464  */
465 void
sen_cache_data_ref(int number)466 sen_cache_data_ref (int number)
467 {
468     CacheData *cd = GET_CD_NUM(number);
469     CacheHash *hash = GET_HASH_CD(cd);
470     int c;
471 
472     /*
473      * FIXME: assert cd and hash.
474      */
475 
476 ref_retry:
477     SEN_ATOMIC_ADD_EX(&(hash->lock), 1, c);
478     if (c > 0) {
479         //printf ("in ref_retry: hash=%d pid=%d\n",
480         // hash->num, getpid());
481         SEN_ATOMIC_DEC(&(hash->lock));
482         usleep (1);
483         goto ref_retry;
484     }
485     cd->ref++;
486     SEN_ATOMIC_DEC(&(hash->lock));
487     dp ("CacheData[%d]: ref: %d\n", cd->num, cd->ref);
488 }
489 
490 /*
491  * mark CacheData as invalid  between pos and pos+size.
492  *
493  * @dev: device number.
494  * @inode: inode number.
495  * @offset: real offset.
496  * @size: real required size.
497  *
498  */
499 void
sen_cache_mark_invalid(dev_t dev,ino_t inode,off_t offset,size_t size)500 sen_cache_mark_invalid (dev_t dev, ino_t inode, off_t offset, size_t size)
501 {
502     off_t voffset = offset - (offset % MEM_ALIGN);
503     size_t vsize = offset + size;
504     int number, hash_begin, hash_end;
505     CacheData *cd;
506 
507     vsize = ((vsize -1) / MEM_ALIGN + 1) *  MEM_ALIGN;
508     vsize = vsize - voffset;
509 
510     hash_begin = CALC_HASH(dev, inode, voffset);
511     hash_end   = CALC_HASH(dev, inode, (voffset+vsize-1));
512     if (hash_end < hash_begin)
513         hash_end = hash_end + HASH_SIZE;
514     /*
515      *  h_b  |     |h_e  |h_e+1     // begin | end | end+1
516      *  oooo  oooo  oooo  oooo      // hash
517      *  n       n++     <           // cd number
518      */
519     /*
520      * CACHE_NUM / HASH_SIZE = number of cd in a HASH.
521      */
522     for (number = hash_begin * (CACHE_NUM / HASH_SIZE);
523          number < (hash_end + 1) * (CACHE_NUM / HASH_SIZE);
524          number++) {
525 
526         if (number >= CACHE_NUM)
527             cd = GET_CD_NUM(number - CACHE_NUM);
528         else
529             cd = GET_CD_NUM(number);
530 
531         if (cd->offset >= voffset &&
532             cd->offset < voffset + vsize &&
533             cd->inode == inode &&
534             cd->dev == dev) {
535             dp ("MAI CacheData[%d] stamp=%u dev=%lu inode=%lu offset=%d size=%d flag=CACHE_READ ref=%d\n",
536                 cd->num, cd->stamp, (long)cd->dev, (long)cd->inode, cd->offset, cd->size, cd->ref);
537             cd->flag = CACHE_INVALID;
538         }
539     }
540 }
541 
542 /*
543  * for DEBUG.
544  *
545  */
546 void
sen_cache_dump(void)547 sen_cache_dump (void)
548 {
549     int i,l;
550     int used;
551     int allofused = 0;
552     for (i = 0; i < HASH_SIZE; i++) {
553         used = 0;
554         for (l = i * (CACHE_NUM / HASH_SIZE); l < (i +1) * (CACHE_NUM/HASH_SIZE); l++) {
555             CacheData *cd = GET_CD_NUM(l);
556             if (cd->flag != CACHE_INVALID)
557                 used++;
558             printf (" HASH[%d] CacheData[%d]: dev=%lu inode=%lu offset=%u flag=%d ref=%d\n",
559                     i, l, (long)cd->dev, (long)cd->inode, cd->offset, cd->flag, cd->ref);
560         }
561         allofused = allofused + used;
562         printf ("HASH %d: %d/%d used.\n---\n", i, used, CACHE_NUM / HASH_SIZE);
563     }
564     printf ("ALL: %d/%d used.\n", allofused, CACHE_NUM);
565 }
566 
567 #endif /* USE_AIO */
568