1 /* Copyright(C) 2005 Takuo Kitame <kitame @ valinux. co. jp>
2
3 This library is free software; you can redistribute it and/or
4 modify it under the terms of the GNU Lesser General Public
5 License as published by the Free Software Foundation; either
6 version 2.1 of the License, or (at your option) any later version.
7
8 This library is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 Lesser General Public License for more details.
12
13 You should have received a copy of the GNU Lesser General Public
14 License along with this library; if not, write to the Free Software
15 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
17
18 #ifdef USE_AIO
19
20 #include "senna_in.h"
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <unistd.h>
25
26 #include <sys/types.h>
27 #include <sys/stat.h>
28
29 #include <sys/param.h>
30
31 #include <sys/shm.h>
32 #include <sys/mman.h>
33 #include <fcntl.h>
34
35 #include "cache.h"
36
37 #define SEN_ATOMIC_DEC(x) { uint32_t _n; SEN_ATOMIC_ADD_EX((x), -1, _n); }
38
39 /* feature switch */
40 int sen_aio_enabled;
41 int sen_debug_print;
42
43 /* cache structure */
44 int sen_cache_size; /* size of cache (MB) */
45 int sen_cache_block; /* alignment */
46 int sen_hash_size; /* size of hash array */
47
48 #define DEFAULT_CACHE_SIZE 256
49 #define DEFAULT_CACHE_BLOCK 4096
50 #define DEFAULT_HASH_SIZE 1024
51
52 #define MEM_ALIGN sen_cache_block
53 #define CACHE_SIZE (sen_cache_size * 1024 * 1024)
54 #define CACHE_NUM (CACHE_SIZE / MEM_ALIGN) /* number of cache data */
55 #define HASH_SIZE sen_hash_size
56
57 /* shm namespaces */
58 #define CACHE_DATA_SHM_NAME "senna_cache_data"
59 #define CACHE_BIN_SHM_NAME "senna_cache_bin"
60 #define CACHE_COUNTER_SHM_NAME "senna_cache_counter"
61 #define CACHE_HASH_SHM_NAME "senna_cache_hash"
62
63 /* hash block */
64 typedef struct _CacheHash CacheHash;
65 struct _CacheHash {
66 int num; /* first CacheData number */
67 int lock;
68 };
69
70 /* for debug */
71
72 /* mmaped to shm */
73 unsigned int *ccount; /* global counter */
74 void *cache_data; /* CacheData array */
75 void *cache_bin; /* Cache binary */
76 void *cache_hash; /* Hash array */
77
78 /* misc macro */
79 #define HASH_VAL (1024 * 128)
80 //#define CALC_HASH(x,y,z) ((x + y + (z/sen_cache_block)) % HASH_SIZE)
81 #define CALC_HASH(x,y,z) ((x + y + (z/HASH_VAL)) % HASH_SIZE)
82 #define GET_CD_NUM(num) ((CacheData*)(cache_data + sizeof(CacheData)*num))
83 #define GET_HASH_NUM(num) ((CacheHash*)(cache_hash + sizeof(CacheHash)*num))
84 #define GET_HASH_CD(cd) (GET_HASH_NUM(CALC_HASH(cd->dev,cd->inode,cd->offset)))
85
86 /*
87 * Initialize shm region.
88 */
89 static void
sen_cache_init(void)90 sen_cache_init (void)
91 {
92 int shmfd;
93 int ret;
94 char shm_name[PATH_MAX];
95
96 if (getenv("SEN_CACHE_SIZE")) {
97 sen_cache_size = atoi(getenv("SEN_CACHE_SIZE"));
98 } else {
99 sen_cache_size = DEFAULT_CACHE_SIZE;
100 }
101 if (getenv("SEN_CACHE_BLOCK")) {
102 sen_cache_block = atoi(getenv("SEN_CACHE_BLOCK"));
103 } else {
104 sen_cache_block = DEFAULT_CACHE_BLOCK;
105 }
106 if (getenv("SEN_HASH_SIZE")) {
107 sen_hash_size = atoi(getenv("SEN_HASH_SIZE"));
108 } else {
109 sen_hash_size = DEFAULT_HASH_SIZE;
110 }
111 dp ("cache size: %d MB, cache block: %d, hash size: %d\n", sen_cache_size, sen_cache_block, sen_hash_size);
112
113 /* Array of CacheData */
114 snprintf (shm_name, PATH_MAX, "%s_%d", CACHE_DATA_SHM_NAME, MEM_ALIGN);
115 shmfd = shm_open (shm_name, O_CREAT|O_RDWR|O_EXCL, 0666);
116 if (shmfd < 0 && errno == EEXIST) {
117 /* exist : no op */
118 } else {
119 int h;
120 int num;
121 size_t s = sizeof(CacheData) * CACHE_NUM;
122 void *p = malloc (s);
123
124 for (h = 0; h < HASH_SIZE; h++) {
125 /* Initialize with INVALID */
126 for (num = 0; num < CACHE_NUM / HASH_SIZE; num++) {
127 int number = num + (h * (CACHE_NUM / HASH_SIZE));
128 CacheData *cd = p + sizeof(CacheData) * number;
129
130 cd->num = number; /* unique number */
131 cd->ref = 0;
132
133 /* vector list */
134 if (cd->num + 1 < (h+1) * (CACHE_NUM / HASH_SIZE)) {
135 cd->next = cd->num + 1;
136 } else {
137 cd->next = -1; /* end of list */
138 }
139 cd->flag = CACHE_INVALID;
140 }
141 }
142
143 ret = write (shmfd, p, s);
144 free (p);
145 if (ret < 0) {
146 perror ("sen_cache_init: write");
147 exit (1);
148 }
149 }
150
151 /* Cache binary */
152 snprintf (shm_name, PATH_MAX, "%s_%d", CACHE_BIN_SHM_NAME, MEM_ALIGN);
153 shmfd = shm_open (shm_name, O_CREAT|O_RDWR|O_EXCL, 0666);
154 if (shmfd < 0 && errno == EEXIST) {
155 /* exist : no op */
156 } else {
157 size_t s = CACHE_SIZE;
158 void *p = malloc (s);
159 ret = write (shmfd, p, s);
160 free (p);
161 if (ret < 0) {
162 perror ("sen_cache_init: write");
163 exit (1);
164 }
165 }
166
167 /* global counter */
168 snprintf (shm_name, PATH_MAX, "%s_%d", CACHE_COUNTER_SHM_NAME, MEM_ALIGN);
169 shmfd = shm_open (shm_name, O_CREAT|O_RDWR|O_EXCL, 0666);
170 if (shmfd < 0 && errno == EEXIST) {
171 /* exist : no op */
172 } else {
173 unsigned int i = 1;
174 ret = write (shmfd, &i, sizeof(unsigned int));
175 if (ret < 0) {
176 perror ("sen_cache_init: write");
177 exit (1);
178 }
179 }
180
181 /* Array of CacheHash */
182 snprintf (shm_name, PATH_MAX, "%s_%d", CACHE_HASH_SHM_NAME, MEM_ALIGN);
183 shmfd = shm_open (shm_name, O_CREAT|O_RDWR|O_EXCL, 0666);
184 if (shmfd < 0 && errno == EEXIST) {
185 /* exist : no op */
186 } else {
187 int i;
188 size_t s = sizeof(CacheHash) * HASH_SIZE;
189 void *p = malloc(s);
190 for (i = 0; i < HASH_SIZE; i++) {
191 CacheHash *hash = p + sizeof(CacheHash) * i;
192
193 /* first CacheData */
194 hash->num = i * (CACHE_NUM / HASH_SIZE);
195
196 hash->lock = 0;
197 }
198 write (shmfd, p, s);
199 free (p);
200 }
201 }
202
203 /*
204 * open shm region, and mmap().
205 */
206 void
sen_cache_open(void)207 sen_cache_open (void)
208 {
209 int shmfd;
210 char shm_name[PATH_MAX];
211
212 /* no op when disabled */
213 if (!sen_aio_enabled) return;
214
215 sen_cache_init ();
216
217 /* global counter */
218 snprintf (shm_name, PATH_MAX, "%s_%d", CACHE_COUNTER_SHM_NAME, MEM_ALIGN);
219 shmfd = shm_open (shm_name, O_CREAT|O_RDWR, 0666);
220 if ( (ccount = (unsigned int *)mmap(NULL, sizeof(unsigned int),
221 PROT_READ|PROT_WRITE, MAP_SHARED, shmfd, 0)) == (unsigned int *)-1) {
222 perror("mmap (counter)");
223 exit (1);
224 }
225
226 /* Cache binary */
227 snprintf (shm_name, PATH_MAX, "%s_%d", CACHE_BIN_SHM_NAME, MEM_ALIGN);
228 shmfd = shm_open (shm_name, O_CREAT|O_RDWR, 0666);
229 if ( (cache_bin = mmap (NULL, CACHE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED,
230 shmfd, 0)) == (void *)-1) {
231 perror("mmap (cache)");
232 exit (1);
233 }
234
235 /* CacheHash */
236 snprintf (shm_name, PATH_MAX, "%s_%d", CACHE_HASH_SHM_NAME, MEM_ALIGN);
237 shmfd = shm_open (shm_name, O_CREAT|O_RDWR, 0666);
238 cache_hash = mmap (NULL, sizeof(CacheHash) * HASH_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED,
239 shmfd, 0);
240
241 /* CacheData */
242 snprintf (shm_name, PATH_MAX, "%s_%d", CACHE_DATA_SHM_NAME, MEM_ALIGN);
243 shmfd = shm_open (shm_name, O_CREAT|O_RDWR, 0666);
244 if ( (cache_data = mmap(NULL, sizeof(CacheData) * CACHE_NUM,
245 PROT_READ|PROT_WRITE, MAP_SHARED, shmfd, 0)) == (void *)-1) {
246 perror("mmap (CacheData)");
247 exit (1);
248 }
249 }
250
251 /*
252 * search Hit, Free or LRU node.
253 *
254 * @dev: device number
255 * @inode: inode number
256 * @offset: file offset
257 *
258 * return: CacheData or NULL (Cache region is full of referenced)
259 */
260 static CacheData *
sen_cache_hash_search(dev_t dev,ino_t inode,size_t offset,size_t size)261 sen_cache_hash_search (dev_t dev, ino_t inode, size_t offset, size_t size)
262 {
263 int h = CALC_HASH(dev,inode,offset);
264 CacheData *cd = NULL; /* expire CacheData */
265 CacheData *fcd = NULL; /* free CacheData */
266 CacheHash *hash = GET_HASH_NUM(h);
267 unsigned int c = *ccount; /* current stamp */
268 unsigned int l = 0; /* oldest stamp */
269 int i;
270 int r = 0; /* retry count */
271
272 LRU_retry:
273 SEN_ATOMIC_ADD_EX(&(hash->lock), 1, i);
274 if (i > 0) {
275 SEN_ATOMIC_DEC(&(hash->lock));
276 usleep (1);
277 goto LRU_retry;
278 }
279
280 l = 0;
281 for (i = 0; i < CACHE_NUM / HASH_SIZE; i++) {
282 int number = i + (h * (CACHE_NUM / HASH_SIZE));
283 CacheData *tmp = GET_CD_NUM(number);
284
285 if (tmp->offset == offset &&
286 tmp->inode == inode &&
287 tmp->dev == dev &&
288 tmp->flag != CACHE_INVALID) {
289
290 if (tmp->flag == CACHE_LOCKED) {
291 /* still in locked */
292 SEN_ATOMIC_DEC(&(hash->lock));
293 usleep(1);
294 goto LRU_retry;
295 }
296
297 tmp->stamp = (*ccount)++;
298
299 /*
300 * should ignore CACHE_READ reference,
301 * because it will be upped by other process.
302 */
303 if (tmp->flag == CACHE_VALID) {
304 tmp->ref++;
305 dp ("CacheData[%d]: ref: %d\n", tmp->num, tmp->ref);
306 }
307
308 SEN_ATOMIC_DEC(&(hash->lock));
309 return tmp;
310 }
311
312 /* LRU or INVALID search */
313 if (fcd) continue; /* already found free area */
314
315 if (tmp->flag == CACHE_INVALID && tmp->ref == 0) {
316 fcd = tmp;
317 } else if (!fcd && (unsigned int)(c - tmp->stamp) > l) {
318 l = (unsigned int)(c - tmp->stamp);
319 cd = tmp;
320 }
321 }
322
323 if (fcd || (cd && cd->flag == CACHE_VALID && cd->ref == 0)) {
324
325 if (!fcd) {
326 /* Expire LRU and unreferenced CacheData */
327 dp ("Expire[%d] CacheData[%d] stamp=%u dev=%lu inode=%lu offset=%u size=%u\n",
328 h, cd->num, cd->stamp, (long)cd->dev, (long)cd->inode, cd->offset, cd->size);
329
330 fcd = cd;
331 }
332
333 /* free CacheData is now available */
334 fcd->ref++;
335 fcd->stamp = (*ccount)++;
336
337 fcd->flag = CACHE_LOCKED;
338
339 fcd->dev = dev;
340 fcd->inode = inode;
341 fcd->offset = offset;
342 fcd->size = size;
343
344 dp ("CacheData[%d]: ref: %d\n", fcd->num, fcd->ref);
345
346 SEN_ATOMIC_DEC(&(hash->lock));
347 return fcd;
348 } else {
349 /* Not found Free CacheDate && LRU CacheData is used, cannot expire. */
350 SEN_ATOMIC_DEC(&(hash->lock));
351 usleep(2);
352 cd->stamp = c; /* update counter stamp with current */
353 if (++r > CACHE_NUM / HASH_SIZE) {
354 /* no space for new cache in this hash block */
355 dp ("No room for new CacheData: hash[%d] is full of used\n", h);
356 return NULL;
357 }
358 goto LRU_retry;
359 }
360
361 /* should not be reached */
362 return NULL;
363 }
364
365 /*
366 * read via shm cache.
367 *
368 * @oper: CacheIOOper, structure of CacheData, aiocb and real-read flag.
369 * @dev: device number
370 * @inode: inode number
371 * @offset: file offset
372 *
373 * return: pointer to buffer or NULL (no cache room, cannot use cache)
374 *
375 */
376 void *
sen_cache_read(CacheIOOper * oper,dev_t dev,ino_t inode,size_t offset,size_t size)377 sen_cache_read (CacheIOOper *oper, dev_t dev, ino_t inode, size_t offset, size_t size)
378 {
379 void *p = NULL;
380 CacheData *cd = NULL;
381
382 cd = sen_cache_hash_search (dev, inode, offset, size);
383
384 if (!cd) return NULL; /* no room */
385
386 switch (cd->flag) {
387 case CACHE_INVALID:
388 case CACHE_LOCKED:
389 /* Cache Miss */
390
391 /* Initialize CacheData */
392 cd->flag = CACHE_READ;
393
394 /* set aiocb */
395 oper->iocb->aio_buf = p = cache_bin + (cd->num * MEM_ALIGN);
396 oper->iocb->aio_nbytes = size;
397 oper->iocb->aio_offset = offset;
398
399 /* real read() is required */
400 oper->read = 1;
401
402 dp ("Miss CacheData[%d] stamp=%u dev=%lu inode=%lu offset=%d size=%d flag=CACHE_INVALID ref=%d\n",
403 cd->num, cd->stamp, (long)dev, (long)inode, offset, size, cd->ref);
404 break;
405 case CACHE_VALID:
406 /* Hit complete Cache */
407 dp ("Hit CacheData[%d] stamp=%u dev=%lu inode=%lu offset=%d size=%d flag=CACHE_VALID ref=%d\n",
408 cd->num, cd->stamp, (long)dev, (long)inode, offset, size, cd->ref);
409 p = cache_bin + (cd->num * MEM_ALIGN);
410 break;
411 case CACHE_READ:
412 /* Hit but reading by other operation */
413
414 dp ("Hit CacheData[%d] stamp=%u dev=%lu inode=%lu offset=%d size=%d flag=CACHE_READ ref=%d\n",
415 cd->num, cd->stamp, (long)dev, (long)inode, offset, size, cd->ref);
416
417 /* will be ignored, but should not return NULL */
418 p = cache_bin + (cd->num * MEM_ALIGN);
419 break;
420 default:
421 break;
422 }
423
424 /* IOOper : CacheData */
425 oper->cd = cd;
426
427 return p;
428 }
429
430 /*
431 * count up reference
432 *
433 * @number: CacheData number
434 */
435 void
sen_cache_data_unref(int number)436 sen_cache_data_unref (int number)
437 {
438 CacheData *cd = GET_CD_NUM(number);
439 CacheHash *hash = GET_HASH_CD(cd);
440 int c;
441
442 /*
443 * FIXME: assert cd and hash.
444 */
445
446 unref_retry:
447 SEN_ATOMIC_ADD_EX(&(hash->lock), 1, c);
448 if (c > 0) {
449 // printf ("in unref_retry: hash=%d pid=%d\n",
450 // hash->num, getpid());
451 SEN_ATOMIC_DEC(&(hash->lock));
452 usleep (1);
453 goto unref_retry;
454 }
455 cd->ref--;
456 SEN_ATOMIC_DEC(&(hash->lock));
457 dp ("CacheData[%d]: unref: %d\n", cd->num, cd->ref);
458 }
459
460 /*
461 * count up reference
462 *
463 * @number: CacheData number
464 */
465 void
sen_cache_data_ref(int number)466 sen_cache_data_ref (int number)
467 {
468 CacheData *cd = GET_CD_NUM(number);
469 CacheHash *hash = GET_HASH_CD(cd);
470 int c;
471
472 /*
473 * FIXME: assert cd and hash.
474 */
475
476 ref_retry:
477 SEN_ATOMIC_ADD_EX(&(hash->lock), 1, c);
478 if (c > 0) {
479 //printf ("in ref_retry: hash=%d pid=%d\n",
480 // hash->num, getpid());
481 SEN_ATOMIC_DEC(&(hash->lock));
482 usleep (1);
483 goto ref_retry;
484 }
485 cd->ref++;
486 SEN_ATOMIC_DEC(&(hash->lock));
487 dp ("CacheData[%d]: ref: %d\n", cd->num, cd->ref);
488 }
489
490 /*
491 * mark CacheData as invalid between pos and pos+size.
492 *
493 * @dev: device number.
494 * @inode: inode number.
495 * @offset: real offset.
496 * @size: real required size.
497 *
498 */
499 void
sen_cache_mark_invalid(dev_t dev,ino_t inode,off_t offset,size_t size)500 sen_cache_mark_invalid (dev_t dev, ino_t inode, off_t offset, size_t size)
501 {
502 off_t voffset = offset - (offset % MEM_ALIGN);
503 size_t vsize = offset + size;
504 int number, hash_begin, hash_end;
505 CacheData *cd;
506
507 vsize = ((vsize -1) / MEM_ALIGN + 1) * MEM_ALIGN;
508 vsize = vsize - voffset;
509
510 hash_begin = CALC_HASH(dev, inode, voffset);
511 hash_end = CALC_HASH(dev, inode, (voffset+vsize-1));
512 if (hash_end < hash_begin)
513 hash_end = hash_end + HASH_SIZE;
514 /*
515 * h_b | |h_e |h_e+1 // begin | end | end+1
516 * oooo oooo oooo oooo // hash
517 * n n++ < // cd number
518 */
519 /*
520 * CACHE_NUM / HASH_SIZE = number of cd in a HASH.
521 */
522 for (number = hash_begin * (CACHE_NUM / HASH_SIZE);
523 number < (hash_end + 1) * (CACHE_NUM / HASH_SIZE);
524 number++) {
525
526 if (number >= CACHE_NUM)
527 cd = GET_CD_NUM(number - CACHE_NUM);
528 else
529 cd = GET_CD_NUM(number);
530
531 if (cd->offset >= voffset &&
532 cd->offset < voffset + vsize &&
533 cd->inode == inode &&
534 cd->dev == dev) {
535 dp ("MAI CacheData[%d] stamp=%u dev=%lu inode=%lu offset=%d size=%d flag=CACHE_READ ref=%d\n",
536 cd->num, cd->stamp, (long)cd->dev, (long)cd->inode, cd->offset, cd->size, cd->ref);
537 cd->flag = CACHE_INVALID;
538 }
539 }
540 }
541
542 /*
543 * for DEBUG.
544 *
545 */
546 void
sen_cache_dump(void)547 sen_cache_dump (void)
548 {
549 int i,l;
550 int used;
551 int allofused = 0;
552 for (i = 0; i < HASH_SIZE; i++) {
553 used = 0;
554 for (l = i * (CACHE_NUM / HASH_SIZE); l < (i +1) * (CACHE_NUM/HASH_SIZE); l++) {
555 CacheData *cd = GET_CD_NUM(l);
556 if (cd->flag != CACHE_INVALID)
557 used++;
558 printf (" HASH[%d] CacheData[%d]: dev=%lu inode=%lu offset=%u flag=%d ref=%d\n",
559 i, l, (long)cd->dev, (long)cd->inode, cd->offset, cd->flag, cd->ref);
560 }
561 allofused = allofused + used;
562 printf ("HASH %d: %d/%d used.\n---\n", i, used, CACHE_NUM / HASH_SIZE);
563 }
564 printf ("ALL: %d/%d used.\n", allofused, CACHE_NUM);
565 }
566
567 #endif /* USE_AIO */
568