1 /* BLURB lgpl
2
3 Coda File System
4 Release 5
5
6 Copyright (c) 1987-2016 Carnegie Mellon University
7 Additional copyrights listed below
8
9 This code is distributed "AS IS" without warranty of any kind under
10 the terms of the GNU Library General Public Licence Version 2, as
11 shown in the file LICENSE. The technical and financial contributors to
12 Coda are listed in the file CREDITS.
13
14 Additional copyrights
15 none currently
16
17 #*/
18
19 /*
20 *
21 * RVM log status area support
22 *
23 */
24 #include <unistd.h>
25 #include <sys/file.h>
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 #include <fcntl.h>
29 #include <errno.h>
30 #include "rvm_private.h"
31
32 #ifdef RVM_LOG_TAIL_BUG
33 #include <rvmtesting.h>
34 extern unsigned long *ClobberAddress;
35 #endif /* RVM_LOG_TAIL_BUG */
36
/* global variables */

rvm_bool_t rvm_utlsw; /* true iff RVM called by rvmutl,
                         permits certain structures to be
                         retained after errors are discovered
                         */
extern rvm_bool_t rvm_no_update; /* no segment or log update if true */
extern char *rvm_errmsg; /* internal error message buffer */

extern rvm_length_t page_size; /* system page size */
extern rvm_length_t page_mask; /* mask for rounding down to page size */

/* histogram bucket definition vectors; each is dimensioned by its own
   *_len constant and is defined outside this file */
extern rvm_length_t flush_times_vec[flush_times_len]; /* flush timing histogram defs */
extern rvm_length_t truncation_times_vec[truncation_times_len]; /* truncation timing
                                                                   histogram defs */
extern rvm_length_t range_lengths_vec[range_lengths_len]; /* range length
                                                             histogram defs */
extern rvm_length_t range_overlaps_vec[range_overlaps_len]; /* range coalesce
                                                               histogram defs */
extern rvm_length_t trans_overlaps_vec[trans_overlaps_len]; /* trans coalesce
                                                               histogram defs */
extern rvm_length_t range_elims_vec[range_elims_len]; /* ranges eliminated by range
                                                         coalesce histogram defs */
extern rvm_length_t trans_elims_vec[trans_elims_len]; /* ranges eliminated by trans
                                                         coalesce histogram defs */
extern rvm_length_t trans_coalesces_vec[trans_coalesces_len]; /* transactions
                                                                 coalesced per flush */

/* root of global log device list */
log_t *default_log; /* default log descriptor ptr */

#ifdef RVM_LOG_TAIL_SHADOW
/* debugging aid: a second copy of the default log's tail offset, compared
   against status.log_tail by assertions wherever the tail is changed */
rvm_offset_t log_tail_shadow; /* shadow log tail pointer */
rvm_offset_t last_log_tail; /* last committed log tail value */
rvm_bool_t last_log_valid = rvm_false; /* validity of last_log_tail */
rvm_bool_t has_wrapped = rvm_false; /* whether or not we wrapped */
char *log_tail_shadow_in_object = "Compiled with a shadow log tail offset\n";
#endif /* RVM_LOG_TAIL_SHADOW */

/* locals */

static RVM_MUTEX log_root_lock; /* for list header, links & default */
list_entry_t log_root; /* header for log descriptor list */

/* the status area sits at a different offset depending on whether the log
   device is a file or a raw partition; the code below selects between the
   two by testing dev.raw_io */
static rvm_offset_t file_status_offset = /* log status area offset in files */
    RVM_OFFSET_INITIALIZER(0,FILE_STATUS_OFFSET);

static rvm_offset_t raw_status_offset = /* log status area offset in partitions */
    RVM_OFFSET_INITIALIZER(0,RAW_STATUS_OFFSET);

static rvm_offset_t min_trans_size = /* minimum usable log size as offset */
    RVM_OFFSET_INITIALIZER(0,MIN_TRANS_SIZE);
88 /* log_root initialization */
init_log_list()89 void init_log_list()
90 {
91 init_list_header(&log_root,log_id);
92 mutex_init(&log_root_lock);
93 default_log = (log_t *)NULL;
94 }
95
96 /* enter new log in log list and establish default log if necessary */
/*
   If we are looking for the RVM_LOG_TAIL_BUG, there can only ever
   be one log.  I *believe* that it is only possible to have one log
   open at a time, but I'm not going to coda_assert that in the general
   case -bnoble 7/30/94
*/
103
enter_log(log)104 void enter_log(log)
105 log_t *log; /* log descriptor */
106 {
107
108 assert(log != NULL);
109 #ifdef RVM_LOG_TAIL_BUG
110 assert(default_log == NULL);
111 #endif /* RVM_LOG_TAIL_BUG */
112 CRITICAL(log_root_lock,
113 {
114 (void)move_list_entry(NULL,(list_entry_t *)&log_root,
115 log);
116 if (default_log == NULL)
117 default_log = log;
118 });
119
120 #ifdef RVM_LOG_TAIL_BUG
121 /*
122 this is massively unportable: for the moment, coda_assert we are
123 on pmax_mach.
124 */
125 #ifndef __MACH__
126 assert(0);
127 #endif /* __MACH__ */
128 #ifndef mips
129 assert(0);
130 #endif /* mips */
131 ClobberAddress = &(default_log->status.log_tail.low);
132 protect_page__Fi(ClobberAddress);
133 #endif /* RVM_LOG_TAIL_BUG */
134 #ifdef RVM_LOG_TAIL_SHADOW
135 RVM_ASSIGN_OFFSET(log_tail_shadow,default_log->status.log_tail);
136 RVM_ASSIGN_OFFSET(last_log_tail,log_tail_shadow);
137 last_log_valid = rvm_true;
138 #endif /* RVM_LOG_TAIL_SHADOW */
139
140 }
141
142 /* find an existing log -- returns descriptor ptr or NULL */
find_log(log_dev)143 static log_t *find_log(log_dev)
144 char *log_dev;
145 {
146 log_t *log;
147 char *log_dev_fullname = log_dev;
148
149 assert(log_dev != NULL);
150 CRITICAL(log_root_lock,
151 {
152 FOR_ENTRIES_OF(log_root,log_t,log)
153 if (strcmp(log->dev.name,log_dev_fullname) == 0)
154 goto found;
155
156 log = NULL;
157 found:;
158 });
159
160 return log;
161 }
162 /* log daemon control */
163
164 /* create daemon */
fork_daemon(log_t * log)165 static rvm_return_t fork_daemon(log_t *log)
166 {
167 log_daemon_t *daemon = &log->daemon; /* truncation daemon descriptor */
168
169 /* create daemon thread */
170 if (daemon->thread == (cthread_t)NULL)
171 {
172 mutex_lock(&daemon->lock);
173 daemon->truncate = 0;
174 daemon->state = rvm_idle;
175 daemon->thread = cthread_fork(log_daemon, log);
176 mutex_unlock(&daemon->lock);
177
178 if (daemon->thread == (cthread_t)NULL)
179 return RVM_ELOG;
180 }
181 return RVM_SUCCESS;
182 }
183
184 /* terminate daemon */
join_daemon(log)185 static rvm_return_t join_daemon(log)
186 log_t *log;
187 {
188 log_daemon_t *daemon = &log->daemon; /* truncation daemon descriptor */
189 rvm_return_t retval = RVM_SUCCESS;
190
191 if (daemon->thread != (cthread_t)NULL)
192 {
193 /* terminate the daemon */
194 CRITICAL(daemon->lock, /* begin daemon lock crit sec */
195 {
196 if (daemon->state != error)
197 {
198 daemon->state = terminate;
199 condition_signal(&daemon->code);
200 }
201 }); /* end daemon lock crit sec */
202
203 /* wait for daemon thread to terminate */
204 retval = (rvm_return_t)cthread_join(daemon->thread);
205 #ifdef RVM_USELWP
206 while(daemon->thread) cthread_yield();
207 #endif
208 daemon->thread = (cthread_t)NULL;
209 }
210 daemon->truncate = 0;
211
212 return retval;
213 }
214 /* set log truncation options */
set_truncate_options(log,rvm_options)215 static rvm_return_t set_truncate_options(log,rvm_options)
216 log_t *log; /* log descriptor ptr */
217 rvm_options_t *rvm_options; /* optional options descriptor */
218 {
219 log_daemon_t *daemon = &log->daemon; /* truncation daemon descriptor */
220 rvm_return_t retval = RVM_SUCCESS;
221
222 if (rvm_utlsw) /* no log options allowed */
223 return RVM_SUCCESS;
224
225 /* set truncation threshold if parameter within range and
226 thread package installed */
227 if ((rvm_options->truncate > 0) && (rvm_options->truncate <= 100)
228 && (cthread_self() != (cthread_t)NULL))
229 {
230 /* update daemon thread */
231 retval = fork_daemon(log); /* create daemon if necessary */
232 daemon->truncate = rvm_options->truncate;
233 }
234 else
235 retval = join_daemon(log); /* terminate daemon */
236
237 return retval;
238 }
239 /* close log device */
close_log(log)240 rvm_return_t close_log(log)
241 log_t *log;
242 {
243 log_special_t *special;
244 rvm_return_t retval = RVM_SUCCESS;
245
246 /* make sure all transactions ended */
247 CRITICAL(log->tid_list_lock, /* begin tid_list_lock crit sec */
248 {
249 if (LIST_NOT_EMPTY(log->tid_list))
250 retval = RVM_EUNCOMMIT;
251 }); /* end tid_list_lock crit sec */
252 if (retval != RVM_SUCCESS) return retval;
253
254 /* issue terminate to daemon */
255 (void)join_daemon(log); /* can we do something on error? */
256
257 /* flush log and close */
258 CRITICAL(log->truncation_lock,
259 {
260 if ((retval=flush_log(log,&log->status.n_flush))
261 == RVM_SUCCESS)
262 CRITICAL(log->dev_lock,
263 {
264 if ((retval=write_log_status(log,NULL))
265 == RVM_SUCCESS)
266 if (close_dev(&log->dev) < 0)
267 retval = RVM_EIO;
268 });
269 });
270 if (retval != RVM_SUCCESS) return retval;
271 if (default_log == log) {
272 #ifdef RVM_LOG_TAIL_BUG
273 unprotect_page__Fi(ClobberAddress);
274 ClobberAddress = 0;
275 #endif /* RVM_LOG_TAIL_BUG */
276 #ifdef RVM_LOG_TAIL_SHADOW
277 RVM_ZERO_OFFSET(log_tail_shadow);
278 RVM_ZERO_OFFSET(last_log_tail);
279 last_log_valid = rvm_false;
280 #endif /* RVM_LOG_TAIL_SHADOW */
281 default_log = NULL;
282 }
283 /* kill unflushed log_special records */
284 UNLINK_ENTRIES_OF(log->special_list,log_special_t,special)
285 free_log_special(special);
286
287 /* free descriptor */
288 free_log(log);
289
290 return retval;
291 }
292 /* termination close of all log devices */
close_all_logs()293 rvm_return_t close_all_logs()
294 {
295 log_t *log; /* log device descriptor ptr */
296 rvm_return_t retval = RVM_SUCCESS;
297
298 /* cycle through log list */
299 CRITICAL(log_root_lock, /* begin log_root_lock crit sec */
300 {
301 UNLINK_ENTRIES_OF(log_root,log_t,log)
302 {
303 if ((retval=close_log(log)) != RVM_SUCCESS)
304 break;
305 }
306 }); /* end log_root_lock crit sec */
307
308 return retval;
309 }
310 /* pre-load log raw i/o gather write buffer with tail log sector */
preload_wrt_buf(log)311 static rvm_return_t preload_wrt_buf(log)
312 log_t *log; /* log descriptor */
313 {
314 device_t *dev = &log->dev; /* device descriptor ptr */
315 log_status_t *status = &log->status; /* log status descriptor */
316 rvm_offset_t tail_sector; /* log tail sector */
317
318 tail_sector = CHOP_OFFSET_TO_SECTOR_SIZE(status->log_tail);
319 if (read_dev(dev,&tail_sector,dev->wrt_buf,SECTOR_SIZE) < 0)
320 return RVM_EIO;
321
322 dev->ptr = RVM_ADD_LENGTH_TO_ADDR(dev->wrt_buf,
323 OFFSET_TO_SECTOR_INDEX(status->log_tail));
324 dev->buf_start = dev->ptr;
325 dev->sync_offset = status->log_tail;
326
327 return RVM_SUCCESS;
328 }
329 /* create log descriptor and open log device */
open_log(dev_name,log_ptr,status_buf,rvm_options)330 rvm_return_t open_log(dev_name,log_ptr,status_buf,rvm_options)
331 char *dev_name; /* name of log storage device */
332 log_t **log_ptr; /* addr of log descriptor ptr */
333 char *status_buf; /* optional i/o buffer */
334 rvm_options_t *rvm_options; /* optional options descriptor */
335 {
336 log_t *log; /* log descriptor ptr */
337 log_buf_t *log_buf; /* log buffer descriptor ptr */
338 device_t *dev; /* device descriptor ptr */
339 rvm_length_t flags = O_RDWR; /* device open flags */
340 rvm_options_t local_options; /* local options record */
341 rvm_return_t retval;
342
343 /* build internal log structure */
344 if ((log = make_log(dev_name,&retval)) == NULL)
345 goto err_exit2;
346 dev = &log->dev;
347 log_buf = &log->log_buf;
348
349 /* allocate recovery buffers */
350 if (rvm_options == NULL)
351 {
352 rvm_options = &local_options;
353 rvm_init_options(rvm_options);
354 }
355 if ((long)(rvm_options->recovery_buf_len) < MIN_RECOVERY_BUF_LEN)
356 rvm_options->recovery_buf_len = MIN_RECOVERY_BUF_LEN;
357 log_buf->length=ROUND_TO_PAGE_SIZE(rvm_options->recovery_buf_len);
358 log_buf->aux_length = ROUND_TO_PAGE_SIZE(log_buf->length/2);
359 if ((retval=alloc_log_buf(log)) != RVM_SUCCESS)
360 return retval;
361
362 /* open the device and determine characteristics */
363 if (rvm_no_update) flags = O_RDONLY;
364 if (open_dev(dev,flags,0) != 0)
365 {
366 retval = RVM_EIO;
367 goto err_exit2;
368 }
369 if (set_dev_char(dev,NULL) < 0)
370 {
371 retval = RVM_EIO;
372 goto err_exit;
373 }
374 if (dev->raw_io) dev->num_bytes = /* enought to read status area */
375 RVM_ADD_LENGTH_TO_OFFSET(raw_status_offset,
376 LOG_DEV_STATUS_SIZE);
377 /* open status area */
378 if ((retval=read_log_status(log,status_buf)) != RVM_SUCCESS)
379 {
380 if (rvm_utlsw) goto keep_log; /* keep damaged status */
381 goto err_exit;
382 }
383 log->status.trunc_state = 0;
384 log->status.flush_state = 0;
385
386 /* create daemon truncation thread */
387 if ((retval=set_truncate_options(log,rvm_options))
388 != RVM_SUCCESS) goto err_exit;
389 /* raw i/o support */
390 if (dev->raw_io)
391 {
392 /* assign gather write buffer */
393 if ((long)(rvm_options->flush_buf_len) < MIN_FLUSH_BUF_LEN)
394 rvm_options->flush_buf_len = MIN_FLUSH_BUF_LEN;
395 dev->wrt_buf_len =
396 ROUND_TO_PAGE_SIZE(rvm_options->flush_buf_len);
397 dev->wrt_buf = page_alloc(dev->wrt_buf_len);
398 if (dev->wrt_buf == NULL)
399 {
400 retval = RVM_ENO_MEMORY;
401 goto err_exit;
402 }
403 dev->buf_end = RVM_ADD_LENGTH_TO_ADDR(dev->wrt_buf,
404 dev->wrt_buf_len);
405
406 /* pre-load write buffer */
407 if ((retval=preload_wrt_buf(log)) != RVM_SUCCESS)
408 goto err_exit;
409 }
410
411 /* enter in log list*/
412 keep_log:
413 enter_log(log);
414 *log_ptr = log;
415 return retval;
416
417 err_exit:
418 (void)close_dev(dev);
419 err_exit2:
420 free_log(log);
421 *log_ptr = (log_t *)NULL;
422 return retval;
423 }
424 /* log options processing */
do_log_options(log_ptr,rvm_options)425 rvm_return_t do_log_options(log_ptr,rvm_options)
426 log_t **log_ptr; /* addr of log descriptor ptr */
427 rvm_options_t *rvm_options; /* ptr to rvm options descriptor */
428 {
429 rvm_return_t retval;
430 log_t *log = NULL;
431 char *log_dev;
432
433 if ((rvm_options == NULL) || (rvm_options->log_dev == NULL))
434 return RVM_SUCCESS;
435
436 /* see if need to build a log descriptor */
437 log_dev = rvm_options->log_dev;
438 if ((log=find_log(log_dev)) == NULL)
439 {
440 /* see if already have a log */
441 if (default_log != NULL)
442 return RVM_ELOG;
443
444 /* build log descriptor */
445 if ((retval=open_log(log_dev,&log,NULL,rvm_options))
446 != RVM_SUCCESS) {
447 printf("open_log failed.\n");
448 return retval;
449 }
450 /* do recovery processing for log */
451 log->in_recovery = rvm_true;
452 if ((retval = log_recover(log,&log->status.tot_recovery,
453 rvm_false,RVM_RECOVERY)) != RVM_SUCCESS) {
454 printf("log_recover failed.\n");
455 return retval;
456 }
457
458 /* pre-load write buffer with new tail sector */
459 if (log->dev.raw_io)
460 {
461 CRITICAL(log->dev_lock,retval=preload_wrt_buf(log));
462 if (retval != RVM_SUCCESS) {
463 return retval;
464 printf("preload_wrt_buff failed\n");
465 }
466 }
467 }
468
469 /* process options and return log descriptor if wanted */
470 retval = set_truncate_options(log,rvm_options);
471 if (log_ptr != NULL)
472 *log_ptr = log;
473
474 return retval;
475 }
476 /* accumulate running statistics totals */
copy_log_stats(log)477 void copy_log_stats(log)
478 log_t *log;
479 {
480 log_status_t *status = &log->status; /* status area descriptor */
481 rvm_length_t i;
482 rvm_offset_t temp;
483
484 assert(((&log->dev == &default_log->dev) && (!rvm_utlsw)) ?
485 (!LOCK_FREE(default_log->dev_lock)) : 1);
486
487 /* sum epoch counts */
488 status->tot_abort += status->n_abort;
489 status->n_abort = 0;
490 status->tot_flush_commit += status->n_flush_commit;
491 status->n_flush_commit = 0;
492 status->tot_no_flush_commit += status->n_no_flush_commit;
493 status->n_no_flush_commit = 0;
494 status->tot_split += status->n_split;
495 status->n_split = 0;
496 status->tot_flush += status->n_flush;
497 status->n_flush = 0;
498 status->tot_rvm_flush += status->n_rvm_flush;
499 status->n_rvm_flush = 0;
500 status->tot_special += status->n_special;
501 status->n_special = 0;
502 status->tot_truncation_wait += status->n_truncation_wait;
503 status->n_truncation_wait = 0;
504 status->tot_range_elim += status->n_range_elim;
505 status->n_range_elim = 0;
506 status->tot_trans_elim += status->n_trans_elim;
507 status->n_trans_elim = 0;
508 status->tot_trans_coalesced += status->n_trans_coalesced;
509 status->n_trans_coalesced = 0;
510 status->tot_range_overlap =
511 RVM_ADD_OFFSETS(status->tot_range_overlap,
512 status->range_overlap);
513 RVM_ZERO_OFFSET(status->range_overlap);
514 status->tot_trans_overlap =
515 RVM_ADD_OFFSETS(status->tot_trans_overlap,
516 status->trans_overlap);
517 RVM_ZERO_OFFSET(status->trans_overlap);
518
519 /* sum length of log writes */
520 log_tail_length(log,&temp);
521 status->tot_log_written = RVM_ADD_OFFSETS(status->tot_log_written,
522 status->log_size);
523 status->tot_log_written = RVM_SUB_OFFSETS(status->tot_log_written,
524 temp);
525 /* sum cumulative histograms and zero current */
526 for (i=0; i < flush_times_len; i++)
527 {
528 status->tot_flush_times[i] += status->flush_times[i];
529 status->flush_times[i] = 0;
530 }
531 status->tot_flush_time = add_times(&status->tot_flush_time,
532 &status->flush_time);
533 for (i=0; i < range_lengths_len; i++)
534 {
535 status->tot_range_lengths[i] += status->range_lengths[i];
536 status->range_lengths[i] = 0;
537 status->tot_range_overlaps[i] += status->range_overlaps[i];
538 status->range_overlaps[i] = 0;
539 status->tot_trans_overlaps[i] += status->trans_overlaps[i];
540 status->trans_overlaps[i] = 0;
541 }
542
543 for (i=0; i < range_elims_len; i++)
544 {
545 status->tot_range_elims[i] += status->range_elims[i];
546 status->range_elims[i] = 0;
547 status->tot_trans_elims[i] += status->trans_elims[i];
548 status->trans_elims[i] = 0;
549 }
550 ZERO_TIME(status->flush_time);
551 }
/* clear non-permanent log status area fields */
clear_log_status(log)553 void clear_log_status(log)
554 log_t *log;
555 {
556 log_status_t *status = &log->status; /* status area descriptor */
557
558 assert(((&log->dev == &default_log->dev) && (!rvm_utlsw)) ?
559 (!LOCK_FREE(default_log->dev_lock)) : 1);
560
561 status->valid = rvm_true;
562 status->log_empty = rvm_true;
563 status->first_rec_num = 0;
564 status->last_rec_num = 0;
565 ZERO_TIME(status->first_uname);
566 ZERO_TIME(status->last_uname);
567 ZERO_TIME(status->last_commit);
568 ZERO_TIME(status->first_write);
569 ZERO_TIME(status->last_write);
570 ZERO_TIME(status->wrap_time);
571 ZERO_TIME(status->flush_time);
572 RVM_ZERO_OFFSET(status->prev_log_head);
573 RVM_ZERO_OFFSET(status->prev_log_tail);
574
575 copy_log_stats(log);
576 }
577 /* log status block initialization */
init_log_status(log)578 rvm_return_t init_log_status(log)
579 log_t *log; /* log descriptor */
580 {
581 rvm_length_t i;
582 log_status_t *status = &log->status; /* status area descriptor */
583 rvm_offset_t *status_offset; /* offset of status area */
584
585 /* initialize boundaries & size */
586 if (log->dev.raw_io) status_offset = &raw_status_offset;
587 else status_offset = &file_status_offset;
588 status->log_start = RVM_ADD_LENGTH_TO_OFFSET(*status_offset,
589 LOG_DEV_STATUS_SIZE);
590 status->log_size = RVM_SUB_OFFSETS(log->dev.num_bytes,
591 status->log_start);
592
593 /* initialize head and tail pointers */
594 status->log_head = status->log_start;
595 #ifdef RVM_LOG_TAIL_BUG
596 unprotect_page__Fi(ClobberAddress);
597 #endif /* RVM_LOG_TAIL_BUG */
598 #ifdef RVM_LOG_TAIL_SHADOW
599 assert(RVM_OFFSET_EQL(log_tail_shadow,status->log_tail));
600 #endif /* RVM_LOG_TAIL_SHADOW */
601 status->log_tail = status->log_start;
602 #ifdef RVM_LOG_TAIL_SHADOW
603 RVM_ASSIGN_OFFSET(log_tail_shadow,status->log_tail);
604 #endif /* RVM_LOG_TAIL_SHADOW */
605 #ifdef RVM_LOG_TAIL_BUG
606 protect_page__Fi(ClobberAddress);
607 #endif /* RVM_LOG_TAIL_BUG */
608 RVM_ZERO_OFFSET(status->prev_log_head);
609 RVM_ZERO_OFFSET(status->prev_log_tail);
610
611 /* init status variables */
612 clear_log_status(log);
613 make_uname(&status->status_init); /* initialization timestamp */
614 status->last_trunc = status->status_init;
615 status->prev_trunc = status->status_init;
616 status->next_rec_num = 1;
617 status->log_dev_max = 0;
618 status->last_flush_time = 0;
619 status->last_truncation_time = 0;
620 status->last_tree_build_time = 0;
621 status->last_tree_apply_time = 0;
622
623 /* clear cumulative statistics */
624 status->tot_rvm_truncate = 0;
625 status->tot_async_truncation = 0;
626 status->tot_sync_truncation = 0;
627 status->tot_truncation_wait = 0;
628 status->tot_recovery = 0;
629 status->tot_abort = 0;
630 status->tot_flush_commit = 0;
631 status->tot_no_flush_commit = 0;
632 status->tot_split = 0;
633 status->tot_rvm_flush = 0;
634 status->tot_flush = 0;
635 status->tot_special = 0;
636 status->tot_wrap = 0;
637 status->tot_range_elim = 0;
638 status->tot_trans_elim = 0;
639 status->tot_trans_coalesced = 0;
640 RVM_ZERO_OFFSET(status->tot_range_overlap);
641 RVM_ZERO_OFFSET(status->tot_trans_overlap);
642 RVM_ZERO_OFFSET(status->tot_log_written);
643 /* clear timings and histograms */
644 ZERO_TIME(status->tot_flush_time);
645 ZERO_TIME(status->tot_truncation_time);
646 for (i=0; i < flush_times_len; i++)
647 status->tot_flush_times[i] = 0;
648 for (i=0; i < truncation_times_len; i++)
649 {
650 status->tot_tree_build_times[i] = 0;
651 status->tot_tree_apply_times[i] = 0;
652 status->tot_truncation_times[i] = 0;
653 }
654 for (i=0; i < range_lengths_len; i++)
655 {
656 status->tot_range_lengths[i] = 0;
657 status->tot_range_overlaps[i] = 0;
658 status->tot_trans_overlaps[i] = 0;
659 }
660 for (i=0; i < range_elims_len; i++)
661 {
662 status->tot_range_elims[i] = 0;
663 status->tot_trans_elims[i] = 0;
664 status->tot_trans_coalesces[i] = 0;
665 }
666
667 /* write the device areas */
668 return write_log_status(log,NULL);
669 }
670 /* read log status area from log device */
read_log_status(log,status_buf)671 rvm_return_t read_log_status(log,status_buf)
672 log_t *log; /* log descriptor */
673 char *status_buf; /* optional i/o buffer */
674 {
675 log_status_t *status = &log->status; /* status area descriptor */
676 rvm_offset_t *status_offset; /* device status area offset */
677 log_dev_status_t *dev_status; /* status i/o area typed ptr */
678 char status_io[LOG_DEV_STATUS_SIZE]; /* i/o buffer */
679 rvm_length_t saved_chk_sum; /* save area for checksum read */
680
681 /* read the status areas */
682 if (status_buf != NULL)
683 dev_status = (log_dev_status_t *)status_buf;
684 else {
685 BZERO(status_io, LOG_DEV_STATUS_SIZE); /* clear buffer */
686 dev_status = (log_dev_status_t *)status_io;
687 }
688 if (log->dev.raw_io) status_offset = &raw_status_offset;
689 else status_offset = &file_status_offset;
690 if (read_dev(&log->dev,status_offset,
691 dev_status,LOG_DEV_STATUS_SIZE) < 0)
692 return RVM_EIO;
693
694 /* save old checksum and compute new */
695 saved_chk_sum = dev_status->chk_sum;
696 dev_status->chk_sum = 0;
697 dev_status->chk_sum = chk_sum((char *)dev_status,
698 LOG_DEV_STATUS_SIZE);
699
700 /* copy to log descriptor */
701 (void)BCOPY(&dev_status->status,(char *)status,
702 sizeof(log_status_t));
703 status->valid = rvm_false; /* status not valid until tail found */
704
705 /* compare checksum, struct_id, and version */
706 if ((dev_status->chk_sum != saved_chk_sum)
707 || (dev_status->struct_id != log_dev_status_id))
708 return RVM_ELOG; /* status area damaged */
709 if (strcmp(dev_status->version,RVM_VERSION) != 0)
710 return RVM_ELOG_VERSION_SKEW;
711 if (strcmp(dev_status->log_version,RVM_LOG_VERSION) != 0)
712 return RVM_ELOG_VERSION_SKEW;
713 if (strcmp(dev_status->statistics_version,RVM_STATISTICS_VERSION) != 0)
714 return RVM_ESTAT_VERSION_SKEW;
715
716 /* set log device length to log size at creation */
717 if (log->dev.raw_io)
718 log->dev.num_bytes = RVM_ADD_OFFSETS(status->log_size,
719 status->log_start);
720 status->update_cnt = UPDATE_STATUS;
721 return RVM_SUCCESS;
722 }
723 /* write log status area on log device */
write_log_status(log,dev)724 rvm_return_t write_log_status(log,dev)
725 log_t *log;
726 device_t *dev; /* optional device */
727 {
728 log_status_t *status = &log->status; /* status area descriptor */
729 rvm_offset_t *status_offset; /* device status area offset */
730 log_dev_status_t *dev_status; /* status i/o area typed ptr */
731 char status_io[LOG_DEV_STATUS_SIZE]; /* i/o buffer */
732
733 /* initializations */
734 #ifdef RVM_LOG_TAIL_SHADOW
735 assert(RVM_OFFSET_EQL(log_tail_shadow,log->status.log_tail));
736 /* we'll check to see whether this log offest is before the
737 previous one. If so, assert. Some false assertions, but hey. */
738 if (last_log_valid == rvm_true) {
739 if (has_wrapped == rvm_true) {
740 /* this log value should be LESS than the previous one */
741 assert(RVM_OFFSET_GEQ(last_log_tail,log->status.log_tail));
742 /* We've accounted for the log_wrap; reset it. */
743 has_wrapped = rvm_false;
744 } else {
745 /* this log value should be GREATER than the previous one */
746 assert(RVM_OFFSET_LEQ(last_log_tail,log->status.log_tail));
747 }
748 } else {
749 last_log_valid = rvm_true;
750 }
751 RVM_ASSIGN_OFFSET(last_log_tail,log->status.log_tail);
752 #endif /* RVM_LOG_TAIL_SHADOW */
753 if (dev == NULL) dev = &log->dev;
754 (void) BZERO(status_io, LOG_DEV_STATUS_SIZE); /* clear buffer */
755
756 /* set up device status i/o area */
757 status->update_cnt = UPDATE_STATUS;
758 make_uname(&status->status_write);
759 dev_status = (log_dev_status_t *)status_io;
760 dev_status->struct_id = log_dev_status_id;
761 (void)BCOPY((char *)status,&dev_status->status,
762 sizeof(log_status_t));
763 (void)strcpy(dev_status->version,RVM_VERSION);
764 (void)strcpy(dev_status->log_version,RVM_LOG_VERSION);
765 (void)strcpy(dev_status->statistics_version,
766 RVM_STATISTICS_VERSION);
767
768 /* compute checksum */
769 dev_status->chk_sum = 0;
770 dev_status->chk_sum = chk_sum((char *)dev_status,
771 LOG_DEV_STATUS_SIZE);
772
773 /* write the status areas */
774 if (dev->raw_io) status_offset = &raw_status_offset;
775 else status_offset = &file_status_offset;
776 if (write_dev(dev,status_offset,dev_status,
777 LOG_DEV_STATUS_SIZE,SYNCH) < 0)
778 return RVM_EIO;
779
780 return RVM_SUCCESS;
781 }
782 /* consistency check for log head/tail ptrs */
chk_tail(log)783 static rvm_bool_t chk_tail(log)
784 log_t *log;
785 {
786 log_status_t *status = &log->status; /* status area descriptor */
787
788 /* basic range checks -- current epoch */
789 assert(RVM_OFFSET_GEQ(status->log_tail,status->log_start));
790 assert(RVM_OFFSET_LEQ(status->log_tail,log->dev.num_bytes));
791 assert(RVM_OFFSET_GEQ(status->log_head,status->log_start));
792 assert(RVM_OFFSET_LEQ(status->log_head,log->dev.num_bytes));
793
794 /* basic range checks -- previous epoch */
795 if (!RVM_OFFSET_EQL_ZERO(status->prev_log_head))
796 {
797 assert(RVM_OFFSET_EQL(status->log_head,
798 status->prev_log_tail));
799 assert(RVM_OFFSET_GEQ(status->prev_log_tail,
800 status->log_start));
801 assert(RVM_OFFSET_LEQ(status->prev_log_tail,
802 log->dev.num_bytes));
803 assert(RVM_OFFSET_GEQ(status->prev_log_head,
804 status->log_start));
805 assert(RVM_OFFSET_LEQ(status->prev_log_head,
806 log->dev.num_bytes));
807 assert(RVM_OFFSET_EQL(status->prev_log_tail,
808 status->log_head));
809 }
810 /* current <==> previous epoch consistency checks */
811 if (RVM_OFFSET_GTR(status->log_head,status->log_tail))
812 { /* current epoch wrapped */
813 assert(RVM_OFFSET_GEQ(status->log_head,status->log_tail));
814 if (!RVM_OFFSET_EQL_ZERO(status->prev_log_head))
815 { /* check previous epoch */
816 assert(RVM_OFFSET_LEQ(status->prev_log_head,
817 status->prev_log_tail));
818 assert(RVM_OFFSET_GEQ(status->prev_log_head,
819 status->log_tail));
820 assert(RVM_OFFSET_GEQ(status->prev_log_head,
821 status->log_tail));
822 }
823 }
824 else
825 { /* current epoch not wrapped */
826 if (!RVM_OFFSET_EQL_ZERO(status->prev_log_head))
827 { /* check previous epoch */
828 if (RVM_OFFSET_GTR(status->prev_log_head,
829 status->prev_log_tail))
830 { /* previous epoch wrapped */
831 assert(RVM_OFFSET_GTR(status->prev_log_head,
832 status->log_tail));
833 assert(RVM_OFFSET_GEQ(status->prev_log_head,
834 status->log_tail));
835 }
836 else
837 { /* previous epoch not wrapped */
838 assert(RVM_OFFSET_GTR(status->log_head,
839 status->prev_log_head));
840 }
841 }
842 }
843
844 /* raw i/o buffer checks */
845 if (log->dev.raw_io)
846 {
847 assert((SECTOR_INDEX((long)log->dev.ptr)) ==
848 (OFFSET_TO_SECTOR_INDEX(status->log_tail)));
849 }
850
851 return rvm_true;
852 }
update_log_tail(log,rec_hdr)853 rvm_return_t update_log_tail(log,rec_hdr)
854 log_t *log;
855 rec_hdr_t *rec_hdr; /* header of last record */
856 {
857 log_status_t *status = &log->status; /* status area descriptor */
858 rvm_length_t temp;
859
860 assert(((&log->dev == &default_log->dev) && (!rvm_utlsw)) ?
861 (!LOCK_FREE(default_log->dev_lock)) : 1);
862
863 /* update unique name timestamps */
864 status->last_write = rec_hdr->timestamp;
865 if (TIME_EQL_ZERO(status->first_write))
866 status->first_write = status->last_write;
867
868 status->log_empty = rvm_false;
869 if (rec_hdr->struct_id != log_wrap_id)
870 {
871 /* update and check tail length */
872 temp = rec_hdr->rec_length+sizeof(rec_end_t);
873 assert(temp == log->dev.io_length);
874 #ifdef RVM_LOG_TAIL_BUG
875 unprotect_page__Fi(ClobberAddress);
876 #endif /* RVM_LOG_TAIL_BUG */
877 #ifdef RVM_LOG_TAIL_SHADOW
878 assert(RVM_OFFSET_EQL(log_tail_shadow,status->log_tail));
879 #endif /* RVM_LOG_TAIL_SHADOW */
880 status->log_tail = RVM_ADD_LENGTH_TO_OFFSET(status->log_tail,
881 temp);
882 #ifdef RVM_LOG_TAIL_SHADOW
883 RVM_ASSIGN_OFFSET(log_tail_shadow,status->log_tail);
884 #endif /* RVM_LOG_TAIL_SHADOW */
885 #ifdef RVM_LOG_TAIL_BUG
886 protect_page__Fi(ClobberAddress);
887 #endif /* RVM_LOG_TAIL_BUG */
888 assert(chk_tail(log));
889
890 /* update unames if transaction */
891 if (rec_hdr->struct_id == trans_hdr_id)
892 {
893 status->last_uname = ((trans_hdr_t *)rec_hdr)->uname;
894 if (TIME_EQL_ZERO(status->first_uname))
895 status->first_uname = status->last_uname;
896 }
897
898 /* count updates & update disk copies if necessary */
899 if (--status->update_cnt != 0)
900 return RVM_SUCCESS;
901 }
902
903 if (sync_dev(&log->dev) < 0) /* sync file buffers before status write */
904 return RVM_EIO;
905
906 /* if tail wrapped around, correct pointers */
907 if (rec_hdr->struct_id == log_wrap_id)
908 {
909 #ifdef RVM_LOG_TAIL_BUG
910 unprotect_page__Fi(ClobberAddress);
911 #endif /* RVM_LOG_TAIL_BUG */
912 #ifdef RVM_LOG_TAIL_SHADOW
913 assert(RVM_OFFSET_EQL(log_tail_shadow,status->log_tail));
914 #endif /* RVM_LOG_TAIL_SHADOW */
915 status->log_tail = status->log_start;
916 #ifdef RVM_LOG_TAIL_SHADOW
917 RVM_ASSIGN_OFFSET(log_tail_shadow,status->log_tail);
918 #endif /* RVM_LOG_TAIL_SHADOW */
919 #ifdef RVM_LOG_TAIL_BUG
920 protect_page__Fi(ClobberAddress);
921 #endif /* RVM_LOG_TAIL_BUG */
922 log->dev.sync_offset = status->log_start;
923 assert(chk_tail(log));
924 }
925
926 return write_log_status(log,NULL); /* update disk status block */
927 }
928 /* determine total length of log tail area */
log_tail_length(log,tail_length)929 void log_tail_length(log,tail_length)
930 log_t *log; /* log descriptor */
931 rvm_offset_t *tail_length; /* length [out] */
932 {
933 log_status_t *status = &log->status; /* status area descriptor */
934 rvm_offset_t temp;
935
936 /* determine effective head */
937 if (!RVM_OFFSET_EQL_ZERO(status->prev_log_head))
938 temp = CHOP_OFFSET_TO_SECTOR_SIZE(status->prev_log_head);
939 else /* no previous epoch */
940 temp = CHOP_OFFSET_TO_SECTOR_SIZE(status->log_head);
941
942 /* determine usable area */
943 if (RVM_OFFSET_GEQ(status->log_tail,status->log_head) &&
944 RVM_OFFSET_GEQ(status->log_tail,status->prev_log_head))
945 {
946 /* current not wrapped & previous not wrapped */
947 *tail_length = RVM_SUB_OFFSETS(log->dev.num_bytes,
948 status->log_tail);
949 if (RVM_OFFSET_LSS(*tail_length,min_trans_size))
950 RVM_ZERO_OFFSET(*tail_length);
951 *tail_length = RVM_ADD_OFFSETS(*tail_length,temp);
952 *tail_length = RVM_SUB_OFFSETS(*tail_length,status->log_start);
953 }
954 else
955 /* all other cases */
956 *tail_length = RVM_SUB_OFFSETS(temp,status->log_tail);
957
958 }
959 /* determine length of log tail area usable in single write */
/*
 * Determine the length of log tail area usable in a single write.
 *
 * log         -- log descriptor
 * tail_length -- [out] contiguous free length starting at the tail
 *
 * The usable stretch ends at the end of the device when the tail is at or
 * beyond both the current and the previous head (neither epoch wrapped);
 * otherwise it ends at the effective head (previous head if non-zero,
 * else current head) chopped down to a sector boundary.  The log pointers
 * are verified with chk_tail() afterwards.
 */
void log_tail_sngl_w(log_t *log, rvm_offset_t *tail_length)
{
    log_status_t *st = &log->status; /* status area descriptor */
    rvm_offset_t limit;              /* end of the usable stretch */

    if (RVM_OFFSET_GEQ(st->log_tail, st->log_head) &&
        RVM_OFFSET_GEQ(st->log_tail, st->prev_log_head))
        limit = log->dev.num_bytes; /* neither epoch wrapped */
    else if (RVM_OFFSET_EQL_ZERO(st->prev_log_head))
        limit = CHOP_OFFSET_TO_SECTOR_SIZE(st->log_head); /* no previous epoch */
    else
        limit = CHOP_OFFSET_TO_SECTOR_SIZE(st->prev_log_head);

    /* subtract current tail & verify log ptrs */
    *tail_length = RVM_SUB_OFFSETS(limit, st->log_tail);
    assert(chk_tail(log));
}
980 /* determine length of log currently in use */
cur_log_length(log,length)981 void cur_log_length(log,length)
982 log_t *log; /* log descriptor */
983 rvm_offset_t *length; /* length [out] */
984 {
985 log_status_t *status = &log->status; /* log status area descriptor */
986
987 if (RVM_OFFSET_GEQ(status->log_tail,status->log_head))
988 *length = RVM_SUB_OFFSETS(status->log_tail,status->log_head);
989 else
990 {
991 *length = RVM_SUB_OFFSETS(log->dev.num_bytes,status->log_head);
992 *length = RVM_ADD_OFFSETS(*length,status->log_tail);
993 *length = RVM_SUB_OFFSETS(*length,status->log_start);
994 }
995 }
996
997 /* determine percentage of log currently in use */
cur_log_percent(log,space_needed)998 long cur_log_percent(log,space_needed)
999 log_t *log; /* log descriptor */
1000 rvm_offset_t *space_needed; /* space neded immediately */
1001 {
1002 log_status_t *status = &log->status; /* log status area descriptor */
1003 float cur_size; /* current size of log as float */
1004 rvm_length_t cur_percent; /* current franction of log used (%) */
1005 rvm_offset_t temp; /* log free space calculation temp */
1006
1007 CRITICAL(log->dev_lock, /* begin dev_lock crit sec */
1008 {
1009 /* find out how much space is there now & set high water mark */
1010 log_tail_length(log,&temp);
1011 temp = RVM_SUB_OFFSETS(status->log_size,temp);
1012 cur_size = OFFSET_TO_FLOAT(temp);
1013 cur_percent = (long)(100.0*(cur_size/
1014 OFFSET_TO_FLOAT(status->log_size)));
1015 assert((cur_percent >= 0) && (cur_percent <= 100));
1016 if (cur_percent > status->log_dev_max)
1017 status->log_dev_max = cur_percent;
1018
1019 /* if space_needed specified, recompute percentage */
1020 if (space_needed != NULL)
1021 {
1022 temp = RVM_ADD_OFFSETS(temp,*space_needed);
1023 cur_size = OFFSET_TO_FLOAT(temp);
1024 cur_percent = (long)(100.0*(cur_size/
1025 OFFSET_TO_FLOAT(status->log_size)));
1026 }
1027 }); /* end dev_lock crit sec */
1028
1029 return cur_percent;
1030 }
1031 /* rvm_create_log application interface */
rvm_create_log(rvm_options,log_len,mode)1032 rvm_return_t rvm_create_log(rvm_options,log_len,mode)
1033 rvm_options_t *rvm_options; /* ptr to options record */
1034 rvm_offset_t *log_len; /* length of log data area */
1035 long mode; /* file creation protection mode */
1036 {
1037 log_t *log; /* descriptor for log */
1038 rvm_offset_t offset; /* offset temporary */
1039 char *end_mark = "end";
1040 long save_errno;
1041 rvm_return_t retval;
1042
1043 if ((retval=bad_options(rvm_options,rvm_true)) != RVM_SUCCESS)
1044 return retval; /* bad options ptr or record */
1045 if (rvm_options == NULL)
1046 return RVM_EOPTIONS; /* must have an options record */
1047
1048 /* check length of file name */
1049 if (strlen(rvm_options->log_dev) >= MAXPATHLEN)
1050 return RVM_ENAME_TOO_LONG;
1051
1052 /* check that log file length is legal */
1053 offset = RVM_ADD_LENGTH_TO_OFFSET(*log_len,
1054 LOG_DEV_STATUS_SIZE+FILE_STATUS_OFFSET);
1055 offset = CHOP_OFFSET_TO_SECTOR_SIZE(offset);
1056 if (RVM_OFFSET_HIGH_BITS_TO_LENGTH(offset) != 0)
1057 return RVM_ETOO_BIG;
1058
1059 /* be sure not an already declared log */
1060 if (find_log(rvm_options->log_dev) != NULL)
1061 return RVM_ELOG;
1062
1063 /* build a log descriptor and create log file*/
1064 if ((log=make_log(rvm_options->log_dev,&retval)) == NULL)
1065 return retval;
1066 #ifdef RVM_LOG_TAIL_BUG
1067 /*
1068 We only need to track the log descriptor while we are
1069 building it. It isn't going to be inserted into the list
1070 until later, so ClobberAddress won't be set properly.
1071 */
1072 ClobberAddress = &(log->status.log_tail.low);
1073 protect_page__Fi(ClobberAddress);
1074 #endif /* RVM_LOG_TAIL_BUG */
1075 #ifdef RVM_LOG_TAIL_SHADOW
1076 RVM_ASSIGN_OFFSET(log_tail_shadow,log->status.log_tail);
1077 #endif /* RVM_LOG_TAIL_SHADOW */
1078 if (open_dev(&log->dev,O_WRONLY,mode) == 0) /* don't allow create yet */
1079 {
1080 retval = RVM_ELOG; /* error -- file already exists */
1081 goto err_exit;
1082 }
1083 if (errno != ENOENT)
1084 {
1085 retval = RVM_EIO; /* other i/o error, errno specifies */
1086 goto err_exit;
1087 }
1088 if (open_dev(&log->dev,O_WRONLY | O_CREAT,mode) != 0)
1089 { /* do real create */
1090 retval = RVM_EIO;
1091 goto err_exit;
1092 }
1093 /* force file length to specified size by writting last byte */
1094 log->dev.num_bytes = offset;
1095 offset = RVM_SUB_LENGTH_FROM_OFFSET(offset,strlen(end_mark));
1096 if (write_dev(&log->dev,&offset,end_mark,
1097 strlen(end_mark),NO_SYNCH) < 0)
1098 {
1099 retval = RVM_EIO;
1100 goto err_exit;
1101 }
1102
1103 /* complete initialization */
1104 retval = init_log_status(log);
1105
1106 err_exit:
1107 if (log->dev.handle != 0)
1108 {
1109 save_errno = errno;
1110 (void)close_dev(&log->dev);
1111 errno = save_errno;
1112 }
1113 #ifdef RVM_LOG_TAIL_BUG
1114 /* drop the "temporary" clobber address */
1115 unprotect_page__Fi(ClobberAddress);
1116 ClobberAddress = 0;
1117 #endif /* RVM_LOG_TAIL_BUG */
1118 #ifdef RVM_LOG_TAIL_SHADOW
1119 RVM_ZERO_OFFSET(log_tail_shadow);
1120 #endif /* RVM_LOG_TAIL_SHADOW */
1121 free_log(log);
1122
1123 return retval;
1124 }
1125 /* special routines for basher */
rvm_log_head()1126 rvm_offset_t rvm_log_head()
1127 {
1128 return default_log->status.log_head;
1129 }
1130
rvm_log_tail()1131 rvm_offset_t rvm_log_tail()
1132 {
1133 return default_log->status.log_tail;
1134 }
1135
rvm_next_rec_num()1136 rvm_length_t rvm_next_rec_num()
1137 {
1138 return default_log->status.next_rec_num;
1139 }
1140