1 /* BLURB lgpl
2 
3                            Coda File System
4                               Release 5
5 
6           Copyright (c) 1987-2016 Carnegie Mellon University
7                   Additional copyrights listed below
8 
9 This  code  is  distributed "AS IS" without warranty of any kind under
10 the  terms of the  GNU  Library General Public Licence  Version 2,  as
11 shown in the file LICENSE. The technical and financial contributors to
12 Coda are listed in the file CREDITS.
13 
14                         Additional copyrights
15                            none currently
16 
17 #*/
18 
19 /*
20 *
21 *                            RVM log status area support
22 *
23 */
24 #include <unistd.h>
25 #include <sys/file.h>
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 #include <fcntl.h>
29 #include <errno.h>
30 #include "rvm_private.h"
31 
32 #ifdef RVM_LOG_TAIL_BUG
33 #include <rvmtesting.h>
34 extern unsigned long *ClobberAddress;
35 #endif /* RVM_LOG_TAIL_BUG */
36 
37 /* global variables */
38 
39 rvm_bool_t          rvm_utlsw;          /* true iff RVM called by rvmutl,
40                                            permits certain structures to be
41                                            retained after errors are discovered
42                                            */
43 extern rvm_bool_t   rvm_no_update;      /* no segment or log update if true */
44 extern char         *rvm_errmsg;        /* internal error message buffer */
45 
46 extern rvm_length_t page_size;          /* system page size */
47 extern rvm_length_t page_mask;          /* mask for rounding down to page size */
48 extern rvm_length_t flush_times_vec[flush_times_len]; /* flush timing histogram defs */
49 extern rvm_length_t truncation_times_vec[truncation_times_len]; /* truncation timing
50                                                                    histogram defs */
51 extern rvm_length_t range_lengths_vec[range_lengths_len]; /* range length
52                                                              histogram defs */
53 extern rvm_length_t range_overlaps_vec[range_overlaps_len]; /* range coalesce
54                                                              histogram defs */
55 extern rvm_length_t trans_overlaps_vec[trans_overlaps_len]; /* trans coalesce
56                                                              histogram defs */
57 extern rvm_length_t range_elims_vec[range_elims_len]; /* ranges eliminated by range
58                                                          coalesce histogram defs */
59 extern rvm_length_t trans_elims_vec[trans_elims_len]; /* ranges eliminated by trans
60                                                          coalesce histogram defs */
61 extern rvm_length_t trans_coalesces_vec[trans_coalesces_len]; /* transactions
62                                                                  coalesced per flush */
63 
64 /* root of global log device list */
65 log_t               *default_log;       /* default log descriptor ptr */
66 
67 #ifdef RVM_LOG_TAIL_SHADOW
68 rvm_offset_t        log_tail_shadow;        /* shadow log tail pointer */
69 rvm_offset_t        last_log_tail;          /* last committed log tail value */
70 rvm_bool_t          last_log_valid = rvm_false; /* validity of last_log_tail */
71 rvm_bool_t          has_wrapped = rvm_false;    /* whether or not we wrapped */
72 char *log_tail_shadow_in_object = "Compiled with a shadow log tail offset\n";
73 #endif /* RVM_LOG_TAIL_SHADOW */
74 
75 /* locals */
76 
77 static RVM_MUTEX    log_root_lock;      /* for list header, links & default */
78 list_entry_t        log_root;           /* header for log descriptor list */
79 
80 static rvm_offset_t file_status_offset = /* log status area offset in files */
81     RVM_OFFSET_INITIALIZER(0,FILE_STATUS_OFFSET);
82 
83 static rvm_offset_t raw_status_offset = /* log status area offset in partitions */
84     RVM_OFFSET_INITIALIZER(0,RAW_STATUS_OFFSET);
85 
86 static rvm_offset_t min_trans_size =    /* minimum usable log size as offset */
87     RVM_OFFSET_INITIALIZER(0,MIN_TRANS_SIZE);
88 /* log_root initialization */
init_log_list()89 void init_log_list()
90     {
91     init_list_header(&log_root,log_id);
92     mutex_init(&log_root_lock);
93     default_log = (log_t *)NULL;
94     }
95 
96 /* enter new log in log list and establish default log if necessary */
97 /*
98   if we are looking for the RVM_LOG_TAIL_BUG, there can only ever
99   be one log.  I *believe* that it is possibly to only have one log
100   open at a time.  But, I'm not going to coda_assert that in the general
101   case -bnoble 7/30/94
102 */
103 
enter_log(log)104 void enter_log(log)
105     log_t           *log;               /* log descriptor */
106     {
107 
108     assert(log != NULL);
109 #ifdef RVM_LOG_TAIL_BUG
110     assert(default_log == NULL);
111 #endif /* RVM_LOG_TAIL_BUG */
112     CRITICAL(log_root_lock,
113         {
114         (void)move_list_entry(NULL,(list_entry_t *)&log_root,
115                               log);
116         if (default_log == NULL)
117             default_log = log;
118         });
119 
120 #ifdef RVM_LOG_TAIL_BUG
121     /*
122       this is massively unportable: for the moment, coda_assert we are
123       on pmax_mach.
124     */
125 #ifndef	__MACH__
126     assert(0);
127 #endif	/* __MACH__ */
128 #ifndef mips
129     assert(0);
130 #endif /* mips */
131     ClobberAddress = &(default_log->status.log_tail.low);
132     protect_page__Fi(ClobberAddress);
133 #endif /* RVM_LOG_TAIL_BUG */
134 #ifdef RVM_LOG_TAIL_SHADOW
135     RVM_ASSIGN_OFFSET(log_tail_shadow,default_log->status.log_tail);
136     RVM_ASSIGN_OFFSET(last_log_tail,log_tail_shadow);
137     last_log_valid = rvm_true;
138 #endif /* RVM_LOG_TAIL_SHADOW */
139 
140     }
141 
142 /* find an existing log -- returns descriptor ptr or NULL */
find_log(log_dev)143 static log_t *find_log(log_dev)
144     char            *log_dev;
145     {
146     log_t           *log;
147     char            *log_dev_fullname = log_dev;
148 
149     assert(log_dev != NULL);
150     CRITICAL(log_root_lock,
151         {
152         FOR_ENTRIES_OF(log_root,log_t,log)
153             if (strcmp(log->dev.name,log_dev_fullname) == 0)
154                 goto found;
155 
156         log = NULL;
157 found:;
158         });
159 
160     return log;
161     }
162 /* log daemon control */
163 
164 /* create daemon */
fork_daemon(log_t * log)165 static rvm_return_t fork_daemon(log_t *log)
166 {
167     log_daemon_t *daemon = &log->daemon; /* truncation daemon descriptor */
168 
169     /* create daemon thread */
170     if (daemon->thread == (cthread_t)NULL)
171     {
172 	mutex_lock(&daemon->lock);
173 	daemon->truncate = 0;
174         daemon->state = rvm_idle;
175         daemon->thread = cthread_fork(log_daemon, log);
176 	mutex_unlock(&daemon->lock);
177 
178         if (daemon->thread == (cthread_t)NULL)
179             return RVM_ELOG;
180     }
181     return RVM_SUCCESS;
182 }
183 
184 /* terminate daemon */
join_daemon(log)185 static rvm_return_t join_daemon(log)
186     log_t           *log;
187     {
188     log_daemon_t    *daemon = &log->daemon; /* truncation daemon descriptor */
189     rvm_return_t    retval = RVM_SUCCESS;
190 
191     if (daemon->thread != (cthread_t)NULL)
192         {
193         /* terminate the daemon */
194         CRITICAL(daemon->lock,          /* begin daemon lock crit sec */
195 	    {
196             if (daemon->state != error)
197 		{
198                 daemon->state = terminate;
199 		condition_signal(&daemon->code);
200 		}
201             });                         /* end daemon lock crit sec */
202 
203         /* wait for daemon thread to terminate */
204         retval = (rvm_return_t)cthread_join(daemon->thread);
205 #ifdef RVM_USELWP
206         while(daemon->thread) cthread_yield();
207 #endif
208 	daemon->thread = (cthread_t)NULL;
209         }
210     daemon->truncate = 0;
211 
212     return retval;
213     }
214 /* set log truncation options */
set_truncate_options(log,rvm_options)215 static rvm_return_t set_truncate_options(log,rvm_options)
216     log_t           *log;               /* log descriptor ptr */
217     rvm_options_t   *rvm_options;       /* optional options descriptor */
218     {
219     log_daemon_t    *daemon = &log->daemon; /* truncation daemon descriptor */
220     rvm_return_t    retval = RVM_SUCCESS;
221 
222     if (rvm_utlsw)                      /* no log options allowed */
223         return RVM_SUCCESS;
224 
225     /* set truncation threshold if parameter within range and
226        thread package installed */
227     if ((rvm_options->truncate > 0) && (rvm_options->truncate <= 100)
228         && (cthread_self() != (cthread_t)NULL))
229         {
230         /* update daemon thread */
231         retval = fork_daemon(log);      /* create daemon if necessary */
232         daemon->truncate = rvm_options->truncate;
233         }
234     else
235         retval = join_daemon(log);      /* terminate daemon */
236 
237     return retval;
238     }
239 /* close log device */
close_log(log)240 rvm_return_t close_log(log)
241     log_t           *log;
242     {
243     log_special_t   *special;
244     rvm_return_t    retval = RVM_SUCCESS;
245 
246     /* make sure all transactions ended */
247     CRITICAL(log->tid_list_lock,        /* begin tid_list_lock crit sec */
248         {
249         if (LIST_NOT_EMPTY(log->tid_list))
250             retval = RVM_EUNCOMMIT;
251         });                             /* end tid_list_lock crit sec */
252     if (retval != RVM_SUCCESS) return retval;
253 
254     /* issue terminate to daemon */
255     (void)join_daemon(log);             /* can we do something on error? */
256 
257     /* flush log and close */
258     CRITICAL(log->truncation_lock,
259         {
260         if ((retval=flush_log(log,&log->status.n_flush))
261             == RVM_SUCCESS)
262             CRITICAL(log->dev_lock,
263                 {
264                 if ((retval=write_log_status(log,NULL))
265                     == RVM_SUCCESS)
266                     if (close_dev(&log->dev) < 0)
267                         retval = RVM_EIO;
268                 });
269         });
270     if (retval != RVM_SUCCESS) return retval;
271     if (default_log == log) {
272 #ifdef RVM_LOG_TAIL_BUG
273 	unprotect_page__Fi(ClobberAddress);
274 	ClobberAddress = 0;
275 #endif /* RVM_LOG_TAIL_BUG */
276 #ifdef RVM_LOG_TAIL_SHADOW
277 	RVM_ZERO_OFFSET(log_tail_shadow);
278 	RVM_ZERO_OFFSET(last_log_tail);
279 	last_log_valid = rvm_false;
280 #endif /* RVM_LOG_TAIL_SHADOW */
281 	default_log = NULL;
282     }
283     /* kill unflushed log_special records */
284     UNLINK_ENTRIES_OF(log->special_list,log_special_t,special)
285         free_log_special(special);
286 
287     /* free descriptor */
288     free_log(log);
289 
290     return retval;
291     }
292 /* termination close of all log devices */
close_all_logs()293 rvm_return_t close_all_logs()
294     {
295     log_t           *log;               /* log device descriptor ptr */
296     rvm_return_t    retval = RVM_SUCCESS;
297 
298     /* cycle through log list */
299     CRITICAL(log_root_lock,             /* begin log_root_lock crit sec */
300         {
301         UNLINK_ENTRIES_OF(log_root,log_t,log)
302             {
303             if ((retval=close_log(log)) != RVM_SUCCESS)
304                 break;
305             }
306         });                             /* end log_root_lock crit sec */
307 
308     return retval;
309     }
310 /* pre-load log raw i/o gather write buffer with tail log sector */
preload_wrt_buf(log)311 static rvm_return_t preload_wrt_buf(log)
312     log_t           *log;               /* log descriptor */
313     {
314     device_t        *dev = &log->dev;   /* device descriptor ptr */
315     log_status_t    *status = &log->status; /* log status descriptor */
316     rvm_offset_t    tail_sector;        /* log tail sector */
317 
318     tail_sector = CHOP_OFFSET_TO_SECTOR_SIZE(status->log_tail);
319     if (read_dev(dev,&tail_sector,dev->wrt_buf,SECTOR_SIZE) < 0)
320         return RVM_EIO;
321 
322     dev->ptr = RVM_ADD_LENGTH_TO_ADDR(dev->wrt_buf,
323                    OFFSET_TO_SECTOR_INDEX(status->log_tail));
324     dev->buf_start = dev->ptr;
325     dev->sync_offset = status->log_tail;
326 
327     return RVM_SUCCESS;
328     }
329 /* create log descriptor and open log device */
open_log(dev_name,log_ptr,status_buf,rvm_options)330 rvm_return_t open_log(dev_name,log_ptr,status_buf,rvm_options)
331     char            *dev_name;          /* name of log storage device */
332     log_t           **log_ptr;          /* addr of log descriptor ptr */
333     char            *status_buf;        /* optional i/o buffer */
334     rvm_options_t   *rvm_options;       /* optional options descriptor */
335     {
336     log_t           *log;               /* log descriptor ptr */
337     log_buf_t       *log_buf;           /* log buffer descriptor ptr */
338     device_t        *dev;               /* device descriptor ptr */
339     rvm_length_t    flags = O_RDWR;     /* device open flags */
340     rvm_options_t   local_options;      /* local options record */
341     rvm_return_t    retval;
342 
343     /* build internal log structure */
344     if ((log = make_log(dev_name,&retval)) == NULL)
345         goto err_exit2;
346     dev = &log->dev;
347     log_buf = &log->log_buf;
348 
349     /* allocate recovery buffers */
350     if (rvm_options == NULL)
351         {
352         rvm_options = &local_options;
353         rvm_init_options(rvm_options);
354         }
355     if ((long)(rvm_options->recovery_buf_len) < MIN_RECOVERY_BUF_LEN)
356         rvm_options->recovery_buf_len = MIN_RECOVERY_BUF_LEN;
357     log_buf->length=ROUND_TO_PAGE_SIZE(rvm_options->recovery_buf_len);
358     log_buf->aux_length = ROUND_TO_PAGE_SIZE(log_buf->length/2);
359     if ((retval=alloc_log_buf(log)) != RVM_SUCCESS)
360         return retval;
361 
362     /* open the device and determine characteristics */
363     if (rvm_no_update) flags = O_RDONLY;
364     if (open_dev(dev,flags,0) != 0)
365         {
366         retval = RVM_EIO;
367         goto err_exit2;
368         }
369     if (set_dev_char(dev,NULL) < 0)
370         {
371         retval = RVM_EIO;
372         goto err_exit;
373         }
374     if (dev->raw_io) dev->num_bytes =   /* enought to read status area */
375         RVM_ADD_LENGTH_TO_OFFSET(raw_status_offset,
376                                  LOG_DEV_STATUS_SIZE);
377     /* open status area */
378     if ((retval=read_log_status(log,status_buf)) != RVM_SUCCESS)
379         {
380         if (rvm_utlsw) goto keep_log; /* keep damaged status */
381         goto err_exit;
382         }
383     log->status.trunc_state = 0;
384     log->status.flush_state = 0;
385 
386     /* create daemon truncation thread */
387     if ((retval=set_truncate_options(log,rvm_options))
388         != RVM_SUCCESS) goto err_exit;
389     /* raw i/o support */
390     if (dev->raw_io)
391         {
392         /* assign gather write buffer */
393         if ((long)(rvm_options->flush_buf_len) < MIN_FLUSH_BUF_LEN)
394             rvm_options->flush_buf_len = MIN_FLUSH_BUF_LEN;
395         dev->wrt_buf_len =
396             ROUND_TO_PAGE_SIZE(rvm_options->flush_buf_len);
397         dev->wrt_buf = page_alloc(dev->wrt_buf_len);
398         if (dev->wrt_buf == NULL)
399             {
400             retval = RVM_ENO_MEMORY;
401             goto err_exit;
402             }
403         dev->buf_end = RVM_ADD_LENGTH_TO_ADDR(dev->wrt_buf,
404                                               dev->wrt_buf_len);
405 
406         /* pre-load write buffer */
407         if ((retval=preload_wrt_buf(log)) != RVM_SUCCESS)
408             goto err_exit;
409         }
410 
411     /* enter in log list*/
412 keep_log:
413     enter_log(log);
414     *log_ptr = log;
415     return retval;
416 
417 err_exit:
418     (void)close_dev(dev);
419 err_exit2:
420     free_log(log);
421     *log_ptr = (log_t *)NULL;
422     return retval;
423     }
424 /* log options processing */
do_log_options(log_ptr,rvm_options)425 rvm_return_t do_log_options(log_ptr,rvm_options)
426     log_t           **log_ptr;          /* addr of log descriptor ptr */
427     rvm_options_t   *rvm_options;       /* ptr to rvm options descriptor */
428     {
429     rvm_return_t    retval;
430     log_t           *log = NULL;
431     char            *log_dev;
432 
433     if ((rvm_options == NULL) || (rvm_options->log_dev == NULL))
434         return RVM_SUCCESS;
435 
436     /* see if need to build a log descriptor */
437     log_dev = rvm_options->log_dev;
438     if ((log=find_log(log_dev)) == NULL)
439         {
440         /* see if already have a log */
441         if (default_log != NULL)
442             return RVM_ELOG;
443 
444         /* build log descriptor */
445         if ((retval=open_log(log_dev,&log,NULL,rvm_options))
446             != RVM_SUCCESS) {
447 		printf("open_log failed.\n");
448 		return retval;
449 	}
450         /* do recovery processing for log */
451         log->in_recovery = rvm_true;
452         if ((retval = log_recover(log,&log->status.tot_recovery,
453                                   rvm_false,RVM_RECOVERY)) != RVM_SUCCESS) {
454 		printf("log_recover failed.\n");
455 		return retval;
456 	}
457 
458         /* pre-load write buffer with new tail sector */
459         if (log->dev.raw_io)
460             {
461             CRITICAL(log->dev_lock,retval=preload_wrt_buf(log));
462             if (retval != RVM_SUCCESS) {
463 		    return retval;
464 		    printf("preload_wrt_buff failed\n");
465 	    }
466             }
467         }
468 
469     /* process options and return log descriptor if wanted */
470     retval = set_truncate_options(log,rvm_options);
471     if (log_ptr != NULL)
472         *log_ptr = log;
473 
474     return retval;
475     }
476 /* accumulate running statistics totals */
copy_log_stats(log)477 void copy_log_stats(log)
478     log_t           *log;
479     {
480     log_status_t    *status = &log->status; /* status area descriptor */
481     rvm_length_t    i;
482     rvm_offset_t    temp;
483 
484     assert(((&log->dev == &default_log->dev) && (!rvm_utlsw)) ?
485            (!LOCK_FREE(default_log->dev_lock)) : 1);
486 
487     /* sum epoch counts */
488     status->tot_abort += status->n_abort;
489     status->n_abort = 0;
490     status->tot_flush_commit += status->n_flush_commit;
491     status->n_flush_commit = 0;
492     status->tot_no_flush_commit += status->n_no_flush_commit;
493     status->n_no_flush_commit = 0;
494     status->tot_split += status->n_split;
495     status->n_split = 0;
496     status->tot_flush += status->n_flush;
497     status->n_flush = 0;
498     status->tot_rvm_flush += status->n_rvm_flush;
499     status->n_rvm_flush = 0;
500     status->tot_special += status->n_special;
501     status->n_special = 0;
502     status->tot_truncation_wait += status->n_truncation_wait;
503     status->n_truncation_wait = 0;
504     status->tot_range_elim += status->n_range_elim;
505     status->n_range_elim = 0;
506     status->tot_trans_elim += status->n_trans_elim;
507     status->n_trans_elim = 0;
508     status->tot_trans_coalesced += status->n_trans_coalesced;
509     status->n_trans_coalesced = 0;
510     status->tot_range_overlap =
511         RVM_ADD_OFFSETS(status->tot_range_overlap,
512                         status->range_overlap);
513     RVM_ZERO_OFFSET(status->range_overlap);
514     status->tot_trans_overlap =
515         RVM_ADD_OFFSETS(status->tot_trans_overlap,
516                         status->trans_overlap);
517     RVM_ZERO_OFFSET(status->trans_overlap);
518 
519     /* sum length of log writes */
520     log_tail_length(log,&temp);
521     status->tot_log_written = RVM_ADD_OFFSETS(status->tot_log_written,
522                                               status->log_size);
523     status->tot_log_written = RVM_SUB_OFFSETS(status->tot_log_written,
524                                               temp);
525     /* sum cumulative histograms and zero current */
526     for (i=0; i < flush_times_len; i++)
527         {
528         status->tot_flush_times[i] += status->flush_times[i];
529         status->flush_times[i] = 0;
530         }
531     status->tot_flush_time = add_times(&status->tot_flush_time,
532                                        &status->flush_time);
533     for (i=0; i < range_lengths_len; i++)
534         {
535         status->tot_range_lengths[i] += status->range_lengths[i];
536         status->range_lengths[i] = 0;
537         status->tot_range_overlaps[i] += status->range_overlaps[i];
538         status->range_overlaps[i] = 0;
539         status->tot_trans_overlaps[i] += status->trans_overlaps[i];
540         status->trans_overlaps[i] = 0;
541         }
542 
543     for (i=0; i < range_elims_len; i++)
544         {
545         status->tot_range_elims[i] += status->range_elims[i];
546         status->range_elims[i] = 0;
547         status->tot_trans_elims[i] += status->trans_elims[i];
548         status->trans_elims[i] = 0;
549         }
550     ZERO_TIME(status->flush_time);
551     }
552 /* clear non-permenant log status area fields */
clear_log_status(log)553 void clear_log_status(log)
554     log_t           *log;
555     {
556     log_status_t    *status = &log->status; /* status area descriptor */
557 
558     assert(((&log->dev == &default_log->dev) && (!rvm_utlsw)) ?
559            (!LOCK_FREE(default_log->dev_lock)) : 1);
560 
561     status->valid = rvm_true;
562     status->log_empty = rvm_true;
563     status->first_rec_num = 0;
564     status->last_rec_num = 0;
565     ZERO_TIME(status->first_uname);
566     ZERO_TIME(status->last_uname);
567     ZERO_TIME(status->last_commit);
568     ZERO_TIME(status->first_write);
569     ZERO_TIME(status->last_write);
570     ZERO_TIME(status->wrap_time);
571     ZERO_TIME(status->flush_time);
572     RVM_ZERO_OFFSET(status->prev_log_head);
573     RVM_ZERO_OFFSET(status->prev_log_tail);
574 
575     copy_log_stats(log);
576     }
577 /* log status block initialization */
init_log_status(log)578 rvm_return_t init_log_status(log)
579     log_t           *log;               /* log descriptor */
580     {
581     rvm_length_t    i;
582     log_status_t    *status = &log->status; /* status area descriptor */
583     rvm_offset_t    *status_offset;     /* offset of status area */
584 
585     /* initialize boundaries & size */
586     if (log->dev.raw_io) status_offset = &raw_status_offset;
587     else status_offset = &file_status_offset;
588     status->log_start = RVM_ADD_LENGTH_TO_OFFSET(*status_offset,
589                                                  LOG_DEV_STATUS_SIZE);
590     status->log_size = RVM_SUB_OFFSETS(log->dev.num_bytes,
591                                        status->log_start);
592 
593     /* initialize head and tail pointers */
594     status->log_head = status->log_start;
595 #ifdef RVM_LOG_TAIL_BUG
596     unprotect_page__Fi(ClobberAddress);
597 #endif /* RVM_LOG_TAIL_BUG */
598 #ifdef RVM_LOG_TAIL_SHADOW
599     assert(RVM_OFFSET_EQL(log_tail_shadow,status->log_tail));
600 #endif /* RVM_LOG_TAIL_SHADOW */
601     status->log_tail = status->log_start;
602 #ifdef RVM_LOG_TAIL_SHADOW
603 	RVM_ASSIGN_OFFSET(log_tail_shadow,status->log_tail);
604 #endif /* RVM_LOG_TAIL_SHADOW */
605 #ifdef RVM_LOG_TAIL_BUG
606     protect_page__Fi(ClobberAddress);
607 #endif /* RVM_LOG_TAIL_BUG */
608     RVM_ZERO_OFFSET(status->prev_log_head);
609     RVM_ZERO_OFFSET(status->prev_log_tail);
610 
611     /* init status variables */
612     clear_log_status(log);
613     make_uname(&status->status_init);   /* initialization timestamp */
614     status->last_trunc = status->status_init;
615     status->prev_trunc = status->status_init;
616     status->next_rec_num = 1;
617     status->log_dev_max = 0;
618     status->last_flush_time = 0;
619     status->last_truncation_time = 0;
620     status->last_tree_build_time = 0;
621     status->last_tree_apply_time = 0;
622 
623     /* clear cumulative statistics */
624     status->tot_rvm_truncate = 0;
625     status->tot_async_truncation = 0;
626     status->tot_sync_truncation = 0;
627     status->tot_truncation_wait = 0;
628     status->tot_recovery = 0;
629     status->tot_abort = 0;
630     status->tot_flush_commit = 0;
631     status->tot_no_flush_commit = 0;
632     status->tot_split = 0;
633     status->tot_rvm_flush = 0;
634     status->tot_flush = 0;
635     status->tot_special = 0;
636     status->tot_wrap = 0;
637     status->tot_range_elim = 0;
638     status->tot_trans_elim = 0;
639     status->tot_trans_coalesced = 0;
640     RVM_ZERO_OFFSET(status->tot_range_overlap);
641     RVM_ZERO_OFFSET(status->tot_trans_overlap);
642     RVM_ZERO_OFFSET(status->tot_log_written);
643     /* clear timings and histograms */
644     ZERO_TIME(status->tot_flush_time);
645     ZERO_TIME(status->tot_truncation_time);
646     for (i=0; i < flush_times_len; i++)
647         status->tot_flush_times[i] = 0;
648     for (i=0; i < truncation_times_len; i++)
649         {
650         status->tot_tree_build_times[i] = 0;
651         status->tot_tree_apply_times[i] = 0;
652         status->tot_truncation_times[i] = 0;
653         }
654     for (i=0; i < range_lengths_len; i++)
655         {
656         status->tot_range_lengths[i] = 0;
657         status->tot_range_overlaps[i] = 0;
658         status->tot_trans_overlaps[i] = 0;
659         }
660     for (i=0; i < range_elims_len; i++)
661         {
662         status->tot_range_elims[i] = 0;
663         status->tot_trans_elims[i] = 0;
664         status->tot_trans_coalesces[i] = 0;
665         }
666 
667     /* write the device areas */
668     return write_log_status(log,NULL);
669     }
670 /* read log status area from log device */
read_log_status(log,status_buf)671 rvm_return_t read_log_status(log,status_buf)
672     log_t               *log;           /* log descriptor */
673     char                *status_buf;    /* optional i/o buffer */
674     {
675     log_status_t        *status = &log->status; /* status area descriptor */
676     rvm_offset_t        *status_offset; /* device status area offset */
677     log_dev_status_t    *dev_status;    /* status i/o area typed ptr */
678     char                status_io[LOG_DEV_STATUS_SIZE]; /* i/o buffer */
679     rvm_length_t        saved_chk_sum;  /* save area for checksum read */
680 
681     /* read the status areas */
682     if (status_buf != NULL)
683         dev_status = (log_dev_status_t *)status_buf;
684     else {
685 	BZERO(status_io, LOG_DEV_STATUS_SIZE); /* clear buffer */
686         dev_status = (log_dev_status_t *)status_io;
687     }
688     if (log->dev.raw_io) status_offset = &raw_status_offset;
689     else status_offset = &file_status_offset;
690     if (read_dev(&log->dev,status_offset,
691                   dev_status,LOG_DEV_STATUS_SIZE) < 0)
692         return RVM_EIO;
693 
694     /* save old checksum and compute new */
695     saved_chk_sum = dev_status->chk_sum;
696     dev_status->chk_sum = 0;
697     dev_status->chk_sum = chk_sum((char *)dev_status,
698                                   LOG_DEV_STATUS_SIZE);
699 
700     /* copy to log descriptor */
701     (void)BCOPY(&dev_status->status,(char *)status,
702                 sizeof(log_status_t));
703     status->valid = rvm_false;          /* status not valid until tail found */
704 
705     /* compare checksum, struct_id, and version */
706     if ((dev_status->chk_sum != saved_chk_sum)
707         || (dev_status->struct_id != log_dev_status_id))
708         return RVM_ELOG;                /* status area damaged */
709     if (strcmp(dev_status->version,RVM_VERSION) != 0)
710         return RVM_ELOG_VERSION_SKEW;
711     if (strcmp(dev_status->log_version,RVM_LOG_VERSION) != 0)
712         return RVM_ELOG_VERSION_SKEW;
713     if (strcmp(dev_status->statistics_version,RVM_STATISTICS_VERSION) != 0)
714         return RVM_ESTAT_VERSION_SKEW;
715 
716     /* set log device length to log size at creation */
717     if (log->dev.raw_io)
718         log->dev.num_bytes = RVM_ADD_OFFSETS(status->log_size,
719                                              status->log_start);
720     status->update_cnt = UPDATE_STATUS;
721     return RVM_SUCCESS;
722     }
723 /* write log status area on log device */
write_log_status(log,dev)724 rvm_return_t write_log_status(log,dev)
725     log_t               *log;
726     device_t            *dev;           /* optional device */
727     {
728     log_status_t        *status = &log->status; /* status area descriptor */
729     rvm_offset_t        *status_offset; /* device status area offset */
730     log_dev_status_t    *dev_status;    /* status i/o area typed ptr */
731     char                status_io[LOG_DEV_STATUS_SIZE]; /* i/o buffer */
732 
733     /* initializations */
734 #ifdef RVM_LOG_TAIL_SHADOW
735     assert(RVM_OFFSET_EQL(log_tail_shadow,log->status.log_tail));
736     /* we'll check to see whether this log offest is before the
737        previous one.  If so, assert.  Some false assertions, but hey. */
738     if (last_log_valid == rvm_true) {
739 	if (has_wrapped == rvm_true) {
740 	    /* this log value should be LESS than the previous one */
741 	    assert(RVM_OFFSET_GEQ(last_log_tail,log->status.log_tail));
742 	    /* We've accounted for the log_wrap; reset it. */
743 	    has_wrapped = rvm_false;
744 	} else {
745 	    /* this log value should be GREATER than the previous one */
746 	    assert(RVM_OFFSET_LEQ(last_log_tail,log->status.log_tail));
747 	}
748     } else {
749 	last_log_valid = rvm_true;
750     }
751     RVM_ASSIGN_OFFSET(last_log_tail,log->status.log_tail);
752 #endif /* RVM_LOG_TAIL_SHADOW */
753     if (dev == NULL) dev = &log->dev;
754     (void) BZERO(status_io, LOG_DEV_STATUS_SIZE); /* clear buffer */
755 
756     /* set up device status i/o area */
757     status->update_cnt = UPDATE_STATUS;
758     make_uname(&status->status_write);
759     dev_status = (log_dev_status_t *)status_io;
760     dev_status->struct_id = log_dev_status_id;
761     (void)BCOPY((char *)status,&dev_status->status,
762                 sizeof(log_status_t));
763     (void)strcpy(dev_status->version,RVM_VERSION);
764     (void)strcpy(dev_status->log_version,RVM_LOG_VERSION);
765     (void)strcpy(dev_status->statistics_version,
766                  RVM_STATISTICS_VERSION);
767 
768     /* compute checksum */
769     dev_status->chk_sum = 0;
770     dev_status->chk_sum = chk_sum((char *)dev_status,
771                                   LOG_DEV_STATUS_SIZE);
772 
773     /* write the status areas */
774     if (dev->raw_io) status_offset = &raw_status_offset;
775     else status_offset = &file_status_offset;
776     if (write_dev(dev,status_offset,dev_status,
777                   LOG_DEV_STATUS_SIZE,SYNCH) < 0)
778         return RVM_EIO;
779 
780     return RVM_SUCCESS;
781     }
782 /* consistency check for log head/tail ptrs */
chk_tail(log)783 static rvm_bool_t chk_tail(log)
784     log_t           *log;
785     {
786     log_status_t    *status = &log->status; /* status area descriptor */
787 
788     /* basic range checks -- current epoch */
789     assert(RVM_OFFSET_GEQ(status->log_tail,status->log_start));
790     assert(RVM_OFFSET_LEQ(status->log_tail,log->dev.num_bytes));
791     assert(RVM_OFFSET_GEQ(status->log_head,status->log_start));
792     assert(RVM_OFFSET_LEQ(status->log_head,log->dev.num_bytes));
793 
794     /* basic range checks -- previous epoch */
795     if (!RVM_OFFSET_EQL_ZERO(status->prev_log_head))
796         {
797         assert(RVM_OFFSET_EQL(status->log_head,
798                               status->prev_log_tail));
799         assert(RVM_OFFSET_GEQ(status->prev_log_tail,
800                               status->log_start));
801         assert(RVM_OFFSET_LEQ(status->prev_log_tail,
802                               log->dev.num_bytes));
803         assert(RVM_OFFSET_GEQ(status->prev_log_head,
804                               status->log_start));
805         assert(RVM_OFFSET_LEQ(status->prev_log_head,
806                               log->dev.num_bytes));
807         assert(RVM_OFFSET_EQL(status->prev_log_tail,
808                               status->log_head));
809         }
810     /* current <==> previous epoch consistency checks */
811     if (RVM_OFFSET_GTR(status->log_head,status->log_tail))
812         {                               /* current epoch wrapped */
813         assert(RVM_OFFSET_GEQ(status->log_head,status->log_tail));
814         if (!RVM_OFFSET_EQL_ZERO(status->prev_log_head))
815             {                           /* check previous epoch */
816             assert(RVM_OFFSET_LEQ(status->prev_log_head,
817                                   status->prev_log_tail));
818             assert(RVM_OFFSET_GEQ(status->prev_log_head,
819                                   status->log_tail));
820             assert(RVM_OFFSET_GEQ(status->prev_log_head,
821                                   status->log_tail));
822             }
823         }
824     else
825         {                               /* current epoch not wrapped */
826         if (!RVM_OFFSET_EQL_ZERO(status->prev_log_head))
827             {                           /* check previous epoch */
828             if (RVM_OFFSET_GTR(status->prev_log_head,
829                                status->prev_log_tail))
830                 {                       /* previous epoch wrapped */
831                 assert(RVM_OFFSET_GTR(status->prev_log_head,
832                                       status->log_tail));
833                 assert(RVM_OFFSET_GEQ(status->prev_log_head,
834                                       status->log_tail));
835                 }
836             else
837                 {                       /* previous epoch not wrapped */
838                 assert(RVM_OFFSET_GTR(status->log_head,
839                                       status->prev_log_head));
840                 }
841             }
842         }
843 
844     /* raw i/o buffer checks */
845     if (log->dev.raw_io)
846         {
847         assert((SECTOR_INDEX((long)log->dev.ptr)) ==
848                (OFFSET_TO_SECTOR_INDEX(status->log_tail)));
849         }
850 
851     return rvm_true;
852     }
update_log_tail(log,rec_hdr)853 rvm_return_t update_log_tail(log,rec_hdr)
854     log_t           *log;
855     rec_hdr_t       *rec_hdr;           /* header of last record */
856     {
857     log_status_t    *status = &log->status; /* status area descriptor */
858     rvm_length_t    temp;
859 
860     assert(((&log->dev == &default_log->dev) && (!rvm_utlsw)) ?
861            (!LOCK_FREE(default_log->dev_lock)) : 1);
862 
863     /* update unique name timestamps */
864     status->last_write = rec_hdr->timestamp;
865     if (TIME_EQL_ZERO(status->first_write))
866         status->first_write = status->last_write;
867 
868     status->log_empty = rvm_false;
869     if (rec_hdr->struct_id != log_wrap_id)
870         {
871         /* update and check tail length */
872         temp = rec_hdr->rec_length+sizeof(rec_end_t);
873         assert(temp == log->dev.io_length);
874 #ifdef RVM_LOG_TAIL_BUG
875 	unprotect_page__Fi(ClobberAddress);
876 #endif /* RVM_LOG_TAIL_BUG */
877 #ifdef RVM_LOG_TAIL_SHADOW
878     assert(RVM_OFFSET_EQL(log_tail_shadow,status->log_tail));
879 #endif /* RVM_LOG_TAIL_SHADOW */
880         status->log_tail = RVM_ADD_LENGTH_TO_OFFSET(status->log_tail,
881                                                     temp);
882 #ifdef RVM_LOG_TAIL_SHADOW
883 	RVM_ASSIGN_OFFSET(log_tail_shadow,status->log_tail);
884 #endif /* RVM_LOG_TAIL_SHADOW */
885 #ifdef RVM_LOG_TAIL_BUG
886 	protect_page__Fi(ClobberAddress);
887 #endif /* RVM_LOG_TAIL_BUG */
888         assert(chk_tail(log));
889 
890         /* update unames if transaction */
891         if (rec_hdr->struct_id == trans_hdr_id)
892             {
893             status->last_uname = ((trans_hdr_t *)rec_hdr)->uname;
894             if (TIME_EQL_ZERO(status->first_uname))
895                 status->first_uname = status->last_uname;
896             }
897 
898         /* count updates & update disk copies if necessary */
899         if (--status->update_cnt != 0)
900             return RVM_SUCCESS;
901         }
902 
903     if (sync_dev(&log->dev) < 0)        /* sync file buffers before status write */
904         return RVM_EIO;
905 
906     /* if tail wrapped around, correct pointers */
907     if (rec_hdr->struct_id == log_wrap_id)
908         {
909 #ifdef RVM_LOG_TAIL_BUG
910         unprotect_page__Fi(ClobberAddress);
911 #endif /* RVM_LOG_TAIL_BUG */
912 #ifdef RVM_LOG_TAIL_SHADOW
913     assert(RVM_OFFSET_EQL(log_tail_shadow,status->log_tail));
914 #endif /* RVM_LOG_TAIL_SHADOW */
915         status->log_tail = status->log_start;
916 #ifdef RVM_LOG_TAIL_SHADOW
917 	RVM_ASSIGN_OFFSET(log_tail_shadow,status->log_tail);
918 #endif /* RVM_LOG_TAIL_SHADOW */
919 #ifdef RVM_LOG_TAIL_BUG
920         protect_page__Fi(ClobberAddress);
921 #endif /* RVM_LOG_TAIL_BUG */
922         log->dev.sync_offset = status->log_start;
923         assert(chk_tail(log));
924         }
925 
926     return write_log_status(log,NULL);  /* update disk status block */
927     }
928 /* determine total length of log tail area */
log_tail_length(log,tail_length)929 void log_tail_length(log,tail_length)
930     log_t           *log;               /* log descriptor */
931     rvm_offset_t    *tail_length;       /* length [out] */
932     {
933     log_status_t    *status = &log->status; /* status area descriptor */
934     rvm_offset_t    temp;
935 
936     /* determine effective head */
937     if (!RVM_OFFSET_EQL_ZERO(status->prev_log_head))
938         temp = CHOP_OFFSET_TO_SECTOR_SIZE(status->prev_log_head);
939     else                                /* no previous epoch */
940         temp = CHOP_OFFSET_TO_SECTOR_SIZE(status->log_head);
941 
942     /* determine usable area */
943     if (RVM_OFFSET_GEQ(status->log_tail,status->log_head) &&
944         RVM_OFFSET_GEQ(status->log_tail,status->prev_log_head))
945         {
946         /* current not wrapped & previous not wrapped */
947         *tail_length = RVM_SUB_OFFSETS(log->dev.num_bytes,
948                                        status->log_tail);
949         if (RVM_OFFSET_LSS(*tail_length,min_trans_size))
950             RVM_ZERO_OFFSET(*tail_length);
951         *tail_length = RVM_ADD_OFFSETS(*tail_length,temp);
952         *tail_length = RVM_SUB_OFFSETS(*tail_length,status->log_start);
953         }
954     else
955         /* all other cases */
956         *tail_length = RVM_SUB_OFFSETS(temp,status->log_tail);
957 
958     }
959 /* determine length of log tail area usable in single write */
log_tail_sngl_w(log_t * log,rvm_offset_t * tail_length)960 void log_tail_sngl_w(log_t *log, rvm_offset_t *tail_length)
961 {
962     log_status_t    *status = &log->status; /* status area descriptor */
963 
964     /* determine effective head */
965     if (!RVM_OFFSET_EQL_ZERO(status->prev_log_head))
966         *tail_length = CHOP_OFFSET_TO_SECTOR_SIZE(status->prev_log_head);
967     else                            /* no previous epoch */
968         *tail_length = CHOP_OFFSET_TO_SECTOR_SIZE(status->log_head);
969 
970     /* determine effective end of useable area if
971         neither current nor previous wrapped */
972     if (RVM_OFFSET_GEQ(status->log_tail,status->log_head) &&
973         RVM_OFFSET_GEQ(status->log_tail,status->prev_log_head))
974         *tail_length = log->dev.num_bytes;
975 
976     /* subtract current current tail & verify log ptrs */
977     *tail_length = RVM_SUB_OFFSETS(*tail_length,status->log_tail);
978     assert(chk_tail(log));
979 }
980 /* determine length of log currently in use */
cur_log_length(log,length)981 void cur_log_length(log,length)
982     log_t           *log;               /* log descriptor */
983     rvm_offset_t    *length;            /* length [out] */
984     {
985     log_status_t    *status = &log->status; /* log status area descriptor */
986 
987     if (RVM_OFFSET_GEQ(status->log_tail,status->log_head))
988         *length = RVM_SUB_OFFSETS(status->log_tail,status->log_head);
989     else
990         {
991         *length = RVM_SUB_OFFSETS(log->dev.num_bytes,status->log_head);
992         *length = RVM_ADD_OFFSETS(*length,status->log_tail);
993         *length = RVM_SUB_OFFSETS(*length,status->log_start);
994         }
995     }
996 
997 /* determine percentage of log currently in use */
cur_log_percent(log,space_needed)998 long cur_log_percent(log,space_needed)
999     log_t           *log;               /* log descriptor */
1000     rvm_offset_t    *space_needed;      /* space neded immediately */
1001     {
1002     log_status_t    *status = &log->status; /* log status area descriptor */
1003     float           cur_size;           /* current size of log as float */
1004     rvm_length_t    cur_percent;        /* current franction of log used (%) */
1005     rvm_offset_t    temp;               /* log free space calculation temp */
1006 
1007     CRITICAL(log->dev_lock,             /* begin dev_lock crit sec */
1008         {
1009         /* find out how much space is there now & set high water mark */
1010         log_tail_length(log,&temp);
1011         temp = RVM_SUB_OFFSETS(status->log_size,temp);
1012         cur_size = OFFSET_TO_FLOAT(temp);
1013         cur_percent = (long)(100.0*(cur_size/
1014                                     OFFSET_TO_FLOAT(status->log_size)));
1015         assert((cur_percent >= 0) && (cur_percent <= 100));
1016         if (cur_percent > status->log_dev_max)
1017             status->log_dev_max = cur_percent;
1018 
1019         /* if space_needed specified, recompute percentage */
1020         if (space_needed != NULL)
1021             {
1022             temp = RVM_ADD_OFFSETS(temp,*space_needed);
1023             cur_size = OFFSET_TO_FLOAT(temp);
1024             cur_percent = (long)(100.0*(cur_size/
1025                                     OFFSET_TO_FLOAT(status->log_size)));
1026             }
1027         });                             /* end dev_lock crit sec */
1028 
1029     return cur_percent;
1030     }
1031 /* rvm_create_log application interface */
rvm_create_log(rvm_options,log_len,mode)1032 rvm_return_t rvm_create_log(rvm_options,log_len,mode)
1033     rvm_options_t   *rvm_options;       /* ptr to options record */
1034     rvm_offset_t    *log_len;           /* length of log data area */
1035     long            mode;               /* file creation protection mode */
1036     {
1037     log_t           *log;               /* descriptor for log */
1038     rvm_offset_t    offset;             /* offset temporary */
1039     char            *end_mark = "end";
1040     long            save_errno;
1041     rvm_return_t    retval;
1042 
1043     if ((retval=bad_options(rvm_options,rvm_true)) != RVM_SUCCESS)
1044         return retval;                  /* bad options ptr or record */
1045     if (rvm_options == NULL)
1046         return RVM_EOPTIONS;            /* must have an options record */
1047 
1048     /* check length of file name */
1049     if (strlen(rvm_options->log_dev) >= MAXPATHLEN)
1050         return RVM_ENAME_TOO_LONG;
1051 
1052     /* check that log file length is legal */
1053     offset = RVM_ADD_LENGTH_TO_OFFSET(*log_len,
1054     	    	 LOG_DEV_STATUS_SIZE+FILE_STATUS_OFFSET);
1055     offset = CHOP_OFFSET_TO_SECTOR_SIZE(offset);
1056     if (RVM_OFFSET_HIGH_BITS_TO_LENGTH(offset) != 0)
1057         return RVM_ETOO_BIG;
1058 
1059     /* be sure not an already declared log */
1060     if (find_log(rvm_options->log_dev) != NULL)
1061         return RVM_ELOG;
1062 
1063     /* build a log descriptor and create log file*/
1064     if ((log=make_log(rvm_options->log_dev,&retval)) == NULL)
1065         return retval;
1066 #ifdef RVM_LOG_TAIL_BUG
1067     /*
1068       We only need to track the log descriptor while we are
1069       building it.  It isn't going to be inserted into the list
1070       until later, so ClobberAddress won't be set properly.
1071     */
1072     ClobberAddress = &(log->status.log_tail.low);
1073     protect_page__Fi(ClobberAddress);
1074 #endif /* RVM_LOG_TAIL_BUG */
1075 #ifdef RVM_LOG_TAIL_SHADOW
1076     RVM_ASSIGN_OFFSET(log_tail_shadow,log->status.log_tail);
1077 #endif /* RVM_LOG_TAIL_SHADOW */
1078     if (open_dev(&log->dev,O_WRONLY,mode) == 0) /* don't allow create yet */
1079         {
1080         retval = RVM_ELOG;              /* error -- file already exists */
1081         goto err_exit;
1082         }
1083     if (errno != ENOENT)
1084         {
1085         retval = RVM_EIO;               /* other i/o error, errno specifies */
1086         goto err_exit;
1087         }
1088     if (open_dev(&log->dev,O_WRONLY | O_CREAT,mode) != 0)
1089         {                               /* do real create */
1090         retval = RVM_EIO;
1091         goto err_exit;
1092         }
1093     /* force file length to specified size by writting last byte */
1094     log->dev.num_bytes = offset;
1095     offset = RVM_SUB_LENGTH_FROM_OFFSET(offset,strlen(end_mark));
1096     if (write_dev(&log->dev,&offset,end_mark,
1097                   strlen(end_mark),NO_SYNCH) < 0)
1098         {
1099         retval = RVM_EIO;
1100         goto err_exit;
1101         }
1102 
1103     /* complete initialization */
1104     retval = init_log_status(log);
1105 
1106 err_exit:
1107     if (log->dev.handle != 0)
1108         {
1109         save_errno = errno;
1110         (void)close_dev(&log->dev);
1111         errno = save_errno;
1112         }
1113 #ifdef RVM_LOG_TAIL_BUG
1114     /* drop the "temporary" clobber address */
1115     unprotect_page__Fi(ClobberAddress);
1116     ClobberAddress = 0;
1117 #endif /* RVM_LOG_TAIL_BUG */
1118 #ifdef RVM_LOG_TAIL_SHADOW
1119     RVM_ZERO_OFFSET(log_tail_shadow);
1120 #endif /* RVM_LOG_TAIL_SHADOW */
1121     free_log(log);
1122 
1123     return retval;
1124     }
1125 /* special routines for basher */
rvm_log_head()1126 rvm_offset_t rvm_log_head()
1127     {
1128     return default_log->status.log_head;
1129     }
1130 
rvm_log_tail()1131 rvm_offset_t rvm_log_tail()
1132     {
1133     return default_log->status.log_tail;
1134     }
1135 
rvm_next_rec_num()1136 rvm_length_t rvm_next_rec_num()
1137     {
1138     return default_log->status.next_rec_num;
1139     }
1140