1 /* BLURB lgpl
2 
3                            Coda File System
4                               Release 5
5 
6           Copyright (c) 1987-2016 Carnegie Mellon University
7                   Additional copyrights listed below
8 
9 This  code  is  distributed "AS IS" without warranty of any kind under
10 the  terms of the  GNU  Library General Public Licence  Version 2,  as
11 shown in the file LICENSE. The technical and financial contributors to
12 Coda are listed in the file CREDITS.
13 
14                         Additional copyrights
15                            none currently
16 
17 #*/
18 
19 /*
20 *
21 *                 Internal Definitions for RVM
22 *
23 */
24 
25 
26 /* permit multiple includes */
27 #ifndef _RVM_PRIVATE_
28 #define _RVM_PRIVATE_ 1
29 
/* debugging is switched on for now, unless the build already defined DEBUG */
#if !defined(DEBUG)
#define DEBUG 1
#endif
34 
35 #ifdef HAVE_CONFIG_H
36 #include <config.h>
37 #endif
38 
39 #include <sys/types.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <assert.h>
43 #include <rvm/rvm.h>
44 #include <rvm/rvm_statistics.h>
45 
46 #include <fcntl.h>
/* fall back to a no-op flag value when <fcntl.h> does not supply O_BINARY */
#if !defined(O_BINARY)
#define O_BINARY 0
#endif
50 
/* log version identification string;
   note: the Log Version must change whenever the Statistics Version changes */
#define RVM_LOG_VERSION "RVM Log Version  1.4 Oct 17, 1997 "
53 
54 /* general purpose macros */
55 
/* resize an allocation; a null x behaves like malloc(l).
   ISO C realloc() already accepts a null pointer, so the arguments are
   passed straight through and x is evaluated exactly once (the earlier
   test-then-call form evaluated x twice, duplicating any side effects
   in the argument expression) */
#define REALLOC(x,l)      (realloc((x),(l)))
58 
/* bcopy/bzero equivalents; BCOPY takes (source, destination, length).
   bcopy() copes with overlapping source and destination, so the ISO C
   mapping uses memmove rather than memcpy (memcpy on overlapping ranges
   is undefined behavior) */
#ifdef __STDC__
#define BCOPY(x,y,n)     memmove((y),(x),(n))
#define BZERO(x,n)       memset((x),0,(n))
#else
#define BCOPY(x,y,n)     bcopy((x),(y),(n))
#define BZERO(x,n)       bzero((x),(n))
#endif
67 
/* endless loop; the body has to leave it with an explicit break */
#define DO_FOREVER              for ( ; ; )

/* brackets that make a multi-statement macro body a single statement */
#define MACRO_BEGIN             do {
#define MACRO_END               } while (0)

/* record scan directions */
#define FORWARD                 rvm_true    /* scan forward */
#define REVERSE                 rvm_false   /* scan reverse */
75 
76 
77 
/* RVM internal error message texts */

#define ERR_DATE_SKEW \
    "Current time before last recorded - check kernel date"
/* timestamp arithmetic: the operands are objects with tv_sec and tv_usec
   members; note that the arguments are evaluated more than once */

/* strict ordering: seconds decide, microseconds break ties */
#define TIME_LSS(x,y) \
    (((x).tv_sec < (y).tv_sec) \
     || (((x).tv_sec == (y).tv_sec) && ((x).tv_usec < (y).tv_usec)))
#define TIME_GTR(x,y) \
    (((x).tv_sec > (y).tv_sec) \
     || (((x).tv_sec == (y).tv_sec) && ((x).tv_usec > (y).tv_usec)))

/* non-strict ordering, derived from the strict forms */
#define TIME_LEQ(x,y)       (!TIME_GTR((x),(y)))
#define TIME_GEQ(x,y)       (!TIME_LSS((x),(y)))

/* equality tests */
#define TIME_EQL(x,y) \
    (((x).tv_sec == (y).tv_sec) && ((x).tv_usec == (y).tv_usec))
#define TIME_EQL_ZERO(x) \
    (((x).tv_sec == 0) && ((x).tv_usec == 0))

/* clear both fields of a timestamp */
#define ZERO_TIME(x) \
    MACRO_BEGIN \
    (x).tv_sec = 0; \
    (x).tv_usec = 0; \
    MACRO_END
99 
100 /* range monitoring vector */
101 typedef struct
102     {
103     char            *vmaddr;            /* range vm address */
104     rvm_length_t    length;             /* range length */
105     unsigned long   format;             /* data print format switches */
106     int             radix;              /* print radix for vmaddr */
107     }
108 chk_vec_t;
109 
110 /* signal handler function type (rvmutl only) */
111 typedef rvm_bool_t rvm_signal_call_t();
112 
113 /* recovery monitor call-back function type */
114 typedef void rvm_monitor_call_t();
115 /*  rvm_length_t    vmaddr;
116     rvm_length_t    length;
117     char            *data_ptr;
118     rvm_offset_t    *data_offset;
119     rec_hdr_t       *rec_hdr;
120     rvm_length_t    index;
121     char            *msg;
122 */
/* rounding macros for rvm_length_t granularity
   **** all of them rely on sizeof(rvm_length_t) being a power of 2 ****
*/

/* mask that clears the bits below rvm_length_t granularity */
#define LENGTH_MASK \
    ((rvm_length_t)(~(sizeof(rvm_length_t)-1)))

/* len rounded up to the next multiple of sizeof(rvm_length_t) */
#define ROUND_TO_LENGTH(len) \
    (((rvm_length_t)((rvm_length_t)(len)+sizeof(rvm_length_t)-1)) \
     & LENGTH_MASK)

/* len rounded down to a multiple of sizeof(rvm_length_t) */
#define CHOP_TO_LENGTH(len) \
    ((rvm_length_t)((rvm_length_t)(len) & LENGTH_MASK))

/* size of [addr, addr+len) after chopping the start and rounding the end */
#define ALIGNED_LEN(addr,len) \
    (ROUND_TO_LENGTH((rvm_length_t)(addr)+(rvm_length_t)(len)) \
     - CHOP_TO_LENGTH(addr))

/* the bits of len below rvm_length_t granularity */
#define BYTE_SKEW(len) \
    ((rvm_length_t)(len) & ~LENGTH_MASK)
136 
/* sector granularity (SECTOR_SIZE is a power of 2) */
#define SECTOR_SIZE          512
#define SECTOR_MASK          ((rvm_length_t)(~(SECTOR_SIZE-1)))

/* round up / down to a multiple of SECTOR_SIZE */
#define ROUND_TO_SECTOR_SIZE(x) \
    (((rvm_length_t)(x)+SECTOR_SIZE-1) & SECTOR_MASK)
#define CHOP_TO_SECTOR_SIZE(x) \
    ((rvm_length_t)(x) & SECTOR_MASK)

/* position of x within its sector */
#define SECTOR_INDEX(x)      ((x) & (SECTOR_SIZE-1))

/* the same operations for rvm_offset_t values */

/* rounding up is delegated to rvm_rnd_offset_to_sector, which is handed
   the address of x */
#define ROUND_OFFSET_TO_SECTOR_SIZE(x) \
    rvm_rnd_offset_to_sector(&(x))

/* high-order part kept, low-order part chopped to a sector boundary */
#define CHOP_OFFSET_TO_SECTOR_SIZE(x) \
    (RVM_MK_OFFSET(RVM_OFFSET_HIGH_BITS_TO_LENGTH(x), \
                   CHOP_TO_SECTOR_SIZE(RVM_OFFSET_TO_LENGTH(x))))

/* sector index of the low-order part of the offset */
#define OFFSET_TO_SECTOR_INDEX(x) \
    (SECTOR_INDEX(RVM_OFFSET_TO_LENGTH((x))))

/* high-order part kept, low-order part chopped to rvm_length_t granularity */
#define CHOP_OFFSET_TO_LENGTH_SIZE(x) \
    (RVM_MK_OFFSET(RVM_OFFSET_HIGH_BITS_TO_LENGTH(x), \
                   CHOP_TO_LENGTH(RVM_OFFSET_TO_LENGTH(x))))
158 
/* page granularity rounding; page_size and page_mask are variables that
   must be visible wherever these macros are used */
#define ROUND_TO_PAGE_SIZE(x) \
    (((rvm_length_t)(x)+page_size-1) & page_mask)
#define CHOP_TO_PAGE_SIZE(x) \
    ((rvm_length_t)(x) & page_mask)
162 
163 /* other stuff... */
164 
/* convert an rvm_offset_t to floating point: the high-order part scaled by
   2^32 plus the low-order part.  The scale factor is exactly 2^32 (the
   earlier literal 4.294967e+9 was 296 short of it), and both parts are
   converted as double because float cannot hold a full 32-bit value
   without rounding.  The expression has type double, as it did before. */
#define OFFSET_TO_FLOAT(x) \
    ((4294967296.0)*((double)(RVM_OFFSET_HIGH_BITS_TO_LENGTH(x))) \
     + (double)(RVM_OFFSET_TO_LENGTH(x)))
/* internal structure id's; the numbering starts just above struct_first_id
   and the order is relied upon by the size and name tables */
typedef enum {
    struct_first_id = 9,        /* base for free list array length */

    /* ---- structures allocated from free lists ---- */
    log_id,                     /* log device descriptor */
    int_tid_id,                 /* internal transaction descriptor */
    tid_rvm_id,                 /* external tid while on free list */
    range_id,                   /* range descriptor */
    seg_id,                     /* segment descriptor */
    region_id,                  /* internal region descriptor */
    region_rvm_id,              /* external region while on free list */
    options_rvm_id,             /* external options while on free list */
    statistics_rvm_id,          /* rvm_statistics record while on free list */
    mem_region_id,              /* vm region tree node */
    dev_region_id,              /* device region tree node */
    log_special_id,             /* special log record */

    struct_last_cache_id,       /* marker for free lists array length */

    /* ---- structures not allocated from free lists ---- */
    log_status_id,              /* log status descriptor */
    log_dev_status_id,          /* log device status area (on disk) */
    log_wrap_id,                /* log wrap-around marker */
    log_seg_id,                 /* segment mapping marker in log */
    seg_dict_id,                /* recovery dictionary segment desc. */
    trans_hdr_id,               /* transaction header in log */
    rec_end_id,                 /* log record end marker */
    nv_range_id,                /* new value range header */
    nv_buf_id,                  /* new value vm buffer */
    free_page_id,               /* free page header descriptor */
    rw_qentry_id,               /* rw_lock queue entry */
    tree_root_id,               /* tree root */
    /* mmapped_list_id, */      /* BSD/mmap systems only */
    struct_last_id              /* marker for last structure id */
} struct_id_t;
/* conversions between struct_id's and zero-based array indices;
   meant for free list allocated structures only */
#define ID_INDEX(id) \
    ((rvm_length_t)(id)-(rvm_length_t)struct_first_id-1)
#define INDEX_ID(i) \
    ((struct_id_t)((i)+1+(long)struct_first_id))

/* number of id's strictly between struct_first_id and
   struct_last_cache_id, i.e. the free list allocated structures */
#define NUM_CACHE_TYPES \
    ((rvm_length_t)struct_last_cache_id-(rvm_length_t)struct_first_id-1)

/* number of id's strictly between struct_first_id and struct_last_id */
#define NUM_TYPES \
    ((rvm_length_t)struct_last_id-(rvm_length_t)struct_first_id-1)
217 
/* preallocation sizes for the internal structure free lists;
   the entries must be in the same order as the free list allocated
   enum's of struct_id_t
*/
#define NUM_PRE_ALLOCATED \
    /* log's               */    0, \
    /* tid's               */   20, \
    /* rvm_tid's           */   20, \
    /* range's             */   50, \
    /* seg's               */    0, \
    /* region's            */   10, \
    /* rvm_region's        */    0, \
    /* rvm_options         */    0, \
    /* rvm_statistics      */    2, \
    /* mem_region nodes    */   10, \
    /* dev_region nodes    */    1, \
    /* special log markers */    1

/* maximum sizes for the internal structure free lists;
   the entries must be in the same order as the free list allocated
   enum's of struct_id_t
*/
#define MAX_ALLOCATED \
    /* log's               */    0, \
    /* tid's               */   20, \
    /* rvm_tid's           */   20, \
    /* range's             */   50, \
    /* seg's               */    0, \
    /* region's            */   10, \
    /* rvm_region's        */    0, \
    /* rvm_options         */    0, \
    /* rvm_statistics      */    2, \
    /* mem_region nodes    */   10, \
    /* dev_region nodes    */ 2000, \
    /* special log markers */    1
/* sizes of the internal types;
   the entries must be in the same order as the enum's of struct_id_t
*/

/* free list allocated structures */
#define CACHE_TYPE_SIZES \
    sizeof(log_t),              /* log_id */ \
    sizeof(int_tid_t),          /* int_tid_id */ \
    sizeof(rvm_tid_t),          /* tid_rvm_id */ \
    sizeof(range_t),            /* range_id */ \
    sizeof(seg_t),              /* seg_id */ \
    sizeof(region_t),           /* region_id */ \
    sizeof(rvm_region_t),       /* region_rvm_id */ \
    sizeof(rvm_options_t),      /* options_rvm_id */ \
    sizeof(rvm_statistics_t),   /* statistics_rvm_id */ \
    sizeof(mem_region_t),       /* mem_region_id */ \
    sizeof(dev_region_t),       /* dev_region_id */ \
    sizeof(log_special_t)       /* log_special_id */

/* remaining structures; the leading 0 presumably stands in for the
   struct_last_cache_id marker, which has no type of its own */
#define OTHER_TYPE_SIZES \
    0, \
    sizeof(log_status_t),       /* log_status_id */ \
    sizeof(log_dev_status_t),   /* log_dev_status_id */ \
    sizeof(log_wrap_t),         /* log_wrap_id */ \
    sizeof(log_seg_t),          /* log_seg_id */ \
    sizeof(seg_dict_t),         /* seg_dict_id */ \
    sizeof(trans_hdr_t),        /* trans_hdr_id */ \
    sizeof(rec_end_t),          /* rec_end_id */ \
    sizeof(nv_range_t),         /* nv_range_id */ \
    sizeof(nv_buf_t),           /* nv_buf_id */ \
    sizeof(free_page_t),        /* free_page_id */ \
    sizeof(rw_qentry_t),        /* rw_qentry_id */ \
    sizeof(tree_root_t)         /* tree_root_id */
    /* disabled: , sizeof(mmapped_list_t) */
283 
/* printable names of the internal types, in struct_id_t order */
#define TYPE_NAMES \
    /* free list allocated */ \
    "log_id",               "int_tid_id", \
    "tid_rvm_id",           "range_id", \
    "seg_id",               "region_id", \
    "region_rvm_id",        "options_rvm_id", \
    "statistics_rvm_id",    "mem_region_id", \
    "dev_region_id",        "log_special_id", \
    /* marker */ \
    "struct_last_cache_id", \
    /* not free list allocated */ \
    "log_status_id",        "log_dev_status_id", \
    "log_wrap_id",          "log_seg_id", \
    "seg_dict_id",          "trans_hdr_id", \
    "rec_end_id",           "nv_range_id", \
    "nv_buf_id",            "free_page_id", \
    "rw_qentry_id",         "tree_root_id"
    /* disabled: , "mmapped_list_id" */
311 /* doubly-linked list cell header
312    this structure serves as the link and struct_id carrier for larger
313    structures when declared as the 1st field of the structure.
314    it is also used as the root, or header, of a list when statically allocated,
315    or embedded in another structure as other than the 1st field,
316    in which case its struct_id is that of the type of elements on the list.
317 */
318 typedef struct list_entry_s
319     {
320     struct list_entry_s *nextentry;	/* in accordance with insque(3) */
321     struct list_entry_s *preventry;
322     union
323         {
324         struct list_entry_s  *name;     /* back pointer to head of list */
325         long                 length;    /* length of list if header */
326         }               list;
327     struct_id_t         struct_id;	/* self identifier; NEVER altered */
328     rvm_bool_t          is_hdr;         /* true if list header */
329     }
330 list_entry_t;
331 
/* list macros
   lst:  the list header object itself (the macros access it with '.')
   type: type of the list elements; it must have a list_entry_t member
         named links, and the casts assume that member comes first
   ptr:  iteration variable of type (type *)
*/
#define LIST_EMPTY(lst)         ((lst).list.length == 0)
#define LIST_NOT_EMPTY(lst)     ((lst).list.length != 0)

/* iterators for simple traversals, no unlinking: each step follows the
   current entry's own link until an entry flagged is_hdr is reached */

/* FIFO order */
#define FOR_ENTRIES_OF(lst,type,ptr) \
    for ((ptr) = (type *)((lst).nextentry); \
         !((ptr)->links.is_hdr); \
         (ptr) = (type *)((ptr)->links.nextentry))

/* LIFO order */
#define FOR_REVERSE_ENTRIES_OF(lst,type,ptr) \
    for ((ptr) = (type *)((lst).preventry); \
         !((ptr)->links.is_hdr); \
         (ptr) = (type *)((ptr)->links.preventry))

/* generators for traversals that unlink the entries: each step re-reads
   the header's link, so the body has to remove the current entry from
   the list for the traversal to advance */

/* FIFO order */
#define UNLINK_ENTRIES_OF(lst,type,ptr) \
    for ((ptr) = (type *)((lst).nextentry); \
         !((ptr)->links.is_hdr); \
         (ptr) = (type *)((lst).nextentry))

/* LIFO order */
#define UNLINK_REVERSE_ENTRIES_OF(lst,type,ptr) \
    for ((ptr) = (type *)((lst).preventry); \
         !((ptr)->links.is_hdr); \
         (ptr) = (type *)((lst).preventry))
365 
366 /* free page list entry */
367 typedef struct
368     {
369     list_entry_t        links;          /* list links */
370     rvm_length_t        len;            /* length of free pages in bytes */
371     }
372 free_page_t;
373 /* Synchronization and Threads support */
374 
375 /*
376  * We can have one of three thread models:
377  *          cthreads:         Mach threads (kernel or coroutine)
378  *          lwp:              Coda's lightweight process package
379  *          pthreads:         POSIX threads
380  *
381  * If RVM_USELWP is defined, then lwp support is compiled in.
382  * If RVM_USEPT  is defined, then pthreads support is compiled in.
 * If neither of these is defined, then cthreads support is compiled in.
384  *
385  * It is assumed in the rds package that cthreads and pthreads use
386  * preemptive scheduling, and they are synchronized appropriately.
387  *
388  * You must define only one of the above targets, and it must be defined
389  * consistently across the following packages: RVM, RDS, and URT
390  */
391 
392 #ifdef RVM_USELWP	 /* special thread support for Coda */
393 #include "rvm_lwp.h"
394 #elif defined(RVM_USEPT) /* special support for pthreads */
395 #include "rvm_pthread.h"
396 #else			 /* normal: use Cthreads */
397 #include <cthreads.h>
398 
399 /* define types symbolically to permit use of non-Cthread thread support */
400 #define RVM_MUTEX       struct mutex
401 #define RVM_CONDITION	struct condition
402 
403 /* macro for testing if a lock is free */
404 #define LOCK_FREE(lck) \
405     (mutex_try_lock(&(lck)) ? (mutex_unlock(&(lck)), rvm_true) : rvm_false)
406 #endif
407 
/* run a critical section under a mutex
   lck:  the mutex itself (the macro takes its address)
   body: the critical section code
   note: the unlock is skipped if body leaves the macro early
         (return, break, goto)
*/
#define CRITICAL(lck,body) \
    MACRO_BEGIN \
        mutex_lock(&(lck)); \
        body; \
        mutex_unlock(&(lck)); \
    MACRO_END
418 
/*  rw_lock (read/write) support
    An rw_lock admits many readers of a structure, but only while no
    writer is pending.  There is at most one writer, and the write lock
    is granted only when there are no readers.  Once a write has been
    requested, no further readers are admitted until that write has been
    satisfied.  Blocked threads are processed in FIFO order.
*/

/* rw_lock access modes */
typedef enum {
    r = 32,                     /* get lock for read-only */
    w,                          /* get lock for read/write */
    f                           /* lock free, (internal use only) */
} rw_lock_mode_t;
434 
435 typedef struct                          /* rw_lock structure */
436     {
437     RVM_MUTEX           mutex;          /* mutex to protect rw_lock innards */
438     long                read_cnt;       /* read lock count, 0 ==> free */
439     long                write_cnt;      /* write lock count, 0 ==> free */
440     list_entry_t        queue;          /* blocked thread queue */
441     rw_lock_mode_t      lock_mode;      /* current lock mode */
442     }
443 rw_lock_t;
444 
445 typedef struct                          /* rw_lock queue entry */
446     {
447     list_entry_t        links;          /* queue links & struct_id */
448     RVM_CONDITION       wait;           /* condition code for blocking */
449     rw_lock_mode_t      mode;           /* access mode */
450     }
451 rw_qentry_t;
452 
/* run a critical section under an rw_lock
   rwl:  the rw_lock itself (the macro takes its address)
   mode: r or w; the same mode is passed to rw_lock and rw_unlock
   body: the critical section code
   note: the unlock is skipped if body leaves the macro early
         (return, break, goto)
*/
#define RW_CRITICAL(rwl,mode,body) \
    MACRO_BEGIN \
        rw_lock(&(rwl),(mode)); \
        body; \
        rw_unlock(&(rwl),(mode)); \
    MACRO_END
464 
/* test whether an rw_lock is free: true when both the read and write
   counts sum to zero and the lock mode is f.
   The whole expansion is parenthesized so that the macro behaves as one
   operand; without that, !RW_LOCK_FREE(x) negated only the count test.
   rwl is evaluated more than once. */
#define RW_LOCK_FREE(rwl) \
    ((((rwl).read_cnt+(rwl).write_cnt) == 0) && ((rwl).lock_mode == f))
468 /* tree node structures */
469 
470 typedef struct tree_node_s              /* basic tree node */
471     {
472     struct tree_node_s  *lss;           /* ptr to less than entry */
473     struct tree_node_s  *gtr;           /* ptr to greater than entry */
474     long                bf;             /* balance factor */
475     struct_id_t         struct_id;      /* self identifier */
476     }
477 tree_node_t;
478 
479 typedef union
480     {
481     tree_node_t         node;           /* links for trees */
482     list_entry_t        entry;          /* links for allocation cache */
483     }
484 tree_links_t;
485 
/* traversal states */
typedef enum {
    lss = 50,
    self,
    gtr,
    init
} traverse_state_t;
494 
495 typedef struct                          /* tree traversal position entry */
496     {
497     tree_node_t         *ptr;           /* node pointer */
498     traverse_state_t     state;          /* state of traversal {lss,self,gtr} */
499     }
500 tree_pos_t;
501 
502 typedef struct                          /* tree root structure */
503     {
504     struct_id_t         struct_id;      /* self identifier */
505     tree_node_t         *root;          /* ptr to root node */
506     tree_pos_t          *traverse;      /* traversal position vector */
507     rvm_length_t        traverse_len;   /* max length of traverse vector */
508     long                level;          /* current position in traversal
509                                            vector */
510     rvm_length_t        n_nodes;        /* number of nodes in tree */
511     rvm_length_t        max_depth;      /* length of deepest path in tree */
512     rvm_bool_t          unlink;         /* unlink nodes as traversed */
513     }
514 tree_root_t;
515 
/* traversal vector slots are allocated 15 at a time */
#define TRAVERSE_LEN_INCR       15
/* tree structure iterators
     tree: the tree root object itself (the macros take its address)
     type: node type the returned pointers are cast to
     ptr:  iteration variable; the loop ends when it becomes NULL
     -- the UNLINK_ forms pass rvm_true as the last init_tree_generator
        argument, the FOR_ forms pass rvm_false (presumably the switch
        for delinking nodes as they are traversed)
     -- do not use tree_insert or tree_delete or otherwise change
        tree shape in body of iterators if iteration is to be continued
     -- iterators may not be nested for same tree
*/

/* lss -> gtr order */
#define FOR_NODES_OF(tree,type,ptr) \
    for ((ptr) = (type *)init_tree_generator(&(tree),FORWARD,rvm_false); \
         (ptr) != NULL; \
         (ptr) = (type *)tree_successor(&(tree)))

/* gtr -> lss order */
#define FOR_REVERSE_NODES_OF(tree,type,ptr) \
    for ((ptr) = (type *)init_tree_generator(&(tree),REVERSE,rvm_false); \
         (ptr) != NULL; \
         (ptr) = (type *)tree_predecessor(&(tree)))

/* insertion test and iterate from existing nodes with equivalent key:
   starts from the result of tree_iterate_insert and continues with
   tree_successor */
#define FROM_EXISTING_NODE_OF(tree,type,ptr,node,cmp) \
    for ((ptr) = (type *)tree_iterate_insert(&(tree),(node),(cmp)); \
         (ptr) != NULL; \
         (ptr) = (type *)tree_successor(&(tree)))

/* lss -> gtr order, unlinking form */
#define UNLINK_NODES_OF(tree,type,ptr) \
    for ((ptr) = (type *)init_tree_generator(&(tree),FORWARD,rvm_true); \
         (ptr) != NULL; \
         (ptr) = (type *)tree_successor(&(tree)))

/* gtr -> lss order, unlinking form */
#define UNLINK_REVERSE_NODES_OF(tree,type,ptr) \
    for ((ptr) = (type *)init_tree_generator(&(tree),REVERSE,rvm_true); \
         (ptr) != NULL; \
         (ptr) = (type *)tree_predecessor(&(tree)))
558 /* Structure to remember where we have/have not mmapped */
559 
560 /* vm buffers for dev_region_t nodes */
561 typedef struct
562     {
563     struct_id_t         struct_id;      /* self identifier */
564     rvm_length_t        ref_cnt;        /* references to buffer */
565     rvm_length_t        chk_sum;        /* data buffer checksum */
566     rvm_length_t        alloc_len;      /* allocated length of buffer */
567     rvm_length_t        data_len;       /* length of log data */
568     char                *buf;           /* start of data area */
569     }
570 nv_buf_t;
571 
/* len rounded up to rvm_length_t granularity, plus the nv_buf_t header */
#define NV_BUF_SIZE(len) \
    (ROUND_TO_LENGTH((len)) + sizeof(nv_buf_t))
573 
574 /* storage device region node */
575 typedef struct
576     {
577     tree_links_t        links;          /* ptr structure */
578     rvm_offset_t        offset;         /* segment start offset of changes */
579     rvm_offset_t        end_offset;     /* end offset (offset + length) */
580     rvm_length_t        length;         /* length of region */
581     char                *nv_ptr;        /* ptr into nv_buf */
582     nv_buf_t            *nv_buf;        /* buffer for new values if allocated */
583     rvm_offset_t        log_offset;     /* location of new values in log */
584     char                *vmaddr;        /* original vm addr (debug use only) */
585     }
586 dev_region_t;
587 
588 /* virtual memory region node */
589 typedef struct
590     {
591     tree_links_t    links;              /* ptr structure */
592     struct region_s *region;            /* region descriptor */
593     char            *vmaddr;            /* base address */
594     rvm_length_t    length;             /* length of vm region */
595     }
596 mem_region_t;
597 
/* function type of a node comparator; declared without a prototype --
   the list below appears to name the arguments, in order:
       tree_node_t     *node1;
       tree_node_t     *node2;
*/
typedef long cmp_func_t();
603 /* log records written by commit, and associated with new value records */
604 /* generic record header; not actually allocated, but any record header
605    can be cast to this to get its type & length for detailed analysis
606 */
607 typedef struct
608     {
609     struct_id_t     struct_id;          /* type of entry */
610     rvm_length_t    rec_length;         /* record length */
611     struct timeval  timestamp;          /* timestamp of record entry */
612     rvm_length_t    rec_num;            /* record number of entry */
613     }
614 rec_hdr_t;
615 
616 /* transaction record header: trans_hdr_t -- a single copy in the log descriptor
617 */
618 typedef struct
619     {
620     rec_hdr_t	    rec_hdr;		/* common log record header */
621     rvm_length_t    num_ranges;         /* number of ranges in record */
622     struct timeval  uname;              /* uname of transaction */
623     struct timeval  commit_stamp;       /* timestamp of commit */
624     rvm_length_t    n_coalesced;        /* count of coalesced transactions */
625     rvm_length_t    flags;              /* mode and optimization flags */
626     }
627 trans_hdr_t;
628 
629 /* new value record range header: nv_range_t */
630 typedef struct
631     {
632     rec_hdr_t	    rec_hdr;		/* common log record header */
633     rvm_length_t    sub_rec_len;        /* back displacement to previous hdr */
634     rvm_length_t    range_num;          /* range number in record */
635     rvm_length_t    length;             /* actual modification length */
636     rvm_offset_t    offset;             /* offset of changes in segment */
637     char            *vmaddr;            /* modification vm address */
638     rvm_length_t    chk_sum;            /* data checksum */
639     long            seg_code;           /* segment short name */
640     rvm_bool_t      is_split;           /* is a range split for log wrap */
641     }
642 nv_range_t;
643 /* special log types -- these records are inserted into the log to
644    record events not related to transaction commit and new value
645    recording.
646    These are generally used by the recovery algorithm to reconstruct the
647    committed images of segments at the time of a crash.
648 */
649 /* segment mapping descriptor -- inserted by map when a segment
650    is mapped the first time; used to relate the short names to
651    an actual device or file name */
652 typedef struct
653     {
654     long            seg_code;           /* segment short name */
655     rvm_offset_t    num_bytes;          /* maximum usable length of seg dev */
656     long            name_len;           /* length of segment name */
657     char            *name;              /* full path name */
658     }
659 log_seg_t;
660 
661 /* log_special_t: the carrier for all special log types
662    free list allocated; additional type-dependent data can be placed
663    after this structure; all records end with rec_end_t record
664 */
665 typedef struct
666     {
667     list_entry_t    links;              /* list links and free list struct id */
668                                         /* following fields are written in log */
669     rec_hdr_t	    rec_hdr;		/* common log record header */
670     union
671         {
672         log_seg_t   log_seg;            /* segment mapping marker */
673         }           special;
674     }
675 log_special_t;
676 /* generic log entry types */
677 
678 /* log record end marker: rec_end_t -- a single copy in the log descriptor */
679 typedef struct
680     {
681     rec_hdr_t	    rec_hdr;		/* common log record header */
682     struct_id_t     rec_type;           /* type of recorded ended */
683     rvm_length_t    sub_rec_len;        /* back displacement to previous sub-
684                                            record; same as rec_length if none */
685     }
686 rec_end_t;
687 
688 /* log wrap-around marker -- a single copy in the log descriptor */
689 typedef struct
690     {
691     rec_hdr_t	    rec_hdr;		/* common log record header */
692     struct_id_t     struct_id2;         /* for scan_wrap_reverse()! */
693     }
694 log_wrap_t;
695 
696 /* device descriptor -- included in log and segment descriptors */
697 typedef struct
698     {
699     char            *name;              /* print name of device */
700     long            name_len;           /* allocation length */
701     long            handle;             /* device handle */
702     rvm_offset_t    num_bytes;          /* length of device */
703     rvm_bool_t      raw_io;             /* true if using raw i/o */
704     unsigned long   type;               /* to store device type */
705     rvm_bool_t      read_only;          /* true if opened read-only */
706 
707     struct iovec   *iov;                /* gather write io vector */
708     long            iov_length;         /* length of iov array */
709     long            iov_cnt;            /* count of entries used in iov */
710     rvm_length_t    io_length;          /* accumulated length of i/o */
711     rvm_offset_t    last_position;      /* last location seeked or transfered */
712                                         /* the following fields are used for
713                                            log devices only */
714     char            *wrt_buf;           /* working raw io write buffer base */
715     rvm_length_t    wrt_buf_len;        /* usable wrt_buf length */
716     char            *ptr;               /* write buffer fill ptr */
717     char            *buf_start;         /* start of buffer flush region */
718     char            *buf_end;           /* end of buffer */
719     rvm_offset_t    sync_offset;        /* end offset after last sync */
720 
721     char            *pad_buf;           /* padding buffer */
722     long            pad_buf_len;        /* length of current pad buf */
723     }
724 device_t;
725 /* log structure macros */
726 
/* aligned length of a range's data, from its nv.vmaddr and nv.length */
#define RANGE_LEN(range) \
    (ALIGNED_LEN((range)->nv.vmaddr,(range)->nv.length))

/* range header overhead plus the aligned data length */
#define RANGE_SIZE(range) \
    ((rvm_length_t)(NV_RANGE_OVERHEAD + RANGE_LEN(range)))

/* transaction header plus record end marker, rounded up */
#define TRANS_SIZE \
    (ROUND_TO_LENGTH((sizeof(trans_hdr_t) + sizeof(rec_end_t))))

/* rounded size of a new value range header */
#define NV_RANGE_OVERHEAD \
    (ROUND_TO_LENGTH(sizeof(nv_range_t)))

/* range header plus 64 bytes */
#define MIN_NV_RANGE_SIZE \
    (NV_RANGE_OVERHEAD+64)

/* transaction overhead, one minimal range and a wrap marker */
#define MIN_TRANS_SIZE \
    (TRANS_SIZE + MIN_NV_RANGE_SIZE + ROUND_TO_LENGTH(sizeof(log_wrap_t)))

/* log_special_t without its leading list links, rounded up */
#define LOG_SPECIAL_SIZE \
    (ROUND_TO_LENGTH(sizeof(log_special_t) - sizeof(list_entry_t)))

#define LOG_SPECIAL_IOV_MAX 3

/* largest log type header on disc */
#define MAX_HDR_SIZE \
    (ROUND_TO_LENGTH((sizeof(log_special_t) + MAXPATHLEN)))
751 /* other constants */
752 
/* maximum size nv's kept in vm during recovery: 8K less the rounded size
   of an nv buffer holding sizeof(rvm_length_t)+1 bytes */
#define NV_LOCAL_MAX \
    (8*1024 - ROUND_TO_LENGTH(NV_BUF_SIZE(sizeof(rvm_length_t)+1)))

/* size of status area i/o buffer: log_dev_status_t rounded up to whole
   sectors */
#define LOG_DEV_STATUS_SIZE \
    ROUND_TO_SECTOR_SIZE(sizeof(log_dev_status_t))
760 
/* offsets for log status structures in files and partitions.
   RAW_STATUS_OFFSET is parenthesized so that it expands as a single
   operand inside larger expressions (for example as a divisor or the
   right-hand side of %) */
#define RAW_STATUS_OFFSET   (16*SECTOR_SIZE)
#define FILE_STATUS_OFFSET  0
764 
/* flushes before updating log status area */
#define UPDATE_STATUS           100
766 /* log status descriptor -- included in the log descriptor */
/* declarations compiled only when RVM_LOG_TAIL_SHADOW is defined */
#if defined(RVM_LOG_TAIL_SHADOW)
extern rvm_offset_t     log_tail_shadow;
extern rvm_bool_t       has_wrapped;

/* plain assignment of y to x */
#define RVM_ASSIGN_OFFSET(x,y)  (x) = (y)
#endif /* RVM_LOG_TAIL_SHADOW */
772 
773 typedef struct
774     {
775                                         /* status area control fields */
776     long            update_cnt;         /* number of updates before write */
777     rvm_bool_t      valid;              /* data in status area valid */
778     rvm_bool_t      log_empty;          /* true if log device & buffer empty */
779 
780                                         /* log pointers & limits */
781     rvm_offset_t    log_start;          /* first offset for records */
782     rvm_offset_t    log_size;           /* dev.num_bytes - log_start:
783                                            space for records */
784     rvm_offset_t    log_head;           /* current log head */
785     rvm_offset_t    log_tail;           /* current log tail */
786     rvm_offset_t    prev_log_head;      /* previous head (truncation only) */
787     rvm_offset_t    prev_log_tail;      /* previous tail (truncation only) */
788 
789                                         /* consistency check fields */
790     struct timeval  status_init;        /* timestamp log creation */
791     struct timeval  status_write;       /* timestamp for last status write*/
792     struct timeval  last_trunc;         /* timestamp for last truncation */
793     struct timeval  prev_trunc;         /* timestamp for previous truncation */
794     struct timeval  first_write;        /* timestamp of first record in log */
795     struct timeval  last_write;         /* timestamp of last record in log */
796     struct timeval  first_uname;        /* first transaction uname in log */
797     struct timeval  last_uname;         /* last transaction uname in log */
798     struct timeval  last_commit;        /* last transaction commit timestamp */
799     struct timeval  wrap_time;          /* wrap timestamp if log wrapped */
800     rvm_length_t    first_rec_num;      /* 1st rec num of truncation epoch */
801     rvm_length_t    last_rec_num;       /* last rec num of truncation epoch */
802     rvm_length_t    next_rec_num;       /* assignment counter for rec_nums */
803 
804                                         /* transaction statistics */
805     rvm_length_t    n_abort;            /* number of transactions aborted */
806     rvm_length_t    n_flush_commit;     /* number of flush mode commits */
807     rvm_length_t    n_no_flush_commit;  /* number of no_flush mode commits */
808     rvm_length_t    n_split;            /* number trans split for log wrap */
809     rvm_length_t    n_truncation_wait;  /* transactions delayed by truncation */
810 
811                                         /* log statistics */
812     rvm_length_t    n_flush;            /* number of internal flushes */
813     rvm_length_t    n_rvm_flush;        /* number of explicit flush calls */
814     rvm_length_t    n_special;          /* number of special log records */
815     rvm_offset_t    range_overlap;      /* current overlap eliminated by range coalesce */
816     rvm_offset_t    trans_overlap;      /* current overlap eliminated by trans coalesce */
817     rvm_length_t    n_range_elim;       /* number of ranges eliminated by
818                                            range coalesce/flush */
819     rvm_length_t    n_trans_elim;       /* number of ranges eliminated by
820                                            trans coalesce/flush */
821     rvm_length_t    n_trans_coalesced;  /* number of transactions coalesced in
822                                            this flush cycle */
823     struct timeval  flush_time;         /* time spent in flushes */
824     rvm_length_t    last_flush_time;    /* duration of last flush (msec) */
825     rvm_length_t    last_truncation_time; /* duration of last truncation (sec) */
826     rvm_length_t    last_tree_build_time; /* duration of tree build (sec) */
827     rvm_length_t    last_tree_apply_time; /* duration of tree apply phase
828                                              (sec) */
829 
830                                         /* histogram vectors */
831 
832     rvm_length_t    flush_times[flush_times_len]; /* flush timings */
833     rvm_length_t    range_lengths[range_lengths_len]; /* range lengths flushed */
834     rvm_length_t    range_elims[range_elims_len]; /* num ranges eliminated by
835                                                      range coalesce/flush */
836     rvm_length_t    trans_elims[trans_elims_len]; /* num ranges eliminated by
837                                                      trans coalesce/flush */
838     rvm_length_t    range_overlaps[range_overlaps_len]; /* space saved by
839                                                            range coalesce/flush */
840     rvm_length_t    trans_overlaps[range_overlaps_len]; /* space saved by
841                                                            trans coalesce/flush */
842 
843                                         /* cummulative transaction stats */
844     rvm_length_t    tot_abort;          /* total aborted transactions */
845     rvm_length_t    tot_flush_commit;   /* total flush commits */
846     rvm_length_t    tot_no_flush_commit; /* total no_flush commits */
847     rvm_length_t    tot_split;          /* total transactions split for log
848                                            wrap-around */
849 
850                                         /* cummulative log statistics */
851     rvm_length_t    tot_flush;          /* total internal flush calls  */
852     rvm_length_t    tot_rvm_flush;      /* total explicit rvm_flush calls  */
853     rvm_length_t    tot_special;        /* total special log records */
854     rvm_length_t    tot_wrap;           /* total log wrap-arounds */
855     rvm_length_t    log_dev_max;        /* maximum % log device used so far */
856     rvm_offset_t    tot_log_written;    /* total length of all writes to log */
857     rvm_offset_t    tot_range_overlap;  /* total overlap eliminated by range coalesce */
858     rvm_offset_t    tot_trans_overlap;  /* total overlap eliminated by trans coalesce */
859     rvm_length_t    tot_range_elim;     /* total number of ranges eliminated by
860                                            range coalesce */
861     rvm_length_t    tot_trans_elim;     /* total number of ranges eliminated by
862                                            trans coalesce */
863     rvm_length_t    tot_trans_coalesced; /* total number of transactions coalesced */
864 
865                                         /* truncation statistics */
866     rvm_length_t    tot_rvm_truncate;   /* total explicit rvm_truncate calls */
867     rvm_length_t    tot_async_truncation; /* total asynchronous truncations */
868     rvm_length_t    tot_sync_truncation; /* total forced synchronous truncations */
869     rvm_length_t    tot_truncation_wait; /* total transactions delayed by truncation */
870     rvm_length_t    tot_recovery;       /* total recovery truncations */
871     struct timeval  tot_flush_time;     /* total time spent in flush */
872     struct timeval  tot_truncation_time; /* cumulative truncation time */
873 
874                                         /* histogram vectors */
875 
876     rvm_length_t    tot_tree_build_times[truncation_times_len]; /* truncation timings */
877     rvm_length_t    tot_tree_apply_times[truncation_times_len];
878     rvm_length_t    tot_truncation_times[truncation_times_len];
879     rvm_length_t    tot_flush_times[flush_times_len]; /* cummulative flush timings */
880     rvm_length_t    tot_range_lengths[range_lengths_len]; /* cummulative range lengths flushed */
881     rvm_length_t    tot_range_elims[range_elims_len]; /* total num ranges eliminated by
882                                                          range coalesce/flush */
883     rvm_length_t    tot_trans_elims[trans_elims_len]; /* total num ranges eliminated by                                                 trans coalesce/flush */
884     rvm_length_t    tot_range_overlaps[range_overlaps_len]; /* space saved by
885                                                            range coalesce/flush */
886     rvm_length_t    tot_trans_overlaps[range_overlaps_len]; /* space saved by
887                                                            trans coalesce/flush */
888     rvm_length_t    tot_trans_coalesces[trans_coalesces_len]; /* transactions coalesced
889                                                                  per flush  */
890     rvm_length_t    flush_state;        /* flush status */
891     rvm_length_t    trunc_state;        /* truncation status */
892     }
893 log_status_t;
894 /* log status descriptor on log device: log_dev_status_t */
895 typedef struct
896     {
897     struct_id_t     struct_id;          /* self identifier */
898     rvm_length_t    chk_sum;            /* check sum */
899     char            version[RVM_VERSION_MAX]; /* RVM interface version string */
900     char            log_version[RVM_VERSION_MAX]; /* RVM log version string */
901     char            statistics_version[RVM_VERSION_MAX]; /* RVM statistics version string */
902     log_status_t    status;             /* log status info */
903     }
904 log_dev_status_t;
905 
/*
 * Flush and truncation state bits.
 * Each value is a distinct single bit so that states can be or'ed together.
 */
#define RVM_FLUSH_CALL          (0x001)   /* log flush initiated by rvm_flush */
#define RVM_FLUSH_COMMIT        (0x002)   /* log flush initiated by commit */
#define RVM_RECOVERY            (0x004)   /* NOTE(review): presumably set while
                                             recovering the log -- the original
                                             had no comment of its own here */
#define RVM_TRUNCATE_CALL       (0x008)   /* truncation initiated by rvm_truncate */
#define RVM_ASYNC_TRUNCATE      (0x010)   /* truncation initiated by rvm daemon */
#define RVM_SYNC_TRUNCATE       (0x020)   /* truncation forced by flush */
#define RVM_TRUNC_FIND_TAIL     (0x040)   /* truncation phase 1: find current
                                             log tail */
#define RVM_TRUNC_BUILD_TREE    (0x080)   /* phase 2: build modification trees */
#define RVM_TRUNC_APPLY         (0x100)   /* phase 3: apply modifications */
#define RVM_TRUNC_UPDATE        (0x200)   /* phase 4: update log status */

/* mask covering all four truncation phase bits */
#define RVM_TRUNC_PHASES \
    (RVM_TRUNC_FIND_TAIL | RVM_TRUNC_BUILD_TREE \
     | RVM_TRUNC_APPLY | RVM_TRUNC_UPDATE)
929 
930 /* log recovery buffer descriptor -- single copy in log descriptor */
931 typedef struct
932     {
933     char            *buf;               /* working recovery buffer base */
934     char            *shadow_buf;
935     long            length;             /* length of allocated buffer */
936     rvm_offset_t    buf_len;            /* log buffer length as offset */
937     long            r_length;           /* length of data read into buffer */
938     rvm_offset_t    offset;             /* offset of buffer start in segment */
939     long            ptr;                /* index of present buffer position */
940     struct timeval  timestamp;          /* timestamp of transaction in buffer */
941 
942     char            *aux_buf;           /* working auxillary buffer base */
943     long            aux_length;         /* length of aux_buf */
944     rvm_offset_t    aux_offset;         /* offset of data in buffer */
945     long            aux_rlength;        /* length of data read into buffer */
946 
947     struct timeval  prev_timestamp;     /* timestamp of previous record */
948     rvm_length_t    prev_rec_num;       /* previous record number */
949     rvm_bool_t      prev_direction;     /* last scanning direction */
950     rvm_bool_t      split_ok;           /* ok to process split records */
951     }
952 log_buf_t;
953 
954 /* log buffer management defs */
955 
956 #define SYNCH       rvm_true            /* synchronization required */
957 #define NO_SYNCH    rvm_false           /* synchronization not required */
/*
 * Log truncation daemon control structures.
 *
 * daemon_state_t -- control state of the truncation daemon.  Values are
 * consecutive, starting at 1000.
 */
typedef enum {
    rvm_idle      = 1000,               /* daemon idle */
    init_truncate = 1001,               /* initiate truncation */
    truncating    = 1002,               /* truncation in progress */
    terminate     = 1003,               /* shutdown */
    error         = 1004                /* terminated due to error */
} daemon_state_t;
969 
970 typedef struct
971     {
972     cthread_t       thread;             /* daemon thread handle */
973     RVM_MUTEX       lock;               /* daemon lock -- protects following
974                                            fields */
975     RVM_CONDITION   code;               /* condition code to signal daemon */
976     RVM_CONDITION   flush_flag;         /* condition code to signal flush */
977     RVM_CONDITION   wake_up;            /* conditon code to signal threads
978                                            waiting for truncation completion */
979     daemon_state_t  state;              /* control state */
980     long            truncate;           /* truncation threshold, as % of log */
981     }
982 log_daemon_t;
983 /* log descriptor */
984 typedef struct
985     {
986     list_entry_t    links;              /* list links and struct id -- points
987                                            to log list root */
988     long            ref_cnt;            /* count seg's using this log device */
989 
990     RVM_MUTEX       dev_lock;           /* log device lock, protects device and
991                                            following i/o related fields: */
992     device_t        dev;                /* log device descriptor */
993     log_status_t    status;             /* log status area descriptor */
994     trans_hdr_t     trans_hdr;          /* i/o header for transaction log entry */
995     rec_end_t       rec_end;            /* i/o end marker for log entry */
996     log_wrap_t      log_wrap;           /* i/o log wrap-around marker */
997     log_buf_t       log_buf;            /* log recovery buffer */
998                                         /* end of log_dev_lock protected fields */
999 
1000     RVM_MUTEX       tid_list_lock;      /* lock for tid list header & links
1001                                            used when adding/deleting a tid */
1002     list_entry_t    tid_list;           /* root of active transaction list */
1003 
1004     RVM_MUTEX       flush_list_lock;    /* lock for flush list header & links
1005                                            used to add/delete a no_flush tid */
1006     list_entry_t    flush_list;         /* list of no_flush committed tid's */
1007 
1008     RVM_MUTEX       special_list_lock;  /* lock for special list header & links
1009                                            used to add/delete a special entry */
1010     list_entry_t    special_list;       /* list of special log entries */
1011 
1012     rw_lock_t       flush_lock;         /* log flush synchronization */
1013     log_daemon_t    daemon;             /* truncation daemon control */
1014     RVM_MUTEX       truncation_lock;    /* truncation synchronization */
1015     cthread_t       trunc_thread;
1016     rvm_bool_t      in_recovery;        /* true if in recovery */
1017 
1018     struct seg_dict_s
1019                     *seg_dict_vec;      /* recovery segment dictionary */
1020     long            seg_dict_len;       /* length of seg_dict_vec */
1021     device_t        *cur_seg_dev;       /* current segment device in truncation */
1022     }
1023 log_t;
1024 /* segment descriptor: seg_t */
1025 typedef struct
1026     {
1027     list_entry_t    links;              /* list links and struct id */
1028 
1029     RVM_MUTEX       dev_lock;           /* device lock */
1030     device_t        dev;                /* segment device descriptor */
1031     long            seg_code;           /* short name for log entries */
1032     log_t           *log;               /* log descriptor ptr */
1033 
1034     RVM_MUTEX       seg_lock;           /* lock for seg lists: protects header
1035                                            and links -- used when mapping or
1036                                            unmapping a region */
1037     list_entry_t    map_list;           /* mapped region list header */
1038     list_entry_t    unmap_list;         /* unmapped region list header */
1039 
1040     rvm_bool_t      threads_waiting;    /* at least one thread is waiting to
1041                                            map a previously unmapped region */
1042     }
1043 seg_t;
1044 
1045 /* recovery dictionary segment descriptor: seg_dict_t */
1046 struct seg_dict_s
1047     {
1048     struct_id_t     struct_id;          /* self-identifier */
1049     seg_t           *seg;               /* ptr to real segment */
1050     device_t        dev;                /* used in recovery only */
1051     long            seg_code;           /* short segment id */
1052     tree_root_t     mod_tree;           /* modification tree for recovery */
1053     };
1054 
1055 typedef struct seg_dict_s seg_dict_t;
1056 
1057 #define SEG_DICT_INDEX(x)   ((x)-1)     /* index of segemnt in seg_dict_vec */
1058 /* region descriptor: region_t */
1059 typedef struct region_s
1060     {
1061     list_entry_t    links;              /* list links and struct id
1062                                            -- protected by seg.map_lock
1063                                               or seg.unmap_lock */
1064     rw_lock_t       region_lock;        /* rw lock for following fields */
1065     seg_t           *seg;               /* back ptr to segment */
1066     mem_region_t    *mem_region;        /* back ptr to region tree node */
1067     rvm_offset_t    offset;             /* offset of region base in segment */
1068     rvm_offset_t    end_offset;         /* offset of region end in segment */
1069     char            *vmaddr;            /* virtual memory base address */
1070     rvm_length_t    length;             /* length of region */
1071     rvm_bool_t      no_copy;            /* data not copied on map */
1072 
1073     RVM_MUTEX       count_lock;         /* accounting lock for next 2 fields */
1074     long            n_uncommit;         /* # uncommitted modifications in region */
1075     rvm_bool_t      dirty;              /* dirty bit; set by end_transaction */
1076 
1077     struct timeval  unmap_ts;           /* unmap timestamp for truncation chk */
1078     }
1079 region_t;
1080 
1081 /* modification range descriptor: range_t */
1082 typedef struct
1083     {
1084     tree_links_t    links;              /* tree links and struct id */
1085     char            *data;              /* old/new values, when used */
1086     rvm_length_t    data_len;           /* allocation length of data buffer */
1087     char            *nvaddr;            /* address of saved new values */
1088     region_t        *region;            /* back ptr to affected region */
1089     rvm_offset_t    end_offset;         /* end byte of range */
1090     nv_range_t      nv;                 /* nv range record header for i/o */
1091     }
1092 range_t;
1093 /* transaction id descriptor: int_tid_t */
1094 typedef struct
1095     {
1096     list_entry_t    links;              /* list links and struct id; protected
1097                                            by log tid_list_lock */
1098     rw_lock_t       tid_lock;           /* remaining fields protected by
1099                                            tid_lock until on flush list*/
1100     struct timeval  uname;              /* unique identifier */
1101     struct timeval  commit_stamp;       /* timestamp of commit */
1102     log_t           *log;               /* back link to log descriptor */
1103     rvm_offset_t    log_size;           /* log space required */
1104     tree_root_t     range_tree;         /* range tree root */
1105     range_t         **x_ranges;         /* vector of overlaping ranges */
1106     long            x_ranges_alloc;     /* allocated length of x_ranges */
1107     long            x_ranges_len;       /* current length of x_ranges */
1108     long            range_elim;         /* ranges eliminated by range coalesce */
1109     long            trans_elim;         /* ranges eliminated by trans coalesce */
1110     rvm_offset_t    range_overlap;      /* overlap eliminated by range coalesce */
1111     rvm_offset_t    trans_overlap;      /* overlap eliminated by trans coalesce */
1112     rvm_length_t    n_coalesced;        /* count of coalesced transactions */
1113     range_t         split_range;        /* extra range for flush */
1114     rvm_length_t    flags;              /* mode and optimization flags */
1115     rvm_length_t    back_link;          /* displacement to previous header */
1116     }
1117 int_tid_t;
1118 
/*
 * Bits for the tid flags field (also used in the trans_hdr flags field).
 * They continue upward from RVM_COALESCE_TRANS, each one being twice the
 * previous value.
 */
#define RESTORE_FLAG        (2*RVM_COALESCE_TRANS)
#define FLUSH_FLAG          (RESTORE_FLAG << 1)
#define FIRST_ENTRY_FLAG    (FLUSH_FLAG << 1)
#define LAST_ENTRY_FLAG     (FIRST_ENTRY_FLAG << 1)
#define FLUSH_MARK          (LAST_ENTRY_FLAG << 1)

/* Flag tests; these rely on a variable named `tid` (resp. `trans_hdr`)
   being in scope at the point of use. */
#define TID(x)              (0 != (tid->flags & (x)))
#define TRANS_HDR(x)        (0 != (trans_hdr->flags & (x)))
/*
 * Functions and structures for managing the list of RVM-allocated
 * regions of memory (added by tilt, Nov 19 1996).
 *
 * rvm_page_entry_t is one node of a doubly linked list; each node
 * records the start and end address of one region.
 */
struct rvm_page_entry {
    char                   *start;      /* start address of the region */
    char                   *end;        /* end address of the region */
    struct rvm_page_entry  *prev;       /* previous list node */
    struct rvm_page_entry  *next;       /* next list node */
};

typedef struct rvm_page_entry rvm_page_entry_t;
1137 
1138 rvm_bool_t rvm_register_page(char *vmaddr, rvm_length_t length);
1139 rvm_bool_t rvm_unregister_page(char *vmaddr, rvm_length_t length);
1140 /* list management functions */
1141 
1142 extern
1143 void init_list_header();                /* [rvm_utils.c] */
1144 /*  list_entry_t    *whichlist;
1145     struct_id_t     struct_id;
1146 */
1147 extern
1148 list_entry_t *move_list_entry();        /* [rvm_utils.c] */
1149 /*  register list_entry_t *fromptr;
1150     register list_entry_t *toptr;
1151     register list_entry_t *cell;
1152 */
1153 extern
1154 list_entry_t *alloc_list_entry();        /* [rvm_utils.c] */
1155 /*  struct_id_t     id; */
1156 
1157 /* internal type allocators/deallocators */
1158 
1159 extern
1160 void clear_free_list();                 /* [rvm_utils.c] */
1161 /*  struct_id_t     id; */
1162 
1163 extern
1164 region_t *make_region();                /* [rvm_utils.c] */
1165 
1166 extern
1167 void free_region();                     /* [rvm_utils.c] */
1168 /*  region_t        *region; */
1169 
1170 extern
1171 seg_t *make_seg();                      /* [rvm_utils.c] */
1172 /*  char            *seg_dev_name;
1173     rvm_return_t    *retval
1174 */
1175 extern
1176 void free_seg();                        /* [rvm_utils.c] */
1177 /*  seg_t           *seg; */
1178 
1179 extern
1180 void free_seg_dict_vec();               /* [rvm_utils.c] */
1181 /*  log_t           *log; */
1182 
1183 extern
1184 log_t *make_log();                      /* [rvm_utils.c] */
1185 /*  char            *dev_name;
1186     rvm_return_t    *retval
1187 */
1188 extern
1189 void free_log();                        /* [rvm_utils.c] */
1190 /*  log_t           *log; */
1191 
1192 extern
1193 char *make_full_name();                /* [rvm_utils.c] */
1194 /*  char            *dev_str;
1195     char            *dev_name;
1196     rvm_return_t    *retval;
1197 */
1198 extern
1199 void free_log();                        /* [rvm_utils.c] */
1200 /*  log_t           *log; */
1201 
1202 extern
1203 log_special_t *make_log_special();      /* [rvm_utils.c] */
1204 /*  struct_id_t     special_id;
1205     rvm_length_t    length;
1206 */
1207 extern
1208 void free_log_special();                /* [rvm_utils.c] */
1209 /*  log_special_t   *special; */
1210 extern
1211 rvm_return_t dev_init();                /* [rvm_utils.c] */
1212 /*  device_t        *dev;
1213     char            *dev_str;
1214 */
1215 extern
1216 range_t *make_range();                  /* [rvm_utils.c] */
1217 
1218 extern
1219 void free_range();                      /* [rvm_utils.c] */
1220 /*  range_t         *range; */
1221 
1222 extern
1223 int_tid_t *make_tid();                  /* [rvm_utils.c] */
1224 /*  rvm_mode_t      mode; */
1225 
1226 extern
1227 void free_tid();                        /* [rvm_utils.c] */
1228 /*  register int_tid_t  *tid; */
1229 
1230 extern
1231 mem_region_t *make_mem_region();        /* [rvm_utils.c] */
1232 
1233 extern
1234 void free_mem_region();                 /* [rvm_utils.c] */
1235 /*  mem_region_t   *node; */
1236 
1237 extern
1238 dev_region_t *make_dev_region();        /* [rvm_utils.c] */
1239 
1240 extern
1241 void free_dev_region();                 /* [rvm_utils.c] */
1242 /*  dev_region_t   *node; */
1243 /* log management functions */
1244 
1245 extern
1246 void init_log_list();                   /* [rvm_logstatus.c] */
1247 
1248 extern
1249 void enter_log();                       /* [rvm_logstatus.c] */
1250 /*  log_t           *log; */
1251 
1252 extern
1253 rvm_return_t open_log();                /* [rvm_logstatus.c] */
1254 /*  char            *dev_name;
1255     log_t           **log_ptr;
1256     char            *status_buf;
1257     rvm_options_t   *rvm_options;
1258 */
1259 extern
1260 rvm_return_t create_log();              /* [rvm_logstatus.c] */
1261 /*  log_t           **log_ptr;
1262     rvm_options_t   *rvm_options;
1263 */
1264 extern
1265 rvm_return_t do_log_options();          /* [rvm_logstatus.c] */
1266 /*  log_t           *log;
1267     rvm_options_t   *rvm_options;
1268 */
1269 extern
1270 rvm_return_t close_log();               /* [rvm_logstatus.c] */
1271 /*  log_t           *log; */
1272 
1273 extern
1274 rvm_return_t close_all_logs();          /* [rvm_logstatus.c] */
1275 
1276 extern
1277 void copy_log_stats();                  /* [rvm_logstatus.c] */
1278 /*  log_t           *log; */
1279 
1280 extern
1281 void clear_log_status();                /* [rvm_logstatus.c] */
1282 /*  log_t           *log; */
1283 
1284 extern
1285 rvm_return_t init_log_status();         /* [rvm_logstatus.c] */
1286 /*  log_t           *log; */
1287 extern
1288 rvm_return_t read_log_status();         /* [rvm_logstatus.c] */
1289 /*  log_t           *log;
1290     char            *status_buf;
1291 */
1292 extern
1293 rvm_return_t write_log_status();        /* [rvm_logstatus.c] */
1294 /*  log_t           *log;
1295     device_t        *dev;
1296 */
1297 extern
1298 rvm_return_t update_log_tail();         /* [rvm_logstatus.c] */
1299 /*  log_t           *log;
1300     rec_hdr_t       *rec_hdr;
1301 */
1302 extern
1303 void log_tail_length();                 /* [rvm_logstatus.c] */
1304 /*  log_t           *log;
1305     rvm_offset_t    *tail_length;
1306 */
1307 extern
1308 void log_tail_sngl_w();                 /* [rvm_logstatus.c] */
1309 /*  log_t           *log;
1310     rvm_offset_t    *tail_length;
1311 */
1312 extern
1313 long cur_log_percent();                 /* [rvm_logstatus.c] */
1314 /*  log_t           *log;
1315     rvm_offset_t    *space_nneded;
1316 */
1317 extern
1318 void cur_log_length();                  /* [rvm_logstatus.c] */
1319 /*  log_t           *log;
1320     rvm_offset_t    *length;
1321 */
1322 extern
1323 rvm_return_t queue_special();           /* [rvm_logflush.c] */
1324 /*  log_t           *log;
1325     log_special_t   *special;
1326 */
1327 extern
1328 rvm_return_t flush_log();               /* [rvm_logflush.c] */
1329 /*  log_t           *log;
1330     long            *count;
1331 */
1332 extern
1333 rvm_return_t locate_tail();             /* [rvm_logrecovr.c] */
1334 /*  log_t           *log; */
1335 
1336 extern
1337 rvm_return_t init_buffer();             /* [rvm_logrecovr.c] */
1338 /*  log_t           *log;
1339     rvm_offset_t    *offset;
1340     rvm_bool_t      direction;
1341     rvm_bool_t      synch;
1342 */
1343 extern
1344 void clear_aux_buf();                   /* [rvm_logrecovr.c] */
1345 /*  log_t           *log; */
1346 
1347 extern
1348 rvm_return_t load_aux_buf();            /* [rvm_logrecovr.c] */
1349 /*  log_t           *log;
1350     rvm_offset_t    *offset;
1351     rvm_length_t    length;
1352     rvm_length_t    *aux_ptr;
1353     rvm_length_t    *data_len;
1354     rvm_bool_t      direction;
1355     rvm_bool_t      synch;
1356 */
1357 extern
1358 void reset_hdr_chks();                  /* [rvm_logrecovr.c] */
1359 /*  log_t           *log; */
1360 
1361 extern
1362 rvm_bool_t chk_hdr_currency();          /* [rvm_logrecovr.c] */
1363 /*  log_t           *log;
1364     rec_hdr_t       *rec_hdr;
1365 */
1366 extern
1367 rvm_bool_t chk_hdr_sequence();          /* [rvm_logrecovr.c] */
1368 /*  log_t           *log;
1369     rec_hdr_t       *rec_hdr;
1370     rvm_bool_t      direction;
1371 */
1372 extern
1373 rvm_bool_t validate_hdr();              /* [rvm_logrecovr.c] */
1374 /*  log_t           *log;
1375     rec_hdr_t       *rec_hdr;
1376     rec_end_t       *rec_end;
1377     rvm_bool_t      direction;
1378 */
1379 extern
1380 rvm_return_t validate_rec_reverse();    /* [rvm_logrecovr.c] */
1381 /*  log_t           *log;
1382     rvm_bool_t      synch;
1383 */
1384 extern
1385 rvm_return_t scan_forward();            /* [rvm_logrecovr.c] */
1386 /*  log_t           *log;
1387     rvm_bool_t      synch;
1388 */
1389 extern
1390 rvm_return_t scan_reverse();            /* [rvm_logrecovr.c] */
1391 /*  log_t           *log;
1392     rvm_bool_t      synch;
1393 */
1394 extern
1395 rvm_return_t scan_nv_forward();         /* [rvm_logrecovr.c] */
1396 /*  log_t           *log;
1397     rvm_bool_t      synch;
1398 */
1399 extern
1400 rvm_return_t scan_wrap_reverse();       /* [rvm_logrecovr.c] */
1401 /*  log_t           *log;
1402     rvm_bool_t      synch;
1403 */
1404 extern
1405 rvm_bool_t initiate_truncation();       /* [rvm_logrecovr.c] */
1406 /*  log_t           *log;
1407     long            threshold;
1408 */
1409 extern
1410 rvm_return_t wait_for_truncation();     /* [rvm_logrecovr.c] */
1411 /*  log_t           *log;
1412     struct timeval  *time_stamp;
1413 */
1414 extern
1415 rvm_return_t log_recover();             /* [rvm_logrecovr.c] */
1416 /*  log_t           *log;
1417     long            *count;
1418     rvm_bool_t      is_daemon;
1419     rvm_length_t    flag;
1420 */
1421 extern
1422 void log_daemon();                      /* [rvm_logrecovr.c] */
1423 /*  log_t           *log; */
1424 
1425 extern
1426 rvm_return_t alloc_log_buf();           /* [rvm_logrecovr.c] */
1427 /*  log_t           *log; */
1428 
1429 extern
1430 void free_log_buf();                    /* [rvm_logrecovr.c] */
1431 /*  log_t           *log; */
1432 
1433 /* Segment & region management functions */
1434 /* [rvm_map.c] */
1435 void init_map_roots(void);
1436 rvm_return_t bad_region(rvm_region_t *rvm_region);
1437 char *page_alloc(rvm_length_t len);
1438 void page_free(char *vmaddr, rvm_length_t length);
1439 rvm_return_t close_all_segs(void);
1440 seg_t *seg_lookup(char *dev_name, rvm_return_t *retval);
1441 rvm_return_t define_all_segs(log_t *log);
1442 long dev_partial_include(rvm_offset_t *base1, rvm_offset_t *end1, rvm_offset_t *base2, rvm_offset_t *end2);
1443 long dev_total_include(rvm_offset_t *base1, rvm_offset_t *end1, rvm_offset_t *base2, rvm_offset_t *end2);
1444 long mem_total_include(tree_node_t *tnode1, tree_node_t *tnode2);
1445 region_t *find_whole_range(char *dest, rvm_length_t length, rw_lock_mode_t mode);
1446 rvm_return_t rvm_map(rvm_region_t *rvm_region, rvm_options_t *rvm_options);
1447 
1448 
1449 /* segment dictionary functions */
1450 extern
1451 rvm_return_t enter_seg_dict();          /* [rvm_logrecovr.c] */
1452 /*  log_t           *log;
1453     long            seg_code;
1454 */
1455 extern
1456 rvm_return_t def_seg_dict();            /* [rvm_logrecovr.c] */
1457 /*  log_t           *log;
1458     rec_hdr_t       *rec_hdr;
1459 */
1460 /* I/O functions */
1461 
/*
 * open_dev                                     [rvm_io.c]
 * Old-style declaration: no prototype, so calls are not type-checked.
 * Arguments as recorded in this header (confirm against the definition):
 *     device_t    *dev
 *     long         flags
 *     long         mode
 */
extern long open_dev();
/*
 * close_dev                                    [rvm_io.c]
 * Old-style declaration: no prototype, so calls are not type-checked.
 * Argument as recorded in this header (confirm against the definition):
 *     device_t    *dev
 */
extern long close_dev();
1471 
/*
 * read_dev                                     [rvm_io.c]
 * Old-style declaration: no prototype, so calls are not type-checked.
 * Arguments as recorded in this header (confirm against the definition):
 *     device_t     *dev
 *     rvm_offset_t *offset
 *     char         *dest
 *     rvm_length_t  length
 */
extern long read_dev();
/*
 * write_dev                                    [rvm_io.c]
 * Old-style declaration: no prototype, so calls are not type-checked.
 * Arguments as recorded in this header (confirm against the definition):
 *     device_t     *dev
 *     rvm_offset_t *offset
 *     char         *src
 *     rvm_length_t  length
 *     rvm_bool_t    no_sync
 */
extern long write_dev();
/*
 * sync_dev                                     [rvm_io.c]
 * Old-style declaration: no prototype, so calls are not type-checked.
 * Argument as recorded in this header (confirm against the definition):
 *     device_t    *dev
 */
extern long sync_dev();
1490 
/*
 * gather_write_dev                             [rvm_io.c]
 * Old-style declaration: no prototype, so calls are not type-checked.
 * Arguments as recorded in this header (confirm against the definition):
 *     device_t     *dev
 *     rvm_offset_t *offset
 *     struct iovec *iov
 *     rvm_length_t  iovcnt
 */
extern long gather_write_dev();
1498 
1499 /* length is optional [rvm_io.c] */
1500 extern long set_dev_char(device_t *dev,rvm_offset_t *dev_length);
1501 
1502 /* read/write lock */
/*
 * rw_lock                                      [rvm_utils.c]
 * Old-style declaration: no prototype, so calls are not type-checked.
 * Arguments as recorded in this header (confirm against the definition):
 *     rw_lock_t      *rwl
 *     rw_lock_mode_t  mode
 */
extern void rw_lock();
/*
 * rw_unlock                                    [rvm_utils.c]
 * Old-style declaration: no prototype, so calls are not type-checked.
 * Arguments as recorded in this header (confirm against the definition):
 *     rw_lock_t      *rwl
 *     rw_lock_mode_t  mode
 */
extern void rw_unlock();
/*
 * init_rw_lock                                 [rvm_utils.c]
 * Old-style declaration: no prototype, so calls are not type-checked.
 * Argument as recorded in this header (confirm against the definition):
 *     rw_lock_t   *rwl
 */
extern void init_rw_lock();
1516 
/*
 * init_tree_root                               [rvm_utils.c]
 * Old-style declaration: no prototype, so calls are not type-checked.
 * Argument as recorded in this header (confirm against the definition):
 *     tree_root_t *root
 */
extern void init_tree_root();
1520 
1521 extern
1522 tree_node_t *tree_lookup();             /* [rvm_utils.c] */
1523 /*  tree_root_t     *tree;
1524     tree_node_t     *node;
1525     cmp_func_t      *cmp;
1526 */
1527 extern
1528 rvm_bool_t tree_insert();               /* [rvm_utils.c] */
1529 /*  tree_root_t     *tree;
1530     tree_node_t     *node;
1531     cmp_func_t      *cmp;
1532 */
1533 extern
1534 rvm_bool_t tree_delete();               /* [rvm_utils.c] */
1535 /*  tree_root_t     *tree;
1536     tree_node_t     *node;
1537     cmp_func_t      *cmp;
1538 */
1539 extern
1540 tree_node_t *init_tree_generator();     /* [rvm_utils.c] */
1541 /*  tree_root_t     *tree;
1542     rvm_bool_t      direction;
1543     rvm_bool_t      unlink;
1544 */
1545 extern
1546 tree_node_t *tree_iterate_insert();     /* [rvm_utils.c] */
1547 /*  tree_root_t     *tree;
1548     tree_node_t     *node;
1549     cmp_func_t      *cmp;
1550 */
1551 extern
1552 tree_node_t *tree_successor();          /* [rvm_utils.c] */
1553 /*  tree_root_t     *tree;
1554     rvm_bool_t      direction;
1555 */
1556 extern
1557 tree_node_t *tree_predecessor();        /* [rvm_utils.c] */
1558 /*  tree_root_t     *tree;
1559     rvm_bool_t      direction;
1560 */
1561 /* initialization, query, and structure checkers */
1562 
1563 extern rvm_bool_t bad_init();                  /* [rvm_init.c] */
1564 
1565 /* [rvm_status.c] */
1566 rvm_return_t bad_options(rvm_options_t *rvm_options, rvm_bool_t chk_log_dev);
1567 rvm_return_t bad_statistics(rvm_statistics_t *rvm_statistics);
1568 
1569 extern
1570 rvm_return_t bad_region();              /* [rvm_map.c] */
1571 /*   rvm_region_t   *rvm_region; */
1572 
1573 extern
1574 rvm_return_t bad_tid();                 /* [rvm_trans.c] */
1575 /*   rvm_tid_t      *rvm_tid; */
1576 
1577 extern
1578 rvm_return_t do_rvm_options();          /* [rvm_status.c] */
1579 /*  rvm_options_t   *rvm_options; */
1580 /* make unique name */
/*
 * make_uname                                   [rvm_utils.c]
 * Old-style declaration: no prototype, so calls are not type-checked.
 * Argument as recorded in this header (confirm against the definition):
 *     struct timeval *time
 */
extern void make_uname();
/*
 * init_unames                                  [rvm_utils.c]
 * Old-style declaration: no prototype, so calls are not type-checked.
 * This header records no argument list for it.
 */
extern long init_unames();
1586 
1587 /* time value arithmetic */
/*
 * add_times                                    [rvm_utils.c]
 * Old-style declaration: no prototype, so calls are not type-checked.
 * Returns a struct timeval by value.
 * Arguments as recorded in this header (confirm against the definition):
 *     struct timeval *x
 *     struct timeval *y
 */
extern struct timeval add_times();
/*
 * sub_times                                    [rvm_utils.c]
 * Old-style declaration: no prototype, so calls are not type-checked.
 * Returns a struct timeval by value.
 * Arguments as recorded in this header (confirm against the definition):
 *     struct timeval *x
 *     struct timeval *y
 */
extern struct timeval sub_times();
/*
 * round_time                                   [rvm_utils.c]
 * Old-style declaration: no prototype, so calls are not type-checked.
 * Argument as recorded in this header (confirm against the definition):
 *     struct timeval *x
 */
extern long round_time();
1601 
1602 /* statistics gathering functions */
/*
 * enter_histogram                              [rvm_utils]
 * Old-style declaration: no prototype, so calls are not type-checked.
 * NOTE(review): the file tag above is as written in the original;
 * presumably it means rvm_utils.c -- confirm.
 * Arguments as recorded in this header (confirm against the definition):
 *     long         val
 *     long        *histo
 *     long        *histo_def
 *     long         length
 */
extern void enter_histogram();
1610 
1611 /* various initializers */
/*
 * init_map_roots                               [rvm_map.c]
 * This header already declares init_map_roots with a (void) prototype in
 * its "Segment & region management functions" group.  The declaration
 * here used empty parentheses, which leaves the argument list
 * unspecified; it now repeats the same prototype so both agree.
 */
extern void init_map_roots(void);
1614 
/*
 * init_utils                                   [rvm_utils.c]
 * Old-style declaration: no prototype, so calls are not type-checked.
 * This header records no argument list for it.
 */
extern long init_utils();
1617 
1618 /* check summing and byte-aligned copy and pad functions */
1619 extern
1620 rvm_length_t chk_sum();                 /* rvm_utils.c */
1621 /*  char            *nvaddr;
1622     rvm_length_t    len;
1623 */
/*
 * src_aligned_bcopy                            [rvm_utils.c]
 * Old-style declaration: no prototype, so calls are not type-checked.
 * Arguments as recorded in this header (confirm against the definition):
 *     char         *src
 *     char         *dest
 *     rvm_length_t  len
 */
extern void src_aligned_bcopy();
/*
 * dest_aligned_bcopy                           [rvm_utils.c]
 * Old-style declaration: no prototype, so calls are not type-checked.
 * Arguments as recorded in this header (confirm against the definition):
 *     char         *src
 *     char         *dest
 *     rvm_length_t  len
 */
extern void dest_aligned_bcopy();
1636 
1637 /*  offset arithmetic */
1638 extern
1639 rvm_offset_t rvm_rnd_offset_to_sector(); /* [rvm_utils.c] */
1640 /*  rvm_offset_t    *x; */
1641 
1642 /* debug support */
/*
 * rvm_debug                                    [rvm_debug]
 * Old-style declaration: no prototype, so calls are not type-checked.
 * NOTE(review): the file tag above is as written in the original;
 * presumably it means rvm_debug.c -- confirm.
 * Argument as recorded in this header (confirm against the definition):
 *     rvm_length_t val
 */
extern void rvm_debug();
1646 
1647 
1648 #endif /* _RVM_PRIVATE_ */
1649