/* BLURB lgpl

                           Coda File System
                              Release 5

          Copyright (c) 1987-2016 Carnegie Mellon University
                  Additional copyrights listed below

This  code  is  distributed "AS IS" without warranty of any kind under
the  terms of the  GNU  Library General Public Licence  Version 2,  as
shown in the file LICENSE. The technical and financial contributors to
Coda are listed in the file CREDITS.

                        Additional copyrights
                           none currently

#*/

/*
*
*                   RVM Mapping and Unmapping
*
*/

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#ifdef __STDC__
#include <stdlib.h>
#else
#include <libc.h>
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#ifndef HAVE_GETPAGESIZE /* defined(__linux__) && defined(sparc) */
#include <asm/page.h>
#define getpagesize() PAGE_SIZE
#endif
#if defined(hpux) || defined(__hpux)
#include <hp_bsd.h>
#endif /* hpux */
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include "rvm_private.h"

#ifdef __CYGWIN32__
#include <windows.h>
#endif

/* global variables */

extern log_t        *default_log;       /* default log descriptor ptr */
extern rvm_bool_t   rvm_no_update;      /* no segment or log update if true */
extern rvm_bool_t   rvm_map_private;    /* Do we want to map private? */
extern char         *rvm_errmsg;        /* internal error message buffer */

/* root of segment list and region tree */
list_entry_t        seg_root;           /* global segment list */
rw_lock_t           seg_root_lock;      /* lock for segment list header & links */

rw_lock_t           region_tree_lock;   /* lock for region tree */
tree_root_t         region_tree;        /* root of mapped region tree */

list_entry_t        page_list;          /* list of usable pages */
RVM_MUTEX           page_list_lock;     /* lock for usable page list */
rvm_length_t        page_size;          /* system page size */
rvm_length_t        page_mask;          /* mask for rounding down to page size */

/* locals */
static long         seg_code = 1;       /* segment short names */
static RVM_MUTEX    seg_code_lock;      /* lock for short names generator */

/* forward decl */
static rvm_page_entry_t *find_page_entry(char *vmaddr);

/* basic page, segment lists and region tree initialization */
void init_map_roots()
{
    init_list_header(&seg_root,seg_id);
    init_rw_lock(&seg_root_lock);
    init_rw_lock(&region_tree_lock);
    init_tree_root(&region_tree);
    mutex_init(&seg_code_lock);

#ifdef HAVE_MMAP
    /* get page size */
    page_size = (rvm_length_t)getpagesize();
#else
    { SYSTEM_INFO nt_info;
      GetSystemInfo(&nt_info);
      page_size = (rvm_length_t)nt_info.dwAllocationGranularity;
    }
#endif
    page_mask = ~(page_size - 1);
    mutex_init(&page_list_lock);
    init_list_header(&page_list,free_page_id);
}
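
/* Illustrative note (added by the editor, not from the original source):
   with page_size = 0x1000 (4096), page_mask = ~(0x1000 - 1) = 0xfffff000,
   so an address is chopped to a page boundary by ANDing with page_mask,
   e.g. (0x12345 & page_mask) == 0x12000. The CHOP_TO_PAGE_SIZE and
   ROUND_TO_PAGE_SIZE macros used later in this file presumably build on
   these values. */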

/* check validity of rvm_region record & ptr */
rvm_return_t bad_region(rvm_region)
    rvm_region_t    *rvm_region;
{
    if (rvm_region == NULL)
        return RVM_EREGION;
    if (rvm_region->struct_id != rvm_region_id)
        return RVM_EREGION;

    if (rvm_region->data_dev != NULL)
        if (strlen(rvm_region->data_dev) > (MAXPATHLEN-1))
            return RVM_ENAME_TOO_LONG;

    return RVM_SUCCESS;
}

#define PAGE_ALLOC_DEFINED
#include <sys/types.h>
#include <sys/mman.h>
#include "coda_mmap_anon.h"

/*
 * Page table management code
 *
 * This code is used by the page allocation code in RVM to track what
 * regions of memory have been allocated for use in the persistent heap.
 *
 * In the original Mach specific code, this was gotten for "free" via
 * a hack which called vm_allocate to reallocate the block in question.
 * If the reallocation failed, the block had been allocated. If it
 * succeeded, the block had not been allocated (and, since we had just
 * allocated it, we quickly deallocated it and wiped the egg off of our
 * faces).
 *
 * The original BSD44 port of this attempted to take advantage of the
 * fact that if mmap() is called with the MAP_FIXED flag, it would
 * attempt to allocate exactly the region of memory in question. Supposedly,
 * if the region was already allocated, this mmap() call would fail.
 *
 * This solution turns out to be NOT CORRECT. Not only does BSD44 not
 * perform in this fashion (it will deallocate whatever was there beforehand,
 * silently), but there is another complication. If the application has
 * allocated memory in that space, it could cause an erroneous result from
 * the mem_chk() function. Since mmap() (if it behaved as originally believed)
 * would not be able to allocate the space, it would assume it is a mapped
 * region. But, since it ISN'T a mapped region, just an allocated region,
 * the result is incorrect.
 *
 * One more factor which complicates adding what would otherwise be a
 * fairly straightforward list of allocated regions is that there are
 * two places in RVM where memory is allocated. One is in the RVM
 * library (page_alloc() and page_free(), both in rvm_map.c), and the
 * other is in the SEG segment loader library (allocate_vm() and
 * deallocate_vm(), both in rvm_segutil.c).
 *
 * --tilt, Nov 19 1996
 */

/* This is a doubly-linked list of allocated regions of memory. The regions
   are stored in increasing order, so that once you have passed the area
   where a questionable region would have been stored, you can stop looking. */
rvm_page_entry_t *rvm_allocations      = NULL; /* allocated pages */
rvm_page_entry_t *rvm_allocations_tail = NULL; /* tail of list */
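
/* A minimal usage sketch (added for illustration, not part of the original
   source; kept under "#if 0" so it is never compiled, and the function name
   is made up): the allocators below pair every allocation with a
   registration, and a second, overlapping registration is refused. */
#if 0
static void example_page_tracking(void)
{
    char *p = page_alloc(2 * page_size);          /* allocates and registers */

    /* registering any range that overlaps [p, p + 2*page_size) fails */
    assert(rvm_register_page(p + page_size, page_size) == rvm_false);

    /* a disjoint range can be registered and unregistered explicitly */
    assert(rvm_register_page((char *)0x40000000, page_size) == rvm_true);
    assert(rvm_unregister_page((char *)0x40000000, page_size) == rvm_true);

    page_free(p, 2 * page_size);                  /* unmaps and unregisters */
}
#endif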

/*
 * rvm_register_page -- registers a page as being allocated.
 *                      returns rvm_true if the page is successfully registered
 *                      returns rvm_false if the page is already allocated
 *
 * TODO: should add optimization which coalesces page records.
 *       should round end up to the next page boundary.
 */
rvm_bool_t rvm_register_page(char *vmaddr, rvm_length_t length)
{
    rvm_page_entry_t *bookmark, *entry;
    char *end = vmaddr + length - 1;

    if(rvm_allocations == NULL) {
        /* There are no other allocated pages, so this is the trivial case */
        entry = (rvm_page_entry_t *) malloc(sizeof(rvm_page_entry_t));
        assert(entry != NULL);
        entry->start    = vmaddr;
        entry->end      = end;
        entry->prev     = NULL;  /* indicates beginning of list */
        entry->next     = NULL;  /* indicates end of list */
        rvm_allocations      = entry; /* set this to be head of list */
        rvm_allocations_tail = entry; /* also set it to be the tail */
        return(rvm_true);
    }

    /* XXX check if tail is before this region for "quick" verification */

    /* search through the rvm_allocations list to find either
       a) where this should go, or b) a region which has already been
       registered which contains this region. */

    bookmark = rvm_allocations;
    while(bookmark != NULL) {
        /* check for various bad conditions: */

        /* case one: the start of the new region falls within
           a previously allocated region */
        if( (bookmark->start <= vmaddr) && (vmaddr <= bookmark->end) ) {
            printf("Case one.\n");
            return(rvm_false);
        }

        /* case two: the end of the new region falls within
           a previously allocated region */
        if ( (bookmark->start <= end)   && (end    <= bookmark->end) ) {
            printf("Case two.\n");
            return(rvm_false);
        }

        /* case three: the new region spans a currently allocated region
           (n.b.: the case where the new region is contained within a
                  currently allocated region is handled by case one) */
        if ( (vmaddr <= bookmark->start) && (bookmark->end <= end) ) {
            printf("Case three.\n");
            return(rvm_false);
        }

        /* check to see if we're at the right place to insert this page.
           we can do this by seeing if the end of the new region is
           before the beginning of this one. if so, insert the new
           region before the one we're currently looking at. */
        if(end < bookmark->start) {
            entry = (rvm_page_entry_t *) malloc(sizeof(rvm_page_entry_t));
            assert(entry != NULL);
            entry->start    = vmaddr;
            entry->end      = end;
            entry->prev     = bookmark->prev; /* insert the new entry */
            entry->next     = bookmark;       /* between bookmark and */
            if (bookmark->prev != NULL)
                bookmark->prev->next = entry;
            else
                /* bookmark must be the head of the list */
                rvm_allocations = entry;
            bookmark->prev  = entry;          /* the entry before bookmark */
            return(rvm_true);
        }

        /* if we're at the end, and we haven't tripped yet, we should
           put the entry at the end */
        if(bookmark->next == NULL) {
            entry = (rvm_page_entry_t *) malloc(sizeof(rvm_page_entry_t));
            assert(entry != NULL);
            entry->start    = vmaddr;
            entry->end      = end;
            entry->prev     = bookmark;       /* insert the new entry */
            entry->next     = NULL;           /* after bookmark */
            bookmark->next  = entry;
            rvm_allocations_tail = entry;     /* set the new tail */
            return(rvm_true);
        } else {
            bookmark = bookmark->next;
        }
    } /* end while */

    /* we shouldn't be able to get here. */
    assert(rvm_false);
    return(rvm_false);
}

/*
 * rvm_unregister_page -- removes a previously registered page from the
 *                        list of registered pages. returns true if the page is
 *                        successfully unregistered; returns false if the
 *                        page was not previously allocated.
 */
rvm_bool_t rvm_unregister_page(char *vmaddr, rvm_length_t length)
{
    rvm_page_entry_t *entry, *previous_entry, *next_entry;

    entry = find_page_entry(vmaddr);
    if(entry == NULL)
        return(rvm_false);

    if ( (entry->start != vmaddr) ||
         (entry->end   != (vmaddr + length - 1)) ) {
        /* this isn't an exact match.
           as long as we don't do coalescing of region entries,
           this means we should return false */
        return(rvm_false);
    }

    /* if entry != NULL, we've found the page we're unregistering.
       remove it from the list. */
    previous_entry = entry->prev;
    next_entry     = entry->next;

    /* make the entries before and after this one skip over it */
    if(previous_entry == NULL) {
        /* this is at the beginning of the list of allocated pages */
        rvm_allocations = next_entry;
    } else {
        previous_entry->next = next_entry;
    }

    if(next_entry != NULL)
        next_entry->prev = previous_entry;

    /* free this entry */
    free(entry);

    return(rvm_true);
}

/*
 * find_page_entry -- this returns the first entry which contains
 *                    the beginning of the requested region.
 *                    these somewhat peculiar semantics allow
 *                    us to support both rvm_unregister_page and
 *                    mem_chk, which need slightly different things.
 */
static rvm_page_entry_t *find_page_entry(char *vmaddr)
{
    rvm_page_entry_t *bookmark;

    bookmark = rvm_allocations;

    while(bookmark != NULL) {
        if( (bookmark->start <= vmaddr) && (vmaddr <= bookmark->end) ) {
            return(bookmark);
        }

        bookmark = bookmark->next;
    }

    return(NULL);
}


/* BSD44 page allocator */
char *page_alloc(len)
    rvm_length_t    len;
    {
    char           *vmaddr;
    /* printf ("page_alloc(%ul)\n", len); */
#ifdef HAVE_MMAP
    mmap_anon(vmaddr, NULL, len, PROT_READ | PROT_WRITE);
#else
    {
      HANDLE hMap = CreateFileMapping((HANDLE)0xFFFFFFFF, NULL,
                                      PAGE_READWRITE, 0, len, NULL);
      assert(hMap != NULL);
      vmaddr = MapViewOfFile(hMap, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, 0);
      assert(vmaddr != NULL);
      CloseHandle(hMap);
    }
#endif
    if (vmaddr == (char *)-1)
        {
        if (errno == ENOMEM)
            {
            vmaddr = NULL;
            }
        else
            {
            assert(rvm_false);  /* Unknown error condition */
            }
        }

    /* modified by tilt, Nov 19 1996.
       When we allocate a page (or range of pages) we register
       it in an internal table we're keeping around to keep
       track of pages. (The previous solution was to try to
       re-allocate the page, and see if it fails, which is
       not only wrong [since we don't know whether it's allocated
       by the application or actually allocated in the RVM heap!!],
       but doesn't work with mmap()). */
    if (rvm_register_page(vmaddr, len) == rvm_false)
        {
        assert(rvm_false);      /* Registering shouldn't have failed */
        }

    return vmaddr;
    }


/* BSD44 page deallocator */
void page_free(vmaddr, length)
    char            *vmaddr;
    rvm_length_t     length;
    {
#ifdef HAVE_MMAP
    if (munmap(vmaddr, length)) {
        assert(0); /* should never fail */
    }
#else
    UnmapViewOfFile(vmaddr);
#endif

    if (rvm_unregister_page(vmaddr, length) == rvm_false) {
        assert(0); /* should never fail */
    }
    }

/*
 * mem_chk -- verifies that the memory region in question
 *            is actually addressable as part of RVM.
 *            this means either that it is on the list,
 *            or it is wholly contained by one or more list entries.
 */
static rvm_bool_t mem_chk(char *vmaddr, rvm_length_t length)
{
    rvm_page_entry_t *entry;
    char *start = vmaddr;
    char *end   = vmaddr + length - 1;

    while(rvm_true) {
        entry = find_page_entry(start);
        if(entry == NULL)
            return(rvm_false);

        if(end <= entry->end)
            return(rvm_true);

        start = entry->end + 1; /* XXX possible problems with
                                       pages that aren't fully
                                       allocated. burn that
                                       bridge when we get to it. */
    }

    assert(rvm_false);
    return(rvm_false);          /* shouldn't be able to get here */
}
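
/* Worked example (added for clarity, not in the original source): suppose
   two adjacent ranges were registered separately, [0x1000,0x2fff] and
   [0x3000,0x4fff]. mem_chk(0x2000, 0x2000) first finds the entry ending at
   0x2fff, then restarts the lookup at 0x3000 and finds the second entry, so
   the whole [0x2000,0x3fff] span is accepted even though no single entry
   contains it. */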

/* segment short name generator */
static long make_seg_code()
{
    long            retval;

    CRITICAL(seg_code_lock,             /* begin seg_code_lock crit sec */
        {
        /* the increment is probably indivisible on CISC machines,
           but we can't RISC it, so we lock it... */
        retval = seg_code++;
        });                             /* end seg_code_lock crit sec */

    return retval;
}

/* open segment device and set device characteristics */
static long open_seg_dev(seg,dev_length)
    seg_t           *seg;               /* segment descriptor */
    rvm_offset_t    *dev_length;        /* optional device length */
    {
    rvm_length_t    flags = O_RDWR;     /* device open flags */
    long            retval;

    if (rvm_no_update) flags = O_RDONLY;
    if ((retval=open_dev(&seg->dev,flags,0)) < 0)
        return retval;
    if ((retval=set_dev_char(&seg->dev,dev_length)) < 0)
        close_dev(&seg->dev);

    return retval;
    }

/* close segment device */
static long close_seg_dev(seg)
    seg_t           *seg;               /* segment descriptor */
    {

    return close_dev(&seg->dev);

    }

/* close segment devices at termination time */
rvm_return_t close_all_segs()
    {
    seg_t           *seg;               /* segment descriptor */
    rvm_return_t    retval=RVM_SUCCESS; /* return value */

    RW_CRITICAL(seg_root_lock,w,        /* begin seg_root_lock crit section */
        {
        FOR_ENTRIES_OF(seg_root,seg_t,seg)
            {
            CRITICAL(seg->dev_lock,     /* begin seg->dev_lock crit section */
                {
                if (close_seg_dev(seg) < 0)
                    retval = RVM_EIO;
                });                     /* end seg->dev_lock crit section */
            if (retval != RVM_SUCCESS)
                break;
            }
        });                             /* end seg_root_lock crit section */

    return retval;
    }

/* segment lookup via device name */
seg_t *seg_lookup(dev_name,retval)
    char            *dev_name;          /* segment device name */
    rvm_return_t    *retval;
    {
    char            full_name[MAXPATHLEN+1];
    seg_t           *seg = NULL;

    /* get full path name for segment device */
    (void)make_full_name(dev_name,full_name,retval);
    if (*retval != RVM_SUCCESS)
        return NULL;

    /* search segment list for full_name */
    RW_CRITICAL(seg_root_lock,r,        /* begin seg_root_lock crit section */
        {
        FOR_ENTRIES_OF(seg_root,seg_t,seg)
            if (!strcmp(seg->dev.name,full_name))
                break;                  /* found */
        });                             /* end seg_root_lock crit section */

    if (!seg->links.is_hdr)
        return seg;                     /* return found seg descriptor */
    else
        return NULL;
    }

/* enter segment short name definition in log */
static rvm_return_t define_seg(log,seg)
    log_t           *log;               /* log descriptor */
    seg_t           *seg;               /* segment descriptor */
    {
    log_seg_t       *log_seg;           /* special log segment entry */
    log_special_t   *special;           /* allocation for log_seg */
    long            name_len;           /* byte length of segment name */
    rvm_return_t    retval;             /* return code */

    /* make segment definition record */
    name_len = strlen(seg->dev.name);
    special=make_log_special(log_seg_id,name_len+1);
    if (special == NULL)
        return RVM_ENO_MEMORY;          /* can't get descriptor */

    /* complete record and enter in log */
    log_seg = &special->special.log_seg;
    log_seg->seg_code = seg->seg_code;
    log_seg->num_bytes = seg->dev.num_bytes;
    log_seg->name_len = name_len;
    (void)strcpy(log_seg->name,seg->dev.name);
    if ((retval=queue_special(log,special)) != RVM_SUCCESS)
        free_log_special(log_seg);

    return retval;
    }

/* write new segment dictionary entries for all segments */
rvm_return_t define_all_segs(log)
    log_t           *log;
    {
    seg_t           *seg;               /* segment descriptor */
    rvm_return_t    retval = RVM_SUCCESS; /* return value */

    RW_CRITICAL(seg_root_lock,r,        /* begin seg_root_lock crit sec */
        {
        FOR_ENTRIES_OF(seg_root,seg_t,seg)
            {
            if ((retval=define_seg(log,seg)) != RVM_SUCCESS)
                break;
            }
        });                             /* end seg_root_lock crit sec */

    return retval;
    }

/* segment builder */
static seg_t *build_seg(rvm_region,log,retval)
    rvm_region_t    *rvm_region;        /* segment's region descriptor */
    log_t           *log;               /* log descriptor */
    rvm_return_t    *retval;            /* ptr to return code */
    {
    seg_t           *seg;               /* new segment descriptor */

    /* build segment descriptor */
    seg = make_seg(rvm_region->data_dev,retval);
    if (*retval != RVM_SUCCESS)
        goto err_exit;

    /* open device and set characteristics */
    seg->log = log;
    log->ref_cnt += 1;
    if (open_seg_dev(seg,&rvm_region->dev_length) < 0)
        {
        *retval = RVM_EIO;
        goto err_exit;
        }

    /* raw devices require length */
    if ((seg->dev.raw_io) &&
        (RVM_OFFSET_EQL_ZERO(seg->dev.num_bytes)))
        {
        *retval = RVM_ENOT_MAPPED;
        goto err_exit;
        }

    /* define short name for log & queue log entry */
    seg->seg_code = make_seg_code();
    if ((*retval=define_seg(log,seg)) != RVM_SUCCESS)
        goto err_exit;

    /* put segment on segment list */
    RW_CRITICAL(seg_root_lock,w,        /* begin seg_root_lock crit sec */
        {
        (void)move_list_entry(NULL,&seg_root,seg);
        });                             /* end seg_root_lock crit sec */
    return seg;

err_exit:
    log->ref_cnt -= 1;                  /* log seg dict entry not */
    if (seg != NULL) free_seg(seg);     /* deallocated since the seg_code is */
    return NULL;                        /* unique -- to the log, it's just like */
    }                                   /* a segment used read-only  */

/* device region conflict comparator */
long dev_partial_include(base1,end1,base2,end2)
    rvm_offset_t    *base1,*end1;
    rvm_offset_t    *base2,*end2;
    {
    if (RVM_OFFSET_GEQ(*base1,*end2))
        return 1;                       /* region1 above region2 */
    if (RVM_OFFSET_LEQ(*end1,*base2))
        return -1;                      /* region1 below region2 */

    return 0;                           /* regions at least partially overlap */
    }

/* device region within other region comparator */
long dev_total_include(base1,end1,base2,end2)
    rvm_offset_t    *base1,*end1;
    rvm_offset_t    *base2,*end2;
    {
    if ((RVM_OFFSET_GEQ(*base1,*base2) && RVM_OFFSET_LEQ(*base1,*end2))
        &&
        (RVM_OFFSET_GEQ(*end1,*base2) && RVM_OFFSET_LEQ(*end1,*end2))
        ) return 0;                     /* region1 included in region2 */
    if (RVM_OFFSET_LSS(*base1,*base2))
        return -1;                      /* region1 below region2, may overlap */

    return 1;                           /* region1 above region2, may overlap */
    }

/* vm range conflict comparator */
static long mem_partial_include(tnode1,tnode2)
    tree_node_t     *tnode1;            /* range1 */
    tree_node_t     *tnode2;            /* range2 */
    {
    rvm_length_t    addr1;              /* start of range 1 */
    rvm_length_t    addr2;              /* start of range 2 */
    rvm_length_t    end1;               /* end of range1 */
    rvm_length_t    end2;               /* end of range2 */

    /* rebind types and compute end points */
    addr1 = (rvm_length_t)(((mem_region_t *)tnode1)->vmaddr);
    addr2 = (rvm_length_t)(((mem_region_t *)tnode2)->vmaddr);
    end1 = addr1 + ((mem_region_t *)tnode1)->length - 1;
    end2 = addr2 + ((mem_region_t *)tnode2)->length - 1;

    if (addr1 > end2) return 1;        /* range1 above range2 */
    if (end1 < addr2) return -1;       /* range1 below range2 */
    return 0;                          /* ranges at least partially overlap */
    }

/* vm range within other range comparator */
long mem_total_include(tnode1,tnode2)
    tree_node_t     *tnode1;            /* range1 */
    tree_node_t     *tnode2;            /* range2 */
    {
    rvm_length_t    addr1;              /* start of range 1 */
    rvm_length_t    addr2;              /* start of range 2 */
    rvm_length_t    end1;               /* end of range1 */
    rvm_length_t    end2;               /* end of range2 */

    /* rebind types and compute end points */
    addr1 = (rvm_length_t)(((mem_region_t *)tnode1)->vmaddr);
    addr2 = (rvm_length_t)(((mem_region_t *)tnode2)->vmaddr);
    end1 = addr1 + ((mem_region_t *)tnode1)->length - 1;
    end2 = addr2 + ((mem_region_t *)tnode2)->length - 1;

    if ((addr1 >= addr2) && (addr1 <= end2) && (end1 <= end2))
        return 0;                       /* range1 included in range2 */
/* This test does not correspond to the comment, changed it. -JH */
//    if (end1 < addr2) return -1;        /* range1 below range2, may overlap */
    if (addr1 < addr2) return -1;       /* range1 below range2, may overlap */
    return 1;                           /* range1 above range2, may overlap */
    }

/* find and lock a region record iff vm range
   entirely within a single mapped region
   -- region tree is left locked if mode = w
   -- used by transaction functions and unmap
*/
region_t *find_whole_range(dest,length,mode)
    char            *dest;
    rvm_length_t    length;
    rw_lock_mode_t  mode;               /* lock mode for region descriptor */
    {
    mem_region_t    range;              /* dummy node for lookup */
    mem_region_t    *node;              /* ptr to node found */
    region_t        *region = NULL;     /* ptr to region for found node */

    range.vmaddr = dest;
    range.length = length;
    range.links.node.struct_id = mem_region_id;

    RW_CRITICAL(region_tree_lock,mode,  /* begin region_tree_lock crit sect */
        {
        node = (mem_region_t *)tree_lookup(&region_tree,
                                          (tree_node_t *)&range,
                                          mem_total_include);
        if (node != NULL)
            {
            region = node->region;
            if (region != NULL)
                {                       /* begin region_lock crit sect */
                rw_lock(&region->region_lock,mode); /* (ended by caller) */
                if (mode == w)          /* retain region_tree_lock */
                    return region;      /* caller will unlock */
                }
            }
        });                             /* end region_tree_lock crit sect */

    return region;
    }
/* apply mapping options, compute region size, and round to page size */
static rvm_return_t round_region(rvm_region,seg)
    rvm_region_t    *rvm_region;        /* user region specs [in/out] */
    seg_t           *seg;               /* segment descriptor */
    {
    rvm_offset_t    big_len;

    /* see if region within segment */
    if (RVM_OFFSET_GTR(rvm_region->offset,seg->dev.num_bytes))
        return RVM_EOFFSET;
    big_len = RVM_ADD_LENGTH_TO_OFFSET(rvm_region->offset,
                                       rvm_region->length);
    if (RVM_OFFSET_LSS(big_len,rvm_region->offset))
        return RVM_EOFFSET;             /* overflow */

    /* round offset, length up and down to integral page size */
    big_len = RVM_LENGTH_TO_OFFSET(ROUND_TO_PAGE_SIZE(
                  RVM_OFFSET_TO_LENGTH(big_len)));
    rvm_region->offset = RVM_MK_OFFSET(
        RVM_OFFSET_HIGH_BITS_TO_LENGTH(rvm_region->offset),
        CHOP_TO_PAGE_SIZE(RVM_OFFSET_TO_LENGTH(rvm_region->offset)));

    /* see if at end of segment */
    if ((rvm_region->length == 0)
        || RVM_OFFSET_GTR(big_len,seg->dev.num_bytes))
        big_len = seg->dev.num_bytes;

    /* calculate actual length to map (only 32 bit lengths for now) */
    big_len = RVM_SUB_OFFSETS(big_len,rvm_region->offset);
    if (RVM_OFFSET_HIGH_BITS_TO_LENGTH(big_len) != 0)
        return RVM_ERANGE;
    rvm_region->length = RVM_OFFSET_TO_LENGTH(big_len);

    /* check page aligned buffer or allocate virtual memory region */
    if (rvm_region->vmaddr != NULL)
        {
        if (rvm_region->vmaddr != (char *)
                             CHOP_TO_PAGE_SIZE(rvm_region->vmaddr))
            return RVM_ERANGE;          /* buffer not page aligned */
        if (!mem_chk(rvm_region->vmaddr,rvm_region->length))
            return RVM_ERANGE;          /* buffer not within task's vm */
        }
    else
        {
        rvm_region->vmaddr =
            page_alloc(ROUND_TO_PAGE_SIZE(rvm_region->length));
        if (rvm_region->vmaddr == NULL) return RVM_ENO_MEMORY;
        }

    return RVM_SUCCESS;
    }
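
/* Worked example (added for illustration, assuming a 4096-byte page size):
   a request with offset 5000 and length 3000 first computes big_len = 8000,
   rounds it up to 8192, chops the offset down to 4096, and maps length
   8192 - 4096 = 4096; i.e. the mapped window always covers the requested
   bytes and lies on page boundaries. */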

/* validate region and construct descriptors */
static rvm_return_t establish_range(rvm_region,region,mem_region,seg)
    rvm_region_t    *rvm_region;        /* user request region descriptor */
    region_t        **region;           /* internal region descriptor [out]*/
    mem_region_t    **mem_region;       /* region tree descriptor [out] */
    seg_t           *seg;               /* segment ptr */
    {
    mem_region_t    *mem_node;
    region_t        *new_region;
    rvm_return_t    retval;

    /* get exact region size, address */
    *region = NULL; *mem_region = NULL;
    if ((retval=round_region(rvm_region,seg)) != RVM_SUCCESS)
        return retval;

    /* build new region descriptor */
    *region = new_region = make_region();
    if (new_region == NULL) return RVM_ENO_MEMORY;
    new_region->no_copy = rvm_region->no_copy;
    new_region->offset = rvm_region->offset;
    new_region->end_offset =
        RVM_ADD_LENGTH_TO_OFFSET(rvm_region->offset,
                                 rvm_region->length);

    /* build range tree node */
    *mem_region = mem_node = make_mem_region();
    if (mem_node == NULL) return RVM_ENO_MEMORY;
    new_region->mem_region = mem_node;
    mem_node->vmaddr = new_region->vmaddr = rvm_region->vmaddr;
    mem_node->length = new_region->length
                     = (rvm_length_t)rvm_region->length;
    mem_node->region = NULL;

    /* put range tree node in tree to reserve range */
    RW_CRITICAL(region_tree_lock,w,     /* begin region_tree_lock crit sect */
        {
        if (!tree_insert(&region_tree,(tree_node_t *)mem_node,
                            mem_partial_include))
            retval = RVM_EVM_OVERLAP;         /* vm range already mapped */
        });                             /* end region_tree_lock crit sect */

    return retval;
    }

/* check for mapping dependencies on previously
   mapped regions, or conflict with presently mapped region
   -- caller provides list locking
   returns true if dependency detected
*/
static region_t *chk_seg_mappings(chk_region,list_root)
    region_t        *chk_region;        /* region descriptor to chk*/
    list_entry_t    *list_root;         /* root of list to check */
    {
    region_t        *region;            /* internal region descriptor */

    FOR_ENTRIES_OF(*list_root,region_t,region)
        {
        /* test for overlap */
        if (dev_partial_include(&chk_region->offset,
                          &chk_region->end_offset,
                          &region->offset,&region->end_offset
                          ) == 0)
            return region;              /* overlap */
        }

    return NULL;
    }

/* check mapping dependencies within segment */
static rvm_return_t chk_dependencies(seg,region)
    seg_t           *seg;
    region_t        *region;
    {
    region_t        *x_region;          /* conflicting or dependent region */
    rvm_return_t    retval = RVM_SUCCESS;

    /* check for multiple mappings of same segment region */
    CRITICAL(seg->seg_lock,            /* begin seg_lock crit sect */
        {
        if ((x_region=chk_seg_mappings(region,&seg->map_list))
            == NULL)
            {
            /* enter region in map_list */
            region->seg = seg;
            (void)move_list_entry(NULL,&seg->map_list,
                                  &region->links);

            /* check for overlap with modified and unmapped regions of segment
               if found, must wait for truncation to get committed image of region */
            DO_FOREVER
                if ((x_region=chk_seg_mappings(region,
                                               &seg->unmap_list))
                    != NULL)
                    {
                    (void)initiate_truncation(seg->log,100);
                    if ((retval=wait_for_truncation(seg->log,
                                                &x_region->unmap_ts))
                        != RVM_SUCCESS) goto err_exit;
                    free_region(x_region); /* can free now */
                    }
                else break;             /* no further dependencies */
            }
        else
            retval = RVM_EOVERLAP;      /* multiply mapped */
err_exit:;
        });                             /* end seg_lock crit sect */

    return retval;
    }

/* make data from segment available from mapped region */
static rvm_return_t map_data(rvm_options,region)
    rvm_options_t   *rvm_options;
    region_t        *region;
    {
    seg_t           *seg = region->seg;
    rvm_return_t    retval = RVM_SUCCESS;
#if defined(__NetBSD__) || defined(__FreeBSD__)
    char            *addr;
#endif
    /* check for pager mapping */
    if (rvm_options != NULL)
        if (rvm_options->pager != NULL)
            {
            /* external pager interface not implemented yet */
            return RVM_EPAGER;
            }

#if defined(__NetBSD__) || defined(__FreeBSD__)
    /* NetBSD has a kernel bug that will panic if we
       try to read from a raw device and copy it to an address
       on or above 0x10400000.  This is a known problem with
       vm_fault() in the NetBSD kernel, which panics when it
       finds that the pte (page directory table entry) does
       not exist in the page dir table (instead of trying to
       create it). Until that is fixed, we work around it
       by manually touching one byte of address space for
       every pte that we'll need.  This will get the pte
       created and we'll be fine.  This was proposed by rvb.
         -- clement */
    if (seg->dev.raw_io) {
        for (addr=region->vmaddr;
             addr < ( (region->vmaddr)+(region->length) );
             addr+=0x400000) { /* each pte covers 0x400000 of vm */
            *addr = 0; /* this will force the kernel to create the pte */
        }
    }
#endif /* __NetBSD__ || __FreeBSD__ */
    /* read data directly from segment */
    if (!region->no_copy)
        CRITICAL(seg->dev_lock,
            {
            if (read_dev(&seg->dev,&region->offset,
                         region->vmaddr,region->length) < 0)
                retval = RVM_EIO;
            });

    return retval;
    }

/* error exit cleanup */
static void clean_up(region,mem_region)
    region_t        *region;
    mem_region_t    *mem_region;
{
    seg_t           *seg;

    /* kill region descriptor if created */
    if (region != NULL)
        {
        seg = region->seg;
        if (seg != NULL)
            CRITICAL(seg->seg_lock,
                {
                (void)move_list_entry(&seg->map_list,NULL,
                                       &region->links);
                });
        free_region(region);
        }

    /* kill region tree node if created */
    if (mem_region != NULL)
        {
        RW_CRITICAL(region_tree_lock,w,
            {
            (void)tree_delete(&region_tree,(tree_node_t *)mem_region,
                              mem_partial_include);
            });
        free_mem_region(mem_region);
        }
}

/* rvm_map */
rvm_return_t rvm_map(rvm_region_t *rvm_region, rvm_options_t *rvm_options)
{
    seg_t               *seg;              /* segment descriptor */
    region_t            *region = NULL;    /* new region descriptor */
    mem_region_t        *mem_region= NULL; /* new region's tree node */
    rvm_return_t        retval;
    rvm_region_t        save_rvm_region;
    int fd;                                /* For private mappings */
    void *addr;

    /* preliminary checks & saves */
    if (bad_init()) return RVM_EINIT;
    if ((retval=bad_region(rvm_region)) != RVM_SUCCESS)
        return retval;
    if (rvm_options != NULL)
        if ((retval=do_rvm_options(rvm_options)) != RVM_SUCCESS)
            return retval;
    if (default_log == NULL) return RVM_ELOG;
    (void)BCOPY((char *)rvm_region,(char *)&save_rvm_region,
               sizeof(rvm_region_t));

    /* find or build segment */
    seg = seg_lookup(rvm_region->data_dev,&retval);
    if (retval != RVM_SUCCESS) goto err_exit;
    if (seg == NULL)
        {                               /* must build a new segment */
        if ((seg=build_seg(rvm_region,default_log,&retval))
            == NULL) goto err_exit;
        }
    else
        /* test if segment closed by earlier (failing) rvm_terminate */
        if (seg->dev.handle == 0) return RVM_EIO;

    /* check for vm overlap with existing mappings & build descriptors */
    if ((retval = establish_range(rvm_region,&region,&mem_region,seg))
                != RVM_SUCCESS)
        goto err_exit;

    /* check for overlap with existing mappings in segment, check
       for truncation dependencies, and enter region in map_list */
    if ((retval=chk_dependencies(seg,region)) != RVM_SUCCESS)
        goto err_exit;

    /* Do the private map or get the data from the segment */
    if (rvm_map_private) {
        fd = open(rvm_region->data_dev, O_RDONLY | O_BINARY);
        if ( fd < 0 ) {
            retval = RVM_EIO;
            goto err_exit;
        }
        addr = mmap(rvm_region->vmaddr, rvm_region->length,
                    PROT_READ | PROT_WRITE, MAP_FIXED | MAP_PRIVATE,
                    fd, region->offset.low);
        if (!rvm_region->vmaddr)
            rvm_region->vmaddr = addr;

        if (addr != rvm_region->vmaddr) {
            retval = RVM_ENOT_MAPPED;
            goto err_exit;
        }
        if (close(fd)) {
            retval = RVM_EIO;
            goto err_exit;
        }
    } else {
        /* get the data from the segment */
        if ((retval = map_data(rvm_options,region)) != RVM_SUCCESS) {
            rvm_region->length = 0;
            goto err_exit;
        }
    }

    /* complete region tree node and exit */
    mem_region->region = region;
    return RVM_SUCCESS;

  err_exit:
    clean_up(region,mem_region);
    (void)BCOPY((char *)&save_rvm_region,(char *)rvm_region,
               sizeof(rvm_region_t));
    return retval;
}
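
/* Hypothetical caller sketch (added for illustration; kept under "#if 0" so
   it is never compiled). It assumes the usual public rvm.h helpers
   (rvm_initialize, rvm_init_region, rvm_unmap) and a made-up segment device
   name. With vmaddr left NULL, round_region() page_alloc()s a page-aligned
   buffer and map_data() reads the committed image of the segment into it. */
#if 0
#include <rvm/rvm.h>

static void example_map_segment(void)
{
    rvm_region_t region;
    rvm_return_t ret;

    ret = rvm_initialize(RVM_VERSION, NULL); /* log assumed configured elsewhere */
    assert(ret == RVM_SUCCESS);

    rvm_init_region(&region);                /* sets struct_id, zeroes fields */
    region.data_dev = "/usr/coda/DATA";      /* hypothetical segment device */
    region.offset   = RVM_MK_OFFSET(0, 0);   /* map from start of segment */
    region.length   = 1024 * 1024;           /* rounded to pages by rvm_map */
    region.vmaddr   = NULL;                  /* let RVM choose the address */

    ret = rvm_map(&region, NULL);
    assert(ret == RVM_SUCCESS);

    /* ... run transactions against [region.vmaddr, region.vmaddr+region.length) ... */

    ret = rvm_unmap(&region);
    assert(ret == RVM_SUCCESS);
}
#endif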