/* BLURB lgpl

                           Coda File System
                              Release 5

          Copyright (c) 1987-2016 Carnegie Mellon University
                  Additional copyrights listed below

This code is distributed "AS IS" without warranty of any kind under
the terms of the GNU Library General Public Licence Version 2, as
shown in the file LICENSE. The technical and financial contributors to
Coda are listed in the file CREDITS.

                        Additional copyrights
                           none currently

#*/

/*
*
*                   RVM Mapping and Unmapping
*
*/

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#ifdef __STDC__
#include <stdlib.h>
#else
#include <libc.h>
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#ifndef HAVE_GETPAGESIZE /* defined(__linux__) && defined(sparc) */
#include <asm/page.h>
#define getpagesize() PAGE_SIZE
#endif
#if defined(hpux) || defined(__hpux)
#include <hp_bsd.h>
#endif /* hpux */
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include "rvm_private.h"

#ifdef __CYGWIN32__
#include <windows.h>
#endif

/* global variables */

extern log_t *default_log;         /* default log descriptor ptr */
extern rvm_bool_t rvm_no_update;   /* no segment or log update if true */
extern rvm_bool_t rvm_map_private; /* Do we want to map private? */
extern char *rvm_errmsg;           /* internal error message buffer */

/* root of segment list and region tree */
list_entry_t seg_root;             /* global segment list */
rw_lock_t seg_root_lock;           /* lock for segment list header & links */

rw_lock_t region_tree_lock;        /* lock for region tree */
tree_root_t region_tree;           /* root of mapped region tree */

list_entry_t page_list;            /* list of usable pages */
RVM_MUTEX page_list_lock;          /* lock for usable page list */
rvm_length_t page_size;            /* system page size */
rvm_length_t page_mask;            /* mask for rounding down to page size */

/* locals */
static long seg_code = 1;          /* segment short names */
static RVM_MUTEX seg_code_lock;    /* lock for short names generator */

/* forward decl */
static rvm_page_entry_t *find_page_entry(char *vmaddr);

/* basic page, segment lists and region tree initialization */
void init_map_roots()
{
    init_list_header(&seg_root,seg_id);
    init_rw_lock(&seg_root_lock);
    init_rw_lock(&region_tree_lock);
    init_tree_root(&region_tree);
    mutex_init(&seg_code_lock);

#ifdef HAVE_MMAP
    /* get page size */
    page_size = (rvm_length_t)getpagesize();
#else
    {
        SYSTEM_INFO nt_info;
        GetSystemInfo(&nt_info);
        page_size = (rvm_length_t)nt_info.dwAllocationGranularity;
    }
#endif
    page_mask = ~(page_size - 1);
    mutex_init(&page_list_lock);
    init_list_header(&page_list,free_page_id);
}
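
/* Illustrative sketch (not part of the build): page_size is assumed to be a
   power of two, so page_mask rounds addresses and lengths as follows. The
   real code uses the CHOP_TO_PAGE_SIZE/ROUND_TO_PAGE_SIZE macros (presumably
   from rvm_private.h) that rely on this same arithmetic. */
#if 0
rvm_length_t chop_to_page(rvm_length_t addr)
{
    return addr & page_mask;                  /* round down to page start */
}
rvm_length_t round_to_page(rvm_length_t len)
{
    return (len + page_size - 1) & page_mask; /* round up to page multiple */
}
#endif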

/* check validity of rvm_region record & ptr */
rvm_return_t bad_region(rvm_region)
    rvm_region_t *rvm_region;
{
    if (rvm_region == NULL)
        return RVM_EREGION;
    if (rvm_region->struct_id != rvm_region_id)
        return RVM_EREGION;

    if (rvm_region->data_dev != NULL)
        if (strlen(rvm_region->data_dev) > (MAXPATHLEN-1))
            return RVM_ENAME_TOO_LONG;

    return RVM_SUCCESS;
}

#define PAGE_ALLOC_DEFINED
#include <sys/types.h>
#include <sys/mman.h>
#include "coda_mmap_anon.h"

/*
 * Page table management code
 *
 * This code is used by the page allocation code in RVM to track which
 * regions of memory have been allocated for use in the persistent heap.
 *
 * In the original Mach-specific code, this came for "free" via a hack
 * which called vm_allocate to reallocate the block in question. If the
 * reallocation failed, the block had been allocated. If it succeeded,
 * the block had not been allocated (and, since we had just allocated
 * it, we quickly deallocated it and wiped the egg off of our faces).
 *
 * The original BSD44 port of this attempted to take advantage of the
 * fact that if mmap() is called with the MAP_FIXED flag, it would
 * attempt to allocate exactly the region of memory in question. Supposedly,
 * if the region was already allocated, this mmap() call would fail.
 *
 * This solution turns out to be NOT CORRECT. Not only does BSD44 not
 * behave this way (it will silently deallocate whatever was there
 * beforehand), but there is another complication. If the application has
 * allocated memory in that space, it could cause an erroneous result from
 * the mem_chk() function. Since mmap() (if it behaved as originally
 * believed) would not be able to allocate the space, mem_chk() would
 * conclude it is a mapped region. But since it ISN'T a mapped region,
 * just an allocated region, the result is incorrect.
 *
 * One more factor which complicates adding what would otherwise be a
 * fairly straightforward list of allocated regions is that there are
 * two places in RVM where memory is allocated. One is in the RVM
 * library (page_alloc() and page_free(), both in rvm_map.c), and the
 * other is in the SEG segment loader library (allocate_vm() and
 * deallocate_vm(), both in rvm_segutil.c).
 *
 * --tilt, Nov 19 1996
 */

/* This is a doubly-linked list of allocated regions of memory. The regions
   are stored in increasing order, so that once you have passed the area
   where a questionable region would be stored, you can stop looking. */
rvm_page_entry_t *rvm_allocations      = NULL; /* allocated pages */
rvm_page_entry_t *rvm_allocations_tail = NULL; /* tail of list */
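
/* Invariant sketch (illustrative only, not compiled): the list is kept
   sorted by start address with disjoint entries, so a walk like this
   should always succeed. */
#if 0
static void check_allocations_sorted(void)
{
    rvm_page_entry_t *p = rvm_allocations;
    while (p != NULL && p->next != NULL) {
        assert(p->end < p->next->start); /* ascending and non-overlapping */
        p = p->next;
    }
    /* the tail pointer tracks the last entry */
    assert(rvm_allocations == NULL || rvm_allocations_tail->next == NULL);
}
#endif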

/*
 * rvm_register_page -- registers a page as being allocated.
 *      returns rvm_true if the page is successfully registered
 *      returns rvm_false if the page is already allocated
 *
 * TODO: should add an optimization which coalesces page records.
 *       should round end up to the next page boundary.
 */
rvm_bool_t rvm_register_page(char *vmaddr, rvm_length_t length)
{
    rvm_page_entry_t *bookmark, *entry;
    char *end = vmaddr + length - 1;

    if (rvm_allocations == NULL) {
        /* There are no other allocated pages, so this is the trivial case */
        entry = (rvm_page_entry_t *) malloc(sizeof(rvm_page_entry_t));
        assert(entry != NULL);
        entry->start = vmaddr;
        entry->end   = end;
        entry->prev  = NULL;            /* indicates beginning of list */
        entry->next  = NULL;            /* indicates end of list */
        rvm_allocations      = entry;   /* set this to be head of list */
        rvm_allocations_tail = entry;   /* also set it to be the tail */
        return(rvm_true);
    }

    /* XXX check if tail is before this region for "quick" verification */

    /* search through the rvm_allocations list to find either
       a) where this should go, or b) a region which has already been
       registered which contains this region. */

    bookmark = rvm_allocations;
    while (bookmark != NULL) {
        /* check for various bad conditions: */

        /* case one: the start of the new region falls within
           a previously allocated region */
        if ( (bookmark->start <= vmaddr) && (vmaddr <= bookmark->end) ) {
            printf("Case one.\n");
            return(rvm_false);
        }

        /* case two: the end of the new region falls within
           a previously allocated region */
        if ( (bookmark->start <= end) && (end <= bookmark->end) ) {
            printf("Case two.\n");
            return(rvm_false);
        }

        /* case three: the new region spans a currently allocated region
           (n.b.: the case where the new region is contained within a
           currently allocated region is handled by case one) */
        if ( (vmaddr <= bookmark->start) && (bookmark->end <= end) ) {
            printf("Case three.\n");
            return(rvm_false);
        }

        /* check to see if we're at the right place to insert this page.
           we can do this by seeing if the end of the new region is
           before the beginning of this one. if so, insert the new
           region before the one we're currently looking at. */
        if (end < bookmark->start) {
            entry = (rvm_page_entry_t *) malloc(sizeof(rvm_page_entry_t));
            assert(entry != NULL);
            entry->start = vmaddr;
            entry->end   = end;
            entry->prev  = bookmark->prev; /* insert the new entry */
            entry->next  = bookmark;       /* between bookmark and */
            if (bookmark->prev != NULL)
                bookmark->prev->next = entry;
            else
                /* bookmark must be the head of the list */
                rvm_allocations = entry;
            bookmark->prev = entry;        /* the entry before bookmark */
            return(rvm_true);
        }

        /* if we're at the end, and we haven't tripped yet, we should
           put the entry at the end */
        if (bookmark->next == NULL) {
            entry = (rvm_page_entry_t *) malloc(sizeof(rvm_page_entry_t));
            assert(entry != NULL);
            entry->start = vmaddr;
            entry->end   = end;
            entry->prev  = bookmark;       /* insert the new entry */
            entry->next  = NULL;           /* after bookmark */
            bookmark->next = entry;
            rvm_allocations_tail = entry;  /* set the new tail */
            return(rvm_true);
        } else {
            bookmark = bookmark->next;
        }
    } /* end while */

    /* we shouldn't be able to get here. */
    assert(rvm_false);
    return(rvm_false);
}
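
/* Usage sketch (illustrative only, not compiled): overlapping registrations
   are rejected, disjoint ones succeed. */
#if 0
rvm_length_t len = page_size;
char *addr = page_alloc(len);               /* registers [addr, addr+len-1] */
assert(rvm_register_page(addr, len) == rvm_false);      /* overlap: case one */
assert(rvm_register_page(addr + len, len) == rvm_true); /* disjoint: accepted */
#endif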

/*
 * rvm_unregister_page -- removes a previously registered page from the
 *      list of registered pages. returns true if the page is
 *      successfully unregistered; returns false if the
 *      page was not previously allocated.
 */
rvm_bool_t rvm_unregister_page(char *vmaddr, rvm_length_t length)
{
    rvm_page_entry_t *entry, *previous_entry, *next_entry;

    entry = find_page_entry(vmaddr);
    if (entry == NULL)
        return(rvm_false);

    if ( (entry->start != vmaddr) ||
         (entry->end   != (vmaddr + length - 1)) ) {
        /* this isn't an exact match.
           as long as we don't do coalescing of region entries,
           this means we should return false */
        return(rvm_false);
    }

    /* if entry != NULL, we've found the page we're unregistering.
       remove it from the list. */
    previous_entry = entry->prev;
    next_entry     = entry->next;

    /* make the entries before and after this one skip over it */
    if (previous_entry == NULL) {
        /* this is at the beginning of the list of allocated pages */
        rvm_allocations = next_entry;
    } else {
        previous_entry->next = next_entry;
    }

    if (next_entry != NULL)
        next_entry->prev = previous_entry;
    else
        /* removed the last entry; keep the tail pointer consistent */
        rvm_allocations_tail = previous_entry;

    /* free this entry */
    free(entry);

    return(rvm_true);
}
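
/* Note (sketch, not compiled): entries are never coalesced, so a region must
   be unregistered with exactly the (vmaddr, length) pair it was registered
   with; partial ranges are refused. */
#if 0
char *addr = page_alloc(2 * page_size);                        /* registers the range */
assert(rvm_unregister_page(addr, page_size)     == rvm_false); /* partial: refused */
assert(rvm_unregister_page(addr, 2 * page_size) == rvm_true);  /* exact: removed */
#endif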

/*
 * find_page_entry -- returns the first entry which contains
 *      the beginning of the requested region.
 *      these somewhat peculiar semantics allow
 *      us to support both rvm_unregister_page and
 *      mem_chk, which need slightly different things.
 */
static rvm_page_entry_t *find_page_entry(char *vmaddr)
{
    rvm_page_entry_t *bookmark;

    bookmark = rvm_allocations;

    while (bookmark != NULL) {
        if ( (bookmark->start <= vmaddr) && (vmaddr <= bookmark->end) ) {
            return(bookmark);
        }

        bookmark = bookmark->next;
    }

    return(NULL);
}

/* BSD44 page allocator */
char *page_alloc(len)
    rvm_length_t len;
{
    char *vmaddr;
    /* printf ("page_alloc(%ul)\n", len); */
#ifdef HAVE_MMAP
    mmap_anon(vmaddr, NULL, len, PROT_READ | PROT_WRITE);
#else
    {
        HANDLE hMap = CreateFileMapping((HANDLE)0xFFFFFFFF, NULL,
                                        PAGE_READWRITE, 0, len, NULL);
        assert(hMap != NULL);
        vmaddr = MapViewOfFile(hMap, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, 0);
        assert(vmaddr != NULL);
        CloseHandle(hMap);
    }
#endif
    if (vmaddr == (char *)-1)
    {
        if (errno == ENOMEM)
        {
            vmaddr = NULL;
        }
        else
        {
            assert(rvm_false); /* Unknown error condition */
        }
    }
    if (vmaddr == NULL)
        return NULL; /* out of memory; nothing to register */

    /* modified by tilt, Nov 19 1996.
       When we allocate a page (or range of pages) we register
       it in an internal table we keep around to keep
       track of pages. (The previous solution was to try to
       re-allocate the page and see if that fails, which is
       not only wrong [since we don't know whether it's allocated
       at all, or actually allocated in the RVM heap!!], but
       doesn't work with mmap()). */
    if (rvm_register_page(vmaddr, len) == rvm_false)
    {
        assert(rvm_false); /* Registering shouldn't have failed */
    }

    return vmaddr;
}
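
/* Usage sketch (illustrative only, not compiled): page_alloc and page_free
   come in pairs and keep the registration table consistent. */
#if 0
rvm_length_t len = ROUND_TO_PAGE_SIZE(42);
char *p = page_alloc(len);
if (p != NULL) {
    /* ... use p[0 .. len-1] ... */
    page_free(p, len); /* unmaps and unregisters the range */
}
#endif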

/* BSD44 page deallocator */
void page_free(vmaddr, length)
    char *vmaddr;
    rvm_length_t length;
{
#ifdef HAVE_MMAP
    if (munmap(vmaddr, length)) {
        assert(0); /* should never fail */
    }
#else
    UnmapViewOfFile(vmaddr);
#endif

    if (rvm_unregister_page(vmaddr, length) == rvm_false) {
        assert(0); /* should never fail */
    }
}

/*
 * mem_chk -- verifies that the memory region in question
 *      is actually addressable as part of RVM.
 *      this means either that it is on the list,
 *      or it is wholly contained by one or more list entries.
 */
static rvm_bool_t mem_chk(char *vmaddr, rvm_length_t length)
{
    rvm_page_entry_t *entry;
    char *start = vmaddr;
    char *end   = vmaddr + length - 1;

    while (rvm_true) {
        entry = find_page_entry(start);
        if (entry == NULL)
            return(rvm_false);

        if (end <= entry->end)
            return(rvm_true);

        start = entry->end + 1; /* XXX possible problems with
                                   pages that aren't fully
                                   allocated. burn that
                                   bridge when we get to it. */
    }

    assert(rvm_false);
    return(rvm_false); /* shouldn't be able to get here */
}
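
/* Example (sketch): if [a, b] and [b+1, c] were registered as two separate
   entries, mem_chk(a, c - a + 1) still returns rvm_true: the loop advances
   start to entry->end + 1 and picks up the second entry. Any hole between
   entries makes find_page_entry() return NULL and the check fail. */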

/* segment short name generator */
static long make_seg_code()
{
    long retval;

    CRITICAL(seg_code_lock,             /* begin seg_code_lock crit sec */
    {
        /* probably indivisible on CISC */
        retval = seg_code++;            /* machines, but we can't RISC it, */
                                        /* so we lock it... */
    });                                 /* end seg_code_lock crit sec */

    return retval;
}

/* open segment device and set device characteristics */
static long open_seg_dev(seg,dev_length)
    seg_t *seg;                         /* segment descriptor */
    rvm_offset_t *dev_length;           /* optional device length */
{
    rvm_length_t flags = O_RDWR;        /* device open flags */
    long retval;

    if (rvm_no_update) flags = O_RDONLY;
    if ((retval=open_dev(&seg->dev,flags,0)) < 0)
        return retval;
    if ((retval=set_dev_char(&seg->dev,dev_length)) < 0)
        close_dev(&seg->dev);

    return retval;
}

static long close_seg_dev(seg)
    seg_t *seg;                         /* segment descriptor */
{
    return close_dev(&seg->dev);
}

/* close segment devices at termination time */
rvm_return_t close_all_segs()
{
    seg_t *seg;                         /* segment descriptor */
    rvm_return_t retval = RVM_SUCCESS;  /* return value */

    RW_CRITICAL(seg_root_lock,w,        /* begin seg_root_lock crit section */
    {
        FOR_ENTRIES_OF(seg_root,seg_t,seg)
        {
            CRITICAL(seg->dev_lock,     /* begin seg->dev_lock crit section */
            {
                if (close_seg_dev(seg) < 0)
                    retval = RVM_EIO;
            });                         /* end seg->dev_lock crit section */
            if (retval != RVM_SUCCESS)
                break;
        }
    });                                 /* end seg_root_lock crit section */

    return retval;
}

/* segment lookup via device name */
seg_t *seg_lookup(dev_name,retval)
    char *dev_name;                     /* segment device name */
    rvm_return_t *retval;
{
    char full_name[MAXPATHLEN+1];
    seg_t *seg = NULL;

    /* get full path name for segment device */
    (void)make_full_name(dev_name,full_name,retval);
    if (*retval != RVM_SUCCESS)
        return NULL;

    /* search segment list for full_name */
    RW_CRITICAL(seg_root_lock,r,        /* begin seg_root_lock crit section */
    {
        FOR_ENTRIES_OF(seg_root,seg_t,seg)
            if (!strcmp(seg->dev.name,full_name))
                break;                  /* found */
    });                                 /* end seg_root_lock crit section */

    if (!seg->links.is_hdr)
        return seg;                     /* return found seg descriptor */
    else
        return NULL;
}

/* enter segment short name definition in log */
static rvm_return_t define_seg(log,seg)
    log_t *log;                         /* log descriptor */
    seg_t *seg;                         /* segment descriptor */
{
    log_seg_t *log_seg;                 /* special log segment entry */
    log_special_t *special;             /* allocation for log_seg */
    long name_len;                      /* byte length of segment name */
    rvm_return_t retval;                /* return code */

    /* make segment definition record */
    name_len = strlen(seg->dev.name);
    special = make_log_special(log_seg_id,name_len+1);
    if (special == NULL)
        return RVM_ENO_MEMORY;          /* can't get descriptor */

    /* complete record and enter in log */
    log_seg = &special->special.log_seg;
    log_seg->seg_code = seg->seg_code;
    log_seg->num_bytes = seg->dev.num_bytes;
    log_seg->name_len = name_len;
    (void)strcpy(log_seg->name,seg->dev.name);
    if ((retval=queue_special(log,special)) != RVM_SUCCESS)
        free_log_special(special);      /* free the whole record, not the
                                           embedded log_seg member */

    return retval;
}

/* write new segment dictionary entries for all segments */
rvm_return_t define_all_segs(log)
    log_t *log;
{
    seg_t *seg;                         /* segment descriptor */
    rvm_return_t retval = RVM_SUCCESS;  /* return value */

    RW_CRITICAL(seg_root_lock,r,        /* begin seg_root_lock crit sec */
    {
        FOR_ENTRIES_OF(seg_root,seg_t,seg)
        {
            if ((retval=define_seg(log,seg)) != RVM_SUCCESS)
                break;
        }
    });                                 /* end seg_root_lock crit sec */

    return retval;
}

/* segment builder */
static seg_t *build_seg(rvm_region,log,retval)
    rvm_region_t *rvm_region;           /* segment's region descriptor */
    log_t *log;                         /* log descriptor */
    rvm_return_t *retval;               /* ptr to return code */
{
    seg_t *seg;                         /* new segment descriptor */

    /* build segment descriptor */
    seg = make_seg(rvm_region->data_dev,retval);
    if (*retval != RVM_SUCCESS)
        goto err_exit;

    /* open device and set characteristics */
    seg->log = log;
    log->ref_cnt += 1;
    if (open_seg_dev(seg,&rvm_region->dev_length) < 0)
    {
        *retval = RVM_EIO;
        goto err_exit;
    }

    /* raw devices require length */
    if ((seg->dev.raw_io) &&
        (RVM_OFFSET_EQL_ZERO(seg->dev.num_bytes)))
    {
        *retval = RVM_ENOT_MAPPED;
        goto err_exit;
    }

    /* define short name for log & queue log entry */
    seg->seg_code = make_seg_code();
    if ((*retval=define_seg(log,seg)) != RVM_SUCCESS)
        goto err_exit;

    /* put segment on segment list */
    RW_CRITICAL(seg_root_lock,w,        /* begin seg_root_lock crit sec */
    {
        (void)move_list_entry(NULL,&seg_root,seg);
    });                                 /* end seg_root_lock crit sec */
    return seg;

  err_exit:
    if (seg != NULL)                    /* only undo what was done: the log */
    {                                   /* ref count was bumped only after */
        log->ref_cnt -= 1;              /* make_seg succeeded */
        free_seg(seg);
    }
    return NULL;                        /* the log seg dict entry is not
                                           deallocated since the seg_code is
                                           unique -- to the log, it's just like
                                           a segment used read-only */
}

/* device region conflict comparator */
long dev_partial_include(base1,end1,base2,end2)
    rvm_offset_t *base1,*end1;
    rvm_offset_t *base2,*end2;
{
    if (RVM_OFFSET_GEQ(*base1,*end2))
        return 1;                       /* region1 above region2 */
    if (RVM_OFFSET_LEQ(*end1,*base2))
        return -1;                      /* region1 below region2 */

    return 0;                           /* regions at least partially overlap */
}

/* device region within other region comparator */
long dev_total_include(base1,end1,base2,end2)
    rvm_offset_t *base1,*end1;
    rvm_offset_t *base2,*end2;
{
    if ((RVM_OFFSET_GEQ(*base1,*base2) && RVM_OFFSET_LEQ(*base1,*end2))
        &&
        (RVM_OFFSET_GEQ(*end1,*base2) && RVM_OFFSET_LEQ(*end1,*end2))
       ) return 0;                      /* region1 included in region2 */
    if (RVM_OFFSET_LSS(*base1,*base2))
        return -1;                      /* region1 below region2, may overlap */

    return 1;                           /* region1 above region2, may overlap */
}
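
/* Worked example (sketch): for region1 = [100,200) and region2 = [50,300),
   dev_partial_include() and dev_total_include() both return 0 (region1 lies
   inside region2); for region1 = [400,500) against the same region2, both
   return 1 (region1 is entirely above). */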

/* vm range conflict comparator */
static long mem_partial_include(tnode1,tnode2)
    tree_node_t *tnode1;                /* range1 */
    tree_node_t *tnode2;                /* range2 */
{
    rvm_length_t addr1;                 /* start of range1 */
    rvm_length_t addr2;                 /* start of range2 */
    rvm_length_t end1;                  /* end of range1 */
    rvm_length_t end2;                  /* end of range2 */

    /* rebind types and compute end points */
    addr1 = (rvm_length_t)(((mem_region_t *)tnode1)->vmaddr);
    addr2 = (rvm_length_t)(((mem_region_t *)tnode2)->vmaddr);
    end1 = addr1 + ((mem_region_t *)tnode1)->length - 1;
    end2 = addr2 + ((mem_region_t *)tnode2)->length - 1;

    if (addr1 > end2) return 1;         /* range1 above range2 */
    if (end1 < addr2) return -1;        /* range1 below range2 */
    return 0;                           /* ranges at least partially overlap */
}

/* vm range within other range comparator */
long mem_total_include(tnode1,tnode2)
    tree_node_t *tnode1;                /* range1 */
    tree_node_t *tnode2;                /* range2 */
{
    rvm_length_t addr1;                 /* start of range1 */
    rvm_length_t addr2;                 /* start of range2 */
    rvm_length_t end1;                  /* end of range1 */
    rvm_length_t end2;                  /* end of range2 */

    /* rebind types and compute end points */
    addr1 = (rvm_length_t)(((mem_region_t *)tnode1)->vmaddr);
    addr2 = (rvm_length_t)(((mem_region_t *)tnode2)->vmaddr);
    end1 = addr1 + ((mem_region_t *)tnode1)->length - 1;
    end2 = addr2 + ((mem_region_t *)tnode2)->length - 1;

    if ((addr1 >= addr2) && (addr1 <= end2) && (end1 <= end2))
        return 0;                       /* range1 included in range2 */
    /* This test did not correspond to the comment, changed it. -JH
       was: if (end1 < addr2) return -1; */
    if (addr1 < addr2) return -1;       /* range1 below range2, may overlap */
    return 1;                           /* range1 above range2, may overlap */
}
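
/* Both comparators use the tree comparator signature (tnode1 is the probe
   node, tnode2 a node already in region_tree); a lookup is built the same
   way find_whole_range() does below (sketch, not compiled; dest and length
   are a hypothetical query range). */
#if 0
mem_region_t probe, *node;
probe.vmaddr = dest;
probe.length = length;
node = (mem_region_t *)tree_lookup(&region_tree, (tree_node_t *)&probe,
                                   mem_total_include);
#endif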

/* find and lock a region record iff vm range
   is entirely within a single mapped region
   -- region tree is left locked if mode = w
   -- used by transaction functions and unmap
*/
region_t *find_whole_range(dest,length,mode)
    char *dest;
    rvm_length_t length;
    rw_lock_mode_t mode;                /* lock mode for region descriptor */
{
    mem_region_t range;                 /* dummy node for lookup */
    mem_region_t *node;                 /* ptr to node found */
    region_t *region = NULL;            /* ptr to region for found node */

    range.vmaddr = dest;
    range.length = length;
    range.links.node.struct_id = mem_region_id;

    RW_CRITICAL(region_tree_lock,mode,  /* begin region_tree_lock crit sect */
    {
        node = (mem_region_t *)tree_lookup(&region_tree,
                                           (tree_node_t *)&range,
                                           mem_total_include);
        if (node != NULL)
        {
            region = node->region;
            if (region != NULL)
            {                           /* begin region_lock crit sect */
                rw_lock(&region->region_lock,mode); /* (ended by caller) */
                if (mode == w)          /* retain region_tree_lock */
                    return region;      /* caller will unlock */
            }
        }
    });                                 /* end region_tree_lock crit sect */

    return region;
}

/* apply mapping options, compute region size, and round to page size */
static rvm_return_t round_region(rvm_region,seg)
    rvm_region_t *rvm_region;           /* user region specs [in/out] */
    seg_t *seg;                         /* segment descriptor */
{
    rvm_offset_t big_len;

    /* see if region within segment */
    if (RVM_OFFSET_GTR(rvm_region->offset,seg->dev.num_bytes))
        return RVM_EOFFSET;
    big_len = RVM_ADD_LENGTH_TO_OFFSET(rvm_region->offset,
                                       rvm_region->length);
    if (RVM_OFFSET_LSS(big_len,rvm_region->offset))
        return RVM_EOFFSET;             /* overflow */

    /* round end up and offset down to integral page size */
    big_len = RVM_LENGTH_TO_OFFSET(ROUND_TO_PAGE_SIZE(
                                   RVM_OFFSET_TO_LENGTH(big_len)));
    rvm_region->offset = RVM_MK_OFFSET(
        RVM_OFFSET_HIGH_BITS_TO_LENGTH(rvm_region->offset),
        CHOP_TO_PAGE_SIZE(RVM_OFFSET_TO_LENGTH(rvm_region->offset)));

    /* see if at end of segment */
    if ((rvm_region->length == 0)
        || RVM_OFFSET_GTR(big_len,seg->dev.num_bytes))
        big_len = seg->dev.num_bytes;

    /* calculate actual length to map (only 32 bit lengths for now) */
    big_len = RVM_SUB_OFFSETS(big_len,rvm_region->offset);
    if (RVM_OFFSET_HIGH_BITS_TO_LENGTH(big_len) != 0)
        return RVM_ERANGE;
    rvm_region->length = RVM_OFFSET_TO_LENGTH(big_len);

    /* check page-aligned buffer or allocate virtual memory region */
    if (rvm_region->vmaddr != NULL)
    {
        if (rvm_region->vmaddr != (char *)
                CHOP_TO_PAGE_SIZE(rvm_region->vmaddr))
            return RVM_ERANGE;          /* buffer not page aligned */
        if (!mem_chk(rvm_region->vmaddr,rvm_region->length))
            return RVM_ERANGE;          /* buffer not within task's vm */
    }
    else
    {
        rvm_region->vmaddr =
            page_alloc(ROUND_TO_PAGE_SIZE(rvm_region->length));
        if (rvm_region->vmaddr == NULL) return RVM_ENO_MEMORY;
    }

    return RVM_SUCCESS;
}
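
/* Worked example (sketch): with page_size 0x1000, a request for offset
   0x1234 and length 0x2000 becomes
       big_len = round_up(0x1234 + 0x2000) = 0x4000
       offset  = chop(0x1234)              = 0x1000
       length  = 0x4000 - 0x1000           = 0x3000
   i.e. the mapped window is widened to whole pages covering the request. */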

/* validate region and construct descriptors */
static rvm_return_t establish_range(rvm_region,region,mem_region,seg)
    rvm_region_t *rvm_region;           /* user request region descriptor */
    region_t **region;                  /* internal region descriptor [out] */
    mem_region_t **mem_region;          /* region tree descriptor [out] */
    seg_t *seg;                         /* segment ptr */
{
    mem_region_t *mem_node;
    region_t *new_region;
    rvm_return_t retval;

    /* get exact region size, address */
    *region = NULL; *mem_region = NULL;
    if ((retval=round_region(rvm_region,seg)) != RVM_SUCCESS)
        return retval;

    /* build new region descriptor */
    *region = new_region = make_region();
    if (new_region == NULL) return RVM_ENO_MEMORY;
    new_region->no_copy = rvm_region->no_copy;
    new_region->offset = rvm_region->offset;
    new_region->end_offset =
        RVM_ADD_LENGTH_TO_OFFSET(rvm_region->offset,
                                 rvm_region->length);

    /* build range tree node */
    *mem_region = mem_node = make_mem_region();
    if (mem_node == NULL) return RVM_ENO_MEMORY;
    new_region->mem_region = mem_node;
    mem_node->vmaddr = new_region->vmaddr = rvm_region->vmaddr;
    mem_node->length = new_region->length
                     = (rvm_length_t)rvm_region->length;
    mem_node->region = NULL;

    /* put range tree node in tree to reserve range */
    RW_CRITICAL(region_tree_lock,w,     /* begin region_tree_lock crit sect */
    {
        if (!tree_insert(&region_tree,(tree_node_t *)mem_node,
                         mem_partial_include))
            retval = RVM_EVM_OVERLAP;   /* vm range already mapped */
    });                                 /* end region_tree_lock crit sect */

    return retval;
}

/* check for mapping dependencies on previously
   mapped regions, or conflict with presently mapped region
   -- caller provides list locking
   returns the overlapping region if a conflict is detected, else NULL
*/
static region_t *chk_seg_mappings(chk_region,list_root)
    region_t *chk_region;               /* region descriptor to chk */
    list_entry_t *list_root;            /* root of list to check */
{
    region_t *region;                   /* internal region descriptor */

    FOR_ENTRIES_OF(*list_root,region_t,region)
    {
        /* test for overlap */
        if (dev_partial_include(&chk_region->offset,
                                &chk_region->end_offset,
                                &region->offset,&region->end_offset
                               ) == 0)
            return region;              /* overlap */
    }

    return NULL;
}

/* check mapping dependencies within segment */
static rvm_return_t chk_dependencies(seg,region)
    seg_t *seg;
    region_t *region;
{
    region_t *x_region;                 /* conflicting or dependent region */
    rvm_return_t retval = RVM_SUCCESS;

    /* check for multiple mappings of same segment region */
    CRITICAL(seg->seg_lock,             /* begin seg_lock crit sect */
    {
        if ((x_region=chk_seg_mappings(region,&seg->map_list))
            == NULL)
        {
            /* enter region in map_list */
            region->seg = seg;
            (void)move_list_entry(NULL,&seg->map_list,
                                  &region->links);

            /* check for overlap with modified and unmapped regions of
               segment; if found, must wait for truncation to get committed
               image of region */
            DO_FOREVER
                if ((x_region=chk_seg_mappings(region,
                                               &seg->unmap_list))
                    != NULL)
                {
                    (void)initiate_truncation(seg->log,100);
                    if ((retval=wait_for_truncation(seg->log,
                                                    &x_region->unmap_ts))
                        != RVM_SUCCESS) goto err_exit;
                    free_region(x_region); /* can free now */
                }
                else break;             /* no further dependencies */
        }
        else
            retval = RVM_EOVERLAP;      /* multiply mapped */
      err_exit:;
    });                                 /* end seg_lock crit sect */

    return retval;
}

/* make data from segment available in mapped region */
static rvm_return_t map_data(rvm_options,region)
    rvm_options_t *rvm_options;
    region_t *region;
{
    seg_t *seg = region->seg;
    rvm_return_t retval = RVM_SUCCESS;
#if defined(__NetBSD__) || defined(__FreeBSD__)
    char *addr;
#endif

    /* check for pager mapping */
    if (rvm_options != NULL)
        if (rvm_options->pager != NULL)
        {
            /* external pager interface not implemented yet */
            return RVM_EPAGER;
        }

#if defined(__NetBSD__) || defined(__FreeBSD__)
    /* NetBSD has a kernel bug that will panic if we
       try to read from a raw device and copy it to an address
       on or above 0x10400000. This is known to be a problem
       with vm_fault() of the NetBSD kernel, which panics when it
       finds that the pte (page directory table entry) does
       not exist in the page dir table (instead of trying to
       create it). Until that is fixed, we work around it
       by manually touching one byte of the address space for
       every pte that we'll need. This gets the pte
       created and we'll be fine. This was proposed by rvb.
       -- clement */
    if (seg->dev.raw_io) {
        for (addr=region->vmaddr;
             addr < ( (region->vmaddr)+(region->length) );
             addr+=0x400000) {          /* each pte covers 0x400000 of vm */
            *addr = 0;                  /* this forces the kernel to create
                                           the pte */
        }
    }
#endif /* __NetBSD__ || __FreeBSD__ */

    /* read data directly from segment */
    if (!region->no_copy)
        CRITICAL(seg->dev_lock,
        {
            if (read_dev(&seg->dev,&region->offset,
                         region->vmaddr,region->length) < 0)
                retval = RVM_EIO;
        });

    return retval;
}

/* error exit cleanup */
static void clean_up(region,mem_region)
    region_t *region;
    mem_region_t *mem_region;
{
    seg_t *seg;

    /* kill region descriptor if created */
    if (region != NULL)
    {
        seg = region->seg;
        if (seg != NULL)
            CRITICAL(seg->seg_lock,
            {
                (void)move_list_entry(&seg->map_list,NULL,
                                      &region->links);
            });
        free_region(region);
    }

    /* kill region tree node if created */
    if (mem_region != NULL)
    {
        RW_CRITICAL(region_tree_lock,w,
        {
            (void)tree_delete(&region_tree,(tree_node_t *)mem_region,
                              mem_partial_include);
        });
        free_mem_region(mem_region);
    }
}

/* rvm_map */
rvm_return_t rvm_map(rvm_region_t *rvm_region, rvm_options_t *rvm_options)
{
    seg_t *seg;                         /* segment descriptor */
    region_t *region = NULL;            /* new region descriptor */
    mem_region_t *mem_region = NULL;    /* new region's tree node */
    rvm_return_t retval;
    rvm_region_t save_rvm_region;
    int fd;                             /* for private mappings */
    void *addr;

    /* preliminary checks & saves */
    if (bad_init()) return RVM_EINIT;
    if ((retval=bad_region(rvm_region)) != RVM_SUCCESS)
        return retval;
    if (rvm_options != NULL)
        if ((retval=do_rvm_options(rvm_options)) != RVM_SUCCESS)
            return retval;
    if (default_log == NULL) return RVM_ELOG;
    (void)BCOPY((char *)rvm_region,(char *)&save_rvm_region,
                sizeof(rvm_region_t));

    /* find or build segment */
    seg = seg_lookup(rvm_region->data_dev,&retval);
    if (retval != RVM_SUCCESS) goto err_exit;
    if (seg == NULL)
    {                                   /* must build a new segment */
        if ((seg=build_seg(rvm_region,default_log,&retval))
            == NULL) goto err_exit;
    }
    else
        /* test if segment closed by earlier (failing) rvm_terminate */
        if (seg->dev.handle == 0) return RVM_EIO;

    /* check for vm overlap with existing mappings & build descriptors */
    if ((retval = establish_range(rvm_region,&region,&mem_region,seg))
        != RVM_SUCCESS)
        goto err_exit;

    /* check for overlap with existing mappings in segment, check
       for truncation dependencies, and enter region in map_list */
    if ((retval=chk_dependencies(seg,region)) != RVM_SUCCESS)
        goto err_exit;

    /* do the private map or get the data from the segment */
    if (rvm_map_private) {
        fd = open(rvm_region->data_dev, O_RDONLY | O_BINARY);
        if ( fd < 0 ) {
            retval = RVM_EIO;
            goto err_exit;
        }
        addr = mmap(rvm_region->vmaddr, rvm_region->length,
                    PROT_READ | PROT_WRITE, MAP_FIXED | MAP_PRIVATE,
                    fd, region->offset.low);
        if (addr == MAP_FAILED) {       /* check before adopting the address */
            close(fd);
            retval = RVM_ENOT_MAPPED;
            goto err_exit;
        }
        if (!rvm_region->vmaddr)
            rvm_region->vmaddr = addr;

        if (addr != rvm_region->vmaddr) {
            close(fd);
            retval = RVM_ENOT_MAPPED;
            goto err_exit;
        }
        if (close(fd)) {
            retval = RVM_EIO;
            goto err_exit;
        }
    } else {
        /* get the data from the segment */
        if ((retval = map_data(rvm_options,region)) != RVM_SUCCESS) {
            rvm_region->length = 0;
            goto err_exit;
        }
    }

    /* complete region tree node and exit */
    mem_region->region = region;
    return RVM_SUCCESS;

  err_exit:
    clean_up(region,mem_region);
    (void)BCOPY((char *)&save_rvm_region,(char *)rvm_region,
                sizeof(rvm_region_t));
    return retval;
}
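
/* Usage sketch (illustrative only, not compiled): a typical caller fills in
   a region descriptor and maps the whole segment. rvm_malloc_region() is
   assumed to be the rvm.h allocator that sets struct_id, and the device
   path is hypothetical. */
#if 0
rvm_region_t *region = rvm_malloc_region();
region->data_dev = "/dev/rvm_data";     /* hypothetical segment device */
region->offset   = RVM_MK_OFFSET(0,0);
region->length   = 0;                   /* 0 => map to end of segment */
region->vmaddr   = NULL;                /* let page_alloc() pick the address */
if (rvm_map(region, (rvm_options_t *)NULL) != RVM_SUCCESS)
    /* handle error */ ;
#endif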