/*	$OpenBSD: subr_hibernate.c,v 1.142 2024/08/18 08:01:03 mpi Exp $	*/

/*
 * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
 * Copyright (c) 2011 Mike Larkin <mlarkin@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/hibernate.h>
#include <sys/malloc.h>
#include <sys/param.h>
#include <sys/tree.h>
#include <sys/systm.h>
#include <sys/disklabel.h>
#include <sys/disk.h>
#include <sys/conf.h>
#include <sys/buf.h>
#include <sys/fcntl.h>
#include <sys/stat.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>
#include <uvm/uvm_swap.h>

#include <machine/hibernate.h>

/* Make sure the signature can fit in one block */
CTASSERT((offsetof(union hibernate_info, sec_size) + sizeof(u_int32_t)) <=
    DEV_BSIZE);

/*
 * Hibernate piglet layout information
 *
 * The piglet is a scratch area of memory allocated by the suspending kernel.
 * Its phys and virt addrs are recorded in the signature block. The piglet is
 * used to guarantee an unused area of memory that can be used by the resuming
 * kernel for various things. The piglet is excluded during unpack operations.
 * The piglet size is presently 4*HIBERNATE_CHUNK_SIZE (typically 4*4MB).
 *
 * Offset from piglet_base	Purpose
 * ----------------------------------------------------------------------------
 * 0				Private page for suspend I/O write functions
 * 1*PAGE_SIZE			I/O page used during hibernate suspend
 * 2*PAGE_SIZE			I/O page used during hibernate suspend
 * 3*PAGE_SIZE			copy page used during hibernate suspend
 * 4*PAGE_SIZE			final chunk ordering list (24 pages)
 * 28*PAGE_SIZE			RLE utility page
 * 29*PAGE_SIZE			preserved entropy
 * 30*PAGE_SIZE			start of hiballoc area
 * 110*PAGE_SIZE		end of hiballoc area (80 pages)
 * 366*PAGE_SIZE		end of retguard preservation region (256 pages)
 * ...				unused
 * HIBERNATE_CHUNK_SIZE		start of hibernate chunk table
 * 2*HIBERNATE_CHUNK_SIZE	bounce area for chunks being unpacked
 * 4*HIBERNATE_CHUNK_SIZE	end of piglet
 */
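
/*
 * Illustrative sketch (not compiled): the table above expressed as
 * hypothetical offset macros, purely to make the layout easier to follow.
 * The entropy page is at 29*PAGE_SIZE and the hiballoc area starts at
 * 30*PAGE_SIZE, matching hib_getentropy() and hibernate_zlib_reset()
 * below; the code indexes the piglet with literal page counts instead.
 */
#if 0
#define PIGLET_RESUME_IO_OFS	(0 * PAGE_SIZE)	/* private resume I/O page */
#define PIGLET_SUSPEND_IO_OFS	(1 * PAGE_SIZE)	/* two suspend I/O pages */
#define PIGLET_COPY_PAGE_OFS	(3 * PAGE_SIZE)
#define PIGLET_FCHUNKS_OFS	(4 * PAGE_SIZE)	/* 24 pages of ordering list */
#define PIGLET_RLE_PAGE_OFS	(28 * PAGE_SIZE)
#define PIGLET_ENTROPY_OFS	(29 * PAGE_SIZE)
#define PIGLET_HIBALLOC_OFS	(30 * PAGE_SIZE)	/* 80 pages */
#define PIGLET_RETGUARD_OFS	(110 * PAGE_SIZE)	/* 256 pages */
#define PIGLET_CHUNKTABLE_OFS	(1 * HIBERNATE_CHUNK_SIZE)
#define PIGLET_BOUNCE_OFS	(2 * HIBERNATE_CHUNK_SIZE)
#endif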

/* Temporary vaddr ranges used during hibernate */
vaddr_t hibernate_temp_page;
vaddr_t hibernate_copy_page;
vaddr_t hibernate_rle_page;

/* Hibernate info as read from disk during resume */
union hibernate_info disk_hib;
struct bdevsw *bdsw;

/*
 * Global copy of the pig start address. This needs to be a global as we
 * switch stacks after computing it - it can't be stored on the stack.
 */
paddr_t global_pig_start;

/*
 * Global copies of the piglet start addresses (PA/VA). We store these
 * as globals to avoid having to carry them around as parameters, as the
 * piglet is allocated early and freed late - its lifecycle extends beyond
 * that of the hibernate info union which is calculated on suspend/resume.
 */
vaddr_t global_piglet_va;
paddr_t global_piglet_pa;

/* #define HIB_DEBUG */
#ifdef HIB_DEBUG
int	hib_debug = 99;
#define DPRINTF(x...)     do { if (hib_debug) printf(x); } while (0)
#define DNPRINTF(n,x...)  do { if (hib_debug > (n)) printf(x); } while (0)
#else
#define DPRINTF(x...)
#define DNPRINTF(n,x...)
#endif

#define ROUNDUP(_x, _y)	((((_x)+(_y)-1)/(_y))*(_y))

#ifndef NO_PROPOLICE
extern long __guard_local;
#endif /* ! NO_PROPOLICE */

/* Retguard phys address (need to skip this region during unpack) */
paddr_t retguard_start_phys, retguard_end_phys;
extern char __retguard_start, __retguard_end;

void	hibernate_copy_chunk_to_piglet(paddr_t, vaddr_t, size_t);
int	hibernate_calc_rle(paddr_t, paddr_t);
int	hibernate_write_rle(union hibernate_info *, paddr_t, paddr_t, daddr_t *,
	    size_t *);

#define MAX_RLE (HIBERNATE_CHUNK_SIZE / PAGE_SIZE)

/*
 * Hib alloc enforced alignment.
 */
#define HIB_ALIGN		8 /* bytes alignment */

/*
 * sizeof builtin operation, but with alignment constraint.
 */
#define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)

struct hiballoc_entry {
	size_t			hibe_use;
	size_t			hibe_space;
	RBT_ENTRY(hiballoc_entry) hibe_entry;
};

/*
 * Sort hibernate memory ranges by ascending PA
 */
void
hibernate_sort_ranges(union hibernate_info *hib_info)
{
	int i, j;
	struct hibernate_memory_range *ranges;
	paddr_t base, end;

	ranges = hib_info->ranges;

	for (i = 1; i < hib_info->nranges; i++) {
		j = i;
		while (j > 0 && ranges[j - 1].base > ranges[j].base) {
			base = ranges[j].base;
			end = ranges[j].end;
			ranges[j].base = ranges[j - 1].base;
			ranges[j].end = ranges[j - 1].end;
			ranges[j - 1].base = base;
			ranges[j - 1].end = end;
			j--;
		}
	}
}

/*
 * Compare hiballoc entries based on the address they manage.
 *
 * Since the address is fixed, relative to struct hiballoc_entry,
 * we just compare the hiballoc_entry pointers.
 */
static __inline int
hibe_cmp(const struct hiballoc_entry *l, const struct hiballoc_entry *r)
{
	vaddr_t vl = (vaddr_t)l;
	vaddr_t vr = (vaddr_t)r;

	return vl < vr ? -1 : (vl > vr);
}

RBT_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)

/*
 * Given a hiballoc entry, return the address it manages.
 */
static __inline void *
hib_entry_to_addr(struct hiballoc_entry *entry)
{
	caddr_t addr;

	addr = (caddr_t)entry;
	addr += HIB_SIZEOF(struct hiballoc_entry);
	return addr;
}

/*
 * Given an address, find the hiballoc that corresponds.
 */
static __inline struct hiballoc_entry*
hib_addr_to_entry(void *addr_param)
{
	caddr_t addr;

	addr = (caddr_t)addr_param;
	addr -= HIB_SIZEOF(struct hiballoc_entry);
	return (struct hiballoc_entry*)addr;
}

RBT_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp);

/*
 * Allocate memory from the arena.
 *
 * Returns NULL if no memory is available.
 */
void *
hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz)
{
	struct hiballoc_entry *entry, *new_entry;
	size_t find_sz;

	/*
	 * Enforce alignment of HIB_ALIGN bytes.
	 *
	 * Note that, because the entry is put in front of the allocation,
	 * 0-byte allocations are guaranteed a unique address.
	 */
	alloc_sz = roundup(alloc_sz, HIB_ALIGN);

	/*
	 * Find an entry with hibe_space >= find_sz.
	 *
	 * If the root node is not large enough, we switch to tree traversal.
	 * Because all entries are made at the bottom of the free space,
	 * traversal from the end has a slightly better chance of yielding
	 * a sufficiently large space.
	 */
	find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry);
	entry = RBT_ROOT(hiballoc_addr, &arena->hib_addrs);
	if (entry != NULL && entry->hibe_space < find_sz) {
		RBT_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) {
			if (entry->hibe_space >= find_sz)
				break;
		}
	}

	/*
	 * Insufficient or too fragmented memory.
	 */
	if (entry == NULL)
		return NULL;

	/*
	 * Create new entry in allocated space.
	 */
	new_entry = (struct hiballoc_entry*)(
	    (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use);
	new_entry->hibe_space = entry->hibe_space - find_sz;
	new_entry->hibe_use = alloc_sz;

	/*
	 * Insert entry.
	 */
	if (RBT_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL)
		panic("hib_alloc: insert failure");
	entry->hibe_space = 0;

	/* Return address managed by entry. */
	return hib_entry_to_addr(new_entry);
}

void
hib_getentropy(char **bufp, size_t *bufplen)
{
	if (!bufp || !bufplen)
		return;

	*bufp = (char *)(global_piglet_va + (29 * PAGE_SIZE));
	*bufplen = PAGE_SIZE;
}

/*
 * Free a pointer previously allocated from this arena.
 *
 * If addr is NULL, this will be silently accepted.
 */
void
hib_free(struct hiballoc_arena *arena, void *addr)
{
	struct hiballoc_entry *entry, *prev;

	if (addr == NULL)
		return;

	/*
	 * Derive entry from addr and check it is really in this arena.
	 */
	entry = hib_addr_to_entry(addr);
	if (RBT_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry)
		panic("hib_free: freed item %p not in hib arena", addr);

	/*
	 * Give the space in entry to its predecessor.
	 *
	 * If entry has no predecessor, change its used space into free space
	 * instead.
	 */
	prev = RBT_PREV(hiballoc_addr, entry);
	if (prev != NULL &&
	    (void *)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) +
	    prev->hibe_use + prev->hibe_space) == entry) {
		/* Merge entry. */
		RBT_REMOVE(hiballoc_addr, &arena->hib_addrs, entry);
		prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) +
		    entry->hibe_use + entry->hibe_space;
	} else {
		/* Flip used memory to free space. */
		entry->hibe_space += entry->hibe_use;
		entry->hibe_use = 0;
	}
}

/*
 * Initialize hiballoc.
 *
 * The allocator will manage memory at ptr, which is len bytes.
 */
int
hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len)
{
	struct hiballoc_entry *entry;
	caddr_t ptr;
	size_t len;

	RBT_INIT(hiballoc_addr, &arena->hib_addrs);

	/*
	 * Hib allocator enforces HIB_ALIGN alignment.
	 * Fixup ptr and len.
	 */
	ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN);
	len = p_len - ((size_t)ptr - (size_t)p_ptr);
	len &= ~((size_t)HIB_ALIGN - 1);

	/*
	 * Insufficient memory to be able to allocate and also do bookkeeping.
	 */
	if (len <= HIB_SIZEOF(struct hiballoc_entry))
		return ENOMEM;

	/*
	 * Create entry describing space.
	 */
	entry = (struct hiballoc_entry*)ptr;
	entry->hibe_use = 0;
	entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry);
	RBT_INSERT(hiballoc_addr, &arena->hib_addrs, entry);

	return 0;
}
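
/*
 * Illustrative sketch (not compiled): how a client ties the arena API
 * above together. The backing buffer here is hypothetical; the real
 * arena lives in the piglet's hiballoc pages (see hibernate_zlib_reset
 * below).
 */
#if 0
static char example_backing[16 * 1024];	/* hypothetical backing store */

void
example_arena_use(void)
{
	struct hiballoc_arena arena;
	void *p;

	if (hiballoc_init(&arena, example_backing,
	    sizeof(example_backing)) != 0)
		return;			/* region too small to manage */

	p = hib_alloc(&arena, 128);	/* NULL when exhausted */
	if (p != NULL)
		hib_free(&arena, p);	/* hib_free(&arena, NULL) is a no-op */
}
#endif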

/*
 * Zero all free memory.
 */
void
uvm_pmr_zero_everything(void)
{
	struct uvm_pmemrange *pmr;
	struct vm_page *pg;
	int i;

	uvm_lock_fpageq();
	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
		/* Zero single pages. */
		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY]))
		    != NULL) {
			uvm_pmr_remove(pmr, pg);
			uvm_pagezero(pg);
			atomic_setbits_int(&pg->pg_flags, PG_ZERO);
			uvmexp.zeropages++;
			uvm_pmr_insert(pmr, pg, 0);
		}

		/* Zero multi page ranges. */
		while ((pg = RBT_ROOT(uvm_pmr_size,
		    &pmr->size[UVM_PMR_MEMTYPE_DIRTY])) != NULL) {
			pg--; /* Size tree always has second page. */
			uvm_pmr_remove(pmr, pg);
			for (i = 0; i < pg->fpgsz; i++) {
				uvm_pagezero(&pg[i]);
				atomic_setbits_int(&pg[i].pg_flags, PG_ZERO);
				uvmexp.zeropages++;
			}
			uvm_pmr_insert(pmr, pg, 0);
		}
	}
	uvm_unlock_fpageq();
}

/*
 * Mark all memory as dirty.
 *
 * Used to inform the system that the clean memory isn't clean for some
 * reason, for example because we just came back from hibernate.
 */
void
uvm_pmr_dirty_everything(void)
{
	struct uvm_pmemrange *pmr;
	struct vm_page *pg;
	int i;

	uvm_lock_fpageq();
	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
		/* Dirty single pages. */
		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO]))
		    != NULL) {
			uvm_pmr_remove(pmr, pg);
			atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
			uvm_pmr_insert(pmr, pg, 0);
		}

		/* Dirty multi page ranges. */
		while ((pg = RBT_ROOT(uvm_pmr_size,
		    &pmr->size[UVM_PMR_MEMTYPE_ZERO])) != NULL) {
			pg--; /* Size tree always has second page. */
			uvm_pmr_remove(pmr, pg);
			for (i = 0; i < pg->fpgsz; i++)
				atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
			uvm_pmr_insert(pmr, pg, 0);
		}
	}

	uvmexp.zeropages = 0;
	uvm_unlock_fpageq();
}

/*
 * Allocate an area that can hold sz bytes and doesn't overlap with
 * the piglet at piglet_pa.
 */
int
uvm_pmr_alloc_pig(paddr_t *pa, psize_t sz, paddr_t piglet_pa)
{
	struct uvm_constraint_range pig_constraint;
	struct kmem_pa_mode kp_pig = {
		.kp_constraint = &pig_constraint,
		.kp_maxseg = 1
	};
	vaddr_t va;

	sz = round_page(sz);

	pig_constraint.ucr_low = piglet_pa + 4 * HIBERNATE_CHUNK_SIZE;
	pig_constraint.ucr_high = -1;

	va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait);
	if (va == 0) {
		pig_constraint.ucr_low = 0;
		pig_constraint.ucr_high = piglet_pa - 1;

		va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait);
		if (va == 0)
			return ENOMEM;
	}

	pmap_extract(pmap_kernel(), va, pa);
	return 0;
}

/*
 * Allocate a piglet area.
 *
 * This needs to be in DMA-safe memory.
 * Piglets are aligned.
 *
 * sz and align in bytes.
 */
int
uvm_pmr_alloc_piglet(vaddr_t *va, paddr_t *pa, vsize_t sz, paddr_t align)
{
	struct kmem_pa_mode kp_piglet = {
		.kp_constraint = &dma_constraint,
		.kp_align = align,
		.kp_maxseg = 1
	};

	/* Ensure align is a power of 2 */
	KASSERT((align & (align - 1)) == 0);

	/*
	 * Fixup arguments: align must be at least PAGE_SIZE,
	 * sz will be converted to pagecount, since that is what
	 * pmemrange uses internally.
	 */
	if (align < PAGE_SIZE)
		kp_piglet.kp_align = PAGE_SIZE;

	sz = round_page(sz);

	*va = (vaddr_t)km_alloc(sz, &kv_any, &kp_piglet, &kd_nowait);
	if (*va == 0)
		return ENOMEM;

	pmap_extract(pmap_kernel(), *va, pa);
	return 0;
}

/*
 * Free a piglet area.
 */
void
uvm_pmr_free_piglet(vaddr_t va, vsize_t sz)
{
	/*
	 * Fix parameters.
	 */
	sz = round_page(sz);

	/*
	 * Free the physical and virtual memory.
	 */
	km_free((void *)va, sz, &kv_any, &kp_dma_contig);
}

/*
 * Physmem RLE compression support.
 *
 * Given a physical page address, return the number of pages starting at the
 * address that are free. Clamps to the number of pages in
 * HIBERNATE_CHUNK_SIZE. Returns 0 if the page at addr is not free.
 */
int
uvm_page_rle(paddr_t addr)
{
	struct vm_page *pg, *pg_end;
	struct vm_physseg *vmp;
	int pseg_idx, off_idx;

	pseg_idx = vm_physseg_find(atop(addr), &off_idx);
	if (pseg_idx == -1)
		return 0;

	vmp = &vm_physmem[pseg_idx];
	pg = &vmp->pgs[off_idx];
	if (!(pg->pg_flags & PQ_FREE))
		return 0;

	/*
	 * Search for the first non-free page after pg.
	 * Note that the page may not be the first page in a free pmemrange,
	 * therefore pg->fpgsz cannot be used.
	 */
	for (pg_end = pg; pg_end <= vmp->lastpg &&
	    (pg_end->pg_flags & PQ_FREE) == PQ_FREE &&
	    (pg_end - pg) < HIBERNATE_CHUNK_SIZE/PAGE_SIZE; pg_end++)
		;
	return pg_end - pg;
}
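
/*
 * Worked example (illustrative): if the pages at addr, addr + PAGE_SIZE
 * and addr + 2*PAGE_SIZE are free but the fourth page is not, then
 * uvm_page_rle(addr) returns 3 and uvm_page_rle(addr + 3*PAGE_SIZE)
 * returns 0. The result never exceeds HIBERNATE_CHUNK_SIZE/PAGE_SIZE,
 * so a single RLE code never describes more than one chunk's worth of
 * pages.
 */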

/*
 * Fills out the hibernate_info union pointed to by hib
 * with information about this machine (swap signature block
 * offsets, number of memory ranges, kernel in use, etc)
 */
int
get_hibernate_info(union hibernate_info *hib, int suspend)
{
	struct disklabel dl;
	char err_string[128], *dl_ret;
	int part;
	SHA2_CTX ctx;
	void *fn;

#ifndef NO_PROPOLICE
	/* Save propolice guard */
	hib->guard = __guard_local;
#endif /* ! NO_PROPOLICE */

	/* Determine I/O function to use */
	hib->io_func = get_hibernate_io_function(swdevt[0].sw_dev);
	if (hib->io_func == NULL)
		return (1);

	/* Calculate hibernate device */
	hib->dev = swdevt[0].sw_dev;

	/* Read disklabel (used to calculate signature and image offsets) */
	dl_ret = disk_readlabel(&dl, hib->dev, err_string, sizeof(err_string));

	if (dl_ret) {
		printf("Hibernate error reading disklabel: %s\n", dl_ret);
		return (1);
	}

	/* Make sure we have a swap partition. */
	part = DISKPART(hib->dev);
	if (dl.d_npartitions <= part ||
	    dl.d_secsize > sizeof(union hibernate_info) ||
	    dl.d_partitions[part].p_fstype != FS_SWAP ||
	    DL_GETPSIZE(&dl.d_partitions[part]) == 0)
		return (1);

	/* Magic number */
	hib->magic = HIBERNATE_MAGIC;

	/* Calculate signature block location */
	hib->sec_size = dl.d_secsize;
	hib->sig_offset = DL_GETPSIZE(&dl.d_partitions[part]) - 1;
	hib->sig_offset = DL_SECTOBLK(&dl, hib->sig_offset);

	SHA256Init(&ctx);
	SHA256Update(&ctx, version, strlen(version));
	fn = printf;
	SHA256Update(&ctx, &fn, sizeof(fn));
	fn = malloc;
	SHA256Update(&ctx, &fn, sizeof(fn));
	fn = km_alloc;
	SHA256Update(&ctx, &fn, sizeof(fn));
	fn = strlen;
	SHA256Update(&ctx, &fn, sizeof(fn));
	SHA256Final((u_int8_t *)&hib->kern_hash, &ctx);

	if (suspend) {
		/* Grab the previously-allocated piglet addresses */
		hib->piglet_va = global_piglet_va;
		hib->piglet_pa = global_piglet_pa;
		hib->io_page = (void *)hib->piglet_va;

		/*
		 * Initialization of the hibernate IO function for drivers
		 * that need to do prep work (such as allocating memory or
		 * setting up data structures that cannot safely be done
		 * during suspend without causing side effects). There is
		 * a matching HIB_DONE call performed after the write is
		 * completed.
		 */
		if (hib->io_func(hib->dev,
		    DL_SECTOBLK(&dl, DL_GETPOFFSET(&dl.d_partitions[part])),
		    (vaddr_t)NULL,
		    DL_SECTOBLK(&dl, DL_GETPSIZE(&dl.d_partitions[part])),
		    HIB_INIT, hib->io_page))
			goto fail;

	} else {
		/*
		 * Resuming kernels use a regular private page for the driver.
		 * No need to free this I/O page as it will vanish as part of
		 * the resume.
		 */
		hib->io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
		if (!hib->io_page)
			goto fail;
	}

	if (get_hibernate_info_md(hib))
		goto fail;

	return (0);

fail:
	return (1);
}

/*
 * Allocate nitems*size bytes from the hiballoc area presently in use
 */
void *
hibernate_zlib_alloc(void *unused, int nitems, int size)
{
	struct hibernate_zlib_state *hibernate_state;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	return hib_alloc(&hibernate_state->hiballoc_arena, nitems*size);
}

/*
 * Free the memory pointed to by addr in the hiballoc area presently in
 * use
 */
void
hibernate_zlib_free(void *unused, void *addr)
{
	struct hibernate_zlib_state *hibernate_state;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	hib_free(&hibernate_state->hiballoc_arena, addr);
}

/*
 * Inflate next page of data from the image stream.
 * The rle parameter is modified on exit to contain the number of pages to
 * skip in the output stream (or 0 if data was inflated into this page).
 *
 * Returns 0 if the stream contains additional data, or 1 if the stream is
 * finished.
 */
int
hibernate_inflate_page(int *rle)
{
	struct hibernate_zlib_state *hibernate_state;
	int i;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	/* Set up the stream for RLE code inflate */
	hibernate_state->hib_stream.next_out = (unsigned char *)rle;
	hibernate_state->hib_stream.avail_out = sizeof(*rle);

	/* Inflate RLE code */
	i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH);
	if (i != Z_OK && i != Z_STREAM_END) {
		/*
		 * XXX - this will likely reboot/hang most machines
		 *       since the console output buffer will be unmapped,
		 *       but there's not much else we can do here.
		 */
		panic("rle inflate stream error");
	}

	if (hibernate_state->hib_stream.avail_out != 0) {
		/*
		 * XXX - this will likely reboot/hang most machines
		 *       since the console output buffer will be unmapped,
		 *       but there's not much else we can do here.
		 */
		panic("rle short inflate error");
	}

	if (*rle < 0 || *rle > 1024) {
		/*
		 * XXX - this will likely reboot/hang most machines
		 *       since the console output buffer will be unmapped,
		 *       but there's not much else we can do here.
		 */
		panic("invalid rle count");
	}

	if (i == Z_STREAM_END)
		return (1);

	if (*rle != 0)
		return (0);

	/* Set up the stream for page inflate */
	hibernate_state->hib_stream.next_out =
	    (unsigned char *)HIBERNATE_INFLATE_PAGE;
	hibernate_state->hib_stream.avail_out = PAGE_SIZE;

	/* Process next block of data */
	i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH);
	if (i != Z_OK && i != Z_STREAM_END) {
		/*
		 * XXX - this will likely reboot/hang most machines
		 *       since the console output buffer will be unmapped,
		 *       but there's not much else we can do here.
		 */
		panic("inflate error");
	}

	/* We should always have extracted a full page ... */
	if (hibernate_state->hib_stream.avail_out != 0) {
		/*
		 * XXX - this will likely reboot/hang most machines
		 *       since the console output buffer will be unmapped,
		 *       but there's not much else we can do here.
		 */
		panic("incomplete page");
	}

	return (i == Z_STREAM_END);
}
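
/*
 * Illustrative sketch of the compressed stream the function above
 * consumes, mirroring how hibernate_write_rle() and hibernate_deflate()
 * (below) produce it: each record is a deflated int RLE code, followed
 * by one deflated page of data only when the code is 0.
 *
 *	[ rle = 0 ][ PAGE_SIZE bytes ]	one literal page
 *	[ rle = n ]			skip n free pages, no page data
 *	...
 *	Z_STREAM_END			end of this chunk's stream
 */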

/*
 * Inflate size bytes from src into dest, skipping any pages in
 * [src..dest] that are special (see hibernate_inflate_skip)
 *
 * This function executes while using the resume-time stack
 * and pmap, and therefore cannot use ddb/printf/etc. Doing so
 * will likely hang or reset the machine since the console output buffer
 * will be unmapped.
 */
void
hibernate_inflate_region(union hibernate_info *hib, paddr_t dest,
    paddr_t src, size_t size)
{
	int end_stream = 0, rle, skip;
	struct hibernate_zlib_state *hibernate_state;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	hibernate_state->hib_stream.next_in = (unsigned char *)src;
	hibernate_state->hib_stream.avail_in = size;

	do {
		/*
		 * Is this a special page? If yes, redirect the
		 * inflate output to a scratch page (eg, discard it)
		 */
		skip = hibernate_inflate_skip(hib, dest);
		if (skip == HIB_SKIP) {
			hibernate_enter_resume_mapping(
			    HIBERNATE_INFLATE_PAGE,
			    HIBERNATE_INFLATE_PAGE, 0);
		} else if (skip == HIB_MOVE) {
			/*
			 * Special case : retguard region. This gets moved
			 * temporarily into the piglet region and copied into
			 * place immediately before resume
			 */
			hibernate_enter_resume_mapping(
			    HIBERNATE_INFLATE_PAGE,
			    hib->piglet_pa + (110 * PAGE_SIZE) +
			    hib->retguard_ofs, 0);
			hib->retguard_ofs += PAGE_SIZE;
			if (hib->retguard_ofs > 255 * PAGE_SIZE) {
				/*
				 * XXX - this will likely reboot/hang most
				 *       machines since the console output
				 *       buffer will be unmapped, but there's
				 *       not much else we can do here.
				 */
				panic("retguard move error, out of space");
			}
		} else {
			hibernate_enter_resume_mapping(
			    HIBERNATE_INFLATE_PAGE, dest, 0);
		}

		hibernate_flush();
		end_stream = hibernate_inflate_page(&rle);

		if (rle == 0)
			dest += PAGE_SIZE;
		else
			dest += (rle * PAGE_SIZE);
	} while (!end_stream);
}

/*
 * deflate from src into the I/O page, up to 'remaining' bytes
 *
 * Returns number of input bytes consumed, and may reset
 * the 'remaining' parameter if not all the output space was consumed
 * (this information is needed to know how much to write to disk)
 */
size_t
hibernate_deflate(union hibernate_info *hib, paddr_t src,
    size_t *remaining)
{
	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
	struct hibernate_zlib_state *hibernate_state;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	/* Set up the stream for deflate */
	hibernate_state->hib_stream.next_in = (unsigned char *)src;
	hibernate_state->hib_stream.avail_in = PAGE_SIZE - (src & PAGE_MASK);
	hibernate_state->hib_stream.next_out =
	    (unsigned char *)hibernate_io_page + (PAGE_SIZE - *remaining);
	hibernate_state->hib_stream.avail_out = *remaining;

	/* Process next block of data */
	if (deflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH) != Z_OK)
		panic("hibernate zlib deflate error");

	/* Update pointers and return number of bytes consumed */
	*remaining = hibernate_state->hib_stream.avail_out;
	return (PAGE_SIZE - (src & PAGE_MASK)) -
	    hibernate_state->hib_stream.avail_in;
}

/*
 * Write the hibernation information specified in hiber_info
 * to the location in swap previously calculated (last block of
 * swap), called the "signature block".
 */
int
hibernate_write_signature(union hibernate_info *hib)
{
	memset(&disk_hib, 0, hib->sec_size);
	memcpy(&disk_hib, hib, DEV_BSIZE);

	/* Write hibernate info to disk */
	return (hib->io_func(hib->dev, hib->sig_offset,
	    (vaddr_t)&disk_hib, hib->sec_size, HIB_W,
	    hib->io_page));
}

/*
 * Write the memory chunk table to the area in swap immediately
 * preceding the signature block. The chunk table is stored
 * in the piglet when this function is called. Returns errno.
 */
int
hibernate_write_chunktable(union hibernate_info *hib)
{
	vaddr_t hibernate_chunk_table_start;
	size_t hibernate_chunk_table_size;
	int i, err;

	hibernate_chunk_table_size = HIBERNATE_CHUNK_TABLE_SIZE;

	hibernate_chunk_table_start = hib->piglet_va +
	    HIBERNATE_CHUNK_SIZE;

	/* Write chunk table */
	for (i = 0; i < hibernate_chunk_table_size; i += MAXPHYS) {
		if ((err = hib->io_func(hib->dev,
		    hib->chunktable_offset + (i/DEV_BSIZE),
		    (vaddr_t)(hibernate_chunk_table_start + i),
		    MAXPHYS, HIB_W, hib->io_page))) {
			DPRINTF("chunktable write error: %d\n", err);
			return (err);
		}
	}

	return (0);
}

/*
 * Write an empty hiber_info to the swap signature block, which is
 * guaranteed to not match any valid hib.
 */
int
hibernate_clear_signature(union hibernate_info *hib)
{
	uint8_t buf[DEV_BSIZE];

	/* Zero out a blank hiber_info */
	memcpy(&buf, &disk_hib, sizeof(buf));
	memset(&disk_hib, 0, hib->sec_size);

	/* Write (zeroed) hibernate info to disk */
	DPRINTF("clearing hibernate signature block location: %lld\n",
	    hib->sig_offset);
	if (hibernate_block_io(hib,
	    hib->sig_offset,
	    hib->sec_size, (vaddr_t)&disk_hib, 1))
		printf("Warning: could not clear hibernate signature\n");

	memcpy(&disk_hib, buf, sizeof(buf));
	return (0);
}

/*
 * Compare two hibernate_infos to determine if they are the same (i.e.,
 * whether we should be performing a hibernate resume on this machine).
 * Not all fields are checked - just enough to verify that the machine
 * has the same memory configuration and kernel as the one that
 * wrote the signature previously.
 */
int
hibernate_compare_signature(union hibernate_info *mine,
    union hibernate_info *disk)
{
	u_int i;

	if (mine->nranges != disk->nranges) {
		printf("unhibernate failed: memory layout changed\n");
		return (1);
	}

	if (bcmp(mine->kern_hash, disk->kern_hash, SHA256_DIGEST_LENGTH) != 0) {
		printf("unhibernate failed: original kernel changed\n");
		return (1);
	}

	for (i = 0; i < mine->nranges; i++) {
		if ((mine->ranges[i].base != disk->ranges[i].base) ||
		    (mine->ranges[i].end != disk->ranges[i].end)) {
			DPRINTF("hib range %d mismatch [%p-%p != %p-%p]\n",
			    i,
			    (void *)mine->ranges[i].base,
			    (void *)mine->ranges[i].end,
			    (void *)disk->ranges[i].base,
			    (void *)disk->ranges[i].end);
			printf("unhibernate failed: memory size changed\n");
			return (1);
		}
	}

	return (0);
}

/*
 * Transfers xfer_size bytes between the hibernate device specified in
 * hib_info at offset blkctr and the vaddr specified at dest.
 *
 * Separate offsets and pages are used to handle misaligned reads (reads
 * that span a page boundary).
 *
 * blkctr specifies a relative offset (relative to the start of swap),
 * not an absolute disk offset
 */
int
hibernate_block_io(union hibernate_info *hib, daddr_t blkctr,
    size_t xfer_size, vaddr_t dest, int iswrite)
{
	struct buf *bp;
	int error;

	bp = geteblk(xfer_size);
	if (iswrite)
		bcopy((caddr_t)dest, bp->b_data, xfer_size);

	bp->b_bcount = xfer_size;
	bp->b_blkno = blkctr;
	CLR(bp->b_flags, B_READ | B_WRITE | B_DONE);
	SET(bp->b_flags, B_BUSY | (iswrite ? B_WRITE : B_READ) | B_RAW);
	bp->b_dev = hib->dev;
	(*bdsw->d_strategy)(bp);

	error = biowait(bp);
	if (error) {
		printf("hib block_io biowait error %d blk %lld size %zu\n",
		    error, (long long)blkctr, xfer_size);
	} else if (!iswrite)
		bcopy(bp->b_data, (caddr_t)dest, xfer_size);

	bp->b_flags |= B_INVAL;
	brelse(bp);

	return (error != 0);
}

/*
 * Preserve one page worth of random data, generated from the resuming
 * kernel's arc4random. After resume, this preserved entropy can be used
 * to further improve the un-hibernated machine's entropy pool. This
 * random data is stored in the piglet, which is preserved across the
 * unpack operation, and is restored later in the resume process (see
 * hib_getentropy)
 */
void
hibernate_preserve_entropy(union hibernate_info *hib)
{
	void *entropy;

	entropy = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);

	if (!entropy)
		return;

	pmap_activate(curproc);
	pmap_kenter_pa((vaddr_t)entropy,
	    (paddr_t)(hib->piglet_pa + (29 * PAGE_SIZE)),
	    PROT_READ | PROT_WRITE);

	arc4random_buf((void *)entropy, PAGE_SIZE);
	pmap_kremove((vaddr_t)entropy, PAGE_SIZE);
	km_free(entropy, PAGE_SIZE, &kv_any, &kp_none);
}

#ifndef NO_PROPOLICE
vaddr_t
hibernate_unprotect_ssp(void)
{
	struct kmem_dyn_mode kd_avoidalias;
	vaddr_t va = trunc_page((vaddr_t)&__guard_local);
	paddr_t pa;

	pmap_extract(pmap_kernel(), va, &pa);

	memset(&kd_avoidalias, 0, sizeof kd_avoidalias);
	kd_avoidalias.kd_prefer = pa;
	kd_avoidalias.kd_waitok = 1;
	va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_avoidalias);
	if (!va)
		panic("hibernate_unprotect_ssp");

	pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
	pmap_update(pmap_kernel());

	return va;
}

void
hibernate_reprotect_ssp(vaddr_t va)
{
	pmap_kremove(va, PAGE_SIZE);
	km_free((void *)va, PAGE_SIZE, &kv_any, &kp_none);
}
#endif /* NO_PROPOLICE */

/*
 * Reads the signature block from swap, checks against the current machine's
 * information. If the information matches, perform a resume by reading the
 * saved image into the pig area, and unpacking.
 *
 * Must be called with interrupts enabled.
 */
void
hibernate_resume(void)
{
	uint8_t buf[DEV_BSIZE];
	union hibernate_info *hib = (union hibernate_info *)&buf;
	int s;
#ifndef NO_PROPOLICE
	vsize_t off = (vaddr_t)&__guard_local -
	    trunc_page((vaddr_t)&__guard_local);
	vaddr_t guard_va;
#endif

	/* Get current running machine's hibernate info */
	memset(buf, 0, sizeof(buf));
	if (get_hibernate_info(hib, 0)) {
		DPRINTF("couldn't retrieve machine's hibernate info\n");
		return;
	}

	/* Read hibernate info from disk */
	s = splbio();

	bdsw = &bdevsw[major(hib->dev)];
	if ((*bdsw->d_open)(hib->dev, FREAD, S_IFCHR, curproc)) {
		printf("hibernate_resume device open failed\n");
		splx(s);
		return;
	}

	DPRINTF("reading hibernate signature block location: %lld\n",
	    hib->sig_offset);

	if (hibernate_block_io(hib,
	    hib->sig_offset,
	    hib->sec_size, (vaddr_t)&disk_hib, 0)) {
		DPRINTF("error in hibernate read\n");
		goto fail;
	}

	/* Check magic number */
	if (disk_hib.magic != HIBERNATE_MAGIC) {
		DPRINTF("wrong magic number in hibernate signature: %x\n",
		    disk_hib.magic);
		goto fail;
	}

	/*
	 * We (possibly) found a hibernate signature. Clear signature first,
	 * to prevent accidental resume or endless resume cycles later.
	 */
	if (hibernate_clear_signature(hib)) {
		DPRINTF("error clearing hibernate signature block\n");
		goto fail;
	}

	/*
	 * If on-disk and in-memory hibernate signatures match,
	 * this means we should do a resume from hibernate.
	 */
	if (hibernate_compare_signature(hib, &disk_hib)) {
		DPRINTF("mismatched hibernate signature block\n");
		goto fail;
	}
	disk_hib.dev = hib->dev;

#ifdef MULTIPROCESSOR
	/* XXX - if we fail later, we may need to rehatch APs on some archs */
	DPRINTF("hibernate: quiescing APs\n");
	hibernate_quiesce_cpus();
#endif /* MULTIPROCESSOR */

	/* Read the image from disk into the image (pig) area */
	if (hibernate_read_image(&disk_hib))
		goto fail;
	if ((*bdsw->d_close)(hib->dev, 0, S_IFCHR, curproc))
		printf("hibernate_resume device close failed\n");
	bdsw = NULL;

	DPRINTF("hibernate: quiescing devices\n");
	if (config_suspend_all(DVACT_QUIESCE) != 0)
		goto fail;

#ifndef NO_PROPOLICE
	guard_va = hibernate_unprotect_ssp();
#endif /* NO_PROPOLICE */

	(void) splhigh();
	hibernate_disable_intr_machdep();
	cold = 2;

	DPRINTF("hibernate: suspending devices\n");
	if (config_suspend_all(DVACT_SUSPEND) != 0) {
		cold = 0;
		hibernate_enable_intr_machdep();
#ifndef NO_PROPOLICE
		hibernate_reprotect_ssp(guard_va);
#endif /* ! NO_PROPOLICE */
		goto fail;
	}

	pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_start,
	    &retguard_start_phys);
	pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_end,
	    &retguard_end_phys);

	hibernate_preserve_entropy(&disk_hib);

	printf("Unpacking image...\n");

	/* Switch stacks */
	DPRINTF("hibernate: switching stacks\n");
	hibernate_switch_stack_machdep();

#ifndef NO_PROPOLICE
	/* Start using suspended kernel's propolice guard */
	*(long *)(guard_va + off) = disk_hib.guard;
	hibernate_reprotect_ssp(guard_va);
#endif /* ! NO_PROPOLICE */

	/* Unpack and resume */
	hibernate_unpack_image(&disk_hib);

fail:
	if (!bdsw)
		printf("\nUnable to resume hibernated image\n");
	else if ((*bdsw->d_close)(hib->dev, 0, S_IFCHR, curproc))
		printf("hibernate_resume device close failed\n");
	splx(s);
}

/*
 * Unpack image from pig area to original location by looping through the
 * list of output chunks in the order they should be restored (fchunks).
 *
 * Note that due to the stack smash protector and the fact that we have
 * switched stacks, it is not permitted to return from this function.
 */
void
hibernate_unpack_image(union hibernate_info *hib)
{
	uint8_t buf[DEV_BSIZE];
	struct hibernate_disk_chunk *chunks;
	union hibernate_info *local_hib = (union hibernate_info *)&buf;
	paddr_t image_cur = global_pig_start;
	short i, *fchunks;
	char *pva;

	/* Piglet will be identity mapped (VA == PA) */
	pva = (char *)hib->piglet_pa;

	fchunks = (short *)(pva + (4 * PAGE_SIZE));

	chunks = (struct hibernate_disk_chunk *)(pva + HIBERNATE_CHUNK_SIZE);

	/* Can't use hiber_info that's passed in after this point */
	memcpy(buf, hib, sizeof(buf));
	local_hib->retguard_ofs = 0;

	/* VA == PA */
	local_hib->piglet_va = local_hib->piglet_pa;

	/*
	 * Point of no return. Once we pass this point, only kernel code can
	 * be accessed. No global variables or other kernel data structures
	 * are guaranteed to be coherent after unpack starts.
	 *
	 * The image is now in high memory (pig area), we unpack from the pig
	 * to the correct location in memory. We'll eventually end up copying
	 * on top of ourself, but we are assured the kernel code here is the
	 * same between the hibernated and resuming kernel, and we are running
	 * on our own stack, so the overwrite is ok.
	 */
	DPRINTF("hibernate: activating alt. pagetable and starting unpack\n");
	hibernate_activate_resume_pt_machdep();

	for (i = 0; i < local_hib->chunk_ctr; i++) {
		/* Reset zlib for inflate */
		if (hibernate_zlib_reset(local_hib, 0) != Z_OK)
			panic("hibernate failed to reset zlib for inflate");

		hibernate_process_chunk(local_hib, &chunks[fchunks[i]],
		    image_cur);

		image_cur += chunks[fchunks[i]].compressed_size;
	}

	/*
	 * Resume the loaded kernel by jumping to the MD resume vector.
	 * We won't be returning from this call. We pass the location of
	 * the retguard save area so the MD code can replace it before
	 * resuming. See the piglet layout at the top of this file for
	 * more information on the layout of the piglet area.
	 *
	 * We use 'global_piglet_va' here since by the time we are at
	 * this point, we have already unpacked the image, and we want
	 * the suspended kernel's view of what the piglet was, before
	 * suspend occurred (since we will need to use that in the retguard
	 * copy code in hibernate_resume_machdep.)
	 */
	hibernate_resume_machdep(global_piglet_va + (110 * PAGE_SIZE));
}

/*
 * Bounce a compressed image chunk to the piglet, entering mappings for the
 * copied pages as needed
 */
void
hibernate_copy_chunk_to_piglet(paddr_t img_cur, vaddr_t piglet, size_t size)
{
	size_t ct, ofs;
	paddr_t src = img_cur;
	vaddr_t dest = piglet;

	/* Copy first partial page */
	ct = (PAGE_SIZE) - (src & PAGE_MASK);
	ofs = (src & PAGE_MASK);

	if (ct < PAGE_SIZE) {
		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE,
		    (src - ofs), 0);
		hibernate_flush();
		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE + ofs), (caddr_t)dest, ct);
		src += ct;
		dest += ct;
	}

	/* Copy remaining pages */
	while (src < size + img_cur) {
		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE, src, 0);
		hibernate_flush();
		ct = PAGE_SIZE;
		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE), (caddr_t)dest, ct);
		hibernate_flush();
		src += ct;
		dest += ct;
	}
}

/*
 * Process a chunk by bouncing it to the piglet, followed by unpacking
 */
void
hibernate_process_chunk(union hibernate_info *hib,
    struct hibernate_disk_chunk *chunk, paddr_t img_cur)
{
	char *pva = (char *)hib->piglet_va;

	hibernate_copy_chunk_to_piglet(img_cur,
	    (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)), chunk->compressed_size);
	hibernate_inflate_region(hib, chunk->base,
	    (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)),
	    chunk->compressed_size);
}

/*
 * Calculate RLE component for 'inaddr'. Clamps to max RLE pages between
 * inaddr and range_end.
 */
int
hibernate_calc_rle(paddr_t inaddr, paddr_t range_end)
{
	int rle;

	rle = uvm_page_rle(inaddr);
	KASSERT(rle >= 0 && rle <= MAX_RLE);

	/* Clamp RLE to range end */
	if (rle > 0 && inaddr + (rle * PAGE_SIZE) > range_end)
		rle = (range_end - inaddr) / PAGE_SIZE;

	return (rle);
}

/*
 * Write the RLE byte for page at 'inaddr' to the output stream.
 * Returns the number of pages to be skipped at 'inaddr'.
 */
int
hibernate_write_rle(union hibernate_info *hib, paddr_t inaddr,
    paddr_t range_end, daddr_t *blkctr,
    size_t *out_remaining)
{
	int rle, err, *rleloc;
	struct hibernate_zlib_state *hibernate_state;
	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	rle = hibernate_calc_rle(inaddr, range_end);

	rleloc = (int *)hibernate_rle_page + MAX_RLE - 1;
	*rleloc = rle;

	/* Deflate the RLE byte into the stream */
	hibernate_deflate(hib, (paddr_t)rleloc, out_remaining);

	/* Did we fill the output page? If so, flush to disk */
	if (*out_remaining == 0) {
		if ((err = hib->io_func(hib->dev, *blkctr + hib->image_offset,
		    (vaddr_t)hibernate_io_page, PAGE_SIZE, HIB_W,
		    hib->io_page))) {
			DPRINTF("hib write error %d\n", err);
			return (err);
		}

		*blkctr += PAGE_SIZE / DEV_BSIZE;
		*out_remaining = PAGE_SIZE;

		/* If we didn't deflate the entire RLE byte, finish it now */
		if (hibernate_state->hib_stream.avail_in != 0)
			hibernate_deflate(hib,
			    (vaddr_t)hibernate_state->hib_stream.next_in,
			    out_remaining);
	}

	return (rle);
}

/*
 * Write a compressed version of this machine's memory to disk, at the
 * precalculated swap offset:
 *
 * end of swap - signature block size - chunk table size - memory size
 *
 * The function begins by looping through each phys mem range, cutting each
 * one into MD sized chunks. These chunks are then compressed individually
 * and written out to disk, in phys mem order. Some chunks might compress
 * more than others, and for this reason, each chunk's size is recorded
 * in the chunk table, which is written to disk after the image has
 * properly been compressed and written (in hibernate_write_chunktable).
 *
 * When this function is called, the machine is nearly suspended - most
 * devices are quiesced/suspended, interrupts are off, and cold has
 * been set. This means that there can be no side effects once the
 * write has started, and the write function itself can also have no
 * side effects. This also means no printfs are permitted (since printf
 * has side effects.)
 *
 * Return values :
 *
 * 0      - success
 * EIO    - I/O error occurred writing the chunks
 * EINVAL - Failed to write a complete range
 * ENOMEM - Memory allocation failure during preparation of the zlib arena
 */
int
hibernate_write_chunks(union hibernate_info *hib)
{
	paddr_t range_base, range_end, inaddr, temp_inaddr;
	size_t out_remaining, used;
	struct hibernate_disk_chunk *chunks;
	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
	daddr_t blkctr = 0;
	int i, rle, err;
	struct hibernate_zlib_state *hibernate_state;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	hib->chunk_ctr = 0;

	/*
	 * Map the utility VAs to the piglet. See the piglet map at the
	 * top of this file for piglet layout information.
	 */
	hibernate_copy_page = hib->piglet_va + 3 * PAGE_SIZE;
	hibernate_rle_page = hib->piglet_va + 28 * PAGE_SIZE;

	chunks = (struct hibernate_disk_chunk *)(hib->piglet_va +
	    HIBERNATE_CHUNK_SIZE);

	/* Calculate the chunk regions */
	for (i = 0; i < hib->nranges; i++) {
		range_base = hib->ranges[i].base;
		range_end = hib->ranges[i].end;

		inaddr = range_base;

		while (inaddr < range_end) {
			chunks[hib->chunk_ctr].base = inaddr;
			if (inaddr + HIBERNATE_CHUNK_SIZE < range_end)
				chunks[hib->chunk_ctr].end = inaddr +
				    HIBERNATE_CHUNK_SIZE;
			else
				chunks[hib->chunk_ctr].end = range_end;

			inaddr += HIBERNATE_CHUNK_SIZE;
			hib->chunk_ctr++;
		}
	}

	uvm_pmr_dirty_everything();
	uvm_pmr_zero_everything();

	/* Compress and write the chunks in the chunktable */
	for (i = 0; i < hib->chunk_ctr; i++) {
		range_base = chunks[i].base;
		range_end = chunks[i].end;

		chunks[i].offset = blkctr + hib->image_offset;

		/* Reset zlib for deflate */
		if (hibernate_zlib_reset(hib, 1) != Z_OK) {
			DPRINTF("hibernate_zlib_reset failed for deflate\n");
			return (ENOMEM);
		}

		inaddr = range_base;

		/*
		 * For each range, loop through its phys mem region
		 * and write out the chunks (the last chunk might be
		 * smaller than the chunk size).
		 */
		while (inaddr < range_end) {
			out_remaining = PAGE_SIZE;
			while (out_remaining > 0 && inaddr < range_end) {
				/*
				 * Adjust for regions that are not evenly
				 * divisible by PAGE_SIZE or overflowed
				 * pages from the previous iteration.
				 */
				temp_inaddr = (inaddr & PAGE_MASK) +
				    hibernate_copy_page;

				/* Deflate from temp_inaddr to IO page */
				if (inaddr != range_end) {
					if (inaddr % PAGE_SIZE == 0) {
						rle = hibernate_write_rle(hib,
						    inaddr,
						    range_end,
						    &blkctr,
						    &out_remaining);
					}

					if (rle == 0) {
						pmap_kenter_pa(hibernate_temp_page,
						    inaddr & PMAP_PA_MASK,
						    PROT_READ);

						bcopy((caddr_t)hibernate_temp_page,
						    (caddr_t)hibernate_copy_page,
						    PAGE_SIZE);
						inaddr += hibernate_deflate(hib,
						    temp_inaddr,
						    &out_remaining);
					} else {
						inaddr += rle * PAGE_SIZE;
						if (inaddr > range_end)
							inaddr = range_end;
					}

				}

				if (out_remaining == 0) {
					/* Filled up the page */
					if ((err = hib->io_func(hib->dev,
					    blkctr + hib->image_offset,
					    (vaddr_t)hibernate_io_page,
					    PAGE_SIZE, HIB_W, hib->io_page))) {
						DPRINTF("hib write error %d\n",
						    err);
						return (err);
					}
					blkctr += PAGE_SIZE / DEV_BSIZE;
				}
			}
		}

		if (inaddr != range_end) {
			DPRINTF("deflate range ended prematurely\n");
			return (EINVAL);
		}

		/*
		 * End of range. Round up to next secsize bytes
		 * after finishing compress
		 */
		if (out_remaining == 0)
			out_remaining = PAGE_SIZE;

		/* Finish compress */
		hibernate_state->hib_stream.next_in = (unsigned char *)inaddr;
		hibernate_state->hib_stream.avail_in = 0;
		hibernate_state->hib_stream.next_out =
		    (unsigned char *)hibernate_io_page +
			(PAGE_SIZE - out_remaining);

		/* We have an extra output page available for finalize */
		hibernate_state->hib_stream.avail_out =
			out_remaining + PAGE_SIZE;

		if ((err = deflate(&hibernate_state->hib_stream, Z_FINISH)) !=
		    Z_STREAM_END) {
			DPRINTF("deflate error in output stream: %d\n", err);
			return (err);
		}

		out_remaining = hibernate_state->hib_stream.avail_out;

		/* Round up to next sector if needed */
		used = ROUNDUP(2 * PAGE_SIZE - out_remaining, hib->sec_size);

		/* Write final block(s) for this chunk */
		if ((err = hib->io_func(hib->dev, blkctr + hib->image_offset,
		    (vaddr_t)hibernate_io_page, used,
		    HIB_W, hib->io_page))) {
			DPRINTF("hib final write error %d\n", err);
			return (err);
		}

		blkctr += used / DEV_BSIZE;

		chunks[i].compressed_size = (blkctr + hib->image_offset -
		    chunks[i].offset) * DEV_BSIZE;
	}

	hib->chunktable_offset = hib->image_offset + blkctr;
	return (0);
}
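
/*
 * Illustrative sketch of the resulting on-disk layout within the swap
 * partition, as produced by this function together with
 * hibernate_write_chunktable() and hibernate_write_signature() above
 * (offsets are disk blocks relative to the start of swap):
 *
 *	image_offset		compressed chunks, in phys mem order
 *	chunktable_offset	chunk table (HIBERNATE_CHUNK_TABLE_SIZE),
 *				immediately after the image
 *	sig_offset		signature block (last block of swap)
 */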

/*
 * Reset the zlib stream state and allocate a new hiballoc area for either
 * inflate or deflate. This function is called once for each hibernate chunk.
 * Calling hiballoc_init multiple times is acceptable since the memory it is
 * provided is unmanaged memory (stolen). We use the memory provided to us
 * by the piglet allocated via the supplied hib.
 */
int
hibernate_zlib_reset(union hibernate_info *hib, int deflate)
{
	vaddr_t hibernate_zlib_start;
	size_t hibernate_zlib_size;
	char *pva = (char *)hib->piglet_va;
	struct hibernate_zlib_state *hibernate_state;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	if (!deflate)
		pva = (char *)((paddr_t)pva & (PIGLET_PAGE_MASK));

	/*
	 * See piglet layout information at the start of this file for
	 * information on the zlib page assignments.
	 */
	hibernate_zlib_start = (vaddr_t)(pva + (30 * PAGE_SIZE));
	hibernate_zlib_size = 80 * PAGE_SIZE;

	memset((void *)hibernate_zlib_start, 0, hibernate_zlib_size);
	memset(hibernate_state, 0, PAGE_SIZE);

	/* Set up stream structure */
	hibernate_state->hib_stream.zalloc = (alloc_func)hibernate_zlib_alloc;
	hibernate_state->hib_stream.zfree = (free_func)hibernate_zlib_free;

	/* Initialize the hiballoc arena for zlib allocs/frees */
	hiballoc_init(&hibernate_state->hiballoc_arena,
	    (caddr_t)hibernate_zlib_start, hibernate_zlib_size);

	if (deflate) {
		return deflateInit(&hibernate_state->hib_stream,
		    Z_BEST_SPEED);
	} else
		return inflateInit(&hibernate_state->hib_stream);
}

/*
 * Reads the hibernated memory image from disk, whose location and
 * size are recorded in hib. Begin by reading the persisted
 * chunk table, which records the original chunk placement location
 * and compressed size for each. Next, allocate a pig region of
 * sufficient size to hold the compressed image. Next, read the
 * chunks into the pig area (calling hibernate_read_chunks to do this),
 * and finally, if all of the above succeeds, clear the hibernate signature.
 * The function will then return to hibernate_resume, which will proceed
 * to unpack the pig image to the correct place in memory.
 */
int
hibernate_read_image(union hibernate_info *hib)
{
	size_t compressed_size, disk_size, chunktable_size, pig_sz;
	paddr_t image_start, image_end, pig_start, pig_end;
	struct hibernate_disk_chunk *chunks;
	daddr_t blkctr;
	vaddr_t chunktable = (vaddr_t)NULL;
	paddr_t piglet_chunktable = hib->piglet_pa +
	    HIBERNATE_CHUNK_SIZE;
	int i, status;

	status = 0;
	pmap_activate(curproc);

	/* Calculate total chunk table size in disk blocks */
	chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / DEV_BSIZE;

	blkctr = hib->chunktable_offset;

	chunktable = (vaddr_t)km_alloc(HIBERNATE_CHUNK_TABLE_SIZE, &kv_any,
	    &kp_none, &kd_nowait);

	if (!chunktable)
		return (1);

	/* Map chunktable pages */
	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE; i += PAGE_SIZE)
		pmap_kenter_pa(chunktable + i, piglet_chunktable + i,
		    PROT_READ | PROT_WRITE);
	pmap_update(pmap_kernel());

	/* Read the chunktable from disk into the piglet chunktable */
	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE;
	    i += MAXPHYS, blkctr += MAXPHYS/DEV_BSIZE)
		hibernate_block_io(hib, blkctr, MAXPHYS,
		    chunktable + i, 0);

	blkctr = hib->image_offset;
	compressed_size = 0;

	chunks = (struct hibernate_disk_chunk *)chunktable;

	for (i = 0; i < hib->chunk_ctr; i++)
		compressed_size += chunks[i].compressed_size;

	disk_size = compressed_size;

	printf("unhibernating @ block %lld length %luMB\n",
	    hib->sig_offset - chunktable_size,
	    compressed_size / (1024 * 1024));

	/* Allocate the pig area */
	pig_sz = compressed_size + HIBERNATE_CHUNK_SIZE;
	if (uvm_pmr_alloc_pig(&pig_start, pig_sz, hib->piglet_pa) == ENOMEM) {
		status = 1;
		goto unmap;
	}

	pig_end = pig_start + pig_sz;

	/* Calculate image extents. Pig image must end on a chunk boundary. */
	image_end = pig_end & ~(HIBERNATE_CHUNK_SIZE - 1);
	image_start = image_end - disk_size;

	hibernate_read_chunks(hib, image_start, image_end, disk_size,
	    chunks);

	/* Prepare the resume time pmap/page table */
	hibernate_populate_resume_pt(hib, image_start, image_end);

unmap:
	/* Unmap chunktable pages */
	pmap_kremove(chunktable, HIBERNATE_CHUNK_TABLE_SIZE);
	pmap_update(pmap_kernel());

	return (status);
}

/*
 * Read the hibernated memory chunks from disk (chunk information at this
 * point is stored in the piglet) into the pig area specified by
 * [pig_start .. pig_end]. Order the chunks so that the final chunk is the
 * only chunk with overlap possibilities.
 */
int
hibernate_read_chunks(union hibernate_info *hib, paddr_t pig_start,
    paddr_t pig_end, size_t image_compr_size,
    struct hibernate_disk_chunk *chunks)
{
	paddr_t img_cur, piglet_base;
	daddr_t blkctr;
	size_t processed, compressed_size, read_size;
	int nchunks, nfchunks, num_io_pages;
	vaddr_t tempva, hibernate_fchunk_area;
	short *fchunks, i, j;

	tempva = (vaddr_t)NULL;
	hibernate_fchunk_area = (vaddr_t)NULL;
	nfchunks = 0;
	piglet_base = hib->piglet_pa;
	global_pig_start = pig_start;

	/*
	 * These mappings go into the resuming kernel's page table, and are
	 * used only during image read. They disappear from existence
	 * when the suspended kernel is unpacked on top of us.
	 */
	tempva = (vaddr_t)km_alloc(MAXPHYS + PAGE_SIZE, &kv_any, &kp_none,
	    &kd_nowait);
	if (!tempva)
		return (1);
	hibernate_fchunk_area = (vaddr_t)km_alloc(24 * PAGE_SIZE, &kv_any,
	    &kp_none, &kd_nowait);
	if (!hibernate_fchunk_area)
		return (1);

	/* Final output chunk ordering VA */
	fchunks = (short *)hibernate_fchunk_area;

	/* Map the chunk ordering region */
	for (i = 0; i < 24; i++)
		pmap_kenter_pa(hibernate_fchunk_area + (i * PAGE_SIZE),
		    piglet_base + ((4 + i) * PAGE_SIZE),
		    PROT_READ | PROT_WRITE);
	pmap_update(pmap_kernel());

	nchunks = hib->chunk_ctr;

	/* Initially start all chunks as unplaced */
	for (i = 0; i < nchunks; i++)
		chunks[i].flags = 0;

	/*
	 * Search the list for chunks that are outside the pig area. These
	 * can be placed first in the final output list.
	 */
	for (i = 0; i < nchunks; i++) {
		if (chunks[i].end <= pig_start || chunks[i].base >= pig_end) {
			fchunks[nfchunks] = i;
			nfchunks++;
			chunks[i].flags |= HIBERNATE_CHUNK_PLACED;
		}
	}

	/*
	 * Walk the ordering, place the chunks in ascending memory order.
	 */
	for (i = 0; i < nchunks; i++) {
		if (chunks[i].flags != HIBERNATE_CHUNK_PLACED) {
			fchunks[nfchunks] = i;
			nfchunks++;
			chunks[i].flags = HIBERNATE_CHUNK_PLACED;
		}
	}

	img_cur = pig_start;

	for (i = 0; i < nfchunks; i++) {
		blkctr = chunks[fchunks[i]].offset;
		processed = 0;
		compressed_size = chunks[fchunks[i]].compressed_size;

		while (processed < compressed_size) {
			if (compressed_size - processed >= MAXPHYS)
				read_size = MAXPHYS;
			else
				read_size = compressed_size - processed;

			/*
			 * We're reading read_size bytes, offset from the
			 * start of a page by img_cur % PAGE_SIZE, so the
			 * end will be read_size + (img_cur % PAGE_SIZE)
			 * from the start of the first page. Round that
			 * up to the next page size.
			 */
			num_io_pages = (read_size + (img_cur % PAGE_SIZE)
			    + PAGE_SIZE - 1) / PAGE_SIZE;

			KASSERT(num_io_pages <= MAXPHYS/PAGE_SIZE + 1);

			/* Map pages for this read */
			for (j = 0; j < num_io_pages; j++)
				pmap_kenter_pa(tempva + j * PAGE_SIZE,
				    img_cur + j * PAGE_SIZE,
				    PROT_READ | PROT_WRITE);

			pmap_update(pmap_kernel());

			hibernate_block_io(hib, blkctr, read_size,
			    tempva + (img_cur & PAGE_MASK), 0);

			blkctr += (read_size / DEV_BSIZE);

			pmap_kremove(tempva, num_io_pages * PAGE_SIZE);
			pmap_update(pmap_kernel());

			processed += read_size;
			img_cur += read_size;
		}
	}

	pmap_kremove(hibernate_fchunk_area, 24 * PAGE_SIZE);
	pmap_update(pmap_kernel());

	return (0);
}

/*
 * Hibernating a machine comprises the following operations:
 *  1. Calculating this machine's hibernate_info information
 *  2. Allocating a piglet and saving the piglet's physaddr
 *  3. Calculating the memory chunks
 *  4. Writing the compressed chunks to disk
 *  5. Writing the chunk table
 *  6. Writing the signature block (hibernate_info)
 *
 * On most architectures, the function calling hibernate_suspend would
 * then power off the machine using some MD-specific implementation.
 */
int
hibernate_suspend(void)
{
	uint8_t buf[DEV_BSIZE];
	union hibernate_info *hib = (union hibernate_info *)&buf;
	u_long start, end;

	/*
	 * Calculate memory ranges, swap offsets, etc.
	 * This also allocates a piglet whose physaddr is stored in
	 * hib->piglet_pa and vaddr stored in hib->piglet_va
	 */
	if (get_hibernate_info(hib, 1)) {
		DPRINTF("failed to obtain hibernate info\n");
		return (1);
	}

	/* Find a page-addressed region in swap [start,end] */
	if (uvm_hibswap(hib->dev, &start, &end)) {
		printf("hibernate: cannot find any swap\n");
		return (1);
	}

	if (end - start < 1000) {
		printf("hibernate: insufficient swap (%lu is too small)\n",
		    end - start + 1);
		return (1);
	}

	pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_start,
	    &retguard_start_phys);
	pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_end,
	    &retguard_end_phys);

	/* Calculate block offsets in swap */
	hib->image_offset = ctod(start);

	DPRINTF("hibernate @ block %lld max-length %lu blocks\n",
	    hib->image_offset, ctod(end) - ctod(start) + 1);

	pmap_activate(curproc);
	DPRINTF("hibernate: writing chunks\n");
	if (hibernate_write_chunks(hib)) {
		DPRINTF("hibernate_write_chunks failed\n");
		return (1);
	}

	DPRINTF("hibernate: writing chunktable\n");
	if (hibernate_write_chunktable(hib)) {
		DPRINTF("hibernate_write_chunktable failed\n");
		return (1);
	}

	DPRINTF("hibernate: writing signature\n");
	if (hibernate_write_signature(hib)) {
		DPRINTF("hibernate_write_signature failed\n");
		return (1);
	}

	/* Allow the disk to settle */
	delay(500000);

	/*
	 * Give the device-specific I/O function a notification that we're
	 * done, and that it can clean up or shutdown as needed.
	 */
	hib->io_func(hib->dev, 0, (vaddr_t)NULL, 0, HIB_DONE, hib->io_page);
	return (0);
}
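
/*
 * Illustrative sketch (not compiled): the rough shape of an MD suspend
 * path around hibernate_suspend(), per the numbered steps above. The
 * names md_hibernate_example and md_powerdown are hypothetical
 * placeholders; real MD code also quiesces devices and disables
 * interrupts before writing the image.
 */
#if 0
void
md_hibernate_example(void)
{
	if (hibernate_alloc() != 0)	/* piglet + utility pages */
		return;
	/* ... quiesce/suspend devices, splhigh, set cold ... */
	if (hibernate_suspend() == 0)
		md_powerdown();		/* hypothetical MD power-off */
	/* on failure: re-enable interrupts, resume devices */
	hibernate_free();
}
#endif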

int
hibernate_alloc(void)
{
	KASSERT(global_piglet_va == 0);
	KASSERT(hibernate_temp_page == 0);

	pmap_activate(curproc);
	pmap_kenter_pa(HIBERNATE_HIBALLOC_PAGE, HIBERNATE_HIBALLOC_PAGE,
	    PROT_READ | PROT_WRITE);

	/* Allocate a piglet, store its addresses in the supplied globals */
	if (uvm_pmr_alloc_piglet(&global_piglet_va, &global_piglet_pa,
	    HIBERNATE_CHUNK_SIZE * 4, HIBERNATE_CHUNK_SIZE))
		goto unmap;

	/*
	 * Allocate VA for the temp page.
	 *
	 * This will become part of the suspended kernel and will
	 * be freed in hibernate_free, upon resume (or hibernate
	 * failure)
	 */
	hibernate_temp_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any,
	    &kp_none, &kd_nowait);
	if (!hibernate_temp_page) {
		uvm_pmr_free_piglet(global_piglet_va, 4 * HIBERNATE_CHUNK_SIZE);
		global_piglet_va = 0;
		goto unmap;
	}
	return (0);
unmap:
	pmap_kremove(HIBERNATE_HIBALLOC_PAGE, PAGE_SIZE);
	pmap_update(pmap_kernel());
	return (ENOMEM);
}

/*
 * Free items allocated by hibernate_alloc()
 */
void
hibernate_free(void)
{
	pmap_activate(curproc);

	if (global_piglet_va)
		uvm_pmr_free_piglet(global_piglet_va,
		    4 * HIBERNATE_CHUNK_SIZE);

	if (hibernate_temp_page) {
		pmap_kremove(hibernate_temp_page, PAGE_SIZE);
		km_free((void *)hibernate_temp_page, PAGE_SIZE,
		    &kv_any, &kp_none);
	}

	global_piglet_va = 0;
	hibernate_temp_page = 0;
	pmap_kremove(HIBERNATE_HIBALLOC_PAGE, PAGE_SIZE);
	pmap_update(pmap_kernel());
}