xref: /openbsd/sys/kern/subr_hibernate.c (revision 2fbc8e98)
1 /*	$OpenBSD: subr_hibernate.c,v 1.152 2025/01/24 18:13:29 krw Exp $	*/
2 
3 /*
4  * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
5  * Copyright (c) 2011 Mike Larkin <mlarkin@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/hibernate.h>
21 #include <sys/malloc.h>
22 #include <sys/param.h>
23 #include <sys/tree.h>
24 #include <sys/systm.h>
25 #include <sys/disklabel.h>
26 #include <sys/disk.h>
27 #include <sys/conf.h>
28 #include <sys/buf.h>
29 #include <sys/fcntl.h>
30 #include <sys/stat.h>
31 #include <sys/atomic.h>
32 
33 #include <uvm/uvm.h>
34 #include <uvm/uvm_swap.h>
35 
36 #include <machine/hibernate.h>
37 
38 /* Make sure the signature can fit in one block */
39 CTASSERT((offsetof(union hibernate_info, sec_size) + sizeof(u_int32_t)) <= DEV_BSIZE);
40 
41 /*
42  * Hibernate piglet layout information
43  *
44  * The piglet is a scratch area of memory allocated by the suspending kernel.
45  * Its phys and virt addrs are recorded in the signature block. The piglet is
46  * used to guarantee an unused area of memory that can be used by the resuming
47  * kernel for various things. The piglet is excluded during unpack operations.
48  * The piglet size is presently 4*HIBERNATE_CHUNK_SIZE (typically 4*4MB).
49  *
50  * Offset from piglet_base	Purpose
51  * ----------------------------------------------------------------------------
52  * 0				Private page for suspend I/O write functions
53  * 1*PAGE_SIZE			I/O page used during hibernate suspend
54  * 2*PAGE_SIZE			I/O page used during hibernate suspend
55  * 3*PAGE_SIZE			copy page used during hibernate suspend
56  * 4*PAGE_SIZE			final chunk ordering list (24 pages)
57  * 28*PAGE_SIZE			RLE utility page
58  * 29*PAGE_SIZE			preserved entropy
59  * 30*PAGE_SIZE			start of hiballoc area
60  * 110*PAGE_SIZE		end of hiballoc area (80 pages)
61  * 366*PAGE_SIZE		end of retguard preservation region (256 pages)
62  * ...				unused
63  * HIBERNATE_CHUNK_SIZE		start of hibernate chunk table
64  * 2*HIBERNATE_CHUNK_SIZE	bounce area for chunks being unpacked
65  * 4*HIBERNATE_CHUNK_SIZE	end of piglet
66  */
67 
68 /* Temporary vaddr ranges used during hibernate */
69 vaddr_t hibernate_temp_page;
70 vaddr_t hibernate_copy_page;
71 vaddr_t hibernate_rle_page;
72 
73 /* Hibernate info as read from disk during resume */
74 union hibernate_info disk_hib;
75 struct bdevsw *bdsw;
76 
77 /*
78  * Global copy of the pig start address. This needs to be a global as we
79  * switch stacks after computing it - it can't be stored on the stack.
80  */
81 paddr_t global_pig_start;
82 
83 /*
84  * Global copies of the piglet start addresses (PA/VA). We store these
85  * as globals to avoid having to carry them around as parameters, as the
86  * piglet is allocated early and freed late - its lifecycle extends beyond
87  * that of the hibernate info union which is calculated on suspend/resume.
88  */
89 vaddr_t global_piglet_va;
90 paddr_t global_piglet_pa;
91 
92 /* #define HIB_DEBUG */
93 #ifdef HIB_DEBUG
94 int	hib_debug = 99;
95 #define DPRINTF(x...)     do { if (hib_debug) printf(x); } while (0)
96 #define DNPRINTF(n,x...)  do { if (hib_debug > (n)) printf(x); } while (0)
97 #else
98 #define DPRINTF(x...)
99 #define DNPRINTF(n,x...)
100 #endif
101 
102 #ifndef NO_PROPOLICE
103 extern long __guard_local;
104 #endif /* ! NO_PROPOLICE */
105 
106 /* Retguard phys address (need to skip this region during unpack) */
107 paddr_t retguard_start_phys, retguard_end_phys;
108 extern char __retguard_start, __retguard_end;
109 
110 void hibernate_copy_chunk_to_piglet(paddr_t, vaddr_t, size_t);
111 int hibernate_calc_rle(paddr_t, paddr_t);
112 int hibernate_write_rle(union hibernate_info *, paddr_t, paddr_t, daddr_t *,
113 	size_t *);
114 
115 #define MAX_RLE (HIBERNATE_CHUNK_SIZE / PAGE_SIZE)
116 
117 /*
118  * Hib alloc enforced alignment.
119  */
120 #define HIB_ALIGN		8 /* bytes alignment */
121 
122 /*
123  * sizeof builtin operation, but with alignment constraint.
124  */
125 #define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)
126 
/*
 * Bookkeeping header placed directly in front of every hiballoc allocation.
 *
 * The memory an entry manages starts HIB_SIZEOF(struct hiballoc_entry) bytes
 * after the entry itself: first hibe_use bytes handed out to the caller,
 * then hibe_space bytes of free space.
 */
127 struct hiballoc_entry {
128 	size_t			hibe_use;	/* bytes in use by the allocation */
129 	size_t			hibe_space;	/* free bytes following the used bytes */
130 	RBT_ENTRY(hiballoc_entry) hibe_entry;	/* linkage in the arena's address tree */
131 };
132 
/* Region selectors passed as the io_type argument of hibernate_write(). */
133 #define IO_TYPE_IMG 1	/* write relative to hib->image_offset */
134 #define IO_TYPE_CHK 2	/* write relative to hib->chunktable_offset */
135 #define IO_TYPE_SIG 3	/* write of the signature block at hib->sig_offset */
136 
137 int
hibernate_write(union hibernate_info * hib,daddr_t offset,vaddr_t addr,size_t size,int io_type)138 hibernate_write(union hibernate_info *hib, daddr_t offset, vaddr_t addr,
139     size_t size, int io_type)
140 {
141 	const uint64_t blks = btodb(size);
142 
143 	if (hib == NULL || offset < 0 || blks == 0) {
144 		printf("%s: hib is NULL, offset < 0 or blks == 0\n", __func__);
145 		return (EINVAL);
146 	}
147 
148 	switch (io_type) {
149 	case IO_TYPE_IMG:
150 		if (offset + blks > hib->image_size) {
151 			printf("%s: image write is out of bounds: "
152 			    "offset-image=%lld, offset-write=%lld, blks=%llu\n",
153 			    __func__, hib->image_offset, offset, blks);
154 			return (EIO);
155 		}
156 		offset += hib->image_offset;
157 		break;
158 	case IO_TYPE_CHK:
159 		if (offset + blks > btodb(HIBERNATE_CHUNK_TABLE_SIZE)) {
160 			printf("%s: chunktable write is out of bounds: "
161 			    "offset-chunk=%lld, offset-write=%lld, blks=%llu\n",
162 			    __func__, hib->chunktable_offset, offset, blks);
163 			return (EIO);
164 		}
165 		offset += hib->chunktable_offset;
166 		break;
167 	case IO_TYPE_SIG:
168 		if (offset != hib->sig_offset || size != hib->sec_size) {
169 			printf("%s: signature write is out of bounds: "
170 			    "offset-sig=%lld, offset-write=%lld, blks=%llu\n",
171 			    __func__, hib->sig_offset, offset, blks);
172 			return (EIO);
173 		}
174 		break;
175 	default:
176 		printf("%s: unsupported io type %d\n", __func__, io_type);
177 		return (EINVAL);
178 	}
179 
180 	return (hib->io_func(hib->dev, offset, addr, size, HIB_W,
181 	    hib->io_page));
182 }
183 
184 /*
185  * Sort hibernate memory ranges by ascending PA
186  */
187 void
hibernate_sort_ranges(union hibernate_info * hib_info)188 hibernate_sort_ranges(union hibernate_info *hib_info)
189 {
190 	int i, j;
191 	struct hibernate_memory_range *ranges;
192 	paddr_t base, end;
193 
194 	ranges = hib_info->ranges;
195 
196 	for (i = 1; i < hib_info->nranges; i++) {
197 		j = i;
198 		while (j > 0 && ranges[j - 1].base > ranges[j].base) {
199 			base = ranges[j].base;
200 			end = ranges[j].end;
201 			ranges[j].base = ranges[j - 1].base;
202 			ranges[j].end = ranges[j - 1].end;
203 			ranges[j - 1].base = base;
204 			ranges[j - 1].end = end;
205 			j--;
206 		}
207 	}
208 }
209 
210 /*
211  * Compare hiballoc entries based on the address they manage.
212  *
213  * Since the address is fixed, relative to struct hiballoc_entry,
214  * we just compare the hiballoc_entry pointers.
215  */
216 static __inline int
hibe_cmp(const struct hiballoc_entry * l,const struct hiballoc_entry * r)217 hibe_cmp(const struct hiballoc_entry *l, const struct hiballoc_entry *r)
218 {
219 	vaddr_t vl = (vaddr_t)l;
220 	vaddr_t vr = (vaddr_t)r;
221 
222 	return vl < vr ? -1 : (vl > vr);
223 }
224 
/*
 * Declare the hiballoc_addr red-black tree over struct hiballoc_entry,
 * linked through hibe_entry and ordered by hibe_cmp.
 */
RBT_PROTOTYPE(hiballoc_addr,hiballoc_entry,hibe_entry,hibe_cmp)225 RBT_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
226 
227 /*
228  * Given a hiballoc entry, return the address it manages.
229  */
230 static __inline void *
231 hib_entry_to_addr(struct hiballoc_entry *entry)
232 {
233 	caddr_t addr;
234 
235 	addr = (caddr_t)entry;
236 	addr += HIB_SIZEOF(struct hiballoc_entry);
237 	return addr;
238 }
239 
240 /*
241  * Given an address, find the hiballoc that corresponds.
242  */
243 static __inline struct hiballoc_entry*
hib_addr_to_entry(void * addr_param)244 hib_addr_to_entry(void *addr_param)
245 {
246 	caddr_t addr;
247 
248 	addr = (caddr_t)addr_param;
249 	addr -= HIB_SIZEOF(struct hiballoc_entry);
250 	return (struct hiballoc_entry*)addr;
251 }
252 
/*
 * Emit the hiballoc_addr red-black tree implementation (entries linked
 * through hibe_entry, ordered by hibe_cmp).
 */
253 RBT_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp);
254 
255 /*
256  * Allocate memory from the arena.
257  *
258  * Returns NULL if no memory is available.
259  */
260 void *
hib_alloc(struct hiballoc_arena * arena,size_t alloc_sz)261 hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz)
262 {
263 	struct hiballoc_entry *entry, *new_entry;
264 	size_t find_sz;
265 
266 	/*
267 	 * Enforce alignment of HIB_ALIGN bytes.
268 	 *
269 	 * Note that, because the entry is put in front of the allocation,
270 	 * 0-byte allocations are guaranteed a unique address.
271 	 */
272 	alloc_sz = roundup(alloc_sz, HIB_ALIGN);
273 
274 	/*
275 	 * Find an entry with hibe_space >= find_sz.
276 	 *
277 	 * If the root node is not large enough, we switch to tree traversal.
278 	 * Because all entries are made at the bottom of the free space,
279 	 * traversal from the end has a slightly better chance of yielding
280 	 * a sufficiently large space.
281 	 */
282 	find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry);
283 	entry = RBT_ROOT(hiballoc_addr, &arena->hib_addrs);
284 	if (entry != NULL && entry->hibe_space < find_sz) {
285 		RBT_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) {
286 			if (entry->hibe_space >= find_sz)
287 				break;
288 		}
289 	}
290 
291 	/*
292 	 * Insufficient or too fragmented memory.
293 	 */
294 	if (entry == NULL)
295 		return NULL;
296 
297 	/*
298 	 * Create new entry in allocated space.
299 	 */
300 	new_entry = (struct hiballoc_entry*)(
301 	    (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use);
302 	new_entry->hibe_space = entry->hibe_space - find_sz;
303 	new_entry->hibe_use = alloc_sz;
304 
305 	/*
306 	 * Insert entry.
307 	 */
308 	if (RBT_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL)
309 		panic("hib_alloc: insert failure");
310 	entry->hibe_space = 0;
311 
312 	/* Return address managed by entry. */
313 	return hib_entry_to_addr(new_entry);
314 }
315 
316 void
hib_getentropy(char ** bufp,size_t * bufplen)317 hib_getentropy(char **bufp, size_t *bufplen)
318 {
319 	if (!bufp || !bufplen)
320 		return;
321 
322 	*bufp = (char *)(global_piglet_va + (29 * PAGE_SIZE));
323 	*bufplen = PAGE_SIZE;
324 }
325 
326 /*
327  * Free a pointer previously allocated from this arena.
328  *
329  * If addr is NULL, this will be silently accepted.
330  */
331 void
hib_free(struct hiballoc_arena * arena,void * addr)332 hib_free(struct hiballoc_arena *arena, void *addr)
333 {
334 	struct hiballoc_entry *entry, *prev;
335 
336 	if (addr == NULL)
337 		return;
338 
339 	/*
340 	 * Derive entry from addr and check it is really in this arena.
341 	 */
342 	entry = hib_addr_to_entry(addr);
343 	if (RBT_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry)
344 		panic("hib_free: freed item %p not in hib arena", addr);
345 
346 	/*
347 	 * Give the space in entry to its predecessor.
348 	 *
349 	 * If entry has no predecessor, change its used space into free space
350 	 * instead.
351 	 */
352 	prev = RBT_PREV(hiballoc_addr, entry);
353 	if (prev != NULL &&
354 	    (void *)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) +
355 	    prev->hibe_use + prev->hibe_space) == entry) {
356 		/* Merge entry. */
357 		RBT_REMOVE(hiballoc_addr, &arena->hib_addrs, entry);
358 		prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) +
359 		    entry->hibe_use + entry->hibe_space;
360 	} else {
361 		/* Flip used memory to free space. */
362 		entry->hibe_space += entry->hibe_use;
363 		entry->hibe_use = 0;
364 	}
365 }
366 
367 /*
368  * Initialize hiballoc.
369  *
370  * The allocator will manage memory at ptr, which is len bytes.
371  */
372 int
hiballoc_init(struct hiballoc_arena * arena,void * p_ptr,size_t p_len)373 hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len)
374 {
375 	struct hiballoc_entry *entry;
376 	caddr_t ptr;
377 	size_t len;
378 
379 	RBT_INIT(hiballoc_addr, &arena->hib_addrs);
380 
381 	/*
382 	 * Hib allocator enforces HIB_ALIGN alignment.
383 	 * Fixup ptr and len.
384 	 */
385 	ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN);
386 	len = p_len - ((size_t)ptr - (size_t)p_ptr);
387 	len &= ~((size_t)HIB_ALIGN - 1);
388 
389 	/*
390 	 * Insufficient memory to be able to allocate and also do bookkeeping.
391 	 */
392 	if (len <= HIB_SIZEOF(struct hiballoc_entry))
393 		return ENOMEM;
394 
395 	/*
396 	 * Create entry describing space.
397 	 */
398 	entry = (struct hiballoc_entry*)ptr;
399 	entry->hibe_use = 0;
400 	entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry);
401 	RBT_INSERT(hiballoc_addr, &arena->hib_addrs, entry);
402 
403 	return 0;
404 }
405 
406 /*
407  * Mark all memory as dirty.
408  *
409  * Used to inform the system that there are no pre-zero'd (PG_ZERO) free pages
410  * when we came back from hibernate.
411  */
412 void
uvm_pmr_dirty_everything(void)413 uvm_pmr_dirty_everything(void)
414 {
415 	struct uvm_pmemrange	*pmr;
416 	struct vm_page		*pg;
417 	int			 i;
418 
419 	uvm_lock_fpageq();
420 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
421 		/* Dirty single pages. */
422 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO]))
423 		    != NULL) {
424 			uvm_pmr_remove(pmr, pg);
425 			atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
426 			uvm_pmr_insert(pmr, pg, 0);
427 		}
428 
429 		/* Dirty multi page ranges. */
430 		while ((pg = RBT_ROOT(uvm_pmr_size,
431 		    &pmr->size[UVM_PMR_MEMTYPE_ZERO])) != NULL) {
432 			pg--; /* Size tree always has second page. */
433 			uvm_pmr_remove(pmr, pg);
434 			for (i = 0; i < pg->fpgsz; i++)
435 				atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
436 			uvm_pmr_insert(pmr, pg, 0);
437 		}
438 	}
439 
440 	uvmexp.zeropages = 0;
441 	uvm_unlock_fpageq();
442 }
443 
444 /*
445  * Allocate an area that can hold sz bytes and doesn't overlap with
446  * the piglet at piglet_pa.
447  */
448 int
uvm_pmr_alloc_pig(paddr_t * pa,psize_t sz,paddr_t piglet_pa)449 uvm_pmr_alloc_pig(paddr_t *pa, psize_t sz, paddr_t piglet_pa)
450 {
451 	struct uvm_constraint_range pig_constraint;
452 	struct kmem_pa_mode kp_pig = {
453 		.kp_constraint = &pig_constraint,
454 		.kp_maxseg = 1
455 	};
456 	vaddr_t va;
457 
458 	sz = round_page(sz);
459 
460 	pig_constraint.ucr_low = piglet_pa + 4 * HIBERNATE_CHUNK_SIZE;
461 	pig_constraint.ucr_high = -1;
462 
463 	va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait);
464 	if (va == 0) {
465 		pig_constraint.ucr_low = 0;
466 		pig_constraint.ucr_high = piglet_pa - 1;
467 
468 		va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait);
469 		if (va == 0)
470 			return ENOMEM;
471 	}
472 
473 	pmap_extract(pmap_kernel(), va, pa);
474 	return 0;
475 }
476 
477 /*
478  * Allocate a piglet area.
479  *
480  * This needs to be in DMA-safe memory.
481  * Piglets are aligned.
482  *
483  * sz and align in bytes.
484  */
485 int
uvm_pmr_alloc_piglet(vaddr_t * va,paddr_t * pa,vsize_t sz,paddr_t align)486 uvm_pmr_alloc_piglet(vaddr_t *va, paddr_t *pa, vsize_t sz, paddr_t align)
487 {
488 	struct kmem_pa_mode kp_piglet = {
489 		.kp_constraint = &dma_constraint,
490 		.kp_align = align,
491 		.kp_maxseg = 1
492 	};
493 
494 	/* Ensure align is a power of 2 */
495 	KASSERT((align & (align - 1)) == 0);
496 
497 	/*
498 	 * Fixup arguments: align must be at least PAGE_SIZE,
499 	 * sz will be converted to pagecount, since that is what
500 	 * pmemrange uses internally.
501 	 */
502 	if (align < PAGE_SIZE)
503 		kp_piglet.kp_align = PAGE_SIZE;
504 
505 	sz = round_page(sz);
506 
507 	*va = (vaddr_t)km_alloc(sz, &kv_any, &kp_piglet, &kd_nowait);
508 	if (*va == 0)
509 		return ENOMEM;
510 
511 	pmap_extract(pmap_kernel(), *va, pa);
512 	return 0;
513 }
514 
515 /*
516  * Free a piglet area.
517  */
518 void
uvm_pmr_free_piglet(vaddr_t va,vsize_t sz)519 uvm_pmr_free_piglet(vaddr_t va, vsize_t sz)
520 {
521 	/*
522 	 * Fix parameters.
523 	 */
524 	sz = round_page(sz);
525 
526 	/*
527 	 * Free the physical and virtual memory.
528 	 */
529 	km_free((void *)va, sz, &kv_any, &kp_dma_contig);
530 }
531 
532 /*
533  * Physmem RLE compression support.
534  *
535  * Given a physical page address, return the number of pages starting at the
536  * address that are free.  Clamps to the number of pages in
537  * HIBERNATE_CHUNK_SIZE. Returns 0 if the page at addr is not free.
538  */
539 int
uvm_page_rle(paddr_t addr)540 uvm_page_rle(paddr_t addr)
541 {
542 	struct vm_page		*pg, *pg_end;
543 	struct vm_physseg	*vmp;
544 	int			 pseg_idx, off_idx;
545 
546 	pseg_idx = vm_physseg_find(atop(addr), &off_idx);
547 	if (pseg_idx == -1)
548 		return 0;
549 
550 	vmp = &vm_physmem[pseg_idx];
551 	pg = &vmp->pgs[off_idx];
552 	if (!(pg->pg_flags & PQ_FREE))
553 		return 0;
554 
555 	/*
556 	 * Search for the first non-free page after pg.
557 	 * Note that the page may not be the first page in a free pmemrange,
558 	 * therefore pg->fpgsz cannot be used.
559 	 */
560 	for (pg_end = pg; pg_end <= vmp->lastpg &&
561 	    (pg_end->pg_flags & PQ_FREE) == PQ_FREE &&
562 	    (pg_end - pg) < HIBERNATE_CHUNK_SIZE/PAGE_SIZE; pg_end++)
563 		;
564 	return pg_end - pg;
565 }
566 
567 /*
568  * Fills out the hibernate_info union pointed to by hib
569  * with information about this machine (swap signature block
570  * offsets, number of memory ranges, kernel in use, etc)
571  */
572 int
get_hibernate_info(union hibernate_info * hib,int suspend)573 get_hibernate_info(union hibernate_info *hib, int suspend)
574 {
575 	struct disklabel dl;
576 	char err_string[128], *dl_ret;
577 	int part;
578 	SHA2_CTX ctx;
579 	void *fn;
580 
581 #ifndef NO_PROPOLICE
582 	/* Save propolice guard */
583 	hib->guard = __guard_local;
584 #endif /* ! NO_PROPOLICE */
585 
586 	/* Determine I/O function to use */
587 	hib->io_func = get_hibernate_io_function(swdevt[0]);
588 	if (hib->io_func == NULL)
589 		return (1);
590 
591 	/* Calculate hibernate device */
592 	hib->dev = swdevt[0];
593 
594 	/* Read disklabel (used to calculate signature and image offsets) */
595 	dl_ret = disk_readlabel(&dl, hib->dev, err_string, sizeof(err_string));
596 
597 	if (dl_ret) {
598 		printf("Hibernate error reading disklabel: %s\n", dl_ret);
599 		return (1);
600 	}
601 
602 	/* Make sure we have a swap partition. */
603 	part = DISKPART(hib->dev);
604 	if (dl.d_npartitions <= part ||
605 	    dl.d_secsize > sizeof(union hibernate_info) ||
606 	    dl.d_partitions[part].p_fstype != FS_SWAP ||
607 	    DL_GETPSIZE(&dl.d_partitions[part]) == 0)
608 		return (1);
609 
610 	/* Magic number */
611 	hib->magic = HIBERNATE_MAGIC;
612 
613 	/* Calculate signature block location */
614 	hib->sec_size = dl.d_secsize;
615 	hib->sig_offset = DL_GETPSIZE(&dl.d_partitions[part]) - 1;
616 	hib->sig_offset = DL_SECTOBLK(&dl, hib->sig_offset);
617 
618 	SHA256Init(&ctx);
619 	SHA256Update(&ctx, version, strlen(version));
620 	fn = printf;
621 	SHA256Update(&ctx, &fn, sizeof(fn));
622 	fn = malloc;
623 	SHA256Update(&ctx, &fn, sizeof(fn));
624 	fn = km_alloc;
625 	SHA256Update(&ctx, &fn, sizeof(fn));
626 	fn = strlen;
627 	SHA256Update(&ctx, &fn, sizeof(fn));
628 	SHA256Final((u_int8_t *)&hib->kern_hash, &ctx);
629 
630 	if (suspend) {
631 		/* Grab the previously-allocated piglet addresses */
632 		hib->piglet_va = global_piglet_va;
633 		hib->piglet_pa = global_piglet_pa;
634 		hib->io_page = (void *)hib->piglet_va;
635 
636 		/*
637 		 * Initialization of the hibernate IO function for drivers
638 		 * that need to do prep work (such as allocating memory or
639 		 * setting up data structures that cannot safely be done
640 		 * during suspend without causing side effects). There is
641 		 * a matching HIB_DONE call performed after the write is
642 		 * completed.
643 		 */
644 		if (hib->io_func(hib->dev,
645 		    DL_SECTOBLK(&dl, DL_GETPOFFSET(&dl.d_partitions[part])),
646 		    (vaddr_t)NULL,
647 		    DL_SECTOBLK(&dl, DL_GETPSIZE(&dl.d_partitions[part])),
648 		    HIB_INIT, hib->io_page))
649 			goto fail;
650 
651 	} else {
652 		/*
653 		 * Resuming kernels use a regular private page for the driver
654 		 * No need to free this I/O page as it will vanish as part of
655 		 * the resume.
656 		 */
657 		hib->io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
658 		if (!hib->io_page)
659 			goto fail;
660 	}
661 
662 	if (get_hibernate_info_md(hib))
663 		goto fail;
664 
665 	return (0);
666 
667 fail:
668 	return (1);
669 }
670 
671 /*
672  * Allocate nitems*size bytes from the hiballoc area presently in use
673  */
674 void *
hibernate_zlib_alloc(void * unused,int nitems,int size)675 hibernate_zlib_alloc(void *unused, int nitems, int size)
676 {
677 	struct hibernate_zlib_state *hibernate_state;
678 
679 	hibernate_state =
680 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
681 
682 	return hib_alloc(&hibernate_state->hiballoc_arena, nitems*size);
683 }
684 
685 /*
686  * Free the memory pointed to by addr in the hiballoc area presently in
687  * use
688  */
689 void
hibernate_zlib_free(void * unused,void * addr)690 hibernate_zlib_free(void *unused, void *addr)
691 {
692 	struct hibernate_zlib_state *hibernate_state;
693 
694 	hibernate_state =
695 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
696 
697 	hib_free(&hibernate_state->hiballoc_arena, addr);
698 }
699 
700 /*
701  * Inflate next page of data from the image stream.
702  * The rle parameter is modified on exit to contain the number of pages to
703  * skip in the output stream (or 0 if this page was inflated into).
704  *
705  * Returns 0 if the stream contains additional data, or 1 if the stream is
706  * finished.
707  */
708 int
hibernate_inflate_page(int * rle)709 hibernate_inflate_page(int *rle)
710 {
711 	struct hibernate_zlib_state *hibernate_state;
712 	int i;
713 
714 	hibernate_state =
715 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
716 
717 	/* Set up the stream for RLE code inflate */
718 	hibernate_state->hib_stream.next_out = (unsigned char *)rle;
719 	hibernate_state->hib_stream.avail_out = sizeof(*rle);
720 
721 	/* Inflate RLE code */
722 	i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH);
723 	if (i != Z_OK && i != Z_STREAM_END) {
724 		/*
725 		 * XXX - this will likely reboot/hang most machines
726 		 *       since the console output buffer will be unmapped,
727 		 *       but there's not much else we can do here.
728 		 */
729 		panic("rle inflate stream error");
730 	}
731 
732 	if (hibernate_state->hib_stream.avail_out != 0) {
733 		/*
734 		 * XXX - this will likely reboot/hang most machines
735 		 *       since the console output buffer will be unmapped,
736 		 *       but there's not much else we can do here.
737 		 */
738 		panic("rle short inflate error");
739 	}
740 
741 	if (*rle < 0 || *rle > 1024) {
742 		/*
743 		 * XXX - this will likely reboot/hang most machines
744 		 *       since the console output buffer will be unmapped,
745 		 *       but there's not much else we can do here.
746 		 */
747 		panic("invalid rle count");
748 	}
749 
750 	if (i == Z_STREAM_END)
751 		return (1);
752 
753 	if (*rle != 0)
754 		return (0);
755 
756 	/* Set up the stream for page inflate */
757 	hibernate_state->hib_stream.next_out =
758 		(unsigned char *)HIBERNATE_INFLATE_PAGE;
759 	hibernate_state->hib_stream.avail_out = PAGE_SIZE;
760 
761 	/* Process next block of data */
762 	i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH);
763 	if (i != Z_OK && i != Z_STREAM_END) {
764 		/*
765 		 * XXX - this will likely reboot/hang most machines
766 		 *       since the console output buffer will be unmapped,
767 		 *       but there's not much else we can do here.
768 		 */
769 		panic("inflate error");
770 	}
771 
772 	/* We should always have extracted a full page ... */
773 	if (hibernate_state->hib_stream.avail_out != 0) {
774 		/*
775 		 * XXX - this will likely reboot/hang most machines
776 		 *       since the console output buffer will be unmapped,
777 		 *       but there's not much else we can do here.
778 		 */
779 		panic("incomplete page");
780 	}
781 
782 	return (i == Z_STREAM_END);
783 }
784 
785 /*
786  * Inflate size bytes from src into dest, skipping any pages in
787  * [src..dest] that are special (see hibernate_inflate_skip)
788  *
789  * This function executes while using the resume-time stack
790  * and pmap, and therefore cannot use ddb/printf/etc. Doing so
791  * will likely hang or reset the machine since the console output buffer
792  * will be unmapped.
793  */
794 void
hibernate_inflate_region(union hibernate_info * hib,paddr_t dest,paddr_t src,size_t size)795 hibernate_inflate_region(union hibernate_info *hib, paddr_t dest,
796     paddr_t src, size_t size)
797 {
798 	int end_stream = 0, rle, skip;
799 	struct hibernate_zlib_state *hibernate_state;
800 
801 	hibernate_state =
802 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
803 
804 	hibernate_state->hib_stream.next_in = (unsigned char *)src;
805 	hibernate_state->hib_stream.avail_in = size;
806 
807 	do {
808 		/*
809 		 * Is this a special page? If yes, redirect the
810 		 * inflate output to a scratch page (eg, discard it)
811 		 */
812 		skip = hibernate_inflate_skip(hib, dest);
813 		if (skip == HIB_SKIP) {
814 			hibernate_enter_resume_mapping(
815 			    HIBERNATE_INFLATE_PAGE,
816 			    HIBERNATE_INFLATE_PAGE, 0);
817 		} else if (skip == HIB_MOVE) {
818 			/*
819 			 * Special case : retguard region. This gets moved
820 			 * temporarily into the piglet region and copied into
821 			 * place immediately before resume
822 			 */
823 			hibernate_enter_resume_mapping(
824 			    HIBERNATE_INFLATE_PAGE,
825 			    hib->piglet_pa + (110 * PAGE_SIZE) +
826 			    hib->retguard_ofs, 0);
827 			hib->retguard_ofs += PAGE_SIZE;
828 			if (hib->retguard_ofs > 255 * PAGE_SIZE) {
829 				/*
830 				 * XXX - this will likely reboot/hang most
831 				 *       machines since the console output
832 				 *       buffer will be unmapped, but there's
833 				 *       not much else we can do here.
834 				 */
835 				panic("retguard move error, out of space");
836 			}
837 		} else {
838 			hibernate_enter_resume_mapping(
839 			    HIBERNATE_INFLATE_PAGE, dest, 0);
840 		}
841 
842 		hibernate_flush();
843 		end_stream = hibernate_inflate_page(&rle);
844 
845 		if (rle == 0)
846 			dest += PAGE_SIZE;
847 		else
848 			dest += (rle * PAGE_SIZE);
849 	} while (!end_stream);
850 }
851 
852 /*
853  * deflate from src into the I/O page, up to 'remaining' bytes
854  *
855  * Returns number of input bytes consumed, and may reset
856  * the 'remaining' parameter if not all the output space was consumed
857  * (this information is needed to know how much to write to disk)
858  */
859 size_t
hibernate_deflate(union hibernate_info * hib,paddr_t src,size_t * remaining)860 hibernate_deflate(union hibernate_info *hib, paddr_t src,
861     size_t *remaining)
862 {
863 	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
864 	struct hibernate_zlib_state *hibernate_state;
865 
866 	hibernate_state =
867 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
868 
869 	/* Set up the stream for deflate */
870 	hibernate_state->hib_stream.next_in = (unsigned char *)src;
871 	hibernate_state->hib_stream.avail_in = PAGE_SIZE - (src & PAGE_MASK);
872 	hibernate_state->hib_stream.next_out =
873 		(unsigned char *)hibernate_io_page + (PAGE_SIZE - *remaining);
874 	hibernate_state->hib_stream.avail_out = *remaining;
875 
876 	/* Process next block of data */
877 	if (deflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH) != Z_OK)
878 		panic("hibernate zlib deflate error");
879 
880 	/* Update pointers and return number of bytes consumed */
881 	*remaining = hibernate_state->hib_stream.avail_out;
882 	return (PAGE_SIZE - (src & PAGE_MASK)) -
883 	    hibernate_state->hib_stream.avail_in;
884 }
885 
886 /*
887  * Write the hibernation information specified in hiber_info
888  * to the location in swap previously calculated (last block of
889  * swap), called the "signature block".
890  */
891 int
hibernate_write_signature(union hibernate_info * hib)892 hibernate_write_signature(union hibernate_info *hib)
893 {
894 	memset(&disk_hib, 0, hib->sec_size);
895 	memcpy(&disk_hib, hib, DEV_BSIZE);
896 
897 	/* Write hibernate info to disk */
898 	return (hibernate_write(hib, hib->sig_offset,
899 	    (vaddr_t)&disk_hib, hib->sec_size, IO_TYPE_SIG));
900 }
901 
902 /*
903  * Write the memory chunk table to the area in swap immediately
904  * preceding the signature block. The chunk table is stored
905  * in the piglet when this function is called.  Returns errno.
906  */
907 int
hibernate_write_chunktable(union hibernate_info * hib)908 hibernate_write_chunktable(union hibernate_info *hib)
909 {
910 	vaddr_t hibernate_chunk_table_start;
911 	size_t hibernate_chunk_table_size;
912 	int i, err;
913 
914 	hibernate_chunk_table_size = HIBERNATE_CHUNK_TABLE_SIZE;
915 
916 	hibernate_chunk_table_start = hib->piglet_va +
917 	    HIBERNATE_CHUNK_SIZE;
918 
919 	/* Write chunk table */
920 	for (i = 0; i < hibernate_chunk_table_size; i += MAXPHYS) {
921 		if ((err = hibernate_write(hib, btodb(i),
922 		    (vaddr_t)(hibernate_chunk_table_start + i),
923 		    MAXPHYS, IO_TYPE_CHK))) {
924 			DPRINTF("chunktable write error: %d\n", err);
925 			return (err);
926 		}
927 	}
928 
929 	return (0);
930 }
931 
932 /*
933  * Write an empty hiber_info to the swap signature block, which is
934  * guaranteed to not match any valid hib.
935  */
936 int
hibernate_clear_signature(union hibernate_info * hib)937 hibernate_clear_signature(union hibernate_info *hib)
938 {
939 	uint8_t buf[DEV_BSIZE];
940 
941 	/* Zero out a blank hiber_info */
942 	memcpy(&buf, &disk_hib, sizeof(buf));
943 	memset(&disk_hib, 0, hib->sec_size);
944 
945 	/* Write (zeroed) hibernate info to disk */
946 	DPRINTF("clearing hibernate signature block location: %lld\n",
947 		hib->sig_offset);
948 	if (hibernate_block_io(hib,
949 	    hib->sig_offset,
950 	    hib->sec_size, (vaddr_t)&disk_hib, 1))
951 		printf("Warning: could not clear hibernate signature\n");
952 
953 	memcpy(&disk_hib, buf, sizeof(buf));
954 	return (0);
955 }
956 
957 /*
958  * Compare two hibernate_infos to determine if they are the same (eg,
959  * we should be performing a hibernate resume on this machine.
960  * Not all fields are checked - just enough to verify that the machine
961  * has the same memory configuration and kernel as the one that
962  * wrote the signature previously.
963  */
964 int
hibernate_compare_signature(union hibernate_info * mine,union hibernate_info * disk)965 hibernate_compare_signature(union hibernate_info *mine,
966     union hibernate_info *disk)
967 {
968 	u_int i;
969 
970 	if (mine->nranges != disk->nranges) {
971 		printf("unhibernate failed: memory layout changed\n");
972 		return (1);
973 	}
974 
975 	if (bcmp(mine->kern_hash, disk->kern_hash, SHA256_DIGEST_LENGTH) != 0) {
976 		printf("unhibernate failed: original kernel changed\n");
977 		return (1);
978 	}
979 
980 	for (i = 0; i < mine->nranges; i++) {
981 		if ((mine->ranges[i].base != disk->ranges[i].base) ||
982 		    (mine->ranges[i].end != disk->ranges[i].end) ) {
983 			DPRINTF("hib range %d mismatch [%p-%p != %p-%p]\n",
984 				i,
985 				(void *)mine->ranges[i].base,
986 				(void *)mine->ranges[i].end,
987 				(void *)disk->ranges[i].base,
988 				(void *)disk->ranges[i].end);
989 			printf("unhibernate failed: memory size changed\n");
990 			return (1);
991 		}
992 	}
993 
994 	return (0);
995 }
996 
997 /*
998  * Transfers xfer_size bytes between the hibernate device specified in
999  * hib_info at offset blkctr and the vaddr specified at dest.
1000  *
1001  * Separate offsets and pages are used to handle misaligned reads (reads
1002  * that span a page boundary).
1003  *
1004  * blkctr specifies a relative offset (relative to the start of swap),
1005  * not an absolute disk offset
1006  *
1007  */
1008 int
hibernate_block_io(union hibernate_info * hib,daddr_t blkctr,size_t xfer_size,vaddr_t dest,int iswrite)1009 hibernate_block_io(union hibernate_info *hib, daddr_t blkctr,
1010     size_t xfer_size, vaddr_t dest, int iswrite)
1011 {
1012 	struct buf *bp;
1013 	int error;
1014 
1015 	bp = geteblk(xfer_size);
1016 	if (iswrite)
1017 		bcopy((caddr_t)dest, bp->b_data, xfer_size);
1018 
1019 	bp->b_bcount = xfer_size;
1020 	bp->b_blkno = blkctr;
1021 	CLR(bp->b_flags, B_READ | B_WRITE | B_DONE);
1022 	SET(bp->b_flags, B_BUSY | (iswrite ? B_WRITE : B_READ) | B_RAW);
1023 	bp->b_dev = hib->dev;
1024 	(*bdsw->d_strategy)(bp);
1025 
1026 	error = biowait(bp);
1027 	if (error) {
1028 		printf("hib block_io biowait error %d blk %lld size %zu\n",
1029 			error, (long long)blkctr, xfer_size);
1030 	} else if (!iswrite)
1031 		bcopy(bp->b_data, (caddr_t)dest, xfer_size);
1032 
1033 	bp->b_flags |= B_INVAL;
1034 	brelse(bp);
1035 
1036 	return (error != 0);
1037 }
1038 
1039 /*
1040  * Preserve one page worth of random data, generated from the resuming
1041  * kernel's arc4random. After resume, this preserved entropy can be used
1042  * to further improve the un-hibernated machine's entropy pool. This
1043  * random data is stored in the piglet, which is preserved across the
1044  * unpack operation, and is restored later in the resume process (see
1045  * hib_getentropy)
1046  */
1047 void
hibernate_preserve_entropy(union hibernate_info * hib)1048 hibernate_preserve_entropy(union hibernate_info *hib)
1049 {
1050 	void *entropy;
1051 
1052 	entropy = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
1053 
1054 	if (!entropy)
1055 		return;
1056 
1057 	pmap_activate(curproc);
1058 	pmap_kenter_pa((vaddr_t)entropy,
1059 	    (paddr_t)(hib->piglet_pa + (29 * PAGE_SIZE)),
1060 	    PROT_READ | PROT_WRITE);
1061 
1062 	arc4random_buf((void *)entropy, PAGE_SIZE);
1063 	pmap_kremove((vaddr_t)entropy, PAGE_SIZE);
1064 	km_free(entropy, PAGE_SIZE, &kv_any, &kp_none);
1065 }
1066 
1067 #ifndef NO_PROPOLICE
1068 vaddr_t
hibernate_unprotect_ssp(void)1069 hibernate_unprotect_ssp(void)
1070 {
1071 	struct kmem_dyn_mode kd_avoidalias;
1072 	vaddr_t va = trunc_page((vaddr_t)&__guard_local);
1073 	paddr_t pa;
1074 
1075 	pmap_extract(pmap_kernel(), va, &pa);
1076 
1077 	memset(&kd_avoidalias, 0, sizeof kd_avoidalias);
1078 	kd_avoidalias.kd_prefer = pa;
1079 	kd_avoidalias.kd_waitok = 1;
1080 	va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_avoidalias);
1081 	if (!va)
1082 		panic("hibernate_unprotect_ssp");
1083 
1084 	pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
1085 	pmap_update(pmap_kernel());
1086 
1087 	return va;
1088 }
1089 
1090 void
hibernate_reprotect_ssp(vaddr_t va)1091 hibernate_reprotect_ssp(vaddr_t va)
1092 {
1093 	pmap_kremove(va, PAGE_SIZE);
1094 	km_free((void *)va, PAGE_SIZE, &kv_any, &kp_none);
1095 }
1096 #endif /* NO_PROPOLICE */
1097 
1098 /*
1099  * Reads the signature block from swap, checks against the current machine's
1100  * information. If the information matches, perform a resume by reading the
1101  * saved image into the pig area, and unpacking.
1102  *
1103  * Must be called with interrupts enabled.
1104  */
1105 void
hibernate_resume(void)1106 hibernate_resume(void)
1107 {
1108 	uint8_t buf[DEV_BSIZE];
1109 	union hibernate_info *hib = (union hibernate_info *)&buf;
1110 	int s;
1111 #ifndef NO_PROPOLICE
1112 	vsize_t off = (vaddr_t)&__guard_local -
1113 	    trunc_page((vaddr_t)&__guard_local);
1114 	vaddr_t guard_va;
1115 #endif
1116 
1117 	/* Get current running machine's hibernate info */
1118 	memset(buf, 0, sizeof(buf));
1119 	if (get_hibernate_info(hib, 0)) {
1120 		DPRINTF("couldn't retrieve machine's hibernate info\n");
1121 		return;
1122 	}
1123 
1124 	/* Read hibernate info from disk */
1125 	s = splbio();
1126 
1127 	bdsw = &bdevsw[major(hib->dev)];
1128 	if ((*bdsw->d_open)(hib->dev, FREAD, S_IFCHR, curproc)) {
1129 		printf("hibernate_resume device open failed\n");
1130 		splx(s);
1131 		return;
1132 	}
1133 
1134 	DPRINTF("reading hibernate signature block location: %lld\n",
1135 		hib->sig_offset);
1136 
1137 	if (hibernate_block_io(hib,
1138 	    hib->sig_offset,
1139 	    hib->sec_size, (vaddr_t)&disk_hib, 0)) {
1140 		DPRINTF("error in hibernate read\n");
1141 		goto fail;
1142 	}
1143 
1144 	/* Check magic number */
1145 	if (disk_hib.magic != HIBERNATE_MAGIC) {
1146 		DPRINTF("wrong magic number in hibernate signature: %x\n",
1147 			disk_hib.magic);
1148 		goto fail;
1149 	}
1150 
1151 	/*
1152 	 * We (possibly) found a hibernate signature. Clear signature first,
1153 	 * to prevent accidental resume or endless resume cycles later.
1154 	 */
1155 	if (hibernate_clear_signature(hib)) {
1156 		DPRINTF("error clearing hibernate signature block\n");
1157 		goto fail;
1158 	}
1159 
1160 	/*
1161 	 * If on-disk and in-memory hibernate signatures match,
1162 	 * this means we should do a resume from hibernate.
1163 	 */
1164 	if (hibernate_compare_signature(hib, &disk_hib)) {
1165 		DPRINTF("mismatched hibernate signature block\n");
1166 		goto fail;
1167 	}
1168 	disk_hib.dev = hib->dev;
1169 
1170 #ifdef MULTIPROCESSOR
1171 	/* XXX - if we fail later, we may need to rehatch APs on some archs */
1172 	DPRINTF("hibernate: quiescing APs\n");
1173 	hibernate_quiesce_cpus();
1174 #endif /* MULTIPROCESSOR */
1175 
1176 	/* Read the image from disk into the image (pig) area */
1177 	if (hibernate_read_image(&disk_hib))
1178 		goto fail;
1179 	if ((*bdsw->d_close)(hib->dev, 0, S_IFCHR, curproc))
1180 		printf("hibernate_resume device close failed\n");
1181 	bdsw = NULL;
1182 
1183 	DPRINTF("hibernate: quiescing devices\n");
1184 	if (config_suspend_all(DVACT_QUIESCE) != 0)
1185 		goto fail;
1186 
1187 #ifndef NO_PROPOLICE
1188 	guard_va = hibernate_unprotect_ssp();
1189 #endif /* NO_PROPOLICE */
1190 
1191 	(void) splhigh();
1192 	hibernate_disable_intr_machdep();
1193 	cold = 2;
1194 
1195 	DPRINTF("hibernate: suspending devices\n");
1196 	if (config_suspend_all(DVACT_SUSPEND) != 0) {
1197 		cold = 0;
1198 		hibernate_enable_intr_machdep();
1199 #ifndef NO_PROPOLICE
1200 		hibernate_reprotect_ssp(guard_va);
1201 #endif /* ! NO_PROPOLICE */
1202 		goto fail;
1203 	}
1204 
1205 	pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_start,
1206 	    &retguard_start_phys);
1207 	pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_end,
1208 	    &retguard_end_phys);
1209 
1210 	hibernate_preserve_entropy(&disk_hib);
1211 
1212 	printf("Unpacking image...\n");
1213 
1214 	/* Switch stacks */
1215 	DPRINTF("hibernate: switching stacks\n");
1216 	hibernate_switch_stack_machdep();
1217 
1218 #ifndef NO_PROPOLICE
1219 	/* Start using suspended kernel's propolice guard */
1220 	*(long *)(guard_va + off) = disk_hib.guard;
1221 	hibernate_reprotect_ssp(guard_va);
1222 #endif /* ! NO_PROPOLICE */
1223 
1224 	/* Unpack and resume */
1225 	hibernate_unpack_image(&disk_hib);
1226 
1227 fail:
1228 	if (!bdsw)
1229 		printf("\nUnable to resume hibernated image\n");
1230 	else if ((*bdsw->d_close)(hib->dev, 0, S_IFCHR, curproc))
1231 		printf("hibernate_resume device close failed\n");
1232 	splx(s);
1233 }
1234 
1235 /*
1236  * Unpack image from pig area to original location by looping through the
1237  * list of output chunks in the order they should be restored (fchunks).
1238  *
1239  * Note that due to the stack smash protector and the fact that we have
1240  * switched stacks, it is not permitted to return from this function.
1241  */
1242 void
hibernate_unpack_image(union hibernate_info * hib)1243 hibernate_unpack_image(union hibernate_info *hib)
1244 {
1245 	uint8_t buf[DEV_BSIZE];
1246 	struct hibernate_disk_chunk *chunks;
1247 	union hibernate_info *local_hib = (union hibernate_info *)&buf;
1248 	paddr_t image_cur = global_pig_start;
1249 	short i, *fchunks;
1250 	char *pva;
1251 
1252 	/* Piglet will be identity mapped (VA == PA) */
1253 	pva = (char *)hib->piglet_pa;
1254 
1255 	fchunks = (short *)(pva + (4 * PAGE_SIZE));
1256 
1257 	chunks = (struct hibernate_disk_chunk *)(pva + HIBERNATE_CHUNK_SIZE);
1258 
1259 	/* Can't use hiber_info that's passed in after this point */
1260 	memcpy(buf, hib, sizeof(buf));
1261 	local_hib->retguard_ofs = 0;
1262 
1263 	/* VA == PA */
1264 	local_hib->piglet_va = local_hib->piglet_pa;
1265 
1266 	/*
1267 	 * Point of no return. Once we pass this point, only kernel code can
1268 	 * be accessed. No global variables or other kernel data structures
1269 	 * are guaranteed to be coherent after unpack starts.
1270 	 *
1271 	 * The image is now in high memory (pig area), we unpack from the pig
1272 	 * to the correct location in memory. We'll eventually end up copying
1273 	 * on top of ourself, but we are assured the kernel code here is the
1274 	 * same between the hibernated and resuming kernel, and we are running
1275 	 * on our own stack, so the overwrite is ok.
1276 	 */
1277 	DPRINTF("hibernate: activating alt. pagetable and starting unpack\n");
1278 	hibernate_activate_resume_pt_machdep();
1279 
1280 	for (i = 0; i < local_hib->chunk_ctr; i++) {
1281 		/* Reset zlib for inflate */
1282 		if (hibernate_zlib_reset(local_hib, 0) != Z_OK)
1283 			panic("hibernate failed to reset zlib for inflate");
1284 
1285 		hibernate_process_chunk(local_hib, &chunks[fchunks[i]],
1286 		    image_cur);
1287 
1288 		image_cur += chunks[fchunks[i]].compressed_size;
1289 	}
1290 
1291 	/*
1292 	 * Resume the loaded kernel by jumping to the MD resume vector.
1293 	 * We won't be returning from this call. We pass the location of
1294 	 * the retguard save area so the MD code can replace it before
1295 	 * resuming. See the piglet layout at the top of this file for
1296 	 * more information on the layout of the piglet area.
1297 	 *
1298 	 * We use 'global_piglet_va' here since by the time we are at
1299 	 * this point, we have already unpacked the image, and we want
1300 	 * the suspended kernel's view of what the piglet was, before
1301 	 * suspend occurred (since we will need to use that in the retguard
1302 	 * copy code in hibernate_resume_machdep.)
1303 	 */
1304 	hibernate_resume_machdep(global_piglet_va + (110 * PAGE_SIZE));
1305 }
1306 
1307 /*
1308  * Bounce a compressed image chunk to the piglet, entering mappings for the
1309  * copied pages as needed
1310  */
1311 void
hibernate_copy_chunk_to_piglet(paddr_t img_cur,vaddr_t piglet,size_t size)1312 hibernate_copy_chunk_to_piglet(paddr_t img_cur, vaddr_t piglet, size_t size)
1313 {
1314 	size_t ct, ofs;
1315 	paddr_t src = img_cur;
1316 	vaddr_t dest = piglet;
1317 
1318 	/* Copy first partial page */
1319 	ct = (PAGE_SIZE) - (src & PAGE_MASK);
1320 	ofs = (src & PAGE_MASK);
1321 
1322 	if (ct < PAGE_SIZE) {
1323 		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE,
1324 			(src - ofs), 0);
1325 		hibernate_flush();
1326 		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE + ofs), (caddr_t)dest, ct);
1327 		src += ct;
1328 		dest += ct;
1329 	}
1330 
1331 	/* Copy remaining pages */
1332 	while (src < size + img_cur) {
1333 		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE, src, 0);
1334 		hibernate_flush();
1335 		ct = PAGE_SIZE;
1336 		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE), (caddr_t)dest, ct);
1337 		hibernate_flush();
1338 		src += ct;
1339 		dest += ct;
1340 	}
1341 }
1342 
1343 /*
1344  * Process a chunk by bouncing it to the piglet, followed by unpacking
1345  */
1346 void
hibernate_process_chunk(union hibernate_info * hib,struct hibernate_disk_chunk * chunk,paddr_t img_cur)1347 hibernate_process_chunk(union hibernate_info *hib,
1348     struct hibernate_disk_chunk *chunk, paddr_t img_cur)
1349 {
1350 	char *pva = (char *)hib->piglet_va;
1351 
1352 	hibernate_copy_chunk_to_piglet(img_cur,
1353 	 (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)), chunk->compressed_size);
1354 	hibernate_inflate_region(hib, chunk->base,
1355 	    (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)),
1356 	    chunk->compressed_size);
1357 }
1358 
1359 /*
1360  * Calculate RLE component for 'inaddr'. Clamps to max RLE pages between
1361  * inaddr and range_end.
1362  */
1363 int
hibernate_calc_rle(paddr_t inaddr,paddr_t range_end)1364 hibernate_calc_rle(paddr_t inaddr, paddr_t range_end)
1365 {
1366 	int rle;
1367 
1368 	rle = uvm_page_rle(inaddr);
1369 	KASSERT(rle >= 0 && rle <= MAX_RLE);
1370 
1371 	/* Clamp RLE to range end */
1372 	if (rle > 0 && inaddr + (rle * PAGE_SIZE) > range_end)
1373 		rle = (range_end - inaddr) / PAGE_SIZE;
1374 
1375 	return (rle);
1376 }
1377 
1378 /*
1379  * Write the RLE byte for page at 'inaddr' to the output stream.
1380  * Returns the number of pages to be skipped at 'inaddr'.
1381  */
1382 int
hibernate_write_rle(union hibernate_info * hib,paddr_t inaddr,paddr_t range_end,daddr_t * blkctr,size_t * out_remaining)1383 hibernate_write_rle(union hibernate_info *hib, paddr_t inaddr,
1384 	paddr_t range_end, daddr_t *blkctr,
1385 	size_t *out_remaining)
1386 {
1387 	int rle, err, *rleloc;
1388 	struct hibernate_zlib_state *hibernate_state;
1389 	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
1390 
1391 	hibernate_state =
1392 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1393 
1394 	rle = hibernate_calc_rle(inaddr, range_end);
1395 
1396 	rleloc = (int *)hibernate_rle_page + MAX_RLE - 1;
1397 	*rleloc = rle;
1398 
1399 	/* Deflate the RLE byte into the stream */
1400 	hibernate_deflate(hib, (paddr_t)rleloc, out_remaining);
1401 
1402 	/* Did we fill the output page? If so, flush to disk */
1403 	if (*out_remaining == 0) {
1404 		if ((err = hibernate_write(hib, *blkctr,
1405 			(vaddr_t)hibernate_io_page, PAGE_SIZE, IO_TYPE_IMG))) {
1406 				DPRINTF("hib write error %d\n", err);
1407 				return -1;
1408 		}
1409 
1410 		*blkctr += btodb(PAGE_SIZE);
1411 		*out_remaining = PAGE_SIZE;
1412 
1413 		/* If we didn't deflate the entire RLE byte, finish it now */
1414 		if (hibernate_state->hib_stream.avail_in != 0)
1415 			hibernate_deflate(hib,
1416 				(vaddr_t)hibernate_state->hib_stream.next_in,
1417 				out_remaining);
1418 	}
1419 
1420 	return (rle);
1421 }
1422 
1423 /*
1424  * Write a compressed version of this machine's memory to disk, at the
1425  * precalculated swap offset:
1426  *
1427  * end of swap - signature block size - chunk table size - memory size
1428  *
1429  * The function begins by looping through each phys mem range, cutting each
1430  * one into MD sized chunks. These chunks are then compressed individually
1431  * and written out to disk, in phys mem order. Some chunks might compress
1432  * more than others, and for this reason, each chunk's size is recorded
1433  * in the chunk table, which is written to disk after the image has
1434  * properly been compressed and written (in hibernate_write_chunktable).
1435  *
1436  * When this function is called, the machine is nearly suspended - most
1437  * devices are quiesced/suspended, interrupts are off, and cold has
1438  * been set. This means that there can be no side effects once the
1439  * write has started, and the write function itself can also have no
1440  * side effects. This also means no printfs are permitted (since printf
1441  * has side effects.)
1442  *
1443  * Return values :
1444  *
1445  * 0      - success
1446  * EIO    - I/O error occurred writing the chunks
1447  * EINVAL - Failed to write a complete range
1448  * ENOMEM - Memory allocation failure during preparation of the zlib arena
1449  */
1450 int
hibernate_write_chunks(union hibernate_info * hib)1451 hibernate_write_chunks(union hibernate_info *hib)
1452 {
1453 	paddr_t range_base, range_end, inaddr, temp_inaddr;
1454 	size_t out_remaining, used;
1455 	struct hibernate_disk_chunk *chunks;
1456 	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
1457 	daddr_t blkctr = 0;
1458 	int i, rle, err;
1459 	struct hibernate_zlib_state *hibernate_state;
1460 
1461 	hibernate_state =
1462 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1463 
1464 	hib->chunk_ctr = 0;
1465 
1466 	/*
1467 	 * Map the utility VAs to the piglet. See the piglet map at the
1468 	 * top of this file for piglet layout information.
1469 	 */
1470 	hibernate_copy_page = hib->piglet_va + 3 * PAGE_SIZE;
1471 	hibernate_rle_page = hib->piglet_va + 28 * PAGE_SIZE;
1472 
1473 	chunks = (struct hibernate_disk_chunk *)(hib->piglet_va +
1474 	    HIBERNATE_CHUNK_SIZE);
1475 
1476 	/* Calculate the chunk regions */
1477 	for (i = 0; i < hib->nranges; i++) {
1478 		range_base = hib->ranges[i].base;
1479 		range_end = hib->ranges[i].end;
1480 
1481 		inaddr = range_base;
1482 
1483 		while (inaddr < range_end) {
1484 			chunks[hib->chunk_ctr].base = inaddr;
1485 			if (inaddr + HIBERNATE_CHUNK_SIZE < range_end)
1486 				chunks[hib->chunk_ctr].end = inaddr +
1487 				    HIBERNATE_CHUNK_SIZE;
1488 			else
1489 				chunks[hib->chunk_ctr].end = range_end;
1490 
1491 			inaddr += HIBERNATE_CHUNK_SIZE;
1492 			hib->chunk_ctr ++;
1493 		}
1494 	}
1495 
1496 	uvm_pmr_dirty_everything();
1497 
1498 	/* Compress and write the chunks in the chunktable */
1499 	for (i = 0; i < hib->chunk_ctr; i++) {
1500 		range_base = chunks[i].base;
1501 		range_end = chunks[i].end;
1502 
1503 		chunks[i].offset = blkctr;
1504 
1505 		/* Reset zlib for deflate */
1506 		if (hibernate_zlib_reset(hib, 1) != Z_OK) {
1507 			DPRINTF("hibernate_zlib_reset failed for deflate\n");
1508 			return (ENOMEM);
1509 		}
1510 
1511 		inaddr = range_base;
1512 
1513 		/*
1514 		 * For each range, loop through its phys mem region
1515 		 * and write out the chunks (the last chunk might be
1516 		 * smaller than the chunk size).
1517 		 */
1518 		while (inaddr < range_end) {
1519 			out_remaining = PAGE_SIZE;
1520 			while (out_remaining > 0 && inaddr < range_end) {
1521 				/*
1522 				 * Adjust for regions that are not evenly
1523 				 * divisible by PAGE_SIZE or overflowed
1524 				 * pages from the previous iteration.
1525 				 */
1526 				temp_inaddr = (inaddr & PAGE_MASK) +
1527 				    hibernate_copy_page;
1528 
1529 				/* Deflate from temp_inaddr to IO page */
1530 				if (inaddr != range_end) {
1531 					rle = 0;
1532 					if (inaddr % PAGE_SIZE == 0) {
1533 						rle = hibernate_write_rle(hib,
1534 							inaddr,
1535 							range_end,
1536 							&blkctr,
1537 							&out_remaining);
1538 					}
1539 
1540 					switch (rle) {
1541 					case -1:
1542 						return EIO;
1543 					case 0:
1544 						pmap_kenter_pa(hibernate_temp_page,
1545 							inaddr & PMAP_PA_MASK,
1546 							PROT_READ);
1547 
1548 						bcopy((caddr_t)hibernate_temp_page,
1549 							(caddr_t)hibernate_copy_page,
1550 							PAGE_SIZE);
1551 						inaddr += hibernate_deflate(hib,
1552 							temp_inaddr,
1553 							&out_remaining);
1554 						break;
1555 					default:
1556 						inaddr += rle * PAGE_SIZE;
1557 						if (inaddr > range_end)
1558 							inaddr = range_end;
1559 						break;
1560 					}
1561 
1562 				}
1563 
1564 				if (out_remaining == 0) {
1565 					/* Filled up the page */
1566 					if ((err = hibernate_write(hib, blkctr,
1567 					    (vaddr_t)hibernate_io_page,
1568 					    PAGE_SIZE, IO_TYPE_IMG))) {
1569 						DPRINTF("hib write error %d\n",
1570 						    err);
1571 						return (err);
1572 					}
1573 					blkctr += btodb(PAGE_SIZE);
1574 				}
1575 			}
1576 		}
1577 
1578 		if (inaddr != range_end) {
1579 			DPRINTF("deflate range ended prematurely\n");
1580 			return (EINVAL);
1581 		}
1582 
1583 		/*
1584 		 * End of range. Round up to next secsize bytes
1585 		 * after finishing compress
1586 		 */
1587 		if (out_remaining == 0)
1588 			out_remaining = PAGE_SIZE;
1589 
1590 		/* Finish compress */
1591 		hibernate_state->hib_stream.next_in = (unsigned char *)inaddr;
1592 		hibernate_state->hib_stream.avail_in = 0;
1593 		hibernate_state->hib_stream.next_out =
1594 		    (unsigned char *)hibernate_io_page +
1595 			(PAGE_SIZE - out_remaining);
1596 
1597 		/* We have an extra output page available for finalize */
1598 		hibernate_state->hib_stream.avail_out =
1599 			out_remaining + PAGE_SIZE;
1600 
1601 		if ((err = deflate(&hibernate_state->hib_stream, Z_FINISH)) !=
1602 		    Z_STREAM_END) {
1603 			DPRINTF("deflate error in output stream: %d\n", err);
1604 			return (err);
1605 		}
1606 
1607 		out_remaining = hibernate_state->hib_stream.avail_out;
1608 
1609 		/* Round up to next sector if needed */
1610 		used = roundup(2 * PAGE_SIZE - out_remaining, hib->sec_size);
1611 
1612 		/* Write final block(s) for this chunk */
1613 		if ((err = hibernate_write(hib, blkctr,
1614 		    (vaddr_t)hibernate_io_page, used, IO_TYPE_IMG))) {
1615 			DPRINTF("hib final write error %d\n", err);
1616 			return (err);
1617 		}
1618 
1619 		blkctr += btodb(used);
1620 
1621 		chunks[i].compressed_size = dbtob(blkctr - chunks[i].offset);
1622 	}
1623 
1624 	return (0);
1625 }
1626 
1627 /*
1628  * Reset the zlib stream state and allocate a new hiballoc area for either
1629  * inflate or deflate. This function is called once for each hibernate chunk.
1630  * Calling hiballoc_init multiple times is acceptable since the memory it is
1631  * provided is unmanaged memory (stolen). We use the memory provided to us
1632  * by the piglet allocated via the supplied hib.
1633  */
1634 int
hibernate_zlib_reset(union hibernate_info * hib,int deflate)1635 hibernate_zlib_reset(union hibernate_info *hib, int deflate)
1636 {
1637 	vaddr_t hibernate_zlib_start;
1638 	size_t hibernate_zlib_size;
1639 	char *pva = (char *)hib->piglet_va;
1640 	struct hibernate_zlib_state *hibernate_state;
1641 
1642 	hibernate_state =
1643 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1644 
1645 	if (!deflate)
1646 		pva = (char *)((paddr_t)pva & (PIGLET_PAGE_MASK));
1647 
1648 	/*
1649 	 * See piglet layout information at the start of this file for
1650 	 * information on the zlib page assignments.
1651 	 */
1652 	hibernate_zlib_start = (vaddr_t)(pva + (30 * PAGE_SIZE));
1653 	hibernate_zlib_size = 80 * PAGE_SIZE;
1654 
1655 	memset((void *)hibernate_zlib_start, 0, hibernate_zlib_size);
1656 	memset(hibernate_state, 0, PAGE_SIZE);
1657 
1658 	/* Set up stream structure */
1659 	hibernate_state->hib_stream.zalloc = (alloc_func)hibernate_zlib_alloc;
1660 	hibernate_state->hib_stream.zfree = (free_func)hibernate_zlib_free;
1661 
1662 	/* Initialize the hiballoc arena for zlib allocs/frees */
1663 	if (hiballoc_init(&hibernate_state->hiballoc_arena,
1664 	    (caddr_t)hibernate_zlib_start, hibernate_zlib_size))
1665 		return 1;
1666 
1667 	if (deflate) {
1668 		return deflateInit(&hibernate_state->hib_stream,
1669 		    Z_BEST_SPEED);
1670 	} else
1671 		return inflateInit(&hibernate_state->hib_stream);
1672 }
1673 
1674 /*
1675  * Reads the hibernated memory image from disk, whose location and
1676  * size are recorded in hib. Begin by reading the persisted
1677  * chunk table, which records the original chunk placement location
1678  * and compressed size for each. Next, allocate a pig region of
1679  * sufficient size to hold the compressed image. Next, read the
1680  * chunks into the pig area (calling hibernate_read_chunks to do this),
1681  * and finally, if all of the above succeeds, clear the hibernate signature.
1682  * The function will then return to hibernate_resume, which will proceed
1683  * to unpack the pig image to the correct place in memory.
1684  */
1685 int
hibernate_read_image(union hibernate_info * hib)1686 hibernate_read_image(union hibernate_info *hib)
1687 {
1688 	size_t compressed_size, disk_size, chunktable_size, pig_sz;
1689 	paddr_t image_start, image_end, pig_start, pig_end;
1690 	struct hibernate_disk_chunk *chunks;
1691 	daddr_t blkctr;
1692 	vaddr_t chunktable = (vaddr_t)NULL;
1693 	paddr_t piglet_chunktable = hib->piglet_pa +
1694 	    HIBERNATE_CHUNK_SIZE;
1695 	int i, status;
1696 
1697 	status = 0;
1698 	pmap_activate(curproc);
1699 
1700 	/* Calculate total chunk table size in disk blocks */
1701 	chunktable_size = btodb(HIBERNATE_CHUNK_TABLE_SIZE);
1702 
1703 	blkctr = hib->chunktable_offset;
1704 
1705 	chunktable = (vaddr_t)km_alloc(HIBERNATE_CHUNK_TABLE_SIZE, &kv_any,
1706 	    &kp_none, &kd_nowait);
1707 
1708 	if (!chunktable)
1709 		return (1);
1710 
1711 	/* Map chunktable pages */
1712 	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE; i += PAGE_SIZE)
1713 		pmap_kenter_pa(chunktable + i, piglet_chunktable + i,
1714 		    PROT_READ | PROT_WRITE);
1715 	pmap_update(pmap_kernel());
1716 
1717 	/* Read the chunktable from disk into the piglet chunktable */
1718 	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE;
1719 	    i += MAXPHYS, blkctr += btodb(MAXPHYS)) {
1720 		if (hibernate_block_io(hib, blkctr, MAXPHYS,
1721 		    chunktable + i, 0)) {
1722 			status = 1;
1723 			goto unmap;
1724 		}
1725 	}
1726 
1727 	blkctr = hib->image_offset;
1728 	compressed_size = 0;
1729 
1730 	chunks = (struct hibernate_disk_chunk *)chunktable;
1731 
1732 	for (i = 0; i < hib->chunk_ctr; i++)
1733 		compressed_size += chunks[i].compressed_size;
1734 
1735 	disk_size = compressed_size;
1736 
1737 	printf("unhibernating @ block %lld length %luMB\n",
1738 	    hib->image_offset, compressed_size / (1024 * 1024));
1739 
1740 	/* Allocate the pig area */
1741 	pig_sz = compressed_size + HIBERNATE_CHUNK_SIZE;
1742 	if (uvm_pmr_alloc_pig(&pig_start, pig_sz, hib->piglet_pa) == ENOMEM) {
1743 		status = 1;
1744 		goto unmap;
1745 	}
1746 
1747 	pig_end = pig_start + pig_sz;
1748 
1749 	/* Calculate image extents. Pig image must end on a chunk boundary. */
1750 	image_end = pig_end & ~(HIBERNATE_CHUNK_SIZE - 1);
1751 	image_start = image_end - disk_size;
1752 
1753 	if (hibernate_read_chunks(hib, image_start, image_end, disk_size,
1754 	    chunks)) {
1755 		status = 1;
1756 		goto unmap;
1757 	}
1758 
1759 	/* Prepare the resume time pmap/page table */
1760 	hibernate_populate_resume_pt(hib, image_start, image_end);
1761 
1762 unmap:
1763 	/* Unmap chunktable pages */
1764 	pmap_kremove(chunktable, HIBERNATE_CHUNK_TABLE_SIZE);
1765 	pmap_update(pmap_kernel());
1766 
1767 	return (status);
1768 }
1769 
1770 /*
1771  * Read the hibernated memory chunks from disk (chunk information at this
1772  * point is stored in the piglet) into the pig area specified by
1773  * [pig_start .. pig_end]. Order the chunks so that the final chunk is the
1774  * only chunk with overlap possibilities.
1775  */
1776 int
hibernate_read_chunks(union hibernate_info * hib,paddr_t pig_start,paddr_t pig_end,size_t image_compr_size,struct hibernate_disk_chunk * chunks)1777 hibernate_read_chunks(union hibernate_info *hib, paddr_t pig_start,
1778     paddr_t pig_end, size_t image_compr_size,
1779     struct hibernate_disk_chunk *chunks)
1780 {
1781 	paddr_t img_cur, piglet_base;
1782 	daddr_t blkctr;
1783 	size_t processed, compressed_size, read_size;
1784 	int err, nchunks, nfchunks, num_io_pages;
1785 	vaddr_t tempva, hibernate_fchunk_area;
1786 	short *fchunks, i, j;
1787 
1788 	tempva = (vaddr_t)NULL;
1789 	hibernate_fchunk_area = (vaddr_t)NULL;
1790 	nfchunks = 0;
1791 	piglet_base = hib->piglet_pa;
1792 	global_pig_start = pig_start;
1793 
1794 	/*
1795 	 * These mappings go into the resuming kernel's page table, and are
1796 	 * used only during image read. They disappear from existence
1797 	 * when the suspended kernel is unpacked on top of us.
1798 	 */
1799 	tempva = (vaddr_t)km_alloc(MAXPHYS + PAGE_SIZE, &kv_any, &kp_none,
1800 		&kd_nowait);
1801 	if (!tempva)
1802 		return (1);
1803 	hibernate_fchunk_area = (vaddr_t)km_alloc(24 * PAGE_SIZE, &kv_any,
1804 	    &kp_none, &kd_nowait);
1805 	if (!hibernate_fchunk_area)
1806 		return (1);
1807 
1808 	/* Final output chunk ordering VA */
1809 	fchunks = (short *)hibernate_fchunk_area;
1810 
1811 	/* Map the chunk ordering region */
1812 	for(i = 0; i < 24 ; i++)
1813 		pmap_kenter_pa(hibernate_fchunk_area + (i * PAGE_SIZE),
1814 			piglet_base + ((4 + i) * PAGE_SIZE),
1815 			PROT_READ | PROT_WRITE);
1816 	pmap_update(pmap_kernel());
1817 
1818 	nchunks = hib->chunk_ctr;
1819 
1820 	/* Initially start all chunks as unplaced */
1821 	for (i = 0; i < nchunks; i++)
1822 		chunks[i].flags = 0;
1823 
1824 	/*
1825 	 * Search the list for chunks that are outside the pig area. These
1826 	 * can be placed first in the final output list.
1827 	 */
1828 	for (i = 0; i < nchunks; i++) {
1829 		if (chunks[i].end <= pig_start || chunks[i].base >= pig_end) {
1830 			fchunks[nfchunks] = i;
1831 			nfchunks++;
1832 			chunks[i].flags |= HIBERNATE_CHUNK_PLACED;
1833 		}
1834 	}
1835 
1836 	/*
1837 	 * Walk the ordering, place the chunks in ascending memory order.
1838 	 */
1839 	for (i = 0; i < nchunks; i++) {
1840 		if (chunks[i].flags != HIBERNATE_CHUNK_PLACED) {
1841 			fchunks[nfchunks] = i;
1842 			nfchunks++;
1843 			chunks[i].flags = HIBERNATE_CHUNK_PLACED;
1844 		}
1845 	}
1846 
1847 	img_cur = pig_start;
1848 
1849 	for (i = 0, err = 0; i < nfchunks && err == 0; i++) {
1850 		blkctr = chunks[fchunks[i]].offset + hib->image_offset;
1851 		processed = 0;
1852 		compressed_size = chunks[fchunks[i]].compressed_size;
1853 
1854 		while (processed < compressed_size && err == 0) {
1855 			if (compressed_size - processed >= MAXPHYS)
1856 				read_size = MAXPHYS;
1857 			else
1858 				read_size = compressed_size - processed;
1859 
1860 			/*
1861 			 * We're reading read_size bytes, offset from the
1862 			 * start of a page by img_cur % PAGE_SIZE, so the
1863 			 * end will be read_size + (img_cur % PAGE_SIZE)
1864 			 * from the start of the first page.  Round that
1865 			 * up to the next page size.
1866 			 */
1867 			num_io_pages = (read_size + (img_cur % PAGE_SIZE)
1868 				+ PAGE_SIZE - 1) / PAGE_SIZE;
1869 
1870 			KASSERT(num_io_pages <= MAXPHYS/PAGE_SIZE + 1);
1871 
1872 			/* Map pages for this read */
1873 			for (j = 0; j < num_io_pages; j ++)
1874 				pmap_kenter_pa(tempva + j * PAGE_SIZE,
1875 				    img_cur + j * PAGE_SIZE,
1876 				    PROT_READ | PROT_WRITE);
1877 
1878 			pmap_update(pmap_kernel());
1879 
1880 			err = hibernate_block_io(hib, blkctr, read_size,
1881 			    tempva + (img_cur & PAGE_MASK), 0);
1882 
1883 			blkctr += btodb(read_size);
1884 
1885 			pmap_kremove(tempva, num_io_pages * PAGE_SIZE);
1886 			pmap_update(pmap_kernel());
1887 
1888 			processed += read_size;
1889 			img_cur += read_size;
1890 		}
1891 	}
1892 
1893 	pmap_kremove(hibernate_fchunk_area, 24 * PAGE_SIZE);
1894 	pmap_update(pmap_kernel());
1895 
1896 	return (i != nfchunks);
1897 }
1898 
1899 /*
1900  * Hibernating a machine comprises the following operations:
1901  *  1. Calculating this machine's hibernate_info information
1902  *  2. Allocating a piglet and saving the piglet's physaddr
1903  *  3. Calculating the memory chunks
1904  *  4. Writing the compressed chunks to disk
1905  *  5. Writing the chunk table
1906  *  6. Writing the signature block (hibernate_info)
1907  *
1908  * On most architectures, the function calling hibernate_suspend would
1909  * then power off the machine using some MD-specific implementation.
1910  */
1911 int
hibernate_suspend(void)1912 hibernate_suspend(void)
1913 {
1914 	uint8_t buf[DEV_BSIZE];
1915 	union hibernate_info *hib = (union hibernate_info *)&buf;
1916 	u_long start, end;
1917 
1918 	/*
1919 	 * Calculate memory ranges, swap offsets, etc.
1920 	 * This also allocates a piglet whose physaddr is stored in
1921 	 * hib->piglet_pa and vaddr stored in hib->piglet_va
1922 	 */
1923 	if (get_hibernate_info(hib, 1)) {
1924 		DPRINTF("failed to obtain hibernate info\n");
1925 		return (1);
1926 	}
1927 
1928 	/* Find a page-addressed region in swap [start,end] */
1929 	if (uvm_hibswap(hib->dev, &start, &end)) {
1930 		printf("hibernate: cannot find any swap\n");
1931 		return (1);
1932 	}
1933 
1934 	if (end - start + 1 < 1000) {
1935 		printf("hibernate: insufficient swap (%lu is too small)\n",
1936 			end - start + 1);
1937 		return (1);
1938 	}
1939 
1940 	pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_start,
1941 	    &retguard_start_phys);
1942 	pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_end,
1943 	    &retguard_end_phys);
1944 
1945 	/* Calculate block offsets in swap */
1946 	hib->image_offset = ctod(start);
1947 	hib->image_size = ctod(end - start + 1) -
1948 	    btodb(HIBERNATE_CHUNK_TABLE_SIZE);
1949 	hib->chunktable_offset = hib->image_offset + hib->image_size;
1950 
1951 	DPRINTF("hibernate @ block %lld chunks-length %lu blocks, "
1952 	    "chunktable-length %d blocks\n", hib->image_offset, hib->image_size,
1953 	    btodb(HIBERNATE_CHUNK_TABLE_SIZE));
1954 
1955 	pmap_activate(curproc);
1956 	DPRINTF("hibernate: writing chunks\n");
1957 	if (hibernate_write_chunks(hib)) {
1958 		DPRINTF("hibernate_write_chunks failed\n");
1959 		return (1);
1960 	}
1961 
1962 	DPRINTF("hibernate: writing chunktable\n");
1963 	if (hibernate_write_chunktable(hib)) {
1964 		DPRINTF("hibernate_write_chunktable failed\n");
1965 		return (1);
1966 	}
1967 
1968 	DPRINTF("hibernate: writing signature\n");
1969 	if (hibernate_write_signature(hib)) {
1970 		DPRINTF("hibernate_write_signature failed\n");
1971 		return (1);
1972 	}
1973 
1974 	/* Allow the disk to settle */
1975 	delay(500000);
1976 
1977 	/*
1978 	 * Give the device-specific I/O function a notification that we're
1979 	 * done, and that it can clean up or shutdown as needed.
1980 	 */
1981 	if (hib->io_func(hib->dev, 0, (vaddr_t)NULL, 0, HIB_DONE, hib->io_page))
1982 		printf("Warning: hibernate done failed\n");
1983 	return (0);
1984 }
1985 
1986 int
hibernate_alloc(void)1987 hibernate_alloc(void)
1988 {
1989 	KASSERT(global_piglet_va == 0);
1990 	KASSERT(hibernate_temp_page == 0);
1991 
1992 	pmap_activate(curproc);
1993 	pmap_kenter_pa(HIBERNATE_HIBALLOC_PAGE, HIBERNATE_HIBALLOC_PAGE,
1994 	    PROT_READ | PROT_WRITE);
1995 
1996 	/* Allocate a piglet, store its addresses in the supplied globals */
1997 	if (uvm_pmr_alloc_piglet(&global_piglet_va, &global_piglet_pa,
1998 	    HIBERNATE_CHUNK_SIZE * 4, HIBERNATE_CHUNK_SIZE))
1999 		goto unmap;
2000 
2001 	/*
2002 	 * Allocate VA for the temp page.
2003 	 *
2004 	 * This will become part of the suspended kernel and will
2005 	 * be freed in hibernate_free, upon resume (or hibernate
2006 	 * failure)
2007 	 */
2008 	hibernate_temp_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any,
2009 	    &kp_none, &kd_nowait);
2010 	if (!hibernate_temp_page) {
2011 		uvm_pmr_free_piglet(global_piglet_va, 4 * HIBERNATE_CHUNK_SIZE);
2012 		global_piglet_va = 0;
2013 		goto unmap;
2014 	}
2015 	return (0);
2016 unmap:
2017 	pmap_kremove(HIBERNATE_HIBALLOC_PAGE, PAGE_SIZE);
2018 	pmap_update(pmap_kernel());
2019 	return (ENOMEM);
2020 }
2021 
2022 /*
2023  * Free items allocated by hibernate_alloc()
2024  */
2025 void
hibernate_free(void)2026 hibernate_free(void)
2027 {
2028 	pmap_activate(curproc);
2029 
2030 	if (global_piglet_va)
2031 		uvm_pmr_free_piglet(global_piglet_va,
2032 		    4 * HIBERNATE_CHUNK_SIZE);
2033 
2034 	if (hibernate_temp_page) {
2035 		pmap_kremove(hibernate_temp_page, PAGE_SIZE);
2036 		km_free((void *)hibernate_temp_page, PAGE_SIZE,
2037 		    &kv_any, &kp_none);
2038 	}
2039 
2040 	global_piglet_va = 0;
2041 	hibernate_temp_page = 0;
2042 	pmap_kremove(HIBERNATE_HIBALLOC_PAGE, PAGE_SIZE);
2043 	pmap_update(pmap_kernel());
2044 }
2045