xref: /openbsd/sys/kern/subr_hibernate.c (revision c3ed0588)
1 /*	$OpenBSD: subr_hibernate.c,v 1.141 2024/06/05 11:04:17 krw Exp $	*/
2 
3 /*
4  * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
5  * Copyright (c) 2011 Mike Larkin <mlarkin@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/hibernate.h>
21 #include <sys/malloc.h>
22 #include <sys/param.h>
23 #include <sys/tree.h>
24 #include <sys/systm.h>
25 #include <sys/disklabel.h>
26 #include <sys/disk.h>
27 #include <sys/conf.h>
28 #include <sys/buf.h>
29 #include <sys/fcntl.h>
30 #include <sys/stat.h>
31 #include <sys/atomic.h>
32 
33 #include <uvm/uvm.h>
34 #include <uvm/uvm_swap.h>
35 
36 #include <machine/hibernate.h>
37 
38 /* Make sure the signature can fit in one block */
39 CTASSERT((offsetof(union hibernate_info, sec_size) + sizeof(u_int32_t)) <= DEV_BSIZE);
40 
41 /*
42  * Hibernate piglet layout information
43  *
44  * The piglet is a scratch area of memory allocated by the suspending kernel.
45  * Its phys and virt addrs are recorded in the signature block. The piglet is
46  * used to guarantee an unused area of memory that can be used by the resuming
47  * kernel for various things. The piglet is excluded during unpack operations.
48  * The piglet size is presently 4*HIBERNATE_CHUNK_SIZE (typically 4*4MB).
49  *
50  * Offset from piglet_base	Purpose
51  * ----------------------------------------------------------------------------
52  * 0				Private page for suspend I/O write functions
53  * 1*PAGE_SIZE			I/O page used during hibernate suspend
54  * 2*PAGE_SIZE			I/O page used during hibernate suspend
55  * 3*PAGE_SIZE			copy page used during hibernate suspend
56  * 4*PAGE_SIZE			final chunk ordering list (24 pages)
57  * 28*PAGE_SIZE			RLE utility page
58  * 29*PAGE_SIZE			start of hiballoc area
59  * 30*PAGE_SIZE			preserved entropy
60  * 110*PAGE_SIZE		end of hiballoc area (80 pages)
61  * 366*PAGE_SIZE		end of retguard preservation region (256 pages)
62  * ...				unused
63  * HIBERNATE_CHUNK_SIZE		start of hibernate chunk table
64  * 2*HIBERNATE_CHUNK_SIZE	bounce area for chunks being unpacked
65  * 4*HIBERNATE_CHUNK_SIZE	end of piglet
66  */
67 
68 /* Temporary vaddr ranges used during hibernate */
69 vaddr_t hibernate_temp_page;
70 vaddr_t hibernate_copy_page;
71 vaddr_t hibernate_rle_page;
72 
73 /* Hibernate info as read from disk during resume */
/*
 * disk_hib also serves as the staging buffer for the signature block:
 * hibernate_write_signature() and hibernate_clear_signature() fill it
 * before handing it to the I/O routine.
 */
74 union hibernate_info disk_hib;
/*
 * Block device switch entry; hibernate_block_io() submits its buffers
 * through this entry's d_strategy routine.
 */
75 struct bdevsw *bdsw;
76 
77 /*
78  * Global copy of the pig start address. This needs to be a global as we
79  * switch stacks after computing it - it can't be stored on the stack.
80  */
81 paddr_t global_pig_start;
82 
83 /*
84  * Global copies of the piglet start addresses (PA/VA). We store these
85  * as globals to avoid having to carry them around as parameters, as the
86  * piglet is allocated early and freed late - its lifecycle extends beyond
87  * that of the hibernate info union which is calculated on suspend/resume.
88  */
89 vaddr_t global_piglet_va;
90 paddr_t global_piglet_pa;
91 
92 /* #define HIB_DEBUG */
/*
 * Debug output helpers.
 *
 * With HIB_DEBUG defined, DPRINTF prints whenever hib_debug is non-zero
 * and DNPRINTF prints only when hib_debug is greater than n.  Without
 * HIB_DEBUG both macros expand to nothing, so their arguments are never
 * evaluated.
 */
93 #ifdef HIB_DEBUG
94 int	hib_debug = 99;
95 #define DPRINTF(x...)     do { if (hib_debug) printf(x); } while (0)
96 #define DNPRINTF(n,x...)  do { if (hib_debug > (n)) printf(x); } while (0)
97 #else
98 #define DPRINTF(x...)
99 #define DNPRINTF(n,x...)
100 #endif
101 
102 #define	ROUNDUP(_x, _y)	((((_x)+(_y)-1)/(_y))*(_y))
103 
104 #ifndef NO_PROPOLICE
105 extern long __guard_local;
106 #endif /* ! NO_PROPOLICE */
107 
108 /* Retguard phys address (need to skip this region during unpack) */
109 paddr_t retguard_start_phys, retguard_end_phys;
110 extern char __retguard_start, __retguard_end;
111 
112 void hibernate_copy_chunk_to_piglet(paddr_t, vaddr_t, size_t);
113 int hibernate_calc_rle(paddr_t, paddr_t);
114 int hibernate_write_rle(union hibernate_info *, paddr_t, paddr_t, daddr_t *,
115 	size_t *);
116 
117 #define MAX_RLE (HIBERNATE_CHUNK_SIZE / PAGE_SIZE)
118 
119 /*
120  * Hib alloc enforced alignment.
121  */
122 #define HIB_ALIGN		8 /* bytes alignment */
123 
124 /*
125  * sizeof builtin operation, but with alignment constraint.
126  */
127 #define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)
128 
/*
 * Bookkeeping header stored directly in front of each region managed by
 * the hiballoc allocator.  Entries live in the arena's tree, ordered by
 * their own address (see hibe_cmp).
 */
129 struct hiballoc_entry {
130 	size_t			hibe_use;	/* bytes in use right after the header */
131 	size_t			hibe_space;	/* free bytes following the used bytes */
132 	RBT_ENTRY(hiballoc_entry) hibe_entry;	/* tree linkage */
133 };
134 
135 /*
136  * Sort hibernate memory ranges by ascending PA
137  */
138 void
hibernate_sort_ranges(union hibernate_info * hib_info)139 hibernate_sort_ranges(union hibernate_info *hib_info)
140 {
141 	int i, j;
142 	struct hibernate_memory_range *ranges;
143 	paddr_t base, end;
144 
145 	ranges = hib_info->ranges;
146 
147 	for (i = 1; i < hib_info->nranges; i++) {
148 		j = i;
149 		while (j > 0 && ranges[j - 1].base > ranges[j].base) {
150 			base = ranges[j].base;
151 			end = ranges[j].end;
152 			ranges[j].base = ranges[j - 1].base;
153 			ranges[j].end = ranges[j - 1].end;
154 			ranges[j - 1].base = base;
155 			ranges[j - 1].end = end;
156 			j--;
157 		}
158 	}
159 }
160 
161 /*
162  * Compare hiballoc entries based on the address they manage.
163  *
164  * Since the address is fixed, relative to struct hiballoc_entry,
165  * we just compare the hiballoc_entry pointers.
166  */
167 static __inline int
hibe_cmp(const struct hiballoc_entry * l,const struct hiballoc_entry * r)168 hibe_cmp(const struct hiballoc_entry *l, const struct hiballoc_entry *r)
169 {
170 	vaddr_t vl = (vaddr_t)l;
171 	vaddr_t vr = (vaddr_t)r;
172 
173 	return vl < vr ? -1 : (vl > vr);
174 }
175 
RBT_PROTOTYPE(hiballoc_addr,hiballoc_entry,hibe_entry,hibe_cmp)176 RBT_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
177 
178 /*
179  * Given a hiballoc entry, return the address it manages.
180  */
181 static __inline void *
182 hib_entry_to_addr(struct hiballoc_entry *entry)
183 {
184 	caddr_t addr;
185 
186 	addr = (caddr_t)entry;
187 	addr += HIB_SIZEOF(struct hiballoc_entry);
188 	return addr;
189 }
190 
191 /*
192  * Given an address, find the hiballoc that corresponds.
193  */
194 static __inline struct hiballoc_entry*
hib_addr_to_entry(void * addr_param)195 hib_addr_to_entry(void *addr_param)
196 {
197 	caddr_t addr;
198 
199 	addr = (caddr_t)addr_param;
200 	addr -= HIB_SIZEOF(struct hiballoc_entry);
201 	return (struct hiballoc_entry*)addr;
202 }
203 
204 RBT_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp);
205 
206 /*
207  * Allocate memory from the arena.
208  *
209  * Returns NULL if no memory is available.
210  */
211 void *
hib_alloc(struct hiballoc_arena * arena,size_t alloc_sz)212 hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz)
213 {
214 	struct hiballoc_entry *entry, *new_entry;
215 	size_t find_sz;
216 
217 	/*
218 	 * Enforce alignment of HIB_ALIGN bytes.
219 	 *
220 	 * Note that, because the entry is put in front of the allocation,
221 	 * 0-byte allocations are guaranteed a unique address.
222 	 */
223 	alloc_sz = roundup(alloc_sz, HIB_ALIGN);
224 
225 	/*
226 	 * Find an entry with hibe_space >= find_sz.
227 	 *
228 	 * If the root node is not large enough, we switch to tree traversal.
229 	 * Because all entries are made at the bottom of the free space,
230 	 * traversal from the end has a slightly better chance of yielding
231 	 * a sufficiently large space.
232 	 */
233 	find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry);
234 	entry = RBT_ROOT(hiballoc_addr, &arena->hib_addrs);
235 	if (entry != NULL && entry->hibe_space < find_sz) {
236 		RBT_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) {
237 			if (entry->hibe_space >= find_sz)
238 				break;
239 		}
240 	}
241 
242 	/*
243 	 * Insufficient or too fragmented memory.
244 	 */
245 	if (entry == NULL)
246 		return NULL;
247 
248 	/*
249 	 * Create new entry in allocated space.
250 	 */
251 	new_entry = (struct hiballoc_entry*)(
252 	    (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use);
253 	new_entry->hibe_space = entry->hibe_space - find_sz;
254 	new_entry->hibe_use = alloc_sz;
255 
256 	/*
257 	 * Insert entry.
258 	 */
259 	if (RBT_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL)
260 		panic("hib_alloc: insert failure");
261 	entry->hibe_space = 0;
262 
263 	/* Return address managed by entry. */
264 	return hib_entry_to_addr(new_entry);
265 }
266 
267 void
hib_getentropy(char ** bufp,size_t * bufplen)268 hib_getentropy(char **bufp, size_t *bufplen)
269 {
270 	if (!bufp || !bufplen)
271 		return;
272 
273 	*bufp = (char *)(global_piglet_va + (29 * PAGE_SIZE));
274 	*bufplen = PAGE_SIZE;
275 }
276 
277 /*
278  * Free a pointer previously allocated from this arena.
279  *
280  * If addr is NULL, this will be silently accepted.
281  */
282 void
hib_free(struct hiballoc_arena * arena,void * addr)283 hib_free(struct hiballoc_arena *arena, void *addr)
284 {
285 	struct hiballoc_entry *entry, *prev;
286 
287 	if (addr == NULL)
288 		return;
289 
290 	/*
291 	 * Derive entry from addr and check it is really in this arena.
292 	 */
293 	entry = hib_addr_to_entry(addr);
294 	if (RBT_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry)
295 		panic("hib_free: freed item %p not in hib arena", addr);
296 
297 	/*
298 	 * Give the space in entry to its predecessor.
299 	 *
300 	 * If entry has no predecessor, change its used space into free space
301 	 * instead.
302 	 */
303 	prev = RBT_PREV(hiballoc_addr, entry);
304 	if (prev != NULL &&
305 	    (void *)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) +
306 	    prev->hibe_use + prev->hibe_space) == entry) {
307 		/* Merge entry. */
308 		RBT_REMOVE(hiballoc_addr, &arena->hib_addrs, entry);
309 		prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) +
310 		    entry->hibe_use + entry->hibe_space;
311 	} else {
312 		/* Flip used memory to free space. */
313 		entry->hibe_space += entry->hibe_use;
314 		entry->hibe_use = 0;
315 	}
316 }
317 
318 /*
319  * Initialize hiballoc.
320  *
321  * The allocator will manage memory at ptr, which is len bytes.
322  */
323 int
hiballoc_init(struct hiballoc_arena * arena,void * p_ptr,size_t p_len)324 hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len)
325 {
326 	struct hiballoc_entry *entry;
327 	caddr_t ptr;
328 	size_t len;
329 
330 	RBT_INIT(hiballoc_addr, &arena->hib_addrs);
331 
332 	/*
333 	 * Hib allocator enforces HIB_ALIGN alignment.
334 	 * Fixup ptr and len.
335 	 */
336 	ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN);
337 	len = p_len - ((size_t)ptr - (size_t)p_ptr);
338 	len &= ~((size_t)HIB_ALIGN - 1);
339 
340 	/*
341 	 * Insufficient memory to be able to allocate and also do bookkeeping.
342 	 */
343 	if (len <= HIB_SIZEOF(struct hiballoc_entry))
344 		return ENOMEM;
345 
346 	/*
347 	 * Create entry describing space.
348 	 */
349 	entry = (struct hiballoc_entry*)ptr;
350 	entry->hibe_use = 0;
351 	entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry);
352 	RBT_INSERT(hiballoc_addr, &arena->hib_addrs, entry);
353 
354 	return 0;
355 }
356 
357 /*
358  * Zero all free memory.
359  */
360 void
uvm_pmr_zero_everything(void)361 uvm_pmr_zero_everything(void)
362 {
363 	struct uvm_pmemrange	*pmr;
364 	struct vm_page		*pg;
365 	int			 i;
366 
367 	uvm_lock_fpageq();
368 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
369 		/* Zero single pages. */
370 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY]))
371 		    != NULL) {
372 			uvm_pmr_remove(pmr, pg);
373 			uvm_pagezero(pg);
374 			atomic_setbits_int(&pg->pg_flags, PG_ZERO);
375 			uvmexp.zeropages++;
376 			uvm_pmr_insert(pmr, pg, 0);
377 		}
378 
379 		/* Zero multi page ranges. */
380 		while ((pg = RBT_ROOT(uvm_pmr_size,
381 		    &pmr->size[UVM_PMR_MEMTYPE_DIRTY])) != NULL) {
382 			pg--; /* Size tree always has second page. */
383 			uvm_pmr_remove(pmr, pg);
384 			for (i = 0; i < pg->fpgsz; i++) {
385 				uvm_pagezero(&pg[i]);
386 				atomic_setbits_int(&pg[i].pg_flags, PG_ZERO);
387 				uvmexp.zeropages++;
388 			}
389 			uvm_pmr_insert(pmr, pg, 0);
390 		}
391 	}
392 	uvm_unlock_fpageq();
393 }
394 
395 /*
396  * Mark all memory as dirty.
397  *
398  * Used to inform the system that the clean memory isn't clean for some
399  * reason, for example because we just came back from hibernate.
400  */
401 void
uvm_pmr_dirty_everything(void)402 uvm_pmr_dirty_everything(void)
403 {
404 	struct uvm_pmemrange	*pmr;
405 	struct vm_page		*pg;
406 	int			 i;
407 
408 	uvm_lock_fpageq();
409 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
410 		/* Dirty single pages. */
411 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO]))
412 		    != NULL) {
413 			uvm_pmr_remove(pmr, pg);
414 			atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
415 			uvm_pmr_insert(pmr, pg, 0);
416 		}
417 
418 		/* Dirty multi page ranges. */
419 		while ((pg = RBT_ROOT(uvm_pmr_size,
420 		    &pmr->size[UVM_PMR_MEMTYPE_ZERO])) != NULL) {
421 			pg--; /* Size tree always has second page. */
422 			uvm_pmr_remove(pmr, pg);
423 			for (i = 0; i < pg->fpgsz; i++)
424 				atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
425 			uvm_pmr_insert(pmr, pg, 0);
426 		}
427 	}
428 
429 	uvmexp.zeropages = 0;
430 	uvm_unlock_fpageq();
431 }
432 
433 /*
434  * Allocate an area that can hold sz bytes and doesn't overlap with
435  * the piglet at piglet_pa.
436  */
437 int
uvm_pmr_alloc_pig(paddr_t * pa,psize_t sz,paddr_t piglet_pa)438 uvm_pmr_alloc_pig(paddr_t *pa, psize_t sz, paddr_t piglet_pa)
439 {
440 	struct uvm_constraint_range pig_constraint;
441 	struct kmem_pa_mode kp_pig = {
442 		.kp_constraint = &pig_constraint,
443 		.kp_maxseg = 1
444 	};
445 	vaddr_t va;
446 
447 	sz = round_page(sz);
448 
449 	pig_constraint.ucr_low = piglet_pa + 4 * HIBERNATE_CHUNK_SIZE;
450 	pig_constraint.ucr_high = -1;
451 
452 	va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait);
453 	if (va == 0) {
454 		pig_constraint.ucr_low = 0;
455 		pig_constraint.ucr_high = piglet_pa - 1;
456 
457 		va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait);
458 		if (va == 0)
459 			return ENOMEM;
460 	}
461 
462 	pmap_extract(pmap_kernel(), va, pa);
463 	return 0;
464 }
465 
466 /*
467  * Allocate a piglet area.
468  *
469  * This needs to be in DMA-safe memory.
470  * Piglets are aligned.
471  *
472  * sz and align in bytes.
473  *
474  * The call will sleep for the pagedaemon to attempt to free memory.
475  * The pagedaemon may decide its not possible to free enough memory, causing
476  * the allocation to fail.
477  */
478 int
uvm_pmr_alloc_piglet(vaddr_t * va,paddr_t * pa,vsize_t sz,paddr_t align)479 uvm_pmr_alloc_piglet(vaddr_t *va, paddr_t *pa, vsize_t sz, paddr_t align)
480 {
481 	struct kmem_pa_mode kp_piglet = {
482 		.kp_constraint = &dma_constraint,
483 		.kp_align = align,
484 		.kp_maxseg = 1
485 	};
486 
487 	/* Ensure align is a power of 2 */
488 	KASSERT((align & (align - 1)) == 0);
489 
490 	/*
491 	 * Fixup arguments: align must be at least PAGE_SIZE,
492 	 * sz will be converted to pagecount, since that is what
493 	 * pmemrange uses internally.
494 	 */
495 	if (align < PAGE_SIZE)
496 		kp_piglet.kp_align = PAGE_SIZE;
497 
498 	sz = round_page(sz);
499 
500 	*va = (vaddr_t)km_alloc(sz, &kv_any, &kp_piglet, &kd_nowait);
501 	if (*va == 0)
502 		return ENOMEM;
503 
504 	pmap_extract(pmap_kernel(), *va, pa);
505 	return 0;
506 }
507 
508 /*
509  * Free a piglet area.
510  */
511 void
uvm_pmr_free_piglet(vaddr_t va,vsize_t sz)512 uvm_pmr_free_piglet(vaddr_t va, vsize_t sz)
513 {
514 	/*
515 	 * Fix parameters.
516 	 */
517 	sz = round_page(sz);
518 
519 	/*
520 	 * Free the physical and virtual memory.
521 	 */
522 	km_free((void *)va, sz, &kv_any, &kp_dma_contig);
523 }
524 
525 /*
526  * Physmem RLE compression support.
527  *
528  * Given a physical page address, return the number of pages starting at the
529  * address that are free.  Clamps to the number of pages in
530  * HIBERNATE_CHUNK_SIZE. Returns 0 if the page at addr is not free.
531  */
532 int
uvm_page_rle(paddr_t addr)533 uvm_page_rle(paddr_t addr)
534 {
535 	struct vm_page		*pg, *pg_end;
536 	struct vm_physseg	*vmp;
537 	int			 pseg_idx, off_idx;
538 
539 	pseg_idx = vm_physseg_find(atop(addr), &off_idx);
540 	if (pseg_idx == -1)
541 		return 0;
542 
543 	vmp = &vm_physmem[pseg_idx];
544 	pg = &vmp->pgs[off_idx];
545 	if (!(pg->pg_flags & PQ_FREE))
546 		return 0;
547 
548 	/*
549 	 * Search for the first non-free page after pg.
550 	 * Note that the page may not be the first page in a free pmemrange,
551 	 * therefore pg->fpgsz cannot be used.
552 	 */
553 	for (pg_end = pg; pg_end <= vmp->lastpg &&
554 	    (pg_end->pg_flags & PQ_FREE) == PQ_FREE &&
555 	    (pg_end - pg) < HIBERNATE_CHUNK_SIZE/PAGE_SIZE; pg_end++)
556 		;
557 	return pg_end - pg;
558 }
559 
560 /*
561  * Fills out the hibernate_info union pointed to by hib
562  * with information about this machine (swap signature block
563  * offsets, number of memory ranges, kernel in use, etc)
564  */
565 int
get_hibernate_info(union hibernate_info * hib,int suspend)566 get_hibernate_info(union hibernate_info *hib, int suspend)
567 {
568 	struct disklabel dl;
569 	char err_string[128], *dl_ret;
570 	int part;
571 	SHA2_CTX ctx;
572 	void *fn;
573 
574 #ifndef NO_PROPOLICE
575 	/* Save propolice guard */
576 	hib->guard = __guard_local;
577 #endif /* ! NO_PROPOLICE */
578 
579 	/* Determine I/O function to use */
580 	hib->io_func = get_hibernate_io_function(swdevt[0].sw_dev);
581 	if (hib->io_func == NULL)
582 		return (1);
583 
584 	/* Calculate hibernate device */
585 	hib->dev = swdevt[0].sw_dev;
586 
587 	/* Read disklabel (used to calculate signature and image offsets) */
588 	dl_ret = disk_readlabel(&dl, hib->dev, err_string, sizeof(err_string));
589 
590 	if (dl_ret) {
591 		printf("Hibernate error reading disklabel: %s\n", dl_ret);
592 		return (1);
593 	}
594 
595 	/* Make sure we have a swap partition. */
596 	part = DISKPART(hib->dev);
597 	if (dl.d_npartitions <= part ||
598 	    dl.d_secsize > sizeof(union hibernate_info) ||
599 	    dl.d_partitions[part].p_fstype != FS_SWAP ||
600 	    DL_GETPSIZE(&dl.d_partitions[part]) == 0)
601 		return (1);
602 
603 	/* Magic number */
604 	hib->magic = HIBERNATE_MAGIC;
605 
606 	/* Calculate signature block location */
607 	hib->sec_size = dl.d_secsize;
608 	hib->sig_offset = DL_GETPSIZE(&dl.d_partitions[part]) - 1;
609 	hib->sig_offset = DL_SECTOBLK(&dl, hib->sig_offset);
610 
611 	SHA256Init(&ctx);
612 	SHA256Update(&ctx, version, strlen(version));
613 	fn = printf;
614 	SHA256Update(&ctx, &fn, sizeof(fn));
615 	fn = malloc;
616 	SHA256Update(&ctx, &fn, sizeof(fn));
617 	fn = km_alloc;
618 	SHA256Update(&ctx, &fn, sizeof(fn));
619 	fn = strlen;
620 	SHA256Update(&ctx, &fn, sizeof(fn));
621 	SHA256Final((u_int8_t *)&hib->kern_hash, &ctx);
622 
623 	if (suspend) {
624 		/* Grab the previously-allocated piglet addresses */
625 		hib->piglet_va = global_piglet_va;
626 		hib->piglet_pa = global_piglet_pa;
627 		hib->io_page = (void *)hib->piglet_va;
628 
629 		/*
630 		 * Initialization of the hibernate IO function for drivers
631 		 * that need to do prep work (such as allocating memory or
632 		 * setting up data structures that cannot safely be done
633 		 * during suspend without causing side effects). There is
634 		 * a matching HIB_DONE call performed after the write is
635 		 * completed.
636 		 */
637 		if (hib->io_func(hib->dev,
638 		    DL_SECTOBLK(&dl, DL_GETPOFFSET(&dl.d_partitions[part])),
639 		    (vaddr_t)NULL,
640 		    DL_SECTOBLK(&dl, DL_GETPSIZE(&dl.d_partitions[part])),
641 		    HIB_INIT, hib->io_page))
642 			goto fail;
643 
644 	} else {
645 		/*
646 		 * Resuming kernels use a regular private page for the driver
647 		 * No need to free this I/O page as it will vanish as part of
648 		 * the resume.
649 		 */
650 		hib->io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
651 		if (!hib->io_page)
652 			goto fail;
653 	}
654 
655 	if (get_hibernate_info_md(hib))
656 		goto fail;
657 
658 	return (0);
659 
660 fail:
661 	return (1);
662 }
663 
664 /*
665  * Allocate nitems*size bytes from the hiballoc area presently in use
666  */
667 void *
hibernate_zlib_alloc(void * unused,int nitems,int size)668 hibernate_zlib_alloc(void *unused, int nitems, int size)
669 {
670 	struct hibernate_zlib_state *hibernate_state;
671 
672 	hibernate_state =
673 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
674 
675 	return hib_alloc(&hibernate_state->hiballoc_arena, nitems*size);
676 }
677 
678 /*
679  * Free the memory pointed to by addr in the hiballoc area presently in
680  * use
681  */
682 void
hibernate_zlib_free(void * unused,void * addr)683 hibernate_zlib_free(void *unused, void *addr)
684 {
685 	struct hibernate_zlib_state *hibernate_state;
686 
687 	hibernate_state =
688 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
689 
690 	hib_free(&hibernate_state->hiballoc_arena, addr);
691 }
692 
693 /*
694  * Inflate next page of data from the image stream.
695  * The rle parameter is modified on exit to contain the number of pages to
696  * skip in the output stream (or 0 if this page was inflated into).
697  *
698  * Returns 0 if the stream contains additional data, or 1 if the stream is
699  * finished.
700  */
701 int
hibernate_inflate_page(int * rle)702 hibernate_inflate_page(int *rle)
703 {
704 	struct hibernate_zlib_state *hibernate_state;
705 	int i;
706 
707 	hibernate_state =
708 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
709 
710 	/* Set up the stream for RLE code inflate */
711 	hibernate_state->hib_stream.next_out = (unsigned char *)rle;
712 	hibernate_state->hib_stream.avail_out = sizeof(*rle);
713 
714 	/* Inflate RLE code */
715 	i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH);
716 	if (i != Z_OK && i != Z_STREAM_END) {
717 		/*
718 		 * XXX - this will likely reboot/hang most machines
719 		 *       since the console output buffer will be unmapped,
720 		 *       but there's not much else we can do here.
721 		 */
722 		panic("rle inflate stream error");
723 	}
724 
725 	if (hibernate_state->hib_stream.avail_out != 0) {
726 		/*
727 		 * XXX - this will likely reboot/hang most machines
728 		 *       since the console output buffer will be unmapped,
729 		 *       but there's not much else we can do here.
730 		 */
731 		panic("rle short inflate error");
732 	}
733 
734 	if (*rle < 0 || *rle > 1024) {
735 		/*
736 		 * XXX - this will likely reboot/hang most machines
737 		 *       since the console output buffer will be unmapped,
738 		 *       but there's not much else we can do here.
739 		 */
740 		panic("invalid rle count");
741 	}
742 
743 	if (i == Z_STREAM_END)
744 		return (1);
745 
746 	if (*rle != 0)
747 		return (0);
748 
749 	/* Set up the stream for page inflate */
750 	hibernate_state->hib_stream.next_out =
751 		(unsigned char *)HIBERNATE_INFLATE_PAGE;
752 	hibernate_state->hib_stream.avail_out = PAGE_SIZE;
753 
754 	/* Process next block of data */
755 	i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH);
756 	if (i != Z_OK && i != Z_STREAM_END) {
757 		/*
758 		 * XXX - this will likely reboot/hang most machines
759 		 *       since the console output buffer will be unmapped,
760 		 *       but there's not much else we can do here.
761 		 */
762 		panic("inflate error");
763 	}
764 
765 	/* We should always have extracted a full page ... */
766 	if (hibernate_state->hib_stream.avail_out != 0) {
767 		/*
768 		 * XXX - this will likely reboot/hang most machines
769 		 *       since the console output buffer will be unmapped,
770 		 *       but there's not much else we can do here.
771 		 */
772 		panic("incomplete page");
773 	}
774 
775 	return (i == Z_STREAM_END);
776 }
777 
778 /*
779  * Inflate size bytes from src into dest, skipping any pages in
780  * [src..dest] that are special (see hibernate_inflate_skip)
781  *
782  * This function executes while using the resume-time stack
783  * and pmap, and therefore cannot use ddb/printf/etc. Doing so
784  * will likely hang or reset the machine since the console output buffer
785  * will be unmapped.
786  */
787 void
hibernate_inflate_region(union hibernate_info * hib,paddr_t dest,paddr_t src,size_t size)788 hibernate_inflate_region(union hibernate_info *hib, paddr_t dest,
789     paddr_t src, size_t size)
790 {
791 	int end_stream = 0, rle, skip;
792 	struct hibernate_zlib_state *hibernate_state;
793 
794 	hibernate_state =
795 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
796 
797 	hibernate_state->hib_stream.next_in = (unsigned char *)src;
798 	hibernate_state->hib_stream.avail_in = size;
799 
800 	do {
801 		/*
802 		 * Is this a special page? If yes, redirect the
803 		 * inflate output to a scratch page (eg, discard it)
804 		 */
805 		skip = hibernate_inflate_skip(hib, dest);
806 		if (skip == HIB_SKIP) {
807 			hibernate_enter_resume_mapping(
808 			    HIBERNATE_INFLATE_PAGE,
809 			    HIBERNATE_INFLATE_PAGE, 0);
810 		} else if (skip == HIB_MOVE) {
811 			/*
812 			 * Special case : retguard region. This gets moved
813 			 * temporarily into the piglet region and copied into
814 			 * place immediately before resume
815 			 */
816 			hibernate_enter_resume_mapping(
817 			    HIBERNATE_INFLATE_PAGE,
818 			    hib->piglet_pa + (110 * PAGE_SIZE) +
819 			    hib->retguard_ofs, 0);
820 			hib->retguard_ofs += PAGE_SIZE;
821 			if (hib->retguard_ofs > 255 * PAGE_SIZE) {
822 				/*
823 				 * XXX - this will likely reboot/hang most
824 				 *       machines since the console output
825 				 *       buffer will be unmapped, but there's
826 				 *       not much else we can do here.
827 				 */
828 				panic("retguard move error, out of space");
829 			}
830 		} else {
831 			hibernate_enter_resume_mapping(
832 			    HIBERNATE_INFLATE_PAGE, dest, 0);
833 		}
834 
835 		hibernate_flush();
836 		end_stream = hibernate_inflate_page(&rle);
837 
838 		if (rle == 0)
839 			dest += PAGE_SIZE;
840 		else
841 			dest += (rle * PAGE_SIZE);
842 	} while (!end_stream);
843 }
844 
845 /*
846  * deflate from src into the I/O page, up to 'remaining' bytes
847  *
848  * Returns number of input bytes consumed, and may reset
849  * the 'remaining' parameter if not all the output space was consumed
850  * (this information is needed to know how much to write to disk)
851  */
852 size_t
hibernate_deflate(union hibernate_info * hib,paddr_t src,size_t * remaining)853 hibernate_deflate(union hibernate_info *hib, paddr_t src,
854     size_t *remaining)
855 {
856 	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
857 	struct hibernate_zlib_state *hibernate_state;
858 
859 	hibernate_state =
860 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
861 
862 	/* Set up the stream for deflate */
863 	hibernate_state->hib_stream.next_in = (unsigned char *)src;
864 	hibernate_state->hib_stream.avail_in = PAGE_SIZE - (src & PAGE_MASK);
865 	hibernate_state->hib_stream.next_out =
866 		(unsigned char *)hibernate_io_page + (PAGE_SIZE - *remaining);
867 	hibernate_state->hib_stream.avail_out = *remaining;
868 
869 	/* Process next block of data */
870 	if (deflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH) != Z_OK)
871 		panic("hibernate zlib deflate error");
872 
873 	/* Update pointers and return number of bytes consumed */
874 	*remaining = hibernate_state->hib_stream.avail_out;
875 	return (PAGE_SIZE - (src & PAGE_MASK)) -
876 	    hibernate_state->hib_stream.avail_in;
877 }
878 
879 /*
880  * Write the hibernation information specified in hiber_info
881  * to the location in swap previously calculated (last block of
882  * swap), called the "signature block".
883  */
884 int
hibernate_write_signature(union hibernate_info * hib)885 hibernate_write_signature(union hibernate_info *hib)
886 {
887 	memset(&disk_hib, 0, hib->sec_size);
888 	memcpy(&disk_hib, hib, DEV_BSIZE);
889 
890 	/* Write hibernate info to disk */
891 	return (hib->io_func(hib->dev, hib->sig_offset,
892 	    (vaddr_t)&disk_hib, hib->sec_size, HIB_W,
893 	    hib->io_page));
894 }
895 
896 /*
897  * Write the memory chunk table to the area in swap immediately
898  * preceding the signature block. The chunk table is stored
899  * in the piglet when this function is called.  Returns errno.
900  */
901 int
hibernate_write_chunktable(union hibernate_info * hib)902 hibernate_write_chunktable(union hibernate_info *hib)
903 {
904 	vaddr_t hibernate_chunk_table_start;
905 	size_t hibernate_chunk_table_size;
906 	int i, err;
907 
908 	hibernate_chunk_table_size = HIBERNATE_CHUNK_TABLE_SIZE;
909 
910 	hibernate_chunk_table_start = hib->piglet_va +
911 	    HIBERNATE_CHUNK_SIZE;
912 
913 	/* Write chunk table */
914 	for (i = 0; i < hibernate_chunk_table_size; i += MAXPHYS) {
915 		if ((err = hib->io_func(hib->dev,
916 		    hib->chunktable_offset + (i/DEV_BSIZE),
917 		    (vaddr_t)(hibernate_chunk_table_start + i),
918 		    MAXPHYS, HIB_W, hib->io_page))) {
919 			DPRINTF("chunktable write error: %d\n", err);
920 			return (err);
921 		}
922 	}
923 
924 	return (0);
925 }
926 
927 /*
928  * Write an empty hiber_info to the swap signature block, which is
929  * guaranteed to not match any valid hib.
930  */
931 int
hibernate_clear_signature(union hibernate_info * hib)932 hibernate_clear_signature(union hibernate_info *hib)
933 {
934 	uint8_t buf[DEV_BSIZE];
935 
936 	/* Zero out a blank hiber_info */
937 	memcpy(&buf, &disk_hib, sizeof(buf));
938 	memset(&disk_hib, 0, hib->sec_size);
939 
940 	/* Write (zeroed) hibernate info to disk */
941 	DPRINTF("clearing hibernate signature block location: %lld\n",
942 		hib->sig_offset);
943 	if (hibernate_block_io(hib,
944 	    hib->sig_offset,
945 	    hib->sec_size, (vaddr_t)&disk_hib, 1))
946 		printf("Warning: could not clear hibernate signature\n");
947 
948 	memcpy(&disk_hib, buf, sizeof(buf));
949 	return (0);
950 }
951 
952 /*
953  * Compare two hibernate_infos to determine if they are the same (eg,
954  * we should be performing a hibernate resume on this machine.
955  * Not all fields are checked - just enough to verify that the machine
956  * has the same memory configuration and kernel as the one that
957  * wrote the signature previously.
958  */
959 int
hibernate_compare_signature(union hibernate_info * mine,union hibernate_info * disk)960 hibernate_compare_signature(union hibernate_info *mine,
961     union hibernate_info *disk)
962 {
963 	u_int i;
964 
965 	if (mine->nranges != disk->nranges) {
966 		printf("unhibernate failed: memory layout changed\n");
967 		return (1);
968 	}
969 
970 	if (bcmp(mine->kern_hash, disk->kern_hash, SHA256_DIGEST_LENGTH) != 0) {
971 		printf("unhibernate failed: original kernel changed\n");
972 		return (1);
973 	}
974 
975 	for (i = 0; i < mine->nranges; i++) {
976 		if ((mine->ranges[i].base != disk->ranges[i].base) ||
977 		    (mine->ranges[i].end != disk->ranges[i].end) ) {
978 			DPRINTF("hib range %d mismatch [%p-%p != %p-%p]\n",
979 				i,
980 				(void *)mine->ranges[i].base,
981 				(void *)mine->ranges[i].end,
982 				(void *)disk->ranges[i].base,
983 				(void *)disk->ranges[i].end);
984 			printf("unhibernate failed: memory size changed\n");
985 			return (1);
986 		}
987 	}
988 
989 	return (0);
990 }
991 
992 /*
993  * Transfers xfer_size bytes between the hibernate device specified in
994  * hib_info at offset blkctr and the vaddr specified at dest.
995  *
996  * Separate offsets and pages are used to handle misaligned reads (reads
997  * that span a page boundary).
998  *
999  * blkctr specifies a relative offset (relative to the start of swap),
1000  * not an absolute disk offset
1001  *
1002  */
1003 int
hibernate_block_io(union hibernate_info * hib,daddr_t blkctr,size_t xfer_size,vaddr_t dest,int iswrite)1004 hibernate_block_io(union hibernate_info *hib, daddr_t blkctr,
1005     size_t xfer_size, vaddr_t dest, int iswrite)
1006 {
1007 	struct buf *bp;
1008 	int error;
1009 
1010 	bp = geteblk(xfer_size);
1011 	if (iswrite)
1012 		bcopy((caddr_t)dest, bp->b_data, xfer_size);
1013 
1014 	bp->b_bcount = xfer_size;
1015 	bp->b_blkno = blkctr;
1016 	CLR(bp->b_flags, B_READ | B_WRITE | B_DONE);
1017 	SET(bp->b_flags, B_BUSY | (iswrite ? B_WRITE : B_READ) | B_RAW);
1018 	bp->b_dev = hib->dev;
1019 	(*bdsw->d_strategy)(bp);
1020 
1021 	error = biowait(bp);
1022 	if (error) {
1023 		printf("hib block_io biowait error %d blk %lld size %zu\n",
1024 			error, (long long)blkctr, xfer_size);
1025 	} else if (!iswrite)
1026 		bcopy(bp->b_data, (caddr_t)dest, xfer_size);
1027 
1028 	bp->b_flags |= B_INVAL;
1029 	brelse(bp);
1030 
1031 	return (error != 0);
1032 }
1033 
1034 /*
1035  * Preserve one page worth of random data, generated from the resuming
1036  * kernel's arc4random. After resume, this preserved entropy can be used
1037  * to further improve the un-hibernated machine's entropy pool. This
1038  * random data is stored in the piglet, which is preserved across the
1039  * unpack operation, and is restored later in the resume process (see
1040  * hib_getentropy)
1041  */
1042 void
hibernate_preserve_entropy(union hibernate_info * hib)1043 hibernate_preserve_entropy(union hibernate_info *hib)
1044 {
1045 	void *entropy;
1046 
1047 	entropy = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
1048 
1049 	if (!entropy)
1050 		return;
1051 
1052 	pmap_activate(curproc);
1053 	pmap_kenter_pa((vaddr_t)entropy,
1054 	    (paddr_t)(hib->piglet_pa + (29 * PAGE_SIZE)),
1055 	    PROT_READ | PROT_WRITE);
1056 
1057 	arc4random_buf((void *)entropy, PAGE_SIZE);
1058 	pmap_kremove((vaddr_t)entropy, PAGE_SIZE);
1059 	km_free(entropy, PAGE_SIZE, &kv_any, &kp_none);
1060 }
1061 
1062 #ifndef NO_PROPOLICE
1063 vaddr_t
hibernate_unprotect_ssp(void)1064 hibernate_unprotect_ssp(void)
1065 {
1066 	struct kmem_dyn_mode kd_avoidalias;
1067 	vaddr_t va = trunc_page((vaddr_t)&__guard_local);
1068 	paddr_t pa;
1069 
1070 	pmap_extract(pmap_kernel(), va, &pa);
1071 
1072 	memset(&kd_avoidalias, 0, sizeof kd_avoidalias);
1073 	kd_avoidalias.kd_prefer = pa;
1074 	kd_avoidalias.kd_waitok = 1;
1075 	va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_avoidalias);
1076 	if (!va)
1077 		panic("hibernate_unprotect_ssp");
1078 
1079 	pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
1080 	pmap_update(pmap_kernel());
1081 
1082 	return va;
1083 }
1084 
1085 void
hibernate_reprotect_ssp(vaddr_t va)1086 hibernate_reprotect_ssp(vaddr_t va)
1087 {
1088 	pmap_kremove(va, PAGE_SIZE);
1089 	km_free((void *)va, PAGE_SIZE, &kv_any, &kp_none);
1090 }
1091 #endif /* NO_PROPOLICE */
1092 
1093 /*
1094  * Reads the signature block from swap, checks against the current machine's
1095  * information. If the information matches, perform a resume by reading the
1096  * saved image into the pig area, and unpacking.
1097  *
1098  * Must be called with interrupts enabled.
1099  */
1100 void
hibernate_resume(void)1101 hibernate_resume(void)
1102 {
1103 	uint8_t buf[DEV_BSIZE];
1104 	union hibernate_info *hib = (union hibernate_info *)&buf;
1105 	int s;
1106 #ifndef NO_PROPOLICE
1107 	vsize_t off = (vaddr_t)&__guard_local -
1108 	    trunc_page((vaddr_t)&__guard_local);
1109 	vaddr_t guard_va;
1110 #endif
1111 
1112 	/* Get current running machine's hibernate info */
1113 	memset(buf, 0, sizeof(buf));
1114 	if (get_hibernate_info(hib, 0)) {
1115 		DPRINTF("couldn't retrieve machine's hibernate info\n");
1116 		return;
1117 	}
1118 
1119 	/* Read hibernate info from disk */
1120 	s = splbio();
1121 
1122 	bdsw = &bdevsw[major(hib->dev)];
1123 	if ((*bdsw->d_open)(hib->dev, FREAD, S_IFCHR, curproc)) {
1124 		printf("hibernate_resume device open failed\n");
1125 		splx(s);
1126 		return;
1127 	}
1128 
1129 	DPRINTF("reading hibernate signature block location: %lld\n",
1130 		hib->sig_offset);
1131 
1132 	if (hibernate_block_io(hib,
1133 	    hib->sig_offset,
1134 	    hib->sec_size, (vaddr_t)&disk_hib, 0)) {
1135 		DPRINTF("error in hibernate read\n");
1136 		goto fail;
1137 	}
1138 
1139 	/* Check magic number */
1140 	if (disk_hib.magic != HIBERNATE_MAGIC) {
1141 		DPRINTF("wrong magic number in hibernate signature: %x\n",
1142 			disk_hib.magic);
1143 		goto fail;
1144 	}
1145 
1146 	/*
1147 	 * We (possibly) found a hibernate signature. Clear signature first,
1148 	 * to prevent accidental resume or endless resume cycles later.
1149 	 */
1150 	if (hibernate_clear_signature(hib)) {
1151 		DPRINTF("error clearing hibernate signature block\n");
1152 		goto fail;
1153 	}
1154 
1155 	/*
1156 	 * If on-disk and in-memory hibernate signatures match,
1157 	 * this means we should do a resume from hibernate.
1158 	 */
1159 	if (hibernate_compare_signature(hib, &disk_hib)) {
1160 		DPRINTF("mismatched hibernate signature block\n");
1161 		goto fail;
1162 	}
1163 	disk_hib.dev = hib->dev;
1164 
1165 #ifdef MULTIPROCESSOR
1166 	/* XXX - if we fail later, we may need to rehatch APs on some archs */
1167 	DPRINTF("hibernate: quiescing APs\n");
1168 	hibernate_quiesce_cpus();
1169 #endif /* MULTIPROCESSOR */
1170 
1171 	/* Read the image from disk into the image (pig) area */
1172 	if (hibernate_read_image(&disk_hib))
1173 		goto fail;
1174 	if ((*bdsw->d_close)(hib->dev, 0, S_IFCHR, curproc))
1175 		printf("hibernate_resume device close failed\n");
1176 	bdsw = NULL;
1177 
1178 	DPRINTF("hibernate: quiescing devices\n");
1179 	if (config_suspend_all(DVACT_QUIESCE) != 0)
1180 		goto fail;
1181 
1182 #ifndef NO_PROPOLICE
1183 	guard_va = hibernate_unprotect_ssp();
1184 #endif /* NO_PROPOLICE */
1185 
1186 	(void) splhigh();
1187 	hibernate_disable_intr_machdep();
1188 	cold = 2;
1189 
1190 	DPRINTF("hibernate: suspending devices\n");
1191 	if (config_suspend_all(DVACT_SUSPEND) != 0) {
1192 		cold = 0;
1193 		hibernate_enable_intr_machdep();
1194 #ifndef NO_PROPOLICE
1195 		hibernate_reprotect_ssp(guard_va);
1196 #endif /* ! NO_PROPOLICE */
1197 		goto fail;
1198 	}
1199 
1200 	pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_start,
1201 	    &retguard_start_phys);
1202 	pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_end,
1203 	    &retguard_end_phys);
1204 
1205 	hibernate_preserve_entropy(&disk_hib);
1206 
1207 	printf("Unpacking image...\n");
1208 
1209 	/* Switch stacks */
1210 	DPRINTF("hibernate: switching stacks\n");
1211 	hibernate_switch_stack_machdep();
1212 
1213 #ifndef NO_PROPOLICE
1214 	/* Start using suspended kernel's propolice guard */
1215 	*(long *)(guard_va + off) = disk_hib.guard;
1216 	hibernate_reprotect_ssp(guard_va);
1217 #endif /* ! NO_PROPOLICE */
1218 
1219 	/* Unpack and resume */
1220 	hibernate_unpack_image(&disk_hib);
1221 
1222 fail:
1223 	if (!bdsw)
1224 		printf("\nUnable to resume hibernated image\n");
1225 	else if ((*bdsw->d_close)(hib->dev, 0, S_IFCHR, curproc))
1226 		printf("hibernate_resume device close failed\n");
1227 	splx(s);
1228 }
1229 
1230 /*
1231  * Unpack image from pig area to original location by looping through the
1232  * list of output chunks in the order they should be restored (fchunks).
1233  *
1234  * Note that due to the stack smash protector and the fact that we have
1235  * switched stacks, it is not permitted to return from this function.
1236  */
1237 void
hibernate_unpack_image(union hibernate_info * hib)1238 hibernate_unpack_image(union hibernate_info *hib)
1239 {
1240 	uint8_t buf[DEV_BSIZE];
1241 	struct hibernate_disk_chunk *chunks;
1242 	union hibernate_info *local_hib = (union hibernate_info *)&buf;
1243 	paddr_t image_cur = global_pig_start;
1244 	short i, *fchunks;
1245 	char *pva;
1246 
1247 	/* Piglet will be identity mapped (VA == PA) */
1248 	pva = (char *)hib->piglet_pa;
1249 
1250 	fchunks = (short *)(pva + (4 * PAGE_SIZE));
1251 
1252 	chunks = (struct hibernate_disk_chunk *)(pva + HIBERNATE_CHUNK_SIZE);
1253 
1254 	/* Can't use hiber_info that's passed in after this point */
1255 	memcpy(buf, hib, sizeof(buf));
1256 	local_hib->retguard_ofs = 0;
1257 
1258 	/* VA == PA */
1259 	local_hib->piglet_va = local_hib->piglet_pa;
1260 
1261 	/*
1262 	 * Point of no return. Once we pass this point, only kernel code can
1263 	 * be accessed. No global variables or other kernel data structures
1264 	 * are guaranteed to be coherent after unpack starts.
1265 	 *
1266 	 * The image is now in high memory (pig area), we unpack from the pig
1267 	 * to the correct location in memory. We'll eventually end up copying
1268 	 * on top of ourself, but we are assured the kernel code here is the
1269 	 * same between the hibernated and resuming kernel, and we are running
1270 	 * on our own stack, so the overwrite is ok.
1271 	 */
1272 	DPRINTF("hibernate: activating alt. pagetable and starting unpack\n");
1273 	hibernate_activate_resume_pt_machdep();
1274 
1275 	for (i = 0; i < local_hib->chunk_ctr; i++) {
1276 		/* Reset zlib for inflate */
1277 		if (hibernate_zlib_reset(local_hib, 0) != Z_OK)
1278 			panic("hibernate failed to reset zlib for inflate");
1279 
1280 		hibernate_process_chunk(local_hib, &chunks[fchunks[i]],
1281 		    image_cur);
1282 
1283 		image_cur += chunks[fchunks[i]].compressed_size;
1284 	}
1285 
1286 	/*
1287 	 * Resume the loaded kernel by jumping to the MD resume vector.
1288 	 * We won't be returning from this call. We pass the location of
1289 	 * the retguard save area so the MD code can replace it before
1290 	 * resuming. See the piglet layout at the top of this file for
1291 	 * more information on the layout of the piglet area.
1292 	 *
1293 	 * We use 'global_piglet_va' here since by the time we are at
1294 	 * this point, we have already unpacked the image, and we want
1295 	 * the suspended kernel's view of what the piglet was, before
1296 	 * suspend occurred (since we will need to use that in the retguard
1297 	 * copy code in hibernate_resume_machdep.)
1298 	 */
1299 	hibernate_resume_machdep(global_piglet_va + (110 * PAGE_SIZE));
1300 }
1301 
1302 /*
1303  * Bounce a compressed image chunk to the piglet, entering mappings for the
1304  * copied pages as needed
1305  */
1306 void
hibernate_copy_chunk_to_piglet(paddr_t img_cur,vaddr_t piglet,size_t size)1307 hibernate_copy_chunk_to_piglet(paddr_t img_cur, vaddr_t piglet, size_t size)
1308 {
1309 	size_t ct, ofs;
1310 	paddr_t src = img_cur;
1311 	vaddr_t dest = piglet;
1312 
1313 	/* Copy first partial page */
1314 	ct = (PAGE_SIZE) - (src & PAGE_MASK);
1315 	ofs = (src & PAGE_MASK);
1316 
1317 	if (ct < PAGE_SIZE) {
1318 		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE,
1319 			(src - ofs), 0);
1320 		hibernate_flush();
1321 		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE + ofs), (caddr_t)dest, ct);
1322 		src += ct;
1323 		dest += ct;
1324 	}
1325 
1326 	/* Copy remaining pages */
1327 	while (src < size + img_cur) {
1328 		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE, src, 0);
1329 		hibernate_flush();
1330 		ct = PAGE_SIZE;
1331 		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE), (caddr_t)dest, ct);
1332 		hibernate_flush();
1333 		src += ct;
1334 		dest += ct;
1335 	}
1336 }
1337 
1338 /*
1339  * Process a chunk by bouncing it to the piglet, followed by unpacking
1340  */
1341 void
hibernate_process_chunk(union hibernate_info * hib,struct hibernate_disk_chunk * chunk,paddr_t img_cur)1342 hibernate_process_chunk(union hibernate_info *hib,
1343     struct hibernate_disk_chunk *chunk, paddr_t img_cur)
1344 {
1345 	char *pva = (char *)hib->piglet_va;
1346 
1347 	hibernate_copy_chunk_to_piglet(img_cur,
1348 	 (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)), chunk->compressed_size);
1349 	hibernate_inflate_region(hib, chunk->base,
1350 	    (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)),
1351 	    chunk->compressed_size);
1352 }
1353 
1354 /*
1355  * Calculate RLE component for 'inaddr'. Clamps to max RLE pages between
1356  * inaddr and range_end.
1357  */
1358 int
hibernate_calc_rle(paddr_t inaddr,paddr_t range_end)1359 hibernate_calc_rle(paddr_t inaddr, paddr_t range_end)
1360 {
1361 	int rle;
1362 
1363 	rle = uvm_page_rle(inaddr);
1364 	KASSERT(rle >= 0 && rle <= MAX_RLE);
1365 
1366 	/* Clamp RLE to range end */
1367 	if (rle > 0 && inaddr + (rle * PAGE_SIZE) > range_end)
1368 		rle = (range_end - inaddr) / PAGE_SIZE;
1369 
1370 	return (rle);
1371 }
1372 
1373 /*
1374  * Write the RLE byte for page at 'inaddr' to the output stream.
1375  * Returns the number of pages to be skipped at 'inaddr'.
1376  */
1377 int
hibernate_write_rle(union hibernate_info * hib,paddr_t inaddr,paddr_t range_end,daddr_t * blkctr,size_t * out_remaining)1378 hibernate_write_rle(union hibernate_info *hib, paddr_t inaddr,
1379 	paddr_t range_end, daddr_t *blkctr,
1380 	size_t *out_remaining)
1381 {
1382 	int rle, err, *rleloc;
1383 	struct hibernate_zlib_state *hibernate_state;
1384 	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
1385 
1386 	hibernate_state =
1387 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1388 
1389 	rle = hibernate_calc_rle(inaddr, range_end);
1390 
1391 	rleloc = (int *)hibernate_rle_page + MAX_RLE - 1;
1392 	*rleloc = rle;
1393 
1394 	/* Deflate the RLE byte into the stream */
1395 	hibernate_deflate(hib, (paddr_t)rleloc, out_remaining);
1396 
1397 	/* Did we fill the output page? If so, flush to disk */
1398 	if (*out_remaining == 0) {
1399 		if ((err = hib->io_func(hib->dev, *blkctr + hib->image_offset,
1400 			(vaddr_t)hibernate_io_page, PAGE_SIZE, HIB_W,
1401 			hib->io_page))) {
1402 				DPRINTF("hib write error %d\n", err);
1403 				return (err);
1404 		}
1405 
1406 		*blkctr += PAGE_SIZE / DEV_BSIZE;
1407 		*out_remaining = PAGE_SIZE;
1408 
1409 		/* If we didn't deflate the entire RLE byte, finish it now */
1410 		if (hibernate_state->hib_stream.avail_in != 0)
1411 			hibernate_deflate(hib,
1412 				(vaddr_t)hibernate_state->hib_stream.next_in,
1413 				out_remaining);
1414 	}
1415 
1416 	return (rle);
1417 }
1418 
1419 /*
1420  * Write a compressed version of this machine's memory to disk, at the
1421  * precalculated swap offset:
1422  *
1423  * end of swap - signature block size - chunk table size - memory size
1424  *
1425  * The function begins by looping through each phys mem range, cutting each
1426  * one into MD sized chunks. These chunks are then compressed individually
1427  * and written out to disk, in phys mem order. Some chunks might compress
1428  * more than others, and for this reason, each chunk's size is recorded
1429  * in the chunk table, which is written to disk after the image has
1430  * properly been compressed and written (in hibernate_write_chunktable).
1431  *
1432  * When this function is called, the machine is nearly suspended - most
1433  * devices are quiesced/suspended, interrupts are off, and cold has
1434  * been set. This means that there can be no side effects once the
1435  * write has started, and the write function itself can also have no
1436  * side effects. This also means no printfs are permitted (since printf
1437  * has side effects.)
1438  *
1439  * Return values :
1440  *
1441  * 0      - success
1442  * EIO    - I/O error occurred writing the chunks
1443  * EINVAL - Failed to write a complete range
1444  * ENOMEM - Memory allocation failure during preparation of the zlib arena
1445  */
1446 int
hibernate_write_chunks(union hibernate_info * hib)1447 hibernate_write_chunks(union hibernate_info *hib)
1448 {
1449 	paddr_t range_base, range_end, inaddr, temp_inaddr;
1450 	size_t out_remaining, used;
1451 	struct hibernate_disk_chunk *chunks;
1452 	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
1453 	daddr_t blkctr = 0;
1454 	int i, rle, err;
1455 	struct hibernate_zlib_state *hibernate_state;
1456 
1457 	hibernate_state =
1458 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1459 
1460 	hib->chunk_ctr = 0;
1461 
1462 	/*
1463 	 * Map the utility VAs to the piglet. See the piglet map at the
1464 	 * top of this file for piglet layout information.
1465 	 */
1466 	hibernate_copy_page = hib->piglet_va + 3 * PAGE_SIZE;
1467 	hibernate_rle_page = hib->piglet_va + 28 * PAGE_SIZE;
1468 
1469 	chunks = (struct hibernate_disk_chunk *)(hib->piglet_va +
1470 	    HIBERNATE_CHUNK_SIZE);
1471 
1472 	/* Calculate the chunk regions */
1473 	for (i = 0; i < hib->nranges; i++) {
1474 		range_base = hib->ranges[i].base;
1475 		range_end = hib->ranges[i].end;
1476 
1477 		inaddr = range_base;
1478 
1479 		while (inaddr < range_end) {
1480 			chunks[hib->chunk_ctr].base = inaddr;
1481 			if (inaddr + HIBERNATE_CHUNK_SIZE < range_end)
1482 				chunks[hib->chunk_ctr].end = inaddr +
1483 				    HIBERNATE_CHUNK_SIZE;
1484 			else
1485 				chunks[hib->chunk_ctr].end = range_end;
1486 
1487 			inaddr += HIBERNATE_CHUNK_SIZE;
1488 			hib->chunk_ctr ++;
1489 		}
1490 	}
1491 
1492 	uvm_pmr_dirty_everything();
1493 	uvm_pmr_zero_everything();
1494 
1495 	/* Compress and write the chunks in the chunktable */
1496 	for (i = 0; i < hib->chunk_ctr; i++) {
1497 		range_base = chunks[i].base;
1498 		range_end = chunks[i].end;
1499 
1500 		chunks[i].offset = blkctr + hib->image_offset;
1501 
1502 		/* Reset zlib for deflate */
1503 		if (hibernate_zlib_reset(hib, 1) != Z_OK) {
1504 			DPRINTF("hibernate_zlib_reset failed for deflate\n");
1505 			return (ENOMEM);
1506 		}
1507 
1508 		inaddr = range_base;
1509 
1510 		/*
1511 		 * For each range, loop through its phys mem region
1512 		 * and write out the chunks (the last chunk might be
1513 		 * smaller than the chunk size).
1514 		 */
1515 		while (inaddr < range_end) {
1516 			out_remaining = PAGE_SIZE;
1517 			while (out_remaining > 0 && inaddr < range_end) {
1518 				/*
1519 				 * Adjust for regions that are not evenly
1520 				 * divisible by PAGE_SIZE or overflowed
1521 				 * pages from the previous iteration.
1522 				 */
1523 				temp_inaddr = (inaddr & PAGE_MASK) +
1524 				    hibernate_copy_page;
1525 
1526 				/* Deflate from temp_inaddr to IO page */
1527 				if (inaddr != range_end) {
1528 					if (inaddr % PAGE_SIZE == 0) {
1529 						rle = hibernate_write_rle(hib,
1530 							inaddr,
1531 							range_end,
1532 							&blkctr,
1533 							&out_remaining);
1534 					}
1535 
1536 					if (rle == 0) {
1537 						pmap_kenter_pa(hibernate_temp_page,
1538 							inaddr & PMAP_PA_MASK,
1539 							PROT_READ);
1540 
1541 						bcopy((caddr_t)hibernate_temp_page,
1542 							(caddr_t)hibernate_copy_page,
1543 							PAGE_SIZE);
1544 						inaddr += hibernate_deflate(hib,
1545 							temp_inaddr,
1546 							&out_remaining);
1547 					} else {
1548 						inaddr += rle * PAGE_SIZE;
1549 						if (inaddr > range_end)
1550 							inaddr = range_end;
1551 					}
1552 
1553 				}
1554 
1555 				if (out_remaining == 0) {
1556 					/* Filled up the page */
1557 					if ((err = hib->io_func(hib->dev,
1558 					    blkctr + hib->image_offset,
1559 					    (vaddr_t)hibernate_io_page,
1560 					    PAGE_SIZE, HIB_W, hib->io_page))) {
1561 						DPRINTF("hib write error %d\n",
1562 						    err);
1563 						return (err);
1564 					}
1565 					blkctr += PAGE_SIZE / DEV_BSIZE;
1566 				}
1567 			}
1568 		}
1569 
1570 		if (inaddr != range_end) {
1571 			DPRINTF("deflate range ended prematurely\n");
1572 			return (EINVAL);
1573 		}
1574 
1575 		/*
1576 		 * End of range. Round up to next secsize bytes
1577 		 * after finishing compress
1578 		 */
1579 		if (out_remaining == 0)
1580 			out_remaining = PAGE_SIZE;
1581 
1582 		/* Finish compress */
1583 		hibernate_state->hib_stream.next_in = (unsigned char *)inaddr;
1584 		hibernate_state->hib_stream.avail_in = 0;
1585 		hibernate_state->hib_stream.next_out =
1586 		    (unsigned char *)hibernate_io_page +
1587 			(PAGE_SIZE - out_remaining);
1588 
1589 		/* We have an extra output page available for finalize */
1590 		hibernate_state->hib_stream.avail_out =
1591 			out_remaining + PAGE_SIZE;
1592 
1593 		if ((err = deflate(&hibernate_state->hib_stream, Z_FINISH)) !=
1594 		    Z_STREAM_END) {
1595 			DPRINTF("deflate error in output stream: %d\n", err);
1596 			return (err);
1597 		}
1598 
1599 		out_remaining = hibernate_state->hib_stream.avail_out;
1600 
1601 		/* Round up to next sector if needed */
1602 		used = ROUNDUP(2 * PAGE_SIZE - out_remaining, hib->sec_size);
1603 
1604 		/* Write final block(s) for this chunk */
1605 		if ((err = hib->io_func(hib->dev, blkctr + hib->image_offset,
1606 		    (vaddr_t)hibernate_io_page, used,
1607 		    HIB_W, hib->io_page))) {
1608 			DPRINTF("hib final write error %d\n", err);
1609 			return (err);
1610 		}
1611 
1612 		blkctr += used / DEV_BSIZE;
1613 
1614 		chunks[i].compressed_size = (blkctr + hib->image_offset -
1615 		    chunks[i].offset) * DEV_BSIZE;
1616 	}
1617 
1618 	hib->chunktable_offset = hib->image_offset + blkctr;
1619 	return (0);
1620 }
1621 
1622 /*
1623  * Reset the zlib stream state and allocate a new hiballoc area for either
1624  * inflate or deflate. This function is called once for each hibernate chunk.
1625  * Calling hiballoc_init multiple times is acceptable since the memory it is
1626  * provided is unmanaged memory (stolen). We use the memory provided to us
1627  * by the piglet allocated via the supplied hib.
1628  */
1629 int
hibernate_zlib_reset(union hibernate_info * hib,int deflate)1630 hibernate_zlib_reset(union hibernate_info *hib, int deflate)
1631 {
1632 	vaddr_t hibernate_zlib_start;
1633 	size_t hibernate_zlib_size;
1634 	char *pva = (char *)hib->piglet_va;
1635 	struct hibernate_zlib_state *hibernate_state;
1636 
1637 	hibernate_state =
1638 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1639 
1640 	if (!deflate)
1641 		pva = (char *)((paddr_t)pva & (PIGLET_PAGE_MASK));
1642 
1643 	/*
1644 	 * See piglet layout information at the start of this file for
1645 	 * information on the zlib page assignments.
1646 	 */
1647 	hibernate_zlib_start = (vaddr_t)(pva + (30 * PAGE_SIZE));
1648 	hibernate_zlib_size = 80 * PAGE_SIZE;
1649 
1650 	memset((void *)hibernate_zlib_start, 0, hibernate_zlib_size);
1651 	memset(hibernate_state, 0, PAGE_SIZE);
1652 
1653 	/* Set up stream structure */
1654 	hibernate_state->hib_stream.zalloc = (alloc_func)hibernate_zlib_alloc;
1655 	hibernate_state->hib_stream.zfree = (free_func)hibernate_zlib_free;
1656 
1657 	/* Initialize the hiballoc arena for zlib allocs/frees */
1658 	hiballoc_init(&hibernate_state->hiballoc_arena,
1659 	    (caddr_t)hibernate_zlib_start, hibernate_zlib_size);
1660 
1661 	if (deflate) {
1662 		return deflateInit(&hibernate_state->hib_stream,
1663 		    Z_BEST_SPEED);
1664 	} else
1665 		return inflateInit(&hibernate_state->hib_stream);
1666 }
1667 
1668 /*
1669  * Reads the hibernated memory image from disk, whose location and
1670  * size are recorded in hib. Begin by reading the persisted
1671  * chunk table, which records the original chunk placement location
1672  * and compressed size for each. Next, allocate a pig region of
1673  * sufficient size to hold the compressed image. Next, read the
1674  * chunks into the pig area (calling hibernate_read_chunks to do this),
1675  * and finally, if all of the above succeeds, clear the hibernate signature.
1676  * The function will then return to hibernate_resume, which will proceed
1677  * to unpack the pig image to the correct place in memory.
1678  */
1679 int
hibernate_read_image(union hibernate_info * hib)1680 hibernate_read_image(union hibernate_info *hib)
1681 {
1682 	size_t compressed_size, disk_size, chunktable_size, pig_sz;
1683 	paddr_t image_start, image_end, pig_start, pig_end;
1684 	struct hibernate_disk_chunk *chunks;
1685 	daddr_t blkctr;
1686 	vaddr_t chunktable = (vaddr_t)NULL;
1687 	paddr_t piglet_chunktable = hib->piglet_pa +
1688 	    HIBERNATE_CHUNK_SIZE;
1689 	int i, status;
1690 
1691 	status = 0;
1692 	pmap_activate(curproc);
1693 
1694 	/* Calculate total chunk table size in disk blocks */
1695 	chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / DEV_BSIZE;
1696 
1697 	blkctr = hib->chunktable_offset;
1698 
1699 	chunktable = (vaddr_t)km_alloc(HIBERNATE_CHUNK_TABLE_SIZE, &kv_any,
1700 	    &kp_none, &kd_nowait);
1701 
1702 	if (!chunktable)
1703 		return (1);
1704 
1705 	/* Map chunktable pages */
1706 	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE; i += PAGE_SIZE)
1707 		pmap_kenter_pa(chunktable + i, piglet_chunktable + i,
1708 		    PROT_READ | PROT_WRITE);
1709 	pmap_update(pmap_kernel());
1710 
1711 	/* Read the chunktable from disk into the piglet chunktable */
1712 	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE;
1713 	    i += MAXPHYS, blkctr += MAXPHYS/DEV_BSIZE)
1714 		hibernate_block_io(hib, blkctr, MAXPHYS,
1715 		    chunktable + i, 0);
1716 
1717 	blkctr = hib->image_offset;
1718 	compressed_size = 0;
1719 
1720 	chunks = (struct hibernate_disk_chunk *)chunktable;
1721 
1722 	for (i = 0; i < hib->chunk_ctr; i++)
1723 		compressed_size += chunks[i].compressed_size;
1724 
1725 	disk_size = compressed_size;
1726 
1727 	printf("unhibernating @ block %lld length %luMB\n",
1728 	    hib->sig_offset - chunktable_size,
1729 	    compressed_size / (1024 * 1024));
1730 
1731 	/* Allocate the pig area */
1732 	pig_sz = compressed_size + HIBERNATE_CHUNK_SIZE;
1733 	if (uvm_pmr_alloc_pig(&pig_start, pig_sz, hib->piglet_pa) == ENOMEM) {
1734 		status = 1;
1735 		goto unmap;
1736 	}
1737 
1738 	pig_end = pig_start + pig_sz;
1739 
1740 	/* Calculate image extents. Pig image must end on a chunk boundary. */
1741 	image_end = pig_end & ~(HIBERNATE_CHUNK_SIZE - 1);
1742 	image_start = image_end - disk_size;
1743 
1744 	hibernate_read_chunks(hib, image_start, image_end, disk_size,
1745 	    chunks);
1746 
1747 	/* Prepare the resume time pmap/page table */
1748 	hibernate_populate_resume_pt(hib, image_start, image_end);
1749 
1750 unmap:
1751 	/* Unmap chunktable pages */
1752 	pmap_kremove(chunktable, HIBERNATE_CHUNK_TABLE_SIZE);
1753 	pmap_update(pmap_kernel());
1754 
1755 	return (status);
1756 }
1757 
1758 /*
1759  * Read the hibernated memory chunks from disk (chunk information at this
1760  * point is stored in the piglet) into the pig area specified by
1761  * [pig_start .. pig_end]. Order the chunks so that the final chunk is the
1762  * only chunk with overlap possibilities.
1763  */
1764 int
hibernate_read_chunks(union hibernate_info * hib,paddr_t pig_start,paddr_t pig_end,size_t image_compr_size,struct hibernate_disk_chunk * chunks)1765 hibernate_read_chunks(union hibernate_info *hib, paddr_t pig_start,
1766     paddr_t pig_end, size_t image_compr_size,
1767     struct hibernate_disk_chunk *chunks)
1768 {
1769 	paddr_t img_cur, piglet_base;
1770 	daddr_t blkctr;
1771 	size_t processed, compressed_size, read_size;
1772 	int nchunks, nfchunks, num_io_pages;
1773 	vaddr_t tempva, hibernate_fchunk_area;
1774 	short *fchunks, i, j;
1775 
1776 	tempva = (vaddr_t)NULL;
1777 	hibernate_fchunk_area = (vaddr_t)NULL;
1778 	nfchunks = 0;
1779 	piglet_base = hib->piglet_pa;
1780 	global_pig_start = pig_start;
1781 
1782 	/*
1783 	 * These mappings go into the resuming kernel's page table, and are
1784 	 * used only during image read. They disappear from existence
1785 	 * when the suspended kernel is unpacked on top of us.
1786 	 */
1787 	tempva = (vaddr_t)km_alloc(MAXPHYS + PAGE_SIZE, &kv_any, &kp_none,
1788 		&kd_nowait);
1789 	if (!tempva)
1790 		return (1);
1791 	hibernate_fchunk_area = (vaddr_t)km_alloc(24 * PAGE_SIZE, &kv_any,
1792 	    &kp_none, &kd_nowait);
1793 	if (!hibernate_fchunk_area)
1794 		return (1);
1795 
1796 	/* Final output chunk ordering VA */
1797 	fchunks = (short *)hibernate_fchunk_area;
1798 
1799 	/* Map the chunk ordering region */
1800 	for(i = 0; i < 24 ; i++)
1801 		pmap_kenter_pa(hibernate_fchunk_area + (i * PAGE_SIZE),
1802 			piglet_base + ((4 + i) * PAGE_SIZE),
1803 			PROT_READ | PROT_WRITE);
1804 	pmap_update(pmap_kernel());
1805 
1806 	nchunks = hib->chunk_ctr;
1807 
1808 	/* Initially start all chunks as unplaced */
1809 	for (i = 0; i < nchunks; i++)
1810 		chunks[i].flags = 0;
1811 
1812 	/*
1813 	 * Search the list for chunks that are outside the pig area. These
1814 	 * can be placed first in the final output list.
1815 	 */
1816 	for (i = 0; i < nchunks; i++) {
1817 		if (chunks[i].end <= pig_start || chunks[i].base >= pig_end) {
1818 			fchunks[nfchunks] = i;
1819 			nfchunks++;
1820 			chunks[i].flags |= HIBERNATE_CHUNK_PLACED;
1821 		}
1822 	}
1823 
1824 	/*
1825 	 * Walk the ordering, place the chunks in ascending memory order.
1826 	 */
1827 	for (i = 0; i < nchunks; i++) {
1828 		if (chunks[i].flags != HIBERNATE_CHUNK_PLACED) {
1829 			fchunks[nfchunks] = i;
1830 			nfchunks++;
1831 			chunks[i].flags = HIBERNATE_CHUNK_PLACED;
1832 		}
1833 	}
1834 
1835 	img_cur = pig_start;
1836 
1837 	for (i = 0; i < nfchunks; i++) {
1838 		blkctr = chunks[fchunks[i]].offset;
1839 		processed = 0;
1840 		compressed_size = chunks[fchunks[i]].compressed_size;
1841 
1842 		while (processed < compressed_size) {
1843 			if (compressed_size - processed >= MAXPHYS)
1844 				read_size = MAXPHYS;
1845 			else
1846 				read_size = compressed_size - processed;
1847 
1848 			/*
1849 			 * We're reading read_size bytes, offset from the
1850 			 * start of a page by img_cur % PAGE_SIZE, so the
1851 			 * end will be read_size + (img_cur % PAGE_SIZE)
1852 			 * from the start of the first page.  Round that
1853 			 * up to the next page size.
1854 			 */
1855 			num_io_pages = (read_size + (img_cur % PAGE_SIZE)
1856 				+ PAGE_SIZE - 1) / PAGE_SIZE;
1857 
1858 			KASSERT(num_io_pages <= MAXPHYS/PAGE_SIZE + 1);
1859 
1860 			/* Map pages for this read */
1861 			for (j = 0; j < num_io_pages; j ++)
1862 				pmap_kenter_pa(tempva + j * PAGE_SIZE,
1863 				    img_cur + j * PAGE_SIZE,
1864 				    PROT_READ | PROT_WRITE);
1865 
1866 			pmap_update(pmap_kernel());
1867 
1868 			hibernate_block_io(hib, blkctr, read_size,
1869 			    tempva + (img_cur & PAGE_MASK), 0);
1870 
1871 			blkctr += (read_size / DEV_BSIZE);
1872 
1873 			pmap_kremove(tempva, num_io_pages * PAGE_SIZE);
1874 			pmap_update(pmap_kernel());
1875 
1876 			processed += read_size;
1877 			img_cur += read_size;
1878 		}
1879 	}
1880 
1881 	pmap_kremove(hibernate_fchunk_area, 24 * PAGE_SIZE);
1882 	pmap_update(pmap_kernel());
1883 
1884 	return (0);
1885 }
1886 
1887 /*
1888  * Hibernating a machine comprises the following operations:
1889  *  1. Calculating this machine's hibernate_info information
1890  *  2. Allocating a piglet and saving the piglet's physaddr
1891  *  3. Calculating the memory chunks
1892  *  4. Writing the compressed chunks to disk
1893  *  5. Writing the chunk table
1894  *  6. Writing the signature block (hibernate_info)
1895  *
1896  * On most architectures, the function calling hibernate_suspend would
1897  * then power off the machine using some MD-specific implementation.
1898  */
1899 int
hibernate_suspend(void)1900 hibernate_suspend(void)
1901 {
1902 	uint8_t buf[DEV_BSIZE];
1903 	union hibernate_info *hib = (union hibernate_info *)&buf;
1904 	u_long start, end;
1905 
1906 	/*
1907 	 * Calculate memory ranges, swap offsets, etc.
1908 	 * This also allocates a piglet whose physaddr is stored in
1909 	 * hib->piglet_pa and vaddr stored in hib->piglet_va
1910 	 */
1911 	if (get_hibernate_info(hib, 1)) {
1912 		DPRINTF("failed to obtain hibernate info\n");
1913 		return (1);
1914 	}
1915 
1916 	/* Find a page-addressed region in swap [start,end] */
1917 	if (uvm_hibswap(hib->dev, &start, &end)) {
1918 		printf("hibernate: cannot find any swap\n");
1919 		return (1);
1920 	}
1921 
1922 	if (end - start < 1000) {
1923 		printf("hibernate: insufficient swap (%lu is too small)\n",
1924 			end - start + 1);
1925 		return (1);
1926 	}
1927 
1928 	pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_start,
1929 	    &retguard_start_phys);
1930 	pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_end,
1931 	    &retguard_end_phys);
1932 
1933 	/* Calculate block offsets in swap */
1934 	hib->image_offset = ctod(start);
1935 
1936 	DPRINTF("hibernate @ block %lld max-length %lu blocks\n",
1937 	    hib->image_offset, ctod(end) - ctod(start) + 1);
1938 
1939 	pmap_activate(curproc);
1940 	DPRINTF("hibernate: writing chunks\n");
1941 	if (hibernate_write_chunks(hib)) {
1942 		DPRINTF("hibernate_write_chunks failed\n");
1943 		return (1);
1944 	}
1945 
1946 	DPRINTF("hibernate: writing chunktable\n");
1947 	if (hibernate_write_chunktable(hib)) {
1948 		DPRINTF("hibernate_write_chunktable failed\n");
1949 		return (1);
1950 	}
1951 
1952 	DPRINTF("hibernate: writing signature\n");
1953 	if (hibernate_write_signature(hib)) {
1954 		DPRINTF("hibernate_write_signature failed\n");
1955 		return (1);
1956 	}
1957 
1958 	/* Allow the disk to settle */
1959 	delay(500000);
1960 
1961 	/*
1962 	 * Give the device-specific I/O function a notification that we're
1963 	 * done, and that it can clean up or shutdown as needed.
1964 	 */
1965 	hib->io_func(hib->dev, 0, (vaddr_t)NULL, 0, HIB_DONE, hib->io_page);
1966 	return (0);
1967 }
1968 
1969 int
hibernate_alloc(void)1970 hibernate_alloc(void)
1971 {
1972 	KASSERT(global_piglet_va == 0);
1973 	KASSERT(hibernate_temp_page == 0);
1974 
1975 	pmap_activate(curproc);
1976 	pmap_kenter_pa(HIBERNATE_HIBALLOC_PAGE, HIBERNATE_HIBALLOC_PAGE,
1977 	    PROT_READ | PROT_WRITE);
1978 
1979 	/* Allocate a piglet, store its addresses in the supplied globals */
1980 	if (uvm_pmr_alloc_piglet(&global_piglet_va, &global_piglet_pa,
1981 	    HIBERNATE_CHUNK_SIZE * 4, HIBERNATE_CHUNK_SIZE))
1982 		goto unmap;
1983 
1984 	/*
1985 	 * Allocate VA for the temp page.
1986 	 *
1987 	 * This will become part of the suspended kernel and will
1988 	 * be freed in hibernate_free, upon resume (or hibernate
1989 	 * failure)
1990 	 */
1991 	hibernate_temp_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any,
1992 	    &kp_none, &kd_nowait);
1993 	if (!hibernate_temp_page) {
1994 		uvm_pmr_free_piglet(global_piglet_va, 4 * HIBERNATE_CHUNK_SIZE);
1995 		global_piglet_va = 0;
1996 		goto unmap;
1997 	}
1998 	return (0);
1999 unmap:
2000 	pmap_kremove(HIBERNATE_HIBALLOC_PAGE, PAGE_SIZE);
2001 	pmap_update(pmap_kernel());
2002 	return (ENOMEM);
2003 }
2004 
2005 /*
2006  * Free items allocated by hibernate_alloc()
2007  */
2008 void
hibernate_free(void)2009 hibernate_free(void)
2010 {
2011 	pmap_activate(curproc);
2012 
2013 	if (global_piglet_va)
2014 		uvm_pmr_free_piglet(global_piglet_va,
2015 		    4 * HIBERNATE_CHUNK_SIZE);
2016 
2017 	if (hibernate_temp_page) {
2018 		pmap_kremove(hibernate_temp_page, PAGE_SIZE);
2019 		km_free((void *)hibernate_temp_page, PAGE_SIZE,
2020 		    &kv_any, &kp_none);
2021 	}
2022 
2023 	global_piglet_va = 0;
2024 	hibernate_temp_page = 0;
2025 	pmap_kremove(HIBERNATE_HIBALLOC_PAGE, PAGE_SIZE);
2026 	pmap_update(pmap_kernel());
2027 }
2028