1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <mdb/mdb_modapi.h>
29 #include <sys/types.h>
30 #include <vm/page.h>
31 #include <sys/thread.h>
32 #include <sys/swap.h>
33 #include <sys/memlist.h>
34 
35 
36 /*
37  * Page walker.
38  * By default, this will walk all pages in the system.  If given an
39  * address, it will walk all pages belonging to the vnode at that
40  * address.
41  */
42 
43 /*
44  * page_walk_data
45  *
46  * pw_hashleft is set to -1 when walking a vnode's pages, and holds the
47  * number of hash locations remaining in the page hash table when
48  * walking all pages.
49  *
50  * The astute reader will notice that pw_hashloc is only used when
51  * reading all pages (to hold a pointer to our location in the page
52  * hash table), and that pw_first is only used when reading the pages
53  * belonging to a particular vnode (to hold a pointer to the first
54  * page).  While these could be combined to be a single pointer, they
55  * are left separate for clarity.
56  */
typedef struct page_walk_data {
	/* -1 for a vnode walk; else hash table entries still to be read */
	long		pw_hashleft;
	/* next page hash table entry to read (all-pages walk only) */
	void		**pw_hashloc;
	/* first page visited, used to detect wrap-around (vnode walk only) */
	uintptr_t	pw_first;
} page_walk_data_t;
62 
63 int
64 page_walk_init(mdb_walk_state_t *wsp)
65 {
66 	page_walk_data_t	*pwd;
67 	void	**ptr;
68 	size_t	hashsz;
69 	vnode_t	vn;
70 
71 	if (wsp->walk_addr == NULL) {
72 
73 		/*
74 		 * Walk all pages
75 		 */
76 
77 		if ((mdb_readvar(&ptr, "page_hash") == -1) ||
78 		    (mdb_readvar(&hashsz, "page_hashsz") == -1) ||
79 		    (ptr == NULL) || (hashsz == 0)) {
80 			mdb_warn("page_hash, page_hashsz not found or invalid");
81 			return (WALK_ERR);
82 		}
83 
84 		/*
85 		 * Since we are walking all pages, initialize hashleft
86 		 * to be the remaining number of entries in the page
87 		 * hash.  hashloc is set the start of the page hash
88 		 * table.  Setting the walk address to 0 indicates that
89 		 * we aren't currently following a hash chain, and that
90 		 * we need to scan the page hash table for a page.
91 		 */
92 		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
93 		pwd->pw_hashleft = hashsz;
94 		pwd->pw_hashloc = ptr;
95 		wsp->walk_addr = 0;
96 	} else {
97 
98 		/*
99 		 * Walk just this vnode
100 		 */
101 
102 		if (mdb_vread(&vn, sizeof (vnode_t), wsp->walk_addr) == -1) {
103 			mdb_warn("unable to read vnode_t at %#lx",
104 			    wsp->walk_addr);
105 			return (WALK_ERR);
106 		}
107 
108 		/*
109 		 * We set hashleft to -1 to indicate that we are
110 		 * walking a vnode, and initialize first to 0 (it is
111 		 * used to terminate the walk, so it must not be set
112 		 * until after we have walked the first page).  The
113 		 * walk address is set to the first page.
114 		 */
115 		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
116 		pwd->pw_hashleft = -1;
117 		pwd->pw_first = 0;
118 
119 		wsp->walk_addr = (uintptr_t)vn.v_pages;
120 	}
121 
122 	wsp->walk_data = pwd;
123 
124 	return (WALK_NEXT);
125 }
126 
127 int
128 page_walk_step(mdb_walk_state_t *wsp)
129 {
130 	page_walk_data_t	*pwd = wsp->walk_data;
131 	page_t		page;
132 	uintptr_t	pp;
133 
134 	pp = wsp->walk_addr;
135 
136 	if (pwd->pw_hashleft < 0) {
137 
138 		/* We're walking a vnode's pages */
139 
140 		/*
141 		 * If we don't have any pages to walk, we have come
142 		 * back around to the first one (we finished), or we
143 		 * can't read the page we're looking at, we are done.
144 		 */
145 		if (pp == NULL || pp == pwd->pw_first)
146 			return (WALK_DONE);
147 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
148 			mdb_warn("unable to read page_t at %#lx", pp);
149 			return (WALK_ERR);
150 		}
151 
152 		/*
153 		 * Set the walk address to the next page, and if the
154 		 * first page hasn't been set yet (i.e. we are on the
155 		 * first page), set it.
156 		 */
157 		wsp->walk_addr = (uintptr_t)page.p_vpnext;
158 		if (pwd->pw_first == NULL)
159 			pwd->pw_first = pp;
160 
161 	} else if (pwd->pw_hashleft > 0) {
162 
163 		/* We're walking all pages */
164 
165 		/*
166 		 * If pp (the walk address) is NULL, we scan through
167 		 * the page hash table until we find a page.
168 		 */
169 		if (pp == NULL) {
170 
171 			/*
172 			 * Iterate through the page hash table until we
173 			 * find a page or reach the end.
174 			 */
175 			do {
176 				if (mdb_vread(&pp, sizeof (uintptr_t),
177 				    (uintptr_t)pwd->pw_hashloc) == -1) {
178 					mdb_warn("unable to read from %#p",
179 					    pwd->pw_hashloc);
180 					return (WALK_ERR);
181 				}
182 				pwd->pw_hashleft--;
183 				pwd->pw_hashloc++;
184 			} while (pwd->pw_hashleft && (pp == NULL));
185 
186 			/*
187 			 * We've reached the end; exit.
188 			 */
189 			if (pp == NULL)
190 				return (WALK_DONE);
191 		}
192 
193 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
194 			mdb_warn("unable to read page_t at %#lx", pp);
195 			return (WALK_ERR);
196 		}
197 
198 		/*
199 		 * Set the walk address to the next page.
200 		 */
201 		wsp->walk_addr = (uintptr_t)page.p_hash;
202 
203 	} else {
204 		/* We've finished walking all pages. */
205 		return (WALK_DONE);
206 	}
207 
208 	return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
209 }
210 
211 void
212 page_walk_fini(mdb_walk_state_t *wsp)
213 {
214 	mdb_free(wsp->walk_data, sizeof (page_walk_data_t));
215 }
216 
/*
 * Per-category page counts accumulated by one pass of the page walker,
 * together with the vnode addresses used to recognize kernel pages.
 */
typedef struct memstat {
	struct vnode    *ms_kvp;	/* Cached address of kernel vnode */
	struct vnode    *ms_zvp;	/* Cached address of zio vnode, or NULL */
	uint64_t	ms_kmem;	/* Pages of kernel memory	  */
	uint64_t	ms_anon;	/* Pages of anonymous memory	  */
	uint64_t	ms_vnode;	/* Pages of named (vnode) memory  */
	uint64_t	ms_exec;	/* Pages of exec/library memory	  */
	uint64_t	ms_cachelist;	/* Pages on the cachelist (free)  */
	uint64_t	ms_total;	/* Pages on page hash		  */
} memstat_t;
228 
/*
 * True when the page's vnode is the cached kernel vnode, or the cached
 * zio vnode when one was found (ms_zvp is non-NULL).
 */
#define	MS_PP_ISKAS(pp, stats)					\
	(((stats)->ms_kvp == (pp)->p_vnode) ||			\
	    (((stats)->ms_zvp != NULL) &&			\
	    ((stats)->ms_zvp == (pp)->p_vnode)))
232 
233 /*
234  * Summarize pages by type; called from page walker.
235  */
236 
237 /* ARGSUSED */
238 static int
239 memstat_callback(page_t *page, page_t *pp, memstat_t *stats)
240 {
241 	struct vnode vn, *vp;
242 	uintptr_t ptr;
243 
244 	/* read page's vnode pointer */
245 	if ((ptr = (uintptr_t)(pp->p_vnode)) != NULL) {
246 		if (mdb_vread(&vn, sizeof (vnode_t), ptr) == -1) {
247 			mdb_warn("unable to read vnode_t at %#lx",
248 			    ptr);
249 			return (WALK_ERR);
250 		}
251 		vp = &vn;
252 	} else
253 		vp = NULL;
254 
255 	if (PP_ISFREE(pp))
256 		stats->ms_cachelist++;
257 	else if (vp && IS_SWAPFSVP(vp))
258 		stats->ms_anon++;
259 	else if (MS_PP_ISKAS(pp, stats))
260 		stats->ms_kmem++;
261 	else if (vp && (((vp)->v_flag & VVMEXEC)) != 0)
262 		stats->ms_exec++;
263 	else
264 		stats->ms_vnode++;
265 
266 	stats->ms_total++;
267 
268 	return (WALK_NEXT);
269 }
270 
271 /* ARGSUSED */
272 int
273 memstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
274 {
275 	ulong_t pagesize;
276 	pgcnt_t total_pages;
277 	ulong_t physmem;
278 	memstat_t stats;
279 	memstat_t unused_stats;
280 	GElf_Sym sym;
281 
282 	bzero(&stats, sizeof (memstat_t));
283 	bzero(&unused_stats, sizeof (memstat_t));
284 
285 	if (argc != 0 || (flags & DCMD_ADDRSPEC))
286 		return (DCMD_USAGE);
287 
288 	/* Grab base page size */
289 	if (mdb_readvar(&pagesize, "_pagesize") == -1) {
290 		mdb_warn("unable to read _pagesize");
291 		return (DCMD_ERR);
292 	}
293 
294 	/* Total physical memory */
295 	if (mdb_readvar(&total_pages, "total_pages") == -1) {
296 		mdb_warn("unable to read total_pages");
297 		return (DCMD_ERR);
298 	}
299 
300 	/* Artificially limited memory */
301 	if (mdb_readvar(&physmem, "physmem") == -1) {
302 		mdb_warn("unable to read physmem");
303 		return (DCMD_ERR);
304 	}
305 
306 	/* read kernel vnode pointer */
307 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "kvp",
308 		(GElf_Sym *)&sym) == -1) {
309 		mdb_warn("unable to read kvp");
310 		return (DCMD_ERR);
311 	}
312 
313 	stats.ms_kvp = (struct vnode *)(uintptr_t)sym.st_value;
314 
315 	/*
316 	 * Read the zio vnode pointer.  It may not exist on all kernels, so it
317 	 * it isn't found, it's not a fatal error.
318 	 */
319 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "zvp",
320 		(GElf_Sym *)&sym) == -1) {
321 		stats.ms_zvp = NULL;
322 	} else {
323 		stats.ms_zvp = (struct vnode *)(uintptr_t)sym.st_value;
324 	}
325 
326 	/* Walk page structures, summarizing usage */
327 	if (mdb_walk("page", (mdb_walk_cb_t)memstat_callback,
328 		&stats) == -1) {
329 		mdb_warn("can't walk pages");
330 		return (DCMD_ERR);
331 	}
332 
333 	/* read unused pages vnode */
334 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "unused_pages_vp",
335 		(GElf_Sym *)&sym) == -1) {
336 		mdb_warn("unable to read unused_pages_vp");
337 		return (DCMD_ERR);
338 	}
339 
340 	unused_stats.ms_kvp = (struct vnode *)(uintptr_t)sym.st_value;
341 
342 	/* Find unused pages */
343 	if (mdb_walk("page", (mdb_walk_cb_t)memstat_callback,
344 		&unused_stats) == -1) {
345 		mdb_warn("can't walk pages");
346 		return (DCMD_ERR);
347 	}
348 
349 	/*
350 	 * If physmem != total_pages, then the administrator has limited the
351 	 * number of pages available in the system.  In order to account for
352 	 * this, we reduce the amount normally attributed to the page cache.
353 	 */
354 	stats.ms_vnode -= unused_stats.ms_kmem;
355 	stats.ms_total -= unused_stats.ms_kmem;
356 
357 #define	MS_PCT_TOTAL(x)	(((5 * total_pages) + ((x) * 1000ull))) / \
358 		((physmem) * 10)
359 
360 	mdb_printf("Page Summary                Pages                MB"
361 	    "  %%Tot\n");
362 	mdb_printf("------------     ----------------  ----------------"
363 		"  ----\n");
364 	mdb_printf("Kernel           %16llu  %16llu  %3llu%%\n",
365 	    stats.ms_kmem,
366 	    (uint64_t)stats.ms_kmem * pagesize / (1024 * 1024),
367 	    MS_PCT_TOTAL(stats.ms_kmem));
368 	mdb_printf("Anon             %16llu  %16llu  %3llu%%\n",
369 	    stats.ms_anon,
370 	    (uint64_t)stats.ms_anon * pagesize / (1024 * 1024),
371 	    MS_PCT_TOTAL(stats.ms_anon));
372 	mdb_printf("Exec and libs    %16llu  %16llu  %3llu%%\n",
373 	    stats.ms_exec,
374 	    (uint64_t)stats.ms_exec * pagesize / (1024 * 1024),
375 	    MS_PCT_TOTAL(stats.ms_exec));
376 	mdb_printf("Page cache       %16llu  %16llu  %3llu%%\n",
377 	    stats.ms_vnode,
378 	    (uint64_t)stats.ms_vnode * pagesize / (1024 * 1024),
379 	    MS_PCT_TOTAL(stats.ms_vnode));
380 	mdb_printf("Free (cachelist) %16llu  %16llu  %3llu%%\n",
381 	    stats.ms_cachelist,
382 	    (uint64_t)stats.ms_cachelist * pagesize / (1024 * 1024),
383 	    MS_PCT_TOTAL(stats.ms_cachelist));
384 	mdb_printf("Free (freelist)  %16llu  %16llu  %3llu%%\n",
385 	    physmem - stats.ms_total,
386 	    (uint64_t)(physmem - stats.ms_total) * pagesize / (1024 * 1024),
387 	    MS_PCT_TOTAL(physmem - stats.ms_total));
388 	mdb_printf("\nTotal            %16lu  %16lu\n",
389 	    physmem,
390 	    (uint64_t)physmem * pagesize / (1024 * 1024));
391 
392 	if (physmem != total_pages) {
393 		mdb_printf("Physical         %16lu  %16lu\n",
394 		    total_pages,
395 		    (uint64_t)total_pages * pagesize / (1024 * 1024));
396 	}
397 
398 #undef MS_PCT_TOTAL
399 
400 	return (DCMD_OK);
401 }
402 
403 int
404 page(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
405 {
406 	page_t	p;
407 
408 	if (!(flags & DCMD_ADDRSPEC)) {
409 		if (mdb_walk_dcmd("page", "page", argc, argv) == -1) {
410 			mdb_warn("can't walk pages");
411 			return (DCMD_ERR);
412 		}
413 		return (DCMD_OK);
414 	}
415 
416 	if (DCMD_HDRSPEC(flags)) {
417 		mdb_printf("%<u>%?s %?s %16s %8s %3s %3s %2s %2s %2s%</u>\n",
418 		    "PAGE", "VNODE", "OFFSET", "SELOCK",
419 		    "LCT", "COW", "IO", "FS", "ST");
420 	}
421 
422 	if (mdb_vread(&p, sizeof (page_t), addr) == -1) {
423 		mdb_warn("can't read page_t at %#lx", addr);
424 		return (DCMD_ERR);
425 	}
426 
427 	mdb_printf("%0?lx %?p %16llx %8x %3d %3d %2x %2x %2x\n",
428 	    addr, p.p_vnode, p.p_offset, p.p_selock, p.p_lckcnt, p.p_cowcnt,
429 	    p.p_iolock_state, p.p_fsdata, p.p_state);
430 
431 	return (DCMD_OK);
432 }
433 
434 int
435 swap_walk_init(mdb_walk_state_t *wsp)
436 {
437 	void	*ptr;
438 
439 	if ((mdb_readvar(&ptr, "swapinfo") == -1) || ptr == NULL) {
440 		mdb_warn("swapinfo not found or invalid");
441 		return (WALK_ERR);
442 	}
443 
444 	wsp->walk_addr = (uintptr_t)ptr;
445 
446 	return (WALK_NEXT);
447 }
448 
449 int
450 swap_walk_step(mdb_walk_state_t *wsp)
451 {
452 	uintptr_t	sip;
453 	struct swapinfo	si;
454 
455 	sip = wsp->walk_addr;
456 
457 	if (sip == NULL)
458 		return (WALK_DONE);
459 
460 	if (mdb_vread(&si, sizeof (struct swapinfo), sip) == -1) {
461 		mdb_warn("unable to read swapinfo at %#lx", sip);
462 		return (WALK_ERR);
463 	}
464 
465 	wsp->walk_addr = (uintptr_t)si.si_next;
466 
467 	return (wsp->walk_callback(sip, &si, wsp->walk_cbdata));
468 }
469 
470 int
471 swapinfof(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
472 {
473 	struct swapinfo	si;
474 	char		*name;
475 
476 	if (!(flags & DCMD_ADDRSPEC)) {
477 		if (mdb_walk_dcmd("swapinfo", "swapinfo", argc, argv) == -1) {
478 			mdb_warn("can't walk swapinfo");
479 			return (DCMD_ERR);
480 		}
481 		return (DCMD_OK);
482 	}
483 
484 	if (DCMD_HDRSPEC(flags)) {
485 		mdb_printf("%<u>%?s %?s %9s %9s %s%</u>\n",
486 		    "ADDR", "VNODE", "PAGES", "FREE", "NAME");
487 	}
488 
489 	if (mdb_vread(&si, sizeof (struct swapinfo), addr) == -1) {
490 		mdb_warn("can't read swapinfo at %#lx", addr);
491 		return (DCMD_ERR);
492 	}
493 
494 	name = mdb_alloc(si.si_pnamelen, UM_SLEEP | UM_GC);
495 	if (mdb_vread(name, si.si_pnamelen, (uintptr_t)si.si_pname) == -1)
496 		name = "*error*";
497 
498 	mdb_printf("%0?lx %?p %9d %9d %s\n",
499 	    addr, si.si_vp, si.si_npgs, si.si_nfpgs, name);
500 
501 	return (DCMD_OK);
502 }
503 
504 int
505 memlist_walk_step(mdb_walk_state_t *wsp)
506 {
507 	uintptr_t	mlp;
508 	struct memlist	ml;
509 
510 	mlp = wsp->walk_addr;
511 
512 	if (mlp == NULL)
513 		return (WALK_DONE);
514 
515 	if (mdb_vread(&ml, sizeof (struct memlist), mlp) == -1) {
516 		mdb_warn("unable to read memlist at %#lx", mlp);
517 		return (WALK_ERR);
518 	}
519 
520 	wsp->walk_addr = (uintptr_t)ml.next;
521 
522 	return (wsp->walk_callback(mlp, &ml, wsp->walk_cbdata));
523 }
524 
525 int
526 memlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
527 {
528 	struct memlist	ml;
529 
530 	if (!(flags & DCMD_ADDRSPEC)) {
531 		uintptr_t ptr;
532 		uint_t list = 0;
533 		int i;
534 		static const char *lists[] = {
535 			"phys_install",
536 			"phys_avail",
537 			"virt_avail"
538 		};
539 
540 		if (mdb_getopts(argc, argv,
541 		    'i', MDB_OPT_SETBITS, (1 << 0), &list,
542 		    'a', MDB_OPT_SETBITS, (1 << 1), &list,
543 		    'v', MDB_OPT_SETBITS, (1 << 2), &list, NULL) != argc)
544 			return (DCMD_USAGE);
545 
546 		if (!list)
547 			list = 1;
548 
549 		for (i = 0; list; i++, list >>= 1) {
550 			if (!(list & 1))
551 				continue;
552 			if ((mdb_readvar(&ptr, lists[i]) == -1) ||
553 			    (ptr == NULL)) {
554 				mdb_warn("%s not found or invalid", lists[i]);
555 				return (DCMD_ERR);
556 			}
557 
558 			mdb_printf("%s:\n", lists[i]);
559 			if (mdb_pwalk_dcmd("memlist", "memlist", 0, NULL,
560 			    ptr) == -1) {
561 				mdb_warn("can't walk memlist");
562 				return (DCMD_ERR);
563 			}
564 		}
565 		return (DCMD_OK);
566 	}
567 
568 	if (DCMD_HDRSPEC(flags))
569 		mdb_printf("%<u>%?s %16s %16s%</u>\n", "ADDR", "BASE", "SIZE");
570 
571 	if (mdb_vread(&ml, sizeof (struct memlist), addr) == -1) {
572 		mdb_warn("can't read memlist at %#lx", addr);
573 		return (DCMD_ERR);
574 	}
575 
576 	mdb_printf("%0?lx %16llx %16llx\n", addr, ml.address, ml.size);
577 
578 	return (DCMD_OK);
579 }
580