1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <mdb/mdb_modapi.h>
30 #include <sys/types.h>
31 #include <vm/page.h>
32 #include <sys/thread.h>
33 #include <sys/swap.h>
34 #include <sys/memlist.h>
35 
36 
37 /*
38  * Page walker.
39  * By default, this will walk all pages in the system.  If given an
40  * address, it will walk all pages belonging to the vnode at that
41  * address.
42  */
43 
44 /*
45  * page_walk_data
46  *
47  * pw_hashleft is set to -1 when walking a vnode's pages, and holds the
48  * number of hash locations remaining in the page hash table when
49  * walking all pages.
50  *
51  * The astute reader will notice that pw_hashloc is only used when
52  * reading all pages (to hold a pointer to our location in the page
53  * hash table), and that pw_first is only used when reading the pages
54  * belonging to a particular vnode (to hold a pointer to the first
55  * page).  While these could be combined to be a single pointer, they
56  * are left separate for clarity.
57  */
/*
 * Private state carried in wsp->walk_data for the duration of a page walk.
 * Exactly one of pw_hashloc / pw_first is meaningful, depending on whether
 * the walk covers the whole page hash or a single vnode's page list.
 */
typedef struct page_walk_data {
	long		pw_hashleft;	/* -1 for a vnode walk, else buckets left */
	void		**pw_hashloc;	/* all-pages walk: next hash bucket to read */
	uintptr_t	pw_first;	/* vnode walk: first page seen (0 until set) */
} page_walk_data_t;
63 
64 int
65 page_walk_init(mdb_walk_state_t *wsp)
66 {
67 	page_walk_data_t	*pwd;
68 	void	**ptr;
69 	size_t	hashsz;
70 	vnode_t	vn;
71 
72 	if (wsp->walk_addr == NULL) {
73 
74 		/*
75 		 * Walk all pages
76 		 */
77 
78 		if ((mdb_readvar(&ptr, "page_hash") == -1) ||
79 		    (mdb_readvar(&hashsz, "page_hashsz") == -1) ||
80 		    (ptr == NULL) || (hashsz == 0)) {
81 			mdb_warn("page_hash, page_hashsz not found or invalid");
82 			return (WALK_ERR);
83 		}
84 
85 		/*
86 		 * Since we are walking all pages, initialize hashleft
87 		 * to be the remaining number of entries in the page
88 		 * hash.  hashloc is set the start of the page hash
89 		 * table.  Setting the walk address to 0 indicates that
90 		 * we aren't currently following a hash chain, and that
91 		 * we need to scan the page hash table for a page.
92 		 */
93 		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
94 		pwd->pw_hashleft = hashsz;
95 		pwd->pw_hashloc = ptr;
96 		wsp->walk_addr = 0;
97 	} else {
98 
99 		/*
100 		 * Walk just this vnode
101 		 */
102 
103 		if (mdb_vread(&vn, sizeof (vnode_t), wsp->walk_addr) == -1) {
104 			mdb_warn("unable to read vnode_t at %#lx",
105 			    wsp->walk_addr);
106 			return (WALK_ERR);
107 		}
108 
109 		/*
110 		 * We set hashleft to -1 to indicate that we are
111 		 * walking a vnode, and initialize first to 0 (it is
112 		 * used to terminate the walk, so it must not be set
113 		 * until after we have walked the first page).  The
114 		 * walk address is set to the first page.
115 		 */
116 		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
117 		pwd->pw_hashleft = -1;
118 		pwd->pw_first = 0;
119 
120 		wsp->walk_addr = (uintptr_t)vn.v_pages;
121 	}
122 
123 	wsp->walk_data = pwd;
124 
125 	return (WALK_NEXT);
126 }
127 
128 int
129 page_walk_step(mdb_walk_state_t *wsp)
130 {
131 	page_walk_data_t	*pwd = wsp->walk_data;
132 	page_t		page;
133 	uintptr_t	pp;
134 
135 	pp = wsp->walk_addr;
136 
137 	if (pwd->pw_hashleft < 0) {
138 
139 		/* We're walking a vnode's pages */
140 
141 		/*
142 		 * If we don't have any pages to walk, we have come
143 		 * back around to the first one (we finished), or we
144 		 * can't read the page we're looking at, we are done.
145 		 */
146 		if (pp == NULL || pp == pwd->pw_first)
147 			return (WALK_DONE);
148 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
149 			mdb_warn("unable to read page_t at %#lx", pp);
150 			return (WALK_ERR);
151 		}
152 
153 		/*
154 		 * Set the walk address to the next page, and if the
155 		 * first page hasn't been set yet (i.e. we are on the
156 		 * first page), set it.
157 		 */
158 		wsp->walk_addr = (uintptr_t)page.p_vpnext;
159 		if (pwd->pw_first == NULL)
160 			pwd->pw_first = pp;
161 
162 	} else if (pwd->pw_hashleft > 0) {
163 
164 		/* We're walking all pages */
165 
166 		/*
167 		 * If pp (the walk address) is NULL, we scan through
168 		 * the page hash table until we find a page.
169 		 */
170 		if (pp == NULL) {
171 
172 			/*
173 			 * Iterate through the page hash table until we
174 			 * find a page or reach the end.
175 			 */
176 			do {
177 				if (mdb_vread(&pp, sizeof (uintptr_t),
178 				    (uintptr_t)pwd->pw_hashloc) == -1) {
179 					mdb_warn("unable to read from %#p",
180 					    pwd->pw_hashloc);
181 					return (WALK_ERR);
182 				}
183 				pwd->pw_hashleft--;
184 				pwd->pw_hashloc++;
185 			} while (pwd->pw_hashleft && (pp == NULL));
186 
187 			/*
188 			 * We've reached the end; exit.
189 			 */
190 			if (pp == NULL)
191 				return (WALK_DONE);
192 		}
193 
194 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
195 			mdb_warn("unable to read page_t at %#lx", pp);
196 			return (WALK_ERR);
197 		}
198 
199 		/*
200 		 * Set the walk address to the next page.
201 		 */
202 		wsp->walk_addr = (uintptr_t)page.p_hash;
203 
204 	} else {
205 		/* We've finished walking all pages. */
206 		return (WALK_DONE);
207 	}
208 
209 	return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
210 }
211 
212 void
213 page_walk_fini(mdb_walk_state_t *wsp)
214 {
215 	mdb_free(wsp->walk_data, sizeof (page_walk_data_t));
216 }
217 
/*
 * Per-category page tallies accumulated by memstat_callback() during a
 * page walk.  ms_kvp is an input (the vnode address whose pages are counted
 * as ms_kmem); every other field is a running count of pages.
 */
typedef struct memstat {
	struct vnode	*ms_kvp;	/* vnode address compared for ms_kmem */
	uint64_t	ms_kmem;	/* pages whose p_vnode equals ms_kvp */
	uint64_t	ms_anon;	/* pages of anonymous memory */
	uint64_t	ms_vnode;	/* pages of named (vnode) memory */
	uint64_t	ms_exec;	/* pages of exec/library memory */
	uint64_t	ms_cachelist;	/* free pages on the cachelist */
	uint64_t	ms_total;	/* every page seen by the walk */
} memstat_t;
228 
229 /*
230  * Summarize pages by type; called from page walker.
231  */
232 
233 /* ARGSUSED */
234 static int
235 memstat_callback(page_t *page, page_t *pp, memstat_t *stats)
236 {
237 	struct vnode vn, *vp;
238 	uintptr_t ptr;
239 
240 	/* read page's vnode pointer */
241 	if ((ptr = (uintptr_t)(pp->p_vnode)) != NULL) {
242 		if (mdb_vread(&vn, sizeof (vnode_t), ptr) == -1) {
243 			mdb_warn("unable to read vnode_t at %#lx",
244 			    ptr);
245 			return (WALK_ERR);
246 		}
247 		vp = &vn;
248 	} else
249 		vp = NULL;
250 
251 	if (PP_ISFREE(pp))
252 		stats->ms_cachelist++;
253 	else if (vp && IS_SWAPFSVP(vp))
254 		stats->ms_anon++;
255 	else if (pp->p_vnode == stats->ms_kvp)
256 		stats->ms_kmem++;
257 	else if (vp && (((vp)->v_flag & VVMEXEC)) != 0)
258 		stats->ms_exec++;
259 	else
260 		stats->ms_vnode++;
261 
262 	stats->ms_total++;
263 
264 	return (WALK_NEXT);
265 }
266 
267 /* ARGSUSED */
268 int
269 memstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
270 {
271 	ulong_t pagesize;
272 	pgcnt_t total_pages;
273 	ulong_t physmem;
274 	memstat_t stats;
275 	memstat_t unused_stats;
276 	GElf_Sym sym;
277 
278 	bzero(&stats, sizeof (memstat_t));
279 	bzero(&unused_stats, sizeof (memstat_t));
280 
281 	if (argc != 0 || (flags & DCMD_ADDRSPEC))
282 		return (DCMD_USAGE);
283 
284 	/* Grab base page size */
285 	if (mdb_readvar(&pagesize, "_pagesize") == -1) {
286 		mdb_warn("unable to read _pagesize");
287 		return (DCMD_ERR);
288 	}
289 
290 	/* Total physical memory */
291 	if (mdb_readvar(&total_pages, "total_pages") == -1) {
292 		mdb_warn("unable to read total_pages");
293 		return (DCMD_ERR);
294 	}
295 
296 	/* Artificially limited memory */
297 	if (mdb_readvar(&physmem, "physmem") == -1) {
298 		mdb_warn("unable to read physmem");
299 		return (DCMD_ERR);
300 	}
301 
302 	/* read kernel vnode pointer */
303 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "kvp",
304 		(GElf_Sym *)&sym) == -1) {
305 		mdb_warn("unable to read kvp");
306 		return (DCMD_ERR);
307 	}
308 
309 	stats.ms_kvp = (struct vnode *)(uintptr_t)sym.st_value;
310 
311 	/* Walk page structures, summarizing usage */
312 	if (mdb_walk("page", (mdb_walk_cb_t)memstat_callback,
313 		&stats) == -1) {
314 		mdb_warn("can't walk pages");
315 		return (DCMD_ERR);
316 	}
317 
318 	/* read unused pages vnode */
319 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "unused_pages_vp",
320 		(GElf_Sym *)&sym) == -1) {
321 		mdb_warn("unable to read unused_pages_vp");
322 		return (DCMD_ERR);
323 	}
324 
325 	unused_stats.ms_kvp = (struct vnode *)(uintptr_t)sym.st_value;
326 
327 	/* Find unused pages */
328 	if (mdb_walk("page", (mdb_walk_cb_t)memstat_callback,
329 		&unused_stats) == -1) {
330 		mdb_warn("can't walk pages");
331 		return (DCMD_ERR);
332 	}
333 
334 	/*
335 	 * If physmem != total_pages, then the administrator has limited the
336 	 * number of pages available in the system.  In order to account for
337 	 * this, we reduce the amount normally attributed to the page cache.
338 	 */
339 	stats.ms_vnode -= unused_stats.ms_kmem;
340 	stats.ms_total -= unused_stats.ms_kmem;
341 
342 #define	MS_PCT_TOTAL(x)	(((5 * total_pages) + ((x) * 1000ull))) / \
343 		((physmem) * 10)
344 
345 	mdb_printf("Page Summary                Pages                MB"
346 	    "  %%Tot\n");
347 	mdb_printf("------------     ----------------  ----------------"
348 		"  ----\n");
349 	mdb_printf("Kernel           %16llu  %16llu  %3llu%%\n",
350 	    stats.ms_kmem,
351 	    (uint64_t)stats.ms_kmem * pagesize / (1024 * 1024),
352 	    MS_PCT_TOTAL(stats.ms_kmem));
353 	mdb_printf("Anon             %16llu  %16llu  %3llu%%\n",
354 	    stats.ms_anon,
355 	    (uint64_t)stats.ms_anon * pagesize / (1024 * 1024),
356 	    MS_PCT_TOTAL(stats.ms_anon));
357 	mdb_printf("Exec and libs    %16llu  %16llu  %3llu%%\n",
358 	    stats.ms_exec,
359 	    (uint64_t)stats.ms_exec * pagesize / (1024 * 1024),
360 	    MS_PCT_TOTAL(stats.ms_exec));
361 	mdb_printf("Page cache       %16llu  %16llu  %3llu%%\n",
362 	    stats.ms_vnode,
363 	    (uint64_t)stats.ms_vnode * pagesize / (1024 * 1024),
364 	    MS_PCT_TOTAL(stats.ms_vnode));
365 	mdb_printf("Free (cachelist) %16llu  %16llu  %3llu%%\n",
366 	    stats.ms_cachelist,
367 	    (uint64_t)stats.ms_cachelist * pagesize / (1024 * 1024),
368 	    MS_PCT_TOTAL(stats.ms_cachelist));
369 	mdb_printf("Free (freelist)  %16llu  %16llu  %3llu%%\n",
370 	    physmem - stats.ms_total,
371 	    (uint64_t)(physmem - stats.ms_total) * pagesize / (1024 * 1024),
372 	    MS_PCT_TOTAL(physmem - stats.ms_total));
373 	mdb_printf("\nTotal            %16lu  %16lu\n",
374 	    physmem,
375 	    (uint64_t)physmem * pagesize / (1024 * 1024));
376 
377 	if (physmem != total_pages) {
378 		mdb_printf("Physical         %16lu  %16lu\n",
379 		    total_pages,
380 		    (uint64_t)total_pages * pagesize / (1024 * 1024));
381 	}
382 
383 #undef MS_PCT_TOTAL
384 
385 	return (DCMD_OK);
386 }
387 
388 int
389 page(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
390 {
391 	page_t	p;
392 
393 	if (!(flags & DCMD_ADDRSPEC)) {
394 		if (mdb_walk_dcmd("page", "page", argc, argv) == -1) {
395 			mdb_warn("can't walk pages");
396 			return (DCMD_ERR);
397 		}
398 		return (DCMD_OK);
399 	}
400 
401 	if (DCMD_HDRSPEC(flags)) {
402 		mdb_printf("%<u>%?s %?s %16s %8s %3s %3s %2s %2s %2s%</u>\n",
403 		    "PAGE", "VNODE", "OFFSET", "SELOCK",
404 		    "LCT", "COW", "IO", "FS", "ST");
405 	}
406 
407 	if (mdb_vread(&p, sizeof (page_t), addr) == -1) {
408 		mdb_warn("can't read page_t at %#lx", addr);
409 		return (DCMD_ERR);
410 	}
411 
412 	mdb_printf("%0?lx %?p %16llx %8x %3d %3d %2x %2x %2x\n",
413 	    addr, p.p_vnode, p.p_offset, p.p_selock, p.p_lckcnt, p.p_cowcnt,
414 	    p.p_iolock_state, p.p_fsdata, p.p_state);
415 
416 	return (DCMD_OK);
417 }
418 
419 int
420 swap_walk_init(mdb_walk_state_t *wsp)
421 {
422 	void	*ptr;
423 
424 	if ((mdb_readvar(&ptr, "swapinfo") == -1) || ptr == NULL) {
425 		mdb_warn("swapinfo not found or invalid");
426 		return (WALK_ERR);
427 	}
428 
429 	wsp->walk_addr = (uintptr_t)ptr;
430 
431 	return (WALK_NEXT);
432 }
433 
434 int
435 swap_walk_step(mdb_walk_state_t *wsp)
436 {
437 	uintptr_t	sip;
438 	struct swapinfo	si;
439 
440 	sip = wsp->walk_addr;
441 
442 	if (sip == NULL)
443 		return (WALK_DONE);
444 
445 	if (mdb_vread(&si, sizeof (struct swapinfo), sip) == -1) {
446 		mdb_warn("unable to read swapinfo at %#lx", sip);
447 		return (WALK_ERR);
448 	}
449 
450 	wsp->walk_addr = (uintptr_t)si.si_next;
451 
452 	return (wsp->walk_callback(sip, &si, wsp->walk_cbdata));
453 }
454 
455 int
456 swapinfof(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
457 {
458 	struct swapinfo	si;
459 	char		*name;
460 
461 	if (!(flags & DCMD_ADDRSPEC)) {
462 		if (mdb_walk_dcmd("swapinfo", "swapinfo", argc, argv) == -1) {
463 			mdb_warn("can't walk swapinfo");
464 			return (DCMD_ERR);
465 		}
466 		return (DCMD_OK);
467 	}
468 
469 	if (DCMD_HDRSPEC(flags)) {
470 		mdb_printf("%<u>%?s %?s %9s %9s %s%</u>\n",
471 		    "ADDR", "VNODE", "PAGES", "FREE", "NAME");
472 	}
473 
474 	if (mdb_vread(&si, sizeof (struct swapinfo), addr) == -1) {
475 		mdb_warn("can't read swapinfo at %#lx", addr);
476 		return (DCMD_ERR);
477 	}
478 
479 	name = mdb_alloc(si.si_pnamelen, UM_SLEEP | UM_GC);
480 	if (mdb_vread(name, si.si_pnamelen, (uintptr_t)si.si_pname) == -1)
481 		name = "*error*";
482 
483 	mdb_printf("%0?lx %?p %9d %9d %s\n",
484 	    addr, si.si_vp, si.si_npgs, si.si_nfpgs, name);
485 
486 	return (DCMD_OK);
487 }
488 
489 int
490 memlist_walk_step(mdb_walk_state_t *wsp)
491 {
492 	uintptr_t	mlp;
493 	struct memlist	ml;
494 
495 	mlp = wsp->walk_addr;
496 
497 	if (mlp == NULL)
498 		return (WALK_DONE);
499 
500 	if (mdb_vread(&ml, sizeof (struct memlist), mlp) == -1) {
501 		mdb_warn("unable to read memlist at %#lx", mlp);
502 		return (WALK_ERR);
503 	}
504 
505 	wsp->walk_addr = (uintptr_t)ml.next;
506 
507 	return (wsp->walk_callback(mlp, &ml, wsp->walk_cbdata));
508 }
509 
510 int
511 memlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
512 {
513 	struct memlist	ml;
514 
515 	if (!(flags & DCMD_ADDRSPEC)) {
516 		uintptr_t ptr;
517 		uint_t list = 0;
518 		int i;
519 		static const char *lists[] = {
520 			"phys_install",
521 			"phys_avail",
522 			"virt_avail"
523 		};
524 
525 		if (mdb_getopts(argc, argv,
526 		    'i', MDB_OPT_SETBITS, (1 << 0), &list,
527 		    'a', MDB_OPT_SETBITS, (1 << 1), &list,
528 		    'v', MDB_OPT_SETBITS, (1 << 2), &list, NULL) != argc)
529 			return (DCMD_USAGE);
530 
531 		if (!list)
532 			list = 1;
533 
534 		for (i = 0; list; i++, list >>= 1) {
535 			if (!(list & 1))
536 				continue;
537 			if ((mdb_readvar(&ptr, lists[i]) == -1) ||
538 			    (ptr == NULL)) {
539 				mdb_warn("%s not found or invalid", lists[i]);
540 				return (DCMD_ERR);
541 			}
542 
543 			mdb_printf("%s:\n", lists[i]);
544 			if (mdb_pwalk_dcmd("memlist", "memlist", 0, NULL,
545 			    ptr) == -1) {
546 				mdb_warn("can't walk memlist");
547 				return (DCMD_ERR);
548 			}
549 		}
550 		return (DCMD_OK);
551 	}
552 
553 	if (DCMD_HDRSPEC(flags))
554 		mdb_printf("%<u>%?s %16s %16s%</u>\n", "ADDR", "BASE", "SIZE");
555 
556 	if (mdb_vread(&ml, sizeof (struct memlist), addr) == -1) {
557 		mdb_warn("can't read memlist at %#lx", addr);
558 		return (DCMD_ERR);
559 	}
560 
561 	mdb_printf("%0?lx %16llx %16llx\n", addr, ml.address, ml.size);
562 
563 	return (DCMD_OK);
564 }
565