xref: /freebsd/sys/compat/linprocfs/linprocfs.c (revision 0957b409)
1 /*-
2  * SPDX-License-Identifier: BSD-4-Clause
3  *
4  * Copyright (c) 2000 Dag-Erling Coïdan Smørgrav
5  * Copyright (c) 1999 Pierre Beyssac
6  * Copyright (c) 1993 Jan-Simon Pendry
7  * Copyright (c) 1993
8  *	The Regents of the University of California.  All rights reserved.
9  *
10  * This code is derived from software contributed to Berkeley by
11  * Jan-Simon Pendry.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by the University of
24  *	California, Berkeley and its contributors.
25  * 4. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  *
41  *	@(#)procfs_status.c	8.4 (Berkeley) 6/15/94
42  */
43 
44 #include <sys/cdefs.h>
45 __FBSDID("$FreeBSD$");
46 
47 #include <sys/param.h>
48 #include <sys/queue.h>
49 #include <sys/blist.h>
50 #include <sys/conf.h>
51 #include <sys/exec.h>
52 #include <sys/fcntl.h>
53 #include <sys/filedesc.h>
54 #include <sys/jail.h>
55 #include <sys/kernel.h>
56 #include <sys/limits.h>
57 #include <sys/linker.h>
58 #include <sys/lock.h>
59 #include <sys/malloc.h>
60 #include <sys/msg.h>
61 #include <sys/mutex.h>
62 #include <sys/namei.h>
63 #include <sys/proc.h>
64 #include <sys/ptrace.h>
65 #include <sys/resourcevar.h>
66 #include <sys/resource.h>
67 #include <sys/sbuf.h>
68 #include <sys/sem.h>
69 #include <sys/smp.h>
70 #include <sys/socket.h>
71 #include <sys/syscallsubr.h>
72 #include <sys/sysctl.h>
73 #include <sys/sysent.h>
74 #include <sys/systm.h>
75 #include <sys/time.h>
76 #include <sys/tty.h>
77 #include <sys/user.h>
78 #include <sys/uuid.h>
79 #include <sys/vmmeter.h>
80 #include <sys/vnode.h>
81 #include <sys/bus.h>
82 
83 #include <net/if.h>
84 #include <net/if_var.h>
85 #include <net/if_types.h>
86 
87 #include <vm/vm.h>
88 #include <vm/vm_extern.h>
89 #include <vm/pmap.h>
90 #include <vm/vm_map.h>
91 #include <vm/vm_param.h>
92 #include <vm/vm_object.h>
93 #include <vm/swap_pager.h>
94 
95 #include <machine/clock.h>
96 
97 #include <geom/geom.h>
98 #include <geom/geom_int.h>
99 
100 #if defined(__i386__) || defined(__amd64__)
101 #include <machine/cputypes.h>
102 #include <machine/md_var.h>
103 #endif /* __i386__ || __amd64__ */
104 
105 #include <compat/linux/linux.h>
106 #include <compat/linux/linux_mib.h>
107 #include <compat/linux/linux_misc.h>
108 #include <compat/linux/linux_util.h>
109 #include <fs/pseudofs/pseudofs.h>
110 #include <fs/procfs/procfs.h>
111 
/*
 * Unit conversion helpers.
 *
 * The tick-based macros divide by stathz when it is non-zero and by hz
 * otherwise.  The page-based macros shift by PAGE_SHIFT; the kilobyte
 * conversions use a shift of 10 (1 kB == 1024 bytes).
 */
#define	LINPROCFS_TICK_HZ	(stathz ? stathz : hz)

/* ticks to jiffies (hundredths of a second), as a signed long */
#define	T2J(x)	((long)(((x) * 100ULL) / LINPROCFS_TICK_HZ))
/* ticks to centiseconds, as an unsigned long */
#define	T2CS(x)	((unsigned long)(((x) * 100ULL) / LINPROCFS_TICK_HZ))
/* ticks to whole seconds */
#define	T2S(x)	((x) / LINPROCFS_TICK_HZ)
/* bytes to kbytes */
#define	B2K(x)	((x) >> 10)
/* bytes to pages */
#define	B2P(x)	((x) >> PAGE_SHIFT)
/* pages to bytes */
#define	P2B(x)	((x) << PAGE_SHIFT)
/* pages to kbytes */
#define	P2K(x)	((x) << (PAGE_SHIFT - 10))
/* struct timeval pointer to hundredths of a second */
#define	TV2J(x)	((x)->tv_sec * 100UL + (x)->tv_usec / 10000)
123 
/**
 * @brief Mapping of ki_stat in struct kinfo_proc to the linux state
 *
 * The linux procfs state field displays one of the characters RSDZTW to
 * denote running, sleeping in an interruptible wait, waiting in an
 * uninterruptible disk sleep, a zombie process, process is being traced
 * or stopped, or process is paging respectively.
 *
 * Our struct kinfo_proc contains the variable ki_stat which contains a
 * value out of SIDL, SRUN, SSLEEP, SSTOP, SZOMB, SWAIT and SLOCK.
 *
 * This character array is indexed with (ki_stat - 1) and tries to map
 * our states to suitable linux states.  The array keeps a terminating
 * NUL, so sizeof(linux_state) is one more than the number of states.
 *
 * NOTE(review): the per-entry labels below assume the states are
 * numbered 1..7 in the order listed above -- confirm against sys/proc.h.
 */
static char linux_state[] = {
	'R',	/* SIDL */
	'R',	/* SRUN */
	'S',	/* SSLEEP */
	'T',	/* SSTOP */
	'Z',	/* SZOMB */
	'D',	/* SWAIT */
	'D',	/* SLOCK */
	'\0'
};
139 
140 /*
141  * Filler function for proc/meminfo
142  */
143 static int
144 linprocfs_domeminfo(PFS_FILL_ARGS)
145 {
146 	unsigned long memtotal;		/* total memory in bytes */
147 	unsigned long memused;		/* used memory in bytes */
148 	unsigned long memfree;		/* free memory in bytes */
149 	unsigned long buffers, cached;	/* buffer / cache memory ??? */
150 	unsigned long long swaptotal;	/* total swap space in bytes */
151 	unsigned long long swapused;	/* used swap space in bytes */
152 	unsigned long long swapfree;	/* free swap space in bytes */
153 	int i, j;
154 
155 	memtotal = physmem * PAGE_SIZE;
156 	/*
157 	 * The correct thing here would be:
158 	 *
159 	memfree = vm_free_count() * PAGE_SIZE;
160 	memused = memtotal - memfree;
161 	 *
162 	 * but it might mislead linux binaries into thinking there
163 	 * is very little memory left, so we cheat and tell them that
164 	 * all memory that isn't wired down is free.
165 	 */
166 	memused = vm_wire_count() * PAGE_SIZE;
167 	memfree = memtotal - memused;
168 	swap_pager_status(&i, &j);
169 	swaptotal = (unsigned long long)i * PAGE_SIZE;
170 	swapused = (unsigned long long)j * PAGE_SIZE;
171 	swapfree = swaptotal - swapused;
172 	/*
173 	 * We'd love to be able to write:
174 	 *
175 	buffers = bufspace;
176 	 *
177 	 * but bufspace is internal to vfs_bio.c and we don't feel
178 	 * like unstaticizing it just for linprocfs's sake.
179 	 */
180 	buffers = 0;
181 	cached = vm_inactive_count() * PAGE_SIZE;
182 
183 	sbuf_printf(sb,
184 	    "MemTotal: %9lu kB\n"
185 	    "MemFree:  %9lu kB\n"
186 	    "Buffers:  %9lu kB\n"
187 	    "Cached:   %9lu kB\n"
188 	    "SwapTotal:%9llu kB\n"
189 	    "SwapFree: %9llu kB\n",
190 	    B2K(memtotal), B2K(memfree), B2K(buffers),
191 	    B2K(cached), B2K(swaptotal), B2K(swapfree));
192 
193 	return (0);
194 }
195 
196 #if defined(__i386__) || defined(__amd64__)
197 /*
198  * Filler function for proc/cpuinfo (i386 & amd64 version)
199  */
200 static int
201 linprocfs_docpuinfo(PFS_FILL_ARGS)
202 {
203 	int hw_model[2];
204 	char model[128];
205 	uint64_t freq;
206 	size_t size;
207 	u_int cache_size[4];
208 	int fqmhz, fqkhz;
209 	int i, j;
210 
211 	/*
212 	 * We default the flags to include all non-conflicting flags,
213 	 * and the Intel versions of conflicting flags.
214 	 */
215 	static char *flags[] = {
216 		"fpu",	    "vme",     "de",	   "pse",      "tsc",
217 		"msr",	    "pae",     "mce",	   "cx8",      "apic",
218 		"sep",	    "sep",     "mtrr",	   "pge",      "mca",
219 		"cmov",	    "pat",     "pse36",	   "pn",       "b19",
220 		"b20",	    "b21",     "mmxext",   "mmx",      "fxsr",
221 		"xmm",	    "sse2",    "b27",	   "b28",      "b29",
222 		"3dnowext", "3dnow"
223 	};
224 
225 	static char *power_flags[] = {
226 		"ts",           "fid",          "vid",
227 		"ttp",          "tm",           "stc",
228 		"100mhzsteps",  "hwpstate",     "",
229 		"cpb",          "eff_freq_ro",  "proc_feedback",
230 		"acc_power",
231 	};
232 
233 	hw_model[0] = CTL_HW;
234 	hw_model[1] = HW_MODEL;
235 	model[0] = '\0';
236 	size = sizeof(model);
237 	if (kernel_sysctl(td, hw_model, 2, &model, &size, 0, 0, 0, 0) != 0)
238 		strcpy(model, "unknown");
239 #ifdef __i386__
240 	switch (cpu_vendor_id) {
241 	case CPU_VENDOR_AMD:
242 		if (cpu_class < CPUCLASS_686)
243 			flags[16] = "fcmov";
244 		break;
245 	case CPU_VENDOR_CYRIX:
246 		flags[24] = "cxmmx";
247 		break;
248 	}
249 #endif
250 	if (cpu_exthigh >= 0x80000006)
251 		do_cpuid(0x80000006, cache_size);
252 	else
253 		memset(cache_size, 0, sizeof(cache_size));
254 	for (i = 0; i < mp_ncpus; ++i) {
255 		fqmhz = 0;
256 		fqkhz = 0;
257 		freq = atomic_load_acq_64(&tsc_freq);
258 		if (freq != 0) {
259 			fqmhz = (freq + 4999) / 1000000;
260 			fqkhz = ((freq + 4999) / 10000) % 100;
261 		}
262 		sbuf_printf(sb,
263 		    "processor\t: %d\n"
264 		    "vendor_id\t: %.20s\n"
265 		    "cpu family\t: %u\n"
266 		    "model\t\t: %u\n"
267 		    "model name\t: %s\n"
268 		    "stepping\t: %u\n"
269 		    "cpu MHz\t\t: %d.%02d\n"
270 		    "cache size\t: %d KB\n"
271 		    "physical id\t: %d\n"
272 		    "siblings\t: %d\n"
273 		    "core id\t\t: %d\n"
274 		    "cpu cores\t: %d\n"
275 		    "apicid\t\t: %d\n"
276 		    "initial apicid\t: %d\n"
277 		    "fpu\t\t: %s\n"
278 		    "fpu_exception\t: %s\n"
279 		    "cpuid level\t: %d\n"
280 		    "wp\t\t: %s\n",
281 		    i, cpu_vendor, CPUID_TO_FAMILY(cpu_id),
282 		    CPUID_TO_MODEL(cpu_id), model, cpu_id & CPUID_STEPPING,
283 		    fqmhz, fqkhz,
284 		    (cache_size[2] >> 16), 0, mp_ncpus, i, mp_ncpus,
285 		    i, i, /*cpu_id & CPUID_LOCAL_APIC_ID ??*/
286 		    (cpu_feature & CPUID_FPU) ? "yes" : "no", "yes",
287 		    CPUID_TO_FAMILY(cpu_id), "yes");
288 		sbuf_cat(sb, "flags\t\t:");
289 		for (j = 0; j < nitems(flags); j++)
290 			if (cpu_feature & (1 << j))
291 				sbuf_printf(sb, " %s", flags[j]);
292 		sbuf_cat(sb, "\n");
293 		sbuf_printf(sb,
294 		    "bugs\t\t: %s\n"
295 		    "bogomips\t: %d.%02d\n"
296 		    "clflush size\t: %d\n"
297 		    "cache_alignment\t: %d\n"
298 		    "address sizes\t: %d bits physical, %d bits virtual\n",
299 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
300 		    (has_f00f_bug) ? "Intel F00F" : "",
301 #else
302 		    "",
303 #endif
304 		    fqmhz, fqkhz,
305 		    cpu_clflush_line_size, cpu_clflush_line_size,
306 		    cpu_maxphyaddr,
307 		    (cpu_maxphyaddr > 32) ? 48 : 0);
308 		sbuf_cat(sb, "power management: ");
309 		for (j = 0; j < nitems(power_flags); j++)
310 			if (amd_pminfo & (1 << j))
311 				sbuf_printf(sb, " %s", power_flags[j]);
312 		sbuf_cat(sb, "\n\n");
313 
314 		/* XXX per-cpu vendor / class / model / id? */
315 	}
316 	sbuf_cat(sb, "\n");
317 
318 	return (0);
319 }
320 #else
321 /* ARM64TODO: implement non-stubbed linprocfs_docpuinfo */
322 static int
323 linprocfs_docpuinfo(PFS_FILL_ARGS)
324 {
325 	int i;
326 
327 	for (i = 0; i < mp_ncpus; ++i) {
328 		sbuf_printf(sb,
329 		    "processor\t: %d\n"
330 		    "BogoMIPS\t: %d.%02d\n",
331 		    i, 0, 0);
332 		sbuf_cat(sb, "Features\t: ");
333 		sbuf_cat(sb, "\n");
334 		sbuf_printf(sb,
335 		    "CPU implementer\t: \n"
336 		    "CPU architecture: \n"
337 		    "CPU variant\t: 0x%x\n"
338 		    "CPU part\t: 0x%x\n"
339 		    "CPU revision\t: %d\n",
340 		    0, 0, 0);
341 		sbuf_cat(sb, "\n");
342 	}
343 
344 	return (0);
345 }
346 #endif /* __i386__ || __amd64__ */
347 
348 /*
349  * Filler function for proc/mtab
350  *
351  * This file doesn't exist in Linux' procfs, but is included here so
352  * users can symlink /compat/linux/etc/mtab to /proc/mtab
353  */
354 static int
355 linprocfs_domtab(PFS_FILL_ARGS)
356 {
357 	struct nameidata nd;
358 	const char *lep;
359 	char *dlep, *flep, *mntto, *mntfrom, *fstype;
360 	size_t lep_len;
361 	int error;
362 	struct statfs *buf, *sp;
363 	size_t count;
364 
365 	/* resolve symlinks etc. in the emulation tree prefix */
366 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, linux_emul_path, td);
367 	flep = NULL;
368 	error = namei(&nd);
369 	lep = linux_emul_path;
370 	if (error == 0) {
371 		if (vn_fullpath(td, nd.ni_vp, &dlep, &flep) == 0)
372 			lep = dlep;
373 		vrele(nd.ni_vp);
374 	}
375 	lep_len = strlen(lep);
376 
377 	buf = NULL;
378 	error = kern_getfsstat(td, &buf, SIZE_T_MAX, &count,
379 	    UIO_SYSSPACE, MNT_WAIT);
380 	if (error != 0) {
381 		free(buf, M_TEMP);
382 		free(flep, M_TEMP);
383 		return (error);
384 	}
385 
386 	for (sp = buf; count > 0; sp++, count--) {
387 		/* determine device name */
388 		mntfrom = sp->f_mntfromname;
389 
390 		/* determine mount point */
391 		mntto = sp->f_mntonname;
392 		if (strncmp(mntto, lep, lep_len) == 0 && mntto[lep_len] == '/')
393 			mntto += lep_len;
394 
395 		/* determine fs type */
396 		fstype = sp->f_fstypename;
397 		if (strcmp(fstype, pn->pn_info->pi_name) == 0)
398 			mntfrom = fstype = "proc";
399 		else if (strcmp(fstype, "procfs") == 0)
400 			continue;
401 
402 		if (strcmp(fstype, "linsysfs") == 0) {
403 			sbuf_printf(sb, "/sys %s sysfs %s", mntto,
404 			    sp->f_flags & MNT_RDONLY ? "ro" : "rw");
405 		} else {
406 			/* For Linux msdosfs is called vfat */
407 			if (strcmp(fstype, "msdosfs") == 0)
408 				fstype = "vfat";
409 			sbuf_printf(sb, "%s %s %s %s", mntfrom, mntto, fstype,
410 			    sp->f_flags & MNT_RDONLY ? "ro" : "rw");
411 		}
412 #define ADD_OPTION(opt, name) \
413 	if (sp->f_flags & (opt)) sbuf_printf(sb, "," name);
414 		ADD_OPTION(MNT_SYNCHRONOUS,	"sync");
415 		ADD_OPTION(MNT_NOEXEC,		"noexec");
416 		ADD_OPTION(MNT_NOSUID,		"nosuid");
417 		ADD_OPTION(MNT_UNION,		"union");
418 		ADD_OPTION(MNT_ASYNC,		"async");
419 		ADD_OPTION(MNT_SUIDDIR,		"suiddir");
420 		ADD_OPTION(MNT_NOSYMFOLLOW,	"nosymfollow");
421 		ADD_OPTION(MNT_NOATIME,		"noatime");
422 #undef ADD_OPTION
423 		/* a real Linux mtab will also show NFS options */
424 		sbuf_printf(sb, " 0 0\n");
425 	}
426 
427 	free(buf, M_TEMP);
428 	free(flep, M_TEMP);
429 	return (error);
430 }
431 
432 /*
433  * Filler function for proc/partitions
434  */
435 static int
436 linprocfs_dopartitions(PFS_FILL_ARGS)
437 {
438 	struct g_class *cp;
439 	struct g_geom *gp;
440 	struct g_provider *pp;
441 	int major, minor;
442 
443 	g_topology_lock();
444 	sbuf_printf(sb, "major minor  #blocks  name rio rmerge rsect "
445 	    "ruse wio wmerge wsect wuse running use aveq\n");
446 
447 	LIST_FOREACH(cp, &g_classes, class) {
448 		if (strcmp(cp->name, "DISK") == 0 ||
449 		    strcmp(cp->name, "PART") == 0)
450 			LIST_FOREACH(gp, &cp->geom, geom) {
451 				LIST_FOREACH(pp, &gp->provider, provider) {
452 					if (linux_driver_get_major_minor(
453 					    pp->name, &major, &minor) != 0) {
454 						major = 0;
455 						minor = 0;
456 					}
457 					sbuf_printf(sb, "%d %d %lld %s "
458 					    "%d %d %d %d %d "
459 					     "%d %d %d %d %d %d\n",
460 					     major, minor,
461 					     (long long)pp->mediasize, pp->name,
462 					     0, 0, 0, 0, 0,
463 					     0, 0, 0, 0, 0, 0);
464 				}
465 			}
466 	}
467 	g_topology_unlock();
468 
469 	return (0);
470 }
471 
472 /*
473  * Filler function for proc/stat
474  *
475  * Output depends on kernel version:
476  *
477  * v2.5.40 <=
478  *   user nice system idle
479  * v2.5.41
480  *   user nice system idle iowait
481  * v2.6.11
482  *   user nice system idle iowait irq softirq steal
483  * v2.6.24
484  *   user nice system idle iowait irq softirq steal guest
485  * v2.6.33 >=
486  *   user nice system idle iowait irq softirq steal guest guest_nice
487  */
488 static int
489 linprocfs_dostat(PFS_FILL_ARGS)
490 {
491 	struct pcpu *pcpu;
492 	long cp_time[CPUSTATES];
493 	long *cp;
494 	struct timeval boottime;
495 	int i;
496 	char *zero_pad;
497 	bool has_intr = true;
498 
499 	if (linux_kernver(td) >= LINUX_KERNVER(2,6,33)) {
500 		zero_pad = " 0 0 0 0\n";
501 	} else if (linux_kernver(td) >= LINUX_KERNVER(2,6,24)) {
502 		zero_pad = " 0 0 0\n";
503 	} else if (linux_kernver(td) >= LINUX_KERNVER(2,6,11)) {
504 		zero_pad = " 0 0\n";
505 	} else if (linux_kernver(td) >= LINUX_KERNVER(2,5,41)) {
506 		has_intr = false;
507 		zero_pad = " 0\n";
508 	} else {
509 		has_intr = false;
510 		zero_pad = "\n";
511 	}
512 
513 	read_cpu_time(cp_time);
514 	getboottime(&boottime);
515 	/* Parameters common to all versions */
516 	sbuf_printf(sb, "cpu %lu %lu %lu %lu",
517 	    T2J(cp_time[CP_USER]),
518 	    T2J(cp_time[CP_NICE]),
519 	    T2J(cp_time[CP_SYS]),
520 	    T2J(cp_time[CP_IDLE]));
521 
522 	/* Print interrupt stats if available */
523 	if (has_intr) {
524 		sbuf_printf(sb, " 0 %lu", T2J(cp_time[CP_INTR]));
525 	}
526 
527 	/* Pad out remaining fields depending on version */
528 	sbuf_printf(sb, "%s", zero_pad);
529 
530 	CPU_FOREACH(i) {
531 		pcpu = pcpu_find(i);
532 		cp = pcpu->pc_cp_time;
533 		sbuf_printf(sb, "cpu%d %lu %lu %lu %lu", i,
534 		    T2J(cp[CP_USER]),
535 		    T2J(cp[CP_NICE]),
536 		    T2J(cp[CP_SYS]),
537 		    T2J(cp[CP_IDLE]));
538 
539 		if (has_intr) {
540 			sbuf_printf(sb, " 0 %lu", T2J(cp[CP_INTR]));
541 		}
542 
543 		sbuf_printf(sb, "%s", zero_pad);
544 	}
545 	sbuf_printf(sb,
546 	    "disk 0 0 0 0\n"
547 	    "page %ju %ju\n"
548 	    "swap %ju %ju\n"
549 	    "intr %ju\n"
550 	    "ctxt %ju\n"
551 	    "btime %lld\n",
552 	    (uintmax_t)VM_CNT_FETCH(v_vnodepgsin),
553 	    (uintmax_t)VM_CNT_FETCH(v_vnodepgsout),
554 	    (uintmax_t)VM_CNT_FETCH(v_swappgsin),
555 	    (uintmax_t)VM_CNT_FETCH(v_swappgsout),
556 	    (uintmax_t)VM_CNT_FETCH(v_intr),
557 	    (uintmax_t)VM_CNT_FETCH(v_swtch),
558 	    (long long)boottime.tv_sec);
559 	return (0);
560 }
561 
562 static int
563 linprocfs_doswaps(PFS_FILL_ARGS)
564 {
565 	struct xswdev xsw;
566 	uintmax_t total, used;
567 	int n;
568 	char devname[SPECNAMELEN + 1];
569 
570 	sbuf_printf(sb, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
571 	for (n = 0; ; n++) {
572 		if (swap_dev_info(n, &xsw, devname, sizeof(devname)) != 0)
573 			break;
574 		total = (uintmax_t)xsw.xsw_nblks * PAGE_SIZE / 1024;
575 		used  = (uintmax_t)xsw.xsw_used * PAGE_SIZE / 1024;
576 
577 		/*
578 		 * The space and not tab after the device name is on
579 		 * purpose.  Linux does so.
580 		 */
581 		sbuf_printf(sb, "/dev/%-34s unknown\t\t%jd\t%jd\t-1\n",
582 		    devname, total, used);
583 	}
584 	return (0);
585 }
586 
587 /*
588  * Filler function for proc/uptime
589  */
590 static int
591 linprocfs_douptime(PFS_FILL_ARGS)
592 {
593 	long cp_time[CPUSTATES];
594 	struct timeval tv;
595 
596 	getmicrouptime(&tv);
597 	read_cpu_time(cp_time);
598 	sbuf_printf(sb, "%lld.%02ld %ld.%02lu\n",
599 	    (long long)tv.tv_sec, tv.tv_usec / 10000,
600 	    T2S(cp_time[CP_IDLE] / mp_ncpus),
601 	    T2CS(cp_time[CP_IDLE] / mp_ncpus) % 100);
602 	return (0);
603 }
604 
/*
 * Get OS build date
 *
 * Appends a fixed placeholder build tag to sb.  Deriving the tag from
 * the kernel's version string is not implemented (the earlier attempt
 * was compiled out), so td is currently unused.
 */
static void
linprocfs_osbuild(struct thread *td, struct sbuf *sb)
{

	sbuf_cat(sb, "#4 Sun Dec 18 04:30:00 CET 1977");
}
630 
/*
 * Get OS builder
 *
 * Appends a fixed placeholder builder address to sb.  Deriving the
 * builder from the kernel's version string is not implemented (the
 * earlier attempt was compiled out), so td is currently unused.
 */
static void
linprocfs_osbuilder(struct thread *td, struct sbuf *sb)
{

	sbuf_cat(sb, "des@freebsd.org");
}
655 
656 /*
657  * Filler function for proc/version
658  */
659 static int
660 linprocfs_doversion(PFS_FILL_ARGS)
661 {
662 	char osname[LINUX_MAX_UTSNAME];
663 	char osrelease[LINUX_MAX_UTSNAME];
664 
665 	linux_get_osname(td, osname);
666 	linux_get_osrelease(td, osrelease);
667 	sbuf_printf(sb, "%s version %s (", osname, osrelease);
668 	linprocfs_osbuilder(td, sb);
669 	sbuf_cat(sb, ") (gcc version " __VERSION__ ") ");
670 	linprocfs_osbuild(td, sb);
671 	sbuf_cat(sb, "\n");
672 
673 	return (0);
674 }
675 
676 /*
677  * Filler function for proc/loadavg
678  */
679 static int
680 linprocfs_doloadavg(PFS_FILL_ARGS)
681 {
682 
683 	sbuf_printf(sb,
684 	    "%d.%02d %d.%02d %d.%02d %d/%d %d\n",
685 	    (int)(averunnable.ldavg[0] / averunnable.fscale),
686 	    (int)(averunnable.ldavg[0] * 100 / averunnable.fscale % 100),
687 	    (int)(averunnable.ldavg[1] / averunnable.fscale),
688 	    (int)(averunnable.ldavg[1] * 100 / averunnable.fscale % 100),
689 	    (int)(averunnable.ldavg[2] / averunnable.fscale),
690 	    (int)(averunnable.ldavg[2] * 100 / averunnable.fscale % 100),
691 	    1,				/* number of running tasks */
692 	    nprocs,			/* number of tasks */
693 	    lastpid			/* the last pid */
694 	);
695 	return (0);
696 }
697 
698 /*
699  * Filler function for proc/pid/stat
700  */
701 static int
702 linprocfs_doprocstat(PFS_FILL_ARGS)
703 {
704 	struct kinfo_proc kp;
705 	struct timeval boottime;
706 	char state;
707 	static int ratelimit = 0;
708 	vm_offset_t startcode, startdata;
709 
710 	getboottime(&boottime);
711 	sx_slock(&proctree_lock);
712 	PROC_LOCK(p);
713 	fill_kinfo_proc(p, &kp);
714 	sx_sunlock(&proctree_lock);
715 	if (p->p_vmspace) {
716 	   startcode = (vm_offset_t)p->p_vmspace->vm_taddr;
717 	   startdata = (vm_offset_t)p->p_vmspace->vm_daddr;
718 	} else {
719 	   startcode = 0;
720 	   startdata = 0;
721 	}
722 	sbuf_printf(sb, "%d", p->p_pid);
723 #define PS_ADD(name, fmt, arg) sbuf_printf(sb, " " fmt, arg)
724 	PS_ADD("comm",		"(%s)",	p->p_comm);
725 	if (kp.ki_stat > sizeof(linux_state)) {
726 		state = 'R';
727 
728 		if (ratelimit == 0) {
729 			printf("linprocfs: don't know how to handle unknown FreeBSD state %d/%zd, mapping to R\n",
730 			    kp.ki_stat, sizeof(linux_state));
731 			++ratelimit;
732 		}
733 	} else
734 		state = linux_state[kp.ki_stat - 1];
735 	PS_ADD("state",		"%c",	state);
736 	PS_ADD("ppid",		"%d",	p->p_pptr ? p->p_pptr->p_pid : 0);
737 	PS_ADD("pgrp",		"%d",	p->p_pgid);
738 	PS_ADD("session",	"%d",	p->p_session->s_sid);
739 	PROC_UNLOCK(p);
740 	PS_ADD("tty",		"%ju",	(uintmax_t)kp.ki_tdev);
741 	PS_ADD("tpgid",		"%d",	kp.ki_tpgid);
742 	PS_ADD("flags",		"%u",	0); /* XXX */
743 	PS_ADD("minflt",	"%lu",	kp.ki_rusage.ru_minflt);
744 	PS_ADD("cminflt",	"%lu",	kp.ki_rusage_ch.ru_minflt);
745 	PS_ADD("majflt",	"%lu",	kp.ki_rusage.ru_majflt);
746 	PS_ADD("cmajflt",	"%lu",	kp.ki_rusage_ch.ru_majflt);
747 	PS_ADD("utime",		"%ld",	TV2J(&kp.ki_rusage.ru_utime));
748 	PS_ADD("stime",		"%ld",	TV2J(&kp.ki_rusage.ru_stime));
749 	PS_ADD("cutime",	"%ld",	TV2J(&kp.ki_rusage_ch.ru_utime));
750 	PS_ADD("cstime",	"%ld",	TV2J(&kp.ki_rusage_ch.ru_stime));
751 	PS_ADD("priority",	"%d",	kp.ki_pri.pri_user);
752 	PS_ADD("nice",		"%d",	kp.ki_nice); /* 19 (nicest) to -19 */
753 	PS_ADD("0",		"%d",	0); /* removed field */
754 	PS_ADD("itrealvalue",	"%d",	0); /* XXX */
755 	PS_ADD("starttime",	"%lu",	TV2J(&kp.ki_start) - TV2J(&boottime));
756 	PS_ADD("vsize",		"%ju",	P2K((uintmax_t)kp.ki_size));
757 	PS_ADD("rss",		"%ju",	(uintmax_t)kp.ki_rssize);
758 	PS_ADD("rlim",		"%lu",	kp.ki_rusage.ru_maxrss);
759 	PS_ADD("startcode",	"%ju",	(uintmax_t)startcode);
760 	PS_ADD("endcode",	"%ju",	(uintmax_t)startdata);
761 	PS_ADD("startstack",	"%u",	0); /* XXX */
762 	PS_ADD("kstkesp",	"%u",	0); /* XXX */
763 	PS_ADD("kstkeip",	"%u",	0); /* XXX */
764 	PS_ADD("signal",	"%u",	0); /* XXX */
765 	PS_ADD("blocked",	"%u",	0); /* XXX */
766 	PS_ADD("sigignore",	"%u",	0); /* XXX */
767 	PS_ADD("sigcatch",	"%u",	0); /* XXX */
768 	PS_ADD("wchan",		"%u",	0); /* XXX */
769 	PS_ADD("nswap",		"%lu",	kp.ki_rusage.ru_nswap);
770 	PS_ADD("cnswap",	"%lu",	kp.ki_rusage_ch.ru_nswap);
771 	PS_ADD("exitsignal",	"%d",	0); /* XXX */
772 	PS_ADD("processor",	"%u",	kp.ki_lastcpu);
773 	PS_ADD("rt_priority",	"%u",	0); /* XXX */ /* >= 2.5.19 */
774 	PS_ADD("policy",	"%u",	kp.ki_pri.pri_class); /* >= 2.5.19 */
775 #undef PS_ADD
776 	sbuf_putc(sb, '\n');
777 
778 	return (0);
779 }
780 
781 /*
782  * Filler function for proc/pid/statm
783  */
784 static int
785 linprocfs_doprocstatm(PFS_FILL_ARGS)
786 {
787 	struct kinfo_proc kp;
788 	segsz_t lsize;
789 
790 	sx_slock(&proctree_lock);
791 	PROC_LOCK(p);
792 	fill_kinfo_proc(p, &kp);
793 	PROC_UNLOCK(p);
794 	sx_sunlock(&proctree_lock);
795 
796 	/*
797 	 * See comments in linprocfs_doprocstatus() regarding the
798 	 * computation of lsize.
799 	 */
800 	/* size resident share trs drs lrs dt */
801 	sbuf_printf(sb, "%ju ", B2P((uintmax_t)kp.ki_size));
802 	sbuf_printf(sb, "%ju ", (uintmax_t)kp.ki_rssize);
803 	sbuf_printf(sb, "%ju ", (uintmax_t)0); /* XXX */
804 	sbuf_printf(sb, "%ju ",	(uintmax_t)kp.ki_tsize);
805 	sbuf_printf(sb, "%ju ", (uintmax_t)(kp.ki_dsize + kp.ki_ssize));
806 	lsize = B2P(kp.ki_size) - kp.ki_dsize -
807 	    kp.ki_ssize - kp.ki_tsize - 1;
808 	sbuf_printf(sb, "%ju ", (uintmax_t)lsize);
809 	sbuf_printf(sb, "%ju\n", (uintmax_t)0); /* XXX */
810 
811 	return (0);
812 }
813 
814 /*
815  * Filler function for proc/pid/status
816  */
817 static int
818 linprocfs_doprocstatus(PFS_FILL_ARGS)
819 {
820 	struct kinfo_proc kp;
821 	char *state;
822 	segsz_t lsize;
823 	struct thread *td2;
824 	struct sigacts *ps;
825 	l_sigset_t siglist, sigignore, sigcatch;
826 	int i;
827 
828 	sx_slock(&proctree_lock);
829 	PROC_LOCK(p);
830 	td2 = FIRST_THREAD_IN_PROC(p); /* XXXKSE pretend only one thread */
831 
832 	if (P_SHOULDSTOP(p)) {
833 		state = "T (stopped)";
834 	} else {
835 		switch(p->p_state) {
836 		case PRS_NEW:
837 			state = "I (idle)";
838 			break;
839 		case PRS_NORMAL:
840 			if (p->p_flag & P_WEXIT) {
841 				state = "X (exiting)";
842 				break;
843 			}
844 			switch(td2->td_state) {
845 			case TDS_INHIBITED:
846 				state = "S (sleeping)";
847 				break;
848 			case TDS_RUNQ:
849 			case TDS_RUNNING:
850 				state = "R (running)";
851 				break;
852 			default:
853 				state = "? (unknown)";
854 				break;
855 			}
856 			break;
857 		case PRS_ZOMBIE:
858 			state = "Z (zombie)";
859 			break;
860 		default:
861 			state = "? (unknown)";
862 			break;
863 		}
864 	}
865 
866 	fill_kinfo_proc(p, &kp);
867 	sx_sunlock(&proctree_lock);
868 
869 	sbuf_printf(sb, "Name:\t%s\n",		p->p_comm); /* XXX escape */
870 	sbuf_printf(sb, "State:\t%s\n",		state);
871 
872 	/*
873 	 * Credentials
874 	 */
875 	sbuf_printf(sb, "Pid:\t%d\n",		p->p_pid);
876 	sbuf_printf(sb, "PPid:\t%d\n",		kp.ki_ppid );
877 	sbuf_printf(sb, "TracerPid:\t%d\n",	kp.ki_tracer );
878 	sbuf_printf(sb, "Uid:\t%d %d %d %d\n",	p->p_ucred->cr_ruid,
879 						p->p_ucred->cr_uid,
880 						p->p_ucred->cr_svuid,
881 						/* FreeBSD doesn't have fsuid */
882 						p->p_ucred->cr_uid);
883 	sbuf_printf(sb, "Gid:\t%d %d %d %d\n",	p->p_ucred->cr_rgid,
884 						p->p_ucred->cr_gid,
885 						p->p_ucred->cr_svgid,
886 						/* FreeBSD doesn't have fsgid */
887 						p->p_ucred->cr_gid);
888 	sbuf_cat(sb, "Groups:\t");
889 	for (i = 0; i < p->p_ucred->cr_ngroups; i++)
890 		sbuf_printf(sb, "%d ",		p->p_ucred->cr_groups[i]);
891 	PROC_UNLOCK(p);
892 	sbuf_putc(sb, '\n');
893 
894 	/*
895 	 * Memory
896 	 *
897 	 * While our approximation of VmLib may not be accurate (I
898 	 * don't know of a simple way to verify it, and I'm not sure
899 	 * it has much meaning anyway), I believe it's good enough.
900 	 *
901 	 * The same code that could (I think) accurately compute VmLib
902 	 * could also compute VmLck, but I don't really care enough to
903 	 * implement it. Submissions are welcome.
904 	 */
905 	sbuf_printf(sb, "VmSize:\t%8ju kB\n",	B2K((uintmax_t)kp.ki_size));
906 	sbuf_printf(sb, "VmLck:\t%8u kB\n",	P2K(0)); /* XXX */
907 	sbuf_printf(sb, "VmRSS:\t%8ju kB\n",	P2K((uintmax_t)kp.ki_rssize));
908 	sbuf_printf(sb, "VmData:\t%8ju kB\n",	P2K((uintmax_t)kp.ki_dsize));
909 	sbuf_printf(sb, "VmStk:\t%8ju kB\n",	P2K((uintmax_t)kp.ki_ssize));
910 	sbuf_printf(sb, "VmExe:\t%8ju kB\n",	P2K((uintmax_t)kp.ki_tsize));
911 	lsize = B2P(kp.ki_size) - kp.ki_dsize -
912 	    kp.ki_ssize - kp.ki_tsize - 1;
913 	sbuf_printf(sb, "VmLib:\t%8ju kB\n",	P2K((uintmax_t)lsize));
914 
915 	/*
916 	 * Signal masks
917 	 */
918 	PROC_LOCK(p);
919 	bsd_to_linux_sigset(&p->p_siglist, &siglist);
920 	ps = p->p_sigacts;
921 	mtx_lock(&ps->ps_mtx);
922 	bsd_to_linux_sigset(&ps->ps_sigignore, &sigignore);
923 	bsd_to_linux_sigset(&ps->ps_sigcatch, &sigcatch);
924 	mtx_unlock(&ps->ps_mtx);
925 	PROC_UNLOCK(p);
926 
927 	sbuf_printf(sb, "SigPnd:\t%016jx\n",	siglist.__mask);
928 	/*
929 	 * XXX. SigBlk - target thread's signal mask, td_sigmask.
930 	 * To implement SigBlk pseudofs should support proc/tid dir entries.
931 	 */
932 	sbuf_printf(sb, "SigBlk:\t%016x\n",	0);
933 	sbuf_printf(sb, "SigIgn:\t%016jx\n",	sigignore.__mask);
934 	sbuf_printf(sb, "SigCgt:\t%016jx\n",	sigcatch.__mask);
935 
936 	/*
937 	 * Linux also prints the capability masks, but we don't have
938 	 * capabilities yet, and when we do get them they're likely to
939 	 * be meaningless to Linux programs, so we lie. XXX
940 	 */
941 	sbuf_printf(sb, "CapInh:\t%016x\n",	0);
942 	sbuf_printf(sb, "CapPrm:\t%016x\n",	0);
943 	sbuf_printf(sb, "CapEff:\t%016x\n",	0);
944 
945 	return (0);
946 }
947 
948 
949 /*
950  * Filler function for proc/pid/cwd
951  */
952 static int
953 linprocfs_doproccwd(PFS_FILL_ARGS)
954 {
955 	struct filedesc *fdp;
956 	struct vnode *vp;
957 	char *fullpath = "unknown";
958 	char *freepath = NULL;
959 
960 	fdp = p->p_fd;
961 	FILEDESC_SLOCK(fdp);
962 	vp = fdp->fd_cdir;
963 	if (vp != NULL)
964 		VREF(vp);
965 	FILEDESC_SUNLOCK(fdp);
966 	vn_fullpath(td, vp, &fullpath, &freepath);
967 	if (vp != NULL)
968 		vrele(vp);
969 	sbuf_printf(sb, "%s", fullpath);
970 	if (freepath)
971 		free(freepath, M_TEMP);
972 	return (0);
973 }
974 
975 /*
976  * Filler function for proc/pid/root
977  */
978 static int
979 linprocfs_doprocroot(PFS_FILL_ARGS)
980 {
981 	struct filedesc *fdp;
982 	struct vnode *vp;
983 	char *fullpath = "unknown";
984 	char *freepath = NULL;
985 
986 	fdp = p->p_fd;
987 	FILEDESC_SLOCK(fdp);
988 	vp = jailed(p->p_ucred) ? fdp->fd_jdir : fdp->fd_rdir;
989 	if (vp != NULL)
990 		VREF(vp);
991 	FILEDESC_SUNLOCK(fdp);
992 	vn_fullpath(td, vp, &fullpath, &freepath);
993 	if (vp != NULL)
994 		vrele(vp);
995 	sbuf_printf(sb, "%s", fullpath);
996 	if (freepath)
997 		free(freepath, M_TEMP);
998 	return (0);
999 }
1000 
1001 /*
1002  * Filler function for proc/pid/cmdline
1003  */
1004 static int
1005 linprocfs_doproccmdline(PFS_FILL_ARGS)
1006 {
1007 	int ret;
1008 
1009 	PROC_LOCK(p);
1010 	if ((ret = p_cansee(td, p)) != 0) {
1011 		PROC_UNLOCK(p);
1012 		return (ret);
1013 	}
1014 
1015 	/*
1016 	 * Mimic linux behavior and pass only processes with usermode
1017 	 * address space as valid.  Return zero silently otherwize.
1018 	 */
1019 	if (p->p_vmspace == &vmspace0) {
1020 		PROC_UNLOCK(p);
1021 		return (0);
1022 	}
1023 	if (p->p_args != NULL) {
1024 		sbuf_bcpy(sb, p->p_args->ar_args, p->p_args->ar_length);
1025 		PROC_UNLOCK(p);
1026 		return (0);
1027 	}
1028 
1029 	if ((p->p_flag & P_SYSTEM) != 0) {
1030 		PROC_UNLOCK(p);
1031 		return (0);
1032 	}
1033 
1034 	PROC_UNLOCK(p);
1035 
1036 	ret = proc_getargv(td, p, sb);
1037 	return (ret);
1038 }
1039 
1040 /*
1041  * Filler function for proc/pid/environ
1042  */
1043 static int
1044 linprocfs_doprocenviron(PFS_FILL_ARGS)
1045 {
1046 
1047 	/*
1048 	 * Mimic linux behavior and pass only processes with usermode
1049 	 * address space as valid.  Return zero silently otherwize.
1050 	 */
1051 	if (p->p_vmspace == &vmspace0)
1052 		return (0);
1053 
1054 	return (proc_getenvv(td, p, sb));
1055 }
1056 
/*
 * Line templates for proc/pid/maps.  Arguments, in order: start and
 * end address, four one-character permission strings, offset, device
 * major and minor, inode number, a separator and the mapping name.
 * The 32- and 64-bit variants differ only in the address field width.
 * The templates are never modified, so they are const.
 */
static const char l32_map_str[] = "%08lx-%08lx %s%s%s%s %08lx %02x:%02x %lu%s%s\n";
static const char l64_map_str[] = "%016lx-%016lx %s%s%s%s %08lx %02x:%02x %lu%s%s\n";
/*
 * Names substituted for the vdso and stack mappings.  These stay
 * non-const because linprocfs_doprocmaps() assigns them to a plain
 * char pointer.
 */
static char vdso_str[] = "      [vdso]";
static char stack_str[] = "      [stack]";
1061 
1062 /*
1063  * Filler function for proc/pid/maps
1064  */
1065 static int
1066 linprocfs_doprocmaps(PFS_FILL_ARGS)
1067 {
1068 	struct vmspace *vm;
1069 	vm_map_t map;
1070 	vm_map_entry_t entry, tmp_entry;
1071 	vm_object_t obj, tobj, lobj;
1072 	vm_offset_t e_start, e_end;
1073 	vm_ooffset_t off = 0;
1074 	vm_prot_t e_prot;
1075 	unsigned int last_timestamp;
1076 	char *name = "", *freename = NULL;
1077 	const char *l_map_str;
1078 	ino_t ino;
1079 	int ref_count, shadow_count, flags;
1080 	int error;
1081 	struct vnode *vp;
1082 	struct vattr vat;
1083 
1084 	PROC_LOCK(p);
1085 	error = p_candebug(td, p);
1086 	PROC_UNLOCK(p);
1087 	if (error)
1088 		return (error);
1089 
1090 	if (uio->uio_rw != UIO_READ)
1091 		return (EOPNOTSUPP);
1092 
1093 	error = 0;
1094 	vm = vmspace_acquire_ref(p);
1095 	if (vm == NULL)
1096 		return (ESRCH);
1097 
1098 	if (SV_CURPROC_FLAG(SV_LP64))
1099 		l_map_str = l64_map_str;
1100 	else
1101 		l_map_str = l32_map_str;
1102 	map = &vm->vm_map;
1103 	vm_map_lock_read(map);
1104 	for (entry = map->header.next; entry != &map->header;
1105 	    entry = entry->next) {
1106 		name = "";
1107 		freename = NULL;
1108 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
1109 			continue;
1110 		e_prot = entry->protection;
1111 		e_start = entry->start;
1112 		e_end = entry->end;
1113 		obj = entry->object.vm_object;
1114 		for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) {
1115 			VM_OBJECT_RLOCK(tobj);
1116 			if (lobj != obj)
1117 				VM_OBJECT_RUNLOCK(lobj);
1118 			lobj = tobj;
1119 		}
1120 		last_timestamp = map->timestamp;
1121 		vm_map_unlock_read(map);
1122 		ino = 0;
1123 		if (lobj) {
1124 			off = IDX_TO_OFF(lobj->size);
1125 			vp = vm_object_vnode(lobj);
1126 			if (vp != NULL)
1127 				vref(vp);
1128 			if (lobj != obj)
1129 				VM_OBJECT_RUNLOCK(lobj);
1130 			flags = obj->flags;
1131 			ref_count = obj->ref_count;
1132 			shadow_count = obj->shadow_count;
1133 			VM_OBJECT_RUNLOCK(obj);
1134 			if (vp != NULL) {
1135 				vn_fullpath(td, vp, &name, &freename);
1136 				vn_lock(vp, LK_SHARED | LK_RETRY);
1137 				VOP_GETATTR(vp, &vat, td->td_ucred);
1138 				ino = vat.va_fileid;
1139 				vput(vp);
1140 			} else if (SV_PROC_ABI(p) == SV_ABI_LINUX) {
1141 				if (e_start == p->p_sysent->sv_shared_page_base)
1142 					name = vdso_str;
1143 				if (e_end == p->p_sysent->sv_usrstack)
1144 					name = stack_str;
1145 			}
1146 		} else {
1147 			flags = 0;
1148 			ref_count = 0;
1149 			shadow_count = 0;
1150 		}
1151 
1152 		/*
1153 		 * format:
1154 		 *  start, end, access, offset, major, minor, inode, name.
1155 		 */
1156 		error = sbuf_printf(sb, l_map_str,
1157 		    (u_long)e_start, (u_long)e_end,
1158 		    (e_prot & VM_PROT_READ)?"r":"-",
1159 		    (e_prot & VM_PROT_WRITE)?"w":"-",
1160 		    (e_prot & VM_PROT_EXECUTE)?"x":"-",
1161 		    "p",
1162 		    (u_long)off,
1163 		    0,
1164 		    0,
1165 		    (u_long)ino,
1166 		    *name ? "     " : "",
1167 		    name
1168 		    );
1169 		if (freename)
1170 			free(freename, M_TEMP);
1171 		vm_map_lock_read(map);
1172 		if (error == -1) {
1173 			error = 0;
1174 			break;
1175 		}
1176 		if (last_timestamp != map->timestamp) {
1177 			/*
1178 			 * Look again for the entry because the map was
1179 			 * modified while it was unlocked.  Specifically,
1180 			 * the entry may have been clipped, merged, or deleted.
1181 			 */
1182 			vm_map_lookup_entry(map, e_end - 1, &tmp_entry);
1183 			entry = tmp_entry;
1184 		}
1185 	}
1186 	vm_map_unlock_read(map);
1187 	vmspace_free(vm);
1188 
1189 	return (error);
1190 }
1191 
/*
 * Criteria for interface name translation
 *
 * Evaluates to non-zero when the interface's if_type is IFT_ETHER.
 * The argument is parenthesized so that any pointer expression, not
 * just a plain identifier, can be passed safely.
 */
#define IFP_IS_ETH(ifp) ((ifp)->if_type == IFT_ETHER)
1196 
1197 static int
1198 linux_ifname(struct ifnet *ifp, char *buffer, size_t buflen)
1199 {
1200 	struct ifnet *ifscan;
1201 	int ethno;
1202 
1203 	IFNET_RLOCK_ASSERT();
1204 
1205 	/* Short-circuit non ethernet interfaces */
1206 	if (!IFP_IS_ETH(ifp))
1207 		return (strlcpy(buffer, ifp->if_xname, buflen));
1208 
1209 	/* Determine the (relative) unit number for ethernet interfaces */
1210 	ethno = 0;
1211 	CK_STAILQ_FOREACH(ifscan, &V_ifnet, if_link) {
1212 		if (ifscan == ifp)
1213 			return (snprintf(buffer, buflen, "eth%d", ethno));
1214 		if (IFP_IS_ETH(ifscan))
1215 			ethno++;
1216 	}
1217 
1218 	return (0);
1219 }
1220 
1221 /*
1222  * Filler function for proc/net/dev
1223  */
1224 static int
1225 linprocfs_donetdev(PFS_FILL_ARGS)
1226 {
1227 	char ifname[16]; /* XXX LINUX_IFNAMSIZ */
1228 	struct ifnet *ifp;
1229 
1230 	sbuf_printf(sb, "%6s|%58s|%s\n"
1231 	    "%6s|%58s|%58s\n",
1232 	    "Inter-", "   Receive", "  Transmit",
1233 	    " face",
1234 	    "bytes    packets errs drop fifo frame compressed multicast",
1235 	    "bytes    packets errs drop fifo colls carrier compressed");
1236 
1237 	CURVNET_SET(TD_TO_VNET(curthread));
1238 	IFNET_RLOCK();
1239 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1240 		linux_ifname(ifp, ifname, sizeof ifname);
1241 		sbuf_printf(sb, "%6.6s: ", ifname);
1242 		sbuf_printf(sb, "%7ju %7ju %4ju %4ju %4lu %5lu %10lu %9ju ",
1243 		    (uintmax_t )ifp->if_get_counter(ifp, IFCOUNTER_IBYTES),
1244 		    (uintmax_t )ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS),
1245 		    (uintmax_t )ifp->if_get_counter(ifp, IFCOUNTER_IERRORS),
1246 		    (uintmax_t )ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS),
1247 							/* rx_missed_errors */
1248 		    0UL,				/* rx_fifo_errors */
1249 		    0UL,				/* rx_length_errors +
1250 							 * rx_over_errors +
1251 							 * rx_crc_errors +
1252 							 * rx_frame_errors */
1253 		    0UL,				/* rx_compressed */
1254 		    (uintmax_t )ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS));
1255 							/* XXX-BZ rx only? */
1256 		sbuf_printf(sb, "%8ju %7ju %4ju %4ju %4lu %5ju %7lu %10lu\n",
1257 		    (uintmax_t )ifp->if_get_counter(ifp, IFCOUNTER_OBYTES),
1258 		    (uintmax_t )ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS),
1259 		    (uintmax_t )ifp->if_get_counter(ifp, IFCOUNTER_OERRORS),
1260 		    (uintmax_t )ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS),
1261 		    0UL,				/* tx_fifo_errors */
1262 		    (uintmax_t )ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS),
1263 		    0UL,				/* tx_carrier_errors +
1264 							 * tx_aborted_errors +
1265 							 * tx_window_errors +
1266 							 * tx_heartbeat_errors*/
1267 		    0UL);				/* tx_compressed */
1268 	}
1269 	IFNET_RUNLOCK();
1270 	CURVNET_RESTORE();
1271 
1272 	return (0);
1273 }
1274 
1275 /*
1276  * Filler function for proc/sys/kernel/osrelease
1277  */
1278 static int
1279 linprocfs_doosrelease(PFS_FILL_ARGS)
1280 {
1281 	char osrelease[LINUX_MAX_UTSNAME];
1282 
1283 	linux_get_osrelease(td, osrelease);
1284 	sbuf_printf(sb, "%s\n", osrelease);
1285 
1286 	return (0);
1287 }
1288 
1289 /*
1290  * Filler function for proc/sys/kernel/ostype
1291  */
1292 static int
1293 linprocfs_doostype(PFS_FILL_ARGS)
1294 {
1295 	char osname[LINUX_MAX_UTSNAME];
1296 
1297 	linux_get_osname(td, osname);
1298 	sbuf_printf(sb, "%s\n", osname);
1299 
1300 	return (0);
1301 }
1302 
1303 /*
1304  * Filler function for proc/sys/kernel/version
1305  */
1306 static int
1307 linprocfs_doosbuild(PFS_FILL_ARGS)
1308 {
1309 
1310 	linprocfs_osbuild(td, sb);
1311 	sbuf_cat(sb, "\n");
1312 	return (0);
1313 }
1314 
1315 /*
1316  * Filler function for proc/sys/kernel/msgmni
1317  */
1318 static int
1319 linprocfs_domsgmni(PFS_FILL_ARGS)
1320 {
1321 
1322 	sbuf_printf(sb, "%d\n", msginfo.msgmni);
1323 	return (0);
1324 }
1325 
1326 /*
1327  * Filler function for proc/sys/kernel/pid_max
1328  */
1329 static int
1330 linprocfs_dopid_max(PFS_FILL_ARGS)
1331 {
1332 
1333 	sbuf_printf(sb, "%i\n", PID_MAX);
1334 	return (0);
1335 }
1336 
1337 /*
1338  * Filler function for proc/sys/kernel/sem
1339  */
1340 static int
1341 linprocfs_dosem(PFS_FILL_ARGS)
1342 {
1343 
1344 	sbuf_printf(sb, "%d %d %d %d\n", seminfo.semmsl, seminfo.semmns,
1345 	    seminfo.semopm, seminfo.semmni);
1346 	return (0);
1347 }
1348 
1349 /*
1350  * Filler function for proc/sys/vm/min_free_kbytes
1351  *
1352  * This mirrors the approach in illumos to return zero for reads. Effectively,
1353  * it says, no memory is kept in reserve for "atomic allocations". This class
1354  * of allocation can be used at times when a thread cannot be suspended.
1355  */
1356 static int
1357 linprocfs_dominfree(PFS_FILL_ARGS)
1358 {
1359 
1360 	sbuf_printf(sb, "%d\n", 0);
1361 	return (0);
1362 }
1363 
/*
 * Filler function for proc/scsi/device_info
 *
 * Produces no output; the file always reads as empty.
 */
static int
linprocfs_doscsidevinfo(PFS_FILL_ARGS)
{

	/* Intentionally empty: nothing is written to the sbuf. */
	return (0);
}
1373 
/*
 * Filler function for proc/scsi/scsi
 *
 * Produces no output; the file always reads as empty.
 */
static int
linprocfs_doscsiscsi(PFS_FILL_ARGS)
{

	/* Intentionally empty: nothing is written to the sbuf. */
	return (0);
}
1383 
1384 /*
1385  * Filler function for proc/devices
1386  */
1387 static int
1388 linprocfs_dodevices(PFS_FILL_ARGS)
1389 {
1390 	char *char_devices;
1391 	sbuf_printf(sb, "Character devices:\n");
1392 
1393 	char_devices = linux_get_char_devices();
1394 	sbuf_printf(sb, "%s", char_devices);
1395 	linux_free_get_char_devices(char_devices);
1396 
1397 	sbuf_printf(sb, "\nBlock devices:\n");
1398 
1399 	return (0);
1400 }
1401 
1402 /*
1403  * Filler function for proc/cmdline
1404  */
1405 static int
1406 linprocfs_docmdline(PFS_FILL_ARGS)
1407 {
1408 
1409 	sbuf_printf(sb, "BOOT_IMAGE=%s", kernelname);
1410 	sbuf_printf(sb, " ro root=302\n");
1411 	return (0);
1412 }
1413 
1414 /*
1415  * Filler function for proc/filesystems
1416  */
1417 static int
1418 linprocfs_dofilesystems(PFS_FILL_ARGS)
1419 {
1420 	struct vfsconf *vfsp;
1421 
1422 	vfsconf_slock();
1423 	TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) {
1424 		if (vfsp->vfc_flags & VFCF_SYNTHETIC)
1425 			sbuf_printf(sb, "nodev");
1426 		sbuf_printf(sb, "\t%s\n", vfsp->vfc_name);
1427 	}
1428 	vfsconf_sunlock();
1429 	return(0);
1430 }
1431 
#if 0
/*
 * Filler function for proc/modules
 *
 * Compiled out.  Would print the file name, size and reference count
 * of every entry on the linker_files list.
 */
static int
linprocfs_domodules(PFS_FILL_ARGS)
{
	struct linker_file *file;

	TAILQ_FOREACH(file, &linker_files, link) {
		(void)sbuf_printf(sb, "%-20s%8lu%4d\n", file->filename,
		    (unsigned long)file->size, file->refs);
	}
	return (0);
}
#endif
1448 
1449 /*
1450  * Filler function for proc/pid/fd
1451  */
1452 static int
1453 linprocfs_dofdescfs(PFS_FILL_ARGS)
1454 {
1455 
1456 	if (p == curproc)
1457 		sbuf_printf(sb, "/dev/fd");
1458 	else
1459 		sbuf_printf(sb, "unknown");
1460 	return (0);
1461 }
1462 
1463 /*
1464  * Filler function for proc/pid/limits
1465  */
1466 static const struct linux_rlimit_ident {
1467 	const char	*desc;
1468 	const char	*unit;
1469 	unsigned int	rlim_id;
1470 } linux_rlimits_ident[] = {
1471 	{ "Max cpu time",	"seconds",	RLIMIT_CPU },
1472 	{ "Max file size", 	"bytes",	RLIMIT_FSIZE },
1473 	{ "Max data size",	"bytes", 	RLIMIT_DATA },
1474 	{ "Max stack size",	"bytes", 	RLIMIT_STACK },
1475 	{ "Max core file size",  "bytes",	RLIMIT_CORE },
1476 	{ "Max resident set",	"bytes",	RLIMIT_RSS },
1477 	{ "Max processes",	"processes",	RLIMIT_NPROC },
1478 	{ "Max open files",	"files",	RLIMIT_NOFILE },
1479 	{ "Max locked memory",	"bytes",	RLIMIT_MEMLOCK },
1480 	{ "Max address space",	"bytes",	RLIMIT_AS },
1481 	{ "Max file locks",	"locks",	LINUX_RLIMIT_LOCKS },
1482 	{ "Max pending signals", "signals",	LINUX_RLIMIT_SIGPENDING },
1483 	{ "Max msgqueue size",	"bytes",	LINUX_RLIMIT_MSGQUEUE },
1484 	{ "Max nice priority", 		"",	LINUX_RLIMIT_NICE },
1485 	{ "Max realtime priority",	"",	LINUX_RLIMIT_RTPRIO },
1486 	{ "Max realtime timeout",	"us",	LINUX_RLIMIT_RTTIME },
1487 	{ 0, 0, 0 }
1488 };
1489 
1490 static int
1491 linprocfs_doproclimits(PFS_FILL_ARGS)
1492 {
1493 	const struct linux_rlimit_ident *li;
1494 	struct plimit *limp;
1495 	struct rlimit rl;
1496 	ssize_t size;
1497 	int res, error;
1498 
1499 	error = 0;
1500 
1501 	PROC_LOCK(p);
1502 	limp = lim_hold(p->p_limit);
1503 	PROC_UNLOCK(p);
1504 	size = sizeof(res);
1505 	sbuf_printf(sb, "%-26s%-21s%-21s%-21s\n", "Limit", "Soft Limit",
1506 			"Hard Limit", "Units");
1507 	for (li = linux_rlimits_ident; li->desc != NULL; ++li) {
1508 		switch (li->rlim_id)
1509 		{
1510 		case LINUX_RLIMIT_LOCKS:
1511 			/* FALLTHROUGH */
1512 		case LINUX_RLIMIT_RTTIME:
1513 			rl.rlim_cur = RLIM_INFINITY;
1514 			break;
1515 		case LINUX_RLIMIT_SIGPENDING:
1516 			error = kernel_sysctlbyname(td,
1517 			    "kern.sigqueue.max_pending_per_proc",
1518 			    &res, &size, 0, 0, 0, 0);
1519 			if (error != 0)
1520 				goto out;
1521 			rl.rlim_cur = res;
1522 			rl.rlim_max = res;
1523 			break;
1524 		case LINUX_RLIMIT_MSGQUEUE:
1525 			error = kernel_sysctlbyname(td,
1526 			    "kern.ipc.msgmnb", &res, &size, 0, 0, 0, 0);
1527 			if (error != 0)
1528 				goto out;
1529 			rl.rlim_cur = res;
1530 			rl.rlim_max = res;
1531 			break;
1532 		case LINUX_RLIMIT_NICE:
1533 			/* FALLTHROUGH */
1534 		case LINUX_RLIMIT_RTPRIO:
1535 			rl.rlim_cur = 0;
1536 			rl.rlim_max = 0;
1537 			break;
1538 		default:
1539 			rl = limp->pl_rlimit[li->rlim_id];
1540 			break;
1541 		}
1542 		if (rl.rlim_cur == RLIM_INFINITY)
1543 			sbuf_printf(sb, "%-26s%-21s%-21s%-10s\n",
1544 			    li->desc, "unlimited", "unlimited", li->unit);
1545 		else
1546 			sbuf_printf(sb, "%-26s%-21llu%-21llu%-10s\n",
1547 			    li->desc, (unsigned long long)rl.rlim_cur,
1548 			    (unsigned long long)rl.rlim_max, li->unit);
1549 	}
1550 out:
1551 	lim_free(limp);
1552 	return (error);
1553 }
1554 
1555 /*
1556  * Filler function for proc/sys/kernel/random/uuid
1557  */
1558 static int
1559 linprocfs_douuid(PFS_FILL_ARGS)
1560 {
1561 	struct uuid uuid;
1562 
1563 	kern_uuidgen(&uuid, 1);
1564 	sbuf_printf_uuid(sb, &uuid);
1565 	sbuf_printf(sb, "\n");
1566 	return(0);
1567 }
1568 
1569 /*
1570  * Filler function for proc/pid/auxv
1571  */
1572 static int
1573 linprocfs_doauxv(PFS_FILL_ARGS)
1574 {
1575 	struct sbuf *asb;
1576 	off_t buflen, resid;
1577 	int error;
1578 
1579 	/*
1580 	 * Mimic linux behavior and pass only processes with usermode
1581 	 * address space as valid. Return zero silently otherwise.
1582 	 */
1583 	if (p->p_vmspace == &vmspace0)
1584 		return (0);
1585 
1586 	if (uio->uio_resid == 0)
1587 		return (0);
1588 	if (uio->uio_offset < 0 || uio->uio_resid < 0)
1589 		return (EINVAL);
1590 
1591 	asb = sbuf_new_auto();
1592 	if (asb == NULL)
1593 		return (ENOMEM);
1594 	error = proc_getauxv(td, p, asb);
1595 	if (error == 0)
1596 		error = sbuf_finish(asb);
1597 
1598 	resid = sbuf_len(asb) - uio->uio_offset;
1599 	if (resid > uio->uio_resid)
1600 		buflen = uio->uio_resid;
1601 	else
1602 		buflen = resid;
1603 	if (buflen > IOSIZE_MAX)
1604 		return (EINVAL);
1605 	if (buflen > MAXPHYS)
1606 		buflen = MAXPHYS;
1607 	if (resid <= 0)
1608 		return (0);
1609 
1610 	if (error == 0)
1611 		error = uiomove(sbuf_data(asb) + uio->uio_offset, buflen, uio);
1612 	sbuf_delete(asb);
1613 	return (error);
1614 }
1615 
1616 /*
1617  * Constructor
1618  */
1619 static int
1620 linprocfs_init(PFS_INIT_ARGS)
1621 {
1622 	struct pfs_node *root;
1623 	struct pfs_node *dir;
1624 	struct pfs_node *sys;
1625 
1626 	root = pi->pi_root;
1627 
1628 	/* /proc/... */
1629 	pfs_create_file(root, "cmdline", &linprocfs_docmdline,
1630 	    NULL, NULL, NULL, PFS_RD);
1631 	pfs_create_file(root, "cpuinfo", &linprocfs_docpuinfo,
1632 	    NULL, NULL, NULL, PFS_RD);
1633 	pfs_create_file(root, "devices", &linprocfs_dodevices,
1634 	    NULL, NULL, NULL, PFS_RD);
1635 	pfs_create_file(root, "filesystems", &linprocfs_dofilesystems,
1636 	    NULL, NULL, NULL, PFS_RD);
1637 	pfs_create_file(root, "loadavg", &linprocfs_doloadavg,
1638 	    NULL, NULL, NULL, PFS_RD);
1639 	pfs_create_file(root, "meminfo", &linprocfs_domeminfo,
1640 	    NULL, NULL, NULL, PFS_RD);
1641 #if 0
1642 	pfs_create_file(root, "modules", &linprocfs_domodules,
1643 	    NULL, NULL, NULL, PFS_RD);
1644 #endif
1645 	pfs_create_file(root, "mounts", &linprocfs_domtab,
1646 	    NULL, NULL, NULL, PFS_RD);
1647 	pfs_create_file(root, "mtab", &linprocfs_domtab,
1648 	    NULL, NULL, NULL, PFS_RD);
1649 	pfs_create_file(root, "partitions", &linprocfs_dopartitions,
1650 	    NULL, NULL, NULL, PFS_RD);
1651 	pfs_create_link(root, "self", &procfs_docurproc,
1652 	    NULL, NULL, NULL, 0);
1653 	pfs_create_file(root, "stat", &linprocfs_dostat,
1654 	    NULL, NULL, NULL, PFS_RD);
1655 	pfs_create_file(root, "swaps", &linprocfs_doswaps,
1656 	    NULL, NULL, NULL, PFS_RD);
1657 	pfs_create_file(root, "uptime", &linprocfs_douptime,
1658 	    NULL, NULL, NULL, PFS_RD);
1659 	pfs_create_file(root, "version", &linprocfs_doversion,
1660 	    NULL, NULL, NULL, PFS_RD);
1661 
1662 	/* /proc/net/... */
1663 	dir = pfs_create_dir(root, "net", NULL, NULL, NULL, 0);
1664 	pfs_create_file(dir, "dev", &linprocfs_donetdev,
1665 	    NULL, NULL, NULL, PFS_RD);
1666 
1667 	/* /proc/<pid>/... */
1668 	dir = pfs_create_dir(root, "pid", NULL, NULL, NULL, PFS_PROCDEP);
1669 	pfs_create_file(dir, "cmdline", &linprocfs_doproccmdline,
1670 	    NULL, NULL, NULL, PFS_RD);
1671 	pfs_create_link(dir, "cwd", &linprocfs_doproccwd,
1672 	    NULL, NULL, NULL, 0);
1673 	pfs_create_file(dir, "environ", &linprocfs_doprocenviron,
1674 	    NULL, &procfs_candebug, NULL, PFS_RD);
1675 	pfs_create_link(dir, "exe", &procfs_doprocfile,
1676 	    NULL, &procfs_notsystem, NULL, 0);
1677 	pfs_create_file(dir, "maps", &linprocfs_doprocmaps,
1678 	    NULL, NULL, NULL, PFS_RD);
1679 	pfs_create_file(dir, "mem", &procfs_doprocmem,
1680 	    procfs_attr_rw, &procfs_candebug, NULL, PFS_RDWR | PFS_RAW);
1681 	pfs_create_file(dir, "mounts", &linprocfs_domtab,
1682 	    NULL, NULL, NULL, PFS_RD);
1683 	pfs_create_link(dir, "root", &linprocfs_doprocroot,
1684 	    NULL, NULL, NULL, 0);
1685 	pfs_create_file(dir, "stat", &linprocfs_doprocstat,
1686 	    NULL, NULL, NULL, PFS_RD);
1687 	pfs_create_file(dir, "statm", &linprocfs_doprocstatm,
1688 	    NULL, NULL, NULL, PFS_RD);
1689 	pfs_create_file(dir, "status", &linprocfs_doprocstatus,
1690 	    NULL, NULL, NULL, PFS_RD);
1691 	pfs_create_link(dir, "fd", &linprocfs_dofdescfs,
1692 	    NULL, NULL, NULL, 0);
1693 	pfs_create_file(dir, "auxv", &linprocfs_doauxv,
1694 	    NULL, &procfs_candebug, NULL, PFS_RD|PFS_RAWRD);
1695 	pfs_create_file(dir, "limits", &linprocfs_doproclimits,
1696 	    NULL, NULL, NULL, PFS_RD);
1697 
1698 	/* /proc/scsi/... */
1699 	dir = pfs_create_dir(root, "scsi", NULL, NULL, NULL, 0);
1700 	pfs_create_file(dir, "device_info", &linprocfs_doscsidevinfo,
1701 	    NULL, NULL, NULL, PFS_RD);
1702 	pfs_create_file(dir, "scsi", &linprocfs_doscsiscsi,
1703 	    NULL, NULL, NULL, PFS_RD);
1704 
1705 	/* /proc/sys/... */
1706 	sys = pfs_create_dir(root, "sys", NULL, NULL, NULL, 0);
1707 	/* /proc/sys/kernel/... */
1708 	dir = pfs_create_dir(sys, "kernel", NULL, NULL, NULL, 0);
1709 	pfs_create_file(dir, "osrelease", &linprocfs_doosrelease,
1710 	    NULL, NULL, NULL, PFS_RD);
1711 	pfs_create_file(dir, "ostype", &linprocfs_doostype,
1712 	    NULL, NULL, NULL, PFS_RD);
1713 	pfs_create_file(dir, "version", &linprocfs_doosbuild,
1714 	    NULL, NULL, NULL, PFS_RD);
1715 	pfs_create_file(dir, "msgmni", &linprocfs_domsgmni,
1716 	    NULL, NULL, NULL, PFS_RD);
1717 	pfs_create_file(dir, "pid_max", &linprocfs_dopid_max,
1718 	    NULL, NULL, NULL, PFS_RD);
1719 	pfs_create_file(dir, "sem", &linprocfs_dosem,
1720 	    NULL, NULL, NULL, PFS_RD);
1721 
1722 	/* /proc/sys/kernel/random/... */
1723 	dir = pfs_create_dir(dir, "random", NULL, NULL, NULL, 0);
1724 	pfs_create_file(dir, "uuid", &linprocfs_douuid,
1725 	    NULL, NULL, NULL, PFS_RD);
1726 
1727 	/* /proc/sys/vm/.... */
1728 	dir = pfs_create_dir(sys, "vm", NULL, NULL, NULL, 0);
1729 	pfs_create_file(dir, "min_free_kbytes", &linprocfs_dominfree,
1730 	    NULL, NULL, NULL, PFS_RD);
1731 
1732 	return (0);
1733 }
1734 
/*
 * Destructor
 *
 * Performs no teardown of its own and always returns 0.
 */
static int
linprocfs_uninit(PFS_INIT_ARGS)
{

	/* Nothing to release here; pseudofs will GC. */
	return (0);
}
1745 
/*
 * Register linprocfs as a pseudofs filesystem and declare the kernel
 * modules it depends on.
 * NOTE(review): the PSEUDOFS() arguments look like name, version and
 * VFS flags -- confirm against the pseudofs headers.
 */
PSEUDOFS(linprocfs, 1, VFCF_JAIL);
/* aarch64 and amd64 depend on linux_common; all other platforms on linux. */
#if defined(__aarch64__) || defined(__amd64__)
MODULE_DEPEND(linprocfs, linux_common, 1, 1, 1);
#else
MODULE_DEPEND(linprocfs, linux, 1, 1, 1);
#endif
/* Dependencies common to every platform. */
MODULE_DEPEND(linprocfs, procfs, 1, 1, 1);
MODULE_DEPEND(linprocfs, sysvmsg, 1, 1, 1);
MODULE_DEPEND(linprocfs, sysvsem, 1, 1, 1);
1755