xref: /freebsd/sys/compat/linprocfs/linprocfs.c (revision 069ac184)
1 /*-
2  * SPDX-License-Identifier: BSD-4-Clause
3  *
4  * Copyright (c) 2000 Dag-Erling Smørgrav
5  * Copyright (c) 1999 Pierre Beyssac
6  * Copyright (c) 1993 Jan-Simon Pendry
7  * Copyright (c) 1993
8  *	The Regents of the University of California.  All rights reserved.
9  *
10  * This code is derived from software contributed to Berkeley by
11  * Jan-Simon Pendry.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by the University of
24  *	California, Berkeley and its contributors.
25  * 4. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  */
41 
42 #include "opt_inet.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/blist.h>
47 #include <sys/conf.h>
48 #include <sys/exec.h>
49 #include <sys/fcntl.h>
50 #include <sys/filedesc.h>
51 #include <sys/jail.h>
52 #include <sys/kernel.h>
53 #include <sys/limits.h>
54 #include <sys/linker.h>
55 #include <sys/lock.h>
56 #include <sys/malloc.h>
57 #include <sys/msg.h>
58 #include <sys/mutex.h>
59 #include <sys/namei.h>
60 #include <sys/proc.h>
61 #include <sys/ptrace.h>
62 #include <sys/queue.h>
63 #include <sys/resourcevar.h>
64 #include <sys/resource.h>
65 #include <sys/sbuf.h>
66 #include <sys/sem.h>
67 #include <sys/shm.h>
68 #include <sys/smp.h>
69 #include <sys/socket.h>
70 #include <sys/syscallsubr.h>
71 #include <sys/sysctl.h>
72 #include <sys/sysent.h>
73 #include <sys/time.h>
74 #include <sys/tty.h>
75 #include <sys/user.h>
76 #include <sys/uuid.h>
77 #include <sys/vmmeter.h>
78 #include <sys/vnode.h>
79 #include <sys/bus.h>
80 #include <sys/uio.h>
81 
82 #include <net/if.h>
83 #include <net/if_var.h>
84 #include <net/if_types.h>
85 
86 #include <net/route.h>
87 #include <net/route/nhop.h>
88 #include <net/route/route_ctl.h>
89 
90 #include <vm/vm.h>
91 #include <vm/vm_extern.h>
92 #include <vm/pmap.h>
93 #include <vm/vm_map.h>
94 #include <vm/vm_param.h>
95 #include <vm/vm_object.h>
96 #include <vm/swap_pager.h>
97 
98 #include <machine/clock.h>
99 
100 #include <geom/geom.h>
101 #include <geom/geom_int.h>
102 
103 #if defined(__i386__) || defined(__amd64__)
104 #include <machine/cputypes.h>
105 #include <machine/md_var.h>
106 #endif /* __i386__ || __amd64__ */
107 
108 #include <compat/linux/linux.h>
109 #include <compat/linux/linux_common.h>
110 #include <compat/linux/linux_emul.h>
111 #include <compat/linux/linux_mib.h>
112 #include <compat/linux/linux_misc.h>
113 #include <compat/linux/linux_util.h>
114 #include <fs/pseudofs/pseudofs.h>
115 #include <fs/procfs/procfs.h>
116 
/*
 * Unit conversion macros.
 *
 * Tick counts are scaled by stathz, or by hz when stathz is zero.
 */
#define	LINPROCFS_TICKHZ	(stathz ? stathz : hz)

/* ticks to jiffies */
#define	T2J(x)	((long)(((x) * 100ULL) / LINPROCFS_TICKHZ))
/* ticks to centiseconds */
#define	T2CS(x)	((unsigned long)(((x) * 100ULL) / LINPROCFS_TICKHZ))
/* ticks to seconds */
#define	T2S(x)	((x) / LINPROCFS_TICKHZ)
/* bytes to kbytes */
#define	B2K(x)	((x) >> 10)
/* bytes to pages */
#define	B2P(x)	((x) >> PAGE_SHIFT)
/* pages to bytes */
#define	P2B(x)	((x) << PAGE_SHIFT)
/* pages to kbytes */
#define	P2K(x)	((x) << (PAGE_SHIFT - 10))
/* struct timeval pointer to jiffies (hundredths of a second) */
#define	TV2J(x)	((x)->tv_sec * 100UL + (x)->tv_usec / 10000)
128 
/**
 * @brief Mapping of ki_stat in struct kinfo_proc to the linux state
 *
 * The linux procfs state field displays one of the characters RSDZTW to
 * denote running, sleeping in an interruptible wait, waiting in an
 * uninterruptible disk sleep, a zombie process, process is being traced
 * or stopped, or process is paging respectively.
 *
 * Our struct kinfo_proc contains the variable ki_stat which contains a
 * value out of SIDL, SRUN, SSLEEP, SSTOP, SZOMB, SWAIT and SLOCK.
 *
 * This character array is used with ki_stat - 1 as an index and tries to
 * map our states to suitable linux states.
 *
 * Note that the array also holds the string's terminating NUL, so
 * sizeof(linux_state) is one more than the number of mapped states.
 */
static char linux_state[] = "RRSTZDD";
144 
145 /*
146  * Filler function for proc/meminfo
147  */
148 static int
149 linprocfs_domeminfo(PFS_FILL_ARGS)
150 {
151 	unsigned long memtotal;		/* total memory in bytes */
152 	unsigned long memfree;		/* free memory in bytes */
153 	unsigned long cached;		/* page cache */
154 	unsigned long buffers;		/* buffer cache */
155 	unsigned long long swaptotal;	/* total swap space in bytes */
156 	unsigned long long swapused;	/* used swap space in bytes */
157 	unsigned long long swapfree;	/* free swap space in bytes */
158 	size_t sz;
159 	int error, i, j;
160 
161 	memtotal = physmem * PAGE_SIZE;
162 	memfree = (unsigned long)vm_free_count() * PAGE_SIZE;
163 	swap_pager_status(&i, &j);
164 	swaptotal = (unsigned long long)i * PAGE_SIZE;
165 	swapused = (unsigned long long)j * PAGE_SIZE;
166 	swapfree = swaptotal - swapused;
167 
168 	/*
169 	 * This value may exclude wired pages, but we have no good way of
170 	 * accounting for that.
171 	 */
172 	cached =
173 	    (vm_active_count() + vm_inactive_count() + vm_laundry_count()) *
174 	    PAGE_SIZE;
175 
176 	sz = sizeof(buffers);
177 	error = kernel_sysctlbyname(curthread, "vfs.bufspace", &buffers, &sz,
178 	    NULL, 0, 0, 0);
179 	if (error != 0)
180 		buffers = 0;
181 
182 	sbuf_printf(sb,
183 	    "MemTotal: %9lu kB\n"
184 	    "MemFree:  %9lu kB\n"
185 	    "Buffers:  %9lu kB\n"
186 	    "Cached:   %9lu kB\n"
187 	    "SwapTotal:%9llu kB\n"
188 	    "SwapFree: %9llu kB\n",
189 	    B2K(memtotal), B2K(memfree), B2K(buffers),
190 	    B2K(cached), B2K(swaptotal), B2K(swapfree));
191 
192 	return (0);
193 }
194 
195 #if defined(__i386__) || defined(__amd64__)
196 /*
197  * Filler function for proc/cpuinfo (i386 & amd64 version)
198  */
199 static int
200 linprocfs_docpuinfo(PFS_FILL_ARGS)
201 {
202 	uint64_t freq;
203 	u_int cache_size[4];
204 	u_int regs[4] = { 0 };
205 	int fqmhz, fqkhz;
206 	int i, j;
207 
208 	/*
209 	 * We default the flags to include all non-conflicting flags,
210 	 * and the Intel versions of conflicting flags.
211 	 */
212 	static char *cpu_feature_names[] = {
213 		/*  0 */ "fpu", "vme", "de", "pse",
214 		/*  4 */ "tsc", "msr", "pae", "mce",
215 		/*  8 */ "cx8", "apic", "", "sep",
216 		/* 12 */ "mtrr", "pge", "mca", "cmov",
217 		/* 16 */ "pat", "pse36", "pn", "clflush",
218 		/* 20 */ "", "dts", "acpi", "mmx",
219 		/* 24 */ "fxsr", "sse", "sse2", "ss",
220 		/* 28 */ "ht", "tm", "ia64", "pbe"
221 	};
222 
223 	static char *amd_feature_names[] = {
224 		/*  0 */ "", "", "", "",
225 		/*  4 */ "", "", "", "",
226 		/*  8 */ "", "", "", "syscall",
227 		/* 12 */ "", "", "", "",
228 		/* 16 */ "", "", "", "mp",
229 		/* 20 */ "nx", "", "mmxext", "",
230 		/* 24 */ "", "fxsr_opt", "pdpe1gb", "rdtscp",
231 		/* 28 */ "", "lm", "3dnowext", "3dnow"
232 	};
233 
234 	static char *cpu_feature2_names[] = {
235 		/*  0 */ "pni", "pclmulqdq", "dtes64", "monitor",
236 		/*  4 */ "ds_cpl", "vmx", "smx", "est",
237 		/*  8 */ "tm2", "ssse3", "cid", "sdbg",
238 		/* 12 */ "fma", "cx16", "xtpr", "pdcm",
239 		/* 16 */ "", "pcid", "dca", "sse4_1",
240 		/* 20 */ "sse4_2", "x2apic", "movbe", "popcnt",
241 		/* 24 */ "tsc_deadline_timer", "aes", "xsave", "",
242 		/* 28 */ "avx", "f16c", "rdrand", "hypervisor"
243 	};
244 
245 	static char *amd_feature2_names[] = {
246 		/*  0 */ "lahf_lm", "cmp_legacy", "svm", "extapic",
247 		/*  4 */ "cr8_legacy", "abm", "sse4a", "misalignsse",
248 		/*  8 */ "3dnowprefetch", "osvw", "ibs", "xop",
249 		/* 12 */ "skinit", "wdt", "", "lwp",
250 		/* 16 */ "fma4", "tce", "", "nodeid_msr",
251 		/* 20 */ "", "tbm", "topoext", "perfctr_core",
252 		/* 24 */ "perfctr_nb", "", "bpext", "ptsc",
253 		/* 28 */ "perfctr_llc", "mwaitx", "", ""
254 	};
255 
256 	static char *cpu_stdext_feature_names[] = {
257 		/*  0 */ "fsgsbase", "tsc_adjust", "sgx", "bmi1",
258 		/*  4 */ "hle", "avx2", "", "smep",
259 		/*  8 */ "bmi2", "erms", "invpcid", "rtm",
260 		/* 12 */ "cqm", "", "mpx", "rdt_a",
261 		/* 16 */ "avx512f", "avx512dq", "rdseed", "adx",
262 		/* 20 */ "smap", "avx512ifma", "", "clflushopt",
263 		/* 24 */ "clwb", "intel_pt", "avx512pf", "avx512er",
264 		/* 28 */ "avx512cd", "sha_ni", "avx512bw", "avx512vl"
265 	};
266 
267 	static char *cpu_stdext_feature2_names[] = {
268 		/*  0 */ "prefetchwt1", "avx512vbmi", "umip", "pku",
269 		/*  4 */ "ospke", "waitpkg", "avx512_vbmi2", "",
270 		/*  8 */ "gfni", "vaes", "vpclmulqdq", "avx512_vnni",
271 		/* 12 */ "avx512_bitalg", "", "avx512_vpopcntdq", "",
272 		/* 16 */ "", "", "", "",
273 		/* 20 */ "", "", "rdpid", "",
274 		/* 24 */ "", "cldemote", "", "movdiri",
275 		/* 28 */ "movdir64b", "enqcmd", "sgx_lc", ""
276 	};
277 
278 	static char *cpu_stdext_feature3_names[] = {
279 		/*  0 */ "", "", "avx512_4vnniw", "avx512_4fmaps",
280 		/*  4 */ "fsrm", "", "", "",
281 		/*  8 */ "avx512_vp2intersect", "", "md_clear", "",
282 		/* 12 */ "", "", "", "",
283 		/* 16 */ "", "", "pconfig", "",
284 		/* 20 */ "", "", "", "",
285 		/* 24 */ "", "", "ibrs", "stibp",
286 		/* 28 */ "flush_l1d", "arch_capabilities", "core_capabilities", "ssbd"
287 	};
288 
289 	static char *cpu_stdext_feature_l1_names[] = {
290 		/*  0 */ "xsaveopt", "xsavec", "xgetbv1", "xsaves",
291 		/*  4 */ "xfd"
292 	};
293 
294 	static char *power_flags[] = {
295 		"ts",           "fid",          "vid",
296 		"ttp",          "tm",           "stc",
297 		"100mhzsteps",  "hwpstate",     "",
298 		"cpb",          "eff_freq_ro",  "proc_feedback",
299 		"acc_power",
300 	};
301 
302 #ifdef __i386__
303 	switch (cpu_vendor_id) {
304 	case CPU_VENDOR_AMD:
305 		if (cpu_class < CPUCLASS_686)
306 			cpu_feature_names[16] = "fcmov";
307 		break;
308 	case CPU_VENDOR_CYRIX:
309 		cpu_feature_names[24] = "cxmmx";
310 		break;
311 	}
312 #endif
313 	if (cpu_exthigh >= 0x80000006)
314 		do_cpuid(0x80000006, cache_size);
315 	else
316 		memset(cache_size, 0, sizeof(cache_size));
317 	for (i = 0; i < mp_ncpus; ++i) {
318 		fqmhz = 0;
319 		fqkhz = 0;
320 		freq = atomic_load_acq_64(&tsc_freq);
321 		if (freq != 0) {
322 			fqmhz = (freq + 4999) / 1000000;
323 			fqkhz = ((freq + 4999) / 10000) % 100;
324 		}
325 		sbuf_printf(sb,
326 		    "processor\t: %d\n"
327 		    "vendor_id\t: %.20s\n"
328 		    "cpu family\t: %u\n"
329 		    "model\t\t: %u\n"
330 		    "model name\t: %s\n"
331 		    "stepping\t: %u\n"
332 		    "cpu MHz\t\t: %d.%02d\n"
333 		    "cache size\t: %d KB\n"
334 		    "physical id\t: %d\n"
335 		    "siblings\t: %d\n"
336 		    "core id\t\t: %d\n"
337 		    "cpu cores\t: %d\n"
338 		    "apicid\t\t: %d\n"
339 		    "initial apicid\t: %d\n"
340 		    "fpu\t\t: %s\n"
341 		    "fpu_exception\t: %s\n"
342 		    "cpuid level\t: %d\n"
343 		    "wp\t\t: %s\n",
344 		    i, cpu_vendor, CPUID_TO_FAMILY(cpu_id),
345 		    CPUID_TO_MODEL(cpu_id), cpu_model, cpu_id & CPUID_STEPPING,
346 		    fqmhz, fqkhz,
347 		    (cache_size[2] >> 16), 0, mp_ncpus, i, mp_ncpus,
348 		    i, i, /*cpu_id & CPUID_LOCAL_APIC_ID ??*/
349 		    (cpu_feature & CPUID_FPU) ? "yes" : "no", "yes",
350 		    CPUID_TO_FAMILY(cpu_id), "yes");
351 		sbuf_cat(sb, "flags\t\t:");
352 		for (j = 0; j < nitems(cpu_feature_names); j++)
353 			if (cpu_feature & (1 << j) &&
354 			    cpu_feature_names[j][0] != '\0')
355 				sbuf_printf(sb, " %s", cpu_feature_names[j]);
356 		for (j = 0; j < nitems(amd_feature_names); j++)
357 			if (amd_feature & (1 << j) &&
358 			    amd_feature_names[j][0] != '\0')
359 				sbuf_printf(sb, " %s", amd_feature_names[j]);
360 		for (j = 0; j < nitems(cpu_feature2_names); j++)
361 			if (cpu_feature2 & (1 << j) &&
362 			    cpu_feature2_names[j][0] != '\0')
363 				sbuf_printf(sb, " %s", cpu_feature2_names[j]);
364 		for (j = 0; j < nitems(amd_feature2_names); j++)
365 			if (amd_feature2 & (1 << j) &&
366 			    amd_feature2_names[j][0] != '\0')
367 				sbuf_printf(sb, " %s", amd_feature2_names[j]);
368 		for (j = 0; j < nitems(cpu_stdext_feature_names); j++)
369 			if (cpu_stdext_feature & (1 << j) &&
370 			    cpu_stdext_feature_names[j][0] != '\0')
371 				sbuf_printf(sb, " %s",
372 				    cpu_stdext_feature_names[j]);
373 		if (tsc_is_invariant)
374 			sbuf_cat(sb, " constant_tsc");
375 		for (j = 0; j < nitems(cpu_stdext_feature2_names); j++)
376 			if (cpu_stdext_feature2 & (1 << j) &&
377 			    cpu_stdext_feature2_names[j][0] != '\0')
378 				sbuf_printf(sb, " %s",
379 				    cpu_stdext_feature2_names[j]);
380 		for (j = 0; j < nitems(cpu_stdext_feature3_names); j++)
381 			if (cpu_stdext_feature3 & (1 << j) &&
382 			    cpu_stdext_feature3_names[j][0] != '\0')
383 				sbuf_printf(sb, " %s",
384 				    cpu_stdext_feature3_names[j]);
385 		if ((cpu_feature2 & CPUID2_XSAVE) != 0) {
386 			cpuid_count(0xd, 0x1, regs);
387 			for (j = 0; j < nitems(cpu_stdext_feature_l1_names); j++)
388 				if (regs[0] & (1 << j) &&
389 				    cpu_stdext_feature_l1_names[j][0] != '\0')
390 					sbuf_printf(sb, " %s",
391 					    cpu_stdext_feature_l1_names[j]);
392 		}
393 		sbuf_cat(sb, "\n");
394 		sbuf_printf(sb,
395 		    "bugs\t\t: %s\n"
396 		    "bogomips\t: %d.%02d\n"
397 		    "clflush size\t: %d\n"
398 		    "cache_alignment\t: %d\n"
399 		    "address sizes\t: %d bits physical, %d bits virtual\n",
400 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
401 		    (has_f00f_bug) ? "Intel F00F" : "",
402 #else
403 		    "",
404 #endif
405 		    fqmhz * 2, fqkhz,
406 		    cpu_clflush_line_size, cpu_clflush_line_size,
407 		    cpu_maxphyaddr,
408 		    (cpu_maxphyaddr > 32) ? 48 : 0);
409 		sbuf_cat(sb, "power management: ");
410 		for (j = 0; j < nitems(power_flags); j++)
411 			if (amd_pminfo & (1 << j))
412 				sbuf_printf(sb, " %s", power_flags[j]);
413 		sbuf_cat(sb, "\n\n");
414 
415 		/* XXX per-cpu vendor / class / model / id? */
416 	}
417 	sbuf_cat(sb, "\n");
418 
419 	return (0);
420 }
421 #else
422 /* ARM64TODO: implement non-stubbed linprocfs_docpuinfo */
423 static int
424 linprocfs_docpuinfo(PFS_FILL_ARGS)
425 {
426 	int i;
427 
428 	for (i = 0; i < mp_ncpus; ++i) {
429 		sbuf_printf(sb,
430 		    "processor\t: %d\n"
431 		    "BogoMIPS\t: %d.%02d\n",
432 		    i, 0, 0);
433 		sbuf_cat(sb, "Features\t: ");
434 		sbuf_cat(sb, "\n");
435 		sbuf_printf(sb,
436 		    "CPU implementer\t: \n"
437 		    "CPU architecture: \n"
438 		    "CPU variant\t: 0x%x\n"
439 		    "CPU part\t: 0x%x\n"
440 		    "CPU revision\t: %d\n",
441 		    0, 0, 0);
442 		sbuf_cat(sb, "\n");
443 	}
444 
445 	return (0);
446 }
447 #endif /* __i386__ || __amd64__ */
448 
/*
 * Mount source and file system type that _mtab_helper() reports for a
 * "linsysfs" mount.
 */
static const char *path_slash_sys = "/sys";
static const char *fstype_sysfs = "sysfs";
451 
452 static int
453 _mtab_helper(const struct pfs_node *pn, const struct statfs *sp,
454     const char **mntfrom, const char **mntto, const char **fstype)
455 {
456 	/* determine device name */
457 	*mntfrom = sp->f_mntfromname;
458 
459 	/* determine mount point */
460 	*mntto = sp->f_mntonname;
461 
462 	/* determine fs type */
463 	*fstype = sp->f_fstypename;
464 	if (strcmp(*fstype, pn->pn_info->pi_name) == 0)
465 		*mntfrom = *fstype = "proc";
466 	else if (strcmp(*fstype, "procfs") == 0)
467 		return (ECANCELED);
468 
469 	if (strcmp(*fstype, "autofs") == 0) {
470 		/*
471 		 * FreeBSD uses eg "map -hosts", whereas Linux
472 		 * expects just "-hosts".
473 		 */
474 		if (strncmp(*mntfrom, "map ", 4) == 0)
475 			*mntfrom += 4;
476 	}
477 
478 	if (strcmp(*fstype, "linsysfs") == 0) {
479 		*mntfrom = path_slash_sys;
480 		*fstype = fstype_sysfs;
481 	} else {
482 		/* For Linux msdosfs is called vfat */
483 		if (strcmp(*fstype, "msdosfs") == 0)
484 			*fstype = "vfat";
485 	}
486 	return (0);
487 }
488 
489 static void
490 _sbuf_mntoptions_helper(struct sbuf *sb, uint64_t f_flags)
491 {
492 	sbuf_cat(sb, (f_flags & MNT_RDONLY) ? "ro" : "rw");
493 #define ADD_OPTION(opt, name) \
494 	if (f_flags & (opt)) sbuf_cat(sb, "," name);
495 	ADD_OPTION(MNT_SYNCHRONOUS,	"sync");
496 	ADD_OPTION(MNT_NOEXEC,		"noexec");
497 	ADD_OPTION(MNT_NOSUID,		"nosuid");
498 	ADD_OPTION(MNT_UNION,		"union");
499 	ADD_OPTION(MNT_ASYNC,		"async");
500 	ADD_OPTION(MNT_SUIDDIR,		"suiddir");
501 	ADD_OPTION(MNT_NOSYMFOLLOW,	"nosymfollow");
502 	ADD_OPTION(MNT_NOATIME,		"noatime");
503 #undef ADD_OPTION
504 }
505 
506 /*
507  * Filler function for proc/mtab and proc/<pid>/mounts.
508  *
509  * /proc/mtab doesn't exist in Linux' procfs, but is included here so
510  * users can symlink /compat/linux/etc/mtab to /proc/mtab
511  */
512 static int
513 linprocfs_domtab(PFS_FILL_ARGS)
514 {
515 	const char *mntto, *mntfrom, *fstype;
516 	char *dlep, *flep;
517 	struct vnode *vp;
518 	struct pwd *pwd;
519 	size_t lep_len;
520 	int error;
521 	struct statfs *buf, *sp;
522 	size_t count;
523 
524 	/*
525 	 * Resolve emulation tree prefix
526 	 */
527 	flep = NULL;
528 	pwd = pwd_hold(td);
529 	vp = pwd->pwd_adir;
530 	error = vn_fullpath_global(vp, &dlep, &flep);
531 	pwd_drop(pwd);
532 	if (error != 0)
533 		return (error);
534 	lep_len = strlen(dlep);
535 
536 	buf = NULL;
537 	error = kern_getfsstat(td, &buf, SIZE_T_MAX, &count,
538 	    UIO_SYSSPACE, MNT_WAIT);
539 	if (error != 0) {
540 		free(buf, M_TEMP);
541 		free(flep, M_TEMP);
542 		return (error);
543 	}
544 
545 	for (sp = buf; count > 0; sp++, count--) {
546 		error = _mtab_helper(pn, sp, &mntfrom, &mntto, &fstype);
547 		if (error != 0) {
548 			MPASS(error == ECANCELED);
549 			continue;
550 		}
551 
552 		/* determine mount point */
553 		if (strncmp(mntto, dlep, lep_len) == 0 && mntto[lep_len] == '/')
554 			mntto += lep_len;
555 
556 		sbuf_printf(sb, "%s %s %s ", mntfrom, mntto, fstype);
557 		_sbuf_mntoptions_helper(sb, sp->f_flags);
558 		/* a real Linux mtab will also show NFS options */
559 		sbuf_printf(sb, " 0 0\n");
560 	}
561 
562 	free(buf, M_TEMP);
563 	free(flep, M_TEMP);
564 	return (error);
565 }
566 
567 static int
568 linprocfs_doprocmountinfo(PFS_FILL_ARGS)
569 {
570 	const char *mntfrom, *mntto, *fstype;
571 	char *dlep, *flep;
572 	struct statfs *buf, *sp;
573 	size_t count, lep_len;
574 	struct vnode *vp;
575 	struct pwd *pwd;
576 	int error;
577 
578 	/*
579 	 * Resolve emulation tree prefix
580 	 */
581 	flep = NULL;
582 	pwd = pwd_hold(td);
583 	vp = pwd->pwd_adir;
584 	error = vn_fullpath_global(vp, &dlep, &flep);
585 	pwd_drop(pwd);
586 	if (error != 0)
587 		return (error);
588 	lep_len = strlen(dlep);
589 
590 	buf = NULL;
591 	error = kern_getfsstat(td, &buf, SIZE_T_MAX, &count,
592 	    UIO_SYSSPACE, MNT_WAIT);
593 	if (error != 0)
594 		goto out;
595 
596 	for (sp = buf; count > 0; sp++, count--) {
597 		error = _mtab_helper(pn, sp, &mntfrom, &mntto, &fstype);
598 		if (error != 0) {
599 			MPASS(error == ECANCELED);
600 			continue;
601 		}
602 
603 		if (strncmp(mntto, dlep, lep_len) == 0 && mntto[lep_len] == '/')
604 			mntto += lep_len;
605 #if 0
606 		/*
607 		 * If the prefix is a chroot, and this mountpoint is not under
608 		 * the prefix, we should skip it.  Leave it for now for
609 		 * consistency with procmtab above.
610 		 */
611 		else
612 			continue;
613 #endif
614 
615 		/*
616 		 * (1) mount id
617 		 *
618 		 * (2) parent mount id -- we don't have this cheaply, so
619 		 * provide a dummy value
620 		 *
621 		 * (3) major:minor -- ditto
622 		 *
623 		 * (4) root filesystem mount -- probably a namespaces thing
624 		 *
625 		 * (5) mountto path
626 		 */
627 		sbuf_printf(sb, "%u 0 0:0 / %s ",
628 		    sp->f_fsid.val[0] ^ sp->f_fsid.val[1], mntto);
629 		/* (6) mount options */
630 		_sbuf_mntoptions_helper(sb, sp->f_flags);
631 		/*
632 		 * (7) zero or more optional fields -- again, namespace related
633 		 *
634 		 * (8) End of variable length fields separator ("-")
635 		 *
636 		 * (9) fstype
637 		 *
638 		 * (10) mount from
639 		 *
640 		 * (11) "superblock" options -- like (6), but different
641 		 * semantics in Linux
642 		 */
643 		sbuf_printf(sb, " - %s %s %s\n", fstype, mntfrom,
644 		    (sp->f_flags & MNT_RDONLY) ? "ro" : "rw");
645 	}
646 
647 	error = 0;
648 out:
649 	free(buf, M_TEMP);
650 	free(flep, M_TEMP);
651 	return (error);
652 }
653 
654 /*
655  * Filler function for proc/partitions
656  */
657 static int
658 linprocfs_dopartitions(PFS_FILL_ARGS)
659 {
660 	struct g_class *cp;
661 	struct g_geom *gp;
662 	struct g_provider *pp;
663 	int major, minor;
664 
665 	g_topology_lock();
666 	sbuf_printf(sb, "major minor  #blocks  name rio rmerge rsect "
667 	    "ruse wio wmerge wsect wuse running use aveq\n");
668 
669 	LIST_FOREACH(cp, &g_classes, class) {
670 		if (strcmp(cp->name, "DISK") == 0 ||
671 		    strcmp(cp->name, "PART") == 0)
672 			LIST_FOREACH(gp, &cp->geom, geom) {
673 				LIST_FOREACH(pp, &gp->provider, provider) {
674 					if (linux_driver_get_major_minor(
675 					    pp->name, &major, &minor) != 0) {
676 						major = 0;
677 						minor = 0;
678 					}
679 					sbuf_printf(sb, "%d %d %lld %s "
680 					    "%d %d %d %d %d "
681 					     "%d %d %d %d %d %d\n",
682 					     major, minor,
683 					     (long long)pp->mediasize, pp->name,
684 					     0, 0, 0, 0, 0,
685 					     0, 0, 0, 0, 0, 0);
686 				}
687 			}
688 	}
689 	g_topology_unlock();
690 
691 	return (0);
692 }
693 
694 /*
695  * Filler function for proc/stat
696  *
697  * Output depends on kernel version:
698  *
699  * v2.5.40 <=
700  *   user nice system idle
701  * v2.5.41
702  *   user nice system idle iowait
703  * v2.6.11
704  *   user nice system idle iowait irq softirq steal
705  * v2.6.24
706  *   user nice system idle iowait irq softirq steal guest
707  * v2.6.33 >=
708  *   user nice system idle iowait irq softirq steal guest guest_nice
709  */
710 static int
711 linprocfs_dostat(PFS_FILL_ARGS)
712 {
713 	struct pcpu *pcpu;
714 	long cp_time[CPUSTATES];
715 	long *cp;
716 	struct timeval boottime;
717 	int i;
718 	char *zero_pad;
719 	bool has_intr = true;
720 
721 	if (linux_kernver(td) >= LINUX_KERNVER(2,6,33)) {
722 		zero_pad = " 0 0 0 0\n";
723 	} else if (linux_kernver(td) >= LINUX_KERNVER(2,6,24)) {
724 		zero_pad = " 0 0 0\n";
725 	} else if (linux_kernver(td) >= LINUX_KERNVER(2,6,11)) {
726 		zero_pad = " 0 0\n";
727 	} else if (linux_kernver(td) >= LINUX_KERNVER(2,5,41)) {
728 		has_intr = false;
729 		zero_pad = " 0\n";
730 	} else {
731 		has_intr = false;
732 		zero_pad = "\n";
733 	}
734 
735 	read_cpu_time(cp_time);
736 	getboottime(&boottime);
737 	/* Parameters common to all versions */
738 	sbuf_printf(sb, "cpu %lu %lu %lu %lu",
739 	    T2J(cp_time[CP_USER]),
740 	    T2J(cp_time[CP_NICE]),
741 	    T2J(cp_time[CP_SYS]),
742 	    T2J(cp_time[CP_IDLE]));
743 
744 	/* Print interrupt stats if available */
745 	if (has_intr) {
746 		sbuf_printf(sb, " 0 %lu", T2J(cp_time[CP_INTR]));
747 	}
748 
749 	/* Pad out remaining fields depending on version */
750 	sbuf_printf(sb, "%s", zero_pad);
751 
752 	CPU_FOREACH(i) {
753 		pcpu = pcpu_find(i);
754 		cp = pcpu->pc_cp_time;
755 		sbuf_printf(sb, "cpu%d %lu %lu %lu %lu", i,
756 		    T2J(cp[CP_USER]),
757 		    T2J(cp[CP_NICE]),
758 		    T2J(cp[CP_SYS]),
759 		    T2J(cp[CP_IDLE]));
760 
761 		if (has_intr) {
762 			sbuf_printf(sb, " 0 %lu", T2J(cp[CP_INTR]));
763 		}
764 
765 		sbuf_printf(sb, "%s", zero_pad);
766 	}
767 	sbuf_printf(sb,
768 	    "disk 0 0 0 0\n"
769 	    "page %ju %ju\n"
770 	    "swap %ju %ju\n"
771 	    "intr %ju\n"
772 	    "ctxt %ju\n"
773 	    "btime %lld\n",
774 	    (uintmax_t)VM_CNT_FETCH(v_vnodepgsin),
775 	    (uintmax_t)VM_CNT_FETCH(v_vnodepgsout),
776 	    (uintmax_t)VM_CNT_FETCH(v_swappgsin),
777 	    (uintmax_t)VM_CNT_FETCH(v_swappgsout),
778 	    (uintmax_t)VM_CNT_FETCH(v_intr),
779 	    (uintmax_t)VM_CNT_FETCH(v_swtch),
780 	    (long long)boottime.tv_sec);
781 	return (0);
782 }
783 
784 static int
785 linprocfs_doswaps(PFS_FILL_ARGS)
786 {
787 	struct xswdev xsw;
788 	uintmax_t total, used;
789 	int n;
790 	char devname[SPECNAMELEN + 1];
791 
792 	sbuf_printf(sb, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
793 	for (n = 0; ; n++) {
794 		if (swap_dev_info(n, &xsw, devname, sizeof(devname)) != 0)
795 			break;
796 		total = (uintmax_t)xsw.xsw_nblks * PAGE_SIZE / 1024;
797 		used  = (uintmax_t)xsw.xsw_used * PAGE_SIZE / 1024;
798 
799 		/*
800 		 * The space and not tab after the device name is on
801 		 * purpose.  Linux does so.
802 		 */
803 		sbuf_printf(sb, "/dev/%-34s unknown\t\t%jd\t%jd\t-1\n",
804 		    devname, total, used);
805 	}
806 	return (0);
807 }
808 
809 /*
810  * Filler function for proc/uptime
811  */
812 static int
813 linprocfs_douptime(PFS_FILL_ARGS)
814 {
815 	long cp_time[CPUSTATES];
816 	struct timeval tv;
817 
818 	getmicrouptime(&tv);
819 	read_cpu_time(cp_time);
820 	sbuf_printf(sb, "%lld.%02ld %ld.%02lu\n",
821 	    (long long)tv.tv_sec, tv.tv_usec / 10000,
822 	    T2S(cp_time[CP_IDLE] / mp_ncpus),
823 	    T2CS(cp_time[CP_IDLE] / mp_ncpus) % 100);
824 	return (0);
825 }
826 
/*
 * Get OS build date
 *
 * Appends a fixed build number and date string to sb; td is not used.
 */
static void
linprocfs_osbuild(struct thread *td, struct sbuf *sb)
{
	static const char build[] = "#4 Sun Dec 18 04:30:00 CET 1977";

	sbuf_cat(sb, build);
}
852 
/*
 * Get OS builder
 *
 * Appends a fixed builder address to sb; td is not used.
 */
static void
linprocfs_osbuilder(struct thread *td, struct sbuf *sb)
{
	static const char builder[] = "des@freebsd.org";

	sbuf_cat(sb, builder);
}
877 
878 /*
879  * Filler function for proc/version
880  */
881 static int
882 linprocfs_doversion(PFS_FILL_ARGS)
883 {
884 	char osname[LINUX_MAX_UTSNAME];
885 	char osrelease[LINUX_MAX_UTSNAME];
886 
887 	linux_get_osname(td, osname);
888 	linux_get_osrelease(td, osrelease);
889 	sbuf_printf(sb, "%s version %s (", osname, osrelease);
890 	linprocfs_osbuilder(td, sb);
891 	sbuf_cat(sb, ") (gcc version " __VERSION__ ") ");
892 	linprocfs_osbuild(td, sb);
893 	sbuf_cat(sb, "\n");
894 
895 	return (0);
896 }
897 
898 /*
899  * Filler function for proc/loadavg
900  */
901 static int
902 linprocfs_doloadavg(PFS_FILL_ARGS)
903 {
904 
905 	sbuf_printf(sb,
906 	    "%d.%02d %d.%02d %d.%02d %d/%d %d\n",
907 	    (int)(averunnable.ldavg[0] / averunnable.fscale),
908 	    (int)(averunnable.ldavg[0] * 100 / averunnable.fscale % 100),
909 	    (int)(averunnable.ldavg[1] / averunnable.fscale),
910 	    (int)(averunnable.ldavg[1] * 100 / averunnable.fscale % 100),
911 	    (int)(averunnable.ldavg[2] / averunnable.fscale),
912 	    (int)(averunnable.ldavg[2] * 100 / averunnable.fscale % 100),
913 	    1,				/* number of running tasks */
914 	    nprocs,			/* number of tasks */
915 	    lastpid			/* the last pid */
916 	);
917 	return (0);
918 }
919 
920 static int
921 linprocfs_get_tty_nr(struct proc *p)
922 {
923 	struct session *sp;
924 	const char *ttyname;
925 	int error, major, minor, nr;
926 
927 	PROC_LOCK_ASSERT(p, MA_OWNED);
928 	sx_assert(&proctree_lock, SX_LOCKED);
929 
930 	if ((p->p_flag & P_CONTROLT) == 0)
931 		return (-1);
932 
933 	sp = p->p_pgrp->pg_session;
934 	if (sp == NULL)
935 		return (-1);
936 
937 	ttyname = devtoname(sp->s_ttyp->t_dev);
938 	error = linux_driver_get_major_minor(ttyname, &major, &minor);
939 	if (error != 0)
940 		return (-1);
941 
942 	nr = makedev(major, minor);
943 	return (nr);
944 }
945 
946 /*
947  * Filler function for proc/pid/stat
948  */
949 static int
950 linprocfs_doprocstat(PFS_FILL_ARGS)
951 {
952 	struct kinfo_proc kp;
953 	struct timeval boottime;
954 	char state;
955 	static int ratelimit = 0;
956 	int tty_nr;
957 	vm_offset_t startcode, startdata;
958 
959 	getboottime(&boottime);
960 	sx_slock(&proctree_lock);
961 	PROC_LOCK(p);
962 	fill_kinfo_proc(p, &kp);
963 	tty_nr = linprocfs_get_tty_nr(p);
964 	sx_sunlock(&proctree_lock);
965 	if (p->p_vmspace) {
966 	   startcode = (vm_offset_t)p->p_vmspace->vm_taddr;
967 	   startdata = (vm_offset_t)p->p_vmspace->vm_daddr;
968 	} else {
969 	   startcode = 0;
970 	   startdata = 0;
971 	}
972 	sbuf_printf(sb, "%d", p->p_pid);
973 #define PS_ADD(name, fmt, arg) sbuf_printf(sb, " " fmt, arg)
974 	PS_ADD("comm",		"(%s)",	p->p_comm);
975 	if (kp.ki_stat > sizeof(linux_state)) {
976 		state = 'R';
977 
978 		if (ratelimit == 0) {
979 			printf("linprocfs: don't know how to handle unknown FreeBSD state %d/%zd, mapping to R\n",
980 			    kp.ki_stat, sizeof(linux_state));
981 			++ratelimit;
982 		}
983 	} else
984 		state = linux_state[kp.ki_stat - 1];
985 	PS_ADD("state",		"%c",	state);
986 	PS_ADD("ppid",		"%d",	p->p_pptr ? p->p_pptr->p_pid : 0);
987 	PS_ADD("pgrp",		"%d",	p->p_pgid);
988 	PS_ADD("session",	"%d",	p->p_session->s_sid);
989 	PROC_UNLOCK(p);
990 	PS_ADD("tty",		"%d",	tty_nr);
991 	PS_ADD("tpgid",		"%d",	kp.ki_tpgid);
992 	PS_ADD("flags",		"%u",	0); /* XXX */
993 	PS_ADD("minflt",	"%lu",	kp.ki_rusage.ru_minflt);
994 	PS_ADD("cminflt",	"%lu",	kp.ki_rusage_ch.ru_minflt);
995 	PS_ADD("majflt",	"%lu",	kp.ki_rusage.ru_majflt);
996 	PS_ADD("cmajflt",	"%lu",	kp.ki_rusage_ch.ru_majflt);
997 	PS_ADD("utime",		"%ld",	TV2J(&kp.ki_rusage.ru_utime));
998 	PS_ADD("stime",		"%ld",	TV2J(&kp.ki_rusage.ru_stime));
999 	PS_ADD("cutime",	"%ld",	TV2J(&kp.ki_rusage_ch.ru_utime));
1000 	PS_ADD("cstime",	"%ld",	TV2J(&kp.ki_rusage_ch.ru_stime));
1001 	PS_ADD("priority",	"%d",	kp.ki_pri.pri_user);
1002 	PS_ADD("nice",		"%d",	kp.ki_nice); /* 19 (nicest) to -19 */
1003 	PS_ADD("0",		"%d",	0); /* removed field */
1004 	PS_ADD("itrealvalue",	"%d",	0); /* XXX */
1005 	PS_ADD("starttime",	"%lu",	TV2J(&kp.ki_start) - TV2J(&boottime));
1006 	PS_ADD("vsize",		"%ju",	(uintmax_t)kp.ki_size);
1007 	PS_ADD("rss",		"%ju",	(uintmax_t)kp.ki_rssize);
1008 	PS_ADD("rlim",		"%lu",	kp.ki_rusage.ru_maxrss);
1009 	PS_ADD("startcode",	"%ju",	(uintmax_t)startcode);
1010 	PS_ADD("endcode",	"%ju",	(uintmax_t)startdata);
1011 	PS_ADD("startstack",	"%u",	0); /* XXX */
1012 	PS_ADD("kstkesp",	"%u",	0); /* XXX */
1013 	PS_ADD("kstkeip",	"%u",	0); /* XXX */
1014 	PS_ADD("signal",	"%u",	0); /* XXX */
1015 	PS_ADD("blocked",	"%u",	0); /* XXX */
1016 	PS_ADD("sigignore",	"%u",	0); /* XXX */
1017 	PS_ADD("sigcatch",	"%u",	0); /* XXX */
1018 	PS_ADD("wchan",		"%u",	0); /* XXX */
1019 	PS_ADD("nswap",		"%lu",	kp.ki_rusage.ru_nswap);
1020 	PS_ADD("cnswap",	"%lu",	kp.ki_rusage_ch.ru_nswap);
1021 	PS_ADD("exitsignal",	"%d",	0); /* XXX */
1022 	PS_ADD("processor",	"%u",	kp.ki_lastcpu);
1023 	PS_ADD("rt_priority",	"%u",	0); /* XXX */ /* >= 2.5.19 */
1024 	PS_ADD("policy",	"%u",	kp.ki_pri.pri_class); /* >= 2.5.19 */
1025 #undef PS_ADD
1026 	sbuf_putc(sb, '\n');
1027 
1028 	return (0);
1029 }
1030 
1031 /*
1032  * Filler function for proc/pid/statm
1033  */
1034 static int
1035 linprocfs_doprocstatm(PFS_FILL_ARGS)
1036 {
1037 	struct kinfo_proc kp;
1038 	segsz_t lsize;
1039 
1040 	sx_slock(&proctree_lock);
1041 	PROC_LOCK(p);
1042 	fill_kinfo_proc(p, &kp);
1043 	PROC_UNLOCK(p);
1044 	sx_sunlock(&proctree_lock);
1045 
1046 	/*
1047 	 * See comments in linprocfs_doprocstatus() regarding the
1048 	 * computation of lsize.
1049 	 */
1050 	/* size resident share trs drs lrs dt */
1051 	sbuf_printf(sb, "%ju ", B2P((uintmax_t)kp.ki_size));
1052 	sbuf_printf(sb, "%ju ", (uintmax_t)kp.ki_rssize);
1053 	sbuf_printf(sb, "%ju ", (uintmax_t)0); /* XXX */
1054 	sbuf_printf(sb, "%ju ",	(uintmax_t)kp.ki_tsize);
1055 	sbuf_printf(sb, "%ju ", (uintmax_t)(kp.ki_dsize + kp.ki_ssize));
1056 	lsize = B2P(kp.ki_size) - kp.ki_dsize -
1057 	    kp.ki_ssize - kp.ki_tsize - 1;
1058 	sbuf_printf(sb, "%ju ", (uintmax_t)lsize);
1059 	sbuf_printf(sb, "%ju\n", (uintmax_t)0); /* XXX */
1060 
1061 	return (0);
1062 }
1063 
1064 /*
1065  * Filler function for proc/pid/status
1066  */
1067 static int
1068 linprocfs_doprocstatus(PFS_FILL_ARGS)
1069 {
1070 	struct kinfo_proc kp;
1071 	char *state;
1072 	segsz_t lsize;
1073 	struct thread *td2;
1074 	struct sigacts *ps;
1075 	l_sigset_t siglist, sigignore, sigcatch;
1076 	int i;
1077 
1078 	sx_slock(&proctree_lock);
1079 	PROC_LOCK(p);
1080 	td2 = FIRST_THREAD_IN_PROC(p);
1081 
1082 	if (P_SHOULDSTOP(p)) {
1083 		state = "T (stopped)";
1084 	} else {
1085 		switch(p->p_state) {
1086 		case PRS_NEW:
1087 			state = "I (idle)";
1088 			break;
1089 		case PRS_NORMAL:
1090 			if (p->p_flag & P_WEXIT) {
1091 				state = "X (exiting)";
1092 				break;
1093 			}
1094 			switch(TD_GET_STATE(td2)) {
1095 			case TDS_INHIBITED:
1096 				state = "S (sleeping)";
1097 				break;
1098 			case TDS_RUNQ:
1099 			case TDS_RUNNING:
1100 				state = "R (running)";
1101 				break;
1102 			default:
1103 				state = "? (unknown)";
1104 				break;
1105 			}
1106 			break;
1107 		case PRS_ZOMBIE:
1108 			state = "Z (zombie)";
1109 			break;
1110 		default:
1111 			state = "? (unknown)";
1112 			break;
1113 		}
1114 	}
1115 
1116 	fill_kinfo_proc(p, &kp);
1117 	sx_sunlock(&proctree_lock);
1118 
1119 	sbuf_printf(sb, "Name:\t%s\n",		p->p_comm); /* XXX escape */
1120 	sbuf_printf(sb, "State:\t%s\n",		state);
1121 
1122 	/*
1123 	 * Credentials
1124 	 */
1125 	sbuf_printf(sb, "Tgid:\t%d\n",		p->p_pid);
1126 	sbuf_printf(sb, "Pid:\t%d\n",		p->p_pid);
1127 	sbuf_printf(sb, "PPid:\t%d\n",		kp.ki_ppid );
1128 	sbuf_printf(sb, "TracerPid:\t%d\n",	kp.ki_tracer );
1129 	sbuf_printf(sb, "Uid:\t%d\t%d\t%d\t%d\n", p->p_ucred->cr_ruid,
1130 						p->p_ucred->cr_uid,
1131 						p->p_ucred->cr_svuid,
1132 						/* FreeBSD doesn't have fsuid */
1133 						p->p_ucred->cr_uid);
1134 	sbuf_printf(sb, "Gid:\t%d\t%d\t%d\t%d\n", p->p_ucred->cr_rgid,
1135 						p->p_ucred->cr_gid,
1136 						p->p_ucred->cr_svgid,
1137 						/* FreeBSD doesn't have fsgid */
1138 						p->p_ucred->cr_gid);
1139 	sbuf_cat(sb, "Groups:\t");
1140 	for (i = 0; i < p->p_ucred->cr_ngroups; i++)
1141 		sbuf_printf(sb, "%d ",		p->p_ucred->cr_groups[i]);
1142 	PROC_UNLOCK(p);
1143 	sbuf_putc(sb, '\n');
1144 
1145 	/*
1146 	 * Memory
1147 	 *
1148 	 * While our approximation of VmLib may not be accurate (I
1149 	 * don't know of a simple way to verify it, and I'm not sure
1150 	 * it has much meaning anyway), I believe it's good enough.
1151 	 *
1152 	 * The same code that could (I think) accurately compute VmLib
1153 	 * could also compute VmLck, but I don't really care enough to
1154 	 * implement it. Submissions are welcome.
1155 	 */
1156 	sbuf_printf(sb, "VmSize:\t%8ju kB\n",	B2K((uintmax_t)kp.ki_size));
1157 	sbuf_printf(sb, "VmLck:\t%8u kB\n",	P2K(0)); /* XXX */
1158 	sbuf_printf(sb, "VmRSS:\t%8ju kB\n",	P2K((uintmax_t)kp.ki_rssize));
1159 	sbuf_printf(sb, "VmData:\t%8ju kB\n",	P2K((uintmax_t)kp.ki_dsize));
1160 	sbuf_printf(sb, "VmStk:\t%8ju kB\n",	P2K((uintmax_t)kp.ki_ssize));
1161 	sbuf_printf(sb, "VmExe:\t%8ju kB\n",	P2K((uintmax_t)kp.ki_tsize));
1162 	lsize = B2P(kp.ki_size) - kp.ki_dsize -
1163 	    kp.ki_ssize - kp.ki_tsize - 1;
1164 	sbuf_printf(sb, "VmLib:\t%8ju kB\n",	P2K((uintmax_t)lsize));
1165 
1166 	/*
1167 	 * Signal masks
1168 	 */
1169 	PROC_LOCK(p);
1170 	bsd_to_linux_sigset(&p->p_siglist, &siglist);
1171 	ps = p->p_sigacts;
1172 	mtx_lock(&ps->ps_mtx);
1173 	bsd_to_linux_sigset(&ps->ps_sigignore, &sigignore);
1174 	bsd_to_linux_sigset(&ps->ps_sigcatch, &sigcatch);
1175 	mtx_unlock(&ps->ps_mtx);
1176 	PROC_UNLOCK(p);
1177 
1178 	sbuf_printf(sb, "SigPnd:\t%016jx\n",	siglist.__mask);
1179 	/*
1180 	 * XXX. SigBlk - target thread's signal mask, td_sigmask.
1181 	 * To implement SigBlk pseudofs should support proc/tid dir entries.
1182 	 */
1183 	sbuf_printf(sb, "SigBlk:\t%016x\n",	0);
1184 	sbuf_printf(sb, "SigIgn:\t%016jx\n",	sigignore.__mask);
1185 	sbuf_printf(sb, "SigCgt:\t%016jx\n",	sigcatch.__mask);
1186 
1187 	/*
1188 	 * Linux also prints the capability masks, but we don't have
1189 	 * capabilities yet, and when we do get them they're likely to
1190 	 * be meaningless to Linux programs, so we lie. XXX
1191 	 */
1192 	sbuf_printf(sb, "CapInh:\t%016x\n",	0);
1193 	sbuf_printf(sb, "CapPrm:\t%016x\n",	0);
1194 	sbuf_printf(sb, "CapEff:\t%016x\n",	0);
1195 
1196 	return (0);
1197 }
1198 
1199 /*
1200  * Filler function for proc/pid/cwd
1201  */
1202 static int
1203 linprocfs_doproccwd(PFS_FILL_ARGS)
1204 {
1205 	struct pwd *pwd;
1206 	char *fullpath = "unknown";
1207 	char *freepath = NULL;
1208 
1209 	pwd = pwd_hold_proc(p);
1210 	vn_fullpath(pwd->pwd_cdir, &fullpath, &freepath);
1211 	sbuf_printf(sb, "%s", fullpath);
1212 	if (freepath)
1213 		free(freepath, M_TEMP);
1214 	pwd_drop(pwd);
1215 	return (0);
1216 }
1217 
1218 /*
1219  * Filler function for proc/pid/root
1220  */
1221 static int
1222 linprocfs_doprocroot(PFS_FILL_ARGS)
1223 {
1224 	struct pwd *pwd;
1225 	struct vnode *vp;
1226 	char *fullpath = "unknown";
1227 	char *freepath = NULL;
1228 
1229 	pwd = pwd_hold_proc(p);
1230 	vp = jailed(p->p_ucred) ? pwd->pwd_jdir : pwd->pwd_rdir;
1231 	vn_fullpath(vp, &fullpath, &freepath);
1232 	sbuf_printf(sb, "%s", fullpath);
1233 	if (freepath)
1234 		free(freepath, M_TEMP);
1235 	pwd_drop(pwd);
1236 	return (0);
1237 }
1238 
1239 /*
1240  * Filler function for proc/pid/cmdline
1241  */
1242 static int
1243 linprocfs_doproccmdline(PFS_FILL_ARGS)
1244 {
1245 	int ret;
1246 
1247 	PROC_LOCK(p);
1248 	if ((ret = p_cansee(td, p)) != 0) {
1249 		PROC_UNLOCK(p);
1250 		return (ret);
1251 	}
1252 
1253 	/*
1254 	 * Mimic linux behavior and pass only processes with usermode
1255 	 * address space as valid.  Return zero silently otherwize.
1256 	 */
1257 	if (p->p_vmspace == &vmspace0) {
1258 		PROC_UNLOCK(p);
1259 		return (0);
1260 	}
1261 	if (p->p_args != NULL) {
1262 		sbuf_bcpy(sb, p->p_args->ar_args, p->p_args->ar_length);
1263 		PROC_UNLOCK(p);
1264 		return (0);
1265 	}
1266 
1267 	if ((p->p_flag & P_SYSTEM) != 0) {
1268 		PROC_UNLOCK(p);
1269 		return (0);
1270 	}
1271 
1272 	PROC_UNLOCK(p);
1273 
1274 	ret = proc_getargv(td, p, sb);
1275 	return (ret);
1276 }
1277 
1278 /*
1279  * Filler function for proc/pid/environ
1280  */
1281 static int
1282 linprocfs_doprocenviron(PFS_FILL_ARGS)
1283 {
1284 
1285 	/*
1286 	 * Mimic linux behavior and pass only processes with usermode
1287 	 * address space as valid.  Return zero silently otherwize.
1288 	 */
1289 	if (p->p_vmspace == &vmspace0)
1290 		return (0);
1291 
1292 	return (proc_getenvv(td, p, sb));
1293 }
1294 
1295 static char l32_map_str[] = "%08lx-%08lx %s%s%s%s %08lx %02x:%02x %lu%s%s\n";
1296 static char l64_map_str[] = "%016lx-%016lx %s%s%s%s %08lx %02x:%02x %lu%s%s\n";
1297 static char vdso_str[] = "      [vdso]";
1298 static char stack_str[] = "      [stack]";
1299 
1300 /*
1301  * Filler function for proc/pid/maps
1302  */
1303 static int
1304 linprocfs_doprocmaps(PFS_FILL_ARGS)
1305 {
1306 	struct vmspace *vm;
1307 	vm_map_t map;
1308 	vm_map_entry_t entry, tmp_entry;
1309 	vm_object_t obj, tobj, lobj;
1310 	vm_offset_t e_start, e_end;
1311 	vm_ooffset_t off;
1312 	vm_prot_t e_prot;
1313 	unsigned int last_timestamp;
1314 	char *name = "", *freename = NULL;
1315 	const char *l_map_str;
1316 	ino_t ino;
1317 	int error;
1318 	struct vnode *vp;
1319 	struct vattr vat;
1320 	bool private;
1321 
1322 	PROC_LOCK(p);
1323 	error = p_candebug(td, p);
1324 	PROC_UNLOCK(p);
1325 	if (error)
1326 		return (error);
1327 
1328 	if (uio->uio_rw != UIO_READ)
1329 		return (EOPNOTSUPP);
1330 
1331 	error = 0;
1332 	vm = vmspace_acquire_ref(p);
1333 	if (vm == NULL)
1334 		return (ESRCH);
1335 
1336 	if (SV_CURPROC_FLAG(SV_LP64))
1337 		l_map_str = l64_map_str;
1338 	else
1339 		l_map_str = l32_map_str;
1340 	map = &vm->vm_map;
1341 	vm_map_lock_read(map);
1342 	VM_MAP_ENTRY_FOREACH(entry, map) {
1343 		name = "";
1344 		freename = NULL;
1345 		/*
1346 		 * Skip printing of the guard page of the stack region, as
1347 		 * it confuses glibc pthread_getattr_np() method, where both
1348 		 * the base address and size of the stack of the initial thread
1349 		 * are calculated.
1350 		 */
1351 		if ((entry->eflags & (MAP_ENTRY_IS_SUB_MAP | MAP_ENTRY_GUARD)) != 0)
1352 			continue;
1353 		e_prot = entry->protection;
1354 		e_start = entry->start;
1355 		e_end = entry->end;
1356 		obj = entry->object.vm_object;
1357 		off = entry->offset;
1358 		for (lobj = tobj = obj; tobj != NULL;
1359 		    lobj = tobj, tobj = tobj->backing_object) {
1360 			VM_OBJECT_RLOCK(tobj);
1361 			off += lobj->backing_object_offset;
1362 			if (lobj != obj)
1363 				VM_OBJECT_RUNLOCK(lobj);
1364 		}
1365 		private = (entry->eflags & MAP_ENTRY_COW) != 0 || obj == NULL ||
1366 		    (obj->flags & OBJ_ANON) != 0;
1367 		last_timestamp = map->timestamp;
1368 		vm_map_unlock_read(map);
1369 		ino = 0;
1370 		if (lobj) {
1371 			vp = vm_object_vnode(lobj);
1372 			if (vp != NULL)
1373 				vref(vp);
1374 			if (lobj != obj)
1375 				VM_OBJECT_RUNLOCK(lobj);
1376 			VM_OBJECT_RUNLOCK(obj);
1377 			if (vp != NULL) {
1378 				vn_fullpath(vp, &name, &freename);
1379 				vn_lock(vp, LK_SHARED | LK_RETRY);
1380 				VOP_GETATTR(vp, &vat, td->td_ucred);
1381 				ino = vat.va_fileid;
1382 				vput(vp);
1383 			} else if (SV_PROC_ABI(p) == SV_ABI_LINUX) {
1384 				/*
1385 				 * sv_shared_page_base pointed out to the
1386 				 * FreeBSD sharedpage, PAGE_SIZE is a size
1387 				 * of it. The vDSO page is above.
1388 				 */
1389 				if (e_start == p->p_sysent->sv_shared_page_base +
1390 				    PAGE_SIZE)
1391 					name = vdso_str;
1392 				if (e_end == p->p_sysent->sv_usrstack)
1393 					name = stack_str;
1394 			}
1395 		}
1396 
1397 		/*
1398 		 * format:
1399 		 *  start, end, access, offset, major, minor, inode, name.
1400 		 */
1401 		error = sbuf_printf(sb, l_map_str,
1402 		    (u_long)e_start, (u_long)e_end,
1403 		    (e_prot & VM_PROT_READ)?"r":"-",
1404 		    (e_prot & VM_PROT_WRITE)?"w":"-",
1405 		    (e_prot & VM_PROT_EXECUTE)?"x":"-",
1406 		    private ? "p" : "s",
1407 		    (u_long)off,
1408 		    0,
1409 		    0,
1410 		    (u_long)ino,
1411 		    *name ? "     " : " ",
1412 		    name
1413 		    );
1414 		if (freename)
1415 			free(freename, M_TEMP);
1416 		vm_map_lock_read(map);
1417 		if (error == -1) {
1418 			error = 0;
1419 			break;
1420 		}
1421 		if (last_timestamp != map->timestamp) {
1422 			/*
1423 			 * Look again for the entry because the map was
1424 			 * modified while it was unlocked.  Specifically,
1425 			 * the entry may have been clipped, merged, or deleted.
1426 			 */
1427 			vm_map_lookup_entry(map, e_end - 1, &tmp_entry);
1428 			entry = tmp_entry;
1429 		}
1430 	}
1431 	vm_map_unlock_read(map);
1432 	vmspace_free(vm);
1433 
1434 	return (error);
1435 }
1436 
1437 /*
1438  * Filler function for proc/pid/mem
1439  */
1440 static int
1441 linprocfs_doprocmem(PFS_FILL_ARGS)
1442 {
1443 	ssize_t resid;
1444 	int error;
1445 
1446 	resid = uio->uio_resid;
1447 	error = procfs_doprocmem(PFS_FILL_ARGNAMES);
1448 
1449 	if (uio->uio_rw == UIO_READ && resid != uio->uio_resid)
1450 		return (0);
1451 
1452 	if (error == EFAULT)
1453 		error = EIO;
1454 
1455 	return (error);
1456 }
1457 
1458 /*
1459  * Filler function for proc/net/dev
1460  */
1461 static int
1462 linprocfs_donetdev_cb(if_t ifp, void *arg)
1463 {
1464 	char ifname[LINUX_IFNAMSIZ];
1465 	struct sbuf *sb = arg;
1466 
1467 	if (ifname_bsd_to_linux_ifp(ifp, ifname, sizeof(ifname)) <= 0)
1468 		return (ENODEV);
1469 
1470 	sbuf_printf(sb, "%6.6s: ", ifname);
1471 	sbuf_printf(sb, "%7ju %7ju %4ju %4ju %4lu %5lu %10lu %9ju ",
1472 	    (uintmax_t)if_getcounter(ifp, IFCOUNTER_IBYTES),
1473 	    (uintmax_t)if_getcounter(ifp, IFCOUNTER_IPACKETS),
1474 	    (uintmax_t)if_getcounter(ifp, IFCOUNTER_IERRORS),
1475 	    (uintmax_t)if_getcounter(ifp, IFCOUNTER_IQDROPS),
1476 						/* rx_missed_errors */
1477 	    0UL,				/* rx_fifo_errors */
1478 	    0UL,				/* rx_length_errors +
1479 						 * rx_over_errors +
1480 						 * rx_crc_errors +
1481 						 * rx_frame_errors */
1482 	    0UL,				/* rx_compressed */
1483 	    (uintmax_t)if_getcounter(ifp, IFCOUNTER_IMCASTS));
1484 						/* XXX-BZ rx only? */
1485 	sbuf_printf(sb, "%8ju %7ju %4ju %4ju %4lu %5ju %7lu %10lu\n",
1486 	    (uintmax_t)if_getcounter(ifp, IFCOUNTER_OBYTES),
1487 	    (uintmax_t)if_getcounter(ifp, IFCOUNTER_OPACKETS),
1488 	    (uintmax_t)if_getcounter(ifp, IFCOUNTER_OERRORS),
1489 	    (uintmax_t)if_getcounter(ifp, IFCOUNTER_OQDROPS),
1490 	    0UL,				/* tx_fifo_errors */
1491 	    (uintmax_t)if_getcounter(ifp, IFCOUNTER_COLLISIONS),
1492 	    0UL,				/* tx_carrier_errors +
1493 						 * tx_aborted_errors +
1494 						 * tx_window_errors +
1495 						 * tx_heartbeat_errors*/
1496 	    0UL);				/* tx_compressed */
1497 	return (0);
1498 }
1499 
1500 static int
1501 linprocfs_donetdev(PFS_FILL_ARGS)
1502 {
1503 	struct epoch_tracker et;
1504 
1505 	sbuf_printf(sb, "%6s|%58s|%s\n"
1506 	    "%6s|%58s|%58s\n",
1507 	    "Inter-", "   Receive", "  Transmit",
1508 	    " face",
1509 	    "bytes    packets errs drop fifo frame compressed multicast",
1510 	    "bytes    packets errs drop fifo colls carrier compressed");
1511 
1512 	CURVNET_SET(TD_TO_VNET(curthread));
1513 	NET_EPOCH_ENTER(et);
1514 	if_foreach(linprocfs_donetdev_cb, sb);
1515 	NET_EPOCH_EXIT(et);
1516 	CURVNET_RESTORE();
1517 
1518 	return (0);
1519 }
1520 
/* Argument handed to linux_route_print() for each route entry. */
struct walkarg {
	struct sbuf *sb;	/* buffer the route lines are appended to */
};

/*
 * Route walker callback for proc/net/route
 *
 * Appends one line describing the IPv4 route rt to the sbuf carried in
 * vw.  Returns ENODEV when the name of the route's interface cannot be
 * converted to its Linux form.  Does nothing when built without INET.
 */
static int
linux_route_print(struct rtentry *rt, void *vw)
{
#ifdef INET
	struct walkarg *walk;
	struct route_nhop_data rnd;
	struct nhop_object *nhop;
	struct in_addr dst, mask;
	char ifname[16];
	uint32_t scopeid, gateway, flags;

	walk = vw;
	scopeid = 0;

	rt_get_inet_prefix_pmask(rt, &dst, &mask, &scopeid);
	rt_get_rnd(rt, &rnd);

	/* select only first route in case of multipath */
	nhop = nhop_select_func(rnd.rnd_nhop, 0);

	if (ifname_bsd_to_linux_ifp(nhop->nh_ifp, ifname,
	    sizeof(ifname)) <= 0)
		return (ENODEV);

	/* Only gateway nexthops carry a gateway address. */
	gateway = 0;
	if ((nhop->nh_flags & NHF_GATEWAY) != 0)
		gateway = nhop->gw4_sa.sin_addr.s_addr;

	flags = RTF_UP | (nhop_get_rtflags(nhop) & (RTF_GATEWAY | RTF_HOST));

	sbuf_printf(walk->sb,
		"%s\t"
		"%08X\t%08X\t%04X\t"
		"%d\t%u\t%d\t"
		"%08X\t%d\t%u\t%u",
		ifname,
		dst.s_addr, gateway, flags,
		0, 0, rnd.rnd_weight,
		mask.s_addr, nhop->nh_mtu, 0, 0);

	sbuf_printf(walk->sb, "\n\n");
#endif
	return (0);
}
1568 
1569 /*
1570  * Filler function for proc/net/route
1571  */
1572 static int
1573 linprocfs_donetroute(PFS_FILL_ARGS)
1574 {
1575 	struct epoch_tracker et;
1576 	struct walkarg w = {
1577 		.sb = sb
1578 	};
1579 	uint32_t fibnum = curthread->td_proc->p_fibnum;
1580 
1581 	sbuf_printf(w.sb, "%-127s\n", "Iface\tDestination\tGateway "
1582                "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
1583                "\tWindow\tIRTT");
1584 
1585 	CURVNET_SET(TD_TO_VNET(curthread));
1586 	NET_EPOCH_ENTER(et);
1587 	rib_walk(fibnum, AF_INET, false, linux_route_print, &w);
1588 	NET_EPOCH_EXIT(et);
1589 	CURVNET_RESTORE();
1590 
1591 	return (0);
1592 }
1593 
1594 /*
1595  * Filler function for proc/sys/kernel/osrelease
1596  */
1597 static int
1598 linprocfs_doosrelease(PFS_FILL_ARGS)
1599 {
1600 	char osrelease[LINUX_MAX_UTSNAME];
1601 
1602 	linux_get_osrelease(td, osrelease);
1603 	sbuf_printf(sb, "%s\n", osrelease);
1604 
1605 	return (0);
1606 }
1607 
1608 /*
1609  * Filler function for proc/sys/kernel/ostype
1610  */
1611 static int
1612 linprocfs_doostype(PFS_FILL_ARGS)
1613 {
1614 	char osname[LINUX_MAX_UTSNAME];
1615 
1616 	linux_get_osname(td, osname);
1617 	sbuf_printf(sb, "%s\n", osname);
1618 
1619 	return (0);
1620 }
1621 
1622 /*
1623  * Filler function for proc/sys/kernel/version
1624  */
1625 static int
1626 linprocfs_doosbuild(PFS_FILL_ARGS)
1627 {
1628 
1629 	linprocfs_osbuild(td, sb);
1630 	sbuf_cat(sb, "\n");
1631 	return (0);
1632 }
1633 
1634 /*
1635  * Filler function for proc/sys/kernel/msgmax
1636  */
1637 static int
1638 linprocfs_domsgmax(PFS_FILL_ARGS)
1639 {
1640 
1641 	sbuf_printf(sb, "%d\n", msginfo.msgmax);
1642 	return (0);
1643 }
1644 
1645 /*
1646  * Filler function for proc/sys/kernel/msgmni
1647  */
1648 static int
1649 linprocfs_domsgmni(PFS_FILL_ARGS)
1650 {
1651 
1652 	sbuf_printf(sb, "%d\n", msginfo.msgmni);
1653 	return (0);
1654 }
1655 
1656 /*
1657  * Filler function for proc/sys/kernel/msgmnb
1658  */
1659 static int
1660 linprocfs_domsgmnb(PFS_FILL_ARGS)
1661 {
1662 
1663 	sbuf_printf(sb, "%d\n", msginfo.msgmnb);
1664 	return (0);
1665 }
1666 
1667 /*
1668  * Filler function for proc/sys/kernel/ngroups_max
1669  *
1670  * Note that in Linux it defaults to 65536, not 1023.
1671  */
1672 static int
1673 linprocfs_dongroups_max(PFS_FILL_ARGS)
1674 {
1675 
1676 	sbuf_printf(sb, "%d\n", ngroups_max);
1677 	return (0);
1678 }
1679 
1680 /*
1681  * Filler function for proc/sys/kernel/pid_max
1682  */
1683 static int
1684 linprocfs_dopid_max(PFS_FILL_ARGS)
1685 {
1686 
1687 	sbuf_printf(sb, "%i\n", PID_MAX);
1688 	return (0);
1689 }
1690 
1691 /*
1692  * Filler function for proc/sys/kernel/sem
1693  */
1694 static int
1695 linprocfs_dosem(PFS_FILL_ARGS)
1696 {
1697 
1698 	sbuf_printf(sb, "%d %d %d %d\n", seminfo.semmsl, seminfo.semmns,
1699 	    seminfo.semopm, seminfo.semmni);
1700 	return (0);
1701 }
1702 
1703 /*
1704  * Filler function for proc/sys/kernel/shmall
1705  */
1706 static int
1707 linprocfs_doshmall(PFS_FILL_ARGS)
1708 {
1709 
1710 	sbuf_printf(sb, "%lu\n", shminfo.shmall);
1711 	return (0);
1712 }
1713 
1714 /*
1715  * Filler function for proc/sys/kernel/shmmax
1716  */
1717 static int
1718 linprocfs_doshmmax(PFS_FILL_ARGS)
1719 {
1720 
1721 	sbuf_printf(sb, "%lu\n", shminfo.shmmax);
1722 	return (0);
1723 }
1724 
1725 /*
1726  * Filler function for proc/sys/kernel/shmmni
1727  */
1728 static int
1729 linprocfs_doshmmni(PFS_FILL_ARGS)
1730 {
1731 
1732 	sbuf_printf(sb, "%lu\n", shminfo.shmmni);
1733 	return (0);
1734 }
1735 
1736 /*
1737  * Filler function for proc/sys/kernel/tainted
1738  */
1739 static int
1740 linprocfs_dotainted(PFS_FILL_ARGS)
1741 {
1742 
1743 	sbuf_printf(sb, "0\n");
1744 	return (0);
1745 }
1746 
1747 /*
1748  * Filler function for proc/sys/vm/min_free_kbytes
1749  *
1750  * This mirrors the approach in illumos to return zero for reads. Effectively,
1751  * it says, no memory is kept in reserve for "atomic allocations". This class
1752  * of allocation can be used at times when a thread cannot be suspended.
1753  */
1754 static int
1755 linprocfs_dominfree(PFS_FILL_ARGS)
1756 {
1757 
1758 	sbuf_printf(sb, "%d\n", 0);
1759 	return (0);
1760 }
1761 
/*
 * Filler function for proc/scsi/device_info
 *
 * Writes nothing; the file always reads back empty.
 */
static int
linprocfs_doscsidevinfo(PFS_FILL_ARGS)
{

	/* Nothing to report. */
	return (0);
}
1771 
/*
 * Filler function for proc/scsi/scsi
 *
 * Writes nothing; the file always reads back empty.
 */
static int
linprocfs_doscsiscsi(PFS_FILL_ARGS)
{

	/* Nothing to report. */
	return (0);
}
1781 
1782 /*
1783  * Filler function for proc/devices
1784  */
1785 static int
1786 linprocfs_dodevices(PFS_FILL_ARGS)
1787 {
1788 	char *char_devices;
1789 	sbuf_printf(sb, "Character devices:\n");
1790 
1791 	char_devices = linux_get_char_devices();
1792 	sbuf_printf(sb, "%s", char_devices);
1793 	linux_free_get_char_devices(char_devices);
1794 
1795 	sbuf_printf(sb, "\nBlock devices:\n");
1796 
1797 	return (0);
1798 }
1799 
1800 /*
1801  * Filler function for proc/cmdline
1802  */
1803 static int
1804 linprocfs_docmdline(PFS_FILL_ARGS)
1805 {
1806 
1807 	sbuf_printf(sb, "BOOT_IMAGE=%s", kernelname);
1808 	sbuf_printf(sb, " ro root=302\n");
1809 	return (0);
1810 }
1811 
1812 /*
1813  * Filler function for proc/filesystems
1814  */
1815 static int
1816 linprocfs_dofilesystems(PFS_FILL_ARGS)
1817 {
1818 	struct vfsconf *vfsp;
1819 
1820 	vfsconf_slock();
1821 	TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) {
1822 		if (vfsp->vfc_flags & VFCF_SYNTHETIC)
1823 			sbuf_printf(sb, "nodev");
1824 		sbuf_printf(sb, "\t%s\n", vfsp->vfc_name);
1825 	}
1826 	vfsconf_sunlock();
1827 	return(0);
1828 }
1829 
/*
 * Filler function for proc/modules
 *
 * Currently writes nothing: the code that would list the linker files
 * is compiled out.
 */
static int
linprocfs_domodules(PFS_FILL_ARGS)
{
#if 0
	struct linker_file *lf;

	TAILQ_FOREACH(lf, &linker_files, link) {
		sbuf_printf(sb, "%-20s%8lu%4d\n", lf->filename,
		    (unsigned long)lf->size, lf->refs);
	}
#endif

	return (0);
}
1846 
1847 /*
1848  * Filler function for proc/pid/fd
1849  */
1850 static int
1851 linprocfs_dofdescfs(PFS_FILL_ARGS)
1852 {
1853 
1854 	if (p == curproc)
1855 		sbuf_printf(sb, "/dev/fd");
1856 	else
1857 		sbuf_printf(sb, "unknown");
1858 	return (0);
1859 }
1860 
1861 /*
1862  * Filler function for proc/pid/limits
1863  */
1864 static const struct linux_rlimit_ident {
1865 	const char	*desc;
1866 	const char	*unit;
1867 	unsigned int	rlim_id;
1868 } linux_rlimits_ident[] = {
1869 	{ "Max cpu time",	"seconds",	RLIMIT_CPU },
1870 	{ "Max file size", 	"bytes",	RLIMIT_FSIZE },
1871 	{ "Max data size",	"bytes", 	RLIMIT_DATA },
1872 	{ "Max stack size",	"bytes", 	RLIMIT_STACK },
1873 	{ "Max core file size",  "bytes",	RLIMIT_CORE },
1874 	{ "Max resident set",	"bytes",	RLIMIT_RSS },
1875 	{ "Max processes",	"processes",	RLIMIT_NPROC },
1876 	{ "Max open files",	"files",	RLIMIT_NOFILE },
1877 	{ "Max locked memory",	"bytes",	RLIMIT_MEMLOCK },
1878 	{ "Max address space",	"bytes",	RLIMIT_AS },
1879 	{ "Max file locks",	"locks",	LINUX_RLIMIT_LOCKS },
1880 	{ "Max pending signals", "signals",	LINUX_RLIMIT_SIGPENDING },
1881 	{ "Max msgqueue size",	"bytes",	LINUX_RLIMIT_MSGQUEUE },
1882 	{ "Max nice priority", 		"",	LINUX_RLIMIT_NICE },
1883 	{ "Max realtime priority",	"",	LINUX_RLIMIT_RTPRIO },
1884 	{ "Max realtime timeout",	"us",	LINUX_RLIMIT_RTTIME },
1885 	{ 0, 0, 0 }
1886 };
1887 
1888 static int
1889 linprocfs_doproclimits(PFS_FILL_ARGS)
1890 {
1891 	const struct linux_rlimit_ident *li;
1892 	struct plimit *limp;
1893 	struct rlimit rl;
1894 	ssize_t size;
1895 	int res, error;
1896 
1897 	error = 0;
1898 
1899 	PROC_LOCK(p);
1900 	limp = lim_hold(p->p_limit);
1901 	PROC_UNLOCK(p);
1902 	size = sizeof(res);
1903 	sbuf_printf(sb, "%-26s%-21s%-21s%-21s\n", "Limit", "Soft Limit",
1904 			"Hard Limit", "Units");
1905 	for (li = linux_rlimits_ident; li->desc != NULL; ++li) {
1906 		switch (li->rlim_id)
1907 		{
1908 		case LINUX_RLIMIT_LOCKS:
1909 			/* FALLTHROUGH */
1910 		case LINUX_RLIMIT_RTTIME:
1911 			rl.rlim_cur = RLIM_INFINITY;
1912 			break;
1913 		case LINUX_RLIMIT_SIGPENDING:
1914 			error = kernel_sysctlbyname(td,
1915 			    "kern.sigqueue.max_pending_per_proc",
1916 			    &res, &size, 0, 0, 0, 0);
1917 			if (error != 0)
1918 				goto out;
1919 			rl.rlim_cur = res;
1920 			rl.rlim_max = res;
1921 			break;
1922 		case LINUX_RLIMIT_MSGQUEUE:
1923 			error = kernel_sysctlbyname(td,
1924 			    "kern.ipc.msgmnb", &res, &size, 0, 0, 0, 0);
1925 			if (error != 0)
1926 				goto out;
1927 			rl.rlim_cur = res;
1928 			rl.rlim_max = res;
1929 			break;
1930 		case LINUX_RLIMIT_NICE:
1931 			/* FALLTHROUGH */
1932 		case LINUX_RLIMIT_RTPRIO:
1933 			rl.rlim_cur = 0;
1934 			rl.rlim_max = 0;
1935 			break;
1936 		default:
1937 			rl = limp->pl_rlimit[li->rlim_id];
1938 			break;
1939 		}
1940 		if (rl.rlim_cur == RLIM_INFINITY)
1941 			sbuf_printf(sb, "%-26s%-21s%-21s%-10s\n",
1942 			    li->desc, "unlimited", "unlimited", li->unit);
1943 		else
1944 			sbuf_printf(sb, "%-26s%-21llu%-21llu%-10s\n",
1945 			    li->desc, (unsigned long long)rl.rlim_cur,
1946 			    (unsigned long long)rl.rlim_max, li->unit);
1947 	}
1948 out:
1949 	lim_free(limp);
1950 	return (error);
1951 }
1952 
1953 /*
1954  * The point of the following two functions is to work around
1955  * an assertion in Chromium; see kern/240991 for details.
1956  */
1957 static int
1958 linprocfs_dotaskattr(PFS_ATTR_ARGS)
1959 {
1960 
1961 	vap->va_nlink = 3;
1962 	return (0);
1963 }
1964 
/*
 * Filler function for proc/<pid>/task/.dummy
 *
 * Writes nothing; the file always reads back empty.
 */
static int
linprocfs_dotaskdummy(PFS_FILL_ARGS)
{

	/* Nothing to report. */
	return (0);
}
1974 
1975 /*
1976  * Filler function for proc/sys/kernel/random/uuid
1977  */
1978 static int
1979 linprocfs_douuid(PFS_FILL_ARGS)
1980 {
1981 	struct uuid uuid;
1982 
1983 	kern_uuidgen(&uuid, 1);
1984 	sbuf_printf_uuid(sb, &uuid);
1985 	sbuf_printf(sb, "\n");
1986 	return(0);
1987 }
1988 
1989 /*
1990  * Filler function for proc/sys/kernel/random/boot_id
1991  */
1992 static int
1993 linprocfs_doboot_id(PFS_FILL_ARGS)
1994 {
1995        static bool firstboot = 1;
1996        static struct uuid uuid;
1997 
1998        if (firstboot) {
1999                kern_uuidgen(&uuid, 1);
2000                firstboot = 0;
2001        }
2002        sbuf_printf_uuid(sb, &uuid);
2003        sbuf_printf(sb, "\n");
2004        return(0);
2005 }
2006 
2007 /*
2008  * Filler function for proc/pid/auxv
2009  */
2010 static int
2011 linprocfs_doauxv(PFS_FILL_ARGS)
2012 {
2013 	struct sbuf *asb;
2014 	off_t buflen, resid;
2015 	int error;
2016 
2017 	/*
2018 	 * Mimic linux behavior and pass only processes with usermode
2019 	 * address space as valid. Return zero silently otherwise.
2020 	 */
2021 	if (p->p_vmspace == &vmspace0)
2022 		return (0);
2023 
2024 	if (uio->uio_resid == 0)
2025 		return (0);
2026 	if (uio->uio_offset < 0 || uio->uio_resid < 0)
2027 		return (EINVAL);
2028 
2029 	asb = sbuf_new_auto();
2030 	if (asb == NULL)
2031 		return (ENOMEM);
2032 	error = proc_getauxv(td, p, asb);
2033 	if (error == 0)
2034 		error = sbuf_finish(asb);
2035 
2036 	resid = sbuf_len(asb) - uio->uio_offset;
2037 	if (resid > uio->uio_resid)
2038 		buflen = uio->uio_resid;
2039 	else
2040 		buflen = resid;
2041 	if (buflen > IOSIZE_MAX)
2042 		return (EINVAL);
2043 	if (buflen > maxphys)
2044 		buflen = maxphys;
2045 	if (resid <= 0)
2046 		return (0);
2047 
2048 	if (error == 0)
2049 		error = uiomove(sbuf_data(asb) + uio->uio_offset, buflen, uio);
2050 	sbuf_delete(asb);
2051 	return (error);
2052 }
2053 
2054 /*
2055  * Filler function for proc/self/oom_score_adj
2056  */
2057 static int
2058 linprocfs_do_oom_score_adj(PFS_FILL_ARGS)
2059 {
2060 	struct linux_pemuldata *pem;
2061 	long oom;
2062 
2063 	pem = pem_find(p);
2064 	if (pem == NULL || uio == NULL)
2065 		return (EOPNOTSUPP);
2066 	if (uio->uio_rw == UIO_READ) {
2067 		sbuf_printf(sb, "%d\n", pem->oom_score_adj);
2068 	} else {
2069 		sbuf_trim(sb);
2070 		sbuf_finish(sb);
2071 		oom = strtol(sbuf_data(sb), NULL, 10);
2072 		if (oom < LINUX_OOM_SCORE_ADJ_MIN ||
2073 		    oom > LINUX_OOM_SCORE_ADJ_MAX)
2074 			return (EINVAL);
2075 		pem->oom_score_adj = oom;
2076 	}
2077 	return (0);
2078 }
2079 
2080 /*
2081  * Filler function for proc/sys/vm/max_map_count
2082  *
2083  * Maximum number of active map areas, on Linux this limits the number
2084  * of vmaps per mm struct. We don't limit mappings, return a suitable
2085  * large value.
2086  */
2087 static int
2088 linprocfs_domax_map_cnt(PFS_FILL_ARGS)
2089 {
2090 
2091 	sbuf_printf(sb, "%d\n", INT32_MAX);
2092 	return (0);
2093 }
2094 
2095 /*
2096  * Constructor
2097  */
2098 static int
2099 linprocfs_init(PFS_INIT_ARGS)
2100 {
2101 	struct pfs_node *root;
2102 	struct pfs_node *dir;
2103 	struct pfs_node *sys;
2104 
2105 	root = pi->pi_root;
2106 
2107 	/* /proc/... */
2108 	pfs_create_file(root, "cmdline", &linprocfs_docmdline,
2109 	    NULL, NULL, NULL, PFS_RD);
2110 	pfs_create_file(root, "cpuinfo", &linprocfs_docpuinfo,
2111 	    NULL, NULL, NULL, PFS_RD);
2112 	pfs_create_file(root, "devices", &linprocfs_dodevices,
2113 	    NULL, NULL, NULL, PFS_RD);
2114 	pfs_create_file(root, "filesystems", &linprocfs_dofilesystems,
2115 	    NULL, NULL, NULL, PFS_RD);
2116 	pfs_create_file(root, "loadavg", &linprocfs_doloadavg,
2117 	    NULL, NULL, NULL, PFS_RD);
2118 	pfs_create_file(root, "meminfo", &linprocfs_domeminfo,
2119 	    NULL, NULL, NULL, PFS_RD);
2120 	pfs_create_file(root, "modules", &linprocfs_domodules,
2121 	    NULL, NULL, NULL, PFS_RD);
2122 	pfs_create_file(root, "mounts", &linprocfs_domtab,
2123 	    NULL, NULL, NULL, PFS_RD);
2124 	pfs_create_file(root, "mtab", &linprocfs_domtab,
2125 	    NULL, NULL, NULL, PFS_RD);
2126 	pfs_create_file(root, "partitions", &linprocfs_dopartitions,
2127 	    NULL, NULL, NULL, PFS_RD);
2128 	pfs_create_link(root, "self", &procfs_docurproc,
2129 	    NULL, NULL, NULL, 0);
2130 	pfs_create_file(root, "stat", &linprocfs_dostat,
2131 	    NULL, NULL, NULL, PFS_RD);
2132 	pfs_create_file(root, "swaps", &linprocfs_doswaps,
2133 	    NULL, NULL, NULL, PFS_RD);
2134 	pfs_create_file(root, "uptime", &linprocfs_douptime,
2135 	    NULL, NULL, NULL, PFS_RD);
2136 	pfs_create_file(root, "version", &linprocfs_doversion,
2137 	    NULL, NULL, NULL, PFS_RD);
2138 
2139 	/* /proc/bus/... */
2140 	dir = pfs_create_dir(root, "bus", NULL, NULL, NULL, 0);
2141 	dir = pfs_create_dir(dir, "pci", NULL, NULL, NULL, 0);
2142 	dir = pfs_create_dir(dir, "devices", NULL, NULL, NULL, 0);
2143 
2144 	/* /proc/net/... */
2145 	dir = pfs_create_dir(root, "net", NULL, NULL, NULL, 0);
2146 	pfs_create_file(dir, "dev", &linprocfs_donetdev,
2147 	    NULL, NULL, NULL, PFS_RD);
2148 	pfs_create_file(dir, "route", &linprocfs_donetroute,
2149 	    NULL, NULL, NULL, PFS_RD);
2150 
2151 	/* /proc/<pid>/... */
2152 	dir = pfs_create_dir(root, "pid", NULL, NULL, NULL, PFS_PROCDEP);
2153 	pfs_create_file(dir, "cmdline", &linprocfs_doproccmdline,
2154 	    NULL, NULL, NULL, PFS_RD);
2155 	pfs_create_link(dir, "cwd", &linprocfs_doproccwd,
2156 	    NULL, NULL, NULL, 0);
2157 	pfs_create_file(dir, "environ", &linprocfs_doprocenviron,
2158 	    NULL, &procfs_candebug, NULL, PFS_RD);
2159 	pfs_create_link(dir, "exe", &procfs_doprocfile,
2160 	    NULL, &procfs_notsystem, NULL, 0);
2161 	pfs_create_file(dir, "maps", &linprocfs_doprocmaps,
2162 	    NULL, NULL, NULL, PFS_RD | PFS_AUTODRAIN);
2163 	pfs_create_file(dir, "mem", &linprocfs_doprocmem,
2164 	    procfs_attr_rw, &procfs_candebug, NULL, PFS_RDWR | PFS_RAW);
2165 	pfs_create_file(dir, "mountinfo", &linprocfs_doprocmountinfo,
2166 	    NULL, NULL, NULL, PFS_RD);
2167 	pfs_create_file(dir, "mounts", &linprocfs_domtab,
2168 	    NULL, NULL, NULL, PFS_RD);
2169 	pfs_create_link(dir, "root", &linprocfs_doprocroot,
2170 	    NULL, NULL, NULL, 0);
2171 	pfs_create_file(dir, "stat", &linprocfs_doprocstat,
2172 	    NULL, NULL, NULL, PFS_RD);
2173 	pfs_create_file(dir, "statm", &linprocfs_doprocstatm,
2174 	    NULL, NULL, NULL, PFS_RD);
2175 	pfs_create_file(dir, "status", &linprocfs_doprocstatus,
2176 	    NULL, NULL, NULL, PFS_RD);
2177 	pfs_create_link(dir, "fd", &linprocfs_dofdescfs,
2178 	    NULL, NULL, NULL, 0);
2179 	pfs_create_file(dir, "auxv", &linprocfs_doauxv,
2180 	    NULL, &procfs_candebug, NULL, PFS_RD|PFS_RAWRD);
2181 	pfs_create_file(dir, "limits", &linprocfs_doproclimits,
2182 	    NULL, NULL, NULL, PFS_RD);
2183 	pfs_create_file(dir, "oom_score_adj", &linprocfs_do_oom_score_adj,
2184 	    procfs_attr_rw, &procfs_candebug, NULL, PFS_RDWR);
2185 
2186 	/* /proc/<pid>/task/... */
2187 	dir = pfs_create_dir(dir, "task", linprocfs_dotaskattr, NULL, NULL, 0);
2188 	pfs_create_file(dir, ".dummy", &linprocfs_dotaskdummy,
2189 	    NULL, NULL, NULL, PFS_RD);
2190 
2191 	/* /proc/scsi/... */
2192 	dir = pfs_create_dir(root, "scsi", NULL, NULL, NULL, 0);
2193 	pfs_create_file(dir, "device_info", &linprocfs_doscsidevinfo,
2194 	    NULL, NULL, NULL, PFS_RD);
2195 	pfs_create_file(dir, "scsi", &linprocfs_doscsiscsi,
2196 	    NULL, NULL, NULL, PFS_RD);
2197 
2198 	/* /proc/sys/... */
2199 	sys = pfs_create_dir(root, "sys", NULL, NULL, NULL, 0);
2200 
2201 	/* /proc/sys/kernel/... */
2202 	dir = pfs_create_dir(sys, "kernel", NULL, NULL, NULL, 0);
2203 	pfs_create_file(dir, "osrelease", &linprocfs_doosrelease,
2204 	    NULL, NULL, NULL, PFS_RD);
2205 	pfs_create_file(dir, "ostype", &linprocfs_doostype,
2206 	    NULL, NULL, NULL, PFS_RD);
2207 	pfs_create_file(dir, "version", &linprocfs_doosbuild,
2208 	    NULL, NULL, NULL, PFS_RD);
2209 	pfs_create_file(dir, "msgmax", &linprocfs_domsgmax,
2210 	    NULL, NULL, NULL, PFS_RD);
2211 	pfs_create_file(dir, "msgmni", &linprocfs_domsgmni,
2212 	    NULL, NULL, NULL, PFS_RD);
2213 	pfs_create_file(dir, "msgmnb", &linprocfs_domsgmnb,
2214 	    NULL, NULL, NULL, PFS_RD);
2215 	pfs_create_file(dir, "ngroups_max", &linprocfs_dongroups_max,
2216 	    NULL, NULL, NULL, PFS_RD);
2217 	pfs_create_file(dir, "pid_max", &linprocfs_dopid_max,
2218 	    NULL, NULL, NULL, PFS_RD);
2219 	pfs_create_file(dir, "sem", &linprocfs_dosem,
2220 	    NULL, NULL, NULL, PFS_RD);
2221 	pfs_create_file(dir, "shmall", &linprocfs_doshmall,
2222 	    NULL, NULL, NULL, PFS_RD);
2223 	pfs_create_file(dir, "shmmax", &linprocfs_doshmmax,
2224 	    NULL, NULL, NULL, PFS_RD);
2225 	pfs_create_file(dir, "shmmni", &linprocfs_doshmmni,
2226 	    NULL, NULL, NULL, PFS_RD);
2227 	pfs_create_file(dir, "tainted", &linprocfs_dotainted,
2228 	    NULL, NULL, NULL, PFS_RD);
2229 
2230 	/* /proc/sys/kernel/random/... */
2231 	dir = pfs_create_dir(dir, "random", NULL, NULL, NULL, 0);
2232 	pfs_create_file(dir, "uuid", &linprocfs_douuid,
2233 	    NULL, NULL, NULL, PFS_RD);
2234 	pfs_create_file(dir, "boot_id", &linprocfs_doboot_id,
2235 	    NULL, NULL, NULL, PFS_RD);
2236 
2237 	/* /proc/sys/vm/.... */
2238 	dir = pfs_create_dir(sys, "vm", NULL, NULL, NULL, 0);
2239 	pfs_create_file(dir, "min_free_kbytes", &linprocfs_dominfree,
2240 	    NULL, NULL, NULL, PFS_RD);
2241 	pfs_create_file(dir, "max_map_count", &linprocfs_domax_map_cnt,
2242 	    NULL, NULL, NULL, PFS_RD);
2243 
2244 	return (0);
2245 }
2246 
/*
 * Destructor.
 *
 * No explicit teardown is performed here: the nodes are left for
 * pseudofs to garbage-collect.  Always returns 0.
 */
static int
linprocfs_uninit(PFS_INIT_ARGS)
{

	/* Nothing to release by hand; pseudofs will GC the node tree. */
	return (0);
}
2257 
2258 PSEUDOFS(linprocfs, 1, VFCF_JAIL);
2259 #if defined(__aarch64__) || defined(__amd64__)
2260 MODULE_DEPEND(linprocfs, linux_common, 1, 1, 1);
2261 #else
2262 MODULE_DEPEND(linprocfs, linux, 1, 1, 1);
2263 #endif
2264 MODULE_DEPEND(linprocfs, procfs, 1, 1, 1);
2265 MODULE_DEPEND(linprocfs, sysvmsg, 1, 1, 1);
2266 MODULE_DEPEND(linprocfs, sysvsem, 1, 1, 1);
2267 MODULE_DEPEND(linprocfs, sysvshm, 1, 1, 1);
2268