1 /*-
2  * Copyright (c) 2005-2006 The FreeBSD Project
3  * All rights reserved.
4  *
5  * Author: Victor Cruceru <soc-victor@freebsd.org>
6  *
7  * Redistribution of this software and documentation and use in source and
8  * binary forms, with or without modification, are permitted provided that
9  * the following conditions are met:
10  *
11  * 1. Redistributions of source code or documentation must retain the above
12  *    copyright notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31 
32 /*
33  * Host Resources MIB for SNMPd. Implementation for hrProcessorTable
34  */
35 
36 #include <sys/param.h>
37 #include <sys/sysctl.h>
38 #include <sys/user.h>
39 
40 #include <assert.h>
41 #include <math.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <syslog.h>
45 
46 #include "hostres_snmp.h"
47 #include "hostres_oid.h"
48 #include "hostres_tree.h"
49 
/*
 * This structure holds one SNMP table entry (row) of
 * HOST-RESOURCES-MIB's hrProcessorTable.
 * Note that the index is external: proc_create_entry() copies it from the
 * hrIndex of the matching device map entry, so it is allocated and
 * maintained by the hrDeviceTable code.
 */
struct processor_entry {
	/* SNMP row index; also the sort key used when inserting into the list */
	int32_t		index;
	/* value reported for hrProcessorFrwID */
	const struct asn_oid *frwId;
	/* value reported for hrProcessorLoad; recomputed by save_sample() */
	int32_t		load;
	TAILQ_ENTRY(processor_entry) link;
	u_char		cpu_no;		/* which cpu, counted from 0 */
	pid_t		idle_pid;	/* PID of idle process for this CPU */

	/*
	 * The samples from the last minute, as required by MIB. Each sample
	 * is stored as 100.0 minus the idle process' CPU percentage.
	 */
	double		samples[MAX_CPU_SAMPLES];

	/* current sample to fill in next time, must be < MAX_CPU_SAMPLES */
	uint32_t	cur_sample_idx;

	/* number of useful samples, capped at MAX_CPU_SAMPLES */
	uint32_t	sample_cnt;
};
TAILQ_HEAD(processor_tbl, processor_entry);
74 
/* the head of the list with hrProcessorTable's entries */
static struct processor_tbl processor_tbl =
    TAILQ_HEAD_INITIALIZER(processor_tbl);

/*
 * Number of entries currently in processor_tbl: incremented for every
 * entry that is created and decremented for every entry that is freed.
 */
static int32_t detected_processor_count;

/* cached sysctlbyname(hw.ncpu); a value <= 0 means "not fetched yet" */
static int hw_ncpu;

/* sysctlbyname(kern.{ccpu,fscale}); set to 0 when the sysctl failed */
static fixpt_t ccpu;
static int fscale;

/* value of this_tick when the processor table was refreshed last */
static uint64_t proctbl_tick;

/* periodic timer used to get cpu load stats; NULL when not running */
static void *cpus_load_timer;
94 
95 /*
96  * Average the samples. The entire algorithm seems to be wrong XXX.
97  */
98 static int
99 get_avg_load(struct processor_entry *e)
100 {
101 	u_int i;
102 	double sum = 0.0;
103 
104 	assert(e != NULL);
105 
106 	if (e->sample_cnt == 0)
107 		return (0);
108 
109 	for (i = 0; i < e->sample_cnt; i++)
110 		sum += e->samples[i];
111 
112 	return ((int)floor((double)sum/(double)e->sample_cnt));
113 }
114 
115 /*
116  * Stolen from /usr/src/bin/ps/print.c. The idle process should never
117  * be swapped out :-)
118  */
119 static double
120 processor_getpcpu(struct kinfo_proc *ki_p)
121 {
122 
123 	if (ccpu == 0 || fscale == 0)
124 		return (0.0);
125 
126 #define	fxtofl(fixpt) ((double)(fixpt) / fscale)
127 	return (100.0 * fxtofl(ki_p->ki_pctcpu) /
128 	    (1.0 - exp(ki_p->ki_swtime * log(fxtofl(ccpu)))));
129 }
130 
131 /**
132  * Save a new sample
133  */
134 static void
135 save_sample(struct processor_entry *e, struct kinfo_proc *kp)
136 {
137 
138 	e->samples[e->cur_sample_idx] = 100.0 - processor_getpcpu(kp);
139 	e->load = get_avg_load(e);
140 	e->cur_sample_idx = (e->cur_sample_idx + 1) % MAX_CPU_SAMPLES;
141 
142 	if (++e->sample_cnt > MAX_CPU_SAMPLES)
143 		e->sample_cnt = MAX_CPU_SAMPLES;
144 }
145 
146 /**
147  * Create a new entry into the processor table.
148  */
149 static struct processor_entry *
150 proc_create_entry(u_int cpu_no, struct device_map_entry *map)
151 {
152 	struct device_entry *dev;
153 	struct processor_entry *entry;
154 	char name[128];
155 
156 	/*
157 	 * If there is no map entry create one by creating a device table
158 	 * entry.
159 	 */
160 	if (map == NULL) {
161 		snprintf(name, sizeof(name), "cpu%u", cpu_no);
162 		if ((dev = device_entry_create(name, "", "")) == NULL)
163 			return (NULL);
164 		dev->flags |= HR_DEVICE_IMMUTABLE;
165 		STAILQ_FOREACH(map, &device_map, link)
166 			if (strcmp(map->name_key, name) == 0)
167 				break;
168 		if (map == NULL)
169 			abort();
170 	}
171 
172 	if ((entry = malloc(sizeof(*entry))) == NULL) {
173 		syslog(LOG_ERR, "hrProcessorTable: %s malloc "
174 		    "failed: %m", __func__);
175 		return (NULL);
176 	}
177 	memset(entry, 0, sizeof(*entry));
178 
179 	entry->index = map->hrIndex;
180 	entry->load = 0;
181 	entry->cpu_no = (u_char)cpu_no;
182 	entry->idle_pid = 0;
183 	entry->frwId = &oid_zeroDotZero; /* unknown id FIXME */
184 
185 	INSERT_OBJECT_INT(entry, &processor_tbl);
186 
187 	HRDBG("CPU %d added with SNMP index=%d",
188 	    entry->cpu_no, entry->index);
189 
190 	return (entry);
191 }
192 
193 /**
194  * Get the PIDs for the idle processes of the CPUs.
195  */
196 static void
197 processor_get_pids(void)
198 {
199 	struct kinfo_proc *plist, *kp;
200 	int i;
201 	int nproc;
202 	int cpu;
203 	int nchars;
204 	struct processor_entry *entry;
205 
206 	plist = kvm_getprocs(hr_kd, KERN_PROC_ALL, 0, &nproc);
207 	if (plist == NULL || nproc < 0) {
208 		syslog(LOG_ERR, "hrProcessor: kvm_getprocs() failed: %m");
209 		return;
210 	}
211 
212 	for (i = 0, kp = plist; i < nproc; i++, kp++) {
213 		if (!IS_KERNPROC(kp))
214 			continue;
215 
216 		if (strcmp(kp->ki_comm, "idle") == 0) {
217 			/* single processor system */
218 			cpu = 0;
219 		} else if (sscanf(kp->ki_comm, "idle: cpu%d%n", &cpu, &nchars)
220 		    == 1 && (u_int)nchars == strlen(kp->ki_comm)) {
221 			/* MP system */
222 		} else
223 			/* not an idle process */
224 			continue;
225 
226 		HRDBG("'%s' proc with pid %d is on CPU #%d (last on #%d)",
227 		    kp->ki_comm, kp->ki_pid, kp->ki_oncpu, kp->ki_lastcpu);
228 
229 		TAILQ_FOREACH(entry, &processor_tbl, link)
230 			if (entry->cpu_no == kp->ki_lastcpu)
231 				break;
232 
233 		if (entry == NULL) {
234 			/* create entry on non-ACPI systems */
235 			if ((entry = proc_create_entry(cpu, NULL)) == NULL)
236 				continue;
237 
238 			detected_processor_count++;
239 		}
240 
241 		entry->idle_pid = kp->ki_pid;
242 		HRDBG("CPU no. %d with SNMP index=%d has idle PID %d",
243 		    entry->cpu_no, entry->index, entry->idle_pid);
244 
245 		save_sample(entry, kp);
246 	}
247 }
248 
249 /**
250  * Scan the device map table for CPUs and create an entry into the
251  * processor table for each CPU. Then fetch the idle PIDs for all CPUs.
252  */
253 static void
254 create_proc_table(void)
255 {
256 	struct device_map_entry *map;
257 	struct processor_entry *entry;
258 	int cpu_no;
259 
260 	detected_processor_count = 0;
261 
262 	/*
263 	 * Because hrProcessorTable depends on hrDeviceTable,
264 	 * the device detection must be performed at this point.
265 	 * If not, no entries will be present in the hrProcessor Table.
266 	 *
267 	 * For non-ACPI system the processors are not in the device table,
268 	 * therefor insert them when getting the idle pids. XXX
269 	 */
270 	STAILQ_FOREACH(map, &device_map, link)
271 		if (strncmp(map->name_key, "cpu", strlen("cpu")) == 0 &&
272 		    strstr(map->location_key, ".CPU") != NULL) {
273 			if (sscanf(map->name_key,"cpu%d", &cpu_no) != 1) {
274 				syslog(LOG_ERR, "hrProcessorTable: Failed to "
275 				    "get cpu no. from device named '%s'",
276 				    map->name_key);
277 				continue;
278 			}
279 
280 			if ((entry = proc_create_entry(cpu_no, map)) == NULL)
281 				continue;
282 
283 			detected_processor_count++;
284 		}
285 
286 	HRDBG("%d CPUs detected", detected_processor_count);
287 
288 	processor_get_pids();
289 }
290 
291 /**
292  * Free the processor table
293  */
294 static void
295 free_proc_table(void)
296 {
297 	struct processor_entry *n1;
298 
299 	while ((n1 = TAILQ_FIRST(&processor_tbl)) != NULL) {
300 		TAILQ_REMOVE(&processor_tbl, n1, link);
301 		free(n1);
302 		detected_processor_count--;
303 	}
304 
305 	assert(detected_processor_count == 0);
306 	detected_processor_count = 0;
307 }
308 
/**
 * Init the things for hrProcessorTable.
 * Fetch the kernel's CPU usage scaling parameters and scan the device
 * table for processor entries.
 *
 * A parameter that cannot be read (the sysctl fails or returns an object
 * of unexpected size) is logged and set to 0; processor_getpcpu() then
 * reports 0.0 instead of computing with a bogus value.
 */
void
init_processor_tbl(void)
{
	size_t len;

	/* get various parameters from the kernel */
	len = sizeof(ccpu);
	if (sysctlbyname("kern.ccpu", &ccpu, &len, NULL, 0) == -1 ||
	    len != sizeof(ccpu)) {
		syslog(LOG_ERR, "hrProcessorTable: sysctl(kern.ccpu) "
		    "failed: %m");
		ccpu = 0;
	}

	len = sizeof(fscale);
	if (sysctlbyname("kern.fscale", &fscale, &len, NULL, 0) == -1 ||
	    len != sizeof(fscale)) {
		syslog(LOG_ERR, "hrProcessorTable: sysctl(kern.fscale) "
		    "failed: %m");
		fscale = 0;
	}

	/* create the initial processor table */
	create_proc_table();
}
334 
335 /**
336  * Finalization routine for hrProcessorTable.
337  * It destroys the lists and frees any allocated heap memory.
338  */
339 void
340 fini_processor_tbl(void)
341 {
342 
343 	if (cpus_load_timer != NULL) {
344 		timer_stop(cpus_load_timer);
345 		cpus_load_timer = NULL;
346 	}
347 
348 	free_proc_table();
349 }
350 
/**
 * Make sure that the number of processors announced by the kernel in
 * hw.ncpu is equal to the number of processors in the processor table.
 * If they differ the table is thrown away and rebuilt.
 *
 * hw.ncpu is fetched only while no valid value is cached. If fetching
 * fails the table is left alone.
 */
static void
processor_refill_tbl(void)
{
	size_t len;

	HRDBG("hw_ncpu=%d detected_processor_count=%d", hw_ncpu,
	    detected_processor_count);

	if (hw_ncpu <= 0) {
		len = sizeof(hw_ncpu);
		if (sysctlbyname("hw.ncpu", &hw_ncpu, &len, NULL, 0) == -1 ||
		    len != sizeof(hw_ncpu)) {
			syslog(LOG_ERR, "hrProcessorTable: "
			    "sysctl(hw.ncpu) failed: %m");
			/* keep it invalid so that the next call retries */
			hw_ncpu = 0;
			return;
		}
	}

	/* counts agree - the table is up to date */
	if (hw_ncpu == detected_processor_count)
		return;

	free_proc_table();
	create_proc_table();
}
380 
381 /**
382  * Refresh all values in the processor table. We call this once for
383  * every PDU that accesses the table.
384  */
385 static void
386 refresh_processor_tbl(void)
387 {
388 	struct processor_entry *entry;
389 	int need_pids;
390 	struct kinfo_proc *plist;
391 	int nproc;
392 
393 	processor_refill_tbl();
394 
395 	need_pids = 0;
396 	TAILQ_FOREACH(entry, &processor_tbl, link) {
397 		if (entry->idle_pid <= 0) {
398 			need_pids = 1;
399 			continue;
400 		}
401 
402 		assert(hr_kd != NULL);
403 
404 		plist = kvm_getprocs(hr_kd, KERN_PROC_PID,
405 		    entry->idle_pid, &nproc);
406 		if (plist == NULL || nproc != 1) {
407 			syslog(LOG_ERR, "%s: missing item with "
408 			    "PID = %d for CPU #%d\n ", __func__,
409 			    entry->idle_pid, entry->cpu_no);
410 			need_pids = 1;
411 			continue;
412 		}
413 		save_sample(entry, plist);
414 	}
415 
416 	if (need_pids == 1)
417 		processor_get_pids();
418 
419 	proctbl_tick = this_tick;
420 }
421 
/**
 * Callback of the periodic timer armed by start_processor_tbl(); with
 * the period used there it runs MAX_CPU_SAMPLES times per minute to
 * collect the CPU load. The timer argument is not used.
 */
static void
get_cpus_samples(void *arg __unused)
{

	HRDBG("[%llu] ENTER", (unsigned long long)get_ticks());
	/* takes one new sample for every CPU with a known idle PID */
	refresh_processor_tbl();
	HRDBG("[%llu] EXIT", (unsigned long long)get_ticks());
}
434 
435 /**
436  * Called to start this table. We need to start the periodic idle
437  * time collection.
438  */
439 void
440 start_processor_tbl(struct lmodule *mod)
441 {
442 
443 	/*
444 	 * Start the cpu stats collector
445 	 * The semantics of timer_start parameters is in "SNMP ticks";
446 	 * we have 100 "SNMP ticks" per second, thus we are trying below
447 	 * to get MAX_CPU_SAMPLES per minute
448 	 */
449 	cpus_load_timer = timer_start_repeat(100, 100 * 60 / MAX_CPU_SAMPLES,
450 	    get_cpus_samples, NULL, mod);
451 }
452 
453 /**
454  * Access routine for the processor table.
455  */
456 int
457 op_hrProcessorTable(struct snmp_context *ctx __unused,
458     struct snmp_value *value, u_int sub, u_int iidx __unused,
459     enum snmp_op curr_op)
460 {
461 	struct processor_entry *entry;
462 
463 	if (this_tick != proctbl_tick)
464 		refresh_processor_tbl();
465 
466 	switch (curr_op) {
467 
468 	case SNMP_OP_GETNEXT:
469 		if ((entry = NEXT_OBJECT_INT(&processor_tbl,
470 		    &value->var, sub)) == NULL)
471 			return (SNMP_ERR_NOSUCHNAME);
472 		value->var.len = sub + 1;
473 		value->var.subs[sub] = entry->index;
474 		goto get;
475 
476 	case SNMP_OP_GET:
477 		if ((entry = FIND_OBJECT_INT(&processor_tbl,
478 		    &value->var, sub)) == NULL)
479 			return (SNMP_ERR_NOSUCHNAME);
480 		goto get;
481 
482 	case SNMP_OP_SET:
483 		if ((entry = FIND_OBJECT_INT(&processor_tbl,
484 		    &value->var, sub)) == NULL)
485 			return (SNMP_ERR_NO_CREATION);
486 		return (SNMP_ERR_NOT_WRITEABLE);
487 
488 	case SNMP_OP_ROLLBACK:
489 	case SNMP_OP_COMMIT:
490 		abort();
491 	}
492 	abort();
493 
494   get:
495 	switch (value->var.subs[sub - 1]) {
496 
497 	case LEAF_hrProcessorFrwID:
498 		assert(entry->frwId != NULL);
499 		value->v.oid = *entry->frwId;
500 		return (SNMP_ERR_NOERROR);
501 
502 	case LEAF_hrProcessorLoad:
503 		value->v.integer = entry->load;
504 		return (SNMP_ERR_NOERROR);
505 	}
506 	abort();
507 }
508