1 /*
2 * collectd - src/dcpmm.c
3 * MIT License
4 *
5 * Copyright (C) 2019 Intel Corporation. All rights reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24 *
25 * Authors:
26 * Hari TG <hari.tg at intel.com>
27 */
28
29 #include "collectd.h"
30 #include "utils/common/common.h"
31
32 #include "pmw_api.h"
33
/* Name used when registering callbacks and as the plugin field of every
 * dispatched value and log message. */
#define PLUGIN_NAME "dcpmm"
/* Renders a truth value as the literal "true" or "false" for log output.
 * The argument is parenthesized so that any expression, including an
 * assignment, is evaluated as a whole before being tested. */
#define PRINT_BOOL(s) ((s) ? "true" : "false")
36
37 int num_nvdimms;
38 int skip_stop = 0;
39 bool enable_dispatch_all = false;
40 cdtime_t interval = 0;
41 PMWATCH_OP_BUF pmw_output_buf;
42 PMWATCH_CONFIG_NODE pmwatch_config;
43
/*
 * Dispatches one gauge reading on behalf of this plugin.
 *
 * plugin_inst  plugin instance to report under; not set when NULL.
 * type         type name of the value; must not be NULL.
 * type_inst    type instance to report under; not set when NULL.
 * value        gauge reading to dispatch.
 *
 * When built with COLLECT_DEBUG, the reading is additionally sent as a
 * NOTIF_OKAY notification whose message carries the value as text.
 */
static void add_metric(const char *plugin_inst, const char *type,
                       const char *type_inst, gauge_t value) {
  value_list_t vl = VALUE_LIST_INIT;

  vl.values = &(value_t){.gauge = value};
  vl.values_len = 1;

  sstrncpy(vl.plugin, PLUGIN_NAME, sizeof(vl.plugin));
  sstrncpy(vl.host, hostname_g, sizeof(vl.host));
  sstrncpy(vl.type, type, sizeof(vl.type));

  if (plugin_inst != NULL) {
    sstrncpy(vl.plugin_instance, plugin_inst, sizeof(vl.plugin_instance));
  }

  if (type_inst != NULL) {
    sstrncpy(vl.type_instance, type_inst, sizeof(vl.type_instance));
  }

  plugin_dispatch_values(&vl);

#if COLLECT_DEBUG

  /* Both instances are optional (see the NULL checks above), so the debug
   * path must not hand a NULL pointer to strncmp()/sstrncpy(). An empty
   * string matches none of the prefixes below and copies as empty. */
  const char *dbg_plugin_inst = (plugin_inst != NULL) ? plugin_inst : "";
  const char *dbg_type_inst = (type_inst != NULL) ? type_inst : "";

  notification_t n = {
      .severity = NOTIF_OKAY, .time = cdtime(), .plugin = PLUGIN_NAME};

  /* Hit ratios and current temperatures are printed with two decimals,
   * everything else as a whole number. */
  if (strncmp(dbg_type_inst, "read_hit_ratio", strlen("read_hit_ratio")) ==
          0 ||
      strncmp(dbg_type_inst, "write_hit_ratio", strlen("write_hit_ratio")) ==
          0 ||
      strncmp(dbg_type_inst, "media_temperature",
              strlen("media_temperature")) == 0 ||
      strncmp(dbg_type_inst, "controller_temperature",
              strlen("controller_temperature")) == 0) {
    snprintf(n.message, sizeof(n.message), "Value: %0.2f", value);
  } else {
    snprintf(n.message, sizeof(n.message), "Value: %0.0f", value);
  }
  sstrncpy(n.host, hostname_g, sizeof(n.host));
  sstrncpy(n.type, type, sizeof(n.type));
  sstrncpy(n.type_instance, dbg_type_inst, sizeof(n.type_instance));
  sstrncpy(n.plugin_instance, dbg_plugin_inst, sizeof(n.plugin_instance));

  plugin_dispatch_notification(&n);

#endif /* COLLECT_DEBUG */
} /* void add_metric */
91
/*
 * Read callback (registered in dcpmm_config()).
 *
 * Refreshes the output buffer through PMWAPIRead() and then, for every DIMM
 * index below num_nvdimms, dispatches the enabled metric groups using the
 * decimal DIMM index as plugin instance.
 *
 * Returns 0 on success, or the non-zero status of PMWAPIRead() on failure.
 */
static int dcpmm_read(__attribute__((unused)) user_data_t *ud) {
  DEBUG(PLUGIN_NAME ": %s:%d", __FUNCTION__, __LINE__);

  int status = PMWAPIRead(&pmw_output_buf);
  if (status != 0) {
    ERROR(PLUGIN_NAME ": Failed to read data from the collection.");

    return status;
  }

  for (int dimm = 0; dimm < num_nvdimms; dimm++) {
    char instance[16];

    snprintf(instance, sizeof(instance), "%d", dimm);

    /* Performance counters. */
    if (pmwatch_config.collect_perf_metrics) {
      add_metric(instance, "timestamp", "epoch",
                 PMWATCH_OP_BUF_EPOCH(&pmw_output_buf[dimm]));
      add_metric(instance, "timestamp", "tsc_cycles",
                 PMWATCH_OP_BUF_TIMESTAMP(&pmw_output_buf[dimm]));
      add_metric(instance, "media", "total_bytes_read",
                 PMWATCH_OP_BUF_TOTAL_BYTES_READ(&pmw_output_buf[dimm]));
      add_metric(instance, "media", "total_bytes_written",
                 PMWATCH_OP_BUF_TOTAL_BYTES_WRITTEN(&pmw_output_buf[dimm]));
      add_metric(instance, "media", "read_64B_ops_rcvd",
                 PMWATCH_OP_BUF_BYTES_READ(&pmw_output_buf[dimm]));
      add_metric(instance, "media", "write_64B_ops_rcvd",
                 PMWATCH_OP_BUF_BYTES_WRITTEN(&pmw_output_buf[dimm]));
      add_metric(instance, "media", "media_read_ops",
                 PMWATCH_OP_BUF_MEDIA_READ(&pmw_output_buf[dimm]));
      add_metric(instance, "media", "media_write_ops",
                 PMWATCH_OP_BUF_MEDIA_WRITE(&pmw_output_buf[dimm]));
      add_metric(instance, "controller", "host_reads",
                 PMWATCH_OP_BUF_HOST_READS(&pmw_output_buf[dimm]));
      add_metric(instance, "controller", "host_writes",
                 PMWATCH_OP_BUF_HOST_WRITES(&pmw_output_buf[dimm]));
      add_metric(instance, "buffer", "read_hit_ratio",
                 PMWATCH_OP_BUF_READ_HIT_RATIO(&pmw_output_buf[dimm]));
      add_metric(instance, "buffer", "write_hit_ratio",
                 PMWATCH_OP_BUF_WRITE_HIT_RATIO(&pmw_output_buf[dimm]));
    }

    /* Health data is dispatched only when it is enabled and either perf
     * metrics are off or EnableDispatchAll asks for both groups. */
    if (!pmwatch_config.collect_health) {
      continue;
    }
    if (pmwatch_config.collect_perf_metrics && !enable_dispatch_all) {
      continue;
    }

    add_metric(instance, "timestamp", "epoch",
               PMWATCH_OP_BUF_EPOCH(&pmw_output_buf[dimm]));
    add_metric(instance, "timestamp", "tsc_cycles",
               PMWATCH_OP_BUF_TIMESTAMP(&pmw_output_buf[dimm]));
    add_metric(instance, "health", "health_status",
               PMWATCH_OP_BUF_HEALTH_STATUS(&pmw_output_buf[dimm]));
    add_metric(instance, "health", "lifespan_remaining",
               PMWATCH_OP_BUF_PERCENTAGE_REMAINING(&pmw_output_buf[dimm]));
    add_metric(instance, "health", "lifespan_used",
               PMWATCH_OP_BUF_PERCENTAGE_USED(&pmw_output_buf[dimm]));
    add_metric(instance, "health", "power_on_time",
               PMWATCH_OP_POWER_ON_TIME(&pmw_output_buf[dimm]));
    add_metric(instance, "health", "uptime",
               PMWATCH_OP_BUF_UPTIME(&pmw_output_buf[dimm]));
    add_metric(instance, "health", "last_shutdown_time",
               PMWATCH_OP_BUF_LAST_SHUTDOWN_TIME(&pmw_output_buf[dimm]));
    add_metric(instance, "health", "media_temperature",
               PMWATCH_OP_BUF_MEDIA_TEMP(&pmw_output_buf[dimm]));
    add_metric(instance, "health", "controller_temperature",
               PMWATCH_OP_BUF_CONTROLLER_TEMP(&pmw_output_buf[dimm]));
    add_metric(instance, "health", "max_media_temperature",
               PMWATCH_OP_BUF_MAX_MEDIA_TEMP(&pmw_output_buf[dimm]));
    add_metric(instance, "health", "max_controller_temperature",
               PMWATCH_OP_BUF_MAX_CONTROLLER_TEMP(&pmw_output_buf[dimm]));
  }

  return 0;
} /* int dcpmm_read */
168
dcpmm_stop(void)169 static int dcpmm_stop(void) {
170 DEBUG(PLUGIN_NAME ": %s:%d", __FUNCTION__, __LINE__);
171
172 int ret = 0;
173
174 if (skip_stop) {
175 DEBUG(PLUGIN_NAME ": %s:%d skipping stop function", __FUNCTION__, __LINE__);
176
177 return ret;
178 }
179
180 ret = PMWAPIStop();
181 if (ret != 0) {
182 ERROR(PLUGIN_NAME ": Failed to stop the collection.");
183 }
184
185 return ret;
186 } /* int dcpmm_stop */
187
dcpmm_shutdown(void)188 static int dcpmm_shutdown(void) {
189 DEBUG(PLUGIN_NAME ": %s:%d", __FUNCTION__, __LINE__);
190
191 int ret = 0;
192
193 free(pmw_output_buf);
194
195 ret = dcpmm_stop();
196
197 return ret;
198 } /* int dcpmm_shutdown */
199
dcpmm_init(void)200 static int dcpmm_init(void) {
201 DEBUG(PLUGIN_NAME ": %s:%d", __FUNCTION__, __LINE__);
202
203 int ret = 0;
204
205 ret = PMWAPIGetDIMMCount(&num_nvdimms);
206 if (ret != 0) {
207 ERROR(PLUGIN_NAME
208 ": Failed to obtain count of Intel(R) Optane DCPMM. "
209 "A common cause for this is collectd running without "
210 "root privileges. Ensure that collectd is running with "
211 "root privileges. Also, make sure that Intel(R) Optane DC "
212 "Persistent Memory is available in the system.");
213 skip_stop = 1;
214
215 return ret;
216 }
217
218 ret = PMWAPIStart(pmwatch_config);
219 if (ret != 0) {
220 ERROR(PLUGIN_NAME ": Failed to start the collection. "
221 "A common cause for this is collectd running without "
222 "root privileges. Ensure that collectd is running with "
223 "root privileges.");
224 skip_stop = 1;
225
226 return ret;
227 }
228
229 pmw_output_buf =
230 (PMWATCH_OP_BUF)calloc(num_nvdimms, sizeof(PMWATCH_OP_BUF_NODE));
231 if (pmw_output_buf == NULL) {
232 ERROR(PLUGIN_NAME ": Memory allocation for output buffer failed.");
233 dcpmm_stop();
234 skip_stop = 1;
235 ret = 1;
236 }
237
238 return ret;
239 } /* int dcpmm_init */
240
/*
 * Config callback: parses the children of the plugin's configuration block
 * and registers the read callback.
 *
 * Recognized keys (matched case-insensitively, whole key only):
 *   Interval           - read interval; stored both as cdtime_t for the read
 *                        registration and as double in pmwatch_config.
 *   CollectHealth      - boolean, stored in pmwatch_config.collect_health.
 *   CollectPerfMetrics - boolean, stored in
 *                        pmwatch_config.collect_perf_metrics.
 *   EnableDispatchAll  - boolean, stored in enable_dispatch_all.
 *
 * Returns 0 on success. Returns non-zero when a key is unknown, a value
 * cannot be parsed, both CollectHealth and CollectPerfMetrics are disabled,
 * or the read callback cannot be registered.
 */
static int dcpmm_config(oconfig_item_t *ci) {
  DEBUG(PLUGIN_NAME ": %s:%d", __FUNCTION__, __LINE__);

  int ret = 0;

  for (int i = 0; i < ci->children_num; i++) {
    oconfig_item_t *child = ci->children + i;

    /* Compare the whole key: a prefix comparison would silently accept
     * misspelled options such as "IntervalFoo". */
    if (strcasecmp("Interval", child->key) == 0) {
      ret = cf_util_get_cdtime(child, &interval);
      if (ret == 0) {
        ret = cf_util_get_double(child, &pmwatch_config.interval);
      }
    } else if (strcasecmp("CollectHealth", child->key) == 0) {
      ret = cf_util_get_boolean(child, &pmwatch_config.collect_health);
    } else if (strcasecmp("CollectPerfMetrics", child->key) == 0) {
      ret = cf_util_get_boolean(child, &pmwatch_config.collect_perf_metrics);
    } else if (strcasecmp("EnableDispatchAll", child->key) == 0) {
      ret = cf_util_get_boolean(child, &enable_dispatch_all);
    } else {
      ERROR(PLUGIN_NAME ": Unknown configuration parameter %s.", child->key);
      ret = 1;
    }

    if (ret != 0) {
      ERROR(PLUGIN_NAME ": Failed to parse configuration parameters");
      return ret;
    }
  }

  DEBUG("%s Config: Interval %.2f ; CollectHealth %s ; CollectPerfMetrics %s "
        "; EnableDispatchAll %s",
        PLUGIN_NAME, pmwatch_config.interval,
        PRINT_BOOL(pmwatch_config.collect_health),
        PRINT_BOOL(pmwatch_config.collect_perf_metrics),
        PRINT_BOOL(enable_dispatch_all));

  if (!pmwatch_config.collect_health && !pmwatch_config.collect_perf_metrics) {
    ERROR(PLUGIN_NAME ": CollectHealth and CollectPerfMetrics are disabled. "
                      "Enable at least one.");
    return 1;
  }

  /* Without a registered read callback the plugin would never report
   * anything, so a registration failure is reported to the caller. */
  ret = plugin_register_complex_read(NULL, PLUGIN_NAME, dcpmm_read, interval,
                                     NULL);
  if (ret != 0) {
    ERROR(PLUGIN_NAME ": Failed to register the read callback.");
    return ret;
  }

  return 0;
} /* int dcpmm_config */
292
module_register(void)293 void module_register(void) {
294 plugin_register_init(PLUGIN_NAME, dcpmm_init);
295 plugin_register_complex_config(PLUGIN_NAME, dcpmm_config);
296 plugin_register_shutdown(PLUGIN_NAME, dcpmm_shutdown);
297 } /* void module_register */
298