1 /*****************************************************************************\
2 * slurm_ext_sensors.c - implementation-independent external sensors plugin
3 * definitions
4 *****************************************************************************
5 * Copyright (C) 2013 Bull-HN-PHX.
6 * Written by Bull-HN-PHX/Martin Perry,
7 *
8 * This file is part of Slurm, a resource management program.
9 * For details, see <https://slurm.schedmd.com/>.
10 * Please also read the included file: DISCLAIMER.
11 *
12 * Slurm is free software; you can redistribute it and/or modify it under
13 * the terms of the GNU General Public License as published by the Free
14 * Software Foundation; either version 2 of the License, or (at your option)
15 * any later version.
16 *
17 * In addition, as a special exception, the copyright holders give permission
18 * to link the code of portions of this program with the OpenSSL library under
19 * certain conditions as described in each individual source file, and
20 * distribute linked combinations including the two. You must obey the GNU
21 * General Public License in all respects for all of the code used other than
22 * OpenSSL. If you modify file(s) with this exception, you may extend this
23 * exception to your version of the file(s), but you are not obligated to do
24 * so. If you do not wish to do so, delete this exception statement from your
25 * version. If you delete this exception statement from all source files in
26 * the program, then also delete it here.
27 *
28 * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
29 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
30 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
31 * details.
32 *
33 * You should have received a copy of the GNU General Public License along
34 * with Slurm; if not, write to the Free Software Foundation, Inc.,
35 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
36 \*****************************************************************************/
37
38 #include <pwd.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <time.h>
42 #include <sys/stat.h>
43 #include <sys/types.h>
44 #include <unistd.h>
45
46 #include "src/common/macros.h"
47 #include "src/common/parse_config.h"
48 #include "src/common/plugin.h"
49 #include "src/common/plugrack.h"
50 #include "src/common/read_config.h"
51 #include "src/slurmctld/slurmctld.h"
52 #include "src/common/slurm_ext_sensors.h"
53 #include "src/common/slurm_protocol_api.h"
54 #include "src/slurmd/slurmstepd/slurmstepd_job.h"
55 #include "src/common/xmalloc.h"
56 #include "src/common/xstring.h"
57
58
59 typedef struct slurm_ext_sensors_ops {
60 int (*update_component_data) (void);
61 int (*get_stepstartdata) (step_record_t *step_rec);
62 int (*get_stependdata) (step_record_t *step_rec);
63 List (*get_config) (void);
64 } slurm_ext_sensors_ops_t;
/*
 * Plugin symbol names, one per member of slurm_ext_sensors_ops_t.
 * The index of each name must equal the position of the matching
 * function pointer in that structure.
 */
static const char *syms[] = {
	[0] = "ext_sensors_p_update_component_data",
	[1] = "ext_sensors_p_get_stepstartdata",
	[2] = "ext_sensors_p_get_stependdata",
	[3] = "ext_sensors_p_get_config",
};
75
76 static slurm_ext_sensors_ops_t ops;
77 static plugin_context_t *g_context = NULL;
78 static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER;
79 static bool init_run = false;
80
ext_sensors_init(void)81 extern int ext_sensors_init(void)
82 {
83 int retval = SLURM_SUCCESS;
84 char *plugin_type = "ext_sensors";
85 char *type = NULL;
86
87 if (init_run && g_context)
88 return retval;
89
90 slurm_mutex_lock(&g_context_lock);
91
92 if (g_context)
93 goto done;
94
95 type = slurm_get_ext_sensors_type();
96
97 g_context = plugin_context_create(
98 plugin_type, type, (void **)&ops, syms, sizeof(syms));
99
100 if (!g_context) {
101 error("cannot create %s context for %s", plugin_type, type);
102 retval = SLURM_ERROR;
103 goto done;
104 }
105 init_run = true;
106
107 done:
108 slurm_mutex_unlock(&g_context_lock);
109 xfree(type);
110
111 return retval;
112 }
113
ext_sensors_fini(void)114 extern int ext_sensors_fini(void)
115 {
116 int rc;
117
118 if (!g_context)
119 return SLURM_SUCCESS;
120
121 init_run = false;
122 rc = plugin_context_destroy(g_context);
123 g_context = NULL;
124
125 return rc;
126 }
127
ext_sensors_alloc(void)128 extern ext_sensors_data_t *ext_sensors_alloc(void)
129 {
130 ext_sensors_data_t *ext_sensors =
131 xmalloc(sizeof(struct ext_sensors_data));
132
133 ext_sensors->consumed_energy = NO_VAL64;
134 ext_sensors->temperature = NO_VAL;
135
136 return ext_sensors;
137 }
138
/*
 * Release a record obtained from ext_sensors_alloc() by handing it to
 * xfree().
 */
extern void ext_sensors_destroy(ext_sensors_data_t *ext_sensors)
{
	xfree(ext_sensors);
}
143
/*
 * Append an ext_sensors record to buffer.
 *
 * Nothing is written when protocol_version is below
 * SLURM_MIN_PROTOCOL_VERSION. Otherwise four values are packed in this
 * order: consumed_energy (64 bit), temperature (32 bit),
 * energy_update_time (time) and current_watts (32 bit). A NULL
 * ext_sensors is packed as all zeros.
 */
extern void ext_sensors_data_pack(ext_sensors_data_t *ext_sensors, Buf buffer,
				  uint16_t protocol_version)
{
	uint64_t energy = 0;
	uint32_t temperature = 0;
	time_t updated = (time_t) 0;
	uint32_t watts = 0;

	if (protocol_version < SLURM_MIN_PROTOCOL_VERSION)
		return;

	if (ext_sensors) {
		energy = ext_sensors->consumed_energy;
		temperature = ext_sensors->temperature;
		updated = ext_sensors->energy_update_time;
		watts = ext_sensors->current_watts;
	}

	pack64(energy, buffer);
	pack32(temperature, buffer);
	pack_time(updated, buffer);
	pack32(watts, buffer);
}
162
/*
 * Read an ext_sensors record from buffer into a newly allocated
 * structure and store it in *ext_sensors.
 *
 * For protocol versions below SLURM_MIN_PROTOCOL_VERSION nothing is read
 * and the record keeps the values set by ext_sensors_alloc().
 *
 * Returns SLURM_SUCCESS with *ext_sensors pointing at the record, or
 * SLURM_ERROR with *ext_sensors set to NULL.
 */
extern int ext_sensors_data_unpack(ext_sensors_data_t **ext_sensors, Buf buffer,
				   uint16_t protocol_version)
{
	ext_sensors_data_t *data = ext_sensors_alloc();

	*ext_sensors = data;
	if (!data)
		return SLURM_ERROR;

	if (protocol_version < SLURM_MIN_PROTOCOL_VERSION)
		return SLURM_SUCCESS;

	/*
	 * Field order mirrors ext_sensors_data_pack(). The safe_unpack*
	 * macros are expected to jump to unpack_error on failure.
	 */
	safe_unpack64(&data->consumed_energy, buffer);
	safe_unpack32(&data->temperature, buffer);
	safe_unpack_time(&data->energy_update_time, buffer);
	safe_unpack32(&data->current_watts, buffer);

	return SLURM_SUCCESS;

unpack_error:
	ext_sensors_destroy(data);
	*ext_sensors = NULL;
	return SLURM_ERROR;
}
185
ext_sensors_g_update_component_data(void)186 extern int ext_sensors_g_update_component_data(void)
187 {
188 int retval = SLURM_ERROR;
189
190 if (ext_sensors_init() < 0)
191 return retval;
192
193 retval = (*(ops.update_component_data))();
194
195 return retval;
196 }
197
/*
 * Make sure the plugin context exists, then pass step_rec to the plugin's
 * get_stepstartdata entry point.
 *
 * Returns SLURM_ERROR if initialization fails, otherwise whatever the
 * plugin function returns.
 */
extern int ext_sensors_g_get_stepstartdata(step_record_t *step_rec)
{
	if (ext_sensors_init() < 0)
		return SLURM_ERROR;

	return ops.get_stepstartdata(step_rec);
}
209
/*
 * Make sure the plugin context exists, then pass step_rec to the plugin's
 * get_stependdata entry point.
 *
 * Returns SLURM_ERROR if initialization fails, otherwise whatever the
 * plugin function returns.
 */
extern int ext_sensors_g_get_stependdata(step_record_t *step_rec)
{
	if (ext_sensors_init() < 0)
		return SLURM_ERROR;

	return ops.get_stependdata(step_rec);
}
221
ext_sensors_g_get_config(void * data)222 extern int ext_sensors_g_get_config(void *data)
223 {
224
225 List *tmp_list = (List *) data;
226
227 if (ext_sensors_init() < 0)
228 return SLURM_ERROR;
229
230 *tmp_list = (*(ops.get_config))();
231
232 return SLURM_SUCCESS;
233 }
234