1 /*****************************************************************************\
2  *  slurm_ext_sensors.c - implementation-independent external sensors plugin
3  *  definitions
4  *****************************************************************************
5  *  Copyright (C) 2013 Bull-HN-PHX.
6  *  Written by Bull-HN-PHX/Martin Perry,
7  *
8  *  This file is part of Slurm, a resource management program.
9  *  For details, see <https://slurm.schedmd.com/>.
10  *  Please also read the included file: DISCLAIMER.
11  *
12  *  Slurm is free software; you can redistribute it and/or modify it under
13  *  the terms of the GNU General Public License as published by the Free
14  *  Software Foundation; either version 2 of the License, or (at your option)
15  *  any later version.
16  *
17  *  In addition, as a special exception, the copyright holders give permission
18  *  to link the code of portions of this program with the OpenSSL library under
19  *  certain conditions as described in each individual source file, and
20  *  distribute linked combinations including the two. You must obey the GNU
21  *  General Public License in all respects for all of the code used other than
22  *  OpenSSL. If you modify file(s) with this exception, you may extend this
23  *  exception to your version of the file(s), but you are not obligated to do
24  *  so. If you do not wish to do so, delete this exception statement from your
25  *  version.  If you delete this exception statement from all source files in
26  *  the program, then also delete it here.
27  *
28  *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
29  *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
30  *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
31  *  details.
32  *
33  *  You should have received a copy of the GNU General Public License along
34  *  with Slurm; if not, write to the Free Software Foundation, Inc.,
35  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
36 \*****************************************************************************/
37 
38 #include <pwd.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <time.h>
42 #include <sys/stat.h>
43 #include <sys/types.h>
44 #include <unistd.h>
45 
46 #include "src/common/macros.h"
47 #include "src/common/parse_config.h"
48 #include "src/common/plugin.h"
49 #include "src/common/plugrack.h"
50 #include "src/common/read_config.h"
51 #include "src/slurmctld/slurmctld.h"
52 #include "src/common/slurm_ext_sensors.h"
53 #include "src/common/slurm_protocol_api.h"
54 #include "src/slurmd/slurmstepd/slurmstepd_job.h"
55 #include "src/common/xmalloc.h"
56 #include "src/common/xstring.h"
57 
58 
59 typedef struct slurm_ext_sensors_ops {
60 	int (*update_component_data) (void);
61 	int (*get_stepstartdata)     (step_record_t *step_rec);
62 	int (*get_stependdata)       (step_record_t *step_rec);
63 	List (*get_config)           (void);
64 } slurm_ext_sensors_ops_t;
/*
 * Plugin symbol names, one per member of slurm_ext_sensors_ops_t.
 * Keep this list in exactly the same order as the members of that struct.
 */
static const char *syms[] = {
	"ext_sensors_p_update_component_data",	/* ops.update_component_data */
	"ext_sensors_p_get_stepstartdata",	/* ops.get_stepstartdata */
	"ext_sensors_p_get_stependdata",	/* ops.get_stependdata */
	"ext_sensors_p_get_config",		/* ops.get_config */
};
75 
76 static slurm_ext_sensors_ops_t ops;
77 static plugin_context_t *g_context = NULL;
78 static pthread_mutex_t g_context_lock =	PTHREAD_MUTEX_INITIALIZER;
79 static bool init_run = false;
80 
ext_sensors_init(void)81 extern int ext_sensors_init(void)
82 {
83 	int retval = SLURM_SUCCESS;
84 	char *plugin_type = "ext_sensors";
85 	char *type = NULL;
86 
87 	if (init_run && g_context)
88 		return retval;
89 
90 	slurm_mutex_lock(&g_context_lock);
91 
92 	if (g_context)
93 		goto done;
94 
95 	type = slurm_get_ext_sensors_type();
96 
97 	g_context = plugin_context_create(
98 		plugin_type, type, (void **)&ops, syms, sizeof(syms));
99 
100 	if (!g_context) {
101 		error("cannot create %s context for %s", plugin_type, type);
102 		retval = SLURM_ERROR;
103 		goto done;
104 	}
105 	init_run = true;
106 
107 done:
108 	slurm_mutex_unlock(&g_context_lock);
109 	xfree(type);
110 
111 	return retval;
112 }
113 
ext_sensors_fini(void)114 extern int ext_sensors_fini(void)
115 {
116 	int rc;
117 
118 	if (!g_context)
119 		return SLURM_SUCCESS;
120 
121 	init_run = false;
122 	rc = plugin_context_destroy(g_context);
123 	g_context = NULL;
124 
125 	return rc;
126 }
127 
ext_sensors_alloc(void)128 extern ext_sensors_data_t *ext_sensors_alloc(void)
129 {
130 	ext_sensors_data_t *ext_sensors =
131 		xmalloc(sizeof(struct ext_sensors_data));
132 
133 	ext_sensors->consumed_energy = NO_VAL64;
134 	ext_sensors->temperature = NO_VAL;
135 
136 	return ext_sensors;
137 }
138 
/*
 * Release a record obtained from ext_sensors_alloc().
 *
 * IN ext_sensors - record to hand to xfree()
 */
extern void ext_sensors_destroy(ext_sensors_data_t *ext_sensors)
{
	xfree(ext_sensors);
}
143 
/*
 * Serialize an ext_sensors record into a buffer.
 *
 * Field order on the wire: consumed_energy (64 bit), temperature (32 bit),
 * energy_update_time (time), current_watts (32 bit).
 *
 * IN ext_sensors      - record to pack; when NULL, zeros are packed for
 *                       every field instead
 * IN/OUT buffer       - destination buffer
 * IN protocol_version - nothing is written when this is below
 *                       SLURM_MIN_PROTOCOL_VERSION
 */
extern void ext_sensors_data_pack(ext_sensors_data_t *ext_sensors, Buf buffer,
				    uint16_t protocol_version)
{
	if (protocol_version < SLURM_MIN_PROTOCOL_VERSION)
		return;

	if (ext_sensors) {
		pack64(ext_sensors->consumed_energy, buffer);
		pack32(ext_sensors->temperature, buffer);
		pack_time(ext_sensors->energy_update_time, buffer);
		pack32(ext_sensors->current_watts, buffer);
	} else {
		/* No record supplied: emit a zeroed placeholder of equal layout. */
		pack64(0, buffer);
		pack32(0, buffer);
		pack_time((time_t) 0, buffer);
		pack32(0, buffer);
	}
}
162 
/*
 * Deserialize an ext_sensors record from a buffer into a newly allocated
 * ext_sensors_data_t.
 *
 * OUT ext_sensors     - receives the new record, or NULL on failure
 * IN buffer           - source buffer
 * IN protocol_version - when below SLURM_MIN_PROTOCOL_VERSION nothing is
 *                       read and the record keeps the values set by
 *                       ext_sensors_alloc()
 *
 * RET SLURM_SUCCESS, or SLURM_ERROR when allocation or unpacking fails.
 */
extern int ext_sensors_data_unpack(ext_sensors_data_t **ext_sensors, Buf buffer,
				     uint16_t protocol_version)
{
	ext_sensors_data_t *rec = ext_sensors_alloc();

	*ext_sensors = rec;
	if (!rec)
		return SLURM_ERROR;

	if (protocol_version < SLURM_MIN_PROTOCOL_VERSION)
		return SLURM_SUCCESS;

	/*
	 * NOTE(review): the safe_unpack*() macros are presumed to jump to
	 * unpack_error on failure - confirm against their definition.
	 */
	safe_unpack64(&rec->consumed_energy, buffer);
	safe_unpack32(&rec->temperature, buffer);
	safe_unpack_time(&rec->energy_update_time, buffer);
	safe_unpack32(&rec->current_watts, buffer);

	return SLURM_SUCCESS;

unpack_error:
	/* Drop the partially filled record and clear the caller's pointer. */
	ext_sensors_destroy(rec);
	*ext_sensors = NULL;
	return SLURM_ERROR;
}
185 
ext_sensors_g_update_component_data(void)186 extern int ext_sensors_g_update_component_data(void)
187 {
188 	int retval = SLURM_ERROR;
189 
190 	if (ext_sensors_init() < 0)
191 		return retval;
192 
193 	retval = (*(ops.update_component_data))();
194 
195 	return retval;
196 }
197 
/*
 * Make sure the plugin is loaded, then call its get_stepstartdata entry
 * point for the given step.
 *
 * IN step_rec - step record passed through to the plugin unchanged
 *
 * RET SLURM_ERROR when ext_sensors_init() fails, otherwise whatever the
 *     plugin call returns.
 */
extern int ext_sensors_g_get_stepstartdata(step_record_t *step_rec)
{
	if (ext_sensors_init() < 0)
		return SLURM_ERROR;

	return ops.get_stepstartdata(step_rec);
}
209 
/*
 * Make sure the plugin is loaded, then call its get_stependdata entry
 * point for the given step.
 *
 * IN step_rec - step record passed through to the plugin unchanged
 *
 * RET SLURM_ERROR when ext_sensors_init() fails, otherwise whatever the
 *     plugin call returns.
 */
extern int ext_sensors_g_get_stependdata(step_record_t *step_rec)
{
	if (ext_sensors_init() < 0)
		return SLURM_ERROR;

	return ops.get_stependdata(step_rec);
}
221 
ext_sensors_g_get_config(void * data)222 extern int ext_sensors_g_get_config(void *data)
223 {
224 
225 	List *tmp_list = (List *) data;
226 
227 	if (ext_sensors_init() < 0)
228 		return SLURM_ERROR;
229 
230 	*tmp_list = (*(ops.get_config))();
231 
232 	return SLURM_SUCCESS;
233 }
234