1 /*****************************************************************************\
2 * fetch_config.c - functions for "configless" slurm operation
3 *****************************************************************************
4 * Copyright (C) 2020 SchedMD LLC.
5 * Written by Tim Wickberg <tim@schedmd.com>
6 *
7 * This file is part of Slurm, a resource management program.
8 * For details, see <https://slurm.schedmd.com/>.
9 * Please also read the included file: DISCLAIMER.
10 *
11 * Slurm is free software; you can redistribute it and/or modify it under
12 * the terms of the GNU General Public License as published by the Free
13 * Software Foundation; either version 2 of the License, or (at your option)
14 * any later version.
15 *
16 * In addition, as a special exception, the copyright holders give permission
17 * to link the code of portions of this program with the OpenSSL library under
18 * certain conditions as described in each individual source file, and
19 * distribute linked combinations including the two. You must obey the GNU
20 * General Public License in all respects for all of the code used other than
21 * OpenSSL. If you modify file(s) with this exception, you may extend this
22 * exception to your version of the file(s), but you are not obligated to do
23 * so. If you do not wish to do so, delete this exception statement from your
24 * version. If you delete this exception statement from all source files in
25 * the program, then also delete it here.
26 *
27 * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
28 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
29 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
30 * details.
31 *
32 * You should have received a copy of the GNU General Public License along
33 * with Slurm; if not, write to the Free Software Foundation, Inc.,
34 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
35 \*****************************************************************************/
36
37 #define _GNU_SOURCE
38
39 #include <inttypes.h>
40 #include <sys/mman.h> /* memfd_create */
41 #include <sys/types.h>
42
43 #include "src/common/fetch_config.h"
44 #include "src/common/read_config.h"
45 #include "src/common/slurm_protocol_api.h"
46 #include "src/common/slurm_protocol_defs.h"
47 #include "src/common/slurm_resolv.h"
48 #include "src/common/strlcpy.h"
49 #include "src/common/xstring.h"
50 #include "src/common/xmalloc.h"
51
/* Defined further down in this file; declared here for _fetch_child(). */
static void _init_minimal_conf_server_config(List controllers);

/*
 * Pipe shared between fetch_config() and its forked child:
 * _fetch_child() writes the packed response to to_parent[1] and
 * _fetch_parent() reads it back from to_parent[0].
 * Both ends hold -1 until fetch_config() calls pipe() on this array.
 */
static int to_parent[2] = {-1, -1};
55
_fetch_parent(pid_t pid)56 static config_response_msg_t *_fetch_parent(pid_t pid)
57 {
58 int len;
59 buf_t *buffer;
60 config_response_msg_t *config = NULL;
61 int status;
62
63 safe_read(to_parent[0], &len, sizeof(int));
64 buffer = init_buf(len);
65 safe_read(to_parent[0], buffer->head, len);
66
67 if (unpack_config_response_msg(&config, buffer,
68 SLURM_PROTOCOL_VERSION)) {
69 error("%s: unpack failed", __func__);
70 return NULL;
71 }
72
73 waitpid(pid, &status, 0);
74 debug2("%s: status from child %d", __func__, status);
75 return config;
76
77 rwfail:
78 error("%s: failed to read from child: %m", __func__);
79 waitpid(pid, &status, 0);
80 debug2("%s: status from child %d", __func__, status);
81
82 return NULL;
83 }
84
/*
 * Child side of the fork()ed fetch: load a minimal configuration built
 * from the controller list, request the real configuration from the
 * controller, and send it packed to the parent over the to_parent pipe.
 *
 * Every path ends in _exit(): 0 once the response has been written,
 * 1 if the fetch or the write failed.
 *
 * IN controllers - list of controllers used to build the minimal config
 * IN flags - request flags handed to fetch_config_from_controller()
 */
static void _fetch_child(List controllers, uint32_t flags)
{
	buf_t *packed = init_buf(1024 * 1024);
	config_response_msg_t *reply;
	int packed_len;

	/*
	 * The parent process held the conf lock when it forked. It has to be
	 * released before any RPC is issued, because the RPC stack calls
	 * into several slurm_conf_get_() functions.
	 *
	 * Safe to do here: after fork() this process is single-threaded.
	 */
	slurm_conf_unlock();

	_init_minimal_conf_server_config(controllers);

	reply = fetch_config_from_controller(flags);
	if (reply == NULL) {
		error("%s: failed to fetch remote configs", __func__);
		_exit(1);
	}

	pack_config_response_msg(reply, packed, SLURM_PROTOCOL_VERSION);
	packed_len = packed->processed;

	/* Wire format on the pipe: int length, then the packed bytes. */
	safe_write(to_parent[1], &packed_len, sizeof(int));
	safe_write(to_parent[1], packed->head, packed_len);

	_exit(0);

rwfail:
	error("%s: failed to write to parent: %m", __func__);
	_exit(1);
}
120
fetch_config(char * conf_server,uint32_t flags)121 extern config_response_msg_t *fetch_config(char *conf_server, uint32_t flags)
122 {
123 char *env_conf_server = getenv("SLURM_CONF_SERVER");
124 List controllers = NULL;
125 pid_t pid;
126
127 /*
128 * Two main processing options here: we are either given an explicit
129 * server (with optional port number) via SLURM_CONF_SERVER or the
130 * conf_server argument, or we will need to make a blind DNS lookup.
131 *
132 * In either case, phase one here is to make a List with at least one
133 * slurmctld entry.
134 */
135 if (env_conf_server || conf_server) {
136 char *server, *port;
137 ctl_entry_t *ctl = xmalloc(sizeof(*ctl));
138 controllers = list_create(xfree_ptr);
139
140 if (!(server = env_conf_server))
141 server = conf_server;
142 strlcpy(ctl->hostname, server, sizeof(ctl->hostname));
143
144 if ((port = xstrchr(ctl->hostname, ':'))) {
145 *port = '\0';
146 port++;
147 ctl->port = atoi(port);
148 } else
149 ctl->port = SLURMCTLD_PORT;
150
151 list_push(controllers, ctl);
152 } else {
153 if (!(controllers = resolve_ctls_from_dns_srv())) {
154 error("%s: DNS SRV lookup failed", __func__);
155 return NULL;
156 }
157 }
158
159 /*
160 * At this point we have a List of controllers.
161 * Use that to build a memfd-backed minimal config file so we can
162 * communicate with slurmctld and get the real configs.
163 */
164 if (pipe(to_parent) < 0) {
165 error("%s: pipe failed: %m", __func__);
166 return NULL;
167 }
168
169 if ((pid = fork()) < 0) {
170 error("%s: fork: %m", __func__);
171 close(to_parent[0]);
172 close(to_parent[1]);
173 return NULL;
174 } else if (pid > 0) {
175 list_destroy(controllers);
176 return _fetch_parent(pid);
177 }
178
179 _fetch_child(controllers, flags);
180 return NULL;
181 }
182
fetch_config_from_controller(uint32_t flags)183 extern config_response_msg_t *fetch_config_from_controller(uint32_t flags)
184 {
185 int rc;
186 slurm_msg_t req_msg;
187 slurm_msg_t resp_msg;
188 config_request_msg_t req;
189 config_response_msg_t *resp;
190
191 slurm_msg_t_init(&req_msg);
192 slurm_msg_t_init(&resp_msg);
193
194 memset(&req, 0, sizeof(req));
195 req.flags = flags;
196 req_msg.msg_type = REQUEST_CONFIG;
197 req_msg.data = &req;
198
199 if (slurm_send_recv_controller_msg(&req_msg, &resp_msg,
200 working_cluster_rec) < 0)
201 return NULL;
202
203 switch (resp_msg.msg_type) {
204 case RESPONSE_CONFIG:
205 resp = (config_response_msg_t *) resp_msg.data;
206 break;
207 case RESPONSE_SLURM_RC:
208 rc = ((return_code_msg_t *) resp_msg.data)->return_code;
209 slurm_free_return_code_msg(resp_msg.data);
210 slurm_seterrno(rc);
211 return NULL;
212 break;
213 default:
214 slurm_seterrno(SLURM_UNEXPECTED_MSG_ERROR);
215 return NULL;
216 break;
217 }
218
219 return resp;
220 }
221
dump_to_memfd(char * type,char * config,char ** filename)222 int dump_to_memfd(char *type, char *config, char **filename)
223 {
224 #ifdef HAVE_MEMFD_CREATE
225 pid_t pid = getpid();
226
227 int fd = memfd_create(type, MFD_CLOEXEC);
228 if (fd < 0)
229 fatal("%s: failed memfd_create: %m", __func__);
230
231 xfree(*filename);
232 xstrfmtcat(*filename, "/proc/%lu/fd/%d", (unsigned long) pid, fd);
233
234 safe_write(fd, config, strlen(config));
235
236 return fd;
237
238 rwfail:
239 fatal("%s: could not write conf file, likely out of memory", __func__);
240 return SLURM_ERROR;
241 #else
242 pid_t pid = getpid();
243 char template[] = "/tmp/fake-memfd-XXXXXX";
244 int fd = mkstemp(template);
245
246 if (fd < 0)
247 fatal("%s: could not create temp file", __func__);
248 /* immediately unlink the file so it doesn't get left around */
249 (void) unlink(template);
250
251 xfree(*filename);
252 xstrfmtcat(*filename, "/proc/%lu/fd/%d", (unsigned long) pid, fd);
253
254 safe_write(fd, config, strlen(config));
255
256 return fd;
257
258 rwfail:
259 fatal("%s: could not write conf file", __func__);
260 return SLURM_ERROR;
261 #endif
262 }
263
_print_controllers(void * x,void * arg)264 static int _print_controllers(void *x, void *arg)
265 {
266 ctl_entry_t *ctl = (ctl_entry_t *) x;
267 char **conf = (char **) arg;
268
269 /*
270 * First ctl entry's port number will be used. Slurm does not support
271 * the TCP port varying between slurmctlds.
272 */
273 if (!*conf)
274 xstrfmtcat(*conf, "SlurmctldPort=%u\n", ctl->port);
275 xstrfmtcat(*conf, "SlurmctldHost=%s\n", ctl->hostname);
276
277 return SLURM_SUCCESS;
278 }
279
/*
 * Build a minimal slurm.conf (controller port, controller hosts and a
 * placeholder ClusterName) from the controller list, dump it through
 * dump_to_memfd(), and load it with slurm_conf_init().
 *
 * IN controllers - list of ctl_entry_t to describe in the config
 */
static void _init_minimal_conf_server_config(List controllers)
{
	char *text = NULL;
	char *path = NULL;
	int conf_fd;

	list_for_each(controllers, _print_controllers, &text);
	xstrfmtcat(text, "ClusterName=CONFIGLESS\n");

	conf_fd = dump_to_memfd("slurm.conf", text, &path);
	if (conf_fd < 0)
		fatal("%s: could not write temporary config", __func__);
	xfree(text);

	slurm_conf_init(path);

	/* The descriptor and its path are only needed while loading. */
	close(conf_fd);
	xfree(path);
}
297
_write_conf(const char * dir,const char * name,const char * content)298 static int _write_conf(const char *dir, const char *name, const char *content)
299 {
300 char *file = NULL, *file_final = NULL;
301 int fd = -1;
302
303 xstrfmtcat(file, "%s/%s.new", dir, name);
304 xstrfmtcat(file_final, "%s/%s", dir, name);
305
306 if (!content) {
307 (void) unlink(file_final);
308 goto cleanup;
309 }
310
311 if ((fd = open(file, O_CREAT|O_WRONLY|O_TRUNC|O_CLOEXEC, 0644)) < 0) {
312 error("%s: could not open config file `%s`", __func__, file);
313 goto rwfail;
314 }
315
316 safe_write(fd, content, strlen(content));
317 close(fd);
318 fd = -1;
319
320 if (rename(file, file_final))
321 goto rwfail;
322
323 cleanup:
324 xfree(file);
325 xfree(file_final);
326 return SLURM_SUCCESS;
327
328 rwfail:
329 error("%s: error writing config to %s: %m", __func__, file);
330 xfree(file);
331 xfree(file_final);
332 if (fd >= 0)
333 close(fd);
334 return SLURM_ERROR;
335 }
336
/*
 * Write every config carried in a config response into the cache
 * directory, one file per config, stopping at the first failure.
 *
 * IN msg - response whose config strings are written out
 * IN dir - directory to write the files into
 * RET SLURM_SUCCESS if every _write_conf() call succeeded, else
 *     SLURM_ERROR
 */
extern int write_configs_to_conf_cache(config_response_msg_t *msg,
				       const char *dir)
{
	/* File name / content pairs, in the order they are written. */
	const struct {
		const char *name;
		const char *content;
	} files[] = {
		{ "slurm.conf", msg->config },
		{ "acct_gather.conf", msg->acct_gather_config },
		{ "cgroup.conf", msg->cgroup_config },
		{ "cgroup_allowed_devices_file.conf",
		  msg->cgroup_allowed_devices_file_config },
		{ "ext_sensors.conf", msg->ext_sensors_config },
		{ "gres.conf", msg->gres_config },
		{ "knl_cray.conf", msg->knl_cray_config },
		{ "knl_generic.conf", msg->knl_generic_config },
		{ "plugstack.conf", msg->plugstack_config },
		{ "topology.conf", msg->topology_config },
	};
	size_t i;

	for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
		if (_write_conf(dir, files[i].name, files[i].content))
			return SLURM_ERROR;
	}

	return SLURM_SUCCESS;
}
364
_load_conf(const char * dir,const char * name,char ** target)365 static void _load_conf(const char *dir, const char *name, char **target)
366 {
367 char *file = NULL;
368 buf_t *config;
369
370 xstrfmtcat(file, "%s/%s", dir, name);
371 config = create_mmap_buf(file);
372 xfree(file);
373
374 /*
375 * If we can't load a given config, then assume that one isn't required
376 * on this system.
377 */
378 if (config)
379 *target = xstrndup(config->head, config->size);
380
381 free_buf(config);
382 }
383
/*
 * Fill a config response from the config files found in the directory
 * returned by get_extra_conf_path("").
 *
 * slurm.conf is always loaded. The remaining files and slurmd_spooldir
 * are only filled in when flags contains CONFIG_REQUEST_SLURMD.
 *
 * IN/OUT msg - response to populate; must not be NULL
 * IN flags - request flags
 */
extern void load_config_response_msg(config_response_msg_t *msg, int flags)
{
	xassert(msg);
	char *dir = get_extra_conf_path("");

	_load_conf(dir, "slurm.conf", &msg->config);

	if (flags & CONFIG_REQUEST_SLURMD) {
		/* Extra files wanted by slurmd, in load order. */
		const struct {
			const char *name;
			char **target;
		} extra[] = {
			{ "acct_gather.conf", &msg->acct_gather_config },
			{ "cgroup.conf", &msg->cgroup_config },
			{ "cgroup_allowed_devices_file.conf",
			  &msg->cgroup_allowed_devices_file_config },
			{ "ext_sensors.conf", &msg->ext_sensors_config },
			{ "gres.conf", &msg->gres_config },
			{ "knl_cray.conf", &msg->knl_cray_config },
			{ "knl_generic.conf", &msg->knl_generic_config },
			{ "plugstack.conf", &msg->plugstack_config },
			{ "topology.conf", &msg->topology_config },
		};
		size_t i;

		for (i = 0; i < sizeof(extra) / sizeof(extra[0]); i++)
			_load_conf(dir, extra[i].name, extra[i].target);

		msg->slurmd_spooldir =
			xstrdup(slurmctld_conf.slurmd_spooldir);
	}

	xfree(dir);
}
411