1 /*****************************************************************************\
2  *  fetch_config.c - functions for "configless" slurm operation
3  *****************************************************************************
4  *  Copyright (C) 2020 SchedMD LLC.
5  *  Written by Tim Wickberg <tim@schedmd.com>
6  *
7  *  This file is part of Slurm, a resource management program.
8  *  For details, see <https://slurm.schedmd.com/>.
9  *  Please also read the included file: DISCLAIMER.
10  *
11  *  Slurm is free software; you can redistribute it and/or modify it under
12  *  the terms of the GNU General Public License as published by the Free
13  *  Software Foundation; either version 2 of the License, or (at your option)
14  *  any later version.
15  *
16  *  In addition, as a special exception, the copyright holders give permission
17  *  to link the code of portions of this program with the OpenSSL library under
18  *  certain conditions as described in each individual source file, and
19  *  distribute linked combinations including the two. You must obey the GNU
20  *  General Public License in all respects for all of the code used other than
21  *  OpenSSL. If you modify file(s) with this exception, you may extend this
22  *  exception to your version of the file(s), but you are not obligated to do
23  *  so. If you do not wish to do so, delete this exception statement from your
24  *  version.  If you delete this exception statement from all source files in
25  *  the program, then also delete it here.
26  *
27  *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
28  *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
29  *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
30  *  details.
31  *
32  *  You should have received a copy of the GNU General Public License along
33  *  with Slurm; if not, write to the Free Software Foundation, Inc.,
34  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
35 \*****************************************************************************/
36 
37 #define _GNU_SOURCE
38 
39 #include <inttypes.h>
40 #include <sys/mman.h>	/* memfd_create */
41 #include <sys/types.h>
42 
43 #include "src/common/fetch_config.h"
44 #include "src/common/read_config.h"
45 #include "src/common/slurm_protocol_api.h"
46 #include "src/common/slurm_protocol_defs.h"
47 #include "src/common/slurm_resolv.h"
48 #include "src/common/strlcpy.h"
49 #include "src/common/xstring.h"
50 #include "src/common/xmalloc.h"
51 
/* Defined later in this file; used by _fetch_child() before its definition. */
static void _init_minimal_conf_server_config(List controllers);

/*
 * Pipe created by fetch_config() for the forked child to hand the packed
 * config response back to the parent: [0] is read by the parent, [1] is
 * written by the child. Both are -1 until pipe() has been called.
 */
static int to_parent[2] = {-1, -1};
55 
_fetch_parent(pid_t pid)56 static config_response_msg_t *_fetch_parent(pid_t pid)
57 {
58 	int len;
59 	buf_t *buffer;
60 	config_response_msg_t *config = NULL;
61 	int status;
62 
63 	safe_read(to_parent[0], &len, sizeof(int));
64 	buffer = init_buf(len);
65 	safe_read(to_parent[0], buffer->head, len);
66 
67 	if (unpack_config_response_msg(&config, buffer,
68 				       SLURM_PROTOCOL_VERSION)) {
69 		error("%s: unpack failed", __func__);
70 		return NULL;
71 	}
72 
73 	waitpid(pid, &status, 0);
74 	debug2("%s: status from child %d", __func__, status);
75 	return config;
76 
77 rwfail:
78 	error("%s: failed to read from child: %m", __func__);
79 	waitpid(pid, &status, 0);
80 	debug2("%s: status from child %d", __func__, status);
81 
82 	return NULL;
83 }
84 
/*
 * _fetch_child - child side of the fork()ed config fetch
 * IN controllers - list of ctl_entry_t used to build the minimal config
 * IN flags - passed through to fetch_config_from_controller()
 *
 * Loads a minimal config, requests the real configs, then writes an int
 * length followed by the packed response to to_parent[1].
 * Does not return: calls _exit(0) after a successful write, _exit(1) on
 * any failure.
 */
static void _fetch_child(List controllers, uint32_t flags)
{
	config_response_msg_t *remote_conf;
	buf_t *packed = init_buf(1024 * 1024);
	int packed_len;

	/*
	 * The parent process held the conf lock when it forked. It must be
	 * released here before any RPC is issued, since the RPC stack calls
	 * into several slurm_conf_get_() functions.
	 *
	 * This is safe as we're single-threaded due to the fork().
	 */
	slurm_conf_unlock();

	_init_minimal_conf_server_config(controllers);

	if (!(remote_conf = fetch_config_from_controller(flags))) {
		error("%s: failed to fetch remote configs", __func__);
		_exit(1);
	}

	pack_config_response_msg(remote_conf, packed, SLURM_PROTOCOL_VERSION);
	packed_len = packed->processed;

	/* length prefix first, then the packed payload */
	safe_write(to_parent[1], &packed_len, sizeof(int));
	safe_write(to_parent[1], packed->head, packed_len);

	_exit(0);

rwfail:
	error("%s: failed to write to parent: %m", __func__);
	_exit(1);
}
120 
fetch_config(char * conf_server,uint32_t flags)121 extern config_response_msg_t *fetch_config(char *conf_server, uint32_t flags)
122 {
123 	char *env_conf_server = getenv("SLURM_CONF_SERVER");
124 	List controllers = NULL;
125 	pid_t pid;
126 
127 	/*
128 	 * Two main processing options here: we are either given an explicit
129 	 * server (with optional port number) via SLURM_CONF_SERVER or the
130 	 * conf_server argument, or we will need to make a blind DNS lookup.
131 	 *
132 	 * In either case, phase one here is to make a List with at least one
133 	 * slurmctld entry.
134 	 */
135 	if (env_conf_server || conf_server) {
136 		char *server, *port;
137 		ctl_entry_t *ctl = xmalloc(sizeof(*ctl));
138 		controllers = list_create(xfree_ptr);
139 
140 		if (!(server = env_conf_server))
141 			server = conf_server;
142 		strlcpy(ctl->hostname, server, sizeof(ctl->hostname));
143 
144 		if ((port = xstrchr(ctl->hostname, ':'))) {
145 			*port = '\0';
146 			port++;
147 			ctl->port = atoi(port);
148 		} else
149 			ctl->port = SLURMCTLD_PORT;
150 
151 		list_push(controllers, ctl);
152 	} else {
153                 if (!(controllers = resolve_ctls_from_dns_srv())) {
154                         error("%s: DNS SRV lookup failed", __func__);
155 			return NULL;
156                 }
157 	}
158 
159 	/*
160 	 * At this point we have a List of controllers.
161 	 * Use that to build a memfd-backed minimal config file so we can
162 	 * communicate with slurmctld and get the real configs.
163 	 */
164 	if (pipe(to_parent) < 0) {
165 		error("%s: pipe failed: %m", __func__);
166 		return NULL;
167 	}
168 
169 	if ((pid = fork()) < 0) {
170 		error("%s: fork: %m", __func__);
171 		close(to_parent[0]);
172 		close(to_parent[1]);
173 		return NULL;
174 	} else if (pid > 0) {
175 		list_destroy(controllers);
176 		return _fetch_parent(pid);
177 	}
178 
179 	_fetch_child(controllers, flags);
180 	return NULL;
181 }
182 
fetch_config_from_controller(uint32_t flags)183 extern config_response_msg_t *fetch_config_from_controller(uint32_t flags)
184 {
185 	int rc;
186 	slurm_msg_t req_msg;
187 	slurm_msg_t resp_msg;
188 	config_request_msg_t req;
189 	config_response_msg_t *resp;
190 
191 	slurm_msg_t_init(&req_msg);
192 	slurm_msg_t_init(&resp_msg);
193 
194 	memset(&req, 0, sizeof(req));
195 	req.flags = flags;
196 	req_msg.msg_type = REQUEST_CONFIG;
197 	req_msg.data = &req;
198 
199 	if (slurm_send_recv_controller_msg(&req_msg, &resp_msg,
200 					   working_cluster_rec) < 0)
201 		return NULL;
202 
203 	switch (resp_msg.msg_type) {
204 	case RESPONSE_CONFIG:
205 		resp = (config_response_msg_t *) resp_msg.data;
206 		break;
207 	case RESPONSE_SLURM_RC:
208 		rc = ((return_code_msg_t *) resp_msg.data)->return_code;
209 		slurm_free_return_code_msg(resp_msg.data);
210 		slurm_seterrno(rc);
211 		return NULL;
212 		break;
213 	default:
214 		slurm_seterrno(SLURM_UNEXPECTED_MSG_ERROR);
215 		return NULL;
216 		break;
217 	}
218 
219 	return resp;
220 }
221 
dump_to_memfd(char * type,char * config,char ** filename)222 int dump_to_memfd(char *type, char *config, char **filename)
223 {
224 #ifdef HAVE_MEMFD_CREATE
225 	pid_t pid = getpid();
226 
227 	int fd = memfd_create(type, MFD_CLOEXEC);
228 	if (fd < 0)
229 		fatal("%s: failed memfd_create: %m", __func__);
230 
231 	xfree(*filename);
232 	xstrfmtcat(*filename, "/proc/%lu/fd/%d", (unsigned long) pid, fd);
233 
234 	safe_write(fd, config, strlen(config));
235 
236 	return fd;
237 
238 rwfail:
239 	fatal("%s: could not write conf file, likely out of memory", __func__);
240 	return SLURM_ERROR;
241 #else
242 	pid_t pid = getpid();
243 	char template[] = "/tmp/fake-memfd-XXXXXX";
244 	int fd = mkstemp(template);
245 
246 	if (fd < 0)
247 		fatal("%s: could not create temp file", __func__);
248 	/* immediately unlink the file so it doesn't get left around */
249 	(void) unlink(template);
250 
251 	xfree(*filename);
252 	xstrfmtcat(*filename, "/proc/%lu/fd/%d", (unsigned long) pid, fd);
253 
254 	safe_write(fd, config, strlen(config));
255 
256 	return fd;
257 
258 rwfail:
259 	fatal("%s: could not write conf file", __func__);
260 	return SLURM_ERROR;
261 #endif
262 }
263 
_print_controllers(void * x,void * arg)264 static int _print_controllers(void *x, void *arg)
265 {
266 	ctl_entry_t *ctl = (ctl_entry_t *) x;
267 	char **conf = (char **) arg;
268 
269 	/*
270 	 * First ctl entry's port number will be used. Slurm does not support
271 	 * the TCP port varying between slurmctlds.
272 	 */
273 	if (!*conf)
274 		xstrfmtcat(*conf, "SlurmctldPort=%u\n", ctl->port);
275 	xstrfmtcat(*conf, "SlurmctldHost=%s\n", ctl->hostname);
276 
277 	return SLURM_SUCCESS;
278 }
279 
/*
 * _init_minimal_conf_server_config - load a throwaway minimal slurm.conf
 * IN controllers - list of ctl_entry_t to name as SlurmctldHost entries
 *
 * Builds the config text (port, hosts, ClusterName=CONFIGLESS), dumps it
 * through dump_to_memfd(), and passes the resulting path to
 * slurm_conf_init(). The descriptor and path are released afterwards.
 */
static void _init_minimal_conf_server_config(List controllers)
{
	char *minimal_conf = NULL;
	char *conf_path = NULL;
	int conf_fd;

	list_for_each(controllers, _print_controllers, &minimal_conf);
	xstrfmtcat(minimal_conf, "ClusterName=CONFIGLESS\n");

	conf_fd = dump_to_memfd("slurm.conf", minimal_conf, &conf_path);
	if (conf_fd < 0)
		fatal("%s: could not write temporary config", __func__);
	xfree(minimal_conf);

	slurm_conf_init(conf_path);

	close(conf_fd);
	xfree(conf_path);
}
297 
_write_conf(const char * dir,const char * name,const char * content)298 static int _write_conf(const char *dir, const char *name, const char *content)
299 {
300 	char *file = NULL, *file_final = NULL;
301 	int fd = -1;
302 
303 	xstrfmtcat(file, "%s/%s.new", dir, name);
304 	xstrfmtcat(file_final, "%s/%s", dir, name);
305 
306 	if (!content) {
307 		(void) unlink(file_final);
308 		goto cleanup;
309 	}
310 
311 	if ((fd = open(file, O_CREAT|O_WRONLY|O_TRUNC|O_CLOEXEC, 0644)) < 0) {
312 		error("%s: could not open config file `%s`", __func__, file);
313 		goto rwfail;
314 	}
315 
316 	safe_write(fd, content, strlen(content));
317 	close(fd);
318 	fd = -1;
319 
320 	if (rename(file, file_final))
321 		goto rwfail;
322 
323 cleanup:
324 	xfree(file);
325 	xfree(file_final);
326 	return SLURM_SUCCESS;
327 
328 rwfail:
329 	error("%s: error writing config to %s: %m", __func__, file);
330 	xfree(file);
331 	xfree(file_final);
332 	if (fd >= 0)
333 		close(fd);
334 	return SLURM_ERROR;
335 }
336 
/*
 * write_configs_to_conf_cache - write every config in msg into dir
 * IN msg - config response holding the file contents
 * IN dir - directory to write into
 * RET SLURM_SUCCESS, or SLURM_ERROR as soon as one file fails (the
 *	remaining files are then not processed)
 *
 * Each entry is handed to _write_conf(), including those whose content
 * is NULL.
 */
extern int write_configs_to_conf_cache(config_response_msg_t *msg,
				       const char *dir)
{
	const struct {
		const char *name;
		const char *content;
	} files[] = {
		{ "slurm.conf", msg->config },
		{ "acct_gather.conf", msg->acct_gather_config },
		{ "cgroup.conf", msg->cgroup_config },
		{ "cgroup_allowed_devices_file.conf",
		  msg->cgroup_allowed_devices_file_config },
		{ "ext_sensors.conf", msg->ext_sensors_config },
		{ "gres.conf", msg->gres_config },
		{ "knl_cray.conf", msg->knl_cray_config },
		{ "knl_generic.conf", msg->knl_generic_config },
		{ "plugstack.conf", msg->plugstack_config },
		{ "topology.conf", msg->topology_config },
	};
	size_t i;

	for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
		if (_write_conf(dir, files[i].name, files[i].content))
			return SLURM_ERROR;
	}

	return SLURM_SUCCESS;
}
364 
_load_conf(const char * dir,const char * name,char ** target)365 static void _load_conf(const char *dir, const char *name, char **target)
366 {
367 	char *file = NULL;
368 	buf_t *config;
369 
370 	xstrfmtcat(file, "%s/%s", dir, name);
371 	config = create_mmap_buf(file);
372 	xfree(file);
373 
374 	/*
375 	 * If we can't load a given config, then assume that one isn't required
376 	 * on this system.
377 	 */
378 	if (config)
379 		*target = xstrndup(config->head, config->size);
380 
381 	free_buf(config);
382 }
383 
/*
 * load_config_response_msg - fill msg from the config files on disk
 * IN/OUT msg - response to populate
 * IN flags - when CONFIG_REQUEST_SLURMD is set, the additional config
 *	files and slurmd_spooldir are loaded too; otherwise only slurm.conf
 *
 * Files are read from the directory returned by get_extra_conf_path("").
 */
extern void load_config_response_msg(config_response_msg_t *msg, int flags)
{
	char *conf_dir;

	xassert(msg);
	conf_dir = get_extra_conf_path("");

	_load_conf(conf_dir, "slurm.conf", &msg->config);

	if (flags & CONFIG_REQUEST_SLURMD) {
		struct {
			const char *name;
			char **target;
		} extras[] = {
			{ "acct_gather.conf", &msg->acct_gather_config },
			{ "cgroup.conf", &msg->cgroup_config },
			{ "cgroup_allowed_devices_file.conf",
			  &msg->cgroup_allowed_devices_file_config },
			{ "ext_sensors.conf", &msg->ext_sensors_config },
			{ "gres.conf", &msg->gres_config },
			{ "knl_cray.conf", &msg->knl_cray_config },
			{ "knl_generic.conf", &msg->knl_generic_config },
			{ "plugstack.conf", &msg->plugstack_config },
			{ "topology.conf", &msg->topology_config },
		};
		size_t i;

		for (i = 0; i < sizeof(extras) / sizeof(extras[0]); i++)
			_load_conf(conf_dir, extras[i].name, extras[i].target);

		msg->slurmd_spooldir = xstrdup(slurmctld_conf.slurmd_spooldir);
	}

	xfree(conf_dir);
}
411