1 /*****************************************************************************\
2 * acct_gather_profile_hdf5.c - slurm energy accounting plugin for
3 * hdf5 profiling.
4 *****************************************************************************
5 * Copyright (C) 2013 Bull S. A. S.
6 * Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois.
7 *
8 * Portions Copyright (C) 2013 SchedMD LLC.
9 *
10 * Initially written by Rod Schultz <rod.schultz@bull.com> @ Bull
11 * and Danny Auble <da@schedmd.com> @ SchedMD.
12 * Adapted by Yoann Blein <yoann.blein@bull.net> @ Bull.
13 *
14 * This file is part of Slurm, a resource management program.
15 * For details, see <https://slurm.schedmd.com>.
16 * Please also read the included file: DISCLAIMER.
17 *
18 * Slurm is free software; you can redistribute it and/or modify it under
19 * the terms of the GNU General Public License as published by the Free
20 * Software Foundation; either version 2 of the License, or (at your option)
21 * any later version.
22 *
23 * In addition, as a special exception, the copyright holders give permission
24 * to link the code of portions of this program with the OpenSSL library under
25 * certain conditions as described in each individual source file, and
26 * distribute linked combinations including the two. You must obey the GNU
27 * General Public License in all respects for all of the code used other than
28 * OpenSSL. If you modify file(s) with this exception, you may extend this
29 * exception to your version of the file(s), but you are not obligated to do
30 * so. If you do not wish to do so, delete this exception statement from your
31 * version. If you delete this exception statement from all source files in
32 * the program, then also delete it here.
33 *
34 * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
35 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
36 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
37 * details.
38 *
39 * You should have received a copy of the GNU General Public License along
40 * with Slurm; if not, write to the Free Software Foundation, Inc.,
41 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
42 *
43 * This file is patterned after jobcomp_linux.c, written by Morris Jette and
44 * Copyright (C) 2002 The Regents of the University of California.
45 \*****************************************************************************/
46
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <sys/types.h>
50 #include <sys/un.h>
51 #include <sys/stat.h>
52 #include <fcntl.h>
53 #include <errno.h>
54 #include <inttypes.h>
55 #include <unistd.h>
56 #include <math.h>
57
58 #include "src/common/slurm_xlator.h"
59 #include "src/common/fd.h"
60 #include "src/common/slurm_acct_gather_profile.h"
61 #include "src/common/slurm_protocol_api.h"
62 #include "src/common/slurm_protocol_defs.h"
63 #include "src/common/slurm_time.h"
64 #include "src/slurmd/common/proctrack.h"
65 #include "hdf5_api.h"
66
/* Chunk size handed to H5PTcreate_fl() whenever a packet table is created. */
#define HDF5_CHUNK_SIZE 10
/* Compression level, a value of 0 through 9. Level 0 is faster but offers the
 * least compression; level 9 is slower but offers maximum compression.
 * A setting of -1 indicates that no compression is desired. */
/* TODO: Make this configurable with a parameter */
#define HDF5_COMPRESS 0
73
74 /*
75 * These variables are required by the generic plugin interface. If they
76 * are not found in the plugin, the plugin loader will ignore it.
77 *
78 * plugin_name - a string giving a human-readable description of the
79 * plugin. There is no maximum length, but the symbol must refer to
80 * a valid string.
81 *
82 * plugin_type - a string suggesting the type of the plugin or its
83 * applicability to a particular form of data or method of data handling.
84 * If the low-level plugin API is used, the contents of this string are
85 * unimportant and may be anything. Slurm uses the higher-level plugin
86 * interface which requires this string to be of the form
87 *
88 * <application>/<method>
89 *
90 * where <application> is a description of the intended application of
91 * the plugin (e.g., "jobacct" for Slurm job completion logging) and <method>
92 * is a description of how this plugin satisfies that application. Slurm will
93 * only load job completion logging plugins if the plugin_type string has a
94 * prefix of "jobacct/".
95 *
96 * plugin_version - an unsigned 32-bit integer containing the Slurm version
97 * (major.minor.micro combined into a single number).
98 */
/* Human-readable name; logged once the plugin configuration has been read. */
const char plugin_name[] = "AcctGatherProfile hdf5 plugin";
/* Plugin type in <application>/<method> form; also quoted in config errors. */
const char plugin_type[] = "acct_gather_profile/hdf5";
/* Slurm version number this plugin was compiled with. */
const uint32_t plugin_version = SLURM_VERSION_NUMBER;
102
/* Plugin configuration, filled in from the ProfileHDF5* options. */
typedef struct {
	char *dir;	/* ProfileHDF5Dir: root directory of the profile files */
	uint32_t def;	/* ProfileHDF5Default: profile used when the job sets none */
} slurm_hdf5_conf_t;
107
/* Bookkeeping for one packet table created by create_dataset(). */
typedef struct {
	hid_t table_id;		/* id returned by H5PTcreate_fl() */
	size_t type_size;	/* size in bytes of one record, time header included */
} table_t;
112
// Global HDF5 Variables
// The HDF5 file and base objects will remain open for the duration of the
// step. This avoids reconstruction on every acct_gather_sample and
// flushing the buffers on every put.
// Static variables are OK because the add functions are called inside a lock
// (per the original author; the lock itself is not visible in this file).
static hid_t file_id = -1; // File
static hid_t gid_node = -1;	// node group, created in node_step_start
// The three ids below are only ever closed in this file, never assigned.
static hid_t gid_tasks = -1;
static hid_t gid_samples = -1;
static hid_t gid_totals = -1;
static char group_node[MAX_GROUP_NAME+1];	// "/<node_name>" group path
static slurm_hdf5_conf_t hdf5_conf;
static uint64_t debug_flags = 0;
static uint32_t g_profile_running = ACCT_GATHER_PROFILE_NOT_SET;
static stepd_step_rec_t *g_job = NULL;	// step record given to node_step_start
static time_t step_start_time;	// reference point for the ElapsedTime field

static hid_t *groups = NULL;	// ids of the groups made by create_group
static size_t groups_len = 0;
static table_t *tables = NULL;	// tables made by create_dataset
static size_t tables_max_len = 0;	// allocated slots in tables
static size_t tables_cur_len = 0;	// used slots in tables
135
_reset_slurm_profile_conf(void)136 static void _reset_slurm_profile_conf(void)
137 {
138 xfree(hdf5_conf.dir);
139 hdf5_conf.def = ACCT_GATHER_PROFILE_NONE;
140 }
141
_determine_profile(void)142 static uint32_t _determine_profile(void)
143 {
144 uint32_t profile;
145 xassert(g_job);
146
147 if (g_profile_running != ACCT_GATHER_PROFILE_NOT_SET)
148 profile = g_profile_running;
149 else if (g_job->profile >= ACCT_GATHER_PROFILE_NONE)
150 profile = g_job->profile;
151 else
152 profile = hdf5_conf.def;
153
154 return profile;
155 }
156
_create_directories(void)157 static int _create_directories(void)
158 {
159 int rc;
160 struct stat st;
161 char *user_dir = NULL;
162
163 xassert(g_job);
164 xassert(hdf5_conf.dir);
165 /*
166 * If profile director does not exist, try to create it.
167 * Otherwise, ensure path is a directory as expected, and that
168 * we have permission to write to it.
169 * also make sure the subdirectory tmp exists.
170 */
171
172 if (((rc = stat(hdf5_conf.dir, &st)) < 0) && (errno == ENOENT)) {
173 if (mkdir(hdf5_conf.dir, 0755) < 0)
174 fatal("mkdir(%s): %m", hdf5_conf.dir);
175 } else if (rc < 0)
176 fatal("Unable to stat acct_gather_profile_dir: %s: %m",
177 hdf5_conf.dir);
178 else if (!S_ISDIR(st.st_mode))
179 fatal("acct_gather_profile_dir: %s: Not a directory!",
180 hdf5_conf.dir);
181 else if (access(hdf5_conf.dir, R_OK|W_OK|X_OK) < 0)
182 fatal("Incorrect permissions on acct_gather_profile_dir: %s",
183 hdf5_conf.dir);
184 if (chmod(hdf5_conf.dir, 0755) == -1)
185 error("%s: chmod(%s): %m", __func__, hdf5_conf.dir);
186
187 user_dir = xstrdup_printf("%s/%s", hdf5_conf.dir, g_job->user_name);
188 if (((rc = stat(user_dir, &st)) < 0) && (errno == ENOENT)) {
189 if (mkdir(user_dir, 0700) < 0)
190 fatal("mkdir(%s): %m", user_dir);
191 }
192 if (chmod(user_dir, 0700) == -1)
193 error("%s: chmod(%s): %m", __func__, user_dir);
194 if (chown(user_dir, (uid_t)g_job->uid,
195 (gid_t)g_job->gid) < 0)
196 error("chown(%s): %m", user_dir);
197
198 xfree(user_dir);
199
200 return SLURM_SUCCESS;
201 }
202
203 /*
204 * init() is called when the plugin is loaded, before any other functions
205 * are called. Put global initialization here.
206 */
init(void)207 extern int init(void)
208 {
209 if (!running_in_slurmstepd())
210 return SLURM_SUCCESS;
211
212 debug_flags = slurm_get_debug_flags();
213
214 /* Move HDF5 trace printing to log file instead of stderr */
215 H5Eset_auto(H5E_DEFAULT, (herr_t (*)(hid_t, void *))H5Eprint,
216 log_fp());
217
218 return SLURM_SUCCESS;
219 }
220
fini(void)221 extern int fini(void)
222 {
223 xfree(tables);
224 xfree(groups);
225 xfree(hdf5_conf.dir);
226 return SLURM_SUCCESS;
227 }
228
/*
 * Append the options understood by this plugin (ProfileHDF5Dir and
 * ProfileHDF5Default, both strings) to the caller's option table.
 */
extern void acct_gather_profile_p_conf_options(s_p_options_t **full_options,
					       int *full_options_cnt)
{
	s_p_options_t hdf5_options[] = {
		{"ProfileHDF5Dir", S_P_STRING},
		{"ProfileHDF5Default", S_P_STRING},
		{NULL}
	};

	transfer_s_p_options(full_options, hdf5_options, full_options_cnt);
}
240
/*
 * Load the plugin configuration from the parsed option table.
 *
 * The configuration is reset first. ProfileHDF5Dir is mandatory and an
 * unrecognised ProfileHDF5Default value is rejected; both problems are
 * reported through fatal().
 */
extern void acct_gather_profile_p_conf_set(s_p_hashtbl_t *tbl)
{
	char *default_str = NULL;

	_reset_slurm_profile_conf();

	if (tbl) {
		s_p_get_string(&hdf5_conf.dir, "ProfileHDF5Dir", tbl);

		if (s_p_get_string(&default_str, "ProfileHDF5Default", tbl)) {
			hdf5_conf.def =
				acct_gather_profile_from_string(default_str);
			if (hdf5_conf.def == ACCT_GATHER_PROFILE_NOT_SET)
				fatal("ProfileHDF5Default can not be "
				      "set to %s, please specify a valid "
				      "option", default_str);
			xfree(default_str);
		}
	}

	if (hdf5_conf.dir == NULL)
		fatal("No ProfileHDF5Dir in your acct_gather.conf file.  "
		      "This is required to use the %s plugin", plugin_type);

	debug("%s loaded", plugin_name);
}
265
acct_gather_profile_p_get(enum acct_gather_profile_info info_type,void * data)266 extern void acct_gather_profile_p_get(enum acct_gather_profile_info info_type,
267 void *data)
268 {
269 uint32_t *uint32 = (uint32_t *) data;
270 char **tmp_char = (char **) data;
271
272 switch (info_type) {
273 case ACCT_GATHER_PROFILE_DIR:
274 *tmp_char = xstrdup(hdf5_conf.dir);
275 break;
276 case ACCT_GATHER_PROFILE_DEFAULT:
277 *uint32 = hdf5_conf.def;
278 break;
279 case ACCT_GATHER_PROFILE_RUNNING:
280 *uint32 = g_profile_running;
281 break;
282 default:
283 debug2("acct_gather_profile_p_get info_type %d invalid",
284 info_type);
285 }
286 }
287
/*
 * Open the per-node HDF5 profile file for a step and create its node group.
 *
 * job - step record; remembered in g_job for the other entry points.
 *
 * Nothing is created when the effective profile is "none". Otherwise the
 * file <dir>/<user>/<jobid>_<stepid|batch>_<node>.h5 is created, handed to
 * the job's uid/gid with mode 0600, and a "/<node>" group holding the node
 * attributes is added.
 *
 * Returns SLURM_SUCCESS, or SLURM_ERROR when the file or the node group
 * cannot be created (file_id is left at -1 in that case).
 */
extern int acct_gather_profile_p_node_step_start(stepd_step_rec_t* job)
{
	char *profile_file_name = NULL;
	char *profile_str;
	int name_len;

	xassert(running_in_slurmstepd());

	g_job = job;

	xassert(hdf5_conf.dir);

	if (debug_flags & DEBUG_FLAG_PROFILE) {
		profile_str = acct_gather_profile_to_string(g_job->profile);
		info("PROFILE: option --profile=%s", profile_str);
	}

	if (g_profile_running == ACCT_GATHER_PROFILE_NOT_SET)
		g_profile_running = _determine_profile();

	if (g_profile_running <= ACCT_GATHER_PROFILE_NONE)
		return SLURM_SUCCESS;

	_create_directories();

	/*
	 * Use the more user friendly string "batch" rather than the raw
	 * numeric step id when the step id is NO_VAL.
	 */
	if (g_job->stepid == NO_VAL) {
		profile_file_name = xstrdup_printf("%s/%s/%u_%s_%s.h5",
						   hdf5_conf.dir,
						   g_job->user_name,
						   g_job->jobid,
						   "batch",
						   g_job->node_name);
	} else {
		profile_file_name = xstrdup_printf(
			"%s/%s/%u_%u_%s.h5",
			hdf5_conf.dir, g_job->user_name,
			g_job->jobid, g_job->stepid, g_job->node_name);
	}

	if (debug_flags & DEBUG_FLAG_PROFILE) {
		profile_str = acct_gather_profile_to_string(g_profile_running);
		info("PROFILE: node_step_start, opt=%s file=%s",
		     profile_str, profile_file_name);
	}

	/*
	 * Create a new file using the default properties
	 */
	file_id = H5Fcreate(profile_file_name, H5F_ACC_TRUNC, H5P_DEFAULT,
			    H5P_DEFAULT);
	if (file_id < 1) {
		/* Nothing was created: do not touch ownership or mode. */
		info("PROFILE: Failed to create file %s", profile_file_name);
		xfree(profile_file_name);
		file_id = -1;
		return SLURM_ERROR;
	}

	if (chown(profile_file_name, (uid_t)g_job->uid,
		  (gid_t)g_job->gid) < 0)
		error("chown(%s): %m", profile_file_name);
	if (chmod(profile_file_name, 0600) < 0)
		error("chmod(%s): %m", profile_file_name);
	xfree(profile_file_name);

	/*
	 * fd_set_close_on_exec(file_id); Not supported for HDF5
	 */

	/* group_node is a fixed-size buffer: never write past its end. */
	name_len = snprintf(group_node, sizeof(group_node), "/%s",
			    g_job->node_name);
	if ((name_len < 0) || ((size_t)name_len >= sizeof(group_node))) {
		H5Fclose(file_id);
		file_id = -1;
		info("PROFILE: Node name %s is too long for a group name",
		     g_job->node_name);
		return SLURM_ERROR;
	}

	gid_node = make_group(file_id, group_node);
	if (gid_node < 0) {
		H5Fclose(file_id);
		file_id = -1;
		info("PROFILE: Failed to create Node group");
		return SLURM_ERROR;
	}
	put_int_attribute(gid_node, ATTR_NODEINX, g_job->nodeid);
	put_string_attribute(gid_node, ATTR_NODENAME, g_job->node_name);
	put_int_attribute(gid_node, ATTR_NTASKS, g_job->node_tasks);
	put_int_attribute(gid_node, ATTR_CPUPERTASK, g_job->cpus_per_task);

	/* Reference point for the ElapsedTime field of every sample. */
	step_start_time = time(NULL);
	put_string_attribute(gid_node, ATTR_STARTTIME,
			     slurm_ctime2(&step_start_time));

	return SLURM_SUCCESS;
}
376
acct_gather_profile_p_child_forked(void)377 extern int acct_gather_profile_p_child_forked(void)
378 {
379 if (gid_totals > 0)
380 H5Gclose(gid_totals);
381 if (gid_samples > 0)
382 H5Gclose(gid_samples);
383 if (gid_tasks > 0)
384 H5Gclose(gid_tasks);
385 if (gid_node > 0)
386 H5Gclose(gid_node);
387 if (file_id > 0)
388 H5Fclose(file_id);
389
390 return SLURM_SUCCESS;
391 }
392
acct_gather_profile_p_node_step_end(void)393 extern int acct_gather_profile_p_node_step_end(void)
394 {
395 int rc = SLURM_SUCCESS;
396 size_t i;
397
398 xassert(running_in_slurmstepd());
399
400 xassert(g_profile_running != ACCT_GATHER_PROFILE_NOT_SET);
401
402 // No check for --profile as we always want to close the HDF5 file
403 // if it has been opened.
404
405
406 if (g_profile_running <= ACCT_GATHER_PROFILE_NONE)
407 return rc;
408
409 if (debug_flags & DEBUG_FLAG_PROFILE)
410 info("PROFILE: node_step_end (shutdown)");
411
412 /* close tables */
413 for (i = 0; i < tables_cur_len; ++i) {
414 H5PTclose(tables[i].table_id);
415 }
416 /* close groups */
417 for (i = 0; i < groups_len; ++i) {
418 H5Gclose(groups[i]);
419 }
420
421 if (gid_totals > 0)
422 H5Gclose(gid_totals);
423 if (gid_samples > 0)
424 H5Gclose(gid_samples);
425 if (gid_tasks > 0)
426 H5Gclose(gid_tasks);
427 if (gid_node > 0)
428 H5Gclose(gid_node);
429 if (file_id > 0)
430 H5Fclose(file_id);
431 profile_fini();
432 file_id = -1;
433
434 return rc;
435 }
436
acct_gather_profile_p_task_start(uint32_t taskid)437 extern int acct_gather_profile_p_task_start(uint32_t taskid)
438 {
439 int rc = SLURM_SUCCESS;
440
441 xassert(running_in_slurmstepd());
442 xassert(g_job);
443
444 xassert(g_profile_running != ACCT_GATHER_PROFILE_NOT_SET);
445
446 if (g_profile_running <= ACCT_GATHER_PROFILE_NONE)
447 return rc;
448
449 if (debug_flags & DEBUG_FLAG_PROFILE)
450 info("PROFILE: task_start");
451
452 return rc;
453 }
454
acct_gather_profile_p_task_end(pid_t taskpid)455 extern int acct_gather_profile_p_task_end(pid_t taskpid)
456 {
457 if (debug_flags & DEBUG_FLAG_PROFILE)
458 info("PROFILE: task_end");
459 return SLURM_SUCCESS;
460 }
461
acct_gather_profile_p_create_group(const char * name)462 extern int64_t acct_gather_profile_p_create_group(const char* name)
463 {
464 hid_t gid_group = make_group(gid_node, name);
465 if (gid_group < 0) {
466 return SLURM_ERROR;
467 }
468
469 /* store the group to keep track of it */
470 groups = xrealloc(groups, (groups_len + 1) * sizeof(hid_t));
471 groups[groups_len] = gid_group;
472 ++groups_len;
473
474 return gid_group;
475 }
476
/*
 * Create a packet table for a dataset and register it in the tables array.
 *
 * name    - name of the table inside its parent group.
 * parent  - id of the parent group; a negative value selects the node group.
 * dataset - array of field descriptions, terminated by an entry whose type
 *           is PROFILE_FIELD_NOT_SET (may be NULL for "no extra fields").
 *
 * Each record starts with two uint64 fields, "ElapsedTime" and "EpochTime",
 * followed by the caller's fields in order. Fields of an unknown type are
 * logged and left out of the record layout.
 *
 * Returns the index of the new table (to be passed to add_sample_data), or
 * SLURM_ERROR when profiling is off or an HDF5 call fails.
 */
extern int acct_gather_profile_p_create_dataset(
	const char* name, int64_t parent,
	acct_gather_profile_dataset_t *dataset)
{
	size_t type_size;
	size_t offset, field_size;
	hid_t dtype_id;
	hid_t field_id;
	hid_t table_id;
	acct_gather_profile_dataset_t *dataset_loc;

	if (g_profile_running <= ACCT_GATHER_PROFILE_NONE)
		return SLURM_ERROR;

	debug("acct_gather_profile_p_create_dataset %s", name);

	/* compute the size of the type needed to create the table */
	type_size = sizeof(uint64_t) * 2; /* size for time field */
	for (dataset_loc = dataset;
	     dataset_loc && (dataset_loc->type != PROFILE_FIELD_NOT_SET);
	     dataset_loc++) {
		switch (dataset_loc->type) {
		case PROFILE_FIELD_UINT64:
			type_size += sizeof(uint64_t);
			break;
		case PROFILE_FIELD_DOUBLE:
			type_size += sizeof(double);
			break;
		default:
			/* unknown types take no room; see the loop below */
			break;
		}
	}

	/* create the datatype for the dataset */
	if ((dtype_id = H5Tcreate(H5T_COMPOUND, type_size)) < 0) {
		debug3("PROFILE: failed to create datatype for table %s",
		       name);
		return SLURM_ERROR;
	}

	/* insert the two time fields that head every record */
	if (H5Tinsert(dtype_id, "ElapsedTime", 0,
		      H5T_NATIVE_UINT64) < 0)
		goto fail;
	if (H5Tinsert(dtype_id, "EpochTime", sizeof(uint64_t),
		      H5T_NATIVE_UINT64) < 0)
		goto fail;

	/* insert the caller's fields right after the time header */
	offset = sizeof(uint64_t) * 2;
	for (dataset_loc = dataset;
	     dataset_loc && (dataset_loc->type != PROFILE_FIELD_NOT_SET);
	     dataset_loc++) {
		switch (dataset_loc->type) {
		case PROFILE_FIELD_UINT64:
			field_id = H5T_NATIVE_UINT64;
			field_size = sizeof(uint64_t);
			break;
		case PROFILE_FIELD_DOUBLE:
			field_id = H5T_NATIVE_DOUBLE;
			field_size = sizeof(double);
			break;
		default:
			/*
			 * Skip the field. The for loop still advances
			 * dataset_loc, so an unknown type cannot make this
			 * loop spin forever.
			 */
			error("%s: unknown field type:%d",
			      __func__, dataset_loc->type);
			continue;
		}
		if (H5Tinsert(dtype_id, dataset_loc->name,
			      offset, field_id) < 0)
			goto fail;
		offset += field_size;
	}

	/* create the table */
	if (parent < 0)
		parent = gid_node; /* default parent is the node group */
	table_id = H5PTcreate_fl(parent, name, dtype_id, HDF5_CHUNK_SIZE,
				 HDF5_COMPRESS);
	if (table_id < 0) {
		error("PROFILE: Impossible to create the table %s", name);
		goto fail;
	}
	H5Tclose(dtype_id); /* close the datatype since H5PT keeps a copy */

	/* resize the tables array if full */
	if (tables_cur_len == tables_max_len) {
		if (tables_max_len == 0)
			++tables_max_len;
		tables_max_len *= 2;
		tables = xrealloc(tables, tables_max_len * sizeof(table_t));
	}

	/* reserve a new table */
	tables[tables_cur_len].table_id = table_id;
	tables[tables_cur_len].type_size = type_size;
	++tables_cur_len;

	return tables_cur_len - 1;

fail:
	/* release the datatype on every error path */
	H5Tclose(dtype_id);
	return SLURM_ERROR;
}
576
acct_gather_profile_p_add_sample_data(int table_id,void * data,time_t sample_time)577 extern int acct_gather_profile_p_add_sample_data(int table_id, void *data,
578 time_t sample_time)
579 {
580 table_t *ds = &tables[table_id];
581 uint8_t send_data[ds->type_size];
582 int header_size = 0;
583 debug("acct_gather_profile_p_add_sample_data %d", table_id);
584
585 if (file_id < 0) {
586 debug("PROFILE: Trying to add data but profiling is over");
587 return SLURM_SUCCESS;
588 }
589
590 if (table_id < 0 || table_id >= tables_cur_len) {
591 error("PROFILE: trying to add samples to an invalid table %d",
592 table_id);
593 return SLURM_ERROR;
594 }
595
596 /* ensure that we have to record something */
597 xassert(running_in_slurmstepd());
598 xassert(g_job);
599 xassert(g_profile_running != ACCT_GATHER_PROFILE_NOT_SET);
600
601 if (g_profile_running <= ACCT_GATHER_PROFILE_NONE)
602 return SLURM_ERROR;
603
604 /* prepend timestampe and relative time */
605 ((uint64_t *)send_data)[0] = difftime(sample_time, step_start_time);
606 header_size += sizeof(uint64_t);
607 ((uint64_t *)send_data)[1] = sample_time;
608 header_size += sizeof(uint64_t);
609
610 memcpy(send_data + header_size, data, ds->type_size - header_size);
611
612 /* append the record to the table */
613 if (H5PTappend(ds->table_id, 1, send_data) < 0) {
614 error("PROFILE: Impossible to add data to the table %d; "
615 "maybe the table has not been created?", table_id);
616 return SLURM_ERROR;
617 }
618
619 return SLURM_SUCCESS;
620 }
621
/*
 * Append the plugin's configuration to *data as key/value pairs:
 * first ProfileHDF5Dir, then ProfileHDF5Default (in its string form).
 */
extern void acct_gather_profile_p_conf_values(List *data)
{
	config_key_pair_t *dir_pair;
	config_key_pair_t *default_pair;

	xassert(*data);

	dir_pair = xmalloc(sizeof(config_key_pair_t));
	dir_pair->name = xstrdup("ProfileHDF5Dir");
	dir_pair->value = xstrdup(hdf5_conf.dir);
	list_append(*data, dir_pair);

	default_pair = xmalloc(sizeof(config_key_pair_t));
	default_pair->name = xstrdup("ProfileHDF5Default");
	default_pair->value =
		xstrdup(acct_gather_profile_to_string(hdf5_conf.def));
	list_append(*data, default_pair);
}
641
acct_gather_profile_p_is_active(uint32_t type)642 extern bool acct_gather_profile_p_is_active(uint32_t type)
643 {
644 if (g_profile_running <= ACCT_GATHER_PROFILE_NONE)
645 return false;
646 return (type == ACCT_GATHER_PROFILE_NOT_SET)
647 || (g_profile_running & type);
648 }
649