1 /*****************************************************************************\
2  *  slurm_acct_gather_profile.h - implementation-independent job profile
3  *  accounting plugin definitions
4  *  Copyright (C) 2013 Bull S. A. S.
5  *		Bull, Rue Jean Jaures, B.P.68, 78340, Les Clayes-sous-Bois.
6  *
7  *  Written by Rod Schultz <rod.schultz@bull.com>
8  *
9  *  This file is part of Slurm, a resource management program.
10  *  For details, see <https://slurm.schedmd.com>.
11  *  Please also read the included file: DISCLAIMER.
12  *
13  *  Slurm is free software; you can redistribute it and/or modify it under
14  *  the terms of the GNU General Public License as published by the Free
15  *  Software Foundation; either version 2 of the License, or (at your option)
16  *  any later version.
17  *
18  *  In addition, as a special exception, the copyright holders give permission
19  *  to link the code of portions of this program with the OpenSSL library under
20  *  certain conditions as described in each individual source file, and
21  *  distribute linked combinations including the two. You must obey the GNU
22  *  General Public License in all respects for all of the code used other than
23  *  OpenSSL. If you modify file(s) with this exception, you may extend this
24  *  exception to your version of the file(s), but you are not obligated to do
25  *  so. If you do not wish to do so, delete this exception statement from your
26  *  version.  If you delete this exception statement from all source files in
27  *  the program, then also delete it here.
28  *
29  *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
30  *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
31  *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
32  *  details.
33  *
34  *  You should have received a copy of the GNU General Public License along
35  *  with Slurm; if not, write to the Free Software Foundation, Inc.,
36  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
37 \*****************************************************************************/
38 
39 #ifndef __SLURM_ACCT_GATHER_PROFILE_H__
40 #define __SLURM_ACCT_GATHER_PROFILE_H__
41 
42 #include <inttypes.h>
43 #include <sys/resource.h>
44 #include <sys/types.h>
45 #include <time.h>
46 #include <unistd.h>
47 
48 #include "slurm/slurm.h"
49 #include "slurm/slurmdb.h"
50 #include "src/common/macros.h"
51 #include "src/common/pack.h"
52 #include "src/common/list.h"
53 #include "src/common/xmalloc.h"
54 #include "src/common/slurm_acct_gather.h"
55 #include "src/slurmd/slurmstepd/slurmstepd_job.h"
56 
/* Parent id meaning "no explicit group": selects the default group when
 * passed as "parent" to acct_gather_profile_g_create_dataset(). */
57 #define NO_PARENT -1
58 
/*
 * Kinds of profile data. PROFILE_CNT is not a kind itself: as the last
 * enumerator it equals the number of kinds listed above it and is used as
 * the length of acct_gather_profile_timer[].
 */
59 typedef enum {
60 	PROFILE_ENERGY,
61 	PROFILE_TASK,
62 	PROFILE_FILESYSTEM,
63 	PROFILE_NETWORK,
64 	PROFILE_CNT
65 } acct_gather_profile_type_t;
66 
/*
 * Value type of one dataset field (see acct_gather_profile_dataset_t).
 * PROFILE_FIELD_NOT_SET is the zero value.
 * NOTE(review): presumably PROFILE_FIELD_NOT_SET also marks the end of a
 * dataset array -- confirm against the plugins.
 */
67 typedef enum {
68 	PROFILE_FIELD_NOT_SET,
69 	PROFILE_FIELD_UINT64,
70 	PROFILE_FIELD_DOUBLE
71 } acct_gather_profile_field_type_t;
72 
/*
 * Describes one field of a dataset: its name and the type of its values.
 * An array of these is handed to acct_gather_profile_g_create_dataset().
 */
73 typedef struct {
74 	char *name;
75 	acct_gather_profile_field_type_t type;
76 } acct_gather_profile_dataset_t;
77 
/*
 * Timer state kept for each profile type (see acct_gather_profile_timer[]).
 * NOTE(review): the field notes below are inferred from the names only --
 * confirm against the polling code.
 */
78 typedef struct {
79 	int freq;		/* presumably the sampling frequency; unit not shown here */
80 	time_t last_notify;	/* presumably when "notify" was last signalled */
81 	pthread_cond_t notify;	/* condition variable */
82 	pthread_mutex_t notify_mutex;	/* presumably the mutex paired with "notify" */
83 } acct_gather_profile_timer_t;
84 
/* One timer slot per profile type: the array length is PROFILE_CNT.
 * Declaration only; the array itself is defined elsewhere. */
85 extern acct_gather_profile_timer_t acct_gather_profile_timer[PROFILE_CNT];
86 
87 /*
88  * Load the plugin
    *
    * NOTE(review): the return convention is not stated here; presumably
    * SLURM_SUCCESS or SLURM_ERROR like the step/task calls below -- confirm.
89  */
90 extern int acct_gather_profile_init(void);
91 
92 /*
93  * Unload the plugin
    *
    * NOTE(review): the return convention is not stated here; presumably
    * SLURM_SUCCESS or SLURM_ERROR like the step/task calls below -- confirm.
94  */
95 extern int acct_gather_profile_fini(void);
96 
97 /* translate uint32_t profile to string (the caller must NOT free it) */
98 extern char *acct_gather_profile_to_string(uint32_t profile);
99 
100 /* translate a string of words to a uint32_t with the profile bits set */
101 extern uint32_t acct_gather_profile_from_string(const char *profile_str);
102 
103 /* Return true if acct_gather_profile_running flag is set
     * (the flag itself is not declared in this header) */
104 extern bool acct_gather_profile_test(void);
105 
/*
 * Conversions between a series type value and its string form.
 * NOTE(review): presumably the counterparts of the profile to/from string
 * helpers above, applied to a single series type; whether the returned
 * string may be freed is not stated here -- confirm in the implementation.
 * NOTE(review): series_str is not const-qualified, unlike the argument of
 * acct_gather_profile_from_string().
 */
106 extern char *acct_gather_profile_type_to_string(uint32_t series);
107 extern uint32_t acct_gather_profile_type_from_string(char *series_str);
108 
/*
 * NOTE(review): undocumented in the original; the notes below are inferred
 * from the signatures only -- confirm in the implementation.
 *
 * acct_gather_profile_type_t_name -- presumably returns the name of the
 *	given profile type.
 * acct_gather_profile_dataset_str -- presumably renders "data" according
 *	to the field definitions in "dataset" into the caller's buffer "str"
 *	of size "str_len".
 */
109 extern char *acct_gather_profile_type_t_name(acct_gather_profile_type_t type);
110 extern char *acct_gather_profile_dataset_str(
111 	acct_gather_profile_dataset_t *dataset, void *data,
112 	char *str, int str_len);
/*
 * Start / end polling.
 * NOTE(review): undocumented in the original. "freq" and "freq_def" are
 * strings; presumably frequency specifications with "freq_def" supplying
 * defaults -- confirm in the implementation. The return convention of
 * acct_gather_profile_startpoll() is not stated here.
 */
113 extern int acct_gather_profile_startpoll(char *freq, char *freq_def);
114 extern void acct_gather_profile_endpoll(void);
115 
116 /* Called from slurmstepd between fork() and exec() of the application.
117  * Close open files.
     * NOTE(review): the return convention is not stated here; presumably
     * SLURM_SUCCESS or SLURM_ERROR -- confirm. */
118 extern int acct_gather_profile_g_child_forked(void);
119 
120 /*
121  * Define the plugin-local configuration options for acct_gather.conf
122  *
123  * Parameters
124  *	full_options -- pointer that will receive the list of plugin-local
125  *			definitions
126  *	full_options_cnt -- count of plugin-local definitions
     *
     * NOTE(review): the return convention is not stated here -- confirm.
127  */
128 extern int acct_gather_profile_g_conf_options(s_p_options_t **full_options,
129 					       int *full_options_cnt);
130 /*
131  * Set the plugin-local configuration from acct_gather.conf into the
     * plugin's own structure
132  *
133  * Parameters
134  *	tbl -- hash table of acct_gather.conf key-values.
     *
     * NOTE(review): the return convention is not stated here -- confirm.
135  */
136 extern int acct_gather_profile_g_conf_set(s_p_hashtbl_t *tbl);
137 
138 /*
139  * Get info from the profile plugin
140  *
     * Parameters
     *	info_type -- selects which piece of information is requested
     *		     (enum acct_gather_profile_info is declared outside this
     *		     header)
     *	data      -- untyped pointer for the requested information;
     *		     NOTE(review): presumably the expected pointee type
     *		     depends on info_type -- confirm in the plugins
     *
     * NOTE(review): the return convention is not stated here -- confirm.
141  */
142 extern int acct_gather_profile_g_get(enum acct_gather_profile_info info_type,
143 				      void *data);
144 
145 /*
146  * Called once per step on each node from slurmstepd, before launching tasks.
147  * Provides an opportunity to create files and perform other node-step level
148  * initialization.
149  *
150  * Parameters
151  *	job -- structure defining a slurm job (stepd_step_rec_t)
152  *
153  * Returns -- SLURM_SUCCESS or SLURM_ERROR
154  */
155 extern int acct_gather_profile_g_node_step_start(stepd_step_rec_t* job);
156 
157 /*
158  * Called once per step on each node from slurmstepd, after all tasks end.
159  * Provides an opportunity to close files, etc.
160  *
161  * Takes no parameters.
162  * Returns -- SLURM_SUCCESS or SLURM_ERROR
163  */
164 extern int acct_gather_profile_g_node_step_end(void);
165 
166 /*
167  * Called once per task from slurmstepd, BEFORE node step start is called.
168  * Provides an opportunity to gather beginning values from node counters
169  * (bytes_read ...)
170  * At this point in the life cycle, the value of the --profile option isn't
171  * known and files are not open, so calls to the 'add_*_data'
172  * functions cannot be made.
173  *
174  * Parameters
175  *	taskid -- slurm taskid
176  *
177  * Returns -- SLURM_SUCCESS or SLURM_ERROR
178  */
179 extern int acct_gather_profile_g_task_start(uint32_t taskid);
180 
181 /*
182  * Called once per task from slurmstepd.
183  * Provides an opportunity to put final data for a task.
184  *
185  * Parameters
186  *	taskpid -- linux process id (pid_t) of the task; note that this is a
     *		   pid, whereas acct_gather_profile_g_task_start() takes a
     *		   slurm taskid
187  *
188  * Returns -- SLURM_SUCCESS or SLURM_ERROR
189  */
190 extern int acct_gather_profile_g_task_end(pid_t taskpid);
191 
192 /*
193  * Create a new group which can contain datasets.
194  *
     * Parameters
     *	name -- name of the group
     *
195  * Returns -- the identifier of the group on success (usable as "parent"
     *            in acct_gather_profile_g_create_dataset),
196  *            a negative value on failure
197  */
198 extern int64_t acct_gather_profile_g_create_group(const char* name);
199 
200 /*
201  * Create a new dataset to record profiling data in the group "parent".
202  * Must be called by each accounting plugin in order to record data.
203  * A "Time" field is automatically added.
204  *
205  * Parameters
206  *  name        -- name of the dataset
207  *  parent      -- id of the parent group created with
208  *                 acct_gather_profile_g_create_group, or NO_PARENT for
209  *                 default group
210  *  dataset     -- acct_gather_profile_dataset_t array filled in with the
211  *                 series definition
212  * Returns -- an identifier to the dataset on success
213  *            a negative value on failure
     *
     * NOTE(review): the return type is int while group ids are int64_t.
214  */
215 extern int acct_gather_profile_g_create_dataset(
216 	const char *name, int64_t parent,
217 	acct_gather_profile_dataset_t *dataset);
218 
219 /*
220  * Put data at the Node Samples level. Typically called from something called
221  * at either job_acct_gather interval or acct_gather_energy interval.
222  * Time is automatically added.
223  *
224  * Parameters
225  *	dataset_id  -- identifies the dataset to add data to (see
     *		       acct_gather_profile_g_create_dataset)
226  *	data        -- data structure to be recorded
227  *	sample_time -- when the sample happened
228  *
229  * Returns -- SLURM_SUCCESS or SLURM_ERROR
230  */
231 extern int acct_gather_profile_g_add_sample_data(int dataset_id, void *data,
232 						 time_t sample_time);
233 
234 /* Get the values from the plugin that are set up in the .conf
235  * file. This function should most likely only be called from
236  * src/common/slurm_acct_gather.c (acct_gather_get_values()).
     * The type behind "data" is not specified in this header.
237  */
238 extern void acct_gather_profile_g_conf_values(void *data);
239 
240 /* Return true if the given type of plugin must be profiled.
     * NOTE(review): "type" is a plain uint32_t; whether it carries an
     * acct_gather_profile_type_t value or a profile bit is not stated here --
     * confirm in the implementation. */
241 extern bool acct_gather_profile_g_is_active(uint32_t type);
242 
243 #endif /*__SLURM_ACCT_GATHER_PROFILE_H__*/
244