1 /****************************************************************************\
2  *  slurmdbd_defs.h - definitions used for Slurm DBD RPCs
3  *****************************************************************************
4  *  Copyright (C) 2008-2010 Lawrence Livermore National Security.
5  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
6  *  Written by Morris Jette <jette1@llnl.gov>
7  *  CODE-OCEC-09-009. All rights reserved.
8  *
9  *  This file is part of Slurm, a resource management program.
10  *  For details, see <https://slurm.schedmd.com/>.
11  *  Please also read the included file: DISCLAIMER.
12  *
13  *  Slurm is free software; you can redistribute it and/or modify it under
14  *  the terms of the GNU General Public License as published by the Free
15  *  Software Foundation; either version 2 of the License, or (at your option)
16  *  any later version.
17  *
18  *  In addition, as a special exception, the copyright holders give permission
19  *  to link the code of portions of this program with the OpenSSL library under
20  *  certain conditions as described in each individual source file, and
21  *  distribute linked combinations including the two. You must obey the GNU
22  *  General Public License in all respects for all of the code used other than
23  *  OpenSSL. If you modify file(s) with this exception, you may extend this
24  *  exception to your version of the file(s), but you are not obligated to do
25  *  so. If you do not wish to do so, delete this exception statement from your
26  *  version.  If you delete this exception statement from all source files in
27  *  the program, then also delete it here.
28  *
29  *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
30  *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
31  *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
32  *  details.
33  *
34  *  You should have received a copy of the GNU General Public License along
35  *  with Slurm; if not, write to the Free Software Foundation, Inc.,
36  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
37 \*****************************************************************************/
38 
39 #ifndef _SLURMDBD_DEFS_H
40 #define _SLURMDBD_DEFS_H
41 
42 #include <inttypes.h>
43 
44 #include "slurm/slurm.h"
45 
46 #include "src/common/list.h"
47 #include "src/common/slurm_accounting_storage.h"
48 
49 /* Slurm DBD message types */
50 /* ANY TIME YOU ADD TO THIS LIST UPDATE THE CONVERSION FUNCTIONS! */
/*
 * Enumerators are numbered implicitly, counting up from 1400; the "#14x0"
 * markers in the comments below note every tenth value.  Because the
 * numbering is implicit, inserting an entry in the middle of the list
 * renumbers every entry after it.  SLURM_PERSIST_INIT is pinned to 6500.
 */
51 typedef enum {
52 	DEFUNCT_DBD_INIT = 1400,/* Connection initialization		*/
53 	DBD_FINI,       	/* Connection finalization		*/
54 	DBD_ADD_ACCOUNTS,       /* Add new account to the mix           */
55 	DBD_ADD_ACCOUNT_COORDS, /* Add new coordinator to an account    */
56 	DBD_ADD_ASSOCS,         /* Add new association to the mix       */
57 	DBD_ADD_CLUSTERS,       /* Add new cluster to the mix           */
58 	DBD_ADD_USERS,          /* Add new user to the mix              */
59 	DBD_CLUSTER_TRES,	/* Record total tres on cluster	*/
60 	DBD_FLUSH_JOBS, 	/* End jobs that are still running
61 				 * when a controller is restarted.	*/
62 	DBD_GET_ACCOUNTS,	/* Get account information		*/
63 	DBD_GET_ASSOCS,         /* #1410, Get association information   */
64 	DBD_GET_ASSOC_USAGE,  	/* Get assoc usage information   	*/
65 	DBD_GET_CLUSTERS,	/* Get cluster information		*/
66 	DBD_GET_CLUSTER_USAGE, 	/* Get cluster usage information	*/
67 	DBD_RECONFIG,   	/* Reread the slurmdbd.conf     	*/
68 	DBD_GET_USERS,  	/* Get user information			*/
69 	DBD_GOT_ACCOUNTS,	/* Response to DBD_GET_ACCOUNTS		*/
70 	DBD_GOT_ASSOCS, 	/* Response to DBD_GET_ASSOCS   	*/
71 	DBD_GOT_ASSOC_USAGE,  	/* Response to DBD_GET_ASSOC_USAGE    	*/
72 	DBD_GOT_CLUSTERS,	/* Response to DBD_GET_CLUSTERS		*/
73 	DBD_GOT_CLUSTER_USAGE, 	/* #1420, Response to DBD_GET_CLUSTER_USAGE */
74 	DBD_GOT_JOBS,		/* Response to DBD_GET_JOBS		*/
75 	DBD_GOT_LIST,           /* Response to DBD_MODIFY/REMOVE MOVE_* */
76 	DBD_GOT_USERS,  	/* Response to DBD_GET_USERS		*/
77 	DBD_JOB_COMPLETE,	/* Record job completion 		*/
78 	DBD_JOB_START,		/* Record job starting			*/
79 	DBD_ID_RC,	        /* return db_index from job
80 				 * insertion, or any other id from
81 				 * other commands.              	*/
82 	DBD_JOB_SUSPEND,	/* Record job suspension		*/
83 	DBD_MODIFY_ACCOUNTS,    /* Modify existing account              */
84 	DBD_MODIFY_ASSOCS,      /* Modify existing association          */
85 	DBD_MODIFY_CLUSTERS,    /* #1430, Modify existing cluster       */
86 	DBD_MODIFY_USERS,       /* Modify existing user                 */
87 	DBD_NODE_STATE,		/* Record node state transition		*/
88 	DBD_DEFUNCT_RPC_1433,	/* Free for reuse			*/
89 	DBD_REGISTER_CTLD,	/* Register a slurmctld's comm port	*/
90 	DBD_REMOVE_ACCOUNTS,    /* Remove existing account              */
91 	DBD_REMOVE_ACCOUNT_COORDS,/* Remove existing coordinator from
92 				   * an account */
93 	DBD_REMOVE_ASSOCS,      /* Remove existing association          */
94 	DBD_REMOVE_CLUSTERS,    /* Remove existing cluster              */
95 	DBD_REMOVE_USERS,       /* Remove existing user                 */
96 	DBD_ROLL_USAGE,         /* #1440, Roll up usage                 */
97 	DBD_STEP_COMPLETE,	/* Record step completion		*/
98 	DBD_STEP_START,		/* Record step starting			*/
99 	DBD_DEFUNCT_RPC_1443,	/* Free for reuse			*/
100 	DBD_GET_JOBS_COND, 	/* Get job information with a condition */
101 	DBD_GET_TXN,		/* Get transaction information		*/
102 	DBD_GOT_TXN,		/* Got transaction information		*/
103 	DBD_ADD_QOS,		/* Add QOS information   	        */
104 	DBD_GET_QOS,		/* Get QOS information   	        */
105 	DBD_GOT_QOS,		/* Got QOS information   	        */
106 	DBD_REMOVE_QOS,		/* #1450, Remove QOS information        */
107 	DBD_MODIFY_QOS,         /* Modify existing QOS                  */
108 	DBD_ADD_WCKEYS,		/* Add WCKEY information   	        */
109 	DBD_GET_WCKEYS,		/* Get WCKEY information   	        */
110 	DBD_GOT_WCKEYS,		/* Got WCKEY information   	        */
111 	DBD_REMOVE_WCKEYS,	/* Remove WCKEY information   	        */
112 	DBD_MODIFY_WCKEYS,      /* Modify existing WCKEY                */
113 	DBD_GET_WCKEY_USAGE,  	/* Get wckey usage information  	*/
114 	DBD_GOT_WCKEY_USAGE,  	/* Response to DBD_GET_WCKEY_USAGE  	*/
115 	DBD_ARCHIVE_DUMP,    	/* issue a request to dump jobs to
116 				 * archive */
117 	DBD_ARCHIVE_LOAD,    	/* #1460, load an archive file          */
118 	DBD_ADD_RESV,    	/* add a reservation                    */
119 	DBD_REMOVE_RESV,    	/* remove a reservation                 */
120 	DBD_MODIFY_RESV,    	/* modify a reservation                 */
121 	DBD_GET_RESVS,    	/* Get reservation information  	*/
122 	DBD_GOT_RESVS,		/* Response to DBD_GET_RESVS		*/
123 	DBD_GET_CONFIG,  	/* Get configuration information	*/
124 	DBD_GOT_CONFIG,		/* Response to DBD_GET_CONFIG		*/
125 	DBD_GET_PROBS,  	/* Get problems existing in accounting	*/
126 	DBD_GOT_PROBS,		/* Response to DBD_GET_PROBS		*/
127 	DBD_GET_EVENTS, 	/* #1470, Get event information		*/
128 	DBD_GOT_EVENTS, 	/* Response to DBD_GET_EVENTS		*/
129 	DBD_SEND_MULT_JOB_START,/* Send multiple job starts		*/
130 	DBD_GOT_MULT_JOB_START,	/* Get response to DBD_SEND_MULT_JOB_START */
131 	DBD_SEND_MULT_MSG,      /* Send multiple message		*/
132 	DBD_GOT_MULT_MSG,	/* Get response to DBD_SEND_MULT_MSG    */
133 	DBD_MODIFY_JOB,		/* Modify existing Job(s)               */
134 	DBD_ADD_RES,    	/* Add new system resource to the mix   */
135 	DBD_GET_RES,		/* Get resource information		*/
136 	DBD_GOT_RES,		/* Got resource information		*/
137 	DBD_REMOVE_RES,     	/* #1480, Remove existing resource      */
138 	DBD_MODIFY_RES,     	/* Modify existing resource      	*/
139 	DBD_ADD_CLUS_RES,    	/* Add cluster using a resource    	*/
140 	DBD_REMOVE_CLUS_RES,   	/* Remove existing cluster resource    	*/
141 	DBD_MODIFY_CLUS_RES,   	/* Modify existing cluster resource   	*/
142 	DBD_ADD_TRES,           /* Add tres to the database           */
143 	DBD_GET_TRES,           /* Get tres from the database         */
144 	DBD_GOT_TRES,           /* Got tres from the database         */
145 	DBD_FIX_RUNAWAY_JOB,    /* Fix any runaway jobs */
146 	DBD_GET_STATS,		/* Get daemon statistics */
147 	DBD_GOT_STATS,		/* #1490, Got daemon statistics data */
148 	DBD_CLEAR_STATS,	/* Clear daemon statistics */
149 	DBD_SHUTDOWN,		/* Shutdown daemon */
150 	DBD_ADD_FEDERATIONS,    /* Add new federation to the mix        */
151 	DBD_GET_FEDERATIONS,	/* Get federation information		*/
152 	DBD_GOT_FEDERATIONS,	/* Response to DBD_GET_FEDERATIONS 	*/
153 	DBD_MODIFY_FEDERATIONS, /* Modify existing federation 		*/
154 	DBD_REMOVE_FEDERATIONS, /* Remove existing federation 		*/
155 
156 	SLURM_PERSIST_INIT = 6500, /* So we don't use the
157 				    * REQUEST_PERSIST_INIT also used here.
158 				    */
159 } slurmdbd_msg_type_t;
160 
161 /*****************************************************************************\
162  * Slurm DBD protocol data structures
163 \*****************************************************************************/
164 
/*
 * Message body pairing a list of account names with a user condition.
 * NOTE(review): by its name, presumably the payload of
 * DBD_ADD_ACCOUNT_COORDS / DBD_REMOVE_ACCOUNT_COORDS -- confirm against
 * the pack/unpack code.
 */
165 typedef struct {
166 	List acct_list; /* list of account names (char *'s) */
167 	slurmdb_user_cond_t *cond; /* NOTE(review): presumably selects the
				    * users the coordinator change applies
				    * to -- confirm */
168 } dbd_acct_coord_msg_t;
169 
/*
 * Message body describing a cluster's nodes and TRES at a point in time.
 * NOTE(review): by its name, presumably the payload of DBD_CLUSTER_TRES
 * ("Record total tres on cluster") -- confirm.
 */
170 typedef struct dbd_cluster_tres_msg {
171 	char *cluster_nodes;	/* nodes in cluster */
172 	time_t event_time;	/* time of transition */
173 	char *tres_str;	        /* Simple comma separated list of TRES */
174 } dbd_cluster_tres_msg_t;
175 
/*
 * Generic message body wrapping a single record of unspecified type.
 * slurmdbd_free_rec_msg() takes the message type alongside this struct.
 */
176 typedef struct {
177 	void *rec; /* Opaque record; the concrete type depends on the
178 		    * message type.  The candidate *_rec_t types are
179 		    * defined in slurm_accounting_storage.h */
180 } dbd_rec_msg_t;
181 
/*
 * Generic message body wrapping a single condition of unspecified type.
 * slurmdbd_free_cond_msg() takes the message type alongside this struct.
 */
182 typedef struct {
183 	void *cond; /* Opaque condition; the concrete type depends on the
184 		     * message type.  The candidate *_cond_t types are
185 		     * defined in slurm_accounting_storage.h */
186 } dbd_cond_msg_t;
187 
/*
 * Message body carrying a time window and an archive flag.
 * NOTE(review): by its name, presumably the payload of DBD_ROLL_USAGE
 * ("Roll up usage") -- confirm.
 */
188 typedef struct {
189 	uint16_t archive_data;	/* NOTE(review): presumably a flag asking
				 * for data to be archived during the
				 * rollup -- confirm */
190 	time_t end;		/* NOTE(review): presumably end of the
				 * period to roll up -- confirm */
191 	time_t start;		/* NOTE(review): presumably start of the
				 * period to roll up -- confirm */
192 } dbd_roll_usage_msg_t;
193 
/*
 * Message body carrying an opaque record plus a time window.
 * slurmdbd_free_usage_msg() takes the message type alongside this struct.
 * NOTE(review): presumably used by the DBD_GET_*_USAGE / DBD_GOT_*_USAGE
 * messages -- confirm.
 */
194 typedef struct {
195 	time_t end;	/* NOTE(review): presumably end of the usage period
			 * -- confirm */
196 	void *rec;	/* opaque record; NOTE(review): concrete type
			 * presumably follows from the message type --
			 * confirm */
197 	time_t start;	/* NOTE(review): presumably start of the usage
			 * period -- confirm */
198 } dbd_usage_msg_t;
199 
/*
 * Message body describing which job records to fetch: a cluster, optional
 * step/partition selections, and the requesting user and group.
 */
200 typedef struct dbd_get_jobs_msg {
201 	char *cluster_name;	/* name of cluster to query */
202 	uint16_t completion;	/* get job completion records instead
203 				 * of accounting record */
204 	uint32_t gid;		/* group id */
205 	time_t last_update;	/* time of latest info */
206 	List selected_steps;	/* List of slurmdb_selected_step_t *'s */
207 	List selected_parts;	/* List of char *'s */
208 	char *user;		/* user name */
209 } dbd_get_jobs_msg_t;
210 
/*
 * Connection-initialization message body: identifies the originating
 * cluster, protocol version and user.
 */
211 typedef struct dbd_init_msg {
212 	char *cluster_name;     /* cluster this message is coming from */
213 	uint16_t version;	/* protocol version */
214 	uint32_t uid;		/* UID originating connection,
215 				 * filled by authentication plugin */
216 } dbd_init_msg_t;
217 
/*
 * Connection-finalization message body: says whether to close the
 * connection and whether to commit or roll back pending changes.
 */
218 typedef struct dbd_fini_msg {
219 	uint16_t close_conn;  /* to close connection 1, 0 will keep
220 				 connection open */
221 	uint16_t commit;      /* to rollback(0) or commit(1) changes */
222 } dbd_fini_msg_t;
223 
/*
 * Job-completion message body.  assoc_id, job_id and submit_time are the
 * fields documented below as needed to locate the job record in the db.
 */
224 typedef struct dbd_job_comp_msg {
225 	char *	 admin_comment;	/* job admin comment field */
226 	uint32_t assoc_id;	/* accounting association id needed to
227 				 * find job record in db */
228 	char *	 comment;	/* job comment field */
229 	uint64_t db_index;	/* index into the db for this job */
230 	uint32_t derived_ec;	/* derived job exit code or signal */
231 	time_t   end_time;	/* job termination time */
232 	uint32_t exit_code;	/* job exit code or signal */
233 	uint32_t job_id;	/* job ID */
234 	uint32_t job_state;	/* job state */
235 	char *   nodes;		/* hosts allocated to the job */
236 	uint32_t req_uid;	/* requester user ID */
237 	time_t   start_time;	/* job start time */
238 	time_t   submit_time;	/* job submit time needed to find job
239 				 * record in db */
240 	char *	 system_comment;/* job system comment field */
241 	char    *tres_alloc_str;/* Simple comma separated list of TRES */
242 } dbd_job_comp_msg_t;
243 
/*
 * Job-start message body: identity, array/hetjob membership, requested and
 * allocated resources, and timing information for one job.
 */
244 typedef struct dbd_job_start_msg {
245 	char *   account;       /* Account name for those not running
246 				 * with associations */
247 	uint32_t alloc_nodes;   /* how many nodes used in job */
248 	uint32_t array_job_id;	/* job_id of a job array or 0 if N/A */
249 	uint32_t array_max_tasks;/* max number of tasks able to run at once */
250 	uint32_t array_task_id;	/* task_id of a job array or NO_VAL
251 				 * if N/A */
252 	char *   array_task_str;/* hex string of unstarted tasks */
253 	uint32_t array_task_pending;/* number of tasks still pending */
254 	uint32_t assoc_id;	/* accounting association id */
255 	char *   constraints;   /* features/constraints requested by job */
256 	uint32_t db_flags;      /* flags about job */
257 	uint64_t db_index;	/* index into the db for this job */
258 	time_t   eligible_time;	/* time job becomes eligible to run */
259 	uint32_t gid;	        /* group ID */
260 	uint32_t het_job_id;	/* ID of hetjob leader or 0 */
261 	uint32_t het_job_offset; /* Hetjob component ID, zero-origin */
262 	uint32_t job_id;	/* job ID */
263 	uint32_t job_state;	/* job state */
264 	char *   mcs_label;	/* job mcs_label */
265 	char *   name;		/* job name */
266 	char *   nodes;		/* hosts allocated to the job */
267 	char *   node_inx;      /* ranged bitmap string of hosts
268 				 * allocated to the job */
269 	char *   partition;	/* partition job is running on */
270 	uint32_t priority;	/* job priority */
271 	uint32_t qos_id;        /* qos job is running with */
272 	uint32_t req_cpus;	/* count of req processors */
273 	uint64_t req_mem;       /* requested minimum memory */
274 	uint32_t resv_id;	/* reservation id */
275 	time_t   start_time;	/* job start time */
276 	uint32_t state_reason_prev; /* Last reason of blocking before job
277 				     * started */
278 	time_t   submit_time;	/* job submit time */
279 	uint32_t timelimit;	/* job timelimit */
280 	uint32_t uid;	        /* user ID if associations are being used */
281 	char*    gres_alloc;    /* String depicting the allocated GRES by
282 				 * type for the entire job on all nodes. */
283 	char*    gres_req;      /* String depicting the requested GRES by
284 				 * type for the entire job on all nodes. */
285 	char*    gres_used;     /* String depicting the GRES actually used by
286 				 * type for the entire job on all nodes. */
287 	char    *tres_alloc_str;/* Simple comma separated list of allocated
				 * TRES (NOTE(review): "allocated" inferred
				 * from the field name -- confirm) */
288 	char    *tres_req_str;  /* Simple comma separated list of requested
				 * TRES (NOTE(review): "requested" inferred
				 * from the field name -- confirm) */
289 	char *   wckey;		/* wckey name */
290 	char    *work_dir;      /* work dir of job */
291 } dbd_job_start_msg_t;
292 
293 /* Returns an id (a 32-bit job_id and a 64-bit db_index) along with a
 * return code.
 * NOTE(review): by its name, presumably the payload of DBD_ID_RC -- confirm. */
294 typedef struct dbd_id_rc_msg {
295 	uint32_t job_id;	/* job ID */
296 	uint64_t db_index;	/* NOTE(review): presumably the index into
				 * the db for this job, as in the other
				 * job messages -- confirm */
297 	uint32_t return_code;	/* return code for the request */
298 } dbd_id_rc_msg_t;
299 
/*
 * Job suspend/resume message body.  assoc_id, job_id and submit_time are
 * the fields documented below as needed to locate the job record in the db.
 */
300 typedef struct dbd_job_suspend_msg {
301 	uint32_t assoc_id;	/* accounting association id needed
302 				 * to find job record in db */
303 	uint64_t db_index;	/* index into the db for this job */
304 	uint32_t job_id;	/* job ID needed to find job record
305 				 * in db */
306 	uint32_t job_state;	/* job state */
307 	time_t   submit_time;	/* job submit time needed to find job record
308 				 * in db */
309 	time_t   suspend_time;	/* job suspend or resume time */
310 } dbd_job_suspend_msg_t;
311 
/*
 * Generic message body carrying a list of unspecified element type together
 * with a return code.
 */
312 typedef struct {
313 	List my_list;		/* this list could be of any type as long as it
314 				 * is handled correctly on both ends */
315 	uint32_t return_code;   /* If there was an error and a list of
316 				 * them this is the type of error it
317 				 * was */
318 } dbd_list_msg_t;
319 
/*
 * Generic message body pairing an opaque condition with an opaque record.
 * slurmdbd_free_modify_msg() takes the message type alongside this struct.
 * NOTE(review): presumably used by the DBD_MODIFY_* messages, with cond
 * selecting the objects to change and rec holding the new values -- confirm.
 */
320 typedef struct {
321 	void *cond;	/* opaque condition; concrete type not visible here */
322 	void *rec;	/* opaque record; concrete type not visible here */
323 } dbd_modify_msg_t;
324 
/* Values for dbd_node_state_msg_t.new_state */
325 #define DBD_NODE_STATE_DOWN  1
326 #define DBD_NODE_STATE_UP    2
/*
 * Node state transition message body: which hosts changed, when, to what
 * state, and why.
 */
327 typedef struct dbd_node_state_msg {
328 	time_t event_time;	/* time of transition */
329 	char *hostlist;		/* name of hosts */
330 	uint16_t new_state;	/* new state of host, see DBD_NODE_STATE_* */
331 	char *reason;		/* explanation for the node's state */
332 	uint32_t reason_uid;   	/* User that set the reason, ignore if
333 				 * no reason is set. */
334 	uint32_t state;         /* current state of node.  Used to get
335 				   flags on the state (i.e. maintenance) */
336 	char *tres_str;	        /* Simple comma separated list of TRES */
337 } dbd_node_state_msg_t;
338 
/*
 * Controller registration message body: the slurmctld's comm port plus
 * cluster-level attributes (dimensions, flags, select plugin id).
 */
339 typedef struct dbd_register_ctld_msg {
340 	uint16_t dimensions;    /* dimensions of system */
341 	uint32_t flags;         /* flags for cluster */
342 	uint32_t plugin_id_select; /* the select plugin_id */
343 	uint16_t port;		/* slurmctld's comm port */
344 } dbd_register_ctld_msg_t;
345 
/*
 * Step-completion message body: identifies the step and its job and carries
 * the step's end time, exit code, accounting info and task count.
 */
346 typedef struct dbd_step_comp_msg {
347 	uint32_t assoc_id;	/* accounting association id */
348 	uint64_t db_index;	/* index into the db for this job */
349 	time_t   end_time;	/* job termination time */
350 	uint32_t exit_code;	/* job exit code or signal */
351 	jobacctinfo_t *jobacct; /* status info */
352 	uint32_t job_id;	/* job ID */
353 	time_t   job_submit_time;/* job submit time needed to find job record
354 				  * in db */
355 	char    *job_tres_alloc_str;/* Simple comma separated list of TRES for
356 				     * the job (primarily for the energy of the
357 				     * completing job).  This is only filled in
358 				     * on the last step in the job. */
359 	uint32_t req_uid;	/* requester user ID */
360 	time_t   start_time;	/* step start time */
361 	uint16_t state;         /* NOTE(review): presumably the state of the
362 				   step rather than of a node -- confirm */
363 	uint32_t step_id;	/* step ID */
364 	uint32_t total_tasks;	/* count of tasks for step */
365 } dbd_step_comp_msg_t;
366 
/*
 * Step-start message body: identifies the step and its job and describes
 * the step's nodes, task layout, CPU frequency request and TRES.
 */
367 typedef struct dbd_step_start_msg {
368 	uint32_t assoc_id;	/* accounting association id */
369 	uint64_t db_index;	/* index into the db for this job */
370 	uint32_t job_id;	/* job ID */
371 	char *   name;		/* step name */
372 	char *   nodes;		/* hosts allocated to the step */
373 	char *   node_inx;	/* bitmap index of hosts allocated to
374 				 * the step */
375 	uint32_t node_cnt;      /* how many nodes used in step */
376 	time_t   start_time;	/* step start time */
377 	time_t   job_submit_time;/* job submit time needed to find job record
378 				  * in db */
379 	uint32_t req_cpufreq_min; /* requested minimum CPU frequency  */
380 	uint32_t req_cpufreq_max; /* requested maximum CPU frequency  */
381 	uint32_t req_cpufreq_gov; /* requested CPU frequency governor */
382 	uint32_t step_id;	/* step ID */
383 	uint32_t task_dist;     /* layout method of step */
384 	uint32_t total_tasks;	/* count of tasks for step */
385 	char *tres_alloc_str;   /* Simple comma separated list of TRES */
386 } dbd_step_start_msg_t;
387 
388 /*****************************************************************************\
389  * Slurm DBD message processing functions
390 \*****************************************************************************/
391 
/*
 * Conversions between slurmdbd_msg_type_t values and strings.
 * NOTE(review): these are presumably the "conversion functions" that the
 * note above the slurmdbd_msg_type_t enum says must be updated when an
 * entry is added -- confirm.
 */

/* IN msg_type - string naming a message type
 * RET the corresponding slurmdbd_msg_type_t
 * NOTE(review): the result for an unrecognized string is not visible from
 * this header -- check the definition. */
392 extern slurmdbd_msg_type_t str_2_slurmdbd_msg_type(char *msg_type);
/* IN msg_type - message type to describe
 * IN get_enum - NOTE(review): presumably selects between returning the
 *	enumerator's name and a human-readable description -- confirm
 * RET string for msg_type
 * NOTE(review): ownership of the returned string is not visible from this
 * header -- check the definition before freeing it. */
393 extern char *slurmdbd_msg_type_2_str(slurmdbd_msg_type_t msg_type,
394 				     int get_enum);
395 
396 /*****************************************************************************\
397  * Free various SlurmDBD message structures
398 \*****************************************************************************/
/*
 * Release functions, one per message structure declared above.
 * NOTE(review): each presumably frees the structure's members and then the
 * structure itself -- confirm against the definitions.
 *
 * The variants that take a slurmdbd_msg_type_t operate on structures whose
 * payload is an opaque void pointer (rec and/or cond); NOTE(review): the
 * type argument presumably identifies the payload's concrete type -- confirm.
 */

/* NOTE(review): takes void *, presumably so it can serve as a generic
 * destructor callback -- confirm */
399 extern void slurmdbd_free_buffer(void *x);
400 
401 extern void slurmdbd_free_acct_coord_msg(dbd_acct_coord_msg_t *msg);
402 extern void slurmdbd_free_cluster_tres_msg(dbd_cluster_tres_msg_t *msg);
403 extern void slurmdbd_free_msg(persist_msg_t *msg);
404 extern void slurmdbd_free_rec_msg(dbd_rec_msg_t *msg, slurmdbd_msg_type_t type);
405 extern void slurmdbd_free_cond_msg(dbd_cond_msg_t *msg,
406 				   slurmdbd_msg_type_t type);
407 extern void slurmdbd_free_fini_msg(dbd_fini_msg_t *msg);
408 extern void slurmdbd_free_job_complete_msg(dbd_job_comp_msg_t *msg);
/* The next two take void * rather than a typed pointer.
 * NOTE(review): presumably expecting a dbd_job_start_msg_t * and a
 * dbd_id_rc_msg_t * respectively, so they can be used as list destructor
 * callbacks -- confirm */
409 extern void slurmdbd_free_job_start_msg(void *in);
410 extern void slurmdbd_free_id_rc_msg(void *in);
411 extern void slurmdbd_free_job_suspend_msg(dbd_job_suspend_msg_t *msg);
412 extern void slurmdbd_free_list_msg(dbd_list_msg_t *msg);
413 extern void slurmdbd_free_modify_msg(dbd_modify_msg_t *msg,
414 				     slurmdbd_msg_type_t type);
415 extern void slurmdbd_free_node_state_msg(dbd_node_state_msg_t *msg);
416 extern void slurmdbd_free_register_ctld_msg(dbd_register_ctld_msg_t *msg);
417 extern void slurmdbd_free_roll_usage_msg(dbd_roll_usage_msg_t *msg);
418 extern void slurmdbd_free_step_complete_msg(dbd_step_comp_msg_t *msg);
419 extern void slurmdbd_free_step_start_msg(dbd_step_start_msg_t *msg);
420 extern void slurmdbd_free_usage_msg(dbd_usage_msg_t *msg,
421 				    slurmdbd_msg_type_t type);
422 
423 #endif	/* !_SLURMDBD_DEFS_H */
424