1 /****************************************************************************\ 2 * slurmdbd_defs.h - definitions used for Slurm DBD RPCs 3 ***************************************************************************** 4 * Copyright (C) 2008-2010 Lawrence Livermore National Security. 5 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 6 * Written by Morris Jette <jette1@llnl.gov> 7 * CODE-OCEC-09-009. All rights reserved. 8 * 9 * This file is part of Slurm, a resource management program. 10 * For details, see <https://slurm.schedmd.com/>. 11 * Please also read the included file: DISCLAIMER. 12 * 13 * Slurm is free software; you can redistribute it and/or modify it under 14 * the terms of the GNU General Public License as published by the Free 15 * Software Foundation; either version 2 of the License, or (at your option) 16 * any later version. 17 * 18 * In addition, as a special exception, the copyright holders give permission 19 * to link the code of portions of this program with the OpenSSL library under 20 * certain conditions as described in each individual source file, and 21 * distribute linked combinations including the two. You must obey the GNU 22 * General Public License in all respects for all of the code used other than 23 * OpenSSL. If you modify file(s) with this exception, you may extend this 24 * exception to your version of the file(s), but you are not obligated to do 25 * so. If you do not wish to do so, delete this exception statement from your 26 * version. If you delete this exception statement from all source files in 27 * the program, then also delete it here. 28 * 29 * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY 30 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 31 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 32 * details. 33 * 34 * You should have received a copy of the GNU General Public License along 35 * with Slurm; if not, write to the Free Software Foundation, Inc., 36 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 37 \*****************************************************************************/ 38 39 #ifndef _SLURMDBD_DEFS_H 40 #define _SLURMDBD_DEFS_H 41 42 #include <inttypes.h> 43 44 #include "slurm/slurm.h" 45 46 #include "src/common/list.h" 47 #include "src/common/slurm_accounting_storage.h" 48 49 /* Slurm DBD message types */ 50 /* ANY TIME YOU ADD TO THIS LIST UPDATE THE CONVERSION FUNCTIONS! */ 51 typedef enum { 52 DEFUNCT_DBD_INIT = 1400,/* Connection initialization */ 53 DBD_FINI, /* Connection finalization */ 54 DBD_ADD_ACCOUNTS, /* Add new account to the mix */ 55 DBD_ADD_ACCOUNT_COORDS, /* Add new coordinatior to an account */ 56 DBD_ADD_ASSOCS, /* Add new association to the mix */ 57 DBD_ADD_CLUSTERS, /* Add new cluster to the mix */ 58 DBD_ADD_USERS, /* Add new user to the mix */ 59 DBD_CLUSTER_TRES, /* Record total tres on cluster */ 60 DBD_FLUSH_JOBS, /* End jobs that are still running 61 * when a controller is restarted. */ 62 DBD_GET_ACCOUNTS, /* Get account information */ 63 DBD_GET_ASSOCS, /* #1410, Get association information */ 64 DBD_GET_ASSOC_USAGE, /* Get assoc usage information */ 65 DBD_GET_CLUSTERS, /* Get account information */ 66 DBD_GET_CLUSTER_USAGE, /* Get cluster usage information */ 67 DBD_RECONFIG, /* Reread the slurmdbd.conf */ 68 DBD_GET_USERS, /* Get account information */ 69 DBD_GOT_ACCOUNTS, /* Response to DBD_GET_ACCOUNTS */ 70 DBD_GOT_ASSOCS, /* Response to DBD_GET_ASSOCS */ 71 DBD_GOT_ASSOC_USAGE, /* Response to DBD_GET_ASSOC_USAGE */ 72 DBD_GOT_CLUSTERS, /* Response to DBD_GET_CLUSTERS */ 73 DBD_GOT_CLUSTER_USAGE, /* #1420, Response to DBD_GET_CLUSTER_USAGE */ 74 DBD_GOT_JOBS, /* Response to DBD_GET_JOBS */ 75 DBD_GOT_LIST, /* Response to DBD_MODIFY/REMOVE MOVE_* */ 76 DBD_GOT_USERS, /* Response to DBD_GET_USERS */ 77 DBD_JOB_COMPLETE, /* Record job completion */ 78 DBD_JOB_START, /* Record job starting */ 79 DBD_ID_RC, /* return db_index from job 80 * insertion, or any other id from 81 * other commands. */ 82 DBD_JOB_SUSPEND, /* Record job suspension */ 83 DBD_MODIFY_ACCOUNTS, /* Modify existing account */ 84 DBD_MODIFY_ASSOCS, /* Modify existing association */ 85 DBD_MODIFY_CLUSTERS, /* #1430, Modify existing cluster */ 86 DBD_MODIFY_USERS, /* Modify existing user */ 87 DBD_NODE_STATE, /* Record node state transition */ 88 DBD_DEFUNCT_RPC_1433, /* Free for reuse */ 89 DBD_REGISTER_CTLD, /* Register a slurmctld's comm port */ 90 DBD_REMOVE_ACCOUNTS, /* Remove existing account */ 91 DBD_REMOVE_ACCOUNT_COORDS,/* Remove existing coordinator from 92 * an account */ 93 DBD_REMOVE_ASSOCS, /* Remove existing association */ 94 DBD_REMOVE_CLUSTERS, /* Remove existing cluster */ 95 DBD_REMOVE_USERS, /* Remove existing user */ 96 DBD_ROLL_USAGE, /* #1440 Roll up usage */ 97 DBD_STEP_COMPLETE, /* Record step completion */ 98 DBD_STEP_START, /* Record step starting */ 99 DBD_DEFUNCT_RPC_1443, /* Free for reuse */ 100 DBD_GET_JOBS_COND, /* Get job information with a condition */ 101 DBD_GET_TXN, /* Get transaction information */ 102 DBD_GOT_TXN, /* Got transaction information */ 103 DBD_ADD_QOS, /* Add QOS information */ 104 DBD_GET_QOS, /* Get QOS information */ 105 DBD_GOT_QOS, /* Got QOS information */ 106 DBD_REMOVE_QOS, /* #1450, Remove QOS information */ 107 DBD_MODIFY_QOS, /* Modify existing QOS */ 108 DBD_ADD_WCKEYS, /* Add WCKEY information */ 109 DBD_GET_WCKEYS, /* Get WCKEY information */ 110 DBD_GOT_WCKEYS, /* Got WCKEY information */ 111 DBD_REMOVE_WCKEYS, /* Remove WCKEY information */ 112 DBD_MODIFY_WCKEYS, /* Modify existing WCKEY */ 113 DBD_GET_WCKEY_USAGE, /* Get wckey usage information */ 114 DBD_GOT_WCKEY_USAGE, /* Get wckey usage information */ 115 DBD_ARCHIVE_DUMP, /* issue a request to dump jobs to 116 * archive */ 117 DBD_ARCHIVE_LOAD, /* #1460, load an archive file */ 118 DBD_ADD_RESV, /* add a reservation */ 119 DBD_REMOVE_RESV, /* remove a reservation */ 120 DBD_MODIFY_RESV, /* modify a reservation */ 121 DBD_GET_RESVS, /* Get reservation information */ 122 DBD_GOT_RESVS, /* Response to DBD_GET_RESV */ 123 DBD_GET_CONFIG, /* Get configuration information */ 124 DBD_GOT_CONFIG, /* Response to DBD_GET_CONFIG */ 125 DBD_GET_PROBS, /* Get problems existing in accounting */ 126 DBD_GOT_PROBS, /* Response to DBD_GET_PROBS */ 127 DBD_GET_EVENTS, /* #1470, Get event information */ 128 DBD_GOT_EVENTS, /* Response to DBD_GET_EVENTS */ 129 DBD_SEND_MULT_JOB_START,/* Send multiple job starts */ 130 DBD_GOT_MULT_JOB_START, /* Get response to DBD_SEND_MULT_JOB_START */ 131 DBD_SEND_MULT_MSG, /* Send multiple message */ 132 DBD_GOT_MULT_MSG, /* Get response to DBD_SEND_MULT_MSG */ 133 DBD_MODIFY_JOB, /* Modify existing Job(s) */ 134 DBD_ADD_RES, /* Add new system resource to the mix */ 135 DBD_GET_RES, /* Get resource information */ 136 DBD_GOT_RES, /* Got resource information */ 137 DBD_REMOVE_RES, /* #1480, Remove existing resource */ 138 DBD_MODIFY_RES, /* Modify existing resource */ 139 DBD_ADD_CLUS_RES, /* Add cluster using a resource */ 140 DBD_REMOVE_CLUS_RES, /* Remove existing cluster resource */ 141 DBD_MODIFY_CLUS_RES, /* Modify existing cluster resource */ 142 DBD_ADD_TRES, /* Add tres to the database */ 143 DBD_GET_TRES, /* Get tres from the database */ 144 DBD_GOT_TRES, /* Got tres from the database */ 145 DBD_FIX_RUNAWAY_JOB, /* Fix any runaway jobs */ 146 DBD_GET_STATS, /* Get daemon statistics */ 147 DBD_GOT_STATS, /* #1490 ,Got daemon statistics data */ 148 DBD_CLEAR_STATS, /* Clear daemon statistics */ 149 DBD_SHUTDOWN, /* Shutdown daemon */ 150 DBD_ADD_FEDERATIONS, /* Add new federation to the mix */ 151 DBD_GET_FEDERATIONS, /* Get federation information */ 152 DBD_GOT_FEDERATIONS, /* Response to DBD_GET_FEDERATIONS */ 153 DBD_MODIFY_FEDERATIONS, /* Modify existing federation */ 154 DBD_REMOVE_FEDERATIONS, /* Removing existing federation */ 155 156 SLURM_PERSIST_INIT = 6500, /* So we don't use the 157 * REQUEST_PERSIST_INIT also used here. 158 */ 159 } slurmdbd_msg_type_t; 160 161 /*****************************************************************************\ 162 * Slurm DBD protocol data structures 163 \*****************************************************************************/ 164 165 typedef struct { 166 List acct_list; /* list of account names (char *'s) */ 167 slurmdb_user_cond_t *cond; 168 } dbd_acct_coord_msg_t; 169 170 typedef struct dbd_cluster_tres_msg { 171 char *cluster_nodes; /* nodes in cluster */ 172 time_t event_time; /* time of transition */ 173 char *tres_str; /* Simple comma separated list of TRES */ 174 } dbd_cluster_tres_msg_t; 175 176 typedef struct { 177 void *rec; /* this could be anything based on the type types 178 * are defined in slurm_accounting_storage.h 179 * *_rec_t */ 180 } dbd_rec_msg_t; 181 182 typedef struct { 183 void *cond; /* this could be anything based on the type types 184 * are defined in slurm_accounting_storage.h 185 * *_cond_t */ 186 } dbd_cond_msg_t; 187 188 typedef struct { 189 uint16_t archive_data; 190 time_t end; 191 time_t start; 192 } dbd_roll_usage_msg_t; 193 194 typedef struct { 195 time_t end; 196 void *rec; 197 time_t start; 198 } dbd_usage_msg_t; 199 200 typedef struct dbd_get_jobs_msg { 201 char *cluster_name; /* name of cluster to query */ 202 uint16_t completion; /* get job completion records instead 203 * of accounting record */ 204 uint32_t gid; /* group id */ 205 time_t last_update; /* time of latest info */ 206 List selected_steps; /* List of slurmdb_selected_step_t *'s */ 207 List selected_parts; /* List of char *'s */ 208 char *user; /* user name */ 209 } dbd_get_jobs_msg_t; 210 211 typedef struct dbd_init_msg { 212 char *cluster_name; /* cluster this message is coming from */ 213 uint16_t version; /* protocol version */ 214 uint32_t uid; /* UID originating connection, 215 * filled by authtentication plugin*/ 216 } dbd_init_msg_t; 217 218 typedef struct dbd_fini_msg { 219 uint16_t close_conn; /* to close connection 1, 0 will keep 220 connection open */ 221 uint16_t commit; /* to rollback(0) or commit(1) changes */ 222 } dbd_fini_msg_t; 223 224 typedef struct dbd_job_comp_msg { 225 char * admin_comment; /* job admin comment field */ 226 uint32_t assoc_id; /* accounting association id needed to 227 * find job record in db */ 228 char * comment; /* job comment field */ 229 uint64_t db_index; /* index into the db for this job */ 230 uint32_t derived_ec; /* derived job exit code or signal */ 231 time_t end_time; /* job termintation time */ 232 uint32_t exit_code; /* job exit code or signal */ 233 uint32_t job_id; /* job ID */ 234 uint32_t job_state; /* job state */ 235 char * nodes; /* hosts allocated to the job */ 236 uint32_t req_uid; /* requester user ID */ 237 time_t start_time; /* job start time */ 238 time_t submit_time; /* job submit time needed to find job 239 * record in db */ 240 char * system_comment;/* job system comment field */ 241 char *tres_alloc_str;/* Simple comma separated list of TRES */ 242 } dbd_job_comp_msg_t; 243 244 typedef struct dbd_job_start_msg { 245 char * account; /* Account name for those not running 246 * with associations */ 247 uint32_t alloc_nodes; /* how many nodes used in job */ 248 uint32_t array_job_id; /* job_id of a job array or 0 if N/A */ 249 uint32_t array_max_tasks;/* max number of tasks able to run at once */ 250 uint32_t array_task_id; /* task_id of a job array of NO_VAL 251 * if N/A */ 252 char * array_task_str;/* hex string of unstarted tasks */ 253 uint32_t array_task_pending;/* number of tasks still pending */ 254 uint32_t assoc_id; /* accounting association id */ 255 char * constraints; /* features/constraints requested by job */ 256 uint32_t db_flags; /* flags about job */ 257 uint64_t db_index; /* index into the db for this job */ 258 time_t eligible_time; /* time job becomes eligible to run */ 259 uint32_t gid; /* group ID */ 260 uint32_t het_job_id; /* ID of hetjob leader or 0 */ 261 uint32_t het_job_offset; /* Hetjob component ID, zero-origin */ 262 uint32_t job_id; /* job ID */ 263 uint32_t job_state; /* job state */ 264 char * mcs_label; /* job mcs_label */ 265 char * name; /* job name */ 266 char * nodes; /* hosts allocated to the job */ 267 char * node_inx; /* ranged bitmap string of hosts 268 * allocated to the job */ 269 char * partition; /* partition job is running on */ 270 uint32_t priority; /* job priority */ 271 uint32_t qos_id; /* qos job is running with */ 272 uint32_t req_cpus; /* count of req processors */ 273 uint64_t req_mem; /* requested minimum memory */ 274 uint32_t resv_id; /* reservation id */ 275 time_t start_time; /* job start time */ 276 uint32_t state_reason_prev; /* Last reason of blocking before job 277 * started */ 278 time_t submit_time; /* job submit time */ 279 uint32_t timelimit; /* job timelimit */ 280 uint32_t uid; /* user ID if associations are being used */ 281 char* gres_alloc; /* String depicting the allocated GRES by 282 * type for the entire job on all nodes. */ 283 char* gres_req; /* String depicting the requested GRES by 284 * type for the entire job on all nodes. */ 285 char* gres_used; /* String depicting the GRES actually used by 286 * type for the entire job on all nodes. */ 287 char *tres_alloc_str;/* Simple comma separated list of TRES */ 288 char *tres_req_str; /* Simple comma separated list of TRES */ 289 char * wckey; /* wckey name */ 290 char *work_dir; /* work dir of job */ 291 } dbd_job_start_msg_t; 292 293 /* returns a uint32_t along with a return code */ 294 typedef struct dbd_id_rc_msg { 295 uint32_t job_id; 296 uint64_t db_index; 297 uint32_t return_code; 298 } dbd_id_rc_msg_t; 299 300 typedef struct dbd_job_suspend_msg { 301 uint32_t assoc_id; /* accounting association id needed 302 * to find job record in db */ 303 uint64_t db_index; /* index into the db for this job */ 304 uint32_t job_id; /* job ID needed to find job record 305 * in db */ 306 uint32_t job_state; /* job state */ 307 time_t submit_time; /* job submit time needed to find job record 308 * in db */ 309 time_t suspend_time; /* job suspend or resume time */ 310 } dbd_job_suspend_msg_t; 311 312 typedef struct { 313 List my_list; /* this list could be of any type as long as it 314 * is handled correctly on both ends */ 315 uint32_t return_code; /* If there was an error and a list of 316 * them this is the type of error it 317 * was */ 318 } dbd_list_msg_t; 319 320 typedef struct { 321 void *cond; 322 void *rec; 323 } dbd_modify_msg_t; 324 325 #define DBD_NODE_STATE_DOWN 1 326 #define DBD_NODE_STATE_UP 2 327 typedef struct dbd_node_state_msg { 328 time_t event_time; /* time of transition */ 329 char *hostlist; /* name of hosts */ 330 uint16_t new_state; /* new state of host, see DBD_NODE_STATE_* */ 331 char *reason; /* explanation for the node's state */ 332 uint32_t reason_uid; /* User that set the reason, ignore if 333 * no reason is set. */ 334 uint32_t state; /* current state of node. Used to get 335 flags on the state (i.e. maintenance) */ 336 char *tres_str; /* Simple comma separated list of TRES */ 337 } dbd_node_state_msg_t; 338 339 typedef struct dbd_register_ctld_msg { 340 uint16_t dimensions; /* dimensions of system */ 341 uint32_t flags; /* flags for cluster */ 342 uint32_t plugin_id_select; /* the select plugin_id */ 343 uint16_t port; /* slurmctld's comm port */ 344 } dbd_register_ctld_msg_t; 345 346 typedef struct dbd_step_comp_msg { 347 uint32_t assoc_id; /* accounting association id */ 348 uint64_t db_index; /* index into the db for this job */ 349 time_t end_time; /* job termintation time */ 350 uint32_t exit_code; /* job exit code or signal */ 351 jobacctinfo_t *jobacct; /* status info */ 352 uint32_t job_id; /* job ID */ 353 time_t job_submit_time;/* job submit time needed to find job record 354 * in db */ 355 char *job_tres_alloc_str;/* Simple comma separated list of TRES for 356 * the job (primarily for the energy of the 357 * completing job. This is only filled in 358 * on the last step in the job. */ 359 uint32_t req_uid; /* requester user ID */ 360 time_t start_time; /* step start time */ 361 uint16_t state; /* current state of node. Used to get 362 flags on the state (i.e. maintenance) */ 363 uint32_t step_id; /* step ID */ 364 uint32_t total_tasks; /* count of tasks for step */ 365 } dbd_step_comp_msg_t; 366 367 typedef struct dbd_step_start_msg { 368 uint32_t assoc_id; /* accounting association id */ 369 uint64_t db_index; /* index into the db for this job */ 370 uint32_t job_id; /* job ID */ 371 char * name; /* step name */ 372 char * nodes; /* hosts allocated to the step */ 373 char * node_inx; /* bitmap index of hosts allocated to 374 * the step */ 375 uint32_t node_cnt; /* how many nodes used in step */ 376 time_t start_time; /* step start time */ 377 time_t job_submit_time;/* job submit time needed to find job record 378 * in db */ 379 uint32_t req_cpufreq_min; /* requested minimum CPU frequency */ 380 uint32_t req_cpufreq_max; /* requested maximum CPU frequency */ 381 uint32_t req_cpufreq_gov; /* requested CPU frequency governor */ 382 uint32_t step_id; /* step ID */ 383 uint32_t task_dist; /* layout method of step */ 384 uint32_t total_tasks; /* count of tasks for step */ 385 char *tres_alloc_str; /* Simple comma separated list of TRES */ 386 } dbd_step_start_msg_t; 387 388 /*****************************************************************************\ 389 * Slurm DBD message processing functions 390 \*****************************************************************************/ 391 392 extern slurmdbd_msg_type_t str_2_slurmdbd_msg_type(char *msg_type); 393 extern char *slurmdbd_msg_type_2_str(slurmdbd_msg_type_t msg_type, 394 int get_enum); 395 396 /*****************************************************************************\ 397 * Free various SlurmDBD message structures 398 \*****************************************************************************/ 399 extern void slurmdbd_free_buffer(void *x); 400 401 extern void slurmdbd_free_acct_coord_msg(dbd_acct_coord_msg_t *msg); 402 extern void slurmdbd_free_cluster_tres_msg(dbd_cluster_tres_msg_t *msg); 403 extern void slurmdbd_free_msg(persist_msg_t *msg); 404 extern void slurmdbd_free_rec_msg(dbd_rec_msg_t *msg, slurmdbd_msg_type_t type); 405 extern void slurmdbd_free_cond_msg(dbd_cond_msg_t *msg, 406 slurmdbd_msg_type_t type); 407 extern void slurmdbd_free_fini_msg(dbd_fini_msg_t *msg); 408 extern void slurmdbd_free_job_complete_msg(dbd_job_comp_msg_t *msg); 409 extern void slurmdbd_free_job_start_msg(void *in); 410 extern void slurmdbd_free_id_rc_msg(void *in); 411 extern void slurmdbd_free_job_suspend_msg(dbd_job_suspend_msg_t *msg); 412 extern void slurmdbd_free_list_msg(dbd_list_msg_t *msg); 413 extern void slurmdbd_free_modify_msg(dbd_modify_msg_t *msg, 414 slurmdbd_msg_type_t type); 415 extern void slurmdbd_free_node_state_msg(dbd_node_state_msg_t *msg); 416 extern void slurmdbd_free_register_ctld_msg(dbd_register_ctld_msg_t *msg); 417 extern void slurmdbd_free_roll_usage_msg(dbd_roll_usage_msg_t *msg); 418 extern void slurmdbd_free_step_complete_msg(dbd_step_comp_msg_t *msg); 419 extern void slurmdbd_free_step_start_msg(dbd_step_start_msg_t *msg); 420 extern void slurmdbd_free_usage_msg(dbd_usage_msg_t *msg, 421 slurmdbd_msg_type_t type); 422 423 #endif /* !_SLURMDBD_DEFS_H */ 424