1 /****************************************************************************\ 2 * slurm_protocol_defs.h - definitions used for RPCs 3 ***************************************************************************** 4 * Copyright (C) 2002-2007 The Regents of the University of California. 5 * Copyright (C) 2008-2010 Lawrence Livermore National Security. 6 * Portions Copyright (C) 2010-2014 SchedMD <https://www.schedmd.com>. 7 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 8 * Written by Kevin Tew <tew1@llnl.gov>. 9 * CODE-OCEC-09-009. All rights reserved. 10 * 11 * This file is part of Slurm, a resource management program. 12 * For details, see <https://slurm.schedmd.com/>. 13 * Please also read the included file: DISCLAIMER. 14 * 15 * Slurm is free software; you can redistribute it and/or modify it under 16 * the terms of the GNU General Public License as published by the Free 17 * Software Foundation; either version 2 of the License, or (at your option) 18 * any later version. 19 * 20 * In addition, as a special exception, the copyright holders give permission 21 * to link the code of portions of this program with the OpenSSL library under 22 * certain conditions as described in each individual source file, and 23 * distribute linked combinations including the two. You must obey the GNU 24 * General Public License in all respects for all of the code used other than 25 * OpenSSL. If you modify file(s) with this exception, you may extend this 26 * exception to your version of the file(s), but you are not obligated to do 27 * so. If you do not wish to do so, delete this exception statement from your 28 * version. If you delete this exception statement from all source files in 29 * the program, then also delete it here. 30 * 31 * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY 32 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 33 * FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more 34 * details. 35 * 36 * You should have received a copy of the GNU General Public License along 37 * with Slurm; if not, write to the Free Software Foundation, Inc., 38 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 39 \*****************************************************************************/ 40 41 #ifndef _SLURM_PROTOCOL_DEFS_H 42 #define _SLURM_PROTOCOL_DEFS_H 43 44 #include <inttypes.h> 45 #include <sys/wait.h> 46 47 #ifdef HAVE_SYSCTLBYNAME 48 #if defined(__FreeBSD__) 49 #include <sys/types.h> 50 #else 51 #include <sys/param.h> 52 #endif 53 # include <sys/sysctl.h> 54 #endif 55 56 #include "slurm/slurm.h" 57 #include "slurm/slurmdb.h" 58 #include "src/common/bitstring.h" 59 #include "src/common/job_options.h" 60 #include "src/common/list.h" 61 #include "src/common/macros.h" 62 #include "src/common/slurm_cred.h" 63 #include "src/common/slurm_protocol_common.h" 64 #include "src/common/slurm_persist_conn.h" 65 #include "src/common/slurm_step_layout.h" 66 #include "src/common/slurmdb_defs.h" 67 #include "src/common/working_cluster.h" 68 #include "src/common/xassert.h" 69 70 #define MAX_SLURM_NAME 64 71 #define FORWARD_INIT 0xfffe 72 73 /* Defined job states */ 74 #define IS_JOB_PENDING(_X) \ 75 ((_X->job_state & JOB_STATE_BASE) == JOB_PENDING) 76 #define IS_JOB_RUNNING(_X) \ 77 ((_X->job_state & JOB_STATE_BASE) == JOB_RUNNING) 78 #define IS_JOB_SUSPENDED(_X) \ 79 ((_X->job_state & JOB_STATE_BASE) == JOB_SUSPENDED) 80 #define IS_JOB_COMPLETE(_X) \ 81 ((_X->job_state & JOB_STATE_BASE) == JOB_COMPLETE) 82 #define IS_JOB_CANCELLED(_X) \ 83 ((_X->job_state & JOB_STATE_BASE) == JOB_CANCELLED) 84 #define IS_JOB_FAILED(_X) \ 85 ((_X->job_state & JOB_STATE_BASE) == JOB_FAILED) 86 #define IS_JOB_TIMEOUT(_X) \ 87 ((_X->job_state & JOB_STATE_BASE) == JOB_TIMEOUT) 88 #define IS_JOB_NODE_FAILED(_X) \ 89 ((_X->job_state & JOB_STATE_BASE) == JOB_NODE_FAIL) 90 #define IS_JOB_DEADLINE(_X) \ 91 ((_X->job_state & 
JOB_STATE_BASE) == JOB_DEADLINE) 92 #define IS_JOB_OOM(_X) \ 93 ((_X->job_state & JOB_STATE_BASE) == JOB_OOM) 94 #define IS_JOB_POWER_UP_NODE(_X) \ 95 (_X->job_state & JOB_POWER_UP_NODE) 96 97 /* Derived job states */ 98 #define IS_JOB_COMPLETING(_X) \ 99 (_X->job_state & JOB_COMPLETING) 100 #define IS_JOB_CONFIGURING(_X) \ 101 (_X->job_state & JOB_CONFIGURING) 102 #define IS_JOB_STARTED(_X) \ 103 ((_X->job_state & JOB_STATE_BASE) > JOB_PENDING) 104 #define IS_JOB_FINISHED(_X) \ 105 ((_X->job_state & JOB_STATE_BASE) > JOB_SUSPENDED) 106 #define IS_JOB_COMPLETED(_X) \ 107 (IS_JOB_FINISHED(_X) && ((_X->job_state & JOB_COMPLETING) == 0)) 108 #define IS_JOB_RESIZING(_X) \ 109 (_X->job_state & JOB_RESIZING) 110 #define IS_JOB_REQUEUED(_X) \ 111 (_X->job_state & JOB_REQUEUE) 112 #define IS_JOB_FED_REQUEUED(_X) \ 113 (_X->job_state & JOB_REQUEUE_FED) 114 #define IS_JOB_UPDATE_DB(_X) \ 115 (_X->job_state & JOB_UPDATE_DB) 116 #define IS_JOB_REVOKED(_X) \ 117 (_X->job_state & JOB_REVOKED) 118 #define IS_JOB_SIGNALING(_X) \ 119 (_X->job_state & JOB_SIGNALING) 120 #define IS_JOB_STAGE_OUT(_X) \ 121 (_X->job_state & JOB_STAGE_OUT) 122 123 /* Defined node states */ 124 #define IS_NODE_UNKNOWN(_X) \ 125 ((_X->node_state & NODE_STATE_BASE) == NODE_STATE_UNKNOWN) 126 #define IS_NODE_DOWN(_X) \ 127 ((_X->node_state & NODE_STATE_BASE) == NODE_STATE_DOWN) 128 #define IS_NODE_IDLE(_X) \ 129 ((_X->node_state & NODE_STATE_BASE) == NODE_STATE_IDLE) 130 #define IS_NODE_ALLOCATED(_X) \ 131 ((_X->node_state & NODE_STATE_BASE) == NODE_STATE_ALLOCATED) 132 #define IS_NODE_MIXED(_X) \ 133 ((_X->node_state & NODE_STATE_BASE) == NODE_STATE_MIXED) 134 #define IS_NODE_FUTURE(_X) \ 135 ((_X->node_state & NODE_STATE_BASE) == NODE_STATE_FUTURE) 136 137 /* Derived node states */ 138 #define IS_NODE_CLOUD(_X) \ 139 (_X->node_state & NODE_STATE_CLOUD) 140 #define IS_NODE_DRAIN(_X) \ 141 (_X->node_state & NODE_STATE_DRAIN) 142 #define IS_NODE_DRAINING(_X) \ 143 ((_X->node_state & NODE_STATE_DRAIN) \ 144 
&& (IS_NODE_ALLOCATED(_X) || IS_NODE_MIXED(_X))) 145 #define IS_NODE_DRAINED(_X) \ 146 (IS_NODE_DRAIN(_X) && !IS_NODE_DRAINING(_X)) 147 #define IS_NODE_COMPLETING(_X) \ 148 (_X->node_state & NODE_STATE_COMPLETING) 149 #define IS_NODE_NO_RESPOND(_X) \ 150 (_X->node_state & NODE_STATE_NO_RESPOND) 151 #define IS_NODE_POWER_SAVE(_X) \ 152 (_X->node_state & NODE_STATE_POWER_SAVE) 153 #define IS_NODE_POWERING_DOWN(_X) \ 154 (_X->node_state & NODE_STATE_POWERING_DOWN) 155 #define IS_NODE_FAIL(_X) \ 156 (_X->node_state & NODE_STATE_FAIL) 157 #define IS_NODE_POWER_UP(_X) \ 158 (_X->node_state & NODE_STATE_POWER_UP) 159 #define IS_NODE_MAINT(_X) \ 160 (_X->node_state & NODE_STATE_MAINT) 161 #define IS_NODE_REBOOT(_X) \ 162 (_X->node_state & NODE_STATE_REBOOT) 163 #define IS_NODE_RUNNING_JOB(_X) \ 164 (_X->comp_job_cnt || _X->run_job_cnt || _X->sus_job_cnt) 165 166 #define THIS_FILE ((strrchr(__FILE__, '/') ?: __FILE__ - 1) + 1) 167 #define INFO_LINE(fmt, ...) \ 168 info("%s (%s:%d) "fmt, __func__, THIS_FILE, __LINE__, ##__VA_ARGS__); 169 170 #define YEAR_MINUTES (365 * 24 * 60) 171 #define YEAR_SECONDS (365 * 24 * 60 * 60) 172 173 /* Read as 'how many X are in a Y' */ 174 #define MSEC_IN_SEC 1000 175 #define USEC_IN_SEC 1000000 176 #define NSEC_IN_SEC 1000000000 177 #define NSEC_IN_USEC 1000 178 #define NSEC_IN_MSEC 1000000 179 180 #define SLURMD_REG_FLAG_STARTUP 0x0001 181 #define SLURMD_REG_FLAG_RESP 0x0002 182 183 /* These defines have to be here to avoid circular dependancy with 184 * switch.h 185 */ 186 #ifndef __switch_jobinfo_t_defined 187 # define __switch_jobinfo_t_defined 188 typedef struct switch_jobinfo switch_jobinfo_t; 189 #endif 190 #ifndef __switch_node_info_t_defined 191 # define __switch_node_info_t_defined 192 typedef struct switch_node_info switch_node_info_t; 193 #endif 194 195 /* 196 * Slurm Message types 197 * 198 * IMPORTANT: ADD NEW MESSAGE TYPES TO THE *END* OF ONE OF THESE NUMBERED 199 * SECTIONS. 
ADDING ONE ELSEWHERE WOULD SHIFT THE VALUES OF EXISTING MESSAGE
 *	      TYPES IN CURRENT PROGRAMS AND PREVENT BACKWARD COMPATIBILITY.
 *
 * Only the first enumerator of each section has an explicit value; every
 * other enumerator takes the previous value plus one, so the position of
 * an entry inside its section determines its numeric value. The trailing
 * numeric comments mark the value of the entry they follow.
 */
typedef enum {
	REQUEST_NODE_REGISTRATION_STATUS = 1001,
	MESSAGE_NODE_REGISTRATION_STATUS,
	REQUEST_RECONFIGURE,
	REQUEST_RECONFIGURE_WITH_CONFIG,
	REQUEST_SHUTDOWN,
	REQUEST_SHUTDOWN_IMMEDIATE,
	DEFUNCT_RPC_1007,
	REQUEST_PING,
	REQUEST_CONTROL,
	REQUEST_SET_DEBUG_LEVEL,	/* 1010 */
	REQUEST_HEALTH_CHECK,
	REQUEST_TAKEOVER,
	REQUEST_SET_SCHEDLOG_LEVEL,
	REQUEST_SET_DEBUG_FLAGS,
	REQUEST_REBOOT_NODES,
	RESPONSE_PING_SLURMD,
	REQUEST_ACCT_GATHER_UPDATE,
	RESPONSE_ACCT_GATHER_UPDATE,
	REQUEST_ACCT_GATHER_ENERGY,
	RESPONSE_ACCT_GATHER_ENERGY,	/* 1020 */
	REQUEST_LICENSE_INFO,
	RESPONSE_LICENSE_INFO,
	REQUEST_SET_FS_DAMPENING_FACTOR,
	RESPONSE_NODE_REGISTRATION,	/* 1024 */

	PERSIST_RC = 1433, /* To mirror the DBD_RC this is replacing */
	/* Don't make any messages in this range as this is what the DBD uses
	 * unless mirroring */
	DBD_MESSAGES_END	= 2000,

	REQUEST_BUILD_INFO = 2001,
	RESPONSE_BUILD_INFO,
	REQUEST_JOB_INFO,
	RESPONSE_JOB_INFO,
	REQUEST_JOB_STEP_INFO,
	RESPONSE_JOB_STEP_INFO,
	REQUEST_NODE_INFO,
	RESPONSE_NODE_INFO,
	REQUEST_PARTITION_INFO,
	RESPONSE_PARTITION_INFO,	/* 2010 */
	DEFUNCT_RPC_2011,
	DEFUNCT_RPC_2012,
	REQUEST_JOB_ID,
	RESPONSE_JOB_ID,
	REQUEST_CONFIG,
	RESPONSE_CONFIG,
	REQUEST_TRIGGER_SET,
	REQUEST_TRIGGER_GET,
	REQUEST_TRIGGER_CLEAR,
	RESPONSE_TRIGGER_GET,		/* 2020 */
	REQUEST_JOB_INFO_SINGLE,
	REQUEST_SHARE_INFO,
	RESPONSE_SHARE_INFO,
	REQUEST_RESERVATION_INFO,
	RESPONSE_RESERVATION_INFO,
	REQUEST_PRIORITY_FACTORS,
	RESPONSE_PRIORITY_FACTORS,
	REQUEST_TOPO_INFO,
	RESPONSE_TOPO_INFO,
	REQUEST_TRIGGER_PULL,		/* 2030 */
	REQUEST_FRONT_END_INFO,
	RESPONSE_FRONT_END_INFO,
	DEFUNCT_RPC_2033,
	DEFUNCT_RPC_2034,
	REQUEST_STATS_INFO,
	RESPONSE_STATS_INFO,
	REQUEST_BURST_BUFFER_INFO,
	RESPONSE_BURST_BUFFER_INFO,
	REQUEST_JOB_USER_INFO,
	REQUEST_NODE_INFO_SINGLE,	/* 2040 */
	REQUEST_POWERCAP_INFO,
	RESPONSE_POWERCAP_INFO,
	REQUEST_ASSOC_MGR_INFO,
	RESPONSE_ASSOC_MGR_INFO,
	REQUEST_EVENT_LOG,
	DEFUNCT_RPC_2046, /* free for reuse */
	REQUEST_LAYOUT_INFO,
	RESPONSE_LAYOUT_INFO,
	REQUEST_FED_INFO,
	RESPONSE_FED_INFO,		/* 2050 */
	REQUEST_BATCH_SCRIPT,
	RESPONSE_BATCH_SCRIPT,
	REQUEST_CONTROL_STATUS,
	RESPONSE_CONTROL_STATUS,
	REQUEST_BURST_BUFFER_STATUS,
	RESPONSE_BURST_BUFFER_STATUS,	/* 2056 */

	REQUEST_UPDATE_JOB = 3001,
	REQUEST_UPDATE_NODE,
	REQUEST_CREATE_PARTITION,
	REQUEST_DELETE_PARTITION,
	REQUEST_UPDATE_PARTITION,
	REQUEST_CREATE_RESERVATION,
	RESPONSE_CREATE_RESERVATION,
	REQUEST_DELETE_RESERVATION,
	REQUEST_UPDATE_RESERVATION,
	DEFUNCT_RPC_3010,  /* free for reuse */
	REQUEST_UPDATE_FRONT_END,		/* 3011 */
	REQUEST_UPDATE_LAYOUT,
	REQUEST_UPDATE_POWERCAP,

	REQUEST_RESOURCE_ALLOCATION = 4001,
	RESPONSE_RESOURCE_ALLOCATION,
	REQUEST_SUBMIT_BATCH_JOB,
	RESPONSE_SUBMIT_BATCH_JOB,
	REQUEST_BATCH_JOB_LAUNCH,
	REQUEST_CANCEL_JOB,
	DEFUNCT_RPC_4007,
	DEFUNCT_RPC_4008,
	DEFUNCT_RPC_4009,
	DEFUNCT_RPC_4010,		/* 4010 */
	DEFUNCT_RPC_4011,
	REQUEST_JOB_WILL_RUN,
	RESPONSE_JOB_WILL_RUN,
	REQUEST_JOB_ALLOCATION_INFO,
	RESPONSE_JOB_ALLOCATION_INFO,	/* 4015 */
	/* NOTE: the next two names are off by one relative to their values:
	 * DEFUNCT_RPC_4017 is 4016 and DEFUNCT_RPC_4018 is 4017. Do not
	 * "fix" this by inserting an entry; that would shift every value
	 * that follows in this section. */
	DEFUNCT_RPC_4017, /* free for reuse */
	DEFUNCT_RPC_4018,  /* free for reuse */
	REQUEST_UPDATE_JOB_TIME,
	REQUEST_JOB_READY,
	RESPONSE_JOB_READY,		/* 4020 */
	REQUEST_JOB_END_TIME,
	REQUEST_JOB_NOTIFY,
	REQUEST_JOB_SBCAST_CRED,
	RESPONSE_JOB_SBCAST_CRED,
	REQUEST_HET_JOB_ALLOCATION,
	RESPONSE_HET_JOB_ALLOCATION,
	REQUEST_HET_JOB_ALLOC_INFO,
	REQUEST_SUBMIT_BATCH_HET_JOB,

	REQUEST_CTLD_MULT_MSG = 4500,
	RESPONSE_CTLD_MULT_MSG,
	REQUEST_SIB_MSG,
	REQUEST_SIB_JOB_LOCK,
	REQUEST_SIB_JOB_UNLOCK,
	REQUEST_SEND_DEP,
	REQUEST_UPDATE_ORIGIN_DEP,

	REQUEST_JOB_STEP_CREATE = 5001,
	RESPONSE_JOB_STEP_CREATE,
	DEFUNCT_RPC_5003,
	DEFUNCT_RPC_5004,
	REQUEST_CANCEL_JOB_STEP,
	DEFUNCT_RPC_5006,
	REQUEST_UPDATE_JOB_STEP,
	DEFUNCT_RPC_5008,
	DEFUNCT_RPC_5009,
	DEFUNCT_RPC_5010,		/* 5010 */
	DEFUNCT_RPC_5011,
	DEFUNCT_RPC_5012,
	DEFUNCT_RPC_5013,
	REQUEST_SUSPEND,
	DEFUNCT_RPC_5015,
	REQUEST_STEP_COMPLETE,
	REQUEST_COMPLETE_JOB_ALLOCATION,
	REQUEST_COMPLETE_BATCH_SCRIPT,
	REQUEST_JOB_STEP_STAT,
	RESPONSE_JOB_STEP_STAT,		/* 5020 */
	REQUEST_STEP_LAYOUT,
	RESPONSE_STEP_LAYOUT,
	REQUEST_JOB_REQUEUE,
	REQUEST_DAEMON_STATUS,
	RESPONSE_SLURMD_STATUS,
	DEFUNCT_RPC_5026,
	REQUEST_JOB_STEP_PIDS,
	RESPONSE_JOB_STEP_PIDS,
	REQUEST_FORWARD_DATA,
	DEFUNCT_RPC_5030,		/* 5030 */
	REQUEST_SUSPEND_INT,
	REQUEST_KILL_JOB,		/* 5032 */
	DEFUNCT_RPC_5033,
	RESPONSE_JOB_ARRAY_ERRORS,
	REQUEST_NETWORK_CALLERID,
	RESPONSE_NETWORK_CALLERID,
	REQUEST_STEP_COMPLETE_AGGR,
	REQUEST_TOP_JOB,		/* 5038 */
	REQUEST_AUTH_TOKEN,
	RESPONSE_AUTH_TOKEN,

	REQUEST_LAUNCH_TASKS = 6001,
	RESPONSE_LAUNCH_TASKS,
	MESSAGE_TASK_EXIT,
	REQUEST_SIGNAL_TASKS,
	DEFUNCT_RPC_6005,
	REQUEST_TERMINATE_TASKS,
	REQUEST_REATTACH_TASKS,
	RESPONSE_REATTACH_TASKS,
	REQUEST_KILL_TIMELIMIT,
	DEFUNCT_RPC_6010, /* free for reuse */
	REQUEST_TERMINATE_JOB,		/* 6011 */
	MESSAGE_EPILOG_COMPLETE,
	REQUEST_ABORT_JOB,	/* job shouldn't be running, kill it without
				 * job/step/task complete responses */
	REQUEST_FILE_BCAST,
	TASK_USER_MANAGED_IO_STREAM,
	REQUEST_KILL_PREEMPTED,

	REQUEST_LAUNCH_PROLOG,
	REQUEST_COMPLETE_PROLOG,
	RESPONSE_PROLOG_EXECUTING,	/* 6019 */

	REQUEST_PERSIST_INIT = 6500,

	SRUN_PING = 7001,
	SRUN_TIMEOUT,
	SRUN_NODE_FAIL,
	SRUN_JOB_COMPLETE,
	SRUN_USER_MSG,
	SRUN_EXEC,
	SRUN_STEP_MISSING,
	SRUN_REQUEST_SUSPEND,
	SRUN_STEP_SIGNAL,	/* for launch plugins aprun and poe,
				 * srun forwards signal to the launch command */
	SRUN_NET_FORWARD,

	PMI_KVS_PUT_REQ = 7201,
	DEFUNCT_RPC_7202,
	PMI_KVS_GET_REQ,
	PMI_KVS_GET_RESP,

	RESPONSE_SLURM_RC = 8001,
	RESPONSE_SLURM_RC_MSG,
	RESPONSE_SLURM_REROUTE_MSG,

	RESPONSE_FORWARD_FAILED = 9001,

	ACCOUNTING_UPDATE_MSG = 10001,
	ACCOUNTING_FIRST_REG,
	ACCOUNTING_REGISTER_CTLD,
	ACCOUNTING_TRES_CHANGE_DB,
	ACCOUNTING_NODES_CHANGE_DB,

	MESSAGE_COMPOSITE = 11001,
	RESPONSE_MESSAGE_COMPOSITE,
} slurm_msg_type_t;

/*****************************************************************************\
 * core api configuration struct
\*****************************************************************************/
/* Forwarding parameters carried with a message (embedded in header_t) */
typedef struct forward {
	uint16_t   cnt;		/* number of nodes to forward to */
	uint16_t   init;	/* tell me it has been set (FORWARD_INIT) */
	char      *nodelist;	/* ranged string of who to forward the
				 * message to */
	uint32_t   timeout;	/* original timeout increments */
	uint16_t   tree_width;	/* what the treewidth should be */
} forward_t;

/*core api protocol message structures */
/* Message header; msg_type is stored as a 16-bit value, and the largest
 * slurm_msg_type_t enumerator above (11002) fits in 16 bits */
typedef struct slurm_protocol_header {
	uint16_t version;
	uint16_t flags;
	uint16_t msg_index;
	uint16_t msg_type; /* really slurm_msg_type_t but needs to be
			      uint16_t for packing purposes.
*/
	uint32_t body_length;
	uint16_t ret_cnt;
	forward_t forward;
	slurm_addr_t orig_addr;
	List ret_list;
} header_t;

/* State referenced by forward_msg_t through its fwd_struct pointer: a
 * buffer with its length, a forward counter, a mutex / condition variable
 * pair, a return list and a timeout */
typedef struct forward_struct {
	char *buf;
	int buf_len;
	uint16_t fwd_cnt;
	pthread_mutex_t forward_mutex;
	pthread_cond_t notify;
	List ret_list;
	uint32_t timeout;
} forward_struct_t;

/* One forwarded message: pointer to the forward_struct_t state, its own
 * copy of the header, and a timeout */
typedef struct forward_message {
	forward_struct_t *fwd_struct;
	header_t header;
	int timeout;
} forward_msg_t;

/* Controller addressing: controller_addr points at slurm_addr_t entries
 * (presumably control_cnt of them -- confirm against the code that fills
 * this in), plus an optional vip_addr flagged by vip_addr_set */
typedef struct slurm_protocol_config {
	uint32_t control_cnt;
	slurm_addr_t *controller_addr;
	bool vip_addr_set;
	slurm_addr_t vip_addr;
} slurm_protocol_config_t;

/* In-memory representation of a message. Fields marked "DON'T PACK" are
 * local bookkeeping, not part of the packed message. */
typedef struct slurm_msg {
	slurm_addr_t address;
	void *auth_cred;
	int auth_index;		/* DON'T PACK: zero for normal communication.
				 * index value copied from incoming connection,
				 * so that we'll respond with the same auth
				 * plugin used to connect to us originally.
				 */
	uint32_t body_offset; /* DON'T PACK: offset in buffer where body part of
				 buffer starts. */
	Buf buffer; /* DON'T PACK! ptr to buffer that msg was unpacked from. */
	slurm_persist_conn_t *conn; /* DON'T PACK OR FREE! this is here to
				     * distinguish a persistent connection from
				     * a normal connection it should be filled
				     * in with the connection before sending the
				     * message so that it is handled correctly.
				     */
	int conn_fd; /* Only used when the message isn't on a persistent
		      * connection. */
	void *data;
	uint32_t data_size;
	uint16_t flags;
	uint16_t msg_index;
	uint16_t msg_type; /* really a slurm_msg_type_t but needs to be
			    * this way for packing purposes.  message type */
	uint16_t protocol_version; /* DON'T PACK!  Only used if
				    * message coming from non-default
				    * slurm protocol.  Initted to
				    * NO_VAL meaning use the default. */
	/* The following were all added for the forward.c code */
	forward_t forward;
	forward_struct_t *forward_struct;
	slurm_addr_t orig_addr;
	List ret_list;
} slurm_msg_t;

/* One returned result: message type, error code, node name and payload */
typedef struct ret_data_info {
	uint16_t type; /* really a slurm_msg_type_t but needs to be
			* this way for packing purposes.  message type */
	uint32_t err;
	char *node_name;
	void *data; /* used to hold the return message data (i.e.
		       return_code_msg_t) */
} ret_data_info_t;

/*****************************************************************************\
 * Slurm Protocol Data Structures
\*****************************************************************************/
/* Per-task contact information (task id, port, hostname) */
struct kvs_hosts {
	uint32_t	task_id;	/* job step's task id */
	uint16_t	port;		/* communication port */
	char *		hostname;	/* communication host */
};
/* A named set of key/value string pairs; kvs_cnt is the pair count */
struct kvs_comm {
	char *		kvs_name;
	uint32_t	kvs_cnt;	/* count of key-pairs */
	char **		kvs_keys;
	char **		kvs_values;
	uint16_t *	kvs_key_sent;
};
/* A set of kvs_comm records together with the hosts receiving it */
typedef struct kvs_comm_set {

	uint16_t	host_cnt;	/* hosts getting this message */
	struct kvs_hosts *kvs_host_ptr;	/* host forwarding info */
	uint16_t	kvs_comm_recs;	/* count of kvs_comm entries */
	struct kvs_comm **kvs_comm_ptr;	/* pointers to kvs_comm entries */
} kvs_comm_set_t;

/* Share and usage figures for one association */
typedef struct assoc_shares_object {
	uint32_t assoc_id;	/* association ID */

	char *cluster;          /* cluster name */
	char *name;             /* name */
	char *parent;           /* parent name */
	char *partition;	/* partition */

	double shares_norm;     /* normalized shares */
	uint32_t shares_raw;	/* number of shares allocated */

	uint64_t *tres_run_secs; /* currently running tres-secs
				  * = grp_used_tres_run_secs */
	uint64_t *tres_grp_mins; /* tres-minute limit */

	double usage_efctv;	/* effective, normalized usage */
	double usage_norm;	/* normalized usage */
	uint64_t usage_raw;	/* measure of
TRESBillableUnits usage */
	long double *usage_tres_raw; /* measure of each TRES usage */
	double fs_factor;	/* fairshare factor */
	double level_fs;	/* fairshare factor at this level. stored on an
				 * assoc as a long double, but that is not
				 * needed for display in sshare */
	uint16_t user;          /* 1 if user association 0 if account
				 * association */
} assoc_shares_object_t;

/* Share request: an account list and a user list */
typedef struct shares_request_msg {
	List acct_list;
	List user_list;
} shares_request_msg_t;

/* Share response: the association share records plus TRES names/count */
typedef struct shares_response_msg {
	List assoc_shares_list; /* list of assoc_shares_object_t *'s */
	uint64_t tot_shares;
	uint32_t tres_cnt;
	char **tres_names;
} shares_response_msg_t;

/* Priority factors request: job id list, partitions string, uid list */
typedef struct priority_factors_request_msg {
	List	 job_id_list;
	char    *partitions;
	List	 uid_list;
} priority_factors_request_msg_t;

/* Text message addressed to a job */
typedef struct job_notify_msg {
	uint32_t job_id;
	uint32_t job_step_id;	/* currently not used */
	char *   message;
} job_notify_msg_t;

/* A job id plus show flags */
typedef struct job_id_msg {
	uint32_t job_id;
	uint16_t show_flags;
} job_id_msg_t;

/* A user id plus show flags */
typedef struct job_user_id_msg {
	uint32_t user_id;
	uint16_t show_flags;
} job_user_id_msg_t;

/* A job id / step id pair */
typedef struct job_step_id_msg {
	uint32_t job_id;
	uint32_t step_id;
} job_step_id_msg_t;

/* Job information request */
typedef struct job_info_request_msg {
	time_t last_update;
	uint16_t show_flags;
	List   job_ids;		/* Optional list of job_ids, otherwise show all
				 * jobs. */
} job_info_request_msg_t;

/* Job step information request for a given job id / step id */
typedef struct job_step_info_request_msg {
	time_t last_update;
	uint32_t job_id;
	uint32_t step_id;
	uint16_t show_flags;
} job_step_info_request_msg_t;

/* Node information request */
typedef struct node_info_request_msg {
	time_t last_update;
	uint16_t show_flags;
} node_info_request_msg_t;

/* Node information request naming a single node */
typedef struct node_info_single_msg {
	char *node_name;
	uint16_t show_flags;
} node_info_single_msg_t;

/* Front end information request */
typedef struct front_end_info_request_msg {
	time_t last_update;
} front_end_info_request_msg_t;

/* Partition information request */
typedef struct part_info_request_msg {
	time_t last_update;
	uint16_t show_flags;
} part_info_request_msg_t;

/* Reservation information request */
typedef struct resv_info_request_msg {
	time_t last_update;
} resv_info_request_msg_t;

/* Bit values; presumably for layout_info_request_msg_t.flags below --
 * confirm against the code that sets them */
#define LAYOUTS_DUMP_NOLAYOUT 0x00000001
#define LAYOUTS_DUMP_STATE    0x10000000
/* Layout information request */
typedef struct layout_info_request_msg {
	char* layout_type;
	char* entities;
	char* type;
	uint32_t flags;
} layout_info_request_msg_t;

/* Job allocation completion: job id and job return code */
typedef struct complete_job_allocation {
	uint32_t job_id;
	uint32_t job_rc;
} complete_job_allocation_msg_t;

/* Batch script completion: accounting data, return codes, node and user */
typedef struct complete_batch_script {
	jobacctinfo_t *jobacct;
	uint32_t job_id;
	uint32_t job_rc;
	uint32_t slurm_rc;
	char *node_name;
	uint32_t user_id;	/* user the job runs as */
} complete_batch_script_msg_t;

/* Prolog completion: job id and prolog return code */
typedef struct complete_prolog {
	uint32_t job_id;
	uint32_t prolog_rc;
} complete_prolog_msg_t;

/* Step completion for the node rank range [range_first, range_last] */
typedef struct step_complete_msg {
	uint32_t job_id;
	uint32_t job_step_id;
	uint32_t range_first;	/* First node rank within job step's alloc */
	uint32_t range_last;	/* Last node rank within job step's alloc */
	uint32_t step_rc;	/* largest task return code */
	jobacctinfo_t *jobacct;
} step_complete_msg_t;

/* Signal number and flags for the tasks of a job step */
typedef struct signal_tasks_msg {
	uint16_t flags;
	uint32_t job_id;
	uint32_t job_step_id;
	uint16_t signal;
} signal_tasks_msg_t;

/* Epilog completion: job id, return code and node name */
typedef struct epilog_complete_msg {
	uint32_t job_id;
	uint32_t return_code;
	char    *node_name;
} epilog_complete_msg_t;

#define REBOOT_FLAGS_ASAP 0x0001	/* Drain to reboot ASAP */
/* Node reboot request */
typedef struct reboot_msg {
	char *features;
	uint16_t flags;
	uint32_t next_state;		/* state after reboot */
	char *node_list;
	char *reason;
} reboot_msg_t;

/* Shutdown request options */
typedef struct shutdown_msg {
	uint16_t options;
} shutdown_msg_t;

/* A bare last-update timestamp */
typedef struct last_update_msg {
	time_t last_update;
} last_update_msg_t;

/* Two 64-bit debug flag masks, named "minus" and "plus" */
typedef struct set_debug_flags_msg {
	uint64_t debug_flags_minus;
	uint64_t debug_flags_plus;
} set_debug_flags_msg_t;

/* A debug level value */
typedef struct set_debug_level_msg {
	uint32_t debug_level;
} set_debug_level_msg_t;

/* Job step creation request */
typedef struct job_step_specs {
	uint32_t cpu_count;	/* count of required processors */
	uint32_t cpu_freq_gov;  /* cpu frequency governor */
	uint32_t cpu_freq_max;  /* Maximum cpu frequency  */
	uint32_t cpu_freq_min;  /* Minimum cpu frequency  */
	char *cpus_per_tres;	/* semicolon delimited list of TRES=# values */
	uint16_t exclusive;	/* 1 if CPUs not shared with other steps */
	char *features;		/* required node features, default NONE */
	char *host;		/* host to contact initiating srun */
	uint16_t immediate;	/* 1 if allocate to run or fail immediately,
				 * 0 if to be queued awaiting resources */
	uint32_t job_id;	/* job ID */
	uint64_t pn_min_memory; /* minimum real memory per node OR
				 * real memory per CPU | MEM_PER_CPU,
				 * default=0 (use job limit) */
	char *name;		/* name of the job step, default "" */
	char *network;		/* network use spec */
	uint32_t min_nodes;	/* minimum number of nodes required by job,
				 * default=0 */
	uint32_t max_nodes;	/* maximum number of nodes usable by job,
				 * default=0 */
	char *mem_per_tres;	/* semicolon delimited list of TRES=# values */
	uint8_t
no_kill;	/* 1 if no kill on node failure */
	char *node_list;	/* list of required nodes */
	uint32_t num_tasks;	/* number of tasks required */
	uint8_t overcommit;     /* flag, 1 to allow overcommit of processors,
				   0 to disallow overcommit. default is 0 */
	uint16_t plane_size;	/* plane size when task_dist =
				   SLURM_DIST_PLANE */
	uint16_t port;		/* port to contact initiating srun */
	uint16_t relative;	/* first node to use of job's allocation */
	uint16_t resv_port_cnt;	/* reserve ports for MPI if set */
	uint32_t step_id;	/* Desired step ID or NO_VAL */
	uint32_t srun_pid;	/* PID of srun command, also see host */
	uint32_t task_dist;	/* see enum task_dist_state in slurm.h */
	uint32_t time_limit;	/* maximum run time in minutes, default is
				 * partition limit */
	char *tres_bind;	/* Task to TRES binding directives */
	char *tres_freq;	/* TRES frequency directives */
	char *tres_per_step;	/* semicolon delimited list of TRES=# values */
	char *tres_per_node;	/* semicolon delimited list of TRES=# values */
	char *tres_per_socket;	/* semicolon delimited list of TRES=# values */
	char *tres_per_task;	/* semicolon delimited list of TRES=# values */
	uint32_t user_id;	/* user the job runs as */
} job_step_create_request_msg_t;

/* Job step creation response */
typedef struct job_step_create_response_msg {
	uint32_t def_cpu_bind_type;	/* Default CPU bind type */
	uint32_t job_step_id;		/* assigned job step id */
	char *resv_ports;		/* reserved ports */
	slurm_step_layout_t *step_layout; /* information about how the
					   * step is laid out */
	slurm_cred_t *cred;		  /* slurm job credential */
	dynamic_plugin_data_t *select_jobinfo;	/* select opaque data type */
	dynamic_plugin_data_t *switch_job;	/* switch opaque data type */
	uint16_t use_protocol_ver;   /* Lowest protocol version running on
				      * the slurmd's in this step.
				      */
} job_step_create_response_msg_t;

/* Bit values for launch_tasks_request_msg_t.flags */
#define LAUNCH_PARALLEL_DEBUG	0x00000001
#define LAUNCH_MULTI_PROG	0x00000002
#define LAUNCH_PTY		0x00000004
#define LAUNCH_BUFFERED_IO	0x00000008
#define LAUNCH_LABEL_IO		0x00000010
#define LAUNCH_USER_MANAGED_IO	0x00000020
#define LAUNCH_NO_ALLOC		0x00000040

/* Task launch request for one job step */
typedef struct launch_tasks_request_msg {
	uint32_t  job_id;
	uint32_t  job_step_id;
	uint32_t  het_job_node_offset;	/* Hetjob node offset or NO_VAL */
	uint32_t  het_job_id;		/* Hetjob ID or NO_VAL */
	uint32_t  het_job_nnodes;	/* total node count for entire hetjob */
	uint32_t  het_job_ntasks;	/* total task count for entire hetjob */
	uint16_t *het_job_task_cnts;	/* Tasks count on each node in hetjob */
	uint32_t **het_job_tids;	/* Task IDs on each node of hetjob */
	uint32_t *het_job_tid_offsets;	/* map of tasks (by id) to originating
					 * hetjob */
	uint32_t  het_job_offset;	/* Hetjob offset or NO_VAL */
	uint32_t  het_job_step_cnt;	/* number of steps for entire hetjob */
	uint32_t  het_job_task_offset;	/* Hetjob task ID offset or NO_VAL */
	char     *het_job_node_list;	/* Hetjob step node list */
	uint32_t  nnodes;	/* number of nodes in this job step       */
	uint32_t  ntasks;	/* number of tasks in this job step   */
	uint16_t  ntasks_per_board;/* number of tasks to invoke on each board */
	uint16_t  ntasks_per_core; /* number of tasks to invoke on each core */
	uint16_t  ntasks_per_socket;/* number of tasks to invoke on
				     * each socket */
	uint32_t  uid;
	char     *user_name;
	uint32_t  gid;
	uint32_t  ngids;
	uint32_t *gids;
	uint64_t  job_mem_lim;	/* MB of memory reserved by job per node OR
				 * real memory per CPU | MEM_PER_CPU,
				 * default=0 (no limit) */
	uint64_t  step_mem_lim;	/* MB of memory reserved by step */
	uint16_t  *tasks_to_launch;
	uint32_t  envc;
	uint32_t  argc;
	uint16_t  node_cpus;
	uint16_t  cpus_per_task;
	char    **env;
	char    **argv;
	char     *cwd;
	uint16_t cpu_bind_type;	/* --cpu-bind=                    */
	char     *cpu_bind;	/* binding map for map/mask_cpu           */
	uint16_t mem_bind_type;	/* --mem-bind=                    */
	char     *mem_bind;	/* binding map for tasks to memory        */
	uint16_t accel_bind_type; /* --accel-bind= */
	char     *tres_bind;	/* task binding to TRES (e.g. GPUs) */
	char     *tres_freq;	/* frequency/power for TRES (e.g. GPUs) */
	uint16_t  num_resp_port;
	uint16_t  *resp_port;   /* array of available response ports      */

	/* Distribution at the lowest level of logical processor (lllp) */
	uint32_t task_dist;  /* --distribution=, -m dist	*/
	uint32_t flags;		/* See LAUNCH_* flags defined above */
	uint32_t **global_task_ids;
	slurm_addr_t orig_addr;	  /* where message really came from for io */
	uint8_t open_mode;	/* stdout/err append or truncate */
	char *acctg_freq;	/* accounting polling intervals */
	uint32_t cpu_freq_min;  /* Minimum cpu frequency  */
	uint32_t cpu_freq_max;  /* Maximum cpu frequency  */
	uint32_t cpu_freq_gov;  /* cpu frequency governor */
	uint16_t job_core_spec;	/* Count of specialized cores */

	/********** START "normal" IO only options **********/
	/* These options are ignored if user_managed_io is 1 */
	char     *ofname; /* stdout filename pattern */
	char     *efname; /* stderr filename pattern */
	char     *ifname; /* stdin filename pattern */
	uint16_t  num_io_port;
	uint16_t  *io_port;  /* array of available client IO listen ports */
	/**********  END  "normal" IO only options **********/

	uint32_t profile;
	char     *task_prolog;
	char     *task_epilog;

	uint16_t   slurmd_debug; /* remote slurmd debug level */

	slurm_cred_t *cred;	/* job credential            */
	dynamic_plugin_data_t *switch_job;	/* switch credential for the job */
	job_options_t options;  /* Arbitrary job options */
	char *complete_nodelist;
	char **spank_job_env;
	uint32_t spank_job_env_size;
	dynamic_plugin_data_t *select_jobinfo; /* select context, opaque data */
	char *alias_list;	/* node name/address/hostname aliases */
	char *partition;	/* partition that job is running in */

	/* only filled out if step is SLURM_EXTERN_CONT */
	uint16_t x11;			/* X11 forwarding setup flags */
	char *x11_alloc_host;		/* host to proxy through */
	uint16_t x11_alloc_port;	/* port to proxy through */
	char *x11_magic_cookie;		/* X11 auth cookie to abuse */
	char *x11_target;		/* X11 target host, or unix socket */
	uint16_t x11_target_port;	/* X11 target port */
} launch_tasks_request_msg_t;

/* Carries a single task id */
typedef struct task_user_managed_io_msg {
	uint32_t task_id;
} task_user_managed_io_msg_t;

/* Alias for struct partition_info, which is declared outside this file */
typedef struct partition_info partition_desc_msg_t;

/* A bare return code */
typedef struct return_code_msg {
	uint32_t return_code;
} return_code_msg_t;
/* A return code with an accompanying error message string */
typedef struct return_code2_msg {
	uint32_t return_code;
	char *err_msg;
} return_code2_msg_t;

/* Carries a pointer to a cluster record */
typedef struct {
	slurmdb_cluster_rec_t *working_cluster_rec;
} reroute_msg_t;

/* defined in slurm.h
typedef struct network_callerid_msg {
	unsigned char ip_src[16];
	unsigned char ip_dst[16];
	uint32_t port_src;
	uint32_t port_dst;
	int32_t af;	// NOTE: un/packed as uint32_t
} network_callerid_msg_t; */

/* Response to a network caller-id request: job id, return code, node name */
typedef struct network_callerid_resp {
	uint32_t job_id;
	uint32_t return_code;
	char *node_name;
} network_callerid_resp_t;

/* A list of messages together with the sender's address */
typedef struct composite_msg {
	slurm_addr_t sender;	/* address of sending node/port */
	List	 msg_list;
} composite_msg_t;

/* A dampening factor value */
typedef struct set_fs_dampening_factor_msg {
	uint16_t dampening_factor;
} set_fs_dampening_factor_msg_t;

/* Controller status: backup index and time it became primary */
typedef struct control_status_msg {
	uint16_t backup_inx;	/* Our BackupController# index,
				 * between 0 and (MAX_CONTROLLERS-1) */
	time_t control_time;	/* Time we became primary slurmctld (or 0) */
} control_status_msg_t;

/*
 * Note: We include the node list here for
reliable cleanup on XCPU systems. 947 * 948 * Note: We include select_jobinfo here in addition to the job launch 949 * RPC in order to ensure reliable clean-up of a BlueGene partition in 950 * the event of some launch failure or race condition preventing slurmd 951 * from getting the MPIRUN_PARTITION at that time. It is needed for 952 * the job epilog. 953 */ 954 955 #define SIG_OOM 253 /* Dummy signal value for out of memory 956 * (OOM) notification. Exit status reported as 957 * 0:125 (0x80 is the signal flag and 958 * 253 - 128 = 125) */ 959 #define SIG_TERM_KILL 991 /* Send SIGCONT + SIGTERM + SIGKILL */ 960 #define SIG_UME 992 /* Dummy signal value for uncorrectable memory 961 * error (UME) notification */ 962 #define SIG_REQUEUED 993 /* Dummy signal value for job requeue */ 963 #define SIG_PREEMPTED 994 /* Dummy signal value for job preemption */ 964 #define SIG_DEBUG_WAKE 995 /* Dummy signal value to wake procs stopped 965 * for debugger */ 966 #define SIG_TIME_LIMIT 996 /* Dummy signal value for time limit reached */ 967 #define SIG_ABORT 997 /* Dummy signal value to abort a job */ 968 #define SIG_NODE_FAIL 998 /* Dummy signal value to signify node failure */ 969 #define SIG_FAILURE 999 /* Dummy signal value to signify sys failure */ 970 typedef struct kill_job_msg { 971 uint32_t het_job_id; 972 List job_gres_info; /* Used to set Epilog environment variables */ 973 uint32_t job_id; 974 uint32_t job_state; 975 uint32_t job_uid; 976 uint32_t job_gid; 977 char *nodes; 978 dynamic_plugin_data_t *select_jobinfo; /* opaque data type */ 979 char **spank_job_env; 980 uint32_t spank_job_env_size; 981 time_t start_time; /* time of job start, track job requeue */ 982 uint32_t step_id; 983 time_t time; /* slurmctld's time of request */ 984 } kill_job_msg_t; 985 986 typedef struct job_time_msg { 987 uint32_t job_id; 988 time_t expiration_time; 989 } job_time_msg_t; 990 991 typedef struct reattach_tasks_request_msg { 992 uint32_t job_id; 993 uint32_t job_step_id; 994
uint16_t num_resp_port; 995 uint16_t *resp_port; /* array of available response ports */ 996 uint16_t num_io_port; 997 uint16_t *io_port; /* array of available client IO ports */ 998 slurm_cred_t *cred; /* used only as a weak authentication mechanism 999 for the slurmstepd to use when connecting 1000 back to the client */ 1001 } reattach_tasks_request_msg_t; 1002 1003 typedef struct reattach_tasks_response_msg { 1004 char *node_name; 1005 uint32_t return_code; 1006 uint32_t ntasks; /* number of tasks on this node */ 1007 uint32_t *gtids; /* Global task id assignments */ 1008 uint32_t *local_pids; /* list of process ids on this node */ 1009 char **executable_names; /* array of length "ntasks" */ 1010 } reattach_tasks_response_msg_t; 1011 1012 typedef struct prolog_launch_msg { 1013 char *alias_list; /* node name/address/hostname aliases */ 1014 slurm_cred_t *cred; 1015 uint32_t gid; 1016 uint32_t het_job_id; /* HetJob id or NO_VAL */ 1017 List job_gres_info; /* Used to set Prolog env vars */ 1018 uint32_t job_id; /* slurm job_id */ 1019 uint64_t job_mem_limit; /* job's memory limit, passed via cred */ 1020 uint32_t nnodes; /* count of nodes, passed via cred */ 1021 char *nodes; /* list of nodes allocated to job_step */ 1022 char *partition; /* partition the job is running in */ 1023 dynamic_plugin_data_t *select_jobinfo; /* opaque data type */ 1024 char **spank_job_env; /* SPANK job environment variables */ 1025 uint32_t spank_job_env_size; /* size of spank_job_env */ 1026 char *std_err; /* pathname of stderr */ 1027 char *std_out; /* pathname of stdout */ 1028 uint32_t uid; 1029 char *user_name; /* job's user name */ 1030 char *work_dir; /* full pathname of working directory */ 1031 uint16_t x11; /* X11 forwarding setup flags */ 1032 char *x11_alloc_host; /* srun/salloc host to setup proxy */ 1033 uint16_t x11_alloc_port; /* srun/salloc port to setup proxy */ 1034 char *x11_magic_cookie; /* X11 auth cookie to abuse */ 1035 char *x11_target; /* X11 target host, or unix
socket */ 1036 uint16_t x11_target_port; /* X11 target port */ 1037 } prolog_launch_msg_t; 1038 1039 typedef struct batch_job_launch_msg { 1040 char *account; /* account under which the job is running */ 1041 char *acctg_freq; /* accounting polling intervals */ 1042 char *alias_list; /* node name/address/hostname aliases */ 1043 uint32_t array_job_id; /* job array master job ID */ 1044 uint32_t array_task_id; /* job array ID or NO_VAL */ 1045 uint32_t cpu_freq_min; /* Minimum cpu frequency */ 1046 uint32_t cpu_freq_max; /* Maximum cpu frequency */ 1047 uint32_t cpu_freq_gov; /* cpu frequency governor */ 1048 uint32_t het_job_id; 1049 uint32_t job_id; 1050 uint32_t step_id; 1051 uint32_t uid; 1052 uint32_t gid; 1053 char *user_name; 1054 uint32_t ngids; 1055 uint32_t *gids; 1056 uint32_t ntasks; /* number of tasks in this job */ 1057 uint32_t num_cpu_groups;/* elements in below cpu arrays */ 1058 uint16_t cpu_bind_type; /* This currently does not do anything 1059 * but here in case we wanted to bind 1060 * the batch step differently than 1061 * using all the cpus in the 1062 * allocation. */ 1063 char *cpu_bind; /* This currently does not do anything 1064 * but here in case we wanted to bind 1065 * the batch step differently than 1066 * using all the cpus in the 1067 * allocation. 
*/ 1068 uint16_t *cpus_per_node;/* cpus per node */ 1069 uint32_t *cpu_count_reps;/* how many nodes have same cpu count */ 1070 uint16_t cpus_per_task; /* number of CPUs requested per task */ 1071 uint16_t job_core_spec; /* Count of specialized cores */ 1072 char *nodes; /* list of nodes allocated to job_step */ 1073 uint32_t profile; /* what to profile for the batch step */ 1074 char *script; /* the actual job script, default NONE */ 1075 Buf script_buf; /* the job script as a mmap buf */ 1076 char *std_err; /* pathname of stderr */ 1077 char *std_in; /* pathname of stdin */ 1078 char *qos; /* qos the job is running under */ 1079 char *std_out; /* pathname of stdout */ 1080 char *work_dir; /* full pathname of working directory */ 1081 1082 uint32_t argc; 1083 char **argv; 1084 uint32_t envc; /* element count in environment */ 1085 char **environment; /* environment variables to set for job, 1086 * name=value pairs, one per line */ 1087 dynamic_plugin_data_t *select_jobinfo; /* opaque data type */ 1088 slurm_cred_t *cred; 1089 uint8_t open_mode; /* stdout/err append or truncate */ 1090 uint8_t overcommit; /* if resources being over subscribed */ 1091 char *partition; /* partition used to run job */ 1092 uint64_t pn_min_memory; /* minimum real memory per node OR 1093 * real memory per CPU | MEM_PER_CPU, 1094 * default=0 (no limit) */ 1095 uint64_t job_mem; /* memory limit for job */ 1096 uint16_t restart_cnt; /* batch job restart count */ 1097 char *resv_name; /* job's reservation */ 1098 char **spank_job_env; /* SPANK job environment variables */ 1099 uint32_t spank_job_env_size; /* size of spank_job_env */ 1100 char *tres_bind; /* task binding to TRES (e.g. GPUs), 1101 * included for possible future use */ 1102 char *tres_freq; /* frequency/power for TRES (e.g. 
GPUs) */ 1103 } batch_job_launch_msg_t; 1104 1105 typedef struct job_id_request_msg { 1106 uint32_t job_pid; /* local process_id of a job */ 1107 } job_id_request_msg_t; 1108 1109 typedef struct job_id_response_msg { 1110 uint32_t job_id; /* slurm job_id */ 1111 uint32_t return_code; /* slurm return code */ 1112 } job_id_response_msg_t; 1113 1114 typedef enum { 1115 CONFIG_REQUEST_SLURM_CONF = 0, 1116 CONFIG_REQUEST_SLURMD, 1117 } config_request_flags_t; 1118 1119 typedef struct { 1120 uint32_t flags; /* see config_request_flags_t */ 1121 } config_request_msg_t; 1122 1123 typedef struct { 1124 char *config; 1125 char *acct_gather_config; 1126 char *cgroup_config; 1127 char *cgroup_allowed_devices_file_config; 1128 char *ext_sensors_config; 1129 char *gres_config; 1130 char *knl_cray_config; 1131 char *knl_generic_config; 1132 char *plugstack_config; 1133 char *topology_config; 1134 char *xtra_config; /* in case we forgot one ;) */ 1135 1136 char *slurmd_spooldir; 1137 } config_response_msg_t; 1138 1139 typedef struct srun_exec_msg { 1140 uint32_t job_id; /* slurm job_id */ 1141 uint32_t step_id; /* step_id or NO_VAL */ 1142 uint32_t argc; /* argument count */ 1143 char ** argv; /* program arguments */ 1144 } srun_exec_msg_t; 1145 1146 typedef struct kvs_get_msg { 1147 uint32_t task_id; /* job step's task id */ 1148 uint32_t size; /* count of tasks in job */ 1149 uint16_t port; /* port to be sent the kvs data */ 1150 char * hostname; /* hostname to be sent the kvs data */ 1151 } kvs_get_msg_t; 1152 1153 enum compress_type { 1154 COMPRESS_OFF = 0x0, /* no compression */ 1155 COMPRESS_ZLIB, /* zlib (aka gzip) compression */ 1156 COMPRESS_LZ4 /* lz4 compression */ 1157 }; 1158 1159 typedef struct file_bcast_msg { 1160 char *fname; /* name of the destination file */ 1161 uint32_t block_no; /* block number of this data */ 1162 uint16_t last_block; /* last block of bcast if set (flag) */ 1163 uint16_t force; /* replace existing file if set (flag) */ 1164 uint16_t 
compress; /* compress file if set, use compress_type */ 1165 uint16_t modes; /* access rights for destination file */ 1166 uint32_t uid; /* owner for destination file */ 1167 char *user_name; 1168 uint32_t gid; /* group for destination file */ 1169 time_t atime; /* last access time for destination file */ 1170 time_t mtime; /* last modification time for dest file */ 1171 sbcast_cred_t *cred; /* credential for the RPC */ 1172 uint32_t block_len; /* length of this data block */ 1173 uint64_t block_offset; /* offset for this data block */ 1174 uint32_t uncomp_len; /* uncompressed length of this data block */ 1175 char *block; /* data for this block */ 1176 uint64_t file_size; /* file size */ 1177 } file_bcast_msg_t; 1178 1179 typedef struct multi_core_data { 1180 uint16_t boards_per_node; /* boards per node required by job */ 1181 uint16_t sockets_per_board; /* sockets per board required by job */ 1182 uint16_t sockets_per_node; /* sockets per node required by job */ 1183 uint16_t cores_per_socket; /* cores per cpu required by job */ 1184 uint16_t threads_per_core; /* threads per core required by job */ 1185 1186 uint16_t ntasks_per_board; /* number of tasks to invoke on each board */ 1187 uint16_t ntasks_per_socket; /* number of tasks to invoke on each socket */ 1188 uint16_t ntasks_per_core; /* number of tasks to invoke on each core */ 1189 uint16_t plane_size; /* plane size when task_dist = SLURM_DIST_PLANE */ 1190 } multi_core_data_t; 1191 1192 typedef struct pty_winsz { 1193 uint16_t cols; 1194 uint16_t rows; 1195 } pty_winsz_t; 1196 1197 typedef struct forward_data_msg { 1198 char *address; 1199 uint32_t len; 1200 char *data; 1201 } forward_data_msg_t; 1202 1203 /* suspend_msg_t variant for internal slurm daemon communications */ 1204 typedef struct suspend_int_msg { 1205 uint8_t indf_susp; /* non-zero if being suspended indefinitely */ 1206 uint16_t job_core_spec; /* Count of specialized cores */ 1207 uint32_t job_id; /* slurm job_id */ 1208 uint16_t op; /* 
suspend operation, see enum suspend_opts */ 1209 void * switch_info; /* opaque data for switch plugin */ 1210 } suspend_int_msg_t; 1211 1212 typedef struct ping_slurmd_resp_msg { 1213 uint32_t cpu_load; /* CPU load * 100 */ 1214 uint64_t free_mem; /* Free memory in MiB */ 1215 } ping_slurmd_resp_msg_t; 1216 1217 typedef struct license_info_request_msg { 1218 time_t last_update; 1219 uint16_t show_flags; 1220 } license_info_request_msg_t; 1221 1222 typedef struct bb_status_req_msg { 1223 uint32_t argc; 1224 char **argv; 1225 } bb_status_req_msg_t; 1226 1227 typedef struct bb_status_resp_msg { 1228 char *status_resp; 1229 } bb_status_resp_msg_t; 1230 1231 /*****************************************************************************\ 1232 * Slurm API Message Types 1233 \*****************************************************************************/ 1234 typedef struct slurm_node_registration_status_msg { 1235 char *arch; 1236 uint16_t cores; 1237 uint16_t cpus; 1238 uint32_t cpu_load; /* CPU load * 100 */ 1239 uint16_t flags; /* Flags from the slurmd SLURMD_REG_FLAG_* */ 1240 uint64_t free_mem; /* Free memory in MiB */ 1241 char *cpu_spec_list; /* list of specialized CPUs */ 1242 acct_gather_energy_t *energy; 1243 char *features_active; /* Currently active node features */ 1244 char *features_avail; /* Available node features */ 1245 Buf gres_info; /* generic resource info */ 1246 uint32_t hash_val; /* hash value of slurm.conf and included files 1247 * existing on node */ 1248 uint32_t job_count; /* number of associated job_id's */ 1249 uint32_t *job_id; /* IDs of running jobs (if any) */ 1250 char *node_name; 1251 uint16_t boards; 1252 char *os; 1253 uint64_t real_memory; 1254 time_t slurmd_start_time; 1255 uint32_t status; /* node status code, same as return codes */ 1256 uint32_t *step_id; /* IDs of running job steps (if any) */ 1257 uint16_t sockets; 1258 switch_node_info_t *switch_nodeinfo; /* set only if startup != 0 */ 1259 uint16_t threads; 1260 time_t
timestamp; 1261 uint32_t tmp_disk; 1262 uint32_t up_time; /* seconds since reboot */ 1263 char *version; 1264 } slurm_node_registration_status_msg_t; 1265 1266 typedef struct slurm_node_reg_resp_msg { 1267 List tres_list; 1268 } slurm_node_reg_resp_msg_t; 1269 1270 typedef struct requeue_msg { 1271 uint32_t job_id; /* slurm job ID (number) */ 1272 char * job_id_str; /* slurm job ID (string) */ 1273 uint32_t flags; /* JobExitRequeue | Hold | JobFailed | etc. */ 1274 } requeue_msg_t; 1275 1276 typedef struct slurm_event_log_msg { 1277 uint16_t level; /* Message level, from log.h */ 1278 char * string; /* String for slurmctld to log */ 1279 } slurm_event_log_msg_t; 1280 1281 typedef struct { 1282 uint32_t cluster_id; /* cluster id of cluster making request */ 1283 void *data; /* Unpacked buffer 1284 * Only populated on the receiving side. */ 1285 Buf data_buffer; /* Buffer that holds an unpacked data type. 1286 * Only populated on the sending side. */ 1287 uint32_t data_offset; /* DON'T PACK: offset where body part of buffer 1288 * starts -- the part that gets sent. */ 1289 uint16_t data_type; /* data type to unpack */ 1290 uint16_t data_version; /* Version that data is packed with */ 1291 uint64_t fed_siblings; /* sibling bitmap of job */ 1292 uint32_t job_id; /* job_id of job - set in job_desc on receiving 1293 * side */ 1294 uint32_t job_state; /* state of job */ 1295 uint32_t return_code; /* return code of job */ 1296 time_t start_time; /* time sibling job started */ 1297 char *resp_host; /* response host for interactive allocations */ 1298 uint32_t req_uid; /* uid of user making the request. e.g if a 1299 cancel is happening from a user and being 1300 passed to a remote then the uid will be the 1301 user and not the SlurmUser.
*/ 1302 uint16_t sib_msg_type; /* fed_job_update_type */ 1303 char *submit_host; /* node job was submitted from */ 1304 } sib_msg_t; 1305 1306 typedef struct { 1307 uint32_t array_job_id; 1308 uint32_t array_task_id; 1309 char *dependency; 1310 bool is_array; 1311 uint32_t job_id; 1312 char *job_name; 1313 uint32_t user_id; 1314 } dep_msg_t; 1315 1316 typedef struct { 1317 List depend_list; 1318 uint32_t job_id; 1319 } dep_update_origin_msg_t; 1320 1321 typedef struct { 1322 List my_list; /* this list could be of any type as long as it 1323 * is handled correctly on both ends */ 1324 } ctld_list_msg_t; 1325 1326 /*****************************************************************************\ 1327 * ACCOUNTING PUSHES 1328 \*****************************************************************************/ 1329 1330 typedef struct { 1331 List update_list; /* of type slurmdb_update_object_t *'s */ 1332 uint16_t rpc_version; 1333 } accounting_update_msg_t; 1334 1335 typedef struct { 1336 uint32_t job_id; /* ID of job of request */ 1337 } spank_env_request_msg_t; 1338 1339 typedef struct { 1340 uint32_t spank_job_env_size; 1341 char **spank_job_env; /* spank environment */ 1342 } spank_env_responce_msg_t; 1343 1344 typedef struct slurm_ctl_conf slurm_ctl_conf_info_msg_t; 1345 /*****************************************************************************\ 1346 * SLURM MESSAGE INITIALIZATION 1347 \*****************************************************************************/ 1348 1349 /* 1350 * slurm_msg_t_init - initialize a slurm message 1351 * OUT msg - pointer to the slurm_msg_t structure which will be initialized 1352 */ 1353 extern void slurm_msg_t_init (slurm_msg_t *msg); 1354 1355 /* 1356 * slurm_msg_t_copy - initialize a slurm_msg_t structure "dest" with 1357 * values from the "src" slurm_msg_t structure. 1358 * IN src - Pointer to the initialized message from which "dest" will 1359 * be initialized.
1360 * OUT dest - Pointer to the slurm_msg_t which will be initialized. 1361 * NOTE: the "dest" structure will contain pointers into the contents of "src". 1362 */ 1363 extern void slurm_msg_t_copy(slurm_msg_t *dest, slurm_msg_t *src); 1364 1365 /* Add \\ to all \" in a string. The returned string needs to be xfreed later. */ 1366 extern char *slurm_add_slash_to_quotes(char *str); 1367 extern List slurm_copy_char_list(List char_list); 1368 extern int slurm_addto_char_list(List char_list, char *names); 1369 extern int slurm_addto_char_list_with_case(List char_list, char *names, 1370 bool lower_case_normalization); 1371 extern int slurm_addto_id_char_list(List char_list, char *names, bool gid); 1372 extern int slurm_addto_mode_char_list(List char_list, char *names, int mode); 1373 extern int slurm_addto_step_list(List step_list, char *names); 1374 extern int slurm_char_list_copy(List dst, List src); 1375 extern char *slurm_char_list_to_xstr(List char_list); 1376 extern int slurm_find_char_in_list(void *x, void *key); 1377 extern int slurm_sort_char_list_asc(void *, void *); 1378 extern int slurm_sort_char_list_desc(void *, void *); 1379 1380 /* free message functions */ 1381 extern void slurm_free_dep_msg(dep_msg_t *msg); 1382 extern void slurm_free_dep_update_origin_msg(dep_update_origin_msg_t *msg); 1383 extern void slurm_free_last_update_msg(last_update_msg_t * msg); 1384 extern void slurm_free_return_code_msg(return_code_msg_t * msg); 1385 extern void slurm_free_reroute_msg(reroute_msg_t *msg); 1386 extern void slurm_free_job_alloc_info_msg(job_alloc_info_msg_t * msg); 1387 extern void slurm_free_job_info_request_msg(job_info_request_msg_t *msg); 1388 extern void slurm_free_job_step_info_request_msg( 1389 job_step_info_request_msg_t *msg); 1390 extern void slurm_free_front_end_info_request_msg( 1391 front_end_info_request_msg_t *msg); 1392 extern void slurm_free_node_info_request_msg(node_info_request_msg_t *msg); 1393 extern void
slurm_free_node_info_single_msg(node_info_single_msg_t *msg); 1394 extern void slurm_free_part_info_request_msg(part_info_request_msg_t *msg); 1395 extern void slurm_free_sib_msg(sib_msg_t *msg); 1396 extern void slurm_free_stats_info_request_msg(stats_info_request_msg_t *msg); 1397 extern void slurm_free_stats_response_msg(stats_info_response_msg_t *msg); 1398 extern void slurm_free_step_alloc_info_msg(step_alloc_info_msg_t * msg); 1399 extern void slurm_free_resv_info_request_msg(resv_info_request_msg_t *msg); 1400 extern void slurm_free_set_debug_flags_msg(set_debug_flags_msg_t *msg); 1401 extern void slurm_free_set_debug_level_msg(set_debug_level_msg_t *msg); 1402 extern void slurm_destroy_assoc_shares_object(void *object); 1403 extern void slurm_free_shares_request_msg(shares_request_msg_t *msg); 1404 extern void slurm_free_shares_response_msg(shares_response_msg_t *msg); 1405 extern void slurm_destroy_priority_factors_object(void *object); 1406 extern void slurm_copy_priority_factors_object(priority_factors_object_t *dest, 1407 priority_factors_object_t *src); 1408 extern void slurm_free_priority_factors_request_msg( 1409 priority_factors_request_msg_t *msg); 1410 extern void slurm_free_forward_data_msg(forward_data_msg_t *msg); 1411 extern void slurm_free_comp_msg_list(void *x); 1412 extern void slurm_free_composite_msg(composite_msg_t *msg); 1413 extern void slurm_free_ping_slurmd_resp(ping_slurmd_resp_msg_t *msg); 1414 1415 #define slurm_free_timelimit_msg(msg) \ 1416 slurm_free_kill_job_msg(msg) 1417 1418 extern void slurm_init_reboot_msg(reboot_msg_t * msg, bool clear); 1419 extern void slurm_free_reboot_msg(reboot_msg_t * msg); 1420 1421 extern void slurm_free_shutdown_msg(shutdown_msg_t * msg); 1422 1423 extern void slurm_free_job_desc_msg(job_desc_msg_t * msg); 1424 extern void slurm_free_event_log_msg(slurm_event_log_msg_t * msg); 1425 1426 extern void 1427 slurm_free_node_registration_status_msg(slurm_node_registration_status_msg_t * 1428 msg); 1429 
extern void slurm_free_node_reg_resp_msg( 1430 slurm_node_reg_resp_msg_t *msg); 1431 1432 extern void slurm_free_job_info(job_info_t * job); 1433 extern void slurm_free_job_info_members(job_info_t * job); 1434 1435 extern void slurm_free_batch_script_msg(char *msg); 1436 extern void slurm_free_job_id_msg(job_id_msg_t * msg); 1437 extern void slurm_free_job_user_id_msg(job_user_id_msg_t * msg); 1438 extern void slurm_free_job_id_request_msg(job_id_request_msg_t * msg); 1439 extern void slurm_free_job_id_response_msg(job_id_response_msg_t * msg); 1440 extern void slurm_free_config_request_msg(config_request_msg_t *msg); 1441 extern void slurm_free_config_response_msg(config_response_msg_t *msg); 1442 1443 extern void slurm_free_job_step_id_msg(job_step_id_msg_t *msg); 1444 1445 extern void slurm_free_job_launch_msg(batch_job_launch_msg_t * msg); 1446 1447 extern void slurm_free_update_front_end_msg(update_front_end_msg_t * msg); 1448 extern void slurm_free_update_node_msg(update_node_msg_t * msg); 1449 extern void slurm_free_update_layout_msg(update_layout_msg_t * msg); 1450 extern void slurm_free_update_part_msg(update_part_msg_t * msg); 1451 extern void slurm_free_delete_part_msg(delete_part_msg_t * msg); 1452 extern void slurm_free_resv_desc_msg(resv_desc_msg_t * msg); 1453 extern void slurm_free_resv_name_msg(reservation_name_msg_t * msg); 1454 extern void slurm_free_resv_info_request_msg(resv_info_request_msg_t * msg); 1455 extern void slurm_free_job_step_create_request_msg( 1456 job_step_create_request_msg_t * msg); 1457 extern void slurm_free_job_step_create_response_msg( 1458 job_step_create_response_msg_t *msg); 1459 extern void slurm_free_complete_job_allocation_msg( 1460 complete_job_allocation_msg_t * msg); 1461 extern void slurm_free_prolog_launch_msg(prolog_launch_msg_t * msg); 1462 extern void slurm_free_complete_batch_script_msg( 1463 complete_batch_script_msg_t * msg); 1464 extern void slurm_free_complete_prolog_msg( 1465 complete_prolog_msg_t * 
msg); 1466 extern void slurm_free_launch_tasks_request_msg( 1467 launch_tasks_request_msg_t * msg); 1468 extern void slurm_free_launch_tasks_response_msg( 1469 launch_tasks_response_msg_t * msg); 1470 extern void slurm_free_task_user_managed_io_stream_msg( 1471 task_user_managed_io_msg_t *msg); 1472 extern void slurm_free_task_exit_msg(task_exit_msg_t * msg); 1473 extern void slurm_free_signal_tasks_msg(signal_tasks_msg_t * msg); 1474 extern void slurm_free_reattach_tasks_request_msg( 1475 reattach_tasks_request_msg_t * msg); 1476 extern void slurm_free_reattach_tasks_response_msg( 1477 reattach_tasks_response_msg_t * msg); 1478 extern void slurm_free_kill_job_msg(kill_job_msg_t * msg); 1479 extern void slurm_free_update_job_time_msg(job_time_msg_t * msg); 1480 extern void slurm_free_job_step_kill_msg(job_step_kill_msg_t * msg); 1481 extern void slurm_free_epilog_complete_msg(epilog_complete_msg_t * msg); 1482 extern void slurm_free_srun_job_complete_msg(srun_job_complete_msg_t * msg); 1483 extern void slurm_free_srun_exec_msg(srun_exec_msg_t *msg); 1484 extern void slurm_free_srun_ping_msg(srun_ping_msg_t * msg); 1485 extern void slurm_free_net_forward_msg(net_forward_msg_t *msg); 1486 extern void slurm_free_srun_node_fail_msg(srun_node_fail_msg_t * msg); 1487 extern void slurm_free_srun_step_missing_msg(srun_step_missing_msg_t * msg); 1488 extern void slurm_free_srun_timeout_msg(srun_timeout_msg_t * msg); 1489 extern void slurm_free_srun_user_msg(srun_user_msg_t * msg); 1490 extern void slurm_free_suspend_msg(suspend_msg_t *msg); 1491 extern void slurm_free_suspend_int_msg(suspend_int_msg_t *msg); 1492 extern void slurm_free_top_job_msg(top_job_msg_t *msg); 1493 extern void slurm_free_token_request_msg(token_request_msg_t *msg); 1494 extern void slurm_free_token_response_msg(token_response_msg_t *msg); 1495 extern void slurm_free_update_step_msg(step_update_request_msg_t * msg); 1496 extern void slurm_free_resource_allocation_response_msg_members ( 1497 
resource_allocation_response_msg_t * msg); 1498 extern void slurm_free_resource_allocation_response_msg ( 1499 resource_allocation_response_msg_t * msg); 1500 extern void slurm_free_job_step_create_response_msg( 1501 job_step_create_response_msg_t * msg); 1502 extern void slurm_free_submit_response_response_msg( 1503 submit_response_msg_t * msg); 1504 extern void slurm_free_ctl_conf(slurm_ctl_conf_info_msg_t * config_ptr); 1505 extern void slurm_free_job_info_msg(job_info_msg_t * job_buffer_ptr); 1506 extern void slurm_free_job_step_info_response_msg( 1507 job_step_info_response_msg_t * msg); 1508 extern void slurm_free_job_step_info_members (job_step_info_t * msg); 1509 extern void slurm_free_front_end_info_msg (front_end_info_msg_t * msg); 1510 extern void slurm_free_front_end_info_members(front_end_info_t * front_end); 1511 extern void slurm_free_node_info_msg(node_info_msg_t * msg); 1512 extern void slurm_init_node_info_t(node_info_t * msg, bool clear); 1513 extern void slurm_free_node_info_members(node_info_t * node); 1514 extern void slurm_free_partition_info_msg(partition_info_msg_t * msg); 1515 extern void slurm_free_partition_info_members(partition_info_t * part); 1516 extern void slurm_free_layout_info_msg(layout_info_msg_t * msg); 1517 extern void slurm_free_layout_info_request_msg(layout_info_request_msg_t * msg); 1518 extern void slurm_free_reservation_info_msg(reserve_info_msg_t * msg); 1519 extern void slurm_free_get_kvs_msg(kvs_get_msg_t *msg); 1520 extern void slurm_free_kvs_comm_set(kvs_comm_set_t *msg); 1521 extern void slurm_free_will_run_response_msg(will_run_response_msg_t *msg); 1522 extern void slurm_free_reserve_info_members(reserve_info_t * resv); 1523 extern void slurm_free_topo_info_msg(topo_info_response_msg_t *msg); 1524 extern void slurm_free_file_bcast_msg(file_bcast_msg_t *msg); 1525 extern void slurm_free_step_complete_msg(step_complete_msg_t *msg); 1526 extern void slurm_free_job_step_stat(void *object); 1527 extern void 
slurm_free_job_step_pids(void *object); 1528 extern void slurm_free_acct_gather_node_resp_msg( 1529 acct_gather_node_resp_msg_t *msg); 1530 extern void slurm_free_acct_gather_energy_req_msg( 1531 acct_gather_energy_req_msg_t *msg); 1532 extern void slurm_free_job_notify_msg(job_notify_msg_t * msg); 1533 extern void slurm_free_ctld_multi_msg(ctld_list_msg_t *msg); 1534 1535 extern void slurm_free_accounting_update_msg(accounting_update_msg_t *msg); 1536 extern void slurm_free_spank_env_request_msg(spank_env_request_msg_t *msg); 1537 extern void slurm_free_spank_env_responce_msg(spank_env_responce_msg_t *msg); 1538 extern void slurm_free_requeue_msg(requeue_msg_t *); 1539 extern int slurm_free_msg_data(slurm_msg_type_t type, void *data); 1540 extern void slurm_free_license_info_request_msg(license_info_request_msg_t *msg); 1541 extern uint32_t slurm_get_return_code(slurm_msg_type_t type, void *data); 1542 extern void slurm_free_network_callerid_msg(network_callerid_msg_t *mesg); 1543 extern void slurm_free_network_callerid_resp(network_callerid_resp_t *resp); 1544 extern void slurm_free_set_fs_dampening_factor_msg( 1545 set_fs_dampening_factor_msg_t *msg); 1546 extern void slurm_free_control_status_msg(control_status_msg_t *msg); 1547 1548 extern void slurm_free_bb_status_req_msg(bb_status_req_msg_t *msg); 1549 extern void slurm_free_bb_status_resp_msg(bb_status_resp_msg_t *msg); 1550 1551 extern const char *preempt_mode_string(uint16_t preempt_mode); 1552 extern uint16_t preempt_mode_num(const char *preempt_mode); 1553 1554 extern char *log_num2string(uint16_t inx); 1555 extern uint16_t log_string2num(const char *name); 1556 1557 /* Translate a burst buffer numeric value to its equivalent state string */ 1558 extern char *bb_state_string(uint16_t state); 1559 /* Translate a burst buffer state string to its equivalent numeric value */ 1560 extern uint16_t bb_state_num(char *tok); 1561 1562 /* Convert HealthCheckNodeState numeric value to a string. 
1563 * Caller must xfree() the return value */ 1564 extern char *health_check_node_state_str(uint32_t node_state); 1565 1566 extern char *job_reason_string(enum job_state_reason inx); 1567 extern enum job_state_reason job_reason_num(char *reason); 1568 extern bool job_state_qos_grp_limit(enum job_state_reason state_reason); 1569 extern char *job_share_string(uint16_t shared); 1570 extern char *job_state_string(uint32_t inx); 1571 extern char *job_state_string_compact(uint32_t inx); 1572 /* Caller must xfree() the return value */ 1573 extern char *job_state_string_complete(uint32_t state); 1574 extern uint32_t job_state_num(const char *state_name); 1575 extern char *node_state_string(uint32_t inx); 1576 extern char *node_state_string_compact(uint32_t inx); 1577 1578 extern uint16_t power_flags_id(const char *power_flags); 1579 extern char *power_flags_str(uint16_t power_flags); 1580 1581 extern void private_data_string(uint16_t private_data, char *str, int str_len); 1582 extern void accounting_enforce_string(uint16_t enforce, 1583 char *str, int str_len); 1584 1585 /* Translate a Slurm nodelist to a char * of numbers 1586 * nid000[36-37] -> 36-37 1587 * IN - hl_in - if NULL will be made from nodelist 1588 * IN - nodelist - generate hl from list if hl is NULL 1589 * RET - nid list, needs to be xfreed. 1590 */ 1591 extern char *cray_nodelist2nids(hostlist_t hl_in, char *nodelist); 1592 1593 /* Validate SPANK specified job environment does not contain any invalid 1594 * names. 
Log failures using info() */ 1595 extern bool valid_spank_job_env(char **spank_job_env, 1596 uint32_t spank_job_env_size, uid_t uid); 1597 1598 extern char *trigger_res_type(uint16_t res_type); 1599 extern char *trigger_type(uint32_t trig_type); 1600 1601 /* user needs to xfree return value */ 1602 extern char *priority_flags_string(uint16_t priority_flags); 1603 1604 /* user needs to xfree return value */ 1605 extern char *reservation_flags_string(reserve_info_t * resv_ptr); 1606 1607 /* Functions to convert burst buffer flags between strings and numbers */ 1608 extern char * slurm_bb_flags2str(uint32_t bb_flags); 1609 extern uint32_t slurm_bb_str2flags(char *bb_str); 1610 1611 /* Function to convert enforce type flags between strings and numbers */ 1612 extern int parse_part_enforce_type(char *enforce_part_type, uint16_t *param); 1613 extern char * parse_part_enforce_type_2str (uint16_t type); 1614 1615 /* Return true if this cluster_name is in a federation */ 1616 extern bool cluster_in_federation(void *ptr, char *cluster_name); 1617 1618 /* Find where cluster_name nodes start in the node_array */ 1619 extern int get_cluster_node_offset(char *cluster_name, 1620 node_info_msg_t *node_info_ptr); 1621 1622 /* 1623 * Print the char* given. 1624 * 1625 * Each \n will result in a new line. 1626 * If inx is != -1 it is prepended to the string. 1627 */ 1628 extern void print_multi_line_string(char *user_msg, int inx, 1629 log_level_t loglevel); 1630 1631 /* Given a protocol opcode return its string 1632 * description mapping the slurm_msg_type_t 1633 * to its name. 1634 */ 1635 extern char *rpc_num2string(uint16_t opcode); 1636 1637 /* 1638 * Given a numeric suffix, return the equivalent multiplier for the numeric 1639 * portion. For example: "k" returns 1024, "KB" returns 1000, etc. 1640 * The return value for an invalid suffix is NO_VAL64. 
1641 */ 1642 extern uint64_t suffix_mult(char *suffix); 1643 1644 #define safe_read(fd, buf, size) do { \ 1645 int remaining = size; \ 1646 char *ptr = (char *) buf; \ 1647 int rc; \ 1648 while (remaining > 0) { \ 1649 rc = read(fd, ptr, remaining); \ 1650 if ((rc == 0) && (remaining == size)) { \ 1651 debug("%s:%d: %s: safe_read EOF", \ 1652 __FILE__, __LINE__, __func__); \ 1653 goto rwfail; \ 1654 } else if (rc == 0) { \ 1655 debug("%s:%d: %s: safe_read (%d of %d) EOF", \ 1656 __FILE__, __LINE__, __func__, \ 1657 remaining, (int)size); \ 1658 goto rwfail; \ 1659 } else if (rc < 0) { \ 1660 if ((errno == EAGAIN) || \ 1661 (errno == EINTR) || \ 1662 (errno == EWOULDBLOCK)) \ 1663 continue; \ 1664 debug("%s:%d: %s: safe_read (%d of %d) failed: %m", \ 1665 __FILE__, __LINE__, __func__, \ 1666 remaining, (int)size); \ 1667 goto rwfail; \ 1668 } else { \ 1669 ptr += rc; \ 1670 remaining -= rc; \ 1671 if (remaining > 0) \ 1672 debug3("%s:%d: %s: safe_read (%d of %d) partial read", \ 1673 __FILE__, __LINE__, __func__, \ 1674 remaining, (int)size); \ 1675 } \ 1676 } \ 1677 } while (0) 1678 1679 #define safe_write(fd, buf, size) do { \ 1680 int remaining = size; \ 1681 char *ptr = (char *) buf; \ 1682 int rc; \ 1683 while(remaining > 0) { \ 1684 rc = write(fd, ptr, remaining); \ 1685 if (rc < 0) { \ 1686 if ((errno == EAGAIN) || (errno == EINTR))\ 1687 continue; \ 1688 debug("%s:%d: %s: safe_write (%d of %d) failed: %m", \ 1689 __FILE__, __LINE__, __func__, \ 1690 remaining, (int)size); \ 1691 goto rwfail; \ 1692 } else { \ 1693 ptr += rc; \ 1694 remaining -= rc; \ 1695 if (remaining > 0) \ 1696 debug3("%s:%d: %s: safe_write (%d of %d) partial write", \ 1697 __FILE__, __LINE__, __func__, \ 1698 remaining, (int)size); \ 1699 } \ 1700 } \ 1701 } while (0) 1702 1703 #endif 1704