1 /* 2 * Copyright (C) by Argonne National Laboratory 3 * See COPYRIGHT in top-level directory 4 */ 5 6 #ifndef HYDRA_H_INCLUDED 7 #define HYDRA_H_INCLUDED 8 9 /* hydra_config.h must come first, otherwise feature macros like _USE_GNU that 10 * were defined by AC_USE_SYSTEM_EXTENSIONS will not be defined yet when mpl.h 11 * indirectly includes features.h. This leads to a mismatch between the 12 * behavior determined by configure and the behavior actually caused by 13 * "#include"ing unistd.h, for example. */ 14 #include "hydra_config.h" 15 16 #include "mpl.h" 17 #include "uthash.h" 18 19 extern char *HYD_dbg_prefix; 20 21 /* C89 headers can be included without a check */ 22 #if defined STDC_HEADERS 23 #include <stdio.h> 24 #include <stdlib.h> 25 #include <string.h> 26 #include <stdarg.h> 27 #include <errno.h> 28 #include <signal.h> 29 #else 30 #error "STDC_HEADERS are assumed in the Hydra code" 31 #endif /* STDC_HEADERS */ 32 33 #if defined NEEDS_POSIX_FOR_SIGACTION 34 #define _POSIX_SOURCE 35 #endif /* NEEDS_POSIX_FOR_SIGACTION */ 36 37 #if defined HAVE_WINDOWS_H 38 #define WIN32_LEAN_AND_MEAN 39 #include <windows.h> 40 #endif /* HAVE_WINDOWS_H */ 41 42 #if defined HAVE_UNISTD_H 43 #include <unistd.h> 44 #endif /* HAVE_UNISTD_H */ 45 46 #if defined HAVE_STRINGS_H 47 #include <strings.h> 48 #endif /* HAVE_STRINGS_H */ 49 50 #if defined HAVE_SYS_TYPES_H 51 #include <sys/types.h> 52 #endif /* HAVE_SYS_TYPES_H */ 53 54 #if defined HAVE_SYS_STAT_H 55 #include <sys/stat.h> 56 #endif /* HAVE_SYS_STAT_H */ 57 58 #if defined HAVE_TIME_H 59 #include <time.h> 60 #endif /* HAVE_TIME_H */ 61 62 #if defined HAVE_SYS_TIME_H 63 #include <sys/time.h> 64 #endif /* HAVE_SYS_TIME_H */ 65 66 #if defined HAVE_IFADDRS_H 67 #include <ifaddrs.h> 68 #endif /* HAVE_IFADDRS_H */ 69 70 #if defined HAVE_ARPA_INET_H 71 #include <arpa/inet.h> 72 #endif /* HAVE_ARPA_INET_H */ 73 74 #if !defined HAVE_GETTIMEOFDAY 75 #error "hydra requires gettimeofday support" 76 #endif /* HAVE_GETTIMEOFDAY */ 77 
/* Variadic macros are used throughout this header; refuse to build without them. */
#ifndef HAVE_MACRO_VA_ARGS
#error "hydra requires VA_ARGS support"
#endif /* HAVE_MACRO_VA_ARGS */

/* Use the platform's MAXHOSTNAMELEN when one is visible; otherwise fall back to 256. */
#ifndef MAXHOSTNAMELEN
#define MAX_HOSTNAME_LEN 256
#else
#define MAX_HOSTNAME_LEN MAXHOSTNAMELEN
#endif /* MAXHOSTNAMELEN */

#define HYDRA_MAX_PATH 4096

/* Headers needed for sockets; each one is pulled in only if configure found it. */
#if defined(HAVE_POLL_H)
#include <poll.h>
#endif /* HAVE_POLL_H */

#if defined(HAVE_FCNTL_H)
#include <fcntl.h>
#endif /* HAVE_FCNTL_H */

#if defined(HAVE_NETDB_H)
#include <netdb.h>
#endif /* HAVE_NETDB_H */

#if defined(HAVE_NETINET_IN_H)
#include <netinet/in.h>
#endif /* HAVE_NETINET_IN_H */

#if defined(HAVE_NETINET_TCP_H)
#include <netinet/tcp.h>
#endif /* HAVE_NETINET_TCP_H */

#if defined(HAVE_SYS_SOCKET_H)
#include <sys/socket.h>
#endif /* HAVE_SYS_SOCKET_H */

#if defined(HAVE_SIGNAL_H)
#include <signal.h>
#endif /* HAVE_SIGNAL_H */

/* Event bits: three distinct single-bit flags. */
#define HYD_POLLIN  (0x0001)
#define HYD_POLLOUT (0x0002)
#define HYD_POLLHUP (0x0004)

/* Sizing constants for temporary buffers and string arrays. */
#define HYD_TMPBUF_SIZE (64 * 1024)
#define HYD_TMP_STRLEN  (16 * 1024)
#define HYD_NUM_TMP_STRINGS 1000

/* Defaults for connection retry count and delay. */
#define HYD_DEFAULT_RETRY_COUNT (10)
#define HYD_CONNECT_DELAY (10)

/* dprintf(...) expands to nothing, so every dprintf call is compiled out. */
#define dprintf(...)

/* ATTRIBUTE(a_) maps to the GCC __attribute__ syntax when configure detected
 * support for it, and to nothing otherwise.  A definition supplied earlier
 * by another header is left alone. */
#ifndef ATTRIBUTE
#ifdef HAVE_GCC_ATTRIBUTE
#define ATTRIBUTE(a_) __attribute__(a_)
#else
#define ATTRIBUTE(a_)
#endif
#endif

/* Print (x) '=' characters followed by a newline on stdout. */
#define HYD_DRAW_LINE(x)                                                \
    {                                                                   \
        int i_;                                                         \
        for (i_ = 0; i_ < (x); i_++)                                    \
            printf("=");                                                \
        printf("\n");                                                   \
    }

/* If the string (x) spells a "false" value (none, no, dummy, null, nil or
 * false, compared case-insensitively), free it with MPL_free and set (x) to
 * NULL.  A NULL (x) is left untouched.  (x) is evaluated several times, so
 * it must be a side-effect-free lvalue. */
#define HYD_CONVERT_FALSE_TO_NULL(x)                                    \
    {                                                                   \
        if ((x) == NULL) {                                              \
        }                                                               \
        else if (!strcasecmp((x), "none") || !strcasecmp((x), "no") ||  \
                 !strcasecmp((x), "dummy") || !strcasecmp((x), "null") || \
                 !strcasecmp((x), "nil") || !strcasecmp((x), "false")) { \
            MPL_free((x));                                              \
            (x) = NULL;                                                 \
        }                                                               \
    }

/* Fallback declarations, each enabled by its own configure-time macro. */
#if defined MANUAL_EXTERN_ENVIRON
extern char **environ;
#endif /* MANUAL_EXTERN_ENVIRON */

#if defined NEEDS_HSTRERROR_DECL
const char *hstrerror(int err);
#endif /* NEEDS_HSTRERROR_DECL */

#if defined NEEDS_GETTIMEOFDAY_DECL
int gettimeofday(struct timeval *tv, struct timezone *tz);
#endif /* NEEDS_GETTIMEOFDAY_DECL */

#if defined NEEDS_GETPGID_DECL
pid_t getpgid(pid_t pid);
#endif /* NEEDS_GETPGID_DECL */

#if defined NEEDS_KILLPG_DECL
int killpg(int pgrp, int sig);
#endif /* NEEDS_KILLPG_DECL */

/* True when status is one of the two "silent" errors (graceful abort or
 * timeout); HYDU_ERR_POP uses this to skip the error message. */
#define HYD_SILENT_ERROR(status) (((status) == HYD_GRACEFUL_ABORT) || ((status) == HYD_TIMED_OUT))

#define HYDRA_NAMESERVER_DEFAULT_PORT 6392

/* Growable, NULL-terminated array of strings.  It is manipulated only
 * through the HYD_STRING_STASH* macros below. */
struct HYD_string_stash {
    char **strlist;             /* array of stashed strings; NULL while empty */
    int max_count;              /* number of slots currently allocated */
    int cur_count;              /* number of slots currently in use */
};

/* Put a stash into the empty state.  Nothing is freed. */
#define HYD_STRING_STASH_INIT(stash)            \
    do {                                        \
        (stash).strlist = NULL;                 \
        (stash).max_count = 0;                  \
        (stash).cur_count = 0;                  \
    } while (0)

/* Append str to the stash and keep the array NULL-terminated.  The array is
 * grown by HYD_NUM_TMP_STRINGS slots whenever fewer than two free slots
 * remain (one for str, one for the terminator).  Growth goes through
 * HYDU_REALLOC_OR_JUMP, so on allocation failure status is set and control
 * jumps out of the calling function. */
#define HYD_STRING_STASH(stash, str, status)                            \
    do {                                                                \
        if ((stash).cur_count >= (stash).max_count - 1) {               \
            HYDU_REALLOC_OR_JUMP((stash).strlist, char **,              \
                                 ((stash).max_count + HYD_NUM_TMP_STRINGS) * sizeof(char *), \
                                 (status));                             \
            (stash).max_count += HYD_NUM_TMP_STRINGS;                   \
        }                                                               \
        (stash).strlist[(stash).cur_count++] = (str);                   \
        (stash).strlist[(stash).cur_count] = NULL;                      \
    } while (0)

/* Join every stashed string into str via HYDU_str_alloc_and_join, then free
 * the stashed strings and the array and reset the stash to empty.  An empty
 * stash yields MPL_strdup("").  If the join fails, HYDU_ERR_POP jumps out of
 * the calling function before the stash is released. */
#define HYD_STRING_SPIT(stash, str, status)                             \
    do {                                                                \
        if ((stash).cur_count == 0) {                                   \
            (str) = MPL_strdup("");                                     \
        }                                                               \
        else {                                                          \
            (status) = HYDU_str_alloc_and_join((stash).strlist, &(str)); \
            HYDU_ERR_POP((status), "unable to join strings\n");         \
            HYDU_free_strlist((stash).strlist);                         \
            MPL_free((stash).strlist);                                  \
            HYD_STRING_STASH_INIT((stash));                             \
        }                                                               \
    } while (0)

/* Release the stashed strings and the array, leaving the stash empty.
 * The stash is reset through HYD_STRING_STASH_INIT so that strlist does not
 * keep pointing at freed memory.  That makes a repeated
 * HYD_STRING_STASH_FREE a no-op and lets the stash be reused by
 * HYD_STRING_STASH afterwards. */
#define HYD_STRING_STASH_FREE(stash)            \
    do {                                        \
        if ((stash).strlist == NULL)            \
            break;                              \
        HYDU_free_strlist((stash).strlist);     \
        MPL_free((stash).strlist);              \
        HYD_STRING_STASH_INIT((stash));         \
    } while (0)

enum HYD_bool {
    HYD_FALSE = 0,
    HYD_TRUE = 1
};

/* fd state */
enum HYD_fd_state {
    HYD_FD_UNSET = -1,
    HYD_FD_CLOSED = -2
};

/* Status information */
typedef enum {
    HYD_SUCCESS = 0,
    HYD_FAILURE,                /* general failure */

    /* Silent errors */
    HYD_GRACEFUL_ABORT,
    HYD_TIMED_OUT,

    /* Regular errors */
    HYD_NO_MEM,
    HYD_SOCK_ERROR,
    HYD_INVALID_PARAM,
    HYD_INTERNAL_ERROR
} HYD_status;

/* Sentinel values; presumably stored in HYD_user_global.usize -- confirm. */
#define HYD_USIZE_UNSET     (0)
#define HYD_USIZE_SYSTEM    (-1)
#define HYD_USIZE_INFINITE  (-2)

/* Sentinel values; presumably stored in HYD_user_global.gpus_per_proc -- confirm. */
#define HYD_GPUS_PER_PROC_UNSET (-1)
#define HYD_GPUS_PER_PROC_AUTO  (-2)

#if defined(NEEDS_GETHOSTNAME_DECL)
int gethostname(char *name, size_t len);
#endif

typedef unsigned short HYD_event_t;

/* Argument matching functions: one table entry pairs an argument name with
 * the function that handles it and the function that prints its help. */
struct HYD_arg_match_table {
    const char *arg;
    HYD_status(*handler_fn) (char *arg, char ***argv_p);
    void (*help_fn) (void);
};


/* Environment information: one name/value pair in a singly linked list. */
struct HYD_env {
    char *env_name;
    char *env_value;
    struct HYD_env *next;
};

typedef enum HYD_env_overwrite {
    HYD_ENV_OVERWRITE_TRUE,
    HYD_ENV_OVERWRITE_FALSE
} HYD_env_overwrite_t;
288 289 typedef enum { 290 HYD_ENV_PROP_UNSET, 291 HYD_ENV_PROP_ALL, 292 HYD_ENV_PROP_NONE, 293 HYD_ENV_PROP_LIST 294 } HYD_env_prop_t; 295 296 struct HYD_env_global { 297 struct HYD_env *system; 298 struct HYD_env *user; 299 struct HYD_env *inherited; 300 char *prop; 301 }; 302 303 /* Executable information */ 304 struct HYD_exec { 305 char *exec[HYD_NUM_TMP_STRINGS]; 306 char *wdir; 307 308 int proc_count; 309 struct HYD_env *user_env; 310 char *env_prop; 311 312 int appnum; 313 314 struct HYD_exec *next; 315 }; 316 317 /* Process group */ 318 struct HYD_pg { 319 int pgid; 320 struct HYD_proxy *proxy_list; 321 int proxy_count; 322 int pg_process_count; 323 int barrier_count; 324 325 struct HYD_pg *spawner_pg; 326 327 /* user-specified node-list */ 328 struct HYD_node *user_node_list; 329 int pg_core_count; 330 331 /* scratch space for the PM */ 332 void *pg_scratch; 333 334 struct HYD_pg *next; 335 }; 336 337 /* Information about the node itself */ 338 struct HYD_node { 339 char *hostname; 340 int core_count; 341 int active_processes; 342 343 int node_id; 344 345 /* Username */ 346 char *user; 347 348 /* Node-specific binding information */ 349 char *local_binding; 350 351 struct HYD_node *next; 352 }; 353 354 /* Proxy information */ 355 struct HYD_proxy { 356 struct HYD_node *node; 357 358 struct HYD_pg *pg; /* Back pointer to the PG */ 359 360 char **exec_launch_info; 361 362 int proxy_id; 363 364 int proxy_process_count; 365 366 /* Filler processes that we are adding on this proxy */ 367 int filler_processes; 368 369 struct HYD_exec *exec_list; 370 371 int *pid; 372 int *exit_status; 373 int control_fd; 374 375 struct HYD_proxy *next; 376 377 UT_hash_handle hh; 378 }; 379 380 /* Global user parameters */ 381 struct HYD_user_global { 382 /* RMK */ 383 char *rmk; 384 385 /* Launcher */ 386 char *launcher; 387 char *launcher_exec; 388 389 /* Processor/Memory topology */ 390 char *topolib; 391 char *binding; 392 char *mapping; 393 char *membind; 394 int 
topo_debug; 395 396 /* Demux engine */ 397 char *demux; 398 399 /* Network interface */ 400 char *iface; 401 402 /* Other random parameters */ 403 int enablex; 404 int debug; 405 int usize; 406 407 int auto_cleanup; 408 int pmi_port; 409 int skip_launch_node; 410 int gpus_per_proc; 411 412 struct HYD_env_global global_env; 413 }; 414 415 #define HYDU_dump_prefix(fp) \ 416 { \ 417 fprintf(fp, "[%s] ", HYD_dbg_prefix ? HYD_dbg_prefix : "unknown"); \ 418 fflush(fp); \ 419 } 420 421 #define HYDU_dump_noprefix(fp, ...) \ 422 { \ 423 fprintf(fp, __VA_ARGS__); \ 424 fflush(fp); \ 425 } 426 427 #define HYDU_dump(fp, ...) \ 428 { \ 429 HYDU_dump_prefix(fp); \ 430 HYDU_dump_noprefix(fp, __VA_ARGS__); \ 431 } 432 433 #if defined HAVE__FUNC__ 434 #define HYDU_FUNC __func__ 435 #elif defined HAVE_CAP__FUNC__ 436 #define HYDU_FUNC __FUNC__ 437 #elif defined HAVE__FUNCTION__ 438 #define HYDU_FUNC __FUNCTION__ 439 #endif 440 441 #if defined __FILE__ && defined HYDU_FUNC 442 #define HYDU_error_printf(...) \ 443 { \ 444 HYDU_dump_prefix(stderr); \ 445 HYDU_dump_noprefix(stderr, "%s (%s:%d): ", HYDU_FUNC, __FILE__, __LINE__); \ 446 HYDU_dump_noprefix(stderr, __VA_ARGS__); \ 447 } 448 #elif defined __FILE__ 449 #define HYDU_error_printf(...) \ 450 { \ 451 HYDU_dump_prefix(stderr); \ 452 HYDU_dump_noprefix(stderr, "%s (%d): ", __FILE__, __LINE__); \ 453 HYDU_dump_noprefix(stderr, __VA_ARGS__); \ 454 } 455 #else 456 #define HYDU_error_printf(...) \ 457 { \ 458 HYDU_dump_prefix(stderr); \ 459 HYDU_dump_noprefix(stderr, __VA_ARGS__); \ 460 } 461 #endif 462 463 #define HYDU_ASSERT(x, status) \ 464 { \ 465 if ((x) == 0) { \ 466 HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, \ 467 "assert (%s) failed\n", #x); \ 468 } \ 469 } 470 471 #define HYDU_IGNORE_TIMEOUT(status) \ 472 { \ 473 if ((status) == HYD_TIMED_OUT) \ 474 (status) = HYD_SUCCESS; \ 475 } 476 477 #define HYDU_ERR_POP(status, ...) 
\ 478 { \ 479 if (status && !HYD_SILENT_ERROR(status)) { \ 480 HYDU_error_printf(__VA_ARGS__); \ 481 goto fn_fail; \ 482 } \ 483 else if (HYD_SILENT_ERROR(status)) { \ 484 goto fn_exit; \ 485 } \ 486 } 487 488 #define HYDU_ERR_SETANDJUMP(status, error, ...) \ 489 { \ 490 status = error; \ 491 HYDU_ERR_POP(status, __VA_ARGS__); \ 492 } 493 494 #define HYDU_ERR_CHKANDJUMP(status, chk, error, ...) \ 495 { \ 496 if ((chk)) \ 497 HYDU_ERR_SETANDJUMP(status, error, __VA_ARGS__); \ 498 } 499 500 #if defined ENABLE_WARNINGS 501 #define HYDU_warn_printf HYDU_error_printf 502 #else 503 #define HYDU_warn_printf(...) 504 #endif /* ENABLE_WARNINGS */ 505 506 /* Disable for now; we might add something here in the future */ 507 #define HYDU_FUNC_ENTER() do {} while (0) 508 #define HYDU_FUNC_EXIT() do {} while (0) 509 510 511 /* alloc */ 512 void HYDU_init_user_global(struct HYD_user_global *user_global); 513 void HYDU_finalize_user_global(struct HYD_user_global *user_global); 514 void HYDU_init_global_env(struct HYD_env_global *global_env); 515 void HYDU_finalize_global_env(struct HYD_env_global *global_env); 516 HYD_status HYDU_alloc_node(struct HYD_node **node); 517 void HYDU_free_node_list(struct HYD_node *node_list); 518 void HYDU_init_pg(struct HYD_pg *pg, int pgid); 519 HYD_status HYDU_alloc_pg(struct HYD_pg **pg, int pgid); 520 void HYDU_free_pg_list(struct HYD_pg *pg_list); 521 void HYDU_free_proxy_list(struct HYD_proxy *proxy_list); 522 HYD_status HYDU_alloc_exec(struct HYD_exec **exec); 523 void HYDU_free_exec_list(struct HYD_exec *exec_list); 524 HYD_status HYDU_create_proxy_list(struct HYD_exec *exec_list, struct HYD_node *node_list, 525 struct HYD_pg *pg); 526 HYD_status HYDU_correct_wdir(char **wdir); 527 528 /* args */ 529 HYD_status HYDU_find_in_path(const char *execname, char **path); 530 HYD_status HYDU_parse_array(char ***argv, struct HYD_arg_match_table *match_table); 531 HYD_status HYDU_set_str(char *arg, char **var, const char *val); 532 HYD_status 
HYDU_set_int(char *arg, int *var, int val); 533 char *HYDU_getcwd(void); 534 HYD_status HYDU_process_mfile_token(char *token, int newline, struct HYD_node **node_list); 535 char *HYDU_get_abs_wd(const char *wd); 536 HYD_status HYDU_parse_hostfile(const char *hostfile, struct HYD_node **node_list, 537 HYD_status(*process_token) (char *token, int newline, 538 struct HYD_node ** node_list)); 539 char *HYDU_find_full_path(const char *execname); 540 HYD_status HYDU_send_strlist(int fd, char **strlist); 541 542 /* debug */ 543 HYD_status HYDU_dbg_init(const char *str); 544 void HYDU_dbg_finalize(void); 545 546 /* env */ 547 HYD_status HYDU_env_to_str(struct HYD_env *env, char **str); 548 HYD_status HYDU_list_inherited_env(struct HYD_env **env_list); 549 struct HYD_env *HYDU_env_list_dup(struct HYD_env *env); 550 HYD_status HYDU_env_create(struct HYD_env **env, const char *env_name, const char *env_value); 551 HYD_status HYDU_env_free(struct HYD_env *env); 552 HYD_status HYDU_env_free_list(struct HYD_env *env); 553 struct HYD_env *HYDU_env_lookup(char *env_name, struct HYD_env *env_list); 554 HYD_status HYDU_append_env_to_list(const char *env_name, const char *env_value, 555 struct HYD_env **env_list); 556 HYD_status HYDU_append_env_str_to_list(const char *str, struct HYD_env **env_list); 557 HYD_status HYDU_putenv(struct HYD_env *env, HYD_env_overwrite_t overwrite); 558 HYD_status HYDU_putenv_list(struct HYD_env *env_list, HYD_env_overwrite_t overwrite); 559 HYD_status HYDU_comma_list_to_env_list(char *str, struct HYD_env **env_list); 560 561 /* launch */ 562 HYD_status HYDU_create_process(char **client_arg, struct HYD_env *env_list, 563 int *in, int *out, int *err, int *pid, int idx); 564 565 /* others */ 566 int HYDU_dceil(int x, int y); 567 HYD_status HYDU_add_to_node_list(const char *hostname, int num_procs, struct HYD_node **node_list); 568 void HYDU_delay(unsigned long delay); 569 570 /* signals */ 571 #ifdef NEEDS_POSIX_FOR_SIGACTION 572 #define _POSIX_SOURCE 573 
#endif 574 575 #include <sys/wait.h> 576 #if defined(USE_SIGNAL) || defined(USE_SIGACTION) 577 #include <signal.h> 578 #else 579 #error no signal choice 580 #endif 581 #ifdef NEEDS_STRSIGNAL_DECL 582 extern char *strsignal(int); 583 #endif 584 585 HYD_status HYDU_set_signal(int signum, void (*handler) (int)); 586 HYD_status HYDU_set_common_signals(void (*handler) (int)); 587 588 /* Sock utilities */ 589 enum HYDU_sock_comm_flag { 590 HYDU_SOCK_COMM_NONE = 0, 591 HYDU_SOCK_COMM_MSGWAIT = 1 592 }; 593 594 HYD_status HYDU_sock_listen(int *listen_fd, char *port_range, uint16_t * port); 595 596 /* delay is in microseconds */ 597 HYD_status HYDU_sock_connect(const char *host, uint16_t port, int *fd, int retries, 598 unsigned long delay); 599 HYD_status HYDU_sock_accept(int listen_fd, int *fd); 600 HYD_status HYDU_sock_read(int fd, void *buf, int maxlen, int *recvd, int *closed, 601 enum HYDU_sock_comm_flag flag); 602 HYD_status HYDU_sock_write(int fd, const void *buf, int maxlen, int *sent, int *closed, 603 enum HYDU_sock_comm_flag flag); 604 HYD_status HYDU_sock_set_nonblock(int fd); 605 HYD_status HYDU_sock_set_block(int fd); 606 HYD_status HYDU_sock_forward_stdio(int in, int out, int *closed); 607 void HYDU_sock_finalize(void); 608 HYD_status HYDU_sock_get_iface_ip(char *iface, char **ip); 609 HYD_status 610 HYDU_sock_create_and_listen_portstr(char *iface, char *hostname, char *port_range, 611 char **port_str, 612 HYD_status(*callback) (int fd, HYD_event_t events, 613 void *userp), void *userp); 614 HYD_status HYDU_sock_cloexec(int fd); 615 616 617 /* Memory utilities */ 618 #include <ctype.h> 619 620 #define HYDU_MALLOC_OR_JUMP(p, type, size, status) \ 621 { \ 622 (p) = (type) MPL_malloc((size), MPL_MEM_PM); \ 623 if ((size != 0) && ((p) == NULL)) \ 624 HYDU_ERR_SETANDJUMP((status), HYD_NO_MEM, \ 625 "failed to allocate %d bytes\n", \ 626 (int) (size)); \ 627 } 628 629 #define HYDU_REALLOC_OR_JUMP(p, type, size, status) \ 630 { \ 631 (p) = (type) 
MPL_realloc((p),(size), MPL_MEM_PM); \ 632 if ((size != 0) && ((p) == NULL)) \ 633 HYDU_ERR_SETANDJUMP((status), HYD_NO_MEM, \ 634 "failed to allocate %d bytes\n", \ 635 (int) (size)); \ 636 } 637 638 HYD_status HYDU_list_append_strlist(char **exec, char **client_arg); 639 HYD_status HYDU_print_strlist(char **args); 640 void HYDU_free_strlist(char **args); 641 HYD_status HYDU_str_alloc_and_join(char **strlist, char **strjoin); 642 HYD_status HYDU_strsplit(char *str, char **str1, char **str2, char sep); 643 HYD_status HYDU_strdup_list(char *src[], char **dest[]); 644 char *HYDU_size_t_to_str(size_t x); 645 char *HYDU_int_to_str(int x); 646 char *HYDU_int_to_str_pad(int x, int maxlen); 647 648 #if defined HAVE_HERROR 649 #define HYDU_herror herror 650 #else 651 #define HYDU_herror HYDU_int_to_str 652 #endif /* HAVE_HERROR */ 653 654 int HYDU_strlist_lastidx(char **strlist); 655 char **HYDU_str_to_strlist(char *str); 656 657 /*! 658 * @} 659 */ 660 661 #endif /* HYDRA_H_INCLUDED */ 662