1 /*
2 * Copyright (C) by Argonne National Laboratory
3 * See COPYRIGHT in top-level directory
4 */
5
6 #include "hydra.h"
7 #include "bsci.h"
8 #include "pbs.h"
9
/* Node list obtained from HYDT_bscd_pbs_query_node_list() by
 * HYDT_bscd_pbs_launch_procs() when the RMK in use is not PBS.  It
 * stays NULL otherwise.  find_pbs_node_id() walks this list to turn
 * a hostname into a PBS node ID. */
static struct HYD_node *pbs_node_list = NULL;
11
find_pbs_node_id(const char * hostname,int * node_id)12 static HYD_status find_pbs_node_id(const char *hostname, int *node_id)
13 {
14 struct HYD_node *t;
15 HYD_status status = HYD_SUCCESS;
16
17 HYDU_FUNC_ENTER();
18
19 *node_id = 0;
20 for (t = pbs_node_list; t; t = t->next) {
21 if (!strcmp(hostname, t->hostname))
22 break;
23 *node_id += t->core_count;
24 }
25
26 HYDU_ERR_CHKANDJUMP(status, t == NULL, HYD_INTERNAL_ERROR,
27 "user specified host not in the PBS allocated list\n");
28
29 fn_exit:
30 HYDU_FUNC_EXIT();
31 return status;
32
33 fn_fail:
34 goto fn_exit;
35 }
36
HYDT_bscd_pbs_launch_procs(char ** args,struct HYD_proxy * proxy_list,int use_rmk,int * control_fd)37 HYD_status HYDT_bscd_pbs_launch_procs(char **args, struct HYD_proxy *proxy_list, int use_rmk,
38 int *control_fd)
39 {
40 int proxy_count, i, args_count, err, hostid;
41 struct HYD_proxy *proxy;
42 char *targs[HYD_NUM_TMP_STRINGS];
43 HYD_status status = HYD_SUCCESS;
44
45 HYDU_FUNC_ENTER();
46
47 /* If the RMK is not PBS, query for the PBS node list, and convert
48 * the user-specified node IDs to PBS node IDs */
49 if (use_rmk == HYD_FALSE || strcmp(HYDT_bsci_info.rmk, "pbs")) {
50 status = HYDT_bscd_pbs_query_node_list(&pbs_node_list);
51 HYDU_ERR_POP(status, "error querying PBS node list\n");
52 }
53
54 proxy_count = 0;
55 for (proxy = proxy_list; proxy; proxy = proxy->next)
56 proxy_count++;
57
58 /* Duplicate the args in local copy, targs */
59 for (args_count = 0; args[args_count]; args_count++)
60 targs[args_count] = MPL_strdup(args[args_count]);
61
62 HYDU_MALLOC_OR_JUMP(HYDT_bscd_pbs_sys->task_id, tm_task_id *, proxy_count * sizeof(tm_task_id),
63 status);
64 HYDU_MALLOC_OR_JUMP(HYDT_bscd_pbs_sys->spawn_events, tm_event_t *,
65 proxy_count * sizeof(tm_event_t), status);
66
67 /* Spawn a process on each allocated node through tm_spawn() which
68 * returns a taskID for the process + a eventID for the
69 * spawning. */
70 hostid = 0;
71 for (i = 0, proxy = proxy_list; proxy; proxy = proxy->next, i++) {
72 if (pbs_node_list) {
73 status = find_pbs_node_id(proxy->node->hostname, &hostid);
74 HYDU_ERR_POP(status, "error finding PBS node ID for host %s\n", proxy->node->hostname);
75 }
76
77 targs[args_count] = HYDU_int_to_str(i);
78
79 /* The task_id field is not filled in during tm_spawn(). The
80 * TM library just stores this address and fills it in when
81 * the event is completed by a call to tm_poll(). */
82 if (HYDT_bsci_info.debug) {
83 HYDU_dump(stdout, "Spawn arguments (host id %d): ", hostid);
84
85 /* NULL terminate the arguments list to pass to
86 * HYDU_print_strlist() */
87 targs[args_count + 1] = NULL;
88 HYDU_print_strlist(targs);
89 }
90
91 /* The args_count below does not include the possible NULL
92 * termination, as I'm not sure how tm_spawn() handles NULL
93 * arguments. Besides the last NULL string is not needed for
94 * tm_spawn(). */
95 err = tm_spawn(args_count + 1, targs, NULL, hostid, &HYDT_bscd_pbs_sys->task_id[i],
96 &HYDT_bscd_pbs_sys->spawn_events[i]);
97 HYDU_ERR_CHKANDJUMP(status, err != TM_SUCCESS, HYD_INTERNAL_ERROR,
98 "tm_spawn() failed with TM error %d\n", err);
99
100 if (!pbs_node_list)
101 hostid += proxy->node->core_count;
102 }
103 HYDT_bscd_pbs_sys->spawn_count = i;
104
105 fn_exit:
106 HYDU_FUNC_EXIT();
107 return status;
108
109 fn_fail:
110 goto fn_exit;
111 }
112