1 /*****************************************************************************\
2 * src/slurmd/slurmstepd/slurmstepd_job.h stepd_step_rec_t definition
3 *****************************************************************************
4 * Copyright (C) 2002-2007 The Regents of the University of California.
5 * Copyright (C) 2008-2010 Lawrence Livermore National Security.
6 * Copyright (C) 2013 Intel, Inc.
7 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
8 * Written by Mark Grondona <mgrondona@llnl.gov>.
9 * CODE-OCEC-09-009. All rights reserved.
10 *
11 * This file is part of Slurm, a resource management program.
12 * For details, see <https://slurm.schedmd.com/>.
13 * Please also read the included file: DISCLAIMER.
14 *
15 * Slurm is free software; you can redistribute it and/or modify it under
16 * the terms of the GNU General Public License as published by the Free
17 * Software Foundation; either version 2 of the License, or (at your option)
18 * any later version.
19 *
20 * In addition, as a special exception, the copyright holders give permission
21 * to link the code of portions of this program with the OpenSSL library under
22 * certain conditions as described in each individual source file, and
23 * distribute linked combinations including the two. You must obey the GNU
24 * General Public License in all respects for all of the code used other than
25 * OpenSSL. If you modify file(s) with this exception, you may extend this
26 * exception to your version of the file(s), but you are not obligated to do
27 * so. If you do not wish to do so, delete this exception statement from your
28 * version. If you delete this exception statement from all source files in
29 * the program, then also delete it here.
30 *
31 * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
32 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
33 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
34 * details.
35 *
36 * You should have received a copy of the GNU General Public License along
37 * with Slurm; if not, write to the Free Software Foundation, Inc.,
38 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
39 \*****************************************************************************/
40
41 #ifndef _SLURMSTEPD_JOB_H
42 #define _SLURMSTEPD_JOB_H
43
44 #include <pthread.h>
45 #include <pwd.h>
46
47 #include "src/common/macros.h"
48 #include "src/common/slurm_protocol_api.h"
49 #include "src/common/slurm_protocol_defs.h"
50 #include "src/common/list.h"
51 #include "src/common/eio.h"
52 #include "src/common/env.h"
53 #include "src/common/io_hdr.h"
54 #include "src/common/job_options.h"
55 #include "src/common/stepd_api.h"
56
/* Provide a default when no previously included header defines it. */
#if !defined(MAXHOSTNAMELEN)
#  define MAXHOSTNAMELEN 64
#endif
60
61 typedef struct {
62 unsigned char data[SLURM_IO_KEY_SIZE];
63 } srun_key_t;
64
65 typedef struct {
66 srun_key_t *key; /* srun key for IO verification */
67 slurm_addr_t resp_addr; /* response addr for task exit msg */
68 slurm_addr_t ioaddr; /* Address to connect on for normal I/O.
69 Spawn IO uses messages to the normal
70 resp_addr. */
71 uint16_t protocol_version; /* protocol_version of the srun */
72 } srun_info_t;
73
/*
 * Value stored in stepd_step_task_info_t.state.
 * Enumerators are numbered consecutively from zero.
 */
typedef enum {
	STEPD_STEP_TASK_INIT = 0,
	STEPD_STEP_TASK_STARTING = 1,
	STEPD_STEP_TASK_RUNNING = 2,
	STEPD_STEP_TASK_COMPLETE = 3
} stepd_step_task_state_t;
80
81 typedef struct {
82 pthread_mutex_t mutex; /* mutex to protect task state */
83 stepd_step_task_state_t state; /* task state */
84
85 int id; /* local task id */
86 uint32_t gtid; /* global task id */
87 pid_t pid; /* task pid */
88
89 char *ifname; /* standard input file name */
90 char *ofname; /* standard output file name */
91 char *efname; /* standard error file name */
92 int stdin_fd; /* standard input file descriptor */
93 int stdout_fd; /* standard output file descriptor */
94 int stderr_fd; /* standard error file descriptor */
95 int to_stdin; /* write file descriptor for task stdin */
96 int from_stdout;/* read file descriptor from task stdout*/
97 int from_stderr;/* read file descriptor from task stderr*/
98 eio_obj_t *in; /* standard input event IO object */
99 eio_obj_t *out; /* standard output event IO object */
100 eio_obj_t *err; /* standard error event IO object */
101
102 bool killed_by_cmd; /* true if task killed by our signal */
103 bool aborted; /* true if task called abort */
104 bool esent; /* true if exit status has been sent */
105 bool exited; /* true if task has exited */
106 int estatus; /* this task's exit status */
107
108 uint32_t argc;
109 char **argv;
110 } stepd_step_task_info_t;
111
/*
 * MPMD (multiple program, multiple data) description, needed for Cray.
 * The per-executable arrays below are indexed by executable.
 */
typedef struct {
	uint64_t apid;		/* application ID */
	int num_cmds;		/* how many executables the MPMD set holds */
	char **args;		/* per executable: argument string */
	char **command;		/* per executable: command name */
	int *first_pe;		/* per executable: first rank on this node,
				 * or -1 when it does not run on this node */
	int *start_pe;		/* per executable: starting rank in the set */
	int *total_pe;		/* per executable: total rank count in the set */

	int *placement;		/* per rank: NID (ntasks entries) */
} mpmd_set_t;
124
125 typedef struct {
126 slurmstepd_state_t state; /* Job state */
127 pthread_cond_t state_cond; /* Job state conditional */
128 pthread_mutex_t state_mutex; /* Job state mutex */
129 uint32_t jobid; /* Current Slurm job id */
130 uint32_t stepid; /* Current step id (or NO_VAL) */
131 uint32_t array_job_id; /* job array master job ID */
132 uint32_t array_task_id; /* job array ID */
133 uint32_t nnodes; /* number of nodes in current job */
134 uint32_t ntasks; /* total number of tasks in current job */
135 uint32_t nodeid; /* relative position of this node in job */
136 uint32_t node_tasks; /* number of tasks on *this* node */
137 uint32_t het_job_id; /* Hetjob ID or NO_VAL */
138 uint32_t het_job_nnodes; /* total node count for entire hetjob */
139 char *het_job_node_list; /* Hetjob step node list */
140 uint32_t het_job_node_offset;/* Hetjob node offset or NO_VAL */
141 uint32_t het_job_ntasks; /* total task count for entire hetjob */
142 uint32_t het_job_offset; /* Hetjob offset or NO_VAL */
143 uint32_t het_job_step_cnt; /* number of steps for entire hetjob */
144 uint32_t het_job_task_offset;/* Hetjob task offset or NO_VAL */
145 uint16_t *het_job_task_cnts; /* Number of tasks on each node in hetjob */
146 uint32_t **het_job_tids; /* Task IDs on each node of hetjob */
147 uint32_t *het_job_tid_offsets;/* map of tasks (by id) to originating hetjob*/
148 uint16_t *task_cnts; /* Number of tasks on each node in job */
149 uint32_t cpus_per_task; /* number of cpus desired per task */
150 uint32_t debug; /* debug level for job slurmd */
151 uint64_t job_mem; /* MB of memory reserved for the job */
152 uint64_t step_mem; /* MB of memory reserved for the step */
153 uint16_t cpus; /* number of cpus to use for this job */
154 uint32_t argc; /* number of commandline arguments */
155 char **env; /* job environment */
156 char **argv; /* job argument vector */
157 char *cwd; /* path to current working directory */
158 task_dist_states_t task_dist;/* -m distribution */
159 char *node_name; /* node name of node running job
160 * needed for front-end systems */
161 cpu_bind_type_t cpu_bind_type; /* --cpu-bind= */
162 char *cpu_bind; /* binding map for map/mask_cpu */
163 mem_bind_type_t mem_bind_type; /* --mem-bind= */
164 char *mem_bind; /* binding map for tasks to memory */
165 uint16_t accel_bind_type; /* --accel_bind= */
166 uint32_t cpu_freq_min; /* Minimum cpu frequency */
167 uint32_t cpu_freq_max; /* Maximum cpu frequency */
168 uint32_t cpu_freq_gov; /* cpu frequency governor */
169 dynamic_plugin_data_t *switch_job; /* switch-specific job information */
170 uid_t uid; /* user id for job */
171 char *user_name;
172 /* fields from the launch cred used to support nss_slurm */
173 char *pw_gecos;
174 char *pw_dir;
175 char *pw_shell;
176 gid_t gid; /* group ID for job */
177 int ngids; /* length of the following gids array */
178 char **gr_names;
179 gid_t *gids; /* array of gids for user specified in uid */
180 bool aborted; /* true if already aborted */
181 bool batch; /* true if this is a batch job */
182 bool run_prolog; /* true if need to run prolog */
183 time_t timelimit; /* time at which job must stop */
184 uint32_t profile; /* Level of acct_gather_profile */
185 char *task_prolog; /* per-task prolog */
186 char *task_epilog; /* per-task epilog */
187 stepd_step_task_info_t **task; /* array of task information pointers*/
188 eio_handle_t *eio;
189 List sruns; /* List of srun_info_t pointers */
190 List clients; /* List of struct client_io_info pointers */
191 List stdout_eio_objs; /* List of objs that gather stdout from tasks */
192 List stderr_eio_objs; /* List of objs that gather stderr from tasks */
193 List free_incoming; /* List of free struct io_buf * for incoming
194 * traffic. "incoming" means traffic from srun
195 * to the tasks.
196 */
197 List free_outgoing; /* List of free struct io_buf * for outgoing
198 * traffic "outgoing" means traffic from the
199 * tasks to srun.
200 */
201 int incoming_count; /* Count of total incoming message buffers
202 * including free_incoming buffers and
203 * buffers in use.
204 */
205 int outgoing_count; /* Count of total outgoing message buffers
206 * including free_outgoing buffers and
207 * buffers in use.
208 */
209
210 List outgoing_cache; /* cache of outgoing stdio messages
211 * used when a new client attaches
212 */
213
214 pthread_t ioid; /* pthread id of IO thread */
215 pthread_t msgid; /* pthread id of message thread */
216 eio_handle_t *msg_handle; /* eio handle for the message thread */
217
218 pid_t jmgr_pid; /* job manager pid */
219 pid_t pgid; /* process group id for tasks */
220 uint32_t flags; /* See LAUNCH_* flags defined in slurm_protocol_defs.h */
221 uint16_t overcommit;
222 env_t *envtp;
223 uint64_t cont_id;
224
225 char *batchdir;
226 jobacctinfo_t *jobacct;
227 uint8_t open_mode; /* stdout/err append or truncate */
228 job_options_t options;
229 uint32_t resv_id; /* Cray/BASIL reservation ID */
230 uint16_t restart_cnt; /* batch job restart count */
231 char *job_alloc_cores; /* needed by the SPANK cpuset plugin */
232 char *step_alloc_cores;/* needed by the SPANK cpuset plugin */
233 List job_gres_list; /* Needed by GRES plugin */
234 List step_gres_list; /* Needed by GRES plugin */
235 char *tres_bind; /* TRES binding */
236 char *tres_freq; /* TRES frequency */
237 launch_tasks_request_msg_t *msg; /* When a non-batch step this
238 * is the message sent. DO
239 * NOT FREE, IT IS JUST A
240 * POINTER. */
241 mpmd_set_t *mpmd_set; /* MPMD specifications for Cray */
242 uint16_t job_core_spec; /* count of specialized cores */
243 int non_smp; /* Set if task IDs are not monotonically
244 * increasing across all nodes, set only
245 * native Cray systems */
246 bool oom_error; /* step out of memory error */
247
248 uint16_t x11; /* only set for extern step */
249 int x11_display; /* display number if x11 forwarding setup */
250 char *x11_alloc_host; /* remote host to proxy through */
251 uint16_t x11_alloc_port; /* remote port to proxy through */
252 char *x11_magic_cookie; /* xauth magic cookie value */
253 char *x11_target; /* remote target. unix socket if port == 0 */
254 uint16_t x11_target_port; /* remote x11 port to connect back to */
255 char *x11_xauthority; /* temporary XAUTHORITY location, or NULL */
256 } stepd_step_rec_t;
257
258
259 stepd_step_rec_t * stepd_step_rec_create(launch_tasks_request_msg_t *msg,
260 uint16_t protocol_version);
261 stepd_step_rec_t * batch_stepd_step_rec_create(batch_job_launch_msg_t *msg);
262
263 void stepd_step_rec_destroy(stepd_step_rec_t *job);
264
265 srun_info_t * srun_info_create(slurm_cred_t *cred, slurm_addr_t *respaddr,
266 slurm_addr_t *ioaddr, uint16_t protocol_version);
267
268 void srun_info_destroy(srun_info_t *srun);
269
270 stepd_step_task_info_t * task_info_create(int taskid, int gtaskid,
271 char *ifname, char *ofname,
272 char *efname);
273
274 /*
275 * Return a task info structure corresponding to pid.
276 * We inline it here so that it can be included from src/common/plugstack.c
277 * without undefined symbol warnings.
278 */
279 static inline stepd_step_task_info_t *
job_task_info_by_pid(stepd_step_rec_t * job,pid_t pid)280 job_task_info_by_pid (stepd_step_rec_t *job, pid_t pid)
281 {
282 uint32_t i;
283
284 if (!job)
285 return NULL;
286
287 for (i = 0; i < job->node_tasks; i++) {
288 if (job->task[i]->pid == pid)
289 return (job->task[i]);
290 }
291 return (NULL);
292 }
293
294 #endif /* !_SLURMSTEPD_JOB_H */
295