1 /*****************************************************************************\
2  *  srun_comm.h - definitions for srun communications
3  *****************************************************************************
4  *  Copyright (C) 2002-2007 The Regents of the University of California.
5  *  Copyright (C) 2008-2010 Lawrence Livermore National Security.
6  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
7  *  Written by Morris Jette <jette@llnl.gov> et. al.
8  *  CODE-OCEC-09-009. All rights reserved.
9  *
10  *  This file is part of Slurm, a resource management program.
11  *  For details, see <https://slurm.schedmd.com/>.
12  *  Please also read the included file: DISCLAIMER.
13  *
14  *  Slurm is free software; you can redistribute it and/or modify it under
15  *  the terms of the GNU General Public License as published by the Free
16  *  Software Foundation; either version 2 of the License, or (at your option)
17  *  any later version.
18  *
19  *  In addition, as a special exception, the copyright holders give permission
20  *  to link the code of portions of this program with the OpenSSL library under
21  *  certain conditions as described in each individual source file, and
22  *  distribute linked combinations including the two. You must obey the GNU
23  *  General Public License in all respects for all of the code used other than
24  *  OpenSSL. If you modify file(s) with this exception, you may extend this
25  *  exception to your version of the file(s), but you are not obligated to do
26  *  so. If you do not wish to do so, delete this exception statement from your
27  *  version.  If you delete this exception statement from all source files in
28  *  the program, then also delete it here.
29  *
30  *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
31  *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
32  *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
33  *  details.
34  *
35  *  You should have received a copy of the GNU General Public License along
36  *  with Slurm; if not, write to the Free Software Foundation, Inc.,
37  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
38 \*****************************************************************************/
39 
40 #ifndef _HAVE_SRUN_COMM_H
41 #define _HAVE_SRUN_COMM_H
42 
43 #include <sys/types.h>
44 #include <time.h>
45 
46 #include "src/slurmctld/slurmctld.h"
47 
48 /*
49  * srun_allocate - notify srun of a resource allocation
50  * IN job_ptr - job that was allocated resources
51  */
52 extern void srun_allocate(job_record_t *job_ptr);
53 
54 /*
55  * srun_allocate_abort - notify srun of a resource allocation failure
56  * IN job_ptr - job whose resource allocation failed
57  */
58 extern void srun_allocate_abort(job_record_t *job_ptr);
59 
60 /*
61  * srun_exec - request that srun execute a specific command
62  *	and route its output to stdout
63  * IN step_ptr - pointer to the slurmctld job step record
64  * IN argv - command and arguments to execute
65  */
66 extern void srun_exec(step_record_t *step_ptr, char **argv);
67 
68 /*
69  * srun_job_complete - notify srun of a job's termination
70  * IN job_ptr - pointer to the slurmctld record of the job being reported
71  */
72 extern void srun_job_complete(job_record_t *job_ptr);
73 
74 
75 /*
76  * srun_job_suspend - notify salloc of suspend/resume operation
77  * IN job_ptr - pointer to the slurmctld job record
78  * IN op - SUSPEND_JOB or RESUME_JOB (enum suspend_opts from slurm.h)
79  * RET - true if the message was sent, otherwise false
80  */
81 extern bool srun_job_suspend(job_record_t *job_ptr, uint16_t op);
82 
83 /*
84  * srun_step_complete - notify srun of a job step's termination
85  * IN step_ptr - pointer to the slurmctld record of the step being reported
86  */
87 extern void srun_step_complete(step_record_t *step_ptr);
88 
89 /*
90  * srun_step_missing - notify srun that a job step is missing from
91  *		       a node we expect to find it on
92  * IN step_ptr  - pointer to the slurmctld job step record
93  * IN node_list - names of the nodes on which the step was not found
94  */
95 extern void srun_step_missing(step_record_t *step_ptr, char *node_list);
96 
97 /*
98  * srun_step_signal - notify srun that a job step should be signaled
99  * NOTE: Needed on BlueGene/Q to signal runjob process
100  * IN step_ptr - pointer to the slurmctld job step record
101  * IN signal - number of the signal the step should receive
102  */
103 extern void srun_step_signal(step_record_t *step_ptr, uint16_t signal);
104 
105 /*
106  * srun_node_fail - notify srun of a node's failure
107  * IN job_ptr - job whose srun is to be notified
108  * IN node_name - name of the failed node
109  */
110 extern void srun_node_fail(job_record_t *job_ptr, char *node_name);
111 
112 /* srun_ping - ping every srun command that has not been heard from recently */
113 extern void srun_ping (void);
114 
115 /*
116  * srun_response - note that srun has responded
117  * IN job_id  - ID of the responding job
118  * IN step_id - ID of the responding step, or NO_VAL if not a step
119  */
120 extern void srun_response(uint32_t job_id, uint32_t step_id);
121 
122 /*
123  * srun_step_timeout - notify srun of a job step's imminent timeout
124  * IN step_ptr - pointer to the slurmctld step record
125  * IN timeout_val - time at which the step is going to time out
126  */
127 extern void srun_step_timeout(step_record_t *step_ptr, time_t timeout_val);
128 
129 /*
130  * srun_timeout - notify srun of a job's timeout
131  * IN job_ptr - pointer to the slurmctld record of the job being reported
132  */
133 extern void srun_timeout(job_record_t *job_ptr);
134 
135 /*
136  * srun_user_message - send an arbitrary message to an srun job (no job steps)
137  */
138 extern int srun_user_message(job_record_t *job_ptr, char *msg);
139 
140 #endif /* !_HAVE_SRUN_COMM_H */
141