1 /*****************************************************************************\ 2 * srun_comm.h - definitions srun communications 3 ***************************************************************************** 4 * Copyright (C) 2002-2007 The Regents of the University of California. 5 * Copyright (C) 2008-2010 Lawrence Livermore National Security. 6 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 7 * Written by Morris Jette <jette@llnl.gov> et. al. 8 * CODE-OCEC-09-009. All rights reserved. 9 * 10 * This file is part of Slurm, a resource management program. 11 * For details, see <https://slurm.schedmd.com/>. 12 * Please also read the included file: DISCLAIMER. 13 * 14 * Slurm is free software; you can redistribute it and/or modify it under 15 * the terms of the GNU General Public License as published by the Free 16 * Software Foundation; either version 2 of the License, or (at your option) 17 * any later version. 18 * 19 * In addition, as a special exception, the copyright holders give permission 20 * to link the code of portions of this program with the OpenSSL library under 21 * certain conditions as described in each individual source file, and 22 * distribute linked combinations including the two. You must obey the GNU 23 * General Public License in all respects for all of the code used other than 24 * OpenSSL. If you modify file(s) with this exception, you may extend this 25 * exception to your version of the file(s), but you are not obligated to do 26 * so. If you do not wish to do so, delete this exception statement from your 27 * version. If you delete this exception statement from all source files in 28 * the program, then also delete it here. 29 * 30 * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY 31 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 32 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 33 * details. 34 * 35 * You should have received a copy of the GNU General Public License along 36 * with Slurm; if not, write to the Free Software Foundation, Inc., 37 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 38 \*****************************************************************************/ 39 40 #ifndef _HAVE_SRUN_COMM_H 41 #define _HAVE_SRUN_COMM_H 42 43 #include <sys/types.h> 44 #include <time.h> 45 46 #include "src/slurmctld/slurmctld.h" 47 48 /* 49 * srun_allocate - notify srun of a resource allocation 50 * IN job_ptr - job allocated resources 51 */ 52 extern void srun_allocate(job_record_t *job_ptr); 53 54 /* 55 * srun_allocate_abort - notify srun of a resource allocation failure 56 * IN job_ptr - job allocated resources 57 */ 58 extern void srun_allocate_abort(job_record_t *job_ptr); 59 60 /* 61 * srun_exec - request that srun execute a specific command 62 * and route it's output to stdout 63 * IN step_ptr - pointer to the slurmctld job step record 64 * IN argv - command and arguments to execute 65 */ 66 extern void srun_exec(step_record_t *step_ptr, char **argv); 67 68 /* 69 * srun_job_complete - notify srun of a job's termination 70 * IN job_ptr - pointer to the slurmctld job record 71 */ 72 extern void srun_job_complete(job_record_t *job_ptr); 73 74 75 /* 76 * srun_job_suspend - notify salloc of suspend/resume operation 77 * IN job_ptr - pointer to the slurmctld job record 78 * IN op - SUSPEND_JOB or RESUME_JOB (enum suspend_opts from slurm.h) 79 * RET - true if message send, otherwise false 80 */ 81 extern bool srun_job_suspend(job_record_t *job_ptr, uint16_t op); 82 83 /* 84 * srun_step_complete - notify srun of a job step's termination 85 * IN step_ptr - pointer to the slurmctld job step record 86 */ 87 extern void srun_step_complete(step_record_t *step_ptr); 88 89 /* 90 * srun_step_missing - notify srun that a job step is missing from 91 * a node we expect to find it on 92 * IN step_ptr - pointer to the slurmctld job step record 93 * IN node_list - name of nodes we did not find the step on 94 */ 95 extern void srun_step_missing(step_record_t *step_ptr, char *node_list); 96 97 /* 98 * srun_step_signal - notify srun that a job step should be signaled 99 * NOTE: Needed on BlueGene/Q to signal runjob process 100 * IN step_ptr - pointer to the slurmctld job step record 101 * IN signal - signal number 102 */ 103 extern void srun_step_signal(step_record_t *step_ptr, uint16_t signal); 104 105 /* 106 * srun_node_fail - notify srun of a node's failure 107 * IN job_ptr - job to notify 108 * IN node_name - name of failed node 109 */ 110 extern void srun_node_fail(job_record_t *job_ptr, char *node_name); 111 112 /* srun_ping - ping all srun commands that have not been heard from recently */ 113 extern void srun_ping (void); 114 115 /* 116 * srun_response - note that srun has responded 117 * IN job_id - id of job responding 118 * IN step_id - id of step responding or NO_VAL if not a step 119 */ 120 extern void srun_response(uint32_t job_id, uint32_t step_id); 121 122 /* 123 * srun_step_timeout - notify srun of a job step's imminent timeout 124 * IN step_ptr - pointer to the slurmctld step record 125 * IN timeout_val - when it is going to time out 126 */ 127 extern void srun_step_timeout(step_record_t *step_ptr, time_t timeout_val); 128 129 /* 130 * srun_timeout - notify srun of a job's timeout 131 * IN job_ptr - pointer to the slurmctld job record 132 */ 133 extern void srun_timeout(job_record_t *job_ptr); 134 135 /* 136 * srun_user_message - Send arbitrary message to an srun job (no job steps) 137 */ 138 extern int srun_user_message(job_record_t *job_ptr, char *msg); 139 140 #endif /* !_HAVE_SRUN_COMM_H */ 141