1 /*****************************************************************************\
2 * update_step.c - update step functions for scontrol.
3 *****************************************************************************
4 * Copyright (C) 2002-2007 The Regents of the University of California.
5 * Copyright (C) 2008-2010 Lawrence Livermore National Security.
6 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
7 * Written by Morris Jette <jette1@llnl.gov>
8 * CODE-OCEC-09-009. All rights reserved.
9 *
10 * This file is part of Slurm, a resource management program.
11 * For details, see <https://slurm.schedmd.com/>.
12 * Please also read the included file: DISCLAIMER.
13 *
14 * Slurm is free software; you can redistribute it and/or modify it under
15 * the terms of the GNU General Public License as published by the Free
16 * Software Foundation; either version 2 of the License, or (at your option)
17 * any later version.
18 *
19 * In addition, as a special exception, the copyright holders give permission
20 * to link the code of portions of this program with the OpenSSL library under
21 * certain conditions as described in each individual source file, and
22 * distribute linked combinations including the two. You must obey the GNU
23 * General Public License in all respects for all of the code used other than
24 * OpenSSL. If you modify file(s) with this exception, you may extend this
25 * exception to your version of the file(s), but you are not obligated to do
26 * so. If you do not wish to do so, delete this exception statement from your
27 * version. If you delete this exception statement from all source files in
28 * the program, then also delete it here.
29 *
30 * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
31 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
32 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
33 * details.
34 *
35 * You should have received a copy of the GNU General Public License along
36 * with Slurm; if not, write to the Free Software Foundation, Inc.,
37 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
38 \*****************************************************************************/
39
40 #include "scontrol.h"
41 #include "src/common/proc_args.h"
42 #include "src/common/slurm_jobacct_gather.h"
43
/* Upper bound on the number of space-separated fields that
 * _parse_comp_file() splits out of a completion record; its field array is
 * sized MAX_RECORD_FIELDS + 1 */
44 #define MAX_RECORD_FIELDS 100
/* Size in bytes of the buffer _parse_comp_file() reads the completion file
 * line into */
45 #define BUFFER_SIZE 4096
46
47 /* Return the current time limit of the specified job/step_id or NO_VAL if the
48 * information is not available */
_get_step_time(uint32_t job_id,uint32_t step_id)49 static uint32_t _get_step_time(uint32_t job_id, uint32_t step_id)
50 {
51 uint32_t time_limit = NO_VAL;
52 int i, rc;
53 job_step_info_response_msg_t *resp;
54
55 rc = slurm_get_job_steps((time_t) 0, job_id, step_id, &resp, SHOW_ALL);
56 if (rc == SLURM_SUCCESS) {
57 for (i = 0; i < resp->job_step_count; i++) {
58 if ((resp->job_steps[i].job_id != job_id) ||
59 (resp->job_steps[i].step_id != step_id))
60 continue; /* should not happen */
61 time_limit = resp->job_steps[i].time_limit;
62 break;
63 }
64 slurm_free_job_step_info_response_msg(resp);
65 } else {
66 error("Could not load state information for step %u.%u: %m",
67 job_id, step_id);
68 }
69
70 return time_limit;
71 }
72
/*
 * _field_to_ll - convert one text field of a completion record to an integer
 * IN str - NUL-terminated field text
 * RET the leading base-10 integer found in str, or 0 if there is none
 *
 * Parses as leniently as atoi(), but stays well defined for values that do
 * not fit in an int (atoi() has undefined behavior in that case), so time_t
 * and 64-bit usage fields are not at the mercy of the int range.
 */
static long long _field_to_ll(const char *str)
{
	return strtoll(str, NULL, 10);
}

/*
 * _parse_comp_file - fill a step update request from a step completion file
 * IN file - path of the completion file; only its first line is read
 * IN/OUT step_msg - request whose jobacct, exit_code, start_time, end_time
 *	and name fields are set from the record
 * RET SLURM_SUCCESS, or SLURM_ERROR (after printing a message to stderr) if
 *	the file cannot be opened, is empty, names an unsupported version, has
 *	the wrong number of fields, or no accounting record could be created
 *
 * The record is a single line of space-separated fields whose positions are
 * given by the UPDATE_STEP_* enum below. step_msg->jobacct receives the
 * result of jobacctinfo_create() and step_msg->name the result of xstrdup().
 */
static int _parse_comp_file(
	char *file, step_update_request_msg_t *step_msg)
{
	int i;
	FILE *fd = fopen(file, "r");
	char line[BUFFER_SIZE];
	char *fptr;
	int version;
	/* One slot beyond MAX_RECORD_FIELDS so the list can always be ended
	 * with a NULL entry, even when the line holds more data than
	 * expected */
	char *update[MAX_RECORD_FIELDS + 1];

	/* Position of each field within a version 1 record */
	enum {	UPDATE_STEP_VERSION,
		UPDATE_STEP_EXTRA,
		UPDATE_STEP_INBLOCKS,
		UPDATE_STEP_OUTBLOCKS,
		UPDATE_STEP_EXITCODE,
		UPDATE_STEP_CPU_ALLOC,
		UPDATE_STEP_START,
		UPDATE_STEP_END,
		UPDATE_STEP_USER_SEC,
		UPDATE_STEP_SYS_SEC,
		UPDATE_STEP_MAX_RSS,
		UPDATE_STEP_UID,
		UPDATE_STEP_STEPNAME,
		UPDATE_STEP_VER1_LENGTH
	};

	if (fd == NULL) {
		perror(file);
		return SLURM_ERROR;
	}

	if (!fgets(line, sizeof(line), fd)) {
		fprintf(stderr, "Empty step update completion file\n");
		(void) fclose(fd);
		return SLURM_ERROR;
	}
	(void) fclose(fd);

	/* Break the record into NUL-terminated strings, in place */
	fptr = line;
	for (i = 0; i < MAX_RECORD_FIELDS; i++) {
		update[i] = fptr;
		fptr = strchr(fptr, ' ');
		if (fptr == NULL) {
			/* Last field: drop the newline kept by fgets() */
			fptr = strchr(update[i], '\n');
			if (fptr)
				*fptr = '\0';
			break;
		}
		*fptr++ = '\0';
	}

	/* Turn the index of the last field into a field count (when the loop
	 * ran to completion, i already equals the count) */
	if (i < MAX_RECORD_FIELDS)
		i++;
	update[i] = NULL;

	version = (int) _field_to_ll(update[UPDATE_STEP_VERSION]);
	switch (version) {
	case 1:
		if (i != UPDATE_STEP_VER1_LENGTH) {
			fprintf(stderr,
				"Bad step update completion file length\n");
			return SLURM_ERROR;
		}
		step_msg->jobacct = jobacctinfo_create(NULL);
		/* NOTE(review): jobacctinfo_create() is defined outside this
		 * file; guard against a NULL result rather than dereference
		 * it below */
		if (step_msg->jobacct == NULL) {
			fprintf(stderr,
				"Unable to create step accounting record\n");
			return SLURM_ERROR;
		}
		step_msg->exit_code =
			_field_to_ll(update[UPDATE_STEP_EXITCODE]);
		step_msg->start_time =
			_field_to_ll(update[UPDATE_STEP_START]);
		step_msg->end_time =
			_field_to_ll(update[UPDATE_STEP_END]);
		step_msg->jobacct->user_cpu_sec =
			_field_to_ll(update[UPDATE_STEP_USER_SEC]);
		step_msg->jobacct->sys_cpu_sec =
			_field_to_ll(update[UPDATE_STEP_SYS_SEC]);
		/* CPU usage is recorded as user plus system seconds */
		step_msg->jobacct->tres_usage_in_min[TRES_ARRAY_CPU] =
			step_msg->jobacct->user_cpu_sec
			+ step_msg->jobacct->sys_cpu_sec;
		step_msg->jobacct->tres_usage_in_max[TRES_ARRAY_MEM] =
			_field_to_ll(update[UPDATE_STEP_MAX_RSS]);
		step_msg->name =
			xstrdup(xbasename(update[UPDATE_STEP_STEPNAME]));
		break;
	default:
		fprintf(stderr, "Unsupported step update "
			"completion file version: %d\n",
			version);
		return SLURM_ERROR;
	}

	return SLURM_SUCCESS;
}
165
166 /*
167 * scontrol_update_step - update the slurm step configuration per the supplied
168 * arguments
169 * IN argc - count of arguments
170 * IN argv - list of arguments
171 * RET 0 if no slurm error, errno otherwise. parsing error prints
172 * error message and returns 0
173 */
scontrol_update_step(int argc,char ** argv)174 extern int scontrol_update_step (int argc, char **argv)
175 {
176 int i, update_cnt = 0;
177 char *tag, *val;
178 int taglen;
179 step_update_request_msg_t step_msg;
180
181 slurm_init_update_step_msg (&step_msg);
182
183 for (i=0; i<argc; i++) {
184 tag = argv[i];
185 val = strchr(argv[i], '=');
186 if (val) {
187 taglen = val - argv[i];
188 val++;
189 } else {
190 exit_code = 1;
191 fprintf (stderr, "Invalid input: %s\n", argv[i]);
192 fprintf (stderr, "Request aborted\n");
193 return -1;
194 }
195
196 if (xstrncasecmp(tag, "StepId", MAX(taglen, 4)) == 0) {
197 char *end_ptr;
198 step_msg.job_id = (uint32_t) strtol(val, &end_ptr, 10);
199 if (end_ptr[0] == '.') {
200 step_msg.step_id = (uint32_t)
201 strtol(end_ptr+1, (char **) NULL, 10);
202 } else if (end_ptr[0] != '\0') {
203 exit_code = 1;
204 fprintf (stderr, "Invalid StepID parameter: "
205 "%s\n", argv[i]);
206 fprintf (stderr, "Request aborted\n");
207 return 0;
208 } /* else apply to all steps of this job_id */
209 } else if (xstrncasecmp(tag, "TimeLimit", MAX(taglen, 2)) == 0) {
210 bool incr, decr;
211 uint32_t step_current_time, time_limit;
212
213 incr = (val[0] == '+');
214 decr = (val[0] == '-');
215 if (incr || decr)
216 val++;
217 time_limit = time_str2mins(val);
218 if (time_limit == NO_VAL) {
219 error("Invalid TimeLimit value");
220 exit_code = 1;
221 return 0;
222 }
223 if (incr || decr) {
224 step_current_time = _get_step_time(
225 step_msg.job_id,
226 step_msg.step_id);
227 if (step_current_time == NO_VAL) {
228 exit_code = 1;
229 return 0;
230 }
231 if (incr) {
232 time_limit += step_current_time;
233 } else if (time_limit > step_current_time) {
234 error("TimeLimit decrement larger than"
235 " current time limit (%u > %u)",
236 time_limit, step_current_time);
237 exit_code = 1;
238 return 0;
239 } else {
240 time_limit = step_current_time -
241 time_limit;
242 }
243 }
244 step_msg.time_limit = time_limit;
245 update_cnt++;
246 } else if (xstrncasecmp(tag, "CompFile", MAX(taglen, 2)) == 0) {
247 if (_parse_comp_file(val, &step_msg)) {
248 exit_code = 1;
249 fprintf(stderr,
250 "Bad completion file (%s) given\n"
251 "Request aborted\n", val);
252 return 0;
253 }
254 update_cnt++;
255 } else {
256 exit_code = 1;
257 fprintf (stderr, "Update of this parameter is not "
258 "supported: %s\n", argv[i]);
259 fprintf (stderr, "Request aborted\n");
260 return 0;
261 }
262 }
263
264 if (update_cnt == 0) {
265 exit_code = 1;
266 fprintf (stderr, "No changes specified\n");
267 return 0;
268 }
269
270 if (slurm_update_step(&step_msg))
271 return slurm_get_errno ();
272 else
273 return 0;
274 }
275