1 /*****************************************************************************\
2  *  update_step.c - update step functions for scontrol.
3  *****************************************************************************
4  *  Copyright (C) 2002-2007 The Regents of the University of California.
5  *  Copyright (C) 2008-2010 Lawrence Livermore National Security.
6  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
7  *  Written by Morris Jette <jette1@llnl.gov>
8  *  CODE-OCEC-09-009. All rights reserved.
9  *
10  *  This file is part of Slurm, a resource management program.
11  *  For details, see <https://slurm.schedmd.com/>.
12  *  Please also read the included file: DISCLAIMER.
13  *
14  *  Slurm is free software; you can redistribute it and/or modify it under
15  *  the terms of the GNU General Public License as published by the Free
16  *  Software Foundation; either version 2 of the License, or (at your option)
17  *  any later version.
18  *
19  *  In addition, as a special exception, the copyright holders give permission
20  *  to link the code of portions of this program with the OpenSSL library under
21  *  certain conditions as described in each individual source file, and
22  *  distribute linked combinations including the two. You must obey the GNU
23  *  General Public License in all respects for all of the code used other than
24  *  OpenSSL. If you modify file(s) with this exception, you may extend this
25  *  exception to your version of the file(s), but you are not obligated to do
26  *  so. If you do not wish to do so, delete this exception statement from your
27  *  version.  If you delete this exception statement from all source files in
28  *  the program, then also delete it here.
29  *
30  *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
31  *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
32  *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
33  *  details.
34  *
35  *  You should have received a copy of the GNU General Public License along
36  *  with Slurm; if not, write to the Free Software Foundation, Inc.,
37  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
38 \*****************************************************************************/
39 
40 #include "scontrol.h"
41 #include "src/common/proc_args.h"
42 #include "src/common/slurm_jobacct_gather.h"
43 
44 #define MAX_RECORD_FIELDS 100
45 #define BUFFER_SIZE 4096
46 
47 /* Return the current time limit of the specified job/step_id or NO_VAL if the
48  * information is not available */
_get_step_time(uint32_t job_id,uint32_t step_id)49 static uint32_t _get_step_time(uint32_t job_id, uint32_t step_id)
50 {
51 	uint32_t time_limit = NO_VAL;
52 	int i, rc;
53 	job_step_info_response_msg_t *resp;
54 
55 	rc = slurm_get_job_steps((time_t) 0, job_id, step_id, &resp, SHOW_ALL);
56 	if (rc == SLURM_SUCCESS) {
57 		for (i = 0; i < resp->job_step_count; i++) {
58 			if ((resp->job_steps[i].job_id != job_id) ||
59 			    (resp->job_steps[i].step_id != step_id))
60 				continue;	/* should not happen */
61 			time_limit = resp->job_steps[i].time_limit;
62 			break;
63 		}
64 		slurm_free_job_step_info_response_msg(resp);
65 	} else {
66 		error("Could not load state information for step %u.%u: %m",
67 		      job_id, step_id);
68 	}
69 
70 	return time_limit;
71 }
72 
/*
 * _parse_comp_file - load step completion data from a file into a step
 *	update request
 * IN file - path of the completion file; only its first line is read
 * IN/OUT step_msg - request to fill in; on success jobacct, exit_code,
 *	start_time, end_time and name are set
 * RET SLURM_SUCCESS, or SLURM_ERROR if the file cannot be opened, is empty,
 *	carries an unsupported version, has the wrong number of fields, or
 *	no accounting record could be created
 *
 * The record is one line of single-space separated fields whose positions
 * are given by the enum below; field 0 is the record format version.
 */
static int _parse_comp_file(
	char *file, step_update_request_msg_t *step_msg)
{
	int i;
	FILE *fd = fopen(file, "r");
	char line[BUFFER_SIZE];
	char *fptr;
	int version;
	char *update[MAX_RECORD_FIELDS+1];    /* End list with null entry and,
						 possibly, more data than we
						 expected */

	/* Field positions within a version 1 record */
	enum {	UPDATE_STEP_VERSION,
		UPDATE_STEP_EXTRA,
		UPDATE_STEP_INBLOCKS,
		UPDATE_STEP_OUTBLOCKS,
		UPDATE_STEP_EXITCODE,
		UPDATE_STEP_CPU_ALLOC,
		UPDATE_STEP_START,
		UPDATE_STEP_END,
		UPDATE_STEP_USER_SEC,
		UPDATE_STEP_SYS_SEC,
		UPDATE_STEP_MAX_RSS,
		UPDATE_STEP_UID,
		UPDATE_STEP_STEPNAME,
		UPDATE_STEP_VER1_LENGTH
	};

	if (fd == NULL) {
		perror(file);
		return SLURM_ERROR;
	}

	if (!fgets(line, BUFFER_SIZE, fd)) {
		fprintf(stderr, "Empty step update completion file\n");
		(void) fclose(fd);
		return SLURM_ERROR;
	}
	/* Everything needed is now in line[]; the file is not read again */
	(void) fclose(fd);

	fptr = line;	/* break the record into NULL-terminated strings */
	for (i = 0; i < MAX_RECORD_FIELDS; i++) {
		update[i] = fptr;
		fptr = strchr(fptr, ' ');
		if (fptr == NULL) {
			/* Last field: strip the trailing newline, if any */
			fptr = strchr(update[i], '\n');
			if (fptr)
				*fptr = '\0';
			break;
		}
		*fptr++ = '\0';
	}

	/* On early exit i indexes the last field; turn it into a count */
	if (i < MAX_RECORD_FIELDS)
		i++;
	update[i] = NULL;

	version = atoi(update[UPDATE_STEP_VERSION]);
	switch (version) {
	case 1:
		if (i != UPDATE_STEP_VER1_LENGTH) {
			fprintf(stderr,
				"Bad step update completion file length\n");
			return SLURM_ERROR;
		}
		step_msg->jobacct = jobacctinfo_create(NULL);
		if (step_msg->jobacct == NULL) {
			/* Fail cleanly rather than dereference NULL below */
			fprintf(stderr, "Unable to create accounting "
				"record for step update\n");
			return SLURM_ERROR;
		}
		step_msg->exit_code = atoi(update[UPDATE_STEP_EXITCODE]);
		step_msg->start_time = atoi(update[UPDATE_STEP_START]);
		step_msg->end_time = atoi(update[UPDATE_STEP_END]);
		step_msg->jobacct->user_cpu_sec =
			atoi(update[UPDATE_STEP_USER_SEC]);
		step_msg->jobacct->sys_cpu_sec =
			atoi(update[UPDATE_STEP_SYS_SEC]);
		/* CPU usage is recorded as user plus system seconds */
		step_msg->jobacct->tres_usage_in_min[TRES_ARRAY_CPU] =
			step_msg->jobacct->user_cpu_sec
			+ step_msg->jobacct->sys_cpu_sec;
		step_msg->jobacct->tres_usage_in_max[TRES_ARRAY_MEM] =
			atoi(update[UPDATE_STEP_MAX_RSS]);
		/* Copy the name: update[] points into the local line[] */
		step_msg->name =
			xstrdup(xbasename(update[UPDATE_STEP_STEPNAME]));
		break;
	default:
		fprintf(stderr, "Unsupported step update "
			"completion file version: %d\n",
			version);
		return SLURM_ERROR;
	}

	return SLURM_SUCCESS;
}
165 
166 /*
167  * scontrol_update_step - update the slurm step configuration per the supplied
168  *	arguments
169  * IN argc - count of arguments
170  * IN argv - list of arguments
171  * RET 0 if no slurm error, errno otherwise. parsing error prints
172  *			error message and returns 0
173  */
scontrol_update_step(int argc,char ** argv)174 extern int scontrol_update_step (int argc, char **argv)
175 {
176 	int i, update_cnt = 0;
177 	char *tag, *val;
178 	int taglen;
179 	step_update_request_msg_t step_msg;
180 
181 	slurm_init_update_step_msg (&step_msg);
182 
183 	for (i=0; i<argc; i++) {
184 		tag = argv[i];
185 		val = strchr(argv[i], '=');
186 		if (val) {
187 			taglen = val - argv[i];
188 			val++;
189 		} else {
190 			exit_code = 1;
191 			fprintf (stderr, "Invalid input: %s\n", argv[i]);
192 			fprintf (stderr, "Request aborted\n");
193 			return -1;
194 		}
195 
196 		if (xstrncasecmp(tag, "StepId", MAX(taglen, 4)) == 0) {
197 			char *end_ptr;
198 			step_msg.job_id = (uint32_t) strtol(val, &end_ptr, 10);
199 			if (end_ptr[0] == '.') {
200 				step_msg.step_id = (uint32_t)
201 					strtol(end_ptr+1, (char **) NULL, 10);
202 			} else if (end_ptr[0] != '\0') {
203 				exit_code = 1;
204 				fprintf (stderr, "Invalid StepID parameter: "
205 					 "%s\n", argv[i]);
206 				fprintf (stderr, "Request aborted\n");
207 				return 0;
208 			} /* else apply to all steps of this job_id */
209 		} else if (xstrncasecmp(tag, "TimeLimit", MAX(taglen, 2)) == 0) {
210 			bool incr, decr;
211 			uint32_t step_current_time, time_limit;
212 
213 			incr = (val[0] == '+');
214 			decr = (val[0] == '-');
215 			if (incr || decr)
216 				val++;
217 			time_limit = time_str2mins(val);
218 			if (time_limit == NO_VAL) {
219 				error("Invalid TimeLimit value");
220 				exit_code = 1;
221 				return 0;
222 			}
223 			if (incr || decr) {
224 				step_current_time = _get_step_time(
225 							step_msg.job_id,
226 							step_msg.step_id);
227 				if (step_current_time == NO_VAL) {
228 					exit_code = 1;
229 					return 0;
230 				}
231 				if (incr) {
232 					time_limit += step_current_time;
233 				} else if (time_limit > step_current_time) {
234 					error("TimeLimit decrement larger than"
235 					      " current time limit (%u > %u)",
236 					      time_limit, step_current_time);
237 					exit_code = 1;
238 					return 0;
239 				} else {
240 					time_limit = step_current_time -
241 						     time_limit;
242 				}
243 			}
244 			step_msg.time_limit = time_limit;
245 			update_cnt++;
246 		} else if (xstrncasecmp(tag, "CompFile", MAX(taglen, 2)) == 0) {
247 			if (_parse_comp_file(val, &step_msg)) {
248 				exit_code = 1;
249 				fprintf(stderr,
250 					"Bad completion file (%s) given\n"
251 					"Request aborted\n", val);
252 				return 0;
253 			}
254 			update_cnt++;
255 		} else {
256 			exit_code = 1;
257 			fprintf (stderr, "Update of this parameter is not "
258 				 "supported: %s\n", argv[i]);
259 			fprintf (stderr, "Request aborted\n");
260 			return 0;
261 		}
262 	}
263 
264 	if (update_cnt == 0) {
265 		exit_code = 1;
266 		fprintf (stderr, "No changes specified\n");
267 		return 0;
268 	}
269 
270 	if (slurm_update_step(&step_msg))
271 		return slurm_get_errno ();
272 	else
273 		return 0;
274 }
275