/*****************************************************************************\
 *  as_mysql_rollup.c - functions for rolling up data for associations
 *                   and machines from the as_mysql storage.
 *****************************************************************************
 *  Copyright (C) 2004-2007 The Regents of the University of California.
 *  Copyright (C) 2008-2009 Lawrence Livermore National Security.
 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 *  Written by Danny Auble <da@llnl.gov>
 *  CODE-OCEC-09-009. All rights reserved.
 *
 *  This file is part of Slurm, a resource management program.
 *  For details, see <https://slurm.schedmd.com/>.
 *  Please also read the included file: DISCLAIMER.
 *
 *  Slurm is free software; you can redistribute it and/or modify it under
 *  the terms of the GNU General Public License as published by the Free
 *  Software Foundation; either version 2 of the License, or (at your option)
 *  any later version.
 *
 *  In addition, as a special exception, the copyright holders give permission
 *  to link the code of portions of this program with the OpenSSL library under
 *  certain conditions as described in each individual source file, and
 *  distribute linked combinations including the two. You must obey the GNU
 *  General Public License in all respects for all of the code used other than
 *  OpenSSL. If you modify file(s) with this exception, you may extend this
 *  exception to your version of the file(s), but you are not obligated to do
 *  so. If you do not wish to do so, delete this exception statement from your
 *  version.  If you delete this exception statement from all source files in
 *  the program, then also delete it here.
 *
 *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 *  details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with Slurm; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
\*****************************************************************************/

#include "as_mysql_rollup.h"
#include "as_mysql_archive.h"
#include "src/common/parse_time.h"
#include "src/common/slurm_time.h"

enum {
	TIME_ALLOC,
	TIME_DOWN,
	TIME_PDOWN,
	TIME_RESV
};

enum {
	ASSOC_TABLES,
	WCKEY_TABLES
};

typedef struct {
	uint64_t count;
	uint32_t id;
	uint64_t time_alloc;
	uint64_t time_down;
	uint64_t time_idle;
	uint64_t time_over;
	uint64_t time_pd;
	uint64_t time_resv;
	uint64_t total_time;
} local_tres_usage_t;
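
/*
 * A reader's sketch of how these fields relate (not an invariant the code
 * enforces): for one TRES in one rollup hour,
 *
 *	total_time ~= time_alloc + time_down + time_pd + time_idle + time_resv
 *
 * with time_over absorbing whatever does not fit; see the clamping in
 * _setup_cluster_tres_usage().  E.g. (made-up numbers) 64 CPUs over a
 * 3600 second hour give total_time = 230400 CPU-seconds to split across
 * the buckets.
 */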

typedef struct {
	int id;
	List loc_tres;
} local_id_usage_t;

typedef struct {
	time_t end;
	int id; /* only needed for reservations */
	List loc_tres;
	time_t start;
} local_cluster_usage_t;

typedef struct {
	time_t end;
	uint32_t flags;
	int id;
	hostlist_t hl;
	List local_assocs; /* list of assocs to spread unused time
			      over of type local_id_usage_t */
	List loc_tres;
	time_t orig_start;
	time_t start;
	double unused_wall;
} local_resv_usage_t;

static void _destroy_local_tres_usage(void *object)
{
	local_tres_usage_t *a_usage = (local_tres_usage_t *)object;
	if (a_usage) {
		xfree(a_usage);
	}
}

static void _destroy_local_id_usage(void *object)
{
	local_id_usage_t *a_usage = (local_id_usage_t *)object;
	if (a_usage) {
		FREE_NULL_LIST(a_usage->loc_tres);
		xfree(a_usage);
	}
}

static void _destroy_local_cluster_usage(void *object)
{
	local_cluster_usage_t *c_usage = (local_cluster_usage_t *)object;
	if (c_usage) {
		FREE_NULL_LIST(c_usage->loc_tres);
		xfree(c_usage);
	}
}

static void _destroy_local_resv_usage(void *object)
{
	local_resv_usage_t *r_usage = (local_resv_usage_t *)object;
	if (r_usage) {
		FREE_NULL_HOSTLIST(r_usage->hl);
		FREE_NULL_LIST(r_usage->local_assocs);
		FREE_NULL_LIST(r_usage->loc_tres);
		xfree(r_usage);
	}
}

static int _find_loc_tres(void *x, void *key)
{
	local_tres_usage_t *loc_tres = (local_tres_usage_t *)x;
	uint32_t tres_id = *(uint32_t *)key;

	if (loc_tres->id == tres_id)
		return 1;
	return 0;
}

static int _find_id_usage(void *x, void *key)
{
	local_id_usage_t *loc = (local_id_usage_t *)x;
	uint32_t id = *(uint32_t *)key;

	if (loc->id == id)
		return 1;
	return 0;
}

static void _remove_job_tres_time_from_cluster(List c_tres, List j_tres,
					       int seconds)
{
	ListIterator c_itr;
	local_tres_usage_t *loc_c_tres, *loc_j_tres;
	uint64_t time;

	if ((seconds <= 0) || !c_tres || !j_tres ||
	    !list_count(c_tres) || !list_count(j_tres))
		return;

	c_itr = list_iterator_create(c_tres);
	while ((loc_c_tres = list_next(c_itr))) {
		if (!(loc_j_tres = list_find_first(
			      j_tres, _find_loc_tres, &loc_c_tres->id)))
			continue;
		time = seconds * loc_j_tres->count;

		if (time >= loc_c_tres->total_time)
			loc_c_tres->total_time = 0;
		else
			loc_c_tres->total_time -= time;
	}
	list_iterator_destroy(c_itr);
}


static local_tres_usage_t *_add_time_tres(List tres_list, int type, uint32_t id,
					  uint64_t time, bool times_count)
{
	local_tres_usage_t *loc_tres;

	/* Energy TRES could have a NO_VAL64; we want to skip those, since
	 * they are the same as a 0, meaning nothing was gathered.
	 */
	if (!time || (time == NO_VAL64))
		return NULL;

	loc_tres = list_find_first(tres_list, _find_loc_tres, &id);

	if (!loc_tres) {
		if (times_count)
			return NULL;
		loc_tres = xmalloc(sizeof(local_tres_usage_t));
		loc_tres->id = id;
		list_append(tres_list, loc_tres);
	}

	if (times_count) {
		if (!loc_tres->count)
			return NULL;
		time *= loc_tres->count;
	}

	switch (type) {
	case TIME_ALLOC:
		loc_tres->time_alloc += time;
		break;
	case TIME_DOWN:
		loc_tres->time_down += time;
		break;
	case TIME_PDOWN:
		loc_tres->time_pd += time;
		break;
	case TIME_RESV:
		loc_tres->time_resv += time;
		break;
	default:
		error("_add_time_tres: unknown type %d given", type);
		xassert(0);
		break;
	}

	return loc_tres;
}
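
/*
 * A reader's note on times_count (illustrative, not from the original
 * source): when set, the time passed in is taken as wall seconds and is
 * scaled by the count already recorded for that TRES id, so a call like
 *
 *	_add_time_tres(tres_list, TIME_RESV, TRES_CPU, 3600, true);
 *
 * adds 3600 * loc_tres->count CPU-seconds, and is a no-op if the TRES was
 * never set up (there is no count to multiply by).  Without times_count
 * the value passed in is assumed to be TRES-seconds already.
 */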

static void _add_time_tres_list(List tres_list_out, List tres_list_in, int type,
				uint64_t time_in, bool times_count)
{
	ListIterator itr;
	local_tres_usage_t *loc_tres;

	xassert(tres_list_in);
	xassert(tres_list_out);

	itr = list_iterator_create(tres_list_in);
	while ((loc_tres = list_next(itr)))
		_add_time_tres(tres_list_out, type,
			       loc_tres->id,
			       time_in ? time_in : loc_tres->total_time,
			       times_count);
	list_iterator_destroy(itr);
}

/*
 * Job usage is a ratio of its tres to the reservation's tres:
 * Unused wall = unused wall - job_seconds * job_tres / resv_tres
 */
static int _update_unused_wall(local_resv_usage_t *r_usage, List job_tres,
			       int job_seconds)
{
	ListIterator resv_itr;
	local_tres_usage_t *loc_tres;
	uint32_t resv_tres_id;
	uint64_t resv_tres_count;
	double tres_ratio = 0.0;

	/* Get TRES counts. Make sure the TRES types match. */
	resv_itr = list_iterator_create(r_usage->loc_tres);
	while ((loc_tres = list_next(resv_itr))) {
		/* Avoid dividing by zero. */
		if (!loc_tres->count)
			continue;
		resv_tres_id = loc_tres->id;
		resv_tres_count = loc_tres->count;
		if ((loc_tres = list_find_first(job_tres,
						_find_loc_tres,
						&resv_tres_id))) {
			tres_ratio = (double)loc_tres->count /
				(double)resv_tres_count;
			break;
		}
	}
	list_iterator_destroy(resv_itr);

	/*
	 * Here we are converting TRES seconds to wall seconds.  This is needed
	 * to determine how much time is actually idle in the reservation.
	 */
	r_usage->unused_wall -= (double)job_seconds * tres_ratio;

	if (r_usage->unused_wall < 0) {
		/*
		 * With a Flex reservation you can easily have more time than is
		 * possible.  Just print this debug3 warning if it happens.
		 */
		debug3("WARNING: Unused wall is less than zero; this should never happen outside a Flex reservation. Setting it to zero for resv id = %d, start = %ld.",
		       r_usage->id, r_usage->orig_start);
		r_usage->unused_wall = 0;
	}
	return SLURM_SUCCESS;
}
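
/*
 * Worked example of the formula above (hypothetical numbers): a
 * reservation holding 64 CPUs has a job inside it that allocated 16 CPUs
 * and ran for 3600 seconds.  tres_ratio = 16/64 = 0.25, so unused_wall
 * decreases by 3600 * 0.25 = 900 wall seconds.
 */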

static void _add_job_alloc_time_to_cluster(List c_tres_list, List j_tres)
{
	ListIterator c_itr = list_iterator_create(c_tres_list);
	local_tres_usage_t *loc_c_tres, *loc_j_tres;

	while ((loc_c_tres = list_next(c_itr))) {
		if (!(loc_j_tres = list_find_first(
			      j_tres, _find_loc_tres, &loc_c_tres->id)))
			continue;
		loc_c_tres->time_alloc += loc_j_tres->time_alloc;
	}
	list_iterator_destroy(c_itr);
}

static void _setup_cluster_tres(List tres_list, uint32_t id,
				uint64_t count, int seconds)
{
	local_tres_usage_t *loc_tres =
		list_find_first(tres_list, _find_loc_tres, &id);

	if (!loc_tres) {
		loc_tres = xmalloc(sizeof(local_tres_usage_t));
		loc_tres->id = id;
		list_append(tres_list, loc_tres);
	}

	loc_tres->count = count;
	loc_tres->total_time += seconds * loc_tres->count;
}

static void _add_tres_2_list(List tres_list, char *tres_str, int seconds)
{
	char *tmp_str = tres_str;
	int id;
	uint64_t count;

	xassert(tres_list);

	if (!tres_str || !tres_str[0])
		return;

	while (tmp_str) {
		id = atoi(tmp_str);
		if (id < 1) {
			error("_add_tres_2_list: no id found at %s",
			      tmp_str);
			break;
		}

		/* We don't run rollup on a node basis
		 * because nodes are shared resources on
		 * many systems, so node counts would almost
		 * always be overcommitted.
		 */
		if (id != TRES_NODE) {
			if (!(tmp_str = strchr(tmp_str, '='))) {
				error("_add_tres_2_list: no value found");
				xassert(0);
				break;
			}
			count = slurm_atoull(++tmp_str);
			_setup_cluster_tres(tres_list, id, count, seconds);
		}

		if (!(tmp_str = strchr(tmp_str, ',')))
			break;
		tmp_str++;
	}

	return;
}
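
/*
 * For reference (example values are made up): the tres_str parsed above
 * is the "id=count" comma list stored in the database, e.g.
 * "1=64,2=262144,4=8" for 64 CPUs, 262144M of memory and 8 nodes; the
 * node entry (TRES_NODE) is skipped for the reason given in the loop.
 */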

static void _add_job_alloc_time_to_assoc(List a_tres_list, List j_tres_list)
{
	ListIterator itr;
	local_tres_usage_t *loc_a_tres, *loc_j_tres;

	/*
	 * NOTE: you cannot use list_pop or list_push
	 * anywhere here either, since as_mysql is
	 * importing something of the same name as a macro,
	 * which messes everything up
	 * (my_list.h is the bad boy).
	 */
	itr = list_iterator_create(j_tres_list);
	while ((loc_j_tres = list_next(itr))) {
		if (!(loc_a_tres = list_find_first(
			      a_tres_list, _find_loc_tres, &loc_j_tres->id))) {
			/*
			 * This is a new TRES we haven't seen before in this
			 * association; just transfer it over.
			 */
			list_append(a_tres_list, loc_j_tres);
			list_remove(itr);
			continue;
		}
		loc_a_tres->time_alloc += loc_j_tres->time_alloc;
		/*
		 * We are freeing this list right after this, so we might as
		 * well delete the item now.
		 */
		list_delete_item(itr);
	}
	list_iterator_destroy(itr);
}

/* This will destroy the *loc_tres given after it is transferred */
static void _transfer_loc_tres(List *loc_tres, local_id_usage_t *usage)
{
	if (!usage || !*loc_tres) {
		FREE_NULL_LIST(*loc_tres);
		return;
	}

	if (!usage->loc_tres) {
		usage->loc_tres = *loc_tres;
		*loc_tres = NULL;
	} else {
		_add_job_alloc_time_to_assoc(usage->loc_tres, *loc_tres);
		FREE_NULL_LIST(*loc_tres);
	}
}

static void _add_tres_time_2_list(List tres_list, char *tres_str,
				  int type, int seconds, int suspend_seconds,
				  bool times_count)
{
	char *tmp_str = tres_str;
	int id;
	uint64_t time, count;
	local_tres_usage_t *loc_tres;

	xassert(tres_list);

	if (!tres_str || !tres_str[0])
		return;

	while (tmp_str) {
		int loc_seconds = seconds;

		id = atoi(tmp_str);
		if (id < 1) {
			error("_add_tres_time_2_list: no id "
			      "found at %s", tmp_str);
			break;
		}
		if (!(tmp_str = strchr(tmp_str, '='))) {
			error("_add_tres_time_2_list: no value found for "
			      "id %d '%s'", id, tres_str);
			xassert(0);
			break;
		}

		/* Take away suspended time from TRES that are idle while the
		 * job is suspended; currently only CPUs fit that bill.
		 */
		if (suspend_seconds && (id == TRES_CPU)) {
			loc_seconds -= suspend_seconds;
			if (loc_seconds < 1)
				loc_seconds = 0;
		}

		time = count = slurm_atoull(++tmp_str);
		/* ENERGY is already totaled for the entire job, so don't
		 * multiply it by time.
		 */
		if (id != TRES_ENERGY)
			time *= loc_seconds;

		loc_tres = _add_time_tres(tres_list, type, id,
					  time, times_count);

		if (loc_tres && !loc_tres->count)
			loc_tres->count = count;

		if (!(tmp_str = strchr(tmp_str, ',')))
			break;
		tmp_str++;
	}

	return;
}
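
/*
 * Worked example of the adjustments above (hypothetical numbers): a job
 * with tres_str "1=4" (4 CPUs) seen for seconds = 3600, of which
 * suspend_seconds = 600, contributes 4 * (3600 - 600) = 12000
 * CPU-seconds, while a TRES_ENERGY entry is added as-is since it is
 * already a total for the job.
 */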

static int _process_purge(mysql_conn_t *mysql_conn,
			  char *cluster_name,
			  uint16_t archive_data,
			  uint32_t purge_period)
{
	int rc = SLURM_SUCCESS;
	slurmdb_archive_cond_t arch_cond;
	slurmdb_job_cond_t job_cond;

	/* If we didn't ask for archive data, return here and don't do
	   anything extra; just roll up. */

	if (!archive_data)
		return SLURM_SUCCESS;

	if (!slurmdbd_conf)
		return SLURM_SUCCESS;

	memset(&job_cond, 0, sizeof(job_cond));
	memset(&arch_cond, 0, sizeof(arch_cond));
	arch_cond.archive_dir = slurmdbd_conf->archive_dir;
	arch_cond.archive_script = slurmdbd_conf->archive_script;

	if (purge_period & slurmdbd_conf->purge_event)
		arch_cond.purge_event = slurmdbd_conf->purge_event;
	else
		arch_cond.purge_event = NO_VAL;
	if (purge_period & slurmdbd_conf->purge_job)
		arch_cond.purge_job = slurmdbd_conf->purge_job;
	else
		arch_cond.purge_job = NO_VAL;

	if (purge_period & slurmdbd_conf->purge_resv)
		arch_cond.purge_resv = slurmdbd_conf->purge_resv;
	else
		arch_cond.purge_resv = NO_VAL;

	if (purge_period & slurmdbd_conf->purge_step)
		arch_cond.purge_step = slurmdbd_conf->purge_step;
	else
		arch_cond.purge_step = NO_VAL;
	if (purge_period & slurmdbd_conf->purge_suspend)
		arch_cond.purge_suspend = slurmdbd_conf->purge_suspend;
	else
		arch_cond.purge_suspend = NO_VAL;
	if (purge_period & slurmdbd_conf->purge_txn)
		arch_cond.purge_txn = slurmdbd_conf->purge_txn;
	else
		arch_cond.purge_txn = NO_VAL;
	if (purge_period & slurmdbd_conf->purge_usage)
		arch_cond.purge_usage = slurmdbd_conf->purge_usage;
	else
		arch_cond.purge_usage = NO_VAL;

	job_cond.cluster_list = list_create(NULL);
	list_append(job_cond.cluster_list, cluster_name);

	arch_cond.job_cond = &job_cond;
	rc = as_mysql_jobacct_process_archive(mysql_conn, &arch_cond);
	FREE_NULL_LIST(job_cond.cluster_list);

	return rc;
}

static void _setup_cluster_tres_usage(mysql_conn_t *mysql_conn,
				      char *cluster_name,
				      time_t curr_start, time_t curr_end,
				      time_t now, time_t use_start,
				      local_tres_usage_t *loc_tres,
				      char **query)
{
	char start_char[20], end_char[20];
	uint64_t total_used;

	if (!loc_tres)
		return;

	/* Now put the lists into the usage tables */

	/* Sanity check to make sure we don't have more
	   allocated CPUs than possible. */
	if (loc_tres->total_time
	    && (loc_tres->total_time < loc_tres->time_alloc)) {
		slurm_make_time_str(&curr_start, start_char,
				    sizeof(start_char));
		slurm_make_time_str(&curr_end, end_char,
				    sizeof(end_char));
		error("We have more allocated time than is "
		      "possible (%"PRIu64" > %"PRIu64") for "
		      "cluster %s(%"PRIu64") from %s - %s tres %u",
		      loc_tres->time_alloc, loc_tres->total_time,
		      cluster_name, loc_tres->count,
		      start_char, end_char, loc_tres->id);
		loc_tres->time_alloc = loc_tres->total_time;
	}

	total_used = loc_tres->time_alloc +
		loc_tres->time_down + loc_tres->time_pd;

	/* Make sure the total time we care about
	   doesn't go over the limit */
	if (loc_tres->total_time && (loc_tres->total_time < total_used)) {
		int64_t overtime;

		slurm_make_time_str(&curr_start, start_char,
				    sizeof(start_char));
		slurm_make_time_str(&curr_end, end_char,
				    sizeof(end_char));
		error("We have more time than is "
		      "possible (%"PRIu64"+%"PRIu64"+%"
		      PRIu64")(%"PRIu64") > %"PRIu64" for "
		      "cluster %s(%"PRIu64") from %s - %s tres %u",
		      loc_tres->time_alloc, loc_tres->time_down,
		      loc_tres->time_pd, total_used,
		      loc_tres->total_time,
		      cluster_name, loc_tres->count,
		      start_char, end_char, loc_tres->id);

		/* First figure out how much actual down time
		   we have, and then how much
		   planned down time we have. */
		overtime = (int64_t)(loc_tres->total_time -
				     (loc_tres->time_alloc +
				      loc_tres->time_down));
		if (overtime < 0) {
			loc_tres->time_down += overtime;
			if ((int64_t)loc_tres->time_down < 0)
				loc_tres->time_down = 0;
		}

		overtime = (int64_t)(loc_tres->total_time -
				     (loc_tres->time_alloc +
				      loc_tres->time_down +
				      loc_tres->time_pd));
		if (overtime < 0) {
			loc_tres->time_pd += overtime;
			if ((int64_t)loc_tres->time_pd < 0)
				loc_tres->time_pd = 0;
		}

		total_used = loc_tres->time_alloc +
			loc_tres->time_down + loc_tres->time_pd;
		/* info("We now have (%"PRIu64"+%"PRIu64"+" */
		/*      "%"PRIu64")(%"PRIu64") " */
		/*       "?= %"PRIu64"", */
		/*       loc_tres->time_alloc, loc_tres->time_down, */
		/*       loc_tres->time_pd, total_used, */
		/*       loc_tres->total_time); */
	}
	/* info("Cluster %s now has (%"PRIu64"+%"PRIu64"+" */
	/*      "%"PRIu64")(%"PRIu64") ?= %"PRIu64"", */
	/*      cluster_name, */
	/*      c_usage->a_cpu, c_usage->d_cpu, */
	/*      c_usage->pd_cpu, total_used, */
	/*      c_usage->total_time); */

	loc_tres->time_idle = loc_tres->total_time -
		total_used - loc_tres->time_resv;
	/* Sanity check just to make sure we have a
	 * legitimate time after we calculated
	 * idle/reserved time; put the extra in the
	 * overcommit field.
	 */
	/* info("%s got idle of %lld", loc_tres->name, */
	/*      (int64_t)loc_tres->time_idle); */
	if ((int64_t)loc_tres->time_idle < 0) {
		/* info("got %d %d %d", loc_tres->time_resv, */
		/*      loc_tres->time_idle, loc_tres->time_over); */
		loc_tres->time_resv += (int64_t)loc_tres->time_idle;
		loc_tres->time_over -= (int64_t)loc_tres->time_idle;
		loc_tres->time_idle = 0;
		if ((int64_t)loc_tres->time_resv < 0)
			loc_tres->time_resv = 0;
	}

	/* info("cluster %s(%u) down %"PRIu64" alloc %"PRIu64" " */
	/*      "resv %"PRIu64" idle %"PRIu64" over %"PRIu64" " */
	/*      "total= %"PRIu64" ?= %"PRIu64" from %s", */
	/*      cluster_name, */
	/*      loc_tres->count, loc_tres->time_down, */
	/*      loc_tres->time_alloc, */
	/*      loc_tres->time_resv, loc_tres->time_idle, */
	/*      loc_tres->time_over, */
	/*      loc_tres->time_down + loc_tres->time_alloc + */
	/*      loc_tres->time_resv + loc_tres->time_idle, */
	/*      loc_tres->total_time, */
	/*      slurm_ctime2(&loc_tres->start)); */
	/* info("to %s", slurm_ctime2(&loc_tres->end)); */
	if (*query)
		xstrfmtcat(*query, ", (%ld, %ld, %ld, %u, %"PRIu64", "
			   "%"PRIu64", %"PRIu64", %"PRIu64", "
			   "%"PRIu64", %"PRIu64", %"PRIu64")",
			   now, now, use_start, loc_tres->id,
			   loc_tres->count,
			   loc_tres->time_alloc,
			   loc_tres->time_down,
			   loc_tres->time_pd,
			   loc_tres->time_idle,
			   loc_tres->time_over,
			   loc_tres->time_resv);
	else
		xstrfmtcat(*query, "insert into \"%s_%s\" "
			   "(creation_time, mod_time, "
			   "time_start, id_tres, count, "
			   "alloc_secs, down_secs, pdown_secs, "
			   "idle_secs, over_secs, resv_secs) "
			   "values (%ld, %ld, %ld, %u, %"PRIu64", "
			   "%"PRIu64", %"PRIu64", %"PRIu64", "
			   "%"PRIu64", %"PRIu64", %"PRIu64")",
			   cluster_name, cluster_hour_table,
			   now, now,
			   use_start, loc_tres->id,
			   loc_tres->count,
			   loc_tres->time_alloc,
			   loc_tres->time_down,
			   loc_tres->time_pd,
			   loc_tres->time_idle,
			   loc_tres->time_over,
			   loc_tres->time_resv);

	return;
}
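
/*
 * Sketch of the clamping above with made-up numbers: total_time = 100,
 * time_alloc = 70, time_down = 20, time_pd = 30 gives total_used = 120,
 * so the 20 extra seconds come out of time_pd (leaving 10); only if
 * time_alloc + time_down alone exceeded total_time would time_down be
 * reduced before time_pd.
 */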

static int _process_cluster_usage(mysql_conn_t *mysql_conn,
				  char *cluster_name,
				  time_t curr_start, time_t curr_end,
				  time_t now, local_cluster_usage_t *c_usage)
{
	int rc = SLURM_SUCCESS;
	char *query = NULL;
	ListIterator itr;
	local_tres_usage_t *loc_tres;

	if (!c_usage)
		return rc;
	/* Now put the lists into the usage tables */

	xassert(c_usage->loc_tres);
	itr = list_iterator_create(c_usage->loc_tres);
	while ((loc_tres = list_next(itr))) {
		_setup_cluster_tres_usage(mysql_conn, cluster_name,
					  curr_start, curr_end, now,
					  c_usage->start, loc_tres, &query);
	}
	list_iterator_destroy(itr);

	if (!query)
		return rc;

	xstrfmtcat(query,
		   " on duplicate key update "
		   "mod_time=%ld, count=VALUES(count), "
		   "alloc_secs=VALUES(alloc_secs), "
		   "down_secs=VALUES(down_secs), "
		   "pdown_secs=VALUES(pdown_secs), "
		   "idle_secs=VALUES(idle_secs), "
		   "over_secs=VALUES(over_secs), "
		   "resv_secs=VALUES(resv_secs)",
		   now);

	/* Spacing out the inserts here instead of doing them
	   all at once at the end proves to be faster.  Just FYI,
	   so we don't go testing again and again.
	*/
	if (debug_flags & DEBUG_FLAG_DB_USAGE)
		DB_DEBUG(mysql_conn->conn, "query\n%s", query);
	rc = mysql_db_query(mysql_conn, query);
	xfree(query);
	if (rc != SLURM_SUCCESS)
		error("Couldn't add cluster hour rollup");

	return rc;
}

static void _create_id_usage_insert(char *cluster_name, int type,
				    time_t curr_start, time_t now,
				    local_id_usage_t *id_usage,
				    char **query)
{
	local_tres_usage_t *loc_tres;
	ListIterator itr;
	bool first;
	char *table = NULL, *id_name = NULL;

	xassert(query);

	switch (type) {
	case ASSOC_TABLES:
		id_name = "id_assoc";
		table = assoc_hour_table;
		break;
	case WCKEY_TABLES:
		id_name = "id_wckey";
		table = wckey_hour_table;
		break;
	default:
		error("_create_id_usage_insert: unknown type %d", type);
		return;
		break;
	}

	if (!id_usage->loc_tres || !list_count(id_usage->loc_tres)) {
		error("%s %d doesn't have any tres", id_name, id_usage->id);
		return;
	}

	first = 1;
	itr = list_iterator_create(id_usage->loc_tres);
	while ((loc_tres = list_next(itr))) {
		if (!first) {
			xstrfmtcat(*query,
				   ", (%ld, %ld, %u, %ld, %u, %"PRIu64")",
				   now, now,
				   id_usage->id, curr_start, loc_tres->id,
				   loc_tres->time_alloc);
		} else {
			xstrfmtcat(*query,
				   "insert into \"%s_%s\" "
				   "(creation_time, mod_time, id, "
				   "time_start, id_tres, alloc_secs) "
				   "values (%ld, %ld, %u, %ld, %u, %"PRIu64")",
				   cluster_name, table, now, now,
				   id_usage->id, curr_start, loc_tres->id,
				   loc_tres->time_alloc);
			first = 0;
		}
	}
	list_iterator_destroy(itr);
	xstrfmtcat(*query,
		   " on duplicate key update mod_time=%ld, "
		   "alloc_secs=VALUES(alloc_secs);", now);
}
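
/*
 * For reference, the statement built above takes roughly this shape
 * (values are placeholders; the real table names come from
 * assoc_hour_table/wckey_hour_table):
 *
 *   insert into "<cluster>_<hour table>"
 *     (creation_time, mod_time, id, time_start, id_tres, alloc_secs)
 *     values (1609459200, 1609459200, 42, 1609455600, 1, 12000), (...)
 *     on duplicate key update mod_time=1609459200,
 *     alloc_secs=VALUES(alloc_secs);
 */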

static int _add_resv_usage_to_cluster(void *object, void *arg)
{
	local_resv_usage_t *r_usage = (local_resv_usage_t *)object;
	local_cluster_usage_t *c_usage = (local_cluster_usage_t *)arg;

	xassert(c_usage);

	/*
	 * If a reservation has the IGNORE_JOBS flag, we don't have an
	 * easy way to distinguish the cpus of a job not running in the
	 * reservation but on its cpus.  We still need such reservations
	 * for figuring out unused wall time, but for cluster utilization
	 * we will just ignore them here.
	 */
	if (r_usage->flags & RESERVE_FLAG_IGN_JOBS)
		return SLURM_SUCCESS;

	/*
	 * Since this reservation was added to the
	 * cluster and only certain people could run
	 * there, we will use this as allocated time on
	 * the system.  If the reservation was a
	 * maintenance one, then we add the time to planned
	 * down time.
	 */

	_add_time_tres_list(c_usage->loc_tres,
			    r_usage->loc_tres,
			    (r_usage->flags & RESERVE_FLAG_MAINT) ?
			    TIME_PDOWN : TIME_ALLOC, 0, 0);

	/* slurm_make_time_str(&r_usage->start, start_char, */
	/* 		    sizeof(start_char)); */
	/* slurm_make_time_str(&r_usage->end, end_char, */
	/* 		    sizeof(end_char)); */
	/* info("adding this much %lld to cluster %s " */
	/*      "%d %d %s - %s", */
	/*      r_usage->total_time, c_usage->name, */
	/*      (row_flags & RESERVE_FLAG_MAINT),  */
	/*      r_usage->id, start_char, end_char); */

	return SLURM_SUCCESS;
}

static local_cluster_usage_t *_setup_cluster_usage(mysql_conn_t *mysql_conn,
						   char *cluster_name,
						   time_t curr_start,
						   time_t curr_end,
						   List resv_usage_list,
						   List cluster_down_list)
{
	local_cluster_usage_t *c_usage = NULL;
	char *query = NULL;
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;
	int i = 0;
	ListIterator d_itr = NULL;
	local_cluster_usage_t *loc_c_usage;

	char *event_req_inx[] = {
		"node_name",
		"time_start",
		"time_end",
		"state",
		"tres",
	};
	char *event_str = NULL;
	enum {
		EVENT_REQ_NAME,
		EVENT_REQ_START,
		EVENT_REQ_END,
		EVENT_REQ_STATE,
		EVENT_REQ_TRES,
		EVENT_REQ_COUNT
	};

	xstrfmtcat(event_str, "%s", event_req_inx[i]);
	for (i = 1; i < EVENT_REQ_COUNT; i++) {
		xstrfmtcat(event_str, ", %s", event_req_inx[i]);
	}

	/* First get the events during this time: everything
	 * except entries with the maintenance flag set in the
	 * state.  We handle those later with the reservations.
	 */
	query = xstrdup_printf("select %s from \"%s_%s\" where "
			       "!(state & %d) && (time_start < %ld "
			       "&& (time_end >= %ld "
			       "|| time_end = 0)) "
			       "order by node_name, time_start",
			       event_str, cluster_name, event_table,
			       NODE_STATE_MAINT,
			       curr_end, curr_start);
	xfree(event_str);

	if (debug_flags & DEBUG_FLAG_DB_USAGE)
		DB_DEBUG(mysql_conn->conn, "query\n%s", query);
	if (!(result = mysql_db_query_ret(mysql_conn, query, 0))) {
		xfree(query);
		return NULL;
	}

	xfree(query);

	d_itr = list_iterator_create(cluster_down_list);
	while ((row = mysql_fetch_row(result))) {
		time_t row_start = slurm_atoul(row[EVENT_REQ_START]);
		time_t row_end = slurm_atoul(row[EVENT_REQ_END]);
		uint16_t state = slurm_atoul(row[EVENT_REQ_STATE]);
		time_t local_start, local_end;
		int seconds;

		if (row_start < curr_start)
			row_start = curr_start;

		if (!row_end || row_end > curr_end)
			row_end = curr_end;

		/* Don't worry about it if the time is less
		 * than 1 second.
		 */
		if ((seconds = (row_end - row_start)) < 1)
			continue;

		/* This means we have a cluster registration
		   entry. */
		if (!row[EVENT_REQ_NAME][0]) {
			local_cluster_usage_t *loc_c_usage;

			/* If the cpu count changes we will
			 * only care about the last cpu count, but
			 * we will keep a total of the time for
			 * all cpus to get the correct cpu time
			 * for the entire period.
			 */

			if (state || !c_usage) {
				loc_c_usage = xmalloc(
					sizeof(local_cluster_usage_t));
				loc_c_usage->start = row_start;
				loc_c_usage->loc_tres =
					list_create(_destroy_local_tres_usage);
				/* If this has a state, it
				   means the slurmctld went
				   down, and we should put this
				   on the list and remove any
				   jobs from this time that
				   were running later.
				*/
				if (state)
					list_append(cluster_down_list,
						    loc_c_usage);
				else
					c_usage = loc_c_usage;
			} else
				loc_c_usage = c_usage;

			loc_c_usage->end = row_end;

			_add_tres_2_list(loc_c_usage->loc_tres,
					 row[EVENT_REQ_TRES], seconds);

			continue;
		}

		/*
		 * Only record down time for the cluster we
		 * are looking for.  If it was during this
		 * time period we would already have it.
		 */
		if (!c_usage)
			continue;

		local_start = row_start;
		local_end = row_end;

		if (local_start < c_usage->start)
			local_start = c_usage->start;
		if (local_end > c_usage->end)
			local_end = c_usage->end;

		/* Don't worry about it if the time is less than 1 second. */
		if ((seconds = (local_end - local_start)) < 1)
			continue;

		_add_tres_time_2_list(c_usage->loc_tres,
				      row[EVENT_REQ_TRES],
				      TIME_DOWN,
				      seconds, 0, 0);

		/*
		 * Now remove this time if there was a
		 * disconnected slurmctld during the down time.
		 */
		list_iterator_reset(d_itr);
		while ((loc_c_usage = list_next(d_itr))) {
			time_t temp_end = row_end;
			time_t temp_start = row_start;
			if (loc_c_usage->start > temp_start)
				temp_start = loc_c_usage->start;
			if (loc_c_usage->end < temp_end)
				temp_end = loc_c_usage->end;
			seconds = (temp_end - temp_start);
			if (seconds < 1)
				continue;

			_remove_job_tres_time_from_cluster(
				loc_c_usage->loc_tres,
				c_usage->loc_tres, seconds);
			/* info("Node %s was down for " */
			/*      "%d seconds while " */
			/*      "cluster %s's slurmctld " */
			/*      "wasn't responding", */
			/*      row[EVENT_REQ_NAME], */
			/*      seconds, cluster_name); */
		}
	}
	mysql_free_result(result);

	list_iterator_destroy(d_itr);

	if (c_usage)
		(void)list_for_each(resv_usage_list,
				    _add_resv_usage_to_cluster,
				    c_usage);
	return c_usage;
}

extern int _setup_resv_usage(mysql_conn_t *mysql_conn,
			     char *cluster_name,
			     time_t curr_start,
			     time_t curr_end,
			     List resv_usage_list,
			     int dims)
{
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;
	int i;
	char *query;
	char *resv_str = NULL;
	local_resv_usage_t *r_usage = NULL;
	char *resv_req_inx[] = {
		"id_resv",
		"assoclist",
		"flags",
		"nodelist",
		"tres",
		"time_start",
		"time_end",
		"unused_wall"
	};
	enum {
		RESV_REQ_ID,
		RESV_REQ_ASSOCS,
		RESV_REQ_FLAGS,
		RESV_REQ_NODES,
		RESV_REQ_TRES,
		RESV_REQ_START,
		RESV_REQ_END,
		RESV_REQ_UNUSED,
		RESV_REQ_COUNT
	};

	/* Now get the reservations during this time. */

	i = 0;
	xstrfmtcat(resv_str, "%s", resv_req_inx[i]);
	for (i = 1; i < RESV_REQ_COUNT; i++)
		xstrfmtcat(resv_str, ", %s", resv_req_inx[i]);

	query = xstrdup_printf("select %s from \"%s_%s\" where "
			       "(time_start < %ld && time_end >= %ld) "
			       "order by time_start",
			       resv_str, cluster_name, resv_table,
			       curr_end, curr_start);
	xfree(resv_str);
	if (debug_flags & DEBUG_FLAG_DB_USAGE)
		DB_DEBUG(mysql_conn->conn, "query\n%s", query);

	result = mysql_db_query_ret(mysql_conn, query, 0);
	xfree(query);

	if (!result)
		return SLURM_ERROR;

	/*
	 * If a reservation overlaps another reservation, we
	 * total up everything here as if they didn't, but when
	 * calculating the total time for a cluster we will
	 * remove the extra time received.  This may result in
	 * unexpected results with association based reports,
	 * since the association is given the total amount of
	 * time of each reservation, thus equaling more time
	 * than is available.  Job/Cluster/Reservation reports
	 * should be fine though, since we really don't over
	 * allocate resources.  The issue with us not being
	 * able to handle overlapping reservations here is that
	 * unless one reservation completely overlaps the
	 * other, we have no idea how many cpus should be
	 * removed, since this could be a heterogeneous system.
	 * The same problem exists when a reservation is
	 * created with the ignore_jobs option, which allows
	 * jobs that aren't supposed to run in the reservation
	 * to continue running there.
	 */
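	/*
	 * Concrete case of the double counting described above (numbers
	 * are made up): two reservations overlapping for 600 seconds on
	 * the same 8 CPUs each get the full 8 * 600 TRES-seconds here,
	 * so an association allowed in both could be credited twice; the
	 * cluster-level totals are clamped later in
	 * _setup_cluster_tres_usage().
	 */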
	while ((row = mysql_fetch_row(result))) {
		time_t row_start = slurm_atoul(row[RESV_REQ_START]);
		time_t row_end = slurm_atoul(row[RESV_REQ_END]);
		int unused;
		int resv_seconds;
		time_t orig_start = row_start;

		if (row_start >= curr_start) {
			/*
			 * This is the first time we are seeing this
			 * reservation, so set our unused time to 0.
			 * This is mostly helpful when rerolling,
			 * to set it back to 0.
			 */
			unused = 0;
		} else
			unused = slurm_atoul(row[RESV_REQ_UNUSED]);

		if (row_start <= curr_start)
			row_start = curr_start;

		if (!row_end || row_end > curr_end)
			row_end = curr_end;

		/* Don't worry about it if the time is less
		 * than 1 second.
		 */
		if ((resv_seconds = (row_end - row_start)) < 1)
			continue;

		r_usage = xmalloc(sizeof(local_resv_usage_t));
		r_usage->flags = slurm_atoul(row[RESV_REQ_FLAGS]);
		r_usage->id = slurm_atoul(row[RESV_REQ_ID]);

		r_usage->local_assocs = list_create(xfree_ptr);
		slurm_addto_char_list(r_usage->local_assocs,
				      row[RESV_REQ_ASSOCS]);
		r_usage->loc_tres =
			list_create(_destroy_local_tres_usage);

		_add_tres_2_list(r_usage->loc_tres,
				 row[RESV_REQ_TRES], resv_seconds);

		/*
		 * The original start is needed when updating the
		 * reservation's unused_wall later on.
		 */
		r_usage->orig_start = orig_start;
		r_usage->start = row_start;
		r_usage->end = row_end;
		r_usage->unused_wall = unused + resv_seconds;
		r_usage->hl = hostlist_create_dims(row[RESV_REQ_NODES], dims);
		list_append(resv_usage_list, r_usage);
	}
	mysql_free_result(result);

	return SLURM_SUCCESS;
}

extern int as_mysql_hourly_rollup(mysql_conn_t *mysql_conn,
				  char *cluster_name,
				  time_t start, time_t end,
				  uint16_t archive_data)
{
	int rc = SLURM_SUCCESS;
	int add_sec = 3600;
	int i = 0, dims;
	time_t now = time(NULL);
	time_t curr_start = start;
	time_t curr_end = curr_start + add_sec;
	char *query = NULL;
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;
	ListIterator a_itr = NULL;
	ListIterator c_itr = NULL;
	ListIterator w_itr = NULL;
	ListIterator r_itr = NULL;
	List assoc_usage_list = list_create(_destroy_local_id_usage);
	List cluster_down_list = list_create(_destroy_local_cluster_usage);
	List wckey_usage_list = list_create(_destroy_local_id_usage);
	List resv_usage_list = list_create(_destroy_local_resv_usage);
	uint16_t track_wckey = slurm_get_track_wckey();
	local_cluster_usage_t *loc_c_usage = NULL;
	local_cluster_usage_t *c_usage = NULL;
	local_resv_usage_t *r_usage = NULL;
	local_id_usage_t *a_usage = NULL;
	local_id_usage_t *w_usage = NULL;
	/* char start_char[20], end_char[20]; */

	char *job_req_inx[] = {
		"job.job_db_inx",
//		"job.id_job",
		"job.id_assoc",
		"job.id_wckey",
		"job.array_task_pending",
		"job.time_eligible",
		"job.time_start",
		"job.time_end",
		"job.time_suspended",
		"job.cpus_req",
		"job.id_resv",
		"job.tres_alloc"
	};
	char *job_str = NULL;
	enum {
		JOB_REQ_DB_INX,
//		JOB_REQ_JOBID,
		JOB_REQ_ASSOCID,
		JOB_REQ_WCKEYID,
		JOB_REQ_ARRAY_PENDING,
		JOB_REQ_ELG,
		JOB_REQ_START,
		JOB_REQ_END,
		JOB_REQ_SUSPENDED,
		JOB_REQ_RCPU,
		JOB_REQ_RESVID,
		JOB_REQ_TRES,
		JOB_REQ_COUNT
	};

	char *suspend_req_inx[] = {
		"time_start",
		"time_end"
	};
	char *suspend_str = NULL;
	enum {
		SUSPEND_REQ_START,
		SUSPEND_REQ_END,
		SUSPEND_REQ_COUNT
	};

	i = 0;
	xstrfmtcat(job_str, "%s", job_req_inx[i]);
	for (i = 1; i < JOB_REQ_COUNT; i++) {
		xstrfmtcat(job_str, ", %s", job_req_inx[i]);
	}

	i = 0;
	xstrfmtcat(suspend_str, "%s", suspend_req_inx[i]);
	for (i = 1; i < SUSPEND_REQ_COUNT; i++) {
		xstrfmtcat(suspend_str, ", %s", suspend_req_inx[i]);
	}

	/* We need to figure out the dimensions of this cluster */
	query = xstrdup_printf("select dimensions from %s where name='%s'",
			       cluster_table, cluster_name);
	if (debug_flags & DEBUG_FLAG_DB_USAGE)
		DB_DEBUG(mysql_conn->conn, "query\n%s", query);
	result = mysql_db_query_ret(mysql_conn, query, 0);
	xfree(query);

	if (!result) {
		error("%s: error querying cluster_table", __func__);
		rc = SLURM_ERROR;
		goto end_it;
	}
	row = mysql_fetch_row(result);

	if (!row) {
		error("%s: no cluster by name %s known",
		      __func__, cluster_name);
		rc = SLURM_ERROR;
		goto end_it;
	}

	dims = atoi(row[0]);
	mysql_free_result(result);

/* 	info("begin start %s", slurm_ctime2(&curr_start)); */
/* 	info("begin end %s", slurm_ctime2(&curr_end)); */
	a_itr = list_iterator_create(assoc_usage_list);
	c_itr = list_iterator_create(cluster_down_list);
	w_itr = list_iterator_create(wckey_usage_list);
	r_itr = list_iterator_create(resv_usage_list);
	while (curr_start < end) {
		int last_id = -1;
		int last_wckeyid = -1;

		if (debug_flags & DEBUG_FLAG_DB_USAGE)
			DB_DEBUG(mysql_conn->conn,
				 "%s curr hour is now %ld-%ld",
				 cluster_name, curr_start, curr_end);
/* 		info("start %s", slurm_ctime2(&curr_start)); */
/* 		info("end %s", slurm_ctime2(&curr_end)); */

		if ((rc = _setup_resv_usage(mysql_conn, cluster_name,
					    curr_start, curr_end,
					    resv_usage_list, dims))
		    != SLURM_SUCCESS)
			goto end_it;

		c_usage = _setup_cluster_usage(mysql_conn, cluster_name,
					       curr_start, curr_end,
					       resv_usage_list,
					       cluster_down_list);

		if (c_usage)
			xassert(c_usage->loc_tres);

		/* now get the jobs during this time only */
		query = xstrdup_printf("select %s from \"%s_%s\" as job "
				       "where (job.time_eligible && "
				       "job.time_eligible < %ld && "
				       "(job.time_end >= %ld || "
				       "job.time_end = 0)) "
				       "group by job.job_db_inx "
				       "order by job.id_assoc, "
				       "job.time_eligible",
				       job_str, cluster_name, job_table,
				       curr_end, curr_start);

		if (debug_flags & DEBUG_FLAG_DB_USAGE)
			DB_DEBUG(mysql_conn->conn, "query\n%s", query);
		if (!(result = mysql_db_query_ret(
			      mysql_conn, query, 0))) {
			rc = SLURM_ERROR;
			goto end_it;
		}
		xfree(query);

		while ((row = mysql_fetch_row(result))) {
			//uint32_t job_id = slurm_atoul(row[JOB_REQ_JOBID]);
			uint32_t assoc_id = slurm_atoul(row[JOB_REQ_ASSOCID]);
			uint32_t wckey_id = slurm_atoul(row[JOB_REQ_WCKEYID]);
			uint32_t array_pending =
				slurm_atoul(row[JOB_REQ_ARRAY_PENDING]);
			uint32_t resv_id = slurm_atoul(row[JOB_REQ_RESVID]);
			time_t row_eligible = slurm_atoul(row[JOB_REQ_ELG]);
			time_t row_start = slurm_atoul(row[JOB_REQ_START]);
			time_t row_end = slurm_atoul(row[JOB_REQ_END]);
			uint32_t row_rcpu = slurm_atoul(row[JOB_REQ_RCPU]);
			List loc_tres = NULL;
			int loc_seconds = 0;
			int seconds = 0, suspend_seconds = 0;

			if (row_start && (row_start < curr_start))
				row_start = curr_start;

			if (!row_start && row_end)
				row_start = row_end;

			if (!row_end || row_end > curr_end)
				row_end = curr_end;

			if (!row_start || ((row_end - row_start) < 1))
				goto calc_cluster;

			seconds = (row_end - row_start);

			if (slurm_atoul(row[JOB_REQ_SUSPENDED])) {
				MYSQL_RES *result2 = NULL;
				MYSQL_ROW row2;
				/* get the suspended time for this job */
				query = xstrdup_printf(
					"select %s from \"%s_%s\" where "
					"(time_start < %ld && (time_end >= %ld "
					"|| time_end = 0)) && job_db_inx=%s "
					"order by time_start",
					suspend_str, cluster_name,
					suspend_table,
					curr_end, curr_start,
					row[JOB_REQ_DB_INX]);

				debug4("%d(%s:%d) query\n%s",
				       mysql_conn->conn, THIS_FILE,
				       __LINE__, query);
				if (!(result2 = mysql_db_query_ret(
					      mysql_conn,
					      query, 0))) {
					rc = SLURM_ERROR;
					mysql_free_result(result);
					goto end_it;
				}
				xfree(query);
				while ((row2 = mysql_fetch_row(result2))) {
					int tot_time = 0;
					time_t local_start = slurm_atoul(
						row2[SUSPEND_REQ_START]);
					time_t local_end = slurm_atoul(
						row2[SUSPEND_REQ_END]);

					if (!local_start)
						continue;

					if (row_start > local_start)
						local_start = row_start;
					if (!local_end || row_end < local_end)
						local_end = row_end;
					tot_time = (local_end - local_start);

					if (tot_time > 0)
						suspend_seconds += tot_time;
				}
				mysql_free_result(result2);
			}

			if (last_id != assoc_id) {
				a_usage = xmalloc(sizeof(local_id_usage_t));
				a_usage->id = assoc_id;
				list_append(assoc_usage_list, a_usage);
				last_id = assoc_id;
				/* a_usage->loc_tres is made later,
				   don't do it here.
				*/
			}

			/* Short circuit this so we don't get a pointer. */
			if (!track_wckey)
				last_wckeyid = wckey_id;

			/* do the wckey calculation */
			if (last_wckeyid != wckey_id) {
				list_iterator_reset(w_itr);
				while ((w_usage = list_next(w_itr)))
					if (w_usage->id == wckey_id)
						break;

				if (!w_usage) {
					w_usage = xmalloc(
						sizeof(local_id_usage_t));
					w_usage->id = wckey_id;
					list_append(wckey_usage_list,
						    w_usage);
					w_usage->loc_tres = list_create(
						_destroy_local_tres_usage);
				}
				last_wckeyid = wckey_id;
			}

			/* do the cluster allocated calculation */
		calc_cluster:

			/*
			 * We need to have this clean for each job
			 * since we add the time to the cluster individually.
			 */
			loc_tres = list_create(_destroy_local_tres_usage);

			_add_tres_time_2_list(loc_tres, row[JOB_REQ_TRES],
					      TIME_ALLOC, seconds,
					      suspend_seconds, 0);
			if (w_usage)
				_add_tres_time_2_list(w_usage->loc_tres,
						      row[JOB_REQ_TRES],
						      TIME_ALLOC, seconds,
						      suspend_seconds, 0);

			/*
			 * Now figure out whether there was a disconnected
			 * slurmctld during this job.
			 */
			list_iterator_reset(c_itr);
			while ((loc_c_usage = list_next(c_itr))) {
				int temp_end = row_end;
				int temp_start = row_start;
				if (loc_c_usage->start > temp_start)
					temp_start = loc_c_usage->start;
				if (loc_c_usage->end < temp_end)
					temp_end = loc_c_usage->end;
				loc_seconds = (temp_end - temp_start);
				if (loc_seconds < 1)
					continue;

				_remove_job_tres_time_from_cluster(
					loc_c_usage->loc_tres,
					loc_tres,
					loc_seconds);
				/* info("Job %u was running for " */
				/*      "%d seconds while " */
				/*      "cluster %s's slurmctld " */
				/*      "wasn't responding", */
				/*      job_id, loc_seconds, cluster_name); */
			}

			/* first figure out the reservation */
			if (resv_id) {
				if (seconds <= 0) {
					_transfer_loc_tres(&loc_tres, a_usage);
					continue;
				}
				/*
				 * Since we have already added the entire
				 * reservation as used time on the cluster, we
				 * only need to calculate the used time for the
				 * reservation and then divvy up the unused
				 * time over the associations able to run in
				 * the reservation.  Since the job was to run,
				 * or ran, in a reservation, we don't care
				 * about eligible time, since that could
				 * totally skew the cluster's reserved time and
				 * the job may be able to run outside of the
				 * reservation.
				 */
				list_iterator_reset(r_itr);
				while ((r_usage = list_next(r_itr))) {
					int temp_end, temp_start;
					/*
					 * Since the reservation could have
					 * changed in some way, thus making a
					 * new reservation record in the
					 * database, we have to make sure all
					 * of the reservations are checked to
					 * see if such a thing has happened.
					 */
					if (r_usage->id != resv_id)
						continue;
					temp_end = row_end;
					temp_start = row_start;
					if (r_usage->start > temp_start)
						temp_start =
							r_usage->start;
					if (r_usage->end < temp_end)
						temp_end = r_usage->end;

					loc_seconds = (temp_end - temp_start);

					if (loc_seconds <= 0)
						continue;

					if (c_usage &&
					    (r_usage->flags &
					     RESERVE_FLAG_IGN_JOBS))
						/*
						 * The job usage was not
						 * bundled with the resv
						 * usage, so we need to
						 * account for it
						 * individually here.
						 */
						_add_tres_time_2_list(
							c_usage->loc_tres,
							row[JOB_REQ_TRES],
							TIME_ALLOC,
							loc_seconds,
							0, 0);

					_add_time_tres_list(
						r_usage->loc_tres,
						loc_tres, TIME_ALLOC,
						loc_seconds, 1);
					if ((rc = _update_unused_wall(
						     r_usage,
						     loc_tres,
						     loc_seconds))
					    != SLURM_SUCCESS)
						goto end_it;
				}

				_transfer_loc_tres(&loc_tres, a_usage);
				continue;
			}

			/*
			 * Only record time for the clusters that have
			 * registered.  This continue should rarely if
			 * ever happen.
			 */
			if (!c_usage) {
				_transfer_loc_tres(&loc_tres, a_usage);
				continue;
			}

			if (row_start && (seconds > 0)) {
				/* info("%d assoc %d adds " */
				/*      "(%d)(%d-%d) * %d = %d " */
				/*      "to %d", */
				/*      job_id, */
				/*      a_usage->id, */
				/*      seconds, */
				/*      row_end, row_start, */
				/*      row_acpu, */
				/*      seconds * row_acpu, */
				/*      row_acpu); */

				_add_job_alloc_time_to_cluster(
					c_usage->loc_tres,
					loc_tres);
			}

			/*
			 * The loc_tres isn't needed after this, so transfer it
			 * to the association and go on our merry way.
			 */
			_transfer_loc_tres(&loc_tres, a_usage);

			/* now reserved time */
			if (!row_start || (row_start >= c_usage->start)) {
				int temp_end = row_start;
				int temp_start = row_eligible;
				if (c_usage->start > temp_start)
					temp_start = c_usage->start;
				if (c_usage->end < temp_end)
					temp_end = c_usage->end;
				loc_seconds = (temp_end - temp_start);
				if (loc_seconds > 0) {
					/*
					 * If we have pending jobs in an array
					 * they haven't been inserted into the
					 * database yet as proper job records,
					 * so handle them here.
					 */
					if (array_pending)
						loc_seconds *= array_pending;

					/* info("%d assoc %d reserved " */
					/*      "(%d)(%d-%d) * %d * %d = %d " */
					/*      "to %d", */
					/*      job_id, */
					/*      assoc_id, */
					/*      temp_end - temp_start, */
					/*      temp_end, temp_start, */
					/*      row_rcpu, */
					/*      array_pending, */
					/*      loc_seconds, */
					/*      row_rcpu); */

					_add_time_tres(c_usage->loc_tres,
						       TIME_RESV, TRES_CPU,
						       loc_seconds *
						       (uint64_t) row_rcpu,
						       0);
				}
			}
		}
		mysql_free_result(result);

		/* Now figure out how much more to add to the
		   associations that could have run in the reservation.
		*/
		query = NULL;
		list_iterator_reset(r_itr);
		while ((r_usage = list_next(r_itr))) {
			ListIterator t_itr;
			local_tres_usage_t *loc_tres;

			xstrfmtcat(query, "update \"%s_%s\" set unused_wall=%f where id_resv=%u and time_start=%ld;",
				   cluster_name, resv_table,
				   r_usage->unused_wall, r_usage->id,
				   r_usage->orig_start);

			if (!r_usage->loc_tres ||
			    !list_count(r_usage->loc_tres))
				continue;

			t_itr = list_iterator_create(r_usage->loc_tres);
			while ((loc_tres = list_next(t_itr))) {
				int64_t idle = loc_tres->total_time -
					loc_tres->time_alloc;
				char *assoc = NULL;
				ListIterator tmp_itr = NULL;
				int assoc_cnt, resv_unused_secs;

				if (idle <= 0)
					break; /* since this will be
						* the same for all TRES */

				/* Now divide that time by the number of
				   associations in the reservation and add
				   it to each association. */
				resv_unused_secs = idle;
				assoc_cnt = list_count(r_usage->local_assocs);
				if (assoc_cnt)
					resv_unused_secs /= assoc_cnt;
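				/*
				 * e.g. (hypothetical) 9000 idle
				 * TRES-seconds split across 3
				 * assocs credits 3000 to each;
				 * the integer division drops
				 * any remainder.
				 */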
1680 				/* info("resv %d got %d seconds for TRES %u " */
1681 				/*      "for %d assocs", */
1682 				/*      r_usage->id, resv_unused_secs, */
1683 				/*      loc_tres->id, */
1684 				/*      list_count(r_usage->local_assocs)); */
1685 				tmp_itr = list_iterator_create(
1686 					r_usage->local_assocs);
1687 				while ((assoc = list_next(tmp_itr))) {
1688 					uint32_t associd = slurm_atoul(assoc);
1689 					if ((last_id != associd) &&
1690 					    !(a_usage = list_find_first(
1691 						      assoc_usage_list,
1692 						      _find_id_usage,
1693 						      &associd))) {
1694 						a_usage = xmalloc(
1695 							sizeof(local_id_usage_t));
1696 						a_usage->id = associd;
1697 						list_append(assoc_usage_list,
1698 							    a_usage);
1699 						last_id = associd;
1700 						a_usage->loc_tres = list_create(
1701 							_destroy_local_tres_usage);
1702 					}
1703 
1704 					_add_time_tres(a_usage->loc_tres,
1705 						       TIME_ALLOC, loc_tres->id,
1706 						       resv_unused_secs, 0);
1707 				}
1708 				list_iterator_destroy(tmp_itr);
1709 			}
1710 			list_iterator_destroy(t_itr);
1711 		}
1712 
1713 		if (query) {
1714 			if (debug_flags & DEBUG_FLAG_DB_USAGE)
1715 				DB_DEBUG(mysql_conn->conn, "query\n%s", query);
1716 			rc = mysql_db_query(mysql_conn, query);
1717 			xfree(query);
1718 			if (rc != SLURM_SUCCESS) {
1719 				error("couldn't update reservations with unused time");
1720 				goto end_it;
1721 			}
1722 		}
1723 
		/* now apply the down time from the slurmctld disconnects */
		if (c_usage) {
			list_iterator_reset(c_itr);
			while ((loc_c_usage = list_next(c_itr))) {
				local_tres_usage_t *loc_tres;
				ListIterator tmp_itr = list_iterator_create(
					loc_c_usage->loc_tres);
				while ((loc_tres = list_next(tmp_itr)))
					_add_time_tres(c_usage->loc_tres,
						       TIME_DOWN,
						       loc_tres->id,
						       loc_tres->total_time,
						       0);
				list_iterator_destroy(tmp_itr);
			}

			if ((rc = _process_cluster_usage(
				     mysql_conn, cluster_name, curr_start,
				     curr_end, now, c_usage))
			    != SLURM_SUCCESS) {
				goto end_it;
			}
		}

		list_iterator_reset(a_itr);
		while ((a_usage = list_next(a_itr)))
			_create_id_usage_insert(cluster_name, ASSOC_TABLES,
						curr_start, now,
						a_usage, &query);
		if (query) {
			if (debug_flags & DEBUG_FLAG_DB_USAGE)
				DB_DEBUG(mysql_conn->conn, "query\n%s", query);
			rc = mysql_db_query(mysql_conn, query);
			xfree(query);
			if (rc != SLURM_SUCCESS) {
				error("Couldn't add assoc hour rollup");
				goto end_it;
			}
		}

		if (!track_wckey)
			goto end_loop;

		list_iterator_reset(w_itr);
		while ((w_usage = list_next(w_itr)))
			_create_id_usage_insert(cluster_name, WCKEY_TABLES,
						curr_start, now,
						w_usage, &query);
		if (query) {
			if (debug_flags & DEBUG_FLAG_DB_USAGE)
				DB_DEBUG(mysql_conn->conn, "query\n%s", query);
			rc = mysql_db_query(mysql_conn, query);
			xfree(query);
			if (rc != SLURM_SUCCESS) {
				error("Couldn't add wckey hour rollup");
				goto end_it;
			}
		}

	end_loop:
		_destroy_local_cluster_usage(c_usage);

		c_usage     = NULL;
		r_usage     = NULL;
		a_usage     = NULL;
		w_usage     = NULL;

		list_flush(assoc_usage_list);
		list_flush(cluster_down_list);
		list_flush(wckey_usage_list);
		list_flush(resv_usage_list);
		curr_start = curr_end;
		curr_end = curr_start + add_sec;
	}
end_it:
	xfree(query);
	xfree(suspend_str);
	xfree(job_str);
	_destroy_local_cluster_usage(c_usage);

	if (a_itr)
		list_iterator_destroy(a_itr);
	if (c_itr)
		list_iterator_destroy(c_itr);
	if (w_itr)
		list_iterator_destroy(w_itr);
	if (r_itr)
		list_iterator_destroy(r_itr);

	FREE_NULL_LIST(assoc_usage_list);
	FREE_NULL_LIST(cluster_down_list);
	FREE_NULL_LIST(wckey_usage_list);
	FREE_NULL_LIST(resv_usage_list);

/* 	info("stop start %s", slurm_ctime2(&curr_start)); */
/* 	info("stop end %s", slurm_ctime2(&curr_end)); */

	/* now check whether it is time to archive and purge */

	if (rc == SLURM_SUCCESS) {
		if (mysql_db_commit(mysql_conn)) {
			char start[25], end[25];
			error("Couldn't commit cluster (%s) "
			      "hour rollup for %s - %s",
			      cluster_name, slurm_ctime2_r(&curr_start, start),
			      slurm_ctime2_r(&curr_end, end));
			rc = SLURM_ERROR;
		} else
			rc = _process_purge(mysql_conn, cluster_name,
					    archive_data, SLURMDB_PURGE_HOURS);
	}

	return rc;
}
extern int as_mysql_nonhour_rollup(mysql_conn_t *mysql_conn,
				   bool run_month,
				   char *cluster_name,
				   time_t start, time_t end,
				   uint16_t archive_data)
{
	/* We can't just add 86400 seconds per day, since daylight
	 * saving time begins and ends twice a year.
	 */
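	/* Worked example of why: in a US/Eastern zone, DST begins
	 * 2018-03-11 at 02:00, so that day is only 23 hours long;
	 * midnight plus 86400s lands at 2018-03-12 01:00, not at the
	 * next midnight. Zeroing the time fields and bumping
	 * tm_mday/tm_mon before slurm_mktime() below always lands on
	 * the true day or month boundary instead.
	 */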
	int rc = SLURM_SUCCESS;
	struct tm start_tm;
	time_t curr_start = start;
	time_t curr_end;
	time_t now = time(NULL);
	char *query = NULL;
	uint16_t track_wckey = slurm_get_track_wckey();
	char *unit_name;

	while (curr_start < end) {
		if (!localtime_r(&curr_start, &start_tm)) {
			error("Couldn't get localtime from start %ld",
			      curr_start);
			return SLURM_ERROR;
		}
		start_tm.tm_sec = 0;
		start_tm.tm_min = 0;
		start_tm.tm_hour = 0;

		if (run_month) {
			unit_name = "month";
			start_tm.tm_mday = 1;
			start_tm.tm_mon++;
		} else {
			unit_name = "day";
			start_tm.tm_mday++;
		}

		curr_end = slurm_mktime(&start_tm);

		if (debug_flags & DEBUG_FLAG_DB_USAGE)
			DB_DEBUG(mysql_conn->conn,
				 "curr %s is now %ld-%ld",
				 unit_name, curr_start, curr_end);
/* 		info("start %s", slurm_ctime2(&curr_start)); */
/* 		info("end %s", slurm_ctime2(&curr_end)); */
		query = xstrdup_printf(
			"insert into \"%s_%s\" (creation_time, mod_time, id, "
			"id_tres, time_start, alloc_secs) "
			"select %ld, %ld, id, id_tres, "
			"%ld, @ASUM:=SUM(alloc_secs) from \"%s_%s\" where "
			"(time_start < %ld && time_start >= %ld) "
			"group by id, id_tres on duplicate key update "
			"mod_time=%ld, alloc_secs=@ASUM;",
			cluster_name,
			run_month ? assoc_month_table : assoc_day_table,
			now, now, curr_start,
			cluster_name,
			run_month ? assoc_day_table : assoc_hour_table,
			curr_end, curr_start, now);
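		/* A hedged sketch of the SQL the daily assoc rollup renders
		 * to (cluster name and timestamps are invented; the table
		 * suffixes assume the usual assoc_usage_day_table /
		 * assoc_usage_hour_table names):
		 *
		 *   insert into "snowflake_assoc_usage_day_table"
		 *       (creation_time, mod_time, id, id_tres, time_start,
		 *        alloc_secs)
		 *   select 1525219200, 1525219200, id, id_tres, 1525132800,
		 *       @ASUM:=SUM(alloc_secs)
		 *   from "snowflake_assoc_usage_hour_table"
		 *   where (time_start < 1525219200 && time_start >= 1525132800)
		 *   group by id, id_tres
		 *   on duplicate key update mod_time=1525219200,
		 *       alloc_secs=@ASUM;
		 *
		 * The @ASUM session variable captures each group's SUM() so
		 * the same value can be reused in the "on duplicate key
		 * update" clause when a row for that key already exists.
		 */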

		/* Group by "deleted" so that when the source range has no
		   rows the select returns nothing at all; an ungrouped
		   aggregate would instead return a single row of NULLs,
		   which would break the insert.
		*/
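		/* Illustrative MySQL behavior with a made-up table "t":
		 *
		 *   select SUM(alloc_secs) from t where 0;
		 *       -> one row containing NULL
		 *   select SUM(alloc_secs) from t where 0 group by deleted;
		 *       -> zero rows
		 */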
		xstrfmtcat(query,
			   "insert into \"%s_%s\" (creation_time, "
			   "mod_time, time_start, id_tres, count, "
			   "alloc_secs, down_secs, pdown_secs, "
			   "idle_secs, over_secs, resv_secs) "
			   "select %ld, %ld, "
			   "%ld, id_tres, @CPU:=MAX(count), "
			   "@ASUM:=SUM(alloc_secs), "
			   "@DSUM:=SUM(down_secs), "
			   "@PDSUM:=SUM(pdown_secs), "
			   "@ISUM:=SUM(idle_secs), "
			   "@OSUM:=SUM(over_secs), "
			   "@RSUM:=SUM(resv_secs) from \"%s_%s\" where "
			   "(time_start < %ld && time_start >= %ld) "
			   "group by deleted, id_tres "
			   "on duplicate key update "
			   "mod_time=%ld, count=@CPU, "
			   "alloc_secs=@ASUM, down_secs=@DSUM, "
			   "pdown_secs=@PDSUM, idle_secs=@ISUM, "
			   "over_secs=@OSUM, resv_secs=@RSUM;",
			   cluster_name,
			   run_month ? cluster_month_table : cluster_day_table,
			   now, now, curr_start,
			   cluster_name,
			   run_month ? cluster_day_table : cluster_hour_table,
			   curr_end, curr_start, now);
		if (track_wckey) {
			xstrfmtcat(query,
				   "insert into \"%s_%s\" (creation_time, "
				   "mod_time, id, id_tres, time_start, "
				   "alloc_secs) "
				   "select %ld, %ld, "
				   "id, id_tres, %ld, @ASUM:=SUM(alloc_secs) "
				   "from \"%s_%s\" where (time_start < %ld && "
				   "time_start >= %ld) group by id, id_tres "
				   "on duplicate key update "
				   "mod_time=%ld, alloc_secs=@ASUM;",
				   cluster_name,
				   run_month ? wckey_month_table :
				   wckey_day_table,
				   now, now, curr_start,
				   cluster_name,
				   run_month ? wckey_day_table :
				   wckey_hour_table,
				   curr_end, curr_start, now);
		}
		if (debug_flags & DEBUG_FLAG_DB_USAGE)
			DB_DEBUG(mysql_conn->conn, "query\n%s", query);
		rc = mysql_db_query(mysql_conn, query);
		xfree(query);
		if (rc != SLURM_SUCCESS) {
			error("Couldn't add %s rollup", unit_name);
			return SLURM_ERROR;
		}

		curr_start = curr_end;
	}

/* 	info("stop start %s", slurm_ctime2(&curr_start)); */
/* 	info("stop end %s", slurm_ctime2(&curr_end)); */

	/* now check whether it is time to archive and purge */
	rc = _process_purge(mysql_conn, cluster_name, archive_data,
			    run_month ? SLURMDB_PURGE_MONTHS :
			    SLURMDB_PURGE_DAYS);
	return rc;
}