1 /*****************************************************************************\
2 * as_mysql_rollup.c - functions for rolling up data for associations
3 * and machines from the as_mysql storage.
4 *****************************************************************************
5 * Copyright (C) 2004-2007 The Regents of the University of California.
6 * Copyright (C) 2008-2009 Lawrence Livermore National Security.
7 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
8 * Written by Danny Auble <da@llnl.gov>
9 * CODE-OCEC-09-009. All rights reserved.
10 *
11 * This file is part of Slurm, a resource management program.
12 * For details, see <https://slurm.schedmd.com/>.
13 * Please also read the included file: DISCLAIMER.
14 *
15 * Slurm is free software; you can redistribute it and/or modify it under
16 * the terms of the GNU General Public License as published by the Free
17 * Software Foundation; either version 2 of the License, or (at your option)
18 * any later version.
19 *
20 * In addition, as a special exception, the copyright holders give permission
21 * to link the code of portions of this program with the OpenSSL library under
22 * certain conditions as described in each individual source file, and
23 * distribute linked combinations including the two. You must obey the GNU
24 * General Public License in all respects for all of the code used other than
25 * OpenSSL. If you modify file(s) with this exception, you may extend this
26 * exception to your version of the file(s), but you are not obligated to do
27 * so. If you do not wish to do so, delete this exception statement from your
28 * version. If you delete this exception statement from all source files in
29 * the program, then also delete it here.
30 *
31 * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
32 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
33 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
34 * details.
35 *
36 * You should have received a copy of the GNU General Public License along
37 * with Slurm; if not, write to the Free Software Foundation, Inc.,
38 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
39 \*****************************************************************************/
40
41 #include "as_mysql_rollup.h"
42 #include "as_mysql_archive.h"
43 #include "src/common/parse_time.h"
44 #include "src/common/slurm_time.h"
45
/* Time accounting categories used when accumulating TRES usage. */
enum {
	TIME_ALLOC,	/* time allocated to jobs */
	TIME_DOWN,	/* time the resource was down */
	TIME_PDOWN,	/* planned down time (e.g. maintenance) */
	TIME_RESV	/* time reserved */
};

/* Selects which per-id hourly usage table an insert targets. */
enum {
	ASSOC_TABLES,
	WCKEY_TABLES
};

/* Usage accumulated for a single TRES over one rollup period. */
typedef struct {
	uint64_t count;		/* configured count of this TRES */
	uint32_t id;		/* TRES id */
	uint64_t time_alloc;	/* seconds * count allocated to jobs */
	uint64_t time_down;	/* seconds * count the TRES was down */
	uint64_t time_idle;	/* derived idle time */
	uint64_t time_over;	/* over-committed time */
	uint64_t time_pd;	/* planned down time */
	uint64_t time_resv;	/* reserved time */
	uint64_t total_time;	/* seconds * count possible in the period */
} local_tres_usage_t;

/* Usage for one association or wckey id. */
typedef struct {
	int id;
	List loc_tres;		/* list of local_tres_usage_t */
} local_id_usage_t;

/* Usage for the cluster over one registration span. */
typedef struct {
	time_t end;
	int id; /* only needed for reservations */
	List loc_tres;		/* list of local_tres_usage_t */
	time_t start;
} local_cluster_usage_t;

/* Usage for one reservation over the rollup period. */
typedef struct {
	time_t end;
	uint32_t flags;		/* RESERVE_FLAG_* */
	int id;
	hostlist_t hl;		/* nodes in the reservation */
	List local_assocs; /* list of assocs to spread unused time
			      over of type local_id_usage_t */
	List loc_tres;		/* list of local_tres_usage_t */
	time_t orig_start;	/* start before clamping to the period */
	time_t start;
	double unused_wall;	/* wall seconds not consumed by jobs */
} local_resv_usage_t;
94
_destroy_local_tres_usage(void * object)95 static void _destroy_local_tres_usage(void *object)
96 {
97 local_tres_usage_t *a_usage = (local_tres_usage_t *)object;
98 if (a_usage) {
99 xfree(a_usage);
100 }
101 }
102
_destroy_local_id_usage(void * object)103 static void _destroy_local_id_usage(void *object)
104 {
105 local_id_usage_t *a_usage = (local_id_usage_t *)object;
106 if (a_usage) {
107 FREE_NULL_LIST(a_usage->loc_tres);
108 xfree(a_usage);
109 }
110 }
111
_destroy_local_cluster_usage(void * object)112 static void _destroy_local_cluster_usage(void *object)
113 {
114 local_cluster_usage_t *c_usage = (local_cluster_usage_t *)object;
115 if (c_usage) {
116 FREE_NULL_LIST(c_usage->loc_tres);
117 xfree(c_usage);
118 }
119 }
120
_destroy_local_resv_usage(void * object)121 static void _destroy_local_resv_usage(void *object)
122 {
123 local_resv_usage_t *r_usage = (local_resv_usage_t *)object;
124 if (r_usage) {
125 FREE_NULL_HOSTLIST(r_usage->hl);
126 FREE_NULL_LIST(r_usage->local_assocs);
127 FREE_NULL_LIST(r_usage->loc_tres);
128 xfree(r_usage);
129 }
130 }
131
_find_loc_tres(void * x,void * key)132 static int _find_loc_tres(void *x, void *key)
133 {
134 local_tres_usage_t *loc_tres = (local_tres_usage_t *)x;
135 uint32_t tres_id = *(uint32_t *)key;
136
137 if (loc_tres->id == tres_id)
138 return 1;
139 return 0;
140 }
141
_find_id_usage(void * x,void * key)142 static int _find_id_usage(void *x, void *key)
143 {
144 local_id_usage_t *loc = (local_id_usage_t *)x;
145 uint32_t id = *(uint32_t *)key;
146
147 if (loc->id == id)
148 return 1;
149 return 0;
150 }
151
/*
 * Subtract a job's TRES time (seconds * per-TRES count) from the
 * matching cluster TRES totals, clamping each total at zero.  TRES
 * present on only one side are left untouched.
 */
static void _remove_job_tres_time_from_cluster(List c_tres, List j_tres,
					       int seconds)
{
	ListIterator itr;
	local_tres_usage_t *cluster_entry, *job_entry;
	uint64_t removed;

	if ((seconds <= 0) || !c_tres || !j_tres ||
	    !list_count(c_tres) || !list_count(j_tres))
		return;

	itr = list_iterator_create(c_tres);
	while ((cluster_entry = list_next(itr))) {
		job_entry = list_find_first(j_tres, _find_loc_tres,
					    &cluster_entry->id);
		if (!job_entry)
			continue;

		removed = seconds * job_entry->count;
		if (cluster_entry->total_time > removed)
			cluster_entry->total_time -= removed;
		else
			cluster_entry->total_time = 0;
	}
	list_iterator_destroy(itr);
}
177
178
/*
 * Find (or create) the usage record for TRES "id" in tres_list and add
 * "time" to the counter selected by "type" (one of the TIME_* enums).
 *
 * When times_count is set, "time" is a per-unit value that gets
 * multiplied by the entry's recorded count; in that mode nothing is
 * created for TRES we have no existing entry (or no count) for.
 *
 * Returns the affected record, or NULL if nothing was added.
 */
static local_tres_usage_t *_add_time_tres(List tres_list, int type, uint32_t id,
					  uint64_t time, bool times_count)
{
	local_tres_usage_t *loc_tres;

	/* Energy TRES could have a NO_VAL64, we want to skip those as it is the
	 * same as a 0 since nothing was gathered.
	 */
	if (!time || (time == NO_VAL64))
		return NULL;

	loc_tres = list_find_first(tres_list, _find_loc_tres, &id);

	if (!loc_tres) {
		/* A per-count addition makes no sense for a TRES we have
		 * never seen; only create entries for absolute times. */
		if (times_count)
			return NULL;
		loc_tres = xmalloc(sizeof(local_tres_usage_t));
		loc_tres->id = id;
		list_append(tres_list, loc_tres);
	}

	if (times_count) {
		if (!loc_tres->count)
			return NULL;
		time *= loc_tres->count;
	}

	switch (type) {
	case TIME_ALLOC:
		loc_tres->time_alloc += time;
		break;
	case TIME_DOWN:
		loc_tres->time_down += time;
		break;
	case TIME_PDOWN:
		loc_tres->time_pd += time;
		break;
	case TIME_RESV:
		loc_tres->time_resv += time;
		break;
	default:
		error("_add_time_tres: unknown type %d given", type);
		xassert(0);
		break;
	}

	return loc_tres;
}
227
/*
 * Fold every TRES entry of tres_list_in into tres_list_out under the
 * given TIME_* category.  When time_in is zero each entry contributes
 * its own total_time instead.  times_count is passed through to
 * _add_time_tres().
 */
static void _add_time_tres_list(List tres_list_out, List tres_list_in, int type,
				uint64_t time_in, bool times_count)
{
	ListIterator in_itr;
	local_tres_usage_t *usage;

	xassert(tres_list_in);
	xassert(tres_list_out);

	in_itr = list_iterator_create(tres_list_in);
	while ((usage = list_next(in_itr))) {
		uint64_t add_time = time_in ? time_in : usage->total_time;

		(void) _add_time_tres(tres_list_out, type, usage->id,
				      add_time, times_count);
	}
	list_iterator_destroy(in_itr);
}
245
246 /*
247 * Job usage is a ratio of its tres to the reservation's tres:
248 * Unused wall = unused wall - job_seconds * job_tres / resv_tres
249 */
_update_unused_wall(local_resv_usage_t * r_usage,List job_tres,int job_seconds)250 static int _update_unused_wall(local_resv_usage_t *r_usage, List job_tres,
251 int job_seconds)
252 {
253 ListIterator resv_itr;
254 local_tres_usage_t *loc_tres;
255 uint32_t resv_tres_id;
256 uint64_t resv_tres_count;
257 double tres_ratio = 0.0;
258
259 /* Get TRES counts. Make sure the TRES types match. */
260 resv_itr = list_iterator_create(r_usage->loc_tres);
261 while ((loc_tres = list_next(resv_itr))) {
262 /* Avoid dividing by zero. */
263 if (!loc_tres->count)
264 continue;
265 resv_tres_id = loc_tres->id;
266 resv_tres_count = loc_tres->count;
267 if ((loc_tres = list_find_first(job_tres,
268 _find_loc_tres,
269 &resv_tres_id))) {
270 tres_ratio = (double)loc_tres->count /
271 (double)resv_tres_count;
272 break;
273 }
274 }
275 list_iterator_destroy(resv_itr);
276
277 /*
278 * Here we are converting TRES seconds to wall seconds. This is needed
279 * to determine how much time is actually idle in the reservation.
280 */
281 r_usage->unused_wall -= (double)job_seconds * tres_ratio;
282
283 if (r_usage->unused_wall < 0) {
284 /*
285 * With a Flex reservation you can easily have more time than is
286 * possible. Just print this debug3 warning if it happens.
287 */
288 debug3("WARNING: Unused wall is less than zero; this should never happen outside a Flex reservation. Setting it to zero for resv id = %d, start = %ld.",
289 r_usage->id, r_usage->orig_start);
290 r_usage->unused_wall = 0;
291 }
292 return SLURM_SUCCESS;
293 }
294
/*
 * Add a job's per-TRES allocated time to the matching entries of the
 * cluster's TRES list.  TRES the cluster does not track are skipped.
 */
static void _add_job_alloc_time_to_cluster(List c_tres_list, List j_tres)
{
	ListIterator itr = list_iterator_create(c_tres_list);
	local_tres_usage_t *c_entry, *j_entry;

	while ((c_entry = list_next(itr))) {
		j_entry = list_find_first(j_tres, _find_loc_tres,
					  &c_entry->id);
		if (j_entry)
			c_entry->time_alloc += j_entry->time_alloc;
	}
	list_iterator_destroy(itr);
}
308
/*
 * Ensure a TRES entry with the given id exists in tres_list, record its
 * latest count, and accumulate seconds * count into its total_time.
 */
static void _setup_cluster_tres(List tres_list, uint32_t id,
				uint64_t count, int seconds)
{
	local_tres_usage_t *usage;

	usage = list_find_first(tres_list, _find_loc_tres, &id);
	if (!usage) {
		usage = xmalloc(sizeof(local_tres_usage_t));
		usage->id = id;
		list_append(tres_list, usage);
	}

	/* Keep only the most recent count, but keep accumulating time. */
	usage->count = count;
	usage->total_time += seconds * usage->count;
}
324
/*
 * Parse a TRES string of the form "id=count,id=count,..." and fold each
 * entry into tres_list via _setup_cluster_tres(), crediting "seconds"
 * of time at each TRES's count.
 */
static void _add_tres_2_list(List tres_list, char *tres_str, int seconds)
{
	char *tmp_str = tres_str;
	int id;
	uint64_t count;

	xassert(tres_list);

	if (!tres_str || !tres_str[0])
		return;

	while (tmp_str) {
		id = atoi(tmp_str);
		if (id < 1) {
			error("_add_tres_2_list: no id "
			      "found at %s instead", tmp_str);
			break;
		}

		/* We don't run rollup on a node basis
		 * because they are shared resources on
		 * many systems so it will almost always
		 * have over committed resources.
		 */
		if (id != TRES_NODE) {
			if (!(tmp_str = strchr(tmp_str, '='))) {
				error("_add_tres_2_list: no value found");
				xassert(0);
				break;
			}
			count = slurm_atoull(++tmp_str);
			_setup_cluster_tres(tres_list, id, count, seconds);
		}

		/* Advance to the next "id=count" pair, if any. */
		if (!(tmp_str = strchr(tmp_str, ',')))
			break;
		tmp_str++;
	}

	return;
}
366
/*
 * Merge a job's per-TRES allocated time into an association's TRES
 * list.  Entries the association has never seen are moved over from
 * j_tres_list wholesale; matching entries have their alloc time added
 * and the job-side entry deleted, so j_tres_list is drained as we go.
 */
static void _add_job_alloc_time_to_assoc(List a_tres_list, List j_tres_list)
{
	ListIterator itr;
	local_tres_usage_t *loc_a_tres, *loc_j_tres;

	/*
	 * NOTE: you can not use list_pop, or list_push
	 * anywhere either, since as_mysql is
	 * exporting something of the same type as a macro,
	 * which messes everything up
	 * (my_list.h is the bad boy).
	 */
	itr = list_iterator_create(j_tres_list);
	while ((loc_j_tres = list_next(itr))) {
		if (!(loc_a_tres = list_find_first(
			      a_tres_list, _find_loc_tres, &loc_j_tres->id))) {
			/*
			 * New TRES we haven't seen before in this association
			 * just transfer it over.
			 */
			list_append(a_tres_list, loc_j_tres);
			/* list_remove() detaches without freeing; the entry
			 * now belongs to a_tres_list. */
			list_remove(itr);
			continue;
		}
		loc_a_tres->time_alloc += loc_j_tres->time_alloc;
		/*
		 * We are freeing this list right after this might as well
		 * delete it now.
		 */
		list_delete_item(itr);
	}
	list_iterator_destroy(itr);
}
400
401 /* This will destroy the *loc_tres given after it is transfered */
_transfer_loc_tres(List * loc_tres,local_id_usage_t * usage)402 static void _transfer_loc_tres(List *loc_tres, local_id_usage_t *usage)
403 {
404 if (!usage || !*loc_tres) {
405 FREE_NULL_LIST(*loc_tres);
406 return;
407 }
408
409 if (!usage->loc_tres) {
410 usage->loc_tres = *loc_tres;
411 *loc_tres = NULL;
412 } else {
413 _add_job_alloc_time_to_assoc(usage->loc_tres, *loc_tres);
414 FREE_NULL_LIST(*loc_tres);
415 }
416 }
417
/*
 * Parse a TRES string of the form "id=count,id=count,..." and add each
 * TRES's time (count * seconds, less suspended seconds for CPUs) to
 * tres_list under the given TIME_* type.  Also records the count on
 * entries that do not yet have one.  See _add_time_tres() for the
 * times_count flag.
 */
static void _add_tres_time_2_list(List tres_list, char *tres_str,
				  int type, int seconds, int suspend_seconds,
				  bool times_count)
{
	char *tmp_str = tres_str;
	int id;
	uint64_t time, count;
	local_tres_usage_t *loc_tres;

	xassert(tres_list);

	if (!tres_str || !tres_str[0])
		return;

	while (tmp_str) {
		int loc_seconds = seconds;

		id = atoi(tmp_str);
		if (id < 1) {
			error("_add_tres_time_2_list: no id "
			      "found at %s", tmp_str);
			break;
		}
		if (!(tmp_str = strchr(tmp_str, '='))) {
			error("_add_tres_time_2_list: no value found for "
			      "id %d '%s'", id, tres_str);
			xassert(0);
			break;
		}

		/* Take away suspended time from TRES that are idle when the
		 * job was suspended, currently only CPU's fill that bill.
		 */
		if (suspend_seconds && (id == TRES_CPU)) {
			loc_seconds -= suspend_seconds;
			if (loc_seconds < 1)
				loc_seconds = 0;
		}

		time = count = slurm_atoull(++tmp_str);
		/* ENERGY is already totalled for the entire job so don't
		 * multiple with time.
		 */
		if (id != TRES_ENERGY)
			time *= loc_seconds;

		loc_tres = _add_time_tres(tres_list, type, id,
					  time, times_count);

		/* Remember the TRES count the first time we see it. */
		if (loc_tres && !loc_tres->count)
			loc_tres->count = count;

		if (!(tmp_str = strchr(tmp_str, ',')))
			break;
		tmp_str++;
	}

	return;
}
477
/*
 * Archive/purge old records for one cluster before rolling up.
 *
 * Does nothing unless archive data was requested and we are running
 * inside the slurmdbd (slurmdbd_conf is set).  Each purge_* condition
 * is enabled only when its bit is selected by purge_period; otherwise
 * it is set to NO_VAL so it is skipped.
 *
 * Returns the result of as_mysql_jobacct_process_archive(), or
 * SLURM_SUCCESS when there is nothing to do.
 */
static int _process_purge(mysql_conn_t *mysql_conn,
			  char *cluster_name,
			  uint16_t archive_data,
			  uint32_t purge_period)
{
	int rc = SLURM_SUCCESS;
	slurmdb_archive_cond_t arch_cond;
	slurmdb_job_cond_t job_cond;

	/* if we didn't ask for archive data return here and don't do
	   anything extra just rollup */
	if (!archive_data)
		return SLURM_SUCCESS;

	if (!slurmdbd_conf)
		return SLURM_SUCCESS;

	memset(&job_cond, 0, sizeof(job_cond));
	memset(&arch_cond, 0, sizeof(arch_cond));
	arch_cond.archive_dir = slurmdbd_conf->archive_dir;
	arch_cond.archive_script = slurmdbd_conf->archive_script;

	arch_cond.purge_event =
		(purge_period & slurmdbd_conf->purge_event) ?
		slurmdbd_conf->purge_event : NO_VAL;
	arch_cond.purge_job =
		(purge_period & slurmdbd_conf->purge_job) ?
		slurmdbd_conf->purge_job : NO_VAL;
	arch_cond.purge_resv =
		(purge_period & slurmdbd_conf->purge_resv) ?
		slurmdbd_conf->purge_resv : NO_VAL;
	arch_cond.purge_step =
		(purge_period & slurmdbd_conf->purge_step) ?
		slurmdbd_conf->purge_step : NO_VAL;
	arch_cond.purge_suspend =
		(purge_period & slurmdbd_conf->purge_suspend) ?
		slurmdbd_conf->purge_suspend : NO_VAL;
	arch_cond.purge_txn =
		(purge_period & slurmdbd_conf->purge_txn) ?
		slurmdbd_conf->purge_txn : NO_VAL;
	arch_cond.purge_usage =
		(purge_period & slurmdbd_conf->purge_usage) ?
		slurmdbd_conf->purge_usage : NO_VAL;

	job_cond.cluster_list = list_create(NULL);
	list_append(job_cond.cluster_list, cluster_name);

	arch_cond.job_cond = &job_cond;
	rc = as_mysql_jobacct_process_archive(mysql_conn, &arch_cond);
	FREE_NULL_LIST(job_cond.cluster_list);

	return rc;
}
541
/*
 * Sanity-check one TRES's accumulated usage for the period and append
 * its value row to the cluster hour table insert being built in *query.
 *
 * Allocated, down and planned-down time are clamped so their sum never
 * exceeds the possible total; idle time is derived from what remains,
 * and any negative idle is shifted into the over-commit counter.  On
 * the first call *query is NULL and the insert prefix is emitted;
 * later calls append ", (...)" value rows.
 */
static void _setup_cluster_tres_usage(mysql_conn_t *mysql_conn,
				      char *cluster_name,
				      time_t curr_start, time_t curr_end,
				      time_t now, time_t use_start,
				      local_tres_usage_t *loc_tres,
				      char **query)
{
	char start_char[20], end_char[20];
	uint64_t total_used;

	if (!loc_tres)
		return;

	/* Now put the lists into the usage tables */

	/* sanity check to make sure we don't have more
	   allocated cpus than possible. */
	if (loc_tres->total_time
	    && (loc_tres->total_time < loc_tres->time_alloc)) {
		slurm_make_time_str(&curr_start, start_char,
				    sizeof(start_char));
		slurm_make_time_str(&curr_end, end_char,
				    sizeof(end_char));
		error("We have more allocated time than is "
		      "possible (%"PRIu64" > %"PRIu64") for "
		      "cluster %s(%"PRIu64") from %s - %s tres %u",
		      loc_tres->time_alloc, loc_tres->total_time,
		      cluster_name, loc_tres->count,
		      start_char, end_char, loc_tres->id);
		loc_tres->time_alloc = loc_tres->total_time;
	}

	total_used = loc_tres->time_alloc +
		loc_tres->time_down + loc_tres->time_pd;

	/* Make sure the total time we care about
	   doesn't go over the limit */
	if (loc_tres->total_time && (loc_tres->total_time < total_used)) {
		int64_t overtime;

		slurm_make_time_str(&curr_start, start_char,
				    sizeof(start_char));
		slurm_make_time_str(&curr_end, end_char,
				    sizeof(end_char));
		error("We have more time than is "
		      "possible (%"PRIu64"+%"PRIu64"+%"
		      PRIu64")(%"PRIu64") > %"PRIu64" for "
		      "cluster %s(%"PRIu64") from %s - %s tres %u",
		      loc_tres->time_alloc, loc_tres->time_down,
		      loc_tres->time_pd, total_used,
		      loc_tres->total_time,
		      cluster_name, loc_tres->count,
		      start_char, end_char, loc_tres->id);

		/* First figure out how much actual down time
		   we have and then how much
		   planned down time we have. */
		overtime = (int64_t)(loc_tres->total_time -
				     (loc_tres->time_alloc +
				      loc_tres->time_down));
		/* Negative overtime: shrink down time first (alloc time
		 * was already clamped above and takes precedence). */
		if (overtime < 0) {
			loc_tres->time_down += overtime;
			if ((int64_t)loc_tres->time_down < 0)
				loc_tres->time_down = 0;
		}

		overtime = (int64_t)(loc_tres->total_time -
				     (loc_tres->time_alloc +
				      loc_tres->time_down +
				      loc_tres->time_pd));
		/* Still over: shrink planned down time next. */
		if (overtime < 0) {
			loc_tres->time_pd += overtime;
			if ((int64_t)loc_tres->time_pd < 0)
				loc_tres->time_pd = 0;
		}

		total_used = loc_tres->time_alloc +
			loc_tres->time_down + loc_tres->time_pd;
		/* info("We now have (%"PRIu64"+%"PRIu64"+" */
		/*      "%"PRIu64")(%"PRIu64") " */
		/*      "?= %"PRIu64"", */
		/*      loc_tres->time_alloc, loc_tres->time_down, */
		/*      loc_tres->time_pd, total_used, */
		/*      loc_tres->total_time); */
	}
	/* info("Cluster %s now has (%"PRIu64"+%"PRIu64"+" */
	/*      "%"PRIu64")(%"PRIu64") ?= %"PRIu64"", */
	/*      cluster_name, */
	/*      c_usage->a_cpu, c_usage->d_cpu, */
	/*      c_usage->pd_cpu, total_used, */
	/*      c_usage->total_time); */

	loc_tres->time_idle = loc_tres->total_time -
		total_used - loc_tres->time_resv;
	/* sanity check just to make sure we have a
	 * legitimate time after we calulated
	 * idle/reserved time put extra in the over
	 * commit field
	 */
	/* info("%s got idle of %lld", loc_tres->name, */
	/*      (int64_t)loc_tres->time_idle); */
	if ((int64_t)loc_tres->time_idle < 0) {
		/* info("got %d %d %d", loc_tres->time_resv, */
		/*      loc_tres->time_idle, loc_tres->time_over); */
		loc_tres->time_resv += (int64_t)loc_tres->time_idle;
		loc_tres->time_over -= (int64_t)loc_tres->time_idle;
		loc_tres->time_idle = 0;
		if ((int64_t)loc_tres->time_resv < 0)
			loc_tres->time_resv = 0;
	}

	/* info("cluster %s(%u) down %"PRIu64" alloc %"PRIu64" " */
	/*      "resv %"PRIu64" idle %"PRIu64" over %"PRIu64" " */
	/*      "total= %"PRIu64" ?= %"PRIu64" from %s", */
	/*      cluster_name, */
	/*      loc_tres->count, loc_tres->time_down, */
	/*      loc_tres->time_alloc, */
	/*      loc_tres->time_resv, loc_tres->time_idle, */
	/*      loc_tres->time_over, */
	/*      loc_tres->time_down + loc_tres->time_alloc + */
	/*      loc_tres->time_resv + loc_tres->time_idle, */
	/*      loc_tres->total_time, */
	/*      slurm_ctime2(&loc_tres->start)); */
	/* info("to %s", slurm_ctime2(&loc_tres->end)); */
	if (*query)
		xstrfmtcat(*query, ", (%ld, %ld, %ld, %u, %"PRIu64", "
			   "%"PRIu64", %"PRIu64", %"PRIu64", "
			   "%"PRIu64", %"PRIu64", %"PRIu64")",
			   now, now, use_start, loc_tres->id,
			   loc_tres->count,
			   loc_tres->time_alloc,
			   loc_tres->time_down,
			   loc_tres->time_pd,
			   loc_tres->time_idle,
			   loc_tres->time_over,
			   loc_tres->time_resv);
	else
		xstrfmtcat(*query, "insert into \"%s_%s\" "
			   "(creation_time, mod_time, "
			   "time_start, id_tres, count, "
			   "alloc_secs, down_secs, pdown_secs, "
			   "idle_secs, over_secs, resv_secs) "
			   "values (%ld, %ld, %ld, %u, %"PRIu64", "
			   "%"PRIu64", %"PRIu64", %"PRIu64", "
			   "%"PRIu64", %"PRIu64", %"PRIu64")",
			   cluster_name, cluster_hour_table,
			   now, now,
			   use_start, loc_tres->id,
			   loc_tres->count,
			   loc_tres->time_alloc,
			   loc_tres->time_down,
			   loc_tres->time_pd,
			   loc_tres->time_idle,
			   loc_tres->time_over,
			   loc_tres->time_resv);

	return;
}
700
/*
 * Flush one rollup period's cluster usage to the cluster hour table.
 * Builds one multi-row upsert from c_usage->loc_tres (one value row per
 * TRES) and runs it.  Returns SLURM_SUCCESS or the query's error code.
 */
static int _process_cluster_usage(mysql_conn_t *mysql_conn,
				  char *cluster_name,
				  time_t curr_start, time_t curr_end,
				  time_t now, local_cluster_usage_t *c_usage)
{
	int rc = SLURM_SUCCESS;
	char *query = NULL;
	ListIterator itr;
	local_tres_usage_t *loc_tres;

	if (!c_usage)
		return rc;
	/* Now put the lists into the usage tables */

	xassert(c_usage->loc_tres);
	itr = list_iterator_create(c_usage->loc_tres);
	while ((loc_tres = list_next(itr))) {
		_setup_cluster_tres_usage(mysql_conn, cluster_name,
					  curr_start, curr_end, now,
					  c_usage->start, loc_tres, &query);
	}
	list_iterator_destroy(itr);

	/* No rows were produced (every TRES was skipped). */
	if (!query)
		return rc;

	xstrfmtcat(query,
		   " on duplicate key update "
		   "mod_time=%ld, count=VALUES(count), "
		   "alloc_secs=VALUES(alloc_secs), "
		   "down_secs=VALUES(down_secs), "
		   "pdown_secs=VALUES(pdown_secs), "
		   "idle_secs=VALUES(idle_secs), "
		   "over_secs=VALUES(over_secs), "
		   "resv_secs=VALUES(resv_secs)",
		   now);

	/* Spacing out the inserts here instead of doing them
	   all at once in the end proves to be faster. Just FYI
	   so we don't go testing again and again.
	*/
	if (debug_flags & DEBUG_FLAG_DB_USAGE)
		DB_DEBUG(mysql_conn->conn, "query\n%s", query);
	rc = mysql_db_query(mysql_conn, query);
	xfree(query);
	if (rc != SLURM_SUCCESS)
		error("Couldn't add cluster hour rollup");

	return rc;
}
751
/*
 * Append a multi-row "insert ... on duplicate key update" statement for
 * one association's or wckey's hourly TRES usage to *query.
 *
 * type selects the target table and id column (ASSOC_TABLES or
 * WCKEY_TABLES).  One value row is emitted per TRES entry in
 * id_usage->loc_tres; the first row carries the insert prefix.
 *
 * Fixes vs. previous revision: removed the unreachable break after
 * return in the default case, and use proper bool literals with
 * initialization at declaration for "first".
 */
static void _create_id_usage_insert(char *cluster_name, int type,
				    time_t curr_start, time_t now,
				    local_id_usage_t *id_usage,
				    char **query)
{
	local_tres_usage_t *loc_tres;
	ListIterator itr;
	bool first = true;
	char *table = NULL, *id_name = NULL;

	xassert(query);

	switch (type) {
	case ASSOC_TABLES:
		id_name = "id_assoc";
		table = assoc_hour_table;
		break;
	case WCKEY_TABLES:
		id_name = "id_wckey";
		table = wckey_hour_table;
		break;
	default:
		error("_create_id_usage_insert: unknown type %d", type);
		return;
	}

	if (!id_usage->loc_tres || !list_count(id_usage->loc_tres)) {
		error("%s %d doesn't have any tres", id_name, id_usage->id);
		return;
	}

	itr = list_iterator_create(id_usage->loc_tres);
	while ((loc_tres = list_next(itr))) {
		if (first) {
			/* First row carries the full insert statement. */
			xstrfmtcat(*query,
				   "insert into \"%s_%s\" "
				   "(creation_time, mod_time, id, "
				   "time_start, id_tres, alloc_secs) "
				   "values (%ld, %ld, %u, %ld, %u, %"PRIu64")",
				   cluster_name, table, now, now,
				   id_usage->id, curr_start, loc_tres->id,
				   loc_tres->time_alloc);
			first = false;
		} else {
			xstrfmtcat(*query,
				   ", (%ld, %ld, %u, %ld, %u, %"PRIu64")",
				   now, now,
				   id_usage->id, curr_start, loc_tres->id,
				   loc_tres->time_alloc);
		}
	}
	list_iterator_destroy(itr);
	xstrfmtcat(*query,
		   " on duplicate key update mod_time=%ld, "
		   "alloc_secs=VALUES(alloc_secs);", now);
}
810
_add_resv_usage_to_cluster(void * object,void * arg)811 static int _add_resv_usage_to_cluster(void *object, void *arg)
812 {
813 local_resv_usage_t *r_usage = (local_resv_usage_t *)object;
814 local_cluster_usage_t *c_usage = (local_cluster_usage_t *)arg;
815
816 xassert(c_usage);
817
818 /*
819 * Only record time for the clusters that have
820 * registered, or if a reservation has the IGNORE_JOBS
821 * flag we don't have an easy way to distinguish the
822 * cpus a job not running in the reservation, but on
823 * it's cpus.
824 * We still need them for figuring out unused wall time,
825 * but for cluster utilization we will just ignore them.
826 */
827 if (r_usage->flags & RESERVE_FLAG_IGN_JOBS)
828 return SLURM_SUCCESS;
829
830 /*
831 * Since this reservation was added to the
832 * cluster and only certain people could run
833 * there we will use this as allocated time on
834 * the system. If the reservation was a
835 * maintenance then we add the time to planned
836 * down time.
837 */
838
839 _add_time_tres_list(c_usage->loc_tres,
840 r_usage->loc_tres,
841 (r_usage->flags & RESERVE_FLAG_MAINT) ?
842 TIME_PDOWN : TIME_ALLOC, 0, 0);
843
844 /* slurm_make_time_str(&r_usage->start, start_char, */
845 /* sizeof(start_char)); */
846 /* slurm_make_time_str(&r_usage->end, end_char, */
847 /* sizeof(end_char)); */
848 /* info("adding this much %lld to cluster %s " */
849 /* "%d %d %s - %s", */
850 /* r_usage->total_time, c_usage->name, */
851 /* (row_flags & RESERVE_FLAG_MAINT), */
852 /* r_usage->id, start_char, end_char); */
853
854 return SLURM_SUCCESS;
855 }
856
/*
 * Scan the event table for one rollup period and build the cluster
 * usage record.
 *
 * Cluster registration events (rows with an empty node_name) establish
 * the cluster's TRES counts and available time.  A registration row
 * that carries a state means the slurmctld went down; those spans are
 * collected on cluster_down_list so job time overlapping them can be
 * removed later.  Node events (non-empty node_name; maintenance rows
 * are excluded by the query) are charged as down time.  Reservation
 * usage from resv_usage_list is folded in at the end.
 *
 * Returns the new usage record (caller frees), or NULL on query error
 * or when no registration event was seen in the period.
 */
static local_cluster_usage_t *_setup_cluster_usage(mysql_conn_t *mysql_conn,
						   char *cluster_name,
						   time_t curr_start,
						   time_t curr_end,
						   List resv_usage_list,
						   List cluster_down_list)
{
	local_cluster_usage_t *c_usage = NULL;
	char *query = NULL;
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;
	int i = 0;
	ListIterator d_itr = NULL;
	local_cluster_usage_t *loc_c_usage;

	char *event_req_inx[] = {
		"node_name",
		"time_start",
		"time_end",
		"state",
		"tres",
	};
	char *event_str = NULL;
	enum {
		EVENT_REQ_NAME,
		EVENT_REQ_START,
		EVENT_REQ_END,
		EVENT_REQ_STATE,
		EVENT_REQ_TRES,
		EVENT_REQ_COUNT
	};

	/* Build the comma-separated column list for the select. */
	xstrfmtcat(event_str, "%s", event_req_inx[i]);
	for(i=1; i<EVENT_REQ_COUNT; i++) {
		xstrfmtcat(event_str, ", %s", event_req_inx[i]);
	}

	/* first get the events during this time. All that is
	 * except things with the maintainance flag set in the
	 * state. We handle those later with the reservations.
	 */
	query = xstrdup_printf("select %s from \"%s_%s\" where "
			       "!(state & %d) && (time_start < %ld "
			       "&& (time_end >= %ld "
			       "|| time_end = 0)) "
			       "order by node_name, time_start",
			       event_str, cluster_name, event_table,
			       NODE_STATE_MAINT,
			       curr_end, curr_start);
	xfree(event_str);

	if (debug_flags & DEBUG_FLAG_DB_USAGE)
		DB_DEBUG(mysql_conn->conn, "query\n%s", query);
	if (!(result = mysql_db_query_ret(mysql_conn, query, 0))) {
		xfree(query);
		return NULL;
	}

	xfree(query);

	d_itr = list_iterator_create(cluster_down_list);
	while ((row = mysql_fetch_row(result))) {
		time_t row_start = slurm_atoul(row[EVENT_REQ_START]);
		time_t row_end = slurm_atoul(row[EVENT_REQ_END]);
		uint16_t state = slurm_atoul(row[EVENT_REQ_STATE]);
		time_t local_start, local_end;
		int seconds;

		/* Clamp the event to the rollup period. */
		if (row_start < curr_start)
			row_start = curr_start;

		if (!row_end || row_end > curr_end)
			row_end = curr_end;

		/* Don't worry about it if the time is less
		 * than 1 second.
		 */
		if ((seconds = (row_end - row_start)) < 1)
			continue;

		/* this means we are a cluster registration
		   entry */
		if (!row[EVENT_REQ_NAME][0]) {
			/* NOTE(review): this declaration shadows the
			 * loc_c_usage declared at function scope. */
			local_cluster_usage_t *loc_c_usage;

			/* if the cpu count changes we will
			 * only care about the last cpu count but
			 * we will keep a total of the time for
			 * all cpus to get the correct cpu time
			 * for the entire period.
			 */

			if (state || !c_usage) {
				loc_c_usage = xmalloc(
					sizeof(local_cluster_usage_t));
				loc_c_usage->start = row_start;
				loc_c_usage->loc_tres =
					list_create(_destroy_local_tres_usage);
				/* If this has a state it
				   means the slurmctld went
				   down and we should put this
				   on the list and remove any
				   jobs from this time that
				   were running later.
				*/
				if (state)
					list_append(cluster_down_list,
						    loc_c_usage);
				else
					c_usage = loc_c_usage;
			} else
				loc_c_usage = c_usage;

			loc_c_usage->end = row_end;

			_add_tres_2_list(loc_c_usage->loc_tres,
					 row[EVENT_REQ_TRES], seconds);

			continue;
		}

		/*
		 * Only record down time for the cluster we
		 * are looking for. If it was during this
		 * time period we would already have it.
		 */
		if (!c_usage)
			continue;

		local_start = row_start;
		local_end = row_end;

		/* Further clamp to the cluster registration span. */
		if (local_start < c_usage->start)
			local_start = c_usage->start;
		if (local_end > c_usage->end)
			local_end = c_usage->end;

		/* Don't worry about it if the time is less than 1 second. */
		if ((seconds = (local_end - local_start)) < 1)
			continue;

		_add_tres_time_2_list(c_usage->loc_tres,
				      row[EVENT_REQ_TRES],
				      TIME_DOWN,
				      seconds, 0, 0);

		/*
		 * Now remove this time if there was a
		 * disconnected slurmctld during the down time.
		 */
		list_iterator_reset(d_itr);
		while ((loc_c_usage = list_next(d_itr))) {
			time_t temp_end = row_end;
			time_t temp_start = row_start;
			if (loc_c_usage->start > temp_start)
				temp_start = loc_c_usage->start;
			if (loc_c_usage->end < temp_end)
				temp_end = loc_c_usage->end;
			seconds = (temp_end - temp_start);
			if (seconds < 1)
				continue;

			_remove_job_tres_time_from_cluster(
				loc_c_usage->loc_tres,
				c_usage->loc_tres, seconds);
			/* info("Node %s was down for " */
			/*      "%d seconds while " */
			/*      "cluster %s's slurmctld " */
			/*      "wasn't responding", */
			/*      row[EVENT_REQ_NAME], */
			/*      seconds, cluster_name); */
		}
	}
	mysql_free_result(result);

	list_iterator_destroy(d_itr);

	/* Fold the period's reservations into the cluster usage. */
	if (c_usage)
		(void)list_for_each(resv_usage_list,
				    _add_resv_usage_to_cluster,
				    c_usage);
	return c_usage;
}
1040
/*
 * _setup_resv_usage - load all reservation records that overlap the
 *	rollup window [curr_start, curr_end) from the cluster's
 *	reservation table into resv_usage_list.
 *
 * IN mysql_conn - open database connection
 * IN cluster_name - cluster whose "<cluster>_resv_table" is queried
 * IN curr_start - start of the rollup period
 * IN curr_end - end of the rollup period
 * IN/OUT resv_usage_list - appended with one local_resv_usage_t per
 *	matching reservation row (list owns the new records)
 * IN dims - node-name dimension count passed to hostlist_create_dims()
 *
 * RET SLURM_SUCCESS, or SLURM_ERROR if the database query failed.
 */
extern int _setup_resv_usage(mysql_conn_t *mysql_conn,
			     char *cluster_name,
			     time_t curr_start,
			     time_t curr_end,
			     List resv_usage_list,
			     int dims)
{
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;
	int i;
	char *query;
	char *resv_str = NULL;
	local_resv_usage_t *r_usage = NULL;
	/* Column list for the reservation table; order must match the
	 * RESV_REQ_* enum below. */
	char *resv_req_inx[] = {
		"id_resv",
		"assoclist",
		"flags",
		"nodelist",
		"tres",
		"time_start",
		"time_end",
		"unused_wall"
	};
	enum {
		RESV_REQ_ID,
		RESV_REQ_ASSOCS,
		RESV_REQ_FLAGS,
		RESV_REQ_NODES,
		RESV_REQ_TRES,
		RESV_REQ_START,
		RESV_REQ_END,
		RESV_REQ_UNUSED,
		RESV_REQ_COUNT
	};

	/* now get the reservations during this time */

	/* Build the comma separated column list from resv_req_inx[]. */
	i=0;
	xstrfmtcat(resv_str, "%s", resv_req_inx[i]);
	for(i=1; i<RESV_REQ_COUNT; i++)
		xstrfmtcat(resv_str, ", %s", resv_req_inx[i]);

	query = xstrdup_printf("select %s from \"%s_%s\" where "
			       "(time_start < %ld && time_end >= %ld) "
			       "order by time_start",
			       resv_str, cluster_name, resv_table,
			       curr_end, curr_start);
	xfree(resv_str);
	if (debug_flags & DEBUG_FLAG_DB_USAGE)
		DB_DEBUG(mysql_conn->conn, "query\n%s", query);

	result = mysql_db_query_ret(mysql_conn, query, 0);
	xfree(query);

	if (!result)
		return SLURM_ERROR;

	/*
	 * If a reservation overlaps another reservation we
	 * total up everything here as if they didn't but when
	 * calculating the total time for a cluster we will
	 * remove the extra time received. This may result in
	 * unexpected results with association based reports
	 * since the association is given the total amount of
	 * time of each reservation, thus equaling more time
	 * than is available. Job/Cluster/Reservation reports
	 * should be fine though since we really don't over
	 * allocate resources. The issue with us not being
	 * able to handle overlapping reservations here is
	 * unless the reservation completely overlaps the
	 * other reservation we have no idea how many cpus
	 * should be removed since this could be a
	 * heterogeneous system. This same problem exists
	 * when a reservation is created with the ignore_jobs
	 * option which will allow jobs to continue to run in the
	 * reservation that aren't suppose to.
	 */
	while ((row = mysql_fetch_row(result))) {
		time_t row_start = slurm_atoul(row[RESV_REQ_START]);
		time_t row_end = slurm_atoul(row[RESV_REQ_END]);
		int unused;
		int resv_seconds;
		time_t orig_start = row_start;

		if (row_start >= curr_start) {
			/*
			 * This is the first time we are seeing this
			 * reservation, so set our unused to be 0.
			 * This is mostly helpful when
			 * rerolling set it back to 0.
			 */
			unused = 0;
		} else
			unused = slurm_atoul(row[RESV_REQ_UNUSED]);

		/* Clamp the reservation to the rollup window; an end
		 * time of 0 means the reservation is still active. */
		if (row_start <= curr_start)
			row_start = curr_start;

		if (!row_end || row_end > curr_end)
			row_end = curr_end;

		/* Don't worry about it if the time is less
		 * than 1 second.
		 */
		if ((resv_seconds = (row_end - row_start)) < 1)
			continue;

		r_usage = xmalloc(sizeof(local_resv_usage_t));
		r_usage->flags = slurm_atoul(row[RESV_REQ_FLAGS]);
		r_usage->id = slurm_atoul(row[RESV_REQ_ID]);

		r_usage->local_assocs = list_create(xfree_ptr);
		slurm_addto_char_list(r_usage->local_assocs,
				      row[RESV_REQ_ASSOCS]);
		r_usage->loc_tres =
			list_create(_destroy_local_tres_usage);

		_add_tres_2_list(r_usage->loc_tres,
				 row[RESV_REQ_TRES], resv_seconds);

		/*
		 * Original start is needed when updating the
		 * reservation's unused_wall later on.
		 */
		r_usage->orig_start = orig_start;
		r_usage->start = row_start;
		r_usage->end = row_end;
		r_usage->unused_wall = unused + resv_seconds;
		r_usage->hl = hostlist_create_dims(row[RESV_REQ_NODES], dims);
		list_append(resv_usage_list, r_usage);
	}
	mysql_free_result(result);

	return SLURM_SUCCESS;
}
1176
/*
 * as_mysql_hourly_rollup - roll job, event, suspend and reservation
 *	records up into hourly usage rows for one cluster.
 *
 * For every hour in [start, end) this gathers cluster, association,
 * wckey and reservation usage, writes the hourly rollup rows, updates
 * each reservation's unused_wall, commits, and finally triggers the
 * hourly archive/purge.
 *
 * IN mysql_conn - open database connection
 * IN cluster_name - cluster being rolled up
 * IN start - beginning of the first hour to roll up
 * IN end - end of the range (exclusive)
 * IN archive_data - passed through to _process_purge()
 *
 * RET SLURM_SUCCESS or SLURM_ERROR.
 */
extern int as_mysql_hourly_rollup(mysql_conn_t *mysql_conn,
				  char *cluster_name,
				  time_t start, time_t end,
				  uint16_t archive_data)
{
	int rc = SLURM_SUCCESS;
	int add_sec = 3600;	/* hour-sized rollup step */
	int i=0, dims;
	time_t now = time(NULL);
	time_t curr_start = start;
	time_t curr_end = curr_start + add_sec;
	char *query = NULL;
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;
	ListIterator a_itr = NULL;
	ListIterator c_itr = NULL;
	ListIterator w_itr = NULL;
	ListIterator r_itr = NULL;
	List assoc_usage_list = list_create(_destroy_local_id_usage);
	List cluster_down_list = list_create(_destroy_local_cluster_usage);
	List wckey_usage_list = list_create(_destroy_local_id_usage);
	List resv_usage_list = list_create(_destroy_local_resv_usage);
	uint16_t track_wckey = slurm_get_track_wckey();
	local_cluster_usage_t *loc_c_usage = NULL;
	local_cluster_usage_t *c_usage = NULL;
	local_resv_usage_t *r_usage = NULL;
	local_id_usage_t *a_usage = NULL;
	local_id_usage_t *w_usage = NULL;
	/* char start_char[20], end_char[20]; */

	/* Job table columns; order must match the JOB_REQ_* enum. */
	char *job_req_inx[] = {
		"job.job_db_inx",
//		"job.id_job",
		"job.id_assoc",
		"job.id_wckey",
		"job.array_task_pending",
		"job.time_eligible",
		"job.time_start",
		"job.time_end",
		"job.time_suspended",
		"job.cpus_req",
		"job.id_resv",
		"job.tres_alloc"
	};
	char *job_str = NULL;
	enum {
		JOB_REQ_DB_INX,
//		JOB_REQ_JOBID,
		JOB_REQ_ASSOCID,
		JOB_REQ_WCKEYID,
		JOB_REQ_ARRAY_PENDING,
		JOB_REQ_ELG,
		JOB_REQ_START,
		JOB_REQ_END,
		JOB_REQ_SUSPENDED,
		JOB_REQ_RCPU,
		JOB_REQ_RESVID,
		JOB_REQ_TRES,
		JOB_REQ_COUNT
	};

	/* Suspend table columns; order must match SUSPEND_REQ_*. */
	char *suspend_req_inx[] = {
		"time_start",
		"time_end"
	};
	char *suspend_str = NULL;
	enum {
		SUSPEND_REQ_START,
		SUSPEND_REQ_END,
		SUSPEND_REQ_COUNT
	};

	i=0;
	xstrfmtcat(job_str, "%s", job_req_inx[i]);
	for(i=1; i<JOB_REQ_COUNT; i++) {
		xstrfmtcat(job_str, ", %s", job_req_inx[i]);
	}

	i=0;
	xstrfmtcat(suspend_str, "%s", suspend_req_inx[i]);
	for(i=1; i<SUSPEND_REQ_COUNT; i++) {
		xstrfmtcat(suspend_str, ", %s", suspend_req_inx[i]);
	}

	/* We need to figure out the dimensions of this cluster */
	query = xstrdup_printf("select dimensions from %s where name='%s'",
			       cluster_table, cluster_name);
	if (debug_flags & DEBUG_FLAG_DB_USAGE)
		DB_DEBUG(mysql_conn->conn, "query\n%s", query);
	result = mysql_db_query_ret(mysql_conn, query, 0);
	xfree(query);

	if (!result) {
		error("%s: error querying cluster_table", __func__);
		rc = SLURM_ERROR;
		goto end_it;
	}
	row = mysql_fetch_row(result);

	if (!row) {
		error("%s: no cluster by name %s known",
		      __func__, cluster_name);
		/* FIX: free the result set before bailing out; this
		 * path previously leaked it. */
		mysql_free_result(result);
		rc = SLURM_ERROR;
		goto end_it;
	}

	dims = atoi(row[0]);
	mysql_free_result(result);

	/* info("begin start %s", slurm_ctime2(&curr_start)); */
	/* info("begin end %s", slurm_ctime2(&curr_end)); */
	a_itr = list_iterator_create(assoc_usage_list);
	c_itr = list_iterator_create(cluster_down_list);
	w_itr = list_iterator_create(wckey_usage_list);
	r_itr = list_iterator_create(resv_usage_list);
	while (curr_start < end) {
		int last_id = -1;
		int last_wckeyid = -1;

		if (debug_flags & DEBUG_FLAG_DB_USAGE)
			DB_DEBUG(mysql_conn->conn,
				 "%s curr hour is now %ld-%ld",
				 cluster_name, curr_start, curr_end);
		/* info("start %s", slurm_ctime2(&curr_start)); */
		/* info("end %s", slurm_ctime2(&curr_end)); */

		if ((rc = _setup_resv_usage(mysql_conn, cluster_name,
					    curr_start, curr_end,
					    resv_usage_list, dims))
		    != SLURM_SUCCESS)
			goto end_it;

		c_usage = _setup_cluster_usage(mysql_conn, cluster_name,
					       curr_start, curr_end,
					       resv_usage_list,
					       cluster_down_list);

		if (c_usage)
			xassert(c_usage->loc_tres);

		/* now get the jobs during this time only */
		query = xstrdup_printf("select %s from \"%s_%s\" as job "
				       "where (job.time_eligible && "
				       "job.time_eligible < %ld && "
				       "(job.time_end >= %ld || "
				       "job.time_end = 0)) "
				       "group by job.job_db_inx "
				       "order by job.id_assoc, "
				       "job.time_eligible",
				       job_str, cluster_name, job_table,
				       curr_end, curr_start);

		if (debug_flags & DEBUG_FLAG_DB_USAGE)
			DB_DEBUG(mysql_conn->conn, "query\n%s", query);
		if (!(result = mysql_db_query_ret(
			      mysql_conn, query, 0))) {
			rc = SLURM_ERROR;
			goto end_it;
		}
		xfree(query);

		while ((row = mysql_fetch_row(result))) {
			//uint32_t job_id = slurm_atoul(row[JOB_REQ_JOBID]);
			uint32_t assoc_id = slurm_atoul(row[JOB_REQ_ASSOCID]);
			uint32_t wckey_id = slurm_atoul(row[JOB_REQ_WCKEYID]);
			uint32_t array_pending =
				slurm_atoul(row[JOB_REQ_ARRAY_PENDING]);
			uint32_t resv_id = slurm_atoul(row[JOB_REQ_RESVID]);
			time_t row_eligible = slurm_atoul(row[JOB_REQ_ELG]);
			time_t row_start = slurm_atoul(row[JOB_REQ_START]);
			time_t row_end = slurm_atoul(row[JOB_REQ_END]);
			uint32_t row_rcpu = slurm_atoul(row[JOB_REQ_RCPU]);
			List loc_tres = NULL;
			int loc_seconds = 0;
			int seconds = 0, suspend_seconds = 0;

			/* Clamp the job's run time to the current hour;
			 * end time 0 means still running. */
			if (row_start && (row_start < curr_start))
				row_start = curr_start;

			if (!row_start && row_end)
				row_start = row_end;

			if (!row_end || row_end > curr_end)
				row_end = curr_end;

			if (!row_start || ((row_end - row_start) < 1))
				goto calc_cluster;

			seconds = (row_end - row_start);

			if (slurm_atoul(row[JOB_REQ_SUSPENDED])) {
				MYSQL_RES *result2 = NULL;
				MYSQL_ROW row2;
				/* get the suspended time for this job */
				query = xstrdup_printf(
					"select %s from \"%s_%s\" where "
					"(time_start < %ld && (time_end >= %ld "
					"|| time_end = 0)) && job_db_inx=%s "
					"order by time_start",
					suspend_str, cluster_name,
					suspend_table,
					curr_end, curr_start,
					row[JOB_REQ_DB_INX]);

				debug4("%d(%s:%d) query\n%s",
				       mysql_conn->conn, THIS_FILE,
				       __LINE__, query);
				if (!(result2 = mysql_db_query_ret(
					      mysql_conn,
					      query, 0))) {
					rc = SLURM_ERROR;
					mysql_free_result(result);
					goto end_it;
				}
				xfree(query);
				while ((row2 = mysql_fetch_row(result2))) {
					int tot_time = 0;
					time_t local_start = slurm_atoul(
						row2[SUSPEND_REQ_START]);
					time_t local_end = slurm_atoul(
						row2[SUSPEND_REQ_END]);

					if (!local_start)
						continue;

					if (row_start > local_start)
						local_start = row_start;
					if (!local_end || row_end < local_end)
						local_end = row_end;
					tot_time = (local_end - local_start);

					if (tot_time > 0)
						suspend_seconds += tot_time;
				}
				mysql_free_result(result2);
			}

			/* Jobs are ordered by id_assoc, so a new id
			 * means a new association record. */
			if (last_id != assoc_id) {
				a_usage = xmalloc(sizeof(local_id_usage_t));
				a_usage->id = assoc_id;
				list_append(assoc_usage_list, a_usage);
				last_id = assoc_id;
				/* a_usage->loc_tres is made later,
				   don't do it here.
				*/
			}

			/* Short circuit this so so we don't get a pointer. */
			if (!track_wckey)
				last_wckeyid = wckey_id;

			/* do the wckey calculation */
			if (last_wckeyid != wckey_id) {
				list_iterator_reset(w_itr);
				while ((w_usage = list_next(w_itr)))
					if (w_usage->id == wckey_id)
						break;

				if (!w_usage) {
					w_usage = xmalloc(
						sizeof(local_id_usage_t));
					w_usage->id = wckey_id;
					list_append(wckey_usage_list,
						    w_usage);
					w_usage->loc_tres = list_create(
						_destroy_local_tres_usage);
				}
				last_wckeyid = wckey_id;
			}

			/* do the cluster allocated calculation */
		calc_cluster:

			/*
			 * We need to have this clean for each job
			 * since we add the time to the cluster individually.
			 */
			loc_tres = list_create(_destroy_local_tres_usage);

			_add_tres_time_2_list(loc_tres, row[JOB_REQ_TRES],
					      TIME_ALLOC, seconds,
					      suspend_seconds, 0);
			if (w_usage)
				_add_tres_time_2_list(w_usage->loc_tres,
						      row[JOB_REQ_TRES],
						      TIME_ALLOC, seconds,
						      suspend_seconds, 0);

			/*
			 * Now figure out there was a disconnected
			 * slurmctld during this job.
			 */
			list_iterator_reset(c_itr);
			while ((loc_c_usage = list_next(c_itr))) {
				int temp_end = row_end;
				int temp_start = row_start;
				if (loc_c_usage->start > temp_start)
					temp_start = loc_c_usage->start;
				if (loc_c_usage->end < temp_end)
					temp_end = loc_c_usage->end;
				loc_seconds = (temp_end - temp_start);
				if (loc_seconds < 1)
					continue;

				_remove_job_tres_time_from_cluster(
					loc_c_usage->loc_tres,
					loc_tres,
					loc_seconds);
				/* info("Job %u was running for " */
				/*      "%d seconds while " */
				/*      "cluster %s's slurmctld " */
				/*      "wasn't responding", */
				/*      job_id, loc_seconds, cluster_name); */
			}

			/* first figure out the reservation */
			if (resv_id) {
				if (seconds <= 0) {
					_transfer_loc_tres(&loc_tres, a_usage);
					continue;
				}
				/*
				 * Since we have already added the entire
				 * reservation as used time on the cluster we
				 * only need to calculate the used time for the
				 * reservation and then divy up the unused time
				 * over the associations able to run in the
				 * reservation. Since the job was to run, or ran
				 * a reservation we don't care about eligible
				 * time since that could totally skew the
				 * clusters reserved time since the job may be
				 * able to run outside of the reservation.
				 */
				list_iterator_reset(r_itr);
				while ((r_usage = list_next(r_itr))) {
					int temp_end, temp_start;
					/*
					 * since the reservation could have
					 * changed in some way, thus making a
					 * new reservation record in the
					 * database, we have to make sure all
					 * of the reservations are checked to
					 * see if such a thing has happened
					 */
					if (r_usage->id != resv_id)
						continue;
					temp_end = row_end;
					temp_start = row_start;
					if (r_usage->start > temp_start)
						temp_start =
							r_usage->start;
					if (r_usage->end < temp_end)
						temp_end = r_usage->end;

					loc_seconds = (temp_end - temp_start);

					if (loc_seconds <= 0)
						continue;

					if (c_usage &&
					    (r_usage->flags &
					     RESERVE_FLAG_IGN_JOBS))
						/*
						 * job usage was not
						 * bundled with resv
						 * usage so need to
						 * account for it
						 * individually here
						 */
						_add_tres_time_2_list(
							c_usage->loc_tres,
							row[JOB_REQ_TRES],
							TIME_ALLOC,
							loc_seconds,
							0, 0);

					_add_time_tres_list(
						r_usage->loc_tres,
						loc_tres, TIME_ALLOC,
						loc_seconds, 1);
					if ((rc = _update_unused_wall(
						     r_usage,
						     loc_tres,
						     loc_seconds))
					    != SLURM_SUCCESS)
						goto end_it;
				}

				_transfer_loc_tres(&loc_tres, a_usage);
				continue;
			}

			/*
			 * only record time for the clusters that have
			 * registered. This continue should rarely if
			 * ever happen.
			 */
			if (!c_usage) {
				_transfer_loc_tres(&loc_tres, a_usage);
				continue;
			}

			if (row_start && (seconds > 0)) {
				/* info("%d assoc %d adds " */
				/*      "(%d)(%d-%d) * %d = %d " */
				/*      "to %d", */
				/*      job_id, */
				/*      a_usage->id, */
				/*      seconds, */
				/*      row_end, row_start, */
				/*      row_acpu, */
				/*      seconds * row_acpu, */
				/*      row_acpu); */

				_add_job_alloc_time_to_cluster(
					c_usage->loc_tres,
					loc_tres);
			}

			/*
			 * The loc_tres isn't needed after this so transfer to
			 * the association and go on our merry way.
			 */
			_transfer_loc_tres(&loc_tres, a_usage);

			/* now reserved time */
			if (!row_start || (row_start >= c_usage->start)) {
				int temp_end = row_start;
				int temp_start = row_eligible;
				if (c_usage->start > temp_start)
					temp_start = c_usage->start;
				if (c_usage->end < temp_end)
					temp_end = c_usage->end;
				loc_seconds = (temp_end - temp_start);
				if (loc_seconds > 0) {
					/*
					 * If we have pending jobs in an array
					 * they haven't been inserted into the
					 * database yet as proper job records,
					 * so handle them here.
					 */
					if (array_pending)
						loc_seconds *= array_pending;

					/* info("%d assoc %d reserved " */
					/*      "(%d)(%d-%d) * %d * %d = %d " */
					/*      "to %d", */
					/*      job_id, */
					/*      assoc_id, */
					/*      temp_end - temp_start, */
					/*      temp_end, temp_start, */
					/*      row_rcpu, */
					/*      array_pending, */
					/*      loc_seconds, */
					/*      row_rcpu); */

					_add_time_tres(c_usage->loc_tres,
						       TIME_RESV, TRES_CPU,
						       loc_seconds *
						       (uint64_t) row_rcpu,
						       0);
				}
			}
		}
		mysql_free_result(result);

		/* now figure out how much more to add to the
		   associations that could had run in the reservation
		*/
		query = NULL;
		list_iterator_reset(r_itr);
		while ((r_usage = list_next(r_itr))) {
			ListIterator t_itr;
			local_tres_usage_t *loc_tres;

			xstrfmtcat(query, "update \"%s_%s\" set unused_wall=%f where id_resv=%u and time_start=%ld;",
				   cluster_name, resv_table,
				   r_usage->unused_wall, r_usage->id,
				   r_usage->orig_start);

			if (!r_usage->loc_tres ||
			    !list_count(r_usage->loc_tres))
				continue;

			t_itr = list_iterator_create(r_usage->loc_tres);
			while ((loc_tres = list_next(t_itr))) {
				int64_t idle = loc_tres->total_time -
					loc_tres->time_alloc;
				char *assoc = NULL;
				ListIterator tmp_itr = NULL;
				int assoc_cnt, resv_unused_secs;

				if (idle <= 0)
					break; /* since this will be
						* the same for all TRES */

				/* now divide that time by the number of
				   associations in the reservation and add
				   them to each association */
				resv_unused_secs = idle;
				assoc_cnt = list_count(r_usage->local_assocs);
				if (assoc_cnt)
					resv_unused_secs /= assoc_cnt;
				/* info("resv %d got %d seconds for TRES %u " */
				/*      "for %d assocs", */
				/*      r_usage->id, resv_unused_secs, */
				/*      loc_tres->id, */
				/*      list_count(r_usage->local_assocs)); */
				tmp_itr = list_iterator_create(
					r_usage->local_assocs);
				while ((assoc = list_next(tmp_itr))) {
					uint32_t associd = slurm_atoul(assoc);
					if ((last_id != associd) &&
					    !(a_usage = list_find_first(
						      assoc_usage_list,
						      _find_id_usage,
						      &associd))) {
						a_usage = xmalloc(
							sizeof(local_id_usage_t));
						a_usage->id = associd;
						list_append(assoc_usage_list,
							    a_usage);
						last_id = associd;
						a_usage->loc_tres = list_create(
							_destroy_local_tres_usage);
					}

					_add_time_tres(a_usage->loc_tres,
						       TIME_ALLOC, loc_tres->id,
						       resv_unused_secs, 0);
				}
				list_iterator_destroy(tmp_itr);
			}
			list_iterator_destroy(t_itr);
		}

		if (query) {
			if (debug_flags & DEBUG_FLAG_DB_USAGE)
				DB_DEBUG(mysql_conn->conn, "query\n%s", query);
			rc = mysql_db_query(mysql_conn, query);
			xfree(query);
			if (rc != SLURM_SUCCESS) {
				error("couldn't update reservations with unused time");
				goto end_it;
			}
		}

		/* now apply the down time from the slurmctld disconnects */
		if (c_usage) {
			list_iterator_reset(c_itr);
			while ((loc_c_usage = list_next(c_itr))) {
				local_tres_usage_t *loc_tres;
				ListIterator tmp_itr = list_iterator_create(
					loc_c_usage->loc_tres);
				while ((loc_tres = list_next(tmp_itr)))
					_add_time_tres(c_usage->loc_tres,
						       TIME_DOWN,
						       loc_tres->id,
						       loc_tres->total_time,
						       0);
				list_iterator_destroy(tmp_itr);
			}

			if ((rc = _process_cluster_usage(
				     mysql_conn, cluster_name, curr_start,
				     curr_end, now, c_usage))
			    != SLURM_SUCCESS) {
				goto end_it;
			}
		}

		list_iterator_reset(a_itr);
		while ((a_usage = list_next(a_itr)))
			_create_id_usage_insert(cluster_name, ASSOC_TABLES,
						curr_start, now,
						a_usage, &query);
		if (query) {
			if (debug_flags & DEBUG_FLAG_DB_USAGE)
				DB_DEBUG(mysql_conn->conn, "query\n%s", query);
			rc = mysql_db_query(mysql_conn, query);
			xfree(query);
			if (rc != SLURM_SUCCESS) {
				error("Couldn't add assoc hour rollup");
				goto end_it;
			}
		}

		if (!track_wckey)
			goto end_loop;

		list_iterator_reset(w_itr);
		while ((w_usage = list_next(w_itr)))
			_create_id_usage_insert(cluster_name, WCKEY_TABLES,
						curr_start, now,
						w_usage, &query);
		if (query) {
			if (debug_flags & DEBUG_FLAG_DB_USAGE)
				DB_DEBUG(mysql_conn->conn, "query\n%s", query);
			rc = mysql_db_query(mysql_conn, query);
			xfree(query);
			if (rc != SLURM_SUCCESS) {
				error("Couldn't add wckey hour rollup");
				goto end_it;
			}
		}

	end_loop:
		/* Reset all per-hour state before moving to the next hour. */
		_destroy_local_cluster_usage(c_usage);

		c_usage = NULL;
		r_usage = NULL;
		a_usage = NULL;
		w_usage = NULL;

		list_flush(assoc_usage_list);
		list_flush(cluster_down_list);
		list_flush(wckey_usage_list);
		list_flush(resv_usage_list);
		curr_start = curr_end;
		curr_end = curr_start + add_sec;
	}
end_it:
	xfree(query);
	xfree(suspend_str);
	xfree(job_str);
	_destroy_local_cluster_usage(c_usage);

	if (a_itr)
		list_iterator_destroy(a_itr);
	if (c_itr)
		list_iterator_destroy(c_itr);
	if (w_itr)
		list_iterator_destroy(w_itr);
	if (r_itr)
		list_iterator_destroy(r_itr);

	FREE_NULL_LIST(assoc_usage_list);
	FREE_NULL_LIST(cluster_down_list);
	FREE_NULL_LIST(wckey_usage_list);
	FREE_NULL_LIST(resv_usage_list);

	/* info("stop start %s", slurm_ctime2(&curr_start)); */
	/* info("stop end %s", slurm_ctime2(&curr_end)); */

	/* go check to see if we archive and purge */

	if (rc == SLURM_SUCCESS) {
		if (mysql_db_commit(mysql_conn)) {
			char start[25], end[25];
			error("Couldn't commit cluster (%s) "
			      "hour rollup for %s - %s",
			      cluster_name, slurm_ctime2_r(&curr_start, start),
			      slurm_ctime2_r(&curr_end, end));
			rc = SLURM_ERROR;
		} else
			rc = _process_purge(mysql_conn, cluster_name,
					    archive_data, SLURMDB_PURGE_HOURS);
	}

	return rc;
}
/*
 * as_mysql_nonhour_rollup - aggregate hourly rollup rows into daily
 *	rows, or daily rows into monthly rows, for one cluster.
 *
 * IN mysql_conn - open database connection
 * IN run_month - true to roll days into months, false to roll hours
 *	into days
 * IN cluster_name - cluster being rolled up
 * IN start - beginning of the first period to roll up
 * IN end - end of the range (exclusive)
 * IN archive_data - passed through to _process_purge()
 *
 * RET SLURM_SUCCESS or SLURM_ERROR.
 */
extern int as_mysql_nonhour_rollup(mysql_conn_t *mysql_conn,
				   bool run_month,
				   char *cluster_name,
				   time_t start, time_t end,
				   uint16_t archive_data)
{
	/* can't just add 86400 since daylight savings starts and ends every
	 * once in a while
	 */
	int rc = SLURM_SUCCESS;
	struct tm period_tm;
	time_t period_start = start;
	time_t period_end;
	time_t now = time(NULL);
	char *query = NULL;
	uint16_t track_wckey = slurm_get_track_wckey();
	/* The unit never changes inside the loop, so pick it once. */
	char *unit_name = run_month ? "month" : "day";

	while (period_start < end) {
		if (!localtime_r(&period_start, &period_tm)) {
			error("Couldn't get localtime from start %ld",
			      period_start);
			return SLURM_ERROR;
		}

		/* Advance to midnight of the next day, or to the first
		 * of the next month, letting mktime() normalize any
		 * overflow (and handle DST for us). */
		period_tm.tm_sec = 0;
		period_tm.tm_min = 0;
		period_tm.tm_hour = 0;
		if (!run_month) {
			period_tm.tm_mday++;
		} else {
			period_tm.tm_mday = 1;
			period_tm.tm_mon++;
		}
		period_end = slurm_mktime(&period_tm);

		if (debug_flags & DEBUG_FLAG_DB_USAGE)
			DB_DEBUG(mysql_conn->conn,
				 "curr %s is now %ld-%ld",
				 unit_name, period_start, period_end);
		/* info("start %s", slurm_ctime2(&period_start)); */
		/* info("end %s", slurm_ctime2(&period_end)); */

		/* Association usage: sum the finer-grained rows into
		 * one row per (id, id_tres) for this period. */
		query = xstrdup_printf(
			"insert into \"%s_%s\" (creation_time, mod_time, id, "
			"id_tres, time_start, alloc_secs) "
			"select %ld, %ld, id, id_tres, "
			"%ld, @ASUM:=SUM(alloc_secs) from \"%s_%s\" where "
			"(time_start < %ld && time_start >= %ld) "
			"group by id, id_tres on duplicate key update "
			"mod_time=%ld, alloc_secs=@ASUM;",
			cluster_name,
			run_month ? assoc_month_table : assoc_day_table,
			now, now, period_start,
			cluster_name,
			run_month ? assoc_day_table : assoc_hour_table,
			period_end, period_start, now);

		/* We group on deleted here so if there are no entries
		   we don't get an error, just nothing is returned.
		   Else we get a bunch of NULL's
		*/
		xstrfmtcat(query,
			   "insert into \"%s_%s\" (creation_time, "
			   "mod_time, time_start, id_tres, count, "
			   "alloc_secs, down_secs, pdown_secs, "
			   "idle_secs, over_secs, resv_secs) "
			   "select %ld, %ld, "
			   "%ld, id_tres, @CPU:=MAX(count), "
			   "@ASUM:=SUM(alloc_secs), "
			   "@DSUM:=SUM(down_secs), "
			   "@PDSUM:=SUM(pdown_secs), "
			   "@ISUM:=SUM(idle_secs), "
			   "@OSUM:=SUM(over_secs), "
			   "@RSUM:=SUM(resv_secs) from \"%s_%s\" where "
			   "(time_start < %ld && time_start >= %ld) "
			   "group by deleted, id_tres "
			   "on duplicate key update "
			   "mod_time=%ld, count=@CPU, "
			   "alloc_secs=@ASUM, down_secs=@DSUM, "
			   "pdown_secs=@PDSUM, idle_secs=@ISUM, "
			   "over_secs=@OSUM, resv_secs=@RSUM;",
			   cluster_name,
			   run_month ? cluster_month_table : cluster_day_table,
			   now, now, period_start,
			   cluster_name,
			   run_month ? cluster_day_table : cluster_hour_table,
			   period_end, period_start, now);

		/* WCKey usage, only when tracking is enabled. */
		if (track_wckey) {
			xstrfmtcat(query,
				   "insert into \"%s_%s\" (creation_time, "
				   "mod_time, id, id_tres, time_start, "
				   "alloc_secs) "
				   "select %ld, %ld, "
				   "id, id_tres, %ld, @ASUM:=SUM(alloc_secs) "
				   "from \"%s_%s\" where (time_start < %ld && "
				   "time_start >= %ld) group by id, id_tres "
				   "on duplicate key update "
				   "mod_time=%ld, alloc_secs=@ASUM;",
				   cluster_name,
				   run_month ? wckey_month_table :
				   wckey_day_table,
				   now, now, period_start,
				   cluster_name,
				   run_month ? wckey_day_table :
				   wckey_hour_table,
				   period_end, period_start, now);
		}

		if (debug_flags & DEBUG_FLAG_DB_USAGE)
			DB_DEBUG(mysql_conn->conn, "query\n%s", query);
		rc = mysql_db_query(mysql_conn, query);
		xfree(query);
		if (rc != SLURM_SUCCESS) {
			error("Couldn't add %s rollup", unit_name);
			return SLURM_ERROR;
		}

		period_start = period_end;
	}

	/* info("stop start %s", slurm_ctime2(&period_start)); */
	/* info("stop end %s", slurm_ctime2(&period_end)); */

	/* go check to see if we archive and purge */
	rc = _process_purge(mysql_conn, cluster_name, archive_data,
			    run_month ? SLURMDB_PURGE_MONTHS :
			    SLURMDB_PURGE_DAYS);
	return rc;
}
1969