1 /*****************************************************************************\ 2 * assoc_mgr.h - keep track of local cache of accounting data. 3 ***************************************************************************** 4 * Copyright (C) 2004-2007 The Regents of the University of California. 5 * Copyright (C) 2008 Lawrence Livermore National Security. 6 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 7 * Written by Danny Auble <da@llnl.gov> 8 * CODE-OCEC-09-009. All rights reserved. 9 * 10 * This file is part of Slurm, a resource management program. 11 * For details, see <https://slurm.schedmd.com/>. 12 * Please also read the included file: DISCLAIMER. 13 * 14 * Slurm is free software; you can redistribute it and/or modify it under 15 * the terms of the GNU General Public License as published by the Free 16 * Software Foundation; either version 2 of the License, or (at your option) 17 * any later version. 18 * 19 * In addition, as a special exception, the copyright holders give permission 20 * to link the code of portions of this program with the OpenSSL library under 21 * certain conditions as described in each individual source file, and 22 * distribute linked combinations including the two. You must obey the GNU 23 * General Public License in all respects for all of the code used other than 24 * OpenSSL. If you modify file(s) with this exception, you may extend this 25 * exception to your version of the file(s), but you are not obligated to do 26 * so. If you do not wish to do so, delete this exception statement from your 27 * version. If you delete this exception statement from all source files in 28 * the program, then also delete it here. 29 * 30 * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY 31 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 32 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 33 * details. 34 * 35 * You should have received a copy of the GNU General Public License along 36 * with Slurm; if not, write to the Free Software Foundation, Inc., 37 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 38 ***************************************************************************** 39 * NOTE: When using lock_slurmctld() and assoc_mgr_lock(), always call 40 * lock_slurmctld() before calling assoc_mgr_lock() and then call 41 * assoc_mgr_unlock() before calling unlock_slurmctld(). 42 \*****************************************************************************/ 43 44 #ifndef _SLURM_ASSOC_MGR_H 45 #define _SLURM_ASSOC_MGR_H 46 47 #include "src/common/list.h" 48 #include "src/common/slurm_accounting_storage.h" 49 #include "src/common/slurmdbd_defs.h" 50 #include "src/slurmctld/slurmctld.h" 51 #include "src/slurmctld/locks.h" 52 #include "slurm/slurm.h" 53 #include "slurm/slurm_errno.h" 54 55 #define ASSOC_MGR_CACHE_ASSOC 0x0001 56 #define ASSOC_MGR_CACHE_QOS 0x0002 57 #define ASSOC_MGR_CACHE_USER 0x0004 58 #define ASSOC_MGR_CACHE_WCKEY 0x0008 59 #define ASSOC_MGR_CACHE_RES 0x0010 60 #define ASSOC_MGR_CACHE_TRES 0x0020 61 #define ASSOC_MGR_CACHE_ALL 0xffff 62 63 /* to lock or not */ 64 typedef struct { 65 lock_level_t assoc; 66 lock_level_t file; 67 lock_level_t qos; 68 lock_level_t res; 69 lock_level_t tres; 70 lock_level_t user; 71 lock_level_t wckey; 72 } assoc_mgr_lock_t; 73 74 typedef enum { 75 ASSOC_LOCK, 76 FILE_LOCK, 77 QOS_LOCK, 78 RES_LOCK, 79 TRES_LOCK, 80 USER_LOCK, 81 WCKEY_LOCK, 82 ASSOC_MGR_ENTITY_COUNT 83 } assoc_mgr_lock_datatype_t; 84 85 typedef struct { 86 uint16_t cache_level; 87 uint16_t enforce; 88 uint16_t *running_cache; 89 void (*add_license_notify) (slurmdb_res_rec_t *rec); 90 void (*resize_qos_notify) (void); 91 void (*remove_assoc_notify) (slurmdb_assoc_rec_t *rec); 92 void (*remove_license_notify) (slurmdb_res_rec_t *rec); 93 void (*remove_qos_notify) (slurmdb_qos_rec_t *rec); 94 char **state_save_location; 95 void (*sync_license_notify) (List clus_res_list); 96 void (*update_assoc_notify) (slurmdb_assoc_rec_t *rec); 97 void (*update_cluster_tres) (void); 98 void (*update_license_notify) (slurmdb_res_rec_t *rec); 99 void (*update_qos_notify) (slurmdb_qos_rec_t *rec); 100 void (*update_resvs) (); 101 } assoc_init_args_t; 102 103 extern List assoc_mgr_tres_list; 104 extern slurmdb_tres_rec_t **assoc_mgr_tres_array; 105 extern char **assoc_mgr_tres_name_array; 106 extern List assoc_mgr_assoc_list; 107 extern List assoc_mgr_res_list; 108 extern List assoc_mgr_qos_list; 109 extern List assoc_mgr_user_list; 110 extern List assoc_mgr_wckey_list; 111 112 extern slurmdb_assoc_rec_t *assoc_mgr_root_assoc; 113 114 extern uint32_t g_qos_max_priority; /* max priority in all qos's */ 115 extern uint32_t g_qos_count; /* count used for generating qos bitstr's */ 116 extern uint32_t g_user_assoc_count; /* Number of associations which are users */ 117 extern uint32_t g_tres_count; /* Number of TRES from the database 118 * which also is the number of elements 119 * in the assoc_mgr_tres_array */ 120 121 extern int assoc_mgr_init(void *db_conn, assoc_init_args_t *args, 122 int db_conn_errno); 123 extern int assoc_mgr_fini(bool save_state); 124 extern void assoc_mgr_lock(assoc_mgr_lock_t *locks); 125 extern void assoc_mgr_unlock(assoc_mgr_lock_t *locks); 126 127 #ifndef NDEBUG 128 extern bool verify_assoc_lock(assoc_mgr_lock_datatype_t datatype, lock_level_t level); 129 #endif 130 131 /* ran after a new tres_list is given */ 132 extern int assoc_mgr_post_tres_list(List new_list); 133 134 /* 135 * get info from the storage 136 * IN: assoc - slurmdb_assoc_rec_t with at least cluster and 137 * account set for account association. To get user 138 * association set user, and optional partition. 139 * Sets "id" field with the association ID. 140 * IN: enforce - return an error if no such association exists 141 * IN/OUT: assoc_list - contains a list of assoc_rec ptrs to 142 * associations this user has in the list. This 143 * list should be created with list_create(NULL) 144 * since we are putting pointers to memory used elsewhere. 145 * RET: SLURM_SUCCESS on success, else SLURM_ERROR 146 * 147 * NOTE: Since the returned assoc_list is full of pointers from the 148 * assoc_mgr_assoc_list assoc_mgr_lock_t READ_LOCK on 149 * associations must be set before calling this function and while 150 * handling it after a return. 151 */ 152 extern int assoc_mgr_get_user_assocs(void *db_conn, 153 slurmdb_assoc_rec_t *assoc, 154 int enforce, 155 List assoc_list); 156 157 /* 158 * get info from the storage 159 * IN/OUT: tres - slurmdb_tres_rec_t with at least id or type and 160 * optional name set. 161 * IN: enforce - return an error if no such tres exists 162 * IN/OUT: tres_pptr - if non-NULL then return a pointer to the 163 * slurmdb_tres record in cache on success 164 * DO NOT FREE. 165 * IN: locked - If you plan on using tres_pptr after this function 166 * you need to have an assoc_mgr_lock_t READ_LOCK for 167 * tres while you use it before and after the 168 * return. This is not required if using the assoc for 169 * non-pointer portions. 170 * RET: SLURM_SUCCESS on success, else SLURM_ERROR 171 */ 172 extern int assoc_mgr_fill_in_tres(void *db_conn, 173 slurmdb_tres_rec_t *tres, 174 int enforce, 175 slurmdb_tres_rec_t **tres_pptr, 176 bool locked); 177 178 /* 179 * get info from the storage 180 * IN/OUT: assoc - slurmdb_assoc_rec_t with at least cluster and 181 * account set for account association. To get user 182 * association set user, and optional partition. 183 * Sets "id" field with the association ID. 184 * IN: enforce - return an error if no such association exists 185 * IN/OUT: assoc_pptr - if non-NULL then return a pointer to the 186 * slurmdb_assoc record in cache on success 187 * DO NOT FREE. 188 * IN: locked - If you plan on using assoc_pptr after this function 189 * you need to have an assoc_mgr_lock_t READ_LOCK for 190 * associations and users while you use it before and after the 191 * return. This is not required if using the assoc for 192 * non-pointer portions. 193 * RET: SLURM_SUCCESS on success, else SLURM_ERROR 194 */ 195 extern int assoc_mgr_fill_in_assoc(void *db_conn, 196 slurmdb_assoc_rec_t *assoc, 197 int enforce, 198 slurmdb_assoc_rec_t **assoc_pptr, 199 bool locked); 200 201 /* 202 * get info from the storage 203 * IN/OUT: user - slurmdb_user_rec_t with the name set of the user. 204 * "default_account" will be filled in on 205 * successful return DO NOT FREE. 206 * IN/OUT: user_pptr - if non-NULL then return a pointer to the 207 * slurmdb_user record in cache on success 208 * DO NOT FREE. 209 * IN: locked - If you plan on using user_pptr outside 210 * this function you need to have an assoc_mgr_lock_t 211 * READ_LOCK for User while you use it before and after the 212 * return. This is not required if using the assoc for 213 * non-pointer portions. 214 * RET: SLURM_SUCCESS on success SLURM_ERROR else 215 */ 216 extern int assoc_mgr_fill_in_user(void *db_conn, slurmdb_user_rec_t *user, 217 int enforce, 218 slurmdb_user_rec_t **user_pptr, bool locked); 219 220 /* 221 * get info from the storage 222 * IN/OUT: qos - slurmdb_qos_rec_t with the id set of the qos. 223 * IN/OUT: qos_pptr - if non-NULL then return a pointer to the 224 * slurmdb_qos record in cache on success 225 * DO NOT FREE. 226 * IN: locked - If you plan on using qos_pptr, or g_qos_count outside 227 * this function you need to have an assoc_mgr_lock_t 228 * READ_LOCK for QOS while you use it before and after the 229 * return. This is not required if using the assoc for 230 * non-pointer portions. 231 * RET: SLURM_SUCCESS on success SLURM_ERROR else 232 */ 233 extern int assoc_mgr_fill_in_qos(void *db_conn, slurmdb_qos_rec_t *qos, 234 int enforce, 235 slurmdb_qos_rec_t **qos_pptr, bool locked); 236 /* 237 * get info from the storage 238 * IN/OUT: wckey - slurmdb_wckey_rec_t with the name, cluster and user 239 * for the wckey association. 240 * Sets "id" field with the wckey ID. 241 * IN: enforce - return an error if no such wckey exists 242 * IN/OUT: wckey_pptr - if non-NULL then return a pointer to the 243 * slurmdb_wckey record in cache on success 244 * IN: locked - If you plan on using wckey_pptr outside 245 * this function you need to have an assoc_mgr_lock_t 246 * READ_LOCK for WCKey and Users while you use it before and after 247 * the return. This is not required if using the assoc for 248 * non-pointer portions. 249 * RET: SLURM_SUCCESS on success, else SLURM_ERROR 250 */ 251 extern int assoc_mgr_fill_in_wckey(void *db_conn, 252 slurmdb_wckey_rec_t *wckey, 253 int enforce, 254 slurmdb_wckey_rec_t **wckey_pptr, 255 bool locked); 256 257 /* 258 * get admin_level of uid 259 * IN: uid - uid of user to check admin_level of. 260 * RET: admin level SLURMDB_ADMIN_NOTSET on error 261 */ 262 extern slurmdb_admin_level_t assoc_mgr_get_admin_level(void *db_conn, 263 uint32_t uid); 264 265 /* 266 * see if user is coordinator of given acct 267 * IN: uid - uid of user to check. 268 * IN: acct - name of account 269 * RET: true or false 270 */ 271 extern bool assoc_mgr_is_user_acct_coord(void *db_conn, uint32_t uid, 272 char *acct); 273 274 /* 275 * get the share information from the association list 276 * IN: uid: uid_t of user issuing the request 277 * IN: req_msg: info about request 278 * IN/OUT: resp_msg: message filled in with assoc_mgr info 279 */ 280 extern void assoc_mgr_get_shares(void *db_conn, 281 uid_t uid, shares_request_msg_t *req_msg, 282 shares_response_msg_t *resp_msg); 283 284 /* 285 * get the state of the association manager and pack it up in buffer 286 * OUT buffer_ptr - the pointer is set to the allocated buffer. 287 * OUT buffer_size - set to size of the buffer in bytes 288 * IN: msg: request for various states 289 * IN: uid: uid_t of user issuing the request 290 * IN: db_conn: needed if not already connected to the database or DBD 291 * IN: protocol_version: version of Slurm we are sending to. 292 */ 293 extern void assoc_mgr_info_get_pack_msg( 294 char **buffer_ptr, int *buffer_size, 295 assoc_mgr_info_request_msg_t *msg, uid_t uid, 296 void *db_conn, uint16_t protocol_version); 297 298 /* 299 * unpack the packing of the above assoc_mgr_get_pack_state_msg function. 300 * OUT: object - what to unpack into 301 * IN: buffer - buffer to unpack 302 * IN: version of Slurm this is packed in 303 * RET: SLURM_SUCCESS on SUCCESS, SLURM_ERROR else 304 */ 305 extern int assoc_mgr_info_unpack_msg( 306 assoc_mgr_info_msg_t **object, Buf buffer, uint16_t protocol_version); 307 308 /* 309 * assoc_mgr_update - update the association manager 310 * IN update_list: updates to perform 311 * IN locked: if appropriate write locks are locked before calling or not 312 * RET: error code 313 * NOTE: the items in update_list are not deleted 314 */ 315 extern int assoc_mgr_update(List update_list, bool locked); 316 317 /* 318 * update associations in cache 319 * IN: slurmdb_update_object_t *object 320 * IN locked: if appropriate write locks are locked before calling or not 321 * RET: SLURM_SUCCESS on success (or not found) SLURM_ERROR else 322 */ 323 extern int assoc_mgr_update_assocs(slurmdb_update_object_t *update, 324 bool locked); 325 326 /* 327 * update wckeys in cache 328 * IN: slurmdb_update_object_t *object 329 * IN locked: if appropriate write locks are locked before calling or not 330 * RET: SLURM_SUCCESS on success (or not found) SLURM_ERROR else 331 */ 332 extern int assoc_mgr_update_wckeys(slurmdb_update_object_t *update, 333 bool locked); 334 335 /* 336 * update qos in cache 337 * IN: slurmdb_update_object_t *object 338 * IN locked: if appropriate write locks are locked before calling or not 339 * RET: SLURM_SUCCESS on success (or not found) SLURM_ERROR else 340 */ 341 extern int assoc_mgr_update_qos(slurmdb_update_object_t *update, 342 bool locked); 343 344 /* 345 * update cluster resources in cache 346 * IN: slurmdb_update_object_t *object 347 * IN locked: if appropriate write locks are locked before calling or not 348 * RET: SLURM_SUCCESS on success (or not found) SLURM_ERROR else 349 */ 350 extern int assoc_mgr_update_res(slurmdb_update_object_t *update, 351 bool locked); 352 353 /* 354 * update cluster tres in cache 355 * IN: slurmdb_update_object_t *object 356 * IN locked: if appropriate write locks are locked before calling or not 357 * RET: SLURM_SUCCESS on success (or not found) SLURM_ERROR else 358 */ 359 extern int assoc_mgr_update_tres(slurmdb_update_object_t *update, 360 bool locked); 361 362 /* 363 * update users in cache 364 * IN: slurmdb_update_object_t *object 365 * IN locked: if appropriate write locks are locked before calling or not 366 * RET: SLURM_SUCCESS on success (or not found) SLURM_ERROR else 367 */ 368 extern int assoc_mgr_update_users(slurmdb_update_object_t *update, 369 bool locked); 370 371 /* 372 * validate that an association ID is still valid 373 * IN: assoc_id - association ID previously returned by 374 * get_assoc_id(void *db_conn, 375 ) 376 * RET: SLURM_SUCCESS on success SLURM_ERROR else 377 */ 378 extern int assoc_mgr_validate_assoc_id(void *db_conn, 379 uint32_t assoc_id, 380 int enforce); 381 382 /* 383 * clear the used_* fields from every association, 384 * used on reconfiguration 385 */ 386 extern void assoc_mgr_clear_used_info(void); 387 388 /* 389 * Remove the association's accumulated usage 390 * IN: slurmdb_assoc_rec_t *assoc 391 * RET: SLURM_SUCCESS on success or else SLURM_ERROR 392 */ 393 extern void assoc_mgr_remove_assoc_usage(slurmdb_assoc_rec_t *assoc); 394 395 /* 396 * Remove the QOS's accumulated usage 397 * IN: slurmdb_qos_rec_t *qos 398 * RET: SLURM_SUCCESS on success or else SLURM_ERROR 399 */ 400 extern void assoc_mgr_remove_qos_usage(slurmdb_qos_rec_t *qos); 401 402 /* 403 * Dump the state information of the association mgr just in case the 404 * database isn't up next time we run. 405 */ 406 extern int dump_assoc_mgr_state(void); 407 408 /* 409 * Read in the past usage for associations. 410 */ 411 extern int load_assoc_usage(void); 412 413 /* 414 * Read in the past usage for qos. 415 */ 416 extern int load_qos_usage(void); 417 418 /* 419 * Read in the past tres list. 420 */ 421 extern int load_assoc_mgr_last_tres(void); 422 423 /* 424 * Read in the information of the association mgr if the database 425 * isn't up when starting. 426 */ 427 extern int load_assoc_mgr_state(bool only_tres); 428 429 /* 430 * Refresh the lists if when running_cache is set this will load new 431 * information from the database (if any) and update the cached list. 432 */ 433 extern int assoc_mgr_refresh_lists(void *db_conn, uint16_t cache_level); 434 435 /* 436 * Sets the uids of users added to the system after the start of the 437 * calling program. 438 */ 439 extern int assoc_mgr_set_missing_uids(); 440 441 /* Normalize shares for an association. External so a priority plugin 442 * can call it if needed. 443 */ 444 extern void assoc_mgr_normalize_assoc_shares(slurmdb_assoc_rec_t *assoc); 445 446 /* 447 * Find the position of the given TRES ID or type/name in the 448 * assoc_mgr_tres_array. If the TRES name or ID isn't found -1 is returned. 449 */ 450 extern int assoc_mgr_find_tres_pos(slurmdb_tres_rec_t *tres_rec, bool locked); 451 452 /* 453 * Find the position of the given TRES name in the 454 * assoc_mgr_tres_array. Ignore anything after ":" in the TRES name. 455 * So tres_rec->name of "gpu" can match accounting TRES name of "gpu:tesla". 456 * If the TRES name isn't found -1 is returned. 457 */ 458 extern int assoc_mgr_find_tres_pos2(slurmdb_tres_rec_t *tres_rec, bool locked); 459 460 /* 461 * Calls assoc_mgr_find_tres_pos and returns the pointer in the 462 * assoc_mgr_tres_array. 463 * NOTE: The assoc_mgr tres read lock needs to be locked before calling this 464 * function and while using the returned record. 465 */ 466 extern slurmdb_tres_rec_t *assoc_mgr_find_tres_rec( 467 slurmdb_tres_rec_t *tres_rec); 468 469 /* 470 * Calls assoc_mgr_find_tres_pos and returns the pointer in the 471 * assoc_mgr_tres_array. Ignores GRES "type" option. 472 * NOTE: The assoc_mgr tres read lock needs to be locked before calling this 473 * function and while using the returned record. 474 */ 475 extern slurmdb_tres_rec_t *assoc_mgr_find_tres_rec2( 476 slurmdb_tres_rec_t *tres_rec); 477 478 /* fills in allocates and sets tres_cnt based off tres_str 479 * OUT tres_cnt - array to be filled in g_tres_cnt in length 480 * IN tres_str - simple format of tres used with id and count set 481 * IN init_val - what the initial value is going to be set to 482 * IN locked - if the assoc_mgr tres read lock is locked or not. 483 * RET if positions changed in array from string 1 if nothing changed 0 484 */ 485 extern int assoc_mgr_set_tres_cnt_array(uint64_t **tres_cnt, char *tres_str, 486 uint64_t init_val, bool locked); 487 488 /* Creates all the tres arrays for an association. 489 * NOTE: The assoc_mgr tres read lock needs to be locked before this 490 * is called. */ 491 extern void assoc_mgr_set_assoc_tres_cnt(slurmdb_assoc_rec_t *assoc); 492 493 /* Creates all the tres arrays for a QOS. 494 * NOTE: The assoc_mgr tres read lock needs to be locked before this 495 * is called. */ 496 extern void assoc_mgr_set_qos_tres_cnt(slurmdb_qos_rec_t *qos); 497 498 /* Make a simple tres string from a tres count array. 499 * IN tres_cnt - counts of each tres used 500 * IN flags - TRES_STR_FLAG_SIMPLE or 0 for formatted string 501 * IN locked - if the assoc_mgr tres read lock is locked or not. 502 * RET char * of simple tres string 503 */ 504 extern char *assoc_mgr_make_tres_str_from_array( 505 uint64_t *tres_cnt, uint32_t flags, bool locked); 506 507 /* Fill in the default qos id or name given an association record. If 508 * none is given it gives the default qos for the system. 509 * IN/OUT: qos_rec - fills in the name or id of the default qos 510 * 511 * NOTE: READ lock needs to be set on associations and QOS before 512 * calling this. */ 513 extern void assoc_mgr_get_default_qos_info( 514 slurmdb_assoc_rec_t *assoc_ptr, slurmdb_qos_rec_t *qos_rec); 515 516 /* 517 * Calculate a weighted tres value. 518 * IN: tres_cnt - array of tres values of size g_tres_count. 519 * IN: weights - weights to apply to tres values of size g_tres_count. 520 * IN: flags - priority flags (toogle between MAX or SUM of tres). 521 * IN: locked - whether the tres read assoc mgr lock is locked or not. 522 * RET: returns the calculated tres weight. 523 */ 524 extern double assoc_mgr_tres_weighted(uint64_t *tres_cnt, double *weights, 525 uint16_t flags, bool locked); 526 527 /* Get TRES's old position. 528 * IN: cur_pos - the current position in the tres array. 529 */ 530 extern int assoc_mgr_get_old_tres_pos(int cur_pos); 531 532 /* Test whether the tres positions have changed since last reading the tres 533 * list. 534 */ 535 extern int assoc_mgr_tres_pos_changed(); 536 537 #endif /* _SLURM_ASSOC_MGR_H */ 538