1 /*****************************************************************************\
2 * slurm_acct_gather_filesystem.c - implementation-independent job filesystem
3 * accounting plugin definitions
4 *****************************************************************************
5 * Copyright (C) 2013 Bull.
6 * Written by Yiannis Georgiou <yiannis.georgiou@bull.net>
7 *
8 * This file is part of Slurm, a resource management program.
9 * For details, see <https://slurm.schedmd.com>.
10 * Please also read the included file: DISCLAIMER.
11 *
12 * Slurm is free software; you can redistribute it and/or modify it under
13 * the terms of the GNU General Public License as published by the Free
14 * Software Foundation; either version 2 of the License, or (at your option)
15 * any later version.
16 *
17 * In addition, as a special exception, the copyright holders give permission
18 * to link the code of portions of this program with the OpenSSL library under
19 * certain conditions as described in each individual source file, and
20 * distribute linked combinations including the two. You must obey the GNU
21 * General Public License in all respects for all of the code used other than
22 * OpenSSL. If you modify file(s) with this exception, you may extend this
23 * exception to your version of the file(s), but you are not obligated to do
24 * so. If you do not wish to do so, delete this exception statement from your
25 * version. If you delete this exception statement from all source files in
26 * the program, then also delete it here.
27 *
28 * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
29 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
30 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
31 * details.
32 *
33 * You should have received a copy of the GNU General Public License along
34 * with Slurm; if not, write to the Free Software Foundation, Inc.,
35 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
36 \*****************************************************************************/
37
38 #ifdef HAVE_CONFIG_H
39 # include "config.h"
40 #endif
41
42 #include <pthread.h>
43 #include <stdlib.h>
44 #include <string.h>
45
46 #if HAVE_SYS_PRCTL_H
47 # include <sys/prctl.h>
48 #endif
49
50 #include "src/common/macros.h"
51 #include "src/common/plugin.h"
52 #include "src/common/plugrack.h"
53 #include "src/common/slurm_protocol_api.h"
54 #include "src/common/xmalloc.h"
55 #include "src/common/xstring.h"
56 #include "src/common/slurm_acct_gather_filesystem.h"
57 #include "src/slurmd/slurmstepd/slurmstepd_job.h"
58
59 typedef struct slurm_acct_gather_filesystem_ops {
60 int (*node_update) (void);
61 void (*conf_options) (s_p_options_t **full_options,
62 int *full_options_cnt);
63 void (*conf_set) (s_p_hashtbl_t *tbl);
64 void (*conf_values) (List *data);
65 int (*get_data) (acct_gather_data_t *data);
66 } slurm_acct_gather_filesystem_ops_t;
/*
 * Plugin symbol names, one per member of slurm_acct_gather_filesystem_ops_t.
 * The entries must appear in the same order as the members are declared.
 */
static const char *syms[] = {
	"acct_gather_filesystem_p_node_update",		/* node_update */
	"acct_gather_filesystem_p_conf_options",	/* conf_options */
	"acct_gather_filesystem_p_conf_set",		/* conf_set */
	"acct_gather_filesystem_p_conf_values",		/* conf_values */
	"acct_gather_filesystem_p_get_data",		/* get_data */
};
78
79 static slurm_acct_gather_filesystem_ops_t ops;
80 static plugin_context_t *g_context = NULL;
81 static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER;
82 static bool init_run = false;
83 static bool acct_shutdown = true;
84 static int freq = 0;
85 static pthread_t watch_node_thread_id = 0;
86 static acct_gather_profile_timer_t *profile_timer =
87 &acct_gather_profile_timer[PROFILE_FILESYSTEM];
88
_watch_node(void * arg)89 static void *_watch_node(void *arg)
90 {
91 #if HAVE_SYS_PRCTL_H
92 if (prctl(PR_SET_NAME, "acctg_fs", NULL, NULL, NULL) < 0) {
93 error("%s: cannot set my name to %s %m", __func__, "acctg_fs");
94 }
95 #endif
96
97 while (init_run && acct_gather_profile_test()) {
98 /* Do this until shutdown is requested */
99 slurm_mutex_lock(&g_context_lock);
100 (*(ops.node_update))();
101 slurm_mutex_unlock(&g_context_lock);
102
103 slurm_mutex_lock(&profile_timer->notify_mutex);
104 slurm_cond_wait(&profile_timer->notify,
105 &profile_timer->notify_mutex);
106 slurm_mutex_unlock(&profile_timer->notify_mutex);
107 }
108 return NULL;
109 }
110
acct_gather_filesystem_init(void)111 extern int acct_gather_filesystem_init(void)
112 {
113 int retval = SLURM_SUCCESS;
114 char *plugin_type = "acct_gather_filesystem";
115 char *type = NULL;
116
117 if (init_run && g_context)
118 return retval;
119
120 slurm_mutex_lock(&g_context_lock);
121
122 if (g_context)
123 goto done;
124
125 type = slurm_get_acct_gather_filesystem_type();
126
127 g_context = plugin_context_create(
128 plugin_type, type, (void **)&ops, syms, sizeof(syms));
129
130 if (!g_context) {
131 error("cannot create %s context for %s", plugin_type, type);
132 retval = SLURM_ERROR;
133 goto done;
134 }
135 init_run = true;
136
137 done:
138 slurm_mutex_unlock(&g_context_lock);
139 if (retval == SLURM_SUCCESS)
140 retval = acct_gather_conf_init();
141 if (retval != SLURM_SUCCESS)
142 fatal("can not open the %s plugin", type);
143 xfree(type);
144
145 return retval;
146 }
147
acct_gather_filesystem_fini(void)148 extern int acct_gather_filesystem_fini(void)
149 {
150 int rc = SLURM_SUCCESS;
151
152 slurm_mutex_lock(&g_context_lock);
153 if (g_context) {
154 init_run = false;
155
156 if (watch_node_thread_id) {
157 slurm_mutex_unlock(&g_context_lock);
158 slurm_mutex_lock(&profile_timer->notify_mutex);
159 slurm_cond_signal(&profile_timer->notify);
160 slurm_mutex_unlock(&profile_timer->notify_mutex);
161 pthread_join(watch_node_thread_id, NULL);
162 slurm_mutex_lock(&g_context_lock);
163 }
164
165 rc = plugin_context_destroy(g_context);
166 g_context = NULL;
167 }
168 slurm_mutex_unlock(&g_context_lock);
169
170 return rc;
171 }
172
173 /*
174 * This is sent an array that will be filled in from the plugin(s). It is not a
175 * direct pointer since we could have (in the future) this be stackable.
176 */
acct_gather_filesystem_g_get_data(acct_gather_data_t * data)177 extern int acct_gather_filesystem_g_get_data(acct_gather_data_t *data)
178 {
179 int retval = SLURM_SUCCESS;
180
181 if (acct_gather_filesystem_init() < 0)
182 return SLURM_ERROR;
183 retval = (*(ops.get_data))(data);
184 return retval;
185 }
186
acct_gather_filesystem_startpoll(uint32_t frequency)187 extern int acct_gather_filesystem_startpoll(uint32_t frequency)
188 {
189 int retval = SLURM_SUCCESS;
190
191 if (acct_gather_filesystem_init() < 0)
192 return SLURM_ERROR;
193
194 if (!acct_shutdown) {
195 error("acct_gather_filesystem_startpoll: "
196 "poll already started!");
197 return retval;
198 }
199
200 acct_shutdown = false;
201
202 freq = frequency;
203
204 if (frequency == 0) { /* don't want dynamic monitoring? */
205 debug2("acct_gather_filesystem dynamic logging disabled");
206 return retval;
207 }
208
209 /* create polling thread */
210 slurm_thread_create(&watch_node_thread_id, _watch_node, NULL);
211
212 debug3("acct_gather_filesystem dynamic logging enabled");
213
214 return retval;
215 }
216
217
/*
 * Forward full_options / full_options_cnt to the plugin's conf_options
 * entry point.
 *
 * Returns SLURM_ERROR when the plugin cannot be initialized, SLURM_SUCCESS
 * otherwise.
 */
extern int acct_gather_filesystem_g_conf_options(s_p_options_t **full_options,
						 int *full_options_cnt)
{
	int rc = SLURM_ERROR;

	if (acct_gather_filesystem_init() >= 0) {
		ops.conf_options(full_options, full_options_cnt);
		rc = SLURM_SUCCESS;
	}

	return rc;
}
226
/*
 * Forward the parsed configuration table to the plugin's conf_set entry
 * point.
 *
 * Returns SLURM_ERROR when the plugin cannot be initialized, SLURM_SUCCESS
 * otherwise.
 */
extern int acct_gather_filesystem_g_conf_set(s_p_hashtbl_t *tbl)
{
	int rc = SLURM_ERROR;

	if (acct_gather_filesystem_init() >= 0) {
		ops.conf_set(tbl);
		rc = SLURM_SUCCESS;
	}

	return rc;
}
235
236
acct_gather_filesystem_g_conf_values(void * data)237 extern int acct_gather_filesystem_g_conf_values(void *data)
238 {
239 if (acct_gather_filesystem_init() < 0)
240 return SLURM_ERROR;
241
242 (*(ops.conf_values))(data);
243 return SLURM_SUCCESS;
244 }
245