1 /*****************************************************************************\
2  *  slurm_acct_gather_filesystem.c - implementation-independent job filesystem
3  *  accounting plugin definitions
4  *****************************************************************************
5  *  Copyright (C) 2013 Bull.
6  *  Written by Yiannis Georgiou <yiannis.georgiou@bull.net>
7  *
8  *  This file is part of Slurm, a resource management program.
9  *  For details, see <https://slurm.schedmd.com>.
10  *  Please also read the included file: DISCLAIMER.
11  *
12  *  Slurm is free software; you can redistribute it and/or modify it under
13  *  the terms of the GNU General Public License as published by the Free
14  *  Software Foundation; either version 2 of the License, or (at your option)
15  *  any later version.
16  *
17  *  In addition, as a special exception, the copyright holders give permission
18  *  to link the code of portions of this program with the OpenSSL library under
19  *  certain conditions as described in each individual source file, and
20  *  distribute linked combinations including the two. You must obey the GNU
21  *  General Public License in all respects for all of the code used other than
22  *  OpenSSL. If you modify file(s) with this exception, you may extend this
23  *  exception to your version of the file(s), but you are not obligated to do
24  *  so. If you do not wish to do so, delete this exception statement from your
25  *  version.  If you delete this exception statement from all source files in
26  *  the program, then also delete it here.
27  *
28  *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
29  *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
30  *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
31  *  details.
32  *
33  *  You should have received a copy of the GNU General Public License along
34  *  with Slurm; if not, write to the Free Software Foundation, Inc.,
35  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
36 \*****************************************************************************/
37 
38 #ifdef HAVE_CONFIG_H
39 #  include "config.h"
40 #endif
41 
42 #include <pthread.h>
43 #include <stdlib.h>
44 #include <string.h>
45 
46 #if HAVE_SYS_PRCTL_H
47 #  include <sys/prctl.h>
48 #endif
49 
50 #include "src/common/macros.h"
51 #include "src/common/plugin.h"
52 #include "src/common/plugrack.h"
53 #include "src/common/slurm_protocol_api.h"
54 #include "src/common/xmalloc.h"
55 #include "src/common/xstring.h"
56 #include "src/common/slurm_acct_gather_filesystem.h"
57 #include "src/slurmd/slurmstepd/slurmstepd_job.h"
58 
59 typedef struct slurm_acct_gather_filesystem_ops {
60 	int (*node_update)	(void);
61 	void (*conf_options)	(s_p_options_t **full_options,
62 				 int *full_options_cnt);
63 	void (*conf_set)	(s_p_hashtbl_t *tbl);
64 	void (*conf_values)        (List *data);
65 	int (*get_data)		(acct_gather_data_t *data);
66 } slurm_acct_gather_filesystem_ops_t;
/*
 * Names of the plugin symbols to look up.  Each entry corresponds, position
 * by position, to a member of slurm_acct_gather_filesystem_ops_t; keep both
 * lists in the same order.
 */
static const char *syms[] = {
	"acct_gather_filesystem_p_node_update",
	"acct_gather_filesystem_p_conf_options",
	"acct_gather_filesystem_p_conf_set",
	"acct_gather_filesystem_p_conf_values",
	"acct_gather_filesystem_p_get_data",
};
78 
79 static slurm_acct_gather_filesystem_ops_t ops;
80 static plugin_context_t *g_context = NULL;
81 static pthread_mutex_t g_context_lock =	PTHREAD_MUTEX_INITIALIZER;
82 static bool init_run = false;
83 static bool acct_shutdown = true;
84 static int freq = 0;
85 static pthread_t watch_node_thread_id = 0;
86 static acct_gather_profile_timer_t *profile_timer =
87 	&acct_gather_profile_timer[PROFILE_FILESYSTEM];
88 
_watch_node(void * arg)89 static void *_watch_node(void *arg)
90 {
91 #if HAVE_SYS_PRCTL_H
92 	if (prctl(PR_SET_NAME, "acctg_fs", NULL, NULL, NULL) < 0) {
93 		error("%s: cannot set my name to %s %m", __func__, "acctg_fs");
94 	}
95 #endif
96 
97 	while (init_run && acct_gather_profile_test()) {
98 		/* Do this until shutdown is requested */
99 		slurm_mutex_lock(&g_context_lock);
100 		(*(ops.node_update))();
101 		slurm_mutex_unlock(&g_context_lock);
102 
103 		slurm_mutex_lock(&profile_timer->notify_mutex);
104 		slurm_cond_wait(&profile_timer->notify,
105 				&profile_timer->notify_mutex);
106 		slurm_mutex_unlock(&profile_timer->notify_mutex);
107 	}
108 	return NULL;
109 }
110 
acct_gather_filesystem_init(void)111 extern int acct_gather_filesystem_init(void)
112 {
113 	int retval = SLURM_SUCCESS;
114 	char *plugin_type = "acct_gather_filesystem";
115 	char *type = NULL;
116 
117 	if (init_run && g_context)
118 		return retval;
119 
120 	slurm_mutex_lock(&g_context_lock);
121 
122 	if (g_context)
123 		goto done;
124 
125 	type = slurm_get_acct_gather_filesystem_type();
126 
127 	g_context = plugin_context_create(
128 		plugin_type, type, (void **)&ops, syms, sizeof(syms));
129 
130 	if (!g_context) {
131 		error("cannot create %s context for %s", plugin_type, type);
132 		retval = SLURM_ERROR;
133 		goto done;
134 	}
135 	init_run = true;
136 
137 done:
138 	slurm_mutex_unlock(&g_context_lock);
139 	if (retval == SLURM_SUCCESS)
140                 retval = acct_gather_conf_init();
141 	if (retval != SLURM_SUCCESS)
142 		fatal("can not open the %s plugin", type);
143 	xfree(type);
144 
145 	return retval;
146 }
147 
acct_gather_filesystem_fini(void)148 extern int acct_gather_filesystem_fini(void)
149 {
150 	int rc = SLURM_SUCCESS;
151 
152 	slurm_mutex_lock(&g_context_lock);
153 	if (g_context) {
154 		init_run = false;
155 
156 		if (watch_node_thread_id) {
157 			slurm_mutex_unlock(&g_context_lock);
158 			slurm_mutex_lock(&profile_timer->notify_mutex);
159 			slurm_cond_signal(&profile_timer->notify);
160 			slurm_mutex_unlock(&profile_timer->notify_mutex);
161 			pthread_join(watch_node_thread_id, NULL);
162 			slurm_mutex_lock(&g_context_lock);
163 		}
164 
165 		rc = plugin_context_destroy(g_context);
166 		g_context = NULL;
167 	}
168 	slurm_mutex_unlock(&g_context_lock);
169 
170 	return rc;
171 }
172 
173 /*
174  * This is sent an array that will be filled in from the plugin(s).  It is not a
175  * direct pointer since we could have (in the future) this be stackable.
176  */
acct_gather_filesystem_g_get_data(acct_gather_data_t * data)177 extern int acct_gather_filesystem_g_get_data(acct_gather_data_t *data)
178 {
179 	int retval = SLURM_SUCCESS;
180 
181 	if (acct_gather_filesystem_init() < 0)
182 		return SLURM_ERROR;
183 	retval = (*(ops.get_data))(data);
184 	return retval;
185 }
186 
acct_gather_filesystem_startpoll(uint32_t frequency)187 extern int acct_gather_filesystem_startpoll(uint32_t frequency)
188 {
189 	int retval = SLURM_SUCCESS;
190 
191 	if (acct_gather_filesystem_init() < 0)
192 		return SLURM_ERROR;
193 
194 	if (!acct_shutdown) {
195 		error("acct_gather_filesystem_startpoll: "
196 		      "poll already started!");
197 		return retval;
198 	}
199 
200 	acct_shutdown = false;
201 
202 	freq = frequency;
203 
204 	if (frequency == 0) {   /* don't want dynamic monitoring? */
205 		debug2("acct_gather_filesystem dynamic logging disabled");
206 		return retval;
207 	}
208 
209 	/* create polling thread */
210 	slurm_thread_create(&watch_node_thread_id, _watch_node, NULL);
211 
212 	debug3("acct_gather_filesystem dynamic logging enabled");
213 
214 	return retval;
215 }
216 
217 
/*
 * Forward the option table and its counter to the plugin's conf_options().
 *
 * IN/OUT full_options     - passed through to the plugin unchanged
 * IN/OUT full_options_cnt - passed through to the plugin unchanged
 * RET SLURM_ERROR if initialisation reports a negative value, otherwise
 *     SLURM_SUCCESS
 */
extern int acct_gather_filesystem_g_conf_options(s_p_options_t **full_options,
						  int *full_options_cnt)
{
	int rc = SLURM_ERROR;

	if (acct_gather_filesystem_init() >= 0) {
		ops.conf_options(full_options, full_options_cnt);
		rc = SLURM_SUCCESS;
	}

	return rc;
}
226 
/*
 * Forward a parsed configuration table to the plugin's conf_set().
 *
 * IN tbl - passed through to the plugin unchanged
 * RET SLURM_ERROR if initialisation reports a negative value, otherwise
 *     SLURM_SUCCESS
 */
extern int acct_gather_filesystem_g_conf_set(s_p_hashtbl_t *tbl)
{
	int rc = SLURM_ERROR;

	if (acct_gather_filesystem_init() >= 0) {
		ops.conf_set(tbl);
		rc = SLURM_SUCCESS;
	}

	return rc;
}
235 
236 
acct_gather_filesystem_g_conf_values(void * data)237 extern int acct_gather_filesystem_g_conf_values(void *data)
238 {
239 	if (acct_gather_filesystem_init() < 0)
240 		return SLURM_ERROR;
241 
242 	(*(ops.conf_values))(data);
243 	return SLURM_SUCCESS;
244 }
245