1 /*
2 * Copyright 2004-2020 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU General Public License version 2
7 * or later (GPLv2+) WITHOUT ANY WARRANTY.
8 */
9
10 #include <crm_internal.h>
11
12 #include <unistd.h> /* pid_t, sleep, ssize_t */
13
14 #include <crm/cib.h>
15 #include <crm/cluster.h>
16 #include <crm/common/xml.h>
17 #include <crm/crm.h>
18 #include <crm/msg_xml.h>
19 #include <crm/common/xml_internal.h>
20
21 #include <pacemaker-controld.h>
22
23 static mainloop_io_t *pe_subsystem = NULL;
24
25 /*!
26 * \internal
27 * \brief Close any scheduler connection and free associated memory
28 */
29 void
pe_subsystem_free(void)30 pe_subsystem_free(void)
31 {
32 controld_clear_fsa_input_flags(R_PE_REQUIRED);
33 if (pe_subsystem) {
34 controld_expect_sched_reply(NULL);
35 mainloop_del_ipc_client(pe_subsystem);
36 pe_subsystem = NULL;
37 controld_clear_fsa_input_flags(R_PE_CONNECTED);
38 }
39 }
40
41 /*!
42 * \internal
43 * \brief Save CIB query result to file, raising FSA error
44 *
45 * \param[in] msg Ignored
46 * \param[in] call_id Call ID of CIB query
47 * \param[in] rc Return code of CIB query
48 * \param[in] output Result of CIB query
49 * \param[in] user_data Unique identifier for filename (will be freed)
50 *
51 * \note This is intended to be called after a scheduler connection fails.
52 */
53 static void
save_cib_contents(xmlNode * msg,int call_id,int rc,xmlNode * output,void * user_data)54 save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output,
55 void *user_data)
56 {
57 char *id = user_data;
58
59 register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
60 CRM_CHECK(id != NULL, return);
61
62 if (rc == pcmk_ok) {
63 char *filename = crm_strdup_printf(PE_STATE_DIR "/pe-core-%s.bz2", id);
64
65 if (write_xml_file(output, filename, TRUE) < 0) {
66 crm_err("Could not save Cluster Information Base to %s after scheduler crash",
67 filename);
68 } else {
69 crm_notice("Saved Cluster Information Base to %s after scheduler crash",
70 filename);
71 }
72 free(filename);
73 }
74 }
75
76 /*!
77 * \internal
78 * \brief Respond to scheduler connection failure
79 *
80 * \param[in] user_data Ignored
81 */
82 static void
pe_ipc_destroy(gpointer user_data)83 pe_ipc_destroy(gpointer user_data)
84 {
85 // If we aren't connected to the scheduler, we can't expect a reply
86 controld_expect_sched_reply(NULL);
87
88 if (pcmk_is_set(fsa_input_register, R_PE_REQUIRED)) {
89 int rc = pcmk_ok;
90 char *uuid_str = crm_generate_uuid();
91
92 crm_crit("Connection to the scheduler failed "
93 CRM_XS " uuid=%s", uuid_str);
94
95 /*
96 * The scheduler died...
97 *
98 * Save the current CIB so that we have a chance of
99 * figuring out what killed it.
100 *
101 * Delay raising the I_ERROR until the query below completes or
102 * 5s is up, whichever comes first.
103 *
104 */
105 rc = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local);
106 fsa_register_cib_callback(rc, FALSE, uuid_str, save_cib_contents);
107
108 } else {
109 crm_info("Connection to the scheduler released");
110 }
111
112 controld_clear_fsa_input_flags(R_PE_CONNECTED);
113 pe_subsystem = NULL;
114 mainloop_set_trigger(fsa_source);
115 return;
116 }
117
118 /*!
119 * \internal
120 * \brief Handle message from scheduler connection
121 *
122 * \param[in] buffer XML message (will be freed)
123 * \param[in] length Ignored
124 * \param[in] userdata Ignored
125 *
126 * \return 0
127 */
128 static int
pe_ipc_dispatch(const char * buffer,ssize_t length,gpointer userdata)129 pe_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata)
130 {
131 xmlNode *msg = string2xml(buffer);
132
133 if (msg) {
134 route_message(C_IPC_MESSAGE, msg);
135 }
136 free_xml(msg);
137 return 0;
138 }
139
140 /*!
141 * \internal
142 * \brief Make new connection to scheduler
143 *
144 * \return TRUE on success, FALSE otherwise
145 */
146 static bool
pe_subsystem_new(void)147 pe_subsystem_new(void)
148 {
149 struct ipc_client_callbacks pe_callbacks = {
150 .dispatch = pe_ipc_dispatch,
151 .destroy = pe_ipc_destroy
152 };
153 static bool retry_one = TRUE;
154
155 controld_set_fsa_input_flags(R_PE_REQUIRED);
156 retry:
157 pe_subsystem = mainloop_add_ipc_client(CRM_SYSTEM_PENGINE,
158 G_PRIORITY_DEFAULT,
159 5 * 1024 * 1024 /* 5MB */,
160 NULL, &pe_callbacks);
161 if (pe_subsystem == NULL) {
162 crm_debug("Could not connect to scheduler : %s(%d)", pcmk_rc_str(errno), errno);
163 if (errno == EAGAIN && retry_one) {
164 /* In rare cases, a SIGTERM may be received and the connection may fail when the cluster shuts down. */
165 /* At this time, the connection will be retried only once. */
166 crm_debug("Scheduler connection attempt.");
167 retry_one = FALSE;
168 goto retry;
169 }
170 return FALSE;
171 }
172 controld_set_fsa_input_flags(R_PE_CONNECTED);
173 return TRUE;
174 }
175
176 /*!
177 * \internal
178 * \brief Send an XML message to the scheduler
179 *
180 * \param[in] cmd XML message to send
181 *
182 * \return pcmk_ok on success, -errno otherwise
183 */
184 static int
pe_subsystem_send(xmlNode * cmd)185 pe_subsystem_send(xmlNode *cmd)
186 {
187 if (pe_subsystem) {
188 int sent = crm_ipc_send(mainloop_get_ipc_client(pe_subsystem), cmd,
189 0, 0, NULL);
190
191 if (sent == 0) {
192 sent = -ENODATA;
193 } else if (sent > 0) {
194 sent = pcmk_ok;
195 }
196 return sent;
197 }
198 return -ENOTCONN;
199 }
200
201 static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc,
202 xmlNode *output, void *user_data);
203
204 /* A_PE_START, A_PE_STOP, O_PE_RESTART */
205 void
do_pe_control(long long action,enum crmd_fsa_cause cause,enum crmd_fsa_state cur_state,enum crmd_fsa_input current_input,fsa_data_t * msg_data)206 do_pe_control(long long action,
207 enum crmd_fsa_cause cause,
208 enum crmd_fsa_state cur_state,
209 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
210 {
211 if (action & A_PE_STOP) {
212 pe_subsystem_free();
213 }
214 if ((action & A_PE_START)
215 && !pcmk_is_set(fsa_input_register, R_PE_CONNECTED)) {
216
217 if (cur_state == S_STOPPING) {
218 crm_info("Ignoring request to connect to scheduler while shutting down");
219
220 } else if (!pe_subsystem_new()) {
221 crm_warn("Could not connect to scheduler");
222 register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
223 }
224 }
225 }
226
227 int fsa_pe_query = 0;
228 char *fsa_pe_ref = NULL;
229 static mainloop_timer_t *controld_sched_timer = NULL;
230
231 // @TODO Make this a configurable cluster option if there's demand for it
232 #define SCHED_TIMEOUT_MS (120000)
233
234 /*!
235 * \internal
236 * \brief Handle a timeout waiting for scheduler reply
237 *
238 * \param[in] user_data Ignored
239 *
240 * \return FALSE (indicating that timer should not be restarted)
241 */
242 static gboolean
controld_sched_timeout(gpointer user_data)243 controld_sched_timeout(gpointer user_data)
244 {
245 if (AM_I_DC) {
246 /* If this node is the DC but can't communicate with the scheduler, just
247 * exit (and likely get fenced) so this node doesn't interfere with any
248 * further DC elections.
249 *
250 * @TODO We could try something less drastic first, like disconnecting
251 * and reconnecting to the scheduler, but something is likely going
252 * seriously wrong, so perhaps it's better to just fail as quickly as
253 * possible.
254 */
255 crmd_exit(CRM_EX_FATAL);
256 }
257 return FALSE;
258 }
259
260 void
controld_stop_sched_timer(void)261 controld_stop_sched_timer(void)
262 {
263 if (controld_sched_timer && fsa_pe_ref) {
264 crm_trace("Stopping timer for scheduler reply %s", fsa_pe_ref);
265 }
266 mainloop_timer_stop(controld_sched_timer);
267 }
268
269 /*!
270 * \internal
271 * \brief Set the scheduler request currently being waited on
272 *
273 * \param[in] msg Request to expect reply to (or NULL for none)
274 */
275 void
controld_expect_sched_reply(xmlNode * msg)276 controld_expect_sched_reply(xmlNode *msg)
277 {
278 char *ref = NULL;
279
280 if (msg) {
281 ref = crm_element_value_copy(msg, XML_ATTR_REFERENCE);
282 CRM_ASSERT(ref != NULL);
283
284 if (controld_sched_timer == NULL) {
285 controld_sched_timer = mainloop_timer_add("scheduler_reply_timer",
286 SCHED_TIMEOUT_MS, FALSE,
287 controld_sched_timeout,
288 NULL);
289 }
290 mainloop_timer_start(controld_sched_timer);
291 } else {
292 controld_stop_sched_timer();
293 }
294 free(fsa_pe_ref);
295 fsa_pe_ref = ref;
296 }
297
298 /*!
299 * \internal
300 * \brief Free the scheduler reply timer
301 */
302 void
controld_free_sched_timer(void)303 controld_free_sched_timer(void)
304 {
305 if (controld_sched_timer != NULL) {
306 mainloop_timer_del(controld_sched_timer);
307 controld_sched_timer = NULL;
308 }
309 }
310
311 /* A_PE_INVOKE */
312 void
do_pe_invoke(long long action,enum crmd_fsa_cause cause,enum crmd_fsa_state cur_state,enum crmd_fsa_input current_input,fsa_data_t * msg_data)313 do_pe_invoke(long long action,
314 enum crmd_fsa_cause cause,
315 enum crmd_fsa_state cur_state,
316 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
317 {
318 if (AM_I_DC == FALSE) {
319 crm_err("Not invoking scheduler because not DC: %s",
320 fsa_action2string(action));
321 return;
322 }
323
324 if (!pcmk_is_set(fsa_input_register, R_PE_CONNECTED)) {
325 if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)) {
326 crm_err("Cannot shut down gracefully without the scheduler");
327 register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL);
328
329 } else {
330 crm_info("Waiting for the scheduler to connect");
331 crmd_fsa_stall(FALSE);
332 controld_set_fsa_action_flags(A_PE_START);
333 trigger_fsa();
334 }
335 return;
336 }
337
338 if (cur_state != S_POLICY_ENGINE) {
339 crm_notice("Not invoking scheduler because in state %s",
340 fsa_state2string(cur_state));
341 return;
342 }
343 if (!pcmk_is_set(fsa_input_register, R_HAVE_CIB)) {
344 crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!");
345
346 /* start the join from scratch */
347 register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL);
348 return;
349 }
350
351 fsa_pe_query = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local);
352
353 crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query,
354 fsa_state2string(fsa_state));
355
356 controld_expect_sched_reply(NULL);
357 fsa_register_cib_callback(fsa_pe_query, FALSE, NULL, do_pe_invoke_callback);
358 }
359
360 static void
force_local_option(xmlNode * xml,const char * attr_name,const char * attr_value)361 force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value)
362 {
363 int max = 0;
364 int lpc = 0;
365 char *xpath_string = NULL;
366 xmlXPathObjectPtr xpathObj = NULL;
367
368 xpath_string = crm_strdup_printf("%.128s//%s//nvpair[@name='%.128s']",
369 get_object_path(XML_CIB_TAG_CRMCONFIG),
370 XML_CIB_TAG_PROPSET, attr_name);
371 xpathObj = xpath_search(xml, xpath_string);
372 max = numXpathResults(xpathObj);
373 free(xpath_string);
374
375 for (lpc = 0; lpc < max; lpc++) {
376 xmlNode *match = getXpathResult(xpathObj, lpc);
377 crm_trace("Forcing %s/%s = %s", ID(match), attr_name, attr_value);
378 crm_xml_add(match, XML_NVPAIR_ATTR_VALUE, attr_value);
379 }
380
381 if(max == 0) {
382 xmlNode *configuration = NULL;
383 xmlNode *crm_config = NULL;
384 xmlNode *cluster_property_set = NULL;
385
386 crm_trace("Creating %s-%s for %s=%s",
387 CIB_OPTIONS_FIRST, attr_name, attr_name, attr_value);
388
389 configuration = pcmk__xe_match(xml, XML_CIB_TAG_CONFIGURATION, NULL,
390 NULL);
391 if (configuration == NULL) {
392 configuration = create_xml_node(xml, XML_CIB_TAG_CONFIGURATION);
393 }
394
395 crm_config = pcmk__xe_match(configuration, XML_CIB_TAG_CRMCONFIG, NULL,
396 NULL);
397 if (crm_config == NULL) {
398 crm_config = create_xml_node(configuration, XML_CIB_TAG_CRMCONFIG);
399 }
400
401 cluster_property_set = pcmk__xe_match(crm_config, XML_CIB_TAG_PROPSET,
402 NULL, NULL);
403 if (cluster_property_set == NULL) {
404 cluster_property_set = create_xml_node(crm_config, XML_CIB_TAG_PROPSET);
405 crm_xml_add(cluster_property_set, XML_ATTR_ID, CIB_OPTIONS_FIRST);
406 }
407
408 xml = create_xml_node(cluster_property_set, XML_CIB_TAG_NVPAIR);
409
410 crm_xml_set_id(xml, "%s-%s", CIB_OPTIONS_FIRST, attr_name);
411 crm_xml_add(xml, XML_NVPAIR_ATTR_NAME, attr_name);
412 crm_xml_add(xml, XML_NVPAIR_ATTR_VALUE, attr_value);
413 }
414 freeXpathObject(xpathObj);
415 }
416
417 static void
do_pe_invoke_callback(xmlNode * msg,int call_id,int rc,xmlNode * output,void * user_data)418 do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
419 {
420 xmlNode *cmd = NULL;
421 pid_t watchdog = pcmk__locate_sbd();
422
423 if (rc != pcmk_ok) {
424 crm_err("Could not retrieve the Cluster Information Base: %s "
425 CRM_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id);
426 register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
427 return;
428
429 } else if (call_id != fsa_pe_query) {
430 crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query);
431 return;
432
433 } else if (!AM_I_DC || !pcmk_is_set(fsa_input_register, R_PE_CONNECTED)) {
434 crm_debug("No need to invoke the scheduler anymore");
435 return;
436
437 } else if (fsa_state != S_POLICY_ENGINE) {
438 crm_debug("Discarding scheduler request in state: %s",
439 fsa_state2string(fsa_state));
440 return;
441
442 /* this callback counts as 1 */
443 } else if (num_cib_op_callbacks() > 1) {
444 crm_debug("Re-asking for the CIB: %d other peer updates still pending",
445 (num_cib_op_callbacks() - 1));
446 sleep(1);
447 controld_set_fsa_action_flags(A_PE_INVOKE);
448 trigger_fsa();
449 return;
450 }
451
452 CRM_LOG_ASSERT(output != NULL);
453
454 /* Refresh the remote node cache and the known node cache when the
455 * scheduler is invoked */
456 pcmk__refresh_node_caches_from_cib(output);
457
458 crm_xml_add(output, XML_ATTR_DC_UUID, fsa_our_uuid);
459 crm_xml_add_int(output, XML_ATTR_HAVE_QUORUM, fsa_has_quorum);
460
461 force_local_option(output, XML_ATTR_HAVE_WATCHDOG, pcmk__btoa(watchdog));
462
463 if (ever_had_quorum && crm_have_quorum == FALSE) {
464 crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1);
465 }
466
467 cmd = create_request(CRM_OP_PECALC, output, NULL, CRM_SYSTEM_PENGINE, CRM_SYSTEM_DC, NULL);
468
469 rc = pe_subsystem_send(cmd);
470 if (rc < 0) {
471 crm_err("Could not contact the scheduler: %s " CRM_XS " rc=%d",
472 pcmk_strerror(rc), rc);
473 register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
474 } else {
475 controld_expect_sched_reply(cmd);
476 crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, quorate=%d",
477 fsa_pe_query, fsa_pe_ref, crm_peer_seq, fsa_has_quorum);
478 }
479 free_xml(cmd);
480 }
481