1 /*
2 * Copyright 2013-2019 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
#include <crm_internal.h>

#include <limits.h>

#include <crm/crm.h>
#include <crm/msg_xml.h>

#include <crmd.h>
#include <crmd_fsa.h>
#include <crmd_messages.h>
#include <crmd_callbacks.h>
#include <crmd_lrm.h>
#include <crm/lrmd.h>
#include <crm/services.h>
32
33 #define REMOTE_LRMD_RA "remote"
34
35 /* The max start timeout before cmd retry */
36 #define MAX_START_TIMEOUT_MS 10000
37
38 typedef struct remote_ra_cmd_s {
39 /*! the local node the cmd is issued from */
40 char *owner;
41 /*! the remote node the cmd is executed on */
42 char *rsc_id;
43 /*! the action to execute */
44 char *action;
45 /*! some string the client wants us to give it back */
46 char *userdata;
47 char *exit_reason; // descriptive text on error
48 /*! start delay in ms */
49 int start_delay;
50 /*! timer id used for start delay. */
51 int delay_id;
52 /*! timeout in ms for cmd */
53 int timeout;
54 int remaining_timeout;
55 /*! recurring interval in ms */
56 int interval;
57 /*! interval timer id */
58 int interval_id;
59 int reported_success;
60 int monitor_timeout_id;
61 int takeover_timeout_id;
62 /*! action parameters */
63 lrmd_key_value_t *params;
64 /*! executed rc */
65 int rc;
66 int op_status;
67 int call_id;
68 time_t start_time;
69 gboolean cancel;
70 } remote_ra_cmd_t;
71
72 enum remote_migration_status {
73 expect_takeover = 1,
74 takeover_complete,
75 };
76
77 typedef struct remote_ra_data_s {
78 crm_trigger_t *work;
79 remote_ra_cmd_t *cur_cmd;
80 GList *cmds;
81 GList *recurring_cmds;
82
83 enum remote_migration_status migrate_status;
84
85 gboolean active;
86 gboolean is_maintenance; /* kind of complex to determine from crmd-context
87 * so we have it signalled back with the
88 * transition from pengine
89 */
90 gboolean controlling_guest; /* Similar for if we are controlling a guest
91 * or a bare-metal remote.
92 * Fortunately there is a meta-attribute in
93 * the transition already and as the
94 * situation doesn't change over time we can
95 * use the resource start for noting down
96 * the information for later use when the
97 * attributes aren't at hand.
98 */
99 } remote_ra_data_t;
100
101 static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
102 static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
103 static GList *fail_all_monitor_cmds(GList * list);
104
105 static void
free_cmd(gpointer user_data)106 free_cmd(gpointer user_data)
107 {
108 remote_ra_cmd_t *cmd = user_data;
109
110 if (!cmd) {
111 return;
112 }
113 if (cmd->delay_id) {
114 g_source_remove(cmd->delay_id);
115 }
116 if (cmd->interval_id) {
117 g_source_remove(cmd->interval_id);
118 }
119 if (cmd->monitor_timeout_id) {
120 g_source_remove(cmd->monitor_timeout_id);
121 }
122 if (cmd->takeover_timeout_id) {
123 g_source_remove(cmd->takeover_timeout_id);
124 }
125 free(cmd->owner);
126 free(cmd->rsc_id);
127 free(cmd->action);
128 free(cmd->userdata);
129 free(cmd->exit_reason);
130 lrmd_key_value_freeall(cmd->params);
131 free(cmd);
132 }
133
/*!
 * \internal
 * \brief Generate a call ID for a synthesized remote RA operation
 *
 * \return Next call ID (always a positive integer)
 *
 * \note The counter wraps back to 1 before reaching INT_MAX, because
 *       incrementing a signed int past INT_MAX is undefined behavior.
 */
static int
generate_callid(void)
{
    static int remote_ra_callid = 0;

    if (remote_ra_callid >= INT_MAX) {
        // Wrap before overflowing (signed overflow is UB in C)
        remote_ra_callid = 1;
    } else {
        remote_ra_callid++;
    }

    return remote_ra_callid;
}
146
147 static gboolean
recurring_helper(gpointer data)148 recurring_helper(gpointer data)
149 {
150 remote_ra_cmd_t *cmd = data;
151 lrm_state_t *connection_rsc = NULL;
152
153 cmd->interval_id = 0;
154 connection_rsc = lrm_state_find(cmd->rsc_id);
155 if (connection_rsc && connection_rsc->remote_ra_data) {
156 remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
157
158 ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
159
160 ra_data->cmds = g_list_append(ra_data->cmds, cmd);
161 mainloop_set_trigger(ra_data->work);
162 }
163 return FALSE;
164 }
165
166 static gboolean
start_delay_helper(gpointer data)167 start_delay_helper(gpointer data)
168 {
169 remote_ra_cmd_t *cmd = data;
170 lrm_state_t *connection_rsc = NULL;
171
172 cmd->delay_id = 0;
173 connection_rsc = lrm_state_find(cmd->rsc_id);
174 if (connection_rsc && connection_rsc->remote_ra_data) {
175 remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
176
177 mainloop_set_trigger(ra_data->work);
178 }
179 return FALSE;
180 }
181
182 /*!
183 * \internal
184 * \brief Handle cluster communication related to pacemaker_remote node joining
185 *
186 * \param[in] node_name Name of newly integrated pacemaker_remote node
187 */
188 static void
remote_node_up(const char * node_name)189 remote_node_up(const char *node_name)
190 {
191 int call_opt, call_id = 0;
192 xmlNode *update, *state;
193 crm_node_t *node;
194
195 CRM_CHECK(node_name != NULL, return);
196 crm_info("Announcing pacemaker_remote node %s", node_name);
197
198 /* Clear node's entire state (resource history and transient attributes).
199 * The transient attributes should and normally will be cleared when the
200 * node leaves, but since remote node state has a number of corner cases,
201 * clear them here as well, to be sure.
202 */
203 call_opt = crmd_cib_smart_opt();
204 controld_delete_node_state(node_name, controld_section_all, call_opt);
205
206 /* Clear node's probed attribute */
207 update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
208
209 /* Ensure node is in the remote peer cache with member status */
210 node = crm_remote_peer_get(node_name);
211 CRM_CHECK(node != NULL, return);
212 crm_update_peer_state(__FUNCTION__, node, CRM_NODE_MEMBER, 0);
213
214 /* pacemaker_remote nodes don't participate in the membership layer,
215 * so cluster nodes don't automatically get notified when they come and go.
216 * We send a cluster message to the DC, and update the CIB node state entry,
217 * so the DC will get it sooner (via message) or later (via CIB refresh),
218 * and any other interested parties can query the CIB.
219 */
220 send_remote_state_message(node_name, TRUE);
221
222 update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
223 state = create_node_state_update(node, node_update_cluster, update,
224 __FUNCTION__);
225
226 /* Clear the XML_NODE_IS_FENCED flag in the node state. If the node ever
227 * needs to be fenced, this flag will allow various actions to determine
228 * whether the fencing has happened yet.
229 */
230 crm_xml_add(state, XML_NODE_IS_FENCED, "0");
231
232 /* TODO: If the remote connection drops, and this (async) CIB update either
233 * failed or has not yet completed, later actions could mistakenly think the
234 * node has already been fenced (if the XML_NODE_IS_FENCED attribute was
235 * previously set, because it won't have been cleared). This could prevent
236 * actual fencing or allow recurring monitor failures to be cleared too
237 * soon. Ideally, we wouldn't rely on the CIB for the fenced status.
238 */
239 fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
240 if (call_id < 0) {
241 crm_perror(LOG_WARNING, "%s CIB node state setup", node_name);
242 }
243 free_xml(update);
244 }
245
// What to do with a remote node's resource history when it goes down
enum down_opts {
    DOWN_KEEP_LRM,  // Keep resource history (normal connection loss)
    DOWN_ERASE_LRM  // Erase resource history (used after successful fencing)
};
250
251 /*!
252 * \internal
253 * \brief Handle cluster communication related to pacemaker_remote node leaving
254 *
255 * \param[in] node_name Name of lost node
256 * \param[in] opts Whether to keep or erase LRM history
257 */
258 static void
remote_node_down(const char * node_name,const enum down_opts opts)259 remote_node_down(const char *node_name, const enum down_opts opts)
260 {
261 xmlNode *update;
262 int call_id = 0;
263 int call_opt = crmd_cib_smart_opt();
264 crm_node_t *node;
265
266 /* Purge node from attrd's memory */
267 update_attrd_remote_node_removed(node_name, NULL);
268
269 /* Normally, only node attributes should be erased, and the resource history
270 * should be kept until the node comes back up. However, after a successful
271 * fence, we want to clear the history as well, so we don't think resources
272 * are still running on the node.
273 */
274 if (opts == DOWN_ERASE_LRM) {
275 controld_delete_node_state(node_name, controld_section_all, call_opt);
276 } else {
277 controld_delete_node_state(node_name, controld_section_attrs, call_opt);
278 }
279
280 /* Ensure node is in the remote peer cache with lost state */
281 node = crm_remote_peer_get(node_name);
282 CRM_CHECK(node != NULL, return);
283 crm_update_peer_state(__FUNCTION__, node, CRM_NODE_LOST, 0);
284
285 /* Notify DC */
286 send_remote_state_message(node_name, FALSE);
287
288 /* Update CIB node state */
289 update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
290 create_node_state_update(node, node_update_cluster, update, __FUNCTION__);
291 fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
292 if (call_id < 0) {
293 crm_perror(LOG_ERR, "%s CIB node state update", node_name);
294 }
295 free_xml(update);
296 }
297
298 /*!
299 * \internal
300 * \brief Handle effects of a remote RA command on node state
301 *
302 * \param[in] cmd Completed remote RA command
303 */
304 static void
check_remote_node_state(remote_ra_cmd_t * cmd)305 check_remote_node_state(remote_ra_cmd_t *cmd)
306 {
307 /* Only successful actions can change node state */
308 if (cmd->rc != PCMK_OCF_OK) {
309 return;
310 }
311
312 if (safe_str_eq(cmd->action, "start")) {
313 remote_node_up(cmd->rsc_id);
314
315 } else if (safe_str_eq(cmd->action, "migrate_from")) {
316 /* After a successful migration, we don't need to do remote_node_up()
317 * because the DC already knows the node is up, and we don't want to
318 * clear LRM history etc. We do need to add the remote node to this
319 * host's remote peer cache, because (unless it happens to be DC)
320 * it hasn't been tracking the remote node, and other code relies on
321 * the cache to distinguish remote nodes from unseen cluster nodes.
322 */
323 crm_node_t *node = crm_remote_peer_get(cmd->rsc_id);
324
325 CRM_CHECK(node != NULL, return);
326 crm_update_peer_state(__FUNCTION__, node, CRM_NODE_MEMBER, 0);
327
328 } else if (safe_str_eq(cmd->action, "stop")) {
329 lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id);
330 remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL;
331
332 if (ra_data) {
333 if (ra_data->migrate_status != takeover_complete) {
334 /* Stop means down if we didn't successfully migrate elsewhere */
335 remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);
336 } else if (AM_I_DC == FALSE) {
337 /* Only the connection host and DC track node state,
338 * so if the connection migrated elsewhere and we aren't DC,
339 * un-cache the node, so we don't have stale info
340 */
341 crm_remote_peer_cache_remove(cmd->rsc_id);
342 }
343 }
344 }
345
346 /* We don't do anything for successful monitors, which is correct for
347 * routine recurring monitors, and for monitors on nodes where the
348 * connection isn't supposed to be (the cluster will stop the connection in
349 * that case). However, if the initial probe finds the connection already
350 * active on the node where we want it, we probably should do
351 * remote_node_up(). Unfortunately, we can't distinguish that case here.
352 * Given that connections have to be initiated by the cluster, the chance of
353 * that should be close to zero.
354 */
355 }
356
/*!
 * \internal
 * \brief Report a completed remote RA command's result to the controller
 *
 * Updates node state via check_remote_node_state(), builds a synthetic
 * lrmd_event_data_t from \p cmd, and hands it to lrm_op_callback().
 *
 * \param[in] cmd  Completed remote RA command
 */
static void
report_remote_ra_result(remote_ra_cmd_t * cmd)
{
    lrmd_event_data_t op = { 0, };

    check_remote_node_state(cmd);

    // Translate the command into an executor event
    op.type = lrmd_event_exec_complete;
    op.rsc_id = cmd->rsc_id;
    op.op_type = cmd->action;
    op.user_data = cmd->userdata;
    op.exit_reason = cmd->exit_reason;
    op.timeout = cmd->timeout;
    op.interval = cmd->interval;
    op.rc = cmd->rc;
    op.op_status = cmd->op_status;
    op.t_run = cmd->start_time;
    op.t_rcchange = cmd->start_time;
    if (cmd->reported_success && cmd->rc != PCMK_OCF_OK) {
        // A failure after an earlier success: the rc changed now, not at start
        op.t_rcchange = time(NULL);
        /* This edge case will likely never ever occur, but if it does the
         * result is that a failure will not be processed correctly. This is only
         * remotely possible because we are able to detect a connection resource's tcp
         * connection has failed at any moment after start has completed. The actual
         * recurring operation is just a connectivity ping.
         *
         * basically, we are not guaranteed that the first successful monitor op and
         * a subsequent failed monitor op will not occur in the same timestamp. We have to
         * make it look like the operations occurred at separate times though. */
        if (op.t_rcchange == op.t_run) {
            op.t_rcchange++;
        }
    }

    if (cmd->params) {
        lrmd_key_value_t *tmp;

        // The callback expects parameters as a hash table, not a key/value list
        op.params = crm_str_table_new();
        for (tmp = cmd->params; tmp; tmp = tmp->next) {
            g_hash_table_insert(op.params, strdup(tmp->key), strdup(tmp->value));
        }

    }
    op.call_id = cmd->call_id;
    op.remote_nodename = cmd->owner;

    lrm_op_callback(&op);

    if (op.params) {
        g_hash_table_destroy(op.params);
    }
}
409
410 static void
update_remaining_timeout(remote_ra_cmd_t * cmd)411 update_remaining_timeout(remote_ra_cmd_t * cmd)
412 {
413 cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
414 }
415
416 static gboolean
retry_start_cmd_cb(gpointer data)417 retry_start_cmd_cb(gpointer data)
418 {
419 lrm_state_t *lrm_state = data;
420 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
421 remote_ra_cmd_t *cmd = NULL;
422 int rc = -1;
423
424 if (!ra_data || !ra_data->cur_cmd) {
425 return FALSE;
426 }
427 cmd = ra_data->cur_cmd;
428 if (safe_str_neq(cmd->action, "start") && safe_str_neq(cmd->action, "migrate_from")) {
429 return FALSE;
430 }
431 update_remaining_timeout(cmd);
432
433 if (cmd->remaining_timeout > 0) {
434 rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
435 }
436
437 if (rc != 0) {
438 cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
439 cmd->op_status = PCMK_LRM_OP_ERROR;
440 report_remote_ra_result(cmd);
441
442 if (ra_data->cmds) {
443 mainloop_set_trigger(ra_data->work);
444 }
445 ra_data->cur_cmd = NULL;
446 free_cmd(cmd);
447 } else {
448 /* wait for connection event */
449 }
450
451 return FALSE;
452 }
453
454
455 static gboolean
connection_takeover_timeout_cb(gpointer data)456 connection_takeover_timeout_cb(gpointer data)
457 {
458 lrm_state_t *lrm_state = NULL;
459 remote_ra_cmd_t *cmd = data;
460
461 crm_info("takeover event timed out for node %s", cmd->rsc_id);
462 cmd->takeover_timeout_id = 0;
463
464 lrm_state = lrm_state_find(cmd->rsc_id);
465
466 handle_remote_ra_stop(lrm_state, cmd);
467 free_cmd(cmd);
468
469 return FALSE;
470 }
471
472 static gboolean
monitor_timeout_cb(gpointer data)473 monitor_timeout_cb(gpointer data)
474 {
475 lrm_state_t *lrm_state = NULL;
476 remote_ra_cmd_t *cmd = data;
477
478 lrm_state = lrm_state_find(cmd->rsc_id);
479
480 crm_info("Poke async response timed out for node %s (%p)", cmd->rsc_id, lrm_state);
481 cmd->monitor_timeout_id = 0;
482 cmd->op_status = PCMK_LRM_OP_TIMEOUT;
483 cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
484
485 if (lrm_state && lrm_state->remote_ra_data) {
486 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
487
488 if (ra_data->cur_cmd == cmd) {
489 ra_data->cur_cmd = NULL;
490 }
491 if (ra_data->cmds) {
492 mainloop_set_trigger(ra_data->work);
493 }
494 }
495
496 report_remote_ra_result(cmd);
497 free_cmd(cmd);
498
499 if(lrm_state) {
500 lrm_state_disconnect(lrm_state);
501 }
502 return FALSE;
503 }
504
505 static void
synthesize_lrmd_success(lrm_state_t * lrm_state,const char * rsc_id,const char * op_type)506 synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
507 {
508 lrmd_event_data_t op = { 0, };
509
510 if (lrm_state == NULL) {
511 /* if lrm_state not given assume local */
512 lrm_state = lrm_state_find(fsa_our_uname);
513 }
514 CRM_ASSERT(lrm_state != NULL);
515
516 op.type = lrmd_event_exec_complete;
517 op.rsc_id = rsc_id;
518 op.op_type = op_type;
519 op.rc = PCMK_OCF_OK;
520 op.op_status = PCMK_LRM_OP_DONE;
521 op.t_run = time(NULL);
522 op.t_rcchange = op.t_run;
523 op.call_id = generate_callid();
524 process_lrm_event(lrm_state, &op, NULL, NULL);
525 }
526
/*!
 * \internal
 * \brief Process an event from a remote executor (lrmd) connection
 *
 * Registered as the operation callback for remote connections. Filters
 * executor EXEC events up to the normal handler, tracks connection takeover
 * during migration, and matches connection-level events (connect, poke,
 * disconnect, new client) against the command currently in flight
 * (ra_data->cur_cmd).
 *
 * \param[in] op  Event to process
 */
void
remote_lrm_op_callback(lrmd_event_data_t * op)
{
    gboolean cmd_handled = FALSE;
    lrm_state_t *lrm_state = NULL;
    remote_ra_data_t *ra_data = NULL;
    remote_ra_cmd_t *cmd = NULL;

    crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
              "(%d) status=%s (%d)",
              (op->op_type? op->op_type : ""), (op->op_type? " " : ""),
              lrmd_event_type2str(op->type), op->remote_nodename,
              services_ocf_exitcode_str(op->rc), op->rc,
              services_lrm_status_str(op->op_status), op->op_status);

    lrm_state = lrm_state_find(op->remote_nodename);
    if (!lrm_state || !lrm_state->remote_ra_data) {
        crm_debug("lrm_state info not found for remote lrmd connection event");
        return;
    }
    ra_data = lrm_state->remote_ra_data;

    /* Another client has connected to the remote daemon,
     * determine if this is expected. */
    if (op->type == lrmd_event_new_client) {
        /* great, we knew this was coming (migration takeover) */
        if (ra_data->migrate_status == expect_takeover) {
            ra_data->migrate_status = takeover_complete;
        } else {
            crm_err("Unexpected pacemaker_remote client takeover for %s. Disconnecting", op->remote_nodename);
            /* In this case, lrmd_tls_connection_destroy() will be called under the control of mainloop. */
            /* Do not free lrm_state->conn yet. */
            /* It'll be freed in the following stop action. */
            lrm_state_disconnect_only(lrm_state);
        }
        return;
    }

    /* filter all EXEC events up */
    if (op->type == lrmd_event_exec_complete) {
        if (ra_data->migrate_status == takeover_complete) {
            crm_debug("ignoring event, this connection is taken over by another node");
        } else {
            lrm_op_callback(op);
        }
        return;
    }

    /* Disconnect with no command in flight: clean shutdown, lost connection,
     * or an unmanaged node going away. */
    if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {

        if (ra_data->active == FALSE) {
            crm_debug("Disconnection from Pacemaker Remote node %s complete",
                      lrm_state->node_name);

        } else if (!remote_ra_is_in_maintenance(lrm_state)) {
            crm_err("Lost connection to Pacemaker Remote node %s",
                    lrm_state->node_name);
            // Fail any monitors waiting on this (now dead) connection
            ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
            ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);

        } else {
            crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
                       lrm_state->node_name);
            /* Do roughly what a 'stop' on the remote-resource would do */
            handle_remote_ra_stop(lrm_state, NULL);
            remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
            /* now fake the reply of a successful 'stop' */
            synthesize_lrmd_success(NULL, lrm_state->node_name, "stop");
        }
        return;
    }

    /* Everything below requires an in-flight command to match against */
    if (!ra_data->cur_cmd) {
        crm_debug("no event to match");
        return;
    }

    cmd = ra_data->cur_cmd;

    /* Start actions and migrate from actions complete after connection
     * comes back to us. */
    if (op->type == lrmd_event_connect && (safe_str_eq(cmd->action, "start") ||
                                           safe_str_eq(cmd->action, "migrate_from"))) {

        if (op->connection_rc < 0) {
            update_remaining_timeout(cmd);

            if (op->connection_rc == -ENOKEY) {
                // Hard error, don't retry
                cmd->op_status = PCMK_LRM_OP_ERROR;
                cmd->rc = PCMK_OCF_INVALID_PARAM;
                cmd->exit_reason = strdup("Authentication key not readable");

            } else if (cmd->remaining_timeout > 3000) {
                // Enough time left to try connecting again in a second
                crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
                g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
                return;

            } else {
                crm_trace("can't reschedule start, remaining timeout too small %d",
                          cmd->remaining_timeout);
                cmd->op_status = PCMK_LRM_OP_TIMEOUT;
                cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
            }

        } else {
            // Connected successfully
            lrm_state_reset_tables(lrm_state, TRUE);
            cmd->rc = PCMK_OCF_OK;
            cmd->op_status = PCMK_LRM_OP_DONE;
            ra_data->active = TRUE;
        }

        crm_debug("remote lrmd connect event matched %s action. ", cmd->action);
        report_remote_ra_result(cmd);
        cmd_handled = TRUE;

    } else if (op->type == lrmd_event_poke && safe_str_eq(cmd->action, "monitor")) {

        // The poke was answered, so cancel the pending timeout
        if (cmd->monitor_timeout_id) {
            g_source_remove(cmd->monitor_timeout_id);
            cmd->monitor_timeout_id = 0;
        }

        /* Only report success the first time, after that only worry about failures.
         * For this function, if we get the poke pack, it is always a success. Pokes
         * only fail if the send fails, or the response times out. */
        if (!cmd->reported_success) {
            cmd->rc = PCMK_OCF_OK;
            cmd->op_status = PCMK_LRM_OP_DONE;
            report_remote_ra_result(cmd);
            cmd->reported_success = 1;
        }

        crm_debug("remote lrmd poke event matched %s action. ", cmd->action);

        /* success, keep rescheduling if interval is present. */
        if (cmd->interval && (cmd->cancel == FALSE)) {
            ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
            cmd->interval_id = g_timeout_add(cmd->interval, recurring_helper, cmd);
            cmd = NULL; /* prevent free */
        }
        cmd_handled = TRUE;

    } else if (op->type == lrmd_event_disconnect && safe_str_eq(cmd->action, "monitor")) {
        // Connection lost while a monitor was in flight
        if (ra_data->active == TRUE && (cmd->cancel == FALSE)) {
            cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
            cmd->op_status = PCMK_LRM_OP_ERROR;
            report_remote_ra_result(cmd);
            crm_err("remote-node %s unexpectedly disconneced during monitor operation", lrm_state->node_name);
        }
        cmd_handled = TRUE;

    } else if (op->type == lrmd_event_new_client && safe_str_eq(cmd->action, "stop")) {

        // The takeover we were waiting on during a stop has arrived
        handle_remote_ra_stop(lrm_state, cmd);
        cmd_handled = TRUE;

    } else {
        crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
    }

    if (cmd_handled) {
        ra_data->cur_cmd = NULL;
        if (ra_data->cmds) {
            mainloop_set_trigger(ra_data->work);
        }
        free_cmd(cmd);
    }
}
696
/*!
 * \internal
 * \brief Handle a stop of the remote connection resource
 *
 * Disconnects from the remote node, discards queued commands, and (if a stop
 * command is supplied) reports it as successful.
 *
 * \param[in] lrm_state  Connection state to stop (must not be NULL)
 * \param[in] cmd        Stop command to report a result for, or NULL when the
 *                       stop is being synthesized (e.g. unmanaged disconnect)
 */
static void
handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
{
    remote_ra_data_t *ra_data = NULL;

    CRM_ASSERT(lrm_state);
    ra_data = lrm_state->remote_ra_data;

    if (ra_data->migrate_status != takeover_complete) {
        /* delete pending ops when ever the remote connection is intentionally stopped */
        g_hash_table_remove_all(lrm_state->pending_ops);
    } else {
        /* we no longer hold the history if this connection has been migrated,
         * however, we keep metadata cache for future use */
        lrm_state_reset_tables(lrm_state, FALSE);
    }

    ra_data->active = FALSE;
    lrm_state_disconnect(lrm_state);

    // Discard any commands still queued against this connection
    if (ra_data->cmds) {
        g_list_free_full(ra_data->cmds, free_cmd);
    }
    if (ra_data->recurring_cmds) {
        g_list_free_full(ra_data->recurring_cmds, free_cmd);
    }
    ra_data->cmds = NULL;
    ra_data->recurring_cmds = NULL;
    ra_data->cur_cmd = NULL;

    if (cmd) {
        // Stops always report success once teardown is done
        cmd->rc = PCMK_OCF_OK;
        cmd->op_status = PCMK_LRM_OP_DONE;

        report_remote_ra_result(cmd);
    }
}
734
735 static int
handle_remote_ra_start(lrm_state_t * lrm_state,remote_ra_cmd_t * cmd,int timeout_ms)736 handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
737 {
738 const char *server = NULL;
739 lrmd_key_value_t *tmp = NULL;
740 int port = 0;
741 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
742 int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
743
744 for (tmp = cmd->params; tmp; tmp = tmp->next) {
745 if (safe_str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_ADDR) ||
746 safe_str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_SERVER)) {
747 server = tmp->value;
748 } else if (safe_str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_PORT)) {
749 port = atoi(tmp->value);
750 } else if (safe_str_eq(tmp->key, CRM_META"_"XML_RSC_ATTR_CONTAINER)) {
751 ra_data->controlling_guest = TRUE;
752 }
753 }
754
755 return lrm_state_remote_connect_async(lrm_state, server, port, timeout_used);
756 }
757
/*!
 * \internal
 * \brief Mainloop trigger handler: execute queued remote RA commands
 *
 * Pops commands off ra_data->cmds one at a time and dispatches on the action
 * name. Asynchronous actions (start/migrate_from connects, monitor pokes,
 * stops waiting on takeover) park themselves in ra_data->cur_cmd and return,
 * to be completed later by remote_lrm_op_callback() or a timer.
 *
 * \param[in] user_data  lrm_state_t of the connection resource
 *
 * \return TRUE (keep the trigger registered)
 */
static gboolean
handle_remote_ra_exec(gpointer user_data)
{
    int rc = 0;
    lrm_state_t *lrm_state = user_data;
    remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
    remote_ra_cmd_t *cmd;
    GList *first = NULL;

    if (ra_data->cur_cmd) {
        /* still waiting on previous cmd */
        return TRUE;
    }

    while (ra_data->cmds) {
        // Peek at the head of the queue
        first = ra_data->cmds;
        cmd = first->data;
        if (cmd->delay_id) {
            /* still waiting for start delay timer to trip */
            return TRUE;
        }

        // Dequeue the command (the list link is freed, the cmd is not)
        ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
        g_list_free_1(first);

        if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) {
            // A fresh connection attempt resets any migration state
            ra_data->migrate_status = 0;
            rc = handle_remote_ra_start(lrm_state, cmd, cmd->timeout);
            if (rc == 0) {
                /* take care of this later when we get async connection result */
                crm_debug("began remote lrmd connect, waiting for connect event.");
                ra_data->cur_cmd = cmd;
                return TRUE;
            } else {
                crm_debug("connect failed, not expecting to match any connection event later");
                cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
                cmd->op_status = PCMK_LRM_OP_ERROR;
            }
            report_remote_ra_result(cmd);

        } else if (!strcmp(cmd->action, "monitor")) {

            if (lrm_state_is_connected(lrm_state) == TRUE) {
                rc = lrm_state_poke_connection(lrm_state);
                if (rc < 0) {
                    // Sending the poke itself failed
                    cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
                    cmd->op_status = PCMK_LRM_OP_ERROR;
                }
            } else {
                // Not connected: report "not running" rather than an error
                rc = -1;
                cmd->op_status = PCMK_LRM_OP_DONE;
                cmd->rc = PCMK_OCF_NOT_RUNNING;
            }

            if (rc == 0) {
                // Poke sent; wait for the async response (or its timeout)
                crm_debug("poked remote lrmd at node %s, waiting for async response.", cmd->rsc_id);
                ra_data->cur_cmd = cmd;
                cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
                return TRUE;
            }
            report_remote_ra_result(cmd);

        } else if (!strcmp(cmd->action, "stop")) {

            if (ra_data->migrate_status == expect_takeover) {
                /* briefly wait on stop for the takeover event to occur. If the
                 * takeover event does not occur during the wait period, that's fine.
                 * It just means that the remote-node's lrm_status section is going to get
                 * cleared which will require all the resources running in the remote-node
                 * to be explicitly re-detected via probe actions. If the takeover does occur
                 * successfully, then we can leave the status section intact. */
                cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
                ra_data->cur_cmd = cmd;
                return TRUE;
            }

            handle_remote_ra_stop(lrm_state, cmd);

        } else if (!strcmp(cmd->action, "migrate_to")) {
            // Nothing to do locally except note that a takeover is expected
            ra_data->migrate_status = expect_takeover;
            cmd->rc = PCMK_OCF_OK;
            cmd->op_status = PCMK_LRM_OP_DONE;
            report_remote_ra_result(cmd);
        } else if (!strcmp(cmd->action, "reload")) {
            /* reloads are a no-op right now, add logic here when they become important */
            cmd->rc = PCMK_OCF_OK;
            cmd->op_status = PCMK_LRM_OP_DONE;
            report_remote_ra_result(cmd);
        }

        free_cmd(cmd);
    }

    return TRUE;
}
853
854 static void
remote_ra_data_init(lrm_state_t * lrm_state)855 remote_ra_data_init(lrm_state_t * lrm_state)
856 {
857 remote_ra_data_t *ra_data = NULL;
858
859 if (lrm_state->remote_ra_data) {
860 return;
861 }
862
863 ra_data = calloc(1, sizeof(remote_ra_data_t));
864 ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
865 lrm_state->remote_ra_data = ra_data;
866 }
867
868 void
remote_ra_cleanup(lrm_state_t * lrm_state)869 remote_ra_cleanup(lrm_state_t * lrm_state)
870 {
871 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
872
873 if (!ra_data) {
874 return;
875 }
876
877 if (ra_data->cmds) {
878 g_list_free_full(ra_data->cmds, free_cmd);
879 }
880
881 if (ra_data->recurring_cmds) {
882 g_list_free_full(ra_data->recurring_cmds, free_cmd);
883 }
884 mainloop_destroy_trigger(ra_data->work);
885 free(ra_data);
886 lrm_state->remote_ra_data = NULL;
887 }
888
889 gboolean
is_remote_lrmd_ra(const char * agent,const char * provider,const char * id)890 is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
891 {
892 if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) {
893 return TRUE;
894 }
895 if (id && lrm_state_find(id) && safe_str_neq(id, fsa_our_uname)) {
896 return TRUE;
897 }
898
899 return FALSE;
900 }
901
902 lrmd_rsc_info_t *
remote_ra_get_rsc_info(lrm_state_t * lrm_state,const char * rsc_id)903 remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
904 {
905 lrmd_rsc_info_t *info = NULL;
906
907 if ((lrm_state_find(rsc_id))) {
908 info = calloc(1, sizeof(lrmd_rsc_info_t));
909
910 info->id = strdup(rsc_id);
911 info->type = strdup(REMOTE_LRMD_RA);
912 info->class = strdup(PCMK_RESOURCE_CLASS_OCF);
913 info->provider = strdup("pacemaker");
914 }
915
916 return info;
917 }
918
919 static gboolean
is_remote_ra_supported_action(const char * action)920 is_remote_ra_supported_action(const char *action)
921 {
922 if (!action) {
923 return FALSE;
924 } else if (strcmp(action, "start") &&
925 strcmp(action, "stop") &&
926 strcmp(action, "reload") &&
927 strcmp(action, "migrate_to") &&
928 strcmp(action, "migrate_from") && strcmp(action, "monitor")) {
929 return FALSE;
930 }
931
932 return TRUE;
933 }
934
935 static GList *
fail_all_monitor_cmds(GList * list)936 fail_all_monitor_cmds(GList * list)
937 {
938 GList *rm_list = NULL;
939 remote_ra_cmd_t *cmd = NULL;
940 GListPtr gIter = NULL;
941
942 for (gIter = list; gIter != NULL; gIter = gIter->next) {
943 cmd = gIter->data;
944 if (cmd->interval > 0 && safe_str_eq(cmd->action, "monitor")) {
945 rm_list = g_list_append(rm_list, cmd);
946 }
947 }
948
949 for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
950 cmd = gIter->data;
951
952 cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
953 cmd->op_status = PCMK_LRM_OP_ERROR;
954 crm_trace("Pre-emptively failing %s %s (interval=%d, %s)", cmd->action, cmd->rsc_id, cmd->interval, cmd->userdata);
955 report_remote_ra_result(cmd);
956
957 list = g_list_remove(list, cmd);
958 free_cmd(cmd);
959 }
960
961 /* frees only the list data, not the cmds */
962 g_list_free(rm_list);
963 return list;
964 }
965
966 static GList *
remove_cmd(GList * list,const char * action,int interval)967 remove_cmd(GList * list, const char *action, int interval)
968 {
969 remote_ra_cmd_t *cmd = NULL;
970 GListPtr gIter = NULL;
971
972 for (gIter = list; gIter != NULL; gIter = gIter->next) {
973 cmd = gIter->data;
974 if (cmd->interval == interval && safe_str_eq(cmd->action, action)) {
975 break;
976 }
977 cmd = NULL;
978 }
979 if (cmd) {
980 list = g_list_remove(list, cmd);
981 free_cmd(cmd);
982 }
983 return list;
984 }
985
986 int
remote_ra_cancel(lrm_state_t * lrm_state,const char * rsc_id,const char * action,int interval)987 remote_ra_cancel(lrm_state_t * lrm_state, const char *rsc_id, const char *action, int interval)
988 {
989 lrm_state_t *connection_rsc = NULL;
990 remote_ra_data_t *ra_data = NULL;
991
992 connection_rsc = lrm_state_find(rsc_id);
993 if (!connection_rsc || !connection_rsc->remote_ra_data) {
994 return -EINVAL;
995 }
996
997 ra_data = connection_rsc->remote_ra_data;
998 ra_data->cmds = remove_cmd(ra_data->cmds, action, interval);
999 ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action, interval);
1000 if (ra_data->cur_cmd &&
1001 (ra_data->cur_cmd->interval == interval) &&
1002 (safe_str_eq(ra_data->cur_cmd->action, action))) {
1003
1004 ra_data->cur_cmd->cancel = TRUE;
1005 }
1006
1007 return 0;
1008 }
1009
/*!
 * \internal
 * \brief Merge a newly requested recurring monitor into an existing duplicate
 *
 * If a monitor with the same interval already exists (queued, recurring, or
 * in flight), update it in place rather than scheduling a second copy.
 *
 * \param[in] ra_data   Remote connection resource's private data
 * \param[in] interval  Requested monitor interval in ms (0 = not recurring)
 * \param[in] userdata  Client data to carry on the merged command (may be NULL)
 *
 * \return Existing command that absorbed the request, or NULL if none found
 */
static remote_ra_cmd_t *
handle_dup_monitor(remote_ra_data_t *ra_data, int interval, const char *userdata)
{
    GList *gIter = NULL;
    remote_ra_cmd_t *cmd = NULL;

    /* there are 3 places a potential duplicate monitor operation
     * could exist.
     * 1. recurring_cmds list. where the op is waiting for its next interval
     * 2. cmds list, where the op is queued to get executed immediately
     * 3. cur_cmd, which means the monitor op is in flight right now.
     */
    if (interval == 0) {
        /* Non-recurring ops are never merged */
        return NULL;
    }

    /* Check the in-flight command first (skip it if it is being cancelled,
     * since a cancelled monitor should not absorb a new request) */
    if (ra_data->cur_cmd &&
        ra_data->cur_cmd->cancel == FALSE &&
        ra_data->cur_cmd->interval == interval &&
        safe_str_eq(ra_data->cur_cmd->action, "monitor")) {

        cmd = ra_data->cur_cmd;
        goto handle_dup;
    }

    /* Then monitors waiting out their interval */
    for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
        cmd = gIter->data;
        if (cmd->interval == interval && safe_str_eq(cmd->action, "monitor")) {
            goto handle_dup;
        }
    }

    /* Finally, monitors queued for immediate execution */
    for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
        cmd = gIter->data;
        if (cmd->interval == interval && safe_str_eq(cmd->action, "monitor")) {
            goto handle_dup;
        }
    }

    return NULL;

  handle_dup:

    crm_trace("merging duplicate monitor cmd %s_monitor_%d", cmd->rsc_id, interval);

    /* update the userdata */
    if (userdata) {
        free(cmd->userdata);
        cmd->userdata = strdup(userdata);
    }

    /* if we've already reported success, generate a new call id */
    if (cmd->reported_success) {
        cmd->start_time = time(NULL);
        cmd->call_id = generate_callid();
        cmd->reported_success = 0;
    }

    /* if we have an interval_id set, that means we are in the process of
     * waiting for this cmd's next interval. instead of waiting, cancel
     * the timer and execute the action immediately */
    if (cmd->interval_id) {
        g_source_remove(cmd->interval_id);
        cmd->interval_id = 0;
        recurring_helper(cmd);
    }

    return cmd;
}
1079
1080 int
remote_ra_exec(lrm_state_t * lrm_state,const char * rsc_id,const char * action,const char * userdata,int interval,int timeout,int start_delay,lrmd_key_value_t * params)1081 remote_ra_exec(lrm_state_t * lrm_state, const char *rsc_id, const char *action, const char *userdata, int interval, /* ms */
1082 int timeout, /* ms */
1083 int start_delay, /* ms */
1084 lrmd_key_value_t * params)
1085 {
1086 int rc = 0;
1087 lrm_state_t *connection_rsc = NULL;
1088 remote_ra_cmd_t *cmd = NULL;
1089 remote_ra_data_t *ra_data = NULL;
1090
1091 if (is_remote_ra_supported_action(action) == FALSE) {
1092 rc = -EINVAL;
1093 goto exec_done;
1094 }
1095
1096 connection_rsc = lrm_state_find(rsc_id);
1097 if (!connection_rsc) {
1098 rc = -EINVAL;
1099 goto exec_done;
1100 }
1101
1102 remote_ra_data_init(connection_rsc);
1103 ra_data = connection_rsc->remote_ra_data;
1104
1105 cmd = handle_dup_monitor(ra_data, interval, userdata);
1106 if (cmd) {
1107 rc = cmd->call_id;
1108 goto exec_done;
1109 }
1110
1111 cmd = calloc(1, sizeof(remote_ra_cmd_t));
1112 cmd->owner = strdup(lrm_state->node_name);
1113 cmd->rsc_id = strdup(rsc_id);
1114 cmd->action = strdup(action);
1115 cmd->userdata = strdup(userdata);
1116 cmd->interval = interval;
1117 cmd->timeout = timeout;
1118 cmd->start_delay = start_delay;
1119 cmd->params = params;
1120 cmd->start_time = time(NULL);
1121
1122 cmd->call_id = generate_callid();
1123
1124 if (cmd->start_delay) {
1125 cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
1126 }
1127
1128 ra_data->cmds = g_list_append(ra_data->cmds, cmd);
1129 mainloop_set_trigger(ra_data->work);
1130
1131 return cmd->call_id;
1132 exec_done:
1133
1134 lrmd_key_value_freeall(params);
1135 return rc;
1136 }
1137
1138 /*!
1139 * \internal
1140 * \brief Immediately fail all monitors of a remote node, if proxied here
1141 *
1142 * \param[in] node_name Name of pacemaker_remote node
1143 */
1144 void
remote_ra_fail(const char * node_name)1145 remote_ra_fail(const char *node_name)
1146 {
1147 lrm_state_t *lrm_state = lrm_state_find(node_name);
1148
1149 if (lrm_state && lrm_state_is_connected(lrm_state)) {
1150 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1151
1152 crm_info("Failing monitors on pacemaker_remote node %s", node_name);
1153 ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
1154 ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
1155 }
1156 }
1157
1158 /* A guest node fencing implied by host fencing looks like:
1159 *
1160 * <pseudo_event id="103" operation="stonith" operation_key="stonith-lxc1-off"
1161 * on_node="lxc1" on_node_uuid="lxc1">
1162 * <attributes CRM_meta_master_lxc_ms="10" CRM_meta_on_node="lxc1"
1163 * CRM_meta_on_node_uuid="lxc1" CRM_meta_stonith_action="off"
1164 * crm_feature_set="3.0.12"/>
1165 * <downed>
1166 * <node id="lxc1"/>
1167 * </downed>
1168 * </pseudo_event>
1169 */
1170 #define XPATH_PSEUDO_FENCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
1171 "[@" XML_LRM_ATTR_TASK "='stonith']/" XML_GRAPH_TAG_DOWNED \
1172 "/" XML_CIB_TAG_NODE
1173
1174 /*!
1175 * \internal
1176 * \brief Check a pseudo-action for Pacemaker Remote node side effects
1177 *
1178 * \param[in] xml XML of pseudo-action to check
1179 */
1180 void
remote_ra_process_pseudo(xmlNode * xml)1181 remote_ra_process_pseudo(xmlNode *xml)
1182 {
1183 xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE);
1184
1185 if (numXpathResults(search) == 1) {
1186 xmlNode *result = getXpathResult(search, 0);
1187
1188 /* Normally, we handle the necessary side effects of a guest node stop
1189 * action when reporting the remote agent's result. However, if the stop
1190 * is implied due to fencing, it will be a fencing pseudo-event, and
1191 * there won't be a result to report. Handle that case here.
1192 *
1193 * This will result in a duplicate call to remote_node_down() if the
1194 * guest stop was real instead of implied, but that shouldn't hurt.
1195 *
1196 * There is still one corner case that isn't handled: if a guest node
1197 * isn't running any resources when its host is fenced, it will appear
1198 * to be cleanly stopped, so there will be no pseudo-fence, and our
1199 * peer cache state will be incorrect unless and until the guest is
1200 * recovered.
1201 */
1202 if (result) {
1203 const char *remote = ID(result);
1204
1205 if (remote) {
1206 remote_node_down(remote, DOWN_ERASE_LRM);
1207 }
1208 }
1209 }
1210 freeXpathObject(search);
1211 }
1212
/*!
 * \internal
 * \brief Record a remote node's maintenance state in the CIB status section
 *
 * On a successful CIB update request, the local cached flag
 * (ra_data->is_maintenance) is updated as well.
 *
 * \param[in] lrm_state    Connection state for the remote node
 * \param[in] maintenance  TRUE to flag the node as in maintenance mode
 */
static void
remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
{
    remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
    xmlNode *update, *state;
    int call_opt, call_id = 0;
    crm_node_t *node;

    call_opt = crmd_cib_smart_opt();
    node = crm_remote_peer_get(lrm_state->node_name);
    CRM_CHECK(node != NULL, return);
    update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
    state = create_node_state_update(node, node_update_none, update,
                                     __FUNCTION__);
    crm_xml_add(state, XML_NODE_IS_MAINTENANCE, maintenance?"1":"0");
    /* NOTE(review): fsa_cib_update is presumably a macro that stores the CIB
     * call ID into call_id by name -- otherwise the check below could never
     * trigger. Confirm against its definition. */
    fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
    if (call_id < 0) {
        crm_perror(LOG_WARNING, "%s CIB node state update failed", lrm_state->node_name);
    } else {
        /* TODO: still not 100% sure that async update will succeed ... */
        ra_data->is_maintenance = maintenance;
    }
    free_xml(update);
}
1237
1238 #define XPATH_PSEUDO_MAINTENANCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
1239 "[@" XML_LRM_ATTR_TASK "='" CRM_OP_MAINTENANCE_NODES "']/" \
1240 XML_GRAPH_TAG_MAINTENANCE
1241
1242 /*!
1243 * \internal
1244 * \brief Check a pseudo-action holding updates for maintenance state
1245 *
1246 * \param[in] xml XML of pseudo-action to check
1247 */
1248
1249 void
remote_ra_process_maintenance_nodes(xmlNode * xml)1250 remote_ra_process_maintenance_nodes(xmlNode *xml)
1251 {
1252 xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);
1253
1254 if (numXpathResults(search) == 1) {
1255 xmlNode *node;
1256 int cnt = 0, cnt_remote = 0;
1257
1258 for (node =
1259 first_named_child(getXpathResult(search, 0), XML_CIB_TAG_NODE);
1260 node; node = __xml_next(node)) {
1261 lrm_state_t *lrm_state = lrm_state_find(ID(node));
1262
1263 cnt++;
1264 if (lrm_state && lrm_state->remote_ra_data &&
1265 ((remote_ra_data_t *) lrm_state->remote_ra_data)->active) {
1266 cnt_remote++;
1267 remote_ra_maintenance(lrm_state,
1268 crm_atoi(crm_element_value(node,
1269 XML_NODE_IS_MAINTENANCE), "0"));
1270
1271 }
1272 }
1273 crm_trace("Action holds %d nodes (%d remotes found) "
1274 "adjusting maintenance-mode", cnt, cnt_remote);
1275 }
1276 freeXpathObject(search);
1277 }
1278
1279 gboolean
remote_ra_is_in_maintenance(lrm_state_t * lrm_state)1280 remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
1281 {
1282 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1283
1284 return ra_data->is_maintenance;
1285 }
1286
1287 gboolean
remote_ra_controlling_guest(lrm_state_t * lrm_state)1288 remote_ra_controlling_guest(lrm_state_t * lrm_state)
1289 {
1290 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1291
1292 return ra_data->controlling_guest;
1293 }
1294