/*
 * Copyright 2013-2019 the Pacemaker project contributors
 *
 * The version control history for this file may have further details.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>

#include <crmd.h>
#include <crmd_fsa.h>
#include <crmd_messages.h>
#include <crmd_callbacks.h>
#include <crmd_lrm.h>
#include <crm/lrmd.h>
#include <crm/services.h>

#define REMOTE_LRMD_RA "remote"

/* The maximum timeout (in ms) for a single connection attempt before
 * the start command is retried
 */
#define MAX_START_TIMEOUT_MS 10000

typedef struct remote_ra_cmd_s {
    /*! the local node the cmd is issued from */
    char *owner;
    /*! the remote node the cmd is executed on */
    char *rsc_id;
    /*! the action to execute */
    char *action;
    /*! opaque string the client wants passed back with the result */
    char *userdata;
    char *exit_reason;          // descriptive text on error
    /*! start delay in ms */
    int start_delay;
    /*! timer id used for start delay. */
    int delay_id;
    /*! timeout in ms for cmd */
    int timeout;
    int remaining_timeout;
    /*! recurring interval in ms */
    int interval;
    /*! interval timer id */
    int interval_id;
    int reported_success;
    int monitor_timeout_id;
    int takeover_timeout_id;
    /*! action parameters */
    lrmd_key_value_t *params;
    /*! executed rc */
    int rc;
    int op_status;
    int call_id;
    time_t start_time;
    gboolean cancel;
} remote_ra_cmd_t;

enum remote_migration_status {
    expect_takeover = 1,
    takeover_complete,
};

typedef struct remote_ra_data_s {
    crm_trigger_t *work;
    remote_ra_cmd_t *cur_cmd;
    GList *cmds;
    GList *recurring_cmds;

    enum remote_migration_status migrate_status;

    gboolean active;
    gboolean is_maintenance; /* Whether the remote node is in maintenance mode.
                              * This is complex to determine from the crmd's
                              * context, so the pengine signals it back to us
                              * with the transition.
                              */
    gboolean controlling_guest; /* Similarly, whether this connection is for a
                                 * guest node rather than a bare-metal remote
                                 * node. Fortunately the transition already
                                 * carries this as a meta-attribute, and since
                                 * the situation doesn't change over time, we
                                 * can note it down at resource start for later
                                 * use, when the attributes aren't at hand.
                                 */
} remote_ra_data_t;

static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
static GList *fail_all_monitor_cmds(GList * list);

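/*!
 * \internal
 * \brief Free a remote RA command, canceling any timers it still holds
 *
 * \param[in] user_data  Command to free (remote_ra_cmd_t *)
 */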
static void
free_cmd(gpointer user_data)
{
    remote_ra_cmd_t *cmd = user_data;

    if (!cmd) {
        return;
    }
    if (cmd->delay_id) {
        g_source_remove(cmd->delay_id);
    }
    if (cmd->interval_id) {
        g_source_remove(cmd->interval_id);
    }
    if (cmd->monitor_timeout_id) {
        g_source_remove(cmd->monitor_timeout_id);
    }
    if (cmd->takeover_timeout_id) {
        g_source_remove(cmd->takeover_timeout_id);
    }
    free(cmd->owner);
    free(cmd->rsc_id);
    free(cmd->action);
    free(cmd->userdata);
    free(cmd->exit_reason);
    lrmd_key_value_freeall(cmd->params);
    free(cmd);
}

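/*!
 * \internal
 * \brief Generate a call ID for a remote RA command
 *
 * \return Next call ID (always a positive integer, wrapping on overflow)
 */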
static int
generate_callid(void)
{
    static int remote_ra_callid = 0;

    remote_ra_callid++;
    if (remote_ra_callid <= 0) {
        remote_ra_callid = 1;
    }

    return remote_ra_callid;
}

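/*!
 * \internal
 * \brief Timer callback that requeues a recurring command for execution
 *
 * When a recurring command's interval expires, move it from the recurring
 * list back to the command queue, and kick the work trigger so it executes.
 *
 * \param[in] data  Command (remote_ra_cmd_t *) whose interval expired
 *
 * \return FALSE (i.e. do not re-run this timer)
 */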
static gboolean
recurring_helper(gpointer data)
{
    remote_ra_cmd_t *cmd = data;
    lrm_state_t *connection_rsc = NULL;

    cmd->interval_id = 0;
    connection_rsc = lrm_state_find(cmd->rsc_id);
    if (connection_rsc && connection_rsc->remote_ra_data) {
        remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;

        ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);

        ra_data->cmds = g_list_append(ra_data->cmds, cmd);
        mainloop_set_trigger(ra_data->work);
    }
    return FALSE;
}

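/*!
 * \internal
 * \brief Timer callback that fires when a command's start delay has passed
 *
 * The command is already sitting in the queue; this just kicks the work
 * trigger so the queue gets processed now that the delay is over.
 *
 * \param[in] data  Command (remote_ra_cmd_t *) whose start delay expired
 *
 * \return FALSE (i.e. do not re-run this timer)
 */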
static gboolean
start_delay_helper(gpointer data)
{
    remote_ra_cmd_t *cmd = data;
    lrm_state_t *connection_rsc = NULL;

    cmd->delay_id = 0;
    connection_rsc = lrm_state_find(cmd->rsc_id);
    if (connection_rsc && connection_rsc->remote_ra_data) {
        remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;

        mainloop_set_trigger(ra_data->work);
    }
    return FALSE;
}

/*!
 * \internal
 * \brief Handle cluster communication related to pacemaker_remote node joining
 *
 * \param[in] node_name  Name of newly integrated pacemaker_remote node
 */
static void
remote_node_up(const char *node_name)
{
    int call_opt, call_id = 0;
    xmlNode *update, *state;
    crm_node_t *node;

    CRM_CHECK(node_name != NULL, return);
    crm_info("Announcing pacemaker_remote node %s", node_name);

    /* Clear node's entire state (resource history and transient attributes).
     * The transient attributes should and normally will be cleared when the
     * node leaves, but since remote node state has a number of corner cases,
     * clear them here as well, to be sure.
     */
    call_opt = crmd_cib_smart_opt();
    controld_delete_node_state(node_name, controld_section_all, call_opt);

    /* Clear node's probed attribute */
    update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);

    /* Ensure node is in the remote peer cache with member status */
    node = crm_remote_peer_get(node_name);
    CRM_CHECK(node != NULL, return);
    crm_update_peer_state(__FUNCTION__, node, CRM_NODE_MEMBER, 0);

    /* pacemaker_remote nodes don't participate in the membership layer,
     * so cluster nodes don't automatically get notified when they come and go.
     * We send a cluster message to the DC, and update the CIB node state entry,
     * so the DC will get it sooner (via message) or later (via CIB refresh),
     * and any other interested parties can query the CIB.
     */
    send_remote_state_message(node_name, TRUE);

    update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
    state = create_node_state_update(node, node_update_cluster, update,
                                     __FUNCTION__);

    /* Clear the XML_NODE_IS_FENCED flag in the node state. If the node ever
     * needs to be fenced, this flag will allow various actions to determine
     * whether the fencing has happened yet.
     */
    crm_xml_add(state, XML_NODE_IS_FENCED, "0");

    /* TODO: If the remote connection drops, and this (async) CIB update either
     * failed or has not yet completed, later actions could mistakenly think the
     * node has already been fenced (if the XML_NODE_IS_FENCED attribute was
     * previously set, because it won't have been cleared). This could prevent
     * actual fencing or allow recurring monitor failures to be cleared too
     * soon. Ideally, we wouldn't rely on the CIB for the fenced status.
     */
    fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
    if (call_id < 0) {
        crm_perror(LOG_WARNING, "%s CIB node state setup", node_name);
    }
    free_xml(update);
}

enum down_opts {
    DOWN_KEEP_LRM,
    DOWN_ERASE_LRM
};

/*!
 * \internal
 * \brief Handle cluster communication related to pacemaker_remote node leaving
 *
 * \param[in] node_name  Name of lost node
 * \param[in] opts       Whether to keep or erase LRM history
 */
static void
remote_node_down(const char *node_name, const enum down_opts opts)
{
    xmlNode *update;
    int call_id = 0;
    int call_opt = crmd_cib_smart_opt();
    crm_node_t *node;

    /* Purge node from attrd's memory */
    update_attrd_remote_node_removed(node_name, NULL);

    /* Normally, only node attributes should be erased, and the resource history
     * should be kept until the node comes back up. However, after a successful
     * fence, we want to clear the history as well, so we don't think resources
     * are still running on the node.
     */
    if (opts == DOWN_ERASE_LRM) {
        controld_delete_node_state(node_name, controld_section_all, call_opt);
    } else {
        controld_delete_node_state(node_name, controld_section_attrs, call_opt);
    }

    /* Ensure node is in the remote peer cache with lost state */
    node = crm_remote_peer_get(node_name);
    CRM_CHECK(node != NULL, return);
    crm_update_peer_state(__FUNCTION__, node, CRM_NODE_LOST, 0);

    /* Notify DC */
    send_remote_state_message(node_name, FALSE);

    /* Update CIB node state */
    update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
    create_node_state_update(node, node_update_cluster, update, __FUNCTION__);
    fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
    if (call_id < 0) {
        crm_perror(LOG_ERR, "%s CIB node state update", node_name);
    }
    free_xml(update);
}

/*!
 * \internal
 * \brief Handle effects of a remote RA command on node state
 *
 * \param[in] cmd  Completed remote RA command
 */
static void
check_remote_node_state(remote_ra_cmd_t *cmd)
{
    /* Only successful actions can change node state */
    if (cmd->rc != PCMK_OCF_OK) {
        return;
    }

    if (safe_str_eq(cmd->action, "start")) {
        remote_node_up(cmd->rsc_id);

    } else if (safe_str_eq(cmd->action, "migrate_from")) {
        /* After a successful migration, we don't need to do remote_node_up()
         * because the DC already knows the node is up, and we don't want to
         * clear LRM history etc. We do need to add the remote node to this
         * host's remote peer cache, because (unless it happens to be DC)
         * it hasn't been tracking the remote node, and other code relies on
         * the cache to distinguish remote nodes from unseen cluster nodes.
         */
        crm_node_t *node = crm_remote_peer_get(cmd->rsc_id);

        CRM_CHECK(node != NULL, return);
        crm_update_peer_state(__FUNCTION__, node, CRM_NODE_MEMBER, 0);

    } else if (safe_str_eq(cmd->action, "stop")) {
        lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id);
        remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL;

        if (ra_data) {
            if (ra_data->migrate_status != takeover_complete) {
                /* Stop means down if we didn't successfully migrate elsewhere */
                remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);
            } else if (AM_I_DC == FALSE) {
                /* Only the connection host and DC track node state,
                 * so if the connection migrated elsewhere and we aren't DC,
                 * un-cache the node, so we don't have stale info
                 */
                crm_remote_peer_cache_remove(cmd->rsc_id);
            }
        }
    }

    /* We don't do anything for successful monitors, which is correct for
     * routine recurring monitors, and for monitors on nodes where the
     * connection isn't supposed to be (the cluster will stop the connection in
     * that case). However, if the initial probe finds the connection already
     * active on the node where we want it, we probably should do
     * remote_node_up(). Unfortunately, we can't distinguish that case here.
     * Given that connections have to be initiated by the cluster, the chance of
     * that should be close to zero.
     */
}

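/*!
 * \internal
 * \brief Report a remote RA command's result to the controller
 *
 * Update node state if appropriate, convert the command into an
 * lrmd_event_data_t, and pass that to lrm_op_callback().
 *
 * \param[in] cmd  Completed command to report
 */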
static void
report_remote_ra_result(remote_ra_cmd_t * cmd)
{
    lrmd_event_data_t op = { 0, };

    check_remote_node_state(cmd);

    op.type = lrmd_event_exec_complete;
    op.rsc_id = cmd->rsc_id;
    op.op_type = cmd->action;
    op.user_data = cmd->userdata;
    op.exit_reason = cmd->exit_reason;
    op.timeout = cmd->timeout;
    op.interval = cmd->interval;
    op.rc = cmd->rc;
    op.op_status = cmd->op_status;
    op.t_run = cmd->start_time;
    op.t_rcchange = cmd->start_time;
    if (cmd->reported_success && cmd->rc != PCMK_OCF_OK) {
        op.t_rcchange = time(NULL);
        /* This edge case will likely never occur, but if it did, a failure
         * would not be processed correctly. It is only remotely possible
         * because we can detect that a connection resource's TCP connection
         * has failed at any moment after start has completed; the actual
         * recurring operation is just a connectivity ping.
         *
         * Basically, we are not guaranteed that the first successful monitor
         * op and a subsequent failed monitor op won't share the same
         * timestamp, but we have to make it look like the operations occurred
         * at separate times. */
        if (op.t_rcchange == op.t_run) {
            op.t_rcchange++;
        }
    }

    if (cmd->params) {
        lrmd_key_value_t *tmp;

        op.params = crm_str_table_new();
        for (tmp = cmd->params; tmp; tmp = tmp->next) {
            g_hash_table_insert(op.params, strdup(tmp->key), strdup(tmp->value));
        }

    }
    op.call_id = cmd->call_id;
    op.remote_nodename = cmd->owner;

    lrm_op_callback(&op);

    if (op.params) {
        g_hash_table_destroy(op.params);
    }
}

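/*!
 * \internal
 * \brief Recalculate how much of a command's timeout remains
 *
 * Update cmd->remaining_timeout (in ms) based on the wall-clock time elapsed
 * since the command started.
 *
 * \param[in] cmd  Command to update
 */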
static void
update_remaining_timeout(remote_ra_cmd_t * cmd)
{
    cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
}

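/*!
 * \internal
 * \brief Timer callback that retries the connection attempt for a start
 *
 * If the in-flight command is a start or migrate_from with enough of its
 * timeout remaining, initiate another connection attempt; otherwise, report
 * the command as failed.
 *
 * \param[in] data  Connection resource's lrm_state_t
 *
 * \return FALSE (i.e. do not re-run this timer)
 */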
static gboolean
retry_start_cmd_cb(gpointer data)
{
    lrm_state_t *lrm_state = data;
    remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
    remote_ra_cmd_t *cmd = NULL;
    int rc = -1;

    if (!ra_data || !ra_data->cur_cmd) {
        return FALSE;
    }
    cmd = ra_data->cur_cmd;
    if (safe_str_neq(cmd->action, "start") && safe_str_neq(cmd->action, "migrate_from")) {
        return FALSE;
    }
    update_remaining_timeout(cmd);

    if (cmd->remaining_timeout > 0) {
        rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
    }

    if (rc != 0) {
        cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
        cmd->op_status = PCMK_LRM_OP_ERROR;
        report_remote_ra_result(cmd);

        if (ra_data->cmds) {
            mainloop_set_trigger(ra_data->work);
        }
        ra_data->cur_cmd = NULL;
        free_cmd(cmd);
    } else {
        /* wait for connection event */
    }

    return FALSE;
}

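/*!
 * \internal
 * \brief Timer callback for when an expected takeover did not happen in time
 *
 * Handle the pending stop as a normal stop (that is, bring the remote node
 * down) rather than as a migration hand-off.
 *
 * \param[in] data  Stop command (remote_ra_cmd_t *) waiting on the takeover
 *
 * \return FALSE (i.e. do not re-run this timer)
 */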
static gboolean
connection_takeover_timeout_cb(gpointer data)
{
    lrm_state_t *lrm_state = NULL;
    remote_ra_cmd_t *cmd = data;

    crm_info("takeover event timed out for node %s", cmd->rsc_id);
    cmd->takeover_timeout_id = 0;

    lrm_state = lrm_state_find(cmd->rsc_id);

    handle_remote_ra_stop(lrm_state, cmd);
    free_cmd(cmd);

    return FALSE;
}

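/*!
 * \internal
 * \brief Timer callback for when a monitor's poke got no response in time
 *
 * Report the monitor as timed out, requeue any waiting commands, and drop
 * the connection.
 *
 * \param[in] data  Monitor command (remote_ra_cmd_t *) that timed out
 *
 * \return FALSE (i.e. do not re-run this timer)
 */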
static gboolean
monitor_timeout_cb(gpointer data)
{
    lrm_state_t *lrm_state = NULL;
    remote_ra_cmd_t *cmd = data;

    lrm_state = lrm_state_find(cmd->rsc_id);

    crm_info("Poke async response timed out for node %s (%p)", cmd->rsc_id, lrm_state);
    cmd->monitor_timeout_id = 0;
    cmd->op_status = PCMK_LRM_OP_TIMEOUT;
    cmd->rc = PCMK_OCF_UNKNOWN_ERROR;

    if (lrm_state && lrm_state->remote_ra_data) {
        remote_ra_data_t *ra_data = lrm_state->remote_ra_data;

        if (ra_data->cur_cmd == cmd) {
            ra_data->cur_cmd = NULL;
        }
        if (ra_data->cmds) {
            mainloop_set_trigger(ra_data->work);
        }
    }

    report_remote_ra_result(cmd);
    free_cmd(cmd);

    if (lrm_state) {
        lrm_state_disconnect(lrm_state);
    }
    return FALSE;
}

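/*!
 * \internal
 * \brief Synthesize a successful executor event for a resource action
 *
 * Fake a successful result for the given action (for example, a "stop" that
 * was implied rather than executed) and feed it to process_lrm_event().
 *
 * \param[in] lrm_state  Executor state to use (NULL means the local node's)
 * \param[in] rsc_id     ID of the resource the event is for
 * \param[in] op_type    Name of the action to fake a result for
 */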
static void
synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
{
    lrmd_event_data_t op = { 0, };

    if (lrm_state == NULL) {
        /* If lrm_state is not given, assume local */
        lrm_state = lrm_state_find(fsa_our_uname);
    }
    CRM_ASSERT(lrm_state != NULL);

    op.type = lrmd_event_exec_complete;
    op.rsc_id = rsc_id;
    op.op_type = op_type;
    op.rc = PCMK_OCF_OK;
    op.op_status = PCMK_LRM_OP_DONE;
    op.t_run = time(NULL);
    op.t_rcchange = op.t_run;
    op.call_id = generate_callid();
    process_lrm_event(lrm_state, &op, NULL, NULL);
}

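/*!
 * \internal
 * \brief Handle an event on a remote executor connection
 *
 * Execution results are passed up the chain, while connection lifecycle
 * events (connect, disconnect, new client, poke) are matched against the
 * command currently in flight, if any.
 *
 * \param[in] op  Event to handle
 */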
void
remote_lrm_op_callback(lrmd_event_data_t * op)
{
    gboolean cmd_handled = FALSE;
    lrm_state_t *lrm_state = NULL;
    remote_ra_data_t *ra_data = NULL;
    remote_ra_cmd_t *cmd = NULL;

    crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
              "(%d) status=%s (%d)",
              (op->op_type? op->op_type : ""), (op->op_type? " " : ""),
              lrmd_event_type2str(op->type), op->remote_nodename,
              services_ocf_exitcode_str(op->rc), op->rc,
              services_lrm_status_str(op->op_status), op->op_status);

    lrm_state = lrm_state_find(op->remote_nodename);
    if (!lrm_state || !lrm_state->remote_ra_data) {
        crm_debug("lrm_state info not found for remote lrmd connection event");
        return;
    }
    ra_data = lrm_state->remote_ra_data;

    /* Another client has connected to the remote daemon,
     * determine if this is expected. */
    if (op->type == lrmd_event_new_client) {
        /* great, we knew this was coming */
        if (ra_data->migrate_status == expect_takeover) {
            ra_data->migrate_status = takeover_complete;
        } else {
            crm_err("Unexpected pacemaker_remote client takeover for %s. Disconnecting", op->remote_nodename);
            /* In this case, lrmd_tls_connection_destroy() will be called under the control of mainloop. */
            /* Do not free lrm_state->conn yet. */
            /* It'll be freed in the following stop action. */
            lrm_state_disconnect_only(lrm_state);
        }
        return;
    }

    /* filter all EXEC events up */
    if (op->type == lrmd_event_exec_complete) {
        if (ra_data->migrate_status == takeover_complete) {
            crm_debug("ignoring event, this connection is taken over by another node");
        } else {
            lrm_op_callback(op);
        }
        return;
    }

    if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {

        if (ra_data->active == FALSE) {
            crm_debug("Disconnection from Pacemaker Remote node %s complete",
                      lrm_state->node_name);

        } else if (!remote_ra_is_in_maintenance(lrm_state)) {
            crm_err("Lost connection to Pacemaker Remote node %s",
                    lrm_state->node_name);
            ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
            ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);

        } else {
            crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
                       lrm_state->node_name);
            /* Do roughly what a 'stop' on the remote-resource would do */
            handle_remote_ra_stop(lrm_state, NULL);
            remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
            /* now fake the reply of a successful 'stop' */
            synthesize_lrmd_success(NULL, lrm_state->node_name, "stop");
        }
        return;
    }

    if (!ra_data->cur_cmd) {
        crm_debug("no event to match");
        return;
    }

    cmd = ra_data->cur_cmd;

    /* Start and migrate_from actions complete after the connection
     * comes back to us. */
    if (op->type == lrmd_event_connect && (safe_str_eq(cmd->action, "start") ||
                                           safe_str_eq(cmd->action, "migrate_from"))) {

        if (op->connection_rc < 0) {
            update_remaining_timeout(cmd);

            if (op->connection_rc == -ENOKEY) {
                // Hard error, don't retry
                cmd->op_status = PCMK_LRM_OP_ERROR;
                cmd->rc = PCMK_OCF_INVALID_PARAM;
                cmd->exit_reason = strdup("Authentication key not readable");

            } else if (cmd->remaining_timeout > 3000) {
                crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
                g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
                return;

            } else {
                crm_trace("can't reschedule start, remaining timeout too small %d",
                          cmd->remaining_timeout);
                cmd->op_status = PCMK_LRM_OP_TIMEOUT;
                cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
            }

        } else {
            lrm_state_reset_tables(lrm_state, TRUE);
            cmd->rc = PCMK_OCF_OK;
            cmd->op_status = PCMK_LRM_OP_DONE;
            ra_data->active = TRUE;
        }

        crm_debug("remote lrmd connect event matched %s action. ", cmd->action);
        report_remote_ra_result(cmd);
        cmd_handled = TRUE;

    } else if (op->type == lrmd_event_poke && safe_str_eq(cmd->action, "monitor")) {

        if (cmd->monitor_timeout_id) {
            g_source_remove(cmd->monitor_timeout_id);
            cmd->monitor_timeout_id = 0;
        }

        /* Only report success the first time, after that only worry about failures.
         * For this function, if we get the poke back, it is always a success. Pokes
         * only fail if the send fails, or the response times out. */
        if (!cmd->reported_success) {
            cmd->rc = PCMK_OCF_OK;
            cmd->op_status = PCMK_LRM_OP_DONE;
            report_remote_ra_result(cmd);
            cmd->reported_success = 1;
        }

        crm_debug("remote lrmd poke event matched %s action. ", cmd->action);

        /* success, keep rescheduling if interval is present. */
        if (cmd->interval && (cmd->cancel == FALSE)) {
            ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
            cmd->interval_id = g_timeout_add(cmd->interval, recurring_helper, cmd);
            cmd = NULL;         /* prevent free */
        }
        cmd_handled = TRUE;

    } else if (op->type == lrmd_event_disconnect && safe_str_eq(cmd->action, "monitor")) {
        if (ra_data->active == TRUE && (cmd->cancel == FALSE)) {
            cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
            cmd->op_status = PCMK_LRM_OP_ERROR;
            report_remote_ra_result(cmd);
            crm_err("remote-node %s unexpectedly disconnected during monitor operation", lrm_state->node_name);
        }
        cmd_handled = TRUE;

    } else if (op->type == lrmd_event_new_client && safe_str_eq(cmd->action, "stop")) {

        handle_remote_ra_stop(lrm_state, cmd);
        cmd_handled = TRUE;

    } else {
        crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
    }

    if (cmd_handled) {
        ra_data->cur_cmd = NULL;
        if (ra_data->cmds) {
            mainloop_set_trigger(ra_data->work);
        }
        free_cmd(cmd);
    }
}

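/*!
 * \internal
 * \brief Handle a stop of the remote connection resource
 *
 * Flush pending operations and command queues, mark the connection inactive,
 * disconnect, and (if a command was given) report a successful stop.
 *
 * \param[in] lrm_state  State of the connection being stopped
 * \param[in] cmd        Stop command to report the result of (may be NULL)
 */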
static void
handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
{
    remote_ra_data_t *ra_data = NULL;

    CRM_ASSERT(lrm_state);
    ra_data = lrm_state->remote_ra_data;

    if (ra_data->migrate_status != takeover_complete) {
        /* delete pending ops whenever the remote connection is intentionally stopped */
        g_hash_table_remove_all(lrm_state->pending_ops);
    } else {
        /* we no longer hold the history if this connection has been migrated;
         * however, we keep the metadata cache for future use */
        lrm_state_reset_tables(lrm_state, FALSE);
    }

    ra_data->active = FALSE;
    lrm_state_disconnect(lrm_state);

    if (ra_data->cmds) {
        g_list_free_full(ra_data->cmds, free_cmd);
    }
    if (ra_data->recurring_cmds) {
        g_list_free_full(ra_data->recurring_cmds, free_cmd);
    }
    ra_data->cmds = NULL;
    ra_data->recurring_cmds = NULL;
    ra_data->cur_cmd = NULL;

    if (cmd) {
        cmd->rc = PCMK_OCF_OK;
        cmd->op_status = PCMK_LRM_OP_DONE;

        report_remote_ra_result(cmd);
    }
}

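/*!
 * \internal
 * \brief Initiate an asynchronous connection for a start (or migrate_from)
 *
 * Pick the server address, port, and container meta-attribute out of the
 * command's parameters, then begin connecting. The timeout of a single
 * attempt is capped at MAX_START_TIMEOUT_MS; retry_start_cmd_cb() handles
 * further attempts within the command's overall timeout.
 *
 * \param[in] lrm_state   State of the connection to start
 * \param[in] cmd         Start command being executed
 * \param[in] timeout_ms  Time remaining for the command (in ms)
 *
 * \return Result of lrm_state_remote_connect_async() (0 appears to mean a
 *         connection attempt was successfully initiated, judging by callers)
 */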
static int
handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
{
    const char *server = NULL;
    lrmd_key_value_t *tmp = NULL;
    int port = 0;
    remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
    int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;

    for (tmp = cmd->params; tmp; tmp = tmp->next) {
        if (safe_str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_ADDR) ||
            safe_str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_SERVER)) {
            server = tmp->value;
        } else if (safe_str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_PORT)) {
            port = atoi(tmp->value);
        } else if (safe_str_eq(tmp->key, CRM_META"_"XML_RSC_ATTR_CONTAINER)) {
            ra_data->controlling_guest = TRUE;
        }
    }

    return lrm_state_remote_connect_async(lrm_state, server, port, timeout_used);
}

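/*!
 * \internal
 * \brief Mainloop trigger handler that executes queued remote RA commands
 *
 * Process the command queue one command at a time. Commands that complete
 * asynchronously (start, monitor, stop waiting on a takeover) become cur_cmd
 * until a matching connection event or timer resolves them.
 *
 * \param[in] user_data  Connection resource's lrm_state_t
 *
 * \return TRUE (so the trigger source is kept)
 */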
static gboolean
handle_remote_ra_exec(gpointer user_data)
{
    int rc = 0;
    lrm_state_t *lrm_state = user_data;
    remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
    remote_ra_cmd_t *cmd;
    GList *first = NULL;

    if (ra_data->cur_cmd) {
        /* still waiting on previous cmd */
        return TRUE;
    }

    while (ra_data->cmds) {
        first = ra_data->cmds;
        cmd = first->data;
        if (cmd->delay_id) {
            /* still waiting for start delay timer to trip */
            return TRUE;
        }

        ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
        g_list_free_1(first);

        if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) {
            ra_data->migrate_status = 0;
            rc = handle_remote_ra_start(lrm_state, cmd, cmd->timeout);
            if (rc == 0) {
                /* take care of this later when we get async connection result */
                crm_debug("began remote lrmd connect, waiting for connect event.");
                ra_data->cur_cmd = cmd;
                return TRUE;
            } else {
                crm_debug("connect failed, not expecting to match any connection event later");
                cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
                cmd->op_status = PCMK_LRM_OP_ERROR;
            }
            report_remote_ra_result(cmd);

        } else if (!strcmp(cmd->action, "monitor")) {

            if (lrm_state_is_connected(lrm_state) == TRUE) {
                rc = lrm_state_poke_connection(lrm_state);
                if (rc < 0) {
                    cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
                    cmd->op_status = PCMK_LRM_OP_ERROR;
                }
            } else {
                rc = -1;
                cmd->op_status = PCMK_LRM_OP_DONE;
                cmd->rc = PCMK_OCF_NOT_RUNNING;
            }

            if (rc == 0) {
                crm_debug("poked remote lrmd at node %s, waiting for async response.", cmd->rsc_id);
                ra_data->cur_cmd = cmd;
                cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
                return TRUE;
            }
            report_remote_ra_result(cmd);

        } else if (!strcmp(cmd->action, "stop")) {

            if (ra_data->migrate_status == expect_takeover) {
                /* Briefly wait on stop for the takeover event to occur. If the
                 * takeover event does not occur during the wait period, that's
                 * fine; it just means that the remote node's lrm_status section
                 * is going to get cleared, which will require all resources
                 * running on the remote node to be explicitly re-detected via
                 * probe actions. If the takeover does occur successfully, then
                 * we can leave the status section intact. */
                cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
                ra_data->cur_cmd = cmd;
                return TRUE;
            }

            handle_remote_ra_stop(lrm_state, cmd);

        } else if (!strcmp(cmd->action, "migrate_to")) {
            ra_data->migrate_status = expect_takeover;
            cmd->rc = PCMK_OCF_OK;
            cmd->op_status = PCMK_LRM_OP_DONE;
            report_remote_ra_result(cmd);
        } else if (!strcmp(cmd->action, "reload")) {
            /* reloads are a no-op right now, add logic here when they become important */
            cmd->rc = PCMK_OCF_OK;
            cmd->op_status = PCMK_LRM_OP_DONE;
            report_remote_ra_result(cmd);
        }

        free_cmd(cmd);
    }

    return TRUE;
}

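/*!
 * \internal
 * \brief Lazily initialize a connection resource's remote RA data
 *
 * Allocate the remote_ra_data_t and register the work trigger that drives
 * handle_remote_ra_exec(). Does nothing if the data already exists.
 *
 * \param[in] lrm_state  State of the connection to initialize
 */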
static void
remote_ra_data_init(lrm_state_t * lrm_state)
{
    remote_ra_data_t *ra_data = NULL;

    if (lrm_state->remote_ra_data) {
        return;
    }

    ra_data = calloc(1, sizeof(remote_ra_data_t));
    ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
    lrm_state->remote_ra_data = ra_data;
}

void
remote_ra_cleanup(lrm_state_t * lrm_state)
{
    remote_ra_data_t *ra_data = lrm_state->remote_ra_data;

    if (!ra_data) {
        return;
    }

    if (ra_data->cmds) {
        g_list_free_full(ra_data->cmds, free_cmd);
    }

    if (ra_data->recurring_cmds) {
        g_list_free_full(ra_data->recurring_cmds, free_cmd);
    }
    mainloop_destroy_trigger(ra_data->work);
    free(ra_data);
    lrm_state->remote_ra_data = NULL;
}

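/*!
 * \internal
 * \brief Check whether an agent (or resource ID) is the remote connection RA
 *
 * \param[in] agent     Agent name (may be NULL)
 * \param[in] provider  Agent provider (may be NULL)
 * \param[in] id        Resource ID to check (may be NULL)
 *
 * \return TRUE if this is the pacemaker "remote" agent, or if the ID matches
 *         a known remote connection (and is not the local node), else FALSE
 */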
gboolean
is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
{
    if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) {
        return TRUE;
    }
    if (id && lrm_state_find(id) && safe_str_neq(id, fsa_our_uname)) {
        return TRUE;
    }

    return FALSE;
}

lrmd_rsc_info_t *
remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
{
    lrmd_rsc_info_t *info = NULL;

    if ((lrm_state_find(rsc_id))) {
        info = calloc(1, sizeof(lrmd_rsc_info_t));

        info->id = strdup(rsc_id);
        info->type = strdup(REMOTE_LRMD_RA);
        info->class = strdup(PCMK_RESOURCE_CLASS_OCF);
        info->provider = strdup("pacemaker");
    }

    return info;
}

static gboolean
is_remote_ra_supported_action(const char *action)
{
    if (!action) {
        return FALSE;
    } else if (strcmp(action, "start") &&
               strcmp(action, "stop") &&
               strcmp(action, "reload") &&
               strcmp(action, "migrate_to") &&
               strcmp(action, "migrate_from") && strcmp(action, "monitor")) {
        return FALSE;
    }

    return TRUE;
}

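/*!
 * \internal
 * \brief Fail all recurring monitor commands in a list
 *
 * Report each recurring monitor in the list as failed, then remove and free
 * it. Non-monitor commands are left in place.
 *
 * \param[in] list  Command list to scan
 *
 * \return Updated command list
 */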
static GList *
fail_all_monitor_cmds(GList * list)
{
    GList *rm_list = NULL;
    remote_ra_cmd_t *cmd = NULL;
    GListPtr gIter = NULL;

    for (gIter = list; gIter != NULL; gIter = gIter->next) {
        cmd = gIter->data;
        if (cmd->interval > 0 && safe_str_eq(cmd->action, "monitor")) {
            rm_list = g_list_append(rm_list, cmd);
        }
    }

    for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
        cmd = gIter->data;

        cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
        cmd->op_status = PCMK_LRM_OP_ERROR;
        crm_trace("Pre-emptively failing %s %s (interval=%d, %s)", cmd->action, cmd->rsc_id, cmd->interval, cmd->userdata);
        report_remote_ra_result(cmd);

        list = g_list_remove(list, cmd);
        free_cmd(cmd);
    }

    /* frees only the list data, not the cmds */
    g_list_free(rm_list);
    return list;
}

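/*!
 * \internal
 * \brief Remove a command matching an action and interval from a list
 *
 * The first matching command is removed from the list and freed.
 *
 * \param[in] list      Command list to scan
 * \param[in] action    Action name to match
 * \param[in] interval  Recurring interval (in ms) to match
 *
 * \return Updated command list
 */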
static GList *
remove_cmd(GList * list, const char *action, int interval)
{
    remote_ra_cmd_t *cmd = NULL;
    GListPtr gIter = NULL;

    for (gIter = list; gIter != NULL; gIter = gIter->next) {
        cmd = gIter->data;
        if (cmd->interval == interval && safe_str_eq(cmd->action, action)) {
            break;
        }
        cmd = NULL;
    }
    if (cmd) {
        list = g_list_remove(list, cmd);
        free_cmd(cmd);
    }
    return list;
}

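/*!
 * \internal
 * \brief Cancel a pending or in-flight action on a remote connection
 *
 * Queued commands matching the action and interval are removed outright; if
 * the in-flight command matches, it is only flagged as canceled so that its
 * completion handler can discard it.
 *
 * \param[in] lrm_state  Executor state the request arrived via (unused here)
 * \param[in] rsc_id     ID of the connection resource
 * \param[in] action     Name of the action to cancel
 * \param[in] interval   Recurring interval (in ms) of the action to cancel
 *
 * \return 0 on success, -EINVAL if the connection resource is unknown
 */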
int
remote_ra_cancel(lrm_state_t * lrm_state, const char *rsc_id, const char *action, int interval)
{
    lrm_state_t *connection_rsc = NULL;
    remote_ra_data_t *ra_data = NULL;

    connection_rsc = lrm_state_find(rsc_id);
    if (!connection_rsc || !connection_rsc->remote_ra_data) {
        return -EINVAL;
    }

    ra_data = connection_rsc->remote_ra_data;
    ra_data->cmds = remove_cmd(ra_data->cmds, action, interval);
    ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action, interval);
    if (ra_data->cur_cmd &&
        (ra_data->cur_cmd->interval == interval) &&
        (safe_str_eq(ra_data->cur_cmd->action, action))) {

        ra_data->cur_cmd->cancel = TRUE;
    }

    return 0;
}

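/*!
 * \internal
 * \brief Merge a new recurring monitor with an existing duplicate, if any
 *
 * \param[in] ra_data   Connection resource's remote RA data
 * \param[in] interval  Recurring interval (in ms) of the new monitor
 * \param[in] userdata  User data of the new monitor
 *
 * \return Existing duplicate monitor command if one was merged, else NULL
 */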
static remote_ra_cmd_t *
handle_dup_monitor(remote_ra_data_t *ra_data, int interval, const char *userdata)
{
    GList *gIter = NULL;
    remote_ra_cmd_t *cmd = NULL;

    /* There are three places a potential duplicate monitor operation
     * could exist:
     * 1. recurring_cmds list, where the op is waiting for its next interval
     * 2. cmds list, where the op is queued to get executed immediately
     * 3. cur_cmd, which means the monitor op is in flight right now.
     */
    if (interval == 0) {
        return NULL;
    }

    if (ra_data->cur_cmd &&
        ra_data->cur_cmd->cancel == FALSE &&
        ra_data->cur_cmd->interval == interval &&
        safe_str_eq(ra_data->cur_cmd->action, "monitor")) {

        cmd = ra_data->cur_cmd;
        goto handle_dup;
    }

    for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
        cmd = gIter->data;
        if (cmd->interval == interval && safe_str_eq(cmd->action, "monitor")) {
            goto handle_dup;
        }
    }

    for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
        cmd = gIter->data;
        if (cmd->interval == interval && safe_str_eq(cmd->action, "monitor")) {
            goto handle_dup;
        }
    }

    return NULL;

handle_dup:

    crm_trace("merging duplicate monitor cmd %s_monitor_%d", cmd->rsc_id, interval);

    /* update the userdata */
    if (userdata) {
       free(cmd->userdata);
       cmd->userdata = strdup(userdata);
    }

    /* if we've already reported success, generate a new call id */
    if (cmd->reported_success) {
        cmd->start_time = time(NULL);
        cmd->call_id = generate_callid();
        cmd->reported_success = 0;
    }

    /* if we have an interval_id set, that means we are in the process of
     * waiting for this cmd's next interval. instead of waiting, cancel
     * the timer and execute the action immediately */
    if (cmd->interval_id) {
        g_source_remove(cmd->interval_id);
        cmd->interval_id = 0;
        recurring_helper(cmd);
    }

    return cmd;
}

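/*!
 * \internal
 * \brief Request execution of an action on a remote connection resource
 *
 * Duplicate recurring monitors are merged rather than queued twice; other
 * commands are queued for handle_remote_ra_exec() to process.
 *
 * \param[in] lrm_state    Executor state the request arrived via
 * \param[in] rsc_id       ID of the connection resource
 * \param[in] action       Action to execute
 * \param[in] userdata     Opaque string to pass back with the result
 * \param[in] interval     Recurring interval of the action (in ms)
 * \param[in] timeout      Action timeout (in ms)
 * \param[in] start_delay  Delay before executing the action (in ms)
 * \param[in] params       Action parameters (consumed by this function)
 *
 * \return Call ID of the queued (or merged) command, or -EINVAL on error
 */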
int
remote_ra_exec(lrm_state_t * lrm_state, const char *rsc_id, const char *action, const char *userdata, int interval,     /* ms */
               int timeout,     /* ms */
               int start_delay, /* ms */
               lrmd_key_value_t * params)
{
    int rc = 0;
    lrm_state_t *connection_rsc = NULL;
    remote_ra_cmd_t *cmd = NULL;
    remote_ra_data_t *ra_data = NULL;

    if (is_remote_ra_supported_action(action) == FALSE) {
        rc = -EINVAL;
        goto exec_done;
    }

    connection_rsc = lrm_state_find(rsc_id);
    if (!connection_rsc) {
        rc = -EINVAL;
        goto exec_done;
    }

    remote_ra_data_init(connection_rsc);
    ra_data = connection_rsc->remote_ra_data;

    cmd = handle_dup_monitor(ra_data, interval, userdata);
    if (cmd) {
        rc = cmd->call_id;
        goto exec_done;
    }

    cmd = calloc(1, sizeof(remote_ra_cmd_t));
    cmd->owner = strdup(lrm_state->node_name);
    cmd->rsc_id = strdup(rsc_id);
    cmd->action = strdup(action);
    cmd->userdata = strdup(userdata);
    cmd->interval = interval;
    cmd->timeout = timeout;
    cmd->start_delay = start_delay;
    cmd->params = params;
    cmd->start_time = time(NULL);

    cmd->call_id = generate_callid();

    if (cmd->start_delay) {
        cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
    }

    ra_data->cmds = g_list_append(ra_data->cmds, cmd);
    mainloop_set_trigger(ra_data->work);

    return cmd->call_id;
  exec_done:

    lrmd_key_value_freeall(params);
    return rc;
}

/*!
 * \internal
 * \brief Immediately fail all monitors of a remote node, if proxied here
 *
 * \param[in] node_name  Name of pacemaker_remote node
 */
void
remote_ra_fail(const char *node_name)
{
    lrm_state_t *lrm_state = lrm_state_find(node_name);

    if (lrm_state && lrm_state_is_connected(lrm_state)) {
        remote_ra_data_t *ra_data = lrm_state->remote_ra_data;

        crm_info("Failing monitors on pacemaker_remote node %s", node_name);
        ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
        ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
    }
}

/* A guest node fencing implied by host fencing looks like:
 *
 *  <pseudo_event id="103" operation="stonith" operation_key="stonith-lxc1-off"
 *                on_node="lxc1" on_node_uuid="lxc1">
 *     <attributes CRM_meta_master_lxc_ms="10" CRM_meta_on_node="lxc1"
 *                 CRM_meta_on_node_uuid="lxc1" CRM_meta_stonith_action="off"
 *                 crm_feature_set="3.0.12"/>
 *     <downed>
 *       <node id="lxc1"/>
 *     </downed>
 *  </pseudo_event>
 */
#define XPATH_PSEUDO_FENCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
    "[@" XML_LRM_ATTR_TASK "='stonith']/" XML_GRAPH_TAG_DOWNED \
    "/" XML_CIB_TAG_NODE

/*!
 * \internal
 * \brief Check a pseudo-action for Pacemaker Remote node side effects
 *
 * \param[in] xml  XML of pseudo-action to check
 */
void
remote_ra_process_pseudo(xmlNode *xml)
{
    xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE);

    if (numXpathResults(search) == 1) {
        xmlNode *result = getXpathResult(search, 0);

        /* Normally, we handle the necessary side effects of a guest node stop
         * action when reporting the remote agent's result. However, if the stop
         * is implied due to fencing, it will be a fencing pseudo-event, and
         * there won't be a result to report. Handle that case here.
         *
         * This will result in a duplicate call to remote_node_down() if the
         * guest stop was real instead of implied, but that shouldn't hurt.
         *
         * There is still one corner case that isn't handled: if a guest node
         * isn't running any resources when its host is fenced, it will appear
         * to be cleanly stopped, so there will be no pseudo-fence, and our
         * peer cache state will be incorrect unless and until the guest is
         * recovered.
         */
        if (result) {
            const char *remote = ID(result);

            if (remote) {
                remote_node_down(remote, DOWN_ERASE_LRM);
            }
        }
    }
    freeXpathObject(search);
}

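/*!
 * \internal
 * \brief Update a remote node's maintenance state in the CIB and locally
 *
 * \param[in] lrm_state    State of the remote connection
 * \param[in] maintenance  Whether the node is now in maintenance mode
 */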
static void
remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
{
    remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
    xmlNode *update, *state;
    int call_opt, call_id = 0;
    crm_node_t *node;

    call_opt = crmd_cib_smart_opt();
    node = crm_remote_peer_get(lrm_state->node_name);
    CRM_CHECK(node != NULL, return);
    update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
    state = create_node_state_update(node, node_update_none, update,
                                     __FUNCTION__);
    crm_xml_add(state, XML_NODE_IS_MAINTENANCE, maintenance?"1":"0");
    fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
    if (call_id < 0) {
        crm_perror(LOG_WARNING, "%s CIB node state update failed", lrm_state->node_name);
    } else {
        /* TODO: still not 100% sure that async update will succeed ... */
        ra_data->is_maintenance = maintenance;
    }
    free_xml(update);
}

#define XPATH_PSEUDO_MAINTENANCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
    "[@" XML_LRM_ATTR_TASK "='" CRM_OP_MAINTENANCE_NODES "']/" \
    XML_GRAPH_TAG_MAINTENANCE

/*!
 * \internal
 * \brief Check a pseudo-action holding updates for maintenance state
 *
 * \param[in] xml  XML of pseudo-action to check
 */
void
remote_ra_process_maintenance_nodes(xmlNode *xml)
{
    xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);

    if (numXpathResults(search) == 1) {
        xmlNode *node;
        int cnt = 0, cnt_remote = 0;

        for (node =
                first_named_child(getXpathResult(search, 0), XML_CIB_TAG_NODE);
            node; node = __xml_next(node)) {
            lrm_state_t *lrm_state = lrm_state_find(ID(node));

            cnt++;
            if (lrm_state && lrm_state->remote_ra_data &&
                ((remote_ra_data_t *) lrm_state->remote_ra_data)->active) {
                cnt_remote++;
                remote_ra_maintenance(lrm_state,
                                        crm_atoi(crm_element_value(node,
                                            XML_NODE_IS_MAINTENANCE), "0"));

            }
        }
        crm_trace("Action holds %d nodes (%d remotes found) "
                    "adjusting maintenance-mode", cnt, cnt_remote);
    }
    freeXpathObject(search);
}

gboolean
remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
{
    remote_ra_data_t *ra_data = lrm_state->remote_ra_data;

    return ra_data->is_maintenance;
}

gboolean
remote_ra_controlling_guest(lrm_state_t * lrm_state)
{
    remote_ra_data_t *ra_data = lrm_state->remote_ra_data;

    return ra_data->controlling_guest;
}