1 /*
2  * Copyright (C) 2009 Andrew Beekhof <andrew@beekhof.net>
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License as published by the Free Software Foundation; either
7  * version 2 of the License, or (at your option) any later version.
8  *
9  * This software is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 
19 #include <crm_internal.h>
20 
21 #include <sys/param.h>
22 #include <stdio.h>
23 #include <sys/types.h>
24 #include <sys/wait.h>
25 #include <sys/stat.h>
26 #include <unistd.h>
27 #include <sys/utsname.h>
28 
29 #include <stdlib.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <ctype.h>
33 #include <regex.h>
34 
35 #include <crm/crm.h>
36 #include <crm/msg_xml.h>
37 #include <crm/common/ipc.h>
38 #include <crm/common/ipcs.h>
39 #include <crm/cluster/internal.h>
40 
41 #include <crm/stonith-ng.h>
42 #include <crm/fencing/internal.h>
43 #include <crm/common/xml.h>
44 
45 #include <crm/common/util.h>
46 #include <internal.h>
47 
48 #define TIMEOUT_MULTIPLY_FACTOR 1.2
49 
50 /* When one stonithd queries its peers for devices able to handle a fencing
51  * request, each peer will reply with a list of such devices available to it.
52  * Each reply will be parsed into a st_query_result_t, with each device's
53  * information kept in a device_properties_t.
54  */
55 
/* Per-device information taken from one peer's query reply. Instances are
 * stored as the values of st_query_result_t's devices table and are looked up
 * there by device ID.
 */
typedef struct device_properties_s {
    /* Whether access to this device has been verified */
    gboolean verified;

    /* The remaining members are arrays with st_phase_max entries, indexed by
     * the operation's current "phase" (op->phase)
     */

    /* Whether this device has been executed in each phase; a device that has
     * been executed in the current phase is no longer counted or selected
     */
    gboolean executed[st_phase_max];
    /* Whether this device is disallowed from executing in each phase; a
     * disallowed device is never returned by a device lookup for that phase
     */
    gboolean disallowed[st_phase_max];
    /* Action-specific timeout for each phase */
    int custom_action_timeout[st_phase_max];
    /* Action-specific maximum random delay for each phase */
    int delay_max[st_phase_max];
    /* Action-specific base delay for each phase */
    int delay_base[st_phase_max];
} device_properties_t;
73 
/* One peer's reply to a device query for a fencing operation. Instances are
 * kept in the operation's query_results list and released with
 * free_remote_query().
 */
typedef struct st_query_result_s {
    /* Name of peer that sent this result (released with free()) */
    char *host;
    /* Only try peers for non-topology based operations once */
    gboolean tried;
    /* Number of entries in the devices table */
    int ndevices;
    /* Devices available to this host that are capable of fencing the target;
     * looked up by device ID, with device_properties_t values
     */
    GHashTable *devices;
} st_query_result_t;
84 
/* Table of remote fencing operations (remote_fencing_op_t values); destroyed
 * and reset to NULL by free_stonith_remote_op_list()
 */
GHashTable *stonith_remote_op_list = NULL;

/* Forward declarations for functions that are used before their definitions */
void call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer);
static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup);

/* Defined outside this file */
extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
                                  int call_options);

static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
static int get_op_total_timeout(const remote_fencing_op_t *op,
                                const st_query_result_t *chosen_peer);
94 
95 static gint
sort_strings(gconstpointer a,gconstpointer b)96 sort_strings(gconstpointer a, gconstpointer b)
97 {
98     return strcmp(a, b);
99 }
100 
101 static void
free_remote_query(gpointer data)102 free_remote_query(gpointer data)
103 {
104     if (data) {
105         st_query_result_t *query = data;
106 
107         crm_trace("Free'ing query result from %s", query->host);
108         g_hash_table_destroy(query->devices);
109         free(query->host);
110         free(query);
111     }
112 }
113 
114 void
free_stonith_remote_op_list()115 free_stonith_remote_op_list()
116 {
117     if (stonith_remote_op_list != NULL) {
118         g_hash_table_destroy(stonith_remote_op_list);
119         stonith_remote_op_list = NULL;
120     }
121 }
122 
123 static void
clear_remote_op_timers(remote_fencing_op_t * op)124 clear_remote_op_timers(remote_fencing_op_t * op)
125 {
126     if (op->query_timer) {
127         g_source_remove(op->query_timer);
128         op->query_timer = 0;
129     }
130     if (op->op_timer_total) {
131         g_source_remove(op->op_timer_total);
132         op->op_timer_total = 0;
133     }
134     if (op->op_timer_one) {
135         g_source_remove(op->op_timer_one);
136         op->op_timer_one = 0;
137     }
138 }
139 
140 static void
free_remote_op(gpointer data)141 free_remote_op(gpointer data)
142 {
143     remote_fencing_op_t *op = data;
144 
145     crm_trace("Free'ing op %s for %s", op->id, op->target);
146     crm_log_xml_debug(op->request, "Destroying");
147 
148     clear_remote_op_timers(op);
149 
150     free(op->id);
151     free(op->action);
152     free(op->delegate);
153     free(op->target);
154     free(op->client_id);
155     free(op->client_name);
156     free(op->originator);
157 
158     if (op->query_results) {
159         g_list_free_full(op->query_results, free_remote_query);
160     }
161     if (op->request) {
162         free_xml(op->request);
163         op->request = NULL;
164     }
165     if (op->devices_list) {
166         g_list_free_full(op->devices_list, free);
167         op->devices_list = NULL;
168     }
169     g_list_free_full(op->automatic_list, free);
170     g_list_free(op->duplicates);
171     free(op);
172 }
173 
174 void
init_stonith_remote_op_hash_table(GHashTable ** table)175 init_stonith_remote_op_hash_table(GHashTable **table)
176 {
177     if (*table == NULL) {
178         *table = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_remote_op);
179     }
180 }
181 
/* User data passed to count_peer_device() while iterating a peer's devices */
struct peer_count_data {
    /* Operation being counted for; its phase selects which "executed" entry
     * of each device is examined
     */
    const remote_fencing_op_t *op;
    /* If TRUE, only devices marked as verified are counted */
    gboolean verified_only;
    /* Running total of matching devices */
    int count;
};
187 
188 /*!
189  * \internal
190  * \brief Increment a counter if a device has not been executed yet
191  *
192  * \param[in] key        Device ID (ignored)
193  * \param[in] value      Device properties
194  * \param[in] user_data  Peer count data
195  */
196 static void
count_peer_device(gpointer key,gpointer value,gpointer user_data)197 count_peer_device(gpointer key, gpointer value, gpointer user_data)
198 {
199     device_properties_t *props = (device_properties_t*)value;
200     struct peer_count_data *data = user_data;
201 
202     if (!props->executed[data->op->phase]
203         && (!data->verified_only || props->verified)) {
204         ++(data->count);
205     }
206 }
207 
208 /*!
209  * \internal
210  * \brief Check the number of available devices in a peer's query results
211  *
212  * \param[in] op             Operation that results are for
213  * \param[in] peer           Peer to count
214  * \param[in] verified_only  Whether to count only verified devices
215  *
216  * \return Number of devices available to peer that were not already executed
217  */
218 static int
count_peer_devices(const remote_fencing_op_t * op,const st_query_result_t * peer,gboolean verified_only)219 count_peer_devices(const remote_fencing_op_t *op, const st_query_result_t *peer,
220                    gboolean verified_only)
221 {
222     struct peer_count_data data;
223 
224     data.op = op;
225     data.verified_only = verified_only;
226     data.count = 0;
227     if (peer) {
228         g_hash_table_foreach(peer->devices, count_peer_device, &data);
229     }
230     return data.count;
231 }
232 
233 /*!
234  * \internal
235  * \brief Search for a device in a query result
236  *
237  * \param[in] op      Operation that result is for
238  * \param[in] peer    Query result for a peer
239  * \param[in] device  Device ID to search for
240  *
241  * \return Device properties if found, NULL otherwise
242  */
243 static device_properties_t *
find_peer_device(const remote_fencing_op_t * op,const st_query_result_t * peer,const char * device)244 find_peer_device(const remote_fencing_op_t *op, const st_query_result_t *peer,
245                  const char *device)
246 {
247     device_properties_t *props = g_hash_table_lookup(peer->devices, device);
248 
249     return (props && !props->executed[op->phase]
250            && !props->disallowed[op->phase])? props : NULL;
251 }
252 
253 /*!
254  * \internal
255  * \brief Find a device in a peer's device list and mark it as executed
256  *
257  * \param[in]     op                     Operation that peer result is for
258  * \param[in,out] peer                   Peer with results to search
259  * \param[in]     device                 ID of device to mark as done
260  * \param[in]     verified_devices_only  Only consider verified devices
261  *
262  * \return TRUE if device was found and marked, FALSE otherwise
263  */
264 static gboolean
grab_peer_device(const remote_fencing_op_t * op,st_query_result_t * peer,const char * device,gboolean verified_devices_only)265 grab_peer_device(const remote_fencing_op_t *op, st_query_result_t *peer,
266                  const char *device, gboolean verified_devices_only)
267 {
268     device_properties_t *props = find_peer_device(op, peer, device);
269 
270     if ((props == NULL) || (verified_devices_only && !props->verified)) {
271         return FALSE;
272     }
273 
274     crm_trace("Removing %s from %s (%d remaining)",
275               device, peer->host, count_peer_devices(op, peer, FALSE));
276     props->executed[op->phase] = TRUE;
277     return TRUE;
278 }
279 
280 /*!
281  * \internal
282  * \brief Return an operation's originally requested action (before any remap)
283  *
284  * \param[in] op  Operation to check
285  *
286  * \return Operation's original action
287  */
288 static const char *
op_requested_action(const remote_fencing_op_t * op)289 op_requested_action(const remote_fencing_op_t *op)
290 {
291     return ((op->phase > st_phase_requested)? "reboot" : op->action);
292 }
293 
294 /*!
295  * \internal
296  * \brief Remap a "reboot" operation to the "off" phase
297  *
298  * \param[in,out] op      Operation to remap
299  */
300 static void
op_phase_off(remote_fencing_op_t * op)301 op_phase_off(remote_fencing_op_t *op)
302 {
303     crm_info("Remapping multiple-device reboot targeting %s (%s) to 'off'",
304              op->target, op->id);
305     op->phase = st_phase_off;
306 
307     /* Happily, "off" and "on" are shorter than "reboot", so we can reuse the
308      * memory allocation at each phase.
309      */
310     strcpy(op->action, "off");
311 }
312 
313 /*!
314  * \internal
315  * \brief Advance a remapped reboot operation to the "on" phase
316  *
317  * \param[in,out] op  Operation to remap
318  */
319 static void
op_phase_on(remote_fencing_op_t * op)320 op_phase_on(remote_fencing_op_t *op)
321 {
322     GListPtr iter = NULL;
323 
324     crm_info("Remapped 'off' targeting %s complete, "
325              "remapping to 'on' for %s.%.8s",
326              op->target, op->client_name, op->id);
327     op->phase = st_phase_on;
328     strcpy(op->action, "on");
329 
330     /* Skip devices with automatic unfencing, because the cluster will handle it
331      * when the node rejoins.
332      */
333     for (iter = op->automatic_list; iter != NULL; iter = iter->next) {
334         GListPtr match = g_list_find_custom(op->devices_list, iter->data,
335                                             sort_strings);
336 
337         if (match) {
338             op->devices_list = g_list_remove(op->devices_list, match->data);
339         }
340     }
341     g_list_free_full(op->automatic_list, free);
342     op->automatic_list = NULL;
343 
344     /* Rewind device list pointer */
345     op->devices = op->devices_list;
346 }
347 
348 /*!
349  * \internal
350  * \brief Reset a remapped reboot operation
351  *
352  * \param[in,out] op  Operation to reset
353  */
354 static void
undo_op_remap(remote_fencing_op_t * op)355 undo_op_remap(remote_fencing_op_t *op)
356 {
357     if (op->phase > 0) {
358         crm_info("Undoing remap of reboot targeting %s for %s.%.8s",
359                  op->target, op->client_name, op->id);
360         op->phase = st_phase_requested;
361         strcpy(op->action, "reboot");
362     }
363 }
364 
365 static xmlNode *
create_op_done_notify(remote_fencing_op_t * op,int rc)366 create_op_done_notify(remote_fencing_op_t * op, int rc)
367 {
368     xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
369 
370     crm_xml_add_int(notify_data, "state", op->state);
371     crm_xml_add_int(notify_data, F_STONITH_RC, rc);
372     crm_xml_add(notify_data, F_STONITH_TARGET, op->target);
373     crm_xml_add(notify_data, F_STONITH_ACTION, op->action);
374     crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate);
375     crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, op->id);
376     crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator);
377     crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id);
378     crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name);
379 
380     return notify_data;
381 }
382 
383 void
stonith_bcast_result_to_peers(remote_fencing_op_t * op,int rc)384 stonith_bcast_result_to_peers(remote_fencing_op_t * op, int rc)
385 {
386     static int count = 0;
387     xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
388     xmlNode *notify_data = create_op_done_notify(op, rc);
389 
390     count++;
391     crm_trace("Broadcasting result to peers");
392     crm_xml_add(bcast, F_TYPE, T_STONITH_NOTIFY);
393     crm_xml_add(bcast, F_SUBTYPE, "broadcast");
394     crm_xml_add(bcast, F_STONITH_OPERATION, T_STONITH_NOTIFY);
395     crm_xml_add_int(bcast, "count", count);
396     add_message_xml(bcast, F_STONITH_CALLDATA, notify_data);
397     send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE);
398     free_xml(notify_data);
399     free_xml(bcast);
400 
401     return;
402 }
403 
404 static void
handle_local_reply_and_notify(remote_fencing_op_t * op,xmlNode * data,int rc)405 handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc)
406 {
407     xmlNode *notify_data = NULL;
408     xmlNode *reply = NULL;
409 
410     if (op->notify_sent == TRUE) {
411         /* nothing to do */
412         return;
413     }
414 
415     /* Do notification with a clean data object */
416     notify_data = create_op_done_notify(op, rc);
417     crm_xml_add_int(data, "state", op->state);
418     crm_xml_add(data, F_STONITH_TARGET, op->target);
419     crm_xml_add(data, F_STONITH_OPERATION, op->action);
420 
421     reply = stonith_construct_reply(op->request, NULL, data, rc);
422     crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
423 
424     /* Send fencing OP reply to local client that initiated fencing */
425     do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
426 
427     /* bcast to all local clients that the fencing operation happend */
428     do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data);
429     do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
430 
431     /* mark this op as having notify's already sent */
432     op->notify_sent = TRUE;
433     free_xml(reply);
434     free_xml(notify_data);
435 }
436 
437 static void
handle_duplicates(remote_fencing_op_t * op,xmlNode * data,int rc)438 handle_duplicates(remote_fencing_op_t * op, xmlNode * data, int rc)
439 {
440     GListPtr iter = NULL;
441 
442     for (iter = op->duplicates; iter != NULL; iter = iter->next) {
443         remote_fencing_op_t *other = iter->data;
444 
445         if (other->state == st_duplicate) {
446             /* Ie. it hasn't timed out already */
447             other->state = op->state;
448             crm_debug("Peforming duplicate notification for %s@%s.%.8s = %s", other->client_name,
449                       other->originator, other->id, pcmk_strerror(rc));
450             remote_op_done(other, data, rc, TRUE);
451 
452         } else {
453             crm_err("Skipping duplicate notification for %s@%s - %d", other->client_name,
454                     other->originator, other->state);
455         }
456     }
457 }
458 
459 /*!
460  * \internal
461  * \brief Finalize a remote operation.
462  *
463  * \description This function has two code paths.
464  *
465  * Path 1. This node is the owner of the operation and needs
466  *         to notify the cpg group via a broadcast as to the operation's
467  *         results.
468  *
469  * Path 2. The cpg broadcast is received. All nodes notify their local
470  *         stonith clients the operation results.
471  *
472  * So, The owner of the operation first notifies the cluster of the result,
473  * and once that cpg notify is received back it notifies all the local clients.
474  *
475  * Nodes that are passive watchers of the operation will receive the
476  * broadcast and only need to notify their local clients the operation finished.
477  *
478  * \param op, The fencing operation to finalize
479  * \param data, The xml msg reply (if present) of the last delegated fencing
480  *              operation.
481  * \param dup, Is this operation a duplicate, if so treat it a little differently
482  *             making sure the broadcast is not sent out.
483  */
484 static void
remote_op_done(remote_fencing_op_t * op,xmlNode * data,int rc,int dup)485 remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup)
486 {
487     int level = LOG_ERR;
488     const char *subt = NULL;
489     xmlNode *local_data = NULL;
490 
491     op->completed = time(NULL);
492     clear_remote_op_timers(op);
493     undo_op_remap(op);
494 
495     if (op->notify_sent == TRUE) {
496         crm_err("Already sent notifications for '%s' targeting %s on %s for "
497                 "client %s@%s.%.8s: %s " CRM_XS " rc=%d state=%d",
498                 op->action, op->target,
499                 (op->delegate? op->delegate : "unknown node"),
500                 op->client_name, op->originator, op->id, pcmk_strerror(rc),
501                 rc, op->state);
502         goto remote_op_done_cleanup;
503     }
504 
505     if (!op->delegate && data && rc != -ENODEV && rc != -EHOSTUNREACH) {
506         xmlNode *ndata = get_xpath_object("//@" F_STONITH_DELEGATE, data, LOG_TRACE);
507         if(ndata) {
508             op->delegate = crm_element_value_copy(ndata, F_STONITH_DELEGATE);
509         } else {
510             op->delegate = crm_element_value_copy(data, F_ORIG);
511         }
512     }
513 
514     if (data == NULL) {
515         data = create_xml_node(NULL, "remote-op");
516         local_data = data;
517     }
518 
519     /* Tell everyone the operation is done, we will continue
520      * with doing the local notifications once we receive
521      * the broadcast back. */
522     subt = crm_element_value(data, F_SUBTYPE);
523     if (dup == FALSE && safe_str_neq(subt, "broadcast")) {
524         /* Defer notification until the bcast message arrives */
525         stonith_bcast_result_to_peers(op, rc);
526         goto remote_op_done_cleanup;
527     }
528 
529     if (rc == pcmk_ok || dup) {
530         level = LOG_NOTICE;
531     } else if (safe_str_neq(op->originator, stonith_our_uname)) {
532         level = LOG_NOTICE;
533     }
534 
535     do_crm_log(level, "Operation '%s'%s%s on %s for %s@%s.%.8s: %s",
536                op->action, (op->target? " targeting " : ""),
537                (op->target? op->target : ""),
538                (op->delegate? op->delegate : "<no-one>"),
539                op->client_name, op->originator, op->id, pcmk_strerror(rc));
540 
541     handle_local_reply_and_notify(op, data, rc);
542 
543     if (dup == FALSE) {
544         handle_duplicates(op, data, rc);
545     }
546 
547     /* Free non-essential parts of the record
548      * Keep the record around so we can query the history
549      */
550     if (op->query_results) {
551         g_list_free_full(op->query_results, free_remote_query);
552         op->query_results = NULL;
553     }
554 
555     if (op->request) {
556         free_xml(op->request);
557         op->request = NULL;
558     }
559 
560   remote_op_done_cleanup:
561     free_xml(local_data);
562 }
563 
564 static gboolean
remote_op_watchdog_done(gpointer userdata)565 remote_op_watchdog_done(gpointer userdata)
566 {
567     remote_fencing_op_t *op = userdata;
568 
569     op->op_timer_one = 0;
570 
571     crm_notice("Self-fencing (%s) by %s for %s.%8s assumed complete",
572                op->action, op->target, op->client_name, op->id);
573     op->state = st_done;
574     remote_op_done(op, NULL, pcmk_ok, FALSE);
575     return FALSE;
576 }
577 
578 static gboolean
remote_op_timeout_one(gpointer userdata)579 remote_op_timeout_one(gpointer userdata)
580 {
581     remote_fencing_op_t *op = userdata;
582 
583     op->op_timer_one = 0;
584 
585     crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
586                " id=%s", op->action, op->target, op->client_name, op->id);
587     call_remote_stonith(op, NULL);
588     return FALSE;
589 }
590 
591 static gboolean
remote_op_timeout(gpointer userdata)592 remote_op_timeout(gpointer userdata)
593 {
594     remote_fencing_op_t *op = userdata;
595 
596     op->op_timer_total = 0;
597 
598     if (op->state == st_done) {
599         crm_debug("Action '%s' targeting %s for client %s already completed "
600                   CRM_XS " id=%s",
601                   op->action, op->target, op->client_name, op->id);
602         return FALSE;
603     }
604 
605     crm_debug("Action '%s' targeting %s for client %s timed out "
606               CRM_XS " id=%s",
607               op->action, op->target, op->client_name, op->id);
608 
609     if (op->phase == st_phase_on) {
610         /* A remapped reboot operation timed out in the "on" phase, but the
611          * "off" phase completed successfully, so quit trying any further
612          * devices, and return success.
613          */
614         remote_op_done(op, NULL, pcmk_ok, FALSE);
615         return FALSE;
616     }
617 
618     op->state = st_failed;
619 
620     remote_op_done(op, NULL, -ETIME, FALSE);
621 
622     return FALSE;
623 }
624 
625 static gboolean
remote_op_query_timeout(gpointer data)626 remote_op_query_timeout(gpointer data)
627 {
628     remote_fencing_op_t *op = data;
629 
630     op->query_timer = 0;
631     if (op->state == st_done) {
632         crm_debug("Operation %s targeting %s already completed",
633                   op->id, op->target);
634     } else if (op->state == st_exec) {
635         crm_debug("Operation %s targeting %s already in progress",
636                   op->id, op->target);
637     } else if (op->query_results) {
638         crm_debug("Query %s targeting %s complete (state=%d)",
639                   op->id, op->target, op->state);
640         call_remote_stonith(op, NULL);
641     } else {
642         crm_debug("Query %s targeting %s timed out (state=%d)",
643                   op->id, op->target, op->state);
644         if (op->op_timer_total) {
645             g_source_remove(op->op_timer_total);
646             op->op_timer_total = 0;
647         }
648         remote_op_timeout(op);
649     }
650 
651     return FALSE;
652 }
653 
654 static gboolean
topology_is_empty(stonith_topology_t * tp)655 topology_is_empty(stonith_topology_t *tp)
656 {
657     int i;
658 
659     if (tp == NULL) {
660         return TRUE;
661     }
662 
663     for (i = 0; i < ST_LEVEL_MAX; i++) {
664         if (tp->levels[i] != NULL) {
665             return FALSE;
666         }
667     }
668     return TRUE;
669 }
670 
671 /*!
672  * \internal
673  * \brief Add a device to an operation's automatic unfencing list
674  *
675  * \param[in,out] op      Operation to modify
676  * \param[in]     device  Device ID to add
677  */
678 static void
add_required_device(remote_fencing_op_t * op,const char * device)679 add_required_device(remote_fencing_op_t *op, const char *device)
680 {
681     GListPtr match  = g_list_find_custom(op->automatic_list, device,
682                                          sort_strings);
683 
684     if (!match) {
685         op->automatic_list = g_list_prepend(op->automatic_list, strdup(device));
686     }
687 }
688 
689 /*!
690  * \internal
691  * \brief Remove a device from the automatic unfencing list
692  *
693  * \param[in,out] op      Operation to modify
694  * \param[in]     device  Device ID to remove
695  */
696 static void
remove_required_device(remote_fencing_op_t * op,const char * device)697 remove_required_device(remote_fencing_op_t *op, const char *device)
698 {
699     GListPtr match = g_list_find_custom(op->automatic_list, device,
700                                         sort_strings);
701 
702     if (match) {
703         op->automatic_list = g_list_remove(op->automatic_list, match->data);
704     }
705 }
706 
707 /* deep copy the device list */
708 static void
set_op_device_list(remote_fencing_op_t * op,GListPtr devices)709 set_op_device_list(remote_fencing_op_t * op, GListPtr devices)
710 {
711     GListPtr lpc = NULL;
712 
713     if (op->devices_list) {
714         g_list_free_full(op->devices_list, free);
715         op->devices_list = NULL;
716     }
717     for (lpc = devices; lpc != NULL; lpc = lpc->next) {
718         op->devices_list = g_list_append(op->devices_list, strdup(lpc->data));
719     }
720     op->devices = op->devices_list;
721 }
722 
723 /*!
724  * \internal
725  * \brief Check whether a node matches a topology target
726  *
727  * \param[in] tp    Topology table entry to check
728  * \param[in] node  Name of node to check
729  *
730  * \return TRUE if node matches topology target
731  */
732 static gboolean
topology_matches(const stonith_topology_t * tp,const char * node)733 topology_matches(const stonith_topology_t *tp, const char *node)
734 {
735     regex_t r_patt;
736 
737     CRM_CHECK(node && tp && tp->target, return FALSE);
738     switch(tp->kind) {
739         case 2:
740             /* This level targets by attribute, so tp->target is a NAME=VALUE pair
741              * of a permanent attribute applied to targeted nodes. The test below
742              * relies on the locally cached copy of the CIB, so if fencing needs to
743              * be done before the initial CIB is received or after a malformed CIB
744              * is received, then the topology will be unable to be used.
745              */
746             if (node_has_attr(node, tp->target_attribute, tp->target_value)) {
747                 crm_notice("Matched %s with %s by attribute", node, tp->target);
748                 return TRUE;
749             }
750             break;
751         case 1:
752             /* This level targets by name, so tp->target is a regular expression
753              * matching names of nodes to be targeted.
754              */
755 
756             if (regcomp(&r_patt, tp->target_pattern, REG_EXTENDED|REG_NOSUB)) {
757                 crm_info("Bad regex '%s' for fencing level", tp->target);
758             } else {
759                 int status = regexec(&r_patt, node, 0, NULL, 0);
760 
761                 regfree(&r_patt);
762                 if (status == 0) {
763                     crm_notice("Matched %s with %s by name", node, tp->target);
764                     return TRUE;
765                 }
766             }
767             break;
768         case 0:
769             crm_trace("Testing %s against %s", node, tp->target);
770             return safe_str_eq(tp->target, node);
771     }
772     crm_trace("No match for %s with %s", node, tp->target);
773     return FALSE;
774 }
775 
776 stonith_topology_t *
find_topology_for_host(const char * host)777 find_topology_for_host(const char *host)
778 {
779     GHashTableIter tIter;
780     stonith_topology_t *tp = g_hash_table_lookup(topology, host);
781 
782     if(tp != NULL) {
783         crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology));
784         return tp;
785     }
786 
787     g_hash_table_iter_init(&tIter, topology);
788     while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) & tp)) {
789         if (topology_matches(tp, host)) {
790             crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology));
791             return tp;
792         }
793     }
794 
795     crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology));
796     return NULL;
797 }
798 
799 /*!
800  * \internal
801  * \brief Set fencing operation's device list to target's next topology level
802  *
803  * \param[in,out] op  Remote fencing operation to modify
804  *
805  * \return pcmk_ok if successful, target was not specified (i.e. queries) or
806  *         target has no topology, or -EINVAL if no more topology levels to try
807  */
808 static int
stonith_topology_next(remote_fencing_op_t * op)809 stonith_topology_next(remote_fencing_op_t * op)
810 {
811     stonith_topology_t *tp = NULL;
812 
813     if (op->target) {
814         /* Queries don't have a target set */
815         tp = find_topology_for_host(op->target);
816     }
817     if (topology_is_empty(tp)) {
818         return pcmk_ok;
819     }
820 
821     set_bit(op->call_options, st_opt_topology);
822 
823     /* This is a new level, so undo any remapping left over from previous */
824     undo_op_remap(op);
825 
826     do {
827         op->level++;
828 
829     } while (op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL);
830 
831     if (op->level < ST_LEVEL_MAX) {
832         crm_trace("Attempting fencing level %d targeting %s (%d devices) "
833                   "for client %s@%s.%.8s",
834                   op->level, op->target, g_list_length(tp->levels[op->level]),
835                   op->client_name, op->originator, op->id);
836         set_op_device_list(op, tp->levels[op->level]);
837 
838         // The requested delay has been applied for the first fencing level
839         if (op->level > 1 && op->delay > 0) {
840             op->delay = 0;
841         }
842 
843         if (g_list_next(op->devices_list) && safe_str_eq(op->action, "reboot")) {
844             /* A reboot has been requested for a topology level with multiple
845              * devices. Instead of rebooting the devices sequentially, we will
846              * turn them all off, then turn them all on again. (Think about
847              * switched power outlets for redundant power supplies.)
848              */
849             op_phase_off(op);
850         }
851         return pcmk_ok;
852     }
853 
854     crm_notice("All fencing options targeting %s for client %s@%s.%.8s failed",
855                op->target, op->client_name, op->originator, op->id);
856     return -EINVAL;
857 }
858 
859 /*!
860  * \brief Check to see if this operation is a duplicate of another in flight
861  * operation. If so merge this operation into the inflight operation, and mark
862  * it as a duplicate.
863  */
864 static void
merge_duplicates(remote_fencing_op_t * op)865 merge_duplicates(remote_fencing_op_t * op)
866 {
867     GHashTableIter iter;
868     remote_fencing_op_t *other = NULL;
869 
870     time_t now = time(NULL);
871 
872     g_hash_table_iter_init(&iter, stonith_remote_op_list);
873     while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) {
874         crm_node_t *peer = NULL;
875         const char *other_action = op_requested_action(other);
876 
877         if (other->state > st_exec) {
878             /* Must be in-progress */
879             continue;
880         } else if (safe_str_neq(op->target, other->target)) {
881             /* Must be for the same node */
882             continue;
883         } else if (safe_str_neq(op->action, other_action)) {
884             crm_trace("Must be for the same action: %s vs. %s",
885                       op->action, other_action);
886             continue;
887         } else if (safe_str_eq(op->client_name, other->client_name)) {
888             crm_trace("Must be for different clients: %s", op->client_name);
889             continue;
890         } else if (safe_str_eq(other->target, other->originator)) {
891             crm_trace("Can't be a suicide operation: %s", other->target);
892             continue;
893         }
894 
895         peer = crm_get_peer(0, other->originator);
896         if(fencing_peer_active(peer) == FALSE) {
897             crm_notice("Failing action '%s' targeting %s originating from "
898                        "client %s@%s.%.8s: Originator is dead",
899                        other->action, other->target, other->client_name, other->originator, other->id);
900             other->state = st_failed;
901             continue;
902 
903         } else if(other->total_timeout > 0 && now > (other->total_timeout + other->created)) {
904             crm_info("Action '%s' targeting %s originating from client "
905                      "%s@%s.%.8s is too old: %ld vs. %ld + %d",
906                      other->action, other->target, other->client_name, other->originator, other->id,
907                      now, other->created, other->total_timeout);
908             continue;
909         }
910 
911         /* There is another in-flight request to fence the same host
912          * Piggyback on that instead.  If it fails, so do we.
913          */
914         other->duplicates = g_list_append(other->duplicates, op);
915         if (other->total_timeout == 0) {
916             crm_trace("Making a best-guess as to the timeout used");
917             other->total_timeout = op->total_timeout =
918                 TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL);
919         }
920         crm_notice("Merging stonith action '%s' targeting %s originating from "
921                    "client %s.%.8s with identical request from %s@%s.%.8s (%ds)",
922                    op->action, op->target, op->client_name, op->id,
923                    other->client_name, other->originator, other->id,
924                    other->total_timeout);
925         report_timeout_period(op, other->total_timeout);
926         op->state = st_duplicate;
927     }
928 }
929 
fencing_active_peers(void)930 static uint32_t fencing_active_peers(void)
931 {
932     uint32_t count = 0;
933     crm_node_t *entry;
934     GHashTableIter gIter;
935 
936     g_hash_table_iter_init(&gIter, crm_peer_cache);
937     while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
938         if(fencing_peer_active(entry)) {
939             count++;
940         }
941     }
942     return count;
943 }
944 
945 int
stonith_manual_ack(xmlNode * msg,remote_fencing_op_t * op)946 stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op)
947 {
948     xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
949 
950     op->state = st_done;
951     op->completed = time(NULL);
952     op->delegate = strdup("a human");
953 
954     crm_notice("Injecting manual confirmation that %s is safely off/down",
955                crm_element_value(dev, F_STONITH_TARGET));
956 
957     remote_op_done(op, msg, pcmk_ok, FALSE);
958 
959     /* Replies are sent via done_cb->stonith_send_async_reply()->do_local_reply() */
960     return -EINPROGRESS;
961 }
962 
963 /*!
964  * \internal
965  * \brief Create a new remote stonith operation
966  *
967  * \param[in] client   ID of local stonith client that initiated the operation
968  * \param[in] request  The request from the client that started the operation
969  * \param[in] peer     TRUE if this operation is owned by another stonith peer
970  *                     (an operation owned by one peer is stored on all peers,
971  *                     but only the owner executes it; all nodes get the results
972  *                     once the owner finishes execution)
973  */
974 void *
create_remote_stonith_op(const char * client,xmlNode * request,gboolean peer)975 create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
976 {
977     remote_fencing_op_t *op = NULL;
978     xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
979     int call_options = 0;
980     const char *operation = NULL;
981 
982     init_stonith_remote_op_hash_table(&stonith_remote_op_list);
983 
984     /* If this operation is owned by another node, check to make
985      * sure we haven't already created this operation. */
986     if (peer && dev) {
987         const char *op_id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
988 
989         CRM_CHECK(op_id != NULL, return NULL);
990 
991         op = g_hash_table_lookup(stonith_remote_op_list, op_id);
992         if (op) {
993             crm_debug("%s already exists", op_id);
994             return op;
995         }
996     }
997 
998     op = calloc(1, sizeof(remote_fencing_op_t));
999 
1000     crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout));
1001     // Value -1 means disable any static/random fencing delays
1002     crm_element_value_int(request, F_STONITH_DELAY, &(op->delay));
1003 
1004     if (peer && dev) {
1005         op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID);
1006     } else {
1007         op->id = crm_generate_uuid();
1008     }
1009 
1010     g_hash_table_replace(stonith_remote_op_list, op->id, op);
1011     CRM_LOG_ASSERT(g_hash_table_lookup(stonith_remote_op_list, op->id) != NULL);
1012     crm_trace("Created %s", op->id);
1013 
1014     op->state = st_query;
1015     op->replies_expected = fencing_active_peers();
1016     op->action = crm_element_value_copy(dev, F_STONITH_ACTION);
1017     op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN);
1018     op->delegate = crm_element_value_copy(dev, F_STONITH_DELEGATE); /* May not be set */
1019     op->created = time(NULL);
1020 
1021     if (op->originator == NULL) {
1022         /* Local or relayed request */
1023         op->originator = strdup(stonith_our_uname);
1024     }
1025 
1026     CRM_LOG_ASSERT(client != NULL);
1027     if (client) {
1028         op->client_id = strdup(client);
1029     }
1030 
1031     /* For a RELAY operation, set fenced on the client. */
1032     operation = crm_element_value(request, F_STONITH_OPERATION);
1033 
1034     if (crm_str_eq(operation, STONITH_OP_RELAY, TRUE)) {
1035         op->client_name = crm_strdup_printf("%s.%lu", crm_system_name,
1036                                          (unsigned long) getpid());
1037     } else {
1038         op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME);
1039     }
1040 
1041     op->target = crm_element_value_copy(dev, F_STONITH_TARGET);
1042     op->request = copy_xml(request);    /* TODO: Figure out how to avoid this */
1043     crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
1044     op->call_options = call_options;
1045 
1046     crm_element_value_int(request, F_STONITH_CALLID, &(op->client_callid));
1047 
1048     crm_trace("%s new stonith op %s ('%s' targeting %s for client %s)",
1049               (peer && dev)? "Recorded" : "Generated", op->id, op->action,
1050               op->target, op->client_name);
1051 
1052     if (op->call_options & st_opt_cs_nodeid) {
1053         int nodeid = crm_atoi(op->target, NULL);
1054         crm_node_t *node = crm_find_known_peer_full(nodeid, NULL, CRM_GET_PEER_ANY);
1055 
1056         /* Ensure the conversion only happens once */
1057         op->call_options &= ~st_opt_cs_nodeid;
1058 
1059         if (node && node->uname) {
1060             free(op->target);
1061             op->target = strdup(node->uname);
1062 
1063         } else {
1064             crm_warn("Could not expand nodeid '%s' into a host name", op->target);
1065         }
1066     }
1067 
1068     /* check to see if this is a duplicate operation of another in-flight operation */
1069     merge_duplicates(op);
1070 
1071     if (op->state != st_duplicate) {
1072         /* kick history readers */
1073         do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
1074     }
1075 
1076     /* safe to trim as long as that doesn't touch pending ops */
1077     stonith_fence_history_trim();
1078 
1079     return op;
1080 }
1081 
1082 remote_fencing_op_t *
initiate_remote_stonith_op(crm_client_t * client,xmlNode * request,gboolean manual_ack)1083 initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean manual_ack)
1084 {
1085     int query_timeout = 0;
1086     xmlNode *query = NULL;
1087     const char *client_id = NULL;
1088     remote_fencing_op_t *op = NULL;
1089     const char *relay_op_id = NULL;
1090     const char *operation = NULL;
1091 
1092     if (client) {
1093         client_id = client->id;
1094     } else {
1095         client_id = crm_element_value(request, F_STONITH_CLIENTID);
1096     }
1097 
1098     CRM_LOG_ASSERT(client_id != NULL);
1099     op = create_remote_stonith_op(client_id, request, FALSE);
1100     op->owner = TRUE;
1101     if (manual_ack) {
1102         crm_notice("Initiating manual confirmation for %s: %s",
1103                    op->target, op->id);
1104         return op;
1105     }
1106 
1107     CRM_CHECK(op->action, return NULL);
1108 
1109     if (stonith_topology_next(op) != pcmk_ok) {
1110         op->state = st_failed;
1111     }
1112 
1113     switch (op->state) {
1114         case st_failed:
1115             crm_warn("Could not request peer fencing (%s) targeting %s "
1116                      CRM_XS " id=%s", op->action, op->target, op->id);
1117             remote_op_done(op, NULL, -EINVAL, FALSE);
1118             return op;
1119 
1120         case st_duplicate:
1121             crm_info("Requesting peer fencing (%s) targeting %s (duplicate) "
1122                      CRM_XS " id=%s", op->action, op->target, op->id);
1123             return op;
1124 
1125         default:
1126             crm_notice("Requesting peer fencing (%s) targeting %s "
1127                        CRM_XS " id=%s state=%d",
1128                        op->action, op->target, op->id, op->state);
1129     }
1130 
1131     query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY,
1132                               NULL, op->call_options);
1133 
1134     crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id);
1135     crm_xml_add(query, F_STONITH_TARGET, op->target);
1136     crm_xml_add(query, F_STONITH_ACTION, op_requested_action(op));
1137     crm_xml_add(query, F_STONITH_ORIGIN, op->originator);
1138     crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
1139     crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
1140     crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout);
1141 
1142     /* In case of RELAY operation, RELAY information is added to the query to delete the original operation of RELAY. */
1143     operation = crm_element_value(request, F_STONITH_OPERATION);
1144     if (crm_str_eq(operation, STONITH_OP_RELAY, TRUE)) {
1145         relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID);
1146         if (relay_op_id) {
1147             crm_xml_add(query, F_STONITH_REMOTE_OP_ID_RELAY, relay_op_id);
1148         }
1149     }
1150 
1151     send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
1152     free_xml(query);
1153 
1154     query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR;
1155     op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op);
1156 
1157     return op;
1158 }
1159 
/* Bit flags that restrict which query results find_best_peer() may return */
enum find_best_peer_options {
    /*! Never choose the fencing target itself as the peer */
    FIND_PEER_SKIP_TARGET   = (1 << 0),
    /*! Choose only the fencing target itself as the peer */
    FIND_PEER_TARGET_ONLY   = (1 << 1),
    /*! Consider only devices whose access has been verified */
    FIND_PEER_VERIFIED_ONLY = (1 << 2),
};
1168 
1169 static st_query_result_t *
find_best_peer(const char * device,remote_fencing_op_t * op,enum find_best_peer_options options)1170 find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options)
1171 {
1172     GListPtr iter = NULL;
1173     gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE;
1174 
1175     if (!device && is_set(op->call_options, st_opt_topology)) {
1176         return NULL;
1177     }
1178 
1179     for (iter = op->query_results; iter != NULL; iter = iter->next) {
1180         st_query_result_t *peer = iter->data;
1181 
1182         crm_trace("Testing result from %s targeting %s with %d devices: %d %x",
1183                   peer->host, op->target, peer->ndevices, peer->tried, options);
1184         if ((options & FIND_PEER_SKIP_TARGET) && safe_str_eq(peer->host, op->target)) {
1185             continue;
1186         }
1187         if ((options & FIND_PEER_TARGET_ONLY) && safe_str_neq(peer->host, op->target)) {
1188             continue;
1189         }
1190 
1191         if (is_set(op->call_options, st_opt_topology)) {
1192 
1193             if (grab_peer_device(op, peer, device, verified_devices_only)) {
1194                 return peer;
1195             }
1196 
1197         } else if ((peer->tried == FALSE)
1198                    && count_peer_devices(op, peer, verified_devices_only)) {
1199 
1200             /* No topology: Use the current best peer */
1201             crm_trace("Simple fencing");
1202             return peer;
1203         }
1204     }
1205 
1206     return NULL;
1207 }
1208 
1209 static st_query_result_t *
stonith_choose_peer(remote_fencing_op_t * op)1210 stonith_choose_peer(remote_fencing_op_t * op)
1211 {
1212     const char *device = NULL;
1213     st_query_result_t *peer = NULL;
1214     uint32_t active = fencing_active_peers();
1215 
1216     do {
1217         if (op->devices) {
1218             device = op->devices->data;
1219             crm_trace("Checking for someone to fence (%s) %s with %s",
1220                       op->action, op->target, device);
1221         } else {
1222             crm_trace("Checking for someone to fence (%s) %s",
1223                       op->action, op->target);
1224         }
1225 
1226         /* Best choice is a peer other than the target with verified access */
1227         peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY);
1228         if (peer) {
1229             crm_trace("Found verified peer %s for %s", peer->host, device?device:"<any>");
1230             return peer;
1231         }
1232 
1233         if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) {
1234             crm_trace("Waiting before looking for unverified devices to fence %s", op->target);
1235             return NULL;
1236         }
1237 
1238         /* If no other peer has verified access, next best is unverified access */
1239         peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET);
1240         if (peer) {
1241             crm_trace("Found best unverified peer %s", peer->host);
1242             return peer;
1243         }
1244 
1245         /* If no other peer can do it, last option is self-fencing
1246          * (which is never allowed for the "on" phase of a remapped reboot)
1247          */
1248         if (op->phase != st_phase_on) {
1249             peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY);
1250             if (peer) {
1251                 crm_trace("%s will fence itself", peer->host);
1252                 return peer;
1253             }
1254         }
1255 
1256         /* Try the next fencing level if there is one (unless we're in the "on"
1257          * phase of a remapped "reboot", because we ignore errors in that case)
1258          */
1259     } while ((op->phase != st_phase_on)
1260              && is_set(op->call_options, st_opt_topology)
1261              && stonith_topology_next(op) == pcmk_ok);
1262 
1263     crm_notice("Couldn't find anyone to fence (%s) %s with %s",
1264                op->action, op->target, (device? device : "any device"));
1265     return NULL;
1266 }
1267 
1268 static int
get_device_timeout(const remote_fencing_op_t * op,const st_query_result_t * peer,const char * device)1269 get_device_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer,
1270                    const char *device)
1271 {
1272     device_properties_t *props;
1273 
1274     if (!peer || !device) {
1275         return op->base_timeout;
1276     }
1277 
1278     props = g_hash_table_lookup(peer->devices, device);
1279     if (!props) {
1280         return op->base_timeout;
1281     }
1282 
1283     return (props->custom_action_timeout[op->phase]?
1284            props->custom_action_timeout[op->phase] : op->base_timeout)
1285            + props->delay_max[op->phase];
1286 }
1287 
/* Accumulator passed as user data to add_device_timeout() when totaling the
 * timeouts of a peer's devices
 */
struct timeout_data {
    /* Operation whose current phase selects the per-device values */
    const remote_fencing_op_t *op;
    /* Peer whose devices are being totaled */
    const st_query_result_t *peer;
    /* Running sum of device timeouts */
    int total_timeout;
};
1293 
1294 /*!
1295  * \internal
1296  * \brief Add timeout to a total if device has not been executed yet
1297  *
1298  * \param[in] key        GHashTable key (device ID)
1299  * \param[in] value      GHashTable value (device properties)
1300  * \param[in] user_data  Timeout data
1301  */
1302 static void
add_device_timeout(gpointer key,gpointer value,gpointer user_data)1303 add_device_timeout(gpointer key, gpointer value, gpointer user_data)
1304 {
1305     const char *device_id = key;
1306     device_properties_t *props = value;
1307     struct timeout_data *timeout = user_data;
1308 
1309     if (!props->executed[timeout->op->phase]
1310         && !props->disallowed[timeout->op->phase]) {
1311         timeout->total_timeout += get_device_timeout(timeout->op,
1312                                                      timeout->peer, device_id);
1313     }
1314 }
1315 
1316 static int
get_peer_timeout(const remote_fencing_op_t * op,const st_query_result_t * peer)1317 get_peer_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer)
1318 {
1319     struct timeout_data timeout;
1320 
1321     timeout.op = op;
1322     timeout.peer = peer;
1323     timeout.total_timeout = 0;
1324 
1325     g_hash_table_foreach(peer->devices, add_device_timeout, &timeout);
1326 
1327     return (timeout.total_timeout? timeout.total_timeout : op->base_timeout);
1328 }
1329 
1330 static int
get_op_total_timeout(const remote_fencing_op_t * op,const st_query_result_t * chosen_peer)1331 get_op_total_timeout(const remote_fencing_op_t *op,
1332                      const st_query_result_t *chosen_peer)
1333 {
1334     int total_timeout = 0;
1335     stonith_topology_t *tp = find_topology_for_host(op->target);
1336 
1337     if (is_set(op->call_options, st_opt_topology) && tp) {
1338         int i;
1339         GListPtr device_list = NULL;
1340         GListPtr iter = NULL;
1341 
1342         /* Yep, this looks scary, nested loops all over the place.
1343          * Here is what is going on.
1344          * Loop1: Iterate through fencing levels.
1345          * Loop2: If a fencing level has devices, loop through each device
1346          * Loop3: For each device in a fencing level, see what peer owns it
1347          *        and what that peer has reported the timeout is for the device.
1348          */
1349         for (i = 0; i < ST_LEVEL_MAX; i++) {
1350             if (!tp->levels[i]) {
1351                 continue;
1352             }
1353             for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
1354                 for (iter = op->query_results; iter != NULL; iter = iter->next) {
1355                     const st_query_result_t *peer = iter->data;
1356 
1357                     if (find_peer_device(op, peer, device_list->data)) {
1358                         total_timeout += get_device_timeout(op, peer,
1359                                                             device_list->data);
1360                         break;
1361                     }
1362                 }               /* End Loop3: match device with peer that owns device, find device's timeout period */
1363             }                   /* End Loop2: iterate through devices at a specific level */
1364         }                       /*End Loop1: iterate through fencing levels */
1365 
1366     } else if (chosen_peer) {
1367         total_timeout = get_peer_timeout(op, chosen_peer);
1368     } else {
1369         total_timeout = op->base_timeout;
1370     }
1371 
1372     return total_timeout ? total_timeout : op->base_timeout;
1373 }
1374 
1375 static void
report_timeout_period(remote_fencing_op_t * op,int op_timeout)1376 report_timeout_period(remote_fencing_op_t * op, int op_timeout)
1377 {
1378     GListPtr iter = NULL;
1379     xmlNode *update = NULL;
1380     const char *client_node = NULL;
1381     const char *client_id = NULL;
1382     const char *call_id = NULL;
1383 
1384     if (op->call_options & st_opt_sync_call) {
1385         /* There is no reason to report the timeout for a synchronous call. It
1386          * is impossible to use the reported timeout to do anything when the client
1387          * is blocking for the response.  This update is only important for
1388          * async calls that require a callback to report the results in. */
1389         return;
1390     } else if (!op->request) {
1391         return;
1392     }
1393 
1394     crm_trace("Reporting timeout for %s.%.8s", op->client_name, op->id);
1395     client_node = crm_element_value(op->request, F_STONITH_CLIENTNODE);
1396     call_id = crm_element_value(op->request, F_STONITH_CALLID);
1397     client_id = crm_element_value(op->request, F_STONITH_CLIENTID);
1398     if (!client_node || !call_id || !client_id) {
1399         return;
1400     }
1401 
1402     if (safe_str_eq(client_node, stonith_our_uname)) {
1403         /* The client is connected to this node, send the update direclty to them */
1404         do_stonith_async_timeout_update(client_id, call_id, op_timeout);
1405         return;
1406     }
1407 
1408     /* The client is connected to another node, relay this update to them */
1409     update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0);
1410     crm_xml_add(update, F_STONITH_REMOTE_OP_ID, op->id);
1411     crm_xml_add(update, F_STONITH_CLIENTID, client_id);
1412     crm_xml_add(update, F_STONITH_CALLID, call_id);
1413     crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout);
1414 
1415     send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE);
1416 
1417     free_xml(update);
1418 
1419     for (iter = op->duplicates; iter != NULL; iter = iter->next) {
1420         remote_fencing_op_t *dup = iter->data;
1421 
1422         crm_trace("Reporting timeout for duplicate %s.%.8s", dup->client_name, dup->id);
1423         report_timeout_period(iter->data, op_timeout);
1424     }
1425 }
1426 
1427 /*!
1428  * \internal
1429  * \brief Advance an operation to the next device in its topology
1430  *
1431  * \param[in,out] op      Operation to advance
1432  * \param[in]     device  ID of device just completed
1433  * \param[in]     msg     XML reply that contained device result (if available)
1434  * \param[in]     rc      Return code of device's execution
1435  */
1436 static void
advance_op_topology(remote_fencing_op_t * op,const char * device,xmlNode * msg,int rc)1437 advance_op_topology(remote_fencing_op_t *op, const char *device, xmlNode *msg,
1438                     int rc)
1439 {
1440     /* Advance to the next device at this topology level, if any */
1441     if (op->devices) {
1442         op->devices = op->devices->next;
1443     }
1444 
1445     /* Handle automatic unfencing if an "on" action was requested */
1446     if ((op->phase == st_phase_requested) && safe_str_eq(op->action, "on")) {
1447         /* If the device we just executed was required, it's not anymore */
1448         remove_required_device(op, device);
1449 
1450         /* If there are no more devices at this topology level, run through any
1451          * remaining devices with automatic unfencing
1452          */
1453         if (op->devices == NULL) {
1454             op->devices = op->automatic_list;
1455         }
1456     }
1457 
1458     if ((op->devices == NULL) && (op->phase == st_phase_off)) {
1459         /* We're done with this level and with required devices, but we had
1460          * remapped "reboot" to "off", so start over with "on". If any devices
1461          * need to be turned back on, op->devices will be non-NULL after this.
1462          */
1463         op_phase_on(op);
1464     }
1465 
1466     if (op->devices) {
1467         /* Necessary devices remain, so execute the next one */
1468         crm_trace("Next targeting %s on behalf of %s@%s (rc was %d)",
1469                   op->target, op->originator, op->client_name, rc);
1470 
1471         // The requested delay has been applied for the first device
1472         if (op->delay > 0) {
1473             op->delay = 0;
1474         }
1475 
1476         call_remote_stonith(op, NULL);
1477     } else {
1478         /* We're done with all devices and phases, so finalize operation */
1479         crm_trace("Marking complex fencing op targeting %s as complete",
1480                   op->target);
1481         op->state = st_done;
1482         remote_op_done(op, msg, rc, FALSE);
1483     }
1484 }
1485 
1486 void
call_remote_stonith(remote_fencing_op_t * op,st_query_result_t * peer)1487 call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer)
1488 {
1489     const char *device = NULL;
1490     int timeout = op->base_timeout;
1491 
1492     crm_trace("State for %s.%.8s: %s %d", op->target, op->client_name, op->id, op->state);
1493     if (peer == NULL && !is_set(op->call_options, st_opt_topology)) {
1494         peer = stonith_choose_peer(op);
1495     }
1496 
1497     if (!op->op_timer_total) {
1498         int total_timeout = get_op_total_timeout(op, peer);
1499 
1500         op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * total_timeout;
1501         op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op);
1502         report_timeout_period(op, op->total_timeout);
1503         crm_info("Total timeout set to %d for peer's fencing targeting %s for %s"
1504                  CRM_XS "id=%s",
1505                  total_timeout, op->target, op->client_name, op->id);
1506     }
1507 
1508     if (is_set(op->call_options, st_opt_topology) && op->devices) {
1509         /* Ignore any peer preference, they might not have the device we need */
1510         /* When using topology, stonith_choose_peer() removes the device from
1511          * further consideration, so be sure to calculate timeout beforehand */
1512         peer = stonith_choose_peer(op);
1513 
1514         device = op->devices->data;
1515         timeout = get_device_timeout(op, peer, device);
1516     }
1517 
1518     if (peer) {
1519         int timeout_one = 0;
1520         xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);
1521 
1522         crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id);
1523         crm_xml_add(remote_op, F_STONITH_TARGET, op->target);
1524         crm_xml_add(remote_op, F_STONITH_ACTION, op->action);
1525         crm_xml_add(remote_op, F_STONITH_ORIGIN, op->originator);
1526         crm_xml_add(remote_op, F_STONITH_CLIENTID, op->client_id);
1527         crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name);
1528         crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout);
1529         crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options);
1530         crm_xml_add_int(remote_op, F_STONITH_DELAY, op->delay);
1531 
1532         if (device) {
1533             timeout_one = TIMEOUT_MULTIPLY_FACTOR *
1534                           get_device_timeout(op, peer, device);
1535             crm_notice("Requesting that %s perform '%s' action targeting %s "
1536                        "using '%s' " CRM_XS " for client %s (%ds)",
1537                        peer->host, op->action, op->target, device,
1538                        op->client_name, timeout_one);
1539             crm_xml_add(remote_op, F_STONITH_DEVICE, device);
1540             crm_xml_add(remote_op, F_STONITH_MODE, "slave");
1541 
1542         } else {
1543             timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer);
1544             crm_notice("Requesting that %s perform '%s' action targeting %s "
1545                        CRM_XS " for client %s (%ds, %lds)",
1546                        peer->host, op->action, op->target, op->client_name,
1547                        timeout_one, stonith_watchdog_timeout_ms);
1548             crm_xml_add(remote_op, F_STONITH_MODE, "smart");
1549         }
1550 
1551         op->state = st_exec;
1552         if (op->op_timer_one) {
1553             g_source_remove(op->op_timer_one);
1554         }
1555 
1556         if(stonith_watchdog_timeout_ms > 0 && device && safe_str_eq(device, "watchdog")) {
1557             crm_notice("Waiting %lds for %s to self-fence (%s) for client %s.%.8s",
1558                        stonith_watchdog_timeout_ms/1000, op->target, op->action,
1559                        op->client_name, op->id);
1560             op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
1561 
1562             /* TODO check devices to verify watchdog will be in use */
1563         } else if(stonith_watchdog_timeout_ms > 0
1564                   && safe_str_eq(peer->host, op->target)
1565                   && safe_str_neq(op->action, "on")) {
1566             crm_notice("Waiting %lds for %s to self-fence (%s) for client %s.%.8s",
1567                        stonith_watchdog_timeout_ms/1000, op->target, op->action,
1568                        op->client_name, op->id);
1569             op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
1570 
1571         } else {
1572             op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
1573         }
1574 
1575 
1576         send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE);
1577         peer->tried = TRUE;
1578         free_xml(remote_op);
1579         return;
1580 
1581     } else if (op->phase == st_phase_on) {
1582         /* A remapped "on" cannot be executed, but the node was already
1583          * turned off successfully, so ignore the error and continue.
1584          */
1585         crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s "
1586                  "after successful 'off'", device, op->target);
1587         advance_op_topology(op, device, NULL, pcmk_ok);
1588         return;
1589 
1590     } else if (op->owner == FALSE) {
1591         crm_err("Fencing (%s) targeting %s for client %s is not ours to control",
1592                 op->action, op->target, op->client_name);
1593 
1594     } else if (op->query_timer == 0) {
1595         /* We've exhausted all available peers */
1596         crm_info("No remaining peers capable of fencing (%s) %s for client %s "
1597                  CRM_XS " state=%d",
1598                  op->action, op->target, op->client_name, op->state);
1599         CRM_LOG_ASSERT(op->state < st_done);
1600         remote_op_timeout(op);
1601 
1602     } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
1603         int rc = -EHOSTUNREACH;
1604 
1605         /* if the operation never left the query state,
1606          * but we have all the expected replies, then no devices
1607          * are available to execute the fencing operation. */
1608 
1609         if(stonith_watchdog_timeout_ms && (device == NULL || safe_str_eq(device, "watchdog"))) {
1610             crm_notice("Waiting %lds for %s to self-fence (%s) for client %s.%.8s",
1611                      stonith_watchdog_timeout_ms/1000, op->target,
1612                      op->action, op->client_name, op->id);
1613 
1614             op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
1615             return;
1616         }
1617 
1618         if (op->state == st_query) {
1619            crm_info("No peers (out of %d) have devices capable of fencing "
1620                     "(%s) %s for client %s " CRM_XS " state=%d",
1621                     op->replies, op->action, op->target, op->client_name,
1622                     op->state);
1623 
1624             rc = -ENODEV;
1625         } else {
1626            crm_info("No peers (out of %d) are capable of fencing (%s) %s "
1627                     "for client %s " CRM_XS " state=%d",
1628                     op->replies, op->action, op->target, op->client_name,
1629                     op->state);
1630         }
1631 
1632         op->state = st_failed;
1633         remote_op_done(op, NULL, rc, FALSE);
1634 
1635     } else {
1636         crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s "
1637                  "for client %s%.8s",
1638                  op->action, op->target, (device? " with " : ""),
1639                  (device? device : ""), op->client_name, op->id);
1640     }
1641 }
1642 
1643 /*!
1644  * \internal
1645  * \brief Comparison function for sorting query results
1646  *
1647  * \param[in] a  GList item to compare
1648  * \param[in] b  GList item to compare
1649  *
1650  * \return Per the glib documentation, "a negative integer if the first value
1651  *         comes before the second, 0 if they are equal, or a positive integer
1652  *         if the first value comes after the second."
1653  */
1654 static gint
sort_peers(gconstpointer a,gconstpointer b)1655 sort_peers(gconstpointer a, gconstpointer b)
1656 {
1657     const st_query_result_t *peer_a = a;
1658     const st_query_result_t *peer_b = b;
1659 
1660     return (peer_b->ndevices - peer_a->ndevices);
1661 }
1662 
1663 /*!
1664  * \internal
1665  * \brief Determine if all the devices in the topology are found or not
1666  */
1667 static gboolean
all_topology_devices_found(remote_fencing_op_t * op)1668 all_topology_devices_found(remote_fencing_op_t * op)
1669 {
1670     GListPtr device = NULL;
1671     GListPtr iter = NULL;
1672     device_properties_t *match = NULL;
1673     stonith_topology_t *tp = NULL;
1674     gboolean skip_target = FALSE;
1675     int i;
1676 
1677     tp = find_topology_for_host(op->target);
1678     if (!tp) {
1679         return FALSE;
1680     }
1681     if (safe_str_eq(op->action, "off") || safe_str_eq(op->action, "reboot")) {
1682         /* Don't count the devices on the target node if we are killing
1683          * the target node. */
1684         skip_target = TRUE;
1685     }
1686 
1687     for (i = 0; i < ST_LEVEL_MAX; i++) {
1688         for (device = tp->levels[i]; device; device = device->next) {
1689             match = NULL;
1690             for (iter = op->query_results; iter && !match; iter = iter->next) {
1691                 st_query_result_t *peer = iter->data;
1692 
1693                 if (skip_target && safe_str_eq(peer->host, op->target)) {
1694                     continue;
1695                 }
1696                 match = find_peer_device(op, peer, device->data);
1697             }
1698             if (!match) {
1699                 return FALSE;
1700             }
1701         }
1702     }
1703 
1704     return TRUE;
1705 }
1706 
1707 /*!
1708  * \internal
1709  * \brief Parse action-specific device properties from XML
1710  *
1711  * \param[in]     msg     XML element containing the properties
1712  * \param[in]     peer    Name of peer that sent XML (for logs)
1713  * \param[in]     device  Device ID (for logs)
1714  * \param[in]     action  Action the properties relate to (for logs)
1715  * \param[in]     phase   Phase the properties relate to
1716  * \param[in,out] props   Device properties to update
1717  */
1718 static void
parse_action_specific(xmlNode * xml,const char * peer,const char * device,const char * action,remote_fencing_op_t * op,enum st_remap_phase phase,device_properties_t * props)1719 parse_action_specific(xmlNode *xml, const char *peer, const char *device,
1720                       const char *action, remote_fencing_op_t *op,
1721                       enum st_remap_phase phase, device_properties_t *props)
1722 {
1723     props->custom_action_timeout[phase] = 0;
1724     crm_element_value_int(xml, F_STONITH_ACTION_TIMEOUT,
1725                           &props->custom_action_timeout[phase]);
1726     if (props->custom_action_timeout[phase]) {
1727         crm_trace("Peer %s with device %s returned %s action timeout %d",
1728                   peer, device, action, props->custom_action_timeout[phase]);
1729     }
1730 
1731     props->delay_max[phase] = 0;
1732     crm_element_value_int(xml, F_STONITH_DELAY_MAX, &props->delay_max[phase]);
1733     if (props->delay_max[phase]) {
1734         crm_trace("Peer %s with device %s returned maximum of random delay %d for %s",
1735                   peer, device, props->delay_max[phase], action);
1736     }
1737 
1738     props->delay_base[phase] = 0;
1739     crm_element_value_int(xml, F_STONITH_DELAY_BASE, &props->delay_base[phase]);
1740     if (props->delay_base[phase]) {
1741         crm_trace("Peer %s with device %s returned base delay %d for %s",
1742                   peer, device, props->delay_base[phase], action);
1743     }
1744 
1745     /* Handle devices with automatic unfencing */
1746     if (safe_str_eq(action, "on")) {
1747         int required = 0;
1748 
1749         crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &required);
1750         if (required) {
1751             crm_trace("Peer %s requires device %s to execute for action %s",
1752                       peer, device, action);
1753             add_required_device(op, device);
1754         }
1755     }
1756 
1757     /* If a reboot is remapped to off+on, it's possible that a node is allowed
1758      * to perform one action but not another.
1759      */
1760     if (crm_is_true(crm_element_value(xml, F_STONITH_ACTION_DISALLOWED))) {
1761         props->disallowed[phase] = TRUE;
1762         crm_trace("Peer %s is disallowed from executing %s for device %s",
1763                   peer, action, device);
1764     }
1765 }
1766 
1767 /*!
1768  * \internal
1769  * \brief Parse one device's properties from peer's XML query reply
1770  *
1771  * \param[in]     xml       XML node containing device properties
1772  * \param[in,out] op        Operation that query and reply relate to
1773  * \param[in,out] result    Peer's results
1774  * \param[in]     device    ID of device being parsed
1775  */
1776 static void
add_device_properties(xmlNode * xml,remote_fencing_op_t * op,st_query_result_t * result,const char * device)1777 add_device_properties(xmlNode *xml, remote_fencing_op_t *op,
1778                       st_query_result_t *result, const char *device)
1779 {
1780     xmlNode *child;
1781     int verified = 0;
1782     device_properties_t *props = calloc(1, sizeof(device_properties_t));
1783 
1784     /* Add a new entry to this result's devices list */
1785     CRM_ASSERT(props != NULL);
1786     g_hash_table_insert(result->devices, strdup(device), props);
1787 
1788     /* Peers with verified (monitored) access will be preferred */
1789     crm_element_value_int(xml, F_STONITH_DEVICE_VERIFIED, &verified);
1790     if (verified) {
1791         crm_trace("Peer %s has confirmed a verified device %s",
1792                   result->host, device);
1793         props->verified = TRUE;
1794     }
1795 
1796     /* Parse action-specific device properties */
1797     parse_action_specific(xml, result->host, device, op_requested_action(op),
1798                           op, st_phase_requested, props);
1799     for (child = __xml_first_child(xml); child != NULL; child = __xml_next(child)) {
1800         /* Replies for "reboot" operations will include the action-specific
1801          * values for "off" and "on" in child elements, just in case the reboot
1802          * winds up getting remapped.
1803          */
1804         if (safe_str_eq(ID(child), "off")) {
1805             parse_action_specific(child, result->host, device, "off",
1806                                   op, st_phase_off, props);
1807         } else if (safe_str_eq(ID(child), "on")) {
1808             parse_action_specific(child, result->host, device, "on",
1809                                   op, st_phase_on, props);
1810         }
1811     }
1812 }
1813 
1814 /*!
1815  * \internal
1816  * \brief Parse a peer's XML query reply and add it to operation's results
1817  *
1818  * \param[in,out] op        Operation that query and reply relate to
1819  * \param[in]     host      Name of peer that sent this reply
1820  * \param[in]     ndevices  Number of devices expected in reply
1821  * \param[in]     xml       XML node containing device list
1822  *
1823  * \return Newly allocated result structure with parsed reply
1824  */
1825 static st_query_result_t *
add_result(remote_fencing_op_t * op,const char * host,int ndevices,xmlNode * xml)1826 add_result(remote_fencing_op_t *op, const char *host, int ndevices, xmlNode *xml)
1827 {
1828     st_query_result_t *result = calloc(1, sizeof(st_query_result_t));
1829     xmlNode *child;
1830 
1831     CRM_CHECK(result != NULL, return NULL);
1832     result->host = strdup(host);
1833     result->devices = crm_str_table_new();
1834 
1835     /* Each child element describes one capable device available to the peer */
1836     for (child = __xml_first_child(xml); child != NULL; child = __xml_next(child)) {
1837         const char *device = ID(child);
1838 
1839         if (device) {
1840             add_device_properties(child, op, result, device);
1841         }
1842     }
1843 
1844     result->ndevices = g_hash_table_size(result->devices);
1845     CRM_CHECK(ndevices == result->ndevices,
1846               crm_err("Query claimed to have %d devices but %d found",
1847                       ndevices, result->ndevices));
1848 
1849     op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers);
1850     return result;
1851 }
1852 
1853 /*!
1854  * \internal
1855  * \brief Handle a peer's reply to our fencing query
1856  *
1857  * Parse a query result from XML and store it in the remote operation
1858  * table, and when enough replies have been received, issue a fencing request.
1859  *
1860  * \param[in] msg  XML reply received
1861  *
1862  * \return pcmk_ok on success, -errno on error
1863  *
1864  * \note See initiate_remote_stonith_op() for how the XML query was initially
1865  *       formed, and stonith_query() for how the peer formed its XML reply.
1866  */
1867 int
process_remote_stonith_query(xmlNode * msg)1868 process_remote_stonith_query(xmlNode * msg)
1869 {
1870     int ndevices = 0;
1871     gboolean host_is_target = FALSE;
1872     gboolean have_all_replies = FALSE;
1873     const char *id = NULL;
1874     const char *host = NULL;
1875     remote_fencing_op_t *op = NULL;
1876     st_query_result_t *result = NULL;
1877     uint32_t replies_expected;
1878     xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
1879 
1880     CRM_CHECK(dev != NULL, return -EPROTO);
1881 
1882     id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
1883     CRM_CHECK(id != NULL, return -EPROTO);
1884 
1885     dev = get_xpath_object("//@" F_STONITH_AVAILABLE_DEVICES, msg, LOG_ERR);
1886     CRM_CHECK(dev != NULL, return -EPROTO);
1887     crm_element_value_int(dev, F_STONITH_AVAILABLE_DEVICES, &ndevices);
1888 
1889     op = g_hash_table_lookup(stonith_remote_op_list, id);
1890     if (op == NULL) {
1891         crm_debug("Received query reply for unknown or expired operation %s",
1892                   id);
1893         return -EOPNOTSUPP;
1894     }
1895 
1896     replies_expected = QB_MIN(op->replies_expected, fencing_active_peers());
1897     if ((++op->replies >= replies_expected) && (op->state == st_query)) {
1898         have_all_replies = TRUE;
1899     }
1900     host = crm_element_value(msg, F_ORIG);
1901     host_is_target = safe_str_eq(host, op->target);
1902 
1903     crm_info("Query result %d of %d from %s for %s/%s (%d devices) %s",
1904              op->replies, replies_expected, host,
1905              op->target, op->action, ndevices, id);
1906     if (ndevices > 0) {
1907         result = add_result(op, host, ndevices, dev);
1908     }
1909 
1910     if (is_set(op->call_options, st_opt_topology)) {
1911         /* If we start the fencing before all the topology results are in,
1912          * it is possible fencing levels will be skipped because of the missing
1913          * query results. */
1914         if (op->state == st_query && all_topology_devices_found(op)) {
1915             /* All the query results are in for the topology, start the fencing ops. */
1916             crm_trace("All topology devices found");
1917             call_remote_stonith(op, result);
1918 
1919         } else if (have_all_replies) {
1920             crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
1921                      replies_expected, op->replies);
1922             call_remote_stonith(op, NULL);
1923         }
1924 
1925     } else if (op->state == st_query) {
1926         int nverified = count_peer_devices(op, result, TRUE);
1927 
1928         /* We have a result for a non-topology fencing op that looks promising,
1929          * go ahead and start fencing before query timeout */
1930         if (result && (host_is_target == FALSE) && nverified) {
1931             /* we have a verified device living on a peer that is not the target */
1932             crm_trace("Found %d verified devices", nverified);
1933             call_remote_stonith(op, result);
1934 
1935         } else if (have_all_replies) {
1936             crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
1937                      replies_expected, op->replies);
1938             call_remote_stonith(op, NULL);
1939 
1940         } else {
1941             crm_trace("Waiting for more peer results before launching fencing operation");
1942         }
1943 
1944     } else if (result && (op->state == st_done)) {
1945         crm_info("Discarding query result from %s (%d devices): Operation is in state %d",
1946                  result->host, result->ndevices, op->state);
1947     }
1948 
1949     return pcmk_ok;
1950 }
1951 
1952 /*!
1953  * \internal
1954  * \brief Handle a peer's reply to a fencing request
1955  *
1956  * Parse a fencing reply from XML, and either finalize the operation
1957  * or attempt another device as appropriate.
1958  *
1959  * \param[in] msg  XML reply received
1960  *
1961  * \return pcmk_ok on success, -errno on error
1962  */
1963 int
process_remote_stonith_exec(xmlNode * msg)1964 process_remote_stonith_exec(xmlNode * msg)
1965 {
1966     int rc = 0;
1967     const char *id = NULL;
1968     const char *device = NULL;
1969     remote_fencing_op_t *op = NULL;
1970     xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
1971 
1972     CRM_CHECK(dev != NULL, return -EPROTO);
1973 
1974     id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
1975     CRM_CHECK(id != NULL, return -EPROTO);
1976 
1977     dev = get_xpath_object("//@" F_STONITH_RC, msg, LOG_ERR);
1978     CRM_CHECK(dev != NULL, return -EPROTO);
1979 
1980     crm_element_value_int(dev, F_STONITH_RC, &rc);
1981 
1982     device = crm_element_value(dev, F_STONITH_DEVICE);
1983 
1984     if (stonith_remote_op_list) {
1985         op = g_hash_table_lookup(stonith_remote_op_list, id);
1986     }
1987 
1988     if (op == NULL && rc == pcmk_ok) {
1989         /* Record successful fencing operations */
1990         const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID);
1991 
1992         op = create_remote_stonith_op(client_id, dev, TRUE);
1993     }
1994 
1995     if (op == NULL) {
1996         /* Could be for an event that began before we started */
1997         /* TODO: Record the op for later querying */
1998         crm_info("Received peer result of unknown or expired operation %s", id);
1999         return -EOPNOTSUPP;
2000     }
2001 
2002     if (op->devices && device && safe_str_neq(op->devices->data, device)) {
2003         crm_err("Received outdated reply for device %s (instead of %s) to "
2004                 "fence (%s) %s. Operation already timed out at peer level.",
2005                 device, op->devices->data, op->action, op->target);
2006         return rc;
2007     }
2008 
2009     if (safe_str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast")) {
2010         crm_debug("Marking call to %s for %s on behalf of %s@%s.%.8s: %s (%d)",
2011                   op->action, op->target, op->client_name, op->id, op->originator,
2012                   pcmk_strerror(rc), rc);
2013         if (rc == pcmk_ok) {
2014             op->state = st_done;
2015         } else {
2016             op->state = st_failed;
2017         }
2018         remote_op_done(op, msg, rc, FALSE);
2019         return pcmk_ok;
2020     } else if (safe_str_neq(op->originator, stonith_our_uname)) {
2021         /* If this isn't a remote level broadcast, and we are not the
2022          * originator of the operation, we should not be receiving this msg. */
2023         crm_err
2024             ("%s received non-broadcast fencing result for operation it does not own (device %s targeting %s)",
2025              stonith_our_uname, device, op->target);
2026         return rc;
2027     }
2028 
2029     if (is_set(op->call_options, st_opt_topology)) {
2030         const char *device = crm_element_value(msg, F_STONITH_DEVICE);
2031 
2032         crm_notice("Action '%s' targeting %s using %s on behalf of %s@%s: %s "
2033                    CRM_XS " rc=%d",
2034                    op->action, op->target, device, op->client_name,
2035                    op->originator, pcmk_strerror(rc), rc);
2036 
2037         /* We own the op, and it is complete. broadcast the result to all nodes
2038          * and notify our local clients. */
2039         if (op->state == st_done) {
2040             remote_op_done(op, msg, rc, FALSE);
2041             return rc;
2042         }
2043 
2044         if ((op->phase == 2) && (rc != pcmk_ok)) {
2045             /* A remapped "on" failed, but the node was already turned off
2046              * successfully, so ignore the error and continue.
2047              */
2048             crm_warn("Ignoring %s 'on' failure (exit code %d) targeting %s "
2049                      "after successful 'off'", device, rc, op->target);
2050             rc = pcmk_ok;
2051         }
2052 
2053         if (rc == pcmk_ok) {
2054             /* An operation completed successfully. Try another device if
2055              * necessary, otherwise mark the operation as done. */
2056             advance_op_topology(op, device, msg, rc);
2057             return rc;
2058         } else {
2059             /* This device failed, time to try another topology level. If no other
2060              * levels are available, mark this operation as failed and report results. */
2061             if (stonith_topology_next(op) != pcmk_ok) {
2062                 op->state = st_failed;
2063                 remote_op_done(op, msg, rc, FALSE);
2064                 return rc;
2065             }
2066         }
2067     } else if (rc == pcmk_ok && op->devices == NULL) {
2068         crm_trace("All done for %s", op->target);
2069 
2070         op->state = st_done;
2071         remote_op_done(op, msg, rc, FALSE);
2072         return rc;
2073     } else if (rc == -ETIME && op->devices == NULL) {
2074         /* If the operation timed out don't bother retrying other peers. */
2075         op->state = st_failed;
2076         remote_op_done(op, msg, rc, FALSE);
2077         return rc;
2078     } else {
2079         /* fall-through and attempt other fencing action using another peer */
2080     }
2081 
2082     /* Retry on failure */
2083     crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->originator,
2084               op->client_name, rc);
2085     call_remote_stonith(op, NULL);
2086     return rc;
2087 }
2088 
2089 gboolean
stonith_check_fence_tolerance(int tolerance,const char * target,const char * action)2090 stonith_check_fence_tolerance(int tolerance, const char *target, const char *action)
2091 {
2092     GHashTableIter iter;
2093     time_t now = time(NULL);
2094     remote_fencing_op_t *rop = NULL;
2095 
2096     crm_trace("tolerance=%d, remote_op_list=%p", tolerance,
2097               stonith_remote_op_list);
2098 
2099     if (tolerance <= 0 || !stonith_remote_op_list || target == NULL ||
2100         action == NULL) {
2101         return FALSE;
2102     }
2103 
2104     g_hash_table_iter_init(&iter, stonith_remote_op_list);
2105     while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) {
2106         if (strcmp(rop->target, target) != 0) {
2107             continue;
2108         } else if (rop->state != st_done) {
2109             continue;
2110         /* We don't have to worry about remapped reboots here
2111          * because if state is done, any remapping has been undone
2112          */
2113         } else if (strcmp(rop->action, action) != 0) {
2114             continue;
2115         } else if ((rop->completed + tolerance) < now) {
2116             continue;
2117         }
2118 
2119         crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s",
2120                    target, action, tolerance, rop->delegate, rop->originator);
2121         return TRUE;
2122     }
2123     return FALSE;
2124 }
2125