1 /*
2 * Copyright (C) 2009 Andrew Beekhof <andrew@beekhof.net>
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This software is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include <crm_internal.h>
20
21 #include <sys/param.h>
22 #include <stdio.h>
23 #include <sys/types.h>
24 #include <sys/wait.h>
25 #include <sys/stat.h>
26 #include <unistd.h>
27 #include <sys/utsname.h>
28
29 #include <stdlib.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <ctype.h>
33 #include <regex.h>
34
35 #include <crm/crm.h>
36 #include <crm/msg_xml.h>
37 #include <crm/common/ipc.h>
38 #include <crm/common/ipcs.h>
39 #include <crm/cluster/internal.h>
40
41 #include <crm/stonith-ng.h>
42 #include <crm/fencing/internal.h>
43 #include <crm/common/xml.h>
44
45 #include <crm/common/util.h>
46 #include <internal.h>
47
/* Multiplier applied to a computed total timeout when a best-guess value is
 * needed (see merge_duplicates())
 */
#define TIMEOUT_MULTIPLY_FACTOR 1.2
49
50 /* When one stonithd queries its peers for devices able to handle a fencing
51 * request, each peer will reply with a list of such devices available to it.
52 * Each reply will be parsed into a st_query_result_t, with each device's
53 * information kept in a device_properties_t.
54 */
55
56 typedef struct device_properties_s {
57 /* Whether access to this device has been verified */
58 gboolean verified;
59
60 /* The remaining members are indexed by the operation's "phase" */
61
62 /* Whether this device has been executed in each phase */
63 gboolean executed[st_phase_max];
64 /* Whether this device is disallowed from executing in each phase */
65 gboolean disallowed[st_phase_max];
66 /* Action-specific timeout for each phase */
67 int custom_action_timeout[st_phase_max];
68 /* Action-specific maximum random delay for each phase */
69 int delay_max[st_phase_max];
70 /* Action-specific base delay for each phase */
71 int delay_base[st_phase_max];
72 } device_properties_t;
73
74 typedef struct st_query_result_s {
75 /* Name of peer that sent this result */
76 char *host;
77 /* Only try peers for non-topology based operations once */
78 gboolean tried;
79 /* Number of entries in the devices table */
80 int ndevices;
81 /* Devices available to this host that are capable of fencing the target */
82 GHashTable *devices;
83 } st_query_result_t;
84
85 GHashTable *stonith_remote_op_list = NULL;
86 void call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer);
87 static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup);
88 extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
89 int call_options);
90
91 static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
92 static int get_op_total_timeout(const remote_fencing_op_t *op,
93 const st_query_result_t *chosen_peer);
94
95 static gint
sort_strings(gconstpointer a,gconstpointer b)96 sort_strings(gconstpointer a, gconstpointer b)
97 {
98 return strcmp(a, b);
99 }
100
101 static void
free_remote_query(gpointer data)102 free_remote_query(gpointer data)
103 {
104 if (data) {
105 st_query_result_t *query = data;
106
107 crm_trace("Free'ing query result from %s", query->host);
108 g_hash_table_destroy(query->devices);
109 free(query->host);
110 free(query);
111 }
112 }
113
114 void
free_stonith_remote_op_list()115 free_stonith_remote_op_list()
116 {
117 if (stonith_remote_op_list != NULL) {
118 g_hash_table_destroy(stonith_remote_op_list);
119 stonith_remote_op_list = NULL;
120 }
121 }
122
123 static void
clear_remote_op_timers(remote_fencing_op_t * op)124 clear_remote_op_timers(remote_fencing_op_t * op)
125 {
126 if (op->query_timer) {
127 g_source_remove(op->query_timer);
128 op->query_timer = 0;
129 }
130 if (op->op_timer_total) {
131 g_source_remove(op->op_timer_total);
132 op->op_timer_total = 0;
133 }
134 if (op->op_timer_one) {
135 g_source_remove(op->op_timer_one);
136 op->op_timer_one = 0;
137 }
138 }
139
140 static void
free_remote_op(gpointer data)141 free_remote_op(gpointer data)
142 {
143 remote_fencing_op_t *op = data;
144
145 crm_trace("Free'ing op %s for %s", op->id, op->target);
146 crm_log_xml_debug(op->request, "Destroying");
147
148 clear_remote_op_timers(op);
149
150 free(op->id);
151 free(op->action);
152 free(op->delegate);
153 free(op->target);
154 free(op->client_id);
155 free(op->client_name);
156 free(op->originator);
157
158 if (op->query_results) {
159 g_list_free_full(op->query_results, free_remote_query);
160 }
161 if (op->request) {
162 free_xml(op->request);
163 op->request = NULL;
164 }
165 if (op->devices_list) {
166 g_list_free_full(op->devices_list, free);
167 op->devices_list = NULL;
168 }
169 g_list_free_full(op->automatic_list, free);
170 g_list_free(op->duplicates);
171 free(op);
172 }
173
174 void
init_stonith_remote_op_hash_table(GHashTable ** table)175 init_stonith_remote_op_hash_table(GHashTable **table)
176 {
177 if (*table == NULL) {
178 *table = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_remote_op);
179 }
180 }
181
182 struct peer_count_data {
183 const remote_fencing_op_t *op;
184 gboolean verified_only;
185 int count;
186 };
187
188 /*!
189 * \internal
190 * \brief Increment a counter if a device has not been executed yet
191 *
192 * \param[in] key Device ID (ignored)
193 * \param[in] value Device properties
194 * \param[in] user_data Peer count data
195 */
196 static void
count_peer_device(gpointer key,gpointer value,gpointer user_data)197 count_peer_device(gpointer key, gpointer value, gpointer user_data)
198 {
199 device_properties_t *props = (device_properties_t*)value;
200 struct peer_count_data *data = user_data;
201
202 if (!props->executed[data->op->phase]
203 && (!data->verified_only || props->verified)) {
204 ++(data->count);
205 }
206 }
207
208 /*!
209 * \internal
210 * \brief Check the number of available devices in a peer's query results
211 *
212 * \param[in] op Operation that results are for
213 * \param[in] peer Peer to count
214 * \param[in] verified_only Whether to count only verified devices
215 *
216 * \return Number of devices available to peer that were not already executed
217 */
218 static int
count_peer_devices(const remote_fencing_op_t * op,const st_query_result_t * peer,gboolean verified_only)219 count_peer_devices(const remote_fencing_op_t *op, const st_query_result_t *peer,
220 gboolean verified_only)
221 {
222 struct peer_count_data data;
223
224 data.op = op;
225 data.verified_only = verified_only;
226 data.count = 0;
227 if (peer) {
228 g_hash_table_foreach(peer->devices, count_peer_device, &data);
229 }
230 return data.count;
231 }
232
233 /*!
234 * \internal
235 * \brief Search for a device in a query result
236 *
237 * \param[in] op Operation that result is for
238 * \param[in] peer Query result for a peer
239 * \param[in] device Device ID to search for
240 *
241 * \return Device properties if found, NULL otherwise
242 */
243 static device_properties_t *
find_peer_device(const remote_fencing_op_t * op,const st_query_result_t * peer,const char * device)244 find_peer_device(const remote_fencing_op_t *op, const st_query_result_t *peer,
245 const char *device)
246 {
247 device_properties_t *props = g_hash_table_lookup(peer->devices, device);
248
249 return (props && !props->executed[op->phase]
250 && !props->disallowed[op->phase])? props : NULL;
251 }
252
253 /*!
254 * \internal
255 * \brief Find a device in a peer's device list and mark it as executed
256 *
257 * \param[in] op Operation that peer result is for
258 * \param[in,out] peer Peer with results to search
259 * \param[in] device ID of device to mark as done
260 * \param[in] verified_devices_only Only consider verified devices
261 *
262 * \return TRUE if device was found and marked, FALSE otherwise
263 */
264 static gboolean
grab_peer_device(const remote_fencing_op_t * op,st_query_result_t * peer,const char * device,gboolean verified_devices_only)265 grab_peer_device(const remote_fencing_op_t *op, st_query_result_t *peer,
266 const char *device, gboolean verified_devices_only)
267 {
268 device_properties_t *props = find_peer_device(op, peer, device);
269
270 if ((props == NULL) || (verified_devices_only && !props->verified)) {
271 return FALSE;
272 }
273
274 crm_trace("Removing %s from %s (%d remaining)",
275 device, peer->host, count_peer_devices(op, peer, FALSE));
276 props->executed[op->phase] = TRUE;
277 return TRUE;
278 }
279
280 /*!
281 * \internal
282 * \brief Return an operation's originally requested action (before any remap)
283 *
284 * \param[in] op Operation to check
285 *
286 * \return Operation's original action
287 */
288 static const char *
op_requested_action(const remote_fencing_op_t * op)289 op_requested_action(const remote_fencing_op_t *op)
290 {
291 return ((op->phase > st_phase_requested)? "reboot" : op->action);
292 }
293
294 /*!
295 * \internal
296 * \brief Remap a "reboot" operation to the "off" phase
297 *
298 * \param[in,out] op Operation to remap
299 */
300 static void
op_phase_off(remote_fencing_op_t * op)301 op_phase_off(remote_fencing_op_t *op)
302 {
303 crm_info("Remapping multiple-device reboot targeting %s (%s) to 'off'",
304 op->target, op->id);
305 op->phase = st_phase_off;
306
307 /* Happily, "off" and "on" are shorter than "reboot", so we can reuse the
308 * memory allocation at each phase.
309 */
310 strcpy(op->action, "off");
311 }
312
313 /*!
314 * \internal
315 * \brief Advance a remapped reboot operation to the "on" phase
316 *
317 * \param[in,out] op Operation to remap
318 */
319 static void
op_phase_on(remote_fencing_op_t * op)320 op_phase_on(remote_fencing_op_t *op)
321 {
322 GListPtr iter = NULL;
323
324 crm_info("Remapped 'off' targeting %s complete, "
325 "remapping to 'on' for %s.%.8s",
326 op->target, op->client_name, op->id);
327 op->phase = st_phase_on;
328 strcpy(op->action, "on");
329
330 /* Skip devices with automatic unfencing, because the cluster will handle it
331 * when the node rejoins.
332 */
333 for (iter = op->automatic_list; iter != NULL; iter = iter->next) {
334 GListPtr match = g_list_find_custom(op->devices_list, iter->data,
335 sort_strings);
336
337 if (match) {
338 op->devices_list = g_list_remove(op->devices_list, match->data);
339 }
340 }
341 g_list_free_full(op->automatic_list, free);
342 op->automatic_list = NULL;
343
344 /* Rewind device list pointer */
345 op->devices = op->devices_list;
346 }
347
348 /*!
349 * \internal
350 * \brief Reset a remapped reboot operation
351 *
352 * \param[in,out] op Operation to reset
353 */
354 static void
undo_op_remap(remote_fencing_op_t * op)355 undo_op_remap(remote_fencing_op_t *op)
356 {
357 if (op->phase > 0) {
358 crm_info("Undoing remap of reboot targeting %s for %s.%.8s",
359 op->target, op->client_name, op->id);
360 op->phase = st_phase_requested;
361 strcpy(op->action, "reboot");
362 }
363 }
364
365 static xmlNode *
create_op_done_notify(remote_fencing_op_t * op,int rc)366 create_op_done_notify(remote_fencing_op_t * op, int rc)
367 {
368 xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
369
370 crm_xml_add_int(notify_data, "state", op->state);
371 crm_xml_add_int(notify_data, F_STONITH_RC, rc);
372 crm_xml_add(notify_data, F_STONITH_TARGET, op->target);
373 crm_xml_add(notify_data, F_STONITH_ACTION, op->action);
374 crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate);
375 crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, op->id);
376 crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator);
377 crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id);
378 crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name);
379
380 return notify_data;
381 }
382
383 void
stonith_bcast_result_to_peers(remote_fencing_op_t * op,int rc)384 stonith_bcast_result_to_peers(remote_fencing_op_t * op, int rc)
385 {
386 static int count = 0;
387 xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
388 xmlNode *notify_data = create_op_done_notify(op, rc);
389
390 count++;
391 crm_trace("Broadcasting result to peers");
392 crm_xml_add(bcast, F_TYPE, T_STONITH_NOTIFY);
393 crm_xml_add(bcast, F_SUBTYPE, "broadcast");
394 crm_xml_add(bcast, F_STONITH_OPERATION, T_STONITH_NOTIFY);
395 crm_xml_add_int(bcast, "count", count);
396 add_message_xml(bcast, F_STONITH_CALLDATA, notify_data);
397 send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE);
398 free_xml(notify_data);
399 free_xml(bcast);
400
401 return;
402 }
403
404 static void
handle_local_reply_and_notify(remote_fencing_op_t * op,xmlNode * data,int rc)405 handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc)
406 {
407 xmlNode *notify_data = NULL;
408 xmlNode *reply = NULL;
409
410 if (op->notify_sent == TRUE) {
411 /* nothing to do */
412 return;
413 }
414
415 /* Do notification with a clean data object */
416 notify_data = create_op_done_notify(op, rc);
417 crm_xml_add_int(data, "state", op->state);
418 crm_xml_add(data, F_STONITH_TARGET, op->target);
419 crm_xml_add(data, F_STONITH_OPERATION, op->action);
420
421 reply = stonith_construct_reply(op->request, NULL, data, rc);
422 crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
423
424 /* Send fencing OP reply to local client that initiated fencing */
425 do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
426
427 /* bcast to all local clients that the fencing operation happend */
428 do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data);
429 do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
430
431 /* mark this op as having notify's already sent */
432 op->notify_sent = TRUE;
433 free_xml(reply);
434 free_xml(notify_data);
435 }
436
437 static void
handle_duplicates(remote_fencing_op_t * op,xmlNode * data,int rc)438 handle_duplicates(remote_fencing_op_t * op, xmlNode * data, int rc)
439 {
440 GListPtr iter = NULL;
441
442 for (iter = op->duplicates; iter != NULL; iter = iter->next) {
443 remote_fencing_op_t *other = iter->data;
444
445 if (other->state == st_duplicate) {
446 /* Ie. it hasn't timed out already */
447 other->state = op->state;
448 crm_debug("Peforming duplicate notification for %s@%s.%.8s = %s", other->client_name,
449 other->originator, other->id, pcmk_strerror(rc));
450 remote_op_done(other, data, rc, TRUE);
451
452 } else {
453 crm_err("Skipping duplicate notification for %s@%s - %d", other->client_name,
454 other->originator, other->state);
455 }
456 }
457 }
458
459 /*!
460 * \internal
461 * \brief Finalize a remote operation.
462 *
463 * \description This function has two code paths.
464 *
465 * Path 1. This node is the owner of the operation and needs
466 * to notify the cpg group via a broadcast as to the operation's
467 * results.
468 *
469 * Path 2. The cpg broadcast is received. All nodes notify their local
470 * stonith clients the operation results.
471 *
472 * So, The owner of the operation first notifies the cluster of the result,
473 * and once that cpg notify is received back it notifies all the local clients.
474 *
475 * Nodes that are passive watchers of the operation will receive the
476 * broadcast and only need to notify their local clients the operation finished.
477 *
478 * \param op, The fencing operation to finalize
479 * \param data, The xml msg reply (if present) of the last delegated fencing
480 * operation.
481 * \param dup, Is this operation a duplicate, if so treat it a little differently
482 * making sure the broadcast is not sent out.
483 */
484 static void
remote_op_done(remote_fencing_op_t * op,xmlNode * data,int rc,int dup)485 remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup)
486 {
487 int level = LOG_ERR;
488 const char *subt = NULL;
489 xmlNode *local_data = NULL;
490
491 op->completed = time(NULL);
492 clear_remote_op_timers(op);
493 undo_op_remap(op);
494
495 if (op->notify_sent == TRUE) {
496 crm_err("Already sent notifications for '%s' targeting %s on %s for "
497 "client %s@%s.%.8s: %s " CRM_XS " rc=%d state=%d",
498 op->action, op->target,
499 (op->delegate? op->delegate : "unknown node"),
500 op->client_name, op->originator, op->id, pcmk_strerror(rc),
501 rc, op->state);
502 goto remote_op_done_cleanup;
503 }
504
505 if (!op->delegate && data && rc != -ENODEV && rc != -EHOSTUNREACH) {
506 xmlNode *ndata = get_xpath_object("//@" F_STONITH_DELEGATE, data, LOG_TRACE);
507 if(ndata) {
508 op->delegate = crm_element_value_copy(ndata, F_STONITH_DELEGATE);
509 } else {
510 op->delegate = crm_element_value_copy(data, F_ORIG);
511 }
512 }
513
514 if (data == NULL) {
515 data = create_xml_node(NULL, "remote-op");
516 local_data = data;
517 }
518
519 /* Tell everyone the operation is done, we will continue
520 * with doing the local notifications once we receive
521 * the broadcast back. */
522 subt = crm_element_value(data, F_SUBTYPE);
523 if (dup == FALSE && safe_str_neq(subt, "broadcast")) {
524 /* Defer notification until the bcast message arrives */
525 stonith_bcast_result_to_peers(op, rc);
526 goto remote_op_done_cleanup;
527 }
528
529 if (rc == pcmk_ok || dup) {
530 level = LOG_NOTICE;
531 } else if (safe_str_neq(op->originator, stonith_our_uname)) {
532 level = LOG_NOTICE;
533 }
534
535 do_crm_log(level, "Operation '%s'%s%s on %s for %s@%s.%.8s: %s",
536 op->action, (op->target? " targeting " : ""),
537 (op->target? op->target : ""),
538 (op->delegate? op->delegate : "<no-one>"),
539 op->client_name, op->originator, op->id, pcmk_strerror(rc));
540
541 handle_local_reply_and_notify(op, data, rc);
542
543 if (dup == FALSE) {
544 handle_duplicates(op, data, rc);
545 }
546
547 /* Free non-essential parts of the record
548 * Keep the record around so we can query the history
549 */
550 if (op->query_results) {
551 g_list_free_full(op->query_results, free_remote_query);
552 op->query_results = NULL;
553 }
554
555 if (op->request) {
556 free_xml(op->request);
557 op->request = NULL;
558 }
559
560 remote_op_done_cleanup:
561 free_xml(local_data);
562 }
563
564 static gboolean
remote_op_watchdog_done(gpointer userdata)565 remote_op_watchdog_done(gpointer userdata)
566 {
567 remote_fencing_op_t *op = userdata;
568
569 op->op_timer_one = 0;
570
571 crm_notice("Self-fencing (%s) by %s for %s.%8s assumed complete",
572 op->action, op->target, op->client_name, op->id);
573 op->state = st_done;
574 remote_op_done(op, NULL, pcmk_ok, FALSE);
575 return FALSE;
576 }
577
578 static gboolean
remote_op_timeout_one(gpointer userdata)579 remote_op_timeout_one(gpointer userdata)
580 {
581 remote_fencing_op_t *op = userdata;
582
583 op->op_timer_one = 0;
584
585 crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
586 " id=%s", op->action, op->target, op->client_name, op->id);
587 call_remote_stonith(op, NULL);
588 return FALSE;
589 }
590
591 static gboolean
remote_op_timeout(gpointer userdata)592 remote_op_timeout(gpointer userdata)
593 {
594 remote_fencing_op_t *op = userdata;
595
596 op->op_timer_total = 0;
597
598 if (op->state == st_done) {
599 crm_debug("Action '%s' targeting %s for client %s already completed "
600 CRM_XS " id=%s",
601 op->action, op->target, op->client_name, op->id);
602 return FALSE;
603 }
604
605 crm_debug("Action '%s' targeting %s for client %s timed out "
606 CRM_XS " id=%s",
607 op->action, op->target, op->client_name, op->id);
608
609 if (op->phase == st_phase_on) {
610 /* A remapped reboot operation timed out in the "on" phase, but the
611 * "off" phase completed successfully, so quit trying any further
612 * devices, and return success.
613 */
614 remote_op_done(op, NULL, pcmk_ok, FALSE);
615 return FALSE;
616 }
617
618 op->state = st_failed;
619
620 remote_op_done(op, NULL, -ETIME, FALSE);
621
622 return FALSE;
623 }
624
625 static gboolean
remote_op_query_timeout(gpointer data)626 remote_op_query_timeout(gpointer data)
627 {
628 remote_fencing_op_t *op = data;
629
630 op->query_timer = 0;
631 if (op->state == st_done) {
632 crm_debug("Operation %s targeting %s already completed",
633 op->id, op->target);
634 } else if (op->state == st_exec) {
635 crm_debug("Operation %s targeting %s already in progress",
636 op->id, op->target);
637 } else if (op->query_results) {
638 crm_debug("Query %s targeting %s complete (state=%d)",
639 op->id, op->target, op->state);
640 call_remote_stonith(op, NULL);
641 } else {
642 crm_debug("Query %s targeting %s timed out (state=%d)",
643 op->id, op->target, op->state);
644 if (op->op_timer_total) {
645 g_source_remove(op->op_timer_total);
646 op->op_timer_total = 0;
647 }
648 remote_op_timeout(op);
649 }
650
651 return FALSE;
652 }
653
654 static gboolean
topology_is_empty(stonith_topology_t * tp)655 topology_is_empty(stonith_topology_t *tp)
656 {
657 int i;
658
659 if (tp == NULL) {
660 return TRUE;
661 }
662
663 for (i = 0; i < ST_LEVEL_MAX; i++) {
664 if (tp->levels[i] != NULL) {
665 return FALSE;
666 }
667 }
668 return TRUE;
669 }
670
671 /*!
672 * \internal
673 * \brief Add a device to an operation's automatic unfencing list
674 *
675 * \param[in,out] op Operation to modify
676 * \param[in] device Device ID to add
677 */
678 static void
add_required_device(remote_fencing_op_t * op,const char * device)679 add_required_device(remote_fencing_op_t *op, const char *device)
680 {
681 GListPtr match = g_list_find_custom(op->automatic_list, device,
682 sort_strings);
683
684 if (!match) {
685 op->automatic_list = g_list_prepend(op->automatic_list, strdup(device));
686 }
687 }
688
689 /*!
690 * \internal
691 * \brief Remove a device from the automatic unfencing list
692 *
693 * \param[in,out] op Operation to modify
694 * \param[in] device Device ID to remove
695 */
696 static void
remove_required_device(remote_fencing_op_t * op,const char * device)697 remove_required_device(remote_fencing_op_t *op, const char *device)
698 {
699 GListPtr match = g_list_find_custom(op->automatic_list, device,
700 sort_strings);
701
702 if (match) {
703 op->automatic_list = g_list_remove(op->automatic_list, match->data);
704 }
705 }
706
707 /* deep copy the device list */
708 static void
set_op_device_list(remote_fencing_op_t * op,GListPtr devices)709 set_op_device_list(remote_fencing_op_t * op, GListPtr devices)
710 {
711 GListPtr lpc = NULL;
712
713 if (op->devices_list) {
714 g_list_free_full(op->devices_list, free);
715 op->devices_list = NULL;
716 }
717 for (lpc = devices; lpc != NULL; lpc = lpc->next) {
718 op->devices_list = g_list_append(op->devices_list, strdup(lpc->data));
719 }
720 op->devices = op->devices_list;
721 }
722
723 /*!
724 * \internal
725 * \brief Check whether a node matches a topology target
726 *
727 * \param[in] tp Topology table entry to check
728 * \param[in] node Name of node to check
729 *
730 * \return TRUE if node matches topology target
731 */
732 static gboolean
topology_matches(const stonith_topology_t * tp,const char * node)733 topology_matches(const stonith_topology_t *tp, const char *node)
734 {
735 regex_t r_patt;
736
737 CRM_CHECK(node && tp && tp->target, return FALSE);
738 switch(tp->kind) {
739 case 2:
740 /* This level targets by attribute, so tp->target is a NAME=VALUE pair
741 * of a permanent attribute applied to targeted nodes. The test below
742 * relies on the locally cached copy of the CIB, so if fencing needs to
743 * be done before the initial CIB is received or after a malformed CIB
744 * is received, then the topology will be unable to be used.
745 */
746 if (node_has_attr(node, tp->target_attribute, tp->target_value)) {
747 crm_notice("Matched %s with %s by attribute", node, tp->target);
748 return TRUE;
749 }
750 break;
751 case 1:
752 /* This level targets by name, so tp->target is a regular expression
753 * matching names of nodes to be targeted.
754 */
755
756 if (regcomp(&r_patt, tp->target_pattern, REG_EXTENDED|REG_NOSUB)) {
757 crm_info("Bad regex '%s' for fencing level", tp->target);
758 } else {
759 int status = regexec(&r_patt, node, 0, NULL, 0);
760
761 regfree(&r_patt);
762 if (status == 0) {
763 crm_notice("Matched %s with %s by name", node, tp->target);
764 return TRUE;
765 }
766 }
767 break;
768 case 0:
769 crm_trace("Testing %s against %s", node, tp->target);
770 return safe_str_eq(tp->target, node);
771 }
772 crm_trace("No match for %s with %s", node, tp->target);
773 return FALSE;
774 }
775
776 stonith_topology_t *
find_topology_for_host(const char * host)777 find_topology_for_host(const char *host)
778 {
779 GHashTableIter tIter;
780 stonith_topology_t *tp = g_hash_table_lookup(topology, host);
781
782 if(tp != NULL) {
783 crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology));
784 return tp;
785 }
786
787 g_hash_table_iter_init(&tIter, topology);
788 while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) & tp)) {
789 if (topology_matches(tp, host)) {
790 crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology));
791 return tp;
792 }
793 }
794
795 crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology));
796 return NULL;
797 }
798
799 /*!
800 * \internal
801 * \brief Set fencing operation's device list to target's next topology level
802 *
803 * \param[in,out] op Remote fencing operation to modify
804 *
805 * \return pcmk_ok if successful, target was not specified (i.e. queries) or
806 * target has no topology, or -EINVAL if no more topology levels to try
807 */
808 static int
stonith_topology_next(remote_fencing_op_t * op)809 stonith_topology_next(remote_fencing_op_t * op)
810 {
811 stonith_topology_t *tp = NULL;
812
813 if (op->target) {
814 /* Queries don't have a target set */
815 tp = find_topology_for_host(op->target);
816 }
817 if (topology_is_empty(tp)) {
818 return pcmk_ok;
819 }
820
821 set_bit(op->call_options, st_opt_topology);
822
823 /* This is a new level, so undo any remapping left over from previous */
824 undo_op_remap(op);
825
826 do {
827 op->level++;
828
829 } while (op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL);
830
831 if (op->level < ST_LEVEL_MAX) {
832 crm_trace("Attempting fencing level %d targeting %s (%d devices) "
833 "for client %s@%s.%.8s",
834 op->level, op->target, g_list_length(tp->levels[op->level]),
835 op->client_name, op->originator, op->id);
836 set_op_device_list(op, tp->levels[op->level]);
837
838 // The requested delay has been applied for the first fencing level
839 if (op->level > 1 && op->delay > 0) {
840 op->delay = 0;
841 }
842
843 if (g_list_next(op->devices_list) && safe_str_eq(op->action, "reboot")) {
844 /* A reboot has been requested for a topology level with multiple
845 * devices. Instead of rebooting the devices sequentially, we will
846 * turn them all off, then turn them all on again. (Think about
847 * switched power outlets for redundant power supplies.)
848 */
849 op_phase_off(op);
850 }
851 return pcmk_ok;
852 }
853
854 crm_notice("All fencing options targeting %s for client %s@%s.%.8s failed",
855 op->target, op->client_name, op->originator, op->id);
856 return -EINVAL;
857 }
858
859 /*!
860 * \brief Check to see if this operation is a duplicate of another in flight
861 * operation. If so merge this operation into the inflight operation, and mark
862 * it as a duplicate.
863 */
864 static void
merge_duplicates(remote_fencing_op_t * op)865 merge_duplicates(remote_fencing_op_t * op)
866 {
867 GHashTableIter iter;
868 remote_fencing_op_t *other = NULL;
869
870 time_t now = time(NULL);
871
872 g_hash_table_iter_init(&iter, stonith_remote_op_list);
873 while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) {
874 crm_node_t *peer = NULL;
875 const char *other_action = op_requested_action(other);
876
877 if (other->state > st_exec) {
878 /* Must be in-progress */
879 continue;
880 } else if (safe_str_neq(op->target, other->target)) {
881 /* Must be for the same node */
882 continue;
883 } else if (safe_str_neq(op->action, other_action)) {
884 crm_trace("Must be for the same action: %s vs. %s",
885 op->action, other_action);
886 continue;
887 } else if (safe_str_eq(op->client_name, other->client_name)) {
888 crm_trace("Must be for different clients: %s", op->client_name);
889 continue;
890 } else if (safe_str_eq(other->target, other->originator)) {
891 crm_trace("Can't be a suicide operation: %s", other->target);
892 continue;
893 }
894
895 peer = crm_get_peer(0, other->originator);
896 if(fencing_peer_active(peer) == FALSE) {
897 crm_notice("Failing action '%s' targeting %s originating from "
898 "client %s@%s.%.8s: Originator is dead",
899 other->action, other->target, other->client_name, other->originator, other->id);
900 other->state = st_failed;
901 continue;
902
903 } else if(other->total_timeout > 0 && now > (other->total_timeout + other->created)) {
904 crm_info("Action '%s' targeting %s originating from client "
905 "%s@%s.%.8s is too old: %ld vs. %ld + %d",
906 other->action, other->target, other->client_name, other->originator, other->id,
907 now, other->created, other->total_timeout);
908 continue;
909 }
910
911 /* There is another in-flight request to fence the same host
912 * Piggyback on that instead. If it fails, so do we.
913 */
914 other->duplicates = g_list_append(other->duplicates, op);
915 if (other->total_timeout == 0) {
916 crm_trace("Making a best-guess as to the timeout used");
917 other->total_timeout = op->total_timeout =
918 TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL);
919 }
920 crm_notice("Merging stonith action '%s' targeting %s originating from "
921 "client %s.%.8s with identical request from %s@%s.%.8s (%ds)",
922 op->action, op->target, op->client_name, op->id,
923 other->client_name, other->originator, other->id,
924 other->total_timeout);
925 report_timeout_period(op, other->total_timeout);
926 op->state = st_duplicate;
927 }
928 }
929
fencing_active_peers(void)930 static uint32_t fencing_active_peers(void)
931 {
932 uint32_t count = 0;
933 crm_node_t *entry;
934 GHashTableIter gIter;
935
936 g_hash_table_iter_init(&gIter, crm_peer_cache);
937 while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
938 if(fencing_peer_active(entry)) {
939 count++;
940 }
941 }
942 return count;
943 }
944
945 int
stonith_manual_ack(xmlNode * msg,remote_fencing_op_t * op)946 stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op)
947 {
948 xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
949
950 op->state = st_done;
951 op->completed = time(NULL);
952 op->delegate = strdup("a human");
953
954 crm_notice("Injecting manual confirmation that %s is safely off/down",
955 crm_element_value(dev, F_STONITH_TARGET));
956
957 remote_op_done(op, msg, pcmk_ok, FALSE);
958
959 /* Replies are sent via done_cb->stonith_send_async_reply()->do_local_reply() */
960 return -EINPROGRESS;
961 }
962
963 /*!
964 * \internal
965 * \brief Create a new remote stonith operation
966 *
967 * \param[in] client ID of local stonith client that initiated the operation
968 * \param[in] request The request from the client that started the operation
969 * \param[in] peer TRUE if this operation is owned by another stonith peer
970 * (an operation owned by one peer is stored on all peers,
971 * but only the owner executes it; all nodes get the results
972 * once the owner finishes execution)
973 */
974 void *
create_remote_stonith_op(const char * client,xmlNode * request,gboolean peer)975 create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
976 {
977 remote_fencing_op_t *op = NULL;
978 xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
979 int call_options = 0;
980 const char *operation = NULL;
981
982 init_stonith_remote_op_hash_table(&stonith_remote_op_list);
983
984 /* If this operation is owned by another node, check to make
985 * sure we haven't already created this operation. */
986 if (peer && dev) {
987 const char *op_id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
988
989 CRM_CHECK(op_id != NULL, return NULL);
990
991 op = g_hash_table_lookup(stonith_remote_op_list, op_id);
992 if (op) {
993 crm_debug("%s already exists", op_id);
994 return op;
995 }
996 }
997
998 op = calloc(1, sizeof(remote_fencing_op_t));
999
1000 crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout));
1001 // Value -1 means disable any static/random fencing delays
1002 crm_element_value_int(request, F_STONITH_DELAY, &(op->delay));
1003
1004 if (peer && dev) {
1005 op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID);
1006 } else {
1007 op->id = crm_generate_uuid();
1008 }
1009
1010 g_hash_table_replace(stonith_remote_op_list, op->id, op);
1011 CRM_LOG_ASSERT(g_hash_table_lookup(stonith_remote_op_list, op->id) != NULL);
1012 crm_trace("Created %s", op->id);
1013
1014 op->state = st_query;
1015 op->replies_expected = fencing_active_peers();
1016 op->action = crm_element_value_copy(dev, F_STONITH_ACTION);
1017 op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN);
1018 op->delegate = crm_element_value_copy(dev, F_STONITH_DELEGATE); /* May not be set */
1019 op->created = time(NULL);
1020
1021 if (op->originator == NULL) {
1022 /* Local or relayed request */
1023 op->originator = strdup(stonith_our_uname);
1024 }
1025
1026 CRM_LOG_ASSERT(client != NULL);
1027 if (client) {
1028 op->client_id = strdup(client);
1029 }
1030
1031 /* For a RELAY operation, set fenced on the client. */
1032 operation = crm_element_value(request, F_STONITH_OPERATION);
1033
1034 if (crm_str_eq(operation, STONITH_OP_RELAY, TRUE)) {
1035 op->client_name = crm_strdup_printf("%s.%lu", crm_system_name,
1036 (unsigned long) getpid());
1037 } else {
1038 op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME);
1039 }
1040
1041 op->target = crm_element_value_copy(dev, F_STONITH_TARGET);
1042 op->request = copy_xml(request); /* TODO: Figure out how to avoid this */
1043 crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
1044 op->call_options = call_options;
1045
1046 crm_element_value_int(request, F_STONITH_CALLID, &(op->client_callid));
1047
1048 crm_trace("%s new stonith op %s ('%s' targeting %s for client %s)",
1049 (peer && dev)? "Recorded" : "Generated", op->id, op->action,
1050 op->target, op->client_name);
1051
1052 if (op->call_options & st_opt_cs_nodeid) {
1053 int nodeid = crm_atoi(op->target, NULL);
1054 crm_node_t *node = crm_find_known_peer_full(nodeid, NULL, CRM_GET_PEER_ANY);
1055
1056 /* Ensure the conversion only happens once */
1057 op->call_options &= ~st_opt_cs_nodeid;
1058
1059 if (node && node->uname) {
1060 free(op->target);
1061 op->target = strdup(node->uname);
1062
1063 } else {
1064 crm_warn("Could not expand nodeid '%s' into a host name", op->target);
1065 }
1066 }
1067
1068 /* check to see if this is a duplicate operation of another in-flight operation */
1069 merge_duplicates(op);
1070
1071 if (op->state != st_duplicate) {
1072 /* kick history readers */
1073 do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
1074 }
1075
1076 /* safe to trim as long as that doesn't touch pending ops */
1077 stonith_fence_history_trim();
1078
1079 return op;
1080 }
1081
1082 remote_fencing_op_t *
initiate_remote_stonith_op(crm_client_t * client,xmlNode * request,gboolean manual_ack)1083 initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean manual_ack)
1084 {
1085 int query_timeout = 0;
1086 xmlNode *query = NULL;
1087 const char *client_id = NULL;
1088 remote_fencing_op_t *op = NULL;
1089 const char *relay_op_id = NULL;
1090 const char *operation = NULL;
1091
1092 if (client) {
1093 client_id = client->id;
1094 } else {
1095 client_id = crm_element_value(request, F_STONITH_CLIENTID);
1096 }
1097
1098 CRM_LOG_ASSERT(client_id != NULL);
1099 op = create_remote_stonith_op(client_id, request, FALSE);
1100 op->owner = TRUE;
1101 if (manual_ack) {
1102 crm_notice("Initiating manual confirmation for %s: %s",
1103 op->target, op->id);
1104 return op;
1105 }
1106
1107 CRM_CHECK(op->action, return NULL);
1108
1109 if (stonith_topology_next(op) != pcmk_ok) {
1110 op->state = st_failed;
1111 }
1112
1113 switch (op->state) {
1114 case st_failed:
1115 crm_warn("Could not request peer fencing (%s) targeting %s "
1116 CRM_XS " id=%s", op->action, op->target, op->id);
1117 remote_op_done(op, NULL, -EINVAL, FALSE);
1118 return op;
1119
1120 case st_duplicate:
1121 crm_info("Requesting peer fencing (%s) targeting %s (duplicate) "
1122 CRM_XS " id=%s", op->action, op->target, op->id);
1123 return op;
1124
1125 default:
1126 crm_notice("Requesting peer fencing (%s) targeting %s "
1127 CRM_XS " id=%s state=%d",
1128 op->action, op->target, op->id, op->state);
1129 }
1130
1131 query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY,
1132 NULL, op->call_options);
1133
1134 crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id);
1135 crm_xml_add(query, F_STONITH_TARGET, op->target);
1136 crm_xml_add(query, F_STONITH_ACTION, op_requested_action(op));
1137 crm_xml_add(query, F_STONITH_ORIGIN, op->originator);
1138 crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
1139 crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
1140 crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout);
1141
1142 /* In case of RELAY operation, RELAY information is added to the query to delete the original operation of RELAY. */
1143 operation = crm_element_value(request, F_STONITH_OPERATION);
1144 if (crm_str_eq(operation, STONITH_OP_RELAY, TRUE)) {
1145 relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID);
1146 if (relay_op_id) {
1147 crm_xml_add(query, F_STONITH_REMOTE_OP_ID_RELAY, relay_op_id);
1148 }
1149 }
1150
1151 send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
1152 free_xml(query);
1153
1154 query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR;
1155 op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op);
1156
1157 return op;
1158 }
1159
/* Bit flags that narrow the peers considered by find_best_peer() */
enum find_best_peer_options {
    /*! Skip checking the target peer for capable fencing devices */
    FIND_PEER_SKIP_TARGET   = (1 << 0),
    /*! Only check the target peer for capable fencing devices */
    FIND_PEER_TARGET_ONLY   = (1 << 1),
    /*! Skip peers and devices that are not verified */
    FIND_PEER_VERIFIED_ONLY = (1 << 2),
};
1168
1169 static st_query_result_t *
find_best_peer(const char * device,remote_fencing_op_t * op,enum find_best_peer_options options)1170 find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options)
1171 {
1172 GListPtr iter = NULL;
1173 gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE;
1174
1175 if (!device && is_set(op->call_options, st_opt_topology)) {
1176 return NULL;
1177 }
1178
1179 for (iter = op->query_results; iter != NULL; iter = iter->next) {
1180 st_query_result_t *peer = iter->data;
1181
1182 crm_trace("Testing result from %s targeting %s with %d devices: %d %x",
1183 peer->host, op->target, peer->ndevices, peer->tried, options);
1184 if ((options & FIND_PEER_SKIP_TARGET) && safe_str_eq(peer->host, op->target)) {
1185 continue;
1186 }
1187 if ((options & FIND_PEER_TARGET_ONLY) && safe_str_neq(peer->host, op->target)) {
1188 continue;
1189 }
1190
1191 if (is_set(op->call_options, st_opt_topology)) {
1192
1193 if (grab_peer_device(op, peer, device, verified_devices_only)) {
1194 return peer;
1195 }
1196
1197 } else if ((peer->tried == FALSE)
1198 && count_peer_devices(op, peer, verified_devices_only)) {
1199
1200 /* No topology: Use the current best peer */
1201 crm_trace("Simple fencing");
1202 return peer;
1203 }
1204 }
1205
1206 return NULL;
1207 }
1208
1209 static st_query_result_t *
stonith_choose_peer(remote_fencing_op_t * op)1210 stonith_choose_peer(remote_fencing_op_t * op)
1211 {
1212 const char *device = NULL;
1213 st_query_result_t *peer = NULL;
1214 uint32_t active = fencing_active_peers();
1215
1216 do {
1217 if (op->devices) {
1218 device = op->devices->data;
1219 crm_trace("Checking for someone to fence (%s) %s with %s",
1220 op->action, op->target, device);
1221 } else {
1222 crm_trace("Checking for someone to fence (%s) %s",
1223 op->action, op->target);
1224 }
1225
1226 /* Best choice is a peer other than the target with verified access */
1227 peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY);
1228 if (peer) {
1229 crm_trace("Found verified peer %s for %s", peer->host, device?device:"<any>");
1230 return peer;
1231 }
1232
1233 if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) {
1234 crm_trace("Waiting before looking for unverified devices to fence %s", op->target);
1235 return NULL;
1236 }
1237
1238 /* If no other peer has verified access, next best is unverified access */
1239 peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET);
1240 if (peer) {
1241 crm_trace("Found best unverified peer %s", peer->host);
1242 return peer;
1243 }
1244
1245 /* If no other peer can do it, last option is self-fencing
1246 * (which is never allowed for the "on" phase of a remapped reboot)
1247 */
1248 if (op->phase != st_phase_on) {
1249 peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY);
1250 if (peer) {
1251 crm_trace("%s will fence itself", peer->host);
1252 return peer;
1253 }
1254 }
1255
1256 /* Try the next fencing level if there is one (unless we're in the "on"
1257 * phase of a remapped "reboot", because we ignore errors in that case)
1258 */
1259 } while ((op->phase != st_phase_on)
1260 && is_set(op->call_options, st_opt_topology)
1261 && stonith_topology_next(op) == pcmk_ok);
1262
1263 crm_notice("Couldn't find anyone to fence (%s) %s with %s",
1264 op->action, op->target, (device? device : "any device"));
1265 return NULL;
1266 }
1267
1268 static int
get_device_timeout(const remote_fencing_op_t * op,const st_query_result_t * peer,const char * device)1269 get_device_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer,
1270 const char *device)
1271 {
1272 device_properties_t *props;
1273
1274 if (!peer || !device) {
1275 return op->base_timeout;
1276 }
1277
1278 props = g_hash_table_lookup(peer->devices, device);
1279 if (!props) {
1280 return op->base_timeout;
1281 }
1282
1283 return (props->custom_action_timeout[op->phase]?
1284 props->custom_action_timeout[op->phase] : op->base_timeout)
1285 + props->delay_max[op->phase];
1286 }
1287
1288 struct timeout_data {
1289 const remote_fencing_op_t *op;
1290 const st_query_result_t *peer;
1291 int total_timeout;
1292 };
1293
1294 /*!
1295 * \internal
1296 * \brief Add timeout to a total if device has not been executed yet
1297 *
1298 * \param[in] key GHashTable key (device ID)
1299 * \param[in] value GHashTable value (device properties)
1300 * \param[in] user_data Timeout data
1301 */
1302 static void
add_device_timeout(gpointer key,gpointer value,gpointer user_data)1303 add_device_timeout(gpointer key, gpointer value, gpointer user_data)
1304 {
1305 const char *device_id = key;
1306 device_properties_t *props = value;
1307 struct timeout_data *timeout = user_data;
1308
1309 if (!props->executed[timeout->op->phase]
1310 && !props->disallowed[timeout->op->phase]) {
1311 timeout->total_timeout += get_device_timeout(timeout->op,
1312 timeout->peer, device_id);
1313 }
1314 }
1315
1316 static int
get_peer_timeout(const remote_fencing_op_t * op,const st_query_result_t * peer)1317 get_peer_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer)
1318 {
1319 struct timeout_data timeout;
1320
1321 timeout.op = op;
1322 timeout.peer = peer;
1323 timeout.total_timeout = 0;
1324
1325 g_hash_table_foreach(peer->devices, add_device_timeout, &timeout);
1326
1327 return (timeout.total_timeout? timeout.total_timeout : op->base_timeout);
1328 }
1329
1330 static int
get_op_total_timeout(const remote_fencing_op_t * op,const st_query_result_t * chosen_peer)1331 get_op_total_timeout(const remote_fencing_op_t *op,
1332 const st_query_result_t *chosen_peer)
1333 {
1334 int total_timeout = 0;
1335 stonith_topology_t *tp = find_topology_for_host(op->target);
1336
1337 if (is_set(op->call_options, st_opt_topology) && tp) {
1338 int i;
1339 GListPtr device_list = NULL;
1340 GListPtr iter = NULL;
1341
1342 /* Yep, this looks scary, nested loops all over the place.
1343 * Here is what is going on.
1344 * Loop1: Iterate through fencing levels.
1345 * Loop2: If a fencing level has devices, loop through each device
1346 * Loop3: For each device in a fencing level, see what peer owns it
1347 * and what that peer has reported the timeout is for the device.
1348 */
1349 for (i = 0; i < ST_LEVEL_MAX; i++) {
1350 if (!tp->levels[i]) {
1351 continue;
1352 }
1353 for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
1354 for (iter = op->query_results; iter != NULL; iter = iter->next) {
1355 const st_query_result_t *peer = iter->data;
1356
1357 if (find_peer_device(op, peer, device_list->data)) {
1358 total_timeout += get_device_timeout(op, peer,
1359 device_list->data);
1360 break;
1361 }
1362 } /* End Loop3: match device with peer that owns device, find device's timeout period */
1363 } /* End Loop2: iterate through devices at a specific level */
1364 } /*End Loop1: iterate through fencing levels */
1365
1366 } else if (chosen_peer) {
1367 total_timeout = get_peer_timeout(op, chosen_peer);
1368 } else {
1369 total_timeout = op->base_timeout;
1370 }
1371
1372 return total_timeout ? total_timeout : op->base_timeout;
1373 }
1374
1375 static void
report_timeout_period(remote_fencing_op_t * op,int op_timeout)1376 report_timeout_period(remote_fencing_op_t * op, int op_timeout)
1377 {
1378 GListPtr iter = NULL;
1379 xmlNode *update = NULL;
1380 const char *client_node = NULL;
1381 const char *client_id = NULL;
1382 const char *call_id = NULL;
1383
1384 if (op->call_options & st_opt_sync_call) {
1385 /* There is no reason to report the timeout for a synchronous call. It
1386 * is impossible to use the reported timeout to do anything when the client
1387 * is blocking for the response. This update is only important for
1388 * async calls that require a callback to report the results in. */
1389 return;
1390 } else if (!op->request) {
1391 return;
1392 }
1393
1394 crm_trace("Reporting timeout for %s.%.8s", op->client_name, op->id);
1395 client_node = crm_element_value(op->request, F_STONITH_CLIENTNODE);
1396 call_id = crm_element_value(op->request, F_STONITH_CALLID);
1397 client_id = crm_element_value(op->request, F_STONITH_CLIENTID);
1398 if (!client_node || !call_id || !client_id) {
1399 return;
1400 }
1401
1402 if (safe_str_eq(client_node, stonith_our_uname)) {
1403 /* The client is connected to this node, send the update direclty to them */
1404 do_stonith_async_timeout_update(client_id, call_id, op_timeout);
1405 return;
1406 }
1407
1408 /* The client is connected to another node, relay this update to them */
1409 update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0);
1410 crm_xml_add(update, F_STONITH_REMOTE_OP_ID, op->id);
1411 crm_xml_add(update, F_STONITH_CLIENTID, client_id);
1412 crm_xml_add(update, F_STONITH_CALLID, call_id);
1413 crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout);
1414
1415 send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE);
1416
1417 free_xml(update);
1418
1419 for (iter = op->duplicates; iter != NULL; iter = iter->next) {
1420 remote_fencing_op_t *dup = iter->data;
1421
1422 crm_trace("Reporting timeout for duplicate %s.%.8s", dup->client_name, dup->id);
1423 report_timeout_period(iter->data, op_timeout);
1424 }
1425 }
1426
1427 /*!
1428 * \internal
1429 * \brief Advance an operation to the next device in its topology
1430 *
1431 * \param[in,out] op Operation to advance
1432 * \param[in] device ID of device just completed
1433 * \param[in] msg XML reply that contained device result (if available)
1434 * \param[in] rc Return code of device's execution
1435 */
1436 static void
advance_op_topology(remote_fencing_op_t * op,const char * device,xmlNode * msg,int rc)1437 advance_op_topology(remote_fencing_op_t *op, const char *device, xmlNode *msg,
1438 int rc)
1439 {
1440 /* Advance to the next device at this topology level, if any */
1441 if (op->devices) {
1442 op->devices = op->devices->next;
1443 }
1444
1445 /* Handle automatic unfencing if an "on" action was requested */
1446 if ((op->phase == st_phase_requested) && safe_str_eq(op->action, "on")) {
1447 /* If the device we just executed was required, it's not anymore */
1448 remove_required_device(op, device);
1449
1450 /* If there are no more devices at this topology level, run through any
1451 * remaining devices with automatic unfencing
1452 */
1453 if (op->devices == NULL) {
1454 op->devices = op->automatic_list;
1455 }
1456 }
1457
1458 if ((op->devices == NULL) && (op->phase == st_phase_off)) {
1459 /* We're done with this level and with required devices, but we had
1460 * remapped "reboot" to "off", so start over with "on". If any devices
1461 * need to be turned back on, op->devices will be non-NULL after this.
1462 */
1463 op_phase_on(op);
1464 }
1465
1466 if (op->devices) {
1467 /* Necessary devices remain, so execute the next one */
1468 crm_trace("Next targeting %s on behalf of %s@%s (rc was %d)",
1469 op->target, op->originator, op->client_name, rc);
1470
1471 // The requested delay has been applied for the first device
1472 if (op->delay > 0) {
1473 op->delay = 0;
1474 }
1475
1476 call_remote_stonith(op, NULL);
1477 } else {
1478 /* We're done with all devices and phases, so finalize operation */
1479 crm_trace("Marking complex fencing op targeting %s as complete",
1480 op->target);
1481 op->state = st_done;
1482 remote_op_done(op, msg, rc, FALSE);
1483 }
1484 }
1485
1486 void
call_remote_stonith(remote_fencing_op_t * op,st_query_result_t * peer)1487 call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer)
1488 {
1489 const char *device = NULL;
1490 int timeout = op->base_timeout;
1491
1492 crm_trace("State for %s.%.8s: %s %d", op->target, op->client_name, op->id, op->state);
1493 if (peer == NULL && !is_set(op->call_options, st_opt_topology)) {
1494 peer = stonith_choose_peer(op);
1495 }
1496
1497 if (!op->op_timer_total) {
1498 int total_timeout = get_op_total_timeout(op, peer);
1499
1500 op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * total_timeout;
1501 op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op);
1502 report_timeout_period(op, op->total_timeout);
1503 crm_info("Total timeout set to %d for peer's fencing targeting %s for %s"
1504 CRM_XS "id=%s",
1505 total_timeout, op->target, op->client_name, op->id);
1506 }
1507
1508 if (is_set(op->call_options, st_opt_topology) && op->devices) {
1509 /* Ignore any peer preference, they might not have the device we need */
1510 /* When using topology, stonith_choose_peer() removes the device from
1511 * further consideration, so be sure to calculate timeout beforehand */
1512 peer = stonith_choose_peer(op);
1513
1514 device = op->devices->data;
1515 timeout = get_device_timeout(op, peer, device);
1516 }
1517
1518 if (peer) {
1519 int timeout_one = 0;
1520 xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);
1521
1522 crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id);
1523 crm_xml_add(remote_op, F_STONITH_TARGET, op->target);
1524 crm_xml_add(remote_op, F_STONITH_ACTION, op->action);
1525 crm_xml_add(remote_op, F_STONITH_ORIGIN, op->originator);
1526 crm_xml_add(remote_op, F_STONITH_CLIENTID, op->client_id);
1527 crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name);
1528 crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout);
1529 crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options);
1530 crm_xml_add_int(remote_op, F_STONITH_DELAY, op->delay);
1531
1532 if (device) {
1533 timeout_one = TIMEOUT_MULTIPLY_FACTOR *
1534 get_device_timeout(op, peer, device);
1535 crm_notice("Requesting that %s perform '%s' action targeting %s "
1536 "using '%s' " CRM_XS " for client %s (%ds)",
1537 peer->host, op->action, op->target, device,
1538 op->client_name, timeout_one);
1539 crm_xml_add(remote_op, F_STONITH_DEVICE, device);
1540 crm_xml_add(remote_op, F_STONITH_MODE, "slave");
1541
1542 } else {
1543 timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer);
1544 crm_notice("Requesting that %s perform '%s' action targeting %s "
1545 CRM_XS " for client %s (%ds, %lds)",
1546 peer->host, op->action, op->target, op->client_name,
1547 timeout_one, stonith_watchdog_timeout_ms);
1548 crm_xml_add(remote_op, F_STONITH_MODE, "smart");
1549 }
1550
1551 op->state = st_exec;
1552 if (op->op_timer_one) {
1553 g_source_remove(op->op_timer_one);
1554 }
1555
1556 if(stonith_watchdog_timeout_ms > 0 && device && safe_str_eq(device, "watchdog")) {
1557 crm_notice("Waiting %lds for %s to self-fence (%s) for client %s.%.8s",
1558 stonith_watchdog_timeout_ms/1000, op->target, op->action,
1559 op->client_name, op->id);
1560 op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
1561
1562 /* TODO check devices to verify watchdog will be in use */
1563 } else if(stonith_watchdog_timeout_ms > 0
1564 && safe_str_eq(peer->host, op->target)
1565 && safe_str_neq(op->action, "on")) {
1566 crm_notice("Waiting %lds for %s to self-fence (%s) for client %s.%.8s",
1567 stonith_watchdog_timeout_ms/1000, op->target, op->action,
1568 op->client_name, op->id);
1569 op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
1570
1571 } else {
1572 op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
1573 }
1574
1575
1576 send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE);
1577 peer->tried = TRUE;
1578 free_xml(remote_op);
1579 return;
1580
1581 } else if (op->phase == st_phase_on) {
1582 /* A remapped "on" cannot be executed, but the node was already
1583 * turned off successfully, so ignore the error and continue.
1584 */
1585 crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s "
1586 "after successful 'off'", device, op->target);
1587 advance_op_topology(op, device, NULL, pcmk_ok);
1588 return;
1589
1590 } else if (op->owner == FALSE) {
1591 crm_err("Fencing (%s) targeting %s for client %s is not ours to control",
1592 op->action, op->target, op->client_name);
1593
1594 } else if (op->query_timer == 0) {
1595 /* We've exhausted all available peers */
1596 crm_info("No remaining peers capable of fencing (%s) %s for client %s "
1597 CRM_XS " state=%d",
1598 op->action, op->target, op->client_name, op->state);
1599 CRM_LOG_ASSERT(op->state < st_done);
1600 remote_op_timeout(op);
1601
1602 } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
1603 int rc = -EHOSTUNREACH;
1604
1605 /* if the operation never left the query state,
1606 * but we have all the expected replies, then no devices
1607 * are available to execute the fencing operation. */
1608
1609 if(stonith_watchdog_timeout_ms && (device == NULL || safe_str_eq(device, "watchdog"))) {
1610 crm_notice("Waiting %lds for %s to self-fence (%s) for client %s.%.8s",
1611 stonith_watchdog_timeout_ms/1000, op->target,
1612 op->action, op->client_name, op->id);
1613
1614 op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
1615 return;
1616 }
1617
1618 if (op->state == st_query) {
1619 crm_info("No peers (out of %d) have devices capable of fencing "
1620 "(%s) %s for client %s " CRM_XS " state=%d",
1621 op->replies, op->action, op->target, op->client_name,
1622 op->state);
1623
1624 rc = -ENODEV;
1625 } else {
1626 crm_info("No peers (out of %d) are capable of fencing (%s) %s "
1627 "for client %s " CRM_XS " state=%d",
1628 op->replies, op->action, op->target, op->client_name,
1629 op->state);
1630 }
1631
1632 op->state = st_failed;
1633 remote_op_done(op, NULL, rc, FALSE);
1634
1635 } else {
1636 crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s "
1637 "for client %s%.8s",
1638 op->action, op->target, (device? " with " : ""),
1639 (device? device : ""), op->client_name, op->id);
1640 }
1641 }
1642
1643 /*!
1644 * \internal
1645 * \brief Comparison function for sorting query results
1646 *
1647 * \param[in] a GList item to compare
1648 * \param[in] b GList item to compare
1649 *
1650 * \return Per the glib documentation, "a negative integer if the first value
1651 * comes before the second, 0 if they are equal, or a positive integer
1652 * if the first value comes after the second."
1653 */
1654 static gint
sort_peers(gconstpointer a,gconstpointer b)1655 sort_peers(gconstpointer a, gconstpointer b)
1656 {
1657 const st_query_result_t *peer_a = a;
1658 const st_query_result_t *peer_b = b;
1659
1660 return (peer_b->ndevices - peer_a->ndevices);
1661 }
1662
1663 /*!
1664 * \internal
1665 * \brief Determine if all the devices in the topology are found or not
1666 */
1667 static gboolean
all_topology_devices_found(remote_fencing_op_t * op)1668 all_topology_devices_found(remote_fencing_op_t * op)
1669 {
1670 GListPtr device = NULL;
1671 GListPtr iter = NULL;
1672 device_properties_t *match = NULL;
1673 stonith_topology_t *tp = NULL;
1674 gboolean skip_target = FALSE;
1675 int i;
1676
1677 tp = find_topology_for_host(op->target);
1678 if (!tp) {
1679 return FALSE;
1680 }
1681 if (safe_str_eq(op->action, "off") || safe_str_eq(op->action, "reboot")) {
1682 /* Don't count the devices on the target node if we are killing
1683 * the target node. */
1684 skip_target = TRUE;
1685 }
1686
1687 for (i = 0; i < ST_LEVEL_MAX; i++) {
1688 for (device = tp->levels[i]; device; device = device->next) {
1689 match = NULL;
1690 for (iter = op->query_results; iter && !match; iter = iter->next) {
1691 st_query_result_t *peer = iter->data;
1692
1693 if (skip_target && safe_str_eq(peer->host, op->target)) {
1694 continue;
1695 }
1696 match = find_peer_device(op, peer, device->data);
1697 }
1698 if (!match) {
1699 return FALSE;
1700 }
1701 }
1702 }
1703
1704 return TRUE;
1705 }
1706
1707 /*!
1708 * \internal
1709 * \brief Parse action-specific device properties from XML
1710 *
1711 * \param[in] msg XML element containing the properties
1712 * \param[in] peer Name of peer that sent XML (for logs)
1713 * \param[in] device Device ID (for logs)
1714 * \param[in] action Action the properties relate to (for logs)
1715 * \param[in] phase Phase the properties relate to
1716 * \param[in,out] props Device properties to update
1717 */
1718 static void
parse_action_specific(xmlNode * xml,const char * peer,const char * device,const char * action,remote_fencing_op_t * op,enum st_remap_phase phase,device_properties_t * props)1719 parse_action_specific(xmlNode *xml, const char *peer, const char *device,
1720 const char *action, remote_fencing_op_t *op,
1721 enum st_remap_phase phase, device_properties_t *props)
1722 {
1723 props->custom_action_timeout[phase] = 0;
1724 crm_element_value_int(xml, F_STONITH_ACTION_TIMEOUT,
1725 &props->custom_action_timeout[phase]);
1726 if (props->custom_action_timeout[phase]) {
1727 crm_trace("Peer %s with device %s returned %s action timeout %d",
1728 peer, device, action, props->custom_action_timeout[phase]);
1729 }
1730
1731 props->delay_max[phase] = 0;
1732 crm_element_value_int(xml, F_STONITH_DELAY_MAX, &props->delay_max[phase]);
1733 if (props->delay_max[phase]) {
1734 crm_trace("Peer %s with device %s returned maximum of random delay %d for %s",
1735 peer, device, props->delay_max[phase], action);
1736 }
1737
1738 props->delay_base[phase] = 0;
1739 crm_element_value_int(xml, F_STONITH_DELAY_BASE, &props->delay_base[phase]);
1740 if (props->delay_base[phase]) {
1741 crm_trace("Peer %s with device %s returned base delay %d for %s",
1742 peer, device, props->delay_base[phase], action);
1743 }
1744
1745 /* Handle devices with automatic unfencing */
1746 if (safe_str_eq(action, "on")) {
1747 int required = 0;
1748
1749 crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &required);
1750 if (required) {
1751 crm_trace("Peer %s requires device %s to execute for action %s",
1752 peer, device, action);
1753 add_required_device(op, device);
1754 }
1755 }
1756
1757 /* If a reboot is remapped to off+on, it's possible that a node is allowed
1758 * to perform one action but not another.
1759 */
1760 if (crm_is_true(crm_element_value(xml, F_STONITH_ACTION_DISALLOWED))) {
1761 props->disallowed[phase] = TRUE;
1762 crm_trace("Peer %s is disallowed from executing %s for device %s",
1763 peer, action, device);
1764 }
1765 }
1766
1767 /*!
1768 * \internal
1769 * \brief Parse one device's properties from peer's XML query reply
1770 *
1771 * \param[in] xml XML node containing device properties
1772 * \param[in,out] op Operation that query and reply relate to
1773 * \param[in,out] result Peer's results
1774 * \param[in] device ID of device being parsed
1775 */
1776 static void
add_device_properties(xmlNode * xml,remote_fencing_op_t * op,st_query_result_t * result,const char * device)1777 add_device_properties(xmlNode *xml, remote_fencing_op_t *op,
1778 st_query_result_t *result, const char *device)
1779 {
1780 xmlNode *child;
1781 int verified = 0;
1782 device_properties_t *props = calloc(1, sizeof(device_properties_t));
1783
1784 /* Add a new entry to this result's devices list */
1785 CRM_ASSERT(props != NULL);
1786 g_hash_table_insert(result->devices, strdup(device), props);
1787
1788 /* Peers with verified (monitored) access will be preferred */
1789 crm_element_value_int(xml, F_STONITH_DEVICE_VERIFIED, &verified);
1790 if (verified) {
1791 crm_trace("Peer %s has confirmed a verified device %s",
1792 result->host, device);
1793 props->verified = TRUE;
1794 }
1795
1796 /* Parse action-specific device properties */
1797 parse_action_specific(xml, result->host, device, op_requested_action(op),
1798 op, st_phase_requested, props);
1799 for (child = __xml_first_child(xml); child != NULL; child = __xml_next(child)) {
1800 /* Replies for "reboot" operations will include the action-specific
1801 * values for "off" and "on" in child elements, just in case the reboot
1802 * winds up getting remapped.
1803 */
1804 if (safe_str_eq(ID(child), "off")) {
1805 parse_action_specific(child, result->host, device, "off",
1806 op, st_phase_off, props);
1807 } else if (safe_str_eq(ID(child), "on")) {
1808 parse_action_specific(child, result->host, device, "on",
1809 op, st_phase_on, props);
1810 }
1811 }
1812 }
1813
1814 /*!
1815 * \internal
1816 * \brief Parse a peer's XML query reply and add it to operation's results
1817 *
1818 * \param[in,out] op Operation that query and reply relate to
1819 * \param[in] host Name of peer that sent this reply
1820 * \param[in] ndevices Number of devices expected in reply
1821 * \param[in] xml XML node containing device list
1822 *
1823 * \return Newly allocated result structure with parsed reply
1824 */
1825 static st_query_result_t *
add_result(remote_fencing_op_t * op,const char * host,int ndevices,xmlNode * xml)1826 add_result(remote_fencing_op_t *op, const char *host, int ndevices, xmlNode *xml)
1827 {
1828 st_query_result_t *result = calloc(1, sizeof(st_query_result_t));
1829 xmlNode *child;
1830
1831 CRM_CHECK(result != NULL, return NULL);
1832 result->host = strdup(host);
1833 result->devices = crm_str_table_new();
1834
1835 /* Each child element describes one capable device available to the peer */
1836 for (child = __xml_first_child(xml); child != NULL; child = __xml_next(child)) {
1837 const char *device = ID(child);
1838
1839 if (device) {
1840 add_device_properties(child, op, result, device);
1841 }
1842 }
1843
1844 result->ndevices = g_hash_table_size(result->devices);
1845 CRM_CHECK(ndevices == result->ndevices,
1846 crm_err("Query claimed to have %d devices but %d found",
1847 ndevices, result->ndevices));
1848
1849 op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers);
1850 return result;
1851 }
1852
1853 /*!
1854 * \internal
1855 * \brief Handle a peer's reply to our fencing query
1856 *
1857 * Parse a query result from XML and store it in the remote operation
1858 * table, and when enough replies have been received, issue a fencing request.
1859 *
1860 * \param[in] msg XML reply received
1861 *
1862 * \return pcmk_ok on success, -errno on error
1863 *
1864 * \note See initiate_remote_stonith_op() for how the XML query was initially
1865 * formed, and stonith_query() for how the peer formed its XML reply.
1866 */
1867 int
process_remote_stonith_query(xmlNode * msg)1868 process_remote_stonith_query(xmlNode * msg)
1869 {
1870 int ndevices = 0;
1871 gboolean host_is_target = FALSE;
1872 gboolean have_all_replies = FALSE;
1873 const char *id = NULL;
1874 const char *host = NULL;
1875 remote_fencing_op_t *op = NULL;
1876 st_query_result_t *result = NULL;
1877 uint32_t replies_expected;
1878 xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
1879
1880 CRM_CHECK(dev != NULL, return -EPROTO);
1881
1882 id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
1883 CRM_CHECK(id != NULL, return -EPROTO);
1884
1885 dev = get_xpath_object("//@" F_STONITH_AVAILABLE_DEVICES, msg, LOG_ERR);
1886 CRM_CHECK(dev != NULL, return -EPROTO);
1887 crm_element_value_int(dev, F_STONITH_AVAILABLE_DEVICES, &ndevices);
1888
1889 op = g_hash_table_lookup(stonith_remote_op_list, id);
1890 if (op == NULL) {
1891 crm_debug("Received query reply for unknown or expired operation %s",
1892 id);
1893 return -EOPNOTSUPP;
1894 }
1895
1896 replies_expected = QB_MIN(op->replies_expected, fencing_active_peers());
1897 if ((++op->replies >= replies_expected) && (op->state == st_query)) {
1898 have_all_replies = TRUE;
1899 }
1900 host = crm_element_value(msg, F_ORIG);
1901 host_is_target = safe_str_eq(host, op->target);
1902
1903 crm_info("Query result %d of %d from %s for %s/%s (%d devices) %s",
1904 op->replies, replies_expected, host,
1905 op->target, op->action, ndevices, id);
1906 if (ndevices > 0) {
1907 result = add_result(op, host, ndevices, dev);
1908 }
1909
1910 if (is_set(op->call_options, st_opt_topology)) {
1911 /* If we start the fencing before all the topology results are in,
1912 * it is possible fencing levels will be skipped because of the missing
1913 * query results. */
1914 if (op->state == st_query && all_topology_devices_found(op)) {
1915 /* All the query results are in for the topology, start the fencing ops. */
1916 crm_trace("All topology devices found");
1917 call_remote_stonith(op, result);
1918
1919 } else if (have_all_replies) {
1920 crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
1921 replies_expected, op->replies);
1922 call_remote_stonith(op, NULL);
1923 }
1924
1925 } else if (op->state == st_query) {
1926 int nverified = count_peer_devices(op, result, TRUE);
1927
1928 /* We have a result for a non-topology fencing op that looks promising,
1929 * go ahead and start fencing before query timeout */
1930 if (result && (host_is_target == FALSE) && nverified) {
1931 /* we have a verified device living on a peer that is not the target */
1932 crm_trace("Found %d verified devices", nverified);
1933 call_remote_stonith(op, result);
1934
1935 } else if (have_all_replies) {
1936 crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
1937 replies_expected, op->replies);
1938 call_remote_stonith(op, NULL);
1939
1940 } else {
1941 crm_trace("Waiting for more peer results before launching fencing operation");
1942 }
1943
1944 } else if (result && (op->state == st_done)) {
1945 crm_info("Discarding query result from %s (%d devices): Operation is in state %d",
1946 result->host, result->ndevices, op->state);
1947 }
1948
1949 return pcmk_ok;
1950 }
1951
1952 /*!
1953 * \internal
1954 * \brief Handle a peer's reply to a fencing request
1955 *
1956 * Parse a fencing reply from XML, and either finalize the operation
1957 * or attempt another device as appropriate.
1958 *
1959 * \param[in] msg XML reply received
1960 *
1961 * \return pcmk_ok on success, -errno on error
1962 */
1963 int
process_remote_stonith_exec(xmlNode * msg)1964 process_remote_stonith_exec(xmlNode * msg)
1965 {
1966 int rc = 0;
1967 const char *id = NULL;
1968 const char *device = NULL;
1969 remote_fencing_op_t *op = NULL;
1970 xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
1971
1972 CRM_CHECK(dev != NULL, return -EPROTO);
1973
1974 id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
1975 CRM_CHECK(id != NULL, return -EPROTO);
1976
1977 dev = get_xpath_object("//@" F_STONITH_RC, msg, LOG_ERR);
1978 CRM_CHECK(dev != NULL, return -EPROTO);
1979
1980 crm_element_value_int(dev, F_STONITH_RC, &rc);
1981
1982 device = crm_element_value(dev, F_STONITH_DEVICE);
1983
1984 if (stonith_remote_op_list) {
1985 op = g_hash_table_lookup(stonith_remote_op_list, id);
1986 }
1987
1988 if (op == NULL && rc == pcmk_ok) {
1989 /* Record successful fencing operations */
1990 const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID);
1991
1992 op = create_remote_stonith_op(client_id, dev, TRUE);
1993 }
1994
1995 if (op == NULL) {
1996 /* Could be for an event that began before we started */
1997 /* TODO: Record the op for later querying */
1998 crm_info("Received peer result of unknown or expired operation %s", id);
1999 return -EOPNOTSUPP;
2000 }
2001
2002 if (op->devices && device && safe_str_neq(op->devices->data, device)) {
2003 crm_err("Received outdated reply for device %s (instead of %s) to "
2004 "fence (%s) %s. Operation already timed out at peer level.",
2005 device, op->devices->data, op->action, op->target);
2006 return rc;
2007 }
2008
2009 if (safe_str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast")) {
2010 crm_debug("Marking call to %s for %s on behalf of %s@%s.%.8s: %s (%d)",
2011 op->action, op->target, op->client_name, op->id, op->originator,
2012 pcmk_strerror(rc), rc);
2013 if (rc == pcmk_ok) {
2014 op->state = st_done;
2015 } else {
2016 op->state = st_failed;
2017 }
2018 remote_op_done(op, msg, rc, FALSE);
2019 return pcmk_ok;
2020 } else if (safe_str_neq(op->originator, stonith_our_uname)) {
2021 /* If this isn't a remote level broadcast, and we are not the
2022 * originator of the operation, we should not be receiving this msg. */
2023 crm_err
2024 ("%s received non-broadcast fencing result for operation it does not own (device %s targeting %s)",
2025 stonith_our_uname, device, op->target);
2026 return rc;
2027 }
2028
2029 if (is_set(op->call_options, st_opt_topology)) {
2030 const char *device = crm_element_value(msg, F_STONITH_DEVICE);
2031
2032 crm_notice("Action '%s' targeting %s using %s on behalf of %s@%s: %s "
2033 CRM_XS " rc=%d",
2034 op->action, op->target, device, op->client_name,
2035 op->originator, pcmk_strerror(rc), rc);
2036
2037 /* We own the op, and it is complete. broadcast the result to all nodes
2038 * and notify our local clients. */
2039 if (op->state == st_done) {
2040 remote_op_done(op, msg, rc, FALSE);
2041 return rc;
2042 }
2043
2044 if ((op->phase == 2) && (rc != pcmk_ok)) {
2045 /* A remapped "on" failed, but the node was already turned off
2046 * successfully, so ignore the error and continue.
2047 */
2048 crm_warn("Ignoring %s 'on' failure (exit code %d) targeting %s "
2049 "after successful 'off'", device, rc, op->target);
2050 rc = pcmk_ok;
2051 }
2052
2053 if (rc == pcmk_ok) {
2054 /* An operation completed successfully. Try another device if
2055 * necessary, otherwise mark the operation as done. */
2056 advance_op_topology(op, device, msg, rc);
2057 return rc;
2058 } else {
2059 /* This device failed, time to try another topology level. If no other
2060 * levels are available, mark this operation as failed and report results. */
2061 if (stonith_topology_next(op) != pcmk_ok) {
2062 op->state = st_failed;
2063 remote_op_done(op, msg, rc, FALSE);
2064 return rc;
2065 }
2066 }
2067 } else if (rc == pcmk_ok && op->devices == NULL) {
2068 crm_trace("All done for %s", op->target);
2069
2070 op->state = st_done;
2071 remote_op_done(op, msg, rc, FALSE);
2072 return rc;
2073 } else if (rc == -ETIME && op->devices == NULL) {
2074 /* If the operation timed out don't bother retrying other peers. */
2075 op->state = st_failed;
2076 remote_op_done(op, msg, rc, FALSE);
2077 return rc;
2078 } else {
2079 /* fall-through and attempt other fencing action using another peer */
2080 }
2081
2082 /* Retry on failure */
2083 crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->originator,
2084 op->client_name, rc);
2085 call_remote_stonith(op, NULL);
2086 return rc;
2087 }
2088
2089 gboolean
stonith_check_fence_tolerance(int tolerance,const char * target,const char * action)2090 stonith_check_fence_tolerance(int tolerance, const char *target, const char *action)
2091 {
2092 GHashTableIter iter;
2093 time_t now = time(NULL);
2094 remote_fencing_op_t *rop = NULL;
2095
2096 crm_trace("tolerance=%d, remote_op_list=%p", tolerance,
2097 stonith_remote_op_list);
2098
2099 if (tolerance <= 0 || !stonith_remote_op_list || target == NULL ||
2100 action == NULL) {
2101 return FALSE;
2102 }
2103
2104 g_hash_table_iter_init(&iter, stonith_remote_op_list);
2105 while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) {
2106 if (strcmp(rop->target, target) != 0) {
2107 continue;
2108 } else if (rop->state != st_done) {
2109 continue;
2110 /* We don't have to worry about remapped reboots here
2111 * because if state is done, any remapping has been undone
2112 */
2113 } else if (strcmp(rop->action, action) != 0) {
2114 continue;
2115 } else if ((rop->completed + tolerance) < now) {
2116 continue;
2117 }
2118
2119 crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s",
2120 target, action, tolerance, rop->delegate, rop->originator);
2121 return TRUE;
2122 }
2123 return FALSE;
2124 }
2125