/*
 * Copyright 2004-2019 Andrew Beekhof <andrew@beekhof.net>
 *
 * This source code is licensed under the GNU General Public License version 2
 * or later (GPLv2+) WITHOUT ANY WARRANTY.
 */

#include <crm_internal.h>

#include <sys/param.h>

#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>

#include <glib.h>

#include <crm/pengine/status.h>
#include <pengine.h>
#include <allocate.h>
#include <utils.h>

CRM_TRACE_INIT_DATA(pe_allocate);

void set_alloc_actions(pe_working_set_t * data_set);
extern void ReloadRsc(resource_t * rsc, node_t *node, pe_working_set_t * data_set);
extern gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set);
static void apply_remote_node_ordering(pe_working_set_t *data_set);
static enum remote_connection_state get_remote_node_state(pe_node_t *node);

enum remote_connection_state {
    remote_state_unknown = 0,
    remote_state_alive = 1,
    remote_state_resting = 2,
    remote_state_failed = 3,
    remote_state_stopped = 4
};

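/*!
 * \internal
 * \brief Map a remote connection state to a human-readable string
 *
 * \param[in] state  Connection state to map
 *
 * \return Static string describing \p state (for logging)
 */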
static const char *
state2text(enum remote_connection_state state)
{
    switch (state) {
        case remote_state_unknown:
            return "unknown";
        case remote_state_alive:
            return "alive";
        case remote_state_resting:
            return "resting";
        case remote_state_failed:
            return "failed";
        case remote_state_stopped:
            return "stopped";
    }

    return "impossible";
}

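/* One allocation method table per resource variant; complex_set_cmds() below
 * indexes this array by rsc->variant, so the rows must stay in variant order
 * (native, group, clone, master, bundle).
 */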
resource_alloc_functions_t resource_class_alloc_functions[] = {
    {
     pcmk__native_merge_weights,
     pcmk__native_allocate,
     native_create_actions,
     native_create_probe,
     native_internal_constraints,
     native_rsc_colocation_lh,
     native_rsc_colocation_rh,
     native_rsc_location,
     native_action_flags,
     native_update_actions,
     native_expand,
     native_append_meta,
     },
    {
     pcmk__group_merge_weights,
     pcmk__group_allocate,
     group_create_actions,
     native_create_probe,
     group_internal_constraints,
     group_rsc_colocation_lh,
     group_rsc_colocation_rh,
     group_rsc_location,
     group_action_flags,
     group_update_actions,
     group_expand,
     group_append_meta,
     },
    {
     pcmk__native_merge_weights,
     pcmk__clone_allocate,
     clone_create_actions,
     clone_create_probe,
     clone_internal_constraints,
     clone_rsc_colocation_lh,
     clone_rsc_colocation_rh,
     clone_rsc_location,
     clone_action_flags,
     container_update_actions,
     clone_expand,
     clone_append_meta,
     },
    {
     master_merge_weights,
     pcmk__set_instance_roles,
     master_create_actions,
     clone_create_probe,
     master_internal_constraints,
     clone_rsc_colocation_lh,
     master_rsc_colocation_rh,
     clone_rsc_location,
     clone_action_flags,
     container_update_actions,
     clone_expand,
     master_append_meta,
     },
    {
     pcmk__native_merge_weights,
     pcmk__bundle_allocate,
     container_create_actions,
     container_create_probe,
     container_internal_constraints,
     container_rsc_colocation_lh,
     container_rsc_colocation_rh,
     container_rsc_location,
     container_action_flags,
     container_update_actions,
     container_expand,
     container_append_meta,
     }
};

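/*!
 * \internal
 * \brief Set or clear flags on an action, logging any change
 *
 * \param[in,out] action  Action to update
 * \param[in]     flags   Flags to set (or clear, if pe_action_clear is set)
 * \param[in]     source  Calling function name (for logging)
 * \param[in]     line    Calling line number (for logging)
 *
 * \return TRUE if any flag actually changed, otherwise FALSE
 */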
gboolean
update_action_flags(action_t * action, enum pe_action_flags flags, const char *source, int line)
{
    static unsigned long calls = 0;
    gboolean changed = FALSE;
    gboolean clear = is_set(flags, pe_action_clear);
    enum pe_action_flags last = action->flags;

    if (clear) {
        action->flags = crm_clear_bit(source, line, action->uuid, action->flags, flags);
    } else {
        action->flags = crm_set_bit(source, line, action->uuid, action->flags, flags);
    }

    if (last != action->flags) {
        calls++;
        changed = TRUE;
        /* Useful for tracking down _who_ changed a specific flag */
        /* CRM_ASSERT(calls != 534); */
        clear_bit(flags, pe_action_clear);
        crm_trace("%s on %s: %sset flags 0x%.6x (was 0x%.6x, now 0x%.6x, %lu, %s)",
                  action->uuid, action->node ? action->node->details->uname : "[none]",
                  clear ? "un-" : "", flags, last, action->flags, calls, source);
    }

    return changed;
}

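/*!
 * \internal
 * \brief Check whether a resource's definition changed from its history entry
 *
 * Compare the type, class, and provider in a resource's current definition
 * against the values recorded in its CIB history entry, forcing a restart if
 * they differ while the resource is active.
 *
 * \param[in] rsc          Resource to check
 * \param[in] node         Node where the history entry was recorded
 * \param[in] rsc_entry    Resource's history entry from the CIB status section
 * \param[in] active_here  Whether the resource is active on \p node
 * \param[in] data_set     Cluster working set
 *
 * \return TRUE if the caller should delete the resource's history on this node
 */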
static gboolean
check_rsc_parameters(resource_t * rsc, node_t * node, xmlNode * rsc_entry,
                     gboolean active_here, pe_working_set_t * data_set)
{
    int attr_lpc = 0;
    gboolean force_restart = FALSE;
    gboolean delete_resource = FALSE;
    gboolean changed = FALSE;

    const char *value = NULL;
    const char *old_value = NULL;

    const char *attr_list[] = {
        XML_ATTR_TYPE,
        XML_AGENT_ATTR_CLASS,
        XML_AGENT_ATTR_PROVIDER
    };

    for (; attr_lpc < DIMOF(attr_list); attr_lpc++) {
        value = crm_element_value(rsc->xml, attr_list[attr_lpc]);
        old_value = crm_element_value(rsc_entry, attr_list[attr_lpc]);
        if (value == old_value  /* i.e. NULL */
            || crm_str_eq(value, old_value, TRUE)) {
            continue;
        }

        changed = TRUE;
        trigger_unfencing(rsc, node, "Device definition changed", NULL, data_set);
        if (active_here) {
            force_restart = TRUE;
            crm_notice("Forcing restart of %s on %s, %s changed: %s -> %s",
                       rsc->id, node->details->uname, attr_list[attr_lpc],
                       crm_str(old_value), crm_str(value));
        }
    }
    if (force_restart) {
        /* make sure the restart happens */
        stop_action(rsc, node, FALSE);
        set_bit(rsc->flags, pe_rsc_start_pending);
        delete_resource = TRUE;

    } else if (changed) {
        delete_resource = TRUE;
    }
    return delete_resource;
}

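/*!
 * \internal
 * \brief Schedule cancellation of a recurring operation
 *
 * \param[in] rsc          Resource that the operation belongs to
 * \param[in] xml_op       History entry of the operation to cancel
 * \param[in] active_node  Node where the operation is active
 * \param[in] reason       Why the operation is being cancelled (for logging)
 * \param[in] data_set     Cluster working set
 */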
static void
CancelXmlOp(resource_t * rsc, xmlNode * xml_op, node_t * active_node,
            const char *reason, pe_working_set_t * data_set)
{
    int interval = 0;
    action_t *cancel = NULL;

    char *key = NULL;
    const char *task = NULL;
    const char *call_id = NULL;
    const char *interval_s = NULL;

    CRM_CHECK(xml_op != NULL, return);
    CRM_CHECK(active_node != NULL, return);

    task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
    call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID);
    interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL);

    interval = crm_parse_int(interval_s, "0");

    /* we need to reconstruct the key because of the way we used to construct resource IDs */
    key = generate_op_key(rsc->id, task, interval);

    crm_info("Action %s on %s will be stopped: %s",
             key, active_node->details->uname, reason ? reason : "unknown");

    /* TODO: This looks highly dangerous if we ever try to schedule 'key' too */
    cancel = custom_action(rsc, strdup(key), RSC_CANCEL, active_node, FALSE, TRUE, data_set);

    free(cancel->task);
    free(cancel->cancel_task);
    cancel->task = strdup(RSC_CANCEL);
    cancel->cancel_task = strdup(task);

    add_hash_param(cancel->meta, XML_LRM_ATTR_TASK, task);
    add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id);
    add_hash_param(cancel->meta, XML_LRM_ATTR_INTERVAL, interval_s);

    custom_action_order(rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pe_order_optional, data_set);
    free(key);
    key = NULL;
}

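/*!
 * \internal
 * \brief Check whether an action's definition changed since it was run
 *
 * Compare the digests recorded in an action's history entry against the
 * current configuration, and schedule a reload or restart (or cancel an
 * orphaned recurring operation) as appropriate.
 *
 * \param[in] rsc          Resource that the action belongs to
 * \param[in] active_node  Node where the action was run
 * \param[in] xml_op       History entry of the action
 * \param[in] data_set     Cluster working set
 *
 * \return TRUE if the action's definition changed (or it is an orphan)
 */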
static gboolean
check_action_definition(resource_t * rsc, node_t * active_node, xmlNode * xml_op,
                        pe_working_set_t * data_set)
{
    char *key = NULL;
    int interval = 0;
    const char *interval_s = NULL;
    const op_digest_cache_t *digest_data = NULL;
    gboolean did_change = FALSE;

    const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
    const char *digest_secure = NULL;

    CRM_CHECK(active_node != NULL, return FALSE);

    interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL);
    interval = crm_parse_int(interval_s, "0");

    if (interval > 0) {
        xmlNode *op_match = NULL;

        /* we need to reconstruct the key because of the way we used to construct resource IDs */
        key = generate_op_key(rsc->id, task, interval);

        pe_rsc_trace(rsc, "Checking parameters for %s", key);
        op_match = find_rsc_op_entry(rsc, key);

        if (op_match == NULL && is_set(data_set->flags, pe_flag_stop_action_orphans)) {
            CancelXmlOp(rsc, xml_op, active_node, "orphan", data_set);
            free(key);
            return TRUE;

        } else if (op_match == NULL) {
            pe_rsc_debug(rsc, "Orphan action detected: %s on %s", key, active_node->details->uname);
            free(key);
            return TRUE;
        }
        free(key);
        key = NULL;
    }

    crm_trace("Testing %s_%s_%d on %s",
              rsc->id, task, interval, active_node->details->uname);
    if (interval == 0 && safe_str_eq(task, RSC_STATUS)) {
        /* Reload based on the start action not a probe */
        task = RSC_START;

    } else if (interval == 0 && safe_str_eq(task, RSC_MIGRATED)) {
        /* Reload based on the start action not a migrate */
        task = RSC_START;

    } else if (interval == 0 && safe_str_eq(task, RSC_PROMOTE)) {
        /* Reload based on the start action not a promote */
        task = RSC_START;
    }

    digest_data = rsc_action_digest_cmp(rsc, xml_op, active_node, data_set);

    if (is_set(data_set->flags, pe_flag_sanitized)) {
        digest_secure = crm_element_value(xml_op, XML_LRM_ATTR_SECURE_DIGEST);
    }

    if (digest_data->rc != RSC_DIGEST_MATCH
        && digest_secure
        && digest_data->digest_secure_calc
        && strcmp(digest_data->digest_secure_calc, digest_secure) == 0) {
        if (is_set(data_set->flags, pe_flag_stdout)) {
            printf("Only 'private' parameters to %s_%s_%d on %s changed: %s\n",
                   rsc->id, task, interval, active_node->details->uname,
                   crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
        }

    } else if (digest_data->rc == RSC_DIGEST_RESTART) {
        /* Changes that force a restart */
        pe_action_t *required = NULL;

        did_change = TRUE;
        key = generate_op_key(rsc->id, task, interval);
        crm_log_xml_info(digest_data->params_restart, "params:restart");
        required = custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set);
        pe_action_set_flag_reason(__FUNCTION__, __LINE__, required, NULL,
                                  "resource definition change", pe_action_optional, TRUE);

        trigger_unfencing(rsc, active_node, "Device parameters changed", NULL, data_set);

    } else if ((digest_data->rc == RSC_DIGEST_ALL) || (digest_data->rc == RSC_DIGEST_UNKNOWN)) {
        /* Changes that can potentially be handled by a reload */
        const char *digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST);

        did_change = TRUE;
        trigger_unfencing(rsc, active_node, "Device parameters changed (reload)", NULL, data_set);
        crm_log_xml_info(digest_data->params_all, "params:reload");
        key = generate_op_key(rsc->id, task, interval);

        if (interval > 0) {
            action_t *op = NULL;

#if 0
            /* Always reload/restart the entire resource */
            ReloadRsc(rsc, active_node, data_set);
#else
            /* Re-sending the recurring op is sufficient - the old one will be cancelled automatically */
            op = custom_action(rsc, key, task, active_node, TRUE, TRUE, data_set);
            set_bit(op->flags, pe_action_reschedule);
#endif

        } else if (digest_restart && rsc->isolation_wrapper == NULL && (uber_parent(rsc))->isolation_wrapper == NULL) {
            pe_rsc_trace(rsc, "Reloading '%s' action for resource %s", task, rsc->id);

            /* Reload this resource */
            ReloadRsc(rsc, active_node, data_set);
            free(key);

        } else {
            pe_action_t *required = NULL;
            pe_rsc_trace(rsc, "Resource %s doesn't know how to reload", rsc->id);

            /* Re-send the start/demote/promote op
             * Recurring ops will be detected independently
             */
            required = custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set);
            pe_action_set_flag_reason(__FUNCTION__, __LINE__, required, NULL,
                                      "resource definition change", pe_action_optional, TRUE);
        }
    }

    return did_change;
}

/*!
 * \internal
 * \brief Do deferred action checks after allocation
 *
 * \param[in] rsc       Resource to check
 * \param[in] node      Node that the check concerns
 * \param[in] rsc_op    Resource's history entry to check
 * \param[in] check     Type of deferred check to perform
 * \param[in] data_set  Cluster working set
 */
static void
check_params(pe_resource_t *rsc, pe_node_t *node, xmlNode *rsc_op,
             enum pe_check_parameters check, pe_working_set_t *data_set)
{
    const char *reason = NULL;
    op_digest_cache_t *digest_data = NULL;

    switch (check) {
        case pe_check_active:
            if (check_action_definition(rsc, node, rsc_op, data_set)
                && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL,
                                    data_set)) {

                reason = "action definition changed";
            }
            break;

        case pe_check_last_failure:
            digest_data = rsc_action_digest_cmp(rsc, rsc_op, node, data_set);
            switch (digest_data->rc) {
                case RSC_DIGEST_UNKNOWN:
                    crm_trace("Resource %s history entry %s on %s has no digest to compare",
                              rsc->id, ID(rsc_op), node->details->id);
                    break;
                case RSC_DIGEST_MATCH:
                    break;
                default:
                    reason = "resource parameters have changed";
                    break;
            }
            break;
    }

    if (reason) {
        pe__clear_failcount(rsc, node, reason, data_set);
    }
}

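/*!
 * \internal
 * \brief Check a resource's history on one node for definition changes
 *
 * Walk every operation recorded for a resource on a node, cancelling
 * recurring operations or scheduling parameter re-checks as needed.
 *
 * \param[in] rsc_entry  Resource's history entry from the CIB status section
 * \param[in] rsc        Resource that the history belongs to
 * \param[in] node       Node that the history was recorded on
 * \param[in] data_set   Cluster working set
 */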
static void
check_actions_for(xmlNode * rsc_entry, resource_t * rsc, node_t * node, pe_working_set_t * data_set)
{
    GListPtr gIter = NULL;
    int offset = -1;
    int interval = 0;
    int stop_index = 0;
    int start_index = 0;

    const char *task = NULL;
    const char *interval_s = NULL;

    xmlNode *rsc_op = NULL;
    GListPtr op_list = NULL;
    GListPtr sorted_op_list = NULL;

    CRM_CHECK(node != NULL, return);

    if (is_set(rsc->flags, pe_rsc_orphan)) {
        resource_t *parent = uber_parent(rsc);

        if (parent == NULL
            || pe_rsc_is_clone(parent) == FALSE
            || is_set(parent->flags, pe_rsc_unique)) {
            pe_rsc_trace(rsc, "Skipping param check for %s and deleting: orphan", rsc->id);
            DeleteRsc(rsc, node, FALSE, data_set);
        } else {
            pe_rsc_trace(rsc, "Skipping param check for %s (orphan clone)", rsc->id);
        }
        return;

    } else if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) {
        if (check_rsc_parameters(rsc, node, rsc_entry, FALSE, data_set)) {
            DeleteRsc(rsc, node, FALSE, data_set);
        }
        pe_rsc_trace(rsc, "Skipping param check for %s: no longer active on %s",
                     rsc->id, node->details->uname);
        return;
    }

    pe_rsc_trace(rsc, "Processing %s on %s", rsc->id, node->details->uname);

    if (check_rsc_parameters(rsc, node, rsc_entry, TRUE, data_set)) {
        DeleteRsc(rsc, node, FALSE, data_set);
    }

    for (rsc_op = __xml_first_child_element(rsc_entry); rsc_op != NULL;
         rsc_op = __xml_next_element(rsc_op)) {

        if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
            op_list = g_list_prepend(op_list, rsc_op);
        }
    }

    sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
    calculate_active_ops(sorted_op_list, &start_index, &stop_index);

    for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
        xmlNode *rsc_op = (xmlNode *) gIter->data;

        offset++;

        if (start_index < stop_index) {
            /* stopped */
            continue;
        } else if (offset < start_index) {
            /* action occurred prior to a start */
            continue;
        }

        task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);

        interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL);
        interval = crm_parse_int(interval_s, "0");

        if (interval > 0 &&
            (is_set(rsc->flags, pe_rsc_maintenance) || node->details->maintenance)) {
            // Maintenance mode cancels recurring operations
            CancelXmlOp(rsc, rsc_op, node, "maintenance mode", data_set);

        } else if ((interval > 0)
                   || safe_str_eq(task, RSC_STATUS)
                   || safe_str_eq(task, RSC_START)
                   || safe_str_eq(task, RSC_PROMOTE)
                   || safe_str_eq(task, RSC_MIGRATED)) {

            /* If a resource operation failed, and the operation's definition
             * has changed, clear any fail count so the operation can be
             * retried fresh.
             */

            if (container_fix_remote_addr(rsc)) {
                /* We haven't allocated resources to nodes yet, so if the
                 * REMOTE_CONTAINER_HACK is used, we may calculate the digest
                 * based on the literal "#uname" value rather than the properly
                 * substituted value. That would mistakenly make the action
                 * definition appear to have been changed. Defer the check until
                 * later in this case.
                 */
                pe__add_param_check(rsc_op, rsc, node, pe_check_active,
                                    data_set);

            } else if (check_action_definition(rsc, node, rsc_op, data_set)
                && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL,
                                    data_set)) {
                pe__clear_failcount(rsc, node, "action definition changed",
                                    data_set);
            }
        }
    }
    g_list_free(sorted_op_list);
}

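/*!
 * \internal
 * \brief Build a list of resources matching an ID
 *
 * \param[in] result          Existing list to prepend matches to (or NULL)
 * \param[in] rsc             Resource (and children) to search (or NULL to
 *                            search all of \p data_set's resources)
 * \param[in] id              ID to match against
 * \param[in] renamed_clones  Whether to match rsc->clone_name as well
 * \param[in] partial         Whether a substring match is sufficient
 * \param[in] data_set        Cluster working set (used only if \p rsc is NULL)
 *
 * \return (Possibly new) head of the result list
 */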
static GListPtr
find_rsc_list(GListPtr result, resource_t * rsc, const char *id, gboolean renamed_clones,
              gboolean partial, pe_working_set_t * data_set)
{
    GListPtr gIter = NULL;
    gboolean match = FALSE;

    if (id == NULL) {
        return NULL;
    }

    if (rsc == NULL) {
        if (data_set == NULL) {
            return NULL;
        }
        for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
            pe_resource_t *child = (pe_resource_t *) gIter->data;

            result = find_rsc_list(result, child, id, renamed_clones, partial,
                                   NULL);
        }
        return result;
    }

    if (partial) {
        if (strstr(rsc->id, id)) {
            match = TRUE;

        } else if (renamed_clones && rsc->clone_name && strstr(rsc->clone_name, id)) {
            match = TRUE;
        }

    } else {
        if (strcmp(rsc->id, id) == 0) {
            match = TRUE;

        } else if (renamed_clones && rsc->clone_name && strcmp(rsc->clone_name, id) == 0) {
            match = TRUE;
        }
    }

    if (match) {
        result = g_list_prepend(result, rsc);
    }

    if (rsc->children) {
        gIter = rsc->children;
        for (; gIter != NULL; gIter = gIter->next) {
            resource_t *child = (resource_t *) gIter->data;

            result = find_rsc_list(result, child, id, renamed_clones, partial, NULL);
        }
    }

    return result;
}

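/*!
 * \internal
 * \brief Check all nodes' resource history for definition changes
 *
 * \param[in] data_set  Cluster working set
 */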
static void
check_actions(pe_working_set_t * data_set)
{
    const char *id = NULL;
    node_t *node = NULL;
    xmlNode *lrm_rscs = NULL;
    xmlNode *status = get_object_root(XML_CIB_TAG_STATUS, data_set->input);

    xmlNode *node_state = NULL;

    for (node_state = __xml_first_child_element(status); node_state != NULL;
         node_state = __xml_next_element(node_state)) {
        if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
            id = crm_element_value(node_state, XML_ATTR_ID);
            lrm_rscs = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
            lrm_rscs = find_xml_node(lrm_rscs, XML_LRM_TAG_RESOURCES, FALSE);

            node = pe_find_node_id(data_set->nodes, id);

            if (node == NULL) {
                continue;

            /* Still need to check actions for a maintenance node to cancel existing monitor operations */
            } else if (can_run_resources(node) == FALSE && node->details->maintenance == FALSE) {
                crm_trace("Skipping param check for %s: can't run resources",
                          node->details->uname);
                continue;
            }

            crm_trace("Processing node %s", node->details->uname);
            if (node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) {
                xmlNode *rsc_entry = NULL;

                for (rsc_entry = __xml_first_child_element(lrm_rscs);
                     rsc_entry != NULL;
                     rsc_entry = __xml_next_element(rsc_entry)) {

                    if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {

                        if (xml_has_children(rsc_entry)) {
                            GListPtr gIter = NULL;
                            GListPtr result = NULL;
                            const char *rsc_id = ID(rsc_entry);

                            CRM_CHECK(rsc_id != NULL, return);

                            result = find_rsc_list(NULL, NULL, rsc_id, TRUE, FALSE, data_set);
                            for (gIter = result; gIter != NULL; gIter = gIter->next) {
                                resource_t *rsc = (resource_t *) gIter->data;

                                if (rsc->variant != pe_native) {
                                    continue;
                                }
                                check_actions_for(rsc_entry, rsc, node, data_set);
                            }
                            g_list_free(result);
                        }
                    }
                }
            }
        }
    }
}

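/*!
 * \internal
 * \brief Apply all location constraints
 *
 * \param[in] data_set  Cluster working set
 */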
static void
apply_placement_constraints(pe_working_set_t * data_set)
{
    for (GList *gIter = data_set->placement_constraints;
         gIter != NULL; gIter = gIter->next) {
        pe__location_t *cons = gIter->data;

        cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons);
    }
}

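/*!
 * \internal
 * \brief Check whether a failcount-clearing action already exists
 *
 * \param[in] node  Node to check
 * \param[in] rsc   Resource whose fail count would be cleared
 *
 * \return TRUE if a clear-failcount action for \p rsc is scheduled on \p node
 */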
static gboolean
failcount_clear_action_exists(node_t * node, resource_t * rsc)
{
    gboolean rc = FALSE;
    char *key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0);
    GListPtr list = find_actions_exact(rsc->actions, key, node);

    if (list) {
        rc = TRUE;
    }
    g_list_free(list);
    free(key);

    return rc;
}

/*!
 * \internal
 * \brief Force resource away if failures hit migration threshold
 *
 * \param[in,out] rsc       Resource to check for failures
 * \param[in,out] node      Node to check for failures
 * \param[in,out] data_set  Cluster working set to update
 */
static void
check_migration_threshold(resource_t *rsc, node_t *node,
                          pe_working_set_t *data_set)
{
    int fail_count, countdown;
    resource_t *failed;

    /* Migration threshold of 0 means never force away */
    if (rsc->migration_threshold == 0) {
        return;
    }

    // If we're ignoring failures, also ignore the migration threshold
    if (is_set(rsc->flags, pe_rsc_failure_ignored)) {
        return;
    }

    /* If there are no failures, there's no need to force away */
    fail_count = pe_get_failcount(node, rsc, NULL,
                                  pe_fc_effective|pe_fc_fillers, NULL,
                                  data_set);
    if (fail_count <= 0) {
        return;
    }

    /* How many more times recovery will be tried on this node */
    countdown = QB_MAX(rsc->migration_threshold - fail_count, 0);

    /* If failed resource has a parent, we'll force the parent away */
    failed = rsc;
    if (is_not_set(rsc->flags, pe_rsc_unique)) {
        failed = uber_parent(rsc);
    }

    if (countdown == 0) {
        resource_location(failed, node, -INFINITY, "__fail_limit__", data_set);
        crm_warn("Forcing %s away from %s after %d failures (max=%d)",
                 failed->id, node->details->uname, fail_count,
                 rsc->migration_threshold);
    } else {
        crm_info("%s can fail %d more times on %s before being forced off",
                 failed->id, countdown, node->details->uname);
    }
}

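/*!
 * \internal
 * \brief Apply stickiness and check the migration threshold
 *
 * Apply a resource's stickiness score to its current node (recursing into
 * children), then check the migration threshold unless a failcount clear is
 * already scheduled.
 *
 * \param[in] rsc       Resource to process
 * \param[in] node      Node being processed
 * \param[in] data_set  Cluster working set
 */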
static void
common_apply_stickiness(resource_t * rsc, node_t * node, pe_working_set_t * data_set)
{
    if (rsc->children) {
        GListPtr gIter = rsc->children;

        for (; gIter != NULL; gIter = gIter->next) {
            resource_t *child_rsc = (resource_t *) gIter->data;

            common_apply_stickiness(child_rsc, node, data_set);
        }
        return;
    }

    if (is_set(rsc->flags, pe_rsc_managed)
        && rsc->stickiness != 0 && g_list_length(rsc->running_on) == 1) {
        node_t *current = pe_find_node_id(rsc->running_on, node->details->id);
        node_t *match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);

        if (current == NULL) {

        } else if (match != NULL || is_set(data_set->flags, pe_flag_symmetric_cluster)) {
            resource_t *sticky_rsc = rsc;

            resource_location(sticky_rsc, node, rsc->stickiness, "stickiness", data_set);
            pe_rsc_debug(sticky_rsc, "Resource %s: preferring current location"
                         " (node=%s, weight=%d)", sticky_rsc->id,
                         node->details->uname, rsc->stickiness);
        } else {
            GHashTableIter iter;
            node_t *nIter = NULL;

            pe_rsc_debug(rsc, "Ignoring stickiness for %s: the cluster is asymmetric"
                         " and node %s is not explicitly allowed", rsc->id, node->details->uname);
            g_hash_table_iter_init(&iter, rsc->allowed_nodes);
            while (g_hash_table_iter_next(&iter, NULL, (void **)&nIter)) {
                crm_err("%s[%s] = %d", rsc->id, nIter->details->uname, nIter->weight);
            }
        }
    }

    /* Check the migration threshold only if a failcount clear action
     * has not already been placed for this resource on the node.
     * There is no sense in potentially forcing the resource from this
     * node if the failcount is being reset anyway.
     *
     * @TODO A clear_failcount operation can be scheduled in stage4() via
     * check_actions_for(), or in stage5() via check_params(). This runs in
     * stage2(), so it cannot detect those, meaning we might check the migration
     * threshold when we shouldn't -- worst case, we stop or move the resource,
     * then move it back next transition.
     */
    if (failcount_clear_action_exists(node, rsc) == FALSE) {
        check_migration_threshold(rsc, node, data_set);
    }
}

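/*!
 * \brief Assign the variant-specific allocation method table to a resource
 *
 * \param[in,out] rsc  Resource (and children, recursively) to update
 */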
void
complex_set_cmds(resource_t * rsc)
{
    GListPtr gIter = rsc->children;

    rsc->cmds = &resource_class_alloc_functions[rsc->variant];

    for (; gIter != NULL; gIter = gIter->next) {
        resource_t *child_rsc = (resource_t *) gIter->data;

        complex_set_cmds(child_rsc);
    }
}

void
set_alloc_actions(pe_working_set_t * data_set)
{
    GListPtr gIter = data_set->resources;

    for (; gIter != NULL; gIter = gIter->next) {
        resource_t *rsc = (resource_t *) gIter->data;

        complex_set_cmds(rsc);
    }
}

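/*!
 * \internal
 * \brief Add a node attribute's health score to a running total
 *
 * GHashTable iterator callback: if the attribute name begins with "#health",
 * merge its score into the total passed via \p user_data.
 *
 * \param[in]     gKey       Node attribute name
 * \param[in]     gValue     Node attribute value
 * \param[in,out] user_data  Pointer to the running total (an int)
 */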
static void
calculate_system_health(gpointer gKey, gpointer gValue, gpointer user_data)
{
    const char *key = (const char *)gKey;
    const char *value = (const char *)gValue;
    int *system_health = (int *)user_data;

    if (!gKey || !gValue || !user_data) {
        return;
    }

    if (crm_starts_with(key, "#health")) {
        int score;

        /* Convert the value into an integer */
        score = char2score(value);

        /* Add it to the running total */
        *system_health = merge_weights(score, *system_health);
    }
}

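/*!
 * \internal
 * \brief Apply the configured node health strategy
 *
 * Translate each node's health attributes into location scores according to
 * the node-health-strategy cluster option.
 *
 * \param[in] data_set  Cluster working set
 *
 * \return FALSE if the configured strategy is unknown, otherwise TRUE
 */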
static gboolean
apply_system_health(pe_working_set_t * data_set)
{
    GListPtr gIter = NULL;
    const char *health_strategy = pe_pref(data_set->config_hash, "node-health-strategy");
    int base_health = 0;

    if (health_strategy == NULL || safe_str_eq(health_strategy, "none")) {
        /* Prevent any accidental health -> score translation */
        node_score_red = 0;
        node_score_yellow = 0;
        node_score_green = 0;
        return TRUE;

    } else if (safe_str_eq(health_strategy, "migrate-on-red")) {

        /* Resources on nodes which have health values of red are
         * weighted away from that node.
         */
        node_score_red = -INFINITY;
        node_score_yellow = 0;
        node_score_green = 0;

    } else if (safe_str_eq(health_strategy, "only-green")) {

        /* Resources on nodes which have health values of red or yellow
         * are forced away from that node.
         */
        node_score_red = -INFINITY;
        node_score_yellow = -INFINITY;
        node_score_green = 0;

    } else if (safe_str_eq(health_strategy, "progressive")) {
        /* Same as the above, but use the r/y/g scores provided by the user.
         * Defaults are provided by the pe_prefs table.
         * Also, a custom health "base score" can be used.
         */
        base_health = crm_parse_int(pe_pref(data_set->config_hash, "node-health-base"), "0");

    } else if (safe_str_eq(health_strategy, "custom")) {

        /* Requires the admin to configure the rsc_location constraints for
         * processing the stored health scores
         */
        /* TODO: Check for the existence of appropriate node health constraints */
        return TRUE;

    } else {
        crm_err("Unknown node health strategy: %s", health_strategy);
        return FALSE;
    }

    crm_info("Applying automated node health strategy: %s", health_strategy);

    for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
        int system_health = base_health;
        node_t *node = (node_t *) gIter->data;

        /* Search through the node hash table for system health entries. */
        g_hash_table_foreach(node->details->attrs, calculate_system_health, &system_health);

        crm_info(" Node %s has a combined system health of %d",
                 node->details->uname, system_health);

        /* If the health is non-zero, then create a new rsc2node so that the
         * weight will be added later on.
         */
        if (system_health != 0) {
            GListPtr gIter2 = data_set->resources;

            for (; gIter2 != NULL; gIter2 = gIter2->next) {
                resource_t *rsc = (resource_t *) gIter2->data;

                rsc2node_new(health_strategy, rsc, system_health, NULL, node, data_set);
            }
        }
    }

    return TRUE;
}

gboolean
stage0(pe_working_set_t * data_set)
{
    xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input);

    if (data_set->input == NULL) {
        return FALSE;
    }

    if (is_set(data_set->flags, pe_flag_have_status) == FALSE) {
        crm_trace("Calculating status");
        cluster_status(data_set);
    }

    set_alloc_actions(data_set);
    apply_system_health(data_set);
    unpack_constraints(cib_constraints, data_set);

    return TRUE;
}

/*
 * Check nodes for resources started outside of the LRM
 */
gboolean
probe_resources(pe_working_set_t * data_set)
{
    action_t *probe_node_complete = NULL;

    for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
        node_t *node = (node_t *) gIter->data;
        const char *probed = pe_node_attribute_raw(node, CRM_OP_PROBED);

        if (node->details->online == FALSE) {

            if (is_baremetal_remote_node(node) && node->details->remote_rsc
                && (get_remote_node_state(node) == remote_state_failed)) {

                pe_fence_node(data_set, node, "the connection is unrecoverable", FALSE);
            }
            continue;

        } else if (node->details->unclean) {
            continue;

        } else if (node->details->rsc_discovery_enabled == FALSE) {
            /* resource discovery is disabled for this node */
            continue;
        }

        if (probed != NULL && crm_is_true(probed) == FALSE) {
            action_t *probe_op = custom_action(NULL, crm_strdup_printf("%s-%s", CRM_OP_REPROBE, node->details->uname),
                                               CRM_OP_REPROBE, node, FALSE, TRUE, data_set);

            add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
            continue;
        }

        for (GListPtr gIter2 = data_set->resources; gIter2 != NULL; gIter2 = gIter2->next) {
            resource_t *rsc = (resource_t *) gIter2->data;

            rsc->cmds->create_probe(rsc, node, probe_node_complete, FALSE, data_set);
        }
    }
    return TRUE;
}

static void
rsc_discover_filter(resource_t *rsc, node_t *node)
{
    GListPtr gIter = rsc->children;
    resource_t *top = uber_parent(rsc);
    node_t *match;

    if (rsc->exclusive_discover == FALSE && top->exclusive_discover == FALSE) {
        return;
    }

    for (; gIter != NULL; gIter = gIter->next) {
        resource_t *child_rsc = (resource_t *) gIter->data;

        rsc_discover_filter(child_rsc, node);
    }

    match = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
    if (match && match->rsc_discover_mode != pe_discover_exclusive) {
        match->weight = -INFINITY;
    }
}

/*!
 * \internal
 * \brief Stage 2 of cluster status: apply node-specific criteria
 *
 * Count known nodes, and apply location constraints, stickiness, and exclusive
 * resource discovery.
 */
gboolean
stage2(pe_working_set_t * data_set)
{
    GListPtr gIter = NULL;

    if (is_not_set(data_set->flags, pe_flag_no_compat)) {
        // @COMPAT API backward compatibility
        for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
            pe_node_t *node = (pe_node_t *) gIter->data;

            if (node && (node->weight >= 0) && node->details->online
                && (node->details->type != node_ping)) {
                data_set->max_valid_nodes++;
            }
        }
    }

    crm_trace("Applying placement constraints");
    apply_placement_constraints(data_set);

    gIter = data_set->nodes;
    for (; gIter != NULL; gIter = gIter->next) {
        GListPtr gIter2 = NULL;
        node_t *node = (node_t *) gIter->data;

        gIter2 = data_set->resources;
        for (; gIter2 != NULL; gIter2 = gIter2->next) {
            resource_t *rsc = (resource_t *) gIter2->data;

            common_apply_stickiness(rsc, node, data_set);
            rsc_discover_filter(rsc, node);
        }
    }

    return TRUE;
}

/*
 * Create internal resource constraints before allocation
 */
gboolean
stage3(pe_working_set_t * data_set)
{
    GListPtr gIter = data_set->resources;

    for (; gIter != NULL; gIter = gIter->next) {
        resource_t *rsc = (resource_t *) gIter->data;

        rsc->cmds->internal_constraints(rsc, data_set);
    }

    return TRUE;
}

/*
 * Check for orphaned or redefined actions
 */
gboolean
stage4(pe_working_set_t * data_set)
{
    check_actions(data_set);
    return TRUE;
}

static void *
convert_const_pointer(const void *ptr)
{
    /* Worst function ever */
    return (void *)ptr;
}

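/*!
 * \internal
 * \brief Sort resources by priority and allowed-node weights
 *
 * GCompareDataFunc for ordering resource allocation: higher priority first,
 * then higher weight on the current node, then higher weight on each node in
 * the supplied list.
 *
 * \param[in] a     First resource to compare
 * \param[in] b     Second resource to compare
 * \param[in] data  GList of nodes to compare weights on
 *
 * \return -1 if \p a should be allocated first, 1 if \p b should, else 0
 */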
static gint
sort_rsc_process_order(gconstpointer a, gconstpointer b, gpointer data)
{
    int rc = 0;
    int r1_weight = -INFINITY;
    int r2_weight = -INFINITY;

    const char *reason = "existence";

    const GListPtr nodes = (GListPtr) data;
    const resource_t *resource1 = a;
    const resource_t *resource2 = b;

    node_t *r1_node = NULL;
    node_t *r2_node = NULL;
    GListPtr gIter = NULL;
    GHashTable *r1_nodes = NULL;
    GHashTable *r2_nodes = NULL;

    if (a == NULL && b == NULL) {
        goto done;
    }
    if (a == NULL) {
        return 1;
    }
    if (b == NULL) {
        return -1;
    }

    reason = "priority";
    r1_weight = resource1->priority;
    r2_weight = resource2->priority;

    if (r1_weight > r2_weight) {
        rc = -1;
        goto done;
    }

    if (r1_weight < r2_weight) {
        rc = 1;
        goto done;
    }

    reason = "no node list";
    if (nodes == NULL) {
        goto done;
    }

    r1_nodes = pcmk__native_merge_weights(convert_const_pointer(resource1),
                                          resource1->id, NULL, NULL, 1,
                                          pe_weights_forward | pe_weights_init);
    pe__show_node_weights(true, NULL, resource1->id, r1_nodes);

    r2_nodes = pcmk__native_merge_weights(convert_const_pointer(resource2),
                                          resource2->id, NULL, NULL, 1,
                                          pe_weights_forward | pe_weights_init);
    pe__show_node_weights(true, NULL, resource2->id, r2_nodes);

    /* Current location score */
    reason = "current location";
    r1_weight = -INFINITY;
    r2_weight = -INFINITY;

    if (resource1->running_on) {
        r1_node = pe__current_node(resource1);
        r1_node = g_hash_table_lookup(r1_nodes, r1_node->details->id);
        if (r1_node != NULL) {
            r1_weight = r1_node->weight;
        }
    }
    if (resource2->running_on) {
        r2_node = pe__current_node(resource2);
        r2_node = g_hash_table_lookup(r2_nodes, r2_node->details->id);
        if (r2_node != NULL) {
            r2_weight = r2_node->weight;
        }
    }

    if (r1_weight > r2_weight) {
        rc = -1;
        goto done;
    }

    if (r1_weight < r2_weight) {
        rc = 1;
        goto done;
    }

    reason = "score";
    for (gIter = nodes; gIter != NULL; gIter = gIter->next) {
        node_t *node = (node_t *) gIter->data;

        r1_node = NULL;
        r2_node = NULL;

        r1_weight = -INFINITY;
        if (r1_nodes) {
            r1_node = g_hash_table_lookup(r1_nodes, node->details->id);
        }
        if (r1_node) {
            r1_weight = r1_node->weight;
        }

        r2_weight = -INFINITY;
        if (r2_nodes) {
            r2_node = g_hash_table_lookup(r2_nodes, node->details->id);
        }
        if (r2_node) {
            r2_weight = r2_node->weight;
        }

        if (r1_weight > r2_weight) {
            rc = -1;
            goto done;
        }

        if (r1_weight < r2_weight) {
            rc = 1;
            goto done;
        }
    }

  done:
    crm_trace("%s (%d) on %s %c %s (%d) on %s: %s",
              resource1->id, r1_weight, r1_node ? r1_node->details->id : "n/a",
              rc < 0 ? '>' : rc > 0 ? '<' : '=',
              resource2->id, r2_weight, r2_node ? r2_node->details->id : "n/a", reason);

    if (r1_nodes) {
        g_hash_table_destroy(r1_nodes);
    }
    if (r2_nodes) {
        g_hash_table_destroy(r2_nodes);
    }

    return rc;
}

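/*!
 * \internal
 * \brief Allocate all resources to nodes
 *
 * Remote connection resources are allocated first (preferring a partial
 * migration target, if any), so that their dependents can be placed relative
 * to them.
 *
 * \param[in] data_set  Cluster working set
 */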
static void
allocate_resources(pe_working_set_t * data_set)
{
    GListPtr gIter = NULL;

    if (is_set(data_set->flags, pe_flag_have_remote_nodes)) {
        /* Allocate remote connection resources first (which will also allocate
         * any colocation dependencies). If the connection is migrating, always
         * prefer the partial migration target.
         */
        for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
            resource_t *rsc = (resource_t *) gIter->data;

            if (rsc->is_remote_node == FALSE) {
                continue;
            }
            pe_rsc_trace(rsc, "Allocating remote connection resource '%s'",
                         rsc->id);
            rsc->cmds->allocate(rsc, rsc->partial_migration_target, data_set);
        }
    }

    /* now do the rest of the resources */
    for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
        resource_t *rsc = (resource_t *) gIter->data;

        if (rsc->is_remote_node == TRUE) {
            continue;
        }
        pe_rsc_trace(rsc, "Allocating resource '%s'", rsc->id);
        rsc->cmds->allocate(rsc, NULL, data_set);
    }
}

/* We always use pe_order_preserve with these convenience functions to exempt
 * internally generated constraints from the prohibition of user constraints
 * involving remote connection resources.
 *
 * The start ordering additionally uses pe_order_runnable_left so that the
 * specified action is not runnable if the start is not runnable.
 */

static inline void
order_start_then_action(resource_t *lh_rsc, action_t *rh_action,
                        enum pe_ordering extra, pe_working_set_t *data_set)
{
    if (lh_rsc && rh_action && data_set) {
        custom_action_order(lh_rsc, start_key(lh_rsc), NULL,
                            rh_action->rsc, NULL, rh_action,
                            pe_order_preserve | pe_order_runnable_left | extra,
                            data_set);
    }
}

static inline void
order_action_then_stop(action_t *lh_action, resource_t *rh_rsc,
                       enum pe_ordering extra, pe_working_set_t *data_set)
{
    if (lh_action && rh_rsc && data_set) {
        custom_action_order(lh_action->rsc, NULL, lh_action,
                            rh_rsc, stop_key(rh_rsc), NULL,
                            pe_order_preserve | extra, data_set);
    }
}

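/*!
 * \internal
 * \brief Schedule failcount clearing for an orphaned resource
 *
 * If orphaned resources are being stopped, clear their fail counts on every
 * online node where one is recorded, ordering the clear before the stop.
 *
 * \param[in] rsc       Resource to check
 * \param[in] data_set  Cluster working set
 */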
static void
cleanup_orphans(resource_t * rsc, pe_working_set_t * data_set)
{
    GListPtr gIter = NULL;

    if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) {
        return;
    }

    /* Don't recurse into ->children, those are just unallocated clone instances */
    if (is_not_set(rsc->flags, pe_rsc_orphan)) {
        return;
    }

    for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
        node_t *node = (node_t *) gIter->data;

        if (node->details->online
            && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL,
                                data_set)) {

            pe_action_t *clear_op = NULL;

            clear_op = pe__clear_failcount(rsc, node, "it is orphaned",
                                           data_set);

            /* We can't use order_action_then_stop() here because its
             * pe_order_preserve breaks things
             */
            custom_action_order(clear_op->rsc, NULL, clear_op,
                                rsc, stop_key(rsc), NULL,
                                pe_order_optional, data_set);
        }
    }
}

gboolean
stage5(pe_working_set_t * data_set)
{
    GListPtr gIter = NULL;

    if (safe_str_neq(data_set->placement_strategy, "default")) {
        GListPtr nodes = g_list_copy(data_set->nodes);

        nodes = g_list_sort_with_data(nodes, sort_node_weight, NULL);

        data_set->resources =
            g_list_sort_with_data(data_set->resources, sort_rsc_process_order, nodes);

        g_list_free(nodes);
    }

    gIter = data_set->nodes;
    for (; gIter != NULL; gIter = gIter->next) {
        node_t *node = (node_t *) gIter->data;

        dump_node_capacity(show_utilization ? 0 : utilization_log_level, "Original", node);
    }

    crm_trace("Allocating services");
    /* Take (next) highest resource, assign it and create its actions */

    allocate_resources(data_set);

    gIter = data_set->nodes;
    for (; gIter != NULL; gIter = gIter->next) {
        node_t *node = (node_t *) gIter->data;

        dump_node_capacity(show_utilization ? 0 : utilization_log_level, "Remaining", node);
    }

    // Process deferred action checks
    pe__foreach_param_check(data_set, check_params);
    pe__free_param_checks(data_set);

    if (is_set(data_set->flags, pe_flag_startup_probes)) {
        crm_trace("Calculating needed probes");
        /* This code probably needs optimization
         * ptest -x with 100 nodes, 100 clones and clone-max=100:

         With probes:

         ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status
         ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints
         ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints
         ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:292 Check actions
         ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: do_calculations: pengine.c:299 Allocate resources
         ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: stage5: allocate.c:881 Allocating services
         ptest[14781]: 2010/09/27_17:56:49 notice: TRACE: stage5: allocate.c:894 Calculating needed probes
         ptest[14781]: 2010/09/27_17:56:51 notice: TRACE: stage5: allocate.c:899 Creating actions
         ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: stage5: allocate.c:905 Creating done
         ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases
         ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints
         36s
         ptest[14781]: 2010/09/27_17:57:28 notice: TRACE: do_calculations: pengine.c:320 Create transition graph

         Without probes:

         ptest[14637]: 2010/09/27_17:56:21 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status
         ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints
         ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints
         ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:292 Check actions
         ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: do_calculations: pengine.c:299 Allocate resources
         ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: stage5: allocate.c:881 Allocating services
         ptest[14637]: 2010/09/27_17:56:24 notice: TRACE: stage5: allocate.c:899 Creating actions
         ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: stage5: allocate.c:905 Creating done
         ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases
         ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints
         ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:320 Create transition graph
        */

        probe_resources(data_set);
    }

    crm_trace("Handle orphans");

    for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
        resource_t *rsc = (resource_t *) gIter->data;
        cleanup_orphans(rsc, data_set);
    }

    crm_trace("Creating actions");

    for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
        resource_t *rsc = (resource_t *) gIter->data;

        rsc->cmds->create_actions(rsc, data_set);
    }

    crm_trace("Creating done");
    return TRUE;
}

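/*!
 * \internal
 * \brief Check whether a resource or any of its children is managed
 *
 * \param[in] rsc  Resource to check
 *
 * \return TRUE if \p rsc or any descendant is managed, otherwise FALSE
 */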
static gboolean
is_managed(const resource_t * rsc)
{
    GListPtr gIter = rsc->children;

    if (is_set(rsc->flags, pe_rsc_managed)) {
        return TRUE;
    }

    for (; gIter != NULL; gIter = gIter->next) {
        resource_t *child_rsc = (resource_t *) gIter->data;

        if (is_managed(child_rsc)) {
            return TRUE;
        }
    }

    return FALSE;
}

static gboolean
any_managed_resources(pe_working_set_t * data_set)
{
    GListPtr gIter = data_set->resources;

    for (; gIter != NULL; gIter = gIter->next) {
        resource_t *rsc = (resource_t *) gIter->data;

        if (is_managed(rsc)) {
            return TRUE;
        }
    }
    return FALSE;
}

/*!
 * \internal
 * \brief Create pseudo-op for guest node fence, and order relative to it
 *
 * \param[in] node      Guest node to fence
 * \param[in] data_set  Working set of CIB state
 */
static void
fence_guest(pe_node_t *node, pe_working_set_t *data_set)
{
    resource_t *container = node->details->remote_rsc->container;
    pe_action_t *stop = NULL;
    pe_action_t *stonith_op = NULL;

    /* The fence action is just a label; we don't do anything differently for
     * off vs. reboot. We specify it explicitly, rather than let it default to
     * the cluster's default action, because we are not _initiating_ fencing --
     * we are creating a pseudo-event to describe fencing that is already
     * occurring by other means (container recovery).
     */
    const char *fence_action = "off";

    /* Check whether the guest's container resource has any explicit stop or
     * start (the stop may be implied by fencing of the guest's host).
     */
1486     if (container) {
1487         stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP, NULL);
1488 
1489         if (find_first_action(container->actions, NULL, CRMD_ACTION_START, NULL)) {
1490             fence_action = "reboot";
1491         }
1492     }
1493 
1494     /* Create a fence pseudo-event, so we have an event to order actions
1495      * against, and crmd can always detect it.
1496      */
1497     stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean", FALSE, data_set);
1498     update_action_flags(stonith_op, pe_action_pseudo | pe_action_runnable,
1499                         __FUNCTION__, __LINE__);
1500 
1501     /* We want to imply stops/demotes after the guest is stopped, not wait until
1502      * it is restarted, so we always order pseudo-fencing after stop, not start
1503      * (even though start might be closer to what is done for a real reboot).
1504      */
1505     if(stop && is_set(stop->flags, pe_action_pseudo)) {
1506         pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE, NULL, FALSE, data_set);
1507         crm_info("Implying guest node %s is down (action %d) after %s fencing",
1508                  node->details->uname, stonith_op->id, stop->node->details->uname);
1509         order_actions(parent_stonith_op, stonith_op,
1510                       pe_order_runnable_left|pe_order_implies_then);
1511 
1512     } else if (stop) {
1513         order_actions(stop, stonith_op,
1514                       pe_order_runnable_left|pe_order_implies_then);
1515         crm_info("Implying guest node %s is down (action %d) "
1516                  "after container %s is stopped (action %d)",
1517                  node->details->uname, stonith_op->id,
1518                  container->id, stop->id);
1519     } else {
1520         /* If we're fencing the guest node but there's no stop for the guest
1521          * resource, we must think the guest is already stopped. However, we may
1522          * think so because its resource history was just cleaned. To avoid
1523          * unnecessarily considering the guest node down if it's really up,
1524          * order the pseudo-fencing after any stop of the connection resource,
1525          * which will be ordered after any container (re-)probe.
1526          */
1527         stop = find_first_action(node->details->remote_rsc->actions, NULL,
1528                                  RSC_STOP, NULL);
1529 
1530         if (stop) {
1531             order_actions(stop, stonith_op, pe_order_optional);
1532             crm_info("Implying guest node %s is down (action %d) "
1533                      "after connection is stopped (action %d)",
1534                      node->details->uname, stonith_op->id, stop->id);
1535         } else {
1536             /* Not sure why we're fencing, but everything must already be
1537              * cleanly stopped.
1538              */
1539             crm_info("Implying guest node %s is down (action %d) ",
1540                      node->details->uname, stonith_op->id);
1541         }
1542     }
1543 
1544     /* Order/imply other actions relative to pseudo-fence as with real fence */
1545     stonith_constraints(node, stonith_op, data_set);
1546 }
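/* Editorial sketch (not part of the scheduler): for a guest node "guest1"
 * whose container "vm1" is being restarted on cluster host "node2", the
 * function above produces roughly this ordering:
 *
 *   vm1_stop_0 (node2)  ->  stonith 'reboot' guest1 (pseudo-op)
 *                       ->  stops/demotes implied for resources on guest1
 *
 * If node2 itself is being fenced, the guest pseudo-fence is instead ordered
 * after node2's real fencing. All names here are hypothetical.
 */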
1547 
1548 /*
1549  * Create dependencies for stonith and shutdown operations
1550  */
1551 gboolean
1552 stage6(pe_working_set_t * data_set)
1553 {
1554     action_t *dc_down = NULL;
1555     action_t *stonith_op = NULL;
1556     gboolean integrity_lost = FALSE;
1557     gboolean need_stonith = TRUE;
1558     GListPtr gIter;
1559     GListPtr stonith_ops = NULL;
1560     GList *shutdown_ops = NULL;
1561 
1562     /* Remote ordering constraints must be applied before we calculate
1563      * fencing, because this is one more place where we may mark the node
1564      * as dirty.
1565      *
1566      * A nice side effect of doing it first is that we can remove a
1567      * bunch of special logic from apply_*_ordering(), because it's
1568      * already part of pe_fence_node().
1569      */
1570     crm_trace("Creating remote ordering constraints");
1571     apply_remote_node_ordering(data_set);
1572 
1573     crm_trace("Processing fencing and shutdown cases");
1574     if (any_managed_resources(data_set) == FALSE) {
1575         crm_notice("Delaying fencing operations until there are resources to manage");
1576         need_stonith = FALSE;
1577     }
1578 
1579     /* Check each node for stonith/shutdown */
1580     for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
1581         node_t *node = (node_t *) gIter->data;
1582 
1583         /* Guest nodes are "fenced" by recovering their container resource,
1584          * so handle them separately.
1585          */
1586         if (is_container_remote_node(node)) {
1587             if (node->details->remote_requires_reset && need_stonith
1588                 && pe_can_fence(data_set, node)) {
1589                 fence_guest(node, data_set);
1590             }
1591             continue;
1592         }
1593 
1594         stonith_op = NULL;
1595 
1596         if (node->details->unclean
1597             && need_stonith && pe_can_fence(data_set, node)) {
1598 
1599             stonith_op = pe_fence_op(node, NULL, FALSE, "node is unclean", FALSE, data_set);
1600             pe_warn("Scheduling Node %s for STONITH", node->details->uname);
1601 
1602             stonith_constraints(node, stonith_op, data_set);
1603 
1604             if (node->details->is_dc) {
1605                 // Remember if the DC is being fenced
1606                 dc_down = stonith_op;
1607 
1608             } else {
1609 
1610                 if (is_not_set(data_set->flags, pe_flag_concurrent_fencing)
1611                     && (stonith_ops != NULL)) {
1612                     /* Concurrent fencing is disabled, so order each non-DC
1613                      * fencing in a chain. If there is any DC fencing or
1614                      * shutdown, it will be ordered after the last action in the
1615                      * chain later.
1616                      */
1617                     order_actions((pe_action_t *) stonith_ops->data,
1618                                   stonith_op, pe_order_optional);
1619                 }
1620 
1621                 // Remember all non-DC fencing actions in a separate list
1622                 stonith_ops = g_list_prepend(stonith_ops, stonith_op);
1623             }
1624 
1625         } else if (node->details->online && node->details->shutdown &&
1626                 /* TODO define what a shutdown op means for a remote node.
1627                  * For now we do not send shutdown operations for remote nodes, but
1628                  * if we can come up with a good use for this in the future, we will. */
1629                     is_remote_node(node) == FALSE) {
1630 
1631             action_t *down_op = sched_shutdown_op(node, data_set);
1632 
1633             if (node->details->is_dc) {
1634                 // Remember if the DC is being shut down
1635                 dc_down = down_op;
1636             } else {
1637                 // Remember non-DC shutdowns for later ordering
1638                 shutdown_ops = g_list_prepend(shutdown_ops, down_op);
1639             }
1640         }
1641 
1642         if (node->details->unclean && stonith_op == NULL) {
1643             integrity_lost = TRUE;
1644             pe_warn("Node %s is unclean!", node->details->uname);
1645         }
1646     }
1647 
1648     if (integrity_lost) {
1649         if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
1650             pe_warn("YOUR RESOURCES ARE NOW LIKELY COMPROMISED");
1651             pe_err("ENABLE STONITH TO KEEP YOUR RESOURCES SAFE");
1652 
1653         } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE) {
1654             crm_notice("Cannot fence unclean nodes until quorum is"
1655                        " attained (or no-quorum-policy is set to ignore)");
1656         }
1657     }
1658 
1659     if (dc_down != NULL) {
1660         /* Order any non-DC shutdowns before any DC shutdown, to avoid repeated
1661          * DC elections. However, we don't want to order non-DC shutdowns before
1662          * a DC *fencing*, because even though we don't want a node that's
1663          * shutting down to become DC, the DC fencing could be ordered before a
1664          * clone stop that's also ordered before the shutdowns, thus leading to
1665          * a graph loop.
1666          */
1667         if (safe_str_eq(dc_down->task, CRM_OP_SHUTDOWN)) {
1668             for (gIter = shutdown_ops; gIter != NULL; gIter = gIter->next) {
1669                 action_t *node_stop = (action_t *) gIter->data;
1670 
1671                 crm_debug("Ordering shutdown on %s before %s on DC %s",
1672                           node_stop->node->details->uname,
1673                           dc_down->task, dc_down->node->details->uname);
1674 
1675                 order_actions(node_stop, dc_down, pe_order_optional);
1676             }
1677         }
1678 
1679         // Order any non-DC fencing before any DC fencing or shutdown
1680 
1681         if (is_set(data_set->flags, pe_flag_concurrent_fencing)) {
1682             /* With concurrent fencing, order each non-DC fencing action
1683              * separately before any DC fencing or shutdown.
1684              */
1685             for (gIter = stonith_ops; gIter != NULL; gIter = gIter->next) {
1686                 order_actions((pe_action_t *) gIter->data, dc_down,
1687                               pe_order_optional);
1688             }
1689         } else if (stonith_ops) {
1690             /* Without concurrent fencing, the non-DC fencing actions are
1691              * already ordered relative to each other, so we just need to order
1692              * the DC fencing after the last action in the chain (which is the
1693              * first item in the list).
1694              */
1695             order_actions((pe_action_t *) stonith_ops->data, dc_down,
1696                           pe_order_optional);
1697         }
1698     }
1699     g_list_free(stonith_ops);
1700     g_list_free(shutdown_ops);
1701     return TRUE;
1702 }
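/* Editorial sketch: with concurrent-fencing disabled, stage6() chains the
 * non-DC fencing actions and, if the DC must also go down, orders the DC
 * action last, e.g. for three unclean nodes plus an unclean DC
 * (hypothetical node names):
 *
 *   stonith node1 -> stonith node2 -> stonith node3 -> stonith dc-node
 *
 * With concurrent-fencing enabled, node1..node3 are fenced in parallel, each
 * ordered only before the DC action.
 */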
1703 
1704 /*
1705  * Determine the sets of independent actions and the correct order for the
1706  * actions in each set.
1707  *
1708  * Mark dependencies of unrunnable actions unrunnable.
1709  *
1710  */
1711 static GListPtr
1712 find_actions_by_task(GListPtr actions, resource_t * rsc, const char *original_key)
1713 {
1714     GListPtr list = NULL;
1715 
1716     list = find_actions(actions, original_key, NULL);
1717     if (list == NULL) {
1718         /* we're potentially searching a child of the original resource */
1719         char *key = NULL;
1720         char *task = NULL;
1721         int interval = 0;
1722 
1723         if (parse_op_key(original_key, NULL, &task, &interval)) {
1724             key = generate_op_key(rsc->id, task, interval);
1725             list = find_actions(actions, key, NULL);
1726 
1727         } else {
1728             crm_err("search key: %s", original_key);
1729         }
1730 
1731         free(key);
1732         free(task);
1733     }
1734 
1735     return list;
1736 }
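/* Editorial sketch: operation keys have the form <rsc-id>_<task>_<interval>,
 * e.g. "myrsc_monitor_10000" for a 10-second monitor (hypothetical resource
 * name). If the original key was built for a different resource in the same
 * tree, the fallback above parses out the task and interval and re-generates
 * the key with this resource's own id before searching again.
 */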
1737 
1738 static void
1739 rsc_order_then(pe_action_t *lh_action, pe_resource_t *rsc,
1740                pe__ordering_t *order)
1741 {
1742     GListPtr gIter = NULL;
1743     GListPtr rh_actions = NULL;
1744     action_t *rh_action = NULL;
1745     enum pe_ordering type;
1746 
1747     CRM_CHECK(rsc != NULL, return);
1748     CRM_CHECK(order != NULL, return);
1749 
1750     type = order->type;
1751     rh_action = order->rh_action;
1752     crm_trace("Processing RH of ordering constraint %d", order->id);
1753 
1754     if (rh_action != NULL) {
1755         rh_actions = g_list_prepend(NULL, rh_action);
1756 
1757     } else if (rsc != NULL) {
1758         rh_actions = find_actions_by_task(rsc->actions, rsc, order->rh_action_task);
1759     }
1760 
1761     if (rh_actions == NULL) {
1762         pe_rsc_trace(rsc, "No RH-Side (%s/%s) found for constraint..."
1763                      " ignoring", rsc->id, order->rh_action_task);
1764         if (lh_action) {
1765             pe_rsc_trace(rsc, "LH-Side was: %s", lh_action->uuid);
1766         }
1767         return;
1768     }
1769 
1770     if (lh_action && lh_action->rsc == rsc && is_set(lh_action->flags, pe_action_dangle)) {
1771         pe_rsc_trace(rsc, "Detected dangling operation %s -> %s", lh_action->uuid,
1772                      order->rh_action_task);
1773         clear_bit(type, pe_order_implies_then);
1774     }
1775 
1776     gIter = rh_actions;
1777     for (; gIter != NULL; gIter = gIter->next) {
1778         action_t *rh_action_iter = (action_t *) gIter->data;
1779 
1780         if (lh_action) {
1781             order_actions(lh_action, rh_action_iter, type);
1782 
1783         } else if (type & pe_order_implies_then) {
1784             update_action_flags(rh_action_iter, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__);
1785             crm_warn("Unrunnable %s 0x%.6x", rh_action_iter->uuid, type);
1786         } else {
1787             crm_warn("neither %s 0x%.6x", rh_action_iter->uuid, type);
1788         }
1789     }
1790 
1791     g_list_free(rh_actions);
1792 }
1793 
1794 static void
1795 rsc_order_first(pe_resource_t *lh_rsc, pe__ordering_t *order,
1796                 pe_working_set_t *data_set)
1797 {
1798     GListPtr gIter = NULL;
1799     GListPtr lh_actions = NULL;
1800     action_t *lh_action = order->lh_action;
1801     resource_t *rh_rsc = order->rh_rsc;
1802 
1803     crm_trace("Processing LH of ordering constraint %d", order->id);
1804     CRM_ASSERT(lh_rsc != NULL);
1805 
1806     if (lh_action != NULL) {
1807         lh_actions = g_list_prepend(NULL, lh_action);
1808 
1809     } else if (lh_action == NULL) {
1810         lh_actions = find_actions_by_task(lh_rsc->actions, lh_rsc, order->lh_action_task);
1811     }
1812 
1813     if (lh_actions == NULL && lh_rsc != rh_rsc) {
1814         char *key = NULL;
1815         char *op_type = NULL;
1816         int interval = 0;
1817 
1818         parse_op_key(order->lh_action_task, NULL, &op_type, &interval);
1819         key = generate_op_key(lh_rsc->id, op_type, interval);
1820 
1821         if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_STOPPED && safe_str_eq(op_type, RSC_STOP)) {
1822             free(key);
1823             pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring",
1824                          lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
1825 
1826         } else if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_SLAVE && safe_str_eq(op_type, RSC_DEMOTE)) {
1827             free(key);
1828             pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring",
1829                          lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
1830 
1831         } else {
1832             pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - creating",
1833                          lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
1834             lh_action = custom_action(lh_rsc, key, op_type, NULL, TRUE, TRUE, data_set);
1835             lh_actions = g_list_prepend(NULL, lh_action);
1836         }
1837 
1838         free(op_type);
1839     }
1840 
1841     gIter = lh_actions;
1842     for (; gIter != NULL; gIter = gIter->next) {
1843         action_t *lh_action_iter = (action_t *) gIter->data;
1844 
1845         if (rh_rsc == NULL && order->rh_action) {
1846             rh_rsc = order->rh_action->rsc;
1847         }
1848         if (rh_rsc) {
1849             rsc_order_then(lh_action_iter, rh_rsc, order);
1850 
1851         } else if (order->rh_action) {
1852             order_actions(lh_action_iter, order->rh_action, order->type);
1853         }
1854     }
1855 
1856     g_list_free(lh_actions);
1857 }
1858 
1859 extern gboolean update_action(action_t * action);
1860 extern void update_colo_start_chain(action_t * action);
1861 
1862 static int
1863 is_recurring_action(action_t *action)
1864 {
1865     const char *interval_s = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL);
1866     int interval = crm_parse_int(interval_s, "0");
1867     if(interval > 0) {
1868         return TRUE;
1869     }
1870     return FALSE;
1871 }
1872 
1873 static void
1874 apply_container_ordering(action_t *action, pe_working_set_t *data_set)
1875 {
1876     /* VMs are also classified as containers for these purposes... in
1877      * that they both involve a 'thing' running on a real or remote
1878      * cluster node.
1879      *
1880      * This allows us to be smarter about the type and extent of
1881      * recovery actions required in various scenarios.
1882      */
1883     resource_t *remote_rsc = NULL;
1884     resource_t *container = NULL;
1885     enum action_tasks task = text2task(action->task);
1886 
1887     CRM_ASSERT(action->rsc);
1888     CRM_ASSERT(action->node);
1889     CRM_ASSERT(is_remote_node(action->node));
1890 
1891     remote_rsc = action->node->details->remote_rsc;
1892     CRM_ASSERT(remote_rsc);
1893 
1894     container = remote_rsc->container;
1895     CRM_ASSERT(container);
1896 
1897     if(is_set(container->flags, pe_rsc_failed)) {
1898         pe_fence_node(data_set, action->node, "container failed", FALSE);
1899     }
1900 
1901     crm_trace("Order %s action %s relative to %s%s for %s%s",
1902               action->task, action->uuid,
1903               is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "",
1904               remote_rsc->id,
1905               is_set(container->flags, pe_rsc_failed)? "failed " : "",
1906               container->id);
1907 
1908     if (safe_str_eq(action->task, CRMD_ACTION_MIGRATE)
1909         || safe_str_eq(action->task, CRMD_ACTION_MIGRATED)) {
1910         /* Migration ops map to "no_action", but we need to apply the same
1911          * ordering as for stop or demote (see get_router_node()).
1912          */
1913         task = stop_rsc;
1914     }
1915 
1916     switch (task) {
1917         case start_rsc:
1918         case action_promote:
1919             /* Force resource recovery if the container is recovered */
1920             order_start_then_action(container, action, pe_order_implies_then,
1921                                     data_set);
1922 
1923             /* Wait for the connection resource to be up too */
1924             order_start_then_action(remote_rsc, action, pe_order_none,
1925                                     data_set);
1926             break;
1927 
1928         case stop_rsc:
1929         case action_demote:
1930             if (is_set(container->flags, pe_rsc_failed)) {
1931                 /* When the container representing a guest node fails, any stop
1932                  * or demote actions for resources running on the guest node
1933                  * are implied by the container stopping. This is similar to
1934                  * how fencing operations work for cluster nodes and remote
1935                  * nodes.
1936                  */
1937             } else {
1938                 /* Ensure the operation happens before the connection is brought
1939                  * down.
1940                  *
1941                  * If we really wanted to, we could order these after the
1942                  * connection start, IFF the container's current role was
1943                  * stopped (otherwise we re-introduce an ordering loop when the
1944                  * connection is restarting).
1945                  */
1946                 order_action_then_stop(action, remote_rsc, pe_order_none,
1947                                        data_set);
1948             }
1949             break;
1950 
1951         default:
1952             /* Wait for the connection resource to be up */
1953             if (is_recurring_action(action)) {
1954                 /* In case we ever get the recovery logic wrong, force
1955              * recurring monitors to be restarted, even if only
1956              * the connection was re-established.
1957                  */
1958                 if(task != no_action) {
1959                     order_start_then_action(remote_rsc, action,
1960                                             pe_order_implies_then, data_set);
1961                 }
1962             } else {
1963                 order_start_then_action(remote_rsc, action, pe_order_none,
1964                                         data_set);
1965             }
1966             break;
1967     }
1968 }
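/* Editorial sketch of the orderings created above, for a resource "rscA"
 * on guest node "guest1" with container "vm1" (hypothetical names):
 *
 *   start/promote:     vm1_start -> rscA_action (required), and
 *                      guest1_start -> rscA_action (optional)
 *   stop/demote:       rscA_action -> guest1_stop, unless the container
 *                      failed, in which case the action is implied by the
 *                      container stopping
 *   recurring monitor: guest1_start -> rscA_monitor (required)
 */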
1969 
1970 static enum remote_connection_state
1971 get_remote_node_state(pe_node_t *node)
1972 {
1973     resource_t *remote_rsc = NULL;
1974     node_t *cluster_node = NULL;
1975 
1976     CRM_ASSERT(node);
1977 
1978     remote_rsc = node->details->remote_rsc;
1979     CRM_ASSERT(remote_rsc);
1980 
1981     cluster_node = pe__current_node(remote_rsc);
1982 
1983     /* If the cluster node the remote connection resource resides on
1984      * is unclean or went offline, we can't process any operations
1985      * on that remote node until after it starts elsewhere.
1986      */
1987     if(remote_rsc->next_role == RSC_ROLE_STOPPED || remote_rsc->allocated_to == NULL) {
1988         /* The connection resource is not going to run anywhere */
1989 
1990         if (cluster_node && cluster_node->details->unclean) {
1991             /* The remote connection is failed because its resource is on a
1992              * failed node and can't be recovered elsewhere, so we must fence.
1993              */
1994             return remote_state_failed;
1995         }
1996 
1997         if (is_not_set(remote_rsc->flags, pe_rsc_failed)) {
1998             /* Connection resource is cleanly stopped */
1999             return remote_state_stopped;
2000         }
2001 
2002         /* Connection resource is failed */
2003 
2004         if ((remote_rsc->next_role == RSC_ROLE_STOPPED)
2005             && remote_rsc->remote_reconnect_interval
2006             && node->details->remote_was_fenced
2007             && !pe__shutdown_requested(node)) {
2008 
2009             /* We won't know whether the connection is recoverable until the
2010              * reconnect interval expires and we reattempt connection.
2011              */
2012             return remote_state_unknown;
2013         }
2014 
2015         /* The remote connection is in a failed state. If there are any
2016          * resources known to be active on it (stop) or in an unknown state
2017          * (probe), we must assume the worst and fence it.
2018          */
2019         return remote_state_failed;
2020 
2021     } else if (cluster_node == NULL) {
2022         /* Connection is recoverable but not currently running anywhere; see if we can recover it first */
2023         return remote_state_unknown;
2024 
2025     } else if(cluster_node->details->unclean == TRUE
2026               || cluster_node->details->online == FALSE) {
2027         /* Connection is running on a dead node; see if we can recover it first */
2028         return remote_state_resting;
2029 
2030     } else if (g_list_length(remote_rsc->running_on) > 1
2031                && remote_rsc->partial_migration_source
2032                && remote_rsc->partial_migration_target) {
2033         /* We're in the middle of migrating a connection resource,
2034          * wait until after the resource migrates before performing
2035          * any actions.
2036          */
2037         return remote_state_resting;
2038 
2039     }
2040     return remote_state_alive;
2041 }
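/* Editorial summary of the decision tree above:
 *
 *   connection will not be running anywhere next:
 *     current host unclean                  -> remote_state_failed
 *     connection not failed                 -> remote_state_stopped
 *     fenced, waiting on reconnect interval -> remote_state_unknown
 *     otherwise                             -> remote_state_failed
 *   recoverable but no current host         -> remote_state_unknown
 *   current host unclean or offline         -> remote_state_resting
 *   connection mid-migration                -> remote_state_resting
 *   otherwise                               -> remote_state_alive
 */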
2042 
2043 /*!
2044  * \internal
2045  * \brief Order actions on remote node relative to actions for the connection
2046  */
2047 static void
2048 apply_remote_ordering(action_t *action, pe_working_set_t *data_set)
2049 {
2050     resource_t *remote_rsc = NULL;
2051     enum action_tasks task = text2task(action->task);
2052     enum remote_connection_state state = get_remote_node_state(action->node);
2053 
2054     enum pe_ordering order_opts = pe_order_none;
2055 
2056     if (action->rsc == NULL) {
2057         return;
2058     }
2059 
2060     CRM_ASSERT(action->node);
2061     CRM_ASSERT(is_remote_node(action->node));
2062 
2063     remote_rsc = action->node->details->remote_rsc;
2064     CRM_ASSERT(remote_rsc);
2065 
2066     crm_trace("Order %s action %s relative to %s%s (state: %s)",
2067               action->task, action->uuid,
2068               is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "",
2069               remote_rsc->id, state2text(state));
2070 
2071     if (safe_str_eq(action->task, CRMD_ACTION_MIGRATE)
2072         || safe_str_eq(action->task, CRMD_ACTION_MIGRATED)) {
2073         /* Migration ops map to "no_action", but we need to apply the same
2074          * ordering as for stop or demote (see get_router_node()).
2075          */
2076         task = stop_rsc;
2077     }
2078 
2079     switch (task) {
2080         case start_rsc:
2081         case action_promote:
2082             order_opts = pe_order_none;
2083 
2084             if (state == remote_state_failed) {
2085                 /* Force recovery, by making this action required */
2086                 order_opts |= pe_order_implies_then;
2087             }
2088 
2089             /* Ensure connection is up before running this action */
2090             order_start_then_action(remote_rsc, action, order_opts, data_set);
2091             break;
2092 
2093         case stop_rsc:
2094             if(state == remote_state_alive) {
2095                 order_action_then_stop(action, remote_rsc,
2096                                        pe_order_implies_first, data_set);
2097 
2098             } else if(state == remote_state_failed) {
2099                 /* The resource is active on the node, but since we don't have a
2100                  * valid connection, the only way to stop the resource is by
2101                  * fencing the node. There is no need to order the stop relative
2102                  * to the remote connection, since the stop will become implied
2103                  * by the fencing.
2104                  */
2105                 pe_fence_node(data_set, action->node, "resources are active and the connection is unrecoverable", FALSE);
2106 
2107             } else if(remote_rsc->next_role == RSC_ROLE_STOPPED) {
2108                 /* State must be remote_state_unknown or remote_state_stopped.
2109                  * Since the connection is not coming back up in this
2110                  * transition, stop this resource first.
2111                  */
2112                 order_action_then_stop(action, remote_rsc,
2113                                        pe_order_implies_first, data_set);
2114 
2115             } else {
2116                 /* The connection is going to be started somewhere else, so
2117                  * stop this resource after that completes.
2118                  */
2119                 order_start_then_action(remote_rsc, action, pe_order_none, data_set);
2120             }
2121             break;
2122 
2123         case action_demote:
2124             /* Only order this demote relative to the connection start if the
2125              * connection isn't being torn down. Otherwise, the demote would be
2126              * blocked because the connection start would not be allowed.
2127              */
2128             if(state == remote_state_resting || state == remote_state_unknown) {
2129                 order_start_then_action(remote_rsc, action, pe_order_none,
2130                                         data_set);
2131             } /* Otherwise we can rely on the stop ordering */
2132             break;
2133 
2134         default:
2135             /* Wait for the connection resource to be up */
2136             if (is_recurring_action(action)) {
2137                 /* In case we ever get the recovery logic wrong, force
2138              * recurring monitors to be restarted, even if only
2139              * the connection was re-established.
2140                  */
2141                 order_start_then_action(remote_rsc, action,
2142                                         pe_order_implies_then, data_set);
2143 
2144             } else {
2145                 node_t *cluster_node = pe__current_node(remote_rsc);
2146 
2147                 if(task == monitor_rsc && state == remote_state_failed) {
2148                     /* We would only be here if we do not know the
2149                      * state of the resource on the remote node.
2150                      * Since we have no way to find out, it is
2151                      * necessary to fence the node.
2152                      */
2153                     pe_fence_node(data_set, action->node, "resources are in an unknown state and the connection is unrecoverable", FALSE);
2154                 }
2155 
2156                 if(cluster_node && state == remote_state_stopped) {
2157                     /* The connection is currently up, but is going
2158                      * down permanently.
2159                      *
2160                      * Make sure we check that services are actually
2161                      * stopped _before_ we let the connection get
2162                      * closed.
2163                      */
2164                     order_action_then_stop(action, remote_rsc,
2165                                            pe_order_runnable_left, data_set);
2166 
2167                 } else {
2168                     order_start_then_action(remote_rsc, action, pe_order_none,
2169                                             data_set);
2170                 }
2171             }
2172             break;
2173     }
2174 }
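/* Editorial sketch for a resource "rscA" on remote node "remote1"
 * (hypothetical names):
 *
 *   start/promote:            remote1_start -> rscA_action, made required
 *                             when the connection has failed
 *   stop, connection alive:   rscA_stop -> remote1_stop
 *   stop, connection failed:  remote1 is fenced; the stop becomes implied
 *   recurring monitor:        remote1_start -> rscA_monitor (required)
 */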
2175 
2176 static void
2177 apply_remote_node_ordering(pe_working_set_t *data_set)
2178 {
2179     if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) {
2180         return;
2181     }
2182 
2183     for (GListPtr gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
2184         action_t *action = (action_t *) gIter->data;
2185         resource_t *remote = NULL;
2186 
2187         // We are only interested in resource actions
2188         if (action->rsc == NULL) {
2189             continue;
2190         }
2191 
2192         /* Special case: If we are clearing the failcount of an actual
2193          * remote connection resource, then make sure this happens before
2194          * any start of the resource in this transition.
2195          */
2196         if (action->rsc->is_remote_node &&
2197             safe_str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT)) {
2198 
2199             custom_action_order(action->rsc,
2200                 NULL,
2201                 action,
2202                 action->rsc,
2203                 generate_op_key(action->rsc->id, RSC_START, 0),
2204                 NULL,
2205                 pe_order_optional,
2206                 data_set);
2207 
2208             continue;
2209         }
2210 
2211         // We are only interested in actions allocated to a node
2212         if (action->node == NULL) {
2213             continue;
2214         }
2215 
2216         if (is_remote_node(action->node) == FALSE) {
2217             continue;
2218         }
2219 
2220         /* We are only interested in real actions.
2221          *
2222          * @TODO This is probably wrong; pseudo-actions might be converted to
2223          * real actions and vice versa later in update_actions() at the end of
2224          * stage7().
2225          */
2226         if (is_set(action->flags, pe_action_pseudo)) {
2227             continue;
2228         }
2229 
2230         remote = action->node->details->remote_rsc;
2231         if (remote == NULL) {
2232             // Orphaned
2233             continue;
2234         }
2235 
2236         /* Another special case: if a resource is moving to a Pacemaker Remote
2237          * node, order the stop on the original node after any start of the
2238          * remote connection. This ensures that if the connection fails to
2239          * start, we leave the resource running on the original node.
2240          */
2241         if (safe_str_eq(action->task, RSC_START)) {
2242             for (GList *item = action->rsc->actions; item != NULL;
2243                  item = item->next) {
2244                 pe_action_t *rsc_action = item->data;
2245 
2246                 if ((rsc_action->node->details != action->node->details)
2247                     && safe_str_eq(rsc_action->task, RSC_STOP)) {
2248                     custom_action_order(remote, start_key(remote), NULL,
2249                                         action->rsc, NULL, rsc_action,
2250                                         pe_order_optional, data_set);
2251                 }
2252             }
2253         }
2254 
2255         /* The action occurs across a remote connection, so create
2256          * ordering constraints that guarantee the action occurs while the node
2257          * is active (after start, before stop ... things like that).
2258          *
2259          * This is somewhat brittle in that we need to make sure the results of
2260          * this ordering are compatible with the result of get_router_node().
2261          * It would probably be better to add XML_LRM_ATTR_ROUTER_NODE as part
2262          * of this logic rather than action2xml().
2263          */
2264         if (remote->container) {
2265             crm_trace("Container ordering for %s", action->uuid);
2266             apply_container_ordering(action, data_set);
2267 
2268         } else {
2269             crm_trace("Remote ordering for %s", action->uuid);
2270             apply_remote_ordering(action, data_set);
2271         }
2272     }
2273 }
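/* Editorial sketch: if one transition both clears the fail count of
 * connection resource "remote1" and starts it, the special case above
 * yields (hypothetical name):
 *
 *   remote1_clear_failcount_0 -> remote1_start_0
 *
 * so the start does not race ahead of the fail-count cleanup.
 */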
2274 
2275 static gboolean
2276 order_first_probe_unneeded(pe_action_t * probe, pe_action_t * rh_action)
2277 {
2278     /* No need to probe the resource on the node that is being
2279      * unfenced. Otherwise it might introduce a transition loop,
2280      * since the probe would be performed after the node is
2281      * unfenced.
2282      */
2283     if (safe_str_eq(rh_action->task, CRM_OP_FENCE)
2284          && probe->node && rh_action->node
2285          && probe->node->details == rh_action->node->details) {
2286         const char *op = g_hash_table_lookup(rh_action->meta, "stonith_action");
2287 
2288         if (safe_str_eq(op, "on")) {
2289             return TRUE;
2290         }
2291     }
2292 
2293     // Shutdown waits for probe to complete only if it's on the same node
2294     if ((safe_str_eq(rh_action->task, CRM_OP_SHUTDOWN))
2295         && probe->node && rh_action->node
2296         && probe->node->details != rh_action->node->details) {
2297         return TRUE;
2298     }
2299     return FALSE;
2300 }
2301 
2302 static void
2303 order_first_probes_imply_stops(pe_working_set_t * data_set)
2304 {
2305     GListPtr gIter = NULL;
2306 
2307     for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) {
2308         pe__ordering_t *order = gIter->data;
2309         enum pe_ordering order_type = pe_order_optional;
2310 
2311         pe_resource_t *lh_rsc = order->lh_rsc;
2312         pe_resource_t *rh_rsc = order->rh_rsc;
2313         pe_action_t *lh_action = order->lh_action;
2314         pe_action_t *rh_action = order->rh_action;
2315         const char *lh_action_task = order->lh_action_task;
2316         const char *rh_action_task = order->rh_action_task;
2317 
2318         char *key = NULL;
2319         GListPtr probes = NULL;
2320         GListPtr rh_actions = NULL;
2321 
2322         GListPtr pIter = NULL;
2323 
2324         if (lh_rsc == NULL) {
2325             continue;
2326 
2327         } else if (rh_rsc && lh_rsc == rh_rsc) {
2328             continue;
2329         }
2330 
2331         if (lh_action == NULL && lh_action_task == NULL) {
2332             continue;
2333         }
2334 
2335         if (rh_action == NULL && rh_action_task == NULL) {
2336             continue;
2337         }
2338 
2339         /* Technically, a probe is expected to return "not running", which can
2340          * stand in for a stop action while the status of the resource is
2341          * still unknown.
2342          */
2343         if (lh_action && safe_str_neq(lh_action->task, RSC_STOP)) {
2344             continue;
2345 
2346         } else if (lh_action == NULL
2347                    && lh_action_task
2348                    && crm_ends_with(lh_action_task, "_" RSC_STOP "_0") == FALSE) {
2349             continue;
2350         }
2351 
2352         /* Do not probe a resource inside a stopping container. Otherwise it
2353          * might introduce a transition loop, since the probe would be
2354          * performed after the container starts again.
2355          */
2356         if (rh_rsc && lh_rsc->container == rh_rsc) {
2357             if (rh_action && safe_str_eq(rh_action->task, RSC_STOP)) {
2358                 continue;
2359 
2360             } else if (rh_action == NULL && rh_action_task
2361                        && crm_ends_with(rh_action_task,"_" RSC_STOP "_0")) {
2362                 continue;
2363             }
2364         }
2365 
2366         if (order->type == pe_order_none) {
2367             continue;
2368         }
2369 
2370         // Preserve the order options for future filtering
2371         if (is_set(order->type, pe_order_apply_first_non_migratable)) {
2372             set_bit(order_type, pe_order_apply_first_non_migratable);
2373         }
2374 
2375         if (is_set(order->type, pe_order_same_node)) {
2376             set_bit(order_type, pe_order_same_node);
2377         }
2378 
2379         // Keep the order types for future filtering
2380         if (order->type == pe_order_anti_colocation
2381                    || order->type == pe_order_load) {
2382             order_type = order->type;
2383         }
2384 
2385         key = generate_op_key(lh_rsc->id, RSC_STATUS, 0);
2386         probes = find_actions(lh_rsc->actions, key, NULL);
2387         free(key);
2388 
2389         if (probes == NULL) {
2390             continue;
2391         }
2392 
2393         if (rh_action) {
2394             rh_actions = g_list_prepend(rh_actions, rh_action);
2395 
2396         } else if (rh_rsc && rh_action_task) {
2397             rh_actions = find_actions(rh_rsc->actions, rh_action_task, NULL);
2398         }
2399 
2400         if (rh_actions == NULL) {
2401             g_list_free(probes);
2402             continue;
2403         }
2404 
2405         crm_trace("Processing for LH probe based on ordering constraint %s -> %s"
2406                   " (id=%d, type=%.6x)",
2407                   lh_action ? lh_action->uuid : lh_action_task,
2408                   rh_action ? rh_action->uuid : rh_action_task,
2409                   order->id, order->type);
2410 
2411         for (pIter = probes; pIter != NULL; pIter = pIter->next) {
2412             pe_action_t *probe = (pe_action_t *) pIter->data;
2413             GListPtr rIter = NULL;
2414 
2415             for (rIter = rh_actions; rIter != NULL; rIter = rIter->next) {
2416                 pe_action_t *rh_action_iter = (pe_action_t *) rIter->data;
2417 
2418                 if (order_first_probe_unneeded(probe, rh_action_iter)) {
2419                     continue;
2420                 }
2421                 order_actions(probe, rh_action_iter, order_type);
2422             }
2423         }
2424 
2425         g_list_free(rh_actions);
2426         g_list_free(probes);
2427     }
2428 }
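/* Editorial sketch: given the constraint "stop rscA then stop rscB" and a
 * not-yet-probed rscA, the loop above additionally orders (hypothetical
 * names):
 *
 *   rscA_monitor_0 (probe) -> rscB_stop
 *
 * because a probe that returns "not running" can stand in for rscA's stop.
 */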
2429 
2430 static void
2431 order_first_probe_then_restart_repromote(pe_action_t * probe,
2432                                          pe_action_t * after,
2433                                          pe_working_set_t * data_set)
2434 {
2435     GListPtr gIter = NULL;
2436     bool interleave = FALSE;
2437     pe_resource_t *compatible_rsc = NULL;
2438 
2439     if (probe == NULL
2440         || probe->rsc == NULL
2441         || probe->rsc->variant != pe_native) {
2442         return;
2443     }
2444 
2445     if (after == NULL
2446         // Avoid running into any possible loop
2447         || is_set(after->flags, pe_action_tracking)) {
2448         return;
2449     }
2450 
2451     if (safe_str_neq(probe->task, RSC_STATUS)) {
2452         return;
2453     }
2454 
2455     pe_set_action_bit(after, pe_action_tracking);
2456 
2457     crm_trace("Processing based on %s %s -> %s %s",
2458               probe->uuid,
2459               probe->node ? probe->node->details->uname: "",
2460               after->uuid,
2461               after->node ? after->node->details->uname : "");
2462 
2463     if (after->rsc
2464         /* Better not to build a dependency directly with a clone/group.
2465          * We are going to proceed through the ordering chain and build
2466          * dependencies with its children.
2467          */
2468         && after->rsc->variant == pe_native
2469         && probe->rsc != after->rsc) {
2470 
2471             GListPtr then_actions = NULL;
2472             enum pe_ordering probe_order_type = pe_order_optional;
2473 
2474             if (safe_str_eq(after->task, RSC_START)) {
2475                 char *key = generate_op_key(after->rsc->id, RSC_STOP, 0);
2476 
2477                 then_actions = find_actions(after->rsc->actions, key, NULL);
2478                 free(key);
2479 
2480             } else if (safe_str_eq(after->task, RSC_PROMOTE)) {
2481                 char *key = generate_op_key(after->rsc->id, RSC_DEMOTE, 0);
2482 
2483                 then_actions = find_actions(after->rsc->actions, key, NULL);
2484                 free(key);
2485             }
2486 
2487             for (gIter = then_actions; gIter != NULL; gIter = gIter->next) {
2488                 pe_action_t *then = (pe_action_t *) gIter->data;
2489 
2490                 // Skip any pseudo action which for example is implied by fencing
2491                 if (is_set(then->flags, pe_action_pseudo)) {
2492                     continue;
2493                 }
2494 
2495                 order_actions(probe, then, probe_order_type);
2496             }
2497             g_list_free(then_actions);
2498     }
2499 
2500     if (after->rsc
2501         && after->rsc->variant > pe_group) {
2502         const char *interleave_s = g_hash_table_lookup(after->rsc->meta,
2503                                                        XML_RSC_ATTR_INTERLEAVE);
2504 
2505         interleave = crm_is_true(interleave_s);
2506 
2507         if (interleave) {
2508             /* For an interleaved clone, we should build a dependency only
2509              * with the relevant clone child.
2510              */
2511             compatible_rsc = find_compatible_child(probe->rsc,
2512                                                    after->rsc,
2513                                                    RSC_ROLE_UNKNOWN,
2514                                                    FALSE);
2515         }
2516     }
2517 
2518     for (gIter = after->actions_after; gIter != NULL; gIter = gIter->next) {
2519         action_wrapper_t *after_wrapper = (action_wrapper_t *) gIter->data;
2520         /* pe_order_implies_then is why a required A.start implies/enforces
2521          * that B.start is required too, which is what causes B to be
2522          * restarted/re-promoted.
2523          *
2524          * Not sure about pe_order_implies_then_on_node though. It's now only
2525          * used for the unfencing case, which tends to introduce transition
2526          * loops...
2527          */
2528 
2529         if (is_not_set(after_wrapper->type, pe_order_implies_then)) {
2530             /* The order type between a group/clone and its child such as
2531              * B.start-> B_child.start is:
2532              * pe_order_implies_first_printed | pe_order_runnable_left
2533              *
2534              * Proceed through the ordering chain and build dependencies with
2535              * its children.
2536              */
2537             if (after->rsc == NULL
2538                 || after->rsc->variant < pe_group
2539                 || probe->rsc->parent == after->rsc
2540                 || after_wrapper->action->rsc == NULL
2541                 || after_wrapper->action->rsc->variant > pe_group
2542                 || after->rsc != after_wrapper->action->rsc->parent) {
2543                 continue;
2544             }
2545 
2546             /* Proceed to the children of a group or a non-interleaved clone.
2547              * For an interleaved clone, proceed only to the relevant child.
2548              */
2549             if (after->rsc->variant > pe_group
2550                 && interleave == TRUE
2551                 && (compatible_rsc == NULL
2552                     || compatible_rsc != after_wrapper->action->rsc)) {
2553                 continue;
2554             }
2555         }
2556 
2557         crm_trace("Proceeding through %s %s -> %s %s (type=0x%.6x)",
2558                   after->uuid,
2559                   after->node ? after->node->details->uname: "",
2560                   after_wrapper->action->uuid,
2561                   after_wrapper->action->node ? after_wrapper->action->node->details->uname : "",
2562                   after_wrapper->type);
2563 
2564         order_first_probe_then_restart_repromote(probe, after_wrapper->action, data_set);
2565     }
2566 }
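/* Editorial sketch: given "A then B" with pe_order_implies_then, where A is
 * not yet probed and B's start is required, the recursion above orders A's
 * probe before B's stop (or demote, for promotable resources), so B's
 * restart/re-promotion waits until A's state is known. For groups, clones,
 * and bundles it walks down the ordering chain to the relevant children.
 */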
2567 
2568 static void clear_actions_tracking_flag(pe_working_set_t * data_set)
2569 {
2570     GListPtr gIter = NULL;
2571 
2572     for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
2573         pe_action_t *action = (pe_action_t *) gIter->data;
2574 
2575         if (is_set(action->flags, pe_action_tracking)) {
2576             pe_clear_action_bit(action, pe_action_tracking);
2577         }
2578     }
2579 }
2580 
2581 static void
2582 order_first_rsc_probes(pe_resource_t * rsc, pe_working_set_t * data_set)
2583 {
2584     GListPtr gIter = NULL;
2585     GListPtr probes = NULL;
2586     char *key = NULL;
2587 
2588     for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
2589         pe_resource_t * child = (pe_resource_t *) gIter->data;
2590 
2591         order_first_rsc_probes(child, data_set);
2592     }
2593 
2594     if (rsc->variant != pe_native) {
2595         return;
2596     }
2597 
2598     key = generate_op_key(rsc->id, RSC_STATUS, 0);
2599     probes = find_actions(rsc->actions, key, NULL);
2600     free(key);
2601 
2602     for (gIter = probes; gIter != NULL; gIter= gIter->next) {
2603         pe_action_t *probe = (pe_action_t *) gIter->data;
2604         GListPtr aIter = NULL;
2605 
2606         for (aIter = probe->actions_after; aIter != NULL; aIter = aIter->next) {
2607             action_wrapper_t *after_wrapper = (action_wrapper_t *) aIter->data;
2608 
2609             order_first_probe_then_restart_repromote(probe, after_wrapper->action, data_set);
2610             clear_actions_tracking_flag(data_set);
2611         }
2612     }
2613 
2614     g_list_free(probes);
2615 }
2616 
2617 static void
2618 order_first_probes(pe_working_set_t * data_set)
2619 {
2620     GListPtr gIter = NULL;
2621 
2622     for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
2623         pe_resource_t *rsc = (pe_resource_t *) gIter->data;
2624 
2625         order_first_rsc_probes(rsc, data_set);
2626     }
2627 
2628     order_first_probes_imply_stops(data_set);
2629 }
2630 
2631 static void
2632 order_then_probes(pe_working_set_t * data_set)
2633 {
2634 #if 0
2635     GListPtr gIter = NULL;
2636 
2637     for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
2638         resource_t *rsc = (resource_t *) gIter->data;
2639 
2640         /* Given "A then B", we would prefer to wait for A to be
2641          * started before probing B.
2642          *
2643          * If A was a filesystem on which the binaries and data for B
2644          * lived, it would have been useful if the author of B's agent
2645          * could assume that A is running before B.monitor is
2646          * called.
2647          *
2648          * However, we can't _only_ probe once A is running; otherwise
2649          * we'd not detect the state of B if A could not be started
2650          * for some reason.
2651          *
2652          * In practice however, we cannot even do an opportunistic
2653          * version of this because B may be moving:
2654          *
2655          *   B.probe -> B.start
2656          *   B.probe -> B.stop
2657          *   B.stop -> B.start
2658          *   A.stop -> A.start
2659          *   A.start -> B.probe
2660          *
2661          * So far so good, but if we add the result of this code:
2662          *
2663          *   B.stop -> A.stop
2664          *
2665          * Then we get a loop:
2666          *
2667          *   B.probe -> B.stop -> A.stop -> A.start -> B.probe
2668          *
2669          * We could kill the 'B.probe -> B.stop' dependency, but that
2670          * could mean stopping B "too" soon, because B.start must wait
2671          * for the probes to complete.
2672          *
2673          * Another option is to allow it only if A is a non-unique
2674          * clone with clone-max == node-max (since we'll never be
2675          * moving it).  However, we could still be stopping one
2676          * instance at the same time as starting another.
2677 
2678          * The complexity of checking for allowed conditions combined
2679          * with the ever-narrowing use case suggests that this code
2680          * should remain disabled until someone gets smarter.
2681          */
2682         action_t *start = NULL;
2683         GListPtr actions = NULL;
2684         GListPtr probes = NULL;
2685         char *key = NULL;
2686 
2687         key = start_key(rsc);
2688         actions = find_actions(rsc->actions, key, NULL);
2689         free(key);
2690 
2691         if (actions) {
2692             start = actions->data;
2693             g_list_free(actions);
2694         }
2695 
2696         if(start == NULL) {
2697             crm_err("No start action for %s", rsc->id);
2698             continue;
2699         }
2700 
2701         key = generate_op_key(rsc->id, CRMD_ACTION_STATUS, 0);
2702         probes = find_actions(rsc->actions, key, NULL);
2703         free(key);
2704 
2705         for (actions = start->actions_before; actions != NULL; actions = actions->next) {
2706             action_wrapper_t *before = (action_wrapper_t *) actions->data;
2707 
2708             GListPtr pIter = NULL;
2709             action_t *first = before->action;
2710             resource_t *first_rsc = first->rsc;
2711 
2712             if(first->required_runnable_before) {
2713                 GListPtr clone_actions = NULL;
2714                 for (clone_actions = first->actions_before; clone_actions != NULL; clone_actions = clone_actions->next) {
2715                     before = (action_wrapper_t *) clone_actions->data;
2716 
2717                     crm_trace("Testing %s -> %s (%p) for %s", first->uuid, before->action->uuid, before->action->rsc, start->uuid);
2718 
2719                     CRM_ASSERT(before->action->rsc);
2720                     first_rsc = before->action->rsc;
2721                     break;
2722                 }
2723 
2724             } else if(safe_str_neq(first->task, RSC_START)) {
2725                 crm_trace("Not a start op %s for %s", first->uuid, start->uuid);
2726             }
2727 
2728             if(first_rsc == NULL) {
2729                 continue;
2730 
2731             } else if(uber_parent(first_rsc) == uber_parent(start->rsc)) {
2732                 crm_trace("Same parent %s for %s", first_rsc->id, start->uuid);
2733                 continue;
2734 
2735             } else if(FALSE && pe_rsc_is_clone(uber_parent(first_rsc)) == FALSE) {
2736                 crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid);
2737                 continue;
2738             }
2739 
2740             crm_err("Applying %s before %s %d", first->uuid, start->uuid, uber_parent(first_rsc)->variant);
2741 
2742             for (pIter = probes; pIter != NULL; pIter = pIter->next) {
2743                 action_t *probe = (action_t *) pIter->data;
2744 
2745                 crm_err("Ordering %s before %s", first->uuid, probe->uuid);
2746                 order_actions(first, probe, pe_order_optional);
2747             }
2748         }
2749     }
2750 #endif
2751 }
2752 
2753 static void
2754 order_probes(pe_working_set_t * data_set)
2755 {
2756     order_first_probes(data_set);
2757     order_then_probes(data_set);
2758 }
2759 
2760 gboolean
2761 stage7(pe_working_set_t * data_set)
2762 {
2763     GListPtr gIter = NULL;
2764 
2765     crm_trace("Applying ordering constraints");
2766 
2767     /* Don't ask me why, but apparently they need to be processed in
2768      * the order they were created in... go figure.
2769      *
2770      * Also, g_list_append() has horrendous performance characteristics,
2771      * so we need to use g_list_prepend() and then reverse the list here.
2772      */
2773     data_set->ordering_constraints = g_list_reverse(data_set->ordering_constraints);
2774 
2775     for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) {
2776         pe__ordering_t *order = gIter->data;
2777         resource_t *rsc = order->lh_rsc;
2778 
2779         crm_trace("Applying ordering constraint: %d", order->id);
2780 
2781         if (rsc != NULL) {
2782             crm_trace("rsc_action-to-*");
2783             rsc_order_first(rsc, order, data_set);
2784             continue;
2785         }
2786 
2787         rsc = order->rh_rsc;
2788         if (rsc != NULL) {
2789             crm_trace("action-to-rsc_action");
2790             rsc_order_then(order->lh_action, rsc, order);
2791 
2792         } else {
2793             crm_trace("action-to-action");
2794             order_actions(order->lh_action, order->rh_action, order->type);
2795         }
2796     }
2797 
2798     for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
2799         action_t *action = (action_t *) gIter->data;
2800 
2801         update_colo_start_chain(action);
2802     }
2803 
2804     crm_trace("Ordering probes");
2805     order_probes(data_set);
2806 
2807     crm_trace("Updating %d actions", g_list_length(data_set->actions));
2808     for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
2809         action_t *action = (action_t *) gIter->data;
2810 
2811         update_action(action);
2812     }
2813 
2814     LogNodeActions(data_set, FALSE);
2815     for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
2816         resource_t *rsc = (resource_t *) gIter->data;
2817 
2818         LogActions(rsc, data_set, FALSE);
2819     }
2820     return TRUE;
2821 }
2822 
2823 int transition_id = -1;
2824 
2825 /*
2826  * Create a dependency graph to send to the transitioner (via the CRMd)
2827  */
2828 gboolean
2829 stage8(pe_working_set_t * data_set)
2830 {
2831     GListPtr gIter = NULL;
2832     const char *value = NULL;
2833 
2834     transition_id++;
2835     crm_trace("Creating transition graph %d.", transition_id);
2836 
2837     data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH);
2838 
2839     value = pe_pref(data_set->config_hash, "cluster-delay");
2840     crm_xml_add(data_set->graph, "cluster-delay", value);
2841 
2842     value = pe_pref(data_set->config_hash, "stonith-timeout");
2843     crm_xml_add(data_set->graph, "stonith-timeout", value);
2844 
2845     crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY");
2846 
2847     if (is_set(data_set->flags, pe_flag_start_failure_fatal)) {
2848         crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY");
2849     } else {
2850         crm_xml_add(data_set->graph, "failed-start-offset", "1");
2851     }
2852 
2853     value = pe_pref(data_set->config_hash, "batch-limit");
2854     crm_xml_add(data_set->graph, "batch-limit", value);
2855 
2856     crm_xml_add_int(data_set->graph, "transition_id", transition_id);
2857 
2858     value = pe_pref(data_set->config_hash, "migration-limit");
2859     if (crm_int_helper(value, NULL) > 0) {
2860         crm_xml_add(data_set->graph, "migration-limit", value);
2861     }
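
    /* The graph root built above looks roughly like the following
     * (attribute values are illustrative, not definitive):
     *
     *   <transition_graph cluster-delay="60s" stonith-timeout="60s"
     *                     failed-stop-offset="INFINITY"
     *                     failed-start-offset="INFINITY"
     *                     batch-limit="0" transition_id="0"/>
     */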
    gIter = data_set->resources;
    for (; gIter != NULL; gIter = gIter->next) {
        resource_t *rsc = (resource_t *) gIter->data;

        pe_rsc_trace(rsc, "processing actions for rsc=%s", rsc->id);
        rsc->cmds->expand(rsc, data_set);
    }

    crm_log_xml_trace(data_set->graph, "created resource-driven action list");

    /* pseudo action to distribute list of nodes with maintenance state update */
    add_maintenance_update(data_set);

    /* catch any non-resource specific actions */
    crm_trace("processing non-resource actions");

    gIter = data_set->actions;
    for (; gIter != NULL; gIter = gIter->next) {
        action_t *action = (action_t *) gIter->data;

        if (action->rsc
            && action->node
            && action->node->details->shutdown
            && is_not_set(action->rsc->flags, pe_rsc_maintenance)
            && is_not_set(action->flags, pe_action_optional)
            && is_not_set(action->flags, pe_action_runnable)
            && crm_str_eq(action->task, RSC_STOP, TRUE)
            ) {
            /* Eventually we should just ignore the 'fence' case, but for now
             * it's the best way to detect (in CTS) when CIB resource updates
             * are being lost.
             */
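            /* Illustrative example of the message below, assuming a
             * hypothetical resource "myrsc" blocked from stopping:
             *   Cannot shut down node 'node1' because of myrsc: blocked (myrsc_stop_0)
             */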
            if (is_set(data_set->flags, pe_flag_have_quorum)
                || data_set->no_quorum_policy == no_quorum_ignore) {
                crm_crit("Cannot %s node '%s' because of %s:%s%s (%s)",
                         action->node->details->unclean ? "fence" : "shut down",
                         action->node->details->uname, action->rsc->id,
                         is_not_set(action->rsc->flags, pe_rsc_managed) ? " unmanaged" : " blocked",
                         is_set(action->rsc->flags, pe_rsc_failed) ? " failed" : "",
                         action->uuid);
            }
        }

        graph_element_from_action(action, data_set);
    }

    crm_log_xml_trace(data_set->graph, "created generic action list");
    crm_trace("Created transition graph %d.", transition_id);

    return TRUE;
}

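/*
 * Log the planned node-level actions (shutdowns and fences); per-resource
 * actions are logged by LogActions() instead. With terminal set, the
 * summary goes to stdout, for example (output is illustrative):
 *
 *    * Shutdown node1
 *    * Fence (reboot) node2 'node is unclean'
 */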
void
LogNodeActions(pe_working_set_t * data_set, gboolean terminal)
{
    GListPtr gIter = NULL;

    for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
        char *node_name = NULL;
        char *task = NULL;
        action_t *action = (action_t *) gIter->data;

        if (action->rsc != NULL) {
            continue;
        } else if (is_set(action->flags, pe_action_optional)) {
            continue;
        }

        if (is_container_remote_node(action->node)) {
            node_name = crm_strdup_printf("%s (resource: %s)",
                                          action->node->details->uname,
                                          action->node->details->remote_rsc->container->id);
        } else if (action->node) {
            node_name = crm_strdup_printf("%s", action->node->details->uname);
        }

        if (safe_str_eq(action->task, CRM_OP_SHUTDOWN)) {
            task = strdup("Shutdown");
        } else if (safe_str_eq(action->task, CRM_OP_FENCE)) {
            const char *op = g_hash_table_lookup(action->meta, "stonith_action");

            task = crm_strdup_printf("Fence (%s)", op);
        }

        if (task == NULL) {
            /* Nothing to report */
        } else if (terminal && action->reason) {
            printf(" * %s %s '%s'\n", task, node_name, action->reason);
        } else if (terminal) {
            printf(" * %s %s\n", task, node_name);
        } else if (action->reason) {
            crm_notice(" * %s %s '%s'", task, node_name, action->reason);
        } else {
            crm_notice(" * %s %s", task, node_name);
        }

        free(node_name);
        free(task);
    }
}