1 /*
2 * Copyright 2004-2019 Andrew Beekhof <andrew@beekhof.net>
3 *
4 * This source code is licensed under the GNU General Public License version 2
5 * or later (GPLv2+) WITHOUT ANY WARRANTY.
6 */
7
8 #include <crm_internal.h>
9
10 #include <sys/param.h>
11
12 #include <crm/crm.h>
13 #include <crm/cib.h>
14 #include <crm/msg_xml.h>
15 #include <crm/common/xml.h>
16
17 #include <glib.h>
18
19 #include <crm/pengine/status.h>
20 #include <pengine.h>
21 #include <allocate.h>
22 #include <utils.h>
23
24 CRM_TRACE_INIT_DATA(pe_allocate);
25
26 void set_alloc_actions(pe_working_set_t * data_set);
27 extern void ReloadRsc(resource_t * rsc, node_t *node, pe_working_set_t * data_set);
28 extern gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set);
29 static void apply_remote_node_ordering(pe_working_set_t *data_set);
30 static enum remote_connection_state get_remote_node_state(pe_node_t *node);
31
// States a remote node's connection may be in (see get_remote_node_state())
enum remote_connection_state {
    remote_state_unknown = 0,
    remote_state_alive = 1,
    remote_state_resting = 2,
    remote_state_failed = 3,
    remote_state_stopped = 4
};
39
40 static const char *
state2text(enum remote_connection_state state)41 state2text(enum remote_connection_state state)
42 {
43 switch (state) {
44 case remote_state_unknown:
45 return "unknown";
46 case remote_state_alive:
47 return "alive";
48 case remote_state_resting:
49 return "resting";
50 case remote_state_failed:
51 return "failed";
52 case remote_state_stopped:
53 return "stopped";
54 }
55
56 return "impossible";
57 }
58
/* Allocation method tables, indexed by resource variant.
 * complex_set_cmds() assigns rsc->cmds = &resource_class_alloc_functions[rsc->variant].
 */
resource_alloc_functions_t resource_class_alloc_functions[] = {
    {
        // Primitive (native) resources
        pcmk__native_merge_weights,
        pcmk__native_allocate,
        native_create_actions,
        native_create_probe,
        native_internal_constraints,
        native_rsc_colocation_lh,
        native_rsc_colocation_rh,
        native_rsc_location,
        native_action_flags,
        native_update_actions,
        native_expand,
        native_append_meta,
    },
    {
        // Groups
        pcmk__group_merge_weights,
        pcmk__group_allocate,
        group_create_actions,
        native_create_probe,
        group_internal_constraints,
        group_rsc_colocation_lh,
        group_rsc_colocation_rh,
        group_rsc_location,
        group_action_flags,
        group_update_actions,
        group_expand,
        group_append_meta,
    },
    {
        // Clones
        pcmk__native_merge_weights,
        pcmk__clone_allocate,
        clone_create_actions,
        clone_create_probe,
        clone_internal_constraints,
        clone_rsc_colocation_lh,
        clone_rsc_colocation_rh,
        clone_rsc_location,
        clone_action_flags,
        container_update_actions,
        clone_expand,
        clone_append_meta,
    },
    {
        // Promotable (master/slave) clones
        master_merge_weights,
        pcmk__set_instance_roles,
        master_create_actions,
        clone_create_probe,
        master_internal_constraints,
        clone_rsc_colocation_lh,
        master_rsc_colocation_rh,
        clone_rsc_location,
        clone_action_flags,
        container_update_actions,
        clone_expand,
        master_append_meta,
    },
    {
        // Bundles (containers)
        pcmk__native_merge_weights,
        pcmk__bundle_allocate,
        container_create_actions,
        container_create_probe,
        container_internal_constraints,
        container_rsc_colocation_lh,
        container_rsc_colocation_rh,
        container_rsc_location,
        container_action_flags,
        container_update_actions,
        container_expand,
        container_append_meta,
    }
};
131
132 gboolean
update_action_flags(action_t * action,enum pe_action_flags flags,const char * source,int line)133 update_action_flags(action_t * action, enum pe_action_flags flags, const char *source, int line)
134 {
135 static unsigned long calls = 0;
136 gboolean changed = FALSE;
137 gboolean clear = is_set(flags, pe_action_clear);
138 enum pe_action_flags last = action->flags;
139
140 if (clear) {
141 action->flags = crm_clear_bit(source, line, action->uuid, action->flags, flags);
142 } else {
143 action->flags = crm_set_bit(source, line, action->uuid, action->flags, flags);
144 }
145
146 if (last != action->flags) {
147 calls++;
148 changed = TRUE;
149 /* Useful for tracking down _who_ changed a specific flag */
150 /* CRM_ASSERT(calls != 534); */
151 clear_bit(flags, pe_action_clear);
152 crm_trace("%s on %s: %sset flags 0x%.6x (was 0x%.6x, now 0x%.6x, %lu, %s)",
153 action->uuid, action->node ? action->node->details->uname : "[none]",
154 clear ? "un-" : "", flags, last, action->flags, calls, source);
155 }
156
157 return changed;
158 }
159
160 static gboolean
check_rsc_parameters(resource_t * rsc,node_t * node,xmlNode * rsc_entry,gboolean active_here,pe_working_set_t * data_set)161 check_rsc_parameters(resource_t * rsc, node_t * node, xmlNode * rsc_entry,
162 gboolean active_here, pe_working_set_t * data_set)
163 {
164 int attr_lpc = 0;
165 gboolean force_restart = FALSE;
166 gboolean delete_resource = FALSE;
167 gboolean changed = FALSE;
168
169 const char *value = NULL;
170 const char *old_value = NULL;
171
172 const char *attr_list[] = {
173 XML_ATTR_TYPE,
174 XML_AGENT_ATTR_CLASS,
175 XML_AGENT_ATTR_PROVIDER
176 };
177
178 for (; attr_lpc < DIMOF(attr_list); attr_lpc++) {
179 value = crm_element_value(rsc->xml, attr_list[attr_lpc]);
180 old_value = crm_element_value(rsc_entry, attr_list[attr_lpc]);
181 if (value == old_value /* i.e. NULL */
182 || crm_str_eq(value, old_value, TRUE)) {
183 continue;
184 }
185
186 changed = TRUE;
187 trigger_unfencing(rsc, node, "Device definition changed", NULL, data_set);
188 if (active_here) {
189 force_restart = TRUE;
190 crm_notice("Forcing restart of %s on %s, %s changed: %s -> %s",
191 rsc->id, node->details->uname, attr_list[attr_lpc],
192 crm_str(old_value), crm_str(value));
193 }
194 }
195 if (force_restart) {
196 /* make sure the restart happens */
197 stop_action(rsc, node, FALSE);
198 set_bit(rsc->flags, pe_rsc_start_pending);
199 delete_resource = TRUE;
200
201 } else if (changed) {
202 delete_resource = TRUE;
203 }
204 return delete_resource;
205 }
206
/*!
 * \internal
 * \brief Schedule cancellation of a recurring operation found in the history
 *
 * \param[in] rsc          Resource the history entry belongs to
 * \param[in] xml_op       lrm_rsc_op XML entry for the recurring operation
 * \param[in] active_node  Node on which the operation should be cancelled
 * \param[in] reason       Human-readable reason for the cancellation (logged)
 * \param[in] data_set     Cluster working set
 */
static void
CancelXmlOp(resource_t * rsc, xmlNode * xml_op, node_t * active_node,
            const char *reason, pe_working_set_t * data_set)
{
    int interval = 0;
    action_t *cancel = NULL;

    char *key = NULL;
    const char *task = NULL;
    const char *call_id = NULL;
    const char *interval_s = NULL;

    CRM_CHECK(xml_op != NULL, return);
    CRM_CHECK(active_node != NULL, return);

    task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
    call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID);
    interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL);

    interval = crm_parse_int(interval_s, "0");

    /* we need to reconstruct the key because of the way we used to construct resource IDs */
    key = generate_op_key(rsc->id, task, interval);

    crm_info("Action %s on %s will be stopped: %s",
             key, active_node->details->uname, reason ? reason : "unknown");

    /* TODO: This looks highly dangerous if we ever try to schedule 'key' too */
    // Pass a copy of key, since key itself is freed at the end of this function
    cancel = custom_action(rsc, strdup(key), RSC_CANCEL, active_node, FALSE, TRUE, data_set);

    // Replace the action's task with a cancellation of the original task
    free(cancel->task);
    free(cancel->cancel_task);
    cancel->task = strdup(RSC_CANCEL);
    cancel->cancel_task = strdup(task);

    // Record which operation (task/call/interval) is being cancelled
    add_hash_param(cancel->meta, XML_LRM_ATTR_TASK, task);
    add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id);
    add_hash_param(cancel->meta, XML_LRM_ATTR_INTERVAL, interval_s);

    // Order the cancellation relative to any stop of the resource
    custom_action_order(rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pe_order_optional, data_set);
    free(key);
    key = NULL;
}
250
/*!
 * \internal
 * \brief Check whether an action's definition changed since it was recorded
 *
 * Compares the digest(s) recorded in an operation history entry against
 * digests calculated from the current resource configuration, and schedules
 * a restart or reload (or cancels an orphaned recurring op) as appropriate.
 *
 * \param[in] rsc          Resource the history entry belongs to
 * \param[in] active_node  Node the history entry came from
 * \param[in] xml_op       lrm_rsc_op history entry to check
 * \param[in] data_set     Cluster working set
 *
 * \return TRUE if the action's definition changed (or the recurring op was
 *         found to be orphaned), otherwise FALSE
 */
static gboolean
check_action_definition(resource_t * rsc, node_t * active_node, xmlNode * xml_op,
                        pe_working_set_t * data_set)
{
    char *key = NULL;
    int interval = 0;
    const char *interval_s = NULL;
    const op_digest_cache_t *digest_data = NULL;
    gboolean did_change = FALSE;

    const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
    const char *digest_secure = NULL;

    CRM_CHECK(active_node != NULL, return FALSE);

    interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL);
    interval = crm_parse_int(interval_s, "0");

    if (interval > 0) {
        xmlNode *op_match = NULL;

        /* we need to reconstruct the key because of the way we used to construct resource IDs */
        key = generate_op_key(rsc->id, task, interval);

        pe_rsc_trace(rsc, "Checking parameters for %s", key);
        op_match = find_rsc_op_entry(rsc, key);

        // A recurring op with no matching configured operation is an orphan
        if (op_match == NULL && is_set(data_set->flags, pe_flag_stop_action_orphans)) {
            CancelXmlOp(rsc, xml_op, active_node, "orphan", data_set);
            free(key);
            return TRUE;

        } else if (op_match == NULL) {
            pe_rsc_debug(rsc, "Orphan action detected: %s on %s", key, active_node->details->uname);
            free(key);
            return TRUE;
        }
        free(key);
        key = NULL;
    }

    crm_trace("Testing %s_%s_%d on %s",
              rsc->id, task, interval, active_node->details->uname);
    if (interval == 0 && safe_str_eq(task, RSC_STATUS)) {
        /* Reload based on the start action not a probe */
        task = RSC_START;

    } else if (interval == 0 && safe_str_eq(task, RSC_MIGRATED)) {
        /* Reload based on the start action not a migrate */
        task = RSC_START;
    } else if (interval == 0 && safe_str_eq(task, RSC_PROMOTE)) {
        /* Reload based on the start action not a promote */
        task = RSC_START;
    }

    digest_data = rsc_action_digest_cmp(rsc, xml_op, active_node, data_set);

    if(is_set(data_set->flags, pe_flag_sanitized)) {
        digest_secure = crm_element_value(xml_op, XML_LRM_ATTR_SECURE_DIGEST);
    }

    /* If only "private" (sanitized) parameters changed, report it but
     * schedule nothing */
    if(digest_data->rc != RSC_DIGEST_MATCH
       && digest_secure
       && digest_data->digest_secure_calc
       && strcmp(digest_data->digest_secure_calc, digest_secure) == 0) {
        if (is_set(data_set->flags, pe_flag_stdout)) {
            printf("Only 'private' parameters to %s_%s_%d on %s changed: %s\n",
                   rsc->id, task, interval, active_node->details->uname,
                   crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
        }

    } else if (digest_data->rc == RSC_DIGEST_RESTART) {
        /* Changes that force a restart */
        pe_action_t *required = NULL;

        did_change = TRUE;
        key = generate_op_key(rsc->id, task, interval);   // consumed by custom_action(), not freed here
        crm_log_xml_info(digest_data->params_restart, "params:restart");
        required = custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set);
        pe_action_set_flag_reason(__FUNCTION__, __LINE__, required, NULL,
                                  "resource definition change", pe_action_optional, TRUE);

        trigger_unfencing(rsc, active_node, "Device parameters changed", NULL, data_set);

    } else if ((digest_data->rc == RSC_DIGEST_ALL) || (digest_data->rc == RSC_DIGEST_UNKNOWN)) {
        /* Changes that can potentially be handled by a reload */
        const char *digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST);

        did_change = TRUE;
        trigger_unfencing(rsc, active_node, "Device parameters changed (reload)", NULL, data_set);
        crm_log_xml_info(digest_data->params_all, "params:reload");
        key = generate_op_key(rsc->id, task, interval);

        if (interval > 0) {
            action_t *op = NULL;

#if 0
            /* Always reload/restart the entire resource */
            ReloadRsc(rsc, active_node, data_set);
#else
            /* Re-sending the recurring op is sufficient - the old one will be cancelled automatically */
            op = custom_action(rsc, key, task, active_node, TRUE, TRUE, data_set);
            set_bit(op->flags, pe_action_reschedule);
#endif

        } else if (digest_restart && rsc->isolation_wrapper == NULL && (uber_parent(rsc))->isolation_wrapper == NULL) {
            pe_rsc_trace(rsc, "Reloading '%s' action for resource %s", task, rsc->id);

            /* Reload this resource */
            ReloadRsc(rsc, active_node, data_set);
            free(key);

        } else {
            pe_action_t *required = NULL;
            pe_rsc_trace(rsc, "Resource %s doesn't know how to reload", rsc->id);

            /* Re-send the start/demote/promote op
             * Recurring ops will be detected independently
             */
            required = custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set);
            pe_action_set_flag_reason(__FUNCTION__, __LINE__, required, NULL,
                                      "resource definition change", pe_action_optional, TRUE);
        }
    }

    return did_change;
}
378
379 /*!
380 * \internal
381 * \brief Do deferred action checks after allocation
382 *
383 * \param[in] data_set Working set for cluster
384 */
385 static void
check_params(pe_resource_t * rsc,pe_node_t * node,xmlNode * rsc_op,enum pe_check_parameters check,pe_working_set_t * data_set)386 check_params(pe_resource_t *rsc, pe_node_t *node, xmlNode *rsc_op,
387 enum pe_check_parameters check, pe_working_set_t *data_set)
388 {
389 const char *reason = NULL;
390 op_digest_cache_t *digest_data = NULL;
391
392 switch (check) {
393 case pe_check_active:
394 if (check_action_definition(rsc, node, rsc_op, data_set)
395 && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL,
396 data_set)) {
397
398 reason = "action definition changed";
399 }
400 break;
401
402 case pe_check_last_failure:
403 digest_data = rsc_action_digest_cmp(rsc, rsc_op, node, data_set);
404 switch (digest_data->rc) {
405 case RSC_DIGEST_UNKNOWN:
406 crm_trace("Resource %s history entry %s on %s has no digest to compare",
407 rsc->id, ID(rsc_op), node->details->id);
408 break;
409 case RSC_DIGEST_MATCH:
410 break;
411 default:
412 reason = "resource parameters have changed";
413 break;
414 }
415 break;
416 }
417
418 if (reason) {
419 pe__clear_failcount(rsc, node, reason, data_set);
420 }
421 }
422
/*!
 * \internal
 * \brief Check one resource's operation history on one node
 *
 * Deletes history for orphaned or re-defined resources, cancels recurring
 * operations in maintenance mode, and checks whether recorded actions'
 * definitions have changed (clearing fail counts where appropriate).
 *
 * \param[in] rsc_entry  lrm_resource XML from the node's status section
 * \param[in] rsc        Resource the entry corresponds to
 * \param[in] node       Node the history came from
 * \param[in] data_set   Cluster working set
 */
static void
check_actions_for(xmlNode * rsc_entry, resource_t * rsc, node_t * node, pe_working_set_t * data_set)
{
    GListPtr gIter = NULL;
    int offset = -1;
    int interval = 0;
    int stop_index = 0;
    int start_index = 0;

    const char *task = NULL;
    const char *interval_s = NULL;

    xmlNode *rsc_op = NULL;
    GListPtr op_list = NULL;
    GListPtr sorted_op_list = NULL;

    CRM_CHECK(node != NULL, return);

    if (is_set(rsc->flags, pe_rsc_orphan)) {
        resource_t *parent = uber_parent(rsc);

        // Delete history for orphans, except instances of anonymous clones
        if(parent == NULL
           || pe_rsc_is_clone(parent) == FALSE
           || is_set(parent->flags, pe_rsc_unique)) {
            pe_rsc_trace(rsc, "Skipping param check for %s and deleting: orphan", rsc->id);
            DeleteRsc(rsc, node, FALSE, data_set);
        } else {
            pe_rsc_trace(rsc, "Skipping param check for %s (orphan clone)", rsc->id);
        }
        return;

    } else if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) {
        // Resource no longer active here: delete history if definition changed
        if (check_rsc_parameters(rsc, node, rsc_entry, FALSE, data_set)) {
            DeleteRsc(rsc, node, FALSE, data_set);
        }
        pe_rsc_trace(rsc, "Skipping param check for %s: no longer active on %s",
                     rsc->id, node->details->uname);
        return;
    }

    pe_rsc_trace(rsc, "Processing %s on %s", rsc->id, node->details->uname);

    if (check_rsc_parameters(rsc, node, rsc_entry, TRUE, data_set)) {
        DeleteRsc(rsc, node, FALSE, data_set);
    }

    // Collect this resource's operation history entries on this node
    for (rsc_op = __xml_first_child_element(rsc_entry); rsc_op != NULL;
         rsc_op = __xml_next_element(rsc_op)) {

        if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
            op_list = g_list_prepend(op_list, rsc_op);
        }
    }

    sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
    calculate_active_ops(sorted_op_list, &start_index, &stop_index);

    for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
        xmlNode *rsc_op = (xmlNode *) gIter->data;

        offset++;

        if (start_index < stop_index) {
            /* stopped */
            continue;
        } else if (offset < start_index) {
            /* action occurred prior to a start */
            continue;
        }

        task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);

        interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL);
        interval = crm_parse_int(interval_s, "0");

        if (interval > 0 &&
            (is_set(rsc->flags, pe_rsc_maintenance) || node->details->maintenance)) {
            // Maintenance mode cancels recurring operations
            CancelXmlOp(rsc, rsc_op, node, "maintenance mode", data_set);

        } else if ((interval > 0)
                   || safe_str_eq(task, RSC_STATUS)
                   || safe_str_eq(task, RSC_START)
                   || safe_str_eq(task, RSC_PROMOTE)
                   || safe_str_eq(task, RSC_MIGRATED)) {

            /* If a resource operation failed, and the operation's definition
             * has changed, clear any fail count so they can be retried fresh.
             */

            if (container_fix_remote_addr(rsc)) {
                /* We haven't allocated resources to nodes yet, so if the
                 * REMOTE_CONTAINER_HACK is used, we may calculate the digest
                 * based on the literal "#uname" value rather than the properly
                 * substituted value. That would mistakenly make the action
                 * definition appear to have been changed. Defer the check until
                 * later in this case.
                 */
                pe__add_param_check(rsc_op, rsc, node, pe_check_active,
                                    data_set);

            } else if (check_action_definition(rsc, node, rsc_op, data_set)
                       && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL,
                                           data_set)) {
                pe__clear_failcount(rsc, node, "action definition changed",
                                    data_set);
            }
        }
    }
    g_list_free(sorted_op_list);
}
533
534 static GListPtr
find_rsc_list(GListPtr result,resource_t * rsc,const char * id,gboolean renamed_clones,gboolean partial,pe_working_set_t * data_set)535 find_rsc_list(GListPtr result, resource_t * rsc, const char *id, gboolean renamed_clones,
536 gboolean partial, pe_working_set_t * data_set)
537 {
538 GListPtr gIter = NULL;
539 gboolean match = FALSE;
540
541 if (id == NULL) {
542 return NULL;
543 }
544
545 if (rsc == NULL) {
546 if (data_set == NULL) {
547 return NULL;
548 }
549 for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
550 pe_resource_t *child = (pe_resource_t *) gIter->data;
551
552 result = find_rsc_list(result, child, id, renamed_clones, partial,
553 NULL);
554 }
555 return result;
556 }
557
558 if (partial) {
559 if (strstr(rsc->id, id)) {
560 match = TRUE;
561
562 } else if (renamed_clones && rsc->clone_name && strstr(rsc->clone_name, id)) {
563 match = TRUE;
564 }
565
566 } else {
567 if (strcmp(rsc->id, id) == 0) {
568 match = TRUE;
569
570 } else if (renamed_clones && rsc->clone_name && strcmp(rsc->clone_name, id) == 0) {
571 match = TRUE;
572 }
573 }
574
575 if (match) {
576 result = g_list_prepend(result, rsc);
577 }
578
579 if (rsc->children) {
580 gIter = rsc->children;
581 for (; gIter != NULL; gIter = gIter->next) {
582 resource_t *child = (resource_t *) gIter->data;
583
584 result = find_rsc_list(result, child, id, renamed_clones, partial, NULL);
585 }
586 }
587
588 return result;
589 }
590
/*!
 * \internal
 * \brief Check all nodes' resource histories for configuration changes
 *
 * Walks each node_state entry in the CIB status section and, for every
 * resource with recorded operations there, delegates to check_actions_for().
 *
 * \param[in] data_set  Cluster working set
 */
static void
check_actions(pe_working_set_t * data_set)
{
    const char *id = NULL;
    node_t *node = NULL;
    xmlNode *lrm_rscs = NULL;
    xmlNode *status = get_object_root(XML_CIB_TAG_STATUS, data_set->input);

    xmlNode *node_state = NULL;

    for (node_state = __xml_first_child_element(status); node_state != NULL;
         node_state = __xml_next_element(node_state)) {
        if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
            id = crm_element_value(node_state, XML_ATTR_ID);
            // Drill down to the node's resource history section
            lrm_rscs = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
            lrm_rscs = find_xml_node(lrm_rscs, XML_LRM_TAG_RESOURCES, FALSE);

            node = pe_find_node_id(data_set->nodes, id);

            if (node == NULL) {
                continue;

                /* Still need to check actions for a maintenance node to cancel existing monitor operations */
            } else if (can_run_resources(node) == FALSE && node->details->maintenance == FALSE) {
                crm_trace("Skipping param check for %s: can't run resources",
                          node->details->uname);
                continue;
            }

            crm_trace("Processing node %s", node->details->uname);
            if (node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) {
                xmlNode *rsc_entry = NULL;

                for (rsc_entry = __xml_first_child_element(lrm_rscs);
                     rsc_entry != NULL;
                     rsc_entry = __xml_next_element(rsc_entry)) {

                    if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {

                        if (xml_has_children(rsc_entry)) {
                            GListPtr gIter = NULL;
                            GListPtr result = NULL;
                            const char *rsc_id = ID(rsc_entry);

                            CRM_CHECK(rsc_id != NULL, return);

                            // May match multiple resources (e.g. renamed clone instances)
                            result = find_rsc_list(NULL, NULL, rsc_id, TRUE, FALSE, data_set);
                            for (gIter = result; gIter != NULL; gIter = gIter->next) {
                                resource_t *rsc = (resource_t *) gIter->data;

                                // Skip non-primitive resources
                                if (rsc->variant != pe_native) {
                                    continue;
                                }
                                check_actions_for(rsc_entry, rsc, node, data_set);
                            }
                            g_list_free(result);
                        }
                    }
                }
            }
        }
    }
}
654
655 static void
apply_placement_constraints(pe_working_set_t * data_set)656 apply_placement_constraints(pe_working_set_t * data_set)
657 {
658 for (GList *gIter = data_set->placement_constraints;
659 gIter != NULL; gIter = gIter->next) {
660 pe__location_t *cons = gIter->data;
661
662 cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons);
663 }
664 }
665
666 static gboolean
failcount_clear_action_exists(node_t * node,resource_t * rsc)667 failcount_clear_action_exists(node_t * node, resource_t * rsc)
668 {
669 gboolean rc = FALSE;
670 char *key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0);
671 GListPtr list = find_actions_exact(rsc->actions, key, node);
672
673 if (list) {
674 rc = TRUE;
675 }
676 g_list_free(list);
677 free(key);
678
679 return rc;
680 }
681
682 /*!
683 * \internal
684 * \brief Force resource away if failures hit migration threshold
685 *
686 * \param[in,out] rsc Resource to check for failures
687 * \param[in,out] node Node to check for failures
688 * \param[in,out] data_set Cluster working set to update
689 */
690 static void
check_migration_threshold(resource_t * rsc,node_t * node,pe_working_set_t * data_set)691 check_migration_threshold(resource_t *rsc, node_t *node,
692 pe_working_set_t *data_set)
693 {
694 int fail_count, countdown;
695 resource_t *failed;
696
697 /* Migration threshold of 0 means never force away */
698 if (rsc->migration_threshold == 0) {
699 return;
700 }
701
702 // If we're ignoring failures, also ignore the migration threshold
703 if (is_set(rsc->flags, pe_rsc_failure_ignored)) {
704 return;
705 }
706
707 /* If there are no failures, there's no need to force away */
708 fail_count = pe_get_failcount(node, rsc, NULL,
709 pe_fc_effective|pe_fc_fillers, NULL,
710 data_set);
711 if (fail_count <= 0) {
712 return;
713 }
714
715 /* How many more times recovery will be tried on this node */
716 countdown = QB_MAX(rsc->migration_threshold - fail_count, 0);
717
718 /* If failed resource has a parent, we'll force the parent away */
719 failed = rsc;
720 if (is_not_set(rsc->flags, pe_rsc_unique)) {
721 failed = uber_parent(rsc);
722 }
723
724 if (countdown == 0) {
725 resource_location(failed, node, -INFINITY, "__fail_limit__", data_set);
726 crm_warn("Forcing %s away from %s after %d failures (max=%d)",
727 failed->id, node->details->uname, fail_count,
728 rsc->migration_threshold);
729 } else {
730 crm_info("%s can fail %d more times on %s before being forced off",
731 failed->id, countdown, node->details->uname);
732 }
733 }
734
735 static void
common_apply_stickiness(resource_t * rsc,node_t * node,pe_working_set_t * data_set)736 common_apply_stickiness(resource_t * rsc, node_t * node, pe_working_set_t * data_set)
737 {
738 if (rsc->children) {
739 GListPtr gIter = rsc->children;
740
741 for (; gIter != NULL; gIter = gIter->next) {
742 resource_t *child_rsc = (resource_t *) gIter->data;
743
744 common_apply_stickiness(child_rsc, node, data_set);
745 }
746 return;
747 }
748
749 if (is_set(rsc->flags, pe_rsc_managed)
750 && rsc->stickiness != 0 && g_list_length(rsc->running_on) == 1) {
751 node_t *current = pe_find_node_id(rsc->running_on, node->details->id);
752 node_t *match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);
753
754 if (current == NULL) {
755
756 } else if (match != NULL || is_set(data_set->flags, pe_flag_symmetric_cluster)) {
757 resource_t *sticky_rsc = rsc;
758
759 resource_location(sticky_rsc, node, rsc->stickiness, "stickiness", data_set);
760 pe_rsc_debug(sticky_rsc, "Resource %s: preferring current location"
761 " (node=%s, weight=%d)", sticky_rsc->id,
762 node->details->uname, rsc->stickiness);
763 } else {
764 GHashTableIter iter;
765 node_t *nIter = NULL;
766
767 pe_rsc_debug(rsc, "Ignoring stickiness for %s: the cluster is asymmetric"
768 " and node %s is not explicitly allowed", rsc->id, node->details->uname);
769 g_hash_table_iter_init(&iter, rsc->allowed_nodes);
770 while (g_hash_table_iter_next(&iter, NULL, (void **)&nIter)) {
771 crm_err("%s[%s] = %d", rsc->id, nIter->details->uname, nIter->weight);
772 }
773 }
774 }
775
776 /* Check the migration threshold only if a failcount clear action
777 * has not already been placed for this resource on the node.
778 * There is no sense in potentially forcing the resource from this
779 * node if the failcount is being reset anyway.
780 *
781 * @TODO A clear_failcount operation can be scheduled in stage4() via
782 * check_actions_for(), or in stage5() via check_params(). This runs in
783 * stage2(), so it cannot detect those, meaning we might check the migration
784 * threshold when we shouldn't -- worst case, we stop or move the resource,
785 * then move it back next transition.
786 */
787 if (failcount_clear_action_exists(node, rsc) == FALSE) {
788 check_migration_threshold(rsc, node, data_set);
789 }
790 }
791
792 void
complex_set_cmds(resource_t * rsc)793 complex_set_cmds(resource_t * rsc)
794 {
795 GListPtr gIter = rsc->children;
796
797 rsc->cmds = &resource_class_alloc_functions[rsc->variant];
798
799 for (; gIter != NULL; gIter = gIter->next) {
800 resource_t *child_rsc = (resource_t *) gIter->data;
801
802 complex_set_cmds(child_rsc);
803 }
804 }
805
806 void
set_alloc_actions(pe_working_set_t * data_set)807 set_alloc_actions(pe_working_set_t * data_set)
808 {
809
810 GListPtr gIter = data_set->resources;
811
812 for (; gIter != NULL; gIter = gIter->next) {
813 resource_t *rsc = (resource_t *) gIter->data;
814
815 complex_set_cmds(rsc);
816 }
817 }
818
819 static void
calculate_system_health(gpointer gKey,gpointer gValue,gpointer user_data)820 calculate_system_health(gpointer gKey, gpointer gValue, gpointer user_data)
821 {
822 const char *key = (const char *)gKey;
823 const char *value = (const char *)gValue;
824 int *system_health = (int *)user_data;
825
826 if (!gKey || !gValue || !user_data) {
827 return;
828 }
829
830 if (crm_starts_with(key, "#health")) {
831 int score;
832
833 /* Convert the value into an integer */
834 score = char2score(value);
835
836 /* Add it to the running total */
837 *system_health = merge_weights(score, *system_health);
838 }
839 }
840
841 static gboolean
apply_system_health(pe_working_set_t * data_set)842 apply_system_health(pe_working_set_t * data_set)
843 {
844 GListPtr gIter = NULL;
845 const char *health_strategy = pe_pref(data_set->config_hash, "node-health-strategy");
846 int base_health = 0;
847
848 if (health_strategy == NULL || safe_str_eq(health_strategy, "none")) {
849 /* Prevent any accidental health -> score translation */
850 node_score_red = 0;
851 node_score_yellow = 0;
852 node_score_green = 0;
853 return TRUE;
854
855 } else if (safe_str_eq(health_strategy, "migrate-on-red")) {
856
857 /* Resources on nodes which have health values of red are
858 * weighted away from that node.
859 */
860 node_score_red = -INFINITY;
861 node_score_yellow = 0;
862 node_score_green = 0;
863
864 } else if (safe_str_eq(health_strategy, "only-green")) {
865
866 /* Resources on nodes which have health values of red or yellow
867 * are forced away from that node.
868 */
869 node_score_red = -INFINITY;
870 node_score_yellow = -INFINITY;
871 node_score_green = 0;
872
873 } else if (safe_str_eq(health_strategy, "progressive")) {
874 /* Same as the above, but use the r/y/g scores provided by the user
875 * Defaults are provided by the pe_prefs table
876 * Also, custom health "base score" can be used
877 */
878 base_health = crm_parse_int(pe_pref(data_set->config_hash, "node-health-base"), "0");
879
880 } else if (safe_str_eq(health_strategy, "custom")) {
881
882 /* Requires the admin to configure the rsc_location constaints for
883 * processing the stored health scores
884 */
885 /* TODO: Check for the existence of appropriate node health constraints */
886 return TRUE;
887
888 } else {
889 crm_err("Unknown node health strategy: %s", health_strategy);
890 return FALSE;
891 }
892
893 crm_info("Applying automated node health strategy: %s", health_strategy);
894
895 for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
896 int system_health = base_health;
897 node_t *node = (node_t *) gIter->data;
898
899 /* Search through the node hash table for system health entries. */
900 g_hash_table_foreach(node->details->attrs, calculate_system_health, &system_health);
901
902 crm_info(" Node %s has an combined system health of %d",
903 node->details->uname, system_health);
904
905 /* If the health is non-zero, then create a new rsc2node so that the
906 * weight will be added later on.
907 */
908 if (system_health != 0) {
909
910 GListPtr gIter2 = data_set->resources;
911
912 for (; gIter2 != NULL; gIter2 = gIter2->next) {
913 resource_t *rsc = (resource_t *) gIter2->data;
914
915 rsc2node_new(health_strategy, rsc, system_health, NULL, node, data_set);
916 }
917 }
918 }
919
920 return TRUE;
921 }
922
923 gboolean
stage0(pe_working_set_t * data_set)924 stage0(pe_working_set_t * data_set)
925 {
926 xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input);
927
928 if (data_set->input == NULL) {
929 return FALSE;
930 }
931
932 if (is_set(data_set->flags, pe_flag_have_status) == FALSE) {
933 crm_trace("Calculating status");
934 cluster_status(data_set);
935 }
936
937 set_alloc_actions(data_set);
938 apply_system_health(data_set);
939 unpack_constraints(cib_constraints, data_set);
940
941 return TRUE;
942 }
943
944 /*
945 * Check nodes for resources started outside of the LRM
946 */
947 gboolean
probe_resources(pe_working_set_t * data_set)948 probe_resources(pe_working_set_t * data_set)
949 {
950 action_t *probe_node_complete = NULL;
951
952 for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
953 node_t *node = (node_t *) gIter->data;
954 const char *probed = pe_node_attribute_raw(node, CRM_OP_PROBED);
955
956 if (node->details->online == FALSE) {
957
958 if (is_baremetal_remote_node(node) && node->details->remote_rsc
959 && (get_remote_node_state(node) == remote_state_failed)) {
960
961 pe_fence_node(data_set, node, "the connection is unrecoverable", FALSE);
962 }
963 continue;
964
965 } else if (node->details->unclean) {
966 continue;
967
968 } else if (node->details->rsc_discovery_enabled == FALSE) {
969 /* resource discovery is disabled for this node */
970 continue;
971 }
972
973 if (probed != NULL && crm_is_true(probed) == FALSE) {
974 action_t *probe_op = custom_action(NULL, crm_strdup_printf("%s-%s", CRM_OP_REPROBE, node->details->uname),
975 CRM_OP_REPROBE, node, FALSE, TRUE, data_set);
976
977 add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
978 continue;
979 }
980
981 for (GListPtr gIter2 = data_set->resources; gIter2 != NULL; gIter2 = gIter2->next) {
982 resource_t *rsc = (resource_t *) gIter2->data;
983
984 rsc->cmds->create_probe(rsc, node, probe_node_complete, FALSE, data_set);
985 }
986 }
987 return TRUE;
988 }
989
990 static void
rsc_discover_filter(resource_t * rsc,node_t * node)991 rsc_discover_filter(resource_t *rsc, node_t *node)
992 {
993 GListPtr gIter = rsc->children;
994 resource_t *top = uber_parent(rsc);
995 node_t *match;
996
997 if (rsc->exclusive_discover == FALSE && top->exclusive_discover == FALSE) {
998 return;
999 }
1000
1001 for (; gIter != NULL; gIter = gIter->next) {
1002 resource_t *child_rsc = (resource_t *) gIter->data;
1003 rsc_discover_filter(child_rsc, node);
1004 }
1005
1006 match = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
1007 if (match && match->rsc_discover_mode != pe_discover_exclusive) {
1008 match->weight = -INFINITY;
1009 }
1010 }
1011
1012 /*
1013 * \internal
1014 * \brief Stage 2 of cluster status: apply node-specific criteria
1015 *
1016 * Count known nodes, and apply location constraints, stickiness, and exclusive
1017 * resource discovery.
1018 */
1019 gboolean
stage2(pe_working_set_t * data_set)1020 stage2(pe_working_set_t * data_set)
1021 {
1022 GListPtr gIter = NULL;
1023
1024 if (is_not_set(data_set->flags, pe_flag_no_compat)) {
1025 // @COMPAT API backward compatibility
1026 for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
1027 pe_node_t *node = (pe_node_t *) gIter->data;
1028
1029 if (node && (node->weight >= 0) && node->details->online
1030 && (node->details->type != node_ping)) {
1031 data_set->max_valid_nodes++;
1032 }
1033 }
1034 }
1035
1036 crm_trace("Applying placement constraints");
1037 apply_placement_constraints(data_set);
1038
1039 gIter = data_set->nodes;
1040 for (; gIter != NULL; gIter = gIter->next) {
1041 GListPtr gIter2 = NULL;
1042 node_t *node = (node_t *) gIter->data;
1043
1044 gIter2 = data_set->resources;
1045 for (; gIter2 != NULL; gIter2 = gIter2->next) {
1046 resource_t *rsc = (resource_t *) gIter2->data;
1047
1048 common_apply_stickiness(rsc, node, data_set);
1049 rsc_discover_filter(rsc, node);
1050 }
1051 }
1052
1053 return TRUE;
1054 }
1055
1056 /*
1057 * Create internal resource constraints before allocation
1058 */
1059 gboolean
stage3(pe_working_set_t * data_set)1060 stage3(pe_working_set_t * data_set)
1061 {
1062
1063 GListPtr gIter = data_set->resources;
1064
1065 for (; gIter != NULL; gIter = gIter->next) {
1066 resource_t *rsc = (resource_t *) gIter->data;
1067
1068 rsc->cmds->internal_constraints(rsc, data_set);
1069 }
1070
1071 return TRUE;
1072 }
1073
1074 /*
1075 * Check for orphaned or redefined actions
1076 */
1077 gboolean
stage4(pe_working_set_t * data_set)1078 stage4(pe_working_set_t * data_set)
1079 {
1080 check_actions(data_set);
1081 return TRUE;
1082 }
1083
static void *
convert_const_pointer(const void *ptr)
{
    /* Deliberately cast away const so that const comparator arguments can be
     * passed to APIs declared without const.
     */
    return (void *) ptr;
}
1090
1091 static gint
sort_rsc_process_order(gconstpointer a,gconstpointer b,gpointer data)1092 sort_rsc_process_order(gconstpointer a, gconstpointer b, gpointer data)
1093 {
1094 int rc = 0;
1095 int r1_weight = -INFINITY;
1096 int r2_weight = -INFINITY;
1097
1098 const char *reason = "existence";
1099
1100 const GListPtr nodes = (GListPtr) data;
1101 const resource_t *resource1 = a;
1102 const resource_t *resource2 = b;
1103
1104 node_t *r1_node = NULL;
1105 node_t *r2_node = NULL;
1106 GListPtr gIter = NULL;
1107 GHashTable *r1_nodes = NULL;
1108 GHashTable *r2_nodes = NULL;
1109
1110 if (a == NULL && b == NULL) {
1111 goto done;
1112 }
1113 if (a == NULL) {
1114 return 1;
1115 }
1116 if (b == NULL) {
1117 return -1;
1118 }
1119
1120 reason = "priority";
1121 r1_weight = resource1->priority;
1122 r2_weight = resource2->priority;
1123
1124 if (r1_weight > r2_weight) {
1125 rc = -1;
1126 goto done;
1127 }
1128
1129 if (r1_weight < r2_weight) {
1130 rc = 1;
1131 goto done;
1132 }
1133
1134 reason = "no node list";
1135 if (nodes == NULL) {
1136 goto done;
1137 }
1138
1139 r1_nodes = pcmk__native_merge_weights(convert_const_pointer(resource1),
1140 resource1->id, NULL, NULL, 1,
1141 pe_weights_forward | pe_weights_init);
1142 pe__show_node_weights(true, NULL, resource1->id, r1_nodes);
1143
1144 r2_nodes = pcmk__native_merge_weights(convert_const_pointer(resource2),
1145 resource2->id, NULL, NULL, 1,
1146 pe_weights_forward | pe_weights_init);
1147 pe__show_node_weights(true, NULL, resource2->id, r2_nodes);
1148
1149 /* Current location score */
1150 reason = "current location";
1151 r1_weight = -INFINITY;
1152 r2_weight = -INFINITY;
1153
1154 if (resource1->running_on) {
1155 r1_node = pe__current_node(resource1);
1156 r1_node = g_hash_table_lookup(r1_nodes, r1_node->details->id);
1157 if (r1_node != NULL) {
1158 r1_weight = r1_node->weight;
1159 }
1160 }
1161 if (resource2->running_on) {
1162 r2_node = pe__current_node(resource2);
1163 r2_node = g_hash_table_lookup(r2_nodes, r2_node->details->id);
1164 if (r2_node != NULL) {
1165 r2_weight = r2_node->weight;
1166 }
1167 }
1168
1169 if (r1_weight > r2_weight) {
1170 rc = -1;
1171 goto done;
1172 }
1173
1174 if (r1_weight < r2_weight) {
1175 rc = 1;
1176 goto done;
1177 }
1178
1179 reason = "score";
1180 for (gIter = nodes; gIter != NULL; gIter = gIter->next) {
1181 node_t *node = (node_t *) gIter->data;
1182
1183 r1_node = NULL;
1184 r2_node = NULL;
1185
1186 r1_weight = -INFINITY;
1187 if (r1_nodes) {
1188 r1_node = g_hash_table_lookup(r1_nodes, node->details->id);
1189 }
1190 if (r1_node) {
1191 r1_weight = r1_node->weight;
1192 }
1193
1194 r2_weight = -INFINITY;
1195 if (r2_nodes) {
1196 r2_node = g_hash_table_lookup(r2_nodes, node->details->id);
1197 }
1198 if (r2_node) {
1199 r2_weight = r2_node->weight;
1200 }
1201
1202 if (r1_weight > r2_weight) {
1203 rc = -1;
1204 goto done;
1205 }
1206
1207 if (r1_weight < r2_weight) {
1208 rc = 1;
1209 goto done;
1210 }
1211 }
1212
1213 done:
1214 crm_trace("%s (%d) on %s %c %s (%d) on %s: %s",
1215 resource1->id, r1_weight, r1_node ? r1_node->details->id : "n/a",
1216 rc < 0 ? '>' : rc > 0 ? '<' : '=',
1217 resource2->id, r2_weight, r2_node ? r2_node->details->id : "n/a", reason);
1218
1219 if (r1_nodes) {
1220 g_hash_table_destroy(r1_nodes);
1221 }
1222 if (r2_nodes) {
1223 g_hash_table_destroy(r2_nodes);
1224 }
1225
1226 return rc;
1227 }
1228
1229 static void
allocate_resources(pe_working_set_t * data_set)1230 allocate_resources(pe_working_set_t * data_set)
1231 {
1232 GListPtr gIter = NULL;
1233
1234 if (is_set(data_set->flags, pe_flag_have_remote_nodes)) {
1235 /* Allocate remote connection resources first (which will also allocate
1236 * any colocation dependencies). If the connection is migrating, always
1237 * prefer the partial migration target.
1238 */
1239 for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
1240 resource_t *rsc = (resource_t *) gIter->data;
1241 if (rsc->is_remote_node == FALSE) {
1242 continue;
1243 }
1244 pe_rsc_trace(rsc, "Allocating remote connection resource '%s'",
1245 rsc->id);
1246 rsc->cmds->allocate(rsc, rsc->partial_migration_target, data_set);
1247 }
1248 }
1249
1250 /* now do the rest of the resources */
1251 for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
1252 resource_t *rsc = (resource_t *) gIter->data;
1253 if (rsc->is_remote_node == TRUE) {
1254 continue;
1255 }
1256 pe_rsc_trace(rsc, "Allocating resource '%s'", rsc->id);
1257 rsc->cmds->allocate(rsc, NULL, data_set);
1258 }
1259 }
1260
1261 /* We always use pe_order_preserve with these convenience functions to exempt
1262 * internally generated constraints from the prohibition of user constraints
1263 * involving remote connection resources.
1264 *
1265 * The start ordering additionally uses pe_order_runnable_left so that the
1266 * specified action is not runnable if the start is not runnable.
1267 */
1268
1269 static inline void
order_start_then_action(resource_t * lh_rsc,action_t * rh_action,enum pe_ordering extra,pe_working_set_t * data_set)1270 order_start_then_action(resource_t *lh_rsc, action_t *rh_action,
1271 enum pe_ordering extra, pe_working_set_t *data_set)
1272 {
1273 if (lh_rsc && rh_action && data_set) {
1274 custom_action_order(lh_rsc, start_key(lh_rsc), NULL,
1275 rh_action->rsc, NULL, rh_action,
1276 pe_order_preserve | pe_order_runnable_left | extra,
1277 data_set);
1278 }
1279 }
1280
1281 static inline void
order_action_then_stop(action_t * lh_action,resource_t * rh_rsc,enum pe_ordering extra,pe_working_set_t * data_set)1282 order_action_then_stop(action_t *lh_action, resource_t *rh_rsc,
1283 enum pe_ordering extra, pe_working_set_t *data_set)
1284 {
1285 if (lh_action && rh_rsc && data_set) {
1286 custom_action_order(lh_action->rsc, NULL, lh_action,
1287 rh_rsc, stop_key(rh_rsc), NULL,
1288 pe_order_preserve | extra, data_set);
1289 }
1290 }
1291
static void
cleanup_orphans(resource_t * rsc, pe_working_set_t * data_set)
{
    /* For an orphaned resource, schedule clearing of its fail count on every
     * online node where it has an effective failure, ordered before the
     * resource's stop action.
     */
    GListPtr gIter = NULL;

    if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) {
        return;
    }

    /* Don't recurse into ->children, those are just unallocated clone instances */
    if(is_not_set(rsc->flags, pe_rsc_orphan)) {
        return;
    }

    for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
        node_t *node = (node_t *) gIter->data;

        if (node->details->online
            && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL,
                                data_set)) {

            pe_action_t *clear_op = NULL;

            clear_op = pe__clear_failcount(rsc, node, "it is orphaned",
                                           data_set);

            /* We can't use order_action_then_stop() here because its
             * pe_order_preserve breaks things
             */
            custom_action_order(clear_op->rsc, NULL, clear_op,
                                rsc, stop_key(rsc), NULL,
                                pe_order_optional, data_set);
        }
    }
}
1327
gboolean
stage5(pe_working_set_t * data_set)
{
    /* Stage 5: sort resources when a placement strategy is in effect,
     * allocate all resources to nodes, process deferred parameter checks,
     * schedule probes and orphan cleanup, then create resource actions.
     */
    GListPtr gIter = NULL;

    if (safe_str_neq(data_set->placement_strategy, "default")) {
        /* Sort the resource processing order using per-node weights so the
         * placement strategy can take utilization into account.
         */
        GListPtr nodes = g_list_copy(data_set->nodes);

        nodes = g_list_sort_with_data(nodes, sort_node_weight, NULL);

        data_set->resources =
            g_list_sort_with_data(data_set->resources, sort_rsc_process_order, nodes);

        g_list_free(nodes);
    }

    gIter = data_set->nodes;
    for (; gIter != NULL; gIter = gIter->next) {
        node_t *node = (node_t *) gIter->data;

        dump_node_capacity(show_utilization ? 0 : utilization_log_level, "Original", node);
    }

    crm_trace("Allocating services");
    /* Take (next) highest resource, assign it and create its actions */

    allocate_resources(data_set);

    gIter = data_set->nodes;
    for (; gIter != NULL; gIter = gIter->next) {
        node_t *node = (node_t *) gIter->data;

        dump_node_capacity(show_utilization ? 0 : utilization_log_level, "Remaining", node);
    }

    // Process deferred action checks
    pe__foreach_param_check(data_set, check_params);
    pe__free_param_checks(data_set);

    if (is_set(data_set->flags, pe_flag_startup_probes)) {
        crm_trace("Calculating needed probes");
        /* This code probably needs optimization
         * ptest -x with 100 nodes, 100 clones and clone-max=100:

           With probes:

           ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status
           ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints
           ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints
           ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:292 Check actions
           ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: do_calculations: pengine.c:299 Allocate resources
           ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: stage5: allocate.c:881 Allocating services
           ptest[14781]: 2010/09/27_17:56:49 notice: TRACE: stage5: allocate.c:894 Calculating needed probes
           ptest[14781]: 2010/09/27_17:56:51 notice: TRACE: stage5: allocate.c:899 Creating actions
           ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: stage5: allocate.c:905 Creating done
           ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases
           ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints
           36s
           ptest[14781]: 2010/09/27_17:57:28 notice: TRACE: do_calculations: pengine.c:320 Create transition graph

           Without probes:

           ptest[14637]: 2010/09/27_17:56:21 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status
           ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints
           ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints
           ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:292 Check actions
           ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: do_calculations: pengine.c:299 Allocate resources
           ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: stage5: allocate.c:881 Allocating services
           ptest[14637]: 2010/09/27_17:56:24 notice: TRACE: stage5: allocate.c:899 Creating actions
           ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: stage5: allocate.c:905 Creating done
           ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases
           ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints
           ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:320 Create transition graph
         */

        probe_resources(data_set);
    }

    crm_trace("Handle orphans");

    /* Schedule fail-count clearing for orphaned resources */
    for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
        resource_t *rsc = (resource_t *) gIter->data;
        cleanup_orphans(rsc, data_set);
    }

    crm_trace("Creating actions");

    /* Let each resource create the actions its allocation implies */
    for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
        resource_t *rsc = (resource_t *) gIter->data;

        rsc->cmds->create_actions(rsc, data_set);
    }

    crm_trace("Creating done");
    return TRUE;
}
1424
1425 static gboolean
is_managed(const resource_t * rsc)1426 is_managed(const resource_t * rsc)
1427 {
1428 GListPtr gIter = rsc->children;
1429
1430 if (is_set(rsc->flags, pe_rsc_managed)) {
1431 return TRUE;
1432 }
1433
1434 for (; gIter != NULL; gIter = gIter->next) {
1435 resource_t *child_rsc = (resource_t *) gIter->data;
1436
1437 if (is_managed(child_rsc)) {
1438 return TRUE;
1439 }
1440 }
1441
1442 return FALSE;
1443 }
1444
1445 static gboolean
any_managed_resources(pe_working_set_t * data_set)1446 any_managed_resources(pe_working_set_t * data_set)
1447 {
1448
1449 GListPtr gIter = data_set->resources;
1450
1451 for (; gIter != NULL; gIter = gIter->next) {
1452 resource_t *rsc = (resource_t *) gIter->data;
1453
1454 if (is_managed(rsc)) {
1455 return TRUE;
1456 }
1457 }
1458 return FALSE;
1459 }
1460
1461 /*!
1462 * \internal
1463 * \brief Create pseudo-op for guest node fence, and order relative to it
1464 *
1465 * \param[in] node Guest node to fence
1466 * \param[in] data_set Working set of CIB state
1467 */
static void
fence_guest(pe_node_t *node, pe_working_set_t *data_set)
{
    /* NOTE(review): assumes node->details->remote_rsc is non-NULL for guest
     * nodes — callers check is_container_remote_node() first; confirm.
     */
    resource_t *container = node->details->remote_rsc->container;
    pe_action_t *stop = NULL;
    pe_action_t *stonith_op = NULL;

    /* The fence action is just a label; we don't do anything differently for
     * off vs. reboot. We specify it explicitly, rather than let it default to
     * cluster's default action, because we are not _initiating_ fencing -- we
     * are creating a pseudo-event to describe fencing that is already occurring
     * by other means (container recovery).
     */
    const char *fence_action = "off";

    /* Check whether guest's container resource is has any explicit stop or
     * start (the stop may be implied by fencing of the guest's host).
     */
    if (container) {
        stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP, NULL);

        if (find_first_action(container->actions, NULL, CRMD_ACTION_START, NULL)) {
            fence_action = "reboot";
        }
    }

    /* Create a fence pseudo-event, so we have an event to order actions
     * against, and crmd can always detect it.
     */
    stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean", FALSE, data_set);
    update_action_flags(stonith_op, pe_action_pseudo | pe_action_runnable,
                        __FUNCTION__, __LINE__);

    /* We want to imply stops/demotes after the guest is stopped, not wait until
     * it is restarted, so we always order pseudo-fencing after stop, not start
     * (even though start might be closer to what is done for a real reboot).
     */
    if(stop && is_set(stop->flags, pe_action_pseudo)) {
        /* The container stop is itself implied by fencing of the host the
         * container runs on, so order the guest pseudo-fence after the
         * host's fence operation.
         */
        pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE, NULL, FALSE, data_set);
        crm_info("Implying guest node %s is down (action %d) after %s fencing",
                 node->details->uname, stonith_op->id, stop->node->details->uname);
        order_actions(parent_stonith_op, stonith_op,
                      pe_order_runnable_left|pe_order_implies_then);

    } else if (stop) {
        /* Order the guest pseudo-fence after the container's real stop */
        order_actions(stop, stonith_op,
                      pe_order_runnable_left|pe_order_implies_then);
        crm_info("Implying guest node %s is down (action %d) "
                 "after container %s is stopped (action %d)",
                 node->details->uname, stonith_op->id,
                 container->id, stop->id);
    } else {
        /* If we're fencing the guest node but there's no stop for the guest
         * resource, we must think the guest is already stopped. However, we may
         * think so because its resource history was just cleaned. To avoid
         * unnecessarily considering the guest node down if it's really up,
         * order the pseudo-fencing after any stop of the connection resource,
         * which will be ordered after any container (re-)probe.
         */
        stop = find_first_action(node->details->remote_rsc->actions, NULL,
                                 RSC_STOP, NULL);

        if (stop) {
            order_actions(stop, stonith_op, pe_order_optional);
            crm_info("Implying guest node %s is down (action %d) "
                     "after connection is stopped (action %d)",
                     node->details->uname, stonith_op->id, stop->id);
        } else {
            /* Not sure why we're fencing, but everything must already be
             * cleanly stopped.
             */
            crm_info("Implying guest node %s is down (action %d) ",
                     node->details->uname, stonith_op->id);
        }
    }

    /* Order/imply other actions relative to pseudo-fence as with real fence */
    stonith_constraints(node, stonith_op, data_set);
}
1547
1548 /*
1549 * Create dependencies for stonith and shutdown operations
1550 */
gboolean
stage6(pe_working_set_t * data_set)
{
    /* Stage 6: create dependencies for fencing and shutdown operations,
     * checking every node for needed fencing or shutdown and ordering those
     * operations relative to each other and to the DC's own down action.
     */
    action_t *dc_down = NULL;
    action_t *stonith_op = NULL;
    gboolean integrity_lost = FALSE;
    gboolean need_stonith = TRUE;
    GListPtr gIter;
    GListPtr stonith_ops = NULL;   // non-DC fencing actions (newest first)
    GList *shutdown_ops = NULL;    // non-DC shutdown actions

    /* Remote ordering constraints need to happen prior to calculate
     * fencing because it is one more place we will mark the node as
     * dirty.
     *
     * A nice side-effect of doing it first is that we can remove a
     * bunch of special logic from apply_*_ordering() because its
     * already part of pe_fence_node()
     */
    crm_trace("Creating remote ordering constraints");
    apply_remote_node_ordering(data_set);

    crm_trace("Processing fencing and shutdown cases");
    if (any_managed_resources(data_set) == FALSE) {
        crm_notice("Delaying fencing operations until there are resources to manage");
        need_stonith = FALSE;
    }

    /* Check each node for stonith/shutdown */
    for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
        node_t *node = (node_t *) gIter->data;

        /* Guest nodes are "fenced" by recovering their container resource,
         * so handle them separately.
         */
        if (is_container_remote_node(node)) {
            if (node->details->remote_requires_reset && need_stonith
                && pe_can_fence(data_set, node)) {
                fence_guest(node, data_set);
            }
            continue;
        }

        stonith_op = NULL;

        if (node->details->unclean
            && need_stonith && pe_can_fence(data_set, node)) {

            stonith_op = pe_fence_op(node, NULL, FALSE, "node is unclean", FALSE, data_set);
            pe_warn("Scheduling Node %s for STONITH", node->details->uname);

            stonith_constraints(node, stonith_op, data_set);

            if (node->details->is_dc) {
                // Remember if the DC is being fenced
                dc_down = stonith_op;

            } else {

                if (is_not_set(data_set->flags, pe_flag_concurrent_fencing)
                    && (stonith_ops != NULL)) {
                    /* Concurrent fencing is disabled, so order each non-DC
                     * fencing in a chain. If there is any DC fencing or
                     * shutdown, it will be ordered after the last action in the
                     * chain later.
                     */
                    order_actions((pe_action_t *) stonith_ops->data,
                                  stonith_op, pe_order_optional);
                }

                // Remember all non-DC fencing actions in a separate list
                stonith_ops = g_list_prepend(stonith_ops, stonith_op);
            }

        } else if (node->details->online && node->details->shutdown &&
                /* TODO define what a shutdown op means for a remote node.
                 * For now we do not send shutdown operations for remote nodes, but
                 * if we can come up with a good use for this in the future, we will. */
                    is_remote_node(node) == FALSE) {

            action_t *down_op = sched_shutdown_op(node, data_set);

            if (node->details->is_dc) {
                // Remember if the DC is being shut down
                dc_down = down_op;
            } else {
                // Remember non-DC shutdowns for later ordering
                shutdown_ops = g_list_prepend(shutdown_ops, down_op);
            }
        }

        if (node->details->unclean && stonith_op == NULL) {
            // Unclean but not fenced: data integrity cannot be guaranteed
            integrity_lost = TRUE;
            pe_warn("Node %s is unclean!", node->details->uname);
        }
    }

    if (integrity_lost) {
        if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
            pe_warn("YOUR RESOURCES ARE NOW LIKELY COMPROMISED");
            pe_err("ENABLE STONITH TO KEEP YOUR RESOURCES SAFE");

        } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE) {
            crm_notice("Cannot fence unclean nodes until quorum is"
                       " attained (or no-quorum-policy is set to ignore)");
        }
    }

    if (dc_down != NULL) {
        /* Order any non-DC shutdowns before any DC shutdown, to avoid repeated
         * DC elections. However, we don't want to order non-DC shutdowns before
         * a DC *fencing*, because even though we don't want a node that's
         * shutting down to become DC, the DC fencing could be ordered before a
         * clone stop that's also ordered before the shutdowns, thus leading to
         * a graph loop.
         */
        if (safe_str_eq(dc_down->task, CRM_OP_SHUTDOWN)) {
            for (gIter = shutdown_ops; gIter != NULL; gIter = gIter->next) {
                action_t *node_stop = (action_t *) gIter->data;

                crm_debug("Ordering shutdown on %s before %s on DC %s",
                          node_stop->node->details->uname,
                          dc_down->task, dc_down->node->details->uname);

                order_actions(node_stop, dc_down, pe_order_optional);
            }
        }

        // Order any non-DC fencing before any DC fencing or shutdown

        if (is_set(data_set->flags, pe_flag_concurrent_fencing)) {
            /* With concurrent fencing, order each non-DC fencing action
             * separately before any DC fencing or shutdown.
             */
            for (gIter = stonith_ops; gIter != NULL; gIter = gIter->next) {
                order_actions((pe_action_t *) gIter->data, dc_down,
                              pe_order_optional);
            }
        } else if (stonith_ops) {
            /* Without concurrent fencing, the non-DC fencing actions are
             * already ordered relative to each other, so we just need to order
             * the DC fencing after the last action in the chain (which is the
             * first item in the list).
             */
            order_actions((pe_action_t *) stonith_ops->data, dc_down,
                          pe_order_optional);
        }
    }
    g_list_free(stonith_ops);
    g_list_free(shutdown_ops);
    return TRUE;
}
1703
1704 /*
1705 * Determine the sets of independent actions and the correct order for the
1706 * actions in each set.
1707 *
1708 * Mark dependencies of un-runnable actions un-runnable
1709 *
1710 */
1711 static GListPtr
find_actions_by_task(GListPtr actions,resource_t * rsc,const char * original_key)1712 find_actions_by_task(GListPtr actions, resource_t * rsc, const char *original_key)
1713 {
1714 GListPtr list = NULL;
1715
1716 list = find_actions(actions, original_key, NULL);
1717 if (list == NULL) {
1718 /* we're potentially searching a child of the original resource */
1719 char *key = NULL;
1720 char *task = NULL;
1721 int interval = 0;
1722
1723 if (parse_op_key(original_key, NULL, &task, &interval)) {
1724 key = generate_op_key(rsc->id, task, interval);
1725 list = find_actions(actions, key, NULL);
1726
1727 } else {
1728 crm_err("search key: %s", original_key);
1729 }
1730
1731 free(key);
1732 free(task);
1733 }
1734
1735 return list;
1736 }
1737
static void
rsc_order_then(pe_action_t *lh_action, pe_resource_t *rsc,
               pe__ordering_t *order)
{
    /* Process the right-hand ("then") side of an ordering constraint:
     * resolve it to concrete actions on rsc and order each one after
     * lh_action (or mark it unrunnable when there is no LH action and the
     * constraint implies "then").
     */
    GListPtr gIter = NULL;
    GListPtr rh_actions = NULL;
    action_t *rh_action = NULL;
    enum pe_ordering type;

    CRM_CHECK(rsc != NULL, return);
    CRM_CHECK(order != NULL, return);

    type = order->type;
    rh_action = order->rh_action;
    crm_trace("Processing RH of ordering constraint %d", order->id);

    if (rh_action != NULL) {
        /* A specific action was given */
        rh_actions = g_list_prepend(NULL, rh_action);

    } else if (rsc != NULL) {
        /* Otherwise look up actions by the constraint's task key */
        rh_actions = find_actions_by_task(rsc->actions, rsc, order->rh_action_task);
    }

    if (rh_actions == NULL) {
        pe_rsc_trace(rsc, "No RH-Side (%s/%s) found for constraint..."
                     " ignoring", rsc->id, order->rh_action_task);
        if (lh_action) {
            pe_rsc_trace(rsc, "LH-Side was: %s", lh_action->uuid);
        }
        return;
    }

    /* A dangling LH operation must not drag the RH side along with it */
    if (lh_action && lh_action->rsc == rsc && is_set(lh_action->flags, pe_action_dangle)) {
        pe_rsc_trace(rsc, "Detected dangling operation %s -> %s", lh_action->uuid,
                     order->rh_action_task);
        clear_bit(type, pe_order_implies_then);
    }

    gIter = rh_actions;
    for (; gIter != NULL; gIter = gIter->next) {
        action_t *rh_action_iter = (action_t *) gIter->data;

        if (lh_action) {
            order_actions(lh_action, rh_action_iter, type);

        } else if (type & pe_order_implies_then) {
            /* No LH action at all: the implied-then RH action can't run */
            update_action_flags(rh_action_iter, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__);
            crm_warn("Unrunnable %s 0x%.6x", rh_action_iter->uuid, type);
        } else {
            crm_warn("neither %s 0x%.6x", rh_action_iter->uuid, type);
        }
    }

    g_list_free(rh_actions);
}
1793
1794 static void
rsc_order_first(pe_resource_t * lh_rsc,pe__ordering_t * order,pe_working_set_t * data_set)1795 rsc_order_first(pe_resource_t *lh_rsc, pe__ordering_t *order,
1796 pe_working_set_t *data_set)
1797 {
1798 GListPtr gIter = NULL;
1799 GListPtr lh_actions = NULL;
1800 action_t *lh_action = order->lh_action;
1801 resource_t *rh_rsc = order->rh_rsc;
1802
1803 crm_trace("Processing LH of ordering constraint %d", order->id);
1804 CRM_ASSERT(lh_rsc != NULL);
1805
1806 if (lh_action != NULL) {
1807 lh_actions = g_list_prepend(NULL, lh_action);
1808
1809 } else if (lh_action == NULL) {
1810 lh_actions = find_actions_by_task(lh_rsc->actions, lh_rsc, order->lh_action_task);
1811 }
1812
1813 if (lh_actions == NULL && lh_rsc != rh_rsc) {
1814 char *key = NULL;
1815 char *op_type = NULL;
1816 int interval = 0;
1817
1818 parse_op_key(order->lh_action_task, NULL, &op_type, &interval);
1819 key = generate_op_key(lh_rsc->id, op_type, interval);
1820
1821 if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_STOPPED && safe_str_eq(op_type, RSC_STOP)) {
1822 free(key);
1823 pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring",
1824 lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
1825
1826 } else if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_SLAVE && safe_str_eq(op_type, RSC_DEMOTE)) {
1827 free(key);
1828 pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring",
1829 lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
1830
1831 } else {
1832 pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - creating",
1833 lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
1834 lh_action = custom_action(lh_rsc, key, op_type, NULL, TRUE, TRUE, data_set);
1835 lh_actions = g_list_prepend(NULL, lh_action);
1836 }
1837
1838 free(op_type);
1839 }
1840
1841 gIter = lh_actions;
1842 for (; gIter != NULL; gIter = gIter->next) {
1843 action_t *lh_action_iter = (action_t *) gIter->data;
1844
1845 if (rh_rsc == NULL && order->rh_action) {
1846 rh_rsc = order->rh_action->rsc;
1847 }
1848 if (rh_rsc) {
1849 rsc_order_then(lh_action_iter, rh_rsc, order);
1850
1851 } else if (order->rh_action) {
1852 order_actions(lh_action_iter, order->rh_action, order->type);
1853 }
1854 }
1855
1856 g_list_free(lh_actions);
1857 }
1858
1859 extern gboolean update_action(action_t * action);
1860 extern void update_colo_start_chain(action_t * action);
1861
1862 static int
is_recurring_action(action_t * action)1863 is_recurring_action(action_t *action)
1864 {
1865 const char *interval_s = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL);
1866 int interval = crm_parse_int(interval_s, "0");
1867 if(interval > 0) {
1868 return TRUE;
1869 }
1870 return FALSE;
1871 }
1872
static void
apply_container_ordering(action_t *action, pe_working_set_t *data_set)
{
    /* VMs are also classified as containers for these purposes... in
     * that they both involve a 'thing' running on a real or remote
     * cluster node.
     *
     * This allows us to be smarter about the type and extent of
     * recovery actions required in various scenarios
     */
    resource_t *remote_rsc = NULL;
    resource_t *container = NULL;
    enum action_tasks task = text2task(action->task);

    CRM_ASSERT(action->rsc);
    CRM_ASSERT(action->node);
    CRM_ASSERT(is_remote_node(action->node));

    remote_rsc = action->node->details->remote_rsc;
    CRM_ASSERT(remote_rsc);

    container = remote_rsc->container;
    CRM_ASSERT(container);

    /* A failed container means the guest node it hosts must be fenced */
    if(is_set(container->flags, pe_rsc_failed)) {
        pe_fence_node(data_set, action->node, "container failed", FALSE);
    }

    crm_trace("Order %s action %s relative to %s%s for %s%s",
              action->task, action->uuid,
              is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "",
              remote_rsc->id,
              is_set(container->flags, pe_rsc_failed)? "failed " : "",
              container->id);

    if (safe_str_eq(action->task, CRMD_ACTION_MIGRATE)
        || safe_str_eq(action->task, CRMD_ACTION_MIGRATED)) {
        /* Migration ops map to "no_action", but we need to apply the same
         * ordering as for stop or demote (see get_router_node()).
         */
        task = stop_rsc;
    }

    switch (task) {
        case start_rsc:
        case action_promote:
            /* Force resource recovery if the container is recovered */
            order_start_then_action(container, action, pe_order_implies_then,
                                    data_set);

            /* Wait for the connection resource to be up too */
            order_start_then_action(remote_rsc, action, pe_order_none,
                                    data_set);
            break;

        case stop_rsc:
        case action_demote:
            if (is_set(container->flags, pe_rsc_failed)) {
                /* When the container representing a guest node fails, any stop
                 * or demote actions for resources running on the guest node
                 * are implied by the container stopping. This is similar to
                 * how fencing operations work for cluster nodes and remote
                 * nodes.
                 */
            } else {
                /* Ensure the operation happens before the connection is brought
                 * down.
                 *
                 * If we really wanted to, we could order these after the
                 * connection start, IFF the container's current role was
                 * stopped (otherwise we re-introduce an ordering loop when the
                 * connection is restarting).
                 */
                order_action_then_stop(action, remote_rsc, pe_order_none,
                                       data_set);
            }
            break;

        default:
            /* Wait for the connection resource to be up */
            if (is_recurring_action(action)) {
                /* In case we ever get the recovery logic wrong, force
                 * recurring monitors to be restarted, even if just
                 * the connection was re-established
                 */
                if(task != no_action) {
                    order_start_then_action(remote_rsc, action,
                                            pe_order_implies_then, data_set);
                }
            } else {
                order_start_then_action(remote_rsc, action, pe_order_none,
                                        data_set);
            }
            break;
    }
}
1969
1970 static enum remote_connection_state
get_remote_node_state(pe_node_t * node)1971 get_remote_node_state(pe_node_t *node)
1972 {
1973 resource_t *remote_rsc = NULL;
1974 node_t *cluster_node = NULL;
1975
1976 CRM_ASSERT(node);
1977
1978 remote_rsc = node->details->remote_rsc;
1979 CRM_ASSERT(remote_rsc);
1980
1981 cluster_node = pe__current_node(remote_rsc);
1982
1983 /* If the cluster node the remote connection resource resides on
1984 * is unclean or went offline, we can't process any operations
1985 * on that remote node until after it starts elsewhere.
1986 */
1987 if(remote_rsc->next_role == RSC_ROLE_STOPPED || remote_rsc->allocated_to == NULL) {
1988 /* The connection resource is not going to run anywhere */
1989
1990 if (cluster_node && cluster_node->details->unclean) {
1991 /* The remote connection is failed because its resource is on a
1992 * failed node and can't be recovered elsewhere, so we must fence.
1993 */
1994 return remote_state_failed;
1995 }
1996
1997 if (is_not_set(remote_rsc->flags, pe_rsc_failed)) {
1998 /* Connection resource is cleanly stopped */
1999 return remote_state_stopped;
2000 }
2001
2002 /* Connection resource is failed */
2003
2004 if ((remote_rsc->next_role == RSC_ROLE_STOPPED)
2005 && remote_rsc->remote_reconnect_interval
2006 && node->details->remote_was_fenced
2007 && !pe__shutdown_requested(node)) {
2008
2009 /* We won't know whether the connection is recoverable until the
2010 * reconnect interval expires and we reattempt connection.
2011 */
2012 return remote_state_unknown;
2013 }
2014
2015 /* The remote connection is in a failed state. If there are any
2016 * resources known to be active on it (stop) or in an unknown state
2017 * (probe), we must assume the worst and fence it.
2018 */
2019 return remote_state_failed;
2020
2021 } else if (cluster_node == NULL) {
2022 /* Connection is recoverable but not currently running anywhere, see if we can recover it first */
2023 return remote_state_unknown;
2024
2025 } else if(cluster_node->details->unclean == TRUE
2026 || cluster_node->details->online == FALSE) {
2027 /* Connection is running on a dead node, see if we can recover it first */
2028 return remote_state_resting;
2029
2030 } else if (g_list_length(remote_rsc->running_on) > 1
2031 && remote_rsc->partial_migration_source
2032 && remote_rsc->partial_migration_target) {
2033 /* We're in the middle of migrating a connection resource,
2034 * wait until after the resource migrates before performing
2035 * any actions.
2036 */
2037 return remote_state_resting;
2038
2039 }
2040 return remote_state_alive;
2041 }
2042
2043 /*!
2044 * \internal
2045 * \brief Order actions on remote node relative to actions for the connection
2046 */
2047 static void
apply_remote_ordering(action_t * action,pe_working_set_t * data_set)2048 apply_remote_ordering(action_t *action, pe_working_set_t *data_set)
2049 {
2050 resource_t *remote_rsc = NULL;
2051 enum action_tasks task = text2task(action->task);
2052 enum remote_connection_state state = get_remote_node_state(action->node);
2053
2054 enum pe_ordering order_opts = pe_order_none;
2055
2056 if (action->rsc == NULL) {
2057 return;
2058 }
2059
2060 CRM_ASSERT(action->node);
2061 CRM_ASSERT(is_remote_node(action->node));
2062
2063 remote_rsc = action->node->details->remote_rsc;
2064 CRM_ASSERT(remote_rsc);
2065
2066 crm_trace("Order %s action %s relative to %s%s (state: %s)",
2067 action->task, action->uuid,
2068 is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "",
2069 remote_rsc->id, state2text(state));
2070
2071 if (safe_str_eq(action->task, CRMD_ACTION_MIGRATE)
2072 || safe_str_eq(action->task, CRMD_ACTION_MIGRATED)) {
2073 /* Migration ops map to "no_action", but we need to apply the same
2074 * ordering as for stop or demote (see get_router_node()).
2075 */
2076 task = stop_rsc;
2077 }
2078
2079 switch (task) {
2080 case start_rsc:
2081 case action_promote:
2082 order_opts = pe_order_none;
2083
2084 if (state == remote_state_failed) {
2085 /* Force recovery, by making this action required */
2086 order_opts |= pe_order_implies_then;
2087 }
2088
2089 /* Ensure connection is up before running this action */
2090 order_start_then_action(remote_rsc, action, order_opts, data_set);
2091 break;
2092
2093 case stop_rsc:
2094 if(state == remote_state_alive) {
2095 order_action_then_stop(action, remote_rsc,
2096 pe_order_implies_first, data_set);
2097
2098 } else if(state == remote_state_failed) {
2099 /* The resource is active on the node, but since we don't have a
2100 * valid connection, the only way to stop the resource is by
2101 * fencing the node. There is no need to order the stop relative
2102 * to the remote connection, since the stop will become implied
2103 * by the fencing.
2104 */
2105 pe_fence_node(data_set, action->node, "resources are active and the connection is unrecoverable", FALSE);
2106
2107 } else if(remote_rsc->next_role == RSC_ROLE_STOPPED) {
2108 /* State must be remote_state_unknown or remote_state_stopped.
2109 * Since the connection is not coming back up in this
2110 * transition, stop this resource first.
2111 */
2112 order_action_then_stop(action, remote_rsc,
2113 pe_order_implies_first, data_set);
2114
2115 } else {
2116 /* The connection is going to be started somewhere else, so
2117 * stop this resource after that completes.
2118 */
2119 order_start_then_action(remote_rsc, action, pe_order_none, data_set);
2120 }
2121 break;
2122
2123 case action_demote:
2124 /* Only order this demote relative to the connection start if the
2125 * connection isn't being torn down. Otherwise, the demote would be
2126 * blocked because the connection start would not be allowed.
2127 */
2128 if(state == remote_state_resting || state == remote_state_unknown) {
2129 order_start_then_action(remote_rsc, action, pe_order_none,
2130 data_set);
2131 } /* Otherwise we can rely on the stop ordering */
2132 break;
2133
2134 default:
2135 /* Wait for the connection resource to be up */
2136 if (is_recurring_action(action)) {
2137 /* In case we ever get the recovery logic wrong, force
2138 * recurring monitors to be restarted, even if just
2139 * the connection was re-established
2140 */
2141 order_start_then_action(remote_rsc, action,
2142 pe_order_implies_then, data_set);
2143
2144 } else {
2145 node_t *cluster_node = pe__current_node(remote_rsc);
2146
2147 if(task == monitor_rsc && state == remote_state_failed) {
2148 /* We would only be here if we do not know the
2149 * state of the resource on the remote node.
2150 * Since we have no way to find out, it is
2151 * necessary to fence the node.
2152 */
2153 pe_fence_node(data_set, action->node, "resources are in an unknown state and the connection is unrecoverable", FALSE);
2154 }
2155
2156 if(cluster_node && state == remote_state_stopped) {
2157 /* The connection is currently up, but is going
2158 * down permanently.
2159 *
2160 * Make sure we check services are actually
2161 * stopped _before_ we let the connection get
2162 * closed
2163 */
2164 order_action_then_stop(action, remote_rsc,
2165 pe_order_runnable_left, data_set);
2166
2167 } else {
2168 order_start_then_action(remote_rsc, action, pe_order_none,
2169 data_set);
2170 }
2171 }
2172 break;
2173 }
2174 }
2175
2176 static void
apply_remote_node_ordering(pe_working_set_t * data_set)2177 apply_remote_node_ordering(pe_working_set_t *data_set)
2178 {
2179 if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) {
2180 return;
2181 }
2182
2183 for (GListPtr gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
2184 action_t *action = (action_t *) gIter->data;
2185 resource_t *remote = NULL;
2186
2187 // We are only interested in resource actions
2188 if (action->rsc == NULL) {
2189 continue;
2190 }
2191
2192 /* Special case: If we are clearing the failcount of an actual
2193 * remote connection resource, then make sure this happens before
2194 * any start of the resource in this transition.
2195 */
2196 if (action->rsc->is_remote_node &&
2197 safe_str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT)) {
2198
2199 custom_action_order(action->rsc,
2200 NULL,
2201 action,
2202 action->rsc,
2203 generate_op_key(action->rsc->id, RSC_START, 0),
2204 NULL,
2205 pe_order_optional,
2206 data_set);
2207
2208 continue;
2209 }
2210
2211 // We are only interested in actions allocated to a node
2212 if (action->node == NULL) {
2213 continue;
2214 }
2215
2216 if (is_remote_node(action->node) == FALSE) {
2217 continue;
2218 }
2219
2220 /* We are only interested in real actions.
2221 *
2222 * @TODO This is probably wrong; pseudo-actions might be converted to
2223 * real actions and vice versa later in update_actions() at the end of
2224 * stage7().
2225 */
2226 if (is_set(action->flags, pe_action_pseudo)) {
2227 continue;
2228 }
2229
2230 remote = action->node->details->remote_rsc;
2231 if (remote == NULL) {
2232 // Orphaned
2233 continue;
2234 }
2235
2236 /* Another special case: if a resource is moving to a Pacemaker Remote
2237 * node, order the stop on the original node after any start of the
2238 * remote connection. This ensures that if the connection fails to
2239 * start, we leave the resource running on the original node.
2240 */
2241 if (safe_str_eq(action->task, RSC_START)) {
2242 for (GList *item = action->rsc->actions; item != NULL;
2243 item = item->next) {
2244 pe_action_t *rsc_action = item->data;
2245
2246 if ((rsc_action->node->details != action->node->details)
2247 && safe_str_eq(rsc_action->task, RSC_STOP)) {
2248 custom_action_order(remote, start_key(remote), NULL,
2249 action->rsc, NULL, rsc_action,
2250 pe_order_optional, data_set);
2251 }
2252 }
2253 }
2254
2255 /* The action occurs across a remote connection, so create
2256 * ordering constraints that guarantee the action occurs while the node
2257 * is active (after start, before stop ... things like that).
2258 *
2259 * This is somewhat brittle in that we need to make sure the results of
2260 * this ordering are compatible with the result of get_router_node().
2261 * It would probably be better to add XML_LRM_ATTR_ROUTER_NODE as part
2262 * of this logic rather than action2xml().
2263 */
2264 if (remote->container) {
2265 crm_trace("Container ordering for %s", action->uuid);
2266 apply_container_ordering(action, data_set);
2267
2268 } else {
2269 crm_trace("Remote ordering for %s", action->uuid);
2270 apply_remote_ordering(action, data_set);
2271 }
2272 }
2273 }
2274
2275 static gboolean
order_first_probe_unneeded(pe_action_t * probe,pe_action_t * rh_action)2276 order_first_probe_unneeded(pe_action_t * probe, pe_action_t * rh_action)
2277 {
2278 /* No need to probe the resource on the node that is being
2279 * unfenced. Otherwise it might introduce transition loop
2280 * since probe will be performed after the node is
2281 * unfenced.
2282 */
2283 if (safe_str_eq(rh_action->task, CRM_OP_FENCE)
2284 && probe->node && rh_action->node
2285 && probe->node->details == rh_action->node->details) {
2286 const char *op = g_hash_table_lookup(rh_action->meta, "stonith_action");
2287
2288 if (safe_str_eq(op, "on")) {
2289 return TRUE;
2290 }
2291 }
2292
2293 // Shutdown waits for probe to complete only if it's on the same node
2294 if ((safe_str_eq(rh_action->task, CRM_OP_SHUTDOWN))
2295 && probe->node && rh_action->node
2296 && probe->node->details != rh_action->node->details) {
2297 return TRUE;
2298 }
2299 return FALSE;
2300 }
2301
/*!
 * \internal
 * \brief For each "stop then X" ordering constraint, also order the stopped
 *        resource's probes before X
 *
 * A probe is expected to return "not running", which can stand in for a stop
 * when the resource's status is not yet known, so orderings that apply to a
 * stop should also apply to the corresponding probes.
 *
 * \param[in] data_set  Cluster working set
 */
static void
order_first_probes_imply_stops(pe_working_set_t * data_set)
{
    GListPtr gIter = NULL;

    for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) {
        pe__ordering_t *order = gIter->data;
        enum pe_ordering order_type = pe_order_optional;

        pe_resource_t *lh_rsc = order->lh_rsc;
        pe_resource_t *rh_rsc = order->rh_rsc;
        pe_action_t *lh_action = order->lh_action;
        pe_action_t *rh_action = order->rh_action;
        const char *lh_action_task = order->lh_action_task;
        const char *rh_action_task = order->rh_action_task;

        char *key = NULL;
        GListPtr probes = NULL;
        GListPtr rh_actions = NULL;

        GListPtr pIter = NULL;

        // Skip constraints with no "first" resource, or self-orderings
        if (lh_rsc == NULL) {
            continue;

        } else if (rh_rsc && lh_rsc == rh_rsc) {
            continue;
        }

        // Skip constraints whose "first" side is completely unspecified
        if (lh_action == NULL && lh_action_task == NULL) {
            continue;
        }

        // Skip constraints whose "then" side is completely unspecified
        if (rh_action == NULL && rh_action_task == NULL) {
            continue;
        }

        /* Technically probe is expected to return "not running", which could be
         * the alternative of stop action if the status of the resource is
         * unknown yet.
         */
        if (lh_action && safe_str_neq(lh_action->task, RSC_STOP)) {
            continue;

        } else if (lh_action == NULL
                   && lh_action_task
                   && crm_ends_with(lh_action_task, "_" RSC_STOP "_0") == FALSE) {
            continue;
        }

        /* Do not probe the resource inside of a stopping container. Otherwise
         * it might introduce transition loop since probe will be performed
         * after the container starts again.
         */
        if (rh_rsc && lh_rsc->container == rh_rsc) {
            if (rh_action && safe_str_eq(rh_action->task, RSC_STOP)) {
                continue;

            } else if (rh_action == NULL && rh_action_task
                       && crm_ends_with(rh_action_task,"_" RSC_STOP "_0")) {
                continue;
            }
        }

        if (order->type == pe_order_none) {
            continue;
        }

        // Preserve the order options for future filtering
        if (is_set(order->type, pe_order_apply_first_non_migratable)) {
            set_bit(order_type, pe_order_apply_first_non_migratable);
        }

        if (is_set(order->type, pe_order_same_node)) {
            set_bit(order_type, pe_order_same_node);
        }

        // Keep the order types for future filtering
        if (order->type == pe_order_anti_colocation
            || order->type == pe_order_load) {
            order_type = order->type;
        }

        // Find all probes of the "first" resource
        key = generate_op_key(lh_rsc->id, RSC_STATUS, 0);
        probes = find_actions(lh_rsc->actions, key, NULL);
        free(key);

        if (probes == NULL) {
            continue;
        }

        // Collect the "then" actions the probes must precede
        if (rh_action) {
            rh_actions = g_list_prepend(rh_actions, rh_action);

        } else if (rh_rsc && rh_action_task) {
            rh_actions = find_actions(rh_rsc->actions, rh_action_task, NULL);
        }

        if (rh_actions == NULL) {
            g_list_free(probes);
            continue;
        }

        crm_trace("Processing for LH probe based on ordering constraint %s -> %s"
                  " (id=%d, type=%.6x)",
                  lh_action ? lh_action->uuid : lh_action_task,
                  rh_action ? rh_action->uuid : rh_action_task,
                  order->id, order->type);

        // Order every probe before every relevant "then" action
        for (pIter = probes; pIter != NULL; pIter = pIter->next) {
            pe_action_t *probe = (pe_action_t *) pIter->data;
            GListPtr rIter = NULL;

            for (rIter = rh_actions; rIter != NULL; rIter = rIter->next) {
                pe_action_t *rh_action_iter = (pe_action_t *) rIter->data;

                if (order_first_probe_unneeded(probe, rh_action_iter)) {
                    continue;
                }
                order_actions(probe, rh_action_iter, order_type);
            }
        }

        g_list_free(rh_actions);
        g_list_free(probes);
    }
}
2429
/*!
 * \internal
 * \brief Recursively order a probe before restarts/re-promotes it may cause
 *
 * Starting from \p after (an action ordered after \p probe), walk the
 * ordering chain and order the probe before any stop/demote that a required
 * start/promote would imply. The pe_action_tracking flag marks visited
 * actions to prevent traversal loops; callers clear it afterwards via
 * clear_actions_tracking_flag().
 *
 * \param[in] probe     Probe (monitor_0) action of a primitive resource
 * \param[in] after     Action reached by following orderings from the probe
 * \param[in] data_set  Cluster working set
 */
static void
order_first_probe_then_restart_repromote(pe_action_t * probe,
                                         pe_action_t * after,
                                         pe_working_set_t * data_set)
{
    GListPtr gIter = NULL;
    bool interleave = FALSE;
    pe_resource_t *compatible_rsc = NULL;

    // Only primitive-resource probes are handled
    if (probe == NULL
        || probe->rsc == NULL
        || probe->rsc->variant != pe_native) {
        return;
    }

    if (after == NULL
        // Avoid running into any possible loop
        || is_set(after->flags, pe_action_tracking)) {
        return;
    }

    if (safe_str_neq(probe->task, RSC_STATUS)) {
        return;
    }

    // Mark this action as visited for the duration of the traversal
    pe_set_action_bit(after, pe_action_tracking);

    crm_trace("Processing based on %s %s -> %s %s",
              probe->uuid,
              probe->node ? probe->node->details->uname: "",
              after->uuid,
              after->node ? after->node->details->uname : "");

    if (after->rsc
        /* Better not build a dependency directly with a clone/group.
         * We are going to proceed through the ordering chain and build
         * dependencies with its children.
         */
        && after->rsc->variant == pe_native
        && probe->rsc != after->rsc) {

        GListPtr then_actions = NULL;
        enum pe_ordering probe_order_type = pe_order_optional;

        /* A start implies a preceding stop (restart); a promote implies a
         * preceding demote (re-promote). Order the probe before those.
         */
        if (safe_str_eq(after->task, RSC_START)) {
            char *key = generate_op_key(after->rsc->id, RSC_STOP, 0);

            then_actions = find_actions(after->rsc->actions, key, NULL);
            free(key);

        } else if (safe_str_eq(after->task, RSC_PROMOTE)) {
            char *key = generate_op_key(after->rsc->id, RSC_DEMOTE, 0);

            then_actions = find_actions(after->rsc->actions, key, NULL);
            free(key);
        }

        for (gIter = then_actions; gIter != NULL; gIter = gIter->next) {
            pe_action_t *then = (pe_action_t *) gIter->data;

            // Skip any pseudo action which for example is implied by fencing
            if (is_set(then->flags, pe_action_pseudo)) {
                continue;
            }

            order_actions(probe, then, probe_order_type);
        }
        g_list_free(then_actions);
    }

    // Variants above pe_group are clones/bundles; check interleaving
    if (after->rsc
        && after->rsc->variant > pe_group) {
        const char *interleave_s = g_hash_table_lookup(after->rsc->meta,
                                                       XML_RSC_ATTR_INTERLEAVE);

        interleave = crm_is_true(interleave_s);

        if (interleave) {
            /* For an interleaved clone, we should build a dependency only
             * with the relevant clone child.
             */
            compatible_rsc = find_compatible_child(probe->rsc,
                                                   after->rsc,
                                                   RSC_ROLE_UNKNOWN,
                                                   FALSE);
        }
    }

    // Continue the traversal through everything ordered after this action
    for (gIter = after->actions_after; gIter != NULL; gIter = gIter->next) {
        action_wrapper_t *after_wrapper = (action_wrapper_t *) gIter->data;
        /* pe_order_implies_then is the reason why a required A.start
         * implies/enforces B.start to be required too, which is the cause of
         * B.restart/re-promote.
         *
         * Not sure about pe_order_implies_then_on_node though. It's now only
         * used for unfencing case, which tends to introduce transition
         * loops...
         */

        if (is_not_set(after_wrapper->type, pe_order_implies_then)) {
            /* The order type between a group/clone and its child such as
             * B.start-> B_child.start is:
             * pe_order_implies_first_printed | pe_order_runnable_left
             *
             * Proceed through the ordering chain and build dependencies with
             * its children.
             */
            if (after->rsc == NULL
                || after->rsc->variant < pe_group
                || probe->rsc->parent == after->rsc
                || after_wrapper->action->rsc == NULL
                || after_wrapper->action->rsc->variant > pe_group
                || after->rsc != after_wrapper->action->rsc->parent) {
                continue;
            }

            /* Proceed to the children of a group or a non-interleaved clone.
             * For an interleaved clone, proceed only to the relevant child.
             */
            if (after->rsc->variant > pe_group
                && interleave == TRUE
                && (compatible_rsc == NULL
                    || compatible_rsc != after_wrapper->action->rsc)) {
                continue;
            }
        }

        crm_trace("Proceeding through %s %s -> %s %s (type=0x%.6x)",
                  after->uuid,
                  after->node ? after->node->details->uname: "",
                  after_wrapper->action->uuid,
                  after_wrapper->action->node ? after_wrapper->action->node->details->uname : "",
                  after_wrapper->type);

        order_first_probe_then_restart_repromote(probe, after_wrapper->action, data_set);
    }
}
2567
/*!
 * \internal
 * \brief Clear the traversal-tracking flag from every action
 *
 * Resets the pe_action_tracking marks set while walking ordering chains in
 * order_first_probe_then_restart_repromote().
 *
 * \param[in] data_set  Cluster working set
 */
static void clear_actions_tracking_flag(pe_working_set_t * data_set)
{
    for (GListPtr iter = data_set->actions; iter != NULL; iter = iter->next) {
        pe_action_t *action = (pe_action_t *) iter->data;

        // Only touch actions that were actually marked
        if (is_set(action->flags, pe_action_tracking)) {
            pe_clear_action_bit(action, pe_action_tracking);
        }
    }
}
2580
2581 static void
order_first_rsc_probes(pe_resource_t * rsc,pe_working_set_t * data_set)2582 order_first_rsc_probes(pe_resource_t * rsc, pe_working_set_t * data_set)
2583 {
2584 GListPtr gIter = NULL;
2585 GListPtr probes = NULL;
2586 char *key = NULL;
2587
2588 for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
2589 pe_resource_t * child = (pe_resource_t *) gIter->data;
2590
2591 order_first_rsc_probes(child, data_set);
2592 }
2593
2594 if (rsc->variant != pe_native) {
2595 return;
2596 }
2597
2598 key = generate_op_key(rsc->id, RSC_STATUS, 0);
2599 probes = find_actions(rsc->actions, key, NULL);
2600 free(key);
2601
2602 for (gIter = probes; gIter != NULL; gIter= gIter->next) {
2603 pe_action_t *probe = (pe_action_t *) gIter->data;
2604 GListPtr aIter = NULL;
2605
2606 for (aIter = probe->actions_after; aIter != NULL; aIter = aIter->next) {
2607 action_wrapper_t *after_wrapper = (action_wrapper_t *) aIter->data;
2608
2609 order_first_probe_then_restart_repromote(probe, after_wrapper->action, data_set);
2610 clear_actions_tracking_flag(data_set);
2611 }
2612 }
2613
2614 g_list_free(probes);
2615 }
2616
2617 static void
order_first_probes(pe_working_set_t * data_set)2618 order_first_probes(pe_working_set_t * data_set)
2619 {
2620 GListPtr gIter = NULL;
2621
2622 for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
2623 pe_resource_t *rsc = (pe_resource_t *) gIter->data;
2624
2625 order_first_rsc_probes(rsc, data_set);
2626 }
2627
2628 order_first_probes_imply_stops(data_set);
2629 }
2630
/*!
 * \internal
 * \brief Order probes after other actions ("A then B.probe")
 *
 * Currently a no-op: the implementation is disabled (see the #if 0 block
 * below for the transition-loop analysis explaining why).
 */
static void
order_then_probes(pe_working_set_t * data_set)
{
#if 0
    GListPtr gIter = NULL;

    for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
        resource_t *rsc = (resource_t *) gIter->data;

        /* Given "A then B", we would prefer to wait for A to be
         * started before probing B.
         *
         * If A was a filesystem on which the binaries and data for B
         * lived, it would have been useful if the author of B's agent
         * could assume that A is running before B.monitor will be
         * called.
         *
         * However we can't _only_ probe once A is running, otherwise
         * we'd not detect the state of B if A could not be started
         * for some reason.
         *
         * In practice however, we cannot even do an opportunistic
         * version of this because B may be moving:
         *
         *   B.probe -> B.start
         *   B.probe -> B.stop
         *   B.stop -> B.start
         *   A.stop -> A.start
         *   A.start -> B.probe
         *
         * So far so good, but if we add the result of this code:
         *
         *   B.stop -> A.stop
         *
         * Then we get a loop:
         *
         *   B.probe -> B.stop -> A.stop -> A.start -> B.probe
         *
         * We could kill the 'B.probe -> B.stop' dependency, but that
         * could mean stopping B "too" soon, because B.start must wait
         * for the probes to complete.
         *
         * Another option is to allow it only if A is a non-unique
         * clone with clone-max == node-max (since we'll never be
         * moving it). However, we could still be stopping one
         * instance at the same time as starting another.

         * The complexity of checking for allowed conditions combined
         * with the ever narrowing usecase suggests that this code
         * should remain disabled until someone gets smarter.
         */
        action_t *start = NULL;
        GListPtr actions = NULL;
        GListPtr probes = NULL;
        char *key = NULL;

        key = start_key(rsc);
        actions = find_actions(rsc->actions, key, NULL);
        free(key);

        if (actions) {
            start = actions->data;
            g_list_free(actions);
        }

        if(start == NULL) {
            crm_err("No start action for %s", rsc->id);
            continue;
        }

        key = generate_op_key(rsc->id, CRMD_ACTION_STATUS, 0);
        probes = find_actions(rsc->actions, key, NULL);
        free(key);

        for (actions = start->actions_before; actions != NULL; actions = actions->next) {
            action_wrapper_t *before = (action_wrapper_t *) actions->data;

            GListPtr pIter = NULL;
            action_t *first = before->action;
            resource_t *first_rsc = first->rsc;

            if(first->required_runnable_before) {
                GListPtr clone_actions = NULL;
                for (clone_actions = first->actions_before; clone_actions != NULL; clone_actions = clone_actions->next) {
                    before = (action_wrapper_t *) clone_actions->data;

                    crm_trace("Testing %s -> %s (%p) for %s", first->uuid, before->action->uuid, before->action->rsc, start->uuid);

                    CRM_ASSERT(before->action->rsc);
                    first_rsc = before->action->rsc;
                    break;
                }

            } else if(safe_str_neq(first->task, RSC_START)) {
                crm_trace("Not a start op %s for %s", first->uuid, start->uuid);
            }

            if(first_rsc == NULL) {
                continue;

            } else if(uber_parent(first_rsc) == uber_parent(start->rsc)) {
                crm_trace("Same parent %s for %s", first_rsc->id, start->uuid);
                continue;

            } else if(FALSE && pe_rsc_is_clone(uber_parent(first_rsc)) == FALSE) {
                crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid);
                continue;
            }

            crm_err("Applying %s before %s %d", first->uuid, start->uuid, uber_parent(first_rsc)->variant);

            for (pIter = probes; pIter != NULL; pIter = pIter->next) {
                action_t *probe = (action_t *) pIter->data;

                crm_err("Ordering %s before %s", first->uuid, probe->uuid);
                order_actions(first, probe, pe_order_optional);
            }
        }
    }
#endif
}
2752
/*!
 * \internal
 * \brief Create all probe-related ordering constraints
 *
 * Orders probes before the actions that must wait for them, then applies the
 * (currently disabled) probe-after-action orderings.
 *
 * \param[in] data_set  Cluster working set
 */
static void
order_probes(pe_working_set_t * data_set)
{
    order_first_probes(data_set);
    order_then_probes(data_set);
}
2759
2760 gboolean
stage7(pe_working_set_t * data_set)2761 stage7(pe_working_set_t * data_set)
2762 {
2763 GListPtr gIter = NULL;
2764
2765 crm_trace("Applying ordering constraints");
2766
2767 /* Don't ask me why, but apparently they need to be processed in
2768 * the order they were created in... go figure
2769 *
2770 * Also g_list_append() has horrendous performance characteristics
2771 * So we need to use g_list_prepend() and then reverse the list here
2772 */
2773 data_set->ordering_constraints = g_list_reverse(data_set->ordering_constraints);
2774
2775 for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) {
2776 pe__ordering_t *order = gIter->data;
2777 resource_t *rsc = order->lh_rsc;
2778
2779 crm_trace("Applying ordering constraint: %d", order->id);
2780
2781 if (rsc != NULL) {
2782 crm_trace("rsc_action-to-*");
2783 rsc_order_first(rsc, order, data_set);
2784 continue;
2785 }
2786
2787 rsc = order->rh_rsc;
2788 if (rsc != NULL) {
2789 crm_trace("action-to-rsc_action");
2790 rsc_order_then(order->lh_action, rsc, order);
2791
2792 } else {
2793 crm_trace("action-to-action");
2794 order_actions(order->lh_action, order->rh_action, order->type);
2795 }
2796 }
2797
2798 for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
2799 action_t *action = (action_t *) gIter->data;
2800
2801 update_colo_start_chain(action);
2802 }
2803
2804 crm_trace("Ordering probes");
2805 order_probes(data_set);
2806
2807 crm_trace("Updating %d actions", g_list_length(data_set->actions));
2808 for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
2809 action_t *action = (action_t *) gIter->data;
2810
2811 update_action(action);
2812 }
2813
2814 LogNodeActions(data_set, FALSE);
2815 for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
2816 resource_t *rsc = (resource_t *) gIter->data;
2817
2818 LogActions(rsc, data_set, FALSE);
2819 }
2820 return TRUE;
2821 }
2822
// Sequence number of the transition graph built by stage8() (-1 = none yet)
int transition_id = -1;

/*
 * Create a dependency graph to send to the transitioner (via the CRMd)
 */
2828 gboolean
stage8(pe_working_set_t * data_set)2829 stage8(pe_working_set_t * data_set)
2830 {
2831 GListPtr gIter = NULL;
2832 const char *value = NULL;
2833
2834 transition_id++;
2835 crm_trace("Creating transition graph %d.", transition_id);
2836
2837 data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH);
2838
2839 value = pe_pref(data_set->config_hash, "cluster-delay");
2840 crm_xml_add(data_set->graph, "cluster-delay", value);
2841
2842 value = pe_pref(data_set->config_hash, "stonith-timeout");
2843 crm_xml_add(data_set->graph, "stonith-timeout", value);
2844
2845 crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY");
2846
2847 if (is_set(data_set->flags, pe_flag_start_failure_fatal)) {
2848 crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY");
2849 } else {
2850 crm_xml_add(data_set->graph, "failed-start-offset", "1");
2851 }
2852
2853 value = pe_pref(data_set->config_hash, "batch-limit");
2854 crm_xml_add(data_set->graph, "batch-limit", value);
2855
2856 crm_xml_add_int(data_set->graph, "transition_id", transition_id);
2857
2858 value = pe_pref(data_set->config_hash, "migration-limit");
2859 if (crm_int_helper(value, NULL) > 0) {
2860 crm_xml_add(data_set->graph, "migration-limit", value);
2861 }
2862
2863 /* errors...
2864 slist_iter(action, action_t, action_list, lpc,
2865 if(action->optional == FALSE && action->runnable == FALSE) {
2866 print_action("Ignoring", action, TRUE);
2867 }
2868 );
2869 */
2870
2871 gIter = data_set->resources;
2872 for (; gIter != NULL; gIter = gIter->next) {
2873 resource_t *rsc = (resource_t *) gIter->data;
2874
2875 pe_rsc_trace(rsc, "processing actions for rsc=%s", rsc->id);
2876 rsc->cmds->expand(rsc, data_set);
2877 }
2878
2879 crm_log_xml_trace(data_set->graph, "created resource-driven action list");
2880
2881 /* pseudo action to distribute list of nodes with maintenance state update */
2882 add_maintenance_update(data_set);
2883
2884 /* catch any non-resource specific actions */
2885 crm_trace("processing non-resource actions");
2886
2887 gIter = data_set->actions;
2888 for (; gIter != NULL; gIter = gIter->next) {
2889 action_t *action = (action_t *) gIter->data;
2890
2891 if (action->rsc
2892 && action->node
2893 && action->node->details->shutdown
2894 && is_not_set(action->rsc->flags, pe_rsc_maintenance)
2895 && is_not_set(action->flags, pe_action_optional)
2896 && is_not_set(action->flags, pe_action_runnable)
2897 && crm_str_eq(action->task, RSC_STOP, TRUE)
2898 ) {
2899 /* Eventually we should just ignore the 'fence' case
2900 * But for now it's the best way to detect (in CTS) when
2901 * CIB resource updates are being lost
2902 */
2903 if (is_set(data_set->flags, pe_flag_have_quorum)
2904 || data_set->no_quorum_policy == no_quorum_ignore) {
2905 crm_crit("Cannot %s node '%s' because of %s:%s%s (%s)",
2906 action->node->details->unclean ? "fence" : "shut down",
2907 action->node->details->uname, action->rsc->id,
2908 is_not_set(action->rsc->flags, pe_rsc_managed) ? " unmanaged" : " blocked",
2909 is_set(action->rsc->flags, pe_rsc_failed) ? " failed" : "",
2910 action->uuid);
2911 }
2912 }
2913
2914 graph_element_from_action(action, data_set);
2915 }
2916
2917 crm_log_xml_trace(data_set->graph, "created generic action list");
2918 crm_trace("Created transition graph %d.", transition_id);
2919
2920 return TRUE;
2921 }
2922
2923 void
LogNodeActions(pe_working_set_t * data_set,gboolean terminal)2924 LogNodeActions(pe_working_set_t * data_set, gboolean terminal)
2925 {
2926 GListPtr gIter = NULL;
2927
2928 for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
2929 char *node_name = NULL;
2930 char *task = NULL;
2931 action_t *action = (action_t *) gIter->data;
2932
2933 if (action->rsc != NULL) {
2934 continue;
2935 } else if (is_set(action->flags, pe_action_optional)) {
2936 continue;
2937 }
2938
2939 if (is_container_remote_node(action->node)) {
2940 node_name = crm_strdup_printf("%s (resource: %s)", action->node->details->uname, action->node->details->remote_rsc->container->id);
2941 } else if(action->node) {
2942 node_name = crm_strdup_printf("%s", action->node->details->uname);
2943 }
2944
2945
2946 if (safe_str_eq(action->task, CRM_OP_SHUTDOWN)) {
2947 task = strdup("Shutdown");
2948 } else if (safe_str_eq(action->task, CRM_OP_FENCE)) {
2949 const char *op = g_hash_table_lookup(action->meta, "stonith_action");
2950 task = crm_strdup_printf("Fence (%s)", op);
2951 }
2952
2953 if(task == NULL) {
2954 /* Nothing to report */
2955 } else if(terminal && action->reason) {
2956 printf(" * %s %s '%s'\n", task, node_name, action->reason);
2957 } else if(terminal) {
2958 printf(" * %s %s\n", task, node_name);
2959 } else if(action->reason) {
2960 crm_notice(" * %s %s '%s'\n", task, node_name, action->reason);
2961 } else {
2962 crm_notice(" * %s %s\n", task, node_name);
2963 }
2964
2965 free(node_name);
2966 free(task);
2967 }
2968 }
2969