1 /*
2  * Copyright 2004-2021 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU Lesser General Public License
7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 #include <crm/crm.h>
12 #include <crm/msg_xml.h>
13 #include <crm/common/xml.h>
14 #include <crm/common/util.h>
15 
16 #include <glib.h>
17 
18 #include <crm/pengine/internal.h>
19 
// Set by pe_err()/pe_warn() macros so callers can tell whether the most
// recent scheduler run logged any errors or warnings
gboolean was_processing_error = FALSE;
gboolean was_processing_warning = FALSE;
22 
23 static bool
check_health(const char * value)24 check_health(const char *value)
25 {
26     return pcmk__strcase_any_of(value, "none", "custom", "only-green", "progressive",
27                            "migrate-on-red", NULL);
28 }
29 
30 static bool
check_stonith_action(const char * value)31 check_stonith_action(const char *value)
32 {
33     return pcmk__strcase_any_of(value, "reboot", "poweroff", "off", NULL);
34 }
35 
36 static bool
check_placement_strategy(const char * value)37 check_placement_strategy(const char *value)
38 {
39     return pcmk__strcase_any_of(value, "default", "utilization", "minimal",
40                            "balanced", NULL);
41 }
42 
43 static pcmk__cluster_option_t pe_opts[] = {
44     /* name, old name, type, allowed values,
45      * default value, validator,
46      * short description,
47      * long description
48      */
49     {
50         "no-quorum-policy", NULL, "enum", "stop, freeze, ignore, demote, suicide",
51         "stop", pcmk__valid_quorum,
52         "What to do when the cluster does not have quorum",
53         NULL
54     },
55     {
56         "symmetric-cluster", NULL, "boolean", NULL,
57         "true", pcmk__valid_boolean,
58         "Whether resources can run on any node by default",
59         NULL
60     },
61     {
62         "maintenance-mode", NULL, "boolean", NULL,
63         "false", pcmk__valid_boolean,
64         "Whether the cluster should refrain from monitoring, starting, "
65             "and stopping resources",
66         NULL
67     },
68     {
69         "start-failure-is-fatal", NULL, "boolean", NULL,
70         "true", pcmk__valid_boolean,
71         "Whether a start failure should prevent a resource from being "
72             "recovered on the same node",
73         "When true, the cluster will immediately ban a resource from a node "
74             "if it fails to start there. When false, the cluster will instead "
75             "check the resource's fail count against its migration-threshold."
76     },
77     {
78         "enable-startup-probes", NULL, "boolean", NULL,
79         "true", pcmk__valid_boolean,
80         "Whether the cluster should check for active resources during start-up",
81         NULL
82     },
83     {
84         XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL,
85         "false", pcmk__valid_boolean,
86         "Whether to lock resources to a cleanly shut down node",
87         "When true, resources active on a node when it is cleanly shut down "
88             "are kept \"locked\" to that node (not allowed to run elsewhere) "
89             "until they start again on that node after it rejoins (or for at "
90             "most shutdown-lock-limit, if set). Stonith resources and "
91             "Pacemaker Remote connections are never locked. Clone and bundle "
92             "instances and the promoted role of promotable clones are currently"
93             " never locked, though support could be added in a future release."
94     },
95     {
96         XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL,
97         "0", pcmk__valid_interval_spec,
98         "Do not lock resources to a cleanly shut down node longer than this",
99         "If shutdown-lock is true and this is set to a nonzero time duration, "
100             "shutdown locks will expire after this much time has passed since "
101             "the shutdown was initiated, even if the node has not rejoined."
102     },
103 
104     // Fencing-related options
105     {
106         "stonith-enabled", NULL, "boolean", NULL,
107         "true", pcmk__valid_boolean,
108         "*** Advanced Use Only *** "
109             "Whether nodes may be fenced as part of recovery",
110         "If false, unresponsive nodes are immediately assumed to be harmless, "
111             "and resources that were active on them may be recovered "
112             "elsewhere. This can result in a \"split-brain\" situation, "
113             "potentially leading to data loss and/or service unavailability."
114     },
115     {
116         "stonith-action", NULL, "enum", "reboot, off, poweroff",
117         "reboot", check_stonith_action,
118         "Action to send to fence device when a node needs to be fenced "
119             "(\"poweroff\" is a deprecated alias for \"off\")",
120         NULL
121     },
122     {
123         "stonith-timeout", NULL, "time", NULL,
124         "60s", pcmk__valid_interval_spec,
125         "*** Advanced Use Only *** Unused by Pacemaker",
126         "This value is not used by Pacemaker, but is kept for backward "
127             "compatibility, and certain legacy fence agents might use it."
128     },
129     {
130         XML_ATTR_HAVE_WATCHDOG, NULL, "boolean", NULL,
131         "false", pcmk__valid_boolean,
132         "Whether watchdog integration is enabled",
133         "This is set automatically by the cluster according to whether SBD "
134             "is detected to be in use. User-configured values are ignored. "
135             "The value `true` is meaningful if diskless SBD is used and "
136             "`stonith-watchdog-timeout` is nonzero. In that case, if fencing "
137             "is required, watchdog-based self-fencing will be performed via "
138             "SBD without requiring a fencing resource explicitly configured."
139     },
140     {
141         "concurrent-fencing", NULL, "boolean", NULL,
142         PCMK__CONCURRENT_FENCING_DEFAULT, pcmk__valid_boolean,
143         "Allow performing fencing operations in parallel",
144         NULL
145     },
146     {
147         "startup-fencing", NULL, "boolean", NULL,
148         "true", pcmk__valid_boolean,
149         "*** Advanced Use Only *** Whether to fence unseen nodes at start-up",
150         "Setting this to false may lead to a \"split-brain\" situation,"
151             "potentially leading to data loss and/or service unavailability."
152     },
153     {
154         XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY, NULL, "time", NULL,
155         "0", pcmk__valid_interval_spec,
156         "Apply fencing delay targeting the lost nodes with the highest total resource priority",
157         "Apply specified delay for the fencings that are targeting the lost "
158             "nodes with the highest total resource priority in case we don't "
159             "have the majority of the nodes in our cluster partition, so that "
160             "the more significant nodes potentially win any fencing match, "
161             "which is especially meaningful under split-brain of 2-node "
162             "cluster. A promoted resource instance takes the base priority + 1 "
163             "on calculation if the base priority is not 0. Any static/random "
164             "delays that are introduced by `pcmk_delay_base/max` configured "
165             "for the corresponding fencing resources will be added to this "
166             "delay. This delay should be significantly greater than, safely "
167             "twice, the maximum `pcmk_delay_base/max`. By default, priority "
168             "fencing delay is disabled."
169     },
170 
171     {
172         "cluster-delay", NULL, "time", NULL,
173         "60s", pcmk__valid_interval_spec,
174         "Maximum time for node-to-node communication",
175         "The node elected Designated Controller (DC) will consider an action "
176             "failed if it does not get a response from the node executing the "
177             "action within this time (after considering the action's own "
178             "timeout). The \"correct\" value will depend on the speed and "
179             "load of your network and cluster nodes."
180     },
181     {
182         "batch-limit", NULL, "integer", NULL,
183         "0", pcmk__valid_number,
184         "Maximum number of jobs that the cluster may execute in parallel "
185             "across all nodes",
186         "The \"correct\" value will depend on the speed and load of your "
187             "network and cluster nodes. If set to 0, the cluster will "
188             "impose a dynamically calculated limit when any node has a "
189             "high load."
190     },
191     {
192         "migration-limit", NULL, "integer", NULL,
193         "-1", pcmk__valid_number,
194         "The number of live migration actions that the cluster is allowed "
195             "to execute in parallel on a node (-1 means no limit)"
196     },
197 
198     /* Orphans and stopping */
199     {
200         "stop-all-resources", NULL, "boolean", NULL,
201         "false", pcmk__valid_boolean,
202         "Whether the cluster should stop all active resources",
203         NULL
204     },
205     {
206         "stop-orphan-resources", NULL, "boolean", NULL,
207         "true", pcmk__valid_boolean,
208         "Whether to stop resources that were removed from the configuration",
209         NULL
210     },
211     {
212         "stop-orphan-actions", NULL, "boolean", NULL,
213         "true", pcmk__valid_boolean,
214         "Whether to cancel recurring actions removed from the configuration",
215         NULL
216     },
217     {
218         "remove-after-stop", NULL, "boolean", NULL,
219         "false", pcmk__valid_boolean,
220         "*** Deprecated *** Whether to remove stopped resources from "
221             "the executor",
222         "Values other than default are poorly tested and potentially dangerous."
223             " This option will be removed in a future release."
224     },
225 
226     /* Storing inputs */
227     {
228         "pe-error-series-max", NULL, "integer", NULL,
229         "-1", pcmk__valid_number,
230         "The number of scheduler inputs resulting in errors to save",
231         "Zero to disable, -1 to store unlimited."
232     },
233     {
234         "pe-warn-series-max",  NULL, "integer", NULL,
235         "5000", pcmk__valid_number,
236         "The number of scheduler inputs resulting in warnings to save",
237         "Zero to disable, -1 to store unlimited."
238     },
239     {
240         "pe-input-series-max", NULL, "integer", NULL,
241         "4000", pcmk__valid_number,
242         "The number of scheduler inputs without errors or warnings to save",
243         "Zero to disable, -1 to store unlimited."
244     },
245 
246     /* Node health */
247     {
248         "node-health-strategy", NULL, "enum",
249         "none, migrate-on-red, only-green, progressive, custom",
250         "none", check_health,
251         "How cluster should react to node health attributes",
252         "Requires external entities to create node attributes (named with "
253             "the prefix \"#health\") with values \"red\", \"yellow\" or "
254             "\"green\"."
255     },
256     {
257         "node-health-base", NULL, "integer", NULL,
258         "0", pcmk__valid_number,
259         "Base health score assigned to a node",
260         "Only used when node-health-strategy is set to progressive."
261     },
262     {
263         "node-health-green", NULL, "integer", NULL,
264         "0", pcmk__valid_number,
265         "The score to use for a node health attribute whose value is \"green\"",
266         "Only used when node-health-strategy is set to custom or progressive."
267     },
268     {
269         "node-health-yellow", NULL, "integer", NULL,
270         "0", pcmk__valid_number,
271         "The score to use for a node health attribute whose value is \"yellow\"",
272         "Only used when node-health-strategy is set to custom or progressive."
273     },
274     {
275         "node-health-red", NULL, "integer", NULL,
276         "-INFINITY", pcmk__valid_number,
277         "The score to use for a node health attribute whose value is \"red\"",
278         "Only used when node-health-strategy is set to custom or progressive."
279     },
280 
281     /*Placement Strategy*/
282     {
283         "placement-strategy", NULL, "enum",
284         "default, utilization, minimal, balanced",
285         "default", check_placement_strategy,
286         "How the cluster should allocate resources to nodes",
287         NULL
288     },
289 };
290 
291 void
pe_metadata(void)292 pe_metadata(void)
293 {
294     pcmk__print_option_metadata("pacemaker-schedulerd", "1.0",
295                                 "Pacemaker scheduler options",
296                                 "Cluster options used by Pacemaker's scheduler"
297                                     " (formerly called pengine)",
298                                 pe_opts, PCMK__NELEM(pe_opts));
299 }
300 
301 void
verify_pe_options(GHashTable * options)302 verify_pe_options(GHashTable * options)
303 {
304     pcmk__validate_cluster_options(options, pe_opts, PCMK__NELEM(pe_opts));
305 }
306 
307 const char *
pe_pref(GHashTable * options,const char * name)308 pe_pref(GHashTable * options, const char *name)
309 {
310     return pcmk__cluster_option(options, pe_opts, PCMK__NELEM(pe_opts), name);
311 }
312 
313 const char *
fail2text(enum action_fail_response fail)314 fail2text(enum action_fail_response fail)
315 {
316     const char *result = "<unknown>";
317 
318     switch (fail) {
319         case action_fail_ignore:
320             result = "ignore";
321             break;
322         case action_fail_demote:
323             result = "demote";
324             break;
325         case action_fail_block:
326             result = "block";
327             break;
328         case action_fail_recover:
329             result = "recover";
330             break;
331         case action_fail_migrate:
332             result = "migrate";
333             break;
334         case action_fail_stop:
335             result = "stop";
336             break;
337         case action_fail_fence:
338             result = "fence";
339             break;
340         case action_fail_standby:
341             result = "standby";
342             break;
343         case action_fail_restart_container:
344             result = "restart-container";
345             break;
346         case action_fail_reset_remote:
347             result = "reset-remote";
348             break;
349     }
350     return result;
351 }
352 
353 enum action_tasks
text2task(const char * task)354 text2task(const char *task)
355 {
356     if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_casei)) {
357         return stop_rsc;
358     } else if (pcmk__str_eq(task, CRMD_ACTION_STOPPED, pcmk__str_casei)) {
359         return stopped_rsc;
360     } else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_casei)) {
361         return start_rsc;
362     } else if (pcmk__str_eq(task, CRMD_ACTION_STARTED, pcmk__str_casei)) {
363         return started_rsc;
364     } else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
365         return shutdown_crm;
366     } else if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
367         return stonith_node;
368     } else if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) {
369         return monitor_rsc;
370     } else if (pcmk__str_eq(task, CRMD_ACTION_NOTIFY, pcmk__str_casei)) {
371         return action_notify;
372     } else if (pcmk__str_eq(task, CRMD_ACTION_NOTIFIED, pcmk__str_casei)) {
373         return action_notified;
374     } else if (pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_casei)) {
375         return action_promote;
376     } else if (pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_casei)) {
377         return action_demote;
378     } else if (pcmk__str_eq(task, CRMD_ACTION_PROMOTED, pcmk__str_casei)) {
379         return action_promoted;
380     } else if (pcmk__str_eq(task, CRMD_ACTION_DEMOTED, pcmk__str_casei)) {
381         return action_demoted;
382     }
383 #if SUPPORT_TRACING
384     if (pcmk__str_eq(task, CRMD_ACTION_CANCEL, pcmk__str_casei)) {
385         return no_action;
386     } else if (pcmk__str_eq(task, CRMD_ACTION_DELETE, pcmk__str_casei)) {
387         return no_action;
388     } else if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) {
389         return no_action;
390     } else if (pcmk__str_eq(task, CRM_OP_PROBED, pcmk__str_casei)) {
391         return no_action;
392     } else if (pcmk__str_eq(task, CRM_OP_LRM_REFRESH, pcmk__str_casei)) {
393         return no_action;
394     } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATE, pcmk__str_casei)) {
395         return no_action;
396     } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) {
397         return no_action;
398     }
399     crm_trace("Unsupported action: %s", task);
400 #endif
401 
402     return no_action;
403 }
404 
405 const char *
task2text(enum action_tasks task)406 task2text(enum action_tasks task)
407 {
408     const char *result = "<unknown>";
409 
410     switch (task) {
411         case no_action:
412             result = "no_action";
413             break;
414         case stop_rsc:
415             result = CRMD_ACTION_STOP;
416             break;
417         case stopped_rsc:
418             result = CRMD_ACTION_STOPPED;
419             break;
420         case start_rsc:
421             result = CRMD_ACTION_START;
422             break;
423         case started_rsc:
424             result = CRMD_ACTION_STARTED;
425             break;
426         case shutdown_crm:
427             result = CRM_OP_SHUTDOWN;
428             break;
429         case stonith_node:
430             result = CRM_OP_FENCE;
431             break;
432         case monitor_rsc:
433             result = CRMD_ACTION_STATUS;
434             break;
435         case action_notify:
436             result = CRMD_ACTION_NOTIFY;
437             break;
438         case action_notified:
439             result = CRMD_ACTION_NOTIFIED;
440             break;
441         case action_promote:
442             result = CRMD_ACTION_PROMOTE;
443             break;
444         case action_promoted:
445             result = CRMD_ACTION_PROMOTED;
446             break;
447         case action_demote:
448             result = CRMD_ACTION_DEMOTE;
449             break;
450         case action_demoted:
451             result = CRMD_ACTION_DEMOTED;
452             break;
453     }
454 
455     return result;
456 }
457 
// Map a resource role to its display string. With PCMK__COMPAT_2_0, the
// promoted/unpromoted roles use their legacy (master/slave-era) names.
// Out-of-range values return RSC_ROLE_UNKNOWN_S.
const char *
role2text(enum rsc_role_e role)
{
    switch (role) {
        case RSC_ROLE_UNKNOWN:
            return RSC_ROLE_UNKNOWN_S;
        case RSC_ROLE_STOPPED:
            return RSC_ROLE_STOPPED_S;
        case RSC_ROLE_STARTED:
            return RSC_ROLE_STARTED_S;
        case RSC_ROLE_UNPROMOTED:
#ifdef PCMK__COMPAT_2_0
            return RSC_ROLE_UNPROMOTED_LEGACY_S;
#else
            return RSC_ROLE_UNPROMOTED_S;
#endif
        case RSC_ROLE_PROMOTED:
#ifdef PCMK__COMPAT_2_0
            return RSC_ROLE_PROMOTED_LEGACY_S;
#else
            return RSC_ROLE_PROMOTED_S;
#endif
    }
    // Only reachable if role is outside the enum's defined range; the checks
    // log the problem while still returning a safe value
    CRM_CHECK(role >= RSC_ROLE_UNKNOWN, return RSC_ROLE_UNKNOWN_S);
    CRM_CHECK(role < RSC_ROLE_MAX, return RSC_ROLE_UNKNOWN_S);
    // coverity[dead_error_line]
    return RSC_ROLE_UNKNOWN_S;
}
486 
487 enum rsc_role_e
text2role(const char * role)488 text2role(const char *role)
489 {
490     CRM_ASSERT(role != NULL);
491     if (pcmk__str_eq(role, RSC_ROLE_STOPPED_S, pcmk__str_casei)) {
492         return RSC_ROLE_STOPPED;
493     } else if (pcmk__str_eq(role, RSC_ROLE_STARTED_S, pcmk__str_casei)) {
494         return RSC_ROLE_STARTED;
495     } else if (pcmk__strcase_any_of(role, RSC_ROLE_UNPROMOTED_S,
496                                     RSC_ROLE_UNPROMOTED_LEGACY_S, NULL)) {
497         return RSC_ROLE_UNPROMOTED;
498     } else if (pcmk__strcase_any_of(role, RSC_ROLE_PROMOTED_S,
499                                     RSC_ROLE_PROMOTED_LEGACY_S, NULL)) {
500         return RSC_ROLE_PROMOTED;
501     } else if (pcmk__str_eq(role, RSC_ROLE_UNKNOWN_S, pcmk__str_casei)) {
502         return RSC_ROLE_UNKNOWN;
503     }
504     crm_err("Unknown role: %s", role);
505     return RSC_ROLE_UNKNOWN;
506 }
507 
508 /*!
509  * \internal
510  * \brief Add two scores (bounding to +/- INFINITY)
511  *
512  * \param[in] score1  First score to add
513  * \param[in] score2  Second score to add
514  */
515 int
pe__add_scores(int score1,int score2)516 pe__add_scores(int score1, int score2)
517 {
518     int result = score1 + score2;
519 
520     // First handle the cases where one or both is infinite
521 
522     if (score1 <= -CRM_SCORE_INFINITY) {
523 
524         if (score2 <= -CRM_SCORE_INFINITY) {
525             crm_trace("-INFINITY + -INFINITY = -INFINITY");
526         } else if (score2 >= CRM_SCORE_INFINITY) {
527             crm_trace("-INFINITY + +INFINITY = -INFINITY");
528         } else {
529             crm_trace("-INFINITY + %d = -INFINITY", score2);
530         }
531 
532         return -CRM_SCORE_INFINITY;
533 
534     } else if (score2 <= -CRM_SCORE_INFINITY) {
535 
536         if (score1 >= CRM_SCORE_INFINITY) {
537             crm_trace("+INFINITY + -INFINITY = -INFINITY");
538         } else {
539             crm_trace("%d + -INFINITY = -INFINITY", score1);
540         }
541 
542         return -CRM_SCORE_INFINITY;
543 
544     } else if (score1 >= CRM_SCORE_INFINITY) {
545 
546         if (score2 >= CRM_SCORE_INFINITY) {
547             crm_trace("+INFINITY + +INFINITY = +INFINITY");
548         } else {
549             crm_trace("+INFINITY + %d = +INFINITY", score2);
550         }
551 
552         return CRM_SCORE_INFINITY;
553 
554     } else if (score2 >= CRM_SCORE_INFINITY) {
555         crm_trace("%d + +INFINITY = +INFINITY", score1);
556         return CRM_SCORE_INFINITY;
557     }
558 
559     /* As long as CRM_SCORE_INFINITY is less than half of the maximum integer,
560      * we can ignore the possibility of integer overflow
561      */
562 
563     // Bound result to infinity
564 
565     if (result >= CRM_SCORE_INFINITY) {
566         crm_trace("%d + %d = +INFINITY", score1, score2);
567         return CRM_SCORE_INFINITY;
568 
569     } else if (result <= -CRM_SCORE_INFINITY) {
570         crm_trace("%d + %d = -INFINITY", score1, score2);
571         return -CRM_SCORE_INFINITY;
572     }
573 
574     crm_trace("%d + %d = %d", score1, score2, result);
575     return result;
576 }
577 
578 void
add_hash_param(GHashTable * hash,const char * name,const char * value)579 add_hash_param(GHashTable * hash, const char *name, const char *value)
580 {
581     CRM_CHECK(hash != NULL, return);
582 
583     crm_trace("adding: name=%s value=%s", crm_str(name), crm_str(value));
584     if (name == NULL || value == NULL) {
585         return;
586 
587     } else if (pcmk__str_eq(value, "#default", pcmk__str_casei)) {
588         return;
589 
590     } else if (g_hash_table_lookup(hash, name) == NULL) {
591         g_hash_table_insert(hash, strdup(name), strdup(value));
592     }
593 }
594 
// Look up a node attribute's value, resolving it against the container's
// host node when rsc is a bundled resource with container-attribute-target
// set to "host". Returns NULL if node is NULL or the attribute is unset.
// NOTE(review): assumes a "host"-targeted node is a guest (remote) node whose
// connection has a container resource — enforced by the CRM_ASSERTs below.
const char *
pe_node_attribute_calculated(const pe_node_t *node, const char *name,
                             const pe_resource_t *rsc)
{
    const char *source;

    if(node == NULL) {
        return NULL;

    } else if(rsc == NULL) {
        // No resource context: plain attribute lookup on the node itself
        return g_hash_table_lookup(node->details->attrs, name);
    }

    // Only container-attribute-target="host" changes where we look
    source = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET);
    if(source == NULL || !pcmk__str_eq("host", source, pcmk__str_casei)) {
        return g_hash_table_lookup(node->details->attrs, name);
    }

    /* Use attributes set for the containers location
     * instead of for the container itself
     *
     * Useful when the container is using the host's local
     * storage
     */

    CRM_ASSERT(node->details->remote_rsc);
    CRM_ASSERT(node->details->remote_rsc->container);

    if(node->details->remote_rsc->container->running_on) {
        // running_on is a list; use its first node as the host
        pe_node_t *host = node->details->remote_rsc->container->running_on->data;
        pe_rsc_trace(rsc, "%s: Looking for %s on the container host %s", rsc->id, name, host->details->uname);
        return g_hash_table_lookup(host->details->attrs, name);
    }

    pe_rsc_trace(rsc, "%s: Not looking for %s on the container host: %s is inactive",
                 rsc->id, name, node->details->remote_rsc->container->id);
    return NULL;
}
633 
634 const char *
pe_node_attribute_raw(pe_node_t * node,const char * name)635 pe_node_attribute_raw(pe_node_t *node, const char *name)
636 {
637     if(node == NULL) {
638         return NULL;
639     }
640     return g_hash_table_lookup(node->details->attrs, name);
641 }
642