1 /*
2  * Copyright 2004-2021 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU Lesser General Public License
7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #ifndef _GNU_SOURCE
13 #  define _GNU_SOURCE
14 #endif
15 
16 #include <sys/param.h>
17 #include <sys/types.h>
18 #include <stdio.h>
19 #include <unistd.h>
20 #include <string.h>
21 #include <glib.h>
22 #include <crm/common/ipc.h>
23 #include <crm/common/xml_internal.h>
24 #include <crm/cluster/internal.h>
25 #include <crm/msg_xml.h>
26 #include <crm/stonith-ng.h>
27 #include "crmcluster_private.h"
28 
29 /* The peer cache remembers cluster nodes that have been seen.
30  * This is managed mostly automatically by libcluster, based on
31  * cluster membership events.
32  *
33  * Because cluster nodes can have conflicting names or UUIDs,
34  * the hash table key is a uniquely generated ID.
35  */
36 GHashTable *crm_peer_cache = NULL;
37 
38 /*
39  * The remote peer cache tracks pacemaker_remote nodes. While the
40  * value has the same type as the peer cache's, it is tracked separately for
41  * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
42  * so the name (which is also the UUID) is used as the hash table key; there
43  * is no equivalent of membership events, so management is not automatic; and
44  * most users of the peer cache need to exclude pacemaker_remote nodes.
45  *
46  * That said, using a single cache would be more logical and less error-prone,
47  * so it would be a good idea to merge them one day.
48  *
49  * libcluster provides two avenues for managing the cache:
50  * crm_remote_peer_get() and crm_remote_peer_cache_remove() manage entries
51  * directly, while crm_remote_peer_cache_refresh() repopulates it from the CIB.
52  */
53 GHashTable *crm_remote_peer_cache = NULL;
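
/* As a rough usage sketch (the node name here is hypothetical), a daemon that
 * learns about a Pacemaker Remote node directly, rather than from the CIB,
 * might manage its cache entry like this:
 *
 *     crm_peer_init();
 *
 *     crm_node_t *remote = crm_remote_peer_get("remote-node-1");
 *
 *     if (remote != NULL) {
 *         // crm_remote_peer_get() leaves the state undetermined
 *         pcmk__update_peer_state(__func__, remote, CRM_NODE_MEMBER, 0);
 *     }
 *
 *     // ... and later, once the node is no longer of interest:
 *     crm_remote_peer_cache_remove("remote-node-1");
 */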
54 
55 /*
56  * The known node cache tracks cluster and remote nodes that have been seen in
57  * the CIB. It is useful mainly when a caller needs to know about a node that
58  * may no longer be in the membership, but doesn't want to add the node to the
59  * main peer cache tables.
60  */
61 static GHashTable *known_node_cache = NULL;
62 
63 unsigned long long crm_peer_seq = 0;
64 gboolean crm_have_quorum = FALSE;
65 static gboolean crm_autoreap  = TRUE;
66 
67 // Flag setting and clearing for crm_node_t:flags
68 
69 #define set_peer_flags(peer, flags_to_set) do {                               \
70         (peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE,     \
71                                            "Peer", (peer)->uname,             \
72                                            (peer)->flags, (flags_to_set),     \
73                                            #flags_to_set);                    \
74     } while (0)
75 
76 #define clear_peer_flags(peer, flags_to_clear) do {                           \
77         (peer)->flags = pcmk__clear_flags_as(__func__, __LINE__,              \
78                                              LOG_TRACE,                       \
79                                              "Peer", (peer)->uname,           \
80                                              (peer)->flags, (flags_to_clear), \
81                                              #flags_to_clear);                \
82     } while (0)
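
/* For example (a sketch, not a call made at this point in the file), setting
 * and clearing flags on a cache entry would look like:
 *
 *     set_peer_flags(node, crm_remote_node|crm_node_dirty);
 *     clear_peer_flags(node, crm_node_dirty);
 *
 * Because the macros stringify their flag argument, the trace log includes
 * the flag names (here "crm_remote_node|crm_node_dirty") along with the old
 * and new flag values.
 */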
83 
84 static void update_peer_uname(crm_node_t *node, const char *uname);
85 
86 int
87 crm_remote_peer_cache_size(void)
88 {
89     if (crm_remote_peer_cache == NULL) {
90         return 0;
91     }
92     return g_hash_table_size(crm_remote_peer_cache);
93 }
94 
95 /*!
96  * \brief Get a remote node peer cache entry, creating it if necessary
97  *
98  * \param[in] node_name  Name of remote node
99  *
100  * \return Cache entry for node on success, otherwise NULL (with errno set)
101  *
102  * \note When creating a new entry, this will leave the node state undetermined,
103  *       so the caller should also call pcmk__update_peer_state() if the state
104  *       is known.
105  */
106 crm_node_t *
107 crm_remote_peer_get(const char *node_name)
108 {
109     crm_node_t *node;
110 
111     if (node_name == NULL) {
112         errno = EINVAL;
113         return NULL;
114     }
115 
116     /* Return existing cache entry if one exists */
117     node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
118     if (node) {
119         return node;
120     }
121 
122     /* Allocate a new entry */
123     node = calloc(1, sizeof(crm_node_t));
124     if (node == NULL) {
125         return NULL;
126     }
127 
128     /* Populate the essential information */
129     set_peer_flags(node, crm_remote_node);
130     node->uuid = strdup(node_name);
131     if (node->uuid == NULL) {
132         free(node);
133         errno = ENOMEM;
134         return NULL;
135     }
136 
137     /* Add the new entry to the cache */
138     g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
139     crm_trace("added %s to remote cache", node_name);
140 
141     /* Update the entry's uname, ensuring peer status callbacks are called */
142     update_peer_uname(node, node_name);
143     return node;
144 }
145 
146 void
147 crm_remote_peer_cache_remove(const char *node_name)
148 {
149     if (g_hash_table_remove(crm_remote_peer_cache, node_name)) {
150         crm_trace("removed %s from remote peer cache", node_name);
151     }
152 }
153 
154 /*!
155  * \internal
156  * \brief Return node status based on a CIB status entry
157  *
158  * \param[in] node_state  XML of node state
159  *
160  * \return CRM_NODE_LOST if XML_NODE_IN_CLUSTER is false in node_state,
161  *         CRM_NODE_MEMBER otherwise
162  * \note Unlike most boolean XML attributes, this one defaults to true, for
163  *       backward compatibility with older controllers that don't set it.
164  */
165 static const char *
166 remote_state_from_cib(xmlNode *node_state)
167 {
168     const char *status;
169 
170     status = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
171     if (status && !crm_is_true(status)) {
172         status = CRM_NODE_LOST;
173     } else {
174         status = CRM_NODE_MEMBER;
175     }
176     return status;
177 }
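
/* For illustration (attribute names assumed to be the usual ones): given a
 * status entry such as
 *
 *     <node_state id="remote-node-1" remote_node="true" in_ccm="false"/>
 *
 * where XML_NODE_IN_CLUSTER is the in_ccm attribute, remote_state_from_cib()
 * returns CRM_NODE_LOST; with in_ccm="true", or with no in_ccm attribute at
 * all, it returns CRM_NODE_MEMBER.
 */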
178 
179 /* user data for looping through remote node xpath searches */
180 struct refresh_data {
181     const char *field;  /* XML attribute to check for node name */
182     gboolean has_state; /* whether to update node state based on XML */
183 };
184 
185 /*!
186  * \internal
187  * \brief Process one pacemaker_remote node xpath search result
188  *
189  * \param[in] result     XML search result
190  * \param[in] user_data  what to look for in the XML
191  */
192 static void
193 remote_cache_refresh_helper(xmlNode *result, void *user_data)
194 {
195     struct refresh_data *data = user_data;
196     const char *remote = crm_element_value(result, data->field);
197     const char *state = NULL;
198     crm_node_t *node;
199 
200     CRM_CHECK(remote != NULL, return);
201 
202     /* Determine node's state, if the result has it */
203     if (data->has_state) {
204         state = remote_state_from_cib(result);
205     }
206 
207     /* Check whether cache already has entry for node */
208     node = g_hash_table_lookup(crm_remote_peer_cache, remote);
209 
210     if (node == NULL) {
211         /* Node is not in cache, so add a new entry for it */
212         node = crm_remote_peer_get(remote);
213         CRM_ASSERT(node);
214         if (state) {
215             pcmk__update_peer_state(__func__, node, state, 0);
216         }
217 
218     } else if (pcmk_is_set(node->flags, crm_node_dirty)) {
219         /* Node is in cache and hasn't been updated already, so mark it clean */
220         clear_peer_flags(node, crm_node_dirty);
221         if (state) {
222             pcmk__update_peer_state(__func__, node, state, 0);
223         }
224     }
225 }
226 
227 static void
228 mark_dirty(gpointer key, gpointer value, gpointer user_data)
229 {
230     set_peer_flags((crm_node_t *) value, crm_node_dirty);
231 }
232 
233 static gboolean
234 is_dirty(gpointer key, gpointer value, gpointer user_data)
235 {
236     return pcmk_is_set(((crm_node_t*)value)->flags, crm_node_dirty);
237 }
238 
239 /*!
240  * \brief Repopulate the remote peer cache based on CIB XML
241  *
242  * \param[in] cib  CIB XML to parse
243  */
244 void
245 crm_remote_peer_cache_refresh(xmlNode *cib)
246 {
247     struct refresh_data data;
248 
249     crm_peer_init();
250 
251     /* First, we mark all existing cache entries as dirty,
252      * so that later we can remove any that weren't in the CIB.
253      * We don't empty the cache, because we need to detect changes in state.
254      */
255     g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);
256 
257     /* Look for guest nodes and remote nodes in the status section */
258     data.field = "id";
259     data.has_state = TRUE;
260     crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_STATUS,
261                              remote_cache_refresh_helper, &data);
262 
263     /* Look for guest nodes and remote nodes in the configuration section,
264      * because they may have just been added and not have a status entry yet.
265      * In that case, the cached node state will be left NULL, so that the
266      * peer status callback isn't called until we're sure the node started
267      * successfully.
268      */
269     data.field = "value";
270     data.has_state = FALSE;
271     crm_foreach_xpath_result(cib, PCMK__XP_GUEST_NODE_CONFIG,
272                              remote_cache_refresh_helper, &data);
273     data.field = "id";
274     data.has_state = FALSE;
275     crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_CONFIG,
276                              remote_cache_refresh_helper, &data);
277 
278     /* Remove all old cache entries that weren't seen in the CIB */
279     g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
280 }
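
/* A minimal sketch of a typical refresh, assuming the usual synchronous cib_t
 * API from <crm/cib.h> (error handling omitted):
 *
 *     xmlNode *current_cib = NULL;
 *     cib_t *cib_conn = cib_new();
 *
 *     cib_conn->cmds->signon(cib_conn, "example", cib_query);
 *     cib_conn->cmds->query(cib_conn, NULL, &current_cib,
 *                           cib_scope_local|cib_sync_call);
 *     crm_remote_peer_cache_refresh(current_cib);
 *
 *     free_xml(current_cib);
 *     cib_conn->cmds->signoff(cib_conn);
 *     cib_delete(cib_conn);
 */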
281 
282 gboolean
283 crm_is_peer_active(const crm_node_t * node)
284 {
285     if(node == NULL) {
286         return FALSE;
287     }
288 
289     if (pcmk_is_set(node->flags, crm_remote_node)) {
290         /* Remote nodes are never considered active members. This
291          * guarantees they will never be considered for DC membership. */
292         return FALSE;
293     }
294 #if SUPPORT_COROSYNC
295     if (is_corosync_cluster()) {
296         return crm_is_corosync_peer_active(node);
297     }
298 #endif
299     crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type()));
300     return FALSE;
301 }
302 
303 static gboolean
304 crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data)
305 {
306     crm_node_t *node = value;
307     crm_node_t *search = user_data;
308 
309     if (search == NULL) {
310         return FALSE;
311 
312     } else if (search->id && node->id != search->id) {
313         return FALSE;
314 
315     } else if (search->id == 0 && !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) {
316         return FALSE;
317 
318     } else if (crm_is_peer_active(value) == FALSE) {
319         crm_info("Removing node with name %s and id %u from membership cache",
320                  (node->uname? node->uname : "unknown"), node->id);
321         return TRUE;
322     }
323     return FALSE;
324 }
325 
326 /*!
327  * \brief Remove all peer cache entries matching a node ID and/or uname
328  *
329  * \param[in] id    ID of node to remove (or 0 to ignore)
330  * \param[in] name  Uname of node to remove (or NULL to ignore)
331  *
332  * \return Number of cache entries removed
333  */
334 guint
335 reap_crm_member(uint32_t id, const char *name)
336 {
337     int matches = 0;
338     crm_node_t search;
339 
340     if (crm_peer_cache == NULL) {
341         crm_trace("Membership cache not initialized, ignoring purge request");
342         return 0;
343     }
344 
345     search.id = id;
346     search.uname = name ? strdup(name) : NULL;
347     matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search);
348     if(matches) {
349         crm_notice("Purged %d peer%s with id=%u%s%s from the membership cache",
350                    matches, pcmk__plural_s(matches), search.id,
351                    (search.uname? " and/or uname=" : ""),
352                    (search.uname? search.uname : ""));
353 
354     } else {
355         crm_info("No peers with id=%u%s%s to purge from the membership cache",
356                  search.id, (search.uname? " and/or uname=" : ""),
357                  (search.uname? search.uname : ""));
358     }
359 
360     free(search.uname);
361     return matches;
362 }
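
/* For example (node name hypothetical), a caller that only knows a node's
 * name can purge any inactive cache entries for it with:
 *
 *     guint removed = reap_crm_member(0, "node1");
 *
 * while passing a nonzero ID and a NULL name purges by ID alone. Active peers
 * are never removed (see crm_reap_dead_member() above).
 */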
363 
364 static void
365 count_peer(gpointer key, gpointer value, gpointer user_data)
366 {
367     guint *count = user_data;
368     crm_node_t *node = value;
369 
370     if (crm_is_peer_active(node)) {
371         *count = *count + 1;
372     }
373 }
374 
375 guint
376 crm_active_peers(void)
377 {
378     guint count = 0;
379 
380     if (crm_peer_cache) {
381         g_hash_table_foreach(crm_peer_cache, count_peer, &count);
382     }
383     return count;
384 }
385 
386 static void
387 destroy_crm_node(gpointer data)
388 {
389     crm_node_t *node = data;
390 
391     crm_trace("Destroying entry for node %u: %s", node->id, node->uname);
392 
393     free(node->uname);
394     free(node->state);
395     free(node->uuid);
396     free(node->expected);
397     free(node);
398 }
399 
400 void
401 crm_peer_init(void)
402 {
403     if (crm_peer_cache == NULL) {
404         crm_peer_cache = pcmk__strikey_table(free, destroy_crm_node);
405     }
406 
407     if (crm_remote_peer_cache == NULL) {
408         crm_remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node);
409     }
410 
411     if (known_node_cache == NULL) {
412         known_node_cache = pcmk__strikey_table(free, destroy_crm_node);
413     }
414 }
415 
416 void
417 crm_peer_destroy(void)
418 {
419     if (crm_peer_cache != NULL) {
420         crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache));
421         g_hash_table_destroy(crm_peer_cache);
422         crm_peer_cache = NULL;
423     }
424 
425     if (crm_remote_peer_cache != NULL) {
426         crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache));
427         g_hash_table_destroy(crm_remote_peer_cache);
428         crm_remote_peer_cache = NULL;
429     }
430 
431     if (known_node_cache != NULL) {
432         crm_trace("Destroying known node cache with %d members",
433                   g_hash_table_size(known_node_cache));
434         g_hash_table_destroy(known_node_cache);
435         known_node_cache = NULL;
436     }
437 
438 }
439 
440 static void (*peer_status_callback)(enum crm_status_type, crm_node_t *,
441                                     const void *) = NULL;
442 
443 /*!
444  * \brief Set a client function that will be called after peer status changes
445  *
446  * \param[in] dispatch  Pointer to function to use as callback
447  *
448  * \note Previously, client callbacks were responsible for peer cache
449  *       management. This is no longer the case, and client callbacks should do
450  *       only client-specific handling. Callbacks MUST NOT add or remove entries
451  *       in the peer caches.
452  */
453 void
454 crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
455 {
456     peer_status_callback = dispatch;
457 }
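
/* A minimal sketch of a client callback and its registration (the callback
 * name and log message are this example's, not the library's):
 *
 *     static void
 *     example_status_callback(enum crm_status_type type, crm_node_t *node,
 *                             const void *data)
 *     {
 *         if (type == crm_status_nstate) {
 *             // For state changes, data is the previous state string
 *             crm_info("Node %s is now %s (was %s)",
 *                      node->uname, node->state,
 *                      (data? (const char *) data : "unknown"));
 *         }
 *         // Never add or remove peer cache entries here
 *     }
 *
 *     crm_set_status_callback(example_status_callback);
 */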
458 
459 /*!
460  * \brief Tell the library whether to automatically reap lost nodes
461  *
462  * If TRUE (the default), calling crm_update_peer_proc() will also update the
463  * peer state to CRM_NODE_MEMBER or CRM_NODE_LOST, and pcmk__update_peer_state()
464  * will reap peers whose state changes to anything other than CRM_NODE_MEMBER.
465  * Callers should leave this enabled unless they plan to manage the cache
466  * separately on their own.
467  *
468  * \param[in] autoreap  TRUE to enable automatic reaping, FALSE to disable
469  */
470 void
471 crm_set_autoreap(gboolean autoreap)
472 {
473     crm_autoreap = autoreap;
474 }
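
/* For example, a daemon that reaps lost peers itself (with reap_crm_member()
 * or pcmk__update_peer_state() at the appropriate points) would typically
 * disable automatic reaping before processing membership events:
 *
 *     crm_set_autoreap(FALSE);
 */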
475 
476 static void
477 dump_peer_hash(int level, const char *caller)
478 {
479     GHashTableIter iter;
480     const char *id = NULL;
481     crm_node_t *node = NULL;
482 
483     g_hash_table_iter_init(&iter, crm_peer_cache);
484     while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
485         do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
486     }
487 }
488 
489 static gboolean
490 hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
491 {
492     return value == user_data;
493 }
494 
495 /*!
496  * \internal
497  * \brief Search caches for a node (cluster or Pacemaker Remote)
498  *
499  * \param[in] id     If not 0, cluster node ID to search for
500  * \param[in] uname  If not NULL, node name to search for
501  * \param[in] flags  Bitmask of enum crm_get_peer_flags
502  *
503  * \return Node cache entry if found, otherwise NULL
504  */
505 crm_node_t *
506 pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags)
507 {
508     crm_node_t *node = NULL;
509 
510     CRM_ASSERT(id > 0 || uname != NULL);
511 
512     crm_peer_init();
513 
514     if ((uname != NULL) && pcmk_is_set(flags, CRM_GET_PEER_REMOTE)) {
515         node = g_hash_table_lookup(crm_remote_peer_cache, uname);
516     }
517 
518     if ((node == NULL) && pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) {
519         node = pcmk__search_cluster_node_cache(id, uname);
520     }
521     return node;
522 }
523 
524 /*!
525  * \brief Get a node cache entry (cluster or Pacemaker Remote)
526  *
527  * \param[in] id     If not 0, cluster node ID to search for
528  * \param[in] uname  If not NULL, node name to search for
529  * \param[in] flags  Bitmask of enum crm_get_peer_flags
530  *
531  * \return (Possibly newly created) node cache entry
532  */
533 crm_node_t *
534 crm_get_peer_full(unsigned int id, const char *uname, int flags)
535 {
536     crm_node_t *node = NULL;
537 
538     CRM_ASSERT(id > 0 || uname != NULL);
539 
540     crm_peer_init();
541 
542     if (pcmk_is_set(flags, CRM_GET_PEER_REMOTE)) {
543         node = g_hash_table_lookup(crm_remote_peer_cache, uname);
544     }
545 
546     if ((node == NULL) && pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) {
547         node = crm_get_peer(id, uname);
548     }
549     return node;
550 }
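
/* A usage sketch (node name hypothetical): look a node up by name in both
 * caches, creating a cluster node entry if nothing is found. CRM_GET_PEER_ANY
 * is assumed to be the union of the cluster and remote flags.
 *
 *     crm_node_t *peer = crm_get_peer_full(0, "node1", CRM_GET_PEER_ANY);
 *
 *     if (pcmk_is_set(peer->flags, crm_remote_node)) {
 *         // The entry came from the remote peer cache
 *     }
 */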
551 
552 /*!
553  * \internal
554  * \brief Search cluster node cache
555  *
556  * \param[in] id     If not 0, cluster node ID to search for
557  * \param[in] uname  If not NULL, node name to search for
558  *
559  * \return Cluster node cache entry if found, otherwise NULL
560  */
561 crm_node_t *
562 pcmk__search_cluster_node_cache(unsigned int id, const char *uname)
563 {
564     GHashTableIter iter;
565     crm_node_t *node = NULL;
566     crm_node_t *by_id = NULL;
567     crm_node_t *by_name = NULL;
568 
569     CRM_ASSERT(id > 0 || uname != NULL);
570 
571     crm_peer_init();
572 
573     if (uname != NULL) {
574         g_hash_table_iter_init(&iter, crm_peer_cache);
575         while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
576             if(node->uname && strcasecmp(node->uname, uname) == 0) {
577                 crm_trace("Name match: %s = %p", node->uname, node);
578                 by_name = node;
579                 break;
580             }
581         }
582     }
583 
584     if (id > 0) {
585         g_hash_table_iter_init(&iter, crm_peer_cache);
586         while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
587             if(node->id == id) {
588                 crm_trace("ID match: %u = %p", node->id, node);
589                 by_id = node;
590                 break;
591             }
592         }
593     }
594 
595     node = by_id; /* Good default */
596     if(by_id == by_name) {
597         /* Nothing to do if they match (both being NULL counts as a match) */
598         crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
599 
600     } else if(by_id == NULL && by_name) {
601         crm_trace("Only one: %p for %u/%s", by_name, id, uname);
602 
603         if(id && by_name->id) {
604             dump_peer_hash(LOG_WARNING, __func__);
605             crm_crit("Node %u and %u share the same name '%s'",
606                      id, by_name->id, uname);
607             node = NULL; /* Create a new one */
608 
609         } else {
610             node = by_name;
611         }
612 
613     } else if(by_name == NULL && by_id) {
614         crm_trace("Only one: %p for %u/%s", by_id, id, uname);
615 
616         if(uname && by_id->uname) {
617             dump_peer_hash(LOG_WARNING, __func__);
618             crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
619                      uname, by_id->uname, id, uname);
620         }
621 
622     } else if(uname && by_id->uname) {
623         if(pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
624             crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
625             g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);
626 
627         } else {
628             crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
629             dump_peer_hash(LOG_INFO, __func__);
630             crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE,
631                       TRUE);
632         }
633 
634     } else if(id && by_name->id) {
635         crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
636 
637     } else {
638         /* Simple merge */
639 
640         /* Only corosync-based clusters use node IDs. The functions that call
641          * pcmk__update_peer_state() and crm_update_peer_proc() only know
642          * nodeid, so 'by_id' is authoritative when merging.
643          */
644         dump_peer_hash(LOG_DEBUG, __func__);
645 
646         crm_info("Merging %p into %p", by_name, by_id);
647         g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);
648     }
649 
650     return node;
651 }
652 
653 #if SUPPORT_COROSYNC
654 static guint
655 remove_conflicting_peer(crm_node_t *node)
656 {
657     int matches = 0;
658     GHashTableIter iter;
659     crm_node_t *existing_node = NULL;
660 
661     if (node->id == 0 || node->uname == NULL) {
662         return 0;
663     }
664 
665     if (!pcmk__corosync_has_nodelist()) {
666         return 0;
667     }
668 
669     g_hash_table_iter_init(&iter, crm_peer_cache);
670     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
671         if (existing_node->id > 0
672             && existing_node->id != node->id
673             && existing_node->uname != NULL
674             && strcasecmp(existing_node->uname, node->uname) == 0) {
675 
676             if (crm_is_peer_active(existing_node)) {
677                 continue;
678             }
679 
680             crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
681                      existing_node->id, existing_node->uname, node->id);
682 
683             g_hash_table_iter_remove(&iter);
684             matches++;
685         }
686     }
687 
688     return matches;
689 }
690 #endif
691 
692 /*!
693  * \brief Get a cluster node cache entry
694  *
695  * \param[in] id     If not 0, cluster node ID to search for
696  * \param[in] uname  If not NULL, node name to search for
697  *
698  * \return (Possibly newly created) cluster node cache entry
699  */
700 /* coverity[-alloc] Memory is referenced in one or both hashtables */
701 crm_node_t *
702 crm_get_peer(unsigned int id, const char *uname)
703 {
704     crm_node_t *node = NULL;
705     char *uname_lookup = NULL;
706 
707     CRM_ASSERT(id > 0 || uname != NULL);
708 
709     crm_peer_init();
710 
711     node = pcmk__search_cluster_node_cache(id, uname);
712 
713     /* If uname wasn't provided, and the cache search didn't turn up a name
714      * based on the ID, look up the node name via the cluster membership. */
715     if ((node == NULL || node->uname == NULL) && (uname == NULL)) {
716         uname_lookup = get_node_name(id);
717     }
718 
719     if (uname_lookup) {
720         uname = uname_lookup;
721         crm_trace("Inferred a name of '%s' for node %u", uname, id);
722 
723         /* Try to find the node in the cache once more, now that we know the uname. */
724         if (node == NULL) {
725             node = pcmk__search_cluster_node_cache(id, uname);
726         }
727     }
728 
729 
730     if (node == NULL) {
731         char *uniqueid = crm_generate_uuid();
732 
733         node = calloc(1, sizeof(crm_node_t));
734         CRM_ASSERT(node);
735 
736         crm_info("Created entry %s/%p for node %s/%u (%d total)",
737                  uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
738         g_hash_table_replace(crm_peer_cache, uniqueid, node);
739     }
740 
741     if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
742         crm_info("Node %u is now known as %s", id, uname);
743     }
744 
745     if(id > 0 && node->id == 0) {
746         node->id = id;
747     }
748 
749     if (uname && (node->uname == NULL)) {
750         update_peer_uname(node, uname);
751     }
752 
753     if(node->uuid == NULL) {
754         const char *uuid = crm_peer_uuid(node);
755 
756         if (uuid) {
757             crm_info("Node %u has uuid %s", id, uuid);
758 
759         } else {
760             crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
761         }
762     }
763 
764     free(uname_lookup);
765 
766     return node;
767 }
768 
769 /*!
770  * \internal
771  * \brief Update a node's uname
772  *
773  * \param[in] node        Node object to update
774  * \param[in] uname       New name to set
775  *
776  * \note This function should not be called within a peer cache iteration,
777  *       because in some cases it can remove conflicting cache entries,
778  *       which would invalidate the iterator.
779  */
780 static void
781 update_peer_uname(crm_node_t *node, const char *uname)
782 {
783     CRM_CHECK(uname != NULL,
784               crm_err("Bug: can't update node name without name"); return);
785     CRM_CHECK(node != NULL,
786               crm_err("Bug: can't update node name to %s without node", uname);
787               return);
788 
789     if (pcmk__str_eq(uname, node->uname, pcmk__str_casei)) {
790         crm_debug("Node uname '%s' did not change", uname);
791         return;
792     }
793 
794     for (const char *c = uname; *c; ++c) {
795         if ((*c >= 'A') && (*c <= 'Z')) {
796             crm_warn("Node names with capitals are discouraged, consider changing '%s'",
797                      uname);
798             break;
799         }
800     }
801 
802     free(node->uname);
803     node->uname = strdup(uname);
804     CRM_ASSERT(node->uname != NULL);
805 
806     if (peer_status_callback != NULL) {
807         peer_status_callback(crm_status_uname, node, NULL);
808     }
809 
810 #if SUPPORT_COROSYNC
811     if (is_corosync_cluster() && !pcmk_is_set(node->flags, crm_remote_node)) {
812         remove_conflicting_peer(node);
813     }
814 #endif
815 }
816 
817 /*!
818  * \internal
819  * \brief Get log-friendly string equivalent of a process flag
820  *
821  * \param[in] proc  Process flag
822  *
823  * \return Log-friendly string equivalent of \p proc
824  */
825 static inline const char *
826 proc2text(enum crm_proc_flag proc)
827 {
828     const char *text = "unknown";
829 
830     switch (proc) {
831         case crm_proc_none:
832             text = "none";
833             break;
834         case crm_proc_based:
835             text = "pacemaker-based";
836             break;
837         case crm_proc_controld:
838             text = "pacemaker-controld";
839             break;
840         case crm_proc_schedulerd:
841             text = "pacemaker-schedulerd";
842             break;
843         case crm_proc_execd:
844             text = "pacemaker-execd";
845             break;
846         case crm_proc_attrd:
847             text = "pacemaker-attrd";
848             break;
849         case crm_proc_fenced:
850             text = "pacemaker-fenced";
851             break;
852         case crm_proc_cpg:
853             text = "corosync-cpg";
854             break;
855     }
856     return text;
857 }
858 
859 /*!
860  * \internal
861  * \brief Update a node's process information (and potentially state)
862  *
863  * \param[in] source      Caller's function name (for log messages)
864  * \param[in] node        Node object to update
865  * \param[in] flag        Bitmask of new process information
866  * \param[in] status      Node status (online, offline, etc.)
867  *
868  * \return NULL if any node was reaped from peer caches, value of node otherwise
869  *
870  * \note If this function returns NULL, the supplied node object was likely
871  *       freed and should not be used again. This function should not be
872  *       called within a cache iteration if reaping is possible, otherwise
873  *       reaping could invalidate the iterator.
874  */
875 crm_node_t *
876 crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
877 {
878     uint32_t last = 0;
879     gboolean changed = FALSE;
880 
881     CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
882                                     source, proc2text(flag), status);
883                             return NULL);
884 
885     /* Pacemaker doesn't spawn processes on remote nodes */
886     if (pcmk_is_set(node->flags, crm_remote_node)) {
887         return node;
888     }
889 
890     last = node->processes;
891     if (status == NULL) {
892         node->processes = flag;
893         if (node->processes != last) {
894             changed = TRUE;
895         }
896 
897     } else if (pcmk__str_eq(status, ONLINESTATUS, pcmk__str_casei)) {
898         if ((node->processes & flag) != flag) {
899             node->processes = pcmk__set_flags_as(__func__, __LINE__,
900                                                  LOG_TRACE, "Peer process",
901                                                  node->uname, node->processes,
902                                                  flag, "processes");
903             changed = TRUE;
904         }
905 
906     } else if (node->processes & flag) {
907         node->processes = pcmk__clear_flags_as(__func__, __LINE__,
908                                                LOG_TRACE, "Peer process",
909                                                node->uname, node->processes,
910                                                flag, "processes");
911         changed = TRUE;
912     }
913 
914     if (changed) {
915         if (status == NULL && flag <= crm_proc_none) {
916             crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
917                      node->id);
918         } else {
919             crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
920                      proc2text(flag), status);
921         }
922 
923         /* Call the client callback first, then update the peer state,
924          * in case the node will be reaped
925          */
926         if (peer_status_callback != NULL) {
927             peer_status_callback(crm_status_processes, node, &last);
928         }
929 
930         /* The client callback shouldn't touch the peer caches,
931          * but as a safety net, bail if the peer cache was destroyed.
932          */
933         if (crm_peer_cache == NULL) {
934             return NULL;
935         }
936 
937         if (crm_autoreap) {
938             const char *peer_state = NULL;
939 
940             if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
941                 peer_state = CRM_NODE_MEMBER;
942             } else {
943                 peer_state = CRM_NODE_LOST;
944             }
945             node = pcmk__update_peer_state(__func__, node, peer_state, 0);
946         }
947     } else {
948         crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
949                   proc2text(flag), status);
950     }
951     return node;
952 }
953 
954 /*!
955  * \internal
956  * \brief Update a cluster node cache entry's expected join state
957  *
958  * \param[in]     source    Caller's function name (for logging)
959  * \param[in,out] node      Node to update
960  * \param[in]     expected  Node's new join state
961  */
962 void
963 pcmk__update_peer_expected(const char *source, crm_node_t *node,
964                            const char *expected)
965 {
966     char *last = NULL;
967     gboolean changed = FALSE;
968 
969     CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
970               return);
971 
972     /* Remote nodes don't participate in joins */
973     if (pcmk_is_set(node->flags, crm_remote_node)) {
974         return;
975     }
976 
977     last = node->expected;
978     if (expected != NULL && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) {
979         node->expected = strdup(expected);
980         changed = TRUE;
981     }
982 
983     if (changed) {
984         crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
985                  expected, last);
986         free(last);
987     } else {
988         crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
989                   node->id, expected);
990     }
991 }
992 
993 /*!
994  * \internal
995  * \brief Update a node's state and membership information
996  *
997  * \param[in] source      Caller's function name (for log messages)
998  * \param[in] node        Node object to update
999  * \param[in] state       Node's new state
1000  * \param[in] membership  Node's new membership ID
1001  * \param[in] iter        If not NULL, pointer to node's peer cache iterator
1002  *
1003  * \return NULL if any node was reaped, value of node otherwise
1004  *
1005  * \note If this function returns NULL, the supplied node object was likely
1006  *       freed and should not be used again. This function may be called from
1007  *       within a peer cache iteration if the iterator is supplied.
1008  */
1009 static crm_node_t *
1010 update_peer_state_iter(const char *source, crm_node_t *node, const char *state,
1011                        uint64_t membership, GHashTableIter *iter)
1012 {
1013     gboolean is_member;
1014 
1015     CRM_CHECK(node != NULL,
1016               crm_err("Could not set state for unknown host to %s"
1017                       CRM_XS " source=%s", state, source);
1018               return NULL);
1019 
1020     is_member = pcmk__str_eq(state, CRM_NODE_MEMBER, pcmk__str_casei);
1021     if (is_member) {
1022         node->when_lost = 0;
1023         if (membership) {
1024             node->last_seen = membership;
1025         }
1026     }
1027 
1028     if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) {
1029         char *last = node->state;
1030 
1031         node->state = strdup(state);
1032         crm_notice("Node %s state is now %s " CRM_XS
1033                    " nodeid=%u previous=%s source=%s", node->uname, state,
1034                    node->id, (last? last : "unknown"), source);
1035         if (peer_status_callback != NULL) {
1036             peer_status_callback(crm_status_nstate, node, last);
1037         }
1038         free(last);
1039 
1040         if (crm_autoreap && !is_member
1041             && !pcmk_is_set(node->flags, crm_remote_node)) {
1042             /* We only autoreap from the peer cache, not the remote peer cache,
1043              * because the latter should be managed only by
1044              * crm_remote_peer_cache_refresh().
1045              */
1046             if(iter) {
1047                 crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname);
1048                 g_hash_table_iter_remove(iter);
1049 
1050             } else {
1051                 reap_crm_member(node->id, node->uname);
1052             }
1053             node = NULL;
1054         }
1055 
1056     } else {
1057         crm_trace("Node %s state is unchanged (%s) " CRM_XS
1058                   " nodeid=%u source=%s", node->uname, state, node->id, source);
1059     }
1060     return node;
1061 }
1062 
1063 /*!
1064  * \brief Update a node's state and membership information
1065  *
1066  * \param[in] source      Caller's function name (for log messages)
1067  * \param[in] node        Node object to update
1068  * \param[in] state       Node's new state
1069  * \param[in] membership  Node's new membership ID
1070  *
1071  * \return NULL if any node was reaped, value of node otherwise
1072  *
1073  * \note If this function returns NULL, the supplied node object was likely
1074  *       freed and should not be used again. This function should not be
1075  *       called within a cache iteration if reaping is possible,
1076  *       otherwise reaping could invalidate the iterator.
1077  */
1078 crm_node_t *
1079 pcmk__update_peer_state(const char *source, crm_node_t *node,
1080                         const char *state, uint64_t membership)
1081 {
1082     return update_peer_state_iter(source, node, state, membership, NULL);
1083 }
1084 
1085 /*!
1086  * \internal
1087  * \brief Reap all nodes from cache whose membership information does not match
1088  *
1089  * \param[in] membership  Membership ID of nodes to keep
1090  */
1091 void
1092 pcmk__reap_unseen_nodes(uint64_t membership)
1093 {
1094     GHashTableIter iter;
1095     crm_node_t *node = NULL;
1096 
1097     crm_trace("Reaping unseen nodes...");
1098     g_hash_table_iter_init(&iter, crm_peer_cache);
1099     while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
1100         if (node->last_seen != membership) {
1101             if (node->state) {
1102                 /*
1103                  * Calling update_peer_state_iter() allows us to
1104                  * remove the node from crm_peer_cache without
1105                  * invalidating our iterator
1106                  */
1107                 update_peer_state_iter(__func__, node, CRM_NODE_LOST,
1108                                            membership, &iter);
1109 
1110             } else {
1111                 crm_info("State of node %s[%u] is still unknown",
1112                          node->uname, node->id);
1113             }
1114         }
1115     }
1116 }
1117 
1118 static crm_node_t *
1119 find_known_node(const char *id, const char *uname)
1120 {
1121     GHashTableIter iter;
1122     crm_node_t *node = NULL;
1123     crm_node_t *by_id = NULL;
1124     crm_node_t *by_name = NULL;
1125 
1126     if (uname) {
1127         g_hash_table_iter_init(&iter, known_node_cache);
1128         while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1129             if (node->uname && strcasecmp(node->uname, uname) == 0) {
1130                 crm_trace("Name match: %s = %p", node->uname, node);
1131                 by_name = node;
1132                 break;
1133             }
1134         }
1135     }
1136 
1137     if (id) {
1138         g_hash_table_iter_init(&iter, known_node_cache);
1139         while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1140             if(strcasecmp(node->uuid, id) == 0) {
1141                 crm_trace("ID match: %s= %p", id, node);
1142                 by_id = node;
1143                 break;
1144             }
1145         }
1146     }
1147 
1148     node = by_id; /* Good default */
1149     if (by_id == by_name) {
1150         /* Nothing to do if they match (both being NULL counts as a match) */
1151         crm_trace("Consistent: %p for %s/%s", by_id, id, uname);
1152 
1153     } else if (by_id == NULL && by_name) {
1154         crm_trace("Only one: %p for %s/%s", by_name, id, uname);
1155 
1156         if (id) {
1157             node = NULL;
1158 
1159         } else {
1160             node = by_name;
1161         }
1162 
1163     } else if (by_name == NULL && by_id) {
1164         crm_trace("Only one: %p for %s/%s", by_id, id, uname);
1165 
1166         if (uname) {
1167             node = NULL;
1168         }
1169 
1170     } else if (uname && by_id->uname
1171                && pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
1172         /* Multiple nodes have the same uname in the CIB.
1173          * Return by_id. */
1174 
1175     } else if (id && by_name->uuid
1176                && pcmk__str_eq(id, by_name->uuid, pcmk__str_casei)) {
1177         /* Multiple nodes have the same id in the CIB.
1178          * Return by_name. */
1179         node = by_name;
1180 
1181     } else {
1182         node = NULL;
1183     }
1184 
1185     if (node == NULL) {
1186         crm_debug("Couldn't find node%s%s%s%s",
1187                    id? " " : "",
1188                    id? id : "",
1189                    uname? " with name " : "",
1190                    uname? uname : "");
1191     }
1192 
1193     return node;
1194 }
1195 
1196 static void
1197 known_node_cache_refresh_helper(xmlNode *xml_node, void *user_data)
1198 {
1199     const char *id = crm_element_value(xml_node, XML_ATTR_ID);
1200     const char *uname = crm_element_value(xml_node, XML_ATTR_UNAME);
1201     crm_node_t * node =  NULL;
1202 
1203     CRM_CHECK(id != NULL && uname != NULL, return);
1204     node = find_known_node(id, uname);
1205 
1206     if (node == NULL) {
1207         char *uniqueid = crm_generate_uuid();
1208 
1209         node = calloc(1, sizeof(crm_node_t));
1210         CRM_ASSERT(node != NULL);
1211 
1212         node->uname = strdup(uname);
1213         CRM_ASSERT(node->uname != NULL);
1214 
1215         node->uuid = strdup(id);
1216         CRM_ASSERT(node->uuid != NULL);
1217 
1218         g_hash_table_replace(known_node_cache, uniqueid, node);
1219 
1220     } else if (pcmk_is_set(node->flags, crm_node_dirty)) {
1221         if (!pcmk__str_eq(uname, node->uname, pcmk__str_casei)) {
1222             free(node->uname);
1223             node->uname = strdup(uname);
1224             CRM_ASSERT(node->uname != NULL);
1225         }
1226 
1227         /* Node is in cache and hasn't been updated already, so mark it clean */
1228         clear_peer_flags(node, crm_node_dirty);
1229     }
1230 
1231 }
1232 
1233 static void
1234 refresh_known_node_cache(xmlNode *cib)
1235 {
1236     crm_peer_init();
1237 
1238     g_hash_table_foreach(known_node_cache, mark_dirty, NULL);
1239 
1240     crm_foreach_xpath_result(cib, PCMK__XP_MEMBER_NODE_CONFIG,
1241                              known_node_cache_refresh_helper, NULL);
1242 
1243     /* Remove all old cache entries that weren't seen in the CIB */
1244     g_hash_table_foreach_remove(known_node_cache, is_dirty, NULL);
1245 }
1246 
1247 void
1248 pcmk__refresh_node_caches_from_cib(xmlNode *cib)
1249 {
1250     crm_remote_peer_cache_refresh(cib);
1251     refresh_known_node_cache(cib);
1252 }
1253 
1254 /*!
1255  * \internal
1256  * \brief Search known node cache
1257  *
1258  * \param[in] id     If not 0, cluster node ID to search for
1259  * \param[in] uname  If not NULL, node name to search for
1260  * \param[in] flags  Bitmask of enum crm_get_peer_flags
1261  *
1262  * \return Known node cache entry if found, otherwise NULL
1263  */
1264 crm_node_t *
1265 pcmk__search_known_node_cache(unsigned int id, const char *uname,
1266                               uint32_t flags)
1267 {
1268     crm_node_t *node = NULL;
1269     char *id_str = NULL;
1270 
1271     CRM_ASSERT(id > 0 || uname != NULL);
1272 
1273     node = pcmk__search_node_caches(id, uname, flags);
1274 
1275     if (node || !(flags & CRM_GET_PEER_CLUSTER)) {
1276         return node;
1277     }
1278 
1279     if (id > 0) {
1280         id_str = crm_strdup_printf("%u", id);
1281     }
1282 
1283     node = find_known_node(id_str, uname);
1284 
1285     free(id_str);
1286     return node;
1287 }
1288 
1289 
1290 // Deprecated functions kept only for backward API compatibility
1291 
1292 #include <crm/cluster/compat.h>
1293 
1294 int
1295 crm_terminate_member(int nodeid, const char *uname, void *unused)
1296 {
1297     return stonith_api_kick(nodeid, uname, 120, TRUE);
1298 }
1299 
1300 int
1301 crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
1302 {
1303     return stonith_api_kick(nodeid, uname, 120, TRUE);
1304 }
1305 
1306 // End deprecated API
1307