1 /*
2  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include <crm_internal.h>
19 
20 #ifndef _GNU_SOURCE
21 #  define _GNU_SOURCE
22 #endif
23 
24 #include <sys/param.h>
25 #include <sys/types.h>
26 #include <stdio.h>
27 #include <unistd.h>
28 #include <string.h>
29 #include <glib.h>
30 #include <crm/common/ipc.h>
31 #include <crm/cluster/internal.h>
32 #include <crm/msg_xml.h>
33 #include <crm/stonith-ng.h>
34 
/* Expands to "" when i equals 1 and to "s" otherwise, for pluralizing a noun
 * in a log message according to a count.
 */
#define s_if_plural(i) (((i) == 1)? "" : "s")
36 
/* The peer cache remembers cluster nodes that have been seen.
 * This is managed mostly automatically by libcluster, based on
 * cluster membership events.
 *
 * Because cluster nodes can have conflicting names or UUIDs,
 * the hash table key is a uniquely generated ID.
 */
GHashTable *crm_peer_cache = NULL;

/*
 * The remote peer cache tracks pacemaker_remote nodes. While the
 * value has the same type as the peer cache's, it is tracked separately for
 * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
 * so the name (which is also the UUID) is used as the hash table key; there
 * is no equivalent of membership events, so management is not automatic; and
 * most users of the peer cache need to exclude pacemaker_remote nodes.
 *
 * That said, using a single cache would be more logical and less error-prone,
 * so it would be a good idea to merge them one day.
 *
 * libcluster provides two avenues for populating the cache:
 * crm_remote_peer_get(), crm_remote_peer_cache_add() and
 * crm_remote_peer_cache_remove() directly manage it,
 * while crm_remote_peer_cache_refresh() populates it via the CIB.
 */
GHashTable *crm_remote_peer_cache = NULL;

/* A third node table, created by crm_peer_init() and torn down by
 * crm_peer_destroy() together with the two caches above.
 * NOTE(review): presumably holds nodes known from configuration rather than
 * from membership events -- confirm against the code that populates it.
 */
GHashTable *crm_known_peer_cache = NULL;

/* NOTE(review): presumably the most recent cluster membership sequence
 * number -- confirm against the cluster-layer code that assigns it.
 */
unsigned long long crm_peer_seq = 0;

/* NOTE(review): presumably whether the local partition currently has quorum --
 * confirm against the cluster-layer code that assigns it.
 */
gboolean crm_have_quorum = FALSE;

/* Whether lost nodes are reaped automatically; set via crm_set_autoreap() */
static gboolean crm_autoreap  = TRUE;
69 
70 int
crm_remote_peer_cache_size(void)71 crm_remote_peer_cache_size(void)
72 {
73     if (crm_remote_peer_cache == NULL) {
74         return 0;
75     }
76     return g_hash_table_size(crm_remote_peer_cache);
77 }
78 
79 /*!
80  * \brief Get a remote node peer cache entry, creating it if necessary
81  *
82  * \param[in] node_name  Name of remote node
83  *
84  * \return Cache entry for node on success, NULL (and set errno) otherwise
85  *
86  * \note When creating a new entry, this will leave the node state undetermined,
87  *       so the caller should also call crm_update_peer_state() if the state is
88  *       known.
89  */
90 crm_node_t *
crm_remote_peer_get(const char * node_name)91 crm_remote_peer_get(const char *node_name)
92 {
93     crm_node_t *node;
94 
95     if (node_name == NULL) {
96         errno = -EINVAL;
97         return NULL;
98     }
99 
100     /* Return existing cache entry if one exists */
101     node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
102     if (node) {
103         return node;
104     }
105 
106     /* Allocate a new entry */
107     node = calloc(1, sizeof(crm_node_t));
108     if (node == NULL) {
109         return NULL;
110     }
111 
112     /* Populate the essential information */
113     node->flags = crm_remote_node;
114     node->uuid = strdup(node_name);
115     if (node->uuid == NULL) {
116         free(node);
117         errno = -ENOMEM;
118         return NULL;
119     }
120 
121     /* Add the new entry to the cache */
122     g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
123     crm_trace("added %s to remote cache", node_name);
124 
125     /* Update the entry's uname, ensuring peer status callbacks are called */
126     crm_update_peer_uname(node, node_name);
127     return node;
128 }
129 
/*!
 * \brief Add a node to the remote peer cache
 *
 * \param[in] node_name  Name of remote node
 *
 * \note This is a legacy convenience wrapper for crm_remote_peer_get()
 *       for callers that don't need the cache entry returned.
 * \note A failure of crm_remote_peer_get() (including a NULL node_name) trips
 *       the assertion instead of being reported to the caller.
 */
void
crm_remote_peer_cache_add(const char *node_name)
{
    /* NOTE(review): the lookup/creation happens inside the assertion
     * expression, so this relies on CRM_ASSERT always evaluating its
     * argument -- confirm against the macro definition.
     */
    CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
}
143 
144 void
crm_remote_peer_cache_remove(const char * node_name)145 crm_remote_peer_cache_remove(const char *node_name)
146 {
147     if (g_hash_table_remove(crm_remote_peer_cache, node_name)) {
148         crm_trace("removed %s from remote peer cache", node_name);
149     }
150 }
151 
152 /*!
153  * \internal
154  * \brief Return node status based on a CIB status entry
155  *
156  * \param[in] node_state  XML of node state
157  *
158  * \return CRM_NODE_LOST if XML_NODE_IN_CLUSTER is false in node_state,
159  *         CRM_NODE_MEMBER otherwise
160  * \note Unlike most boolean XML attributes, this one defaults to true, for
161  *       backward compatibility with older crmd versions that don't set it.
162  */
163 static const char *
remote_state_from_cib(xmlNode * node_state)164 remote_state_from_cib(xmlNode *node_state)
165 {
166     const char *status;
167 
168     status = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
169     if (status && !crm_is_true(status)) {
170         status = CRM_NODE_LOST;
171     } else {
172         status = CRM_NODE_MEMBER;
173     }
174     return status;
175 }
176 
/* User data for looping through remote node xpath searches; it is handed to
 * remote_cache_refresh_helper() once per search result.
 */
struct refresh_data {
    const char *field;  /* XML attribute to check for node name */
    gboolean has_state; /* whether to update node state based on XML */
};
182 
183 /*!
184  * \internal
185  * \brief Process one pacemaker_remote node xpath search result
186  *
187  * \param[in] result     XML search result
188  * \param[in] user_data  what to look for in the XML
189  */
190 static void
remote_cache_refresh_helper(xmlNode * result,void * user_data)191 remote_cache_refresh_helper(xmlNode *result, void *user_data)
192 {
193     struct refresh_data *data = user_data;
194     const char *remote = crm_element_value(result, data->field);
195     const char *state = NULL;
196     crm_node_t *node;
197 
198     CRM_CHECK(remote != NULL, return);
199 
200     /* Determine node's state, if the result has it */
201     if (data->has_state) {
202         state = remote_state_from_cib(result);
203     }
204 
205     /* Check whether cache already has entry for node */
206     node = g_hash_table_lookup(crm_remote_peer_cache, remote);
207 
208     if (node == NULL) {
209         /* Node is not in cache, so add a new entry for it */
210         node = crm_remote_peer_get(remote);
211         CRM_ASSERT(node);
212         if (state) {
213             crm_update_peer_state(__FUNCTION__, node, state, 0);
214         }
215 
216     } else if (is_set(node->flags, crm_node_dirty)) {
217         /* Node is in cache and hasn't been updated already, so mark it clean */
218         clear_bit(node->flags, crm_node_dirty);
219         if (state) {
220             crm_update_peer_state(__FUNCTION__, node, state, 0);
221         }
222     }
223 }
224 
/*!
 * \internal
 * \brief Hash table iteration callback that flags a cache entry as dirty
 *
 * \param[in] key        Hash table key (unused)
 * \param[in] value      Cache entry (crm_node_t *) to flag
 * \param[in] user_data  Unused
 */
static void
mark_dirty(gpointer key, gpointer value, gpointer user_data)
{
    set_bit(((crm_node_t*)value)->flags, crm_node_dirty);
}
230 
231 static gboolean
is_dirty(gpointer key,gpointer value,gpointer user_data)232 is_dirty(gpointer key, gpointer value, gpointer user_data)
233 {
234     return is_set(((crm_node_t*)value)->flags, crm_node_dirty);
235 }
236 
/* XPath search string to find CIB resource entries for guest nodes: it
 * selects the meta-attribute name/value pair named XML_RSC_ATTR_REMOTE_NODE
 * anywhere beneath a resource in the configuration section.
 */
#define XPATH_GUEST_NODE_CONFIG \
    "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \
    "//" XML_TAG_META_SETS "//" XML_CIB_TAG_NVPAIR \
    "[@name='" XML_RSC_ATTR_REMOTE_NODE "']"
242 
/* XPath search string to find CIB resource entries for remote nodes: it
 * selects configured resources whose type is 'remote' and whose provider
 * is 'pacemaker'.
 */
#define XPATH_REMOTE_NODE_CONFIG \
    "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \
    "[@type='remote'][@provider='pacemaker']"
247 
/* XPath search string to find CIB node status entries for pacemaker_remote
 * nodes: it selects node state entries in the status section whose
 * XML_NODE_IS_REMOTE attribute is 'true'.
 */
#define XPATH_REMOTE_NODE_STATUS \
    "//" XML_TAG_CIB "//" XML_CIB_TAG_STATUS "//" XML_CIB_TAG_STATE \
    "[@" XML_NODE_IS_REMOTE "='true']"
252 
253 /*!
254  * \brief Repopulate the remote peer cache based on CIB XML
255  *
256  * \param[in] xmlNode  CIB XML to parse
257  */
258 void
crm_remote_peer_cache_refresh(xmlNode * cib)259 crm_remote_peer_cache_refresh(xmlNode *cib)
260 {
261     struct refresh_data data;
262 
263     crm_peer_init();
264 
265     /* First, we mark all existing cache entries as dirty,
266      * so that later we can remove any that weren't in the CIB.
267      * We don't empty the cache, because we need to detect changes in state.
268      */
269     g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);
270 
271     /* Look for guest nodes and remote nodes in the status section */
272     data.field = "id";
273     data.has_state = TRUE;
274     crm_foreach_xpath_result(cib, XPATH_REMOTE_NODE_STATUS,
275                              remote_cache_refresh_helper, &data);
276 
277     /* Look for guest nodes and remote nodes in the configuration section,
278      * because they may have just been added and not have a status entry yet.
279      * In that case, the cached node state will be left NULL, so that the
280      * peer status callback isn't called until we're sure the node started
281      * successfully.
282      */
283     data.field = "value";
284     data.has_state = FALSE;
285     crm_foreach_xpath_result(cib, XPATH_GUEST_NODE_CONFIG,
286                              remote_cache_refresh_helper, &data);
287     data.field = "id";
288     data.has_state = FALSE;
289     crm_foreach_xpath_result(cib, XPATH_REMOTE_NODE_CONFIG,
290                              remote_cache_refresh_helper, &data);
291 
292     /* Remove all old cache entries that weren't seen in the CIB */
293     g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
294 }
295 
296 gboolean
crm_is_peer_active(const crm_node_t * node)297 crm_is_peer_active(const crm_node_t * node)
298 {
299     if(node == NULL) {
300         return FALSE;
301     }
302 
303     if (is_set(node->flags, crm_remote_node)) {
304         /* remote nodes are never considered active members. This
305          * guarantees they will never be considered for DC membership.*/
306         return FALSE;
307     }
308 #if SUPPORT_COROSYNC
309     if (is_openais_cluster()) {
310         return crm_is_corosync_peer_active(node);
311     }
312 #endif
313 #if SUPPORT_HEARTBEAT
314     if (is_heartbeat_cluster()) {
315         return crm_is_heartbeat_peer_active(node);
316     }
317 #endif
318     crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type()));
319     return FALSE;
320 }
321 
322 static gboolean
crm_reap_dead_member(gpointer key,gpointer value,gpointer user_data)323 crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data)
324 {
325     crm_node_t *node = value;
326     crm_node_t *search = user_data;
327 
328     if (search == NULL) {
329         return FALSE;
330 
331     } else if (search->id && node->id != search->id) {
332         return FALSE;
333 
334     } else if (search->id == 0 && safe_str_neq(node->uname, search->uname)) {
335         return FALSE;
336 
337     } else if (crm_is_peer_active(value) == FALSE) {
338         crm_info("Removing node with name %s and id %u from membership cache",
339                  (node->uname? node->uname : "unknown"), node->id);
340         return TRUE;
341     }
342     return FALSE;
343 }
344 
345 /*!
346  * \brief Remove all peer cache entries matching a node ID and/or uname
347  *
348  * \param[in] id    ID of node to remove (or 0 to ignore)
349  * \param[in] name  Uname of node to remove (or NULL to ignore)
350  *
351  * \return Number of cache entries removed
352  */
353 guint
reap_crm_member(uint32_t id,const char * name)354 reap_crm_member(uint32_t id, const char *name)
355 {
356     int matches = 0;
357     crm_node_t search;
358 
359     if (crm_peer_cache == NULL) {
360         crm_trace("Membership cache not initialized, ignoring purge request");
361         return 0;
362     }
363 
364     search.id = id;
365     search.uname = name ? strdup(name) : NULL;
366     matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search);
367     if(matches) {
368         crm_notice("Purged %d peer%s with id=%u%s%s from the membership cache",
369                    matches, s_if_plural(matches), search.id,
370                    (search.uname? " and/or uname=" : ""),
371                    (search.uname? search.uname : ""));
372 
373     } else {
374         crm_info("No peers with id=%u%s%s to purge from the membership cache",
375                  search.id, (search.uname? " and/or uname=" : ""),
376                  (search.uname? search.uname : ""));
377     }
378 
379     free(search.uname);
380     return matches;
381 }
382 
383 static void
crm_count_peer(gpointer key,gpointer value,gpointer user_data)384 crm_count_peer(gpointer key, gpointer value, gpointer user_data)
385 {
386     guint *count = user_data;
387     crm_node_t *node = value;
388 
389     if (crm_is_peer_active(node)) {
390         *count = *count + 1;
391     }
392 }
393 
394 guint
crm_active_peers(void)395 crm_active_peers(void)
396 {
397     guint count = 0;
398 
399     if (crm_peer_cache) {
400         g_hash_table_foreach(crm_peer_cache, crm_count_peer, &count);
401     }
402     return count;
403 }
404 
405 static void
destroy_crm_node(gpointer data)406 destroy_crm_node(gpointer data)
407 {
408     crm_node_t *node = data;
409 
410     crm_trace("Destroying entry for node %u: %s", node->id, node->uname);
411 
412     free(node->addr);
413     free(node->uname);
414     free(node->state);
415     free(node->uuid);
416     free(node->expected);
417     free(node);
418 }
419 
420 void
crm_peer_init(void)421 crm_peer_init(void)
422 {
423     if (crm_peer_cache == NULL) {
424         crm_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, destroy_crm_node);
425     }
426 
427     if (crm_remote_peer_cache == NULL) {
428         crm_remote_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, destroy_crm_node);
429     }
430 
431     if (crm_known_peer_cache == NULL) {
432         crm_known_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, destroy_crm_node);
433     }
434 }
435 
436 void
crm_peer_destroy(void)437 crm_peer_destroy(void)
438 {
439     if (crm_peer_cache != NULL) {
440         crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache));
441         g_hash_table_destroy(crm_peer_cache);
442         crm_peer_cache = NULL;
443     }
444 
445     if (crm_remote_peer_cache != NULL) {
446         crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache));
447         g_hash_table_destroy(crm_remote_peer_cache);
448         crm_remote_peer_cache = NULL;
449     }
450 
451     if (crm_known_peer_cache != NULL) {
452         crm_trace("Destroying known peer cache with %d members", g_hash_table_size(crm_known_peer_cache));
453         g_hash_table_destroy(crm_known_peer_cache);
454         crm_known_peer_cache = NULL;
455     }
456 
457 }
458 
/* Client function invoked after peer status changes, or NULL if none has been
 * registered; set via crm_set_status_callback(). It receives the kind of
 * change, the affected node, and change-specific data.
 */
void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL;
460 
/*!
 * \brief Set a client function that will be called after peer status changes
 *
 * \param[in] dispatch  Pointer to function to use as callback (it replaces
 *                      any previously registered function)
 *
 * \note Previously, client callbacks were responsible for peer cache
 *       management. This is no longer the case, and client callbacks should do
 *       only client-specific handling. Callbacks MUST NOT add or remove entries
 *       in the peer caches.
 */
void
crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
{
    crm_status_callback = dispatch;
}
476 
/*!
 * \brief Tell the library whether to automatically reap lost nodes
 *
 * If TRUE (the default), calling crm_update_peer_proc() will also update the
 * peer state to CRM_NODE_MEMBER or CRM_NODE_LOST, and crm_update_peer_state()
 * will reap peers whose state changes to anything other than CRM_NODE_MEMBER.
 * Callers should leave this enabled unless they plan to manage the cache
 * separately on their own.
 *
 * \param[in] autoreap  TRUE to enable automatic reaping, FALSE to disable
 */
void
crm_set_autoreap(gboolean autoreap)
{
    /* Only records the setting; nothing is reaped by this call itself */
    crm_autoreap = autoreap;
}
493 
crm_dump_peer_hash(int level,const char * caller)494 static void crm_dump_peer_hash(int level, const char *caller)
495 {
496     GHashTableIter iter;
497     const char *id = NULL;
498     crm_node_t *node = NULL;
499 
500     g_hash_table_iter_init(&iter, crm_peer_cache);
501     while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
502         do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
503     }
504 }
505 
crm_hash_find_by_data(gpointer key,gpointer value,gpointer user_data)506 static gboolean crm_hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
507 {
508     if(value == user_data) {
509         return TRUE;
510     }
511     return FALSE;
512 }
513 
514 crm_node_t *
crm_find_peer_full(unsigned int id,const char * uname,int flags)515 crm_find_peer_full(unsigned int id, const char *uname, int flags)
516 {
517     crm_node_t *node = NULL;
518 
519     CRM_ASSERT(id > 0 || uname != NULL);
520 
521     crm_peer_init();
522 
523     if ((uname != NULL) && (flags & CRM_GET_PEER_REMOTE)) {
524         node = g_hash_table_lookup(crm_remote_peer_cache, uname);
525     }
526 
527     if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
528         node = crm_find_peer(id, uname);
529     }
530     return node;
531 }
532 
533 crm_node_t *
crm_get_peer_full(unsigned int id,const char * uname,int flags)534 crm_get_peer_full(unsigned int id, const char *uname, int flags)
535 {
536     crm_node_t *node = NULL;
537 
538     CRM_ASSERT(id > 0 || uname != NULL);
539 
540     crm_peer_init();
541 
542     if (flags & CRM_GET_PEER_REMOTE) {
543         node = g_hash_table_lookup(crm_remote_peer_cache, uname);
544     }
545 
546     if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
547         node = crm_get_peer(id, uname);
548     }
549     return node;
550 }
551 
/*!
 * \brief Find a node in the cluster peer cache by ID and/or name
 *
 * The cache is searched once by name (case-insensitively) and once by ID.
 * When the two searches disagree, the conflict is logged and resolved as
 * described in the comments below. No entry is ever created here, but a
 * duplicate entry may be removed from the cache.
 *
 * \param[in] id     Cluster node ID to look for (or 0 if unknown)
 * \param[in] uname  Node name to look for (or NULL if unknown)
 *
 * \return Matching cache entry, or NULL if there is none (or if only a name
 *         match exists and that entry already carries a different ID)
 *
 * \note At least one of id and uname must be given.
 * \note Because entries can be removed, this should not be called while
 *       iterating over the peer cache.
 */
crm_node_t *
crm_find_peer(unsigned int id, const char *uname)
{
    GHashTableIter iter;
    crm_node_t *node = NULL;
    crm_node_t *by_id = NULL;
    crm_node_t *by_name = NULL;

    CRM_ASSERT(id > 0 || uname != NULL);

    crm_peer_init();

    /* First entry whose name matches, ignoring case */
    if (uname != NULL) {
        g_hash_table_iter_init(&iter, crm_peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if(node->uname && strcasecmp(node->uname, uname) == 0) {
                crm_trace("Name match: %s = %p", node->uname, node);
                by_name = node;
                break;
            }
        }
    }

    /* First entry whose ID matches */
    if (id > 0) {
        g_hash_table_iter_init(&iter, crm_peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if(node->id == id) {
                crm_trace("ID match: %u = %p", node->id, node);
                by_id = node;
                break;
            }
        }
    }

    node = by_id; /* Good default */
    if(by_id == by_name) {
        /* Nothing to do if they match (both NULL counts) */
        crm_trace("Consistent: %p for %u/%s", by_id, id, uname);

    } else if(by_id == NULL && by_name) {
        /* Only the name search found an entry */
        crm_trace("Only one: %p for %u/%s", by_name, id, uname);

        if(id && by_name->id) {
            /* The named entry already has a different ID (otherwise the ID
             * search would have found it), so it can't be the node wanted
             */
            crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
            crm_crit("Node %u and %u share the same name '%s'",
                     id, by_name->id, uname);
            node = NULL; /* Create a new one */

        } else {
            node = by_name;
        }

    } else if(by_name == NULL && by_id) {
        /* Only the ID search found an entry; it is returned regardless */
        crm_trace("Only one: %p for %u/%s", by_id, id, uname);

        if(uname && by_id->uname) {
            /* The entry's name differs from the requested one (otherwise the
             * name search would have found it)
             */
            crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
            crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
                     uname, by_id->uname, id, uname);
        }

    /* From here on, the two searches found two different entries
     * (so uname is necessarily non-NULL)
     */
    } else if(uname && by_id->uname) {
        if(safe_str_eq(uname, by_id->uname)) {
            /* Both entries carry the requested name: keep the one with the
             * requested ID and drop the other from the cache
             */
            crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
            g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);

        } else {
            /* The entry with the requested ID has some other name.
             * NOTE(review): the effect of the two trailing TRUE arguments to
             * crm_abort() is not visible here -- confirm whether this path
             * terminates or forks to dump core.
             */
            crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
            crm_dump_peer_hash(LOG_INFO, __FUNCTION__);
            crm_abort(__FILE__, __FUNCTION__, __LINE__, "member weirdness", TRUE, TRUE);
        }

    } else if(id && by_name->id) {
        /* The ID entry has no name, and the name entry has a different ID */
        crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);

    } else {
        /* Simple merge */

        /* Only corosync based clusters use nodeid's
         *
         * The functions that call crm_update_peer_state() only know nodeid
         * so 'by_id' is authorative when merging
         *
         * Same for crm_update_peer_proc()
         */
        crm_dump_peer_hash(LOG_DEBUG, __FUNCTION__);

        /* "Merging" drops the name-only entry; by_id is what gets returned */
        crm_info("Merging %p into %p", by_name, by_id);
        g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
    }

    return node;
}
645 
646 #if SUPPORT_COROSYNC
647 static guint
crm_remove_conflicting_peer(crm_node_t * node)648 crm_remove_conflicting_peer(crm_node_t *node)
649 {
650     int matches = 0;
651     GHashTableIter iter;
652     crm_node_t *existing_node = NULL;
653 
654     if (node->id == 0 || node->uname == NULL) {
655         return 0;
656     }
657 
658 #  if !SUPPORT_PLUGIN
659     if (corosync_cmap_has_config("nodelist") != 0) {
660         return 0;
661     }
662 #  endif
663 
664     g_hash_table_iter_init(&iter, crm_peer_cache);
665     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
666         if (existing_node->id > 0
667             && existing_node->id != node->id
668             && existing_node->uname != NULL
669             && strcasecmp(existing_node->uname, node->uname) == 0) {
670 
671             if (crm_is_peer_active(existing_node)) {
672                 continue;
673             }
674 
675             crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
676                      existing_node->id, existing_node->uname, node->id);
677 
678             g_hash_table_iter_remove(&iter);
679             matches++;
680         }
681     }
682 
683     return matches;
684 }
685 #endif
686 
687 /* coverity[-alloc] Memory is referenced in one or both hashtables */
688 crm_node_t *
crm_get_peer(unsigned int id,const char * uname)689 crm_get_peer(unsigned int id, const char *uname)
690 {
691     crm_node_t *node = NULL;
692     char *uname_lookup = NULL;
693 
694     CRM_ASSERT(id > 0 || uname != NULL);
695 
696     crm_peer_init();
697 
698     node = crm_find_peer(id, uname);
699 
700     /* if uname wasn't provided, and find_peer did not turn up a uname based on id.
701      * we need to do a lookup of the node name using the id in the cluster membership. */
702     if ((node == NULL || node->uname == NULL) && (uname == NULL)) {
703         uname_lookup = get_node_name(id);
704     }
705 
706     if (uname_lookup) {
707         uname = uname_lookup;
708         crm_trace("Inferred a name of '%s' for node %u", uname, id);
709 
710         /* try to turn up the node one more time now that we know the uname. */
711         if (node == NULL) {
712             node = crm_find_peer(id, uname);
713         }
714     }
715 
716 
717     if (node == NULL) {
718         char *uniqueid = crm_generate_uuid();
719 
720         node = calloc(1, sizeof(crm_node_t));
721         CRM_ASSERT(node);
722 
723         crm_info("Created entry %s/%p for node %s/%u (%d total)",
724                  uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
725         g_hash_table_replace(crm_peer_cache, uniqueid, node);
726     }
727 
728     if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
729         crm_info("Node %u is now known as %s", id, uname);
730     }
731 
732     if(id > 0 && node->id == 0) {
733         node->id = id;
734     }
735 
736     if (uname && (node->uname == NULL)) {
737         crm_update_peer_uname(node, uname);
738     }
739 
740     if(node->uuid == NULL) {
741         const char *uuid = crm_peer_uuid(node);
742 
743         if (uuid) {
744             crm_info("Node %u has uuid %s", id, uuid);
745 
746         } else {
747             crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
748         }
749     }
750 
751     free(uname_lookup);
752 
753     return node;
754 }
755 
/*!
 * \internal
 * \brief Update all of a node's information (process list, state, etc.)
 *
 * \param[in] source      Caller's function name (for log messages)
 * \param[in] id          Node ID (passed through get_corosync_id() together
 *                        with uuid before use)
 * \param[in] born        If nonzero, stored as the node's "born" value (only
 *                        when built with heartbeat or plugin support)
 * \param[in] seen        Passed to crm_update_peer_state() along with state
 * \param[in] votes       If positive, stored as the node's votes (only when
 *                        built with plugin support)
 * \param[in] children    If nonzero, process bitmask passed to
 *                        crm_update_peer_proc()
 * \param[in] uuid        Node UUID; stored only if the node has none yet and
 *                        the cluster is not an openais cluster
 * \param[in] uname       Node name used to find or create the cache entry
 * \param[in] addr        If non-NULL, stored as the node's address (only when
 *                        built with plugin support)
 * \param[in] state       If non-NULL, new node state; also passed as the
 *                        status argument of crm_update_peer_proc()
 *
 * \return NULL if node was reaped from peer caches, pointer to node otherwise
 *
 * \note This function should not be called within a peer cache iteration,
 *       otherwise reaping could invalidate the iterator.
 */
crm_node_t *
crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes,
                uint32_t children, const char *uuid, const char *uname, const char *addr,
                const char *state)
{
#if SUPPORT_PLUGIN
    gboolean addr_changed = FALSE;
    gboolean votes_changed = FALSE;
#endif
    crm_node_t *node = NULL;

    /* NOTE(review): presumably picks the node ID to use from id and uuid --
     * confirm get_corosync_id() semantics.
     */
    id = get_corosync_id(id, uuid);
    node = crm_get_peer(id, uname);

    CRM_ASSERT(node != NULL);

    if (node->uuid == NULL) {
        if (is_openais_cluster()) {
            /* Yes, overrule whatever was passed in */
            crm_peer_uuid(node);

        } else if (uuid != NULL) {
            node->uuid = strdup(uuid);
        }
    }

    /* Either update below can reap the node, in which case the node must not
     * be touched again
     */
    if (children > 0) {
        if (crm_update_peer_proc(source, node, children, state) == NULL) {
            return NULL;
        }
    }

    if (state != NULL) {
        if (crm_update_peer_state(source, node, state, seen) == NULL) {
            return NULL;
        }
    }
#if SUPPORT_HEARTBEAT
    if (born != 0) {
        node->born = born;
    }
#endif

#if SUPPORT_PLUGIN
    /* These were only used by the plugin */
    if (born != 0) {
        node->born = born;
    }

    if (votes > 0 && node->votes != votes) {
        votes_changed = TRUE;
        node->votes = votes;
    }

    if (addr != NULL) {
        if (node->addr == NULL || crm_str_eq(node->addr, addr, FALSE) == FALSE) {
            addr_changed = TRUE;
            free(node->addr);
            node->addr = strdup(addr);
        }
    }
    /* Log the node's full details only when its address or votes changed */
    if (addr_changed || votes_changed) {
        crm_info("%s: Node %s: id=%u state=%s addr=%s%s votes=%d%s born=" U64T " seen=" U64T
                 " proc=%.32x", source, node->uname, node->id, node->state,
                 node->addr, addr_changed ? " (new)" : "", node->votes,
                 votes_changed ? " (new)" : "", node->born, node->last_seen, node->processes);
    }
#endif

    return node;
}
838 
839 /*!
840  * \internal
841  * \brief Update a node's uname
842  *
843  * \param[in] node        Node object to update
844  * \param[in] uname       New name to set
845  *
846  * \note This function should not be called within a peer cache iteration,
847  *       because in some cases it can remove conflicting cache entries,
848  *       which would invalidate the iterator.
849  */
850 void
crm_update_peer_uname(crm_node_t * node,const char * uname)851 crm_update_peer_uname(crm_node_t *node, const char *uname)
852 {
853     CRM_CHECK(uname != NULL,
854               crm_err("Bug: can't update node name without name"); return);
855     CRM_CHECK(node != NULL,
856               crm_err("Bug: can't update node name to %s without node", uname);
857               return);
858 
859     if (safe_str_eq(uname, node->uname)) {
860         crm_debug("Node uname '%s' did not change", uname);
861         return;
862     }
863 
864     for (const char *c = uname; *c; ++c) {
865         if ((*c >= 'A') && (*c <= 'Z')) {
866             crm_warn("Node names with capitals are discouraged, consider changing '%s'",
867                      uname);
868             break;
869         }
870     }
871 
872     free(node->uname);
873     node->uname = strdup(uname);
874     CRM_ASSERT(node->uname != NULL);
875 
876     if (crm_status_callback) {
877         crm_status_callback(crm_status_uname, node, NULL);
878     }
879 
880 #if SUPPORT_COROSYNC
881     if (is_openais_cluster() && !is_set(node->flags, crm_remote_node)) {
882         crm_remove_conflicting_peer(node);
883     }
884 #endif
885 }
886 
887 /*!
888  * \internal
889  * \brief Update a node's process information (and potentially state)
890  *
891  * \param[in] source      Caller's function name (for log messages)
892  * \param[in] node        Node object to update
893  * \param[in] flag        Bitmask of new process information
894  * \param[in] status      node status (online, offline, etc.)
895  *
896  * \return NULL if any node was reaped from peer caches, value of node otherwise
897  *
898  * \note If this function returns NULL, the supplied node object was likely
899  *       freed and should not be used again. This function should not be
900  *       called within a cache iteration if reaping is possible, otherwise
901  *       reaping could invalidate the iterator.
902  */
903 crm_node_t *
crm_update_peer_proc(const char * source,crm_node_t * node,uint32_t flag,const char * status)904 crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
905 {
906     uint32_t last = 0;
907     gboolean changed = FALSE;
908 
909     CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
910                                     source, peer2text(flag), status); return NULL);
911 
912     /* Pacemaker doesn't spawn processes on remote nodes */
913     if (is_set(node->flags, crm_remote_node)) {
914         return node;
915     }
916 
917     last = node->processes;
918     if (status == NULL) {
919         node->processes = flag;
920         if (node->processes != last) {
921             changed = TRUE;
922         }
923 
924     } else if (safe_str_eq(status, ONLINESTATUS)) {
925         if ((node->processes & flag) != flag) {
926             set_bit(node->processes, flag);
927             changed = TRUE;
928         }
929 #if SUPPORT_PLUGIN
930     } else if (safe_str_eq(status, CRM_NODE_MEMBER)) {
931         if (flag > 0 && node->processes != flag) {
932             node->processes = flag;
933             changed = TRUE;
934         }
935 #endif
936 
937     } else if (node->processes & flag) {
938         clear_bit(node->processes, flag);
939         changed = TRUE;
940     }
941 
942     if (changed) {
943         if (status == NULL && flag <= crm_proc_none) {
944             crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
945                      node->id);
946         } else {
947             crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
948                      peer2text(flag), status);
949         }
950 
951         /* Call the client callback first, then update the peer state,
952          * in case the node will be reaped
953          */
954         if (crm_status_callback) {
955             crm_status_callback(crm_status_processes, node, &last);
956         }
957 
958         /* The client callback shouldn't touch the peer caches,
959          * but as a safety net, bail if the peer cache was destroyed.
960          */
961         if (crm_peer_cache == NULL) {
962             return NULL;
963         }
964 
965         if (crm_autoreap) {
966             node = crm_update_peer_state(__FUNCTION__, node,
967                                          is_set(node->processes, crm_get_cluster_proc())?
968                                          CRM_NODE_MEMBER : CRM_NODE_LOST, 0);
969         }
970     } else {
971         crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
972                   peer2text(flag), status);
973     }
974     return node;
975 }
976 
977 void
crm_update_peer_expected(const char * source,crm_node_t * node,const char * expected)978 crm_update_peer_expected(const char *source, crm_node_t * node, const char *expected)
979 {
980     char *last = NULL;
981     gboolean changed = FALSE;
982 
983     CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
984               return);
985 
986     /* Remote nodes don't participate in joins */
987     if (is_set(node->flags, crm_remote_node)) {
988         return;
989     }
990 
991     last = node->expected;
992     if (expected != NULL && safe_str_neq(node->expected, expected)) {
993         node->expected = strdup(expected);
994         changed = TRUE;
995     }
996 
997     if (changed) {
998         crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
999                  expected, last);
1000         free(last);
1001     } else {
1002         crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
1003                   node->id, expected);
1004     }
1005 }
1006 
1007 /*!
1008  * \internal
1009  * \brief Update a node's state and membership information
1010  *
1011  * \param[in] source      Caller's function name (for log messages)
1012  * \param[in] node        Node object to update
1013  * \param[in] state       Node's new state
1014  * \param[in] membership  Node's new membership ID
1015  * \param[in] iter        If not NULL, pointer to node's peer cache iterator
1016  *
1017  * \return NULL if any node was reaped, value of node otherwise
1018  *
1019  * \note If this function returns NULL, the supplied node object was likely
1020  *       freed and should not be used again. This function may be called from
1021  *       within a peer cache iteration if the iterator is supplied.
1022  */
1023 static crm_node_t *
crm_update_peer_state_iter(const char * source,crm_node_t * node,const char * state,uint64_t membership,GHashTableIter * iter)1024 crm_update_peer_state_iter(const char *source, crm_node_t * node, const char *state, uint64_t membership, GHashTableIter *iter)
1025 {
1026     gboolean is_member;
1027 
1028     CRM_CHECK(node != NULL,
1029               crm_err("Could not set state for unknown host to %s"
1030                       CRM_XS " source=%s", state, source);
1031               return NULL);
1032 
1033     is_member = safe_str_eq(state, CRM_NODE_MEMBER);
1034     if (membership && is_member) {
1035         node->last_seen = membership;
1036     }
1037 
1038     if (state && safe_str_neq(node->state, state)) {
1039         char *last = node->state;
1040         enum crm_status_type status_type = is_set(node->flags, crm_remote_node)?
1041                                            crm_status_rstate : crm_status_nstate;
1042 
1043         node->state = strdup(state);
1044         crm_notice("Node %s state is now %s " CRM_XS
1045                    " nodeid=%u previous=%s source=%s", node->uname, state,
1046                    node->id, (last? last : "unknown"), source);
1047         if (crm_status_callback) {
1048             crm_status_callback(status_type, node, last);
1049         }
1050         free(last);
1051 
1052         if (crm_autoreap && !is_member && !is_set(node->flags, crm_remote_node)) {
1053             /* We only autoreap from the peer cache, not the remote peer cache,
1054              * because the latter should be managed only by
1055              * crm_remote_peer_cache_refresh().
1056              */
1057             if(iter) {
1058                 crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname);
1059                 g_hash_table_iter_remove(iter);
1060 
1061             } else {
1062                 reap_crm_member(node->id, node->uname);
1063             }
1064             node = NULL;
1065         }
1066 
1067     } else {
1068         crm_trace("Node %s state is unchanged (%s) " CRM_XS
1069                   " nodeid=%u source=%s", node->uname, state, node->id, source);
1070     }
1071     return node;
1072 }
1073 
1074 /*!
1075  * \brief Update a node's state and membership information
1076  *
1077  * \param[in] source      Caller's function name (for log messages)
1078  * \param[in] node        Node object to update
1079  * \param[in] state       Node's new state
1080  * \param[in] membership  Node's new membership ID
1081  *
1082  * \return NULL if any node was reaped, value of node otherwise
1083  *
1084  * \note If this function returns NULL, the supplied node object was likely
1085  *       freed and should not be used again. This function should not be
1086  *       called within a cache iteration if reaping is possible,
1087  *       otherwise reaping could invalidate the iterator.
1088  */
1089 crm_node_t *
crm_update_peer_state(const char * source,crm_node_t * node,const char * state,uint64_t membership)1090 crm_update_peer_state(const char *source, crm_node_t * node, const char *state, uint64_t membership)
1091 {
1092     return crm_update_peer_state_iter(source, node, state, membership, NULL);
1093 }
1094 
1095 /*!
1096  * \internal
1097  * \brief Reap all nodes from cache whose membership information does not match
1098  *
1099  * \param[in] membership  Membership ID of nodes to keep
1100  */
1101 void
crm_reap_unseen_nodes(uint64_t membership)1102 crm_reap_unseen_nodes(uint64_t membership)
1103 {
1104     GHashTableIter iter;
1105     crm_node_t *node = NULL;
1106 
1107     crm_trace("Reaping unseen nodes...");
1108     g_hash_table_iter_init(&iter, crm_peer_cache);
1109     while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
1110         if (node->last_seen != membership) {
1111             if (node->state) {
1112                 /*
1113                  * Calling crm_update_peer_state_iter() allows us to
1114                  * remove the node from crm_peer_cache without
1115                  * invalidating our iterator
1116                  */
1117                 crm_update_peer_state_iter(__FUNCTION__, node, CRM_NODE_LOST, membership, &iter);
1118 
1119             } else {
1120                 crm_info("State of node %s[%u] is still unknown",
1121                          node->uname, node->id);
1122             }
1123         }
1124     }
1125 }
1126 
1127 int
crm_terminate_member(int nodeid,const char * uname,void * unused)1128 crm_terminate_member(int nodeid, const char *uname, void *unused)
1129 {
1130     /* Always use the synchronous, non-mainloop version */
1131     return stonith_api_kick(nodeid, uname, 120, TRUE);
1132 }
1133 
1134 int
crm_terminate_member_no_mainloop(int nodeid,const char * uname,int * connection)1135 crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
1136 {
1137     return stonith_api_kick(nodeid, uname, 120, TRUE);
1138 }
1139 
1140 static crm_node_t *
crm_find_known_peer(const char * id,const char * uname)1141 crm_find_known_peer(const char *id, const char *uname)
1142 {
1143     GHashTableIter iter;
1144     crm_node_t *node = NULL;
1145     crm_node_t *by_id = NULL;
1146     crm_node_t *by_name = NULL;
1147 
1148     if (uname) {
1149         g_hash_table_iter_init(&iter, crm_known_peer_cache);
1150         while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1151             if (node->uname && strcasecmp(node->uname, uname) == 0) {
1152                 crm_trace("Name match: %s = %p", node->uname, node);
1153                 by_name = node;
1154                 break;
1155             }
1156         }
1157     }
1158 
1159     if (id) {
1160         g_hash_table_iter_init(&iter, crm_known_peer_cache);
1161         while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1162             if(strcasecmp(node->uuid, id) == 0) {
1163                 crm_trace("ID match: %s= %p", id, node);
1164                 by_id = node;
1165                 break;
1166             }
1167         }
1168     }
1169 
1170     node = by_id; /* Good default */
1171     if (by_id == by_name) {
1172         /* Nothing to do if they match (both NULL counts) */
1173         crm_trace("Consistent: %p for %s/%s", by_id, id, uname);
1174 
1175     } else if (by_id == NULL && by_name) {
1176         crm_trace("Only one: %p for %s/%s", by_name, id, uname);
1177 
1178         if (id) {
1179             node = NULL;
1180 
1181         } else {
1182             node = by_name;
1183         }
1184 
1185     } else if (by_name == NULL && by_id) {
1186         crm_trace("Only one: %p for %s/%s", by_id, id, uname);
1187 
1188         if (uname) {
1189             node = NULL;
1190         }
1191 
1192     } else if (uname && by_id->uname
1193                && safe_str_eq(uname, by_id->uname)) {
1194         /* Multiple nodes have the same uname in the CIB.
1195          * Return by_id. */
1196 
1197     } else if (id && by_name->uuid
1198                && safe_str_eq(id, by_name->uuid)) {
1199         /* Multiple nodes have the same id in the CIB.
1200          * Return by_name. */
1201         node = by_name;
1202 
1203     } else {
1204         node = NULL;
1205     }
1206 
1207     if (node == NULL) {
1208         crm_debug("Couldn't find node%s%s%s%s",
1209                    id? " " : "",
1210                    id? id : "",
1211                    uname? " with name " : "",
1212                    uname? uname : "");
1213     }
1214 
1215     return node;
1216 }
1217 
1218 static void
known_peer_cache_refresh_helper(xmlNode * xml_node,void * user_data)1219 known_peer_cache_refresh_helper(xmlNode *xml_node, void *user_data)
1220 {
1221     const char *id = crm_element_value(xml_node, XML_ATTR_ID);
1222     const char *uname = crm_element_value(xml_node, XML_ATTR_UNAME);
1223     crm_node_t * node =  NULL;
1224 
1225     CRM_CHECK(id != NULL && uname !=NULL, return);
1226     node = crm_find_known_peer(id, uname);
1227 
1228     if (node == NULL) {
1229         char *uniqueid = crm_generate_uuid();
1230 
1231         node = calloc(1, sizeof(crm_node_t));
1232         CRM_ASSERT(node != NULL);
1233 
1234         node->uname = strdup(uname);
1235         CRM_ASSERT(node->uname != NULL);
1236 
1237         node->uuid = strdup(id);
1238         CRM_ASSERT(node->uuid != NULL);
1239 
1240         g_hash_table_replace(crm_known_peer_cache, uniqueid, node);
1241 
1242     } else if (is_set(node->flags, crm_node_dirty)) {
1243         if (safe_str_neq(uname, node->uname)) {
1244             free(node->uname);
1245             node->uname = strdup(uname);
1246             CRM_ASSERT(node->uname != NULL);
1247         }
1248 
1249         /* Node is in cache and hasn't been updated already, so mark it clean */
1250         clear_bit(node->flags, crm_node_dirty);
1251     }
1252 
1253 }
1254 
1255 #define XPATH_MEMBER_NODE_CONFIG \
1256     "//" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_NODES \
1257     "/" XML_CIB_TAG_NODE "[not(@type) or @type='member']"
1258 
1259 static void
crm_known_peer_cache_refresh(xmlNode * cib)1260 crm_known_peer_cache_refresh(xmlNode *cib)
1261 {
1262     crm_peer_init();
1263 
1264     g_hash_table_foreach(crm_known_peer_cache, mark_dirty, NULL);
1265 
1266     crm_foreach_xpath_result(cib, XPATH_MEMBER_NODE_CONFIG,
1267                              known_peer_cache_refresh_helper, NULL);
1268 
1269     /* Remove all old cache entries that weren't seen in the CIB */
1270     g_hash_table_foreach_remove(crm_known_peer_cache, is_dirty, NULL);
1271 }
1272 
1273 void
crm_peer_caches_refresh(xmlNode * cib)1274 crm_peer_caches_refresh(xmlNode *cib)
1275 {
1276     crm_remote_peer_cache_refresh(cib);
1277     crm_known_peer_cache_refresh(cib);
1278 }
1279 
1280 crm_node_t *
crm_find_known_peer_full(unsigned int id,const char * uname,int flags)1281 crm_find_known_peer_full(unsigned int id, const char *uname, int flags)
1282 {
1283     crm_node_t *node = NULL;
1284     char *id_str = NULL;
1285 
1286     CRM_ASSERT(id > 0 || uname != NULL);
1287 
1288     node = crm_find_peer_full(id, uname, flags);
1289 
1290     if (node || !(flags & CRM_GET_PEER_CLUSTER)) {
1291         return node;
1292     }
1293 
1294     if (id > 0) {
1295         id_str = crm_strdup_printf("%u", id);
1296     }
1297 
1298     node = crm_find_known_peer(id_str, uname);
1299 
1300     free(id_str);
1301     return node;
1302 }
1303