1 /*
2  * Copyright (c) 2016 Red Hat, Inc.
3  *
4  * All rights reserved.
5  *
6  * Author: Christine Caulfield (ccaulfie@redhat.com)
7  *
8  * This software licensed under BSD license, the text of which follows:
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions are met:
12  *
13  * - Redistributions of source code must retain the above copyright notice,
14  *   this list of conditions and the following disclaimer.
15  * - Redistributions in binary form must reproduce the above copyright notice,
16  *   this list of conditions and the following disclaimer in the documentation
17  *   and/or other materials provided with the distribution.
18  * - Neither the name of the Red Hat, Inc. nor the names of its
19  *   contributors may be used to endorse or promote products derived from this
20  *   software without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
32  * THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 #include <sys/types.h>
35 #include <string.h>
36 
37 #include "qnetd-log.h"
38 #include "qnetd-cluster-list.h"
39 #include "qnetd-algo-utils.h"
40 #include "utils.h"
41 
42 /*
43  * Returns -1 if any node that is supposedly in the same cluster partition
44  * as us has a different ring_id.
45  * If this happens it simply means that qnetd does not yet have the full current view
46  * of the cluster and should wait until all of the ring_ids in this membership list match up
47  */
48 int
qnetd_algo_all_ring_ids_match(struct qnetd_client * client,const struct tlv_ring_id * ring_id)49 qnetd_algo_all_ring_ids_match(struct qnetd_client *client, const struct tlv_ring_id *ring_id)
50 {
51 	struct node_list_entry *node_info;
52  	struct qnetd_client *other_client;
53 
54 	TAILQ_FOREACH(other_client, &client->cluster->client_list, cluster_entries) {
55 		int in_our_partition = 0;
56 
57 		if (other_client == client) {
58 			continue; /* We've seen our membership list */
59 		}
60 		qnetd_log(LOG_DEBUG, "algo-util: all_ring_ids_match: seen nodeid %d (client %p) ring_id (" UTILS_PRI_RING_ID ")", other_client->node_id, other_client, other_client->last_ring_id.node_id, other_client->last_ring_id.seq);
61 
62 		/* Look down our node list and see if this client is known to us */
63 		TAILQ_FOREACH(node_info, &client->last_membership_node_list, entries) {
64 			if (node_info->node_id == other_client->node_id) {
65 				in_our_partition = 1;
66 			}
67 		}
68 
69 		if (in_our_partition == 0) {
70 			/*
71 			 * Also try to look from the other side to see if we are
72 			 * not in the other node's membership list.
73 			 * Because if so it may mean the membership lists are not equal
74 			 */
75 			TAILQ_FOREACH(node_info, &other_client->last_membership_node_list, entries) {
76 				if (node_info->node_id == client->node_id) {
77 					in_our_partition = 1;
78 				}
79 			}
80 		}
81 
82 		/*
83 		 * If the other nodes on our side of a partition have a different ring ID then
84 		 * we need to wait until they have all caught up before making a decision
85 		 */
86 		if (in_our_partition && !tlv_ring_id_eq(ring_id, &other_client->last_ring_id)) {
87 			qnetd_log(LOG_DEBUG, "algo-util: nodeid %d in our partition has different ring_id (" UTILS_PRI_RING_ID ") to us (" UTILS_PRI_RING_ID ")", other_client->node_id, other_client->last_ring_id.node_id, other_client->last_ring_id.seq, ring_id->node_id, ring_id->seq);
88 			return (-1); /* ring IDs don't match */
89 		}
90 	}
91 
92 	return (0);
93 }
94 
95 struct qnetd_algo_partition *
qnetd_algo_find_partition(partitions_list_t * partitions_list,const struct tlv_ring_id * ring_id)96 qnetd_algo_find_partition(partitions_list_t *partitions_list, const struct tlv_ring_id *ring_id)
97 {
98 	struct qnetd_algo_partition *cur_partition;
99 
100 	TAILQ_FOREACH(cur_partition, partitions_list, entries) {
101 		if (tlv_ring_id_eq(&cur_partition->ring_id, ring_id)) {
102 			return (cur_partition);
103 		}
104 	}
105 
106 	return (NULL);
107 }
108 
109 int
qnetd_algo_create_partitions(struct qnetd_client * client,partitions_list_t * partitions_list,const struct tlv_ring_id * ring_id)110 qnetd_algo_create_partitions(struct qnetd_client *client, partitions_list_t *partitions_list, const struct tlv_ring_id *ring_id)
111 {
112  	struct qnetd_client *other_client;
113 	int num_partitions = 0;
114 
115 	TAILQ_FOREACH(other_client, &client->cluster->client_list, cluster_entries) {
116 		struct qnetd_algo_partition *partition;
117 
118 		if (other_client->last_ring_id.seq == 0){
119 			continue; /* not initialised yet */
120 		}
121 		partition = qnetd_algo_find_partition(partitions_list, &other_client->last_ring_id);
122 		if (!partition) {
123 			partition = malloc(sizeof(struct qnetd_algo_partition));
124 			if (!partition) {
125 				return (-1);
126 			}
127 			partition->num_nodes = 0;
128 			partition->score = 0;
129 			memcpy(&partition->ring_id, &other_client->last_ring_id, sizeof(*ring_id));
130 			num_partitions++;
131 			TAILQ_INSERT_TAIL(partitions_list, partition, entries);
132 		}
133 		partition->num_nodes++;
134 
135 		/*
136 		 * Score is computer similar way as in the ffsplit algorithm
137 		 */
138 		partition->score++;
139 		if (other_client->last_heuristics == TLV_HEURISTICS_PASS) {
140 			partition->score++;
141 		} else if (other_client->last_heuristics == TLV_HEURISTICS_FAIL) {
142 			partition->score--;
143 		}
144 
145 	}
146 
147 	return (num_partitions);
148 }
149 
150 
151 void
qnetd_algo_free_partitions(partitions_list_t * partitions_list)152 qnetd_algo_free_partitions(partitions_list_t *partitions_list)
153 {
154 	struct qnetd_algo_partition *cur_partition;
155 	struct qnetd_algo_partition *partition_next;
156 
157 	cur_partition = TAILQ_FIRST(partitions_list);
158 
159 	while (cur_partition != NULL) {
160 		partition_next = TAILQ_NEXT(cur_partition, entries);
161 
162 		free(cur_partition);
163 
164 		cur_partition = partition_next;
165 	}
166 
167 	TAILQ_INIT(partitions_list);
168 }
169 
170 void
qnetd_algo_dump_partitions(partitions_list_t * partitions_list)171 qnetd_algo_dump_partitions(partitions_list_t *partitions_list)
172 {
173 	struct qnetd_algo_partition *partition;
174 
175 	TAILQ_FOREACH(partition, partitions_list, entries) {
176 		qnetd_log(LOG_DEBUG, "algo-util: partition (" UTILS_PRI_RING_ID ") (%p) has %d nodes",
177 			  partition->ring_id.node_id, partition->ring_id.seq, partition, partition->num_nodes);
178 	}
179 }
180