1 /*
2 * Copyright (c) 2016 Red Hat, Inc.
3 *
4 * All rights reserved.
5 *
6 * Author: Christine Caulfield (ccaulfie@redhat.com)
7 *
8 * This software licensed under BSD license, the text of which follows:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are met:
12 *
13 * - Redistributions of source code must retain the above copyright notice,
14 * this list of conditions and the following disclaimer.
15 * - Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 * - Neither the name of the Red Hat, Inc. nor the names of its
19 * contributors may be used to endorse or promote products derived from this
20 * software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
32 * THE POSSIBILITY OF SUCH DAMAGE.
33 */
34 #include <sys/types.h>
35 #include <string.h>
36
37 #include "qnetd-log.h"
38 #include "qnetd-cluster-list.h"
39 #include "qnetd-algo-utils.h"
40 #include "utils.h"
41
42 /*
43 * Returns -1 if any node that is supposedly in the same cluster partition
44 * as us has a different ring_id.
45 * If this happens it simply means that qnetd does not yet have the full current view
46 * of the cluster and should wait until all of the ring_ids in this membership list match up
47 */
48 int
qnetd_algo_all_ring_ids_match(struct qnetd_client * client,const struct tlv_ring_id * ring_id)49 qnetd_algo_all_ring_ids_match(struct qnetd_client *client, const struct tlv_ring_id *ring_id)
50 {
51 struct node_list_entry *node_info;
52 struct qnetd_client *other_client;
53
54 TAILQ_FOREACH(other_client, &client->cluster->client_list, cluster_entries) {
55 int in_our_partition = 0;
56
57 if (other_client == client) {
58 continue; /* We've seen our membership list */
59 }
60 qnetd_log(LOG_DEBUG, "algo-util: all_ring_ids_match: seen nodeid %d (client %p) ring_id (" UTILS_PRI_RING_ID ")", other_client->node_id, other_client, other_client->last_ring_id.node_id, other_client->last_ring_id.seq);
61
62 /* Look down our node list and see if this client is known to us */
63 TAILQ_FOREACH(node_info, &client->last_membership_node_list, entries) {
64 if (node_info->node_id == other_client->node_id) {
65 in_our_partition = 1;
66 }
67 }
68
69 if (in_our_partition == 0) {
70 /*
71 * Also try to look from the other side to see if we are
72 * not in the other node's membership list.
73 * Because if so it may mean the membership lists are not equal
74 */
75 TAILQ_FOREACH(node_info, &other_client->last_membership_node_list, entries) {
76 if (node_info->node_id == client->node_id) {
77 in_our_partition = 1;
78 }
79 }
80 }
81
82 /*
83 * If the other nodes on our side of a partition have a different ring ID then
84 * we need to wait until they have all caught up before making a decision
85 */
86 if (in_our_partition && !tlv_ring_id_eq(ring_id, &other_client->last_ring_id)) {
87 qnetd_log(LOG_DEBUG, "algo-util: nodeid %d in our partition has different ring_id (" UTILS_PRI_RING_ID ") to us (" UTILS_PRI_RING_ID ")", other_client->node_id, other_client->last_ring_id.node_id, other_client->last_ring_id.seq, ring_id->node_id, ring_id->seq);
88 return (-1); /* ring IDs don't match */
89 }
90 }
91
92 return (0);
93 }
94
95 struct qnetd_algo_partition *
qnetd_algo_find_partition(partitions_list_t * partitions_list,const struct tlv_ring_id * ring_id)96 qnetd_algo_find_partition(partitions_list_t *partitions_list, const struct tlv_ring_id *ring_id)
97 {
98 struct qnetd_algo_partition *cur_partition;
99
100 TAILQ_FOREACH(cur_partition, partitions_list, entries) {
101 if (tlv_ring_id_eq(&cur_partition->ring_id, ring_id)) {
102 return (cur_partition);
103 }
104 }
105
106 return (NULL);
107 }
108
109 int
qnetd_algo_create_partitions(struct qnetd_client * client,partitions_list_t * partitions_list,const struct tlv_ring_id * ring_id)110 qnetd_algo_create_partitions(struct qnetd_client *client, partitions_list_t *partitions_list, const struct tlv_ring_id *ring_id)
111 {
112 struct qnetd_client *other_client;
113 int num_partitions = 0;
114
115 TAILQ_FOREACH(other_client, &client->cluster->client_list, cluster_entries) {
116 struct qnetd_algo_partition *partition;
117
118 if (other_client->last_ring_id.seq == 0){
119 continue; /* not initialised yet */
120 }
121 partition = qnetd_algo_find_partition(partitions_list, &other_client->last_ring_id);
122 if (!partition) {
123 partition = malloc(sizeof(struct qnetd_algo_partition));
124 if (!partition) {
125 return (-1);
126 }
127 partition->num_nodes = 0;
128 partition->score = 0;
129 memcpy(&partition->ring_id, &other_client->last_ring_id, sizeof(*ring_id));
130 num_partitions++;
131 TAILQ_INSERT_TAIL(partitions_list, partition, entries);
132 }
133 partition->num_nodes++;
134
135 /*
136 * Score is computer similar way as in the ffsplit algorithm
137 */
138 partition->score++;
139 if (other_client->last_heuristics == TLV_HEURISTICS_PASS) {
140 partition->score++;
141 } else if (other_client->last_heuristics == TLV_HEURISTICS_FAIL) {
142 partition->score--;
143 }
144
145 }
146
147 return (num_partitions);
148 }
149
150
151 void
qnetd_algo_free_partitions(partitions_list_t * partitions_list)152 qnetd_algo_free_partitions(partitions_list_t *partitions_list)
153 {
154 struct qnetd_algo_partition *cur_partition;
155 struct qnetd_algo_partition *partition_next;
156
157 cur_partition = TAILQ_FIRST(partitions_list);
158
159 while (cur_partition != NULL) {
160 partition_next = TAILQ_NEXT(cur_partition, entries);
161
162 free(cur_partition);
163
164 cur_partition = partition_next;
165 }
166
167 TAILQ_INIT(partitions_list);
168 }
169
170 void
qnetd_algo_dump_partitions(partitions_list_t * partitions_list)171 qnetd_algo_dump_partitions(partitions_list_t *partitions_list)
172 {
173 struct qnetd_algo_partition *partition;
174
175 TAILQ_FOREACH(partition, partitions_list, entries) {
176 qnetd_log(LOG_DEBUG, "algo-util: partition (" UTILS_PRI_RING_ID ") (%p) has %d nodes",
177 partition->ring_id.node_id, partition->ring_id.seq, partition, partition->num_nodes);
178 }
179 }
180