1 /* Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 #include "xcom/site_def.h"
24
25 #include <assert.h>
26 #include <stdlib.h>
27
28 #include "xcom/bitset.h"
29 #include "xcom/node_list.h"
30 #include "xcom/node_no.h"
31 #include "xcom/node_set.h"
32 #include "xcom/server_struct.h"
33 #include "xcom/simset.h"
34 #include "xcom/site_struct.h"
35 #include "xcom/synode_no.h"
36 #include "xcom/task.h"
37 #include "xcom/task_debug.h"
38 #include "xcom/x_platform.h"
39 #include "xcom/xcom_base.h"
40 #include "xcom/xcom_common.h"
41 #include "xcom/xcom_detector.h"
42 #include "xcom/xcom_memory.h"
43 #include "xcom/xcom_profile.h"
44 #include "xcom/xcom_transport.h"
45 #include "xdr_gen/xcom_vp.h"
46
47 typedef site_def *site_def_ptr;
48
49 struct site_def_ptr_array {
50 u_int count;
51 u_int site_def_ptr_array_len;
52 site_def_ptr *site_def_ptr_array_val;
53 };
54 typedef struct site_def_ptr_array site_def_ptr_array;
55
56 init_xdr_array(site_def_ptr) free_xdr_array(site_def_ptr)
57 set_xdr_array(site_def_ptr)
58
59 /* FIFO of site definitions */
60 static site_def_ptr_array site_defs;
61
62 static inline node_no _get_maxnodes(site_def const *site);
63
64 /* Return pointer to array of site defs */
get_all_site_defs(site_def *** s,uint32_t * n)65 void get_all_site_defs(site_def ***s, uint32_t *n) {
66 *s = site_defs.site_def_ptr_array_val;
67 *n = site_defs.site_def_ptr_array_len;
68 }
69
70 /* Module initialization */
init_site_vars()71 void init_site_vars() {
72 init_site_def_ptr_array(&site_defs);
73 site_defs.count = 0;
74 }
75
76 /* Recursively free a complete site_def. Only free the site_def, not
77 the servers that it points to, since servers are shared by multiple
78 site_defs, and will eventually be deallocated by garbage_collect_servers
79 */
free_site_def(site_def * s)80 void free_site_def(site_def *s) {
81 if (s) {
82 invalidate_detector_sites(s);
83 xdr_free((xdrproc_t)xdr_node_list, (char *)(&s->nodes));
84 free_node_set(&s->global_node_set);
85 free_node_set(&s->local_node_set);
86 free(s);
87 }
88 }
89
90 /* Free all resources in this module */
free_site_defs()91 void free_site_defs() {
92 u_int i;
93 for (i = 0; i < site_defs.count; i++) {
94 free_site_def(site_defs.site_def_ptr_array_val[i]);
95 }
96 free_site_def_ptr_array(&site_defs);
97 site_defs.count = 0;
98 }
99
100 /* Add a new site definition to the list */
push_site_def(site_def * s)101 site_def *push_site_def(site_def *s) {
102 uint32_t i;
103 set_site_def_ptr(&site_defs, 0, site_defs.count);
104 IFDBG(
105 D_NONE, FN; NDBG(site_defs.count, u); PTREXP(s); if (s) {
106 SYCEXP(s->start);
107 SYCEXP(s->boot_key);
108 });
109 for (i = site_defs.count; i > 0; i--) {
110 IFDBG(
111 D_NONE, NDBG(i - 1, d); PTREXP(site_defs.site_def_ptr_array_val[i - 1]);
112 if (site_defs.site_def_ptr_array_val[i - 1]) {
113 SYCEXP(site_defs.site_def_ptr_array_val[i - 1]->start);
114 SYCEXP(site_defs.site_def_ptr_array_val[i - 1]->boot_key);
115 });
116 site_defs.site_def_ptr_array_val[i] =
117 site_defs.site_def_ptr_array_val[i - 1];
118 }
119 set_site_def_ptr(&site_defs, s, 0);
120 if (s) {
121 s->x_proto = set_latest_common_proto(common_xcom_version(s));
122 G_DEBUG("latest common protocol is now %d", s->x_proto);
123 }
124 site_defs.count++;
125 assert(!s || (s->global_node_set.node_set_len == _get_maxnodes(s)));
126 return s;
127 }
128
129 /* Return first site def */
_get_site_def()130 static inline site_def const *_get_site_def() {
131 assert(site_defs.count == 0 || !site_defs.site_def_ptr_array_val[0] ||
132 site_defs.site_def_ptr_array_val[0]->global_node_set.node_set_len ==
133 _get_maxnodes(site_defs.site_def_ptr_array_val[0]));
134 if (site_defs.count > 0)
135 return site_defs.site_def_ptr_array_val[0];
136 else
137 return 0;
138 }
139
140 /* Return first site def */
get_site_def_rw()141 site_def *get_site_def_rw() {
142 if (site_defs.count > 0)
143 return site_defs.site_def_ptr_array_val[0];
144 else
145 return 0;
146 }
147
148 /* purecov: begin deadcode */
149 /* Return previous site def */
_get_prev_site_def()150 static inline site_def const *_get_prev_site_def() {
151 assert(site_defs.count == 0 || !site_defs.site_def_ptr_array_val[1] ||
152 site_defs.site_def_ptr_array_val[1]->global_node_set.node_set_len ==
153 _get_maxnodes(site_defs.site_def_ptr_array_val[1]));
154 if (site_defs.count > 0)
155 return site_defs.site_def_ptr_array_val[1];
156 else
157 return 0;
158 }
159 /* purecov: end */
160
161 /* Return first site def as const ptr */
get_site_def()162 site_def const *get_site_def() { return _get_site_def(); }
163
164 /* purecov: begin deadcode */
165
166 /* Return previous site def */
get_prev_site_def()167 site_def const *get_prev_site_def() { return _get_prev_site_def(); }
168
169 /* purecov: end */
170
171 /* Checks if site->start >= synode. */
match_def(site_def const * site,synode_no synode)172 static inline int match_def(site_def const *site, synode_no synode) {
173 return site &&
174 (synode.group_id == 0 || synode.group_id == site->start.group_id) &&
175 !synode_lt(synode, site->start);
176 }
177
178 /* Return first site def which has start less than or equal to synode */
find_site_def(synode_no synode)179 site_def const *find_site_def(synode_no synode) {
180 site_def const *retval = 0;
181 u_int i;
182
183 for (i = 0; i < site_defs.count; i++)
184 if (match_def(site_defs.site_def_ptr_array_val[i], synode)) {
185 retval = site_defs.site_def_ptr_array_val[i];
186 break;
187 }
188 assert(!retval ||
189 retval->global_node_set.node_set_len == _get_maxnodes(retval));
190 return retval;
191 }
192
193 /* As find_site_def, but return pointer to non-const object */
find_site_def_rw(synode_no synode)194 site_def *find_site_def_rw(synode_no synode) {
195 site_def *retval = 0;
196 u_int i;
197
198 for (i = 0; i < site_defs.count; i++)
199 if (match_def(site_defs.site_def_ptr_array_val[i], synode)) {
200 retval = site_defs.site_def_ptr_array_val[i];
201 break;
202 }
203 assert(!retval ||
204 retval->global_node_set.node_set_len == _get_maxnodes(retval));
205 return retval;
206 }
207
208 /* Checks if site->start > synode. */
start_gt(site_def const * site,synode_no synode)209 static inline int start_gt(site_def const *site, synode_no synode) {
210 return site &&
211 (synode.group_id == 0 || synode.group_id == site->start.group_id) &&
212 synode_gt(site->start, synode);
213 }
214
215 /* Retrieve the first site_def which has start greater than synode. */
find_next_site_def(synode_no synode)216 site_def const *find_next_site_def(synode_no synode) {
217 site_def const *retval = NULL;
218 u_int i;
219
220 for (i = site_defs.count; i > 0; i--)
221 if (start_gt(site_defs.site_def_ptr_array_val[i - 1], synode)) {
222 retval = site_defs.site_def_ptr_array_val[i - 1];
223 break;
224 }
225 assert(retval == NULL ||
226 retval->global_node_set.node_set_len == _get_maxnodes(retval));
227 return retval;
228 }
229
prev_def(site_def const * site,synode_no synode)230 static inline int prev_def(site_def const *site, synode_no synode) {
231 return (site &&
232 (synode.group_id == 0 || synode.group_id == site->start.group_id));
233 }
234
find_prev_site_def(synode_no synode)235 site_def const *find_prev_site_def(synode_no synode) {
236 site_def const *retval = 0;
237 u_int i;
238
239 for (i = site_defs.count; i > 0; i--)
240 if (prev_def(site_defs.site_def_ptr_array_val[i - 1], synode)) {
241 retval = site_defs.site_def_ptr_array_val[i - 1];
242 break;
243 }
244 assert(!retval ||
245 retval->global_node_set.node_set_len == _get_maxnodes(retval));
246 return retval;
247 }
248
garbage_collect_site_defs(synode_no x)249 void garbage_collect_site_defs(synode_no x) {
250 u_int i;
251 u_int s_max = site_defs.count;
252
253 IFDBG(D_NONE, FN; NDBG(site_defs.count, u); SYCEXP(x););
254 for (i = 3; i < s_max; i++) {
255 if (match_def(site_defs.site_def_ptr_array_val[i], x)) {
256 break;
257 }
258 }
259 i++;
260 for (; i < s_max; i++) {
261 site_def *site = site_defs.site_def_ptr_array_val[i];
262 IFDBG(D_NONE, NDBG(i, d); PTREXP(site_defs.site_def_ptr_array_val[i]););
263 if (site) {
264 IFDBG(D_NONE, SYCEXP(site->start); SYCEXP(site->boot_key););
265 free_site_def(site);
266 site_defs.site_def_ptr_array_val[i] = 0;
267 }
268 site_defs.count--;
269 }
270 }
271
272 /* purecov: begin deadcode */
dbg_site_def(site_def const * site)273 char *dbg_site_def(site_def const *site) {
274 assert(site->global_node_set.node_set_len == _get_maxnodes(site));
275 return dbg_list(&site->nodes);
276 }
277 /* purecov: end */
278
279 /* Create a new empty site_def */
new_site_def()280 site_def *new_site_def() {
281 site_def *retval = (site_def *)calloc((size_t)1, sizeof(site_def));
282 retval->nodeno = VOID_NODE_NO;
283 return retval;
284 }
285
286 /* Clone a site definition */
287
clone_site_def(site_def const * site)288 site_def *clone_site_def(site_def const *site) {
289 site_def *retval = new_site_def();
290 assert(site->global_node_set.node_set_len == _get_maxnodes(site));
291 *retval = *site;
292 init_node_list(site->nodes.node_list_len, site->nodes.node_list_val,
293 &retval->nodes);
294 retval->global_node_set = clone_node_set(site->global_node_set);
295 retval->local_node_set = clone_node_set(site->local_node_set);
296 assert(retval->global_node_set.node_set_len == _get_maxnodes(retval));
297 IFDBG(D_NONE, FN; PTREXP(site); PTREXP(retval));
298 return retval;
299 }
300
301 /* Initialize a site definition from array of string pointers */
302
init_site_def(u_int n,node_address * names,site_def * site)303 void init_site_def(u_int n, node_address *names, site_def *site) {
304 const site_def *latest_config;
305 site->start = null_synode;
306 site->boot_key = null_synode;
307 site->nodeno = VOID_NODE_NO;
308 init_detector(site->detected);
309 init_node_list(n, names, &site->nodes);
310 site->global_node_count = 0;
311 alloc_node_set(&site->global_node_set, NSERVERS);
312 site->global_node_set.node_set_len = site->nodes.node_list_len;
313 set_node_set(&site->global_node_set); /* Assume everyone is there */
314 assert(site->global_node_set.node_set_len == _get_maxnodes(site));
315 alloc_node_set(&site->local_node_set, NSERVERS);
316 site->local_node_set.node_set_len = site->nodes.node_list_len;
317 set_node_set(&site->local_node_set); /* Assume everyone is there */
318 assert(site->local_node_set.node_set_len == _get_maxnodes(site));
319 site->detector_updated = 0;
320 site->x_proto = my_xcom_version;
321 /* Inherit latest configuration's event horizon or fallback to default */
322 latest_config = get_site_def();
323 if (latest_config != NULL) {
324 site->event_horizon = latest_config->event_horizon;
325 } else {
326 site->event_horizon = EVENT_HORIZON_MIN;
327 }
328 assert(site->event_horizon);
329 }
330
331 /* Add nodes to site definition, avoid duplicates */
add_site_def(u_int n,node_address * names,site_def * site)332 void add_site_def(u_int n, node_address *names, site_def *site) {
333 if (n > 0) {
334 add_node_list(n, names, &site->nodes);
335 }
336 realloc_node_set(&site->global_node_set, _get_maxnodes(site));
337 realloc_node_set(&site->local_node_set, _get_maxnodes(site));
338 }
339
340 /* Remove nodes from site definition, ignore missing nodes */
remove_site_def(u_int n,node_address * names,site_def * site)341 void remove_site_def(u_int n, node_address *names, site_def *site) {
342 if (n > 0) {
343 remove_node_list(n, names, &site->nodes);
344 }
345 init_detector(site->detected); /* Zero all unused timestamps */
346 realloc_node_set(&site->global_node_set, _get_maxnodes(site));
347 realloc_node_set(&site->local_node_set, _get_maxnodes(site));
348 }
349
350 /* Return group id of site */
get_group_id(site_def const * site)351 uint32_t get_group_id(site_def const *site) {
352 if (site) {
353 uint32_t group_id = site->start.group_id;
354 assert(site->global_node_set.node_set_len == _get_maxnodes(site));
355 IFDBG(D_NONE, FN; NDBG((unsigned long)group_id, lu););
356 return group_id;
357 } else {
358 return null_id;
359 }
360 }
361
_get_maxnodes(site_def const * site)362 static inline node_no _get_maxnodes(site_def const *site) {
363 if (site) {
364 return site->nodes.node_list_len;
365 } else
366 return 0;
367 }
368
369 /* Return maxnodes of site */
get_maxnodes(site_def const * site)370 node_no get_maxnodes(site_def const *site) { return _get_maxnodes(site); }
371
372 /* Return nodeno of site */
_get_nodeno(site_def const * site)373 static inline node_no _get_nodeno(site_def const *site) {
374 if (site) {
375 assert(site->global_node_set.node_set_len == _get_maxnodes(site));
376 return site->nodeno;
377 } else
378 return VOID_NODE_NO;
379 }
380
381 /* purecov: begin deadcode */
382 /* Return nodeno of site */
get_nodeno(site_def const * site)383 node_no get_nodeno(site_def const *site) { return _get_nodeno(site); }
384 /* purecov: end */
385
find_nodeno(site_def const * site,const char * address)386 node_no find_nodeno(site_def const *site, const char *address) {
387 uint32_t i;
388 G_TRACE("find_nodeno: Node to find: %s", address);
389 for (i = 0; i < site->nodes.node_list_len; i++) {
390 G_TRACE("find_nodeno: Node %d: %s", i,
391 site->nodes.node_list_val[i].address);
392 if (strcmp(site->nodes.node_list_val[i].address, address) == 0) return i;
393 }
394 return VOID_NODE_NO;
395 }
396
get_prev_nodeno()397 node_no get_prev_nodeno() { return _get_nodeno(_get_prev_site_def()); }
398
399 /* Find the maximum config number known on this node */
config_max_boot_key(gcs_snapshot const * gcs_snap)400 synode_no config_max_boot_key(gcs_snapshot const *gcs_snap) {
401 int i;
402 synode_no max = null_synode;
403
404 /* Loop over all configs looking for max */
405 for (i = (int)gcs_snap->cfg.configs_len - 1; i >= 0; i--) {
406 config_ptr cp = gcs_snap->cfg.configs_val[i];
407 if (cp && cp->boot_key.group_id == gcs_snap->log_start.group_id &&
408 synode_gt(cp->boot_key, max)) {
409 max = cp->boot_key;
410 }
411 }
412 return max;
413 }
414
415 /* Import configs from snapshot */
import_config(gcs_snapshot * gcs_snap)416 void import_config(gcs_snapshot *gcs_snap) {
417 int i;
418 IFDBG(D_NONE, FN; SYCEXP(gcs_snap->log_start); SYCEXP(gcs_snap->log_end));
419 for (i = (int)gcs_snap->cfg.configs_len - 1; i >= 0; i--) {
420 config_ptr cp = gcs_snap->cfg.configs_val[i];
421 if (cp) {
422 /* We now have a valid pointer to a config.
423 Unconditionally import if if we have no configs.
424 If we have a config, import it if either the boot_key or
425 start does not match the latest config we already have.
426 This avoids import of duplicate configs.
427 */
428 if (!get_site_def() ||
429 !synode_eq(cp->boot_key, get_site_def()->boot_key) ||
430 !synode_eq(cp->start, get_site_def()->start)) {
431 site_def *site = new_site_def();
432 IFDBG(D_NONE, FN; SYCEXP(cp->start); SYCEXP(cp->boot_key));
433 init_site_def(cp->nodes.node_list_len, cp->nodes.node_list_val, site);
434 site->start = cp->start;
435 site->boot_key = cp->boot_key;
436 assert(cp->event_horizon);
437 site->event_horizon = cp->event_horizon;
438 copy_node_set(&cp->global_node_set, &site->global_node_set);
439 site_install_action(site, app_type);
440 }
441 }
442 }
443 }
444
445 extern synode_no executed_msg;
446
447 /* Return maximum config number, which is the config number of the last site */
get_conf_max()448 static synode_no get_conf_max() {
449 u_int i;
450 for (i = 0; i < site_defs.count; i++) {
451 site_def *site = site_defs.site_def_ptr_array_val[i];
452 if (site) {
453 return site->boot_key;
454 }
455 }
456 return null_synode;
457 }
458
459 /* Export configs to snapshot */
export_config()460 gcs_snapshot *export_config() {
461 u_int i;
462 gcs_snapshot *gcs_snap =
463 (gcs_snapshot *)calloc((size_t)1, sizeof(gcs_snapshot));
464 gcs_snap->cfg.configs_val =
465 (config_ptr *)calloc((size_t)site_defs.count, sizeof(config_ptr));
466 gcs_snap->cfg.configs_len = site_defs.count;
467
468 for (i = 0; i < site_defs.count; i++) {
469 site_def *site = site_defs.site_def_ptr_array_val[i];
470 if (site) {
471 config_ptr cp = (config_ptr)calloc((size_t)1, sizeof(config));
472 init_node_list(site->nodes.node_list_len, site->nodes.node_list_val,
473 &cp->nodes);
474 cp->start = site->start;
475 cp->boot_key = site->boot_key;
476 cp->event_horizon = site->event_horizon;
477 assert(cp->event_horizon);
478 cp->global_node_set = clone_node_set(site->global_node_set);
479 IFDBG(D_BUG, FN; SYCEXP(cp->start); SYCEXP(cp->boot_key));
480 gcs_snap->cfg.configs_val[i] = cp;
481 }
482 }
483 /* log_start is the last message that has actually been delivered and
484 executed. During recovery, only messages > log_start will be delivered. */
485 gcs_snap->log_start = get_last_delivered_msg();
486 /* log_end is the first message that will not be delivered during recovery. */
487 gcs_snap->log_end = get_conf_max(); /* Set log_end based on configs */
488 set_log_end(gcs_snap); /* Possibly advance log_end based on max_synode */
489
490 return gcs_snap;
491 }
492
493 /* Return the global minimum delivered message number, based on incoming gossip
494 */
get_min_delivered_msg(site_def const * s)495 synode_no get_min_delivered_msg(site_def const *s) {
496 u_int i;
497 synode_no retval = null_synode;
498 int init = 1;
499
500 for (i = 0; i < s->nodes.node_list_len; i++) {
501 if (s->servers[i]->detected + DETECTOR_LIVE_TIMEOUT > task_now()) {
502 if (init) {
503 init = 0;
504 retval = s->delivered_msg[i];
505 } else {
506 if (synode_lt(s->delivered_msg[i], retval)) {
507 retval = s->delivered_msg[i];
508 }
509 }
510 }
511 }
512 IFDBG(D_NONE, FN; SYCEXP(retval));
513 return retval;
514 }
515
516 /* Track the minimum delivered message numbers based on incoming messages */
update_delivered(site_def * s,node_no node,synode_no msgno)517 void update_delivered(site_def *s, node_no node, synode_no msgno) {
518 if (node < s->nodes.node_list_len) {
519 s->delivered_msg[node] = msgno;
520 /* IFDBG(D_NONE, FN; SYCEXP(s->delivered_msg[node]); NDBG(node,u)); */
521 }
522 }
523
524 /* The last shall be first and the first last.
525 The FIRST config in the snaphost has the highest message number, and
526 the LAST config has the lowest. */
527
528 /* Return boot_key of highest numbered config in snapshot */
get_highest_boot_key(gcs_snapshot * gcs_snap)529 synode_no get_highest_boot_key(gcs_snapshot *gcs_snap) {
530 int i;
531 synode_no retval = null_synode;
532 IFDBG(D_NONE, FN; SYCEXP(gcs_snap->log_start); SYCEXP(gcs_snap->log_end));
533 for (i = 0; i < (int)gcs_snap->cfg.configs_len; i++) {
534 config_ptr cp = gcs_snap->cfg.configs_val[i];
535 if (cp) {
536 IFDBG(D_NONE, FN; SYCEXP(cp->start); SYCEXP(cp->boot_key));
537 retval = cp->boot_key;
538 break;
539 }
540 }
541 assert(!synode_eq(retval, null_synode));
542 return retval;
543 }
544
545 /* Return boot_key of lowest numbered config in snapshot */
546 /* purecov: begin deadcode */
get_lowest_boot_key(gcs_snapshot * gcs_snap)547 synode_no get_lowest_boot_key(gcs_snapshot *gcs_snap) {
548 int i;
549 synode_no retval = null_synode;
550 IFDBG(D_NONE, FN; SYCEXP(gcs_snap->log_start); SYCEXP(gcs_snap->log_end));
551 for (i = (int)gcs_snap->cfg.configs_len - 1; i >= 0; i--) {
552 config_ptr cp = gcs_snap->cfg.configs_val[i];
553 if (cp) {
554 IFDBG(D_NONE, FN; SYCEXP(cp->start); SYCEXP(cp->boot_key));
555 retval = cp->boot_key;
556 break;
557 }
558 }
559 return retval;
560 }
561 /* purecov: end */
562