1 /*
2  * Copyright (C) 2015-2020 Codership Oy <info@codership.com>
3  */
4 
#include "gcs_test_utils.hpp"

#include <vector>
6 
7 namespace gcs_test
8 {
9 
10 void
common_ctor(gu::Config & cfg)11 InitConfig::common_ctor(gu::Config& cfg)
12 {
13     gcache::GCache::register_params(cfg);
14     gcs_register_params(reinterpret_cast<gu_config_t*>(&cfg));
15 }
16 
InitConfig(gu::Config & cfg)17 InitConfig::InitConfig(gu::Config& cfg)
18 {
19     common_ctor(cfg);
20 }
21 
InitConfig(gu::Config & cfg,const std::string & base_name)22 InitConfig::InitConfig(gu::Config& cfg, const std::string& base_name)
23 {
24     common_ctor(cfg);
25     std::string p("gcache.size=1M;gcache.name=");
26     p += base_name;
27     gu_trace(cfg.parse(p));
28 }
29 
GcsGroup()30 GcsGroup::GcsGroup() :
31     conf_   (),
32     init_   (conf_, "group"),
33     gcache_ (NULL),
34     group_  (),
35     initialized_(false)
36 {}
37 
38 void
common_ctor(const char * node_name,const char * inc_addr,gcs_proto_t gver,int rver,int aver)39 GcsGroup::common_ctor(const char*  node_name,
40                       const char*  inc_addr,
41                       gcs_proto_t  gver,
42                       int          rver,
43                       int          aver)
44 {
45     assert(NULL  == gcache_);
46     assert(false == initialized_);
47 
48     conf_.set("gcache.name", std::string(node_name) + ".cache");
49     gcache_ = new gcache::GCache(conf_, ".");
50 
51     int const err(gcs_group_init(&group_, &conf_,
52                                  reinterpret_cast<gcache_t*>(gcache_),
53                                  node_name, inc_addr, gver, rver, aver));
54     if (err)
55     {
56         gu_throw_error(-err) << "GcsGroup init failed";
57     }
58 
59     initialized_ = true;
60 }
61 
GcsGroup(const std::string & node_id,const std::string & inc_addr,gcs_proto_t gver,int rver,int aver)62 GcsGroup::GcsGroup(const std::string& node_id,
63                    const std::string& inc_addr,
64                    gcs_proto_t gver, int rver, int aver) :
65     conf_   (),
66     init_   (conf_, "group"),
67     gcache_ (NULL),
68     group_  (),
69     initialized_(false)
70 {
71     common_ctor(node_id.c_str(), inc_addr.c_str(), gver, rver, aver);
72 }
73 
74 void
common_dtor()75 GcsGroup::common_dtor()
76 {
77     if (initialized_)
78     {
79         assert(NULL != gcache_);
80         gcs_group_free(&group_);
81         delete gcache_;
82 
83         std::string const gcache_name(conf_.get("gcache.name"));
84         ::unlink(gcache_name.c_str());
85     }
86     else
87     {
88         assert(NULL == gcache_);
89     }
90 }
91 
92 void
init(const char * node_name,const char * inc_addr,gcs_proto_t gcs_proto_ver,int repl_proto_ver,int appl_proto_ver)93 GcsGroup::init(const char*  node_name,
94                const char*  inc_addr,
95                gcs_proto_t  gcs_proto_ver,
96                int          repl_proto_ver,
97                int          appl_proto_ver)
98 {
99     common_dtor();
100     initialized_ = false;
101     gcache_ = NULL;
102     common_ctor(node_name, inc_addr,gcs_proto_ver,repl_proto_ver,appl_proto_ver);
103 }
104 
~GcsGroup()105 GcsGroup::~GcsGroup()
106 {
107     common_dtor();
108 }
109 
110 } // namespace
111 
112 #include "../gcs_comp_msg.hpp"
113 #include <check.h>
114 
115 gcs_seqno_t
deliver_last_applied(int const from,gcs_seqno_t const la)116 gt_node::deliver_last_applied(int const from, gcs_seqno_t const la)
117 {
118     gcs_seqno_t buf(gcs_seqno_htog(la));
119 
120     gcs_recv_msg_t const msg(&buf, sizeof(buf), sizeof(buf),
121                              from,
122                              GCS_MSG_LAST);
123     return gcs_group_handle_last_msg(group(), &msg);
124 }
125 
gt_node(const char * const name,int const gcs_proto_ver)126 gt_node::gt_node(const char* const name, int const gcs_proto_ver)
127     : group(),
128       id()
129 {
130     if (name)
131     {
132         snprintf(id, sizeof(id) - 1, "%s", name);
133     }
134     else
135     {
136         snprintf(id, sizeof(id) - 1, "%p", this);
137     }
138 
139     id[sizeof(id) - 1] = '\0';
140 
141     int const str_len = sizeof(id) + 6;
142     char name_str[str_len] = { '\0', };
143     char addr_str[str_len] = { '\0', };
144     snprintf(name_str, str_len - 1, "name:%s", id);
145     snprintf(addr_str, str_len - 1, "addr:%s", id);
146 
147     group.init(name_str, addr_str, gcs_proto_ver, 0, 0);
148 }
149 
~gt_node()150 gt_node::~gt_node()
151 {
152 }
153 
154 /* delivers new component message to all memebers */
155 int
deliver_component_msg(bool const prim)156 gt_group::deliver_component_msg(bool const prim)
157 {
158     for (int i = 0; i < nodes_num; i++)
159     {
160         gcs_comp_msg_t* msg = gcs_comp_msg_new(prim, false, i, nodes_num, 0);
161         if (msg)
162         {
163             for (int j = 0; j < nodes_num; j++) {
164                 const struct gt_node* const node(nodes[j]);
165                 long ret = gcs_comp_msg_add (msg, node->id, j);
166                 ck_assert_msg(j == ret, "Failed to add %d member: %ld (%s)",
167                               j, ret, strerror(-ret));
168                 /* check that duplicate node ID is ignored */
169                 ret = gcs_comp_msg_add (msg, node->id, j);
170                 ck_assert_msg(ret < 0, "Added duplicate %d member", j);
171             }
172 
173             /* check component message */
174             ck_assert(i == gcs_comp_msg_self(msg));
175             ck_assert(nodes_num == gcs_comp_msg_num(msg));
176 
177             for (int j = 0; j < nodes_num; j++) {
178                 const char* const src_id = nodes[j]->id;
179                 const char* const dst_id = gcs_comp_msg_member(msg, j)->id;
180                 ck_assert_msg(!strcmp(src_id, dst_id),
181                               "%d node id %s, recorded in comp msg as %s",
182                               j, src_id, dst_id);
183 
184                 gcs_segment_t const dst_seg(gcs_comp_msg_member(msg,j)->segment);
185                 ck_assert_msg(j == dst_seg,
186                               "%d node segment %d, recorded in comp msg as %d",
187                               j, j, (int)dst_seg);
188             }
189 
190             gcs_group_state_t ret =
191                 gcs_group_handle_comp_msg(nodes[i]->group(), msg);
192 
193             ck_assert(ret == GCS_GROUP_WAIT_STATE_UUID);
194 
195             gcs_comp_msg_delete (msg);
196 
197             /* check that uuids are properly recorded in internal structures */
198             for (int j = 0; j < nodes_num; j++) {
199                 const char* src_id = nodes[j]->id;
200                 const char* dst_id = nodes[i]->group()->nodes[j].id;
201                 ck_assert_msg(!strcmp(src_id, dst_id),
202                               "%d node id %s, recorded at node %d as %s",
203                               j, src_id, i, dst_id);
204             }
205         }
206         else {
207             return -ENOMEM;
208         }
209     }
210 
211     return 0;
212 }
213 
214 int
perform_state_exchange()215 gt_group::perform_state_exchange()
216 {
217     /* first deliver state uuid message */
218     gu_uuid_t state_uuid;
219     gu_uuid_generate (&state_uuid, NULL, 0);
220 
221     gcs_recv_msg_t uuid_msg(&state_uuid,
222                             sizeof (state_uuid),
223                             sizeof (state_uuid),
224                             0,
225                             GCS_MSG_STATE_UUID);
226 
227     gcs_group_state_t state;
228     int i;
229     for (i = 0; i < nodes_num; i++) {
230         state = gcs_group_handle_uuid_msg (nodes[i]->group(),&uuid_msg);
231         ck_assert_msg(state == GCS_GROUP_WAIT_STATE_MSG,
232                       "Wrong group state after STATE_UUID message. "
233                       "Expected: %d, got: %d", GCS_GROUP_WAIT_STATE_MSG, state);
234     }
235 
236     /* complete state message exchange */
237     for (i = 0; i < nodes_num; i++)
238     {
239         /* create state message from node i */
240         gcs_state_msg_t* state =
241             gcs_group_get_state (nodes[i]->group());
242         ck_assert(NULL != state);
243 
244         ssize_t state_len = gcs_state_msg_len (state);
245         uint8_t state_buf[state_len];
246         gcs_state_msg_write (state_buf, state);
247 
248         gcs_recv_msg_t state_msg(state_buf,
249                                  sizeof (state_buf),
250                                  sizeof (state_buf),
251                                  i,
252                                  GCS_MSG_STATE_MSG);
253 
254         /* deliver to each of the nodes */
255         int j;
256         for (j = 0; j < nodes_num; j++) {
257             gcs_group_state_t ret =
258                 gcs_group_handle_state_msg (nodes[j]->group(), &state_msg);
259 
260             if (nodes_num - 1 == i) { // a message from the last node
261                 ck_assert_msg(ret == GCS_GROUP_PRIMARY,
262                               "Handling state msg failed: sender %d, "
263                               "receiver %d", i, j);
264             }
265             else {
266                 ck_assert_msg(ret == GCS_GROUP_WAIT_STATE_MSG,
267                               "Handling state msg failed: sender %d, "
268                               "receiver %d", i, j);
269             }
270         }
271 
272         gcs_state_msg_destroy (state);
273     }
274 
275     return 0;
276 }
277 
278 int
add_node(struct gt_node * node,bool const new_id)279 gt_group::add_node(struct gt_node* node, bool const new_id)
280 {
281     if (GT_MAX_NODES == nodes_num) return -ERANGE;
282 
283     if (new_id) {
284         gu_uuid_t node_uuid;
285         gu_uuid_generate (&node_uuid, NULL, 0);
286         gu_uuid_print (&node_uuid, (char*)node->id, sizeof (node->id));
287         gu_debug ("Node %d (%p) UUID: %s", nodes_num, node, node->id);
288     }
289 
290     nodes[nodes_num] = node;
291     nodes_num++;
292 
293     /* check that all node ids are different */
294     int i;
295     for (i = 0; i < nodes_num; i++) {
296         int j;
297         for (j = i+1; j < nodes_num; j++) {
298             ck_assert_msg(strcmp(nodes[i]->id, nodes[j]->id),
299                           "%d (%p) and %d (%p) have the same id: %s/%s",
300                           i, nodes[i], j, nodes[j], nodes[i]->id, nodes[j]->id);
301         }
302     }
303 
304     /* deliver new component message to all nodes */
305     int ret = deliver_component_msg(primary);
306     ck_assert_msg(ret == 0, "Component message delivery failed: %d (%s)",
307                   ret, strerror(-ret));
308 
309     /* deliver state exchange uuid */
310     ret = perform_state_exchange();
311     ck_assert_msg(ret == 0, "State exchange failed: %d (%s)",
312                   ret, strerror(-ret));
313 
314     return 0;
315 }
316 
317 /* NOTE: this function uses simplified and determinitstic algorithm where
318  *       dropped node is always replaced by the last one in group.
319  *       For our purposes (reproduction of #465) it fits perfectly. */
320 struct gt_node*
drop_node(int const idx)321 gt_group::drop_node(int const idx)
322 {
323     ck_assert(idx >= 0);
324     ck_assert(idx < nodes_num);
325 
326     struct gt_node* dropped = nodes[idx];
327 
328     nodes[idx] = nodes[nodes_num - 1];
329     nodes[nodes_num - 1] = NULL;
330     nodes_num--;
331 
332     if (nodes_num > 0) {
333         deliver_component_msg(primary);
334         perform_state_exchange();
335     }
336 
337     return dropped;
338 }
339 
340 /* for delivery of GCS_MSG_SYNC or GCS_MSG_JOIN msg*/
341 int
deliver_join_sync_msg(int const src,gcs_msg_type_t const type)342 gt_group::deliver_join_sync_msg(int const src, gcs_msg_type_t const type)
343 {
344     gcs_seqno_t    seqno = nodes[src]->group()->act_id_;
345     gcs_recv_msg_t msg(&seqno,
346                        sizeof (seqno),
347                        sizeof (seqno),
348                        src,
349                        type);
350 
351     int ret = -1;
352     int i;
353     for (i = 0; i < nodes_num; i++) {
354         gcs_group_t* const group = nodes[i]->group();
355         switch (type) {
356         case GCS_MSG_JOIN:
357             ret = gcs_group_handle_join_msg(group, &msg);
358             mark_point();
359             if (i == src) {
360                 ck_assert_msg(ret == 1,
361                               "%d failed to handle own JOIN message: %d (%s)",
362                               i, ret, strerror (-ret));
363             }
364             else {
365                 ck_assert_msg(ret == 0,
366                               "%d failed to handle other JOIN message: %d (%s)",
367                               i, ret, strerror (-ret));
368             }
369             break;
370         case GCS_MSG_SYNC:
371             ret = gcs_group_handle_sync_msg(group, &msg);
372             if (i == src) {
373                 ck_assert_msg(ret == 1 ||
374                               group->nodes[src].status != GCS_NODE_STATE_JOINED,
375                               "%d failed to handle own SYNC message: %d (%s)",
376                               i, ret, strerror (-ret));
377             }
378             else {
379                 ck_assert_msg(ret == 0,
380                               "%d failed to handle other SYNC message: %d (%s)",
381                               i, ret, strerror (-ret));
382             }
383             break;
384         default:
385             ck_abort_msg("wrong message type: %d", type);
386         }
387     }
388 
389     return ret;
390 }
391 
392 gcs_seqno_t
deliver_last_applied(int const from,gcs_seqno_t const la)393 gt_group::deliver_last_applied(int const from, gcs_seqno_t const la)
394 {
395     gcs_seqno_t res = GCS_SEQNO_ILL;
396 
397     if (nodes_num > 0) res = nodes[0]->deliver_last_applied(from, la);
398 
399     for (int i(1); i < nodes_num; ++i)
400     {
401         ck_assert(nodes[i]->deliver_last_applied(from, la) == res);
402     }
403 
404     return res;
405 }
406 
407 bool
verify_node_state_across(int const idx,gcs_node_state_t const check) const408 gt_group::verify_node_state_across(int const idx, gcs_node_state_t const check)
409 const
410 {
411     for (int i(0); i < nodes_num; i++)
412     {
413         gcs_node_state_t const state(nodes[i]->group()->nodes[idx].status);
414         if (check != state) {
415             gu_error("At node %d node's %d status is not %d, but %d",
416                      i, idx, check, state);
417             return false;
418         }
419     }
420 
421     return true;
422 }
423 
424 /* start SST on behalf of node idx (joiner)
425  * @return donor idx or negative error code */
426 int
sst_start(int const joiner_idx,const char * donor_name)427 gt_group::sst_start (int const joiner_idx,const char* donor_name)
428 {
429     ck_assert(joiner_idx >= 0);
430     ck_assert(joiner_idx <  nodes_num);
431 
432     ssize_t const req_len = strlen(donor_name) + 2;
433     // leave one byte as sst request payload
434 
435     int donor_idx = -1;
436     int i;
437     for (i = 0; i < nodes_num; i++)
438     {
439         gcache::GCache* const gcache(nodes[i]->group.gcache());
440         ck_assert(NULL != gcache);
441         // sst request is expected to be dynamically allocated
442         char* const req_buf = (char*)gcache->malloc(req_len);
443         ck_assert(NULL != req_buf);
444         ::memset(req_buf, 0, req_len);
445         sprintf (req_buf, "%s", donor_name);
446 
447         struct gcs_act_rcvd req(gcs_act(req_buf, req_len, GCS_ACT_STATE_REQ),
448                                 NULL,
449                                 GCS_SEQNO_ILL,
450                                 joiner_idx);
451 
452         int ret = gcs_group_handle_state_request(nodes[i]->group(), &req);
453 
454         if (ret < 0) { // don't fail here, we may want to test negatives
455             gu_error (ret < 0, "Handling state request to '%s' failed: %d (%s)",
456                       donor_name, ret, strerror (-ret));
457             return ret;
458         }
459 
460         if (i == joiner_idx) {
461             ck_assert(ret == req_len);
462             gcache->free(req_buf); // passed to joiner
463         }
464         else {
465             if (ret > 0) {
466                 if (donor_idx < 0) {
467                     ck_assert(req.id == i);
468                     donor_idx = i;
469                     gcache->free(req_buf); // passed to donor
470                 }
471                 else {
472                     ck_abort_msg("More than one donor selected: %d, first "
473                                  "donor: %d", i, donor_idx);
474                 }
475             }
476         }
477     }
478 
479     ck_assert_msg(donor_idx >= 0, "Failed to select donor");
480 
481     for (i = 0; i < nodes_num; i++) {
482         gcs_group_t* const group = nodes[i]->group();
483         gcs_node_t* const donor = &group->nodes[donor_idx];
484         gcs_node_state_t state = donor->status;
485         ck_assert_msg(state == GCS_NODE_STATE_DONOR, "%d is not donor at %d",
486                       donor_idx, i);
487         int dc = donor->desync_count;
488         ck_assert_msg(dc >= 1, "donor %d at %d has desync_count %d",
489                       donor_idx, i, dc);
490 
491         gcs_node_t* const joiner = &group->nodes[joiner_idx];
492         state = joiner->status;
493         ck_assert_msg(state == GCS_NODE_STATE_JOINER, "%d is not joiner at %d",
494                       joiner_idx, i);
495         dc = joiner->desync_count;
496         ck_assert_msg(dc == 0, "joiner %d at %d has desync_count %d",
497                       donor_idx, i, dc);
498 
499         /* check that donor and joiner point at each other */
500         ck_assert_msg(!memcmp(group->nodes[donor_idx].joiner,
501                               group->nodes[joiner_idx].id,
502                               GCS_COMP_MEMB_ID_MAX_LEN+1),
503                       "Donor points at wrong joiner: expected %s, got %s",
504                       group->nodes[joiner_idx].id,group->nodes[donor_idx].joiner);
505 
506         ck_assert_msg(!memcmp(group->nodes[joiner_idx].donor,
507                               group->nodes[donor_idx].id,
508                               GCS_COMP_MEMB_ID_MAX_LEN+1),
509                       "Joiner points at wrong donor: expected %s, got %s",
510                       group->nodes[donor_idx].id,group->nodes[joiner_idx].donor);
511     }
512 
513     return donor_idx;
514 }
515 
516 /* Finish SST on behalf of a node idx (joiner or donor) */
517 int
sst_finish(int const idx)518 gt_group::sst_finish(int const idx)
519 {
520     gcs_node_state_t node_state;
521 
522     deliver_join_sync_msg(idx, GCS_MSG_JOIN);
523     node_state = nodes[idx]->state();
524     ck_assert(node_state == GCS_NODE_STATE_JOINED);
525 
526     deliver_join_sync_msg(idx, GCS_MSG_SYNC);
527     node_state = nodes[idx]->state();
528     ck_assert(node_state == GCS_NODE_STATE_SYNCED);
529 
530     return 0;
531 }
532 
533 int
sync_node(int const joiner_idx)534 gt_group::sync_node(int const joiner_idx)
535 {
536     gcs_node_state_t const node_state(nodes[joiner_idx]->state());
537     ck_assert(node_state == GCS_NODE_STATE_PRIM);
538 
539     // initiate SST
540     int const donor_idx(sst_start(joiner_idx, ""));
541     ck_assert(donor_idx >= 0);
542     ck_assert(donor_idx != joiner_idx);
543 
544     // complete SST
545     int err;
546     err = sst_finish(donor_idx);
547     ck_assert(0 == err);
548     err = sst_finish(joiner_idx);
549     ck_assert(0 == err);
550 
551     return 0;
552 }
553 
gt_group(int const num,int const gcs_proto_ver,bool const prim)554 gt_group::gt_group(int const num, int const gcs_proto_ver, bool const prim)
555     : nodes(),
556       nodes_num(0),
557       primary(prim)
558 {
559     if (num > 0)
560     {
561         for (int i = 0; i < num; ++i)
562         {
563             char name[32];
564             sprintf(name, "%d", i);
565             add_node(new gt_node(name, gcs_proto_ver), true);
566             bool const prim_state(nodes[0]->group.state() == GCS_GROUP_PRIMARY);
567             ck_assert(prim_state == prim);
568 
569             gcs_node_state_t node_state(nodes[i]->state());
570             if (primary)
571             {
572                 if (0 == i)
573                 {
574                     ck_assert(node_state == GCS_NODE_STATE_JOINED);
575 
576                     deliver_join_sync_msg(0, GCS_MSG_SYNC);
577                     node_state = nodes[0]->state();
578                     ck_assert(node_state == GCS_NODE_STATE_SYNCED);
579                 }
580                 else
581                 {
582                     ck_assert(node_state == GCS_NODE_STATE_PRIM);
583 
584                     // initiate SST
585                     int const donor_idx(sst_start(i, ""));
586                     ck_assert(donor_idx >= 0);
587                     ck_assert(donor_idx != i);
588 
589                     // complete SST
590                     int err;
591                     err = sst_finish(donor_idx);
592                     ck_assert(0 == err);
593                     err = sst_finish(i);
594                     ck_assert(0 == err);
595                 }
596             }
597             else
598             {
599                 ck_assert(node_state == GCS_NODE_STATE_NON_PRIM);
600             }
601         }
602     }
603 
604     ck_assert(num == nodes_num);
605 }
606 
~gt_group()607 gt_group::~gt_group()
608 {
609     while (nodes_num)
610     {
611         struct gt_node* const node(drop_node(0));
612         ck_assert(node != NULL);
613         delete node;
614     }
615 }
616