1 /*
2 CTDB cluster mutex test
3
4 Copyright (C) Martin Schwenke 2019
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "replace.h"
21 #include "system/network.h"
22 #include "system/wait.h"
23
24 #include <assert.h>
25
26 #include <talloc.h>
27 #include <tevent.h>
28
29 #include "lib/util/util.h"
30
31 /*
32 * ctdb_cluster_mutex.c is included below. This requires a few hacks...
33 */
34
35 /* Avoid inclusion of ctdb_private.h */
36 #define _CTDB_PRIVATE_H
37
38 /* Fake ctdb_context */
/*
 * Minimal replacement for the real ctdb_context: only the event
 * context is carried, which is what the code in this file uses.
 */
struct ctdb_context {
	/* Event context; created in main() and used for every event loop */
	struct tevent_context *ev;
};
42
43 /*
44 * ctdb_fork() and ctdb_kill() are used in ctdb_cluster_mutex.c for
45 * safer tracking of PIDs. Fake them here to avoid dragging in the
46 * world.
47 */
48
/*
 * Stand-in for ctdb_fork(): the context is ignored and the child is
 * created with plain fork().  Returns whatever fork() returns.
 */
static pid_t ctdb_fork(struct ctdb_context *ctdb)
{
	pid_t child;

	(void)ctdb;

	child = fork();

	return child;
}
53
/*
 * Stand-in for ctdb_kill(): send signum to pid and, when a real
 * signal was successfully delivered, reap the child.
 *
 * Returns 0 on success, or -1 with errno as set by kill() on failure.
 */
static int ctdb_kill(struct ctdb_context *ctdb, pid_t pid, int signum)
{
	/*
	 * Tests need to wait for the child to exit to ensure that the
	 * lock really has been released.  The PID is only accessible
	 * in ctdb_cluster_mutex.c, so make a best attempt to ensure
	 * that the child process is waited for after it is killed.
	 * Avoid waiting if the process is already gone.
	 */
	int ret;

	(void)ctdb;

	ret = kill(pid, signum);

	/* Signal 0 is only an existence probe: never wait for it */
	if (signum == 0) {
		return ret;
	}

	/*
	 * Nothing was signalled (e.g. the process is already gone),
	 * so there is nothing to wait for.  Returning here also keeps
	 * errno from kill() intact instead of letting waitpid()
	 * overwrite it.
	 */
	if (ret == -1) {
		return ret;
	}

	waitpid(pid, NULL, 0);

	return 0;
}
74
75 #include "server/ctdb_cluster_mutex.c"
76
77 /*
78 * Mutex testing support
79 */
80
/* State of one attempt to take (and then hold) the cluster mutex */
struct mutex_handle {
	/* Set once the attempt has completed or has been cancelled */
	bool done;
	/* True only if the lock handler reported status '0' */
	bool locked;
	/* Handle returned by ctdb_cluster_mutex(); freed to cancel/release */
	struct ctdb_cluster_mutex_handle *h;
};
86
/* Per-locker context passed as private data to the mutex callbacks */
struct do_lock_context {
	/* Current lock attempt/holder state; NULL when there is none */
	struct mutex_handle *mh;
	/* Fake ctdb context providing the event context */
	struct ctdb_context *ctdb;
};
91
do_lock_handler(char status,double latency,void * private_data)92 static void do_lock_handler(char status, double latency, void *private_data)
93 {
94 struct do_lock_context *dl = talloc_get_type_abort(
95 private_data, struct do_lock_context);
96 struct mutex_handle *mh;
97
98 assert(dl->mh != NULL);
99 mh = dl->mh;
100
101 mh->locked = (status == '0') ;
102
103 /*
104 * If unsuccessful then ensure the process has exited and that
105 * the file descriptor event handler has been cancelled
106 */
107 if (! mh->locked) {
108 TALLOC_FREE(mh->h);
109 }
110
111 switch (status) {
112 case '0':
113 printf("LOCK\n");
114 break;
115
116 case '1':
117 printf("CONTENTION\n");
118 break;
119
120 case '2':
121 printf("TIMEOUT\n");
122 break;
123
124 default:
125 printf("ERROR\n");
126 }
127
128 fflush(stdout);
129 mh->done = true;
130 }
131
do_lock_lost_handler(void * private_data)132 static void do_lock_lost_handler(void *private_data)
133 {
134 struct do_lock_context *dl = talloc_get_type_abort(
135 private_data, struct do_lock_context);
136
137 printf("LOST\n");
138 fflush(stdout);
139 TALLOC_FREE(dl->mh);
140 }
141
do_lock_take(struct do_lock_context * dl,const char * mutex_string)142 static void do_lock_take(struct do_lock_context *dl,
143 const char *mutex_string)
144 {
145 struct ctdb_cluster_mutex_handle *h;
146
147 dl->mh = talloc_zero(dl, struct mutex_handle);
148 assert(dl->mh != NULL);
149
150 h = ctdb_cluster_mutex(dl->mh,
151 dl->ctdb,
152 mutex_string,
153 120,
154 do_lock_handler,
155 dl,
156 do_lock_lost_handler,
157 dl);
158 assert(h != NULL);
159
160 dl->mh->h = h;
161 }
162
do_lock_wait_done(struct do_lock_context * dl)163 static void do_lock_wait_done(struct do_lock_context *dl)
164 {
165 assert(dl->mh != NULL);
166
167 while (! dl->mh->done) {
168 tevent_loop_once(dl->ctdb->ev);
169 }
170 }
171
do_lock_check(struct do_lock_context * dl)172 static void do_lock_check(struct do_lock_context *dl)
173 {
174 assert(dl->mh != NULL);
175
176 if (! dl->mh->locked) {
177 printf("NOLOCK\n");
178 fflush(stdout);
179 TALLOC_FREE(dl->mh);
180 }
181 }
182
/*
 * Complete lock attempt: start taking the lock, wait for the result
 * and check it.  On return dl->mh is non-NULL if and only if the lock
 * is held.
 */
static void do_lock(struct do_lock_context *dl, const char *mutex_string)
{
	/* Kick off the attempt... */
	do_lock_take(dl, mutex_string);

	/* ...let the event loop deliver the result... */
	do_lock_wait_done(dl);

	/* ...and drop the state again if the lock was not obtained */
	do_lock_check(dl);
}
192
do_unlock(struct do_lock_context * dl)193 static void do_unlock(struct do_lock_context *dl)
194 {
195 if (dl->mh == NULL) {
196 return;
197 }
198
199 if (! dl->mh->done) {
200 /*
201 * Taking of lock still in progress. Free the cluster
202 * mutex handle to release it but leave the lock
203 * handle in place to allow taking of the lock to
204 * fail.
205 */
206 printf("CANCEL\n");
207 fflush(stdout);
208 TALLOC_FREE(dl->mh->h);
209 dl->mh->done = true;
210 dl->mh->locked = false;
211 return;
212 }
213
214 printf("UNLOCK\n");
215 fflush(stdout);
216 TALLOC_FREE(dl->mh);
217 }
218
/*
 * Timer callback: private_data points to a bool, which is set to true
 * to tell the waiting loop that the timer has fired.  The other
 * arguments are not used.
 */
static void wait_handler(struct tevent_context *ev,
			 struct tevent_timer *te,
			 struct timeval t,
			 void *private_data)
{
	bool *fired = private_data;

	(void)ev;
	(void)te;
	(void)t;

	*fired = true;
}
228
do_lock_wait_time(struct do_lock_context * dl,unsigned long wait_time)229 static void do_lock_wait_time(struct do_lock_context *dl,
230 unsigned long wait_time)
231 {
232 struct tevent_timer *tt;
233 bool done = false;
234
235 tt = tevent_add_timer(dl->ctdb->ev,
236 dl,
237 tevent_timeval_current_ofs(wait_time, 0),
238 wait_handler,
239 &done);
240 assert(tt != NULL);
241
242 while (! done) {
243 tevent_loop_once(dl->ctdb->ev);
244 }
245 }
246
247 /*
248 * Testcases
249 */
250
/*
 * Testcase: take the lock, then release it.
 * Asserts that the lock is held after locking and gone after unlock.
 */
static void test_lock_unlock(TALLOC_CTX *mem_ctx,
			     struct ctdb_context *ctdb,
			     const char *mutex_string)
{
	struct do_lock_context *locker =
		talloc_zero(mem_ctx, struct do_lock_context);

	assert(locker != NULL);
	locker->ctdb = ctdb;

	/* LOCK */
	do_lock(locker, mutex_string);
	assert(locker->mh != NULL);

	/* UNLOCK */
	do_unlock(locker);
	assert(locker->mh == NULL);
}
269
/*
 * Testcase: a second locker must fail to take the lock while the
 * first one holds it.  The first locker then releases the lock.
 */
static void test_lock_lock_unlock(TALLOC_CTX *mem_ctx,
				  struct ctdb_context *ctdb,
				  const char *mutex_string)
{
	struct do_lock_context *holder =
		talloc_zero(mem_ctx, struct do_lock_context);
	struct do_lock_context *contender;

	assert(holder != NULL);
	holder->ctdb = ctdb;

	contender = talloc_zero(mem_ctx, struct do_lock_context);
	assert(contender != NULL);
	contender->ctdb = ctdb;

	/* LOCK */
	do_lock(holder, mutex_string);
	assert(holder->mh != NULL);

	/* CONTENTION */
	do_lock(contender, mutex_string);
	assert(contender->mh == NULL);

	/* UNLOCK */
	do_unlock(holder);
	assert(holder->mh == NULL);
}
297
/*
 * Testcase: once the first locker has released the lock, a second
 * locker must be able to take and release it.
 */
static void test_lock_unlock_lock_unlock(TALLOC_CTX *mem_ctx,
					 struct ctdb_context *ctdb,
					 const char *mutex_string)
{
	struct do_lock_context *first =
		talloc_zero(mem_ctx, struct do_lock_context);
	struct do_lock_context *second;

	assert(first != NULL);
	first->ctdb = ctdb;

	second = talloc_zero(mem_ctx, struct do_lock_context);
	assert(second != NULL);
	second->ctdb = ctdb;

	/* LOCK */
	do_lock(first, mutex_string);
	assert(first->mh != NULL);

	/* UNLOCK */
	do_unlock(first);
	assert(first->mh == NULL);

	/* LOCK */
	do_lock(second, mutex_string);
	assert(second->mh != NULL);

	/* UNLOCK */
	do_unlock(second);
	assert(second->mh == NULL);
}
329
/*
 * Testcase: cancel a lock attempt before its result arrives, then
 * check it.  The check must find the lock not held and discard the
 * lock state.
 */
static void test_lock_cancel_check(TALLOC_CTX *mem_ctx,
				   struct ctdb_context *ctdb,
				   const char *mutex_string)
{
	struct do_lock_context *locker =
		talloc_zero(mem_ctx, struct do_lock_context);

	assert(locker != NULL);
	locker->ctdb = ctdb;

	/* Only start the attempt: the event loop has not run yet */
	do_lock_take(locker, mutex_string);
	assert(locker->mh != NULL);

	/* CANCEL */
	do_unlock(locker);
	assert(locker->mh != NULL);

	/* Already marked done by the cancel, so this returns at once */
	do_lock_wait_done(locker);

	/* NOLOCK */
	do_lock_check(locker);
	assert(locker->mh == NULL);
}
353
/*
 * Testcase: cancel a lock attempt before its result arrives, then
 * unlock.  The second unlock must discard the remaining lock state.
 */
static void test_lock_cancel_unlock(TALLOC_CTX *mem_ctx,
				    struct ctdb_context *ctdb,
				    const char *mutex_string)
{
	struct do_lock_context *locker =
		talloc_zero(mem_ctx, struct do_lock_context);

	assert(locker != NULL);
	locker->ctdb = ctdb;

	/* Only start the attempt: the event loop has not run yet */
	do_lock_take(locker, mutex_string);
	assert(locker->mh != NULL);

	/* CANCEL */
	do_unlock(locker);
	assert(locker->mh != NULL);

	/* Already marked done by the cancel, so this returns at once */
	do_lock_wait_done(locker);

	/* UNLOCK */
	do_unlock(locker);
	assert(locker->mh == NULL);
}
377
/*
 * Testcase: take the lock, keep the event loop running for a while
 * and verify that the lock is still held before releasing it.
 */
static void test_lock_wait_unlock(TALLOC_CTX *mem_ctx,
				  struct ctdb_context *ctdb,
				  const char *mutex_string)
{
	struct do_lock_context *locker =
		talloc_zero(mem_ctx, struct do_lock_context);

	assert(locker != NULL);
	locker->ctdb = ctdb;

	/* LOCK */
	do_lock(locker, mutex_string);
	assert(locker->mh != NULL);

	/* Wait for twice as long as the PPID timeout */
	do_lock_wait_time(locker, 2 * 5);
	assert(locker->mh != NULL);

	/* UNLOCK */
	do_unlock(locker);
	assert(locker->mh == NULL);
}
400
/*
 * File descriptor callback: private_data points to a bool, which is
 * set to true to tell the waiting loop that the descriptor has become
 * ready.  The other arguments are not used.
 */
static void fd_done_handler(struct tevent_context *ev,
			    struct tevent_fd *fde,
			    uint16_t flags,
			    void *private_data)
{
	bool *ready = private_data;

	(void)ev;
	(void)fde;
	(void)flags;

	*ready = true;
}
410
/*
 * Testcase: the lock must become available again after the process
 * that took it has been killed.
 *
 * A forked child takes the lock, tells the parent through a pipe and
 * then sleeps.  The parent kills the child, waits until the pipe
 * reports that the lock helper has exited too, and then takes and
 * releases the lock itself.
 */
static void test_lock_ppid_gone_lock_unlock(TALLOC_CTX *mem_ctx,
					    struct ctdb_context *ctdb,
					    const char *mutex_string)
{
	struct do_lock_context *dl;
	struct tevent_fd *fde;
	int pipefd[2];
	int ret;
	pid_t pid, pid2;
	ssize_t nread;
	bool done;

	/*
	 * Do this in the parent - debugging aborts of the child is
	 * trickier
	 */
	dl = talloc_zero(mem_ctx, struct do_lock_context);
	assert(dl != NULL);
	dl->ctdb = ctdb;

	ret = pipe(pipefd);
	assert(ret == 0);

	pid = fork();
	assert(pid != -1);

	if (pid == 0) {
		ssize_t nwritten;

		close(pipefd[0]);

		/* LOCK */
		do_lock(dl, mutex_string);
		assert(dl->mh != NULL);

		/*
		 * Note that we never see corresponding LOST.  That
		 * would come from this process, but it is killed
		 * below.
		 */

		/* ret is still 0 from pipe(): tell the parent we hold the lock */
		nwritten = write(pipefd[1], &ret, sizeof(ret));
		assert(nwritten == sizeof(ret));

		sleep(999);
		exit(1);
	}

	close(pipefd[1]);

	/* Block until the child reports that it holds the lock */
	nread = read(pipefd[0], &ret, sizeof(ret));
	assert(nread == sizeof(ret));
	assert(ret == 0);

	/*
	 * pipefd[1] is leaked into the helper, so there will be an
	 * event generated when the helper exits
	 */
	done = false;
	fde = tevent_add_fd(ctdb->ev,
			    ctdb,
			    pipefd[0],
			    TEVENT_FD_READ,
			    fd_done_handler,
			    &done);
	assert(fde != NULL);

	ret = kill(pid, SIGKILL);
	assert(ret == 0);
	pid2 = waitpid(pid, &ret, 0);
	assert(pid2 == pid);

	while (! done) {
		tevent_loop_once(ctdb->ev);
	}

	/*
	 * The pipe is at EOF and would stay readable forever, so the
	 * handler would keep firing during the event loops below.
	 * Remove the event before closing the descriptor it watches.
	 */
	TALLOC_FREE(fde);
	close(pipefd[0]);

	/* LOCK */
	do_lock(dl, mutex_string);
	assert(dl->mh != NULL);

	/* UNLOCK */
	do_unlock(dl);
	assert(dl->mh == NULL);
}
495
/*
 * Testcase: remove the lock file while the lock is held.  A second
 * locker is then expected to succeed as well, and both lockers
 * release their locks afterwards, the second one first.
 */
static void test_lock_file_removed_no_recheck(TALLOC_CTX *mem_ctx,
					      struct ctdb_context *ctdb,
					      const char *mutex_string,
					      const char *lock_file)
{
	struct do_lock_context *first =
		talloc_zero(mem_ctx, struct do_lock_context);
	struct do_lock_context *second;
	int rc;

	assert(first != NULL);
	first->ctdb = ctdb;

	second = talloc_zero(mem_ctx, struct do_lock_context);
	assert(second != NULL);
	second->ctdb = ctdb;

	/* LOCK */
	do_lock(first, mutex_string);
	assert(first->mh != NULL);

	rc = unlink(lock_file);
	assert(rc == 0);

	/* LOCK */
	do_lock(second, mutex_string);
	assert(second->mh != NULL);

	/* UNLOCK */
	do_unlock(second);
	assert(second->mh == NULL);

	/* UNLOCK */
	do_unlock(first);
	assert(first->mh == NULL);
}
532
/*
 * Testcase: take the lock, keep the event loop running for wait_time
 * seconds and verify that the lock is still held before releasing
 * it.
 */
static void test_lock_file_wait_recheck_unlock(TALLOC_CTX *mem_ctx,
					       struct ctdb_context *ctdb,
					       const char *mutex_string,
					       unsigned long wait_time)
{
	struct do_lock_context *locker =
		talloc_zero(mem_ctx, struct do_lock_context);

	assert(locker != NULL);
	locker->ctdb = ctdb;

	/* LOCK */
	do_lock(locker, mutex_string);
	assert(locker->mh != NULL);

	do_lock_wait_time(locker, wait_time);
	assert(locker->mh != NULL);

	/* UNLOCK */
	do_unlock(locker);
	assert(locker->mh == NULL);
}
555
/*
 * Testcase: remove the lock file while the lock is held and run the
 * event loop until the lock state has been discarded, which the lost
 * handler does when the lock is reported as lost.
 */
static void test_lock_file_removed(TALLOC_CTX *mem_ctx,
				   struct ctdb_context *ctdb,
				   const char *mutex_string,
				   const char *lock_file)
{
	struct do_lock_context *locker =
		talloc_zero(mem_ctx, struct do_lock_context);
	int rc;

	assert(locker != NULL);
	locker->ctdb = ctdb;

	/* LOCK */
	do_lock(locker, mutex_string);
	assert(locker->mh != NULL);

	rc = unlink(lock_file);
	assert(rc == 0);

	/* LOST */
	while (locker->mh != NULL) {
		tevent_loop_once(ctdb->ev);
	}
}
580
/*
 * Testcase: replace the lock file with a different file while the
 * lock is held and run the event loop until the lock state has been
 * discarded, which the lost handler does when the lock is reported as
 * lost.
 */
static void test_lock_file_changed(TALLOC_CTX *mem_ctx,
				   struct ctdb_context *ctdb,
				   const char *mutex_string,
				   const char *lock_file)
{
	struct do_lock_context *locker =
		talloc_zero(mem_ctx, struct do_lock_context);
	char *new_path;
	int new_fd;
	int rc;

	assert(locker != NULL);
	locker->ctdb = ctdb;

	/* LOCK */
	do_lock(locker, mutex_string);
	assert(locker->mh != NULL);

	/* Create "<lock_file>.new" and rename it over the lock file */
	new_path = talloc_asprintf(ctdb, "%s.new", lock_file);
	assert(new_path != NULL);

	new_fd = open(new_path, O_RDWR|O_CREAT, 0600);
	assert(new_fd != -1);
	close(new_fd);

	rc = rename(new_path, lock_file);
	assert(rc == 0);

	/* LOST */
	while (locker->mh != NULL) {
		tevent_loop_once(ctdb->ev);
	}
}
614
615 /*
616 * Main
617 */
618
619 static const char *prog;
620
usage(void)621 static void usage(void)
622 {
623 fprintf(stderr, "usage: %s <test> <mutex-string> [<arg>...]\n", prog);
624 exit(1);
625 }
626
/*
 * Signal handler installed for SIGALRM in main(): abort the process
 * when the overall test timeout expires.  The signal number is not
 * used.
 */
static void alarm_handler(int sig)
{
	(void)sig;

	abort();
}
631
main(int argc,const char * argv[])632 int main(int argc, const char *argv[])
633 {
634 TALLOC_CTX *mem_ctx;
635 struct ctdb_context *ctdb;
636 const char *mutex_string;
637 const char *test;
638 struct sigaction sa = { .sa_handler = NULL, };
639 int ret;
640 const char *lock_file;
641 unsigned int wait_time;
642
643 prog = argv[0];
644
645 if (argc < 3) {
646 usage();
647 }
648
649 mem_ctx = talloc_new(NULL);
650 assert(mem_ctx != NULL);
651
652 ctdb = talloc_zero(mem_ctx, struct ctdb_context);
653 assert(ctdb != NULL);
654
655 ctdb->ev = tevent_context_init(ctdb);
656 assert(ctdb->ev != NULL);
657
658 /* Add a 60s timeout for the whole test */
659 sa.sa_handler = alarm_handler;
660 sigemptyset(&sa.sa_mask);
661 ret = sigaction(SIGALRM, &sa, NULL);
662 assert(ret == 0);
663 alarm(60);
664
665 test = argv[1];
666 mutex_string = argv[2];
667
668 if (strcmp(test, "lock-unlock") == 0) {
669 test_lock_unlock(mem_ctx, ctdb, mutex_string);
670 } else if (strcmp(test, "lock-lock-unlock") == 0) {
671 test_lock_lock_unlock(mem_ctx, ctdb, mutex_string);
672 } else if (strcmp(test, "lock-unlock-lock-unlock") == 0) {
673 test_lock_unlock_lock_unlock(mem_ctx, ctdb, mutex_string);
674 } else if (strcmp(test, "lock-cancel-check") == 0) {
675 test_lock_cancel_check(mem_ctx, ctdb, mutex_string);
676 } else if (strcmp(test, "lock-cancel-unlock") == 0) {
677 test_lock_cancel_unlock(mem_ctx, ctdb, mutex_string);
678 } else if (strcmp(test, "lock-wait-unlock") == 0) {
679 test_lock_wait_unlock(mem_ctx, ctdb, mutex_string);
680 } else if (strcmp(test, "lock-ppid-gone-lock-unlock") == 0) {
681 test_lock_ppid_gone_lock_unlock(mem_ctx, ctdb, mutex_string);
682 } else if (strcmp(test, "lock-file-removed-no-recheck") == 0) {
683 if (argc != 4) {
684 usage();
685 }
686
687 lock_file = argv[3];
688
689 test_lock_file_removed_no_recheck(mem_ctx,
690 ctdb,
691 mutex_string,
692 lock_file);
693 } else if (strcmp(test, "lock-file-wait-recheck-unlock") == 0) {
694 if (argc != 4) {
695 usage();
696 }
697
698 wait_time = smb_strtoul(argv[3],
699 NULL,
700 10,
701 &ret,
702 SMB_STR_STANDARD);
703 if (ret != 0) {
704 usage();
705 }
706
707 test_lock_file_wait_recheck_unlock(mem_ctx,
708 ctdb,
709 mutex_string,
710 wait_time);
711 } else if (strcmp(test, "lock-file-removed") == 0) {
712 if (argc != 4) {
713 usage();
714 }
715
716 lock_file = argv[3];
717
718 test_lock_file_removed(mem_ctx,
719 ctdb,
720 mutex_string,
721 lock_file);
722 } else if (strcmp(test, "lock-file-changed") == 0) {
723 if (argc != 4) {
724 usage();
725 }
726
727 lock_file = argv[3];
728
729 test_lock_file_changed(mem_ctx,
730 ctdb,
731 mutex_string,
732 lock_file);
733 } else {
734 fprintf(stderr, "Unknown test\n");
735 exit(1);
736 }
737
738 talloc_free(mem_ctx);
739
740 return 0;
741 }
742