# Copyright (c) 2012, 2020 Oracle and/or its affiliates.  All rights reserved.
#
# See the file LICENSE for license information.
#
# $Id$
#
# TEST	repmgr113
# TEST	Multi-process repmgr automatic listener takeover.
# TEST
# TEST	One of the subordinate processes automatically becomes listener if the
# TEST	original listener leaves.  An election is delayed long enough for a
# TEST	takeover to occur if the takeover happens on the master.

# Test entry point: runs the four takeover sub-tests in sequence.
#	tnum - test number used as a prefix in progress messages.
proc repmgr113 { {tnum "113"} } {
	source ./include.tcl
	if { $is_freebsd_test == 1 } {
		puts "Skipping replication manager test on FreeBSD platform."
		return
	}

	puts "Repmgr$tnum:\
	    Test automatic listener takeover among multiple processes."

	# Test running multiple listener takeovers on master and client.
	repmgr113_loop $tnum

	# Test listener takeovers in different scenarios.
	repmgr113_test $tnum

	# Test zero nthreads in taking over subordinate process.
	repmgr113_zero_nthreads $tnum

	# Test listener takeover on each site in a preferred master repgroup.
	repmgr113_prefmas $tnum
}

# Helper: poll a site program until it reports a connection to a port.
#	chan    - channel returned by open_site_prog.
#	port    - port of the remote site to ask about.
#	failmsg - error message raised when the connection never appears.
#
# Sends "is_connected $port" and reads the reply; while the reply is false,
# sleeps one second and asks again.  Gives up with an error after 30
# retries.  The retry counter always starts at zero, so every call gets the
# full 30 second budget.
proc repmgr113_await_connected { chan port failmsg } {
	set count 0
	puts $chan "is_connected $port"
	while { ![gets $chan] } {
		if { [incr count] > 30 } {
			error $failmsg
		}
		tclsleep 1
		puts $chan "is_connected $port"
	}
}

# Helper: return the automatic takeover count from repmgr_stat for env.
proc repmgr113_takeovers { env } {
	return [stat_field $env repmgr_stat \
	    "Automatic replication process takeovers"]
}

# Helper: wait for the takeover count of an environment to reach a target.
#	env     - environment handle whose repmgr_stat is examined.
#	target  - takeover count to wait for.
#	exact   - when true wait until the count equals target, otherwise
#		  wait until the count is at least target.
#	failmsg - error message raised when the target is not reached.
#
# Re-reads the statistic once per second and gives up with an error after
# 30 retries.
proc repmgr113_await_takeovers { env target exact failmsg } {
	set count 0
	set takeovers [repmgr113_takeovers $env]
	while { $exact ? ($takeovers != $target) : ($takeovers < $target) } {
		if { [incr count] > 30 } {
			error $failmsg
		}
		tclsleep 1
		set takeovers [repmgr113_takeovers $env]
	}
}

# Repeatedly replace the listener process with a freshly started
# subordinate process, first on the master only, then on the master and
# client2 in turn, verifying each takeover and that client1 holds no
# election.
#	tnum - test number used as a prefix in progress messages.
proc repmgr113_loop { {tnum "113"} } {
	global testdir
	global ipversion

	puts "\tRepmgr$tnum.loop: Run short-lived processes to\
	    perform multiple takeovers."
	env_cleanup $testdir

	set hoststr [get_hoststr $ipversion]
	foreach {mport c1port c2port} [available_ports 3] {}
	file mkdir [set mdir $testdir/MASTER]
	file mkdir [set c1dir $testdir/CLIENT1]
	file mkdir [set c2dir $testdir/CLIENT2]
	make_dbconfig $mdir \
	    [list [list repmgr_site $hoststr $mport db_local_site on]]
	make_dbconfig $c1dir \
	    [list [list repmgr_site $hoststr $c1port db_local_site on] \
	    [list repmgr_site $hoststr $mport db_bootstrap_helper on]]
	make_dbconfig $c2dir \
	    [list [list repmgr_site $hoststr $c2port db_local_site on] \
	    [list repmgr_site $hoststr $mport db_bootstrap_helper on]]

	setup_repmgr_ssl $mdir
	setup_repmgr_ssl $c1dir
	setup_repmgr_ssl $c2dir

	puts "\t\tRepmgr$tnum.loop.a: Start master and client1."
	set cmds {
		"home $mdir"
		"output $testdir/m_0_output"
		"open_env"
		"start master"
	}
	set m_1 [open_site_prog [subst $cmds]]
	set m_env [berkdb_env -home $mdir]
	set cmds {
		"home $c1dir"
		"output $testdir/c1_1_output"
		"open_env"
		"start client"
	}
	set c1_1 [open_site_prog [subst $cmds]]
	set c1_env [berkdb_env -home $c1dir]
	await_startup_done $c1_env

	# Test case 1: Test listener takeover on master.
	# 2 sites, master and client1
	# 2 master processes, m_1 (listener) and m_2
	# 1 client1 process, c1_1 (listener)
	#
	# Start all processes.  Stop master listener m_1.  Verify m_2 takes
	# over listener role and no election on client1.  Set m_2 to m_1 and
	# start another master process m_2, stop m_1 again and redo takeover
	# for multiple times.
	puts -nonewline "\t\tRepmgr$tnum.loop.b: Run short-lived processes\
	    to perform multiple takeovers on master"
	flush stdout
	for { set i 1 } { $i < 11 } { incr i } {
		# Close listener process and verify takeover happens.
		puts -nonewline "."
		flush stdout

		set cmds {
			"home $mdir"
			"output $testdir/m_$i\_output"
			"open_env"
			"start master"
		}
		set m_2 [open_site_prog [subst $cmds]]
		repmgr113_await_connected $m_2 $c1port \
		    "FAIL: couldn't connect to client1 within 30 seconds"
		close $m_1
		# The i-th iteration must produce exactly the i-th takeover.
		repmgr113_await_takeovers $m_env $i 1 \
		    "FAIL: couldn't takeover on master in 30 seconds"
		set election_count [stat_field $c1_env rep_stat \
		    "Elections held"]
		error_check_good c1_no_elections_1 $election_count 0
		tclsleep 3
		# The new listener must be connected to client1 again before
		# it in turn is replaced.
		repmgr113_await_connected $m_2 $c1port \
		    "FAIL: couldn't connect to client1 within 30 seconds"
		set m_1 $m_2
	}
	puts ""

	# Test case 2: Test listener takeover on master and client successively.
	# 3 sites, master, client1, client2
	# 2 master processes, m_1 (listener) and m_2
	# 1 client1 process, c1_1 (listener)
	# 2 client2 processes, c2_1 (listener) and c2_2
	#
	# Start client2 process c2_1, c2_2 and master process m_2.  Stop
	# client2 listener c2_1.  Verify takeover happens on client2.  Stop
	# master listener m_1.  Verify m_2 takes over listener role and no
	# election on client1.  Set c2_2 to c2_1, m_2 to m_1.  Start another
	# client2 process c2_2 and master process m_2.  Stop c2_1 and m_1
	# again and redo takeovers for multiple times.
	puts "\t\tRepmgr$tnum.loop.c: Start client2."
	set cmds {
		"home $c2dir"
		"output $testdir/c2_1_output"
		"open_env"
		"start client"
	}
	set c2_1 [open_site_prog [subst $cmds]]
	set c2_env [berkdb_env -home $c2dir]
	await_startup_done $c2_env

	puts -nonewline "\t\tRepmgr$tnum.loop.d: Run short-lived processes to\
	    perform multiple takeovers on master and client2 successively"
	flush stdout
	# The master already has 10 takeovers from test case 1, so i runs
	# from 11 to 20 and doubles as the expected master takeover count;
	# client2 starts from zero, hence the "i - 10" below.
	for { set i 11 } { $i < 21 } { incr i } {
		puts -nonewline "."
		flush stdout
		set cmds {
			"home $mdir"
			"output $testdir/m_$i\_output"
			"open_env"
			"start master"
		}
		set m_2 [open_site_prog [subst $cmds]]
		set cmds {
			"home $c2dir"
			"output $testdir/c2_$i\_output"
			"open_env"
			"start client"
		}
		set c2_2 [open_site_prog [subst $cmds]]
		repmgr113_await_connected $m_2 $c2port \
		    "FAIL: couldn't connect to client2 within 30 seconds"
		repmgr113_await_connected $c2_2 $mport \
		    "FAIL: couldn't connect to master within 30 seconds"

		close $c2_1
		repmgr113_await_takeovers $c2_env [expr {$i - 10}] 1 \
		    "FAIL: couldn't takeover on client2 in 30 seconds"
		# Pause to let c2_2 connect to m_2.
		tclsleep 3

		close $m_1
		repmgr113_await_takeovers $m_env $i 1 \
		    "FAIL: couldn't takeover on master in 30 seconds"
		set election_count [stat_field $c1_env rep_stat \
		    "Elections held"]
		error_check_good c1_no_elections_2 $election_count 0

		set m_1 $m_2
		set c2_1 $c2_2
	}
	$m_env close
	$c1_env close
	$c2_env close
	close $c1_1
	close $c2_1
	close $m_1
	puts " "
}

# Walk through nine listener takeover scenarios: takeover on master and
# client, rep-unaware subordinate processes, a failed takeover, a removed
# site, and elections when no takeover candidate exists.
#	tnum - test number used as a prefix in progress messages.
proc repmgr113_test { {tnum "113"} } {
	global testdir
	global ipversion

	puts "\tRepmgr$tnum.test: Takeover in any subordinate process and\
	    election delay due to the takeover on master"
	env_cleanup $testdir

	set hoststr [get_hoststr $ipversion]
	foreach {mport c1port c2port c3port} [available_ports 4] {}
	file mkdir [set mdir $testdir/MASTER]
	file mkdir [set c1dir $testdir/CLIENT1]
	file mkdir [set c2dir $testdir/CLIENT2]
	file mkdir [set c3dir $testdir/CLIENT3]
	make_dbconfig $mdir \
	    [list [list repmgr_site $hoststr $mport db_local_site on]]
	make_dbconfig $c1dir \
	    [list [list repmgr_site $hoststr $c1port db_local_site on] \
	    [list repmgr_site $hoststr $mport db_bootstrap_helper on]]
	make_dbconfig $c2dir \
	    [list [list repmgr_site $hoststr $c2port db_local_site on] \
	    [list repmgr_site $hoststr $mport db_bootstrap_helper on]]
	make_dbconfig $c3dir \
	    [list [list repmgr_site $hoststr $c3port db_local_site on] \
	    [list repmgr_site $hoststr $mport db_bootstrap_helper on]]

	setup_repmgr_ssl $mdir
	setup_repmgr_ssl $c1dir
	setup_repmgr_ssl $c2dir
	setup_repmgr_ssl $c3dir

	# Test case 1: Test listener takeover on master.
	# 2 sites, master and client1
	# 2 master processes, m_1 (listener) and m_2
	# 1 client1 process, c1_1 (listener)
	#
	# Start all processes.  Stop master listener m_1.  Verify m_2 takes
	# over listener role and no election on client1.
	puts "\t\tRepmgr$tnum.test.a: Start two processes on master and one\
	    process on client1."
	set cmds {
		"home $mdir"
		"output $testdir/m_1_output"
		"open_env"
		"start master"
	}
	set m_1 [open_site_prog [subst $cmds]]
	set cmds {
		"home $mdir"
		"output $testdir/m_2_output"
		"open_env"
		"start master"
	}
	set m_2 [open_site_prog [subst $cmds]]
	set m_env [berkdb_env -home $mdir]
	set cmds {
		"home $c1dir"
		"output $testdir/c1_1_output"
		"open_env"
		"start client"
	}
	set c1_1 [open_site_prog [subst $cmds]]
	set c1_env [berkdb_env -home $c1dir]
	await_startup_done $c1_env
	await_condition {[expr [$m_env rep_get_nsites] == 2]}
	# Wait for some time so that m2 connects to c1
	tclsleep 3

	puts "\t\tRepmgr$tnum.test.b: Close master listener, verify takeover\
	    on master and no election on client1."
	close $m_1
	tclsleep 3
	set takeover_count [repmgr113_takeovers $m_env]
	error_check_good m_takeover_count_1 $takeover_count 1
	set election_count [stat_field $c1_env rep_stat "Elections held"]
	error_check_good c1_no_elections_1 $election_count 0

	# Test case 2: Test listener takeover on client.
	# 2 sites, master and client1
	# 2 master processes, m_2 (listener) and m_3
	# 2 client1 processes, c1_1 (listener) and c1_2
	#
	# Start subordinate processes on master and client1, m_3 and c1_2.
	# Stop client1 listener c1_1.  Verify c1_2 takes over listener role.
	puts "\t\tRepmgr$tnum.test.c: Start a master subordinate process."
	set cmds {
		"home $mdir"
		"output $testdir/m_3_output"
		"open_env"
	}
	set m_3 [open_site_prog [subst $cmds]]
	puts $m_3 "start master"
	error_check_match m_sub_ret_1 [gets $m_3] "*DB_REP_IGNORE*"

	puts "\t\tRepmgr$tnum.test.d: Start a client1 subordinate process."
	set cmds {
		"home $c1dir"
		"output $testdir/c1_2_output"
		"open_env"
		"start client"
	}
	set c1_2 [open_site_prog [subst $cmds]]
	# Pause to let c1_2 connect to m_2 and m_3.
	tclsleep 2

	puts "\t\tRepmgr$tnum.test.e: Close client1 listener, verify\
	    takeover on client1."
	close $c1_1
	tclsleep 3
	set takeover_count [repmgr113_takeovers $c1_env]
	error_check_good c1_takeover_count_1 $takeover_count 1

	# Test case 3: Test master takeover soon after client takeover in test
	# case 2.
	# 2 sites, master and client1
	# 2 master processes, m_2 (listener) and m_3
	# 1 client1 process, c1_2 (listener)
	#
	# Close master listener m_2.  Takeover happens on master.  Verify no
	# election on client1, which means the connections between subordinate
	# process m_3 and new listener c1_2 are established in time.
	puts "\t\tRepmgr$tnum.test.f: Close master listener, verify takeover\
	    on master and no election on client1."
	close $m_2
	tclsleep 3
	set takeover_count [repmgr113_takeovers $m_env]
	error_check_good m_takeover_count_2 $takeover_count 2
	set election_count [stat_field $c1_env rep_stat "Elections held"]
	error_check_good c1_no_elections_2 $election_count 0

	# Test case 4: Test no takeover in subordinate rep-unaware process.
	# 2 sites, master and client1
	# 3 master processes, m_3 (listener), m_4 (rep-unaware) and
	#     m_5 (rep-unaware)
	# 1 client1 process, c1_2 (listener)
	#
	# Start two master subordinate rep-unaware processes m_4 and m_5.
	# Close master listener m_3.  Verify m_4 and m_5 don't take over
	# listener role, client1 raises election.
	puts "\t\tRepmgr$tnum.test.g: Start two master rep-unaware processes."
	set cmds {
		"home $mdir"
		"output $testdir/m_4_output"
		"open_env"
	}
	set m_4 [open_site_prog [subst $cmds]]
	puts $m_4 "open_db test.db"
	repmgr113_await_connected $m_4 $c1port \
	    "FAIL: couldn't connect client1 within 30 seconds"

	set cmds {
		"home $mdir"
		"output $testdir/m_5_output"
		"open_env"
	}
	set m_5 [open_site_prog [subst $cmds]]
	puts $m_5 "open_db test.db"
	puts $m_5 "put k1 k1"
	puts $m_5 "echo done"
	error_check_good m_5_put_done_k1 [gets $m_5] "done"
	repmgr113_await_connected $m_5 $c1port \
	    "FAIL: couldn't connect client1 within 30 seconds"

	puts "\t\tRepmgr$tnum.test.h: Close master listener, verify no\
	    takeover on master, election happens on client1."
	close $m_3
	# Election should be held before election delay.
	tclsleep 2
	set election_count [stat_field $c1_env rep_stat "Elections held"]
	error_check_good c1_one_election_1 $election_count 1
	tclsleep 2
	set takeover_count [repmgr113_takeovers $m_env]
	error_check_good m_takeover_count_3 $takeover_count 2
	close $m_4
	close $m_5

	# Test case 5: Test failed takeover.
	# 2 sites, master and client1
	# 2 master processes, m_6 (listener), m_7
	# 1 client1 process, c1_2 (listener)
	#
	# Start two master processes m_6 and m_7.  Close m_6, verify client1
	# delays the election.  Close m_7 before takeover succeeds, verify
	# takeover fails and election finally happens on client1.
	puts "\t\tRepmgr$tnum.test.i: A master process rejoins, should be\
	    the listener."
	set cmds {
		"home $mdir"
		"output $testdir/m_6_output"
		"open_env"
	}
	set m_6 [open_site_prog [subst $cmds]]
	puts $m_6 "start master"
	error_check_match m_sub_ret_2 [gets $m_6] "*Successful*"
	puts $m_6 "open_db test.db"
	puts $m_6 "put k2 k2"
	puts $m_6 "echo done"
	# Read the echoed reply so the put is known to have been processed.
	gets $m_6

	puts "\t\tRepmgr$tnum.test.j: Start a master subordinate process"
	set cmds {
		"home $mdir"
		"output $testdir/m_7_output"
		"open_env"
	}
	set m_7 [open_site_prog [subst $cmds]]
	puts $m_7 "start master"
	error_check_match m_sub_ret_3 [gets $m_7] "*DB_REP_IGNORE*"
	# Pause to let m_7 connect to c1_2
	tclsleep 3

	puts "\t\tRepmgr$tnum.test.k: Close master processes to prevent\
	    takeover, verify that election is delayed but finally happens"
	close $m_6
	set takeover_count [repmgr113_takeovers $m_env]
	error_check_good m_takeover_count_4 $takeover_count 2
	# Still only the single election from test case 4: the new election
	# is delayed while a takeover could occur.
	set election_count [stat_field $c1_env rep_stat "Elections held"]
	error_check_good c1_no_elections_3 $election_count 1
	close $m_7
	tclsleep 3
	set election_count [stat_field $c1_env rep_stat "Elections held"]
	error_check_good c1_one_election_2 $election_count 2

	# Test case 6: Test one of subordinate processes succeeds in takeover.
	# 2 sites, master and client1
	# 1 master process, m_8 (listener)
	# 3 client1 processes, c1_2 (listener), c1_3 and c1_4.
	#
	# Start master listener m_8 and two client1 processes c1_3 and c1_4.
	# Close c1_2.  Verify takeover happens once.
	puts "\t\tRepmgr$tnum.test.l: A master process rejoins, should be\
	    master listener."
	set cmds {
		"home $mdir"
		"output $testdir/m_8_output"
		"open_env"
	}
	set m_8 [open_site_prog [subst $cmds]]
	puts $m_8 "start master"
	error_check_match m_sub_ret_4 [gets $m_8] "*Successful*"
	puts $m_8 "open_db test.db"
	puts $m_8 "put k3 k3"
	puts $m_8 "echo done"
	# Read the echoed reply so the put is known to have been processed.
	gets $m_8

	puts "\t\tRepmgr$tnum.test.m: Start two processes on client1, close\
	    client1 listener, verify takeover on client1."
	set cmds {
		"home $c1dir"
		"output $testdir/c1_3_output"
		"open_env"
		"start client"
	}
	set c1_3 [open_site_prog [subst $cmds]]
	set cmds {
		"home $c1dir"
		"output $testdir/c1_4_output"
		"open_env"
		"start client"
	}
	set c1_4 [open_site_prog [subst $cmds]]
	close $c1_2
	tclsleep 3
	set takeover_count [repmgr113_takeovers $c1_env]
	error_check_good c1_takeover_count_2 $takeover_count 2

	# Test case 7: Test no takeover on removed site.
	# 2 sites, master and client1
	# 1 master process, m_8 (listener)
	# 2 client1 processes, c1_3 (listener), c1_4
	#
	# Remove client1.  Verify c1_4 doesn't take over listener role.
	puts "\t\tRepmgr$tnum.test.n: Remove client1 and verify no takeover on\
	    client1."
	puts $m_8 "remove $hoststr $c1port"
	await_condition {[expr [$m_env rep_get_nsites] == 1]}
	tclsleep 3
	set takeover_count [repmgr113_takeovers $c1_env]
	error_check_good c1_takeover_count_3 $takeover_count 2

	$c1_env close
	close $c1_3
	close $c1_4

	# Test case 8: Test takeover happens on a site with both subordinate
	# rep-aware process and rep-unaware process.
	# 3 sites, master, client2 and client3
	# 3 master processes, m_8 (listener), m_9 (rep-aware) and
	#     m_10 (rep-unaware)
	# 1 client2 process, c2_1 (listener)
	# 1 client3 process, c3_1 (listener)
	#
	# Start listener process on client2 and client3, one rep-aware master
	# process m_9 and another rep-unaware master process m_10.  Close
	# master listener m_8.  Verify takeover happens on master and no
	# election on client2 and client3.
	puts "\t\tRepmgr$tnum.test.o: Add client2 and client3."
	set cmds {
		"home $c2dir"
		"output $testdir/c2_1_output"
		"open_env"
		"start client"
	}
	set c2_1 [open_site_prog [subst $cmds]]
	set cmds {
		"home $c3dir"
		"output $testdir/c3_1_output"
		"open_env"
		"start client"
	}
	set c3_1 [open_site_prog [subst $cmds]]
	set c2_env [berkdb_env -home $c2dir]
	await_startup_done $c2_env
	set c3_env [berkdb_env -home $c3dir]
	await_startup_done $c3_env

	puts "\t\tRepmgr$tnum.test.p: Start a rep-aware and a rep-unaware\
	    processes on master, close master listener, verify no election."
	set cmds {
		"home $mdir"
		"output $testdir/m_9_output"
		"open_env"
		"start master"
	}
	set m_9 [open_site_prog [subst $cmds]]
	tclsleep 3
	puts $m_9 "is_connected $c2port"
	error_check_good m_9_connected_c2_1 [gets $m_9] 1
	puts $m_9 "is_connected $c3port"
	error_check_good m_9_connected_c3_1 [gets $m_9] 1

	set cmds {
		"home $mdir"
		"output $testdir/m_10_output"
		"open_env"
	}
	set m_10 [open_site_prog [subst $cmds]]
	puts $m_10 "open_db test.db"
	puts $m_10 "put k4 k4"
	puts $m_10 "echo done"
	error_check_good m_10_put_done_k4 [gets $m_10] "done"

	repmgr113_await_connected $m_10 $c2port \
	    "FAIL: couldn't connect c2_1 within 30 seconds"
	repmgr113_await_connected $m_10 $c3port \
	    "FAIL: couldn't connect c3_1 within 30 seconds"

	close $m_8
	tclsleep 3
	set election_count [stat_field $c2_env rep_stat "Elections held"]
	error_check_good c2_no_elections_1 $election_count 0
	set election_count [stat_field $c3_env rep_stat "Elections held"]
	error_check_good c3_no_elections_1 $election_count 0
	set takeover_count [repmgr113_takeovers $m_env]
	error_check_good m_takeover_count_5 $takeover_count 3

	# Test case 9: Test election happens without listener candidate.
	# 3 sites, master, client2 and client3
	# 2 master processes, m_9 (listener), m_10 (rep-unaware)
	# 1 client2 process, c2_1 (listener)
	# 1 client3 process, c3_1 (listener)
	#
	# Close master listener m_9.  Verify no takeover on the master,
	# election happens and end with new master.
	puts "\t\tRepmgr$tnum.test.q: Close new master listener, verify that\
	    election happens."
	set old_master_id [stat_field $c2_env rep_stat "Master environment ID"]
	close $m_9
	tclsleep 2
	set election_count [stat_field $c2_env rep_stat "Elections held"]
	error_check_good c2_one_election_1 $election_count 1
	set election_count [stat_field $c3_env rep_stat "Elections held"]
	error_check_good c3_one_election_1 $election_count 1
	tclsleep 2
	set new_master_id [stat_field $c2_env rep_stat "Master environment ID"]
	error_check_bad new_master $new_master_id $old_master_id
	set takeover_count [repmgr113_takeovers $m_env]
	error_check_good m_takeover_count_6 $takeover_count 3

	close $c2_1
	close $c3_1
	$m_env close
	$c2_env close
	$c3_env close
	close $m_10
}

# Verify that a subordinate process started with zero message threads can
# still take over as listener when the original listener exits.
#	tnum - test number used as a prefix in progress messages.
proc repmgr113_zero_nthreads { {tnum "113"} } {
	global testdir
	global ipversion

	puts "\tRepmgr$tnum.zero.nthreads: Test automatic takeover by a\
	    subordinate process configured with zero nthreads."
	env_cleanup $testdir

	set hoststr [get_hoststr $ipversion]
	foreach {mport} [available_ports 1] {}
	file mkdir [set mdir $testdir/MASTER]
	make_dbconfig $mdir \
	    [list [list repmgr_site $hoststr $mport db_local_site on]]
	setup_repmgr_ssl $mdir

	puts "\t\tRepmgr$tnum.zero.nthreads.a: Start master listener."
	set cmds {
		"home $mdir"
		"output $testdir/m_1_output"
		"open_env"
		"start master"
	}
	set m_1 [open_site_prog [subst $cmds]]

	puts "\t\tRepmgr$tnum.zero.nthreads.b: Start master subordinate process\
	    configured with 0 message threads."
	# Unlike the other processes, the subordinate runs inside this test
	# process as an env handle so its events can be inspected below.
	set m_2 [berkdb_env -home $mdir -txn -rep -thread -event -errpfx \
	    "MASTER" -errfile $testdir/m_2_output]
	$m_2 repmgr -local [list $hoststr $mport] -start master -msgth 0

	puts "\t\tRepmgr$tnum.zero.nthreads.c: Close listener, verify takeover\
	    happens in the subordinate process."
	close $m_1
	tclsleep 3
	# Verify that the takeovers stat should show a takeover and there is
	# no autotakeover_failed event.
	set takeover_count [repmgr113_takeovers $m_2]
	error_check_good m_takeover $takeover_count 1
	set ev [find_event [$m_2 event_info] autotakeover_failed]
	error_check_good m_no_autotakeover_failed [string length $ev] 0
	set ev2 [find_event [$m_2 event_info] autotakeover]
	error_check_good m_autotakeover_event [is_substr $ev2 "autotakeover"] 1
	$m_2 close
}

# Perform a listener takeover on each site of a preferred master
# replication group and verify that data written before and after the
# takeovers reaches the client.
#	tnum - test number used as a prefix in progress messages.
proc repmgr113_prefmas { {tnum "113"} } {
	global testdir
	global ipversion

	# Test case 10: Test listener takeover in preferred master repgroup.
	# 2 sites, master and client
	# 2 master processes, m_1 (listener) and m_2
	# 2 client processes, c_1 (listener) and c_2
	#
	# Start all processes.  Perform a put from the initial master
	# listener process m_1.  Stop client listener c_1.  Verify c_2
	# takes over listener role on client.  Stop master listener m_1.
	# Verify m_2 takes over listener role on master.  Perform another
	# put from the post-takeover master listener process m_2.  Verify
	# both puts are present on client.

	puts "\tRepmgr$tnum.pm: Perform a takeover on each preferred\
	    master site."
	env_cleanup $testdir

	set hoststr [get_hoststr $ipversion]
	foreach {mport cport} [available_ports 2] {}
	file mkdir [set mdir $testdir/MASTER]
	file mkdir [set cdir $testdir/CLIENT]
	# The "all" ack_policy guarantees that replication is complete before
	# put operations return.
	make_dbconfig $mdir \
	    [list [list repmgr_site $hoststr $mport db_local_site on] \
	    "rep_set_config db_repmgr_conf_prefmas_master on" \
	    "repmgr_set_ack_policy db_repmgr_acks_all"]
	make_dbconfig $cdir \
	    [list [list repmgr_site $hoststr $cport db_local_site on] \
	    [list repmgr_site $hoststr $mport db_bootstrap_helper on] \
	    "rep_set_config db_repmgr_conf_prefmas_client on" \
	    "repmgr_set_ack_policy db_repmgr_acks_all"]

	setup_repmgr_ssl $mdir
	setup_repmgr_ssl $cdir

	puts "\t\tRepmgr$tnum.pm.a: Start master and client."
	# Both sites, including the preferred master, are started with
	# "start client" here.
	set cmds {
		"home $mdir"
		"output $testdir/m_1_output"
		"open_env"
		"start client"
	}
	set m_1 [open_site_prog [subst $cmds]]
	set m_env [berkdb_env -home $mdir]
	set cmds {
		"home $cdir"
		"output $testdir/c_1_output"
		"open_env"
		"start client"
	}
	set c_1 [open_site_prog [subst $cmds]]
	set c_env [berkdb_env -home $cdir]
	await_startup_done $c_env

	puts "\t\tRepmgr$tnum.pm.b: Start a subordinate process on each site."
	set cmds {
		"home $mdir"
		"output $testdir/m_2_output"
		"open_env"
		"start client"
	}
	set m_2 [open_site_prog [subst $cmds]]
	repmgr113_await_connected $m_2 $cport \
	    "FAIL: couldn't connect to client within 30 seconds"
	set cmds {
		"home $cdir"
		"output $testdir/c_2_output"
		"open_env"
		"start client"
	}
	set c_2 [open_site_prog [subst $cmds]]
	repmgr113_await_connected $c_2 $mport \
	    "FAIL: couldn't connect to master within 30 seconds"

	puts "\t\tRepmgr$tnum.pm.c: Perform a master put before takeovers."
	puts $m_1 "open_db test.db"
	puts $m_1 "put initKey initValue"
	puts $m_1 "echo initPut"
	set sentinel [gets $m_1]
	error_check_good echo_initPut $sentinel "initPut"

	puts "\t\tRepmgr$tnum.pm.d: Perform a client site takeover."
	close $c_1
	repmgr113_await_takeovers $c_env 1 0 \
	    "FAIL: couldn't take over on client in 30 seconds"
	# Pause to refresh c_2 connection to m_1.
	tclsleep 3
	repmgr113_await_connected $c_2 $mport \
	    "FAIL: couldn't connect to master within 30 seconds"

	puts "\t\tRepmgr$tnum.pm.e: Perform a preferred master site takeover."
	close $m_1
	repmgr113_await_takeovers $m_env 1 0 \
	    "FAIL: couldn't take over on master in 30 seconds"
	# Pause to let c_2 establish its main connection to new master
	# listener process m_2.
	tclsleep 3
	repmgr113_await_connected $c_2 $mport \
	    "FAIL: couldn't connect to master within 30 seconds"

	puts "\t\tRepmgr$tnum.pm.f: Perform a master put after takeovers."
	puts $m_2 "open_db test.db"
	puts $m_2 "put tookoverKey tookoverValue"
	puts $m_2 "echo tookoverPut"
	set sentinel [gets $m_2]
	error_check_good echo_tookoverPut $sentinel "tookoverPut"

	puts "\t\tRepmgr$tnum.pm.g: Verify both master puts are on client."
	puts $c_2 "open_db test.db"
	set expected {{initKey initValue} {tookoverKey tookoverValue}}
	verify_client_data $c_env test.db $expected

	$c_env close
	close $c_2
	$m_env close
	close $m_2
}