1# Copyright (c) 2012, 2020 Oracle and/or its affiliates.  All rights reserved.
2#
3# See the file LICENSE for license information.
4#
5# $Id$
6#
7# TEST	repmgr113
8# TEST	Multi-process repmgr automatic listener takeover.
9# TEST
10# TEST	One of the subordinate processes automatically becomes listener if the
11# TEST	original listener leaves.  An election is delayed long enough for a
12# TEST	takeover to occur if the takeover happens on the master.
13
# Entry point for the repmgr113 test: prints the test banner and runs each
# group of listener-takeover sub-tests in turn.
#
# tnum - test number used as a prefix in progress messages.
proc repmgr113 { {tnum "113"} } {
	source ./include.tcl

	# The whole test is skipped when include.tcl flags a FreeBSD run.
	if { $is_freebsd_test == 1 } {
		puts "Skipping replication manager test on FreeBSD platform."
		return
	}

	puts "Repmgr$tnum: Test automatic listener takeover among\
	    multiple processes."

	# Sub-tests, run in this order:
	#   loop          - multiple listener takeovers on master and client.
	#   test          - listener takeovers in different scenarios.
	#   zero_nthreads - zero nthreads in taking over subordinate process.
	#   prefmas       - listener takeover on each site in a preferred
	#                   master repgroup.
	foreach phase {loop test zero_nthreads prefmas} {
		repmgr113_$phase $tnum
	}
}
36
# Run many short-lived site processes so that the listener role is handed
# over repeatedly: first on the master alone (10 rounds), then on client2
# and the master in succession (10 more rounds).
#
# tnum - test number used as a prefix in progress messages.
#
# Raises an error if a connection or takeover is not observed within its
# retry limit, or (via error_check_good) if client1 reports any election.
proc repmgr113_loop { {tnum "113"} } {
	global testdir
	global ipversion

	puts "\tRepmgr$tnum.loop: Run short-lived processes to\
	    perform multiple takeovers."
	env_cleanup $testdir

	set hoststr [get_hoststr $ipversion]
	foreach {mport c1port c2port} [available_ports 3] {}
	file mkdir [set mdir $testdir/MASTER]
	file mkdir [set c1dir $testdir/CLIENT1]
	file mkdir [set c2dir $testdir/CLIENT2]
	make_dbconfig $mdir \
	    [list [list repmgr_site $hoststr $mport db_local_site on]]
	make_dbconfig $c1dir \
	    [list [list repmgr_site $hoststr $c1port db_local_site on] \
	    [list repmgr_site $hoststr $mport db_bootstrap_helper on]]
	make_dbconfig $c2dir \
	    [list [list repmgr_site $hoststr $c2port db_local_site on] \
	    [list repmgr_site $hoststr $mport db_bootstrap_helper on]]

	setup_repmgr_ssl $mdir
	setup_repmgr_ssl $c1dir
	setup_repmgr_ssl $c2dir

	puts "\t\tRepmgr$tnum.loop.a: Start master and client1."
	set cmds {
		"home $mdir"
		"output $testdir/m_0_output"
		"open_env"
		"start master"
	}
	set m_1 [open_site_prog [subst $cmds]]
	set m_env [berkdb_env -home $mdir]
	set cmds {
		"home $c1dir"
		"output $testdir/c1_1_output"
		"open_env"
		"start client"
	}
	set c1_1 [open_site_prog [subst $cmds]]
	set c1_env [berkdb_env -home $c1dir]
	await_startup_done $c1_env

	# Test case 1: Test listener takeover on master.
	# 2 sites, master and client1
	# 2 master processes, m_1 (listener) and m_2
	# 1 client1 process, c1_1 (listener)
	#
	# Start all processes.  Stop master listener m_1.  Verify m_2 takes
	# over listener role and no election on client1.  Set m_2 to m_1 and
	# start another master process m_2, stop m_1 again and redo the
	# takeover multiple times.
	puts -nonewline "\t\tRepmgr$tnum.loop.b: Run short-lived processes\
	    to perform multiple takeovers on master"
	flush stdout
	for { set i 1 } { $i < 11 } { incr i } {
		# Close listener process and verify takeover happens.
		puts -nonewline "."
		flush stdout

		set cmds {
			"home $mdir"
			"output $testdir/m_$i\_output"
			"open_env"
			"start master"
		}
		set m_2 [open_site_prog [subst $cmds]]

		# Wait until the new subordinate process m_2 reports a
		# connection to client1 before removing the listener.
		set count 0
		puts $m_2 "is_connected $c1port"
		while {! [gets $m_2]} {
			if {[incr count] > 30} {
				error "FAIL: couldn't connect to client1\
				    within 30 seconds"
			}
			tclsleep 1
			puts $m_2 "is_connected $c1port"
		}

		# The master takeover stat is cumulative, so after round i
		# it must read exactly i.
		close $m_1
		set count 0
		set m_takeover_count [stat_field $m_env repmgr_stat \
		    "Automatic replication process takeovers"]
		while { $m_takeover_count != $i } {
			if {[incr count] > 30} {
				error "FAIL: couldn't takeover on master\
				    in 30 seconds"
			}
			tclsleep 1
			set m_takeover_count [stat_field $m_env repmgr_stat \
			    "Automatic replication process takeovers"]
		}
		set election_count [stat_field $c1_env rep_stat \
		    "Elections held"]
		error_check_good c1_no_elections_1 $election_count 0

		# Wait for the new listener m_2 to be connected to client1
		# again.  The retry counter is reset so that this wait gets
		# its own full 30 tries instead of inheriting whatever the
		# takeover wait above already used.
		tclsleep 3
		set count 0
		puts $m_2 "is_connected $c1port"
		while {! [gets $m_2]} {
			if {[incr count] > 30} {
				error "FAIL: couldn't connect to client1\
				    within 30 seconds"
			}
			tclsleep 1
			puts $m_2 "is_connected $c1port"
		}
		set m_1 $m_2
	}
	puts ""

	# Test case 2: Test listener takeover on master and client successively.
	# 3 sites, master, client1, client2
	# 2 master processes, m_1 (listener) and m_2
	# 1 client1 process, c1_1 (listener)
	# 2 client2 processes,  c2_1 (listener) and c2_2
	#
	# Start client2 process c2_1, c2_2 and master process m_2.  Stop
	# client2 listener c2_1.  Verify takeover happens on client2.  Stop
	# master listener m_1.  Verify m_2 takes over listener role and no
	# election on client1.  Set c2_2 to c2_1, m_2 to m_1.  Start another
	# client2 process c2_2 and master process m_2.  Stop c2_1 and m_1
	# again and redo the takeovers multiple times.
	puts "\t\tRepmgr$tnum.loop.c: Start client2."
	set cmds {
		"home $c2dir"
		"output $testdir/c2_1_output"
		"open_env"
		"start client"
	}
	set c2_1 [open_site_prog [subst $cmds]]
	set c2_env [berkdb_env -home $c2dir]
	await_startup_done $c2_env

	puts -nonewline "\t\tRepmgr$tnum.loop.d: Run short-lived processes to\
	    perform multiple takeovers on master and client2 successively"
	flush stdout
	for { set i 11 } { $i < 21 } { incr i } {
		puts -nonewline "."
		flush stdout
		set cmds {
			"home $mdir"
			"output $testdir/m_$i\_output"
			"open_env"
			"start master"
		}
		set m_2 [open_site_prog [subst $cmds]]
		set cmds {
			"home $c2dir"
			"output $testdir/c2_$i\_output"
			"open_env"
			"start client"
		}
		set c2_2 [open_site_prog [subst $cmds]]

		# Both new subordinate processes must be connected to the
		# other site before their listeners are closed.
		set count 0
		puts $m_2 "is_connected $c2port"
		while {! [gets $m_2]} {
			if {[incr count] > 30} {
				error "FAIL: couldn't connect to client2\
				    within 30 seconds"
			}
			tclsleep 1
			puts $m_2 "is_connected $c2port"
		}
		set count 0
		puts $c2_2 "is_connected $mport"
		while {! [gets $c2_2]} {
			if {[incr count] > 30} {
				error "FAIL: couldn't connect to master\
				    within 30 seconds"
			}
			tclsleep 1
			puts $c2_2 "is_connected $mport"
		}

		# client2 takeovers start counting in this loop, so round i
		# corresponds to a cumulative client2 count of i - 10.
		close $c2_1
		set count 0
		set c_takeover_count [stat_field $c2_env repmgr_stat \
		    "Automatic replication process takeovers"]
		while { $c_takeover_count != [expr {$i - 10}] } {
			if {[incr count] > 30} {
				error "FAIL: couldn't takeover on client2\
				    in 30 seconds"
			}
			tclsleep 1
			set c_takeover_count [stat_field $c2_env repmgr_stat \
			    "Automatic replication process takeovers"]
		}
		# Pause to let c2_2 connect to m_2.
		tclsleep 3

		# The master count continues from the 10 takeovers of the
		# first loop, so it must read exactly i.
		close $m_1
		set count 0
		set m_takeover_count [stat_field $m_env repmgr_stat \
		    "Automatic replication process takeovers"]
		while { $m_takeover_count != $i } {
			if {[incr count] > 30} {
				error "FAIL: couldn't takeover on master\
				    in 30 seconds"
			}
			tclsleep 1
			set m_takeover_count [stat_field $m_env repmgr_stat \
			    "Automatic replication process takeovers"]
		}
		set election_count [stat_field $c1_env rep_stat \
		    "Elections held"]
		error_check_good c1_no_elections_2 $election_count 0

		set m_1 $m_2
		set c2_1 $c2_2
	}
	$m_env close
	$c1_env close
	$c2_env close
	close $c1_1
	close $c2_1
	close $m_1
	puts " "
}
254
# Exercise listener takeover in nine scenarios on a group of up to four
# sites: takeover on master and on client, no takeover by rep-unaware
# processes, failed takeover, takeover with several candidates, no
# takeover on a removed site, takeover with mixed rep-aware/rep-unaware
# subordinates, and an election when no listener candidate exists.
#
# tnum - test number used as a prefix in progress messages.
#
# Raises an error if a connection is not reported within its retry limit;
# the individual expectations are verified with error_check_good,
# error_check_match and error_check_bad.
proc repmgr113_test { {tnum "113"} } {
	global testdir
	global ipversion

	puts "\tRepmgr$tnum.test: Takeover in any subordinate process and\
	    election delay due to the takeover on master"
	env_cleanup $testdir

	set hoststr [get_hoststr $ipversion]
	foreach {mport c1port c2port c3port} [available_ports 4] {}
	file mkdir [set mdir $testdir/MASTER]
	file mkdir [set c1dir $testdir/CLIENT1]
	file mkdir [set c2dir $testdir/CLIENT2]
	file mkdir [set c3dir $testdir/CLIENT3]
	make_dbconfig $mdir \
	    [list [list repmgr_site $hoststr $mport db_local_site on]]
	make_dbconfig $c1dir \
	    [list [list repmgr_site $hoststr $c1port db_local_site on] \
	    [list repmgr_site $hoststr $mport db_bootstrap_helper on]]
	make_dbconfig $c2dir \
	    [list [list repmgr_site $hoststr $c2port db_local_site on] \
	    [list repmgr_site $hoststr $mport db_bootstrap_helper on]]
	make_dbconfig $c3dir \
	    [list [list repmgr_site $hoststr $c3port db_local_site on] \
	    [list repmgr_site $hoststr $mport db_bootstrap_helper on]]

	setup_repmgr_ssl $mdir
	setup_repmgr_ssl $c1dir
	setup_repmgr_ssl $c2dir
	setup_repmgr_ssl $c3dir

	# Test case 1: Test listener takeover on master.
	# 2 sites, master and client1
	# 2 master processes, m_1 (listener) and m_2
	# 1 client1 process, c1_1 (listener)
	#
	# Start all processes.  Stop master listener m_1.  Verify m_2 takes
	# over listener role and no election on client1.
	puts "\t\tRepmgr$tnum.test.a: Start two processes on master and one\
	    process on client1."
	set cmds {
		"home $mdir"
		"output $testdir/m_1_output"
		"open_env"
		"start master"
	}
	set m_1 [open_site_prog [subst $cmds]]
	set cmds {
		"home $mdir"
		"output $testdir/m_2_output"
		"open_env"
		"start master"
	}
	set m_2 [open_site_prog [subst $cmds]]
	set m_env [berkdb_env -home $mdir]
	set cmds {
		"home $c1dir"
		"output $testdir/c1_1_output"
		"open_env"
		"start client"
	}
	set c1_1 [open_site_prog [subst $cmds]]
	set c1_env [berkdb_env -home $c1dir]
	await_startup_done $c1_env
	await_condition {[expr [$m_env rep_get_nsites] == 2]}
	# Wait for some time so that m_2 connects to c1_1.
	tclsleep 3

	puts "\t\tRepmgr$tnum.test.b: Close master listener, verify takeover\
	    on master and no election on client1."
	close $m_1
	tclsleep 3
	set takeover_count [stat_field $m_env repmgr_stat \
	    "Automatic replication process takeovers"]
	error_check_good m_takeover_count_1 $takeover_count 1
	set election_count [stat_field $c1_env rep_stat "Elections held"]
	error_check_good c1_no_elections_1 $election_count 0

	# Test case 2: Test listener takeover on client.
	# 2 sites, master and client1
	# 2 master processes, m_2 (listener) and m_3
	# 2 client1 processes, c1_1 (listener) and c1_2
	#
	# Start subordinate processes on master and client1, m_3 and c1_2.
	# Stop client1 listener c1_1.  Verify c1_2 takes over listener role.
	puts "\t\tRepmgr$tnum.test.c: Start a master subordinate process."
	set cmds {
		"home $mdir"
		"output $testdir/m_3_output"
		"open_env"
	}
	set m_3 [open_site_prog [subst $cmds]]
	# "start master" is sent separately so that its reply can be read:
	# a subordinate process is expected to answer with DB_REP_IGNORE.
	puts $m_3 "start master"
	error_check_match m_sub_ret_1 [gets $m_3] "*DB_REP_IGNORE*"

	puts "\t\tRepmgr$tnum.test.d: Start a client1 subordinate process."
	set cmds {
		"home $c1dir"
		"output $testdir/c1_2_output"
		"open_env"
		"start client"
	}
	set c1_2 [open_site_prog [subst $cmds]]
	# Pause to let c1_2 connect to m_2 and m_3.
	tclsleep 2

	puts "\t\tRepmgr$tnum.test.e: Close client1 listener, verify\
	    takeover on client1."
	close $c1_1
	tclsleep 3
	set takeover_count [stat_field $c1_env repmgr_stat \
	    "Automatic replication process takeovers"]
	error_check_good c1_takeover_count_1 $takeover_count 1

	# Test case 3: Test master takeover soon after client takeover in test
	# case 2.
	# 2 sites, master and client1
	# 2 master processes, m_2 (listener) and m_3
	# 1 client1 process, c1_2 (listener)
	#
	# Close master listener m_2.  Takeover happens on master.  Verify no
	# election on client1, which means the connections between subordinate
	# process m_3 and new listener c1_2 are established in time.
	puts "\t\tRepmgr$tnum.test.f: Close master listener, verify takeover\
	    on master and no election on client1."
	close $m_2
	tclsleep 3
	set takeover_count [stat_field $m_env repmgr_stat \
	    "Automatic replication process takeovers"]
	error_check_good m_takeover_count_2 $takeover_count 2
	set election_count [stat_field $c1_env rep_stat "Elections held"]
	error_check_good c1_no_elections_2 $election_count 0

	# Test case 4: Test no takeover in subordinate rep-unaware process.
	# 2 sites, master and client1
	# 3 master processes, m_3 (listener), m_4 (rep-unaware) and
	# m_5 (rep-unaware)
	# 1 client1 process, c1_2 (listener)
	#
	# Start two master subordinate rep-unaware processes m_4 and m_5.
	# Close master listener m_3.  Verify m_4 and m_5 don't take over
	# listener role, client1 raises election.
	puts "\t\tRepmgr$tnum.test.g: Start two master rep-unaware processes."
	set cmds {
		"home $mdir"
		"output $testdir/m_4_output"
		"open_env"
	}
	set m_4 [open_site_prog [subst $cmds]]
	puts $m_4 "open_db test.db"
	set count 0
	puts $m_4 "is_connected $c1port"
	while {! [gets $m_4]} {
		if {[incr count] > 30} {
			error "FAIL:\
			    couldn't connect client1 within 30 seconds"
		}
		tclsleep 1
		puts $m_4 "is_connected $c1port"
	}

	set cmds {
		"home $mdir"
		"output $testdir/m_5_output"
		"open_env"
	}
	set m_5 [open_site_prog [subst $cmds]]
	puts $m_5 "open_db test.db"
	puts $m_5 "put k1 k1"
	# The echoed sentinel shows the preceding commands were processed.
	puts $m_5 "echo done"
	error_check_good m_5_put_done_k1 [gets $m_5] "done"
	set count 0
	puts $m_5 "is_connected $c1port"
	while {! [gets $m_5]} {
		if {[incr count] > 30} {
			error "FAIL:\
			    couldn't connect client1 within 30 seconds"
		}
		tclsleep 1
		puts $m_5 "is_connected $c1port"
	}

	puts "\t\tRepmgr$tnum.test.h: Close master listener, verify no\
	    takeover on master, election happens on client1."
	close $m_3
	# Election should be held before election delay.
	tclsleep 2
	set election_count [stat_field $c1_env rep_stat "Elections held"]
	error_check_good c1_one_election_1 $election_count 1
	tclsleep 2
	set takeover_count [stat_field $m_env repmgr_stat \
	    "Automatic replication process takeovers"]
	error_check_good m_takeover_count_3 $takeover_count 2
	close $m_4
	close $m_5

	# Test case 5: Test failed takeover.
	# 2 sites, master and client1
	# 2 master processes, m_6 (listener), m_7
	# 1 client1 process, c1_2 (listener)
	#
	# Start two master processes m_6 and m_7.  Close m_6, verify client1
	# delays the election.  Close m_7 before takeover succeeds, verify
	# takeover fails and election finally happens on client1.
	puts "\t\tRepmgr$tnum.test.i: A master process rejoins, should be\
	    the listener."
	set cmds {
		"home $mdir"
		"output $testdir/m_6_output"
		"open_env"
	}
	set m_6 [open_site_prog [subst $cmds]]
	puts $m_6 "start master"
	error_check_match m_sub_ret_2 [gets $m_6] "*Successful*"
	puts $m_6 "open_db test.db"
	puts $m_6 "put k2 k2"
	puts $m_6 "echo done"
	# Check the sentinel rather than discarding it, as is done for the
	# other puts in this proc.
	error_check_good m_6_put_done_k2 [gets $m_6] "done"

	puts "\t\tRepmgr$tnum.test.j: Start a master subordinate process"
	set cmds {
		"home $mdir"
		"output $testdir/m_7_output"
		"open_env"
	}
	set m_7 [open_site_prog [subst $cmds]]
	puts $m_7 "start master"
	error_check_match m_sub_ret_3 [gets $m_7] "*DB_REP_IGNORE*"
	# Pause to let m_7 connect to c1_2
	tclsleep 3

	puts "\t\tRepmgr$tnum.test.k: Close master processes to prevent\
	    takeover, verify that election is delayed but finally happens"
	close $m_6
	set takeover_count [stat_field $m_env repmgr_stat \
	    "Automatic replication process takeovers"]
	error_check_good m_takeover_count_4 $takeover_count 2
	# Still only the one election from test case 4: closing m_6 must
	# not have triggered a new election yet.
	set election_count [stat_field $c1_env rep_stat "Elections held"]
	error_check_good c1_no_elections_3 $election_count 1
	close $m_7
	tclsleep 3
	set election_count [stat_field $c1_env rep_stat "Elections held"]
	error_check_good c1_one_election_2 $election_count 2

	# Test case 6: Test one of subordinate processes succeeds in takeover.
	# 2 sites, master and client1
	# 1 master process, m_8 (listener)
	# 3 client1 processes, c1_2 (listener), c1_3 and c1_4.
	#
	# Start master listener m_8 and two client1 processes c1_3 and c1_4.
	# Close c1_2.  Verify takeover happens once.
	puts "\t\tRepmgr$tnum.test.l: A master process rejoins, should be\
	    master listener."
	set cmds {
		"home $mdir"
		"output $testdir/m_8_output"
		"open_env"
	}
	set m_8 [open_site_prog [subst $cmds]]
	puts $m_8 "start master"
	error_check_match m_sub_ret_4 [gets $m_8] "*Successful*"
	puts $m_8 "open_db test.db"
	puts $m_8 "put k3 k3"
	puts $m_8 "echo done"
	error_check_good m_8_put_done_k3 [gets $m_8] "done"

	puts "\t\tRepmgr$tnum.test.m: Start two processes on client1, close\
	    client1 listener, verify takeover on client1."
	set cmds {
		"home $c1dir"
		"output $testdir/c1_3_output"
		"open_env"
		"start client"
	}
	set c1_3 [open_site_prog [subst $cmds]]
	set cmds {
		"home $c1dir"
		"output $testdir/c1_4_output"
		"open_env"
		"start client"
	}
	set c1_4 [open_site_prog [subst $cmds]]
	close $c1_2
	tclsleep 3
	# One earlier client1 takeover (test case 2) plus exactly one now.
	set takeover_count [stat_field $c1_env repmgr_stat \
	    "Automatic replication process takeovers"]
	error_check_good c1_takeover_count_2 $takeover_count 2

	# Test case 7: Test no takeover on removed site.
	# 2 sites, master and client1
	# 1 master process, m_8 (listener)
	# 2 client1 processes, c1_3 (listener), c1_4
	#
	# Remove client1.  Verify c1_4 doesn't take over listener role.
	puts "\t\tRepmgr$tnum.test.n: Remove client1 and verify no takeover on\
	    client1."
	puts $m_8 "remove $hoststr $c1port"
	await_condition {[expr [$m_env rep_get_nsites] == 1]}
	tclsleep 3
	set takeover_count [stat_field $c1_env repmgr_stat \
	    "Automatic replication process takeovers"]
	error_check_good c1_takeover_count_3 $takeover_count 2

	$c1_env close
	close $c1_3
	close $c1_4

	# Test case 8: Test takeover happens on a site with both subordinate
	# rep-aware process and rep-unaware process.
	# 3 sites, master, client2 and client3
	# 3 master processes, m_8 (listener), m_9 (rep-aware) and
	# m_10 (rep-unaware)
	# 1 client2 process, c2_1 (listener)
	# 1 client3 process, c3_1 (listener)
	#
	# Start listener process on client2 and client3, one rep-aware master
	# process m_9 and another rep-unaware master process m_10.  Close
	# master listener m_8.  Verify takeover happens on master and no
	# election on client2 and client3.
	puts "\t\tRepmgr$tnum.test.o: Add client2 and client3."
	set cmds {
		"home $c2dir"
		"output $testdir/c2_1_output"
		"open_env"
		"start client"
	}
	set c2_1 [open_site_prog [subst $cmds]]
	set cmds {
		"home $c3dir"
		"output $testdir/c3_1_output"
		"open_env"
		"start client"
	}
	set c3_1 [open_site_prog [subst $cmds]]
	set c2_env [berkdb_env -home $c2dir]
	await_startup_done $c2_env
	set c3_env [berkdb_env -home $c3dir]
	await_startup_done $c3_env

	puts "\t\tRepmgr$tnum.test.p: Start a rep-aware and a rep-unaware\
	    processes on master, close master listener, verify no election."
	set cmds {
		"home $mdir"
		"output $testdir/m_9_output"
		"open_env"
		"start master"
	}
	set m_9 [open_site_prog [subst $cmds]]
	tclsleep 3
	# These two checks query m_9, so they are labelled accordingly.
	puts $m_9 "is_connected $c2port"
	error_check_good m_9_connected_c2_1 [gets $m_9] 1
	puts $m_9 "is_connected $c3port"
	error_check_good m_9_connected_c3_1 [gets $m_9] 1

	set cmds {
		"home $mdir"
		"output $testdir/m_10_output"
		"open_env"
	}
	set m_10 [open_site_prog [subst $cmds]]
	puts $m_10 "open_db test.db"
	puts $m_10 "put k4 k4"
	puts $m_10 "echo done"
	error_check_good m_10_put_done_k4 [gets $m_10] "done"

	set count 0
	puts $m_10 "is_connected $c2port"
	while {! [gets $m_10]} {
		if {[incr count] > 30} {
			error "FAIL: couldn't connect c2_1 within 30 seconds"
		}
		tclsleep 1
		puts $m_10 "is_connected $c2port"
	}
	set count 0
	puts $m_10 "is_connected $c3port"
	while {! [gets $m_10]} {
		if {[incr count] > 30} {
			error "FAIL: couldn't connect c3_1 within 30 seconds"
		}
		tclsleep 1
		puts $m_10 "is_connected $c3port"
	}

	close $m_8
	tclsleep 3
	set election_count [stat_field $c2_env rep_stat "Elections held"]
	error_check_good c2_no_elections_1 $election_count 0
	set election_count [stat_field $c3_env rep_stat "Elections held"]
	error_check_good c3_no_elections_1 $election_count 0
	set takeover_count [stat_field $m_env repmgr_stat \
	    "Automatic replication process takeovers"]
	error_check_good m_takeover_count_5 $takeover_count 3

	# Test case 9: Test election happens without listener candidate.
	# 3 sites, master, client2 and client3
	# 2 master processes, m_9 (listener), m_10 (rep-unaware)
	# 1 client2 process, c2_1 (listener)
	# 1 client3 process, c3_1 (listener)
	#
	# Close master listener m_9.  Verify no takeover on the master,
	# election happens and end with new master.
	puts "\t\tRepmgr$tnum.test.q: Close new master listener, verify that\
	    election happens."
	set old_master_id [stat_field $c2_env rep_stat "Master environment ID"]
	close $m_9
	tclsleep 2
	# Exactly one election is expected on each remaining client.
	set election_count [stat_field $c2_env rep_stat "Elections held"]
	error_check_good c2_one_election_1 $election_count 1
	set election_count [stat_field $c3_env rep_stat "Elections held"]
	error_check_good c3_one_election_1 $election_count 1
	tclsleep 2
	set new_master_id [stat_field $c2_env rep_stat "Master environment ID"]
	error_check_bad new_master $new_master_id $old_master_id
	set takeover_count [stat_field $m_env repmgr_stat \
	    "Automatic replication process takeovers"]
	error_check_good m_takeover_count_6 $takeover_count 3

	close $c2_1
	close $c3_1
	$m_env close
	$c2_env close
	$c3_env close
	close $m_10
}
680
# Check that a subordinate master process started with zero message
# threads (-msgth 0) still takes over the listener role when the original
# listener process is closed.
#
# tnum - test number used as a prefix in progress messages.
proc repmgr113_zero_nthreads { {tnum "113"} } {
	global testdir
	global ipversion

	puts "\tRepmgr$tnum.zero.nthreads: Test automatic takeover by a\
	    subordinate process configured with zero nthreads."
	env_cleanup $testdir

	# Only one site is configured here: the master.
	set hoststr [get_hoststr $ipversion]
	foreach {mport} [available_ports 1] {}
	set mdir $testdir/MASTER
	file mkdir $mdir
	set local_site [list repmgr_site $hoststr $mport db_local_site on]
	make_dbconfig $mdir [list $local_site]
	setup_repmgr_ssl $mdir

	puts "\t\tRepmgr$tnum.zero.nthreads.a: Start master listener."
	set m_1 [open_site_prog [subst {
		"home $mdir"
		"output $testdir/m_1_output"
		"open_env"
		"start master"
	}]]

	# The subordinate is an in-process environment handle rather than a
	# separate site program, so its stats and events can be read directly.
	puts "\t\tRepmgr$tnum.zero.nthreads.b: Start master subordinate process\
	    configured with 0 message threads."
	set m_2 [berkdb_env -home $mdir -txn -rep -thread -event \
	    -errpfx "MASTER" -errfile $testdir/m_2_output]
	$m_2 repmgr -local [list $hoststr $mport] -start master -msgth 0

	puts "\t\tRepmgr$tnum.zero.nthreads.c: Close listener, verify takeover\
	    happens in the subordinate process."
	close $m_1
	tclsleep 3

	# Expect exactly one takeover in the stats ...
	set ntakeovers [stat_field $m_2 repmgr_stat \
	    "Automatic replication process takeovers"]
	error_check_good m_takeover $ntakeovers 1

	# ... no autotakeover_failed event ...
	set failed_ev [find_event [$m_2 event_info] autotakeover_failed]
	error_check_good m_no_autotakeover_failed [string length $failed_ev] 0

	# ... and an autotakeover event present.
	set takeover_ev [find_event [$m_2 event_info] autotakeover]
	error_check_good m_autotakeover_event \
	    [is_substr $takeover_ev "autotakeover"] 1

	$m_2 close
}
726
# Perform one listener takeover on each site of a two-site preferred
# master replication group and verify that data put on the master before
# and after the takeovers is present on the client.
#
# tnum - test number used as a prefix in progress messages.
#
# Raises an error if a connection or takeover is not observed within its
# retry limit.
proc repmgr113_prefmas { {tnum "113"} } {
	global testdir
	global ipversion

	# Test case 10: Test listener takeover in preferred master repgroup.
	# 2 sites, master and client
	# 2 master processes, m_1 (listener) and m_2
	# 2 client processes, c_1 (listener) and c_2
	#
	# Start all processes.  Perform a put from the initial master
	# listener process m_1.  Stop client listener c_1.  Verify c_2
	# takes over listener role on client.  Stop master listener m_1.
	# Verify m_2 takes over listener role on master.  Perform another
	# put from the post-takeover master listener process m_2.  Verify
	# both puts are present on client.

	puts "\tRepmgr$tnum.pm: Perform a takeover on each preferred\
	    master site."
	env_cleanup $testdir

	set hoststr [get_hoststr $ipversion]
	foreach {mport cport} [available_ports 2] {}
	file mkdir [set mdir $testdir/MASTER]
	file mkdir [set cdir $testdir/CLIENT]
	# The "all" ack_policy guarantees that replication is complete before
	# put operations return.
	make_dbconfig $mdir \
	    [list [list repmgr_site $hoststr $mport db_local_site on] \
	    "rep_set_config db_repmgr_conf_prefmas_master on" \
	    "repmgr_set_ack_policy db_repmgr_acks_all"]
	make_dbconfig $cdir \
	    [list [list repmgr_site $hoststr $cport db_local_site on] \
	    [list repmgr_site $hoststr $mport db_bootstrap_helper on] \
	    "rep_set_config db_repmgr_conf_prefmas_client on" \
	    "repmgr_set_ack_policy db_repmgr_acks_all"]

	setup_repmgr_ssl $mdir
	setup_repmgr_ssl $cdir

	# Every process in this proc, including those on the master site,
	# is started with "start client"; the master/client roles come from
	# the prefmas settings written to each DB_CONFIG above.
	puts "\t\tRepmgr$tnum.pm.a: Start master and client."
	set cmds {
		"home $mdir"
		"output $testdir/m_1_output"
		"open_env"
		"start client"
	}
	set m_1 [open_site_prog [subst $cmds]]
	set m_env [berkdb_env -home $mdir]
	set cmds {
		"home $cdir"
		"output $testdir/c_1_output"
		"open_env"
		"start client"
	}
	set c_1 [open_site_prog [subst $cmds]]
	set c_env [berkdb_env -home $cdir]
	await_startup_done $c_env

	puts "\t\tRepmgr$tnum.pm.b: Start a subordinate process on each site."
	set cmds {
		"home $mdir"
		"output $testdir/m_2_output"
		"open_env"
		"start client"
	}
	set m_2 [open_site_prog [subst $cmds]]
	set count 0
	puts $m_2 "is_connected $cport"
	while {! [gets $m_2]} {
		if {[incr count] > 30} {
			error "FAIL: couldn't connect to client\
			    within 30 seconds"
		}
		tclsleep 1
		puts $m_2 "is_connected $cport"
	}
	set cmds {
		"home $cdir"
		"output $testdir/c_2_output"
		"open_env"
		"start client"
	}
	set c_2 [open_site_prog [subst $cmds]]
	set count 0
	puts $c_2 "is_connected $mport"
	while {! [gets $c_2]} {
		if {[incr count] > 30} {
			error "FAIL: couldn't connect to master\
			    within 30 seconds"
		}
		tclsleep 1
		puts $c_2 "is_connected $mport"
	}

	puts "\t\tRepmgr$tnum.pm.c: Perform a master put before takeovers."
	puts $m_1 "open_db test.db"
	puts $m_1 "put initKey initValue"
	# The echoed sentinel shows the preceding commands were processed.
	puts $m_1 "echo initPut"
	set sentinel [gets $m_1]
	error_check_good echo_initPut $sentinel "initPut"

	puts "\t\tRepmgr$tnum.pm.d: Perform a client site takeover."
	close $c_1
	set count 0
	set c_takeover_count [stat_field $c_env repmgr_stat \
	    "Automatic replication process takeovers"]
	while { $c_takeover_count < 1 } {
		if {[incr count] > 30} {
			error "FAIL: couldn't take over on client\
			    in 30 seconds"
		}
		tclsleep 1
		set c_takeover_count [stat_field $c_env repmgr_stat \
		    "Automatic replication process takeovers"]
	}
	# Pause to refresh c_2 connection to m_1.
	tclsleep 3
	# Reset the retry counter so this wait gets its own full 30 tries
	# instead of inheriting what the takeover wait already used.
	set count 0
	puts $c_2 "is_connected $mport"
	while {! [gets $c_2]} {
		if {[incr count] > 30} {
			error "FAIL: couldn't connect to master\
			    within 30 seconds"
		}
		tclsleep 1
		puts $c_2 "is_connected $mport"
	}

	puts "\t\tRepmgr$tnum.pm.e: Perform a preferred master site takeover."
	close $m_1
	set count 0
	set m_takeover_count [stat_field $m_env repmgr_stat \
	    "Automatic replication process takeovers"]
	while { $m_takeover_count < 1 } {
		if {[incr count] > 30} {
			error "FAIL: couldn't take over on master\
			    in 30 seconds"
		}
		tclsleep 1
		set m_takeover_count [stat_field $m_env repmgr_stat \
		    "Automatic replication process takeovers"]
	}
	# Pause to let c_2 establish its main connection to new master
	# listener process m_2.
	tclsleep 3
	# As above, this wait gets its own retry budget.
	set count 0
	puts $c_2 "is_connected $mport"
	while {! [gets $c_2]} {
		if {[incr count] > 30} {
			error "FAIL: couldn't connect to master\
			    within 30 seconds"
		}
		tclsleep 1
		puts $c_2 "is_connected $mport"
	}

	puts "\t\tRepmgr$tnum.pm.f: Perform a master put after takeovers."
	puts $m_2 "open_db test.db"
	puts $m_2 "put tookoverKey tookoverValue"
	puts $m_2 "echo tookoverPut"
	set sentinel [gets $m_2]
	error_check_good echo_tookoverPut $sentinel "tookoverPut"

	puts "\t\tRepmgr$tnum.pm.g: Verify both master puts are on client."
	puts $c_2 "open_db test.db"
	set expected {{initKey initValue} {tookoverKey tookoverValue}}
	verify_client_data $c_env test.db $expected

	$c_env close
	close $c_2
	$m_env close
	close $m_2
}
898