1# Copyright (c) 2013, 2020 Oracle and/or its affiliates.  All rights reserved.
2#
3# See the file LICENSE for license information.
4#
5# $Id$
6#
7# TEST	repmgr039
8# TEST	repmgr duplicate master test.
9# TEST
10# TEST	This test verifies repmgr's automatic dupmaster resolution.  It
11# TEST	uses the repmgr test hook to prevent sending heartbeats and
12# TEST	2SITE_STRICT=off to enable the client to become a master in
13# TEST	parallel with the already-established master.  After rescinding
14# TEST	the test hook, it makes sure repmgr performs its dupmaster resolution
15# TEST	process resulting in the expected winner.
16# TEST
17# TEST	This test runs in the following configurations:
18# TEST	    Default elections where master generation helps determine winner
19# TEST	    The undocumented DB_REP_CONF_ELECT_LOGLENGTH election option
20# TEST	    A Preferred Master replication group
21# TEST
22# TEST	Run for btree only because access method shouldn't matter.
23# TEST
# Driver for the repmgr duplicate master test.
#
#	niter	Base number of iterations handed to repmgr039_sub.
#	tnum	Test number string used in progress messages.
#	args	Extra arguments, run through convert_args for the access
#		method and passed along to repmgr039_sub.
#
# Runs repmgr039_sub once for every combination of election option and
# "which site gets more data" option.
proc repmgr039 { { niter 100 } { tnum "039" } args } {

	source ./include.tcl

	# Replication manager tests are skipped on FreeBSD.
	if { $is_freebsd_test == 1 } {
		puts "Skipping replication manager test on FreeBSD platform."
		return
	}

	# Only btree is exercised; the access method shouldn't matter here.
	set method "btree"
	set args [convert_args $method $args]

	#
	# Election options:
	#   mastergen - default case, master generation takes precedence
	#               over log length when choosing the election winner.
	#   loglength - undocumented option that bases the election winner
	#               on log length without considering master generation.
	#   prefmas   - dupmaster operation in preferred master mode.
	#
	# For each of these, add more data to one site or the other
	# (master, then client) during the dupmaster.
	#
	foreach electopt [list mastergen loglength prefmas] {
		foreach moredataopt [list master client] {
			set banner "Repmgr$tnum ($method $electopt $moredataopt):"
			puts "$banner repmgr duplicate master test."
			repmgr039_sub $method $niter $tnum \
			    $electopt $moredataopt $args
		}
	}
}
55
# Run one configuration of the repmgr duplicate master test.
#
#	method		Access method passed to rep_test.
#	niter		Base number of rep_test iterations; the site selected
#			by moredataopt gets twice this many during the
#			dupmaster.
#	tnum		Test number string used in progress messages.
#	electopt	One of "mastergen", "loglength" or "prefmas"; selects
#			the election/preferred master configuration.
#	moredataopt	"master" or "client"; which site receives more data
#			while both sites are masters.
#	largs		Extra arguments passed through to rep_test.
#
# The test starts a two-site group, blocks heartbeats with the repmgr test
# hook so that the client also becomes a master, writes data at both sites,
# rescinds the hook and then checks that the expected site wins the
# dupmaster resolution and that the loser's contents match the winner's.
proc repmgr039_sub { method niter tnum electopt moredataopt largs } {
	global testdir
	global rep_verbose
	global verbose_type
	global ipversion
	set nsites 2

	set verbargs ""
	if { $rep_verbose == 1 } {
		set verbargs " -verbose {$verbose_type on} "
	}

	env_cleanup $testdir
	set hoststr [get_hoststr $ipversion]
	set ports [available_ports $nsites]

	# Heartbeat timeout values.
	set hbsend 500000
	set hbmon 1100000
	# Extra fast connection retry timeout for prompt dupmaster resolution.
	set connretry 500000
	# Iteration count for the site that gets more data in the dupmaster.
	set big_iter [expr {$niter * 2}]

	set masterdir $testdir/MASTERDIR
	set clientdir $testdir/CLIENTDIR

	file mkdir $masterdir
	file mkdir $clientdir

	setup_repmgr_ssl $masterdir
	setup_repmgr_ssl $clientdir

	# Open a master.
	puts "\tRepmgr$tnum.a: Start master."
	set ma_envcmd "berkdb_env_noerr -create $verbargs \
	    -errpfx MASTER -home $masterdir -txn -rep -thread -event"
	set masterenv [eval $ma_envcmd]
	set role master
	if { $electopt == "loglength" } {
		$masterenv rep_config {electloglength on}
	}
	if { $electopt == "prefmas" } {
		# Both preferred master sites (master and client) must use
		# the -client option to start to allow the preferred master
		# startup sequence in the code to control which site becomes
		# master.
		set role client
		$masterenv rep_config {mgrprefmasmaster on}
	}
	$masterenv repmgr -ack all \
	    -local [list $hoststr [lindex $ports 0]] \
	    -timeout [list heartbeat_send $hbsend] \
	    -timeout [list heartbeat_monitor $hbmon] \
	    -timeout [list connection_retry $connretry] \
	    -start $role
	# 2SITE_STRICT is turned off (except for preferred master) so that
	# the client can later become a master alongside this one.
	if { $electopt != "prefmas" } {
		$masterenv rep_config {mgr2sitestrict off}
	}
	await_expected_master $masterenv

	# Open a client
	puts "\tRepmgr$tnum.b: Start client."
	set cl_envcmd "berkdb_env_noerr -create $verbargs \
	    -errpfx CLIENT -home $clientdir -txn -rep -thread -event"
	set clientenv [eval $cl_envcmd]
	if { $electopt == "loglength" } {
		$clientenv rep_config {electloglength on}
	}
	if { $electopt == "prefmas" } {
		$clientenv rep_config {mgrprefmasclient on}
	}
	$clientenv repmgr -ack all \
	    -local [list $hoststr [lindex $ports 1]] \
	    -remote [list $hoststr [lindex $ports 0]] \
	    -timeout [list heartbeat_send $hbsend] \
	    -timeout [list heartbeat_monitor $hbmon] \
	    -timeout [list connection_retry $connretry] \
	    -start client
	if { $electopt != "prefmas" } {
		$clientenv rep_config {mgr2sitestrict off}
	}
	await_startup_done $clientenv

	#
	# Use of -ack all guarantees that replication is complete before the
	# repmgr send function returns and rep_test finishes.
	#
	puts "\tRepmgr$tnum.c: Run first set of transactions at master."
	set start 0
	eval rep_test $method $masterenv NULL $niter $start 0 0 $largs
	incr start $niter

	# Set up expected winner and loser after the dupmaster.  The *dir
	# variables are the environment home directories and the *env
	# variables are the environment handles; both are handed to
	# rep_verify at the end of the test.
	if { ($electopt == "loglength" && $moredataopt == "master") ||
	    $electopt == "prefmas" } {
		# For loglength, the master should win when it has more data.
		# For preferred master, the master's data is always retained.
		set winenv $masterenv
		set windir $masterdir
		set loseenv $clientenv
		set losedir $clientdir
	} else {
		# For mastergen, client always wins regardless of data size.
		# For loglength, the client should win when it has more data.
		set winenv $clientenv
		set windir $clientdir
		set loseenv $masterenv
		set losedir $masterdir
	}
	# Set up amount of data at each site during dupmaster.
	if { $moredataopt == "master" } {
		set m_iter $big_iter
		set c_iter $niter
	} else {
		set m_iter $niter
		set c_iter $big_iter
	}

	puts "\tRepmgr$tnum.d: Enable test hook to prevent heartbeats."
	$masterenv test abort repmgr_heartbeat
	$clientenv test abort repmgr_heartbeat
	#
	# Make sure client site also becomes a master.  This indicates
	# that we have the needed dupmaster condition.
	#
	await_expected_master $clientenv

	puts "\tRepmgr$tnum.e: Run transactions at each site, more on\
	    $moredataopt."
	eval rep_test $method $masterenv NULL $m_iter $start 0 0 $largs
	eval rep_test $method $clientenv NULL $c_iter $start 0 0 $largs
	# Advance past the larger of the two sets just written.
	incr start $big_iter

	if { $electopt == "prefmas" } {
		# Restart temporary master a varying number of times to test
		# the preferred master site's ability to catch up with multiple
		# temporary master generations.
		set num_restarts [berkdb random_int 0 3]
		puts "\tRepmgr$tnum.e1: Perform $num_restarts additional\
		    temporary master restart(s)."
		for { set i 0 } { $i < $num_restarts } { incr i } {
			error_check_good client_close [$clientenv close] 0
			set clientenv [eval $cl_envcmd]
			$clientenv rep_config {mgrprefmasclient on}
			$clientenv test abort repmgr_heartbeat
			$clientenv repmgr -ack all \
			    -local [list $hoststr [lindex $ports 1]] \
			    -remote [list $hoststr [lindex $ports 0]] \
			    -timeout [list heartbeat_send $hbsend] \
			    -timeout [list heartbeat_monitor $hbmon] \
			    -timeout [list connection_retry $connretry] \
			    -start client
			await_expected_master $clientenv
		}
		# The client handle may have been reopened above, so refresh
		# the loser handle (the client always loses for prefmas).
		set loseenv $clientenv
	} else {
		# Depending on thread ordering, some reconnection and
		# dupmaster scenarios can have initial elections that don't
		# count both votes because one site still needs to update its
		# gen.  When this happens, the wrong site can win the election
		# with only its own vote unless we turn on 2site_strict.
		$masterenv rep_config {mgr2sitestrict on}
		$clientenv rep_config {mgr2sitestrict on}
	}

	puts "\tRepmgr$tnum.f: Rescind test hook to prevent heartbeats."
	$masterenv test abort none
	$clientenv test abort none
	#
	# Pause to allow time for the dupmaster to be noticed on both sites
	# and for the resulting election to occur.
	#
	tclsleep 3

	# Check for expected winner after the dupmaster resolution.
	await_expected_master $winenv
	await_startup_done $loseenv

	puts "\tRepmgr$tnum.g: Run final set of transactions at winner."
	eval rep_test $method $winenv NULL $niter $start 0 0 $largs
	incr start $niter

	puts "\tRepmgr$tnum.h: Verify dupmaster event on each site."
	# Needed to process some messages to see the dupmaster event.
	error_check_good dupmaster_event2 \
	    [is_event_present $masterenv dupmaster] 1
	error_check_good dupmaster_event \
	    [is_event_present $clientenv dupmaster] 1

	puts "\tRepmgr$tnum.i: Verify loser's database contents."
	rep_verify $windir $winenv $losedir $loseenv 1 1 1

	error_check_good client_close [$clientenv close] 0
	error_check_good masterenv_close [$masterenv close] 0
}
251