1# Copyright (c) 2014, 2020 Oracle and/or its affiliates.  All rights reserved.
2#
3# See the file LICENSE for license information.
4#
5# $Id$
6#
7# TEST	env026
8# TEST	Test reopening an environment after a panic.
9# TEST
10# TEST	Repeatedly panic the environment, close & reopen it in order to
11# TEST	verify that a process is able to reopen the env and there are no
12# TEST	major shmem/mmap "leaks"; malloc leaks will occur, and that's ok.
13# TEST
14# TEST  Since this test leaks memory, it is meant to be run standalone
15# TEST  and should not be added to the automated Tcl test suite.
16
# env026 --
#	Driver for the reopen-after-panic test.  It runs three subtests:
#	  1. a short run of panic/reopen cycles;
#	  2. (file descriptor limit + 1) cycles, so that a descriptor leaked
#	     on each cycle would exhaust the limit;
#	  3. one cycle with a cache sized beyond the detected shared memory
#	     limits, which is expected to fail.
#	Takes no arguments.  Problems are reported through error_check_good
#	and "FAIL:" messages.
proc env026 { } {
	# NOTE(review): the is_*_test flags used below are presumably set by
	# include.tcl -- confirm.
	source ./include.tcl
	set tnum 026
	# Shmkey could be any value here.
	set shmkey 20

	puts "Env$tnum: Test reopening an environment after a panic."

	# Check that a process can reopen an environment after it panics, with
	# both mmap'd regions and -system_mem shared memory segments.
	set reopenlimit 10
	env026_reopen $reopenlimit $shmkey

	# Detect file descriptor limit. Set reopen times to fdlimit + 1.
	if { $is_windows_test == 1 } {
		# In fact, there is no fixed handle limit in Windows.
		# Windows always allocates a handle in the handle table of the
		# application's process and returns the handle value.
		# The hard-coded limitation for a user handle is set to
		# 10,000 by default. It is defined in:
		# HKEY_LOCAL_MACHINE\Software\Microsoft\WindowsNT\
		# CurrentVersion\Windows\USERProcessHandleQuota.
		puts "\tEnv$tnum: Use default fd limit:10000"
		set reopenlimit 10000
	} else {
		set fdlimit ""
		# use 'ulimit -n' to get fd limit on linux, freebsd and solaris.
		error_check_good getFDlimit [catch {eval exec \
		    "echo \"ulimit -n\" | bash" } fdlimit] 0
		puts "\tEnv$tnum: fd limit:$fdlimit"
		if { [string is integer -strict $fdlimit] } {
			set reopenlimit $fdlimit
		} else {
			# 'ulimit -n' can report a non-numeric value such as
			# "unlimited", which "incr" below cannot handle.  Fall
			# back to the same default used when there is no fixed
			# limit.
			puts "\tEnv$tnum: Use default fd limit:10000"
			set reopenlimit 10000
		}
	}
	incr reopenlimit
	env026_reopen $reopenlimit $shmkey

	# Detect SHMALL and SHMMAX, then run subtest with cachesize at
	# (SHMALL * kernel pagesize) or SHMMAX.
	set shmall 0
	set shmmax 0
	set kernel_pgsize 0
	set cache_size 0
	if { $is_linux_test == 1 } {
		error_check_good getSHMALL [catch {eval exec \
		    "cat /proc/sys/kernel/shmall"} shmall] \
		    0
		error_check_good getSHMMAX [catch {eval exec \
		    "cat /proc/sys/kernel/shmmax"} shmmax] \
		    0
		error_check_good getPGSIZE [catch {eval exec \
		    "getconf PAGE_SIZE"} kernel_pgsize]\
		    0
	}
	if { $is_osx_test == 1 } {
		error_check_good getSHMALL [catch {eval exec \
		    "sysctl -n kern.sysv.shmall"} \
		    shmall] 0
		error_check_good getSHMMAX [catch {eval exec \
		    "sysctl -n kern.sysv.shmmax"} \
		    shmmax] 0
		error_check_good getPGSIZE [catch {eval exec \
		    "getconf PAGE_SIZE"} kernel_pgsize]\
		    0
	}
	if { $is_freebsd_test == 1 } {
		error_check_good getSHMALL [catch {eval exec \
		    "sysctl -n kern.ipc.shmall"} \
		    shmall] 0
		error_check_good getSHMMAX [catch {eval exec \
		    "sysctl -n kern.ipc.shmmax"} \
		    shmmax] 0
		error_check_good getPGSIZE [catch {eval exec \
		    "getconf PAGE_SIZE"} kernel_pgsize]\
		    0
	}
	if { $is_sunos_test == 1 } {
		# Cannot get shmall from solaris. Just query shmmax here.
		error_check_good getSHMMAX [catch {eval exec \
	            "prctl -n project.max-shm-memory -i \
		    project default | grep privileged | \
	            awk \"{print \\\$2}\""} \
		    shmmax] 0
		# Shmmax on solaris is in format of "x.xxGB".
		error_check_good checkSHMMAX [is_substr $shmmax "GB"] 1
		# Convert shmmax, from GB unit to bytes: drop the trailing
		# two-character "GB" suffix first.
		set shmmax [string range $shmmax 0 end-2]
		# Round up the shmmax.
		set shmmax [expr {int($shmmax) + 1}]
		# Tcl integers are arbitrary precision (8.5 and later, which
		# this file already requires for {*}), so no external
		# calculator is needed for the multiplication.
		set shmmax [expr {$shmmax * 1024 * 1024 * 1024}]
		error_check_good getPGSIZE [catch {eval exec \
		    "getconf PAGE_SIZE"} kernel_pgsize]\
		    0
	}
	puts "\tEnv$tnum: shmall:$shmall, shmmax:$shmmax,\
	    kernel pgsize:$kernel_pgsize"
	# Choose the bigger one for cache_size.
	set cache_size [expr {$shmall * $kernel_pgsize}]
	if { $cache_size < $shmmax } {
		set cache_size $shmmax
	}
	# Without a detected limit (no platform branch above applied) the
	# cache size is still 0.  env026_reopen treats 0 as "do not set a
	# cache size", so the reopen would succeed and be misreported as a
	# failure.  Skip the subtest instead.
	if { $cache_size == 0 } {
		puts "\tEnv$tnum: Skipping large cache size subtest;\
		    no shared memory limits were detected."
		return
	}
	# Enlarge cache_size to exceed maximum allowed cache size.
	if { $is_sunos_test == 1 } {
		# In Solaris, there is no specific shmmax so just enlarge
		# cache size to hit its swap space.
		set cache_size [expr {$cache_size * 30}]
	} else {
		set cache_size [expr {$cache_size * 5 / 4}]
	}
	puts "\tEnv$tnum: cache size is set to be $cache_size."
	# The oversized cache must make the environment open fail.
	if { ![catch {env026_reopen 1 $shmkey $cache_size}] } {
		puts "FAIL: large cache size does not lead to a failure."
	} else {
		puts "\tEnv$tnum: Get failure as expected."
	}
}
137
# Env026_reopen tests that a process can reopen an environment after a panic,
# without needing to start a new process. Usually it runs for a few iterations,
140# but a "leak" test would run for hundreds or thousands of iterations, in order
141# to reach file descriptor and shared memory limits. Some places to find them are:
142# Oracle Enterprise Linux: limit or ulimit; /proc/sys/kernel/shmmni
143# Solaris: prctl -n process.max-file-descriptor | project.max-shm-ids $$
# Arguments:
#	reopenlimit	number of panic/reopen cycles to run (default 10).
#	shmkey		when non-zero, open the environment with -system_mem
#			and this -shm_key; 0 leaves those options off.
#	cache_size	when non-zero, total cache size in bytes, passed to
#			the environment as -cachesize; 0 leaves it unset.
# Errors raised by the environment open or by error_check_good propagate
# to the caller.
proc env026_reopen { { reopenlimit 10 } { shmkey 0 } {cache_size 0}} {
	source ./include.tcl

	set tnum 026
	set testfile TESTFILE
	set key KEY_REOPEN
	set data DATA_REOPEN

	env_cleanup $testdir
	set envopen [list -create -home $testdir -txn -register -recover ]
	lappend envopen -errfile "$testdir/errfile"
	if { $cache_size != 0} {
		# Split the byte count into the "gbytes bytes ncache" triple
		# that -cachesize takes.
		set GB [expr {1024 * 1024 * 1024}]
		set gbytes [expr {int($cache_size / $GB)}]
		set bytes [expr {$cache_size % $GB}]
		# Cache number could be any integer, but each cache
		# should be less than 4GB.
		set cachenum [expr {$gbytes + 1}]
		lappend envopen -cachesize "$gbytes $bytes $cachenum"
		puts "\tEnv$tnum: cache parameter:$gbytes $bytes $cachenum"
	}
	set shmmesg ""
	if { $shmkey != 0 } {
		lappend envopen -system_mem -shm_key $shmkey
		set shmmesg " with a shared memory key of $shmkey"
	}
	puts "\tEnv$tnum: Reopen panic'ed env $reopenlimit times$shmmesg."
	for {set reopen 0} {$reopen < $reopenlimit} {incr reopen} {
		set env [ berkdb_env {*}$envopen -errpfx "ENV026 #$reopen" ]
		# Verify that the open of the environment ran recovery by
		# checking that no txns have been created.
		error_check_good "Env$tnum #$reopen: detect-recovery" \
		    [getstats [$env txn_stat] {Number txns begun}] 0
		set txn [$env txn]
		error_check_good \
		    "Env$tnum: #$reopen txn" [is_valid_txn $txn $env] TRUE

		# The db open needs to be the "_noerr" version; the plain
		# version overrides the -errfile specification on the env.
		set db [berkdb_open_noerr -env $env -create -mode 0644 \
		    -auto_commit -btree $testfile]
		error_check_good \
		    "Env$tnum: #$reopen db open" [is_valid_db $db] TRUE
		set ret [$db put $key $data]
		error_check_good "Env$tnum: #$reopen put($key,$data)" $ret 0
		set dbc [$db cursor -txn $txn]
		error_check_good "Env$tnum: #$reopen db cursor" \
		    [is_valid_cursor $dbc $db] TRUE
		# Panic the environment.  Any error from the call itself is
		# ignored; the close below is what gets checked.
		catch {$env set_flags -panic on}
		# This intentionally does not close the cursor, db, or txn.
		# We want to test that a slightly faulty app doesn't crash.
		# Call close directly: wrapping it in "eval [...]" would run
		# the result of a successful close as a command, which always
		# errors and so would hide a close that did not panic.
		if {[catch {$env close} ret] == 0} {
			puts "Env$tnum: #$reopen close didn't panic: $ret"
		}

		if {$reopen > 0 && $reopen % 20 == 0} {
			puts "\t\tEnv$tnum: reopen times:$reopen "
		}
	}
	# One last open to confirm that recovery still works after the final
	# panic.
	set env [ berkdb_env_noerr {*}$envopen ]
	error_check_good "Env$tnum final recovery check" \
	    [getstats [$env txn_stat] {Number txns begun}] 0
	# Release the verification handle so it does not leak into later
	# subtests.
	error_check_good "Env$tnum final env close" [$env close] 0
	puts "\tEnv$tnum: #$reopen Each reopen after a panic succeeded."
}
209