test/tcl/env026.tcl

# Copyright (c) 2014, 2020 Oracle and/or its affiliates.  All rights reserved.
#
# See the file LICENSE for license information.
#
# $Id$
#
# TEST	env026
# TEST	Test reopening an environment after a panic.
# TEST
# TEST	Repeatedly panic the environment, close & reopen it in order to
# TEST	verify that a process is able to reopen the env and there are no
# TEST	major shmem/mmap "leaks"; malloc leaks will occur, and that's ok.
# TEST
# TEST	Since this test leaks memory, it is meant to be run standalone
# TEST	and should not be added to the automated Tcl test suite.

# Driver for the env026 test. It runs env026_reopen three times:
#   1. a short run of 10 panic/reopen cycles,
#   2. a run of (file descriptor limit + 1) cycles, to detect descriptor
#      leaks across panics,
#   3. a single cycle with a cache size chosen to exceed the detected
#      system shared memory limits, which is expected to fail.
# Takes no arguments; results are reported with puts/error_check_good.
proc env026 { } {
	source ./include.tcl
	set tnum 026
	# Shmkey could be any value here.
	set shmkey 20

	puts "Env$tnum: Test reopening an environment after a panic."

	# Check that a process can reopen an environment after it panics, with
	# both mmap'd regions and -system_mem shared memory segments.
	set reopenlimit 10
	env026_reopen $reopenlimit $shmkey

	# Detect file descriptor limit. Set reopen times to fdlimit + 1.
	if { $is_windows_test == 1 } {
		# In fact, there is no fixed handle limit in Windows.
		# Windows always allocates a handle in the handle table of the
		# application's process and returns the handle value.
		# The hard-coded limitation for a user handle is set to
		# 10,000 by default. It is defined in:
		# HKEY_LOCAL_MACHINE\Software\Microsoft\WindowsNT\
		# CurrentVersion\Windows\USERProcessHandleQuota.
		puts "\tEnv$tnum: Use default fd limit:10000"
		set reopenlimit 10000
	} else {
		set fdlimit ""
		# use 'ulimit -n' to get fd limit on linux, freebsd and solaris.
		error_check_good getFDlimit \
		    [catch {exec echo "ulimit -n" | bash} fdlimit] 0
		puts "\tEnv$tnum: fd limit:$fdlimit"
		if { [string is integer -strict $fdlimit] } {
			set reopenlimit $fdlimit
		} else {
			# 'ulimit -n' may print "unlimited", which cannot be
			# incremented; fall back to the same default that is
			# used when no fixed limit exists.
			puts "\tEnv$tnum: fd limit is not numeric,\
			    use default fd limit:10000"
			set reopenlimit 10000
		}
	}
	incr reopenlimit
	env026_reopen $reopenlimit $shmkey

	# Detect SHMALL and SHMMAX, then run subtest with cachesize at
	# (SHMALL * kernel pagesize) or SHMMAX.
	# The values stay 0 on platforms where they are not queried below.
	set shmall 0
	set shmmax 0
	set kernel_pgsize 0
	set cache_size 0
	if { $is_linux_test == 1 } {
		error_check_good getSHMALL \
		    [catch {exec cat /proc/sys/kernel/shmall} shmall] 0
		error_check_good getSHMMAX \
		    [catch {exec cat /proc/sys/kernel/shmmax} shmmax] 0
		error_check_good getPGSIZE \
		    [catch {exec getconf PAGE_SIZE} kernel_pgsize] 0
	}
	if { $is_osx_test == 1 } {
		error_check_good getSHMALL \
		    [catch {exec sysctl -n kern.sysv.shmall} shmall] 0
		error_check_good getSHMMAX \
		    [catch {exec sysctl -n kern.sysv.shmmax} shmmax] 0
		error_check_good getPGSIZE \
		    [catch {exec getconf PAGE_SIZE} kernel_pgsize] 0
	}
	if { $is_freebsd_test == 1 } {
		error_check_good getSHMALL \
		    [catch {exec sysctl -n kern.ipc.shmall} shmall] 0
		error_check_good getSHMMAX \
		    [catch {exec sysctl -n kern.ipc.shmmax} shmmax] 0
		error_check_good getPGSIZE \
		    [catch {exec getconf PAGE_SIZE} kernel_pgsize] 0
	}
	if { $is_sunos_test == 1 } {
		# Cannot get shmall from solaris. Just query shmmax here.
		error_check_good getSHMMAX [catch {exec \
		    prctl -n project.max-shm-memory -i project default | \
		    grep privileged | awk {{print $2}}} shmmax] 0
		# Shmmax on solaris is in format of "x.xxGB".
		error_check_good checkSHMMAX [is_substr $shmmax "GB"] 1
		# Convert shmmax, from GB unit to bytes.
		set endpos [expr {[string length $shmmax] - \
		    [string length "GB"] - 1}]
		set shmmax [string range $shmmax 0 $endpos]
		# Round up the shmmax.
		set shmmax [expr {int($shmmax) + 1}]
		# Tcl integers are arbitrary precision (this file already
		# requires Tcl 8.5 for the {*} syntax), so no external
		# calculator is needed for the multiplication.
		set shmmax [expr {$shmmax * 1024 * 1024 * 1024}]
		error_check_good getPGSIZE \
		    [catch {exec getconf PAGE_SIZE} kernel_pgsize] 0
	}
	puts "\tEnv$tnum: shmall:$shmall, shmmax:$shmmax,\
	    kernel pgsize:$kernel_pgsize"
	# Choose the bigger one for cache_size.
	set cache_size [expr {$shmall * $kernel_pgsize}]
	if { $cache_size < $shmmax } {
		set cache_size $shmmax
	}
	# A zero cache_size means no limit was detected (platform not handled
	# above). env026_reopen would then use the default cache size, the
	# open would succeed, and a bogus FAIL would be reported.
	if { $cache_size == 0 } {
		puts "\tEnv$tnum: shared memory limits are not known on this\
		    platform; skipping the large cache size subtest."
		return
	}
	# Enlarge cache_size to exceed maximum allowed cache size.
	if { $is_sunos_test == 1 } {
		# In Solaris, there is no specific shmmax so just enlarge
		# cache size to hit its swap space.
		set cache_size [expr {$cache_size * 30}]
	} else {
		set cache_size [expr {$cache_size * 5 / 4}]
	}
	puts "\tEnv$tnum: cache size is set to be $cache_size."
	if { ![catch {env026_reopen 1 $shmkey $cache_size}] } {
		puts "FAIL: large cache size does not lead to a failure."
	} else {
		puts "\tEnv$tnum: Get failure as expected."
	}
}

# Env026_reopen tests that a process can reopen an environment after a panic,
# without needing to start a new process. Usually it runs for a few iterations,
# but a "leak" test would run for hundreds or thousands of iterations, in order
# to reach file descriptor and shared memory limits. Some places to find them are:
# Oracle Enterprise Linux: limit or ulimit; /proc/sys/kernel/shmmni
# Solaris: prctl -n process.max-file-descriptor | project.max-shm-ids $$
proc env026_reopen { { reopenlimit 10 } { shmkey 0 } {cache_size 0}} {
	# Arguments:
	#   reopenlimit  number of panic/close/reopen cycles to run.
	#   shmkey       when non-zero, open the environment with -system_mem
	#                and this -shm_key.
	#   cache_size   when non-zero, total cache size in bytes to request
	#                with -cachesize.
	# Errors from opening the environment propagate to the caller.
	source ./include.tcl

	set tnum 026
	set testfile TESTFILE
	set key KEY_REOPEN
	set data DATA_REOPEN

	env_cleanup $testdir
	set envopen [list -create -home $testdir -txn -register -recover ]
	lappend envopen -errfile "$testdir/errfile"
	if { $cache_size != 0} {
		set GB [expr {1024 * 1024 * 1024}]
		# Integer division already yields an integer; int() is
		# avoided because in Tcl 8.5/8.6 it truncates to the machine
		# word size, which would corrupt very large sizes.
		set gbytes [expr {$cache_size / $GB}]
		set bytes [expr {$cache_size % $GB}]
		# Cache number could be any integer, but each cache
		# should be less than 4GB.
		set cachenum [expr {$gbytes + 1}]
		lappend envopen -cachesize "$gbytes $bytes $cachenum"
		puts "\tEnv$tnum: cache parameter:$gbytes $bytes $cachenum"
	}
	set shmmesg ""
	if { $shmkey != 0 } {
		lappend envopen -system_mem -shm_key $shmkey
		set shmmesg " with a shared memory key of $shmkey"
	}
	puts "\tEnv$tnum: Reopen panic'ed env $reopenlimit times$shmmesg."
	for {set reopen 0} {$reopen < $reopenlimit} {incr reopen} {
		set env [ berkdb_env {*}$envopen -errpfx "ENV026 #$reopen" ]
		# Verify that the open of the environment ran recovery by
		# checking that no txns have been created.
		error_check_good "Env$tnum #$reopen: detect-recovery" \
		    [getstats [$env txn_stat] {Number txns begun}] 0
		set txn [$env txn]
		error_check_good \
		    "Env$tnum: #$reopen txn" [is_valid_txn $txn $env] TRUE

		# The db open needs to be the "_noerr" version; the plain
		# version overrides the -errfile specification on the env.
		set db [berkdb_open_noerr -env $env -create -mode 0644 \
		    -auto_commit -btree $testfile]
		error_check_good \
		    "Env$tnum: #$reopen db open" [is_valid_db $db] TRUE
		set ret [$db put $key $data]
		error_check_good "Env$tnum: #$reopen put($key,$data)" $ret 0
		set dbc [$db cursor -txn $txn]
		error_check_good "Env$tnum: #$reopen db cursor" \
		    [is_valid_cursor $dbc $db] TRUE
		# Panic the environment; the outcome of the call itself is
		# deliberately ignored.
		catch {$env set_flags -panic on} res
		# This intentionally does not close the cursor, db, or txn.
		# We want to test that a slightly faulty app doesn't crash.
		# Catch the close directly: evaluating its result as a
		# command would always raise an error and hide a close that
		# unexpectedly succeeded.
		if {[catch {$env close} ret] == 0} {
			puts "Env$tnum: #$reopen close didn't panic: $ret"
		}

		if {$reopen > 0 && $reopen % 20 == 0} {
			puts "\t\tEnv$tnum: reopen times:$reopen "
		}
	}
	set env [ berkdb_env_noerr {*}$envopen ]
	error_check_good "Env$tnum final recovery check" \
	    [getstats [$env txn_stat] {Number txns begun}] 0
	# Release the last, healthy environment handle so that it is not
	# left open when the test directory is cleaned up by a later run.
	error_check_good "Env$tnum final env close" [$env close] 0
	puts "\tEnv$tnum: #$reopen Each reopen after a panic succeeded."
}