db/test/run.test

#!/bin/sh -
#
# Copyright (c) 1992 The Regents of the University of California.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. All advertising materials mentioning features or use of this software
#    must display the following acknowledgement:
#	This product includes software developed by the University of
#	California, Berkeley and its contributors.
# 4. Neither the name of the University nor the names of its contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#	@(#)run.test	5.17 (Berkeley) 5/22/93
#

# db regression tests

main()
{
#	DICT=/usr/share/dict/words
	DICT=/usr/share/dict/web2
	PROG=obj/dbtest
	TMP1=t1
	TMP2=t2
	TMP3=t3

	test1
	test2
	test3
	test4
	test5
	test6
	test7
	test8
	test9
	test10
	test11
	test12
	test13
	test20
	rm -f $TMP1 $TMP2 $TMP3
	exit 0
}

# Take the first hundred entries in the dictionary, and make them
# be key/data pairs.
test1()
{
	printf "Test 1: btree, hash: small key, small data pairs\n"
	sed 200q $DICT > $TMP1
	for type in btree hash; do
		rm -f $TMP2 $TMP3
		for i in `sed 200q $DICT`; do
			printf "p\nk%s\nd%s\ng\nk%s\n" $i $i $i
		done > $TMP2
		$PROG -o $TMP3 $type $TMP2
		if (cmp -s $TMP1 $TMP3) ; then :
		else
			printf "test1: type %s: failed\n" $type
			exit 1
		fi
	done
	printf "Test 1: recno: small key, small data pairs\n"
	rm -f $TMP2 $TMP3
	sed 200q $DICT |
	awk '{
		++i;
		printf("p\nk%d\nd%s\ng\nk%d\n", i, $0, i);
	}' > $TMP2
	$PROG -o $TMP3 recno $TMP2
	if (cmp -s $TMP1 $TMP3) ; then :
	else
		printf "test1: type recno: failed\n"
		exit 1
	fi
}

# Take the first 200 entries in the dictionary, and give them
# each a medium size data entry.
test2()
{
	printf "Test 2: btree, hash: small key, medium data pairs\n"
	mdata=abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz
	echo $mdata |
	awk '{ for (i = 1; i < 201; ++i) print $0 }' > $TMP1
	for type in hash btree; do
		rm -f $TMP2 $TMP3
		for i in `sed 200q $DICT`; do
			printf "p\nk%s\nd%s\ng\nk%s\n" $i $mdata $i
		done > $TMP2
		$PROG -o $TMP3 $type $TMP2
		if (cmp -s $TMP1 $TMP3) ; then :
		else
			printf "test2: type %s: failed\n" $type
			exit 1
		fi
	done
	printf "Test 2: recno: small key, medium data pairs\n"
	rm -f $TMP2 $TMP3
	echo $mdata |
	awk '{  for (i = 1; i < 201; ++i)
		printf("p\nk%d\nd%s\ng\nk%d\n", i, $0, i);
	}' > $TMP2
	$PROG -o $TMP3 recno $TMP2
	if (cmp -s $TMP1 $TMP3) ; then :
	else
		printf "test2: type recno: failed\n"
		exit 1
	fi
}

# Insert the programs in /bin with their paths as their keys.
test3()
{
	printf "Test 3: hash: small key, big data pairs\n"
	rm -f $TMP1
	(find /bin -type f -print | xargs cat) > $TMP1
	for type in hash; do
		rm -f $TMP2 $TMP3
		for i in `find /bin -type f -print`; do
			printf "p\nk%s\nD%s\ng\nk%s\n" $i $i $i
		done > $TMP2
		$PROG -o $TMP3 $type $TMP2
		if (cmp -s $TMP1 $TMP3) ; then :
		else
			printf "test3: %s: page size %d: failed\n" \
			    $type $psize
			exit 1
		fi
	done
	printf "Test 3: btree: small key, big data pairs\n"
	for psize in 512 16384 65536; do
		printf "\tpage size %d\n" $psize
		for type in btree; do
			rm -f $TMP2 $TMP3
			for i in `find /bin -type f -print`; do
				printf "p\nk%s\nD%s\ng\nk%s\n" $i $i $i
			done > $TMP2
			$PROG -i psize=$psize -o $TMP3 $type $TMP2
			if (cmp -s $TMP1 $TMP3) ; then :
			else
				printf "test3: %s: page size %d: failed\n" \
				    $type $psize
				exit 1
			fi
		done
	done
	printf "Test 3: recno: big data pairs\n"
	rm -f $TMP2 $TMP3
	find /bin -type f -print |
	awk '{
		++i;
		printf("p\nk%d\nD%s\ng\nk%d\n", i, $0, i);
	}' > $TMP2
	for psize in 512 16384 65536; do
		printf "\tpage size %d\n" $psize
		$PROG -i psize=$psize -o $TMP3 recno $TMP2
		if (cmp -s $TMP1 $TMP3) ; then :
		else
			printf "test3: recno: page size %d: failed\n" $psize
			exit 1
		fi
	done
}

# Do random recno entries.
test4()
{
	printf "Test 4: recno: random entries\n"
	echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" |
	awk '{
		for (i = 37; i <= 37 + 88 * 17; i += 17)
			printf("input key %d: %.*s\n", i, i % 41, $0);
		for (i = 1; i <= 15; ++i)
			printf("input key %d: %.*s\n", i, i % 41, $0);
		for (i = 19234; i <= 19234 + 61 * 27; i += 27)
			printf("input key %d: %.*s\n", i, i % 41, $0);
		exit
	}' > $TMP1
	rm -f TMP2 $TMP3
	cat $TMP1 |
	awk 'BEGIN {
			i = 37;
			incr = 17;
		}
		{
			printf("p\nk%d\nd%s\n", i, $0);
			if (i == 19234 + 61 * 27)
				exit;
			if (i == 37 + 88 * 17) {
				i = 1;
				incr = 1;
			} else if (i == 15) {
				i = 19234;
				incr = 27;
			} else
				i += incr;
		}
		END {
			for (i = 37; i <= 37 + 88 * 17; i += 17)
				printf("g\nk%d\n", i);
			for (i = 1; i <= 15; ++i)
				printf("g\nk%d\n", i);
			for (i = 19234; i <= 19234 + 61 * 27; i += 27)
				printf("g\nk%d\n", i);
		}' > $TMP2
	$PROG -o $TMP3 recno $TMP2
	if (cmp -s $TMP1 $TMP3) ; then :
	else
		printf "test4: type recno: failed\n"
		exit 1
	fi
}

# Do reverse order recno entries.
test5()
{
	printf "Test 5: recno: reverse order entries\n"
	echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" |
	awk ' {
		for (i = 1500; i; --i)
			printf("input key %d: %.*s\n", i, i % 34, $0);
		exit;
	}' > $TMP1
	rm -f TMP2 $TMP3
	cat $TMP1 |
	awk 'BEGIN {
			i = 1500;
		}
		{
			printf("p\nk%d\nd%s\n", i, $0);
			--i;
		}
		END {
			for (i = 1500; i; --i)
				printf("g\nk%d\n", i);
		}' > $TMP2
	$PROG -o $TMP3 recno $TMP2
	if (cmp -s $TMP1 $TMP3) ; then :
	else
		printf "test5: type recno: failed\n"
		exit 1
	fi
}

# Do alternating order recno entries.
test6()
{
	printf "Test 6: recno: alternating order entries\n"
	echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" |
	awk ' {
		for (i = 1; i < 1200; i += 2)
			printf("input key %d: %.*s\n", i, i % 34, $0);
		for (i = 2; i < 1200; i += 2)
			printf("input key %d: %.*s\n", i, i % 34, $0);
		exit;
	}' > $TMP1
	rm -f TMP2 $TMP3
	cat $TMP1 |
	awk 'BEGIN {
			i = 1;
			even = 0;
		}
		{
			printf("p\nk%d\nd%s\n", i, $0);
			i += 2;
			if (i >= 1200) {
				if (even == 1)
					exit;
				even = 1;
				i = 2;
			}
		}
		END {
			for (i = 1; i < 1200; ++i)
				printf("g\nk%d\n", i);
		}' > $TMP2
	$PROG -o $TMP3 recno $TMP2
	sort -o $TMP1 $TMP1
	sort -o $TMP3 $TMP3
	if (cmp -s $TMP1 $TMP3) ; then :
	else
		printf "test6: type recno: failed\n"
		exit 1
	fi
}

# Delete cursor record
test7()
{
	printf "Test 7: btree, recno: delete cursor record\n"
	echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" |
	awk '{
		for (i = 1; i <= 120; ++i)
			printf("%05d: input key %d: %s\n", i, i, $0);
		printf("%05d: input key %d: %s\n", 120, 120, $0);
		printf("get failed, no such key\n");
		printf("%05d: input key %d: %s\n", 1, 1, $0);
		printf("%05d: input key %d: %s\n", 2, 2, $0);
		exit;
	}' > $TMP1
	rm -f TMP2 $TMP3

	for type in btree recno; do
		cat $TMP1 |
		awk '{
			if (i == 120)
				exit;
			printf("p\nk%d\nd%s\n", ++i, $0);
		}
		END {
			printf("fR_NEXT\n");
			for (i = 1; i <= 120; ++i)
				printf("s\n");
			printf("fR_CURSOR\ns\nk120\n");
			printf("r\nk120\n");
			printf("fR_NEXT\ns\n");
			printf("fR_CURSOR\ns\nk1\n");
			printf("r\nk1\n");
			printf("fR_FIRST\ns\n");
		}' > $TMP2
		$PROG -o $TMP3 recno $TMP2
		if (cmp -s $TMP1 $TMP3) ; then :
		else
			printf "test7: type $type: failed\n"
			exit 1
		fi
	done
}

# Make sure that overflow pages are reused.
test8()
{
	printf "Test 8: btree, hash: repeated small key, big data pairs\n"
	rm -f $TMP1
	awk 'BEGIN {
		for (i = 1; i <= 10; ++i) {
			printf("p\nkkey1\nD/bin/sh\n");
			printf("p\nkkey2\nD/bin/csh\n");
			if (i % 8 == 0) {
				printf("c\nkkey2\nD/bin/csh\n");
				printf("c\nkkey1\nD/bin/sh\n");
				printf("e\t%d of 10 (comparison)\r\n", i);
			} else
				printf("e\t%d of 10             \r\n", i);
			printf("r\nkkey1\nr\nkkey2\n");
		}
		printf("e\n");
		printf("eend of test8 run\n");
	}' > $TMP1
	$PROG btree $TMP1
	$PROG hash $TMP1
	# No explicit test for success.
}

# Test btree duplicate keys
test9()
{
	printf "Test 9: btree: duplicate keys\n"
	echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" |
	awk '{
		for (i = 1; i <= 543; ++i)
			printf("%05d: input key %d: %s\n", i, i, $0);
		exit;
	}' > $TMP1
	rm -f TMP2 $TMP3

	for type in btree; do
		cat $TMP1 |
		awk '{
			if (i++ % 2)
				printf("p\nkduplicatekey\nd%s\n", $0);
			else
				printf("p\nkunique%dkey\nd%s\n", i, $0);
		}
		END {
				printf("o\n");
		}' > $TMP2
		$PROG -iflags=1 -o $TMP3 $type $TMP2
		sort -o $TMP3 $TMP3
		if (cmp -s $TMP1 $TMP3) ; then :
		else
			printf "test9: type $type: failed\n"
			exit 1
		fi
	done
}

# Test use of cursor flags without initialization
test10()
{
	printf "Test 10: btree, recno: test cursor flag use\n"
	echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" |
	awk '{
		for (i = 1; i <= 20; ++i)
			printf("%05d: input key %d: %s\n", i, i, $0);
		exit;
	}' > $TMP1
	rm -f TMP2 $TMP3

	# Test that R_CURSOR doesn't succeed before cursor initialized
	for type in btree recno; do
		cat $TMP1 |
		awk '{
			if (i == 10)
				exit;
			printf("p\nk%d\nd%s\n", ++i, $0);
		}
		END {
			printf("fR_CURSOR\nr\nk1\n");
			printf("eR_CURSOR SHOULD HAVE FAILED\n");
		}' > $TMP2
		$PROG -o $TMP3 $type $TMP2 > /dev/null 2>&1
		if [ -s $TMP3 ] ; then
			printf "Test 10: delete: R_CURSOR SHOULD HAVE FAILED\n"
			exit 1
		fi
	done
	for type in btree recno; do
		cat $TMP1 |
		awk '{
			if (i == 10)
				exit;
			printf("p\nk%d\nd%s\n", ++i, $0);
		}
		END {
			printf("fR_CURSOR\np\nk1\ndsome data\n");
			printf("eR_CURSOR SHOULD HAVE FAILED\n");
		}' > $TMP2
		$PROG -o $TMP3 $type $TMP2 > /dev/null 2>&1
		if [ -s $TMP3 ] ; then
			printf "Test 10: put: R_CURSOR SHOULD HAVE FAILED\n"
			exit 1
		fi
	done
}

# Test insert in reverse order.
test11()
{
	printf "Test 11: recno: reverse order insert\n"
	echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" |
	awk '{
		for (i = 1; i <= 779; ++i)
			printf("%05d: input key %d: %s\n", i, i, $0);
		exit;
	}' > $TMP1
	rm -f TMP2 $TMP3

	for type in recno; do
		cat $TMP1 |
		awk '{
			if (i == 0) {
				i = 1;
				printf("p\nk1\nd%s\n", $0);
				printf("%s\n", "fR_IBEFORE");
			} else
				printf("p\nk1\nd%s\n", $0);
		}
		END {
				printf("or\n");
		}' > $TMP2
		$PROG -o $TMP3 $type $TMP2
		if (cmp -s $TMP1 $TMP3) ; then :
		else
			printf "test11: type $type: failed\n"
			exit 1
		fi
	done
}

# Take the first 20000 entries in the dictionary, reverse them, and give
# them each a small size data entry.  Use a small page size to make sure
# the btree split code gets hammered.
test12()
{
	printf "Test 12: btree: lots of keys, small page size\n"
	mdata=abcdefghijklmnopqrstuvwxy
	echo $mdata |
	awk '{ for (i = 1; i < 20001; ++i) print $0 }' > $TMP1
	for type in btree; do
		rm -f $TMP2 $TMP3
		for i in `sed 20000q $DICT | rev`; do
			printf "p\nk%s\nd%s\ng\nk%s\n" $i $mdata $i
		done > $TMP2
		$PROG -i psize=512 -o $TMP3 $type $TMP2
		if (cmp -s $TMP1 $TMP3) ; then :
		else
			printf "test12: type %s: failed\n" $type
			exit 1
		fi
	done
}

# Test different byte orders.
test13()
{
	printf "Test 13: btree, hash: differing byte orders\n"
	sed 50q $DICT > $TMP1
	for order in 1234 4321; do
		for type in btree hash; do
			rm -f byte.file $TMP2 $TMP3
			for i in `sed 50q $DICT`; do
				printf "p\nk%s\nd%s\ng\nk%s\n" $i $i $i
			done > $TMP2
			$PROG -ilorder=$order -f byte.file -o $TMP3 $type $TMP2
			if (cmp -s $TMP1 $TMP3) ; then :
			else
				printf "test13: %s/%s put failed\n" $type $order
				exit 1
			fi
			for i in `sed 50q $DICT`; do
				printf "g\nk%s\n" $i
			done > $TMP2
			$PROG -ilorder=$order -f byte.file -o $TMP3 $type $TMP2
			if (cmp -s $TMP1 $TMP3) ; then :
			else
				printf "test13: %s/%s get failed\n" $type $order
				exit 1
			fi
		done
	done
	rm -f byte.file
}

# Try a variety of bucketsizes and fill factors for hashing
test20()
{
	printf\
    "Test 20: hash: bucketsize, fill factor; nelem 25000 cachesize 65536\n"
	awk 'BEGIN {
		for (i = 1; i <= 10000; ++i)
			printf("%.*s\n", i % 34,
		    "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg");
	}' > $TMP1
	sed 10000q $DICT |
	awk '{
		++i;
		printf("p\nk%s\nd%.*s\n", $0, i % 34,
		    "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg");
	}' > $TMP2
	sed 10000q $DICT |
	awk '{
		++i;
		printf("g\nk%s\n", $0);
	}' >> $TMP2
	bsize=256
	for ffactor in 11 14 21; do
		printf "\tbucketsize %d, fill factor %d\n" $bsize $ffactor
		$PROG -o$TMP3 \
		    -ibsize=$bsize,ffactor=$ffactor,nelem=25000,cachesize=65536\
		    hash $TMP2
		if (cmp -s $TMP1 $TMP3) ; then :
		else
			printf "test20: type hash:\
bsize=$bsize ffactor=$ffactor nelem=25000 cachesize=65536 failed\n"
			exit 1
		fi
	done
	bsize=512
	for ffactor in 21 28 43; do
		printf "\tbucketsize %d, fill factor %d\n" $bsize $ffactor
		$PROG -o$TMP3 \
		    -ibsize=$bsize,ffactor=$ffactor,nelem=25000,cachesize=65536\
		    hash $TMP2
		if (cmp -s $TMP1 $TMP3) ; then :
		else
			printf "test20: type hash:\
bsize=$bsize ffactor=$ffactor nelem=25000 cachesize=65536 failed\n"
			exit 1
		fi
	done
	bsize=1024
	for ffactor in 43 57 85; do
		printf "\tbucketsize %d, fill factor %d\n" $bsize $ffactor
		$PROG -o$TMP3 \
		    -ibsize=$bsize,ffactor=$ffactor,nelem=25000,cachesize=65536\
		    hash $TMP2
		if (cmp -s $TMP1 $TMP3) ; then :
		else
			printf "test20: type hash:\
bsize=$bsize ffactor=$ffactor nelem=25000 cachesize=65536 failed\n"
			exit 1
		fi
	done
	bsize=2048
	for ffactor in 85 114 171; do
		printf "\tbucketsize %d, fill factor %d\n" $bsize $ffactor
		$PROG -o$TMP3 \
		    -ibsize=$bsize,ffactor=$ffactor,nelem=25000,cachesize=65536\
		    hash $TMP2
		if (cmp -s $TMP1 $TMP3) ; then :
		else
			printf "test20: type hash:\
bsize=$bsize ffactor=$ffactor nelem=25000 cachesize=65536 failed\n"
			exit 1
		fi
	done
	bsize=4096
	for ffactor in 171 228 341; do
		printf "\tbucketsize %d, fill factor %d\n" $bsize $ffactor
		$PROG -o$TMP3 \
		    -ibsize=$bsize,ffactor=$ffactor,nelem=25000,cachesize=65536\
		    hash $TMP2
		if (cmp -s $TMP1 $TMP3) ; then :
		else
			printf "test20: type hash:\
bsize=$bsize ffactor=$ffactor nelem=25000 cachesize=65536 failed\n"
			exit 1
		fi
	done
	bsize=8192
	for ffactor in 341 455 683; do
		printf "\tbucketsize %d, fill factor %d\n" $bsize $ffactor
		$PROG -o$TMP3 \
		    -ibsize=$bsize,ffactor=$ffactor,nelem=25000,cachesize=65536\
		    hash $TMP2
		if (cmp -s $TMP1 $TMP3) ; then :
		else
			printf "test20: type hash:\
bsize=$bsize ffactor=$ffactor nelem=25000 cachesize=65536 failed\n"
			exit 1
		fi
	done
}

main