#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source.  A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
# Copyright (c) 2016, Intel Corporation.
#

. $STF_SUITE/include/libtest.shlib

# Defaults common to all the tests in the regression group
export PERF_RUNTIME=${PERF_RUNTIME:-'180'}
export PERF_RANDSEED=${PERF_RANDSEED:-'1234'}
export PERF_COMPPERCENT=${PERF_COMPPERCENT:-'66'}
export PERF_COMPCHUNK=${PERF_COMPCHUNK:-'4096'}

# Default to JSON for fio output
export PERF_FIO_FORMAT=${PERF_FIO_FORMAT:-'json'}

# Default fs creation options
export PERF_FS_OPTS=${PERF_FS_OPTS:-'-o recsize=8k -o compress=lz4 \
    -o checksum=sha256 -o redundant_metadata=most'}

function get_sync_str
{
	typeset sync=$1
	typeset sync_str=''

	[[ $sync -eq 0 ]] && sync_str='async'
	[[ $sync -eq 1 ]] && sync_str='sync'
	echo $sync_str
}
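
# Usage sketch (illustrative): map the numeric sync flag to its name.
#
#	sync_str=$(get_sync_str 1)	# "sync"
#	sync_str=$(get_sync_str 0)	# "async"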

function get_suffix
{
	typeset threads=$1
	typeset sync=$2
	typeset iosize=$3

	typeset sync_str=$(get_sync_str $sync)
	typeset filesystems=$(get_nfilesystems)

	typeset suffix="$sync_str.$iosize-ios"
	suffix="$suffix.$threads-threads.$filesystems-filesystems"
	echo $suffix
}
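
# Usage sketch (illustrative): with 16 threads, sync IO, an 8k iosize,
# and two filesystems in $TESTFS, this yields the suffix
# "sync.8k-ios.16-threads.2-filesystems", which tags the fio and
# collector output files below.
#
#	suffix=$(get_suffix 16 1 8k)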

function do_fio_run_impl
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3

	typeset threads=$4
	typeset threads_per_fs=$5
	typeset sync=$6
	typeset iosize=$7

	typeset sync_str=$(get_sync_str $sync)
	log_note "Running with $threads $sync_str threads, $iosize ios"

	if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
		log_must test "$do_recreate" = "true"
		verify_threads_per_fs $threads $threads_per_fs
	fi

	if $do_recreate; then
		recreate_perf_pool

		#
		# A value of zero for "threads_per_fs" is "special", and
		# means a single filesystem should be used, regardless
		# of the number of threads.
		#
		if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
			populate_perf_filesystems $((threads / threads_per_fs))
		else
			populate_perf_filesystems 1
		fi
	fi

	if $clear_cache; then
		# Clear the ARC
		log_must zinject -a
	fi

	if [[ -n $ZINJECT_DELAYS ]]; then
		apply_zinject_delays
	else
		log_note "No per-device commands to execute."
	fi

	#
	# Allow this to be overridden by the individual test case. This
	# can be used to run the FIO job against something other than
	# the default filesystem (e.g. against a clone).
	#
	export DIRECTORY=$(get_directory)
	log_note "DIRECTORY: " $DIRECTORY

	export RUNTIME=$PERF_RUNTIME
	export RANDSEED=$PERF_RANDSEED
	export COMPPERCENT=$PERF_COMPPERCENT
	export COMPCHUNK=$PERF_COMPCHUNK
	export FILESIZE=$((TOTAL_SIZE / threads))
	export NUMJOBS=$threads
	export SYNC_TYPE=$sync
	export BLOCKSIZE=$iosize
	sync

	# When running locally, we want to keep the default behavior of
	# DIRECT == 0, so only set it when we're running over NFS to
	# disable client cache for reads.
	if [[ $NFS -eq 1 ]]; then
		export DIRECT=1
		do_setup_nfs $script
	else
		export DIRECT=0
	fi

	# This will be part of the output filename.
	typeset suffix=$(get_suffix $threads $sync $iosize)

	# Start the data collection
	do_collect_scripts $suffix

	# Define output file
	typeset logbase="$(get_perf_output_dir)/$(basename \
	    $SUDO_COMMAND)"
	typeset outfile="$logbase.fio.$suffix"

	# Start the load
	if [[ $NFS -eq 1 ]]; then
		log_must ssh -t $NFS_USER@$NFS_CLIENT "
			fio --output-format=${PERF_FIO_FORMAT} \
			    --output /tmp/fio.out /tmp/test.fio
		"
		log_must scp $NFS_USER@$NFS_CLIENT:/tmp/fio.out $outfile
		log_must ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
	else
		log_must fio --output-format=${PERF_FIO_FORMAT} \
		    --output $outfile $FIO_SCRIPTS/$script
	fi
}

#
# This function will run fio in a loop, according to the .fio file passed
# in and a number of environment variables. The following variables can be
# set before launching zfstest to override the defaults.
#
# PERF_RUNTIME: The time in seconds each fio invocation should run.
# PERF_NTHREADS: A list of how many threads each fio invocation will use.
# PERF_NTHREADS_PER_FS: A list of how many threads to run against each
#    filesystem; 0 means all threads share a single filesystem.
# PERF_SYNC_TYPES: A list of sync types to test: 1 is sync (O_SYNC) IO,
#    0 is async IO.
# PERF_IOSIZES: A list of blocksizes in which each fio invocation will do IO.
# PERF_COLLECT_SCRIPTS: A comma-delimited list of 'command args, logfile_tag'
#    pairs that will be added to the scripts specified in each test.
#
function do_fio_run
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3
	typeset threads threads_per_fs sync iosize

	for threads in $PERF_NTHREADS; do
		for threads_per_fs in $PERF_NTHREADS_PER_FS; do
			for sync in $PERF_SYNC_TYPES; do
				for iosize in $PERF_IOSIZES; do
					do_fio_run_impl \
					    $script \
					    $do_recreate \
					    $clear_cache \
					    $threads \
					    $threads_per_fs \
					    $sync \
					    $iosize
				done
			done
		done
	done
}
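
# Usage sketch (illustrative values): a perf test case typically exports
# the matrix variables and then launches the loop. This would run
# random_reads.fio for every combination of 8/16 threads, async/sync IO,
# and 8k/64k iosizes, recreating the pool and clearing the ARC each time.
#
#	export PERF_NTHREADS="8 16"
#	export PERF_NTHREADS_PER_FS="0"
#	export PERF_SYNC_TYPES="0 1"
#	export PERF_IOSIZES="8k 64k"
#	do_fio_run random_reads.fio true true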

#
# This function sets up the NFS mount on the client and makes sure all the
# correct permissions are in place.
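#
# Note (assumed environment): $NFS_USER, $NFS_CLIENT, $NFS_MOUNT,
# $NFS_OPTIONS and $NFS_SERVER must already be set, and $NFS_USER is
# expected to be able to run mount and umount via sudo on the client.
#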
function do_setup_nfs
{
	typeset script=$1
	zfs set sharenfs=on $TESTFS
	log_must chmod -R 777 /$TESTFS

	ssh -t $NFS_USER@$NFS_CLIENT "mkdir -m 777 -p $NFS_MOUNT"
	ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
	log_must ssh -t $NFS_USER@$NFS_CLIENT "
		sudo -S mount $NFS_OPTIONS $NFS_SERVER:/$TESTFS $NFS_MOUNT
	"
	#
	# The variables in the fio script are only available in our current
	# shell session, so we have to evaluate them here before copying
	# the resulting script over to the target machine.
	#
	export jobnum='$jobnum'
	while read line; do
		eval echo "$line"
	done < $FIO_SCRIPTS/$script > /tmp/test.fio
	log_must sed -i -e "s%directory.*%directory=$NFS_MOUNT%" /tmp/test.fio
	log_must scp /tmp/test.fio $NFS_USER@$NFS_CLIENT:/tmp
	log_must rm /tmp/test.fio
}

#
# This function iterates through the value pairs in $PERF_COLLECT_SCRIPTS.
# The script at index N is launched in the background, with its output
# redirected to a logfile containing the tag specified at index N + 1.
#
function do_collect_scripts
{
	typeset suffix=$1

	[[ -n $collect_scripts ]] || log_fail "No data collection scripts."
	[[ -n $PERF_RUNTIME ]] || log_fail "No runtime specified."

	# Add in user supplied scripts and logfiles, if any.
	typeset oIFS=$IFS
	IFS=','
	for item in $PERF_COLLECT_SCRIPTS; do
		collect_scripts+=($(echo $item | sed 's/^ *//g'))
	done
	IFS=$oIFS

	typeset idx=0
	while [[ $idx -lt "${#collect_scripts[@]}" ]]; do
		typeset logbase="$(get_perf_output_dir)/$(basename \
		    $SUDO_COMMAND)"
		typeset outfile="$logbase.${collect_scripts[$idx + 1]}.$suffix"

		timeout $PERF_RUNTIME ${collect_scripts[$idx]} >$outfile 2>&1 &
		((idx += 2))
	done

	# Need to explicitly return 0 because timeout(1) will kill
	# a child process and cause us to return non-zero.
	return 0
}
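
# Usage sketch (illustrative): each pair in PERF_COLLECT_SCRIPTS is a
# command followed by the tag for its logfile, all comma delimited. This
# would log "vmstat 1" output to a file tagged "vmstat" and "mpstat 1"
# output to a file tagged "mpstat" for the duration of the run.
#
#	export PERF_COLLECT_SCRIPTS="vmstat 1,vmstat,mpstat 1,mpstat"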

# Find a place to deposit performance data collected while under load.
function get_perf_output_dir
{
	typeset dir="$PWD/perf_data"
	[[ -d $dir ]] || mkdir -p $dir

	echo $dir
}

function apply_zinject_delays
{
	typeset idx=0
	while [[ $idx -lt "${#ZINJECT_DELAYS[@]}" ]]; do
		[[ -n ${ZINJECT_DELAYS[$idx]} ]] || \
		    log_fail "No zinject delay found at index: $idx"

		for disk in $DISKS; do
			log_must zinject \
			    -d $disk -D ${ZINJECT_DELAYS[$idx]} $PERFPOOL
		done

		((idx += 1))
	done
}
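
# Usage sketch (illustrative): ZINJECT_DELAYS holds "latency:lanes"
# strings as accepted by "zinject -D". This would add a 10ms, single-lane
# delay to every disk in $PERFPOOL:
#
#	ZINJECT_DELAYS=('10:1')
#	apply_zinject_delays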

function clear_zinject_delays
{
	log_must zinject -c all
}

#
# Destroy and create the pool used for performance tests.
#
function recreate_perf_pool
{
	[[ -n $PERFPOOL ]] || log_fail "The \$PERFPOOL variable isn't set."

	#
	# In case there's been some "leaked" zinject delays, or if the
	# performance test injected some delays itself, we clear all
	# delays before attempting to destroy the pool. Each delay
	# places a hold on the pool, so the destroy will fail if there
	# are any outstanding delays.
	#
	clear_zinject_delays

	#
	# create_pool handles the case where the pool already exists: it
	# will destroy the previous pool and create a new one.
	#
	create_pool $PERFPOOL $DISKS
}

function verify_threads_per_fs
{
	typeset threads=$1
	typeset threads_per_fs=$2

	log_must test -n "$threads"
	log_must test -n "$threads_per_fs"

	#
	# A value of "0" is treated as a "special value", and it is
	# interpreted to mean all threads will run using a single
	# filesystem.
	#
	[[ $threads_per_fs -eq 0 ]] && return

	#
	# The number of threads per filesystem must be non-negative; and
	# since we returned above when the value is 0, a passing check
	# here means it's strictly greater than zero.
	#
	log_must test $threads_per_fs -ge 0

	#
	# This restriction can be lifted later if needed, but for now,
	# we restrict the number of threads per filesystem to a value
	# that evenly divides the thread count. This way, the threads
	# will be evenly distributed over all the filesystems.
	#
	log_must test $((threads % threads_per_fs)) -eq 0
}
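
# For example (illustrative), 32 threads with 8 threads per filesystem
# passes the checks above (32 % 8 == 0) and implies 32 / 8 = 4
# filesystems, while 32 threads with 5 threads per filesystem fails.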

function populate_perf_filesystems
{
	typeset nfilesystems=${1:-1}

	export TESTFS=""
	for i in $(seq 1 $nfilesystems); do
		typeset dataset="$PERFPOOL/fs$i"
		create_dataset $dataset $PERF_FS_OPTS
		if [[ -z "$TESTFS" ]]; then
			TESTFS="$dataset"
		else
			TESTFS="$TESTFS $dataset"
		fi
	done
}

function get_nfilesystems
{
	typeset filesystems=( $TESTFS )
	echo ${#filesystems[@]}
}

function get_directory
{
	typeset filesystems=( $TESTFS )
	typeset directory=

	typeset idx=0
	while [[ $idx -lt "${#filesystems[@]}" ]]; do
		mountpoint=$(get_prop mountpoint "${filesystems[$idx]}")

		if [[ -n $directory ]]; then
			directory=$directory:$mountpoint
		else
			directory=$mountpoint
		fi

		((idx += 1))
	done

	echo $directory
}
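
# For example (illustrative), with TESTFS="perfpool/fs1 perfpool/fs2"
# mounted at the default locations, get_directory echoes
# "/perfpool/fs1:/perfpool/fs2", the colon-separated directory list that
# fio distributes its jobs across.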

function get_min_arc_size
{
	case "$UNAME" in
	Linux)
		awk '$1 == "c_min" { print $3 }' /proc/spl/kstat/zfs/arcstats
		;;
	FreeBSD)
		sysctl -n kstat.zfs.misc.arcstats.c_min
		;;
	*)
		dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_min.value.ui64);
		    exit(0);
		}'
		;;
	esac || log_fail "get_min_arc_size failed"
}

function get_max_arc_size
{
	case "$UNAME" in
	Linux)
		awk '$1 == "c_max" { print $3 }' /proc/spl/kstat/zfs/arcstats
		;;
	FreeBSD)
		sysctl -n kstat.zfs.misc.arcstats.c_max
		;;
	*)
		dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_max.value.ui64);
		    exit(0);
		}'
		;;
	esac || log_fail "get_max_arc_size failed"
}

function get_arc_target
{
	case "$UNAME" in
	Linux)
		awk '$1 == "c" { print $3 }' /proc/spl/kstat/zfs/arcstats
		;;
	FreeBSD)
		sysctl -n kstat.zfs.misc.arcstats.c
		;;
	*)
		dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c.value.ui64);
		    exit(0);
		}'
		;;
	esac || log_fail "get_arc_target failed"
}

function get_dbuf_cache_size
{
	typeset -l dbuf_cache_size dbuf_cache_shift

	if is_illumos; then
		dbuf_cache_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `dbuf_cache_max_bytes);
		    exit(0);
		}')
	else
		dbuf_cache_shift=$(get_tunable DBUF_CACHE_SHIFT)
		dbuf_cache_size=$(($(get_arc_target) / 2**dbuf_cache_shift))
	fi || log_fail "get_dbuf_cache_size failed"

	echo $dbuf_cache_size
}
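
# For example (illustrative), with an ARC target of 4 GiB and
# DBUF_CACHE_SHIFT=5, the non-illumos path works out to
# 4294967296 / 2^5 = 134217728 bytes (128 MiB).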

# Create a file with some information about how this system is configured.
function get_system_config
{
	typeset config=$PERF_DATA_DIR/$1

	echo "{" >>$config
	if is_linux; then
		echo "  \"ncpus\": \"$(lscpu | awk '/^CPU\(s\)/ {print $2; exit}')\"," >>$config
		echo "  \"physmem\": \"$(free -b | \
		    awk '$1 == "Mem:" { print $2 }')\"," >>$config
		echo "  \"c_max\": \"$(get_max_arc_size)\"," >>$config
		echo "  \"hostname\": \"$(uname -n)\"," >>$config
		echo "  \"kernel version\": \"$(uname -sr)\"," >>$config
	else
		dtrace -qn 'BEGIN{
		    printf("  \"ncpus\": %d,\n", `ncpus);
		    printf("  \"physmem\": %u,\n", `physmem * `_pagesize);
		    printf("  \"c_max\": %u,\n", `arc_stats.arcstat_c_max.value.ui64);
		    printf("  \"kmem_flags\": \"0x%x\",", `kmem_flags);
		    exit(0)}' >>$config
		echo "  \"hostname\": \"$(uname -n)\"," >>$config
		echo "  \"kernel version\": \"$(uname -v)\"," >>$config
	fi
	if is_linux; then
		lsblk -dino NAME,SIZE | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    {disk = $1} {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>$config

		zfs_tunables="/sys/module/zfs/parameters"

		printf "  \"tunables\": {\n" >>$config
		for tunable in \
		    zfs_arc_max \
		    zfs_arc_meta_limit \
		    zfs_arc_sys_free \
		    zfs_dirty_data_max \
		    zfs_flags \
		    zfs_prefetch_disable \
		    zfs_txg_timeout \
		    zfs_vdev_aggregation_limit \
		    zfs_vdev_async_read_max_active \
		    zfs_vdev_async_write_max_active \
		    zfs_vdev_sync_read_max_active \
		    zfs_vdev_sync_write_max_active \
		    zio_slow_io_ms
		do
			if [ "$tunable" != "zfs_arc_max" ]
			then
				printf ",\n" >>$config
			fi
			printf "    \"$tunable\": \"$(<$zfs_tunables/$tunable)\"" \
			    >>$config
		done
		printf "\n  }\n" >>$config
	else
		iostat -En | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    /^c/ {disk = $1}
		    /^Size: [^0]/ {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>$config

		sed -n 's/^set \(.*\)[ ]=[ ]\(.*\)/\1=\2/p' /etc/system | \
		    awk -F= 'BEGIN {printf("  \"system\": {\n"); first = 1}
		    {if (first != 1) {printf(",\n")} else {first = 0};
		    printf("    \"%s\": %s", $1, $2)}
		    END {printf("\n  }\n")}' >>$config
	fi
	echo "}" >>$config
}

#
# On illumos this looks like: ":sd3:sd4:sd1:sd2:"
#
function pool_to_lun_list
{
	typeset pool=$1
	typeset ctd ctds devname lun
	typeset lun_list=':'

	case "$UNAME" in
	Linux)
		ctds=$(zpool list -HLv $pool | \
		    awk '/sd[a-z]*|loop[0-9]*|dm-[0-9]*/ {print $1}')

		for ctd in $ctds; do
			lun_list="$lun_list$ctd:"
		done
		;;
	FreeBSD)
		lun_list+=$(zpool list -HLv $pool | \
		    awk '/a?da[0-9]+|md[0-9]+|mfid[0-9]+|nda[0-9]+|nvd[0-9]+|vtbd[0-9]+/
		         { printf "%s:", $1 }')
		;;
	*)
		ctds=$(zpool list -v $pool |
		    awk '/c[0-9]*t[0-9a-fA-F]*d[0-9]*/ {print $1}')

		for ctd in $ctds; do
			# Get the device name as it appears in /etc/path_to_inst
			devname=$(readlink -f /dev/dsk/${ctd}s0 | sed -n 's/\/devices\([^:]*\):.*/\1/p')
			# Add a string composed of the driver name and instance
			# number to the list for comparison with dev_statname.
			lun=$(sed 's/"//g' /etc/path_to_inst | awk -v dn="$devname" '$0 ~ dn {print $3$2}')
			lun_list="$lun_list$lun:"
		done
		;;
	esac
	echo $lun_list
}
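
# For example (illustrative), a Linux pool built on sda and sdb yields
# ":sda:sdb:"; the leading and trailing colons let callers match a LUN
# exactly with a simple ":$lun:" substring test.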

function print_perf_settings
{
	echo "PERF_NTHREADS: $PERF_NTHREADS"
	echo "PERF_NTHREADS_PER_FS: $PERF_NTHREADS_PER_FS"
	echo "PERF_SYNC_TYPES: $PERF_SYNC_TYPES"
	echo "PERF_IOSIZES: $PERF_IOSIZES"
}

# Create a perf_data directory to hold performance statistics and
# configuration information.
export PERF_DATA_DIR=$(get_perf_output_dir)
[[ -f $PERF_DATA_DIR/config.json ]] || get_system_config config.json