#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source.  A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
# Copyright (c) 2016, Intel Corporation.
#

. $STF_SUITE/include/libtest.shlib

# Defaults common to all the tests in the regression group
export PERF_RUNTIME=${PERF_RUNTIME:-'180'}
export PERF_RANDSEED=${PERF_RANDSEED:-'1234'}
export PERF_COMPPERCENT=${PERF_COMPPERCENT:-'66'}
export PERF_COMPCHUNK=${PERF_COMPCHUNK:-'4096'}

# Default to JSON for fio output
export PERF_FIO_FORMAT=${PERF_FIO_FORMAT:-'json'}
# Default fs creation options
export PERF_FS_OPTS=${PERF_FS_OPTS:-'-o recsize=8k -o compress=lz4 \
    -o checksum=sha256 -o redundant_metadata=most'}
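
#
# Any of the above defaults can be overridden from the environment
# before the test suite is launched, e.g. (hypothetical values and
# runner invocation):
#
#	PERF_RUNTIME=60 PERF_FIO_FORMAT=normal zfs-tests.sh ...
#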

function get_sync_str
{
	typeset sync=$1
	typeset sync_str=''

	[[ $sync -eq 0 ]] && sync_str='async'
	[[ $sync -eq 1 ]] && sync_str='sync'
	echo $sync_str
}

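#
# Build the suffix used in output file names from the run parameters.
# For example (hypothetical values), "get_suffix 32 1 8k" on a single
# filesystem yields "sync.8k-ios.32-threads.1-filesystems".
#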
function get_suffix
{
	typeset threads=$1
	typeset sync=$2
	typeset iosize=$3

	typeset sync_str=$(get_sync_str $sync)
	typeset filesystems=$(get_nfilesystems)

	typeset suffix="$sync_str.$iosize-ios"
	suffix="$suffix.$threads-threads.$filesystems-filesystems"
	echo $suffix
}

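#
# Run a single fio invocation. The positional arguments are: the fio
# job file, whether to recreate the pool ("true" or "false"), whether
# to clear the ARC ("true" or "false"), the thread count, the number
# of threads per filesystem (0 means a single filesystem), the sync
# flag (0 or 1), and the IO size.
#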
function do_fio_run_impl
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3

	typeset threads=$4
	typeset threads_per_fs=$5
	typeset sync=$6
	typeset iosize=$7

	typeset sync_str=$(get_sync_str $sync)
	log_note "Running with $threads $sync_str threads, $iosize ios"

	if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
		log_must test $do_recreate
		verify_threads_per_fs $threads $threads_per_fs
	fi

	if $do_recreate; then
		recreate_perf_pool

		#
		# A value of zero for "threads_per_fs" is "special", and
		# means a single filesystem should be used, regardless
		# of the number of threads.
		#
		if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
			populate_perf_filesystems $((threads / threads_per_fs))
		else
			populate_perf_filesystems 1
		fi
	fi

	if $clear_cache; then
		# Clear the ARC
		log_must zinject -a
	fi

	if [[ -n $ZINJECT_DELAYS ]]; then
		apply_zinject_delays
	else
		log_note "No per-device commands to execute."
	fi

	#
	# Allow this to be overridden by the individual test case. This
	# can be used to run the FIO job against something other than
	# the default filesystem (e.g. against a clone).
	#
	export DIRECTORY=$(get_directory)
	log_note "DIRECTORY: " $DIRECTORY

	export RUNTIME=$PERF_RUNTIME
	export RANDSEED=$PERF_RANDSEED
	export COMPPERCENT=$PERF_COMPPERCENT
	export COMPCHUNK=$PERF_COMPCHUNK
	export FILESIZE=$((TOTAL_SIZE / threads))
	export NUMJOBS=$threads
	export SYNC_TYPE=$sync
	export BLOCKSIZE=$iosize
	sync

	# When running locally, we want to keep the default behavior of
	# DIRECT == 0, so only set it when we're running over NFS to
	# disable client cache for reads.
	if [[ $NFS -eq 1 ]]; then
		export DIRECT=1
		do_setup_nfs $script
	else
		export DIRECT=0
	fi

	# This will be part of the output filename.
	typeset suffix=$(get_suffix $threads $sync $iosize)

	# Start the data collection
	do_collect_scripts $suffix

	# Define output file
	typeset logbase="$(get_perf_output_dir)/$(basename \
	    $SUDO_COMMAND)"
	typeset outfile="$logbase.fio.$suffix"

	# Start the load
	if [[ $NFS -eq 1 ]]; then
		log_must ssh -t $NFS_USER@$NFS_CLIENT "
			fio --output-format=${PERF_FIO_FORMAT} \
			    --output /tmp/fio.out /tmp/test.fio
		"
		log_must scp $NFS_USER@$NFS_CLIENT:/tmp/fio.out $outfile
		log_must ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
	else
		log_must fio --output-format=${PERF_FIO_FORMAT} \
		    --output $outfile $FIO_SCRIPTS/$script
	fi
}

#
# This function will run fio in a loop, according to the .fio file passed
# in and a number of environment variables. The following variables can be
# set before launching zfstest to override the defaults.
#
# PERF_RUNTIME: The time in seconds each fio invocation should run.
# PERF_NTHREADS: A list of how many threads each fio invocation will use.
# PERF_NTHREADS_PER_FS: A list of thread counts to run against each
#    filesystem; a value of 0 means all threads share a single filesystem.
# PERF_SYNC_TYPES: A list of sync types; 1 is sync (O_SYNC) IO, 0 is
#    async IO.
# PERF_IOSIZES: A list of blocksizes in which each fio invocation will do IO.
# PERF_COLLECT_SCRIPTS: A comma delimited list of 'command args, logfile_tag'
#    pairs that will be added to the scripts specified in each test.
#
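# For example (hypothetical values), PERF_NTHREADS="8 16",
# PERF_SYNC_TYPES="0 1", PERF_IOSIZES="8k", and a single
# PERF_NTHREADS_PER_FS value result in four fio invocations, one per
# combination.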
function do_fio_run
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3
	typeset threads threads_per_fs sync iosize

	for threads in $PERF_NTHREADS; do
		for threads_per_fs in $PERF_NTHREADS_PER_FS; do
			for sync in $PERF_SYNC_TYPES; do
				for iosize in $PERF_IOSIZES; do
					do_fio_run_impl \
					    $script \
					    $do_recreate \
					    $clear_cache \
					    $threads \
					    $threads_per_fs \
					    $sync \
					    $iosize
				done
			done
		done
	done
}
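
# Example invocation (hypothetical job file name):
#
#	do_fio_run random_reads.fio true true
#
# recreates the pool, clears the ARC, and then sweeps the configured
# parameter combinations against $FIO_SCRIPTS/random_reads.fio.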

#
# This function sets up the NFS mount on the client and makes sure
# all the correct permissions are in place.
#
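# It assumes $NFS_USER, $NFS_CLIENT, $NFS_SERVER, $NFS_MOUNT, and
# $NFS_OPTIONS have been set by the test runner.
#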
function do_setup_nfs
{
	typeset script=$1
	zfs set sharenfs=on $TESTFS
	log_must chmod -R 777 /$TESTFS

	ssh -t $NFS_USER@$NFS_CLIENT "mkdir -m 777 -p $NFS_MOUNT"
	ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
	log_must ssh -t $NFS_USER@$NFS_CLIENT "
		sudo -S mount $NFS_OPTIONS $NFS_SERVER:/$TESTFS $NFS_MOUNT
	"
	#
	# The variables in the fio script are only available in our current
	# shell session, so we have to evaluate them here before copying
	# the resulting script over to the target machine.
	#
	export jobnum='$jobnum'
	while read line; do
		eval echo "$line"
	done < $FIO_SCRIPTS/$script > /tmp/test.fio
	log_must sed -i -e "s%directory.*%directory=$NFS_MOUNT%" /tmp/test.fio
	log_must scp /tmp/test.fio $NFS_USER@$NFS_CLIENT:/tmp
	log_must rm /tmp/test.fio
}

#
# This function iterates through the value pairs in $PERF_COLLECT_SCRIPTS.
# The script at index N is launched in the background, with its output
# redirected to a logfile containing the tag specified at index N + 1.
#
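# For example (hypothetical values), PERF_COLLECT_SCRIPTS="vmstat 1,vmstat"
# runs "vmstat 1" for the duration of the test, logging to a file whose
# name contains the tag "vmstat".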
function do_collect_scripts
{
	typeset suffix=$1

	[[ -n $collect_scripts ]] || log_fail "No data collection scripts."
	[[ -n $PERF_RUNTIME ]] || log_fail "No runtime specified."

	# Add in user supplied scripts and logfiles, if any.
	typeset oIFS=$IFS
	IFS=','
	for item in $PERF_COLLECT_SCRIPTS; do
		collect_scripts+=($(echo $item | sed 's/^ *//g'))
	done
	IFS=$oIFS

	typeset idx=0
	while [[ $idx -lt "${#collect_scripts[@]}" ]]; do
		typeset logbase="$(get_perf_output_dir)/$(basename \
		    $SUDO_COMMAND)"
		typeset outfile="$logbase.${collect_scripts[$idx + 1]}.$suffix"

		timeout $PERF_RUNTIME ${collect_scripts[$idx]} >$outfile 2>&1 &
		((idx += 2))
	done

	# Need to explicitly return 0 because timeout(1) will kill
	# a child process and cause us to return non-zero.
	return 0
}

# Find a place to deposit performance data collected while under load.
function get_perf_output_dir
{
	typeset dir="$(pwd)/perf_data"
	[[ -d $dir ]] || mkdir -p $dir

	echo $dir
}

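#
# Apply each delay in the $ZINJECT_DELAYS array to every disk in
# $DISKS. For example (hypothetical value), ZINJECT_DELAYS=('25:1')
# runs "zinject -d <disk> -D 25:1 $PERFPOOL" for each disk, adding a
# 25ms, single-lane IO delay.
#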
function apply_zinject_delays
{
	typeset idx=0
	while [[ $idx -lt "${#ZINJECT_DELAYS[@]}" ]]; do
		[[ -n ${ZINJECT_DELAYS[$idx]} ]] || \
		    log_fail "No zinject delay found at index: $idx"

		for disk in $DISKS; do
			log_must zinject \
			    -d $disk -D ${ZINJECT_DELAYS[$idx]} $PERFPOOL
		done

		((idx += 1))
	done
}

function clear_zinject_delays
{
	log_must zinject -c all
}

#
# Destroy and create the pool used for performance tests.
#
function recreate_perf_pool
{
	[[ -n $PERFPOOL ]] || log_fail "The \$PERFPOOL variable isn't set."

	#
	# In case there's been some "leaked" zinject delays, or if the
	# performance test injected some delays itself, we clear all
	# delays before attempting to destroy the pool. Each delay
	# places a hold on the pool, so the destroy will fail if there
	# are any outstanding delays.
	#
	clear_zinject_delays

	#
	# create_pool handles the case where the pool already exists: it
	# destroys the previous pool before creating the new one.
	#
	create_pool $PERFPOOL $DISKS
}

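#
# Verify the requested threads/threads-per-fs split. For example,
# 32 threads with 8 threads per filesystem passes (four filesystems),
# while 32 threads with 5 threads per filesystem fails the
# divisibility check below.
#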
function verify_threads_per_fs
{
	typeset threads=$1
	typeset threads_per_fs=$2

	log_must test -n "$threads"
	log_must test -n "$threads_per_fs"

	#
	# A value of "0" is treated as a "special value", and it is
	# interpreted to mean all threads will run using a single
	# filesystem.
	#
	[[ $threads_per_fs -eq 0 ]] && return

	#
	# The number of threads per filesystem must be greater than or
	# equal to zero; since we just returned above when the value is
	# 0, it must be strictly greater than zero here.
	#
	log_must test $threads_per_fs -ge 0

	#
	# This restriction can be lifted later if needed, but for now,
	# we restrict the number of threads per filesystem to a value
	# that evenly divides the thread count. This way, the threads
	# will be evenly distributed over all the filesystems.
	#
	log_must test $((threads % threads_per_fs)) -eq 0
}

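#
# Create the requested number of filesystems under $PERFPOOL using
# $PERF_FS_OPTS, and record them in $TESTFS as a space-separated
# list. For example, "populate_perf_filesystems 2" creates
# $PERFPOOL/fs1 and $PERFPOOL/fs2 and sets
# TESTFS="$PERFPOOL/fs1 $PERFPOOL/fs2".
#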
function populate_perf_filesystems
{
	typeset nfilesystems=${1:-1}

	export TESTFS=""
	for i in $(seq 1 $nfilesystems); do
		typeset dataset="$PERFPOOL/fs$i"
		create_dataset $dataset $PERF_FS_OPTS
		if [[ -z "$TESTFS" ]]; then
			TESTFS="$dataset"
		else
			TESTFS="$TESTFS $dataset"
		fi
	done
}

function get_nfilesystems
{
	typeset filesystems=( $TESTFS )
	echo ${#filesystems[@]}
}

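#
# Build the colon-separated list of mountpoints that fio accepts in
# its "directory" option. For example, with TESTFS="perf/fs1 perf/fs2"
# and default mountpoints, this echoes "/perf/fs1:/perf/fs2".
#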
function get_directory
{
	typeset filesystems=( $TESTFS )
	typeset directory=

	typeset idx=0
	while [[ $idx -lt "${#filesystems[@]}" ]]; do
		mountpoint=$(get_prop mountpoint "${filesystems[$idx]}")

		if [[ -n $directory ]]; then
			directory=$directory:$mountpoint
		else
			directory=$mountpoint
		fi

		((idx += 1))
	done

	echo $directory
}

function get_min_arc_size
{
	typeset -l min_arc_size

	if is_freebsd; then
		min_arc_size=$(sysctl -n kstat.zfs.misc.arcstats.c_min)
	elif is_illumos; then
		min_arc_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_min.value.ui64);
		    exit(0);
		}')
	elif is_linux; then
		min_arc_size=`awk '$1 == "c_min" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats`
	fi

	[[ $? -eq 0 ]] || log_fail "get_min_arc_size failed"

	echo $min_arc_size
}

function get_max_arc_size
{
	typeset -l max_arc_size

	if is_freebsd; then
		max_arc_size=$(sysctl -n kstat.zfs.misc.arcstats.c_max)
	elif is_illumos; then
		max_arc_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_max.value.ui64);
		    exit(0);
		}')
	elif is_linux; then
		max_arc_size=`awk '$1 == "c_max" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats`
	fi

	[[ $? -eq 0 ]] || log_fail "get_max_arc_size failed"

	echo $max_arc_size
}

function get_arc_target
{
	typeset -l arc_c

	if is_freebsd; then
		arc_c=$(sysctl -n kstat.zfs.misc.arcstats.c)
	elif is_illumos; then
		arc_c=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c.value.ui64);
		    exit(0);
		}')
	elif is_linux; then
		arc_c=`awk '$1 == "c" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats`
	fi

	[[ $? -eq 0 ]] || log_fail "get_arc_target failed"

	echo $arc_c
}

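#
# On platforms other than illumos, the dbuf cache size is derived from
# the ARC target: for example (hypothetical values), an ARC target of
# 4 GiB with DBUF_CACHE_SHIFT=5 gives 4 GiB / 2^5 = 128 MiB.
#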
function get_dbuf_cache_size
{
	typeset -l dbuf_cache_size dbuf_cache_shift

	if is_illumos; then
		dbuf_cache_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `dbuf_cache_max_bytes);
		    exit(0);
		}')
	else
		dbuf_cache_shift=$(get_tunable DBUF_CACHE_SHIFT)
		dbuf_cache_size=$(($(get_arc_target) / 2**dbuf_cache_shift))
	fi

	[[ $? -eq 0 ]] || log_fail "get_dbuf_cache_size failed"

	echo $dbuf_cache_size
}

# Create a file with some information about how this system is configured.
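# The result is a JSON object of the form (abbreviated, hypothetical
# values):
#
#	{ "ncpus": "8", "physmem": "...", "c_max": "...",
#	  "hostname": "...", "kernel version": "...",
#	  "disks": { ... }, ... }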
function get_system_config
{
	typeset config=$PERF_DATA_DIR/$1

	echo "{" >>$config
	if is_linux; then
		echo "  \"ncpus\": \"$(nproc --all)\"," >>$config
		echo "  \"physmem\": \"$(free -b | \
		    awk '$1 == "Mem:" { print $2 }')\"," >>$config
		echo "  \"c_max\": \"$(get_max_arc_size)\"," >>$config
		echo "  \"hostname\": \"$(uname -n)\"," >>$config
		echo "  \"kernel version\": \"$(uname -sr)\"," >>$config
	else
		dtrace -qn 'BEGIN{
		    printf("  \"ncpus\": %d,\n", `ncpus);
		    printf("  \"physmem\": %u,\n", `physmem * `_pagesize);
		    printf("  \"c_max\": %u,\n", `arc_stats.arcstat_c_max.value.ui64);
		    printf("  \"kmem_flags\": \"0x%x\",", `kmem_flags);
		    exit(0)}' >>$config
		echo "  \"hostname\": \"$(uname -n)\"," >>$config
		echo "  \"kernel version\": \"$(uname -v)\"," >>$config
	fi
	if is_linux; then
		lsblk -dino NAME,SIZE | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    {disk = $1} {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>$config

		zfs_tunables="/sys/module/zfs/parameters"

		printf "  \"tunables\": {\n" >>$config
		for tunable in \
		    zfs_arc_max \
		    zfs_arc_meta_limit \
		    zfs_arc_sys_free \
		    zfs_dirty_data_max \
		    zfs_flags \
		    zfs_prefetch_disable \
		    zfs_txg_timeout \
		    zfs_vdev_aggregation_limit \
		    zfs_vdev_async_read_max_active \
		    zfs_vdev_async_write_max_active \
		    zfs_vdev_sync_read_max_active \
		    zfs_vdev_sync_write_max_active \
		    zio_slow_io_ms
		do
			if [ "$tunable" != "zfs_arc_max" ]
			then
				printf ",\n" >>$config
			fi
			printf "    \"$tunable\": \"$(<$zfs_tunables/$tunable)\"" \
			    >>$config
		done
		printf "\n  }\n" >>$config
	else
		iostat -En | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    /^c/ {disk = $1}
		    /^Size: [^0]/ {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>$config

		sed -n 's/^set \(.*\)[ ]=[ ]\(.*\)/\1=\2/p' /etc/system | \
		    awk -F= 'BEGIN {printf("  \"system\": {\n"); first = 1}
		    {if (first != 1) {printf(",\n")} else {first = 0};
		    printf("    \"%s\": %s", $1, $2)}
		    END {printf("\n  }\n")}' >>$config
	fi
	echo "}" >>$config
}

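#
# Compute the number of fio jobs from the CPU count: one job per CPU
# on small systems, and 3/4 of the CPUs beyond 8 (for example,
# 16 CPUs result in 12 jobs).
#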
function num_jobs_by_cpu
{
	if is_linux; then
		typeset ncpu=$(nproc --all)
	else
		typeset ncpu=$(psrinfo | wc -l)
	fi
	typeset num_jobs=$ncpu

	[[ $ncpu -gt 8 ]] && num_jobs=$(echo "$ncpu * 3 / 4" | bc)

	echo $num_jobs
}

#
# On illumos this looks like: ":sd3:sd4:sd1:sd2:"
#
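# On Linux the same list might look like ":sda:sdb:", and on FreeBSD
# like ":ada0:ada1:".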
function pool_to_lun_list
{
	typeset pool=$1
	typeset ctd ctds devname lun
	typeset lun_list=':'

	if is_illumos; then
		ctds=$(zpool list -v $pool |
		    awk '/c[0-9]*t[0-9a-fA-F]*d[0-9]*/ {print $1}')

		for ctd in $ctds; do
			# Get the device name as it appears in /etc/path_to_inst
			devname=$(readlink -f /dev/dsk/${ctd}s0 | sed -n \
			    's/\/devices\([^:]*\):.*/\1/p')
			# Add a string composed of the driver name and instance
			# number to the list for comparison with dev_statname.
			lun=$(sed 's/"//g' /etc/path_to_inst | grep \
			    $devname | awk '{print $3$2}')
			lun_list="$lun_list$lun:"
		done
	elif is_freebsd; then
		lun_list+=$(zpool list -HLv $pool | \
		    awk '/a?da[0-9]+|md[0-9]+|mfid[0-9]+|nda[0-9]+|nvd[0-9]+|vtbd[0-9]+/
		         { printf "%s:", $1 }')
	elif is_linux; then
		ctds=$(zpool list -HLv $pool | \
		    awk '/sd[a-z]*|loop[0-9]*|dm-[0-9]*/ {print $1}')

		for ctd in $ctds; do
			lun_list="$lun_list$ctd:"
		done
	fi
	echo $lun_list
}

function print_perf_settings
{
	echo "PERF_NTHREADS: $PERF_NTHREADS"
	echo "PERF_NTHREADS_PER_FS: $PERF_NTHREADS_PER_FS"
	echo "PERF_SYNC_TYPES: $PERF_SYNC_TYPES"
	echo "PERF_IOSIZES: $PERF_IOSIZES"
}

# Create a perf_data directory to hold performance statistics and
# configuration information.
export PERF_DATA_DIR=$(get_perf_output_dir)
[[ -f $PERF_DATA_DIR/config.json ]] || get_system_config config.json