xref: /freebsd/usr.bin/split/tests/split_test.sh (revision d9d69a6f)
#
# SPDX-License-Identifier: BSD-2-Clause
#
# Copyright (c) 2022-2023 Klara Systems
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
2775c3ca1eSKyle Evans
# Stand-in for MAXBSIZE from sys/param.h.  A non-empty value already present
# in the environment is kept; otherwise fall back to 65536.
MAXBSIZE=${MAXBSIZE:-65536}
3075c3ca1eSKyle Evans
atf_test_case bytes
bytes_body()
{
	# Round one: a 12-byte input that should come apart into three 4-byte
	# pieces, regardless of where the newlines fall.
	printf "aaaa" > foo-aa
	printf "bb\nc" > foo-ab
	printf "ccc\n" > foo-ac

	cat foo-aa foo-ab foo-ac > foo
	atf_check split -b 4 foo split-
	for piece in aa ab ac; do
		atf_check -o file:foo-${piece} cat split-${piece}
	done

	# Round two: MAXBSIZE is the default buffer size, so pick a piece size
	# slightly larger than it.  split(1) then has to read more than once
	# to fill a single output file, and must still cut in the right place.
	rm foo-* foo
	piece_len=$((MAXBSIZE + 12))
	jot -ns "" -b "a" ${piece_len} > foo-aa
	jot -ns "" -b "b" ${piece_len} > foo-ab
	jot -ns "" -b "c" 12 > foo-ac

	cat foo-aa foo-ab foo-ac > foo
	atf_check split -b ${piece_len} foo split-
	for piece in aa ab ac; do
		atf_check -o file:foo-${piece} cat split-${piece}
	done
}
5975c3ca1eSKyle Evans
atf_test_case chunks
chunks_body()
{
	# Input: 4096 a's, 4096 b's and 4104 c's, all on one unterminated line.
	{
		jot -ns "" -b "a" 4096
		jot -ns "" -b "b" 4096
		jot -ns "" -b "c" 4104
	} > foo

	# Expected result of asking for three chunks: the first two files hold
	# 4098 bytes each and the last one holds the remaining 4100.
	nchunks=3
	{
		jot -ns "" -b "a" 4096
		jot -ns "" -b "b" 2
	} > foo-aa
	{
		jot -ns "" -b "b" 4094
		jot -ns "" -b "c" 4
	} > foo-ab
	jot -ns "" -b "c" 4100 > foo-ac

	atf_check split -n ${nchunks} foo split-
	for piece in aa ab ac; do
		atf_check -o file:foo-${piece} cat split-${piece}
	done
}
7975c3ca1eSKyle Evans
atf_test_case sensible_lines
sensible_lines_body()
{
	# One expected output file per input line.
	echo "The quick brown fox" > foo-aa
	echo "jumps over" > foo-ab
	echo "the lazy dog" > foo-ac

	cat foo-aa foo-ab foo-ac > foo

	# Pass 1: split a named file one line at a time.
	atf_check split -l 1 foo split-
	for piece in aa ab ac; do
		atf_check -o file:foo-${piece} cat split-${piece}
	done

	# Pass 2: same split, but with `-` as the file operand so the input
	# comes from stdin, as documented.
	atf_check rm split-*
	atf_check -x 'split -l 1 - split- < foo'
	for piece in aa ab ac; do
		atf_check -o file:foo-${piece} cat split-${piece}
	done

	# Pass 3: with -l 2 the three lines should land as a 2/1 split rather
	# than the 1/1/1 seen above.
	cat foo-aa foo-ab > foo-aa-ng
	cat foo-ac > foo-ab-ng

	atf_check rm split-*
	atf_check split -l 2 foo split-

	for piece in aa ab; do
		atf_check -o file:foo-${piece}-ng cat split-${piece}
	done
}
11175c3ca1eSKyle Evans
atf_test_case long_lines
long_lines_body()
{

	# The input consists of three lines:
	#   1. MAXBSIZE a's
	#   2. MAXBSIZE b's followed by MAXBSIZE c's
	#   3. 1024 d's
	#
	# The historical split(1) never grew its internal buffer, so with
	# -l 1 two of the three split- files came out wrong: split-aa held
	# most of the first two lines, split-ab a sliver of the second line,
	# and split-ac the third line.
	#
	# Recent split(1) keeps growing the buffer until the line fits or
	# memory runs out, so each line should land in its own file.
	jot -s "" -b "a" ${MAXBSIZE} > foo-aa
	{
		jot -ns "" -b "b" ${MAXBSIZE}
		jot -s "" -b "c" ${MAXBSIZE}
	} > foo-ab
	jot -s "" -b "d" 1024 > foo-ac

	cat foo-aa foo-ab foo-ac > foo
	atf_check split -l 1 foo split-

	for piece in aa ab ac; do
		atf_check -o file:foo-${piece} cat split-${piece}
	done
}
14075c3ca1eSKyle Evans
atf_test_case numeric_suffix
numeric_suffix_body()
{
	# Expected outputs, named with the numeric suffixes that -d should
	# produce.
	echo "The quick brown fox" > foo-00
	echo "jumps over" > foo-01
	echo "the lazy dog" > foo-02

	cat foo-00 foo-01 foo-02 > foo
	atf_check split -d -l 1 foo split-

	for num in 00 01 02; do
		atf_check -o file:foo-${num} cat split-${num}
	done
}
15575c3ca1eSKyle Evans
atf_test_case larger_suffix_length
larger_suffix_length_body()
{
	: > foo

	# Build foo-000 .. foo-011.  File N holds a single line of N+1 a's, and
	# each line is appended to foo as it is created.
	for idx in $(seq -w 0 11); do
		# seq -w zero-pads to two digits; drop one leading zero before
		# doing arithmetic on the value.
		count=$((${idx#0} + 1))
		expected="foo-0${idx}"
		jot -s "" -b "a" ${count} > "${expected}"
		cat "${expected}" >> foo
	done

	# Three-character numeric suffixes, one line per output file.
	atf_check split -a 3 -d -l 1 foo split-
	for idx in $(seq -w 0 11); do
		atf_check -o file:"foo-0${idx}" cat "split-0${idx}"
	done
}
17675c3ca1eSKyle Evans
atf_test_case pattern
pattern_body()
{

	# Fake yaml makes for a realistic -p use-case: every top-level stanza
	# should end up in its own output file.
	cat > foo-aa <<EOF
cat:
  aa: true
  ab: true
  ac: true
EOF
	cat > foo-ab <<EOF
dog:
  ba: true
  bb: true
  bc: true
EOF

	cat foo-aa foo-ab > foo

	# Start a new file at each line that begins with a non-blank key
	# followed by a colon.
	atf_check split -p "^[^[:space:]]+:" foo split-
	for piece in aa ab; do
		atf_check -o file:foo-${piece} cat split-${piece}
	done
}
20275c3ca1eSKyle Evans
atf_test_case autoextend
autoextend_body()
{
	# One line per file, with one more line than the 26*25 files the test
	# expects under two-letter names; the extra line is expected to land
	# in a file with a longer suffix (xzaaa).
	short_names=$((26*25))
	total_lines=$((short_names + 1))

	seq ${total_lines} >input
	atf_check split -l1 input
	atf_check -o inline:"${short_names}\n" cat xyz
	atf_check -o inline:"${total_lines}\n" cat xzaaa
}
211cbea5eddSDag-Erling Smørgrav
atf_test_case noautoextend
noautoextend_body()
{
	# With an explicit -a2 all 26*26 two-letter names are expected to be
	# used, so the last line should be found in xzz.
	total_lines=$((26*26))

	seq ${total_lines} >input
	atf_check split -a2 -l1 input
	atf_check -o inline:"${total_lines}\n" cat xzz
}
219d9d69a6fSDag-Erling Smørgrav
atf_test_case reautoextend
reautoextend_body()
{
	# A later -a0 is expected to cancel the earlier -a2, giving the same
	# file layout as the autoextend case: xyz holds line 26*25 and the
	# following line goes to xzaaa.
	short_names=$((26*25))
	total_lines=$((short_names + 1))

	seq ${total_lines} >input
	atf_check split -a2 -a0 -l1 input
	atf_check -o inline:"${short_names}\n" cat xyz
	atf_check -o inline:"${total_lines}\n" cat xzaaa
}
228d9d69a6fSDag-Erling Smørgrav
atf_test_case continue
continue_body()
{
	echo hello > input

	# A plain run writes the single line to xaa and nothing else, so
	# reading xab must fail.
	atf_check split input
	atf_check -o file:input cat xaa
	atf_check -s exit:1 -e ignore cat xab

	# With -c the second run is expected to leave xaa alone and write its
	# output to the next name, xab.
	atf_check split -c input
	atf_check -o file:input cat xab
}
239cbea5eddSDag-Erling Smørgrav
atf_test_case undocumented_kludge
undocumented_kludge_body()
{
	# 5000 lines split 1000 at a time gives five files; the fifth one
	# should hold lines 4001 through 5000.
	tail_first=4001
	tail_last=5000

	seq ${tail_last} >input

	# A bare -1000 is expected to act as a line count.
	atf_check split -1000 input
	atf_check -o file:xae seq ${tail_first} ${tail_last}

	# The same count glued onto -d is expected to give numeric suffixes.
	atf_check split -d1000 input
	atf_check -o file:x04 seq ${tail_first} ${tail_last}
}
249117c54a7SDag-Erling Smørgrav
atf_test_case duplicate_linecount
duplicate_linecount_body()
{
	# Giving the line count twice, in any mix of the -N and -lN spellings,
	# must be rejected with exit status 64.  Each pair is left unquoted on
	# purpose so that it expands to two separate arguments.
	for opts in "-5 -5" "-l5 -5" "-5 -l5" "-l5 -l5"; do
		atf_check -s exit:64 -e ignore split ${opts} /dev/null
	done
}
258117c54a7SDag-Erling Smørgrav
atf_init_test_cases()
{
	# Register every test case defined in this file, in definition order.
	for tc in \
	    bytes \
	    chunks \
	    sensible_lines \
	    long_lines \
	    numeric_suffix \
	    larger_suffix_length \
	    pattern \
	    autoextend \
	    noautoextend \
	    reautoextend \
	    continue \
	    undocumented_kludge \
	    duplicate_linecount
	do
		atf_add_test_case ${tc}
	done
}
275