#
# SPDX-License-Identifier: BSD-2-Clause
#
# Copyright (c) 2022-2023 Klara Systems
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#

# Default for MAXBSIZE, mirroring sys/param.h.  The ${VAR:=default} form only
# assigns when the variable is unset or empty, so the caller's environment may
# override it.
: ${MAXBSIZE:=65536}

# split -b: the input is cut into fixed-size byte pieces, independent of where
# the newlines fall.
atf_test_case bytes
bytes_body()
{
	printf "aaaa" > foo-aa
	printf "bb\nc" > foo-ab
	printf "ccc\n" > foo-ac

	cat foo-* > foo
	atf_check split -b 4 foo split-
	atf_check -o file:foo-aa cat split-aa
	atf_check -o file:foo-ab cat split-ab
	atf_check -o file:foo-ac cat split-ac

	# MAXBSIZE is the default buffer size, so we'll split at just a little
	# bit past the buffer size to make sure that it still properly splits
	# even when it needs to read again to hit the limit.
	bsize=$((MAXBSIZE + 12))
	rm foo-* foo
	jot -ns "" -b "a" ${bsize} > foo-aa
	jot -ns "" -b "b" ${bsize} > foo-ab
	jot -ns "" -b "c" 12 > foo-ac

	cat foo-* > foo
	atf_check split -b ${bsize} foo split-
	atf_check -o file:foo-aa cat split-aa
	atf_check -o file:foo-ab cat split-ab
	atf_check -o file:foo-ac cat split-ac
}

# split -n: a 12296-byte input (4096 + 4096 + 4104) split into 3 chunks.  The
# expected files below are 4098, 4098 and 4100 bytes long respectively.
atf_test_case chunks
chunks_body()
{
	jot -ns "" -b "a" 4096 > foo
	jot -ns "" -b "b" 4096 >> foo
	jot -ns "" -b "c" 4104 >> foo

	chunks=3
	jot -ns "" -b "a" 4096 > foo-aa
	jot -ns "" -b "b" 2 >> foo-aa
	jot -ns "" -b "b" 4094 > foo-ab
	jot -ns "" -b "c" 4 >> foo-ab
	jot -ns "" -b "c" 4100 > foo-ac

	atf_check split -n ${chunks} foo split-
	atf_check -o file:foo-aa cat split-aa
	atf_check -o file:foo-ab cat split-ab
	atf_check -o file:foo-ac cat split-ac
}

# split -l with short lines: from a named file, from stdin via `-`, and with
# a line count of 2.
atf_test_case sensible_lines
sensible_lines_body()
{
	echo "The quick brown fox" > foo-aa
	echo "jumps over" > foo-ab
	echo "the lazy dog" > foo-ac

	cat foo-* > foo
	atf_check split -l 1 foo split-
	atf_check -o file:foo-aa cat split-aa
	atf_check -o file:foo-ab cat split-ab
	atf_check -o file:foo-ac cat split-ac

	# Try again, make sure that `-` uses stdin as documented.
	atf_check rm split-*
	# -x runs the command line through the shell so the redirection works.
	atf_check -x 'split -l 1 - split- < foo'
	atf_check -o file:foo-aa cat split-aa
	atf_check -o file:foo-ab cat split-ab
	atf_check -o file:foo-ac cat split-ac

	# Finally, try with -l == 2; we should see a 2/1 split instead of the
	# previous 1/1/1.
	cat foo-aa foo-ab > foo-aa-ng
	cat foo-ac > foo-ab-ng

	atf_check rm split-*
	atf_check split -l 2 foo split-

	atf_check -o file:foo-aa-ng cat split-aa
	atf_check -o file:foo-ab-ng cat split-ab
}

# split -l 1 with lines that are as long as, or longer than, MAXBSIZE.
atf_test_case long_lines
long_lines_body()
{

	# Test file lines will be:
	# a x MAXBSIZE
	# b x MAXBSIZE + c x MAXBSIZE
	# d x 1024
	#
	# The historical split(1) implementation wouldn't grow its internal
	# buffer, so we'd end up with 2/3 split- files being wrong with -l 1.
	# Notably, split-aa would include most of the first two lines, split-ab
	# a tiny fraction of the second line, and split-ac the third line.
	#
	# Recent split(1) instead grows the buffer until we can either fit the
	# line or we run out of memory.
	jot -s "" -b "a" ${MAXBSIZE} > foo-aa
	# -n suppresses the trailing newline so the b's and c's form one line.
	jot -ns "" -b "b" ${MAXBSIZE} > foo-ab
	jot -s "" -b "c" ${MAXBSIZE} >> foo-ab
	jot -s "" -b "d" 1024 > foo-ac

	cat foo-* > foo
	atf_check split -l 1 foo split-

	atf_check -o file:foo-aa cat split-aa
	atf_check -o file:foo-ab cat split-ab
	atf_check -o file:foo-ac cat split-ac
}

# split -d: output files are expected to carry numeric suffixes (00, 01, 02).
atf_test_case numeric_suffix
numeric_suffix_body()
{
	echo "The quick brown fox" > foo-00
	echo "jumps over" > foo-01
	echo "the lazy dog" > foo-02

	cat foo-* > foo
	atf_check split -d -l 1 foo split-

	atf_check -o file:foo-00 cat split-00
	atf_check -o file:foo-01 cat split-01
	atf_check -o file:foo-02 cat split-02
}

# split -a 3 -d: twelve one-line pieces are expected as split-000 .. split-011.
atf_test_case larger_suffix_length
larger_suffix_length_body()
{
	:> foo

	# Generate foo-000 through foo-009, then foo-010 and foo-011
	for i in $(seq -w 0 11); do
		# Strip one leading zero before doing arithmetic (presumably
		# so that values such as 08 are not parsed as octal).
		len=$((${i##0} + 1))
		file="foo-0${i}"
		jot -s "" -b "a" ${len} > ${file}
		cat ${file} >> foo
	done

	atf_check split -a 3 -d -l 1 foo split-
	for i in $(seq -w 0 11); do
		srcfile="foo-0${i}"
		splitfile="split-0${i}"
		atf_check -o file:"${srcfile}" cat "${splitfile}"
	done
}

# split -p: start a new output file at every line matching the pattern.
atf_test_case pattern
pattern_body()
{

	# Some fake yaml gives us a good realistic use-case for -p, as we can
	# split on top-level stanzas.
	#
	# The nested keys must stay indented: an unindented "aa:" would match
	# the pattern below and start an extra output file.
	cat <<EOF > foo-aa
cat:
  aa: true
  ab: true
  ac: true
EOF
	cat <<EOF > foo-ab
dog:
  ba: true
  bb: true
  bc: true
EOF

	cat foo-* > foo

	atf_check split -p "^[^[:space:]]+:" foo split-
	atf_check -o file:foo-aa cat split-aa
	atf_check -o file:foo-ab cat split-ab
}

# Default suffix length with 26*25+1 one-line pieces: piece 650 is expected in
# xyz and piece 651 in xzaaa, i.e. the suffix grows once two letters run out.
atf_test_case autoextend
autoextend_body()
{
	seq $((26*25+1)) >input
	atf_check split -l1 input
	atf_check -o inline:"$((26*25))\n" cat xyz
	atf_check -o inline:"$((26*25+1))\n" cat xzaaa
}

# Explicit -a2 with 26*26 one-line pieces: the last piece is expected in xzz.
atf_test_case noautoextend
noautoextend_body()
{
	seq $((26*26)) >input
	atf_check split -a2 -l1 input
	atf_check -o inline:"$((26*26))\n" cat xzz
}

# -a2 followed by -a0: the expected output names are the same as in the
# autoextend case.
atf_test_case reautoextend
reautoextend_body()
{
	seq $((26*25+1)) >input
	atf_check split -a2 -a0 -l1 input
	atf_check -o inline:"$((26*25))\n" cat xyz
	atf_check -o inline:"$((26*25+1))\n" cat xzaaa
}

# split -c: with xaa already present from the first run, the second run is
# expected to write to xab.
atf_test_case continue
continue_body()
{
	echo hello >input
	atf_check split input
	atf_check -o file:input cat xaa
	# xab must not exist yet, so cat is expected to fail here.
	atf_check -s exit:1 -e ignore cat xab
	atf_check split -c input
	atf_check -o file:input cat xab
}

# Line count given as a bare -NNNN option, alone and fused after -d.
atf_test_case undocumented_kludge
undocumented_kludge_body()
{
	seq 5000 >input
	atf_check split -1000 input
	atf_check -o file:xae seq 4001 5000
	atf_check split -d1000 input
	atf_check -o file:x04 seq 4001 5000
}

# Giving the line count twice, in any mix of -N and -lN, is expected to fail
# with exit status 64.
atf_test_case duplicate_linecount
duplicate_linecount_body()
{
	atf_check -s exit:64 -e ignore split -5 -5 /dev/null
	atf_check -s exit:64 -e ignore split -l5 -5 /dev/null
	atf_check -s exit:64 -e ignore split -5 -l5 /dev/null
	atf_check -s exit:64 -e ignore split -l5 -l5 /dev/null
}

# Register every test case defined above with atf.
atf_init_test_cases()
{
	atf_add_test_case bytes
	atf_add_test_case chunks
	atf_add_test_case sensible_lines
	atf_add_test_case long_lines
	atf_add_test_case numeric_suffix
	atf_add_test_case larger_suffix_length
	atf_add_test_case pattern
	atf_add_test_case autoextend
	atf_add_test_case noautoextend
	atf_add_test_case reautoextend
	atf_add_test_case continue
	atf_add_test_case undocumented_kludge
	atf_add_test_case duplicate_linecount
}