1#!/usr/bin/env bash
2
3# Purpose: plain text tar format
4# Limitations: - only suitable for text files, directories, and symlinks
5#              - stores only filename, content, and mode
6#              - not designed for untrusted input
7#
8# Note: must work with bash version 3.2 (macOS)
9
10# Copyright 2017 Roger Luethi
11#
12# Licensed under the Apache License, Version 2.0 (the "License");
13# you may not use this file except in compliance with the License.
14# You may obtain a copy of the License at
15#
16# http://www.apache.org/licenses/LICENSE-2.0
17#
18# Unless required by applicable law or agreed to in writing, software
19# distributed under the License is distributed on an "AS IS" BASIS,
20# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21# See the License for the specific language governing permissions and
22# limitations under the License.
23
# Abort on the first failing command and on any use of an unset variable.
set -eu

# Sanitize environment (for instance, standard sorting of glob matches)
LC_ALL=C
export LC_ALL

# Script-wide state: the selected command (empty until an option picks one)
# and the command line exactly as the script received it.
CMD=""
path=""
ARG_STRING="$*"
32
33#------------------------------------------------------------------------------
34# Not all sed implementations can work on null bytes. In order to make ttar
35# work out of the box on macOS, use Python as a stream editor.
36
# 1 if the Python filters below must be used instead of sed, 0 otherwise.
USE_PYTHON=0

# Stream filter for archive creation: puts a backslash in front of the
# literal markers EOF and NULLBYTE, then encodes null bytes as NULLBYTE.
# The replacement templates spell the backslash as '\\' because Python 3.7
# and later reject unknown escapes such as '\E' and '\N' in re.sub()
# replacements; '\\' yields a single backslash on Python 2 and 3 alike.
PYTHON_CREATE_FILTER=$(cat << 'PCF'
#!/usr/bin/env python

import re
import sys

for line in sys.stdin:
    line = re.sub(r'EOF', r'\\EOF', line)
    line = re.sub(r'NULLBYTE', r'\\NULLBYTE', line)
    line = re.sub('\x00', r'NULLBYTE', line)
    sys.stdout.write(line)
PCF
)

# Stream filter for extraction, the inverse of the create filter: decodes
# unescaped NULLBYTE markers to null bytes, drops an unescaped EOF marker,
# and removes one escaping backslash in front of NULLBYTE and EOF.
PYTHON_EXTRACT_FILTER=$(cat << 'PEF'
#!/usr/bin/env python

import re
import sys

for line in sys.stdin:
    line = re.sub(r'(?<!\\)NULLBYTE', '\x00', line)
    line = re.sub(r'\\NULLBYTE', 'NULLBYTE', line)
    line = re.sub(r'([^\\])EOF', r'\1', line)
    line = re.sub(r'\\EOF', 'EOF', line)
    sys.stdout.write(line)
PEF
)
67
# Check whether sed can write a null byte ("a" -> NUL plus newline is two
# bytes). If it cannot, switch to the Python filters by setting USE_PYTHON=1,
# or exit with status 2 when no python executable is available.
# Diagnostics go to stderr so they never mix with data written to stdout.
function test_environment {
    if [[ "$(echo "a" | sed 's/a/\x0/' | wc -c)" -ne 2 ]]; then
        echo >&2 "WARNING sed unable to handle null bytes, using Python (slow)."
        # command -v is a shell builtin; unlike which it needs no extra tool
        if ! command -v python >/dev/null 2>&1; then
            echo >&2 "ERROR Python not found. Aborting."
            exit 2
        fi
        USE_PYTHON=1
    fi
}
78
79#------------------------------------------------------------------------------
80
# Print the usage text to stdout and terminate the script.
# Arguments: $1 - exit status to terminate with
function usage {
    # Keep the script name out of the global namespace
    local bname
    bname=$(basename -- "$0")
    cat << USAGE
Usage:   $bname [-C <DIR>] -c -f <ARCHIVE> <FILE...> (create archive)
         $bname            -t -f <ARCHIVE>           (list archive contents)
         $bname [-C <DIR>] -x -f <ARCHIVE>           (extract archive)

Options:
         -C <DIR>           (change directory)
         -v                 (verbose)
         --recursive-unlink (recursively delete existing directory if path
                             collides with file or directory to extract)

Example: Change to sysfs directory, create ttar file from fixtures directory
         $bname -C sysfs -c -f sysfs/fixtures.ttar fixtures/
USAGE
    exit "$1"
}
99
# Echo the arguments to file descriptor 7, but only when VERBOSE is "yes".
# Does nothing (and succeeds) otherwise.
function vecho {
    if [[ "${VERBOSE:-}" != "yes" ]]; then
        return 0
    fi
    echo "$@" >&7
}
105
# Record the command selected on the command line in CMD.
# Arguments: $1 - command name
# If CMD is already set, report the conflict on stderr (like the other
# ERROR messages in this script) and hand over to usage with status 2.
function set_cmd {
    if [[ -n "$CMD" ]]; then
        echo >&2 "ERROR: more than one command given"
        echo
        usage 2
    fi
    CMD=$1
}
114
# Do not inherit these switches from the environment
unset VERBOSE
unset RECURSIVE_UNLINK

# The leading ':' in the option string selects getopts' silent mode: an
# unknown option yields opt='?', a missing option argument yields opt=':',
# and OPTARG holds the offending option letter in both cases.
# The '-:' entry makes "--name" arrive as option '-' with OPTARG "name".
while getopts :cf:-:htxvC: opt; do
    case $opt in
        c)
            set_cmd "create"
            ;;
        f)
            ARCHIVE=$OPTARG
            ;;
        h)
            usage 0
            ;;
        t)
            set_cmd "list"
            ;;
        x)
            set_cmd "extract"
            ;;
        v)
            VERBOSE=yes
            # Duplicate the current stdout onto fd 7
            exec 7>&1
            ;;
        C)
            CDIR=$OPTARG
            ;;
        -)
            case $OPTARG in
                recursive-unlink)
                    RECURSIVE_UNLINK="yes"
                    ;;
                *)
                    echo >&2 "ERROR: invalid option --$OPTARG"
                    echo
                    usage 1
                    ;;
            esac
            ;;
        :)
            echo >&2 "ERROR: option -$OPTARG requires an argument"
            echo
            usage 1
            ;;
        *)
            echo >&2 "ERROR: invalid option -$OPTARG"
            echo
            usage 1
            ;;
    esac
done

# Remove processed options from arguments
shift $(( OPTIND - 1 ))

# A command and an archive name are both mandatory
if [[ -z "${CMD:-}" ]]; then
    echo >&2 "ERROR: no command given"
    echo
    usage 1
elif [[ -z "${ARCHIVE:-}" ]]; then
    echo >&2 "ERROR: no archive name given"
    echo
    usage 1
fi
174
# Print the entries of a ttar archive, one per line: regular files by path,
# directories with a trailing slash, symlinks as "path -> target".
# Arguments: $1 - archive file; any further non-empty argument is an error
# Errors are reported through usage with status 1.
function list {
    local path=""
    local size=0
    local line
    local ttar_file=$1
    if [[ -n "${2:-}" ]]; then
        echo >&2 "ERROR: too many arguments."
        echo
        usage 1
    fi
    if [[ ! -e "$ttar_file" ]]; then
        echo >&2 "ERROR: file not found ($ttar_file)"
        echo
        usage 1
    fi
    # IFS= keeps leading/trailing whitespace, so paths are shown exactly as
    # extract would create them; the extra test picks up a final line that
    # lacks a newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        if [ "$size" -gt 0 ]; then
            # Still inside file content announced by a "Lines:" header
            size=$(( size - 1 ))
            continue
        fi
        if [[ $line =~ ^Path:\ (.*)$ ]]; then
            path=${BASH_REMATCH[1]}
        elif [[ $line =~ ^Lines:\ (.*)$ ]]; then
            size=${BASH_REMATCH[1]}
            printf '%s\n' "$path"
        elif [[ $line =~ ^Directory:\ (.*)$ ]]; then
            path=${BASH_REMATCH[1]}
            printf '%s/\n' "$path"
        elif [[ $line =~ ^SymlinkTo:\ (.*)$ ]]; then
            printf '%s -> %s\n' "$path" "${BASH_REMATCH[1]}"
        fi
    done < "$ttar_file"
}
209
# Unpack a ttar archive relative to the current directory.
# Arguments: $1 - archive file; any further non-empty argument is an error
# Globals:   USE_PYTHON, PYTHON_EXTRACT_FILTER, RECURSIVE_UNLINK (read)
# Existing symlinks and files at an entry's path are removed first; an
# existing directory is removed with rmdir, or rm -r when RECURSIVE_UNLINK
# is "yes". Exits with status 1 on an unknown keyword.
function extract {
    local path=""
    local size=0
    local line_no=0
    local line mode eof_without_newline
    local ttar_file=$1
    if [[ -n "${2:-}" ]]; then
        echo >&2 "ERROR: too many arguments."
        echo
        usage 1
    fi
    if [[ ! -e "$ttar_file" ]]; then
        echo >&2 "ERROR: file not found ($ttar_file)"
        echo
        usage 1
    fi
    # The extra test picks up a final line that lacks a newline
    while IFS= read -r line || [[ -n "$line" ]]; do
        line_no=$(( line_no + 1 ))
        if [ "$size" -gt 0 ]; then
            if [[ "$line" != *EOF* && "$line" != *NULLBYTE* ]]; then
                # No marker in this line: the filters below would pass it
                # through unchanged, so write it directly and save a process.
                printf '%s\n' "$line" >> "$path"
            else
                if [[ "$line" =~ [^\\]EOF ]]; then
                    # An EOF not preceded by a backslash indicates that the
                    # line does not end with a newline
                    eof_without_newline=1
                else
                    eof_without_newline=0
                fi
                # Replace NULLBYTE with null byte if at beginning of line
                # Replace NULLBYTE with null byte unless preceded by backslash
                # Remove one backslash in front of NULLBYTE (if any)
                # Remove EOF unless preceded by backslash
                # Remove one backslash in front of EOF
                #
                # printf is used instead of echo -n because echo would treat
                # a content line such as "-n" or "-e" as an option and print
                # nothing.
                if [ "$USE_PYTHON" -eq 1 ]; then
                    printf '%s' "$line" | \
                        python -c "$PYTHON_EXTRACT_FILTER" >> "$path"
                else
                    # The repeated pattern makes up for sed's lack of negative
                    # lookbehind assertions (for consecutive null bytes).
                    printf '%s' "$line" | \
                        sed -e 's/^NULLBYTE/\x0/g;
                                s/\([^\\]\)NULLBYTE/\1\x0/g;
                                s/\([^\\]\)NULLBYTE/\1\x0/g;
                                s/\\NULLBYTE/NULLBYTE/g;
                                s/\([^\\]\)EOF/\1/g;
                                s/\\EOF/EOF/g;
                        ' >> "$path"
                fi
                if [[ "$eof_without_newline" -eq 0 ]]; then
                    echo >> "$path"
                fi
            fi
            size=$(( size - 1 ))
            continue
        fi
        if [[ $line =~ ^Path:\ (.*)$ ]]; then
            path=${BASH_REMATCH[1]}
            # Clear whatever currently occupies the path
            if [ -L "$path" ]; then
                rm -- "$path"
            elif [ -d "$path" ]; then
                if [ "${RECURSIVE_UNLINK:-}" == "yes" ]; then
                    rm -r -- "$path"
                else
                    # Safe because symlinks to directories are dealt with above
                    rmdir -- "$path"
                fi
            elif [ -e "$path" ]; then
                rm -- "$path"
            fi
        elif [[ $line =~ ^Lines:\ (.*)$ ]]; then
            size=${BASH_REMATCH[1]}
            # Create file even if it is zero-length.
            touch -- "$path"
            vecho "    $path"
        elif [[ $line =~ ^Mode:\ (.*)$ ]]; then
            mode=${BASH_REMATCH[1]}
            chmod -- "$mode" "$path"
            vecho "$mode"
        elif [[ $line =~ ^Directory:\ (.*)$ ]]; then
            path=${BASH_REMATCH[1]}
            mkdir -p -- "$path"
            vecho "    $path/"
        elif [[ $line =~ ^SymlinkTo:\ (.*)$ ]]; then
            ln -s -- "${BASH_REMATCH[1]}" "$path"
            vecho "    $path -> ${BASH_REMATCH[1]}"
        elif [[ $line =~ ^# ]]; then
            # Ignore comments between files
            continue
        else
            echo >&2 "ERROR: Unknown keyword on line $line_no: $line"
            exit 1
        fi
    done < "$ttar_file"
}
300
# Print the divider comment line that closes each archive entry.
function div {
    local -r lead="# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -"
    local -r trail="- - - - - -"
    printf '%s %s\n' "$lead" "$trail"
}
305
# Print the permission bits of a file in octal.
# Arguments: $1 - file to inspect
# Globals:   STAT_OPTION, STAT_FORMAT (written on the first call in a shell)
# NOTE: when invoked through command substitution the two globals are set
# in a subshell, so the probe is repeated on the next call.
function get_mode {
    local target=$1
    # Work out which stat dialect is installed unless already known
    [[ -n "${STAT_OPTION:-}" ]] || {
        if stat -c '%a' "$target" >/dev/null 2>&1; then
            # GNU stat
            STAT_OPTION='-c'
            STAT_FORMAT='%a'
        else
            # BSD stat
            STAT_OPTION='-f'
            # Octal output, user/group/other (omit file type, sticky bit)
            STAT_FORMAT='%OLp'
        fi
    }
    stat "$STAT_OPTION" "$STAT_FORMAT" "$target"
}
322
# Write archive entries for every argument to stdout, descending into
# directories (hidden files included).
# Arguments: $@ - symlinks, directories, or regular files
# Globals:   USE_PYTHON, PYTHON_CREATE_FILTER (read)
# Exits with status 2 for anything that is none of the three supported
# types. Enables the nullglob shell option as a side effect.
function _create {
    shopt -s nullglob
    # This function recurses, so every working variable must be local
    local file mode lines symlinkTo eof_without_newline x
    while (( "$#" )); do
        file=$1
        if [ -L "$file" ]; then
            echo "Path: $file"
            symlinkTo=$(readlink "$file")
            echo "SymlinkTo: $symlinkTo"
            vecho "    $file -> $symlinkTo"
            div
        elif [ -d "$file" ]; then
            # Strip trailing slash (if there is one)
            file=${file%/}
            echo "Directory: $file"
            mode=$(get_mode "$file")
            echo "Mode: $mode"
            vecho "$mode $file/"
            div
            # Find all files and dirs, including hidden/dot files
            for x in "$file/"{*,.[^.]*}; do
                _create "$x"
            done
        elif [ -f "$file" ]; then
            echo "Path: $file"
            # Reading from stdin makes wc print the bare count; the
            # arithmetic expansion strips the padding some wc versions add.
            lines=$(( $(wc -l < "$file") ))
            eof_without_newline=0
            # Non-empty file whose last byte is not a newline
            if [[ -s "$file" ]] && \
                    [[ "$(tail -c 1 < "$file" | wc -l)" -eq 0 ]]; then
                eof_without_newline=1
                lines=$((lines+1))
            fi
            echo "Lines: $lines"
            # Add backslash in front of EOF
            # Add backslash in front of NULLBYTE
            # Replace null byte with NULLBYTE
            if [ "$USE_PYTHON" -eq 1 ]; then
                < "$file" python -c "$PYTHON_CREATE_FILTER"
            else
                < "$file" \
                    sed 's/EOF/\\EOF/g;
                            s/NULLBYTE/\\NULLBYTE/g;
                            s/\x0/NULLBYTE/g;
                    '
            fi
            if [[ "$eof_without_newline" -eq 1 ]]; then
                # Finish line with EOF to indicate that the original line did
                # not end with a linefeed
                echo "EOF"
            fi
            mode=$(get_mode "$file")
            echo "Mode: $mode"
            vecho "$mode $file"
            div
        else
            echo >&2 "ERROR: file not found ($file in $(pwd))"
            exit 2
        fi
        shift
    done
}
385
# Build a new archive.
# Arguments: $1 - archive file to write; $2... - paths to store
# Replaces an existing archive file, points stdout at the new file for the
# rest of the shell's life, writes the header comment, then hands the paths
# to _create. A missing (or empty) first path is reported through usage 1.
function create {
    ttar_file=$1
    shift
    [[ -n "${1:-}" ]] || {
        echo "ERROR: missing arguments." >&2
        echo
        usage 1
    }
    # Drop any previous archive before opening the new one
    [[ ! -e "$ttar_file" ]] || rm "$ttar_file"
    exec > "$ttar_file"
    printf '%s\n' "# Archive created by ttar $ARG_STRING"
    _create "$@"
}
401
# Choose between sed and Python before any archive work starts
test_environment

# With -C, switch to the requested directory first
if [[ -n "${CDIR:-}" ]]; then
    case "$ARCHIVE" in
        /*)
            # Absolute path: unaffected by the directory change
            ;;
        *)
            # Relative path: preserve the archive's location before changing
            # directory
            ARCHIVE="$(pwd)/$ARCHIVE"
            ;;
    esac
    cd "$CDIR"
fi

# CMD names the function to run: create, list, or extract
"$CMD" "$ARCHIVE" "$@"
414