1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package txtar implements a trivial text-based file archive format.
6//
7// The goals for the format are:
8//
9//	- be trivial enough to create and edit by hand.
10//	- be able to store trees of text files describing go command test cases.
11//	- diff nicely in git history and code reviews.
12//
13// Non-goals include being a completely general archive format,
14// storing binary data, storing file modes, storing special files like
15// symbolic links, and so on.
16//
17// Txtar format
18//
19// A txtar archive is zero or more comment lines and then a sequence of file entries.
20// Each file entry begins with a file marker line of the form "-- FILENAME --"
21// and is followed by zero or more file content lines making up the file data.
22// The comment or file content ends at the next file marker line.
23// The file marker line must begin with the three-byte sequence "-- "
24// and end with the three-byte sequence " --", but the enclosed
// file name can be surrounded by additional white space,
26// all of which is stripped.
27//
28// If the txtar file is missing a trailing newline on the final line,
29// parsers should consider a final newline to be present anyway.
30//
31// There are no possible syntax errors in a txtar archive.
32package txtar
33
34import (
35	"bytes"
36	"fmt"
37	"io/ioutil"
38	"strings"
39)
40
41// An Archive is a collection of files.
42type Archive struct {
43	Comment []byte
44	Files   []File
45}
46
// A File is one named entry of an Archive.
type File struct {
	// Name is the file's name as written between the "-- " and " --"
	// of its marker line, for example "foo/bar.txt".
	Name string

	// Data is the text content of the file.
	Data []byte
}
52
53// Format returns the serialized form of an Archive.
54// It is assumed that the Archive data structure is well-formed:
55// a.Comment and all a.File[i].Data contain no file marker lines,
56// and all a.File[i].Name is non-empty.
57func Format(a *Archive) []byte {
58	var buf bytes.Buffer
59	buf.Write(fixNL(a.Comment))
60	for _, f := range a.Files {
61		fmt.Fprintf(&buf, "-- %s --\n", f.Name)
62		buf.Write(fixNL(f.Data))
63	}
64	return buf.Bytes()
65}
66
67// ParseFile parses the named file as an archive.
68func ParseFile(file string) (*Archive, error) {
69	data, err := ioutil.ReadFile(file)
70	if err != nil {
71		return nil, err
72	}
73	return Parse(data), nil
74}
75
76// Parse parses the serialized form of an Archive.
77// The returned Archive holds slices of data.
78func Parse(data []byte) *Archive {
79	a := new(Archive)
80	var name string
81	a.Comment, name, data = findFileMarker(data)
82	for name != "" {
83		f := File{name, nil}
84		f.Data, name, data = findFileMarker(data)
85		a.Files = append(a.Files, f)
86	}
87	return a
88}
89
// Byte sequences that delimit a file marker line of the form "-- NAME --".
var (
	// marker is the prefix every file marker line starts with.
	marker = []byte("-- ")
	// markerEnd is the suffix every file marker line ends with.
	markerEnd = []byte(" --")
	// newlineMarker is marker preceded by a newline; searching for it
	// locates the next line that could be a file marker.
	newlineMarker = []byte("\n-- ")
)
95
96// findFileMarker finds the next file marker in data,
97// extracts the file name, and returns the data before the marker,
98// the file name, and the data after the marker.
99// If there is no next marker, findFileMarker returns before = fixNL(data), name = "", after = nil.
100func findFileMarker(data []byte) (before []byte, name string, after []byte) {
101	var i int
102	for {
103		if name, after = isMarker(data[i:]); name != "" {
104			return data[:i], name, after
105		}
106		j := bytes.Index(data[i:], newlineMarker)
107		if j < 0 {
108			return fixNL(data), "", nil
109		}
110		i += j + 1 // positioned at start of new possible marker
111	}
112}
113
114// isMarker checks whether data begins with a file marker line.
115// If so, it returns the name from the line and the data after the line.
116// Otherwise it returns name == "" with an unspecified after.
117func isMarker(data []byte) (name string, after []byte) {
118	if !bytes.HasPrefix(data, marker) {
119		return "", nil
120	}
121	if i := bytes.IndexByte(data, '\n'); i >= 0 {
122		data, after = data[:i], data[i+1:]
123	}
124	if !bytes.HasSuffix(data, markerEnd) {
125		return "", nil
126	}
127	return strings.TrimSpace(string(data[len(marker) : len(data)-len(markerEnd)])), after
128}
129
// fixNL ensures that non-empty data ends in a newline.
// If data is empty or already ends in \n, fixNL returns data itself.
// Otherwise it returns a newly allocated copy of data with \n appended;
// data is left unmodified.
func fixNL(data []byte) []byte {
	n := len(data)
	if n > 0 && data[n-1] != '\n' {
		fixed := make([]byte, n+1)
		copy(fixed, data)
		fixed[n] = '\n'
		return fixed
	}
	return data
}
141