1// Copyright 2015 Richard Lehane. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package writer
16
17import (
18	"bufio"
19	"encoding/csv"
20	"encoding/hex"
21	"fmt"
22	"io"
23	"path/filepath"
24	"strconv"
25	"strings"
26	"time"
27
28	"github.com/richardlehane/siegfried/pkg/config"
29	"github.com/richardlehane/siegfried/pkg/core"
30)
31
32type Writer interface {
33	Head(path string, scanned, created time.Time, version [3]int, ids [][2]string, fields [][]string, hh string) // 	path := filepath.Base(path)
34	File(name string, sz int64, mod string, checksum []byte, err error, ids []core.Identification)               // if a directory give a negative sz
35	Tail()
36}
37
38func Null() Writer {
39	return null{}
40}
41
42type null struct{}
43
44func (n null) Head(path string, scanned, created time.Time, version [3]int, ids [][2]string, fields [][]string, hh string) {
45}
46func (n null) File(name string, sz int64, mod string, cs []byte, err error, ids []core.Identification) {
47}
48func (n null) Tail() {}
49
// csvWriter is a Writer that emits results as CSV rows.
type csvWriter struct {
	// w is the CSV encoder wrapping the destination.
	w *csv.Writer
	// names holds the first field name of every field group given to Head.
	names []string
	// recs is the reusable row buffer; recs[0] is sized by Head to the full
	// column count and extra rows are appended by File as needed.
	recs [][]string
}
55
56func CSV(w io.Writer) Writer {
57	return &csvWriter{w: csv.NewWriter(w)}
58}
59
60func (c *csvWriter) Head(path string, scanned, created time.Time, version [3]int, ids [][2]string, fields [][]string, hh string) {
61	c.names = make([]string, len(fields))
62	l := 4
63	if hh != "" {
64		l++
65	}
66	for i, f := range fields {
67		l += len(f)
68		c.names[i] = f[0]
69	}
70	c.recs = make([][]string, 1)
71	c.recs[0] = make([]string, l)
72	c.recs[0][0], c.recs[0][1], c.recs[0][2], c.recs[0][3] = "filename", "filesize", "modified", "errors"
73	idx := 4
74	if hh != "" {
75		c.recs[0][4] = hh
76		idx++
77	}
78	for _, f := range fields {
79		copy(c.recs[0][idx:], f)
80		idx += len(f)
81	}
82	c.w.Write(c.recs[0])
83}
84
85func (c *csvWriter) File(name string, sz int64, mod string, checksum []byte, err error, ids []core.Identification) {
86	var errStr string
87	if err != nil {
88		errStr = err.Error()
89	}
90	c.recs[0][0], c.recs[0][1], c.recs[0][2], c.recs[0][3] = name, strconv.FormatInt(sz, 10), mod, errStr
91	idx := 4
92	if checksum != nil {
93		c.recs[0][4] = hex.EncodeToString(checksum)
94		idx++
95	}
96	if len(ids) == 0 {
97		empty := make([]string, len(c.recs[0])-idx)
98		if checksum != nil {
99			c.recs[0][4] = ""
100		}
101		copy(c.recs[0][idx:], empty)
102		c.w.Write(c.recs[0])
103		return
104	}
105
106	var thisName string
107	var rowIdx, colIdx, prevLen int
108	colIdx = idx
109	for _, id := range ids {
110		fields := id.Values()
111		if thisName == fields[0] {
112			rowIdx++
113		} else {
114			thisName = fields[0]
115			rowIdx = 0
116			colIdx += prevLen
117			prevLen = len(fields)
118		}
119		if rowIdx >= len(c.recs) {
120			c.recs = append(c.recs, make([]string, len(c.recs[0])))
121			copy(c.recs[rowIdx][:idx], c.recs[0][:idx])
122		}
123		copy(c.recs[rowIdx][colIdx:], fields)
124	}
125	for _, r := range c.recs {
126		c.w.Write(r)
127	}
128	c.recs = c.recs[:1]
129	return
130}
131
132func (c *csvWriter) Tail() { c.w.Flush() }
133
// yamlWriter is a Writer that emits results as a stream of YAML documents.
type yamlWriter struct {
	// w buffers output to the destination.
	w *bufio.Writer
	// replacer escapes text that is placed inside single quotes.
	replacer *strings.Replacer
	// hh is the hash name given to Head.
	hh string
	// hstrs holds one format string per field group, built by header.
	hstrs []string
	// vals holds one reusable argument buffer per field group.
	vals [][]interface{}
}
141
142func YAML(w io.Writer) Writer {
143	return &yamlWriter{
144		replacer: strings.NewReplacer("'", "''"),
145		w:        bufio.NewWriter(w),
146	}
147}
148
// header builds the fmt format string used to print one YAML match entry.
// Every field name becomes a "name : %v" line, with names left-aligned and
// padded to the longest name. The field "namespace" is printed as "ns" and is
// not counted when working out the padding width. The first line is prefixed
// with "  - " and the following lines are indented by four spaces.
func header(fields []string) string {
	width := 0
	for _, name := range fields {
		if name == "namespace" {
			continue
		}
		if n := len(name); n > width {
			width = n
		}
	}
	const suffix = " : %v\n"
	labels := make([]string, 0, len(fields))
	for _, name := range fields {
		label := name
		if label == "namespace" {
			label = "ns"
		}
		labels = append(labels, fmt.Sprintf("%-*s", width, label))
	}
	return "  - " + strings.Join(labels, suffix+"    ") + suffix
}
166
167func (y *yamlWriter) Head(path string, scanned, created time.Time, version [3]int, ids [][2]string, fields [][]string, hh string) {
168	y.hh = hh
169	y.hstrs = make([]string, len(fields))
170	y.vals = make([][]interface{}, len(fields))
171	for i, f := range fields {
172		y.hstrs[i] = header(f)
173		y.vals[i] = make([]interface{}, len(f))
174	}
175	fmt.Fprintf(y.w,
176		"---\nsiegfried   : %d.%d.%d\nscandate    : %v\nsignature   : %s\ncreated     : %v\nidentifiers : \n",
177		version[0], version[1], version[2],
178		scanned.Format(time.RFC3339),
179		y.replacer.Replace(path),
180		created.Format(time.RFC3339))
181	for _, id := range ids {
182		fmt.Fprintf(y.w, "  - name    : '%v'\n    details : '%v'\n", id[0], id[1])
183	}
184}
185
186func (y *yamlWriter) File(name string, sz int64, mod string, checksum []byte, err error, ids []core.Identification) {
187	var (
188		errStr   string
189		h        string
190		thisName string
191		idx      int = -1
192	)
193	if err != nil {
194		errStr = "'" + y.replacer.Replace(err.Error()) + "'"
195	}
196	if checksum != nil {
197		h = fmt.Sprintf("%-8s : %s\n", y.hh, hex.EncodeToString(checksum))
198	}
199	fmt.Fprintf(y.w, "---\nfilename : '%s'\nfilesize : %d\nmodified : %s\nerrors   : %s\n%smatches  :\n", y.replacer.Replace(name), sz, mod, errStr, h)
200	for _, id := range ids {
201		values := id.Values()
202		if values[0] != thisName {
203			idx++
204			thisName = values[0]
205		}
206		for i, v := range values {
207			if v == "" {
208				y.vals[idx][i] = ""
209				continue
210			}
211			y.vals[idx][i] = "'" + y.replacer.Replace(v) + "'"
212		}
213		fmt.Fprintf(y.w, y.hstrs[idx], y.vals[idx]...)
214	}
215}
216
217func (y *yamlWriter) Tail() { y.w.Flush() }
218
// jsonWriter is a Writer that emits results as a single JSON object.
type jsonWriter struct {
	// w buffers output to the destination.
	w *bufio.Writer
	// replacer escapes text placed inside JSON string literals.
	replacer *strings.Replacer
	// hh is the hash name given to Head; it is used as the checksum key.
	hh string
	// hstrs holds one match encoder per field group, built by jsonizer.
	hstrs []func([]string) string
	// subs is set once a file has been written, so that later files are
	// preceded by a comma.
	subs bool
}
226
227func JSON(w io.Writer) Writer {
228	return &jsonWriter{
229		replacer: strings.NewReplacer(`"`, `\"`, `\\`, `\\`, `\`, `\\`),
230		w:        bufio.NewWriter(w),
231	}
232}
233
// jsonizer returns a function that encodes one match as a JSON object whose
// keys are the given field names (with "namespace" shortened to "ns") and
// whose values are the strings passed to the returned function, in order.
//
// The fields slice is not modified. If the returned function receives fewer
// values than there are fields, the missing values are encoded as empty
// strings; surplus values are ignored. With no fields the result is "{}".
// Values are written as given; the caller is responsible for any escaping.
// The returned function reuses an internal buffer and is therefore not safe
// for concurrent use.
func jsonizer(fields []string) func([]string) string {
	// Build the `"key":"` prefixes in a private slice so the caller's field
	// names are left untouched.
	keys := make([]string, len(fields))
	for i, v := range fields {
		if v == "namespace" {
			v = "ns"
		}
		keys[i] = "\"" + v + "\":\""
	}
	vals := make([]string, len(keys))
	return func(values []string) string {
		if len(keys) == 0 {
			return "{}"
		}
		// Every cell is rewritten on each call, so nothing from a previous
		// call can leak into the output.
		for i, k := range keys {
			if i < len(values) {
				vals[i] = k + values[i]
			} else {
				vals[i] = k
			}
		}
		return "{" + strings.Join(vals, "\",") + "\"}"
	}
}
250
251func (j *jsonWriter) Head(path string, scanned, created time.Time, version [3]int, ids [][2]string, fields [][]string, hh string) {
252	j.hh = hh
253	j.hstrs = make([]func([]string) string, len(fields))
254	for i, f := range fields {
255		j.hstrs[i] = jsonizer(f)
256	}
257	fmt.Fprintf(j.w,
258		"{\"siegfried\":\"%d.%d.%d\",\"scandate\":\"%v\",\"signature\":\"%s\",\"created\":\"%v\",\"identifiers\":[",
259		version[0], version[1], version[2],
260		scanned.Format(time.RFC3339),
261		path,
262		created.Format(time.RFC3339))
263	for i, id := range ids {
264		if i > 0 {
265			j.w.WriteString(",")
266		}
267		fmt.Fprintf(j.w, "{\"name\":\"%s\",\"details\":\"%s\"}", id[0], id[1])
268	}
269	j.w.WriteString("],\"files\":[")
270}
271
272func (j *jsonWriter) File(name string, sz int64, mod string, checksum []byte, err error, ids []core.Identification) {
273	if j.subs {
274		j.w.WriteString(",")
275	}
276	var (
277		errStr   string
278		h        string
279		thisName string
280		idx      int = -1
281	)
282	if err != nil {
283		errStr = err.Error()
284	}
285	if checksum != nil {
286		h = fmt.Sprintf("\"%s\":\"%s\",", j.hh, hex.EncodeToString(checksum))
287	}
288	fmt.Fprintf(j.w, "{\"filename\":\"%s\",\"filesize\": %d,\"modified\":\"%s\",\"errors\": \"%s\",%s\"matches\": [", j.replacer.Replace(name), sz, mod, errStr, h)
289	for i, id := range ids {
290		if i > 0 {
291			j.w.WriteString(",")
292		}
293		values := id.Values()
294		if values[0] != thisName {
295			idx++
296			thisName = values[0]
297		}
298		j.w.WriteString(j.hstrs[idx](values))
299	}
300	j.w.WriteString("]}")
301	j.subs = true
302	return
303}
304
305func (j *jsonWriter) Tail() {
306	j.w.WriteString("]}\n")
307	j.w.Flush()
308}
309
310type droidWriter struct {
311	id      int
312	parents map[string]parent
313	rec     []string
314	w       *csv.Writer
315}
316
// parent describes a folder or archive that later items may be nested in.
// The field order matters: values are built with positional literals.
type parent struct {
	// id is the record number of the parent row.
	id int
	// uri is the parent's URI; archive is the archive type name, or empty
	// for a plain folder.
	uri, archive string
}
322
323func Droid(w io.Writer) Writer {
324	return &droidWriter{
325		parents: make(map[string]parent),
326		rec:     make([]string, 18),
327		w:       csv.NewWriter(w),
328	}
329}
330
331// "identifier", "id", "format name", "format version", "mimetype", "basis", "warning"
332func (d *droidWriter) Head(path string, scanned, created time.Time, version [3]int, ids [][2]string, fields [][]string, hh string) {
333	if hh == "" {
334		hh = "no"
335	}
336	d.w.Write([]string{
337		"ID", "PARENT_ID", "URI", "FILE_PATH", "NAME",
338		"METHOD", "STATUS", "SIZE", "TYPE", "EXT",
339		"LAST_MODIFIED", "EXTENSION_MISMATCH", strings.ToUpper(hh) + "_HASH", "FORMAT_COUNT",
340		"PUID", "MIME_TYPE", "FORMAT_NAME", "FORMAT_VERSION"})
341}
342
343func (d *droidWriter) File(p string, sz int64, mod string, checksum []byte, err error, ids []core.Identification) {
344	d.id++
345	d.rec[0], d.rec[6], d.rec[10] = strconv.Itoa(d.id), "Done", mod
346	if err != nil {
347		d.rec[6] = err.Error()
348	}
349	d.rec[1], d.rec[2], d.rec[3], d.rec[4], d.rec[9] = d.processPath(p)
350	// if folder (has sz -1) or error
351	if sz < 0 || ids == nil {
352		d.rec[5], d.rec[7], d.rec[12], d.rec[13], d.rec[14], d.rec[15], d.rec[16], d.rec[17] = "", "", "", "", "", "", "", ""
353		if sz < 0 {
354			d.rec[8], d.rec[9], d.rec[11] = "Folder", "", "false"
355			d.parents[d.rec[3]] = parent{d.id, d.rec[2], ""}
356		} else {
357			d.rec[8], d.rec[11] = "", ""
358		}
359		d.rec[3] = clearArchivePath(d.rec[2], d.rec[3])
360		d.w.Write(d.rec)
361		return
362	}
363	// size
364	d.rec[7] = strconv.FormatInt(sz, 10)
365	if checksum == nil {
366		d.rec[12] = ""
367	} else {
368		d.rec[12] = hex.EncodeToString(checksum)
369	}
370	// leave early for unknowns
371	if len(ids) < 1 || !ids[0].Known() {
372		d.rec[5], d.rec[8], d.rec[11], d.rec[13] = "", "File", "FALSE", "0"
373		d.rec[14], d.rec[15], d.rec[16], d.rec[17] = "", "", "", ""
374		d.rec[3] = clearArchivePath(d.rec[2], d.rec[3])
375		d.w.Write(d.rec)
376		return
377	}
378	d.rec[13] = strconv.Itoa(len(ids))
379	for _, id := range ids {
380		if id.Archive() > config.None {
381			d.rec[8] = "Container"
382			d.parents[d.rec[3]] = parent{d.id, d.rec[2], id.Archive().String()}
383		} else {
384			d.rec[8] = "File"
385		}
386		fields := id.Values()
387		d.rec[5], d.rec[11] = getMethod(fields[5]), mismatch(fields[6])
388		d.rec[14], d.rec[15], d.rec[16], d.rec[17] = fields[1], fields[4], fields[2], fields[3]
389		d.rec[3] = clearArchivePath(d.rec[2], d.rec[3])
390		d.w.Write(d.rec)
391	}
392	return
393}
394
395func (d *droidWriter) Tail() { d.w.Flush() }
396
397func (d *droidWriter) processPath(p string) (parent, uri, path, name, ext string) {
398	path, _ = filepath.Abs(p)
399	path = strings.TrimSuffix(path, string(filepath.Separator))
400	name = filepath.Base(path)
401	dir := filepath.Dir(path)
402	par, ok := d.parents[dir]
403	if ok {
404		parent = strconv.Itoa(par.id)
405		uri = toUri(par.uri, par.archive, escape(name))
406	} else {
407		puri := "file:/" + escape(filepath.ToSlash(dir))
408		uri = toUri(puri, "", escape(name))
409	}
410	ext = strings.TrimPrefix(filepath.Ext(p), ".")
411	return
412}
413
// toUri joins a parent URI and a base name with "/". When parentarc is
// non-empty the parent is an archive, and the parent URI is wrapped as
// "<parentarc>:<parenturi>!" before the base name is appended.
func toUri(parenturi, parentarc, base string) string {
	if parentarc == "" {
		return parenturi + "/" + base
	}
	return parentarc + ":" + parenturi + "!" + "/" + base
}
420
// uri escaping adapted from https://golang.org/src/net/url/url.go

// shouldEscape reports whether byte c must be percent-encoded in a URI.
// ASCII letters, digits and the characters - _ . ~ / : are left as they are;
// every other byte is escaped.
func shouldEscape(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9':
		return false
	case c == '-', c == '_', c == '.', c == '~', c == '/', c == ':':
		return false
	default:
		return true
	}
}
432
433func escape(s string) string {
434	var hexCount int
435	for i := 0; i < len(s); i++ {
436		c := s[i]
437		if shouldEscape(c) {
438			hexCount++
439		}
440	}
441	if hexCount == 0 {
442		return s
443	}
444	t := make([]byte, len(s)+2*hexCount)
445	j := 0
446	for i := 0; i < len(s); i++ {
447		if c := s[i]; shouldEscape(c) {
448			t[j] = '%'
449			t[j+1] = "0123456789ABCDEF"[c>>4]
450			t[j+2] = "0123456789ABCDEF"[c&15]
451			j += 3
452		} else {
453			t[j] = s[i]
454			j++
455		}
456	}
457	return string(t)
458}
459
460func clearArchivePath(uri, path string) string {
461	if strings.HasPrefix(uri, config.Zip.String()) ||
462		strings.HasPrefix(uri, config.Tar.String()) ||
463		strings.HasPrefix(uri, config.Gzip.String()) {
464		path = ""
465	}
466	return path
467}
468
// getMethod maps an identification basis to a DROID METHOD value. The basis
// is searched for the keywords below in order and the first hit wins; an
// empty string is returned when none of them occurs.
func getMethod(basis string) string {
	rules := [...][2]string{
		{"container", "Container"},
		{"byte", "Signature"},
		{"extension", "Extension"},
		{"text", "Text"},
	}
	for _, rule := range rules {
		if strings.Contains(basis, rule[0]) {
			return rule[1]
		}
	}
	return ""
}
482
// mismatch returns "TRUE" when the warning text mentions an extension
// mismatch and "FALSE" otherwise.
func mismatch(warning string) string {
	if !strings.Contains(warning, "extension mismatch") {
		return "FALSE"
	}
	return "TRUE"
}
489