1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
// This file is mostly a copy of the Go standard library's text/tabwriter,
// with the addition that ANSI control characters are stripped before cell
// widths are calculated.
7
8// Package tabwriter implements a write filter (tabwriter.Writer) that
9// translates tabbed columns in input into properly aligned text.
10//
11// The package is using the Elastic Tabstops algorithm described at
12// http://nickgravgaard.com/elastictabstops/index.html.
13//
14package tabwriter
15
16import (
17	"bytes"
18	"io"
19	"unicode/utf8"
20
21	"github.com/lunixbochs/vtclean"
22)
23
24// ----------------------------------------------------------------------------
25// Filter implementation
26
// A cell describes one segment of text that is terminated by a tab or a
// line break. The text itself lives in the Writer's buffer; a cell only
// records the segment's size in bytes, its width in runes, and whether the
// segment was terminated by a horizontal tab ('\t').
type cell struct {
	size  int  // number of bytes the cell occupies in the buffer
	width int  // number of runes counted toward the column width
	htab  bool // set when the terminator was an htab ('\t')
}
37
38// A Writer is a filter that inserts padding around tab-delimited
39// columns in its input to align them in the output.
40//
41// The Writer treats incoming bytes as UTF-8 encoded text consisting
42// of cells terminated by (horizontal or vertical) tabs or line
43// breaks (newline or formfeed characters). Cells in adjacent lines
44// constitute a column. The Writer inserts padding as needed to
45// make all cells in a column have the same width, effectively
46// aligning the columns. It assumes that all characters have the
47// same width except for tabs for which a tabwidth must be specified.
48// Note that cells are tab-terminated, not tab-separated: trailing
49// non-tab text at the end of a line does not form a column cell.
50//
51// The Writer assumes that all Unicode code points have the same width;
52// this may not be true in some fonts.
53//
54// If DiscardEmptyColumns is set, empty columns that are terminated
55// entirely by vertical (or "soft") tabs are discarded. Columns
56// terminated by horizontal (or "hard") tabs are not affected by
57// this flag.
58//
59// If a Writer is configured to filter HTML, HTML tags and entities
60// are passed through. The widths of tags and entities are
61// assumed to be zero (tags) and one (entities) for formatting purposes.
62//
63// A segment of text may be escaped by bracketing it with Escape
64// characters. The tabwriter passes escaped text segments through
65// unchanged. In particular, it does not interpret any tabs or line
66// breaks within the segment. If the StripEscape flag is set, the
67// Escape characters are stripped from the output; otherwise they
68// are passed through as well. For the purpose of formatting, the
69// width of the escaped text is always computed excluding the Escape
70// characters.
71//
72// The formfeed character ('\f') acts like a newline but it also
73// terminates all columns in the current line (effectively calling
74// Flush). Cells in the next line start new columns. Unless found
75// inside an HTML tag or inside an escaped text segment, formfeed
76// characters appear as newlines in the output.
77//
78// The Writer must buffer input internally, because proper spacing
79// of one line may depend on the cells in future lines. Clients must
80// call Flush when done calling Write.
81//
82type Writer struct {
83	// configuration
84	output   io.Writer
85	minwidth int
86	tabwidth int
87	padding  int
88	padbytes [8]byte
89	flags    uint
90
91	// current state
92	buf       bytes.Buffer // collected text excluding tabs or line breaks
93	pos       int          // buffer position up to which cell.width of incomplete cell has been computed
94	cell      cell         // current incomplete cell; cell.width is up to buf[pos] excluding ignored sections
95	endChar   byte         // terminating char of escaped sequence (Escape for escapes, '>', ';' for HTML tags/entities, or 0)
96	lines     [][]cell     // list of lines; each line is a list of cells
97	widths    []int        // list of column widths in runes - re-used during formatting
98	alignment map[int]uint // column alignment
99}
100
101func (b *Writer) addLine() { b.lines = append(b.lines, []cell{}) }
102
103// Reset the current state.
104func (b *Writer) reset() {
105	b.buf.Reset()
106	b.pos = 0
107	b.cell = cell{}
108	b.endChar = 0
109	b.lines = b.lines[0:0]
110	b.widths = b.widths[0:0]
111	b.alignment = make(map[int]uint)
112	b.addLine()
113}
114
115// Internal representation (current state):
116//
117// - all text written is appended to buf; tabs and line breaks are stripped away
118// - at any given time there is a (possibly empty) incomplete cell at the end
119//   (the cell starts after a tab or line break)
120// - cell.size is the number of bytes belonging to the cell so far
121// - cell.width is text width in runes of that cell from the start of the cell to
122//   position pos; html tags and entities are excluded from this width if html
123//   filtering is enabled
124// - the sizes and widths of processed text are kept in the lines list
125//   which contains a list of cells for each line
126// - the widths list is a temporary list with current widths used during
127//   formatting; it is kept in Writer because it's re-used
128//
129//                    |<---------- size ---------->|
130//                    |                            |
131//                    |<- width ->|<- ignored ->|  |
132//                    |           |             |  |
133// [---processed---tab------------<tag>...</tag>...]
134// ^                  ^                         ^
135// |                  |                         |
136// buf                start of incomplete cell  pos
137
// These flags control formatting. They are bit values and may be or'ed
// together in the flags argument of Init and NewWriter.
const (
	// FilterHTML makes the Writer ignore HTML tags and treat entities
	// (starting with '&' and ending in ';') as single characters (width = 1).
	FilterHTML uint = 1 << iota

	// StripEscape removes the Escape characters that bracket escaped text
	// segments instead of passing them through unchanged with the text.
	StripEscape

	// AlignRight forces right-alignment of cell content.
	// The default is left-alignment.
	AlignRight

	// DiscardEmptyColumns handles empty columns as if they were not
	// present in the input in the first place.
	DiscardEmptyColumns

	// TabIndent always uses tabs for indentation columns (i.e., padding of
	// leading empty cells on the left), independent of padchar.
	TabIndent

	// Debug prints a vertical bar ('|') between columns (after formatting).
	// Discarded columns appear as zero-width columns ("||").
	Debug
)
164
165// A Writer must be initialized with a call to Init. The first parameter (output)
166// specifies the filter output. The remaining parameters control the formatting:
167//
168//	minwidth	minimal cell width including any padding
169//	tabwidth	width of tab characters (equivalent number of spaces)
170//	padding		padding added to a cell before computing its width
171//	padchar		ASCII char used for padding
172//			if padchar == '\t', the Writer will assume that the
173//			width of a '\t' in the formatted output is tabwidth,
174//			and cells are left-aligned independent of align_left
175//			(for correct-looking results, tabwidth must correspond
176//			to the tab width in the viewer displaying the result)
177//	flags		formatting control
178//
179func (b *Writer) Init(output io.Writer, minwidth, tabwidth, padding int, padchar byte, flags uint) *Writer {
180	if minwidth < 0 || tabwidth < 0 || padding < 0 {
181		panic("negative minwidth, tabwidth, or padding")
182	}
183	b.output = output
184	b.minwidth = minwidth
185	b.tabwidth = tabwidth
186	b.padding = padding
187	for i := range b.padbytes {
188		b.padbytes[i] = padchar
189	}
190	if padchar == '\t' {
191		// tab padding enforces left-alignment
192		flags &^= AlignRight
193	}
194	b.flags = flags
195
196	b.reset()
197
198	return b
199}
200
201// debugging support (keep code around)
202func (b *Writer) dump() {
203	pos := 0
204	for i, line := range b.lines {
205		print("(", i, ") ")
206		for _, c := range line {
207			print("[", string(b.buf.Bytes()[pos:pos+c.size]), "]")
208			pos += c.size
209		}
210		print("\n")
211	}
212	print("\n")
213}
214
// osError wraps an error returned by the underlying output. write0 panics
// with an osError and handlePanic unwraps it again, which lets output
// errors (returned to the caller) be told apart from genuine panics
// (which must not be returned as errors).
type osError struct {
	err error
}
220
221func (b *Writer) write0(buf []byte) {
222	n, err := b.output.Write(buf)
223	if n != len(buf) && err == nil {
224		err = io.ErrShortWrite
225	}
226	if err != nil {
227		panic(osError{err})
228	}
229}
230
231func (b *Writer) writeN(src []byte, n int) {
232	for n > len(src) {
233		b.write0(src)
234		n -= len(src)
235	}
236	b.write0(src[0:n])
237}
238
var (
	// newline is written after every formatted line except the last buffered one.
	newline = []byte("\n")

	// tabs is the chunk of tab characters that writePadding repeats when
	// padding is done with tabs.
	tabs = []byte{'\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t'}
)
243
244func (b *Writer) writePadding(textw, cellw int, useTabs bool) {
245	if b.padbytes[0] == '\t' || useTabs {
246		// padding is done with tabs
247		if b.tabwidth == 0 {
248			return // tabs have no width - can't do any padding
249		}
250		// make cellw the smallest multiple of b.tabwidth
251		cellw = (cellw + b.tabwidth - 1) / b.tabwidth * b.tabwidth
252		n := cellw - textw // amount of padding
253		if n < 0 {
254			panic("internal error")
255		}
256		b.writeN(tabs, (n+b.tabwidth-1)/b.tabwidth)
257		return
258	}
259
260	// padding is done with non-tab characters
261	b.writeN(b.padbytes[0:], cellw-textw)
262}
263
// vbar is the column separator that writeLines emits between cells when the
// Debug flag is set.
var vbar = []byte("|")
265
266func (b *Writer) writeLines(pos0 int, line0, line1 int) (pos int) {
267	pos = pos0
268	for i := line0; i < line1; i++ {
269		line := b.lines[i]
270
271		// if TabIndent is set, use tabs to pad leading empty cells
272		useTabs := b.flags&TabIndent != 0
273
274		for j, c := range line {
275			if j > 0 && b.flags&Debug != 0 {
276				// indicate column break
277				b.write0(vbar)
278			}
279
280			if c.size == 0 {
281				// empty cell
282				if j < len(b.widths) {
283					b.writePadding(c.width, b.widths[j], useTabs)
284				}
285			} else {
286				// non-empty cell
287				useTabs = false
288				alignColumnRight := b.alignment[j] == AlignRight
289				if (b.flags&AlignRight == 0) && !alignColumnRight { // align left
290					b.write0(b.buf.Bytes()[pos : pos+c.size])
291					pos += c.size
292					if j < len(b.widths) {
293						b.writePadding(c.width, b.widths[j], false)
294					}
295				} else if alignColumnRight && j < len(b.widths) {
296					// just this column
297					internalSize := b.widths[j] - b.padding
298					if j < len(b.widths) {
299						b.writePadding(c.width, internalSize, false)
300					}
301					b.write0(b.buf.Bytes()[pos : pos+c.size])
302					if b.padding > 0 {
303						b.writePadding(0, b.padding, false)
304					}
305					pos += c.size
306				} else { // align right
307					if j < len(b.widths) {
308						b.writePadding(c.width, b.widths[j], false)
309					}
310					b.write0(b.buf.Bytes()[pos : pos+c.size])
311					pos += c.size
312				}
313			}
314		}
315
316		if i+1 == len(b.lines) {
317			// last buffered line - we don't have a newline, so just write
318			// any outstanding buffered data
319			b.write0(b.buf.Bytes()[pos : pos+b.cell.size])
320			pos += b.cell.size
321		} else {
322			// not the last line - write newline
323			b.write0(newline)
324		}
325	}
326	return
327}
328
329// Format the text between line0 and line1 (excluding line1); pos
330// is the buffer position corresponding to the beginning of line0.
331// Returns the buffer position corresponding to the beginning of
332// line1 and an error, if any.
333//
334func (b *Writer) format(pos0 int, line0, line1 int) (pos int) {
335	pos = pos0
336	column := len(b.widths)
337	for this := line0; this < line1; this++ {
338		line := b.lines[this]
339
340		if column < len(line)-1 {
341			// cell exists in this column => this line
342			// has more cells than the previous line
343			// (the last cell per line is ignored because cells are
344			// tab-terminated; the last cell per line describes the
345			// text before the newline/formfeed and does not belong
346			// to a column)
347
348			// print unprinted lines until beginning of block
349			pos = b.writeLines(pos, line0, this)
350			line0 = this
351
352			// column block begin
353			width := b.minwidth // minimal column width
354			discardable := true // true if all cells in this column are empty and "soft"
355			for ; this < line1; this++ {
356				line = b.lines[this]
357				if column < len(line)-1 {
358					// cell exists in this column
359					c := line[column]
360					// update width
361					if w := c.width + b.padding; w > width {
362						width = w
363					}
364					// update discardable
365					if c.width > 0 || c.htab {
366						discardable = false
367					}
368				} else {
369					break
370				}
371			}
372			// column block end
373
374			// discard empty columns if necessary
375			if discardable && b.flags&DiscardEmptyColumns != 0 {
376				width = 0
377			}
378
379			// format and print all columns to the right of this column
380			// (we know the widths of this column and all columns to the left)
381			b.widths = append(b.widths, width) // push width
382			pos = b.format(pos, line0, this)
383			b.widths = b.widths[0 : len(b.widths)-1] // pop width
384			line0 = this
385		}
386	}
387
388	// print unprinted lines until end
389	return b.writeLines(pos, line0, line1)
390}
391
392// Append text to current cell.
393func (b *Writer) append(text []byte) {
394	b.buf.Write(text)
395	b.cell.size += len(text)
396}
397
398// Update the cell width.
399func (b *Writer) updateWidth() {
400	// ---- Changes here -----
401	newChars := b.buf.Bytes()[b.pos:b.buf.Len()]
402	cleaned := vtclean.Clean(string(newChars), false) // false to strip colors
403	b.cell.width += utf8.RuneCount([]byte(cleaned))
404	// --- end of changes ----
405	b.pos = b.buf.Len()
406}
407
// Escape is the character that brackets an escaped text segment. The Writer
// does not interpret tabs or line breaks inside such a segment. For
// instance, the tab in the string "Ignore this tab: \xff\t\xff" does not
// terminate a cell and constitutes a single character of width one for
// formatting purposes.
//
// The value 0xff was chosen because it cannot appear in a valid UTF-8 sequence.
const Escape = '\xff'
416
417// Start escaped mode.
418func (b *Writer) startEscape(ch byte) {
419	switch ch {
420	case Escape:
421		b.endChar = Escape
422	case '<':
423		b.endChar = '>'
424	case '&':
425		b.endChar = ';'
426	}
427}
428
429// Terminate escaped mode. If the escaped text was an HTML tag, its width
430// is assumed to be zero for formatting purposes; if it was an HTML entity,
431// its width is assumed to be one. In all other cases, the width is the
432// unicode width of the text.
433//
434func (b *Writer) endEscape() {
435	switch b.endChar {
436	case Escape:
437		b.updateWidth()
438		if b.flags&StripEscape == 0 {
439			b.cell.width -= 2 // don't count the Escape chars
440		}
441	case '>': // tag of zero width
442	case ';':
443		b.cell.width++ // entity, count as one rune
444	}
445	b.pos = b.buf.Len()
446	b.endChar = 0
447}
448
449// Terminate the current cell by adding it to the list of cells of the
450// current line. Returns the number of cells in that line.
451//
452func (b *Writer) terminateCell(htab bool) int {
453	b.cell.htab = htab
454	line := &b.lines[len(b.lines)-1]
455	*line = append(*line, b.cell)
456	b.cell = cell{}
457	return len(*line)
458}
459
460func handlePanic(err *error, op string) {
461	if e := recover(); e != nil {
462		if nerr, ok := e.(osError); ok {
463			*err = nerr.err
464			return
465		}
466		panic("tabwriter: panic during " + op)
467	}
468}
469
470// Flush should be called after the last call to Write to ensure
471// that any data buffered in the Writer is written to output. Any
472// incomplete escape sequence at the end is considered
473// complete for formatting purposes.
474//
475func (b *Writer) Flush() (err error) {
476	defer b.reset() // even in the presence of errors
477	defer handlePanic(&err, "Flush")
478
479	// add current cell if not empty
480	if b.cell.size > 0 {
481		if b.endChar != 0 {
482			// inside escape - terminate it even if incomplete
483			b.endEscape()
484		}
485		b.terminateCell(false)
486	}
487
488	// format contents of buffer
489	b.format(0, 0, len(b.lines))
490
491	return
492}
493
// hbar is the section break that Write emits after a formfeed when the
// Debug flag is set.
var hbar = []byte{'-', '-', '-', '\n'}
495
496// SetColumnAlignRight will mark a particular column as align right.
497// This is reset on the next flush.
498func (b *Writer) SetColumnAlignRight(column int) {
499	b.alignment[column] = AlignRight
500}
501
502// Write writes buf to the writer b.
503// The only errors returned are ones encountered
504// while writing to the underlying output stream.
505//
506func (b *Writer) Write(buf []byte) (n int, err error) {
507	defer handlePanic(&err, "Write")
508
509	// split text into cells
510	n = 0
511	for i, ch := range buf {
512		if b.endChar == 0 {
513			// outside escape
514			switch ch {
515			case '\t', '\v', '\n', '\f':
516				// end of cell
517				b.append(buf[n:i])
518				b.updateWidth()
519				n = i + 1 // ch consumed
520				ncells := b.terminateCell(ch == '\t')
521				if ch == '\n' || ch == '\f' {
522					// terminate line
523					b.addLine()
524					if ch == '\f' || ncells == 1 {
525						// A '\f' always forces a flush. Otherwise, if the previous
526						// line has only one cell which does not have an impact on
527						// the formatting of the following lines (the last cell per
528						// line is ignored by format()), thus we can flush the
529						// Writer contents.
530						if err = b.Flush(); err != nil {
531							return
532						}
533						if ch == '\f' && b.flags&Debug != 0 {
534							// indicate section break
535							b.write0(hbar)
536						}
537					}
538				}
539
540			case Escape:
541				// start of escaped sequence
542				b.append(buf[n:i])
543				b.updateWidth()
544				n = i
545				if b.flags&StripEscape != 0 {
546					n++ // strip Escape
547				}
548				b.startEscape(Escape)
549
550			case '<', '&':
551				// possibly an html tag/entity
552				if b.flags&FilterHTML != 0 {
553					// begin of tag/entity
554					b.append(buf[n:i])
555					b.updateWidth()
556					n = i
557					b.startEscape(ch)
558				}
559			}
560
561		} else {
562			// inside escape
563			if ch == b.endChar {
564				// end of tag/entity
565				j := i + 1
566				if ch == Escape && b.flags&StripEscape != 0 {
567					j = i // strip Escape
568				}
569				b.append(buf[n:j])
570				n = i + 1 // ch consumed
571				b.endEscape()
572			}
573		}
574	}
575
576	// append leftover text
577	b.append(buf[n:])
578	n = len(buf)
579	return
580}
581
582// NewWriter allocates and initializes a new tabwriter.Writer.
583// The parameters are the same as for the Init function.
584//
585func NewWriter(output io.Writer, minwidth, tabwidth, padding int, padchar byte, flags uint) *Writer {
586	return new(Writer).Init(output, minwidth, tabwidth, padding, padchar, flags)
587}
588