1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// This file implements FormatSelections and FormatText.
6// FormatText is used to HTML-format Go and non-Go source
7// text with line numbers and highlighted sections. It is
8// built on top of FormatSelections, a generic formatter
9// for "selected" text.
10
11package godoc
12
13import (
14	"fmt"
15	"go/scanner"
16	"go/token"
17	"io"
18	"regexp"
19	"strconv"
20	"text/template"
21)
22
23// ----------------------------------------------------------------------------
24// Implementation of FormatSelections
25
26// A Segment describes a text segment [start, end).
27// The zero value of a Segment is a ready-to-use empty segment.
28//
29type Segment struct {
30	start, end int
31}
32
33func (seg *Segment) isEmpty() bool { return seg.start >= seg.end }
34
35// A Selection is an "iterator" function returning a text segment.
36// Repeated calls to a selection return consecutive, non-overlapping,
37// non-empty segments, followed by an infinite sequence of empty
38// segments. The first empty segment marks the end of the selection.
39//
40type Selection func() Segment
41
42// A LinkWriter writes some start or end "tag" to w for the text offset offs.
43// It is called by FormatSelections at the start or end of each link segment.
44//
45type LinkWriter func(w io.Writer, offs int, start bool)
46
47// A SegmentWriter formats a text according to selections and writes it to w.
48// The selections parameter is a bit set indicating which selections provided
49// to FormatSelections overlap with the text segment: If the n'th bit is set
50// in selections, the n'th selection provided to FormatSelections is overlapping
51// with the text.
52//
53type SegmentWriter func(w io.Writer, text []byte, selections int)
54
55// FormatSelections takes a text and writes it to w using link and segment
56// writers lw and sw as follows: lw is invoked for consecutive segment starts
57// and ends as specified through the links selection, and sw is invoked for
58// consecutive segments of text overlapped by the same selections as specified
59// by selections. The link writer lw may be nil, in which case the links
60// Selection is ignored.
61//
62func FormatSelections(w io.Writer, text []byte, lw LinkWriter, links Selection, sw SegmentWriter, selections ...Selection) {
63	// If we have a link writer, make the links
64	// selection the last entry in selections
65	if lw != nil {
66		selections = append(selections, links)
67	}
68
69	// compute the sequence of consecutive segment changes
70	changes := newMerger(selections)
71
72	// The i'th bit in bitset indicates that the text
73	// at the current offset is covered by selections[i].
74	bitset := 0
75	lastOffs := 0
76
77	// Text segments are written in a delayed fashion
78	// such that consecutive segments belonging to the
79	// same selection can be combined (peephole optimization).
80	// last describes the last segment which has not yet been written.
81	var last struct {
82		begin, end int // valid if begin < end
83		bitset     int
84	}
85
86	// flush writes the last delayed text segment
87	flush := func() {
88		if last.begin < last.end {
89			sw(w, text[last.begin:last.end], last.bitset)
90		}
91		last.begin = last.end // invalidate last
92	}
93
94	// segment runs the segment [lastOffs, end) with the selection
95	// indicated by bitset through the segment peephole optimizer.
96	segment := func(end int) {
97		if lastOffs < end { // ignore empty segments
98			if last.end != lastOffs || last.bitset != bitset {
99				// the last segment is not adjacent to or
100				// differs from the new one
101				flush()
102				// start a new segment
103				last.begin = lastOffs
104			}
105			last.end = end
106			last.bitset = bitset
107		}
108	}
109
110	for {
111		// get the next segment change
112		index, offs, start := changes.next()
113		if index < 0 || offs > len(text) {
114			// no more segment changes or the next change
115			// is past the end of the text - we're done
116			break
117		}
118		// determine the kind of segment change
119		if lw != nil && index == len(selections)-1 {
120			// we have a link segment change (see start of this function):
121			// format the previous selection segment, write the
122			// link tag and start a new selection segment
123			segment(offs)
124			flush()
125			lastOffs = offs
126			lw(w, offs, start)
127		} else {
128			// we have a selection change:
129			// format the previous selection segment, determine
130			// the new selection bitset and start a new segment
131			segment(offs)
132			lastOffs = offs
133			mask := 1 << uint(index)
134			if start {
135				bitset |= mask
136			} else {
137				bitset &^= mask
138			}
139		}
140	}
141	segment(len(text))
142	flush()
143}
144
145// A merger merges a slice of Selections and produces a sequence of
146// consecutive segment change events through repeated next() calls.
147//
148type merger struct {
149	selections []Selection
150	segments   []Segment // segments[i] is the next segment of selections[i]
151}
152
153const infinity int = 2e9
154
155func newMerger(selections []Selection) *merger {
156	segments := make([]Segment, len(selections))
157	for i, sel := range selections {
158		segments[i] = Segment{infinity, infinity}
159		if sel != nil {
160			if seg := sel(); !seg.isEmpty() {
161				segments[i] = seg
162			}
163		}
164	}
165	return &merger{selections, segments}
166}
167
168// next returns the next segment change: index specifies the Selection
169// to which the segment belongs, offs is the segment start or end offset
170// as determined by the start value. If there are no more segment changes,
171// next returns an index value < 0.
172//
173func (m *merger) next() (index, offs int, start bool) {
174	// find the next smallest offset where a segment starts or ends
175	offs = infinity
176	index = -1
177	for i, seg := range m.segments {
178		switch {
179		case seg.start < offs:
180			offs = seg.start
181			index = i
182			start = true
183		case seg.end < offs:
184			offs = seg.end
185			index = i
186			start = false
187		}
188	}
189	if index < 0 {
190		// no offset found => all selections merged
191		return
192	}
193	// offset found - it's either the start or end offset but
194	// either way it is ok to consume the start offset: set it
195	// to infinity so it won't be considered in the following
196	// next call
197	m.segments[index].start = infinity
198	if start {
199		return
200	}
201	// end offset found - consume it
202	m.segments[index].end = infinity
203	// advance to the next segment for that selection
204	seg := m.selections[index]()
205	if !seg.isEmpty() {
206		m.segments[index] = seg
207	}
208	return
209}
210
211// ----------------------------------------------------------------------------
212// Implementation of FormatText
213
214// lineSelection returns the line segments for text as a Selection.
215func lineSelection(text []byte) Selection {
216	i, j := 0, 0
217	return func() (seg Segment) {
218		// find next newline, if any
219		for j < len(text) {
220			j++
221			if text[j-1] == '\n' {
222				break
223			}
224		}
225		if i < j {
226			// text[i:j] constitutes a line
227			seg = Segment{i, j}
228			i = j
229		}
230		return
231	}
232}
233
234// tokenSelection returns, as a selection, the sequence of
235// consecutive occurrences of token sel in the Go src text.
236//
237func tokenSelection(src []byte, sel token.Token) Selection {
238	var s scanner.Scanner
239	fset := token.NewFileSet()
240	file := fset.AddFile("", fset.Base(), len(src))
241	s.Init(file, src, nil, scanner.ScanComments)
242	return func() (seg Segment) {
243		for {
244			pos, tok, lit := s.Scan()
245			if tok == token.EOF {
246				break
247			}
248			offs := file.Offset(pos)
249			if tok == sel {
250				seg = Segment{offs, offs + len(lit)}
251				break
252			}
253		}
254		return
255	}
256}
257
258// makeSelection is a helper function to make a Selection from a slice of pairs.
259// Pairs describing empty segments are ignored.
260//
261func makeSelection(matches [][]int) Selection {
262	i := 0
263	return func() Segment {
264		for i < len(matches) {
265			m := matches[i]
266			i++
267			if m[0] < m[1] {
268				// non-empty segment
269				return Segment{m[0], m[1]}
270			}
271		}
272		return Segment{}
273	}
274}
275
276// regexpSelection computes the Selection for the regular expression expr in text.
277func regexpSelection(text []byte, expr string) Selection {
278	var matches [][]int
279	if rx, err := regexp.Compile(expr); err == nil {
280		matches = rx.FindAllIndex(text, -1)
281	}
282	return makeSelection(matches)
283}
284
285var selRx = regexp.MustCompile(`^([0-9]+):([0-9]+)`)
286
287// RangeSelection computes the Selection for a text range described
288// by the argument str; the range description must match the selRx
289// regular expression.
290func RangeSelection(str string) Selection {
291	m := selRx.FindStringSubmatch(str)
292	if len(m) >= 2 {
293		from, _ := strconv.Atoi(m[1])
294		to, _ := strconv.Atoi(m[2])
295		if from < to {
296			return makeSelection([][]int{{from, to}})
297		}
298	}
299	return nil
300}
301
302// Span tags for all the possible selection combinations that may
303// be generated by FormatText. Selections are indicated by a bitset,
304// and the value of the bitset specifies the tag to be used.
305//
306// bit 0: comments
307// bit 1: highlights
308// bit 2: selections
309//
310var startTags = [][]byte{
311	/* 000 */ []byte(``),
312	/* 001 */ []byte(`<span class="comment">`),
313	/* 010 */ []byte(`<span class="highlight">`),
314	/* 011 */ []byte(`<span class="highlight-comment">`),
315	/* 100 */ []byte(`<span class="selection">`),
316	/* 101 */ []byte(`<span class="selection-comment">`),
317	/* 110 */ []byte(`<span class="selection-highlight">`),
318	/* 111 */ []byte(`<span class="selection-highlight-comment">`),
319}
320
321var endTag = []byte(`</span>`)
322
323func selectionTag(w io.Writer, text []byte, selections int) {
324	if selections < len(startTags) {
325		if tag := startTags[selections]; len(tag) > 0 {
326			w.Write(tag)
327			template.HTMLEscape(w, text)
328			w.Write(endTag)
329			return
330		}
331	}
332	template.HTMLEscape(w, text)
333}
334
335// FormatText HTML-escapes text and writes it to w.
336// Consecutive text segments are wrapped in HTML spans (with tags as
337// defined by startTags and endTag) as follows:
338//
339//	- if line >= 0, line number (ln) spans are inserted before each line,
340//	  starting with the value of line
341//	- if the text is Go source, comments get the "comment" span class
342//	- each occurrence of the regular expression pattern gets the "highlight"
343//	  span class
344//	- text segments covered by selection get the "selection" span class
345//
346// Comments, highlights, and selections may overlap arbitrarily; the respective
347// HTML span classes are specified in the startTags variable.
348//
349func FormatText(w io.Writer, text []byte, line int, goSource bool, pattern string, selection Selection) {
350	var comments, highlights Selection
351	if goSource {
352		comments = tokenSelection(text, token.COMMENT)
353	}
354	if pattern != "" {
355		highlights = regexpSelection(text, pattern)
356	}
357	if line >= 0 || comments != nil || highlights != nil || selection != nil {
358		var lineTag LinkWriter
359		if line >= 0 {
360			lineTag = func(w io.Writer, _ int, start bool) {
361				if start {
362					fmt.Fprintf(w, "<span id=\"L%d\" class=\"ln\">%6d</span>", line, line)
363					line++
364				}
365			}
366		}
367		FormatSelections(w, text, lineTag, lineSelection(text), selectionTag, comments, highlights, selection)
368	} else {
369		template.HTMLEscape(w, text)
370	}
371}
372