1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package ast
6
7import (
8	"bytes"
9	"fmt"
10	"go/token"
11	"sort"
12)
13
14type byPos []*CommentGroup
15
16func (a byPos) Len() int           { return len(a) }
17func (a byPos) Less(i, j int) bool { return a[i].Pos() < a[j].Pos() }
18func (a byPos) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
19
20// sortComments sorts the list of comment groups in source order.
21//
22func sortComments(list []*CommentGroup) {
23	// TODO(gri): Does it make sense to check for sorted-ness
24	//            first (because we know that sorted-ness is
25	//            very likely)?
26	if orderedList := byPos(list); !sort.IsSorted(orderedList) {
27		sort.Sort(orderedList)
28	}
29}
30
31// A CommentMap maps an AST node to a list of comment groups
32// associated with it. See NewCommentMap for a description of
33// the association.
34//
35type CommentMap map[Node][]*CommentGroup
36
37func (cmap CommentMap) addComment(n Node, c *CommentGroup) {
38	list := cmap[n]
39	if len(list) == 0 {
40		list = []*CommentGroup{c}
41	} else {
42		list = append(list, c)
43	}
44	cmap[n] = list
45}
46
47type byInterval []Node
48
49func (a byInterval) Len() int { return len(a) }
50func (a byInterval) Less(i, j int) bool {
51	pi, pj := a[i].Pos(), a[j].Pos()
52	return pi < pj || pi == pj && a[i].End() > a[j].End()
53}
54func (a byInterval) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
55
56// nodeList returns the list of nodes of the AST n in source order.
57//
58func nodeList(n Node) []Node {
59	var list []Node
60	Inspect(n, func(n Node) bool {
61		// don't collect comments
62		switch n.(type) {
63		case nil, *CommentGroup, *Comment:
64			return false
65		}
66		list = append(list, n)
67		return true
68	})
69	// Note: The current implementation assumes that Inspect traverses the
70	//       AST in depth-first and thus _source_ order. If AST traversal
71	//       does not follow source order, the sorting call below will be
72	//       required.
73	// sort.Sort(byInterval(list))
74	return list
75}
76
77// A commentListReader helps iterating through a list of comment groups.
78//
79type commentListReader struct {
80	fset     *token.FileSet
81	list     []*CommentGroup
82	index    int
83	comment  *CommentGroup  // comment group at current index
84	pos, end token.Position // source interval of comment group at current index
85}
86
87func (r *commentListReader) eol() bool {
88	return r.index >= len(r.list)
89}
90
91func (r *commentListReader) next() {
92	if !r.eol() {
93		r.comment = r.list[r.index]
94		r.pos = r.fset.Position(r.comment.Pos())
95		r.end = r.fset.Position(r.comment.End())
96		r.index++
97	}
98}
99
100// A nodeStack keeps track of nested nodes.
101// A node lower on the stack lexically contains the nodes higher on the stack.
102//
103type nodeStack []Node
104
105// push pops all nodes that appear lexically before n
106// and then pushes n on the stack.
107//
108func (s *nodeStack) push(n Node) {
109	s.pop(n.Pos())
110	*s = append((*s), n)
111}
112
113// pop pops all nodes that appear lexically before pos
114// (i.e., whose lexical extent has ended before or at pos).
115// It returns the last node popped.
116//
117func (s *nodeStack) pop(pos token.Pos) (top Node) {
118	i := len(*s)
119	for i > 0 && (*s)[i-1].End() <= pos {
120		top = (*s)[i-1]
121		i--
122	}
123	*s = (*s)[0:i]
124	return top
125}
126
127// NewCommentMap creates a new comment map by associating comment groups
128// of the comments list with the nodes of the AST specified by node.
129//
130// A comment group g is associated with a node n if:
131//
132//   - g starts on the same line as n ends
133//   - g starts on the line immediately following n, and there is
134//     at least one empty line after g and before the next node
135//   - g starts before n and is not associated to the node before n
136//     via the previous rules
137//
138// NewCommentMap tries to associate a comment group to the "largest"
139// node possible: For instance, if the comment is a line comment
140// trailing an assignment, the comment is associated with the entire
141// assignment rather than just the last operand in the assignment.
142//
143func NewCommentMap(fset *token.FileSet, node Node, comments []*CommentGroup) CommentMap {
144	if len(comments) == 0 {
145		return nil // no comments to map
146	}
147
148	cmap := make(CommentMap)
149
150	// set up comment reader r
151	tmp := make([]*CommentGroup, len(comments))
152	copy(tmp, comments) // don't change incoming comments
153	sortComments(tmp)
154	r := commentListReader{fset: fset, list: tmp} // !r.eol() because len(comments) > 0
155	r.next()
156
157	// create node list in lexical order
158	nodes := nodeList(node)
159	nodes = append(nodes, nil) // append sentinel
160
161	// set up iteration variables
162	var (
163		p     Node           // previous node
164		pend  token.Position // end of p
165		pg    Node           // previous node group (enclosing nodes of "importance")
166		pgend token.Position // end of pg
167		stack nodeStack      // stack of node groups
168	)
169
170	for _, q := range nodes {
171		var qpos token.Position
172		if q != nil {
173			qpos = fset.Position(q.Pos()) // current node position
174		} else {
175			// set fake sentinel position to infinity so that
176			// all comments get processed before the sentinel
177			const infinity = 1 << 30
178			qpos.Offset = infinity
179			qpos.Line = infinity
180		}
181
182		// process comments before current node
183		for r.end.Offset <= qpos.Offset {
184			// determine recent node group
185			if top := stack.pop(r.comment.Pos()); top != nil {
186				pg = top
187				pgend = fset.Position(pg.End())
188			}
189			// Try to associate a comment first with a node group
190			// (i.e., a node of "importance" such as a declaration);
191			// if that fails, try to associate it with the most recent
192			// node.
193			// TODO(gri) try to simplify the logic below
194			var assoc Node
195			switch {
196			case pg != nil &&
197				(pgend.Line == r.pos.Line ||
198					pgend.Line+1 == r.pos.Line && r.end.Line+1 < qpos.Line):
199				// 1) comment starts on same line as previous node group ends, or
200				// 2) comment starts on the line immediately after the
201				//    previous node group and there is an empty line before
202				//    the current node
203				// => associate comment with previous node group
204				assoc = pg
205			case p != nil &&
206				(pend.Line == r.pos.Line ||
207					pend.Line+1 == r.pos.Line && r.end.Line+1 < qpos.Line ||
208					q == nil):
209				// same rules apply as above for p rather than pg,
210				// but also associate with p if we are at the end (q == nil)
211				assoc = p
212			default:
213				// otherwise, associate comment with current node
214				if q == nil {
215					// we can only reach here if there was no p
216					// which would imply that there were no nodes
217					panic("internal error: no comments should be associated with sentinel")
218				}
219				assoc = q
220			}
221			cmap.addComment(assoc, r.comment)
222			if r.eol() {
223				return cmap
224			}
225			r.next()
226		}
227
228		// update previous node
229		p = q
230		pend = fset.Position(p.End())
231
232		// update previous node group if we see an "important" node
233		switch q.(type) {
234		case *File, *Field, Decl, Spec, Stmt:
235			stack.push(q)
236		}
237	}
238
239	return cmap
240}
241
242// Update replaces an old node in the comment map with the new node
243// and returns the new node. Comments that were associated with the
244// old node are associated with the new node.
245//
246func (cmap CommentMap) Update(old, new Node) Node {
247	if list := cmap[old]; len(list) > 0 {
248		delete(cmap, old)
249		cmap[new] = append(cmap[new], list...)
250	}
251	return new
252}
253
254// Filter returns a new comment map consisting of only those
255// entries of cmap for which a corresponding node exists in
256// the AST specified by node.
257//
258func (cmap CommentMap) Filter(node Node) CommentMap {
259	umap := make(CommentMap)
260	Inspect(node, func(n Node) bool {
261		if g := cmap[n]; len(g) > 0 {
262			umap[n] = g
263		}
264		return true
265	})
266	return umap
267}
268
269// Comments returns the list of comment groups in the comment map.
270// The result is sorted in source order.
271//
272func (cmap CommentMap) Comments() []*CommentGroup {
273	list := make([]*CommentGroup, 0, len(cmap))
274	for _, e := range cmap {
275		list = append(list, e...)
276	}
277	sortComments(list)
278	return list
279}
280
281func summary(list []*CommentGroup) string {
282	const maxLen = 40
283	var buf bytes.Buffer
284
285	// collect comments text
286loop:
287	for _, group := range list {
288		// Note: CommentGroup.Text() does too much work for what we
289		//       need and would only replace this innermost loop.
290		//       Just do it explicitly.
291		for _, comment := range group.List {
292			if buf.Len() >= maxLen {
293				break loop
294			}
295			buf.WriteString(comment.Text)
296		}
297	}
298
299	// truncate if too long
300	if buf.Len() > maxLen {
301		buf.Truncate(maxLen - 3)
302		buf.WriteString("...")
303	}
304
305	// replace any invisibles with blanks
306	bytes := buf.Bytes()
307	for i, b := range bytes {
308		switch b {
309		case '\t', '\n', '\r':
310			bytes[i] = ' '
311		}
312	}
313
314	return string(bytes)
315}
316
317func (cmap CommentMap) String() string {
318	var buf bytes.Buffer
319	fmt.Fprintln(&buf, "CommentMap {")
320	for node, comment := range cmap {
321		// print name of identifiers; print node type for other nodes
322		var s string
323		if ident, ok := node.(*Ident); ok {
324			s = ident.Name
325		} else {
326			s = fmt.Sprintf("%T", node)
327		}
328		fmt.Fprintf(&buf, "\t%p  %20s:  %s\n", node, s, summary(comment))
329	}
330	fmt.Fprintln(&buf, "}")
331	return buf.String()
332}
333