1package highlight
2
3import (
4	"sort"
5	"strings"
6
7	"src.elv.sh/pkg/eval"
8	"src.elv.sh/pkg/parse"
9)
10
// sourceText is a package-level alias for parse.SourceText. The emitters in
// this file call it to obtain the text of a node, for example to compare the
// head or an argument of a form against a keyword.
var sourceText = parse.SourceText
12
13// Represents a region to be highlighted.
14type region struct {
15	begin int
16	end   int
17	// Regions can be lexical or semantic. Lexical regions always correspond to
18	// a leaf node in the parse tree, either a parse.Primary node or a parse.Sep
19	// node. Semantic regions may span several leaves and override all lexical
20	// regions in it.
21	kind regionKind
22	// In lexical regions for Primary nodes, this field corresponds to the Type
23	// field of the node (e.g. "bareword", "single-quoted"). In lexical regions
24	// for Sep nodes, this field is simply the source text itself (e.g. "(",
25	// "|"), except for comments, which have typ == "comment".
26	//
27	// In semantic regions, this field takes a value from a fixed list (see
28	// below).
29	typ string
30}
31
// regionKind tells whether a region is lexical or semantic.
type regionKind int

// Region kinds.
const (
	// A region for a single leaf node of the parse tree. This is the zero
	// value of regionKind.
	lexicalRegion regionKind = iota
	// A region that may span several leaves. fixRegions sorts a semantic
	// region before a lexical region that begins at the same position, so the
	// semantic one wins when they overlap.
	semanticRegion
)
39
// Lexical region types. Except for commentRegion, each of these is what
// emitRegionsInPrimary reports for the parse.Primary node type of the same
// name.
const (
	barewordRegion     = "bareword"
	singleQuotedRegion = "single-quoted"
	doubleQuotedRegion = "double-quoted"
	variableRegion     = "variable" // Could also be semantic, e.g. for assignment targets.
	wildcardRegion     = "wildcard"
	tildeRegion        = "tilde"
	// A comment region. Note that this is the only type of Sep leaf node that
	// is not identified by its text; emitRegionsInSep reports it for a Sep
	// whose text, after leading whitespace, starts with "#".
	commentRegion = "comment"
)
52
// Semantic region types.
const (
	// A region when a string literal (bareword, single-quoted or double-quoted)
	// appears as a command.
	//
	// NOTE(review): in this file, emitRegionsInForm only emits this type when
	// the head of the form is a bareword (see isBarewordCompound); quoted
	// heads are not reported as commands here.
	commandRegion = "command"
	// A region for keywords in special forms, like "else" in an "if" form, or
	// the "=" in a "var" or "set" form.
	keywordRegion = "keyword"
	// A region of parse or compilation error. None of the emitters in this
	// file report it.
	errorRegion = "error"
)
63
64func getRegions(n parse.Node) []region {
65	regions := getRegionsInner(n)
66	regions = fixRegions(regions)
67	return regions
68}
69
70func getRegionsInner(n parse.Node) []region {
71	var regions []region
72	emitRegions(n, func(n parse.Node, kind regionKind, typ string) {
73		regions = append(regions, region{n.Range().From, n.Range().To, kind, typ})
74	})
75	return regions
76}
77
78func fixRegions(regions []region) []region {
79	// Sort regions by the begin position, putting semantic regions before
80	// lexical regions.
81	sort.Slice(regions, func(i, j int) bool {
82		if regions[i].begin < regions[j].begin {
83			return true
84		}
85		if regions[i].begin == regions[j].begin {
86			return regions[i].kind == semanticRegion && regions[j].kind == lexicalRegion
87		}
88		return false
89	})
90	// Remove overlapping regions, preferring the ones that appear earlier.
91	var newRegions []region
92	lastEnd := 0
93	for _, r := range regions {
94		if r.begin < lastEnd {
95			continue
96		}
97		newRegions = append(newRegions, r)
98		lastEnd = r.end
99	}
100	return newRegions
101}
102
103func emitRegions(n parse.Node, f func(parse.Node, regionKind, string)) {
104	switch n := n.(type) {
105	case *parse.Form:
106		emitRegionsInForm(n, f)
107	case *parse.Primary:
108		emitRegionsInPrimary(n, f)
109	case *parse.Sep:
110		emitRegionsInSep(n, f)
111	}
112	for _, child := range parse.Children(n) {
113		emitRegions(child, f)
114	}
115}
116
117func emitRegionsInForm(n *parse.Form, f func(parse.Node, regionKind, string)) {
118	// Left hands of temporary assignments.
119	for _, an := range n.Assignments {
120		if an.Left != nil && an.Left.Head != nil {
121			f(an.Left.Head, semanticRegion, variableRegion)
122		}
123	}
124	if n.Head == nil {
125		return
126	}
127	// Special forms.
128	// TODO: This only highlights bareword special commands, however currently
129	// quoted special commands are also possible (e.g `"if" $true { }` is
130	// accepted).
131	head := sourceText(n.Head)
132	switch head {
133	case "var", "set":
134		emitRegionsInVarSet(n, f)
135	case "if":
136		emitRegionsInIf(n, f)
137	case "for":
138		emitRegionsInFor(n, f)
139	case "try":
140		emitRegionsInTry(n, f)
141	}
142	if !eval.IsBuiltinSpecial[head] {
143		for i, arg := range n.Args {
144			if parse.SourceText(arg) == "=" {
145				// Highlight left hands of legacy assignment form.
146				emitVariableRegion(n.Head, f)
147				for j := 0; j < i; j++ {
148					emitVariableRegion(n.Args[j], f)
149				}
150				return
151			}
152		}
153	}
154	if isBarewordCompound(n.Head) {
155		f(n.Head, semanticRegion, commandRegion)
156	}
157}
158
159func emitRegionsInVarSet(n *parse.Form, f func(parse.Node, regionKind, string)) {
160	// Highlight all LHS, and = as a keyword.
161	for _, arg := range n.Args {
162		if parse.SourceText(arg) == "=" {
163			f(arg, semanticRegion, keywordRegion)
164			break
165		}
166		emitVariableRegion(arg, f)
167	}
168}
169
170func emitVariableRegion(n *parse.Compound, f func(parse.Node, regionKind, string)) {
171	// Only handle valid LHS here. Invalid LHS will result in a compile error
172	// and highlighted as an error accordingly.
173	if n != nil && len(n.Indexings) == 1 && n.Indexings[0].Head != nil {
174		f(n.Indexings[0].Head, semanticRegion, variableRegion)
175	}
176}
177
178func isBarewordCompound(n *parse.Compound) bool {
179	return len(n.Indexings) == 1 && len(n.Indexings[0].Indices) == 0 && n.Indexings[0].Head.Type == parse.Bareword
180}
181
182func emitRegionsInIf(n *parse.Form, f func(parse.Node, regionKind, string)) {
183	// Highlight all "elif" and "else".
184	for i := 2; i < len(n.Args); i += 2 {
185		arg := n.Args[i]
186		if s := sourceText(arg); s == "elif" || s == "else" {
187			f(arg, semanticRegion, keywordRegion)
188		}
189	}
190}
191
192func emitRegionsInFor(n *parse.Form, f func(parse.Node, regionKind, string)) {
193	// Highlight the iterating variable.
194	if 0 < len(n.Args) && len(n.Args[0].Indexings) > 0 {
195		f(n.Args[0].Indexings[0].Head, semanticRegion, variableRegion)
196	}
197	// Highlight "else".
198	if 3 < len(n.Args) && sourceText(n.Args[3]) == "else" {
199		f(n.Args[3], semanticRegion, keywordRegion)
200	}
201}
202
203func emitRegionsInTry(n *parse.Form, f func(parse.Node, regionKind, string)) {
204	// Highlight "except", the exception variable after it, "else" and
205	// "finally".
206	i := 1
207	matchKW := func(text string) bool {
208		if i < len(n.Args) && sourceText(n.Args[i]) == text {
209			f(n.Args[i], semanticRegion, keywordRegion)
210			return true
211		}
212		return false
213	}
214	if matchKW("except") {
215		if i+1 < len(n.Args) && len(n.Args[i+1].Indexings) > 0 {
216			f(n.Args[i+1], semanticRegion, variableRegion)
217		}
218		i += 3
219	}
220	if matchKW("else") {
221		i += 2
222	}
223	matchKW("finally")
224}
225
226func emitRegionsInPrimary(n *parse.Primary, f func(parse.Node, regionKind, string)) {
227	switch n.Type {
228	case parse.Bareword:
229		f(n, lexicalRegion, barewordRegion)
230	case parse.SingleQuoted:
231		f(n, lexicalRegion, singleQuotedRegion)
232	case parse.DoubleQuoted:
233		f(n, lexicalRegion, doubleQuotedRegion)
234	case parse.Variable:
235		f(n, lexicalRegion, variableRegion)
236	case parse.Wildcard:
237		f(n, lexicalRegion, wildcardRegion)
238	case parse.Tilde:
239		f(n, lexicalRegion, tildeRegion)
240	}
241}
242
243func emitRegionsInSep(n *parse.Sep, f func(parse.Node, regionKind, string)) {
244	text := sourceText(n)
245	trimmed := strings.TrimLeftFunc(text, parse.IsWhitespace)
246	switch {
247	case trimmed == "":
248		// Don't do anything; whitespaces do not get highlighted.
249	case strings.HasPrefix(trimmed, "#"):
250		f(n, lexicalRegion, commentRegion)
251	default:
252		f(n, lexicalRegion, text)
253	}
254}
255