1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package present
6
7import (
8	"errors"
9	"regexp"
10	"strconv"
11	"unicode/utf8"
12)
13
14// This file is stolen from go/src/cmd/godoc/codewalk.go.
15// It's an evaluator for the file address syntax implemented by acme and sam,
16// but using Go-native regular expressions.
17// To keep things reasonably close, this version uses (?m:re) for all user-provided
18// regular expressions. That is the only change to the code from codewalk.go.
19// See http://9p.io/sys/doc/sam/sam.html Table II for details on the syntax.
20
21// addrToByte evaluates the given address starting at offset start in data.
22// It returns the lo and hi byte offset of the matched region within data.
23func addrToByteRange(addr string, start int, data []byte) (lo, hi int, err error) {
24	if addr == "" {
25		lo, hi = start, len(data)
26		return
27	}
28	var (
29		dir        byte
30		prevc      byte
31		charOffset bool
32	)
33	lo = start
34	hi = start
35	for addr != "" && err == nil {
36		c := addr[0]
37		switch c {
38		default:
39			err = errors.New("invalid address syntax near " + string(c))
40		case ',':
41			if len(addr) == 1 {
42				hi = len(data)
43			} else {
44				_, hi, err = addrToByteRange(addr[1:], hi, data)
45			}
46			return
47
48		case '+', '-':
49			if prevc == '+' || prevc == '-' {
50				lo, hi, err = addrNumber(data, lo, hi, prevc, 1, charOffset)
51			}
52			dir = c
53
54		case '$':
55			lo = len(data)
56			hi = len(data)
57			if len(addr) > 1 {
58				dir = '+'
59			}
60
61		case '#':
62			charOffset = true
63
64		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
65			var i int
66			for i = 1; i < len(addr); i++ {
67				if addr[i] < '0' || addr[i] > '9' {
68					break
69				}
70			}
71			var n int
72			n, err = strconv.Atoi(addr[0:i])
73			if err != nil {
74				break
75			}
76			lo, hi, err = addrNumber(data, lo, hi, dir, n, charOffset)
77			dir = 0
78			charOffset = false
79			prevc = c
80			addr = addr[i:]
81			continue
82
83		case '/':
84			var i, j int
85		Regexp:
86			for i = 1; i < len(addr); i++ {
87				switch addr[i] {
88				case '\\':
89					i++
90				case '/':
91					j = i + 1
92					break Regexp
93				}
94			}
95			if j == 0 {
96				j = i
97			}
98			pattern := addr[1:i]
99			lo, hi, err = addrRegexp(data, lo, hi, dir, pattern)
100			prevc = c
101			addr = addr[j:]
102			continue
103		}
104		prevc = c
105		addr = addr[1:]
106	}
107
108	if err == nil && dir != 0 {
109		lo, hi, err = addrNumber(data, lo, hi, dir, 1, charOffset)
110	}
111	if err != nil {
112		return 0, 0, err
113	}
114	return lo, hi, nil
115}
116
// addrNumber applies the given dir, n, and charOffset to the address lo, hi.
// dir is '+' or '-' for a relative move, or 0 for an absolute address counted
// from the beginning of data. n is the count, and charOffset is true if the
// syntax used was #n.  Applying +n (or +#n) means to advance n lines
// (or characters) after hi.  Applying -n (or -#n) means to back up n lines
// (or characters) before lo.
//
// Character offsets are counted in UTF-8 encoded runes in both directions,
// and each invalid byte counts as one character.
//
// The return value is the new lo, hi. If the requested position lies outside
// data, or dir is not one of the values above, the error
// "address out of range" is returned.
func addrNumber(data []byte, lo, hi int, dir byte, n int, charOffset bool) (int, int, error) {
	switch dir {
	case 0:
		// Absolute address: count forward from the start of data.
		lo = 0
		hi = 0
		fallthrough

	case '+':
		if charOffset {
			pos := hi
			for ; n > 0 && pos < len(data); n-- {
				_, size := utf8.DecodeRune(data[pos:])
				pos += size
			}
			if n == 0 {
				return pos, pos, nil
			}
			// Ran off the end of data before consuming n characters.
			break
		}
		// find next beginning of line
		if hi > 0 {
			for hi < len(data) && data[hi-1] != '\n' {
				hi++
			}
		}
		lo = hi
		if n == 0 {
			return lo, hi, nil
		}
		// Count newlines; the selected line starts after the (n-1)th one
		// and ends just past the nth.
		for ; hi < len(data); hi++ {
			if data[hi] != '\n' {
				continue
			}
			switch n--; n {
			case 1:
				lo = hi + 1
			case 0:
				return lo, hi + 1, nil
			}
		}

	case '-':
		if charOffset {
			// Step backward one whole rune at a time. Decoding the rune
			// that ends at pos never reads data[len(data)] and never
			// stops in the middle of a multi-byte sequence.
			pos := lo
			for ; n > 0 && pos > 0; n-- {
				_, size := utf8.DecodeLastRune(data[:pos])
				pos -= size
			}
			if n == 0 {
				return pos, pos, nil
			}
			// Reached the start of data before consuming n characters.
			break
		}
		// find earlier beginning of line
		for lo > 0 && data[lo-1] != '\n' {
			lo--
		}
		hi = lo
		if n == 0 {
			return lo, hi, nil
		}
		// Walk backward over line starts; offset 0 counts as one.
		for ; lo >= 0; lo-- {
			if lo > 0 && data[lo-1] != '\n' {
				continue
			}
			switch n--; n {
			case 1:
				hi = lo
			case 0:
				return lo, hi, nil
			}
		}
	}

	return 0, 0, errors.New("address out of range")
}
201
// addrRegexp looks for the first match of pattern in data and returns the
// byte offsets of the start and end of that match.
//
// The search begins at hi. If nothing matches from there on and hi is past
// the beginning, the search wraps around and is retried over all of data.
// dir selects the direction: '-' asks for a backward search, which is not
// implemented and yields an error; any other value searches forward.
// lo is accepted for symmetry with addrNumber but is not consulted.
func addrRegexp(data []byte, lo, hi int, dir byte, pattern string) (int, int, error) {
	// Wrap the pattern in (?m:...) so that ^ and $ anchor at line
	// boundaries, the way they do in sam/acme.
	re, err := regexp.Compile("(?m:" + pattern + ")")
	switch {
	case err != nil:
		return 0, 0, err
	case dir == '-':
		// A reverse search could be built on a binary search through
		// the file, but that seems like overkill.
		return 0, 0, errors.New("reverse search not implemented")
	}

	// First attempt: from hi to the end; offsets are relative to hi.
	if loc := re.FindIndex(data[hi:]); len(loc) > 0 {
		return loc[0] + hi, loc[1] + hi, nil
	}

	// Second attempt: wrap to the beginning of data.
	if hi > 0 {
		if loc := re.FindIndex(data); len(loc) > 0 {
			return loc[0], loc[1], nil
		}
	}

	return 0, 0, errors.New("no match for " + pattern)
}
229