1package text
2
3import (
4	"io"
5	"regexp"
6	"unicode/utf8"
7
8	"github.com/yuin/goldmark/util"
9)
10
// invalidValue is the sentinel offset used in this file to build a Segment
// that carries no explicit position: blockReader.AdvanceLine passes a segment
// made from it, and blockReader.SetPosition reacts to a Start equal to it by
// loading the segment of the requested line instead.
const invalidValue = -1

// EOF indicates the end of file. The Peek methods in this file return it when
// the current position is outside the readable range.
const EOF = byte(0xff)
15
// A Reader interface provides abstracted methods for reading text.
type Reader interface {
	io.RuneReader

	// Source returns a source of the reader.
	Source() []byte

	// ResetPosition resets positions.
	ResetPosition()

	// Peek returns a byte at current position without advancing the internal pointer.
	// The implementations in this file return EOF when nothing is left to read.
	Peek() byte

	// PeekLine returns the current line without advancing the internal pointer.
	// The implementations in this file return a nil slice when nothing is
	// left to read.
	PeekLine() ([]byte, Segment)

	// PrecendingCharacter returns a character just before current internal pointer.
	// (The method name is misspelled; it is part of the public interface.)
	PrecendingCharacter() rune

	// Value returns a value of the given segment.
	Value(Segment) []byte

	// LineOffset returns a distance from the line head to current position.
	// In the implementations in this file a tab advances the distance by
	// util.TabWidth of the distance accumulated so far.
	LineOffset() int

	// Position returns current line number and position.
	Position() (int, Segment)

	// SetPosition sets current line number and position.
	SetPosition(int, Segment)

	// SetPadding sets padding to the reader.
	SetPadding(int)

	// Advance advances the internal pointer.
	Advance(int)

	// AdvanceAndSetPadding advances the internal pointer and add padding to the
	// reader.
	AdvanceAndSetPadding(int, int)

	// AdvanceLine advances the internal pointer to the next line head.
	AdvanceLine()

	// SkipSpaces skips space characters and returns a non-blank line.
	// The int result is the number of characters skipped.
	// If it reaches EOF, returns false.
	SkipSpaces() (Segment, int, bool)

	// SkipBlankLines skips blank lines and returns a non-blank line.
	// The int result is the number of lines skipped.
	// If it reaches EOF, returns false.
	SkipBlankLines() (Segment, int, bool)

	// Match performs regular expression matching to current line.
	// In the implementations in this file a successful match advances the
	// internal pointer by the length of the match.
	Match(reg *regexp.Regexp) bool

	// FindSubMatch performs regular expression searching to current line.
	// In the implementations in this file it returns nil when nothing
	// matches, and otherwise advances the internal pointer by the length of
	// the match.
	FindSubMatch(reg *regexp.Regexp) [][]byte
}
74
// reader is the Reader implementation that walks a whole source buffer line
// by line.
type reader struct {
	source       []byte  // buffer being read
	sourceLength int     // len(source), captured by NewReader
	line         int     // current line number; ResetPosition starts it at -1 and AdvanceLine increments it
	peekedLine   []byte  // cache filled by PeekLine; nil means "not computed"
	pos          Segment // current position: Start is the read pointer, Stop is set by AdvanceLine to the end of the current line
	head         int     // offset of the current line's first byte, set by AdvanceLine
	lineOffset   int     // cache for LineOffset; a negative value means "not computed"
}
84
85// NewReader return a new Reader that can read UTF-8 bytes .
86func NewReader(source []byte) Reader {
87	r := &reader{
88		source:       source,
89		sourceLength: len(source),
90	}
91	r.ResetPosition()
92	return r
93}
94
95func (r *reader) ResetPosition() {
96	r.line = -1
97	r.head = 0
98	r.lineOffset = -1
99	r.AdvanceLine()
100}
101
// Source returns the buffer the reader was created with. The slice is
// returned as-is, not copied.
func (r *reader) Source() []byte {
	return r.source
}
105
// Value returns the value of the given segment, evaluated against the
// reader's source via Segment.Value.
func (r *reader) Value(seg Segment) []byte {
	return seg.Value(r.source)
}
109
110func (r *reader) Peek() byte {
111	if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
112		if r.pos.Padding != 0 {
113			return space[0]
114		}
115		return r.source[r.pos.Start]
116	}
117	return EOF
118}
119
120func (r *reader) PeekLine() ([]byte, Segment) {
121	if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
122		if r.peekedLine == nil {
123			r.peekedLine = r.pos.Value(r.Source())
124		}
125		return r.peekedLine, r.pos
126	}
127	return nil, r.pos
128}
129
// ReadRune implements io.RuneReader by delegating to readRuneReader, which
// decodes one rune at the current position and advances past it.
func (r *reader) ReadRune() (rune, int, error) {
	return readRuneReader(r)
}
134
135func (r *reader) LineOffset() int {
136	if r.lineOffset < 0 {
137		v := 0
138		for i := r.head; i < r.pos.Start; i++ {
139			if r.source[i] == '\t' {
140				v += util.TabWidth(v)
141			} else {
142				v++
143			}
144		}
145		r.lineOffset = v - r.pos.Padding
146	}
147	return r.lineOffset
148}
149
150func (r *reader) PrecendingCharacter() rune {
151	if r.pos.Start <= 0 {
152		if r.pos.Padding != 0 {
153			return rune(' ')
154		}
155		return rune('\n')
156	}
157	i := r.pos.Start - 1
158	for ; i >= 0; i-- {
159		if utf8.RuneStart(r.source[i]) {
160			break
161		}
162	}
163	rn, _ := utf8.DecodeRune(r.source[i:])
164	return rn
165}
166
167func (r *reader) Advance(n int) {
168	r.lineOffset = -1
169	if n < len(r.peekedLine) && r.pos.Padding == 0 {
170		r.pos.Start += n
171		r.peekedLine = nil
172		return
173	}
174	r.peekedLine = nil
175	l := r.sourceLength
176	for ; n > 0 && r.pos.Start < l; n-- {
177		if r.pos.Padding != 0 {
178			r.pos.Padding--
179			continue
180		}
181		if r.source[r.pos.Start] == '\n' {
182			r.AdvanceLine()
183			continue
184		}
185		r.pos.Start++
186	}
187}
188
189func (r *reader) AdvanceAndSetPadding(n, padding int) {
190	r.Advance(n)
191	if padding > r.pos.Padding {
192		r.SetPadding(padding)
193	}
194}
195
196func (r *reader) AdvanceLine() {
197	r.lineOffset = -1
198	r.peekedLine = nil
199	r.pos.Start = r.pos.Stop
200	r.head = r.pos.Start
201	if r.pos.Start < 0 {
202		return
203	}
204	r.pos.Stop = r.sourceLength
205	for i := r.pos.Start; i < r.sourceLength; i++ {
206		c := r.source[i]
207		if c == '\n' {
208			r.pos.Stop = i + 1
209			break
210		}
211	}
212	r.line++
213	r.pos.Padding = 0
214}
215
// Position returns the current line number and the current position segment.
func (r *reader) Position() (int, Segment) {
	return r.line, r.pos
}
219
220func (r *reader) SetPosition(line int, pos Segment) {
221	r.lineOffset = -1
222	r.line = line
223	r.pos = pos
224}
225
226func (r *reader) SetPadding(v int) {
227	r.pos.Padding = v
228}
229
// SkipSpaces skips space characters by delegating to skipSpacesReader.
func (r *reader) SkipSpaces() (Segment, int, bool) {
	return skipSpacesReader(r)
}
233
// SkipBlankLines skips blank lines by delegating to skipBlankLinesReader.
func (r *reader) SkipBlankLines() (Segment, int, bool) {
	return skipBlankLinesReader(r)
}
237
// Match matches reg against the text at the current position by delegating
// to matchReader.
func (r *reader) Match(reg *regexp.Regexp) bool {
	return matchReader(r, reg)
}
241
// FindSubMatch searches for reg at the current position and returns its
// submatches by delegating to findSubMatchReader.
func (r *reader) FindSubMatch(reg *regexp.Regexp) [][]byte {
	return findSubMatchReader(r, reg)
}
245
// A BlockReader interface is a reader that is optimized for Blocks.
// It reads the lines described by a set of segments rather than a whole
// source buffer.
type BlockReader interface {
	Reader
	// Reset resets current state and sets new segments to the reader.
	Reset(segment *Segments)
}
252
// blockReader is the BlockReader implementation; each entry of segments is
// treated as one line of the block.
type blockReader struct {
	source         []byte    // buffer the segments point into
	segments       *Segments // lines of the block; installed by Reset
	segmentsLength int       // segments.Len(), captured by Reset
	line           int       // index of the current line within segments
	pos            Segment   // current position within the current line
	head           int       // Start of the current line's segment, used by LineOffset
	last           int       // Stop of the last segment (0 when there are none), set by ResetPosition
	lineOffset     int       // cache for LineOffset; a negative value means "not computed"
}
263
264// NewBlockReader returns a new BlockReader.
265func NewBlockReader(source []byte, segments *Segments) BlockReader {
266	r := &blockReader{
267		source: source,
268	}
269	if segments != nil {
270		r.Reset(segments)
271	}
272	return r
273}
274
275func (r *blockReader) ResetPosition() {
276	r.line = -1
277	r.head = 0
278	r.last = 0
279	r.lineOffset = -1
280	r.pos.Start = -1
281	r.pos.Stop = -1
282	r.pos.Padding = 0
283	if r.segmentsLength > 0 {
284		last := r.segments.At(r.segmentsLength - 1)
285		r.last = last.Stop
286	}
287	r.AdvanceLine()
288}
289
// Reset installs a new set of segments and rewinds the reader onto them.
// segments must be non-nil: its Len method is called unconditionally.
func (r *blockReader) Reset(segments *Segments) {
	r.segments = segments
	r.segmentsLength = segments.Len()
	r.ResetPosition()
}
295
// Source returns the buffer the reader was created with. The slice is
// returned as-is, not copied.
func (r *blockReader) Source() []byte {
	return r.source
}
299
300func (r *blockReader) Value(seg Segment) []byte {
301	line := r.segmentsLength - 1
302	ret := make([]byte, 0, seg.Stop-seg.Start+1)
303	for ; line >= 0; line-- {
304		if seg.Start >= r.segments.At(line).Start {
305			break
306		}
307	}
308	i := seg.Start
309	for ; line < r.segmentsLength; line++ {
310		s := r.segments.At(line)
311		if i < 0 {
312			i = s.Start
313		}
314		ret = s.ConcatPadding(ret)
315		for ; i < seg.Stop && i < s.Stop; i++ {
316			ret = append(ret, r.source[i])
317		}
318		i = -1
319		if s.Stop > seg.Stop {
320			break
321		}
322	}
323	return ret
324}
325
// ReadRune implements io.RuneReader by delegating to readRuneReader, which
// decodes one rune at the current position and advances past it.
func (r *blockReader) ReadRune() (rune, int, error) {
	return readRuneReader(r)
}
330
331func (r *blockReader) PrecendingCharacter() rune {
332	if r.pos.Padding != 0 {
333		return rune(' ')
334	}
335	if r.pos.Start <= 0 {
336		return rune('\n')
337	}
338	l := len(r.source)
339	i := r.pos.Start - 1
340	for ; i < l && i >= 0; i-- {
341		if utf8.RuneStart(r.source[i]) {
342			break
343		}
344	}
345	if i < 0 || i >= l {
346		return rune('\n')
347	}
348	rn, _ := utf8.DecodeRune(r.source[i:])
349	return rn
350}
351
352func (r *blockReader) LineOffset() int {
353	if r.lineOffset < 0 {
354		v := 0
355		for i := r.head; i < r.pos.Start; i++ {
356			if r.source[i] == '\t' {
357				v += util.TabWidth(v)
358			} else {
359				v++
360			}
361		}
362		r.lineOffset = v - r.pos.Padding
363	}
364	return r.lineOffset
365}
366
367func (r *blockReader) Peek() byte {
368	if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
369		if r.pos.Padding != 0 {
370			return space[0]
371		}
372		return r.source[r.pos.Start]
373	}
374	return EOF
375}
376
377func (r *blockReader) PeekLine() ([]byte, Segment) {
378	if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
379		return r.pos.Value(r.source), r.pos
380	}
381	return nil, r.pos
382}
383
384func (r *blockReader) Advance(n int) {
385	r.lineOffset = -1
386
387	if n < r.pos.Stop-r.pos.Start && r.pos.Padding == 0 {
388		r.pos.Start += n
389		return
390	}
391
392	for ; n > 0; n-- {
393		if r.pos.Padding != 0 {
394			r.pos.Padding--
395			continue
396		}
397		if r.pos.Start >= r.pos.Stop-1 && r.pos.Stop < r.last {
398			r.AdvanceLine()
399			continue
400		}
401		r.pos.Start++
402	}
403}
404
405func (r *blockReader) AdvanceAndSetPadding(n, padding int) {
406	r.Advance(n)
407	if padding > r.pos.Padding {
408		r.SetPadding(padding)
409	}
410}
411
// AdvanceLine moves the reader to the next line of the block. It asks
// SetPosition for line+1 with a segment built from invalidValue, which makes
// SetPosition load that line's own segment when the line exists, and then
// records the resulting start offset as the line head.
func (r *blockReader) AdvanceLine() {
	r.SetPosition(r.line+1, NewSegment(invalidValue, invalidValue))
	r.head = r.pos.Start
}
416
// Position returns the current line number and the current position segment.
func (r *blockReader) Position() (int, Segment) {
	return r.line, r.pos
}
420
421func (r *blockReader) SetPosition(line int, pos Segment) {
422	r.lineOffset = -1
423	r.line = line
424	if pos.Start == invalidValue {
425		if r.line < r.segmentsLength {
426			s := r.segments.At(line)
427			r.head = s.Start
428			r.pos = s
429		}
430	} else {
431		r.pos = pos
432		if r.line < r.segmentsLength {
433			s := r.segments.At(line)
434			r.head = s.Start
435		}
436	}
437}
438
// SetPadding sets the padding of the current position. The cached line
// offset is invalidated because LineOffset subtracts the padding.
func (r *blockReader) SetPadding(v int) {
	r.lineOffset = -1
	r.pos.Padding = v
}
443
// SkipSpaces skips space characters by delegating to skipSpacesReader.
func (r *blockReader) SkipSpaces() (Segment, int, bool) {
	return skipSpacesReader(r)
}
447
// SkipBlankLines skips blank lines by delegating to skipBlankLinesReader.
func (r *blockReader) SkipBlankLines() (Segment, int, bool) {
	return skipBlankLinesReader(r)
}
451
// Match matches reg against the text at the current position by delegating
// to matchReader.
func (r *blockReader) Match(reg *regexp.Regexp) bool {
	return matchReader(r, reg)
}
455
// FindSubMatch searches for reg at the current position and returns its
// submatches by delegating to findSubMatchReader.
func (r *blockReader) FindSubMatch(reg *regexp.Regexp) [][]byte {
	return findSubMatchReader(r, reg)
}
459
460func skipBlankLinesReader(r Reader) (Segment, int, bool) {
461	lines := 0
462	for {
463		line, seg := r.PeekLine()
464		if line == nil {
465			return seg, lines, false
466		}
467		if util.IsBlank(line) {
468			lines++
469			r.AdvanceLine()
470		} else {
471			return seg, lines, true
472		}
473	}
474}
475
476func skipSpacesReader(r Reader) (Segment, int, bool) {
477	chars := 0
478	for {
479		line, segment := r.PeekLine()
480		if line == nil {
481			return segment, chars, false
482		}
483		for i, c := range line {
484			if util.IsSpace(c) {
485				chars++
486				r.Advance(1)
487				continue
488			}
489			return segment.WithStart(segment.Start + i + 1), chars, true
490		}
491	}
492}
493
494func matchReader(r Reader, reg *regexp.Regexp) bool {
495	oldline, oldseg := r.Position()
496	match := reg.FindReaderSubmatchIndex(r)
497	r.SetPosition(oldline, oldseg)
498	if match == nil {
499		return false
500	}
501	r.Advance(match[1] - match[0])
502	return true
503}
504
505func findSubMatchReader(r Reader, reg *regexp.Regexp) [][]byte {
506	oldline, oldseg := r.Position()
507	match := reg.FindReaderSubmatchIndex(r)
508	r.SetPosition(oldline, oldseg)
509	if match == nil {
510		return nil
511	}
512	runes := make([]rune, 0, match[1]-match[0])
513	for i := 0; i < match[1]; {
514		r, size, _ := readRuneReader(r)
515		i += size
516		runes = append(runes, r)
517	}
518	result := [][]byte{}
519	for i := 0; i < len(match); i += 2 {
520		result = append(result, []byte(string(runes[match[i]:match[i+1]])))
521	}
522
523	r.SetPosition(oldline, oldseg)
524	r.Advance(match[1] - match[0])
525	return result
526}
527
528func readRuneReader(r Reader) (rune, int, error) {
529	line, _ := r.PeekLine()
530	if line == nil {
531		return 0, 0, io.EOF
532	}
533	rn, size := utf8.DecodeRune(line)
534	if rn == utf8.RuneError {
535		return 0, 0, io.EOF
536	}
537	r.Advance(size)
538	return rn, size, nil
539}
540