1package text
2
3import (
4	"io"
5	"regexp"
6	"unicode/utf8"
7
8	"github.com/yuin/goldmark/util"
9)
10
// invalidValue marks a Segment bound that has not been set. blockReader's
// SetPosition treats a Start equal to it as "use the segment stored for the
// given line".
const invalidValue = -1

// EOF indicates the end of file. Peek returns it when the current position
// is outside of the readable range.
const EOF = byte(0xff)
15
// A Reader interface provides abstracted methods for reading text.
type Reader interface {
	io.RuneReader

	// Source returns the source of the reader.
	Source() []byte

	// ResetPosition resets positions.
	ResetPosition()

	// Peek returns the byte at the current position without advancing the
	// internal pointer.
	Peek() byte

	// PeekLine returns the current line without advancing the internal pointer.
	PeekLine() ([]byte, Segment)

	// PrecendingCharacter returns the character just before the current
	// internal pointer.
	PrecendingCharacter() rune

	// Value returns the value of the given segment.
	Value(Segment) []byte

	// LineOffset returns the distance from the line head to the current
	// position.
	LineOffset() int

	// Position returns the current line number and position.
	Position() (int, Segment)

	// SetPosition sets the current line number and position.
	SetPosition(int, Segment)

	// SetPadding sets padding to the reader.
	SetPadding(int)

	// Advance advances the internal pointer.
	Advance(int)

	// AdvanceAndSetPadding advances the internal pointer and adds padding to
	// the reader.
	AdvanceAndSetPadding(int, int)

	// AdvanceLine advances the internal pointer to the next line head.
	AdvanceLine()

	// SkipSpaces skips space characters and returns a segment, the number of
	// skipped characters and whether a non-space character was found.
	// If it reaches EOF, returns false.
	SkipSpaces() (Segment, int, bool)

	// SkipBlankLines skips blank lines and returns the segment of the first
	// non-blank line, the number of skipped lines and whether such a line
	// was found.
	// If it reaches EOF, returns false.
	SkipBlankLines() (Segment, int, bool)

	// Match performs regular expression matching from the current position.
	Match(reg *regexp.Regexp) bool

	// FindSubMatch performs regular expression searching from the current
	// position and returns the matched text and its submatches.
	FindSubMatch(reg *regexp.Regexp) [][]byte
}
74
// reader is the Reader implementation over a whole source buffer.
type reader struct {
	// source is the text being read.
	source []byte
	// sourceLength caches len(source).
	sourceLength int
	// line is the index of the current line; AdvanceLine increments it.
	line int
	// peekedLine caches the result of PeekLine; nil means "not cached".
	peekedLine []byte
	// pos is the current position: the rest of the current line.
	pos Segment
	// head is the offset at which the current line starts.
	head int
	// lineOffset caches the result of LineOffset; a negative value means
	// "not computed".
	lineOffset int
}
84
85// NewReader return a new Reader that can read UTF-8 bytes .
86func NewReader(source []byte) Reader {
87	r := &reader{
88		source:       source,
89		sourceLength: len(source),
90	}
91	r.ResetPosition()
92	return r
93}
94
95func (r *reader) ResetPosition() {
96	r.line = -1
97	r.head = 0
98	r.lineOffset = -1
99	r.AdvanceLine()
100}
101
// Source returns the byte slice this reader was created with.
func (r *reader) Source() []byte {
	return r.source
}
105
// Value returns the value of the given segment, evaluated against the
// reader's source.
func (r *reader) Value(seg Segment) []byte {
	return seg.Value(r.source)
}
109
110func (r *reader) Peek() byte {
111	if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
112		if r.pos.Padding != 0 {
113			return space[0]
114		}
115		return r.source[r.pos.Start]
116	}
117	return EOF
118}
119
120func (r *reader) PeekLine() ([]byte, Segment) {
121	if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
122		if r.peekedLine == nil {
123			r.peekedLine = r.pos.Value(r.Source())
124		}
125		return r.peekedLine, r.pos
126	}
127	return nil, r.pos
128}
129
// ReadRune implements io.RuneReader by delegating to readRuneReader.
func (r *reader) ReadRune() (rune, int, error) {
	return readRuneReader(r)
}
134
135func (r *reader) LineOffset() int {
136	if r.lineOffset < 0 {
137		v := 0
138		for i := r.head; i < r.pos.Start; i++ {
139			if r.source[i] == '\t' {
140				v += util.TabWidth(v)
141			} else {
142				v++
143			}
144		}
145		r.lineOffset = v - r.pos.Padding
146	}
147	return r.lineOffset
148}
149
150func (r *reader) PrecendingCharacter() rune {
151	if r.pos.Start <= 0 {
152		if r.pos.Padding != 0 {
153			return rune(' ')
154		}
155		return rune('\n')
156	}
157	i := r.pos.Start - 1
158	for ; i >= 0; i-- {
159		if utf8.RuneStart(r.source[i]) {
160			break
161		}
162	}
163	rn, _ := utf8.DecodeRune(r.source[i:])
164	return rn
165}
166
167func (r *reader) Advance(n int) {
168	r.lineOffset = -1
169	if n < len(r.peekedLine) && r.pos.Padding == 0 {
170		r.pos.Start += n
171		r.peekedLine = nil
172		return
173	}
174	r.peekedLine = nil
175	l := r.sourceLength
176	for ; n > 0 && r.pos.Start < l; n-- {
177		if r.pos.Padding != 0 {
178			r.pos.Padding--
179			continue
180		}
181		if r.source[r.pos.Start] == '\n' {
182			r.AdvanceLine()
183			continue
184		}
185		r.pos.Start++
186	}
187}
188
189func (r *reader) AdvanceAndSetPadding(n, padding int) {
190	r.Advance(n)
191	if padding > r.pos.Padding {
192		r.SetPadding(padding)
193	}
194}
195
196func (r *reader) AdvanceLine() {
197	r.lineOffset = -1
198	r.peekedLine = nil
199	r.pos.Start = r.pos.Stop
200	r.head = r.pos.Start
201	if r.pos.Start < 0 {
202		return
203	}
204	r.pos.Stop = r.sourceLength
205	for i := r.pos.Start; i < r.sourceLength; i++ {
206		c := r.source[i]
207		if c == '\n' {
208			r.pos.Stop = i + 1
209			break
210		}
211	}
212	r.line++
213	r.pos.Padding = 0
214}
215
// Position returns the current line number and the current position.
func (r *reader) Position() (int, Segment) {
	return r.line, r.pos
}
219
220func (r *reader) SetPosition(line int, pos Segment) {
221	r.lineOffset = -1
222	r.line = line
223	r.pos = pos
224}
225
226func (r *reader) SetPadding(v int) {
227	r.pos.Padding = v
228}
229
// SkipSpaces skips space characters; see skipSpacesReader.
func (r *reader) SkipSpaces() (Segment, int, bool) {
	return skipSpacesReader(r)
}

// SkipBlankLines skips blank lines; see skipBlankLinesReader.
func (r *reader) SkipBlankLines() (Segment, int, bool) {
	return skipBlankLinesReader(r)
}

// Match matches reg from the current position; see matchReader.
func (r *reader) Match(reg *regexp.Regexp) bool {
	return matchReader(r, reg)
}

// FindSubMatch searches reg from the current position; see
// findSubMatchReader.
func (r *reader) FindSubMatch(reg *regexp.Regexp) [][]byte {
	return findSubMatchReader(r, reg)
}
245
// A BlockReader interface is a reader that is optimized for Blocks.
// It reads the lines described by a set of segments of the source.
type BlockReader interface {
	Reader
	// Reset resets the current state and sets new segments to the reader.
	Reset(segment *Segments)
}
252
// blockReader is the BlockReader implementation. Each segment is treated as
// one line of the block.
type blockReader struct {
	// source is the whole text the segments refer to.
	source []byte
	// segments holds the lines of the block; set by Reset.
	segments *Segments
	// segmentsLength caches segments.Len().
	segmentsLength int
	// line is the index of the current segment.
	line int
	// pos is the current position within the current segment.
	pos Segment
	// head is the Start of the current segment.
	head int
	// last is the Stop of the last segment, or 0 if there are no segments.
	last int
	// lineOffset caches the result of LineOffset; a negative value means
	// "not computed".
	lineOffset int
}
263
264// NewBlockReader returns a new BlockReader.
265func NewBlockReader(source []byte, segments *Segments) BlockReader {
266	r := &blockReader{
267		source: source,
268	}
269	if segments != nil {
270		r.Reset(segments)
271	}
272	return r
273}
274
275func (r *blockReader) ResetPosition() {
276	r.line = -1
277	r.head = 0
278	r.last = 0
279	r.lineOffset = -1
280	r.pos.Start = -1
281	r.pos.Stop = -1
282	r.pos.Padding = 0
283	if r.segmentsLength > 0 {
284		last := r.segments.At(r.segmentsLength - 1)
285		r.last = last.Stop
286	}
287	r.AdvanceLine()
288}
289
// Reset replaces the segments of the reader, caches their count and rewinds
// the position to the first segment.
func (r *blockReader) Reset(segments *Segments) {
	r.segments = segments
	r.segmentsLength = segments.Len()
	r.ResetPosition()
}
295
// Source returns the whole source the segments refer to.
func (r *blockReader) Source() []byte {
	return r.source
}
299
// Value returns the bytes covered by seg, assembled line by line from the
// reader's segments. Every visited line contributes the output of its
// ConcatPadding (presumably the line's padding) followed by its source bytes
// up to seg.Stop.
func (r *blockReader) Value(seg Segment) []byte {
	line := r.segmentsLength - 1
	ret := make([]byte, 0, seg.Stop-seg.Start+1)
	// Walk backwards to the last line whose Start is not after seg.Start.
	// NOTE(review): if no line qualifies, or there are no segments, line is
	// -1 here and is passed to At below; confirm callers never do this.
	for ; line >= 0; line-- {
		if seg.Start >= r.segments.At(line).Start {
			break
		}
	}
	i := seg.Start
	for ; line < r.segmentsLength; line++ {
		s := r.segments.At(line)
		// A negative i means "continue from the head of this line".
		if i < 0 {
			i = s.Start
		}
		ret = s.ConcatPadding(ret)
		for ; i < seg.Stop && i < s.Stop; i++ {
			ret = append(ret, r.source[i])
		}
		i = -1
		// Stop once a line extends past the requested range.
		if s.Stop > seg.Stop {
			break
		}
	}
	return ret
}
325
// ReadRune implements io.RuneReader by delegating to readRuneReader.
func (r *blockReader) ReadRune() (rune, int, error) {
	return readRuneReader(r)
}
330
331func (r *blockReader) PrecendingCharacter() rune {
332	if r.pos.Padding != 0 {
333		return rune(' ')
334	}
335	if r.segments.Len() < 1 {
336		return rune('\n')
337	}
338	firstSegment := r.segments.At(0)
339	if r.line == 0 && r.pos.Start <= firstSegment.Start {
340		return rune('\n')
341	}
342	l := len(r.source)
343	i := r.pos.Start - 1
344	for ; i < l && i >= 0; i-- {
345		if utf8.RuneStart(r.source[i]) {
346			break
347		}
348	}
349	if i < 0 || i >= l {
350		return rune('\n')
351	}
352	rn, _ := utf8.DecodeRune(r.source[i:])
353	return rn
354}
355
356func (r *blockReader) LineOffset() int {
357	if r.lineOffset < 0 {
358		v := 0
359		for i := r.head; i < r.pos.Start; i++ {
360			if r.source[i] == '\t' {
361				v += util.TabWidth(v)
362			} else {
363				v++
364			}
365		}
366		r.lineOffset = v - r.pos.Padding
367	}
368	return r.lineOffset
369}
370
371func (r *blockReader) Peek() byte {
372	if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
373		if r.pos.Padding != 0 {
374			return space[0]
375		}
376		return r.source[r.pos.Start]
377	}
378	return EOF
379}
380
381func (r *blockReader) PeekLine() ([]byte, Segment) {
382	if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
383		return r.pos.Value(r.source), r.pos
384	}
385	return nil, r.pos
386}
387
388func (r *blockReader) Advance(n int) {
389	r.lineOffset = -1
390
391	if n < r.pos.Stop-r.pos.Start && r.pos.Padding == 0 {
392		r.pos.Start += n
393		return
394	}
395
396	for ; n > 0; n-- {
397		if r.pos.Padding != 0 {
398			r.pos.Padding--
399			continue
400		}
401		if r.pos.Start >= r.pos.Stop-1 && r.pos.Stop < r.last {
402			r.AdvanceLine()
403			continue
404		}
405		r.pos.Start++
406	}
407}
408
409func (r *blockReader) AdvanceAndSetPadding(n, padding int) {
410	r.Advance(n)
411	if padding > r.pos.Padding {
412		r.SetPadding(padding)
413	}
414}
415
// AdvanceLine moves the reader to the next line. Passing a segment built
// from invalidValue makes SetPosition load the segment stored for that line
// if the line exists; otherwise the position is left as it is and only the
// line number is incremented.
func (r *blockReader) AdvanceLine() {
	r.SetPosition(r.line+1, NewSegment(invalidValue, invalidValue))
	r.head = r.pos.Start
}
420
// Position returns the current line number and the current position.
func (r *blockReader) Position() (int, Segment) {
	return r.line, r.pos
}
424
425func (r *blockReader) SetPosition(line int, pos Segment) {
426	r.lineOffset = -1
427	r.line = line
428	if pos.Start == invalidValue {
429		if r.line < r.segmentsLength {
430			s := r.segments.At(line)
431			r.head = s.Start
432			r.pos = s
433		}
434	} else {
435		r.pos = pos
436		if r.line < r.segmentsLength {
437			s := r.segments.At(line)
438			r.head = s.Start
439		}
440	}
441}
442
// SetPadding sets the padding of the current position. The cached line
// offset is invalidated because LineOffset subtracts the padding.
func (r *blockReader) SetPadding(v int) {
	r.lineOffset = -1
	r.pos.Padding = v
}
447
// SkipSpaces skips space characters; see skipSpacesReader.
func (r *blockReader) SkipSpaces() (Segment, int, bool) {
	return skipSpacesReader(r)
}

// SkipBlankLines skips blank lines; see skipBlankLinesReader.
func (r *blockReader) SkipBlankLines() (Segment, int, bool) {
	return skipBlankLinesReader(r)
}

// Match matches reg from the current position; see matchReader.
func (r *blockReader) Match(reg *regexp.Regexp) bool {
	return matchReader(r, reg)
}

// FindSubMatch searches reg from the current position; see
// findSubMatchReader.
func (r *blockReader) FindSubMatch(reg *regexp.Regexp) [][]byte {
	return findSubMatchReader(r, reg)
}
463
464func skipBlankLinesReader(r Reader) (Segment, int, bool) {
465	lines := 0
466	for {
467		line, seg := r.PeekLine()
468		if line == nil {
469			return seg, lines, false
470		}
471		if util.IsBlank(line) {
472			lines++
473			r.AdvanceLine()
474		} else {
475			return seg, lines, true
476		}
477	}
478}
479
480func skipSpacesReader(r Reader) (Segment, int, bool) {
481	chars := 0
482	for {
483		line, segment := r.PeekLine()
484		if line == nil {
485			return segment, chars, false
486		}
487		for i, c := range line {
488			if util.IsSpace(c) {
489				chars++
490				r.Advance(1)
491				continue
492			}
493			return segment.WithStart(segment.Start + i + 1), chars, true
494		}
495	}
496}
497
498func matchReader(r Reader, reg *regexp.Regexp) bool {
499	oldline, oldseg := r.Position()
500	match := reg.FindReaderSubmatchIndex(r)
501	r.SetPosition(oldline, oldseg)
502	if match == nil {
503		return false
504	}
505	r.Advance(match[1] - match[0])
506	return true
507}
508
509func findSubMatchReader(r Reader, reg *regexp.Regexp) [][]byte {
510	oldline, oldseg := r.Position()
511	match := reg.FindReaderSubmatchIndex(r)
512	r.SetPosition(oldline, oldseg)
513	if match == nil {
514		return nil
515	}
516	runes := make([]rune, 0, match[1]-match[0])
517	for i := 0; i < match[1]; {
518		r, size, _ := readRuneReader(r)
519		i += size
520		runes = append(runes, r)
521	}
522	result := [][]byte{}
523	for i := 0; i < len(match); i += 2 {
524		result = append(result, []byte(string(runes[match[i]:match[i+1]])))
525	}
526
527	r.SetPosition(oldline, oldseg)
528	r.Advance(match[1] - match[0])
529	return result
530}
531
532func readRuneReader(r Reader) (rune, int, error) {
533	line, _ := r.PeekLine()
534	if line == nil {
535		return 0, 0, io.EOF
536	}
537	rn, size := utf8.DecodeRune(line)
538	if rn == utf8.RuneError {
539		return 0, 0, io.EOF
540	}
541	r.Advance(size)
542	return rn, size, nil
543}
544