1package text
2
3import (
4	"io"
5	"regexp"
6	"unicode/utf8"
7
8	"github.com/yuin/goldmark/util"
9)
10
// invalidValue marks a segment boundary as "not set". blockReader.AdvanceLine
// passes a segment built from it to SetPosition, which then takes the position
// from the segment list instead of from its argument.
const invalidValue = -1
12
// EOF is the sentinel byte that Peek returns when there is no more input to
// read.
const EOF = byte(0xff)
15
// A Reader interface provides abstracted methods for reading text.
type Reader interface {
	io.RuneReader

	// Source returns the source of the reader.
	Source() []byte

	// ResetPosition resets positions.
	ResetPosition()

	// Peek returns the byte at the current position without advancing the
	// internal pointer. It returns EOF when there is nothing left to read.
	Peek() byte

	// PeekLine returns the current line and its segment without advancing the
	// internal pointer. The returned slice is nil when there is nothing left
	// to read.
	PeekLine() ([]byte, Segment)

	// PrecendingCharacter returns the character just before the current
	// internal pointer.
	PrecendingCharacter() rune

	// Value returns the value of the given segment.
	Value(Segment) []byte

	// LineOffset returns the distance from the line head to the current
	// position.
	LineOffset() int

	// Position returns the current line number and position.
	Position() (int, Segment)

	// SetPosition sets the current line number and position.
	SetPosition(int, Segment)

	// SetPadding sets padding to the reader.
	SetPadding(int)

	// Advance advances the internal pointer.
	Advance(int)

	// AdvanceAndSetPadding advances the internal pointer and adds padding to
	// the reader.
	AdvanceAndSetPadding(int, int)

	// AdvanceLine advances the internal pointer to the next line head.
	AdvanceLine()

	// SkipSpaces skips space characters and returns a non-blank line together
	// with the number of characters skipped.
	// If it reaches EOF, returns false.
	SkipSpaces() (Segment, int, bool)

	// SkipBlankLines skips blank lines and returns a non-blank line together
	// with the number of lines skipped.
	// If it reaches EOF, returns false.
	SkipBlankLines() (Segment, int, bool)

	// Match performs regular expression matching against the text at the
	// current position and advances past the match when there is one.
	Match(reg *regexp.Regexp) bool

	// FindSubMatch performs regular expression searching against the text at
	// the current position and returns the match and its submatches, or nil
	// when nothing matches.
	FindSubMatch(reg *regexp.Regexp) [][]byte

	// FindClosure finds the closer that corresponds to an already consumed
	// opener.
	FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool)
}
77
// FindClosureOptions holds the options for Reader.FindClosure.
type FindClosureOptions struct {
	// CodeSpan is a flag for the FindClosure. If this is set to true,
	// FindClosure ignores closers (and openers) that appear inside backtick
	// code spans.
	CodeSpan bool

	// Nesting is a flag for the FindClosure. If this is set to true,
	// FindClosure allows nesting. If it is false, the search fails as soon as
	// another opener is found.
	Nesting bool

	// Newline is a flag for the FindClosure. If this is set to true,
	// FindClosure searches for a closer over multiple lines. If it is false,
	// only the current line is searched.
	Newline bool

	// Advance is a flag for the FindClosure. If this is set to true,
	// FindClosure advances pointers when closer is found. If it is false, the
	// reader position is restored before FindClosure returns.
	Advance bool
}
96
// reader is the Reader implementation returned by NewReader. It reads
// directly from a single source byte slice.
type reader struct {
	// source is the text being read.
	source       []byte
	// sourceLength is len(source), stored once by NewReader.
	sourceLength int
	// line is the current line number; AdvanceLine increments it.
	line         int
	// peekedLine caches the result of PeekLine; nil means "not computed".
	peekedLine   []byte
	// pos is the current position: Start is the read pointer and Stop is the
	// end of the current line.
	pos          Segment
	// head is the offset in source at which the current line was entered.
	head         int
	// lineOffset caches the result of LineOffset; a negative value means
	// "not computed".
	lineOffset   int
}
106
107// NewReader return a new Reader that can read UTF-8 bytes .
108func NewReader(source []byte) Reader {
109	r := &reader{
110		source:       source,
111		sourceLength: len(source),
112	}
113	r.ResetPosition()
114	return r
115}
116
// FindClosure implements Reader.FindClosure by delegating to
// findClosureReader.
func (r *reader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
	return findClosureReader(r, opener, closer, options)
}
120
121func (r *reader) ResetPosition() {
122	r.line = -1
123	r.head = 0
124	r.lineOffset = -1
125	r.AdvanceLine()
126}
127
// Source returns the byte slice the reader was created with. The slice is
// returned as is, not copied.
func (r *reader) Source() []byte {
	return r.source
}
131
// Value returns the value of seg evaluated against the reader's source.
func (r *reader) Value(seg Segment) []byte {
	return seg.Value(r.source)
}
135
136func (r *reader) Peek() byte {
137	if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
138		if r.pos.Padding != 0 {
139			return space[0]
140		}
141		return r.source[r.pos.Start]
142	}
143	return EOF
144}
145
146func (r *reader) PeekLine() ([]byte, Segment) {
147	if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
148		if r.peekedLine == nil {
149			r.peekedLine = r.pos.Value(r.Source())
150		}
151		return r.peekedLine, r.pos
152	}
153	return nil, r.pos
154}
155
// ReadRune implements the io.RuneReader interface by delegating to
// readRuneReader.
func (r *reader) ReadRune() (rune, int, error) {
	return readRuneReader(r)
}
160
161func (r *reader) LineOffset() int {
162	if r.lineOffset < 0 {
163		v := 0
164		for i := r.head; i < r.pos.Start; i++ {
165			if r.source[i] == '\t' {
166				v += util.TabWidth(v)
167			} else {
168				v++
169			}
170		}
171		r.lineOffset = v - r.pos.Padding
172	}
173	return r.lineOffset
174}
175
176func (r *reader) PrecendingCharacter() rune {
177	if r.pos.Start <= 0 {
178		if r.pos.Padding != 0 {
179			return rune(' ')
180		}
181		return rune('\n')
182	}
183	i := r.pos.Start - 1
184	for ; i >= 0; i-- {
185		if utf8.RuneStart(r.source[i]) {
186			break
187		}
188	}
189	rn, _ := utf8.DecodeRune(r.source[i:])
190	return rn
191}
192
193func (r *reader) Advance(n int) {
194	r.lineOffset = -1
195	if n < len(r.peekedLine) && r.pos.Padding == 0 {
196		r.pos.Start += n
197		r.peekedLine = nil
198		return
199	}
200	r.peekedLine = nil
201	l := r.sourceLength
202	for ; n > 0 && r.pos.Start < l; n-- {
203		if r.pos.Padding != 0 {
204			r.pos.Padding--
205			continue
206		}
207		if r.source[r.pos.Start] == '\n' {
208			r.AdvanceLine()
209			continue
210		}
211		r.pos.Start++
212	}
213}
214
215func (r *reader) AdvanceAndSetPadding(n, padding int) {
216	r.Advance(n)
217	if padding > r.pos.Padding {
218		r.SetPadding(padding)
219	}
220}
221
222func (r *reader) AdvanceLine() {
223	r.lineOffset = -1
224	r.peekedLine = nil
225	r.pos.Start = r.pos.Stop
226	r.head = r.pos.Start
227	if r.pos.Start < 0 {
228		return
229	}
230	r.pos.Stop = r.sourceLength
231	for i := r.pos.Start; i < r.sourceLength; i++ {
232		c := r.source[i]
233		if c == '\n' {
234			r.pos.Stop = i + 1
235			break
236		}
237	}
238	r.line++
239	r.pos.Padding = 0
240}
241
// Position returns the current line number and position segment. The pair can
// be handed back to SetPosition later.
func (r *reader) Position() (int, Segment) {
	return r.line, r.pos
}
245
246func (r *reader) SetPosition(line int, pos Segment) {
247	r.lineOffset = -1
248	r.line = line
249	r.pos = pos
250}
251
252func (r *reader) SetPadding(v int) {
253	r.pos.Padding = v
254}
255
// SkipSpaces implements Reader.SkipSpaces by delegating to skipSpacesReader.
func (r *reader) SkipSpaces() (Segment, int, bool) {
	return skipSpacesReader(r)
}
259
// SkipBlankLines implements Reader.SkipBlankLines by delegating to
// skipBlankLinesReader.
func (r *reader) SkipBlankLines() (Segment, int, bool) {
	return skipBlankLinesReader(r)
}
263
// Match implements Reader.Match by delegating to matchReader.
func (r *reader) Match(reg *regexp.Regexp) bool {
	return matchReader(r, reg)
}
267
// FindSubMatch implements Reader.FindSubMatch by delegating to
// findSubMatchReader.
func (r *reader) FindSubMatch(reg *regexp.Regexp) [][]byte {
	return findSubMatchReader(r, reg)
}
271
// A BlockReader interface is a reader that is optimized for Blocks. It reads
// the source through a list of segments instead of reading it front to back.
type BlockReader interface {
	Reader
	// Reset resets the current state and sets new segments to the reader.
	Reset(segment *Segments)
}
278
// blockReader is the BlockReader implementation returned by NewBlockReader.
// Each entry of segments is treated as one line.
type blockReader struct {
	// source is the text the segments point into.
	source         []byte
	// segments is the list of lines to read; it is replaced by Reset.
	segments       *Segments
	// segmentsLength is segments.Len(), stored by Reset.
	segmentsLength int
	// line is the index into segments of the current line.
	line           int
	// pos is the current position: Start is the read pointer and Stop is the
	// end of the current segment.
	pos            Segment
	// head is the offset in source at which the current line starts.
	head           int
	// last is the Stop offset of the final segment; reading ends there.
	last           int
	// lineOffset caches the result of LineOffset; a negative value means
	// "not computed".
	lineOffset     int
}
289
290// NewBlockReader returns a new BlockReader.
291func NewBlockReader(source []byte, segments *Segments) BlockReader {
292	r := &blockReader{
293		source: source,
294	}
295	if segments != nil {
296		r.Reset(segments)
297	}
298	return r
299}
300
// FindClosure implements Reader.FindClosure by delegating to
// findClosureReader.
func (r *blockReader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
	return findClosureReader(r, opener, closer, options)
}
304
305func (r *blockReader) ResetPosition() {
306	r.line = -1
307	r.head = 0
308	r.last = 0
309	r.lineOffset = -1
310	r.pos.Start = -1
311	r.pos.Stop = -1
312	r.pos.Padding = 0
313	if r.segmentsLength > 0 {
314		last := r.segments.At(r.segmentsLength - 1)
315		r.last = last.Stop
316	}
317	r.AdvanceLine()
318}
319
// Reset replaces the segments being read, stores their count, and rewinds the
// reader to the first of them. segments.Len() is called unconditionally, so
// segments is presumably expected to be non-nil.
func (r *blockReader) Reset(segments *Segments) {
	r.segments = segments
	r.segmentsLength = segments.Len()
	r.ResetPosition()
}
325
// Source returns the byte slice the reader was created with. The slice is
// returned as is, not copied.
func (r *blockReader) Source() []byte {
	return r.source
}
329
// Value assembles the bytes covered by seg from the reader's segments. Unlike
// reader.Value it does not slice the source directly: it walks the segment
// list so that only bytes belonging to a segment, plus each segment's padding,
// end up in the result. A new slice is returned.
func (r *blockReader) Value(seg Segment) []byte {
	line := r.segmentsLength - 1
	ret := make([]byte, 0, seg.Stop-seg.Start+1)
	// Scan backwards for the last segment that starts at or before seg.Start.
	// NOTE(review): if no segment qualifies (or there are none), line ends at
	// -1 and the loop below calls At(-1); presumably callers never pass such a
	// segment - confirm.
	for ; line >= 0; line-- {
		if seg.Start >= r.segments.At(line).Start {
			break
		}
	}
	i := seg.Start
	for ; line < r.segmentsLength; line++ {
		s := r.segments.At(line)
		// A negative i marks "continue at the start of this segment".
		if i < 0 {
			i = s.Start
		}
		// Presumably appends the segment's padding to ret; see
		// Segment.ConcatPadding.
		ret = s.ConcatPadding(ret)
		// Copy source bytes up to whichever ends first: seg or this segment.
		for ; i < seg.Stop && i < s.Stop; i++ {
			ret = append(ret, r.source[i])
		}
		i = -1
		// This segment reaches past the requested range, so nothing further
		// can belong to seg.
		if s.Stop > seg.Stop {
			break
		}
	}
	return ret
}
355
// ReadRune implements the io.RuneReader interface by delegating to
// readRuneReader.
func (r *blockReader) ReadRune() (rune, int, error) {
	return readRuneReader(r)
}
360
361func (r *blockReader) PrecendingCharacter() rune {
362	if r.pos.Padding != 0 {
363		return rune(' ')
364	}
365	if r.segments.Len() < 1 {
366		return rune('\n')
367	}
368	firstSegment := r.segments.At(0)
369	if r.line == 0 && r.pos.Start <= firstSegment.Start {
370		return rune('\n')
371	}
372	l := len(r.source)
373	i := r.pos.Start - 1
374	for ; i < l && i >= 0; i-- {
375		if utf8.RuneStart(r.source[i]) {
376			break
377		}
378	}
379	if i < 0 || i >= l {
380		return rune('\n')
381	}
382	rn, _ := utf8.DecodeRune(r.source[i:])
383	return rn
384}
385
386func (r *blockReader) LineOffset() int {
387	if r.lineOffset < 0 {
388		v := 0
389		for i := r.head; i < r.pos.Start; i++ {
390			if r.source[i] == '\t' {
391				v += util.TabWidth(v)
392			} else {
393				v++
394			}
395		}
396		r.lineOffset = v - r.pos.Padding
397	}
398	return r.lineOffset
399}
400
401func (r *blockReader) Peek() byte {
402	if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
403		if r.pos.Padding != 0 {
404			return space[0]
405		}
406		return r.source[r.pos.Start]
407	}
408	return EOF
409}
410
411func (r *blockReader) PeekLine() ([]byte, Segment) {
412	if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
413		return r.pos.Value(r.source), r.pos
414	}
415	return nil, r.pos
416}
417
418func (r *blockReader) Advance(n int) {
419	r.lineOffset = -1
420
421	if n < r.pos.Stop-r.pos.Start && r.pos.Padding == 0 {
422		r.pos.Start += n
423		return
424	}
425
426	for ; n > 0; n-- {
427		if r.pos.Padding != 0 {
428			r.pos.Padding--
429			continue
430		}
431		if r.pos.Start >= r.pos.Stop-1 && r.pos.Stop < r.last {
432			r.AdvanceLine()
433			continue
434		}
435		r.pos.Start++
436	}
437}
438
439func (r *blockReader) AdvanceAndSetPadding(n, padding int) {
440	r.Advance(n)
441	if padding > r.pos.Padding {
442		r.SetPadding(padding)
443	}
444}
445
// AdvanceLine moves the reader to the next segment. It passes a segment built
// from invalidValue to SetPosition, which loads the segment stored at the new
// line index when one exists; past the last segment only the line number
// changes. head is then aligned with the start of the current position.
func (r *blockReader) AdvanceLine() {
	r.SetPosition(r.line+1, NewSegment(invalidValue, invalidValue))
	r.head = r.pos.Start
}
450
// Position returns the current line number (an index into the segments) and
// position segment. The pair can be handed back to SetPosition later.
func (r *blockReader) Position() (int, Segment) {
	return r.line, r.pos
}
454
455func (r *blockReader) SetPosition(line int, pos Segment) {
456	r.lineOffset = -1
457	r.line = line
458	if pos.Start == invalidValue {
459		if r.line < r.segmentsLength {
460			s := r.segments.At(line)
461			r.head = s.Start
462			r.pos = s
463		}
464	} else {
465		r.pos = pos
466		if r.line < r.segmentsLength {
467			s := r.segments.At(line)
468			r.head = s.Start
469		}
470	}
471}
472
// SetPadding sets the pending padding of the current position. The cached
// line offset is dropped because LineOffset subtracts the padding.
func (r *blockReader) SetPadding(v int) {
	r.lineOffset = -1
	r.pos.Padding = v
}
477
// SkipSpaces implements Reader.SkipSpaces by delegating to skipSpacesReader.
func (r *blockReader) SkipSpaces() (Segment, int, bool) {
	return skipSpacesReader(r)
}
481
// SkipBlankLines implements Reader.SkipBlankLines by delegating to
// skipBlankLinesReader.
func (r *blockReader) SkipBlankLines() (Segment, int, bool) {
	return skipBlankLinesReader(r)
}
485
// Match implements Reader.Match by delegating to matchReader.
func (r *blockReader) Match(reg *regexp.Regexp) bool {
	return matchReader(r, reg)
}
489
// FindSubMatch implements Reader.FindSubMatch by delegating to
// findSubMatchReader.
func (r *blockReader) FindSubMatch(reg *regexp.Regexp) [][]byte {
	return findSubMatchReader(r, reg)
}
493
494func skipBlankLinesReader(r Reader) (Segment, int, bool) {
495	lines := 0
496	for {
497		line, seg := r.PeekLine()
498		if line == nil {
499			return seg, lines, false
500		}
501		if util.IsBlank(line) {
502			lines++
503			r.AdvanceLine()
504		} else {
505			return seg, lines, true
506		}
507	}
508}
509
510func skipSpacesReader(r Reader) (Segment, int, bool) {
511	chars := 0
512	for {
513		line, segment := r.PeekLine()
514		if line == nil {
515			return segment, chars, false
516		}
517		for i, c := range line {
518			if util.IsSpace(c) {
519				chars++
520				r.Advance(1)
521				continue
522			}
523			return segment.WithStart(segment.Start + i + 1), chars, true
524		}
525	}
526}
527
528func matchReader(r Reader, reg *regexp.Regexp) bool {
529	oldline, oldseg := r.Position()
530	match := reg.FindReaderSubmatchIndex(r)
531	r.SetPosition(oldline, oldseg)
532	if match == nil {
533		return false
534	}
535	r.Advance(match[1] - match[0])
536	return true
537}
538
539func findSubMatchReader(r Reader, reg *regexp.Regexp) [][]byte {
540	oldline, oldseg := r.Position()
541	match := reg.FindReaderSubmatchIndex(r)
542	r.SetPosition(oldline, oldseg)
543	if match == nil {
544		return nil
545	}
546	runes := make([]rune, 0, match[1]-match[0])
547	for i := 0; i < match[1]; {
548		r, size, _ := readRuneReader(r)
549		i += size
550		runes = append(runes, r)
551	}
552	result := [][]byte{}
553	for i := 0; i < len(match); i += 2 {
554		result = append(result, []byte(string(runes[match[i]:match[i+1]])))
555	}
556
557	r.SetPosition(oldline, oldseg)
558	r.Advance(match[1] - match[0])
559	return result
560}
561
562func readRuneReader(r Reader) (rune, int, error) {
563	line, _ := r.PeekLine()
564	if line == nil {
565		return 0, 0, io.EOF
566	}
567	rn, size := utf8.DecodeRune(line)
568	if rn == utf8.RuneError {
569		return 0, 0, io.EOF
570	}
571	r.Advance(size)
572	return rn, size, nil
573}
574
// findClosureReader looks for the closer that balances an opener which has
// already been consumed, starting at the current position of r.
//
// On success it returns the segments of the text that precedes the closer and
// true; otherwise it returns nil and false. The search gives up at the end of
// the input, at the end of the current line when opts.Newline is false, and
// at a further opener when opts.Nesting is false. Unless opts.Advance is set
// the reader position is restored before returning, whether or not a closer
// was found.
func findClosureReader(r Reader, opener, closer byte, opts FindClosureOptions) (*Segments, bool) {
	// The opener has been read by the caller, so one closure is already open.
	opened := 1
	// Length of the backtick run that opened the current code span; 0 means
	// the scan is not inside a code span.
	codeSpanOpener := 0
	closed := false
	orgline, orgpos := r.Position()
	var ret *Segments

	for {
		bs, seg := r.PeekLine()
		if bs == nil {
			goto end
		}
		i := 0
		for i < len(bs) {
			c := bs[i]
			if opts.CodeSpan && codeSpanOpener != 0 && c == '`' {
				// Inside a code span: count this backtick run; the span ends
				// only when the run has the same length as the opening run.
				codeSpanCloser := 0
				for ; i < len(bs); i++ {
					if bs[i] == '`' {
						codeSpanCloser++
					} else {
						// Step back so the i++ below lands on this byte.
						i--
						break
					}
				}
				if codeSpanCloser == codeSpanOpener {
					codeSpanOpener = 0
				}
			} else if codeSpanOpener == 0 && c == '\\' && i < len(bs)-1 && util.IsPunct(bs[i+1]) {
				// Backslash-escaped punctuation: skip both bytes.
				i += 2
				continue
			} else if opts.CodeSpan && codeSpanOpener == 0 && c == '`' {
				// Start of a code span: remember the length of the run.
				for ; i < len(bs); i++ {
					if bs[i] == '`' {
						codeSpanOpener++
					} else {
						// Step back so the i++ below lands on this byte.
						i--
						break
					}
				}
			} else if (opts.CodeSpan && codeSpanOpener == 0) || !opts.CodeSpan {
				// Openers and closers only count outside code spans.
				if c == closer {
					opened--
					if opened == 0 {
						if ret == nil {
							ret = NewSegments()
						}
						// Keep the part of this line before the closer and
						// move the reader just past the closer.
						ret.Append(seg.WithStop(seg.Start + i))
						r.Advance(i + 1)
						closed = true
						goto end
					}
				} else if c == opener {
					if !opts.Nesting {
						goto end
					}
					opened++
				}
			}
			i++
		}
		if !opts.Newline {
			goto end
		}
		// No closer on this line: record the whole line and continue with the
		// next one.
		r.AdvanceLine()
		if ret == nil {
			ret = NewSegments()
		}
		ret.Append(seg)
	}
end:
	if !opts.Advance {
		r.SetPosition(orgline, orgpos)
	}
	if closed {
		return ret, true
	}
	return nil, false
}
654