1package text 2 3import ( 4 "io" 5 "regexp" 6 "unicode/utf8" 7 8 "github.com/yuin/goldmark/util" 9) 10 11const invalidValue = -1 12 13// EOF indicates the end of file. 14const EOF = byte(0xff) 15 16// A Reader interface provides abstracted method for reading text. 17type Reader interface { 18 io.RuneReader 19 20 // Source returns a source of the reader. 21 Source() []byte 22 23 // ResetPosition resets positions. 24 ResetPosition() 25 26 // Peek returns a byte at current position without advancing the internal pointer. 27 Peek() byte 28 29 // PeekLine returns the current line without advancing the internal pointer. 30 PeekLine() ([]byte, Segment) 31 32 // PrecendingCharacter returns a character just before current internal pointer. 33 PrecendingCharacter() rune 34 35 // Value returns a value of the given segment. 36 Value(Segment) []byte 37 38 // LineOffset returns a distance from the line head to current position. 39 LineOffset() int 40 41 // Position returns current line number and position. 42 Position() (int, Segment) 43 44 // SetPosition sets current line number and position. 45 SetPosition(int, Segment) 46 47 // SetPadding sets padding to the reader. 48 SetPadding(int) 49 50 // Advance advances the internal pointer. 51 Advance(int) 52 53 // AdvanceAndSetPadding advances the internal pointer and add padding to the 54 // reader. 55 AdvanceAndSetPadding(int, int) 56 57 // AdvanceLine advances the internal pointer to the next line head. 58 AdvanceLine() 59 60 // SkipSpaces skips space characters and returns a non-blank line. 61 // If it reaches EOF, returns false. 62 SkipSpaces() (Segment, int, bool) 63 64 // SkipSpaces skips blank lines and returns a non-blank line. 65 // If it reaches EOF, returns false. 66 SkipBlankLines() (Segment, int, bool) 67 68 // Match performs regular expression matching to current line. 69 Match(reg *regexp.Regexp) bool 70 71 // Match performs regular expression searching to current line. 72 FindSubMatch(reg *regexp.Regexp) [][]byte 73} 74 75type reader struct { 76 source []byte 77 sourceLength int 78 line int 79 peekedLine []byte 80 pos Segment 81 head int 82 lineOffset int 83} 84 85// NewReader return a new Reader that can read UTF-8 bytes . 86func NewReader(source []byte) Reader { 87 r := &reader{ 88 source: source, 89 sourceLength: len(source), 90 } 91 r.ResetPosition() 92 return r 93} 94 95func (r *reader) ResetPosition() { 96 r.line = -1 97 r.head = 0 98 r.lineOffset = -1 99 r.AdvanceLine() 100} 101 102func (r *reader) Source() []byte { 103 return r.source 104} 105 106func (r *reader) Value(seg Segment) []byte { 107 return seg.Value(r.source) 108} 109 110func (r *reader) Peek() byte { 111 if r.pos.Start >= 0 && r.pos.Start < r.sourceLength { 112 if r.pos.Padding != 0 { 113 return space[0] 114 } 115 return r.source[r.pos.Start] 116 } 117 return EOF 118} 119 120func (r *reader) PeekLine() ([]byte, Segment) { 121 if r.pos.Start >= 0 && r.pos.Start < r.sourceLength { 122 if r.peekedLine == nil { 123 r.peekedLine = r.pos.Value(r.Source()) 124 } 125 return r.peekedLine, r.pos 126 } 127 return nil, r.pos 128} 129 130// io.RuneReader interface 131func (r *reader) ReadRune() (rune, int, error) { 132 return readRuneReader(r) 133} 134 135func (r *reader) LineOffset() int { 136 if r.lineOffset < 0 { 137 v := 0 138 for i := r.head; i < r.pos.Start; i++ { 139 if r.source[i] == '\t' { 140 v += util.TabWidth(v) 141 } else { 142 v++ 143 } 144 } 145 r.lineOffset = v - r.pos.Padding 146 } 147 return r.lineOffset 148} 149 150func (r *reader) PrecendingCharacter() rune { 151 if r.pos.Start <= 0 { 152 if r.pos.Padding != 0 { 153 return rune(' ') 154 } 155 return rune('\n') 156 } 157 i := r.pos.Start - 1 158 for ; i >= 0; i-- { 159 if utf8.RuneStart(r.source[i]) { 160 break 161 } 162 } 163 rn, _ := utf8.DecodeRune(r.source[i:]) 164 return rn 165} 166 167func (r *reader) Advance(n int) { 168 r.lineOffset = -1 169 if n < len(r.peekedLine) && r.pos.Padding == 0 { 170 r.pos.Start += n 171 r.peekedLine = nil 172 return 173 } 174 r.peekedLine = nil 175 l := r.sourceLength 176 for ; n > 0 && r.pos.Start < l; n-- { 177 if r.pos.Padding != 0 { 178 r.pos.Padding-- 179 continue 180 } 181 if r.source[r.pos.Start] == '\n' { 182 r.AdvanceLine() 183 continue 184 } 185 r.pos.Start++ 186 } 187} 188 189func (r *reader) AdvanceAndSetPadding(n, padding int) { 190 r.Advance(n) 191 if padding > r.pos.Padding { 192 r.SetPadding(padding) 193 } 194} 195 196func (r *reader) AdvanceLine() { 197 r.lineOffset = -1 198 r.peekedLine = nil 199 r.pos.Start = r.pos.Stop 200 r.head = r.pos.Start 201 if r.pos.Start < 0 { 202 return 203 } 204 r.pos.Stop = r.sourceLength 205 for i := r.pos.Start; i < r.sourceLength; i++ { 206 c := r.source[i] 207 if c == '\n' { 208 r.pos.Stop = i + 1 209 break 210 } 211 } 212 r.line++ 213 r.pos.Padding = 0 214} 215 216func (r *reader) Position() (int, Segment) { 217 return r.line, r.pos 218} 219 220func (r *reader) SetPosition(line int, pos Segment) { 221 r.lineOffset = -1 222 r.line = line 223 r.pos = pos 224} 225 226func (r *reader) SetPadding(v int) { 227 r.pos.Padding = v 228} 229 230func (r *reader) SkipSpaces() (Segment, int, bool) { 231 return skipSpacesReader(r) 232} 233 234func (r *reader) SkipBlankLines() (Segment, int, bool) { 235 return skipBlankLinesReader(r) 236} 237 238func (r *reader) Match(reg *regexp.Regexp) bool { 239 return matchReader(r, reg) 240} 241 242func (r *reader) FindSubMatch(reg *regexp.Regexp) [][]byte { 243 return findSubMatchReader(r, reg) 244} 245 246// A BlockReader interface is a reader that is optimized for Blocks. 247type BlockReader interface { 248 Reader 249 // Reset resets current state and sets new segments to the reader. 250 Reset(segment *Segments) 251} 252 253type blockReader struct { 254 source []byte 255 segments *Segments 256 segmentsLength int 257 line int 258 pos Segment 259 head int 260 last int 261 lineOffset int 262} 263 264// NewBlockReader returns a new BlockReader. 265func NewBlockReader(source []byte, segments *Segments) BlockReader { 266 r := &blockReader{ 267 source: source, 268 } 269 if segments != nil { 270 r.Reset(segments) 271 } 272 return r 273} 274 275func (r *blockReader) ResetPosition() { 276 r.line = -1 277 r.head = 0 278 r.last = 0 279 r.lineOffset = -1 280 r.pos.Start = -1 281 r.pos.Stop = -1 282 r.pos.Padding = 0 283 if r.segmentsLength > 0 { 284 last := r.segments.At(r.segmentsLength - 1) 285 r.last = last.Stop 286 } 287 r.AdvanceLine() 288} 289 290func (r *blockReader) Reset(segments *Segments) { 291 r.segments = segments 292 r.segmentsLength = segments.Len() 293 r.ResetPosition() 294} 295 296func (r *blockReader) Source() []byte { 297 return r.source 298} 299 300func (r *blockReader) Value(seg Segment) []byte { 301 line := r.segmentsLength - 1 302 ret := make([]byte, 0, seg.Stop-seg.Start+1) 303 for ; line >= 0; line-- { 304 if seg.Start >= r.segments.At(line).Start { 305 break 306 } 307 } 308 i := seg.Start 309 for ; line < r.segmentsLength; line++ { 310 s := r.segments.At(line) 311 if i < 0 { 312 i = s.Start 313 } 314 ret = s.ConcatPadding(ret) 315 for ; i < seg.Stop && i < s.Stop; i++ { 316 ret = append(ret, r.source[i]) 317 } 318 i = -1 319 if s.Stop > seg.Stop { 320 break 321 } 322 } 323 return ret 324} 325 326// io.RuneReader interface 327func (r *blockReader) ReadRune() (rune, int, error) { 328 return readRuneReader(r) 329} 330 331func (r *blockReader) PrecendingCharacter() rune { 332 if r.pos.Padding != 0 { 333 return rune(' ') 334 } 335 if r.segments.Len() < 1 { 336 return rune('\n') 337 } 338 firstSegment := r.segments.At(0) 339 if r.line == 0 && r.pos.Start <= firstSegment.Start { 340 return rune('\n') 341 } 342 l := len(r.source) 343 i := r.pos.Start - 1 344 for ; i < l && i >= 0; i-- { 345 if utf8.RuneStart(r.source[i]) { 346 break 347 } 348 } 349 if i < 0 || i >= l { 350 return rune('\n') 351 } 352 rn, _ := utf8.DecodeRune(r.source[i:]) 353 return rn 354} 355 356func (r *blockReader) LineOffset() int { 357 if r.lineOffset < 0 { 358 v := 0 359 for i := r.head; i < r.pos.Start; i++ { 360 if r.source[i] == '\t' { 361 v += util.TabWidth(v) 362 } else { 363 v++ 364 } 365 } 366 r.lineOffset = v - r.pos.Padding 367 } 368 return r.lineOffset 369} 370 371func (r *blockReader) Peek() byte { 372 if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last { 373 if r.pos.Padding != 0 { 374 return space[0] 375 } 376 return r.source[r.pos.Start] 377 } 378 return EOF 379} 380 381func (r *blockReader) PeekLine() ([]byte, Segment) { 382 if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last { 383 return r.pos.Value(r.source), r.pos 384 } 385 return nil, r.pos 386} 387 388func (r *blockReader) Advance(n int) { 389 r.lineOffset = -1 390 391 if n < r.pos.Stop-r.pos.Start && r.pos.Padding == 0 { 392 r.pos.Start += n 393 return 394 } 395 396 for ; n > 0; n-- { 397 if r.pos.Padding != 0 { 398 r.pos.Padding-- 399 continue 400 } 401 if r.pos.Start >= r.pos.Stop-1 && r.pos.Stop < r.last { 402 r.AdvanceLine() 403 continue 404 } 405 r.pos.Start++ 406 } 407} 408 409func (r *blockReader) AdvanceAndSetPadding(n, padding int) { 410 r.Advance(n) 411 if padding > r.pos.Padding { 412 r.SetPadding(padding) 413 } 414} 415 416func (r *blockReader) AdvanceLine() { 417 r.SetPosition(r.line+1, NewSegment(invalidValue, invalidValue)) 418 r.head = r.pos.Start 419} 420 421func (r *blockReader) Position() (int, Segment) { 422 return r.line, r.pos 423} 424 425func (r *blockReader) SetPosition(line int, pos Segment) { 426 r.lineOffset = -1 427 r.line = line 428 if pos.Start == invalidValue { 429 if r.line < r.segmentsLength { 430 s := r.segments.At(line) 431 r.head = s.Start 432 r.pos = s 433 } 434 } else { 435 r.pos = pos 436 if r.line < r.segmentsLength { 437 s := r.segments.At(line) 438 r.head = s.Start 439 } 440 } 441} 442 443func (r *blockReader) SetPadding(v int) { 444 r.lineOffset = -1 445 r.pos.Padding = v 446} 447 448func (r *blockReader) SkipSpaces() (Segment, int, bool) { 449 return skipSpacesReader(r) 450} 451 452func (r *blockReader) SkipBlankLines() (Segment, int, bool) { 453 return skipBlankLinesReader(r) 454} 455 456func (r *blockReader) Match(reg *regexp.Regexp) bool { 457 return matchReader(r, reg) 458} 459 460func (r *blockReader) FindSubMatch(reg *regexp.Regexp) [][]byte { 461 return findSubMatchReader(r, reg) 462} 463 464func skipBlankLinesReader(r Reader) (Segment, int, bool) { 465 lines := 0 466 for { 467 line, seg := r.PeekLine() 468 if line == nil { 469 return seg, lines, false 470 } 471 if util.IsBlank(line) { 472 lines++ 473 r.AdvanceLine() 474 } else { 475 return seg, lines, true 476 } 477 } 478} 479 480func skipSpacesReader(r Reader) (Segment, int, bool) { 481 chars := 0 482 for { 483 line, segment := r.PeekLine() 484 if line == nil { 485 return segment, chars, false 486 } 487 for i, c := range line { 488 if util.IsSpace(c) { 489 chars++ 490 r.Advance(1) 491 continue 492 } 493 return segment.WithStart(segment.Start + i + 1), chars, true 494 } 495 } 496} 497 498func matchReader(r Reader, reg *regexp.Regexp) bool { 499 oldline, oldseg := r.Position() 500 match := reg.FindReaderSubmatchIndex(r) 501 r.SetPosition(oldline, oldseg) 502 if match == nil { 503 return false 504 } 505 r.Advance(match[1] - match[0]) 506 return true 507} 508 509func findSubMatchReader(r Reader, reg *regexp.Regexp) [][]byte { 510 oldline, oldseg := r.Position() 511 match := reg.FindReaderSubmatchIndex(r) 512 r.SetPosition(oldline, oldseg) 513 if match == nil { 514 return nil 515 } 516 runes := make([]rune, 0, match[1]-match[0]) 517 for i := 0; i < match[1]; { 518 r, size, _ := readRuneReader(r) 519 i += size 520 runes = append(runes, r) 521 } 522 result := [][]byte{} 523 for i := 0; i < len(match); i += 2 { 524 result = append(result, []byte(string(runes[match[i]:match[i+1]]))) 525 } 526 527 r.SetPosition(oldline, oldseg) 528 r.Advance(match[1] - match[0]) 529 return result 530} 531 532func readRuneReader(r Reader) (rune, int, error) { 533 line, _ := r.PeekLine() 534 if line == nil { 535 return 0, 0, io.EOF 536 } 537 rn, size := utf8.DecodeRune(line) 538 if rn == utf8.RuneError { 539 return 0, 0, io.EOF 540 } 541 r.Advance(size) 542 return rn, size, nil 543} 544