1package text 2 3import ( 4 "io" 5 "regexp" 6 "unicode/utf8" 7 8 "github.com/yuin/goldmark/util" 9) 10 11const invalidValue = -1 12 13// EOF indicates the end of file. 14const EOF = byte(0xff) 15 16// A Reader interface provides abstracted method for reading text. 17type Reader interface { 18 io.RuneReader 19 20 // Source returns a source of the reader. 21 Source() []byte 22 23 // ResetPosition resets positions. 24 ResetPosition() 25 26 // Peek returns a byte at current position without advancing the internal pointer. 27 Peek() byte 28 29 // PeekLine returns the current line without advancing the internal pointer. 30 PeekLine() ([]byte, Segment) 31 32 // PrecendingCharacter returns a character just before current internal pointer. 33 PrecendingCharacter() rune 34 35 // Value returns a value of the given segment. 36 Value(Segment) []byte 37 38 // LineOffset returns a distance from the line head to current position. 39 LineOffset() int 40 41 // Position returns current line number and position. 42 Position() (int, Segment) 43 44 // SetPosition sets current line number and position. 45 SetPosition(int, Segment) 46 47 // SetPadding sets padding to the reader. 48 SetPadding(int) 49 50 // Advance advances the internal pointer. 51 Advance(int) 52 53 // AdvanceAndSetPadding advances the internal pointer and add padding to the 54 // reader. 55 AdvanceAndSetPadding(int, int) 56 57 // AdvanceLine advances the internal pointer to the next line head. 58 AdvanceLine() 59 60 // SkipSpaces skips space characters and returns a non-blank line. 61 // If it reaches EOF, returns false. 62 SkipSpaces() (Segment, int, bool) 63 64 // SkipSpaces skips blank lines and returns a non-blank line. 65 // If it reaches EOF, returns false. 66 SkipBlankLines() (Segment, int, bool) 67 68 // Match performs regular expression matching to current line. 69 Match(reg *regexp.Regexp) bool 70 71 // Match performs regular expression searching to current line. 72 FindSubMatch(reg *regexp.Regexp) [][]byte 73} 74 75type reader struct { 76 source []byte 77 sourceLength int 78 line int 79 peekedLine []byte 80 pos Segment 81 head int 82 lineOffset int 83} 84 85// NewReader return a new Reader that can read UTF-8 bytes . 86func NewReader(source []byte) Reader { 87 r := &reader{ 88 source: source, 89 sourceLength: len(source), 90 } 91 r.ResetPosition() 92 return r 93} 94 95func (r *reader) ResetPosition() { 96 r.line = -1 97 r.head = 0 98 r.lineOffset = -1 99 r.AdvanceLine() 100} 101 102func (r *reader) Source() []byte { 103 return r.source 104} 105 106func (r *reader) Value(seg Segment) []byte { 107 return seg.Value(r.source) 108} 109 110func (r *reader) Peek() byte { 111 if r.pos.Start >= 0 && r.pos.Start < r.sourceLength { 112 if r.pos.Padding != 0 { 113 return space[0] 114 } 115 return r.source[r.pos.Start] 116 } 117 return EOF 118} 119 120func (r *reader) PeekLine() ([]byte, Segment) { 121 if r.pos.Start >= 0 && r.pos.Start < r.sourceLength { 122 if r.peekedLine == nil { 123 r.peekedLine = r.pos.Value(r.Source()) 124 } 125 return r.peekedLine, r.pos 126 } 127 return nil, r.pos 128} 129 130// io.RuneReader interface 131func (r *reader) ReadRune() (rune, int, error) { 132 return readRuneReader(r) 133} 134 135func (r *reader) LineOffset() int { 136 if r.lineOffset < 0 { 137 v := 0 138 for i := r.head; i < r.pos.Start; i++ { 139 if r.source[i] == '\t' { 140 v += util.TabWidth(v) 141 } else { 142 v++ 143 } 144 } 145 r.lineOffset = v - r.pos.Padding 146 } 147 return r.lineOffset 148} 149 150func (r *reader) PrecendingCharacter() rune { 151 if r.pos.Start <= 0 { 152 if r.pos.Padding != 0 { 153 return rune(' ') 154 } 155 return rune('\n') 156 } 157 i := r.pos.Start - 1 158 for ; i >= 0; i-- { 159 if utf8.RuneStart(r.source[i]) { 160 break 161 } 162 } 163 rn, _ := utf8.DecodeRune(r.source[i:]) 164 return rn 165} 166 167func (r *reader) Advance(n int) { 168 r.lineOffset = -1 169 if n < len(r.peekedLine) && r.pos.Padding == 0 { 170 r.pos.Start += n 171 r.peekedLine = nil 172 return 173 } 174 r.peekedLine = nil 175 l := r.sourceLength 176 for ; n > 0 && r.pos.Start < l; n-- { 177 if r.pos.Padding != 0 { 178 r.pos.Padding-- 179 continue 180 } 181 if r.source[r.pos.Start] == '\n' { 182 r.AdvanceLine() 183 continue 184 } 185 r.pos.Start++ 186 } 187} 188 189func (r *reader) AdvanceAndSetPadding(n, padding int) { 190 r.Advance(n) 191 if padding > r.pos.Padding { 192 r.SetPadding(padding) 193 } 194} 195 196func (r *reader) AdvanceLine() { 197 r.lineOffset = -1 198 r.peekedLine = nil 199 r.pos.Start = r.pos.Stop 200 r.head = r.pos.Start 201 if r.pos.Start < 0 { 202 return 203 } 204 r.pos.Stop = r.sourceLength 205 for i := r.pos.Start; i < r.sourceLength; i++ { 206 c := r.source[i] 207 if c == '\n' { 208 r.pos.Stop = i + 1 209 break 210 } 211 } 212 r.line++ 213 r.pos.Padding = 0 214} 215 216func (r *reader) Position() (int, Segment) { 217 return r.line, r.pos 218} 219 220func (r *reader) SetPosition(line int, pos Segment) { 221 r.lineOffset = -1 222 r.line = line 223 r.pos = pos 224} 225 226func (r *reader) SetPadding(v int) { 227 r.pos.Padding = v 228} 229 230func (r *reader) SkipSpaces() (Segment, int, bool) { 231 return skipSpacesReader(r) 232} 233 234func (r *reader) SkipBlankLines() (Segment, int, bool) { 235 return skipBlankLinesReader(r) 236} 237 238func (r *reader) Match(reg *regexp.Regexp) bool { 239 return matchReader(r, reg) 240} 241 242func (r *reader) FindSubMatch(reg *regexp.Regexp) [][]byte { 243 return findSubMatchReader(r, reg) 244} 245 246// A BlockReader interface is a reader that is optimized for Blocks. 247type BlockReader interface { 248 Reader 249 // Reset resets current state and sets new segments to the reader. 250 Reset(segment *Segments) 251} 252 253type blockReader struct { 254 source []byte 255 segments *Segments 256 segmentsLength int 257 line int 258 pos Segment 259 head int 260 last int 261 lineOffset int 262} 263 264// NewBlockReader returns a new BlockReader. 265func NewBlockReader(source []byte, segments *Segments) BlockReader { 266 r := &blockReader{ 267 source: source, 268 } 269 if segments != nil { 270 r.Reset(segments) 271 } 272 return r 273} 274 275func (r *blockReader) ResetPosition() { 276 r.line = -1 277 r.head = 0 278 r.last = 0 279 r.lineOffset = -1 280 r.pos.Start = -1 281 r.pos.Stop = -1 282 r.pos.Padding = 0 283 if r.segmentsLength > 0 { 284 last := r.segments.At(r.segmentsLength - 1) 285 r.last = last.Stop 286 } 287 r.AdvanceLine() 288} 289 290func (r *blockReader) Reset(segments *Segments) { 291 r.segments = segments 292 r.segmentsLength = segments.Len() 293 r.ResetPosition() 294} 295 296func (r *blockReader) Source() []byte { 297 return r.source 298} 299 300func (r *blockReader) Value(seg Segment) []byte { 301 line := r.segmentsLength - 1 302 ret := make([]byte, 0, seg.Stop-seg.Start+1) 303 for ; line >= 0; line-- { 304 if seg.Start >= r.segments.At(line).Start { 305 break 306 } 307 } 308 i := seg.Start 309 for ; line < r.segmentsLength; line++ { 310 s := r.segments.At(line) 311 if i < 0 { 312 i = s.Start 313 } 314 ret = s.ConcatPadding(ret) 315 for ; i < seg.Stop && i < s.Stop; i++ { 316 ret = append(ret, r.source[i]) 317 } 318 i = -1 319 if s.Stop > seg.Stop { 320 break 321 } 322 } 323 return ret 324} 325 326// io.RuneReader interface 327func (r *blockReader) ReadRune() (rune, int, error) { 328 return readRuneReader(r) 329} 330 331func (r *blockReader) PrecendingCharacter() rune { 332 if r.pos.Padding != 0 { 333 return rune(' ') 334 } 335 if r.pos.Start <= 0 { 336 return rune('\n') 337 } 338 l := len(r.source) 339 i := r.pos.Start - 1 340 for ; i < l && i >= 0; i-- { 341 if utf8.RuneStart(r.source[i]) { 342 break 343 } 344 } 345 if i < 0 || i >= l { 346 return rune('\n') 347 } 348 rn, _ := utf8.DecodeRune(r.source[i:]) 349 return rn 350} 351 352func (r *blockReader) LineOffset() int { 353 if r.lineOffset < 0 { 354 v := 0 355 for i := r.head; i < r.pos.Start; i++ { 356 if r.source[i] == '\t' { 357 v += util.TabWidth(v) 358 } else { 359 v++ 360 } 361 } 362 r.lineOffset = v - r.pos.Padding 363 } 364 return r.lineOffset 365} 366 367func (r *blockReader) Peek() byte { 368 if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last { 369 if r.pos.Padding != 0 { 370 return space[0] 371 } 372 return r.source[r.pos.Start] 373 } 374 return EOF 375} 376 377func (r *blockReader) PeekLine() ([]byte, Segment) { 378 if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last { 379 return r.pos.Value(r.source), r.pos 380 } 381 return nil, r.pos 382} 383 384func (r *blockReader) Advance(n int) { 385 r.lineOffset = -1 386 387 if n < r.pos.Stop-r.pos.Start && r.pos.Padding == 0 { 388 r.pos.Start += n 389 return 390 } 391 392 for ; n > 0; n-- { 393 if r.pos.Padding != 0 { 394 r.pos.Padding-- 395 continue 396 } 397 if r.pos.Start >= r.pos.Stop-1 && r.pos.Stop < r.last { 398 r.AdvanceLine() 399 continue 400 } 401 r.pos.Start++ 402 } 403} 404 405func (r *blockReader) AdvanceAndSetPadding(n, padding int) { 406 r.Advance(n) 407 if padding > r.pos.Padding { 408 r.SetPadding(padding) 409 } 410} 411 412func (r *blockReader) AdvanceLine() { 413 r.SetPosition(r.line+1, NewSegment(invalidValue, invalidValue)) 414 r.head = r.pos.Start 415} 416 417func (r *blockReader) Position() (int, Segment) { 418 return r.line, r.pos 419} 420 421func (r *blockReader) SetPosition(line int, pos Segment) { 422 r.lineOffset = -1 423 r.line = line 424 if pos.Start == invalidValue { 425 if r.line < r.segmentsLength { 426 s := r.segments.At(line) 427 r.head = s.Start 428 r.pos = s 429 } 430 } else { 431 r.pos = pos 432 if r.line < r.segmentsLength { 433 s := r.segments.At(line) 434 r.head = s.Start 435 } 436 } 437} 438 439func (r *blockReader) SetPadding(v int) { 440 r.lineOffset = -1 441 r.pos.Padding = v 442} 443 444func (r *blockReader) SkipSpaces() (Segment, int, bool) { 445 return skipSpacesReader(r) 446} 447 448func (r *blockReader) SkipBlankLines() (Segment, int, bool) { 449 return skipBlankLinesReader(r) 450} 451 452func (r *blockReader) Match(reg *regexp.Regexp) bool { 453 return matchReader(r, reg) 454} 455 456func (r *blockReader) FindSubMatch(reg *regexp.Regexp) [][]byte { 457 return findSubMatchReader(r, reg) 458} 459 460func skipBlankLinesReader(r Reader) (Segment, int, bool) { 461 lines := 0 462 for { 463 line, seg := r.PeekLine() 464 if line == nil { 465 return seg, lines, false 466 } 467 if util.IsBlank(line) { 468 lines++ 469 r.AdvanceLine() 470 } else { 471 return seg, lines, true 472 } 473 } 474} 475 476func skipSpacesReader(r Reader) (Segment, int, bool) { 477 chars := 0 478 for { 479 line, segment := r.PeekLine() 480 if line == nil { 481 return segment, chars, false 482 } 483 for i, c := range line { 484 if util.IsSpace(c) { 485 chars++ 486 r.Advance(1) 487 continue 488 } 489 return segment.WithStart(segment.Start + i + 1), chars, true 490 } 491 } 492} 493 494func matchReader(r Reader, reg *regexp.Regexp) bool { 495 oldline, oldseg := r.Position() 496 match := reg.FindReaderSubmatchIndex(r) 497 r.SetPosition(oldline, oldseg) 498 if match == nil { 499 return false 500 } 501 r.Advance(match[1] - match[0]) 502 return true 503} 504 505func findSubMatchReader(r Reader, reg *regexp.Regexp) [][]byte { 506 oldline, oldseg := r.Position() 507 match := reg.FindReaderSubmatchIndex(r) 508 r.SetPosition(oldline, oldseg) 509 if match == nil { 510 return nil 511 } 512 runes := make([]rune, 0, match[1]-match[0]) 513 for i := 0; i < match[1]; { 514 r, size, _ := readRuneReader(r) 515 i += size 516 runes = append(runes, r) 517 } 518 result := [][]byte{} 519 for i := 0; i < len(match); i += 2 { 520 result = append(result, []byte(string(runes[match[i]:match[i+1]]))) 521 } 522 523 r.SetPosition(oldline, oldseg) 524 r.Advance(match[1] - match[0]) 525 return result 526} 527 528func readRuneReader(r Reader) (rune, int, error) { 529 line, _ := r.PeekLine() 530 if line == nil { 531 return 0, 0, io.EOF 532 } 533 rn, size := utf8.DecodeRune(line) 534 if rn == utf8.RuneError { 535 return 0, 0, io.EOF 536 } 537 r.Advance(size) 538 return rn, size, nil 539} 540