1 // Copyright 2015 Google Inc. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 //! Raw parser, for doing a single pass over input. 16 17 use scanners::*; 18 use utils; 19 use std::borrow::Cow; 20 use std::borrow::Cow::{Borrowed}; 21 use std::collections::{HashMap, HashSet}; 22 use std::cmp; 23 24 #[derive(PartialEq, Debug)] 25 enum State { 26 StartBlock, 27 InContainers, 28 Inline, 29 CodeLineStart, 30 Code, 31 InlineCode, 32 Literal, 33 } 34 35 #[derive(Copy, Clone, Debug)] 36 enum Container { 37 BlockQuote, 38 List(usize, u8), 39 ListItem(usize), 40 } 41 42 pub struct RawParser<'a> { 43 text: &'a str, 44 off: usize, 45 46 opts: Options, 47 active_tab: [u8; 256], 48 49 state: State, 50 stack: Vec<(Tag<'a>, usize, usize)>, 51 leading_space: usize, 52 53 containers: Vec<Container>, 54 last_line_was_empty: bool, 55 56 // state for code fences 57 fence_char: u8, 58 fence_count: usize, 59 fence_indent: usize, 60 61 // info, used in second pass 62 loose_lists: HashSet<usize>, // offset is at list marker 63 links: HashMap<String, (Cow<'a, str>, Cow<'a, str>)>, 64 } 65 66 pub struct ParseInfo<'a> { 67 pub loose_lists: HashSet<usize>, 68 pub links: HashMap<String, (Cow<'a, str>, Cow<'a, str>)>, 69 } 70 71 #[derive(Clone, Debug)] 72 pub enum Tag<'a> { 73 // block-level tags 74 Paragraph, 75 Rule, 76 Header(i32), 77 BlockQuote, 78 CodeBlock(Cow<'a, str>), 79 List(Option<usize>), // TODO: add delim and tight for ast (not needed for html) 80 Item, 81 82 // span-level tags 83 Emphasis, 84 Strong, 85 Code, 86 Link(Cow<'a, str>, Cow<'a, str>), 87 Image(Cow<'a, str>, Cow<'a, str>), 88 } 89 90 #[derive(Debug)] 91 pub enum Event<'a> { 92 Start(Tag<'a>), 93 End(Tag<'a>), 94 Text(Cow<'a, str>), 95 Html(Cow<'a, str>), 96 InlineHtml(Cow<'a, str>), 97 SoftBreak, 98 HardBreak, 99 } 100 101 pub struct Options(u32); 102 103 const OPTION_FIRST_PASS: u32 = 1 << 0; 104 105 const MAX_LINK_NEST: usize = 10; 106 107 impl Options { new() -> Options108 pub fn new() -> Options { 109 Options(0) 110 } set_first_pass(&mut self)111 pub fn set_first_pass(&mut self) { 112 self.0 |= OPTION_FIRST_PASS; 113 } is_first_pass(&self) -> bool114 pub fn is_first_pass(&self) -> bool { 115 (self.0 & OPTION_FIRST_PASS) != 0 116 } 117 } 118 119 impl<'a> RawParser<'a> { new_with_links(text: &'a str, opts: Options, links: HashMap<String, (Cow<'a, str>, Cow<'a, str>)>) -> RawParser<'a>120 pub fn new_with_links(text: &'a str, opts: Options, 121 links: HashMap<String, (Cow<'a, str>, Cow<'a, str>)>) -> RawParser<'a> { 122 let mut ret = RawParser { 123 text: text, 124 off: if text.starts_with("\u{FEFF}") { 3 } else { 0 }, 125 opts: opts, 126 active_tab: [0; 256], 127 state: State::StartBlock, 128 leading_space: 0, 129 stack: Vec::new(), 130 containers: Vec::new(), 131 last_line_was_empty: false, 132 133 fence_char: 0, 134 fence_count: 0, 135 fence_indent: 0, 136 137 // info, used in second pass 138 loose_lists: HashSet::new(), 139 links: links, 140 }; 141 ret.init_active(); 142 ret.skip_blank_lines(); 143 ret 144 } 145 new(text: &'a str, opts: Options) -> RawParser<'a>146 pub fn new(text: &'a str, opts: Options) -> RawParser<'a> { 147 RawParser::new_with_links(text, opts, HashMap::new()) 148 } 149 150 // offset into text representing current parse position, hopefully 151 // useful for building source maps get_offset(&self) -> usize152 pub fn get_offset(&self) -> usize { 153 self.off 154 } 155 156 // extract info from parser on finish get_info(self) -> ParseInfo<'a>157 pub fn get_info(self) -> ParseInfo<'a> { 158 ParseInfo { 159 loose_lists: self.loose_lists, 160 links: self.links, 161 } 162 } 163 init_active(&mut self)164 fn init_active(&mut self) { 165 if self.opts.is_first_pass() { 166 self.active_tab[b'\n' as usize] = 1 167 } else { 168 for &c in b"\x00\t\n\r_\\&*[!`<" { 169 self.active_tab[c as usize] = 1; 170 } 171 } 172 } 173 limit(&self) -> usize174 fn limit(&self) -> usize { 175 match self.stack.last() { 176 Some(&(_, limit, _)) => limit, 177 None => self.text.len() 178 } 179 } 180 181 // if end is not known, limit should be text.len(), next should be 0 start(&mut self, tag: Tag<'a>, limit: usize, next: usize) -> Event<'a>182 fn start(&mut self, tag: Tag<'a>, limit: usize, next: usize) -> Event<'a> { 183 self.stack.push((tag.clone(), limit, next)); 184 Event::Start(tag) 185 } 186 end(&mut self) -> Event<'a>187 fn end(&mut self) -> Event<'a> { 188 let (tag, _, next) = self.stack.pop().unwrap(); 189 match tag { 190 // containers 191 Tag::BlockQuote | Tag::List(_) | Tag::Item => { 192 let _ = self.containers.pop(); 193 } 194 195 // block level tags 196 Tag::Paragraph | Tag::Header(_) | Tag::Rule | Tag::CodeBlock(_) => { 197 self.state = State::StartBlock; 198 // TODO: skip blank lines (for cleaner source maps) 199 } 200 201 // inline 202 Tag::Code => self.state = State::Inline, 203 _ => () 204 } 205 if next != 0 { self.off = next; } 206 207 /* 208 if self.stack.is_empty() { 209 // TODO maybe: make block ends do this 210 self.state = State::StartBlock; 211 self.skip_blank_lines(); 212 } 213 */ 214 Event::End(tag) 215 } 216 skip_leading_whitespace(&mut self)217 fn skip_leading_whitespace(&mut self) { 218 self.off += scan_whitespace_no_nl(&self.text[self.off .. self.limit()]); 219 } 220 221 // TODO: this function doesn't respect containers skip_blank_lines(&mut self)222 fn skip_blank_lines(&mut self) { 223 loop { 224 let ret = scan_blank_line(&self.text[self.off..]); 225 if ret == 0 { 226 break; 227 } 228 self.off += ret; 229 } 230 } 231 232 // Scan markers and indentation for current container stack 233 // Return: bytes scanned, whether containers are complete, and remaining space scan_containers(&self, text: &str) -> (usize, bool, usize)234 fn scan_containers(&self, text: &str) -> (usize, bool, usize) { 235 let (mut i, mut space) = scan_leading_space(text, 0); 236 for container in self.containers.iter() { 237 match *container { 238 Container::BlockQuote => { 239 if space <= 3 { 240 let n = scan_blockquote_start(&text[i..]); 241 if n > 0 { 242 let (n_sp, next_space) = scan_leading_space(text, i + n); 243 i += n + n_sp; 244 space = next_space; 245 } else { 246 return (i, false, space); 247 } 248 } else { 249 return (i, false, space); 250 } 251 } 252 Container::List(_, _) => (), 253 Container::ListItem(indent) => { 254 if space >= indent { 255 space -= indent; 256 } else if scan_eol(&text[i..]).1 { 257 space = 0; 258 } else { 259 return (i, false, 0); 260 } 261 } 262 } 263 } 264 (i, true, space) 265 } 266 267 // scans empty lines with current container stack 268 // returns number of bytes scanned, number of empty lines 269 // note: EOF counts as a line ending for counting lines scan_empty_lines(&self, text: &str) -> (usize, usize)270 fn scan_empty_lines(&self, text: &str) -> (usize, usize) { 271 let mut i = 0; 272 let mut lines = 0; 273 loop { 274 let (n, scanned, _) = self.scan_containers(&text[i..]); 275 if !scanned { 276 return (i, lines); 277 } 278 if i == text.len() { 279 return (i, lines + 1); 280 } 281 let n_blank = scan_eol(&text[i + n ..]).0; 282 if n_blank == 0 { 283 return (i, lines); 284 } 285 i += n + n_blank; 286 lines += 1; 287 } 288 } 289 290 // scans whitespace, skipping past containers on newline scan_whitespace_inline(&self, text: &str) -> usize291 fn scan_whitespace_inline(&self, text: &str) -> usize { 292 let i = scan_whitespace_no_nl(text); 293 if let (n, true) = scan_eol(&text[i..]) { 294 let (n_containers, _, space) = self.scan_containers(&text[i + n ..]); 295 let j = i + n + n_containers; 296 if !self.is_inline_block_end(&text[j..], space) { 297 return j; 298 } 299 } 300 i 301 } 302 at_list(&self, level: usize) -> Option<usize>303 fn at_list(&self, level: usize) -> Option<usize> { 304 let len = self.containers.len(); 305 if len >= level { 306 if let Container::List(offset, _) = self.containers[len - level] { 307 return Some(offset); 308 } 309 } 310 None 311 } 312 313 // n is number of bytes (in blank lines) to skip end_containing_lists(&mut self, n: usize) -> Event<'a>314 fn end_containing_lists(&mut self, n: usize) -> Event<'a> { 315 let mut i = self.stack.len(); 316 while i >= 2 { 317 if let (Tag::List(_), _, _) = self.stack[i - 2] { 318 i -= 2; 319 } else { 320 break; 321 } 322 } 323 let mut next = self.off + n; 324 while i < self.stack.len() { 325 if let (Tag::List(start), _, _) = self.stack[i] { 326 self.stack[i] = (Tag::List(start), self.off, next); 327 } 328 if let (Tag::Item, _, _) = self.stack[i + 1] { 329 self.stack[i + 1] = (Tag::Item, self.off, self.off); 330 } 331 next = self.off; 332 i += 2; 333 } 334 self.end() 335 } 336 start_block(&mut self) -> Option<Event<'a>>337 fn start_block(&mut self) -> Option<Event<'a>> { 338 let size = self.text.len(); 339 //println!("start_block {}", self.off); 340 while self.off < size { 341 //println!("start_block loop {} {}", self.off, self.last_line_was_empty); 342 if self.off >= self.limit() { 343 return Some(self.end()); 344 } 345 if self.state != State::InContainers { 346 let (n, scanned, space) = self.scan_containers(&self.text[self.off ..]); 347 if !scanned { 348 return Some(self.end()); 349 } 350 self.leading_space = space; 351 self.off += n; 352 self.state = State::InContainers; 353 } 354 355 let (n, at_eol) = scan_eol(&self.text[self.off ..]); 356 if at_eol { 357 self.off += n; 358 self.state = State::StartBlock; 359 // two empty lines close a list 360 let (n, empty_lines) = self.scan_empty_lines(&self.text[self.off ..]); 361 //println!("{} empty lines (n = {})", empty_lines, n); 362 if empty_lines >= 1 && self.at_list(2).is_some() { 363 return Some(self.end_containing_lists(n)); 364 } 365 self.off += n; 366 if let Some(_) = self.at_list(2) { 367 self.last_line_was_empty = true; 368 } 369 continue; 370 } 371 372 //println!("checking loose {} {:?}", self.last_line_was_empty, self.at_list(2)); 373 if self.last_line_was_empty { 374 if let Some(offset) = self.at_list(2) { 375 // list item contains two blocks separated by empty line 376 self.loose_lists.insert(offset); 377 } 378 } 379 380 if self.leading_space >= 4 && !self.at_list(1).is_some() { 381 // see below 382 if let Some(&Container::List(_, _)) = self.containers.last() { 383 return Some(self.end()); 384 } 385 return Some(self.start_indented_code()); 386 } 387 388 let tail = &self.text[self.off ..]; 389 390 // must be before list item because ambiguous 391 let n = scan_hrule(tail); 392 if n != 0 { 393 self.last_line_was_empty = false; 394 // see below 395 if let Some(&Container::List(_, _)) = self.containers.last() { 396 return Some(self.end()); 397 } 398 self.off += n; 399 return Some(self.start_hrule()); 400 } 401 402 let (n, c, start, indent) = scan_listitem(tail); 403 if n != 0 { 404 if self.last_line_was_empty { 405 if let Some(offset) = self.at_list(1) { 406 // two list items separated by empty line 407 self.loose_lists.insert(offset); 408 } 409 } 410 self.last_line_was_empty = false; 411 return Some(self.start_listitem(n, c, start, indent)); 412 } 413 414 // not a list item, so if we're in a list, close it 415 if let Some(&Container::List(_, _)) = self.containers.last() { 416 return Some(self.end()); 417 } 418 self.last_line_was_empty = false; 419 420 let c = tail.as_bytes()[0]; 421 match c { 422 b'#' => { 423 let (n, level) = scan_atx_header(tail); 424 if n != 0 { 425 self.off += n; 426 return Some(self.start_atx_header(level)); 427 } 428 } 429 b'`' | b'~' => { 430 let (n, ch) = scan_code_fence(tail); 431 if n != 0 { 432 return Some(self.start_code_fence(n, ch, n)); 433 } 434 } 435 b'>' => { 436 let n = scan_blockquote_start(tail); 437 if n != 0 { 438 self.off += n; 439 let (n, space) = scan_leading_space(self.text, self.off); 440 self.off += n; 441 self.leading_space = space; 442 self.containers.push(Container::BlockQuote); 443 return Some(self.start(Tag::BlockQuote, self.text.len(), 0)); 444 } 445 } 446 b'<' => { 447 if self.is_html_block(tail) { 448 return Some(self.do_html_block()); 449 } 450 } 451 b'[' => { 452 if self.try_link_reference_definition(tail) { 453 continue; 454 } 455 } 456 _ => () 457 } 458 return Some(self.start_paragraph()); 459 } 460 None 461 } 462 463 // can start a paragraph or a setext header, as they start similarly start_paragraph(&mut self) -> Event<'a>464 fn start_paragraph(&mut self) -> Event<'a> { 465 let mut i = self.off + scan_nextline(&self.text[self.off..]); 466 467 if let (n, true, space) = self.scan_containers(&self.text[i..]) { 468 i += n; 469 let (n, level) = scan_setext_header(&self.text[i..]); 470 if space <= 3 && n != 0 { 471 let next = i + n; 472 while i > self.off && is_ascii_whitespace(self.text.as_bytes()[i - 1]) { 473 i -= 1; 474 } 475 self.state = State::Inline; 476 return self.start(Tag::Header(level), i, next); 477 } 478 } 479 480 let size = self.text.len(); 481 self.state = State::Inline; 482 self.start(Tag::Paragraph, size, 0) 483 } 484 start_hrule(&mut self) -> Event<'a>485 fn start_hrule(&mut self) -> Event<'a> { 486 let limit = self.off; // body of hrule is empty 487 self.state = State::Inline; // handy state for producing correct end tag 488 self.start(Tag::Rule, limit, limit) 489 } 490 start_atx_header(&mut self, level: i32) -> Event<'a>491 fn start_atx_header(&mut self, level: i32) -> Event<'a> { 492 self.skip_leading_whitespace(); 493 let tail = &self.text[self.off..]; 494 let next = scan_nextline(tail); 495 let mut limit = next; 496 while limit > 0 && is_ascii_whitespace(tail.as_bytes()[limit - 1]) { 497 limit -= 1; 498 } 499 let mut end = limit; 500 while end > 0 && tail.as_bytes()[end - 1] == b'#' { 501 end -= 1; 502 } 503 if end == 0 { 504 limit = end; 505 } else if is_ascii_whitespace(tail.as_bytes()[end - 1]) { 506 limit = end - 1; 507 } 508 while limit > 0 && is_ascii_whitespace(tail.as_bytes()[limit - 1]) { 509 limit -= 1; 510 } 511 let limit = limit + self.off; 512 let next = next + self.off; 513 self.state = State::Inline; 514 self.start(Tag::Header(level), limit, next) 515 } 516 start_indented_code(&mut self) -> Event<'a>517 fn start_indented_code(&mut self) -> Event<'a> { 518 self.fence_char = b'\0'; 519 self.fence_indent = 4; 520 let size = self.text.len(); 521 self.state = State::Code; 522 self.start(Tag::CodeBlock(Borrowed("")), size, 0) 523 } 524 start_listitem(&mut self, n: usize, c: u8, start: usize, indent: usize) -> Event<'a>525 fn start_listitem(&mut self, n: usize, c: u8, start: usize, indent: usize) -> Event<'a> { 526 let indent = self.leading_space + indent; 527 match self.containers.last() { 528 Some(&Container::List(_, c2)) => { 529 if c != c2 { 530 // mismatched list type or delimeter 531 return self.end(); 532 } 533 self.off += n; 534 let n_blank = scan_blank_line(&self.text[self.off ..]); 535 if n_blank != 0 { 536 self.off += n_blank; 537 self.state = State::StartBlock; 538 } else { 539 // TODO: deal with tab 540 let (n, space) = scan_leading_space(self.text, self.off); 541 self.off += n; 542 self.leading_space = space; 543 } 544 self.containers.push(Container::ListItem(indent)); 545 self.start(Tag::Item, self.text.len(), 0) 546 } 547 _ => { 548 self.containers.push(Container::List(self.off, c)); 549 // arguably this should be done in the scanner, it should return option 550 let startopt = if c == b'.' || c == b')' { Some(start) } else { None }; 551 self.start(Tag::List(startopt), self.text.len(), 0) 552 } 553 } 554 } 555 start_code_fence(&mut self, n: usize, ch: u8, count: usize) -> Event<'a>556 fn start_code_fence(&mut self, n: usize, ch: u8, count: usize) -> Event<'a> { 557 self.fence_char = ch; 558 self.fence_count = count; 559 self.fence_indent = self.leading_space; 560 let beg_info = self.off + n; 561 let next_line = beg_info + scan_nextline(&self.text[beg_info..]); 562 self.off = next_line; 563 let info = unescape(&self.text[beg_info..next_line].trim()); 564 let size = self.text.len(); 565 self.state = State::CodeLineStart; 566 self.start(Tag::CodeBlock(info), size, 0) 567 } 568 next_code_line_start(&mut self) -> Event<'a>569 fn next_code_line_start(&mut self) -> Event<'a> { 570 let (off, space) = match self.scan_containers(&self.text[self.off ..]) { 571 (n, true, space) => (self.off + n, space), 572 _ => { 573 return self.end(); 574 } 575 }; 576 577 if self.fence_char == b'\0' { 578 let n = scan_blank_line(&self.text[off..]); 579 if n != 0 { 580 // TODO performance: this scanning is O(n^2) in the number of empty lines 581 let (n_empty, _lines) = self.scan_empty_lines(&self.text[off + n ..]); 582 let next = off + n + n_empty; 583 let (n_containers, scanned, nspace) = self.scan_containers(&self.text[next..]); 584 // TODO; handle space 585 if !scanned || self.is_code_block_end(next + n_containers, nspace) { 586 //println!("was end: {}", next + n_containers); 587 return self.end(); 588 } else { 589 self.off = off; 590 //println!("code line start space={}, off={}", space, off); 591 self.leading_space = space; 592 return self.next_code(); 593 } 594 } 595 } 596 597 if self.is_code_block_end(off, space) { 598 let ret = self.end(); 599 if self.fence_char != b'\0' { 600 self.off = off + scan_nextline(&self.text[off..]); 601 } 602 ret 603 } else { 604 self.off = off; 605 self.state = State::Code; 606 self.leading_space = space; 607 self.next_code() 608 } 609 } 610 next_code(&mut self) -> Event<'a>611 fn next_code(&mut self) -> Event<'a> { 612 if self.leading_space > self.fence_indent { 613 // TODO: might try to combine spaces in text, for fewer events 614 let space = self.leading_space; 615 self.leading_space = 0; 616 return Event::Text(spaces(space - self.fence_indent)); 617 } 618 let bytes = self.text.as_bytes(); 619 let mut beg = self.off; 620 let mut i = beg; 621 loop { 622 match bytes[i..].iter().position(|&c| c < b' ') { 623 Some(j) => i += j, 624 None => { 625 i += bytes[i..].len(); 626 break; 627 } 628 } 629 match bytes[i] { 630 b'\n' => { 631 i += 1; 632 self.state = State::CodeLineStart; 633 break; 634 } 635 b'\t' => { 636 if i > beg { break; } 637 return self.char_tab(); 638 } 639 b'\r' => { 640 // just skip it (does not support '\r' only line break) 641 if i > beg { break; } 642 beg += 1; 643 } 644 _ => () 645 } 646 i += 1; 647 } 648 self.off = i; 649 Event::Text(Borrowed(&self.text[beg..i])) 650 } 651 is_code_block_end(&self, loc: usize, space: usize) -> bool652 fn is_code_block_end(&self, loc: usize, space: usize) -> bool { 653 let tail = &self.text[loc..]; 654 if self.fence_char == b'\0' { 655 // indented code block 656 space < 4 657 } else if space <= 3 { 658 let (n, c) = scan_code_fence(tail); 659 if c != self.fence_char || n < self.fence_count { 660 return false; 661 } 662 if n < tail.len() && scan_blank_line(&tail[n..]) == 0 { 663 // Closing code fences cannot have info strings 664 return false; 665 } 666 return true; 667 } else { 668 false 669 } 670 } 671 672 // # HTML blocks 673 scan_html_block_tag(&self, data: &'a str) -> (usize, &'a str)674 fn scan_html_block_tag(&self, data: &'a str) -> (usize, &'a str) { 675 let mut i = scan_ch(data, b'<'); 676 if i == 0 { return (0, "") } 677 i += scan_ch(&data[i..], b'/'); 678 let n = scan_while(&data[i..], is_ascii_alphanumeric); 679 // TODO: scan attributes and > 680 (i + n, &data[i .. i + n]) 681 } 682 is_html_block(&self, data: &str) -> bool683 fn is_html_block(&self, data: &str) -> bool { 684 let (n_tag, tag) = self.scan_html_block_tag(data); 685 (n_tag > 0 && is_html_tag(tag)) || 686 data.starts_with("<?") || 687 data.starts_with("<!") 688 } 689 do_html_block(&mut self) -> Event<'a>690 fn do_html_block(&mut self) -> Event<'a> { 691 let size = self.text.len(); 692 let mut out = Borrowed(""); 693 let mut i = self.off; 694 let mut mark = i; 695 loop { 696 let n = scan_nextline(&self.text[i..]); 697 i += n; 698 if n >= 2 && self.text.as_bytes()[i - 2] == b'\r' { 699 if self.leading_space > 0 { 700 out = utils::cow_append(out, spaces(self.leading_space)); 701 self.leading_space = 0; 702 } 703 out = utils::cow_append(out, Borrowed(&self.text[mark .. i - 2])); 704 mark = i - 1; 705 } 706 let (n, scanned, space) = self.scan_containers(&self.text[i..]); 707 let n_blank = scan_blank_line(&self.text[i + n ..]); 708 if n != 0 || !scanned || i + n == size || n_blank != 0 { 709 if self.leading_space > 0 { 710 out = utils::cow_append(out, spaces(self.leading_space)); 711 } 712 self.leading_space = space; 713 out = utils::cow_append(out, Borrowed(&self.text[mark..i])); 714 mark = i + n; 715 } 716 if !scanned || i + n == size || n_blank != 0 { 717 self.off = i; // TODO: skip blank lines (cleaner source maps) 718 self.state = State::StartBlock; 719 return Event::Html(out) 720 } 721 } 722 } 723 724 // # Link reference definitions 725 try_link_reference_definition(&mut self, data: &'a str) -> bool726 fn try_link_reference_definition(&mut self, data: &'a str) -> bool { 727 let (n_link, text_beg, text_end, max_nest) = self.scan_link_label(data); 728 if n_link == 0 || max_nest > 1 { return false; } 729 let n_colon = scan_ch(&data[n_link ..], b':'); 730 if n_colon == 0 { return false; } 731 let mut i = n_link + n_colon; 732 i += self.scan_whitespace_inline(&data[i..]); 733 let linkdest = scan_link_dest(&data[i..]); 734 if linkdest.is_none() { return false; } 735 let (n_dest, raw_dest) = linkdest.unwrap(); 736 if n_dest == 0 { return false; } 737 i += n_dest; 738 i += scan_whitespace_no_nl(&data[i..]); 739 let n_nl = self.scan_whitespace_inline(&data[i..]); 740 let (n_title, title_beg, title_end) = self.scan_link_title(&data[i + n_nl ..]); 741 let title = if n_title == 0 { 742 Borrowed("") 743 } else { 744 let (title_beg, title_end) = (i + n_nl + title_beg, i + n_nl + title_end); 745 i += n_nl + n_title; 746 unescape(&data[title_beg..title_end]) 747 }; 748 i += scan_whitespace_no_nl(&data[i..]); 749 if let (n_eol, true) = scan_eol(&data[i..]) { 750 i += n_eol; 751 } else { 752 return false; 753 } 754 755 let linktext = self.normalize_link_ref(&data[text_beg..text_end]); 756 if linktext.is_empty() { 757 return false; 758 } 759 if !self.links.contains_key(&linktext) { 760 let dest = unescape(raw_dest); 761 self.links.insert(linktext, (dest, title)); 762 } 763 self.state = State::StartBlock; 764 self.off += i; 765 true 766 } 767 768 // normalize whitespace and case-fold normalize_link_ref(&self, raw: &str) -> String769 fn normalize_link_ref(&self, raw: &str) -> String { 770 let mut need_space = false; 771 let mut result = String::new(); 772 let mut i = 0; 773 while i < raw.len() { 774 let n = scan_nextline(&raw[i..]); 775 for c in raw[i.. i + n].chars() { 776 if c.is_whitespace() { 777 need_space = true; 778 } else { 779 if need_space && !result.is_empty() { 780 result.push(' '); 781 } 782 // TODO: Unicode case folding can differ from lowercase (ß) 783 result.extend(c.to_lowercase()); 784 need_space = false; 785 } 786 } 787 i += n; 788 if i == raw.len() { break; } 789 i += self.scan_containers(&raw[i..]).0; 790 need_space = true; 791 } 792 result 793 } 794 795 // determine whether the line starting at loc ends the block is_inline_block_end(&self, data: &str, space: usize) -> bool796 fn is_inline_block_end(&self, data: &str, space: usize) -> bool { 797 data.is_empty() || 798 scan_blank_line(data) != 0 || 799 space <= 3 && (scan_hrule(data) != 0 || 800 scan_atx_header(data).0 != 0 || 801 scan_code_fence(data).0 != 0 || 802 scan_blockquote_start(data) != 0 || 803 scan_listitem(data).0 != 0 || 804 self.is_html_block(data)) 805 } 806 next_inline(&mut self) -> Event<'a>807 fn next_inline(&mut self) -> Event<'a> { 808 let bytes = self.text.as_bytes(); 809 let beg = self.off; 810 let mut i = beg; 811 let limit = self.limit(); 812 while i < limit { 813 match bytes[i..limit].iter().position(|&c| self.active_tab[c as usize] != 0) { 814 Some(pos) => i += pos, 815 None => { i = limit; break; } 816 } 817 let c = bytes[i]; 818 if c == b'\n' || c == b'\r' { 819 let n = scan_trailing_whitespace(&self.text[beg..i]); 820 let end = i - n; 821 if end > beg { 822 self.off = end; 823 return Event::Text(Borrowed(&self.text[beg..end])); 824 } 825 if c == b'\r' && i + 1 < limit && self.text.as_bytes()[i + 1] == b'\n' { 826 i += 1; 827 } 828 i += 1; 829 let next = i; 830 let (n_containers, _, space) = self.scan_containers(&self.text[i..limit]); 831 i += n_containers; 832 if self.is_inline_block_end(&self.text[i..limit], space) { 833 self.off = next; 834 return self.end(); 835 } 836 i += scan_whitespace_no_nl(&self.text[i..limit]); 837 self.off = i; 838 return if n >= 2 { Event::HardBreak } else { Event::SoftBreak }; 839 } 840 self.off = i; 841 if i > beg { 842 return Event::Text(Borrowed(&self.text[beg..i])); 843 } 844 if let Some(event) = self.active_char(c) { 845 return event; 846 } 847 i = self.off; // let handler advance offset even on None 848 i += 1; 849 } 850 if i > beg { 851 self.off = i; 852 Event::Text(Borrowed(&self.text[beg..i])) 853 } else { 854 self.end() 855 } 856 } 857 active_char(&mut self, c: u8) -> Option<Event<'a>>858 fn active_char(&mut self, c: u8) -> Option<Event<'a>> { 859 match c { 860 b'\x00' => Some(self.char_null()), 861 b'\t' => Some(self.char_tab()), 862 b'\\' => self.char_backslash(), 863 b'&' => self.char_entity(), 864 b'_' => self.char_emphasis(), 865 b'*' => self.char_emphasis(), 866 b'[' | b'!' => self.char_link(), 867 b'`' => self.char_backtick(), 868 b'<' => self.char_lt(), 869 _ => None 870 } 871 } 872 char_null(&mut self) -> Event<'a>873 fn char_null(&mut self) -> Event<'a> { 874 self.off += 1; 875 Event::Text(Borrowed(&"\u{fffd}")) 876 } 877 878 // expand tab in content (used for code and inline) 879 // scan backward to find offset, counting unicode code points char_tab(&mut self) -> Event<'a>880 fn char_tab(&mut self) -> Event<'a> { 881 let count = count_tab(&self.text.as_bytes()[.. self.off]); 882 self.off += 1; 883 Event::Text(Borrowed(&" "[..count])) 884 } 885 char_backslash(&mut self) -> Option<Event<'a>>886 fn char_backslash(&mut self) -> Option<Event<'a>> { 887 let limit = self.limit(); 888 if self.off + 1 < limit { 889 if let (_, true) = scan_eol(&self.text[self.off + 1 .. limit]) { 890 let n_white = self.scan_whitespace_inline(&self.text[self.off + 1 .. limit]); 891 let space = 0; // TODO: figure this out 892 if !self.is_inline_block_end(&self.text[self.off + 1 + n_white .. limit], space) { 893 self.off += 1 + n_white; 894 return Some(Event::HardBreak); 895 } 896 } 897 let c = self.text.as_bytes()[self.off + 1]; 898 if is_ascii_punctuation(c) { 899 self.off += 2; 900 return Some(Event::Text(Borrowed(&self.text[self.off - 1 .. self.off]))); 901 } 902 } 903 None 904 } 905 char_entity(&mut self) -> Option<Event<'a>>906 fn char_entity(&mut self) -> Option<Event<'a>> { 907 match scan_entity(&self.text[self.off ..]) { 908 (n, Some(value)) => { 909 self.off += n; 910 Some(Event::Text(value)) 911 } 912 _ => None 913 } 914 } 915 char_emphasis(&mut self) -> Option<Event<'a>>916 fn char_emphasis(&mut self) -> Option<Event<'a>> { 917 // can see to left for flanking info, but not past limit 918 let limit = self.limit(); 919 let data = &self.text[..limit]; 920 921 let c = data.as_bytes()[self.off]; 922 let (n, can_open, _can_close) = compute_open_close(data, self.off, c); 923 if !can_open { 924 return None; 925 } 926 let mut stack = vec![n]; // TODO performance: don't allocate 927 let mut i = self.off + n; 928 while i < limit { 929 let c2 = data.as_bytes()[i]; 930 if c2 == b'\n' && !is_escaped(data, i) { 931 let space = 0; // TODO: scan containers 932 if self.is_inline_block_end(&self.text[i + 1 .. limit], space) { 933 return None 934 } else { 935 i += 1; 936 } 937 } else if c2 == c && !is_escaped(data, i) { 938 let (mut n2, can_open, can_close) = compute_open_close(data, i, c); 939 if can_close { 940 loop { 941 let ntos = stack.pop().unwrap(); 942 if ntos > n2 { 943 stack.push(ntos - n2); 944 break; 945 } 946 if stack.is_empty() { 947 let npop = if ntos < n2 { ntos } else { n2 }; 948 if npop == 1 { 949 self.off += 1; 950 return Some(self.start(Tag::Emphasis, i, i + 1)); 951 } else { 952 self.off += 2; 953 let next = i + npop; 954 return Some(self.start(Tag::Strong, next - 2, next)); 955 } 956 } else { 957 i += ntos; 958 n2 -= ntos; 959 } 960 } 961 } else if can_open { 962 stack.push(n2); 963 } 964 i += n2; 965 } else if c2 == b'`' { 966 let (n, beg, _) = self.scan_inline_code(&self.text[i..limit]); 967 if n != 0 { 968 i += n; 969 } else { 970 i += beg; 971 } 972 } else if c2 == b'<' { 973 let n = self.scan_autolink_or_html(&self.text[i..limit]); 974 if n != 0 { 975 i += n; 976 } else { 977 i += 1; 978 } 979 } else if c2 == b'[' { 980 if let Some((_, _, _, n)) = self.parse_link(&self.text[i..limit], false) { 981 i += n; 982 } else { 983 i += 1; 984 } 985 } else { 986 i += 1; 987 } 988 } 989 None 990 } 991 992 // # Links 993 994 // scans a link label, example [link] 995 // return value is: total bytes, start of text, end of text, max nesting scan_link_label(&self, data: &str) -> (usize, usize, usize, usize)996 fn scan_link_label(&self, data: &str) -> (usize, usize, usize, usize) { 997 let mut i = scan_ch(data, b'['); 998 if i == 0 { return (0, 0, 0, 0); } 999 let text_beg = i; 1000 let mut max_nest = 1; 1001 let mut nest = 1; 1002 loop { 1003 if i >= data.len() { return (0, 0, 0, 0); } 1004 match data.as_bytes()[i] { 1005 b'\n' => { 1006 let n = self.scan_whitespace_inline(&data[i..]); 1007 if n == 0 { return (0, 0, 0, 0); } 1008 i += n; 1009 continue; 1010 } 1011 b'[' => { 1012 nest += 1; 1013 if nest == MAX_LINK_NEST { return (0, 0, 0, 0); } 1014 max_nest = cmp::max(max_nest, nest) 1015 } 1016 b']' => { 1017 nest -= 1; 1018 if nest == 0 { 1019 break; 1020 } 1021 } 1022 b'\\' => i += 1, 1023 b'<' => { 1024 let n = self.scan_autolink_or_html(&data[i..]); 1025 if n != 0 { 1026 i += n; 1027 } else { 1028 i += 1; 1029 } 1030 } 1031 b'`' => { 1032 let (n, beg, _) = self.scan_inline_code(&data[i..]); 1033 if n != 0 { 1034 i += n; 1035 } else { 1036 i += beg; 1037 } 1038 } 1039 _ => () 1040 } 1041 i += 1; 1042 } 1043 let text_end = i; 1044 i += 1; // skip closing ] 1045 (i, text_beg, text_end, max_nest) 1046 } 1047 scan_link_title(&self, data: &str) -> (usize, usize, usize)1048 fn scan_link_title(&self, data: &str) -> (usize, usize, usize) { 1049 let size = data.len(); 1050 if size == 0 { return (0, 0, 0); } 1051 let mut i = 0; 1052 let titleclose = match data.as_bytes()[i] { 1053 b'(' => b')', 1054 b'\'' => b'\'', 1055 b'\"' => b'\"', 1056 _ => return (0, 0, 0) 1057 }; 1058 i += 1; 1059 let title_beg = i; 1060 while i < size { 1061 match data.as_bytes()[i] { 1062 x if x == titleclose => break, 1063 b'\\' => i += 2, // may be > size 1064 b'\n' => { 1065 let n = self.scan_whitespace_inline(&data[i..]); 1066 if n == 0 { return (0, 0, 0); } 1067 i += n; 1068 } 1069 _ => i += 1 1070 } 1071 } 1072 if i >= size { return (0, 0, 0); } 1073 let title_end = i; 1074 i += 1; 1075 (i, title_beg, title_end) 1076 } 1077 char_link(&mut self) -> Option<Event<'a>>1078 fn char_link(&mut self) -> Option<Event<'a>> { 1079 self.parse_link(&self.text[self.off .. self.limit()], false).map(|(tag, beg, end, n)| { 1080 let off = self.off; 1081 self.off += beg; 1082 self.start(tag, off + end, off + n) 1083 }) 1084 } 1085 1086 // return: tag, begin, end, total size parse_link(&self, data: &'a str, recur: bool) -> Option<(Tag<'a>, usize, usize, usize)>1087 fn parse_link(&self, data: &'a str, recur: bool) -> Option<(Tag<'a>, usize, usize, usize)> { 1088 let size = data.len(); 1089 1090 // scan link text 1091 let i = scan_ch(data, b'!'); 1092 let is_image = i == 1; 1093 let (n, text_beg, text_end, max_nest) = self.scan_link_label(&data[i..]); 1094 if n == 0 { return None; } 1095 let (text_beg, text_end) = (text_beg + i, text_end + i); 1096 if !is_image && !recur && max_nest > 1 && self.contains_link(&data[text_beg..text_end]) { 1097 // disallow nested links in links (but ok in images) 1098 return None; 1099 } 1100 let mut i = i + n; 1101 1102 // scan dest 1103 let (dest, title, beg, end, next) = if data[i..].starts_with("(") { 1104 i += 1; 1105 i += self.scan_whitespace_inline(&data[i..]); 1106 if i >= size { return None; } 1107 1108 let linkdest = scan_link_dest(&data[i..]); 1109 if linkdest.is_none() { return None; } 1110 let (n, raw_dest) = linkdest.unwrap(); 1111 let dest = unescape(raw_dest); 1112 i += n; 1113 1114 i += self.scan_whitespace_inline(&data[i..]); 1115 if i == size { return None; } 1116 1117 // scan title 1118 let (n_title, title_beg, title_end) = self.scan_link_title(&data[i..]); 1119 let title = if n_title == 0 { 1120 Borrowed("") 1121 } else { 1122 let (title_beg, title_end) = (i + title_beg, i + title_end); 1123 i += n_title; 1124 // TODO: not just unescape, remove containers from newlines 1125 unescape(&data[title_beg..title_end]) 1126 }; 1127 i += self.scan_whitespace_inline(&data[i..]); 1128 if i == size || data.as_bytes()[i] != b')' { return None; } 1129 i += 1; 1130 (dest, title, text_beg, text_end, i) 1131 } else { 1132 // try link reference 1133 let j = i + self.scan_whitespace_inline(&data[i..]); 1134 let (n_ref, ref_beg, ref_end, _) = self.scan_link_label(&data[j..]); 1135 let (ref_beg, ref_end) = if n_ref == 0 || ref_beg == ref_end { 1136 (text_beg, text_end) 1137 } else { 1138 (j + ref_beg, j + ref_end) 1139 }; 1140 if n_ref != 0 { 1141 i = j + n_ref; 1142 } 1143 let reference = self.normalize_link_ref(&data[ref_beg..ref_end]); 1144 let (dest, title) = match self.links.get(&reference) { 1145 Some(&(ref dest, ref title)) => (dest.clone(), title.clone()), 1146 None => return None 1147 }; 1148 (dest, title, text_beg, text_end, i) 1149 }; 1150 if is_image { 1151 Some((Tag::Image(dest, title), beg, end, next)) 1152 } else { 1153 Some((Tag::Link(dest, title), beg, end, next)) 1154 } 1155 } 1156 1157 // determine whether there's a link anywhere in the text 1158 // TODO: code duplication with scan_link_label is unpleasant contains_link(&self, data: &str) -> bool1159 fn contains_link(&self, data: &str) -> bool { 1160 let mut i = 0; 1161 while i < data.len() { 1162 match data.as_bytes()[i] { 1163 b'\n' => { 1164 let n = self.scan_whitespace_inline(&data[i..]); 1165 if n == 0 { return false; } 1166 i += n; 1167 continue; 1168 } 1169 b'!' => { 1170 if scan_ch(&data[i + 1 ..], b'[') != 0 { 1171 // ok to contain image, skip over opening bracket 1172 i += 1; 1173 } 1174 } 1175 b'[' => { 1176 if self.parse_link(&data[i..], true).is_some() { return true; } 1177 } 1178 b'\\' => i += 1, 1179 b'<' => { 1180 let n = self.scan_autolink_or_html(&data[i..]); 1181 if n != 0 { 1182 i += n; 1183 } else { 1184 i += 1; 1185 } 1186 } 1187 b'`' => { 1188 let (n, beg, _) = self.scan_inline_code(&data[i..]); 1189 if n != 0 { 1190 i += n; 1191 } else { 1192 i += beg; 1193 } 1194 } 1195 _ => () 1196 } 1197 i += 1; 1198 } 1199 false 1200 } 1201 1202 // # Autolinks and inline HTML 1203 char_lt(&mut self) -> Option<Event<'a>>1204 fn char_lt(&mut self) -> Option<Event<'a>> { 1205 let tail = &self.text[self.off .. self.limit()]; 1206 if let Some((n, link)) = scan_autolink(tail) { 1207 let next = self.off + n; 1208 self.off += 1; 1209 self.state = State::Literal; 1210 return Some(self.start(Tag::Link(link, Borrowed("")), next - 1, next)) 1211 } 1212 let n = self.scan_inline_html(tail); 1213 if n != 0 { 1214 return Some(self.inline_html_event(n)) 1215 } 1216 None 1217 } 1218 scan_autolink_or_html(&self, data: &str) -> usize1219 fn scan_autolink_or_html(&self, data: &str) -> usize { 1220 if let Some((n, _)) = scan_autolink(data) { 1221 n 1222 } else { 1223 self.scan_inline_html(data) 1224 } 1225 } 1226 scan_inline_html(&self, data: &str) -> usize1227 fn scan_inline_html(&self, data: &str) -> usize { 1228 let n = self.scan_html_tag(data); 1229 if n != 0 { return n; } 1230 let n = self.scan_html_comment(data); 1231 if n != 0 { return n; } 1232 let n = self.scan_processing_instruction(data); 1233 if n != 0 { return n; } 1234 let n = self.scan_declaration(data); 1235 if n != 0 { return n; } 1236 let n = self.scan_cdata(data); 1237 if n != 0 { return n; } 1238 0 1239 } 1240 scan_html_tag(&self, data: &str) -> usize1241 fn scan_html_tag(&self, data: &str) -> usize { 1242 let size = data.len(); 1243 let mut i = 0; 1244 if scan_ch(data, b'<') == 0 { return 0; } 1245 i += 1; 1246 let n_slash = scan_ch(&data[i..], b'/'); 1247 i += n_slash; 1248 if i == size || !is_ascii_alpha(data.as_bytes()[i]) { return 0; } 1249 i += 1; 1250 i += scan_while(&data[i..], is_ascii_alphanumeric); 1251 if n_slash == 0 { 1252 loop { 1253 let n = self.scan_whitespace_inline(&data[i..]); 1254 if n == 0 { break; } 1255 i += n; 1256 let n = scan_attribute_name(&data[i..]); 1257 if n == 0 { break; } 1258 i += n; 1259 let n = self.scan_whitespace_inline(&data[i..]); 1260 if scan_ch(&data[i + n ..], b'=') != 0 { 1261 i += n + 1; 1262 i += self.scan_whitespace_inline(&data[i..]); 1263 let n_attr = self.scan_attribute_value(&data[i..]); 1264 if n_attr == 0 { return 0; } 1265 i += n_attr; 1266 } 1267 } 1268 i += self.scan_whitespace_inline(&data[i..]); 1269 i += scan_ch(&data[i..], b'/'); 1270 } else { 1271 i += self.scan_whitespace_inline(&data[i..]); 1272 } 1273 if scan_ch(&data[i..], b'>') == 0 { return 0; } 1274 i += 1; 1275 i 1276 } 1277 scan_attribute_value(&self, data: &str) -> usize1278 fn scan_attribute_value(&self, data: &str) -> usize { 1279 let size = data.len(); 1280 if size == 0 { return 0; } 1281 let open = data.as_bytes()[0]; 1282 let quoted = open == b'\'' || open == b'"'; 1283 let mut i = if quoted { 1 } else { 0 }; 1284 while i < size { 1285 let c = data.as_bytes()[i]; 1286 match c { 1287 b'\n' => { 1288 if !quoted { break; } 1289 let n = self.scan_whitespace_inline(&data[i..]); 1290 if n == 0 { return 0; } 1291 i += n; 1292 } 1293 b'\'' | b'"' | b'=' | b'<' | b'>' | b'`' | b'\t' ... b' ' => { 1294 if !quoted || c == open { break; } 1295 i += 1; 1296 } 1297 _ => i += 1 1298 } 1299 } 1300 if quoted { 1301 if i == size || data.as_bytes()[i] != open { return 0; } 1302 i += 1; 1303 } 1304 i 1305 } 1306 scan_html_comment(&self, data: &str) -> usize1307 fn scan_html_comment(&self, data: &str) -> usize { 1308 if !data.starts_with("<!--") { return 0; } 1309 if let Some(n) = data[4..].find("--") { 1310 let text = &data[4..4 + n]; 1311 if !text.starts_with('>') && !text.starts_with("->") && 1312 data[n + 6 ..].starts_with('>') { 1313 return n + 7; 1314 } 1315 } 1316 0 1317 } 1318 scan_processing_instruction(&self, data: &str) -> usize1319 fn scan_processing_instruction(&self, data: &str) -> usize { 1320 if !data.starts_with("<?") { return 0; } 1321 if let Some(n) = data[2..].find("?>") { 1322 return n + 4; 1323 } 1324 0 1325 } 1326 scan_declaration(&self, data: &str) -> usize1327 fn scan_declaration(&self, data: &str) -> usize { 1328 if !data.starts_with("<!") { return 0; } 1329 let n = scan_while(&data[2..], is_ascii_upper); 1330 if n == 0 { return 0; } 1331 let i = n + 2; 1332 let n = self.scan_whitespace_inline(&data[i..]); 1333 if n == 0 { return 0; } 1334 let mut i = i + n; 1335 while i < data.len() { 1336 match data.as_bytes()[i] { 1337 b'>' => return i + 1, 1338 b'\n' => i += self.scan_whitespace_inline(&data[i..]), 1339 _ => i += 1 1340 } 1341 } 1342 0 1343 } 1344 scan_cdata(&self, data: &str) -> usize1345 fn scan_cdata(&self, data: &str) -> usize { 1346 if !data.starts_with("<![CDATA[") { return 0; } 1347 if let Some(n) = data[9..].find("]]>") { 1348 return n + 12; 1349 } 1350 0 1351 } 1352 inline_html_event(&mut self, n: usize) -> Event<'a>1353 fn inline_html_event(&mut self, n: usize) -> Event<'a> { 1354 let data = &self.text[self.off .. self.off + n]; 1355 let size = data.len(); 1356 let mut out = Borrowed(""); 1357 let mut i = 0; 1358 let mut mark = 0; 1359 while i < size { 1360 let n = scan_nextline(&data[i..]); 1361 i += n; 1362 if n >= 2 && data.as_bytes()[i - 2] == b'\r' { 1363 out = utils::cow_append(out, Borrowed(&data[mark .. i - 2])); 1364 mark = i - 1; 1365 } 1366 if i < size { 1367 let (n, _, _) = self.scan_containers(&data[i..]); 1368 if n != 0 { 1369 out = utils::cow_append(out, Borrowed(&data[mark..i])); 1370 mark = i + n; 1371 } 1372 } 1373 } 1374 out = utils::cow_append(out, Borrowed(&data[mark..n])); 1375 self.off += n; 1376 Event::InlineHtml(out) 1377 } 1378 1379 // link text is literal, with no processing of markup next_literal(&mut self) -> Event<'a>1380 fn next_literal(&mut self) -> Event<'a> { 1381 self.state = State::Inline; 1382 let beg = self.off; 1383 let end = self.limit(); 1384 self.off = end; 1385 Event::Text(Borrowed(&self.text[beg..end])) 1386 } 1387 1388 // second return value is number of backticks even if not closed scan_inline_code(&self, data: &str) -> (usize, usize, usize)1389 fn scan_inline_code(&self, data: &str) -> (usize, usize, usize) { 1390 let size = data.len(); 1391 let backtick_len = scan_backticks(data); 1392 let mut i = backtick_len; 1393 while i < size { 1394 match data.as_bytes()[i] { 1395 b'`' => { 1396 let close_len = scan_backticks(&data[i..]); 1397 if close_len == backtick_len { 1398 return (i + backtick_len, backtick_len, i); 1399 } else { 1400 i += close_len; 1401 } 1402 } 1403 b'\n' => { 1404 i += 1; 1405 let (n, _, space) = self.scan_containers(&data[i..]); 1406 i += n; 1407 if self.is_inline_block_end(&data[i..], space) { 1408 return (0, backtick_len, 0); 1409 } 1410 } 1411 // TODO: '<' 1412 _ => i += 1 1413 } 1414 } 1415 (0, backtick_len, 0) 1416 } 1417 char_backtick(&mut self) -> Option<Event<'a>>1418 fn char_backtick(&mut self) -> Option<Event<'a>> { 1419 let beg = self.off; 1420 let limit = self.limit(); 1421 let mut i = beg; 1422 let (n, code_beg, code_end) = self.scan_inline_code(&self.text[i..limit]); 1423 if n == 0 { 1424 self.off += code_beg - 1; 1425 return None; 1426 } 1427 i += code_beg; 1428 let end = beg + code_end; 1429 let next = beg + n; 1430 i += self.scan_whitespace_inline(&self.text[i..limit]); 1431 self.off = i; 1432 self.state = State::InlineCode; 1433 Some(self.start(Tag::Code, end, next)) 1434 } 1435 next_inline_code(&mut self) -> Event<'a>1436 fn next_inline_code(&mut self) -> Event<'a> { 1437 let beg = self.off; 1438 let mut i = beg; 1439 let limit = self.limit(); 1440 while i < limit { 1441 let c = self.text.as_bytes()[i]; 1442 if is_ascii_whitespace(c) { 1443 let n = self.scan_whitespace_inline(&self.text[i..limit]); 1444 if i + n == limit || n == 0 { 1445 if i > beg { 1446 break; 1447 } else { 1448 return self.end(); 1449 } 1450 } 1451 if c == b' ' && n == 1 { 1452 // optimization to reduce number of text blocks produced 1453 i += 1; 1454 } else { 1455 if i > beg { 1456 break; 1457 } 1458 i += n; 1459 self.off = i; 1460 return Event::Text(Borrowed(" ")); 1461 } 1462 } else { 1463 i += 1; 1464 } 1465 } 1466 if i > beg { 1467 self.off = i; 1468 Event::Text(Borrowed(&self.text[beg..i])) 1469 } else { 1470 self.end() 1471 } 1472 } 1473 } 1474 1475 impl<'a> Iterator for RawParser<'a> { 1476 type Item = Event<'a>; 1477 next(&mut self) -> Option<Event<'a>>1478 fn next(&mut self) -> Option<Event<'a>> { 1479 //println!("off {} {:?}, stack {:?} containers {:?}", 1480 // self.off, self.state, self.stack, self.containers); 1481 if self.off < self.text.len() { 1482 match self.state { 1483 State::StartBlock | State::InContainers => { 1484 let ret = self.start_block(); 1485 if ret.is_some() { 1486 return ret; 1487 } 1488 } 1489 State::Inline => return Some(self.next_inline()), 1490 State::CodeLineStart => return Some(self.next_code_line_start()), 1491 State::Code => return Some(self.next_code()), 1492 State::InlineCode => return Some(self.next_inline_code()), 1493 State::Literal => return Some(self.next_literal()), 1494 } 1495 } 1496 match self.stack.pop() { 1497 Some((tag, _, _)) => Some(Event::End(tag)), 1498 None => None 1499 } 1500 } 1501 } 1502