1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 //! Raw parser, for doing a single pass over input.
16 
17 use scanners::*;
18 use utils;
19 use std::borrow::Cow;
20 use std::borrow::Cow::{Borrowed};
21 use std::collections::{HashMap, HashSet};
22 use std::cmp;
23 
/// Mode of the parser's state machine; it records what kind of input the
/// parser expects at the current offset.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum State {
    /// At the start of a line, before the prefixes of the open containers
    /// have been matched. Also the state entered when a block-level tag ends.
    StartBlock,
    /// The container prefixes of the current line have already been consumed.
    InContainers,
    /// Inside inline content (paragraph or header text; also used for rules
    /// so that the matching end tag is produced).
    Inline,
    /// At the beginning of a line inside a code block.
    CodeLineStart,
    /// Inside a line of a code block.
    Code,
    // NOTE(review): the two states below are not referenced in the portion of
    // the file reviewed here; presumably they are driven by the inline
    // handlers further down. Confirm before documenting them in detail.
    InlineCode,
    Literal,
}
34 
/// A container block that is currently open. Lines are matched against the
/// stack of these (see `scan_containers`) before their content is parsed.
#[derive(Copy, Clone, Debug)]
enum Container {
    /// Block quote; every continuation line must carry a block quote marker.
    BlockQuote,
    /// A list: the offset in the text at which the list was opened (at its
    /// first marker) and the delimiter byte reported by `scan_listitem`.
    List(usize, u8),
    /// A list item, with the indentation that continuation lines need in
    /// order to belong to the item.
    ListItem(usize),
}
41 
/// Single-pass parser over a markdown source string.
pub struct RawParser<'a> {
    // the complete input
    text: &'a str,
    // current byte offset into `text`
    off: usize,

    opts: Options,
    // lookup table indexed by byte value; a non-zero entry marks a byte at
    // which the inline scanner has to stop and take a closer look
    active_tab: [u8; 256],

    state: State,
    // open tags, each with (tag, limit, next): `limit` is the offset at which
    // the tag's content ends, `next` is the offset to continue from once the
    // tag has been closed (0 means "stay where we are")
    stack: Vec<(Tag<'a>, usize, usize)>,
    // indentation still pending after the container prefixes of the line
    leading_space: usize,

    // open container blocks, outermost first
    containers: Vec<Container>,
    // set when an empty line was seen inside a list; used to detect loose lists
    last_line_was_empty: bool,

    // state for code fences
    // fence byte of the open fenced block; b'\0' denotes an indented code block
    fence_char: u8,
    fence_count: usize,
    // indentation that is stripped from every line of the code block
    fence_indent: usize,

    // info, used in second pass
    loose_lists: HashSet<usize>,  // offset is at list marker
    // link reference definitions: normalized label -> (destination, title)
    links: HashMap<String, (Cow<'a, str>, Cow<'a, str>)>,
}
65 
/// Information collected while parsing, handed out by `RawParser::get_info`
/// once parsing has finished.
#[derive(Clone, Debug, Default)]
pub struct ParseInfo<'a> {
    /// Offsets (at the list marker) of the lists that were found to be loose.
    pub loose_lists: HashSet<usize>,
    /// Link reference definitions: normalized label -> (destination, title).
    pub links: HashMap<String, (Cow<'a, str>, Cow<'a, str>)>,
}
70 
/// Kind of element that is opened by `Event::Start` and closed by `Event::End`.
///
/// `PartialEq` is derived so that tags (and sequences of them) can be
/// compared directly, e.g. in tests.
#[derive(Clone, Debug, PartialEq)]
pub enum Tag<'a> {
    // block-level tags
    Paragraph,
    Rule,
    /// Header with its level.
    Header(i32),
    BlockQuote,
    /// Code block with its info string; the string is empty for indented
    /// code blocks.
    CodeBlock(Cow<'a, str>),
    /// List; `Some(n)` carries the start number of a list whose markers end
    /// in `.` or `)`, `None` is used for every other list.
    List(Option<usize>),  // TODO: add delim and tight for ast (not needed for html)
    Item,

    // span-level tags
    Emphasis,
    Strong,
    Code,
    // NOTE(review): the two strings are presumably (destination, title), in
    // the same order as the entries of the link reference map - confirm at
    // the construction sites.
    Link(Cow<'a, str>, Cow<'a, str>),
    Image(Cow<'a, str>, Cow<'a, str>),
}
89 
/// A single item of the event stream produced by the parser.
#[derive(Debug)]
pub enum Event<'a> {
    /// Opens an element; a matching `End` with an equal tag follows later.
    Start(Tag<'a>),
    /// Closes the most recently opened element.
    End(Tag<'a>),
    /// A run of text.
    Text(Cow<'a, str>),
    /// Raw block-level HTML.
    Html(Cow<'a, str>),
    // NOTE(review): not produced in the portion of the file reviewed here;
    // presumably raw HTML found inside inline content.
    InlineHtml(Cow<'a, str>),
    /// Line break inside inline content that is not a hard break.
    SoftBreak,
    /// Line break that was preceded by two or more trailing whitespace
    /// characters.
    HardBreak,
}
100 
/// Set of parser options, stored as bit flags.
///
/// The default value (also returned by `Options::new`) has no flag set.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct Options(u32);

/// Flag bit: the parser is running the first of its two passes.
const OPTION_FIRST_PASS: u32 = 1 << 0;

// NOTE(review): not referenced in the portion of the file reviewed here;
// presumably the maximum nesting depth accepted for link labels.
const MAX_LINK_NEST: usize = 10;

impl Options {
    /// Creates an option set with every flag cleared.
    pub fn new() -> Options {
        Options(0)
    }

    /// Marks the parser as running its first pass.
    pub fn set_first_pass(&mut self) {
        self.0 |= OPTION_FIRST_PASS;
    }

    /// Returns `true` if the first-pass flag has been set.
    pub fn is_first_pass(&self) -> bool {
        (self.0 & OPTION_FIRST_PASS) != 0
    }
}
118 
119 impl<'a> RawParser<'a> {
new_with_links(text: &'a str, opts: Options, links: HashMap<String, (Cow<'a, str>, Cow<'a, str>)>) -> RawParser<'a>120     pub fn new_with_links(text: &'a str, opts: Options,
121             links: HashMap<String, (Cow<'a, str>, Cow<'a, str>)>) -> RawParser<'a> {
122         let mut ret = RawParser {
123             text: text,
124             off: if text.starts_with("\u{FEFF}") { 3 } else { 0 },
125             opts: opts,
126             active_tab: [0; 256],
127             state: State::StartBlock,
128             leading_space: 0,
129             stack: Vec::new(),
130             containers: Vec::new(),
131             last_line_was_empty: false,
132 
133             fence_char: 0,
134             fence_count: 0,
135             fence_indent: 0,
136 
137             // info, used in second pass
138             loose_lists: HashSet::new(),
139             links: links,
140         };
141         ret.init_active();
142         ret.skip_blank_lines();
143         ret
144     }
145 
new(text: &'a str, opts: Options) -> RawParser<'a>146     pub fn new(text: &'a str, opts: Options) -> RawParser<'a> {
147         RawParser::new_with_links(text, opts, HashMap::new())
148     }
149 
    /// Returns the byte offset into the text that represents the current
    /// parse position; hopefully useful for building source maps.
    pub fn get_offset(&self) -> usize {
        self.off
    }
155 
156     // extract info from parser on finish
get_info(self) -> ParseInfo<'a>157     pub fn get_info(self) -> ParseInfo<'a> {
158         ParseInfo {
159             loose_lists: self.loose_lists,
160             links: self.links,
161         }
162     }
163 
init_active(&mut self)164     fn init_active(&mut self) {
165         if self.opts.is_first_pass() {
166             self.active_tab[b'\n' as usize] = 1
167         } else {
168             for &c in b"\x00\t\n\r_\\&*[!`<" {
169                 self.active_tab[c as usize] = 1;
170             }
171         }
172     }
173 
limit(&self) -> usize174     fn limit(&self) -> usize {
175         match self.stack.last() {
176             Some(&(_, limit, _)) => limit,
177             None => self.text.len()
178         }
179     }
180 
181     // if end is not known, limit should be text.len(), next should be 0
start(&mut self, tag: Tag<'a>, limit: usize, next: usize) -> Event<'a>182     fn start(&mut self, tag: Tag<'a>, limit: usize, next: usize) -> Event<'a> {
183         self.stack.push((tag.clone(), limit, next));
184         Event::Start(tag)
185     }
186 
end(&mut self) -> Event<'a>187     fn end(&mut self) -> Event<'a> {
188         let (tag, _, next) = self.stack.pop().unwrap();
189         match tag {
190             // containers
191             Tag::BlockQuote | Tag::List(_) | Tag::Item => {
192                 let _ = self.containers.pop();
193             }
194 
195             // block level tags
196             Tag::Paragraph | Tag::Header(_) | Tag::Rule | Tag::CodeBlock(_) => {
197                 self.state = State::StartBlock;
198                 // TODO: skip blank lines (for cleaner source maps)
199             }
200 
201             // inline
202             Tag::Code => self.state = State::Inline,
203             _ => ()
204         }
205         if next != 0 { self.off = next; }
206 
207         /*
208         if self.stack.is_empty() {
209             // TODO maybe: make block ends do this
210             self.state = State::StartBlock;
211             self.skip_blank_lines();
212         }
213         */
214         Event::End(tag)
215     }
216 
skip_leading_whitespace(&mut self)217     fn skip_leading_whitespace(&mut self) {
218         self.off += scan_whitespace_no_nl(&self.text[self.off .. self.limit()]);
219     }
220 
221     // TODO: this function doesn't respect containers
skip_blank_lines(&mut self)222     fn skip_blank_lines(&mut self) {
223         loop {
224             let ret = scan_blank_line(&self.text[self.off..]);
225             if ret == 0 {
226                 break;
227             }
228             self.off += ret;
229         }
230     }
231 
232     // Scan markers and indentation for current container stack
233     // Return: bytes scanned, whether containers are complete, and remaining space
scan_containers(&self, text: &str) -> (usize, bool, usize)234     fn scan_containers(&self, text: &str) -> (usize, bool, usize) {
235         let (mut i, mut space) = scan_leading_space(text, 0);
236         for container in self.containers.iter() {
237             match *container {
238                 Container::BlockQuote => {
239                     if space <= 3 {
240                         let n = scan_blockquote_start(&text[i..]);
241                         if n > 0 {
242                             let (n_sp, next_space) = scan_leading_space(text, i + n);
243                             i += n + n_sp;
244                             space = next_space;
245                         } else {
246                             return (i, false, space);
247                         }
248                     } else {
249                         return (i, false, space);
250                     }
251                 }
252                 Container::List(_, _) => (),
253                 Container::ListItem(indent) => {
254                     if space >= indent {
255                         space -= indent;
256                     } else if scan_eol(&text[i..]).1 {
257                         space = 0;
258                     } else {
259                         return (i, false, 0);
260                     }
261                 }
262             }
263         }
264         (i, true, space)
265     }
266 
267     // scans empty lines with current container stack
268     // returns number of bytes scanned, number of empty lines
269     // note: EOF counts as a line ending for counting lines
scan_empty_lines(&self, text: &str) -> (usize, usize)270     fn scan_empty_lines(&self, text: &str) -> (usize, usize) {
271         let mut i = 0;
272         let mut lines = 0;
273         loop {
274             let (n, scanned, _) = self.scan_containers(&text[i..]);
275             if !scanned {
276                 return (i, lines);
277             }
278             if i == text.len() {
279                 return (i, lines + 1);
280             }
281             let n_blank = scan_eol(&text[i + n ..]).0;
282             if n_blank == 0 {
283                 return (i, lines);
284             }
285             i += n + n_blank;
286             lines += 1;
287         }
288     }
289 
290     // scans whitespace, skipping past containers on newline
scan_whitespace_inline(&self, text: &str) -> usize291     fn scan_whitespace_inline(&self, text: &str) -> usize {
292         let i = scan_whitespace_no_nl(text);
293         if let (n, true) = scan_eol(&text[i..]) {
294             let (n_containers, _, space) = self.scan_containers(&text[i + n ..]);
295             let j = i + n + n_containers;
296             if !self.is_inline_block_end(&text[j..], space) {
297                 return j;
298             }
299         }
300         i
301     }
302 
at_list(&self, level: usize) -> Option<usize>303     fn at_list(&self, level: usize) -> Option<usize> {
304         let len = self.containers.len();
305         if len >= level {
306             if let Container::List(offset, _) = self.containers[len - level] {
307                 return Some(offset);
308             }
309         }
310         None
311     }
312 
313     // n is number of bytes (in blank lines) to skip
end_containing_lists(&mut self, n: usize) -> Event<'a>314     fn end_containing_lists(&mut self, n: usize) -> Event<'a> {
315         let mut i = self.stack.len();
316         while i >= 2 {
317             if let (Tag::List(_), _, _) = self.stack[i - 2] {
318                 i -= 2;
319             } else {
320                 break;
321             }
322         }
323         let mut next = self.off + n;
324         while i < self.stack.len() {
325             if let (Tag::List(start), _, _) = self.stack[i] {
326                 self.stack[i] = (Tag::List(start), self.off, next);
327             }
328             if let (Tag::Item, _, _) = self.stack[i + 1] {
329                 self.stack[i + 1] = (Tag::Item, self.off, self.off);
330             }
331             next = self.off;
332             i += 2;
333         }
334         self.end()
335     }
336 
    /// Produces the next block-level event: the start of a new block or
    /// container, or the end of an open one.
    ///
    /// Per line this matches the open containers, handles empty lines
    /// (including closing lists and tracking loose lists) and then tries the
    /// block kinds in order: indented code, horizontal rule, list item, ATX
    /// header, code fence, block quote, HTML block, link reference
    /// definition and, when nothing else fits, a paragraph.
    ///
    /// Returns `None` once the end of the text has been reached.
    fn start_block(&mut self) -> Option<Event<'a>> {
        let size = self.text.len();
        //println!("start_block {}", self.off);
        while self.off < size {
            //println!("start_block loop {} {}", self.off, self.last_line_was_empty);
            // the innermost open tag has reached its limit, close it
            if self.off >= self.limit() {
                return Some(self.end());
            }
            if self.state != State::InContainers {
                // match the prefixes of the open containers; if they do not
                // all match, the innermost tag is closed
                let (n, scanned, space) = self.scan_containers(&self.text[self.off ..]);
                if !scanned {
                    return Some(self.end());
                }
                self.leading_space = space;
                self.off += n;
                self.state = State::InContainers;
            }

            let (n, at_eol) = scan_eol(&self.text[self.off ..]);
            if at_eol {
                // the rest of the line is empty
                self.off += n;
                self.state = State::StartBlock;
                // two empty lines close a list
                let (n, empty_lines) = self.scan_empty_lines(&self.text[self.off ..]);
                //println!("{} empty lines (n = {})", empty_lines, n);
                if empty_lines >= 1 && self.at_list(2).is_some() {
                    return Some(self.end_containing_lists(n));
                }
                self.off += n;
                // remember the empty line while inside a list item, it may
                // make the list loose
                if let Some(_) = self.at_list(2) {
                    self.last_line_was_empty = true;
                }
                continue;
            }

            //println!("checking loose {} {:?}", self.last_line_was_empty, self.at_list(2));
            if self.last_line_was_empty {
                if let Some(offset) = self.at_list(2) {
                    // list item contains two blocks separated by empty line
                    self.loose_lists.insert(offset);
                }
            }

            if self.leading_space >= 4 && !self.at_list(1).is_some() {
                // a list that is open without an item is closed first, as for
                // any other line that is not a list item (see further down)
                if let Some(&Container::List(_, _)) = self.containers.last() {
                    return Some(self.end());
                }
                return Some(self.start_indented_code());
            }

            let tail = &self.text[self.off ..];

            // must be before list item because ambiguous
            let n = scan_hrule(tail);
            if n != 0 {
                self.last_line_was_empty = false;
                // a list that is open without an item is closed first, as for
                // any other line that is not a list item (see further down)
                if let Some(&Container::List(_, _)) = self.containers.last() {
                    return Some(self.end());
                }
                self.off += n;
                return Some(self.start_hrule());
            }

            let (n, c, start, indent) = scan_listitem(tail);
            if n != 0 {
                if self.last_line_was_empty {
                    if let Some(offset) = self.at_list(1) {
                        // two list items separated by empty line
                        self.loose_lists.insert(offset);
                    }
                }
                self.last_line_was_empty = false;
                return Some(self.start_listitem(n, c, start, indent));
            }

            // not a list item, so if we're in a list, close it
            if let Some(&Container::List(_, _)) = self.containers.last() {
                return Some(self.end());
            }
            self.last_line_was_empty = false;

            // dispatch on the first byte of the line; a candidate that does
            // not pan out falls through to the paragraph below
            let c = tail.as_bytes()[0];
            match c {
                b'#' => {
                    let (n, level) = scan_atx_header(tail);
                    if n != 0 {
                        self.off += n;
                        return Some(self.start_atx_header(level));
                    }
                }
                b'`' | b'~' => {
                    let (n, ch) = scan_code_fence(tail);
                    if n != 0 {
                        return Some(self.start_code_fence(n, ch, n));
                    }
                }
                b'>' => {
                    let n = scan_blockquote_start(tail);
                    if n != 0 {
                        self.off += n;
                        let (n, space) = scan_leading_space(self.text, self.off);
                        self.off += n;
                        self.leading_space = space;
                        self.containers.push(Container::BlockQuote);
                        return Some(self.start(Tag::BlockQuote, self.text.len(), 0));
                    }
                }
                b'<' => {
                    if self.is_html_block(tail) {
                        return Some(self.do_html_block());
                    }
                }
                b'[' => {
                    // a link reference definition produces no event, carry on
                    // with the line that follows it
                    if self.try_link_reference_definition(tail) {
                        continue;
                    }
                }
                _ => ()
            }
            return Some(self.start_paragraph());
        }
        None
    }
462 
463     // can start a paragraph or a setext header, as they start similarly
start_paragraph(&mut self) -> Event<'a>464     fn start_paragraph(&mut self) -> Event<'a> {
465         let mut i = self.off + scan_nextline(&self.text[self.off..]);
466 
467         if let (n, true, space) = self.scan_containers(&self.text[i..]) {
468             i += n;
469             let (n, level) = scan_setext_header(&self.text[i..]);
470             if space <= 3 && n != 0 {
471                 let next = i + n;
472                 while i > self.off && is_ascii_whitespace(self.text.as_bytes()[i - 1]) {
473                     i -= 1;
474                 }
475                 self.state = State::Inline;
476                 return self.start(Tag::Header(level), i, next);
477             }
478         }
479 
480         let size = self.text.len();
481         self.state = State::Inline;
482         self.start(Tag::Paragraph, size, 0)
483     }
484 
start_hrule(&mut self) -> Event<'a>485     fn start_hrule(&mut self) -> Event<'a> {
486         let limit = self.off;  // body of hrule is empty
487         self.state = State::Inline;  // handy state for producing correct end tag
488         self.start(Tag::Rule, limit, limit)
489     }
490 
start_atx_header(&mut self, level: i32) -> Event<'a>491     fn start_atx_header(&mut self, level: i32) -> Event<'a> {
492         self.skip_leading_whitespace();
493         let tail = &self.text[self.off..];
494         let next = scan_nextline(tail);
495         let mut limit = next;
496         while limit > 0 && is_ascii_whitespace(tail.as_bytes()[limit - 1]) {
497             limit -= 1;
498         }
499         let mut end = limit;
500         while end > 0 && tail.as_bytes()[end - 1] == b'#' {
501             end -= 1;
502         }
503         if end == 0 {
504             limit = end;
505         } else if is_ascii_whitespace(tail.as_bytes()[end - 1]) {
506             limit = end - 1;
507         }
508         while limit > 0 && is_ascii_whitespace(tail.as_bytes()[limit - 1]) {
509             limit -= 1;
510         }
511         let limit = limit + self.off;
512         let next = next + self.off;
513         self.state = State::Inline;
514         self.start(Tag::Header(level), limit, next)
515     }
516 
start_indented_code(&mut self) -> Event<'a>517     fn start_indented_code(&mut self) -> Event<'a> {
518         self.fence_char = b'\0';
519         self.fence_indent = 4;
520         let size = self.text.len();
521         self.state = State::Code;
522         self.start(Tag::CodeBlock(Borrowed("")), size, 0)
523     }
524 
start_listitem(&mut self, n: usize, c: u8, start: usize, indent: usize) -> Event<'a>525     fn start_listitem(&mut self, n: usize, c: u8, start: usize, indent: usize) -> Event<'a> {
526         let indent = self.leading_space + indent;
527         match self.containers.last() {
528             Some(&Container::List(_, c2)) => {
529                 if c != c2 {
530                     // mismatched list type or delimeter
531                     return self.end();
532                 }
533                 self.off += n;
534                 let n_blank = scan_blank_line(&self.text[self.off ..]);
535                 if n_blank != 0 {
536                     self.off += n_blank;
537                     self.state = State::StartBlock;
538                 } else {
539                     // TODO: deal with tab
540                     let (n, space) = scan_leading_space(self.text, self.off);
541                     self.off += n;
542                     self.leading_space = space;
543                 }
544                 self.containers.push(Container::ListItem(indent));
545                 self.start(Tag::Item, self.text.len(), 0)
546             }
547             _ => {
548                 self.containers.push(Container::List(self.off, c));
549                 // arguably this should be done in the scanner, it should return option
550                 let startopt = if c == b'.' || c == b')' { Some(start) } else { None };
551                 self.start(Tag::List(startopt), self.text.len(), 0)
552             }
553         }
554     }
555 
start_code_fence(&mut self, n: usize, ch: u8, count: usize) -> Event<'a>556     fn start_code_fence(&mut self, n: usize, ch: u8, count: usize) -> Event<'a> {
557         self.fence_char = ch;
558         self.fence_count = count;
559         self.fence_indent = self.leading_space;
560         let beg_info = self.off + n;
561         let next_line = beg_info + scan_nextline(&self.text[beg_info..]);
562         self.off = next_line;
563         let info = unescape(&self.text[beg_info..next_line].trim());
564         let size = self.text.len();
565         self.state = State::CodeLineStart;
566         self.start(Tag::CodeBlock(info), size, 0)
567     }
568 
next_code_line_start(&mut self) -> Event<'a>569     fn next_code_line_start(&mut self) -> Event<'a> {
570         let (off, space) = match self.scan_containers(&self.text[self.off ..]) {
571             (n, true, space) => (self.off + n, space),
572             _ => {
573                 return self.end();
574             }
575         };
576 
577         if self.fence_char == b'\0' {
578             let n = scan_blank_line(&self.text[off..]);
579             if n != 0 {
580                 // TODO performance: this scanning is O(n^2) in the number of empty lines
581                 let (n_empty, _lines) = self.scan_empty_lines(&self.text[off + n ..]);
582                 let next = off + n + n_empty;
583                 let (n_containers, scanned, nspace) = self.scan_containers(&self.text[next..]);
584                 // TODO; handle space
585                 if !scanned || self.is_code_block_end(next + n_containers, nspace) {
586                     //println!("was end: {}", next + n_containers);
587                     return self.end();
588                 } else {
589                     self.off = off;
590                     //println!("code line start space={}, off={}", space, off);
591                     self.leading_space = space;
592                     return self.next_code();
593                 }
594             }
595         }
596 
597         if self.is_code_block_end(off, space) {
598             let ret = self.end();
599             if self.fence_char != b'\0' {
600                 self.off = off + scan_nextline(&self.text[off..]);
601             }
602             ret
603         } else {
604             self.off = off;
605             self.state = State::Code;
606             self.leading_space = space;
607             self.next_code()
608         }
609     }
610 
next_code(&mut self) -> Event<'a>611     fn next_code(&mut self) -> Event<'a> {
612         if self.leading_space > self.fence_indent {
613             // TODO: might try to combine spaces in text, for fewer events
614             let space = self.leading_space;
615             self.leading_space = 0;
616             return Event::Text(spaces(space - self.fence_indent));
617         }
618         let bytes = self.text.as_bytes();
619         let mut beg = self.off;
620         let mut i = beg;
621         loop {
622             match bytes[i..].iter().position(|&c| c < b' ') {
623                 Some(j) => i += j,
624                 None => {
625                     i += bytes[i..].len();
626                     break;
627                 }
628             }
629             match bytes[i] {
630                 b'\n' => {
631                     i += 1;
632                     self.state = State::CodeLineStart;
633                     break;
634                 }
635                 b'\t' => {
636                     if i > beg { break; }
637                     return self.char_tab();
638                 }
639                 b'\r' => {
640                     // just skip it (does not support '\r' only line break)
641                     if i > beg { break; }
642                     beg += 1;
643                 }
644                 _ => ()
645             }
646             i += 1;
647         }
648         self.off = i;
649         Event::Text(Borrowed(&self.text[beg..i]))
650     }
651 
is_code_block_end(&self, loc: usize, space: usize) -> bool652     fn is_code_block_end(&self, loc: usize, space: usize) -> bool {
653         let tail = &self.text[loc..];
654         if self.fence_char == b'\0' {
655             // indented code block
656             space < 4
657         } else if space <= 3 {
658             let (n, c) = scan_code_fence(tail);
659             if c != self.fence_char || n < self.fence_count {
660                 return false;
661             }
662             if n < tail.len() && scan_blank_line(&tail[n..]) == 0 {
663                 // Closing code fences cannot have info strings
664                 return false;
665             }
666             return true;
667         } else {
668             false
669         }
670     }
671 
672     // # HTML blocks
673 
scan_html_block_tag(&self, data: &'a str) -> (usize, &'a str)674     fn scan_html_block_tag(&self, data: &'a str) -> (usize, &'a str) {
675         let mut i = scan_ch(data, b'<');
676         if i == 0 { return (0, "") }
677         i += scan_ch(&data[i..], b'/');
678         let n = scan_while(&data[i..], is_ascii_alphanumeric);
679         // TODO: scan attributes and >
680         (i + n, &data[i .. i + n])
681     }
682 
is_html_block(&self, data: &str) -> bool683     fn is_html_block(&self, data: &str) -> bool {
684         let (n_tag, tag) = self.scan_html_block_tag(data);
685         (n_tag > 0 && is_html_tag(tag)) ||
686                 data.starts_with("<?") ||
687                 data.starts_with("<!")
688     }
689 
    /// Consumes the HTML block at the current offset and returns it as a
    /// single `Event::Html`.
    ///
    /// The block is read line by line and ends when the containers no longer
    /// match the next line, when the end of the text is reached, or when the
    /// next line is blank. Container prefixes are left out of the output, as
    /// is a carriage return that directly precedes the last byte of a line;
    /// pending indentation is put back in as spaces.
    fn do_html_block(&mut self) -> Event<'a> {
        let size = self.text.len();
        let mut out = Borrowed("");
        let mut i = self.off;
        // start of the text that has not been appended to `out` yet
        let mut mark = i;
        loop {
            let n = scan_nextline(&self.text[i..]);
            i += n;
            // the line has a '\r' in front of its final byte: append
            // everything before the '\r' and continue with the final byte
            if n >= 2 && self.text.as_bytes()[i - 2] == b'\r' {
                if self.leading_space > 0 {
                    out = utils::cow_append(out, spaces(self.leading_space));
                    self.leading_space = 0;
                }
                out = utils::cow_append(out, Borrowed(&self.text[mark .. i - 2]));
                mark = i - 1;
            }
            // look ahead at the next line
            let (n, scanned, space) = self.scan_containers(&self.text[i..]);
            let n_blank = scan_blank_line(&self.text[i + n ..]);
            // flush when a container prefix has to be skipped or the block ends
            if n != 0 || !scanned || i + n == size || n_blank != 0 {
                if self.leading_space > 0 {
                    out = utils::cow_append(out, spaces(self.leading_space));
                }
                self.leading_space = space;
                out = utils::cow_append(out, Borrowed(&self.text[mark..i]));
                mark = i + n;
            }
            if !scanned || i + n == size || n_blank != 0 {
                self.off = i;  // TODO: skip blank lines (cleaner source maps)
                self.state = State::StartBlock;
                return Event::Html(out)
            }
        }
    }
723 
724     // # Link reference definitions
725 
try_link_reference_definition(&mut self, data: &'a str) -> bool726     fn try_link_reference_definition(&mut self, data: &'a str) -> bool {
727         let (n_link, text_beg, text_end, max_nest) = self.scan_link_label(data);
728         if n_link == 0 || max_nest > 1 { return false; }
729         let n_colon = scan_ch(&data[n_link ..], b':');
730         if n_colon == 0 { return false; }
731         let mut i = n_link + n_colon;
732         i += self.scan_whitespace_inline(&data[i..]);
733         let linkdest = scan_link_dest(&data[i..]);
734         if linkdest.is_none() { return false; }
735         let (n_dest, raw_dest) = linkdest.unwrap();
736         if n_dest == 0 { return false; }
737         i += n_dest;
738         i += scan_whitespace_no_nl(&data[i..]);
739         let n_nl = self.scan_whitespace_inline(&data[i..]);
740         let (n_title, title_beg, title_end) = self.scan_link_title(&data[i + n_nl ..]);
741         let title = if n_title == 0 {
742             Borrowed("")
743         } else {
744             let (title_beg, title_end) = (i + n_nl + title_beg, i + n_nl + title_end);
745             i += n_nl + n_title;
746             unescape(&data[title_beg..title_end])
747         };
748         i += scan_whitespace_no_nl(&data[i..]);
749         if let (n_eol, true) = scan_eol(&data[i..]) {
750             i += n_eol;
751         } else {
752             return false;
753         }
754 
755         let linktext = self.normalize_link_ref(&data[text_beg..text_end]);
756         if linktext.is_empty() {
757             return false;
758         }
759         if !self.links.contains_key(&linktext) {
760             let dest = unescape(raw_dest);
761             self.links.insert(linktext, (dest, title));
762         }
763         self.state = State::StartBlock;
764         self.off += i;
765         true
766     }
767 
768     // normalize whitespace and case-fold
normalize_link_ref(&self, raw: &str) -> String769     fn normalize_link_ref(&self, raw: &str) -> String {
770         let mut need_space = false;
771         let mut result = String::new();
772         let mut i = 0;
773         while i < raw.len() {
774             let n = scan_nextline(&raw[i..]);
775             for c in raw[i.. i + n].chars() {
776                 if c.is_whitespace() {
777                     need_space = true;
778                 } else {
779                     if need_space && !result.is_empty() {
780                         result.push(' ');
781                     }
782                     // TODO: Unicode case folding can differ from lowercase (ß)
783                     result.extend(c.to_lowercase());
784                     need_space = false;
785                 }
786             }
787             i += n;
788             if i == raw.len() { break; }
789             i += self.scan_containers(&raw[i..]).0;
790             need_space = true;
791         }
792         result
793     }
794 
795     // determine whether the line starting at loc ends the block
is_inline_block_end(&self, data: &str, space: usize) -> bool796     fn is_inline_block_end(&self, data: &str, space: usize) -> bool {
797         data.is_empty() ||
798                 scan_blank_line(data) != 0 ||
799                 space <= 3 && (scan_hrule(data) != 0 ||
800                     scan_atx_header(data).0 != 0 ||
801                     scan_code_fence(data).0 != 0 ||
802                     scan_blockquote_start(data) != 0 ||
803                     scan_listitem(data).0 != 0 ||
804                     self.is_html_block(data))
805     }
806 
    /// Produces the next event inside inline content.
    ///
    /// Plain text up to the next active byte (see `active_tab`) is returned
    /// as `Event::Text`. A line break yields the end of the block if the
    /// following line ends it, and a soft or hard break otherwise. Any other
    /// active byte is handed to `active_char`. When the limit of the
    /// innermost tag is reached, that tag is closed.
    fn next_inline(&mut self) -> Event<'a> {
        let bytes = self.text.as_bytes();
        let beg = self.off;
        let mut i = beg;
        let limit = self.limit();
        while i < limit {
            // jump to the next active byte, if there is one before the limit
            match bytes[i..limit].iter().position(|&c| self.active_tab[c as usize] != 0) {
                Some(pos) => i += pos,
                None => { i = limit; break; }
            }
            let c = bytes[i];
            if c == b'\n' || c == b'\r' {
                // emit the text in front of the line break first, without
                // its trailing whitespace
                let n = scan_trailing_whitespace(&self.text[beg..i]);
                let end = i - n;
                if end > beg {
                    self.off = end;
                    return Event::Text(Borrowed(&self.text[beg..end]));
                }
                // treat "\r\n" as a single line break
                if c == b'\r' && i + 1 < limit && self.text.as_bytes()[i + 1] == b'\n' {
                    i += 1;
                }
                i += 1;
                // start of the following line
                let next = i;
                let (n_containers, _, space) = self.scan_containers(&self.text[i..limit]);
                i += n_containers;
                if self.is_inline_block_end(&self.text[i..limit], space) {
                    self.off = next;
                    return self.end();
                }
                i += scan_whitespace_no_nl(&self.text[i..limit]);
                self.off = i;
                // two or more whitespace characters before the line break
                // make it a hard break
                return if n >= 2 { Event::HardBreak } else { Event::SoftBreak };
            }
            self.off = i;
            // emit pending text before dealing with the active byte
            if i > beg {
                return Event::Text(Borrowed(&self.text[beg..i]));
            }
            if let Some(event) = self.active_char(c) {
                return event;
            }
            i = self.off;  // let handler advance offset even on None
            i += 1;
        }
        if i > beg {
            self.off = i;
            Event::Text(Borrowed(&self.text[beg..i]))
        } else {
            self.end()
        }
    }
857 
active_char(&mut self, c: u8) -> Option<Event<'a>>858     fn active_char(&mut self, c: u8) -> Option<Event<'a>> {
859         match c {
860             b'\x00' => Some(self.char_null()),
861             b'\t' => Some(self.char_tab()),
862             b'\\' => self.char_backslash(),
863             b'&' => self.char_entity(),
864             b'_' => self.char_emphasis(),
865             b'*' => self.char_emphasis(),
866             b'[' | b'!' => self.char_link(),
867             b'`' => self.char_backtick(),
868             b'<' => self.char_lt(),
869             _ => None
870         }
871     }
872 
char_null(&mut self) -> Event<'a>873     fn char_null(&mut self) -> Event<'a> {
874         self.off += 1;
875         Event::Text(Borrowed(&"\u{fffd}"))
876     }
877 
878     // expand tab in content (used for code and inline)
879     // scan backward to find offset, counting unicode code points
char_tab(&mut self) -> Event<'a>880     fn char_tab(&mut self) -> Event<'a> {
881         let count = count_tab(&self.text.as_bytes()[.. self.off]);
882         self.off += 1;
883         Event::Text(Borrowed(&"    "[..count]))
884     }
885 
char_backslash(&mut self) -> Option<Event<'a>>886     fn char_backslash(&mut self) -> Option<Event<'a>> {
887         let limit = self.limit();
888         if self.off + 1 < limit {
889             if let (_, true) = scan_eol(&self.text[self.off + 1 .. limit]) {
890                 let n_white = self.scan_whitespace_inline(&self.text[self.off + 1 .. limit]);
891                 let space = 0;  // TODO: figure this out
892                 if !self.is_inline_block_end(&self.text[self.off + 1 + n_white .. limit], space) {
893                     self.off += 1 + n_white;
894                     return Some(Event::HardBreak);
895                 }
896             }
897             let c = self.text.as_bytes()[self.off + 1];
898             if is_ascii_punctuation(c) {
899                 self.off += 2;
900                 return Some(Event::Text(Borrowed(&self.text[self.off - 1 .. self.off])));
901             }
902         }
903         None
904     }
905 
char_entity(&mut self) -> Option<Event<'a>>906     fn char_entity(&mut self) -> Option<Event<'a>> {
907         match scan_entity(&self.text[self.off ..]) {
908             (n, Some(value)) => {
909                 self.off += n;
910                 Some(Event::Text(value))
911             }
912             _ => None
913         }
914     }
915 
    // Handle an emphasis delimiter run ('*' or '_') at `self.off`.
    //
    // Looks ahead for a closing run that matches this opener. On success it
    // starts an Emphasis tag (one delimiter consumed) or a Strong tag (two
    // consumed) and returns the Start event; the two offsets passed to
    // `self.start` cover the delimiter bytes chosen in the closing run.
    // Returns None when the run cannot open emphasis or no closer is found
    // before the block ends.
    fn char_emphasis(&mut self) -> Option<Event<'a>> {
        // can see to left for flanking info, but not past limit
        let limit = self.limit();
        let data = &self.text[..limit];

        let c = data.as_bytes()[self.off];
        // n = length of the delimiter run starting at self.off
        let (n, can_open, _can_close) = compute_open_close(data, self.off, c);
        if !can_open {
            return None;
        }
        // stack of still-unclosed opener run lengths; the bottom entry is the
        // run at self.off
        let mut stack = vec![n];  // TODO performance: don't allocate
        let mut i = self.off + n;
        while i < limit {
            let c2 = data.as_bytes()[i];
            if c2 == b'\n' && !is_escaped(data, i) {
                let space = 0;  // TODO: scan containers
                // emphasis cannot extend past the end of the block
                if self.is_inline_block_end(&self.text[i + 1 .. limit], space) {
                    return None
                } else {
                    i += 1;
                }
            } else if c2 == c && !is_escaped(data, i) {
                let (mut n2, can_open, can_close) = compute_open_close(data, i, c);
                if can_close {
                    // match this closing run against openers, innermost first
                    loop {
                        // never empty here: it starts with one entry, and an
                        // empty stack after a pop returns below
                        let ntos = stack.pop().unwrap();
                        if ntos > n2 {
                            // opener only partly consumed; keep the rest
                            stack.push(ntos - n2);
                            break;
                        }
                        if stack.is_empty() {
                            // reached the original opener: this run closes it
                            let npop = if ntos < n2 { ntos } else { n2 };
                            if npop == 1 {
                                self.off += 1;
                                return Some(self.start(Tag::Emphasis, i, i + 1));
                            } else {
                                self.off += 2;
                                let next = i + npop;
                                return Some(self.start(Tag::Strong, next - 2, next));
                            }
                        } else {
                            // an inner opener is fully closed; continue with
                            // the rest of this closing run
                            i += ntos;
                            n2 -= ntos;
                        }
                    }
                } else if can_open {
                    stack.push(n2);
                }
                i += n2;
            } else if c2 == b'`' {
                // skip a code span, or just the backtick run if it is unclosed
                let (n, beg, _) = self.scan_inline_code(&self.text[i..limit]);
                if n != 0 {
                    i += n;
                } else {
                    i += beg;
                }
            } else if c2 == b'<' {
                // skip an autolink or inline HTML as a unit
                let n = self.scan_autolink_or_html(&self.text[i..limit]);
                if n != 0 {
                    i += n;
                } else {
                    i += 1;
                }
            } else if c2 == b'[' {
                // skip a whole link as a unit
                if let Some((_, _, _, n)) = self.parse_link(&self.text[i..limit], false) {
                    i += n;
                } else {
                    i += 1;
                }
            } else {
                i += 1;
            }
        }
        None
    }
991 
992     // # Links
993 
994     // scans a link label, example [link]
995     // return value is: total bytes, start of text, end of text, max nesting
scan_link_label(&self, data: &str) -> (usize, usize, usize, usize)996     fn scan_link_label(&self, data: &str) -> (usize, usize, usize, usize) {
997         let mut i = scan_ch(data, b'[');
998         if i == 0 { return (0, 0, 0, 0); }
999         let text_beg = i;
1000         let mut max_nest = 1;
1001         let mut nest = 1;
1002         loop {
1003             if i >= data.len() { return (0, 0, 0, 0); }
1004             match data.as_bytes()[i] {
1005                 b'\n' => {
1006                     let n = self.scan_whitespace_inline(&data[i..]);
1007                     if n == 0 { return (0, 0, 0, 0); }
1008                     i += n;
1009                     continue;
1010                 }
1011                 b'[' => {
1012                     nest += 1;
1013                     if nest == MAX_LINK_NEST { return (0, 0, 0, 0); }
1014                     max_nest = cmp::max(max_nest, nest)
1015                 }
1016                 b']' => {
1017                     nest -= 1;
1018                     if nest == 0 {
1019                         break;
1020                     }
1021                 }
1022                 b'\\' => i += 1,
1023                 b'<' => {
1024                     let n = self.scan_autolink_or_html(&data[i..]);
1025                     if n != 0 {
1026                         i += n;
1027                     } else {
1028                         i += 1;
1029                     }
1030                 }
1031                 b'`' => {
1032                     let (n, beg, _) = self.scan_inline_code(&data[i..]);
1033                     if n != 0 {
1034                         i += n;
1035                     } else {
1036                         i += beg;
1037                     }
1038                 }
1039                 _ => ()
1040             }
1041             i += 1;
1042         }
1043         let text_end = i;
1044         i += 1;  // skip closing ]
1045         (i, text_beg, text_end, max_nest)
1046     }
1047 
scan_link_title(&self, data: &str) -> (usize, usize, usize)1048     fn scan_link_title(&self, data: &str) -> (usize, usize, usize) {
1049         let size = data.len();
1050         if size == 0 { return (0, 0, 0); }
1051         let mut i = 0;
1052         let titleclose = match data.as_bytes()[i] {
1053             b'(' => b')',
1054             b'\'' => b'\'',
1055             b'\"' => b'\"',
1056             _ => return (0, 0, 0)
1057         };
1058         i += 1;
1059         let title_beg = i;
1060         while i < size {
1061             match data.as_bytes()[i] {
1062                 x if x == titleclose => break,
1063                 b'\\' => i += 2,  // may be > size
1064                 b'\n' => {
1065                     let n = self.scan_whitespace_inline(&data[i..]);
1066                     if n == 0 { return (0, 0, 0); }
1067                     i += n;
1068                 }
1069                 _ => i += 1
1070             }
1071         }
1072         if i >= size { return (0, 0, 0); }
1073         let title_end = i;
1074         i += 1;
1075         (i, title_beg, title_end)
1076     }
1077 
char_link(&mut self) -> Option<Event<'a>>1078     fn char_link(&mut self) -> Option<Event<'a>> {
1079         self.parse_link(&self.text[self.off .. self.limit()], false).map(|(tag, beg, end, n)| {
1080             let off = self.off;
1081             self.off += beg;
1082             self.start(tag, off + end, off + n)
1083         })
1084     }
1085 
    // Try to parse a link or image at the start of `data`, either inline
    // `[text](dest "title")` or by reference `[text][ref]` / `[text]`.
    //
    // `recur` is passed as true by `contains_link` and disables the
    // nested-link check, so that check does not recurse.
    //
    // return: tag, begin, end, total size
    // `begin`..`end` is the byte range of the link text within `data`, and
    // total size is the number of bytes the whole construct occupies.
    // Returns None if nothing parses or a reference is not defined.
    fn parse_link(&self, data: &'a str, recur: bool) -> Option<(Tag<'a>, usize, usize, usize)> {
        let size = data.len();

        // scan link text
        let i = scan_ch(data, b'!');
        let is_image = i == 1;
        let (n, text_beg, text_end, max_nest) = self.scan_link_label(&data[i..]);
        if n == 0 { return None; }
        // rebase the label offsets past the optional '!'
        let (text_beg, text_end) = (text_beg + i, text_end + i);
        // max_nest > 1 means the text contains brackets, so it may hold a link
        if !is_image && !recur && max_nest > 1 && self.contains_link(&data[text_beg..text_end]) {
            // disallow nested links in links (but ok in images)
            return None;
        }
        let mut i = i + n;

        // scan dest
        let (dest, title, beg, end, next) = if data[i..].starts_with("(") {
            // inline link: ( dest [title] )
            i += 1;
            i += self.scan_whitespace_inline(&data[i..]);
            if i >= size { return None; }

            let linkdest = scan_link_dest(&data[i..]);
            if linkdest.is_none() { return None; }
            let (n, raw_dest) = linkdest.unwrap();
            let dest = unescape(raw_dest);
            i += n;

            i += self.scan_whitespace_inline(&data[i..]);
            if i == size { return None; }

            // scan title
            let (n_title, title_beg, title_end) = self.scan_link_title(&data[i..]);
            let title = if n_title == 0 {
                // no title present
                Borrowed("")
            } else {
                let (title_beg, title_end) = (i + title_beg, i + title_end);
                i += n_title;
                // TODO: not just unescape, remove containers from newlines
                unescape(&data[title_beg..title_end])
            };
            i += self.scan_whitespace_inline(&data[i..]);
            if i == size || data.as_bytes()[i] != b')' { return None; }
            i += 1;
            (dest, title, text_beg, text_end, i)
        } else {
            // try link reference
            let j = i + self.scan_whitespace_inline(&data[i..]);
            let (n_ref, ref_beg, ref_end, _) = self.scan_link_label(&data[j..]);
            // with no second label, or an empty one, the link text itself is
            // the reference
            let (ref_beg, ref_end) = if n_ref == 0 || ref_beg == ref_end {
                (text_beg, text_end)
            } else {
                (j + ref_beg, j + ref_end)
            };
            // a second label, even an empty one, is part of the link
            if n_ref != 0 {
                i = j + n_ref;
            }
            let reference = self.normalize_link_ref(&data[ref_beg..ref_end]);
            let (dest, title) = match self.links.get(&reference) {
                Some(&(ref dest, ref title)) => (dest.clone(), title.clone()),
                None => return None
            };
            (dest, title, text_beg, text_end, i)
        };
        if is_image {
            Some((Tag::Image(dest, title), beg, end, next))
        } else {
            Some((Tag::Link(dest, title), beg, end, next))
        }
    }
1156 
1157     // determine whether there's a link anywhere in the text
1158     // TODO: code duplication with scan_link_label is unpleasant
contains_link(&self, data: &str) -> bool1159     fn contains_link(&self, data: &str) -> bool {
1160         let mut i = 0;
1161         while i < data.len() {
1162             match data.as_bytes()[i] {
1163                 b'\n' => {
1164                     let n = self.scan_whitespace_inline(&data[i..]);
1165                     if n == 0 { return false; }
1166                     i += n;
1167                     continue;
1168                 }
1169                 b'!' => {
1170                     if scan_ch(&data[i + 1 ..], b'[') != 0 {
1171                         // ok to contain image, skip over opening bracket
1172                         i += 1;
1173                     }
1174                 }
1175                 b'[' => {
1176                     if self.parse_link(&data[i..], true).is_some() { return true; }
1177                 }
1178                 b'\\' => i += 1,
1179                 b'<' => {
1180                     let n = self.scan_autolink_or_html(&data[i..]);
1181                     if n != 0 {
1182                         i += n;
1183                     } else {
1184                         i += 1;
1185                     }
1186                 }
1187                 b'`' => {
1188                     let (n, beg, _) = self.scan_inline_code(&data[i..]);
1189                     if n != 0 {
1190                         i += n;
1191                     } else {
1192                         i += beg;
1193                     }
1194                 }
1195                 _ => ()
1196             }
1197             i += 1;
1198         }
1199         false
1200     }
1201 
1202     // # Autolinks and inline HTML
1203 
char_lt(&mut self) -> Option<Event<'a>>1204     fn char_lt(&mut self) -> Option<Event<'a>> {
1205         let tail = &self.text[self.off .. self.limit()];
1206         if let Some((n, link)) = scan_autolink(tail) {
1207             let next = self.off + n;
1208             self.off += 1;
1209             self.state = State::Literal;
1210             return Some(self.start(Tag::Link(link, Borrowed("")), next - 1, next))
1211         }
1212         let n = self.scan_inline_html(tail);
1213         if n != 0 {
1214             return Some(self.inline_html_event(n))
1215         }
1216         None
1217     }
1218 
scan_autolink_or_html(&self, data: &str) -> usize1219     fn scan_autolink_or_html(&self, data: &str) -> usize {
1220         if let Some((n, _)) = scan_autolink(data) {
1221             n
1222         } else {
1223             self.scan_inline_html(data)
1224         }
1225     }
1226 
scan_inline_html(&self, data: &str) -> usize1227     fn scan_inline_html(&self, data: &str) -> usize {
1228         let n = self.scan_html_tag(data);
1229         if n != 0 { return n; }
1230         let n = self.scan_html_comment(data);
1231         if n != 0 { return n; }
1232         let n = self.scan_processing_instruction(data);
1233         if n != 0 { return n; }
1234         let n = self.scan_declaration(data);
1235         if n != 0 { return n; }
1236         let n = self.scan_cdata(data);
1237         if n != 0 { return n; }
1238         0
1239     }
1240 
    // Scan an HTML open tag (`<name attr=value ... />`) or closing tag
    // (`</name >`) at the start of `data`.
    // Returns the byte length through the closing '>', or 0 if no tag matches.
    fn scan_html_tag(&self, data: &str) -> usize {
        let size = data.len();
        let mut i = 0;
        if scan_ch(data, b'<') == 0 { return 0; }
        i += 1;
        // n_slash is 1 for a closing tag, 0 for an open tag
        let n_slash = scan_ch(&data[i..], b'/');
        i += n_slash;
        // tag name: an ASCII letter followed by ASCII alphanumerics
        if i == size || !is_ascii_alpha(data.as_bytes()[i]) { return 0; }
        i += 1;
        i += scan_while(&data[i..], is_ascii_alphanumeric);
        if n_slash == 0 {
            // open tag: zero or more attributes, each preceded by whitespace
            loop {
                let n = self.scan_whitespace_inline(&data[i..]);
                if n == 0 { break; }
                i += n;
                let n = scan_attribute_name(&data[i..]);
                if n == 0 { break; }
                i += n;
                // optional "= value"; whitespace is allowed around the '='
                let n = self.scan_whitespace_inline(&data[i..]);
                if scan_ch(&data[i + n ..], b'=') != 0 {
                    i += n + 1;
                    i += self.scan_whitespace_inline(&data[i..]);
                    let n_attr = self.scan_attribute_value(&data[i..]);
                    // '=' must be followed by a value
                    if n_attr == 0 { return 0; }
                    i += n_attr;
                }
            }
            // optional whitespace and self-closing '/'
            i += self.scan_whitespace_inline(&data[i..]);
            i += scan_ch(&data[i..], b'/');
        } else {
            // closing tag: only whitespace may follow the name
            i += self.scan_whitespace_inline(&data[i..]);
        }
        if scan_ch(&data[i..], b'>') == 0 { return 0; }
        i += 1;
        i
    }
1277 
    // Scan an HTML attribute value at the start of `data`: single-quoted,
    // double-quoted, or unquoted.
    // Returns its byte length (quotes included), or 0 if there is no value,
    // a quoted value is never closed, or `scan_whitespace_inline` returns 0
    // at a newline inside a quoted value.
    fn scan_attribute_value(&self, data: &str) -> usize {
        let size = data.len();
        if size == 0 { return 0; }
        let open = data.as_bytes()[0];
        let quoted = open == b'\'' || open == b'"';
        // skip the opening quote if there is one
        let mut i = if quoted { 1 } else { 0 };
        while i < size {
            let c = data.as_bytes()[i];
            match c {
                b'\n' => {
                    // a newline ends an unquoted value
                    if !quoted { break; }
                    let n = self.scan_whitespace_inline(&data[i..]);
                    if n == 0 { return 0; }
                    i += n;
                }
                // these bytes end an unquoted value; inside quotes only the
                // matching quote byte ends it
                b'\'' | b'"' | b'=' | b'<' | b'>' | b'`' | b'\t' ... b' ' => {
                    if !quoted || c == open { break; }
                    i += 1;
                }
                _ => i += 1
            }
        }
        if quoted {
            // require and consume the closing quote
            if i == size || data.as_bytes()[i] != open { return 0; }
            i += 1;
        }
        i
    }
1306 
scan_html_comment(&self, data: &str) -> usize1307     fn scan_html_comment(&self, data: &str) -> usize {
1308         if !data.starts_with("<!--") { return 0; }
1309         if let Some(n) = data[4..].find("--") {
1310             let text = &data[4..4 + n];
1311             if !text.starts_with('>') && !text.starts_with("->") &&
1312                     data[n + 6 ..].starts_with('>') {
1313                 return n + 7;
1314             }
1315         }
1316         0
1317     }
1318 
scan_processing_instruction(&self, data: &str) -> usize1319     fn scan_processing_instruction(&self, data: &str) -> usize {
1320         if !data.starts_with("<?") { return 0; }
1321         if let Some(n) = data[2..].find("?>") {
1322             return n + 4;
1323         }
1324         0
1325     }
1326 
scan_declaration(&self, data: &str) -> usize1327     fn scan_declaration(&self, data: &str) -> usize {
1328         if !data.starts_with("<!") { return 0; }
1329         let n = scan_while(&data[2..], is_ascii_upper);
1330         if n == 0 { return 0; }
1331         let i = n + 2;
1332         let n = self.scan_whitespace_inline(&data[i..]);
1333         if n == 0 { return 0; }
1334         let mut i = i + n;
1335         while i < data.len() {
1336             match data.as_bytes()[i] {
1337                 b'>' => return i + 1,
1338                 b'\n' => i += self.scan_whitespace_inline(&data[i..]),
1339                 _ => i += 1
1340             }
1341         }
1342         0
1343     }
1344 
scan_cdata(&self, data: &str) -> usize1345     fn scan_cdata(&self, data: &str) -> usize {
1346         if !data.starts_with("<![CDATA[") { return 0; }
1347         if let Some(n) = data[9..].find("]]>") {
1348             return n + 12;
1349         }
1350         0
1351     }
1352 
inline_html_event(&mut self, n: usize) -> Event<'a>1353     fn inline_html_event(&mut self, n: usize) -> Event<'a> {
1354         let data = &self.text[self.off .. self.off + n];
1355         let size = data.len();
1356         let mut out = Borrowed("");
1357         let mut i = 0;
1358         let mut mark = 0;
1359         while i < size {
1360             let n = scan_nextline(&data[i..]);
1361             i += n;
1362             if n >= 2 && data.as_bytes()[i - 2] == b'\r' {
1363                 out = utils::cow_append(out, Borrowed(&data[mark .. i - 2]));
1364                 mark = i - 1;
1365             }
1366             if i < size {
1367                 let (n, _, _) = self.scan_containers(&data[i..]);
1368                 if n != 0 {
1369                     out = utils::cow_append(out, Borrowed(&data[mark..i]));
1370                     mark = i + n;
1371                 }
1372             }
1373         }
1374         out = utils::cow_append(out, Borrowed(&data[mark..n]));
1375         self.off += n;
1376         Event::InlineHtml(out)
1377     }
1378 
1379     // link text is literal, with no processing of markup
next_literal(&mut self) -> Event<'a>1380     fn next_literal(&mut self) -> Event<'a> {
1381         self.state = State::Inline;
1382         let beg = self.off;
1383         let end = self.limit();
1384         self.off = end;
1385         Event::Text(Borrowed(&self.text[beg..end]))
1386     }
1387 
1388     // second return value is number of backticks even if not closed
scan_inline_code(&self, data: &str) -> (usize, usize, usize)1389     fn scan_inline_code(&self, data: &str) -> (usize, usize, usize) {
1390         let size = data.len();
1391         let backtick_len = scan_backticks(data);
1392         let mut i = backtick_len;
1393         while i < size {
1394             match data.as_bytes()[i] {
1395                 b'`' => {
1396                     let close_len = scan_backticks(&data[i..]);
1397                     if close_len == backtick_len {
1398                         return (i + backtick_len, backtick_len, i);
1399                     } else {
1400                         i += close_len;
1401                     }
1402                 }
1403                 b'\n' => {
1404                     i += 1;
1405                     let (n, _, space) = self.scan_containers(&data[i..]);
1406                     i += n;
1407                     if self.is_inline_block_end(&data[i..], space) {
1408                         return (0, backtick_len, 0);
1409                     }
1410                 }
1411                 // TODO: '<'
1412                 _ => i += 1
1413             }
1414         }
1415         (0, backtick_len, 0)
1416     }
1417 
char_backtick(&mut self) -> Option<Event<'a>>1418     fn char_backtick(&mut self) -> Option<Event<'a>> {
1419         let beg = self.off;
1420         let limit = self.limit();
1421         let mut i = beg;
1422         let (n, code_beg, code_end) = self.scan_inline_code(&self.text[i..limit]);
1423         if n == 0 {
1424             self.off += code_beg - 1;
1425             return None;
1426         }
1427         i += code_beg;
1428         let end = beg + code_end;
1429         let next = beg + n;
1430         i += self.scan_whitespace_inline(&self.text[i..limit]);
1431         self.off = i;
1432         self.state = State::InlineCode;
1433         Some(self.start(Tag::Code, end, next))
1434     }
1435 
    // Produce the next event while in the InlineCode state.
    //
    // Non-whitespace runs, with any lone single spaces between them, are
    // emitted as borrowed Text. Any other whitespace run is emitted as one
    // Text(" "). Whitespace reaching the limit is not emitted; once nothing
    // else is pending, `self.end()` is returned.
    fn next_inline_code(&mut self) -> Event<'a> {
        let beg = self.off;
        let mut i = beg;
        let limit = self.limit();
        while i < limit {
            let c = self.text.as_bytes()[i];
            if is_ascii_whitespace(c) {
                let n = self.scan_whitespace_inline(&self.text[i..limit]);
                // whitespace runs to the limit, or the scan returned 0
                if i + n == limit || n == 0 {
                    if i > beg {
                        // emit the text gathered so far first
                        break;
                    } else {
                        return self.end();
                    }
                }
                if c == b' ' && n == 1 {
                    // optimization to reduce number of text blocks produced
                    i += 1;
                } else {
                    if i > beg {
                        // flush pending text; the whitespace run is handled
                        // on the next call
                        break;
                    }
                    // replace the whole run with a single space
                    i += n;
                    self.off = i;
                    return Event::Text(Borrowed(" "));
                }
            } else {
                i += 1;
            }
        }
        if i > beg {
            self.off = i;
            Event::Text(Borrowed(&self.text[beg..i]))
        } else {
            self.end()
        }
    }
1473 }
1474 
1475 impl<'a> Iterator for RawParser<'a> {
1476     type Item = Event<'a>;
1477 
next(&mut self) -> Option<Event<'a>>1478     fn next(&mut self) -> Option<Event<'a>> {
1479         //println!("off {} {:?}, stack {:?} containers {:?}",
1480         //        self.off, self.state, self.stack, self.containers);
1481         if self.off < self.text.len() {
1482             match self.state {
1483                 State::StartBlock | State::InContainers => {
1484                     let ret = self.start_block();
1485                     if ret.is_some() {
1486                         return ret;
1487                     }
1488                 }
1489                 State::Inline => return Some(self.next_inline()),
1490                 State::CodeLineStart => return Some(self.next_code_line_start()),
1491                 State::Code => return Some(self.next_code()),
1492                 State::InlineCode => return Some(self.next_inline_code()),
1493                 State::Literal => return Some(self.next_literal()),
1494             }
1495         }
1496         match self.stack.pop() {
1497             Some((tag, _, _)) => Some(Event::End(tag)),
1498             None => None
1499         }
1500     }
1501 }
1502