1 //! The CommonMark AST.
2 
3 use arena_tree::Node;
4 use std::cell::RefCell;
5 
/// The core AST node enum.
///
/// Each variant's documentation marks it as a **Block** or an **Inline** node; use
/// `NodeValue::block` to query that classification programmatically.
#[derive(Debug, Clone)]
pub enum NodeValue {
    /// The root of every CommonMark document.  Contains **blocks**.
    Document,

    /// **Block**. A [block quote](https://github.github.com/gfm/#block-quotes).  Contains other
    /// **blocks**.
    ///
    /// ``` md
    /// > A block quote.
    /// ```
    BlockQuote,

    /// **Block**.  A [list](https://github.github.com/gfm/#lists).  Contains
    /// [list items](https://github.github.com/gfm/#list-items).
    ///
    /// ``` md
    /// * An unordered list
    /// * Another item
    ///
    /// 1. An ordered list
    /// 2. Another item
    /// ```
    List(NodeList),

    /// **Block**.  A [list item](https://github.github.com/gfm/#list-items).  Contains other
    /// **blocks**.
    Item(NodeList),

    /// **Block**. A description list, enabled with `ext_description_lists` option.  Contains
    /// description items.
    ///
    /// It is required to put a blank line between terms and details.
    ///
    /// ``` md
    /// Term 1
    ///
    /// : Details 1
    ///
    /// Term 2
    ///
    /// : Details 2
    /// ```
    DescriptionList,

    /// **Block**. An item of a description list.  Contains a term and one details block.
    DescriptionItem(NodeDescriptionItem),

    /// **Block**. Term of an item in a description list.
    DescriptionTerm,

    /// **Block**. Details of an item in a description list.
    DescriptionDetails,

    /// **Block**. A code block; may be [fenced](https://github.github.com/gfm/#fenced-code-blocks)
    /// or [indented](https://github.github.com/gfm/#indented-code-blocks).  Contains raw text
    /// which is not parsed as Markdown, although it is HTML escaped.
    CodeBlock(NodeCodeBlock),

    /// **Block**. An [HTML block](https://github.github.com/gfm/#html-blocks).  Contains raw text
    /// which is neither parsed as Markdown nor HTML escaped.
    HtmlBlock(NodeHtmlBlock),

    /// **Block**. A [paragraph](https://github.github.com/gfm/#paragraphs).  Contains **inlines**.
    Paragraph,

    /// **Block**. A heading; may be an [ATX heading](https://github.github.com/gfm/#atx-headings)
    /// or a [setext heading](https://github.github.com/gfm/#setext-headings). Contains
    /// **inlines**.
    Heading(NodeHeading),

    /// **Block**. A [thematic break](https://github.github.com/gfm/#thematic-breaks).  Has no
    /// children.
    ThematicBreak,

    /// **Block**. A footnote definition.  The `Vec<u8>` is the footnote's name.
    /// Contains other **blocks**.
    FootnoteDefinition(Vec<u8>),

    /// **Block**. A [table](https://github.github.com/gfm/#tables-extension-) per the GFM spec.
    /// Contains table rows.
    Table(Vec<TableAlignment>),

    /// **Block**. A table row.  The `bool` represents whether the row is the header row or not.
    /// Contains table cells.
    TableRow(bool),

    /// **Block**.  A table cell.  Contains **inlines**.
    TableCell,

    /// **Inline**.  [Textual content](https://github.github.com/gfm/#textual-content).  All text
    /// in a document will be contained in a `Text` node.
    Text(Vec<u8>),

    /// **Inline**. [Task list item](https://github.github.com/gfm/#task-list-items-extension-). The
    /// `bool` indicates whether it is checked or not.
    TaskItem(bool),

    /// **Inline**.  A [soft line break](https://github.github.com/gfm/#soft-line-breaks).  If
    /// the `hardbreaks` option is set in `ComrakOptions` during formatting, it will be formatted
    /// as a `LineBreak`.
    SoftBreak,

    /// **Inline**.  A [hard line break](https://github.github.com/gfm/#hard-line-breaks).
    LineBreak,

    /// **Inline**.  A [code span](https://github.github.com/gfm/#code-spans).
    Code(Vec<u8>),

    /// **Inline**.  [Raw HTML](https://github.github.com/gfm/#raw-html) contained inline.
    HtmlInline(Vec<u8>),

    /// **Inline**.  [Emphasised](https://github.github.com/gfm/#emphasis-and-strong-emphasis)
    /// text.
    Emph,

    /// **Inline**.  [Strong](https://github.github.com/gfm/#emphasis-and-strong-emphasis) text.
    Strong,

    /// **Inline**.  [Strikethrough](https://github.github.com/gfm/#strikethrough-extension-) text
    /// per the GFM spec.
    Strikethrough,

    /// **Inline**.  Superscript.  Enabled with `ext_superscript` option.
    Superscript,

    /// **Inline**.  A [link](https://github.github.com/gfm/#links) to some URL, with possible
    /// title.
    Link(NodeLink),

    /// **Inline**.  An [image](https://github.github.com/gfm/#images).
    Image(NodeLink),

    /// **Inline**.  A footnote reference; the `Vec<u8>` is the referent footnote's name.
    FootnoteReference(Vec<u8>),
}
143 
/// Alignment of a single table cell.
///
/// A `NodeValue::Table` carries a `Vec` of these values.
#[derive(Debug, Copy, Clone)]
pub enum TableAlignment {
    /// Cell content is unaligned.
    None,

    /// Cell content is aligned left.
    Left,

    /// Cell content is centered.
    Center,

    /// Cell content is aligned right.
    Right,
}
159 
/// The details of a link's destination, or an image's source.
///
/// This is the payload of both `NodeValue::Link` and `NodeValue::Image`.
#[derive(Debug, Clone)]
pub struct NodeLink {
    /// The URL for the link destination or image source, as raw bytes.
    pub url: Vec<u8>,

    /// The title for the link or image, as raw bytes.
    ///
    /// Note this field is used for the `title` attribute by the HTML formatter even for images;
    /// `alt` text is supplied in the image inline text.
    pub title: Vec<u8>,
}
172 
/// The metadata of a list; the kind of list, the delimiter used and so on.
///
/// This is the payload of both `NodeValue::List` and `NodeValue::Item`.  The derived `Default`
/// yields a bullet list using the `Period` delimiter, with every numeric field zero and
/// `tight` set to `false`.
#[derive(Debug, Default, Clone, Copy)]
pub struct NodeList {
    /// The kind of list (bullet (unordered) or ordered).
    pub list_type: ListType,

    #[doc(hidden)]
    pub marker_offset: usize,

    #[doc(hidden)]
    pub padding: usize,

    /// For ordered lists, the ordinal the list starts at.
    pub start: usize,

    /// For ordered lists, the delimiter after each number.
    pub delimiter: ListDelimType,

    /// For bullet lists, the character used for each bullet.
    pub bullet_char: u8,

    /// Whether the list is [tight](https://github.github.com/gfm/#tight), i.e. whether the
    /// paragraphs are wrapped in `<p>` tags when formatted as HTML.
    pub tight: bool,
}
198 
/// The metadata of a description list item; the payload of `NodeValue::DescriptionItem`.
#[derive(Debug, Default, Clone, Copy)]
pub struct NodeDescriptionItem {
    #[doc(hidden)]
    pub marker_offset: usize,

    #[doc(hidden)]
    pub padding: usize,
}
208 
/// The kind of a list: bullet (unordered) or ordered.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ListType {
    /// An unordered list, whose items are introduced by a bullet character.
    Bullet,

    /// An ordered (numbered) list.
    Ordered,
}

/// Lists are bullet lists unless stated otherwise.
impl Default for ListType {
    fn default() -> Self {
        ListType::Bullet
    }
}
224 
/// The delimiter of an ordered list, i.e. the character that follows each item's number.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ListDelimType {
    /// The number is followed by a period, `.`.
    Period,

    /// The number is followed by a closing paren, `)`.
    Paren,
}

/// Ordered lists use the period delimiter unless stated otherwise.
impl Default for ListDelimType {
    fn default() -> Self {
        ListDelimType::Period
    }
}
240 
/// The metadata and data of a code block (fenced or indented).
///
/// This is the payload of `NodeValue::CodeBlock`.
#[derive(Default, Debug, Clone)]
pub struct NodeCodeBlock {
    /// Whether the code block is fenced.
    pub fenced: bool,

    /// For fenced code blocks, the fence character itself (`` ` `` or `~`).
    pub fence_char: u8,

    /// For fenced code blocks, the length of the fence.
    pub fence_length: usize,

    #[doc(hidden)]
    pub fence_offset: usize,

    /// For fenced code blocks, the [info string](https://github.github.com/gfm/#info-string) after
    /// the opening fence, if any.
    pub info: Vec<u8>,

    /// The literal contents of the code block.  As the contents are not interpreted as Markdown at
    /// all, they are contained within this structure, rather than inserted into a child inline of
    /// any kind.
    pub literal: Vec<u8>,
}
265 
/// The metadata of a heading; the payload of `NodeValue::Heading`.
#[derive(Default, Debug, Clone, Copy)]
pub struct NodeHeading {
    /// The level of the header; from 1 to 6 for ATX headings, 1 or 2 for setext headings.
    pub level: u32,

    /// Whether the heading is setext (if not, ATX).
    pub setext: bool,
}
275 
/// The metadata of an included HTML block; the payload of `NodeValue::HtmlBlock`.
#[derive(Debug, Default, Clone)]
pub struct NodeHtmlBlock {
    #[doc(hidden)]
    pub block_type: u8,

    /// The literal contents of the HTML block.  As with `NodeCodeBlock`, the content is included
    /// here rather than in any inline.
    pub literal: Vec<u8>,
}
286 
287 impl NodeValue {
288     /// Indicates whether this node is a block node or inline node.
block(&self) -> bool289     pub fn block(&self) -> bool {
290         match *self {
291             NodeValue::Document
292             | NodeValue::BlockQuote
293             | NodeValue::FootnoteDefinition(_)
294             | NodeValue::List(..)
295             | NodeValue::DescriptionList
296             | NodeValue::DescriptionItem(_)
297             | NodeValue::DescriptionTerm
298             | NodeValue::DescriptionDetails
299             | NodeValue::Item(..)
300             | NodeValue::CodeBlock(..)
301             | NodeValue::HtmlBlock(..)
302             | NodeValue::Paragraph
303             | NodeValue::Heading(..)
304             | NodeValue::ThematicBreak
305             | NodeValue::Table(..)
306             | NodeValue::TableRow(..)
307             | NodeValue::TableCell => true,
308             _ => false,
309         }
310     }
311 
312     #[doc(hidden)]
accepts_lines(&self) -> bool313     pub fn accepts_lines(&self) -> bool {
314         match *self {
315             NodeValue::Paragraph | NodeValue::Heading(..) | NodeValue::CodeBlock(..) => true,
316             _ => false,
317         }
318     }
319 
320     /// Indicates whether this node may contain inlines.
contains_inlines(&self) -> bool321     pub fn contains_inlines(&self) -> bool {
322         match *self {
323             NodeValue::Paragraph | NodeValue::Heading(..) | NodeValue::TableCell => true,
324             _ => false,
325         }
326     }
327 
328     /// Return a reference to the text of a `Text` inline, if this node is one.
329     ///
330     /// Convenience method.
text(&self) -> Option<&Vec<u8>>331     pub fn text(&self) -> Option<&Vec<u8>> {
332         match *self {
333             NodeValue::Text(ref t) => Some(t),
334             _ => None,
335         }
336     }
337 
338     /// Return a mutable reference to the text of a `Text` inline, if this node is one.
339     ///
340     /// Convenience method.
text_mut(&mut self) -> Option<&mut Vec<u8>>341     pub fn text_mut(&mut self) -> Option<&mut Vec<u8>> {
342         match *self {
343             NodeValue::Text(ref mut t) => Some(t),
344             _ => None,
345         }
346     }
347 }
348 
/// A single node in the CommonMark AST.
///
/// The struct contains metadata about the node's position in the original document, and the core
/// enum, `NodeValue`.
#[derive(Debug, Clone)]
pub struct Ast {
    /// The node value itself.
    pub value: NodeValue,

    /// The line in the input document the node starts at.  `Ast::new` initialises this to `0`.
    pub start_line: u32,

    // The remaining fields are hidden from the public documentation.  `Ast::new` initialises
    // them to an empty buffer, `true` and `false` respectively.
    #[doc(hidden)]
    pub content: Vec<u8>,
    #[doc(hidden)]
    pub open: bool,
    #[doc(hidden)]
    pub last_line_blank: bool,
}
368 
369 impl Ast {
370     /// Create a new AST node with the given value.
new(value: NodeValue) -> Self371     pub fn new(value: NodeValue) -> Self {
372         Ast {
373             value: value,
374             content: vec![],
375             start_line: 0,
376             open: true,
377             last_line_blank: false,
378         }
379     }
380 }
381 
/// The type of a node within the document.
///
/// It is bound by the lifetime `'a`, which corresponds to the `Arena` nodes are allocated in.
/// An `AstNode` is typically handled through a reference that is itself bound by `'a`
/// (`&'a AstNode<'a>`).  The `Ast` held by each node is wrapped in a `RefCell` for interior
/// mutability.
///
/// You can construct a new `AstNode` from a `NodeValue` using the `From` trait:
///
/// ```no_run
/// # use comrak::nodes::{AstNode, NodeValue};
/// let root = AstNode::from(NodeValue::Document);
/// ```
pub type AstNode<'a> = Node<'a, RefCell<Ast>>;
395 
396 impl<'a> From<NodeValue> for AstNode<'a> {
397     /// Create a new AST node with the given value.
from(value: NodeValue) -> Self398     fn from(value: NodeValue) -> Self {
399         Node::new(RefCell::new(Ast::new(value)))
400     }
401 }
402 
403 #[doc(hidden)]
last_child_is_open<'a>(node: &'a AstNode<'a>) -> bool404 pub fn last_child_is_open<'a>(node: &'a AstNode<'a>) -> bool {
405     node.last_child().map_or(false, |n| n.data.borrow().open)
406 }
407 
408 #[doc(hidden)]
can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool409 pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool {
410     if let NodeValue::Document = *child {
411         return false;
412     }
413 
414     match node.data.borrow().value {
415         NodeValue::Document
416         | NodeValue::BlockQuote
417         | NodeValue::FootnoteDefinition(_)
418         | NodeValue::DescriptionTerm
419         | NodeValue::DescriptionDetails
420         | NodeValue::Item(..) => {
421             child.block() && match *child {
422                 NodeValue::Item(..) => false,
423                 _ => true,
424             }
425         }
426 
427         NodeValue::List(..) => match *child {
428             NodeValue::Item(..) => true,
429             _ => false,
430         },
431 
432         NodeValue::DescriptionList => match *child {
433             NodeValue::DescriptionItem(_) => true,
434             _ => false,
435         },
436 
437         NodeValue::DescriptionItem(_) => match *child {
438             NodeValue::DescriptionTerm | NodeValue::DescriptionDetails => true,
439             _ => false,
440         },
441 
442         NodeValue::Paragraph
443         | NodeValue::Heading(..)
444         | NodeValue::Emph
445         | NodeValue::Strong
446         | NodeValue::Link(..)
447         | NodeValue::Image(..) => !child.block(),
448 
449         NodeValue::Table(..) => match *child {
450             NodeValue::TableRow(..) => true,
451             _ => false,
452         },
453 
454         NodeValue::TableRow(..) => match *child {
455             NodeValue::TableCell => true,
456             _ => false,
457         },
458 
459         NodeValue::TableCell => match *child {
460             NodeValue::Text(..)
461             | NodeValue::Code(..)
462             | NodeValue::Emph
463             | NodeValue::Strong
464             | NodeValue::Link(..)
465             | NodeValue::Image(..)
466             | NodeValue::Strikethrough
467             | NodeValue::HtmlInline(..) => true,
468             _ => false,
469         },
470 
471         _ => false,
472     }
473 }
474 
475 #[doc(hidden)]
ends_with_blank_line<'a>(node: &'a AstNode<'a>) -> bool476 pub fn ends_with_blank_line<'a>(node: &'a AstNode<'a>) -> bool {
477     let mut it = Some(node);
478     while let Some(cur) = it {
479         if cur.data.borrow().last_line_blank {
480             return true;
481         }
482         match cur.data.borrow().value {
483             NodeValue::List(..) | NodeValue::Item(..) => it = cur.last_child(),
484             _ => it = None,
485         };
486     }
487     false
488 }
489 
490 #[doc(hidden)]
containing_block<'a>(node: &'a AstNode<'a>) -> Option<&'a AstNode<'a>>491 pub fn containing_block<'a>(node: &'a AstNode<'a>) -> Option<&'a AstNode<'a>> {
492     let mut ch = Some(node);
493     while let Some(n) = ch {
494         if n.data.borrow().value.block() {
495             return Some(n);
496         }
497         ch = n.parent();
498     }
499     None
500 }
501