1 //! The CommonMark AST.
2
3 use arena_tree::Node;
4 use std::cell::RefCell;
5
/// The core AST node enum.
///
/// Each variant is documented as a **Block** or an **Inline**; `NodeValue::block` reports the
/// same classification at runtime (the root `Document` counts as a block there).
#[derive(Debug, Clone)]
pub enum NodeValue {
    /// The root of every CommonMark document. Contains **blocks**.
    Document,

    /// **Block**. A [block quote](https://github.github.com/gfm/#block-quotes). Contains other
    /// **blocks**.
    ///
    /// ``` md
    /// > A block quote.
    /// ```
    BlockQuote,

    /// **Block**. A [list](https://github.github.com/gfm/#lists). Contains
    /// [list items](https://github.github.com/gfm/#list-items).
    ///
    /// ``` md
    /// * An unordered list
    /// * Another item
    ///
    /// 1. An ordered list
    /// 2. Another item
    /// ```
    List(NodeList),

    /// **Block**. A [list item](https://github.github.com/gfm/#list-items). Contains other
    /// **blocks**.
    Item(NodeList),

    /// **Block**. A description list, enabled with the `ext_description_lists` option. Contains
    /// description items.
    ///
    /// A blank line is required between terms and details.
    ///
    /// ``` md
    /// Term 1
    ///
    /// : Details 1
    ///
    /// Term 2
    ///
    /// : Details 2
    /// ```
    DescriptionList,

    /// **Block**. An item of a description list. Contains a term and one details block.
    DescriptionItem(NodeDescriptionItem),

    /// **Block**. The term of an item in a description list.
    DescriptionTerm,

    /// **Block**. The details of an item in a description list.
    DescriptionDetails,

    /// **Block**. A code block; may be [fenced](https://github.github.com/gfm/#fenced-code-blocks)
    /// or [indented](https://github.github.com/gfm/#indented-code-blocks). Contains raw text
    /// which is not parsed as Markdown, although it is HTML escaped.
    CodeBlock(NodeCodeBlock),

    /// **Block**. An [HTML block](https://github.github.com/gfm/#html-blocks). Contains raw text
    /// which is neither parsed as Markdown nor HTML escaped.
    HtmlBlock(NodeHtmlBlock),

    /// **Block**. A [paragraph](https://github.github.com/gfm/#paragraphs). Contains **inlines**.
    Paragraph,

    /// **Block**. A heading; may be an [ATX heading](https://github.github.com/gfm/#atx-headings)
    /// or a [setext heading](https://github.github.com/gfm/#setext-headings). Contains
    /// **inlines**.
    Heading(NodeHeading),

    /// **Block**. A [thematic break](https://github.github.com/gfm/#thematic-breaks). Has no
    /// children.
    ThematicBreak,

    /// **Block**. A footnote definition. The `Vec<u8>` is the footnote's name.
    /// Contains other **blocks**.
    FootnoteDefinition(Vec<u8>),

    /// **Block**. A [table](https://github.github.com/gfm/#tables-extension-) per the GFM spec.
    /// Contains table rows.
    ///
    /// The `Vec<TableAlignment>` holds the table's alignment information (presumably one entry
    /// per column; confirm against the table parser).
    Table(Vec<TableAlignment>),

    /// **Block**. A table row. The `bool` represents whether the row is the header row or not.
    /// Contains table cells.
    TableRow(bool),

    /// **Block**. A table cell. Contains **inlines**.
    TableCell,

    /// **Inline**. [Textual content](https://github.github.com/gfm/#textual-content). All text
    /// in a document will be contained in a `Text` node.
    Text(Vec<u8>),

    /// **Inline**. [Task list item](https://github.github.com/gfm/#task-list-items-extension-). The
    /// `bool` indicates whether it is checked or not.
    TaskItem(bool),

    /// **Inline**. A [soft line break](https://github.github.com/gfm/#soft-line-breaks). If
    /// the `hardbreaks` option is set in `ComrakOptions` during formatting, it will be formatted
    /// as a `LineBreak`.
    SoftBreak,

    /// **Inline**. A [hard line break](https://github.github.com/gfm/#hard-line-breaks).
    LineBreak,

    /// **Inline**. A [code span](https://github.github.com/gfm/#code-spans).
    Code(Vec<u8>),

    /// **Inline**. [Raw HTML](https://github.github.com/gfm/#raw-html) contained inline.
    HtmlInline(Vec<u8>),

    /// **Inline**. [Emphasised](https://github.github.com/gfm/#emphasis-and-strong-emphasis)
    /// text.
    Emph,

    /// **Inline**. [Strong](https://github.github.com/gfm/#emphasis-and-strong-emphasis) text.
    Strong,

    /// **Inline**. [Strikethrough](https://github.github.com/gfm/#strikethrough-extension-) text
    /// per the GFM spec.
    Strikethrough,

    /// **Inline**. Superscript. Enabled with the `ext_superscript` option.
    Superscript,

    /// **Inline**. A [link](https://github.github.com/gfm/#links) to some URL, with possible
    /// title.
    Link(NodeLink),

    /// **Inline**. An [image](https://github.github.com/gfm/#images).
    Image(NodeLink),

    /// **Inline**. A footnote reference; the `Vec<u8>` is the referent footnote's name.
    FootnoteReference(Vec<u8>),
}
143
/// Alignment of a single table cell.
///
/// Implements `PartialEq`/`Eq` so alignments can be compared directly with `==`, just like the
/// other small metadata enums in this module (`ListType`, `ListDelimType`).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum TableAlignment {
    /// Cell content is unaligned.
    None,

    /// Cell content is aligned left.
    Left,

    /// Cell content is centered.
    Center,

    /// Cell content is aligned right.
    Right,
}
159
/// The details of a link's destination, or an image's source.
///
/// This is the payload of both `NodeValue::Link` and `NodeValue::Image`. Both fields are stored
/// as raw bytes.
#[derive(Debug, Clone)]
pub struct NodeLink {
    /// The URL for the link destination or image source.
    pub url: Vec<u8>,

    /// The title for the link or image.
    ///
    /// Note this field is used for the `title` attribute by the HTML formatter even for images;
    /// `alt` text is supplied in the image inline text.
    pub title: Vec<u8>,
}
172
/// The metadata of a list; the kind of list, the delimiter used and so on.
///
/// This is the payload of both `NodeValue::List` and `NodeValue::Item`. `NodeList::default()`
/// describes a bullet list with a `Period` delimiter, every numeric field set to zero and
/// `tight` set to `false`.
#[derive(Debug, Default, Clone, Copy)]
pub struct NodeList {
    /// The kind of list (bullet (unordered) or ordered).
    pub list_type: ListType,

    // Hidden from the rendered documentation.
    // NOTE(review): presumably parser bookkeeping for the list marker's position; confirm
    // against the block parser before relying on it.
    #[doc(hidden)]
    pub marker_offset: usize,

    // Hidden from the rendered documentation.
    // NOTE(review): presumably parser bookkeeping for the content indentation; confirm against
    // the block parser before relying on it.
    #[doc(hidden)]
    pub padding: usize,

    /// For ordered lists, the ordinal the list starts at.
    pub start: usize,

    /// For ordered lists, the delimiter after each number.
    pub delimiter: ListDelimType,

    /// For bullet lists, the character used for each bullet, stored as a single byte.
    pub bullet_char: u8,

    /// Whether the list is [tight](https://github.github.com/gfm/#tight), i.e. whether the
    /// paragraphs are wrapped in `<p>` tags when formatted as HTML.
    pub tight: bool,
}
198
/// The metadata of a description list item (the payload of `NodeValue::DescriptionItem`).
///
/// Both fields are hidden from the rendered documentation; `NodeDescriptionItem::default()`
/// sets them to zero.
#[derive(Debug, Default, Clone, Copy)]
pub struct NodeDescriptionItem {
    // NOTE(review): presumably parser bookkeeping mirroring `NodeList::marker_offset`; confirm.
    #[doc(hidden)]
    pub marker_offset: usize,

    // NOTE(review): presumably parser bookkeeping mirroring `NodeList::padding`; confirm.
    #[doc(hidden)]
    pub padding: usize,
}
208
/// The kind of list a `NodeList` describes.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ListType {
    /// An unordered list, whose items are introduced by a bullet character.
    Bullet,

    /// An ordered (numbered) list.
    Ordered,
}

impl Default for ListType {
    /// Lists are bullet lists unless stated otherwise.
    fn default() -> Self {
        ListType::Bullet
    }
}
224
/// The delimiter of an ordered list: the character that follows each item's number.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ListDelimType {
    /// Numbers are followed by a period, `.`.
    Period,

    /// Numbers are followed by a closing parenthesis, `)`.
    Paren,
}

impl Default for ListDelimType {
    /// Ordered lists use the period delimiter unless stated otherwise.
    fn default() -> Self {
        ListDelimType::Period
    }
}
240
/// The metadata and data of a code block (fenced or indented).
///
/// `NodeCodeBlock::default()` yields an unfenced block with zeroed fence fields and empty `info`
/// and `literal` buffers.
#[derive(Default, Debug, Clone)]
pub struct NodeCodeBlock {
    /// Whether the code block is fenced.
    pub fenced: bool,

    /// For fenced code blocks, the fence character itself (`` ` `` or `~`), stored as a byte.
    pub fence_char: u8,

    /// For fenced code blocks, the length of the fence.
    pub fence_length: usize,

    // Hidden from the rendered documentation.
    // NOTE(review): presumably the indentation of the opening fence; confirm against the parser.
    #[doc(hidden)]
    pub fence_offset: usize,

    /// For fenced code blocks, the [info string](https://github.github.com/gfm/#info-string) after
    /// the opening fence, if any.
    pub info: Vec<u8>,

    /// The literal contents of the code block. As the contents are not interpreted as Markdown at
    /// all, they are contained within this structure, rather than inserted into a child inline of
    /// any kind.
    pub literal: Vec<u8>,
}
265
/// The metadata of a heading (the payload of `NodeValue::Heading`).
#[derive(Default, Debug, Clone, Copy)]
pub struct NodeHeading {
    /// The level of the heading; from 1 to 6 for ATX headings, 1 or 2 for setext headings.
    ///
    /// Note that the derived `Default` sets this to `0`, which is outside both ranges.
    pub level: u32,

    /// Whether the heading is setext (if not, ATX).
    pub setext: bool,
}
275
/// The metadata of an included HTML block (the payload of `NodeValue::HtmlBlock`).
#[derive(Debug, Default, Clone)]
pub struct NodeHtmlBlock {
    // Hidden from the rendered documentation.
    // NOTE(review): presumably identifies which HTML-block start condition matched; confirm
    // against the parser.
    #[doc(hidden)]
    pub block_type: u8,

    /// The literal contents of the HTML block. As with `NodeCodeBlock`, the content is included
    /// here rather than in any inline.
    pub literal: Vec<u8>,
}
286
287 impl NodeValue {
288 /// Indicates whether this node is a block node or inline node.
block(&self) -> bool289 pub fn block(&self) -> bool {
290 match *self {
291 NodeValue::Document
292 | NodeValue::BlockQuote
293 | NodeValue::FootnoteDefinition(_)
294 | NodeValue::List(..)
295 | NodeValue::DescriptionList
296 | NodeValue::DescriptionItem(_)
297 | NodeValue::DescriptionTerm
298 | NodeValue::DescriptionDetails
299 | NodeValue::Item(..)
300 | NodeValue::CodeBlock(..)
301 | NodeValue::HtmlBlock(..)
302 | NodeValue::Paragraph
303 | NodeValue::Heading(..)
304 | NodeValue::ThematicBreak
305 | NodeValue::Table(..)
306 | NodeValue::TableRow(..)
307 | NodeValue::TableCell => true,
308 _ => false,
309 }
310 }
311
312 #[doc(hidden)]
accepts_lines(&self) -> bool313 pub fn accepts_lines(&self) -> bool {
314 match *self {
315 NodeValue::Paragraph | NodeValue::Heading(..) | NodeValue::CodeBlock(..) => true,
316 _ => false,
317 }
318 }
319
320 /// Indicates whether this node may contain inlines.
contains_inlines(&self) -> bool321 pub fn contains_inlines(&self) -> bool {
322 match *self {
323 NodeValue::Paragraph | NodeValue::Heading(..) | NodeValue::TableCell => true,
324 _ => false,
325 }
326 }
327
328 /// Return a reference to the text of a `Text` inline, if this node is one.
329 ///
330 /// Convenience method.
text(&self) -> Option<&Vec<u8>>331 pub fn text(&self) -> Option<&Vec<u8>> {
332 match *self {
333 NodeValue::Text(ref t) => Some(t),
334 _ => None,
335 }
336 }
337
338 /// Return a mutable reference to the text of a `Text` inline, if this node is one.
339 ///
340 /// Convenience method.
text_mut(&mut self) -> Option<&mut Vec<u8>>341 pub fn text_mut(&mut self) -> Option<&mut Vec<u8>> {
342 match *self {
343 NodeValue::Text(ref mut t) => Some(t),
344 _ => None,
345 }
346 }
347 }
348
/// A single node in the CommonMark AST.
///
/// The struct contains metadata about the node's position in the original document, and the core
/// enum, `NodeValue`. Use `Ast::new` to build one from a `NodeValue`.
#[derive(Debug, Clone)]
pub struct Ast {
    /// The node value itself.
    pub value: NodeValue,

    /// The line in the input document the node starts at. `Ast::new` initialises it to `0`.
    pub start_line: u32,

    // The remaining fields are hidden from the rendered documentation.
    // NOTE(review): they look like parser working state (`Ast::new` starts a node as `open`
    // with empty `content`); confirm before relying on them outside the parser.
    #[doc(hidden)]
    pub content: Vec<u8>,
    #[doc(hidden)]
    pub open: bool,
    #[doc(hidden)]
    pub last_line_blank: bool,
}
368
369 impl Ast {
370 /// Create a new AST node with the given value.
new(value: NodeValue) -> Self371 pub fn new(value: NodeValue) -> Self {
372 Ast {
373 value: value,
374 content: vec![],
375 start_line: 0,
376 open: true,
377 last_line_blank: false,
378 }
379 }
380 }
381
/// The type of a node within the document.
///
/// It is bound by the lifetime `'a`, which corresponds to the `Arena` nodes are allocated in.
/// `AstNode`s are almost always handled by reference, with the reference itself also bound by
/// `'a`. The `Ast` carried by each node is wrapped in `RefCell` for interior mutability.
///
/// You can construct a new `AstNode` from a `NodeValue` using the `From` trait:
///
/// ```no_run
/// # use comrak::nodes::{AstNode, NodeValue};
/// let root = AstNode::from(NodeValue::Document);
/// ```
pub type AstNode<'a> = Node<'a, RefCell<Ast>>;
395
396 impl<'a> From<NodeValue> for AstNode<'a> {
397 /// Create a new AST node with the given value.
from(value: NodeValue) -> Self398 fn from(value: NodeValue) -> Self {
399 Node::new(RefCell::new(Ast::new(value)))
400 }
401 }
402
403 #[doc(hidden)]
last_child_is_open<'a>(node: &'a AstNode<'a>) -> bool404 pub fn last_child_is_open<'a>(node: &'a AstNode<'a>) -> bool {
405 node.last_child().map_or(false, |n| n.data.borrow().open)
406 }
407
408 #[doc(hidden)]
can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool409 pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool {
410 if let NodeValue::Document = *child {
411 return false;
412 }
413
414 match node.data.borrow().value {
415 NodeValue::Document
416 | NodeValue::BlockQuote
417 | NodeValue::FootnoteDefinition(_)
418 | NodeValue::DescriptionTerm
419 | NodeValue::DescriptionDetails
420 | NodeValue::Item(..) => {
421 child.block() && match *child {
422 NodeValue::Item(..) => false,
423 _ => true,
424 }
425 }
426
427 NodeValue::List(..) => match *child {
428 NodeValue::Item(..) => true,
429 _ => false,
430 },
431
432 NodeValue::DescriptionList => match *child {
433 NodeValue::DescriptionItem(_) => true,
434 _ => false,
435 },
436
437 NodeValue::DescriptionItem(_) => match *child {
438 NodeValue::DescriptionTerm | NodeValue::DescriptionDetails => true,
439 _ => false,
440 },
441
442 NodeValue::Paragraph
443 | NodeValue::Heading(..)
444 | NodeValue::Emph
445 | NodeValue::Strong
446 | NodeValue::Link(..)
447 | NodeValue::Image(..) => !child.block(),
448
449 NodeValue::Table(..) => match *child {
450 NodeValue::TableRow(..) => true,
451 _ => false,
452 },
453
454 NodeValue::TableRow(..) => match *child {
455 NodeValue::TableCell => true,
456 _ => false,
457 },
458
459 NodeValue::TableCell => match *child {
460 NodeValue::Text(..)
461 | NodeValue::Code(..)
462 | NodeValue::Emph
463 | NodeValue::Strong
464 | NodeValue::Link(..)
465 | NodeValue::Image(..)
466 | NodeValue::Strikethrough
467 | NodeValue::HtmlInline(..) => true,
468 _ => false,
469 },
470
471 _ => false,
472 }
473 }
474
475 #[doc(hidden)]
ends_with_blank_line<'a>(node: &'a AstNode<'a>) -> bool476 pub fn ends_with_blank_line<'a>(node: &'a AstNode<'a>) -> bool {
477 let mut it = Some(node);
478 while let Some(cur) = it {
479 if cur.data.borrow().last_line_blank {
480 return true;
481 }
482 match cur.data.borrow().value {
483 NodeValue::List(..) | NodeValue::Item(..) => it = cur.last_child(),
484 _ => it = None,
485 };
486 }
487 false
488 }
489
490 #[doc(hidden)]
containing_block<'a>(node: &'a AstNode<'a>) -> Option<&'a AstNode<'a>>491 pub fn containing_block<'a>(node: &'a AstNode<'a>) -> Option<&'a AstNode<'a>> {
492 let mut ch = Some(node);
493 while let Some(n) = ch {
494 if n.data.borrow().value.block() {
495 return Some(n);
496 }
497 ch = n.parent();
498 }
499 None
500 }
501