1 //! Convert HTML to text formats.
2 //!
//! This crate renders HTML into a text format, wrapped to a specified width.
//! The output can be either plain text or text with extra annotations, which
//! can be used (for example) for display in a terminal which supports colours.
6 //!
7 //! # Examples
8 //!
9 //! ```rust
10 //! # use html2text::from_read;
11 //! let html = b"
12 //!        <ul>
13 //!          <li>Item one</li>
14 //!          <li>Item two</li>
15 //!          <li>Item three</li>
16 //!        </ul>";
17 //! assert_eq!(from_read(&html[..], 20),
18 //!            "\
19 //! * Item one
20 //! * Item two
21 //! * Item three
22 //! ");
23 //! ```
24 //! A couple of simple demonstration programs are included as examples:
25 //!
26 //! ### html2text
27 //!
28 //! The simplest example uses `from_read` to convert HTML on stdin into plain
29 //! text:
30 //!
31 //! ```sh
32 //! $ cargo run --example html2text < foo.html
33 //! [...]
34 //! ```
35 //!
36 //! ### html2term
37 //!
38 //! A very simple example of using the rich interface (`from_read_rich`) for a
39 //! slightly interactive console HTML viewer is provided as `html2term`.
40 //!
41 //! ```sh
42 //! $ cargo run --example html2term foo.html
43 //! [...]
44 //! ```
45 //!
46 //! Note that this example takes the HTML file as a parameter so that it can
47 //! read keys from stdin.
48 //!
49 
50 #![cfg_attr(feature = "clippy", feature(plugin))]
51 #![cfg_attr(feature = "clippy", plugin(clippy))]
52 #![deny(missing_docs)]
53 
54 #[macro_use]
55 extern crate html5ever;
56 extern crate unicode_width;
57 
58 #[macro_use]
59 mod macros;
60 
61 pub mod render;
62 
63 use render::text_renderer::{
64     PlainDecorator, RenderLine, RichAnnotation, RichDecorator, TaggedLine, TextDecorator,
65     TextRenderer,
66 };
67 use render::Renderer;
68 
69 use html5ever::driver::ParseOpts;
70 use html5ever::parse_document;
71 use html5ever::tendril::TendrilSink;
72 use html5ever::tree_builder::TreeBuilderOpts;
73 use markup5ever_rcdom::{
74     self, Handle,
75     NodeData::{Comment, Document, Element},
76     RcDom,
77 };
78 use std::cell::Cell;
79 use std::cmp::max;
80 use std::io;
81 use std::io::Write;
82 use std::iter::{once, repeat};
83 use std::ops::{Deref, DerefMut};
84 
/// A writer which accepts everything written to it and throws it away.
struct Discard {}

impl Write for Discard {
    /// Report the whole buffer as written without storing any of it.
    fn write(&mut self, bytes: &[u8]) -> io::Result<usize> {
        let consumed = bytes.len();
        Ok(consumed)
    }

    /// Nothing is ever buffered, so flushing always succeeds.
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}
95 
/// Base minimum width used by the size estimates: non-empty text and image
/// nodes report this as their `min_width`, and header estimates add to it.
const MIN_WIDTH: usize = 5;
97 
/// Size information/estimate
///
/// The default value is the all-zero estimate, which is the identity for
/// [`SizeEstimate::add`].
#[derive(Debug, Copy, Clone, Default)]
pub struct SizeEstimate {
    size: usize,      // Rough overall size
    min_width: usize, // The narrowest possible
}

impl SizeEstimate {
    /// Combine two estimates into one (add size and widest required)
    ///
    /// The sizes are summed, while the resulting `min_width` is the larger
    /// of the two minimum widths.
    pub fn add(self, other: SizeEstimate) -> SizeEstimate {
        SizeEstimate {
            size: self.size + other.size,
            min_width: max(self.min_width, other.min_width),
        }
    }
}
123 
124 #[derive(Clone, Debug)]
125 /// Render tree table cell
126 pub struct RenderTableCell {
127     colspan: usize,
128     content: Vec<RenderNode>,
129     size_estimate: Cell<Option<SizeEstimate>>,
130     col_width: Option<usize>, // Actual width to use
131 }
132 
133 impl RenderTableCell {
134     /// Render this cell to a builder.
render<T: Write, R: Renderer>(&mut self, _builder: &mut R, _err_out: &mut T)135     pub fn render<T: Write, R: Renderer>(&mut self, _builder: &mut R, _err_out: &mut T) {
136         unimplemented!()
137         //render_tree_children_to_string(builder, &mut self.content, err_out)
138     }
139 
140     /// Calculate or return the estimate size of the cell
get_size_estimate(&self) -> SizeEstimate141     pub fn get_size_estimate(&self) -> SizeEstimate {
142         if self.size_estimate.get().is_none() {
143             let size = self
144                 .content
145                 .iter()
146                 .map(|node| node.get_size_estimate())
147                 .fold(Default::default(), SizeEstimate::add);
148             self.size_estimate.set(Some(size));
149         }
150         self.size_estimate.get().unwrap()
151     }
152 }
153 
154 #[derive(Clone, Debug)]
155 /// Render tree table row
156 pub struct RenderTableRow {
157     cells: Vec<RenderTableCell>,
158     col_sizes: Option<Vec<usize>>,
159 }
160 
161 impl RenderTableRow {
162     /// Return a mutable iterator over the cells.
cells(&self) -> std::slice::Iter<RenderTableCell>163     pub fn cells(&self) -> std::slice::Iter<RenderTableCell> {
164         self.cells.iter()
165     }
166     /// Return a mutable iterator over the cells.
cells_mut(&mut self) -> std::slice::IterMut<RenderTableCell>167     pub fn cells_mut(&mut self) -> std::slice::IterMut<RenderTableCell> {
168         self.cells.iter_mut()
169     }
170     /// Count the number of cells in the row.
171     /// Takes into account colspan.
num_cells(&self) -> usize172     pub fn num_cells(&self) -> usize {
173         self.cells.iter().map(|cell| cell.colspan).sum()
174     }
175     /// Return an iterator over (column, &cell)s, which
176     /// takes into account colspan.
cell_columns(&mut self) -> Vec<(usize, &mut RenderTableCell)>177     pub fn cell_columns(&mut self) -> Vec<(usize, &mut RenderTableCell)> {
178         let mut result = Vec::new();
179         let mut colno = 0;
180         for cell in &mut self.cells {
181             let colspan = cell.colspan;
182             result.push((colno, cell));
183             colno += colspan;
184         }
185         result
186     }
187 
188     /// Return the contained cells as RenderNodes, annotated with their
189     /// widths if available.  Skips cells with no width allocated.
into_cells(self) -> Vec<RenderNode>190     pub fn into_cells(self) -> Vec<RenderNode> {
191         let mut result = Vec::new();
192         let mut colno = 0;
193         let col_sizes = self.col_sizes.unwrap();
194         for mut cell in self.cells {
195             let colspan = cell.colspan;
196             let col_width: usize = col_sizes[colno..colno + cell.colspan].iter().sum();
197             if col_width > 1 {
198                 cell.col_width = Some(col_width - 1);
199                 result.push(RenderNode::new(RenderNodeInfo::TableCell(cell)));
200             }
201             colno += colspan;
202         }
203         result
204     }
205 }
206 
207 #[derive(Clone, Debug)]
208 /// A representation of a table render tree with metadata.
209 pub struct RenderTable {
210     rows: Vec<RenderTableRow>,
211     num_columns: usize,
212     size_estimate: Cell<Option<SizeEstimate>>,
213 }
214 
215 impl RenderTable {
216     /// Create a new RenderTable with the given rows
new(rows: Vec<RenderTableRow>) -> RenderTable217     pub fn new(rows: Vec<RenderTableRow>) -> RenderTable {
218         let num_columns = rows.iter().map(|r| r.num_cells()).max().unwrap_or(0);
219         RenderTable {
220             rows: rows,
221             num_columns: num_columns,
222             size_estimate: Cell::new(None),
223         }
224     }
225 
226     /// Return an iterator over the rows.
rows(&self) -> std::slice::Iter<RenderTableRow>227     pub fn rows(&self) -> std::slice::Iter<RenderTableRow> {
228         self.rows.iter()
229     }
230 
231     /// Return an iterator over the rows.
rows_mut(&mut self) -> std::slice::IterMut<RenderTableRow>232     pub fn rows_mut(&mut self) -> std::slice::IterMut<RenderTableRow> {
233         self.rows.iter_mut()
234     }
235     /// Consume this and return a Vec<RenderNode> containing the children;
236     /// the children know the column sizes required.
into_rows(self, col_sizes: Vec<usize>) -> Vec<RenderNode>237     pub fn into_rows(self, col_sizes: Vec<usize>) -> Vec<RenderNode> {
238         self.rows
239             .into_iter()
240             .map(|mut tr| {
241                 tr.col_sizes = Some(col_sizes.clone());
242                 RenderNode::new(RenderNodeInfo::TableRow(tr))
243             })
244             .collect()
245     }
246 
calc_size_estimate(&self)247     fn calc_size_estimate(&self) {
248         if self.num_columns == 0 {
249             self.size_estimate.set(Some(SizeEstimate {
250                 size: 0,
251                 min_width: 0,
252             }));
253             return;
254         }
255         let mut sizes: Vec<SizeEstimate> = vec![Default::default(); self.num_columns];
256 
257         // For now, a simple estimate based on adding up sub-parts.
258         for row in self.rows() {
259             let mut colno = 0usize;
260             for cell in row.cells() {
261                 let cellsize = cell.get_size_estimate();
262                 for colnum in 0..cell.colspan {
263                     sizes[colno + colnum].size += cellsize.size / cell.colspan;
264                     sizes[colno + colnum].min_width = max(
265                         sizes[colno + colnum].min_width / cell.colspan,
266                         cellsize.min_width,
267                     );
268                 }
269                 colno += cell.colspan;
270             }
271         }
272         let size = sizes.iter().map(|s| s.size).sum(); // Include borders?
273         let min_width = sizes.iter().map(|s| s.min_width).sum::<usize>() + self.num_columns - 1;
274         self.size_estimate.set(Some(SizeEstimate {
275             size: size,
276             min_width: min_width,
277         }));
278     }
279 
280     /// Calculate and store (or return stored value) of estimated size
get_size_estimate(&self) -> SizeEstimate281     pub fn get_size_estimate(&self) -> SizeEstimate {
282         if self.size_estimate.get().is_none() {
283             self.calc_size_estimate();
284         }
285         self.size_estimate.get().unwrap()
286     }
287 }
288 
/// The node-specific information distilled from the DOM.
///
/// Most variants hold a `Vec<RenderNode>` with the child nodes of the
/// element they were built from.
#[derive(Clone, Debug)]
pub enum RenderNodeInfo {
    /// Some text.
    Text(String),
    /// A group of nodes collected together.
    Container(Vec<RenderNode>),
    /// A link (the `String` is presumably the link target) with contained
    /// nodes
    Link(String, Vec<RenderNode>),
    /// An emphasised region
    Em(Vec<RenderNode>),
    /// A strong region
    Strong(Vec<RenderNode>),
    /// A struck out region
    Strikeout(Vec<RenderNode>),
    /// A code region
    Code(Vec<RenderNode>),
    /// An image (title)
    Img(String),
    /// A block element with children
    Block(Vec<RenderNode>),
    /// A header (`h1`, `h2`, ...) of the given level, with children
    Header(usize, Vec<RenderNode>),
    /// A Div element with children
    Div(Vec<RenderNode>),
    /// A preformatted region.
    Pre(Vec<RenderNode>),
    /// A blockquote
    BlockQuote(Vec<RenderNode>),
    /// An unordered list
    Ul(Vec<RenderNode>),
    /// An ordered list (the `i64` is presumably the number of the first
    /// item - TODO confirm)
    Ol(i64, Vec<RenderNode>),
    /// A description list (containing Dt or Dd)
    Dl(Vec<RenderNode>),
    /// A term (from a `<dl>`)
    Dt(Vec<RenderNode>),
    /// A definition (from a `<dl>`)
    Dd(Vec<RenderNode>),
    /// A line break
    Break,
    /// A table
    Table(RenderTable),
    /// A set of table rows (from either `<thead>` or `<tbody>`)
    TableBody(Vec<RenderTableRow>),
    /// Table row (must only appear within a table body)
    TableRow(RenderTableRow),
    /// Table cell (must only appear within a table row)
    TableCell(RenderTableCell),
    /// Start of a named HTML fragment
    FragStart(String),
}
341 
342 /// Common fields from a node.
343 #[derive(Clone, Debug)]
344 pub struct RenderNode {
345     size_estimate: Cell<Option<SizeEstimate>>,
346     info: RenderNodeInfo,
347 }
348 
349 impl RenderNode {
350     /// Create a node from the RenderNodeInfo.
new(info: RenderNodeInfo) -> RenderNode351     pub fn new(info: RenderNodeInfo) -> RenderNode {
352         RenderNode {
353             size_estimate: Cell::new(None),
354             info: info,
355         }
356     }
357 
358     /// Get a size estimate (~characters)
get_size_estimate(&self) -> SizeEstimate359     pub fn get_size_estimate(&self) -> SizeEstimate {
360         // If it's already calculated, then just return the answer.
361         if let Some(s) = self.size_estimate.get() {
362             return s;
363         };
364 
365         use RenderNodeInfo::*;
366 
367         // Otherwise, make an estimate.
368         let estimate = match self.info {
369             Text(ref t) | Img(ref t) => {
370                 let len = t.trim().len();
371                 SizeEstimate {
372                     size: len,
373                     min_width: if len > 0 { MIN_WIDTH } else { 0 },
374                 }
375             }
376 
377             Container(ref v)
378             | Link(_, ref v)
379             | Em(ref v)
380             | Strong(ref v)
381             | Strikeout(ref v)
382             | Code(ref v)
383             | Block(ref v)
384             | Div(ref v)
385             | Pre(ref v)
386             | BlockQuote(ref v)
387             | Dl(ref v)
388             | Dt(ref v)
389             | Dd(ref v)
390             | Ul(ref v)
391             | Ol(_, ref v) => v
392                 .iter()
393                 .map(RenderNode::get_size_estimate)
394                 .fold(Default::default(), SizeEstimate::add),
395             Header(level, ref v) => v
396                 .iter()
397                 .map(RenderNode::get_size_estimate)
398                 .fold(Default::default(), SizeEstimate::add)
399                 .add(SizeEstimate {
400                     size: 0,
401                     min_width: MIN_WIDTH + level + 2,
402                 }),
403             Break => SizeEstimate {
404                 size: 1,
405                 min_width: 1,
406             },
407             Table(ref t) => t.get_size_estimate(),
408             TableRow(_) | TableBody(_) | TableCell(_) => unimplemented!(),
409             FragStart(_) => Default::default(),
410         };
411         self.size_estimate.set(Some(estimate));
412         estimate
413     }
414 }
415 
precalc_size_estimate<'a>(node: &'a RenderNode) -> TreeMapResult<(), &'a RenderNode, ()>416 fn precalc_size_estimate<'a>(node: &'a RenderNode) -> TreeMapResult<(), &'a RenderNode, ()> {
417     use RenderNodeInfo::*;
418     if node.size_estimate.get().is_some() {
419         return TreeMapResult::Nothing;
420     }
421     match node.info {
422         Text(_) | Img(_) | Break | FragStart(_) => {
423             let _ = node.get_size_estimate();
424             TreeMapResult::Nothing
425         }
426 
427         Container(ref v)
428         | Link(_, ref v)
429         | Em(ref v)
430         | Strong(ref v)
431         | Strikeout(ref v)
432         | Code(ref v)
433         | Block(ref v)
434         | Div(ref v)
435         | Pre(ref v)
436         | BlockQuote(ref v)
437         | Ul(ref v)
438         | Ol(_, ref v)
439         | Dl(ref v)
440         | Dt(ref v)
441         | Dd(ref v)
442         | Header(_, ref v) => TreeMapResult::PendingChildren {
443             children: v.iter().collect(),
444             cons: Box::new(move |_, _cs| {
445                 node.get_size_estimate();
446                 None
447             }),
448             prefn: None,
449             postfn: None,
450         },
451         Table(ref t) => {
452             /* Return all the indirect children which are RenderNodes. */
453             let mut children = Vec::new();
454             for row in &t.rows {
455                 for cell in &row.cells {
456                     children.extend(cell.content.iter());
457                 }
458             }
459             TreeMapResult::PendingChildren {
460                 children: children,
461                 cons: Box::new(move |_, _cs| {
462                     node.get_size_estimate();
463                     None
464                 }),
465                 prefn: None,
466                 postfn: None,
467             }
468         }
469         TableRow(_) | TableBody(_) | TableCell(_) => unimplemented!(),
470     }
471 }
472 
473 /// Make a Vec of RenderNodes from the children of a node.
children_to_render_nodes<T: Write>(handle: Handle, err_out: &mut T) -> Vec<RenderNode>474 fn children_to_render_nodes<T: Write>(handle: Handle, err_out: &mut T) -> Vec<RenderNode> {
475     /* process children, but don't add anything */
476     let children = handle
477         .children
478         .borrow()
479         .iter()
480         .flat_map(|ch| dom_to_render_tree(ch.clone(), err_out))
481         .collect();
482     children
483 }
484 
485 /// Make a Vec of RenderNodes from the <li> children of a node.
list_children_to_render_nodes<T: Write>(handle: Handle, err_out: &mut T) -> Vec<RenderNode>486 fn list_children_to_render_nodes<T: Write>(handle: Handle, err_out: &mut T) -> Vec<RenderNode> {
487     let mut children = Vec::new();
488 
489     for child in handle.children.borrow().iter() {
490         match child.data {
491             Element { ref name, .. } => match name.expanded() {
492                 expanded_name!(html "li") => {
493                     let li_children = children_to_render_nodes(child.clone(), err_out);
494                     children.push(RenderNode::new(RenderNodeInfo::Block(li_children)));
495                 }
496                 _ => {}
497             },
498             Comment { .. } => {}
499             _ => {
500                 html_trace!("Unhandled in list: {:?}\n", child);
501             }
502         }
503     }
504     children
505 }
506 
507 /// Make a Vec of DtElements from the <dt> and <dd> children of a node.
desc_list_children_to_render_nodes<T: Write>( handle: Handle, err_out: &mut T, ) -> Vec<RenderNode>508 fn desc_list_children_to_render_nodes<T: Write>(
509     handle: Handle,
510     err_out: &mut T,
511 ) -> Vec<RenderNode> {
512     let mut children = Vec::new();
513 
514     for child in handle.children.borrow().iter() {
515         match child.data {
516             Element { ref name, .. } => match name.expanded() {
517                 expanded_name!(html "dt") => {
518                     let dt_children = children_to_render_nodes(child.clone(), err_out);
519                     children.push(RenderNode::new(RenderNodeInfo::Dt(dt_children)));
520                 }
521                 expanded_name!(html "dd") => {
522                     let dd_children = children_to_render_nodes(child.clone(), err_out);
523                     children.push(RenderNode::new(RenderNodeInfo::Dd(dd_children)));
524                 }
525                 _ => {}
526             },
527             Comment { .. } => {}
528             _ => {
529                 html_trace!("Unhandled in list: {:?}\n", child);
530             }
531         }
532     }
533     children
534 }
535 
536 /// Convert a table into a RenderNode
table_to_render_tree<'a, 'b, T: Write>( handle: Handle, _err_out: &'b mut T, ) -> TreeMapResult<'a, (), Handle, RenderNode>537 fn table_to_render_tree<'a, 'b, T: Write>(
538     handle: Handle,
539     _err_out: &'b mut T,
540 ) -> TreeMapResult<'a, (), Handle, RenderNode> {
541     pending(handle, |_, rowset| {
542         let mut rows = vec![];
543         for bodynode in rowset {
544             if let RenderNodeInfo::TableBody(body) = bodynode.info {
545                 rows.extend(body);
546             } else {
547                 html_trace!("Found in table: {:?}", bodynode.info);
548             }
549         }
550         Some(RenderNode::new(RenderNodeInfo::Table(RenderTable::new(
551             rows,
552         ))))
553     })
554 }
555 
556 /// Add rows from a thead or tbody.
tbody_to_render_tree<'a, 'b, T: Write>( handle: Handle, _err_out: &'b mut T, ) -> TreeMapResult<'a, (), Handle, RenderNode>557 fn tbody_to_render_tree<'a, 'b, T: Write>(
558     handle: Handle,
559     _err_out: &'b mut T,
560 ) -> TreeMapResult<'a, (), Handle, RenderNode> {
561     pending(handle, |_, rowchildren| {
562         let rows = rowchildren
563             .into_iter()
564             .flat_map(|rownode| {
565                 if let RenderNodeInfo::TableRow(row) = rownode.info {
566                     Some(row)
567                 } else {
568                     html_trace!("  [[tbody child: {:?}]]", rownode);
569                     None
570                 }
571             })
572             .collect();
573         Some(RenderNode::new(RenderNodeInfo::TableBody(rows)))
574     })
575 }
576 
577 /// Convert a table row to a RenderTableRow
tr_to_render_tree<'a, 'b, T: Write>( handle: Handle, _err_out: &'b mut T, ) -> TreeMapResult<'a, (), Handle, RenderNode>578 fn tr_to_render_tree<'a, 'b, T: Write>(
579     handle: Handle,
580     _err_out: &'b mut T,
581 ) -> TreeMapResult<'a, (), Handle, RenderNode> {
582     pending(handle, |_, cellnodes| {
583         let cells = cellnodes
584             .into_iter()
585             .flat_map(|cellnode| {
586                 if let RenderNodeInfo::TableCell(cell) = cellnode.info {
587                     Some(cell)
588                 } else {
589                     html_trace!("  [[tr child: {:?}]]", cellnode);
590                     None
591                 }
592             })
593             .collect();
594         Some(RenderNode::new(RenderNodeInfo::TableRow(RenderTableRow {
595             cells,
596             col_sizes: None,
597         })))
598     })
599 }
600 
601 /// Convert a single table cell to a render node.
td_to_render_tree<'a, 'b, T: Write>( handle: Handle, _err_out: &'b mut T, ) -> TreeMapResult<'a, (), Handle, RenderNode>602 fn td_to_render_tree<'a, 'b, T: Write>(
603     handle: Handle,
604     _err_out: &'b mut T,
605 ) -> TreeMapResult<'a, (), Handle, RenderNode> {
606     let mut colspan = 1;
607     if let Element { ref attrs, .. } = handle.data {
608         for attr in attrs.borrow().iter() {
609             if &attr.name.local == "colspan" {
610                 let v: &str = &*attr.value;
611                 colspan = v.parse().unwrap_or(1);
612             }
613         }
614     }
615     pending(handle, move |_, children| {
616         Some(RenderNode::new(RenderNodeInfo::TableCell(
617             RenderTableCell {
618                 colspan: colspan,
619                 content: children,
620                 size_estimate: Cell::new(None),
621                 col_width: None,
622             },
623         )))
624     })
625 }
626 
/// A reducer which combines the results from mapping a node's children
/// into the result for the node itself.  It is given the context and the
/// vector of child results, and returns the new result (or nothing).
type ResultReducer<'a, C, R> = dyn Fn(&mut C, Vec<R>) -> Option<R> + 'a;

/// A closure to call before processing a child node.
type ChildPreFn<C, N> = dyn Fn(&mut C, &N);

/// A closure to call after processing a child node, before the result is
/// added to the vector of processed results.
type ChildPostFn<C, R> = dyn Fn(&mut C, &R);

/// The result of trying to render one node.
enum TreeMapResult<'a, C, N, R> {
    /// A completed result.
    Finished(R),
    /// Deferred completion - can be turned into a result
    /// once the vector of children are processed.
    PendingChildren {
        children: Vec<N>,
        cons: Box<ResultReducer<'a, C, R>>,
        prefn: Option<Box<ChildPreFn<C, N>>>,
        postfn: Option<Box<ChildPostFn<C, R>>>,
    },
    /// Nothing (e.g. a comment or other ignored element).
    Nothing,
}

/// Map a tree of nodes (`N`) into a single result (`R`).
///
/// `process_node` is called on `top` and then on every child node handed
/// back in a `PendingChildren` result.  Once all the children of a node
/// have been processed, their results are combined by that node's reducer.
/// The work list is kept on an explicit stack instead of using recursion.
///
/// Returns `None` if the top node produces no result.
fn tree_map_reduce<'a, C, N, R, M>(context: &mut C, top: N, mut process_node: M) -> Option<R>
where
    M: for<'c> FnMut(&'c mut C, N) -> TreeMapResult<'a, C, N, R>,
{
    /// A node which is waiting for its children to be processed.
    struct Frame<'a, C, R, N> {
        /// Builds the node's result from the results of its children
        reduce: Box<ResultReducer<'a, C, R>>,
        /// Called before processing each child
        before_child: Option<Box<ChildPreFn<C, N>>>,
        /// Called after processing each child
        after_child: Option<Box<ChildPostFn<C, R>>>,
        /// Results of the children processed so far
        done: Vec<R>,
        /// The children still to be processed
        remaining: std::vec::IntoIter<N>,
    }

    // The bottom frame has `top` as its only child, and simply passes the
    // result for that child through.
    let mut stack = vec![Frame {
        reduce: Box::new(|_, mut cs| cs.pop()),
        before_child: None,
        after_child: None,
        done: Vec::new(),
        remaining: vec![top].into_iter(),
    }];

    loop {
        let frame = stack.last_mut().unwrap();
        match frame.remaining.next() {
            Some(child) => {
                if let Some(hook) = frame.before_child.as_ref() {
                    hook(context, &child);
                }
                match process_node(context, child) {
                    TreeMapResult::Finished(result) => {
                        if let Some(hook) = frame.after_child.as_ref() {
                            hook(context, &result);
                        }
                        frame.done.push(result);
                    }
                    TreeMapResult::PendingChildren {
                        children,
                        cons,
                        prefn,
                        postfn,
                    } => {
                        // Descend: the children of this node are handled
                        // before the rest of the current frame.
                        stack.push(Frame {
                            reduce: cons,
                            before_child: prefn,
                            after_child: postfn,
                            done: Vec::new(),
                            remaining: children.into_iter(),
                        });
                    }
                    TreeMapResult::Nothing => {}
                }
            }
            None => {
                // All the children are done, so build the node itself.
                let finished = stack.pop().unwrap();
                match (finished.reduce)(context, finished.done) {
                    Some(result) => match stack.last_mut() {
                        Some(parent) => {
                            if let Some(hook) = parent.after_child.as_ref() {
                                hook(context, &result);
                            }
                            parent.done.push(result);
                        }
                        // That was the bottom frame: this is the answer.
                        None => break Some(result),
                    },
                    None => {
                        // The bottom frame produced nothing at all.
                        if stack.is_empty() {
                            break None;
                        }
                    }
                }
            }
        }
    }
}
740 
741 /// Convert a DOM tree or subtree into a render tree.
dom_to_render_tree<T: Write>(handle: Handle, err_out: &mut T) -> Option<RenderNode>742 pub fn dom_to_render_tree<T: Write>(handle: Handle, err_out: &mut T) -> Option<RenderNode> {
743     html_trace!("### dom_to_render_tree: HTML: {:?}", handle);
744     let result = tree_map_reduce(&mut (), handle, |_, handle| {
745         process_dom_node(handle, err_out)
746     });
747 
748     html_trace!("### dom_to_render_tree: out= {:#?}", result);
749     result
750 }
751 
pending<'a, F>(handle: Handle, f: F) -> TreeMapResult<'a, (), Handle, RenderNode> where for<'r> F: Fn(&'r mut (), std::vec::Vec<RenderNode>) -> Option<RenderNode> + 'static,752 fn pending<'a, F>(handle: Handle, f: F) -> TreeMapResult<'a, (), Handle, RenderNode>
753 where
754     //for<'a> F: Fn(&'a mut C, Vec<RenderNode>) -> Option<RenderNode>+'static
755     for<'r> F: Fn(&'r mut (), std::vec::Vec<RenderNode>) -> Option<RenderNode> + 'static,
756 {
757     TreeMapResult::PendingChildren {
758         children: handle.children.borrow().clone(),
759         cons: Box::new(f),
760         prefn: None,
761         postfn: None,
762     }
763 }
764 
765 /// Prepend a FragmentStart (or analogous) marker to an existing
766 /// RenderNode.
prepend_marker(prefix: RenderNode, mut orig: RenderNode) -> RenderNode767 fn prepend_marker(prefix: RenderNode, mut orig: RenderNode) -> RenderNode {
768     use RenderNodeInfo::*;
769     html_trace!("prepend_marker({:?}, {:?})", prefix, orig);
770 
771     match orig.info {
772         // For block elements such as Block and Div, we need to insert
773         // the node at the front of their children array, otherwise
774         // the renderer is liable to drop the fragment start marker
775         // _before_ the new line indicating the end of the previous
776         // paragraph.
777         //
778         // For Container, we do the same thing just to make the data
779         // less pointlessly nested.
780         Block(ref mut children)
781         | Div(ref mut children)
782         | Pre(ref mut children)
783         | BlockQuote(ref mut children)
784         | Container(ref mut children)
785         | TableCell(RenderTableCell {
786             content: ref mut children,
787             ..
788         }) => {
789             children.insert(0, prefix);
790             // Now return orig, but we do that outside the match so
791             // that we've given back the borrowed ref 'children'.
792         }
793 
794         // For table rows and tables, push down if there's any content.
795         TableRow(ref mut rrow) => {
796             // If the row is empty, then there isn't really anything
797             // to attach the fragment start to.
798             if rrow.cells.len() > 0 {
799                 rrow.cells[0].content.insert(0, prefix);
800             }
801         }
802 
803         TableBody(ref mut rows) | Table(RenderTable { ref mut rows, .. }) => {
804             // If the row is empty, then there isn't really anything
805             // to attach the fragment start to.
806             if rows.len() > 0 {
807                 let rrow = &mut rows[0];
808                 if rrow.cells.len() > 0 {
809                     rrow.cells[0].content.insert(0, prefix);
810                 }
811             }
812         }
813 
814         // For anything else, just make a new Container with the
815         // prefix node and the original one.
816         _ => {
817             let result = RenderNode::new(Container(vec![prefix, orig]));
818             html_trace!("prepend_marker() -> {:?}", result);
819             return result;
820         }
821     }
822     html_trace!("prepend_marker() -> {:?}", &orig);
823     orig
824 }
825 
process_dom_node<'a, 'b, T: Write>( handle: Handle, err_out: &'b mut T, ) -> TreeMapResult<'a, (), Handle, RenderNode>826 fn process_dom_node<'a, 'b, T: Write>(
827     handle: Handle,
828     err_out: &'b mut T,
829 ) -> TreeMapResult<'a, (), Handle, RenderNode> {
830     use RenderNodeInfo::*;
831     use TreeMapResult::*;
832 
833     match handle.clone().data {
834         Document => pending(handle, |&mut (), cs| Some(RenderNode::new(Container(cs)))),
835         Comment { .. } => Nothing,
836         Element {
837             ref name,
838             ref attrs,
839             ..
840         } => {
841             let mut frag_from_name_attr = false;
842             let result = match name.expanded() {
843                 expanded_name!(html "html")
844                 | expanded_name!(html "span")
845                 | expanded_name!(html "body") => {
846                     /* process children, but don't add anything */
847                     pending(handle, |_, cs| Some(RenderNode::new(Container(cs))))
848                 }
849                 expanded_name!(html "link")
850                 | expanded_name!(html "meta")
851                 | expanded_name!(html "hr")
852                 | expanded_name!(html "script")
853                 | expanded_name!(html "style")
854                 | expanded_name!(html "head") => {
855                     /* Ignore the head and its children */
856                     Nothing
857                 }
858                 expanded_name!(html "a") => {
859                     let borrowed = attrs.borrow();
860                     let mut target = None;
861                     frag_from_name_attr = true;
862                     for attr in borrowed.iter() {
863                         if &attr.name.local == "href" {
864                             target = Some(&*attr.value);
865                             break;
866                         }
867                     }
868                     PendingChildren {
869                         children: handle.children.borrow().clone(),
870                         cons: if let Some(href) = target {
871                             // We need the closure to own the string it's going to use.
872                             // Unfortunately that means we ideally want FnOnce; but
873                             // that doesn't yet work in a Box.  Box<FnBox()> does, but
874                             // is unstable.  So we'll just move a string in and clone
875                             // it on use.
876                             let href: String = href.into();
877                             Box::new(move |_, cs| Some(RenderNode::new(Link(href.clone(), cs))))
878                         } else {
879                             Box::new(|_, cs| Some(RenderNode::new(Container(cs))))
880                         },
881                         prefn: None,
882                         postfn: None,
883                     }
884                 }
885                 expanded_name!(html "em") => pending(handle, |_, cs| Some(RenderNode::new(Em(cs)))),
886                 expanded_name!(html "strong") => {
887                     pending(handle, |_, cs| Some(RenderNode::new(Strong(cs))))
888                 }
889                 expanded_name!(html "s") => {
890                     pending(handle, |_, cs| Some(RenderNode::new(Strikeout(cs))))
891                 }
892                 expanded_name!(html "code") => {
893                     pending(handle, |_, cs| Some(RenderNode::new(Code(cs))))
894                 }
895                 expanded_name!(html "img") => {
896                     let borrowed = attrs.borrow();
897                     let mut title = None;
898                     for attr in borrowed.iter() {
899                         if &attr.name.local == "alt" {
900                             title = Some(&*attr.value);
901                             break;
902                         }
903                     }
904                     if let Some(title) = title {
905                         Finished(RenderNode::new(Img(title.into())))
906                     } else {
907                         Nothing
908                     }
909                 }
910                 expanded_name!(html "h1")
911                 | expanded_name!(html "h2")
912                 | expanded_name!(html "h3")
913                 | expanded_name!(html "h4") => {
914                     let level: usize = name.local[1..].parse().unwrap();
915                     pending(handle, move |_, cs| {
916                         Some(RenderNode::new(Header(level, cs)))
917                     })
918                 }
919                 expanded_name!(html "p") => {
920                     pending(handle, |_, cs| Some(RenderNode::new(Block(cs))))
921                 }
922                 expanded_name!(html "div") => {
923                     pending(handle, |_, cs| Some(RenderNode::new(Div(cs))))
924                 }
925                 expanded_name!(html "pre") => {
926                     pending(handle, |_, cs| Some(RenderNode::new(Pre(cs))))
927                 }
928                 expanded_name!(html "br") => Finished(RenderNode::new(Break)),
929                 expanded_name!(html "table") => table_to_render_tree(handle.clone(), err_out),
930                 expanded_name!(html "thead") | expanded_name!(html "tbody") => {
931                     tbody_to_render_tree(handle.clone(), err_out)
932                 }
933                 expanded_name!(html "tr") => tr_to_render_tree(handle.clone(), err_out),
934                 expanded_name!(html "th") | expanded_name!(html "td") => {
935                     td_to_render_tree(handle.clone(), err_out)
936                 }
937                 expanded_name!(html "blockquote") => {
938                     pending(handle, |_, cs| Some(RenderNode::new(BlockQuote(cs))))
939                 }
940                 expanded_name!(html "ul") => Finished(RenderNode::new(Ul(
941                     list_children_to_render_nodes(handle.clone(), err_out),
942                 ))),
943                 expanded_name!(html "ol") => {
944                     let borrowed = attrs.borrow();
945                     let mut start = 1;
946                     for attr in borrowed.iter() {
947                         if &attr.name.local == "start" {
948                             start = attr.value.parse().ok().unwrap_or(1);
949                             break;
950                         }
951                     }
952 
953                     Finished(RenderNode::new(Ol(
954                         start,
955                         list_children_to_render_nodes(handle.clone(), err_out),
956                     )))
957                 }
958                 expanded_name!(html "dl") => Finished(RenderNode::new(Dl(
959                     desc_list_children_to_render_nodes(handle.clone(), err_out),
960                 ))),
961                 _ => {
962                     html_trace!("Unhandled element: {:?}\n", name.local);
963                     pending(handle, |_, cs| Some(RenderNode::new(Container(cs))))
964                     //None
965                 }
966             };
967 
968             let mut fragment = None;
969             let borrowed = attrs.borrow();
970             for attr in borrowed.iter() {
971                 if &attr.name.local == "id" || (frag_from_name_attr && &attr.name.local == "name") {
972                     fragment = Some(attr.value.to_string());
973                     break;
974                 }
975             }
976 
977             if let Some(fragname) = fragment {
978                 match result {
979                     Finished(node) => {
980                         Finished(prepend_marker(RenderNode::new(FragStart(fragname)), node))
981                     }
982                     Nothing => Finished(RenderNode::new(FragStart(fragname))),
983                     PendingChildren {
984                         children,
985                         cons,
986                         prefn,
987                         postfn,
988                     } => {
989                         let fragname: String = fragname.into();
990                         PendingChildren {
991                             children: children,
992                             prefn: prefn,
993                             postfn: postfn,
994                             cons: Box::new(move |ctx, ch| {
995                                 let fragnode = RenderNode::new(FragStart(fragname.clone()));
996                                 match cons(ctx, ch) {
997                                     None => Some(fragnode),
998                                     Some(node) => Some(prepend_marker(fragnode, node)),
999                                 }
1000                             }),
1001                         }
1002                     }
1003                 }
1004             } else {
1005                 result
1006             }
1007         }
1008         markup5ever_rcdom::NodeData::Text { contents: ref tstr } => {
1009             Finished(RenderNode::new(Text((&*tstr.borrow()).into())))
1010         }
1011         _ => {
1012             // NodeData doesn't have a Debug impl.
1013             write!(err_out, "Unhandled node type.\n").unwrap();
1014             Nothing
1015         }
1016     }
1017 }
1018 
1019 /// Context to use during tree parsing.
1020 /// This mainly gives access to a Renderer, but needs to be able to push
1021 /// new ones on for nested structures.
1022 struct BuilderStack<R: Renderer> {
1023     builders: Vec<R>,
1024 }
1025 
1026 impl<R: Renderer> BuilderStack<R> {
new(builder: R) -> BuilderStack<R>1027     pub fn new(builder: R) -> BuilderStack<R> {
1028         BuilderStack {
1029             builders: vec![builder],
1030         }
1031     }
1032 
1033     /// Push a new builder onto the stack
push(&mut self, builder: R)1034     pub fn push(&mut self, builder: R) {
1035         self.builders.push(builder);
1036     }
1037 
1038     /// Pop off the top builder and return it.
1039     /// Panics if empty
pop(&mut self) -> R1040     pub fn pop(&mut self) -> R {
1041         self.builders.pop().unwrap()
1042     }
1043 
1044     /// Pop off the only builder and return it.
1045     /// panics if there aren't exactly 1 available.
into_inner(mut self) -> R1046     pub fn into_inner(mut self) -> R {
1047         assert_eq!(self.builders.len(), 1);
1048         self.builders.pop().unwrap()
1049     }
1050 }
1051 
1052 impl<R: Renderer> Deref for BuilderStack<R> {
1053     type Target = R;
deref(&self) -> &R1054     fn deref(&self) -> &R {
1055         self.builders.last().expect("Underflow in BuilderStack")
1056     }
1057 }
1058 
1059 impl<R: Renderer> DerefMut for BuilderStack<R> {
deref_mut(&mut self) -> &mut R1060     fn deref_mut(&mut self) -> &mut R {
1061         self.builders.last_mut().expect("Underflow in BuilderStack")
1062     }
1063 }
1064 
render_tree_to_string<T: Write, R: Renderer>( builder: R, tree: RenderNode, err_out: &mut T, ) -> R1065 fn render_tree_to_string<T: Write, R: Renderer>(
1066     builder: R,
1067     tree: RenderNode,
1068     err_out: &mut T,
1069 ) -> R {
1070     /* Phase 1: get size estimates. */
1071     tree_map_reduce(&mut (), &tree, |_, node| precalc_size_estimate(&node));
1072 
1073     /* Phase 2: actually render. */
1074     let mut bs = BuilderStack::new(builder);
1075     tree_map_reduce(&mut bs, tree, |builders, node| {
1076         do_render_node(builders, node, err_out)
1077     });
1078     bs.into_inner()
1079 }
1080 
pending2< 'a, R: Renderer, F: Fn(&mut BuilderStack<R>, Vec<Option<R>>) -> Option<Option<R>> + 'static, >( children: Vec<RenderNode>, f: F, ) -> TreeMapResult<'a, BuilderStack<R>, RenderNode, Option<R>>1081 fn pending2<
1082     'a,
1083     R: Renderer,
1084     F: Fn(&mut BuilderStack<R>, Vec<Option<R>>) -> Option<Option<R>> + 'static,
1085 >(
1086     children: Vec<RenderNode>,
1087     f: F,
1088 ) -> TreeMapResult<'a, BuilderStack<R>, RenderNode, Option<R>> {
1089     TreeMapResult::PendingChildren {
1090         children: children,
1091         cons: Box::new(f),
1092         prefn: None,
1093         postfn: None,
1094     }
1095 }
1096 
do_render_node<'a, 'b, T: Write, R: Renderer>( builder: &mut BuilderStack<R>, tree: RenderNode, err_out: &'b mut T, ) -> TreeMapResult<'static, BuilderStack<R>, RenderNode, Option<R>>1097 fn do_render_node<'a, 'b, T: Write, R: Renderer>(
1098     builder: &mut BuilderStack<R>,
1099     tree: RenderNode,
1100     err_out: &'b mut T,
1101 ) -> TreeMapResult<'static, BuilderStack<R>, RenderNode, Option<R>> {
1102     html_trace!("do_render_node({:?}", tree);
1103     use RenderNodeInfo::*;
1104     use TreeMapResult::*;
1105     match tree.info {
1106         Text(ref tstr) => {
1107             builder.add_inline_text(tstr);
1108             Finished(None)
1109         }
1110         Container(children) => pending2(children, |_, _| Some(None)),
1111         Link(href, children) => {
1112             builder.start_link(&href);
1113             pending2(children, |builder: &mut BuilderStack<R>, _| {
1114                 builder.end_link();
1115                 Some(None)
1116             })
1117         }
1118         Em(children) => {
1119             builder.start_emphasis();
1120             pending2(children, |builder: &mut BuilderStack<R>, _| {
1121                 builder.end_emphasis();
1122                 Some(None)
1123             })
1124         }
1125         Strong(children) => {
1126             builder.start_strong();
1127             pending2(children, |builder: &mut BuilderStack<R>, _| {
1128                 builder.end_strong();
1129                 Some(None)
1130             })
1131         }
1132         Strikeout(children) => {
1133             builder.start_strikeout();
1134             pending2(children, |builder: &mut BuilderStack<R>, _| {
1135                 builder.end_strikeout();
1136                 Some(None)
1137             })
1138         }
1139         Code(children) => {
1140             builder.start_code();
1141             pending2(children, |builder: &mut BuilderStack<R>, _| {
1142                 builder.end_code();
1143                 Some(None)
1144             })
1145         }
1146         Img(title) => {
1147             builder.add_image(&title);
1148             Finished(None)
1149         }
1150         Block(children) => {
1151             builder.start_block();
1152             pending2(children, |builder: &mut BuilderStack<R>, _| {
1153                 builder.end_block();
1154                 Some(None)
1155             })
1156         }
1157         Header(level, children) => {
1158             let min_width = max(builder.width(), 1 + level + 1);
1159             let sub_builder = builder.new_sub_renderer(min_width - (1 + level));
1160             builder.push(sub_builder);
1161             pending2(children, move |builder: &mut BuilderStack<R>, _| {
1162                 let sub_builder = builder.pop();
1163 
1164                 let qs: String = "#".repeat(level) + " ";
1165 
1166                 builder.start_block();
1167                 builder.append_subrender(sub_builder, repeat(&qs[..]));
1168                 builder.end_block();
1169                 Some(None)
1170             })
1171         }
1172         Div(children) => {
1173             builder.new_line();
1174             pending2(children, |builder: &mut BuilderStack<R>, _| {
1175                 builder.new_line();
1176                 Some(None)
1177             })
1178         }
1179         Pre(children) => {
1180             builder.new_line();
1181             builder.start_pre();
1182             pending2(children, |builder: &mut BuilderStack<R>, _| {
1183                 builder.new_line();
1184                 builder.end_pre();
1185                 Some(None)
1186             })
1187         }
1188         BlockQuote(children) => {
1189             let sub_builder = builder.new_sub_renderer(builder.width() - 2);
1190             builder.push(sub_builder);
1191             pending2(children, |builder: &mut BuilderStack<R>, _| {
1192                 let sub_builder = builder.pop();
1193 
1194                 builder.start_block();
1195                 builder.append_subrender(sub_builder, repeat("> "));
1196                 builder.end_block();
1197                 Some(None)
1198             })
1199         }
1200         Ul(items) => {
1201             builder.start_block();
1202 
1203             TreeMapResult::PendingChildren {
1204                 children: items,
1205                 cons: Box::new(|_, _| Some(None)),
1206                 prefn: Some(Box::new(|builder: &mut BuilderStack<R>, _| {
1207                     let sub_builder = builder.new_sub_renderer(builder.width() - 2);
1208                     builder.push(sub_builder);
1209                 })),
1210                 postfn: Some(Box::new(|builder: &mut BuilderStack<R>, _| {
1211                     let sub_builder = builder.pop();
1212                     builder.append_subrender(sub_builder, once("* ").chain(repeat("  ")));
1213                 })),
1214             }
1215         }
1216         Ol(start, items) => {
1217             builder.start_block();
1218 
1219             let num_items = items.len();
1220 
1221             // The prefix width could be at either end if the start is negative.
1222             let min_number = start;
1223             // Assumption: num_items can't overflow isize.
1224             let max_number = start + (num_items as i64) - 1;
1225             let prefix_width_min = format!("{}", min_number).len() + 2;
1226             let prefix_width_max = format!("{}", max_number).len() + 2;
1227             let prefix_width = max(prefix_width_min, prefix_width_max);
1228             let prefixn = format!("{: <width$}", "", width = prefix_width);
1229             let i: Cell<_> = Cell::new(start);
1230 
1231             TreeMapResult::PendingChildren {
1232                 children: items,
1233                 cons: Box::new(|_, _| Some(None)),
1234                 prefn: Some(Box::new(move |builder: &mut BuilderStack<R>, _| {
1235                     let sub_builder = builder.new_sub_renderer(builder.width() - prefix_width);
1236                     builder.push(sub_builder);
1237                 })),
1238                 postfn: Some(Box::new(move |builder: &mut BuilderStack<R>, _| {
1239                     let sub_builder = builder.pop();
1240                     let prefix1 = format!("{}.", i.get());
1241                     let prefix1 = format!("{: <width$}", prefix1, width = prefix_width);
1242 
1243                     builder.append_subrender(
1244                         sub_builder,
1245                         once(prefix1.as_str()).chain(repeat(prefixn.as_str())),
1246                     );
1247                     i.set(i.get() + 1);
1248                 })),
1249             }
1250         }
1251         Dl(items) => {
1252             builder.start_block();
1253 
1254             TreeMapResult::PendingChildren {
1255                 children: items,
1256                 cons: Box::new(|_, _| Some(None)),
1257                 prefn: None,
1258                 postfn: None,
1259             }
1260         }
1261         Dt(children) => {
1262             builder.new_line();
1263             builder.start_emphasis();
1264             pending2(children, |builder: &mut BuilderStack<R>, _| {
1265                 builder.end_emphasis();
1266                 Some(None)
1267             })
1268         }
1269         Dd(children) => {
1270             let sub_builder = builder.new_sub_renderer(builder.width() - 2);
1271             builder.push(sub_builder);
1272             pending2(children, |builder: &mut BuilderStack<R>, _| {
1273                 let sub_builder = builder.pop();
1274                 builder.append_subrender(sub_builder, repeat("  "));
1275                 Some(None)
1276             })
1277         }
1278         Break => {
1279             builder.new_line_hard();
1280             Finished(None)
1281         }
1282         Table(tab) => render_table_tree(builder.deref_mut(), tab, err_out),
1283         TableRow(row) => render_table_row(builder.deref_mut(), row, err_out),
1284         TableBody(_) => unimplemented!("Unexpected TableBody while rendering"),
1285         TableCell(cell) => render_table_cell(builder.deref_mut(), cell, err_out),
1286         FragStart(fragname) => {
1287             builder.record_frag_start(&fragname);
1288             Finished(None)
1289         }
1290     }
1291 }
1292 
render_table_tree<T: Write, R: Renderer>( builder: &mut R, table: RenderTable, _err_out: &mut T, ) -> TreeMapResult<'static, BuilderStack<R>, RenderNode, Option<R>>1293 fn render_table_tree<T: Write, R: Renderer>(
1294     builder: &mut R,
1295     table: RenderTable,
1296     _err_out: &mut T,
1297 ) -> TreeMapResult<'static, BuilderStack<R>, RenderNode, Option<R>> {
1298     /* Now lay out the table. */
1299     let num_columns = table.num_columns;
1300 
1301     /* Heuristic: scale the column widths according to how much content there is. */
1302     let mut col_sizes: Vec<SizeEstimate> = vec![Default::default(); num_columns];
1303 
1304     for row in table.rows() {
1305         let mut colno = 0;
1306         for cell in row.cells() {
1307             // FIXME: get_size_estimate is still recursive.
1308             let mut estimate = cell.get_size_estimate();
1309             // If the cell has a colspan>1, then spread its size between the
1310             // columns.
1311             estimate.size /= cell.colspan;
1312             estimate.min_width /= cell.colspan;
1313             for i in 0..cell.colspan {
1314                 col_sizes[colno + i] = (col_sizes[colno + i]).add(estimate);
1315             }
1316             colno += cell.colspan;
1317         }
1318     }
1319     let tot_size: usize = col_sizes.iter().map(|est| est.size).sum();
1320     let width = builder.width();
1321     let mut col_widths: Vec<usize> = col_sizes
1322         .iter()
1323         .map(|sz| {
1324             if sz.size == 0 {
1325                 0
1326             } else {
1327                 max(sz.size * width / tot_size, sz.min_width)
1328             }
1329         })
1330         .collect();
1331     /* The minimums may have put the total width too high */
1332     while col_widths.iter().cloned().sum::<usize>() > width {
1333         let (i, _) = col_widths
1334             .iter()
1335             .cloned()
1336             .enumerate()
1337             .max_by_key(|&(colno, width)| {
1338                 (
1339                     width.saturating_sub(col_sizes[colno].min_width),
1340                     width,
1341                     usize::max_value() - colno,
1342                 )
1343             })
1344             .unwrap();
1345         col_widths[i] -= 1;
1346     }
1347     if !col_widths.is_empty() {
1348         // Slight fudge; we're not drawing extreme edges, so one of the columns
1349         // can gets a free character cell from not having a border.
1350         // make it the last.
1351         let last = col_widths.len() - 1;
1352         col_widths[last] += 1;
1353     }
1354 
1355     builder.start_block();
1356 
1357     builder.add_horizontal_border();
1358 
1359     TreeMapResult::PendingChildren {
1360         children: table.into_rows(col_widths),
1361         cons: Box::new(|_, _| Some(None)),
1362         prefn: Some(Box::new(|_, _| {})),
1363         postfn: Some(Box::new(|_, _| {})),
1364     }
1365 }
1366 
render_table_row<T: Write, R: Renderer>( _builder: &mut R, row: RenderTableRow, _err_out: &mut T, ) -> TreeMapResult<'static, BuilderStack<R>, RenderNode, Option<R>>1367 fn render_table_row<T: Write, R: Renderer>(
1368     _builder: &mut R,
1369     row: RenderTableRow,
1370     _err_out: &mut T,
1371 ) -> TreeMapResult<'static, BuilderStack<R>, RenderNode, Option<R>> {
1372     TreeMapResult::PendingChildren {
1373         children: row.into_cells(),
1374         cons: Box::new(|builders, children| {
1375             let children: Vec<_> = children.into_iter().map(Option::unwrap).collect();
1376             if children.iter().any(|c| !c.empty()) {
1377                 builders.append_columns_with_borders(children, true);
1378             }
1379             Some(None)
1380         }),
1381         prefn: Some(Box::new(|builder: &mut BuilderStack<R>, node| {
1382             if let RenderNodeInfo::TableCell(ref cell) = node.info {
1383                 let sub_builder = builder.new_sub_renderer(cell.col_width.unwrap());
1384                 builder.push(sub_builder);
1385             } else {
1386                 panic!()
1387             }
1388         })),
1389         postfn: Some(Box::new(|_builder: &mut BuilderStack<R>, _| {})),
1390     }
1391 }
1392 
render_table_cell<T: Write, R: Renderer>( _builder: &mut R, cell: RenderTableCell, _err_out: &mut T, ) -> TreeMapResult<'static, BuilderStack<R>, RenderNode, Option<R>>1393 fn render_table_cell<T: Write, R: Renderer>(
1394     _builder: &mut R,
1395     cell: RenderTableCell,
1396     _err_out: &mut T,
1397 ) -> TreeMapResult<'static, BuilderStack<R>, RenderNode, Option<R>> {
1398     pending2(cell.content, |builder: &mut BuilderStack<R>, _| {
1399         let sub_builder = builder.pop();
1400         Some(Some(sub_builder))
1401     })
1402 }
1403 
1404 /// The structure of an HTML document that can be rendered using a [`TextDecorator`][].
1405 ///
1406 /// [`TextDecorator`]: render/text_renderer/trait.TextDecorator.html
1407 
1408 #[derive(Clone, Debug)]
1409 pub struct RenderTree(RenderNode);
1410 
1411 impl RenderTree {
1412     /// Render this document using the given `decorator` and wrap it to `width` columns.
render<D: TextDecorator>(self, width: usize, decorator: D) -> RenderedText<D>1413     pub fn render<D: TextDecorator>(self, width: usize, decorator: D) -> RenderedText<D> {
1414         let builder = TextRenderer::new(width, decorator);
1415         let builder = render_tree_to_string(builder, self.0, &mut Discard {});
1416         RenderedText(builder)
1417     }
1418 
1419     /// Render this document as plain text using the [`PlainDecorator`][] and wrap it to `width`
1420     /// columns.
1421     ///
1422     /// [`PlainDecorator`]: render/text_renderer/struct.PlainDecorator.html
render_plain(self, width: usize) -> RenderedText<PlainDecorator>1423     pub fn render_plain(self, width: usize) -> RenderedText<PlainDecorator> {
1424         self.render(width, PlainDecorator::new())
1425     }
1426 
1427     /// Render this document as rich text using the [`RichDecorator`][] and wrap it to `width`
1428     /// columns.
1429     ///
1430     /// [`RichDecorator`]: render/text_renderer/struct.RichDecorator.html
render_rich(self, width: usize) -> RenderedText<RichDecorator>1431     pub fn render_rich(self, width: usize) -> RenderedText<RichDecorator> {
1432         self.render(width, RichDecorator::new())
1433     }
1434 }
1435 
1436 /// A rendered HTML document.
1437 pub struct RenderedText<D: TextDecorator>(TextRenderer<D>);
1438 
1439 impl<D: TextDecorator> RenderedText<D> {
1440     /// Convert the rendered HTML document to a string.
into_string(self) -> String1441     pub fn into_string(self) -> String {
1442         self.0.into_string()
1443     }
1444 
1445     /// Convert the rendered HTML document to a vector of lines with the annotations created by the
1446     /// decorator.
into_lines(self) -> Vec<TaggedLine<Vec<D::Annotation>>>1447     pub fn into_lines(self) -> Vec<TaggedLine<Vec<D::Annotation>>> {
1448         self.0
1449             .into_lines()
1450             .into_iter()
1451             .map(RenderLine::into_tagged_line)
1452             .collect()
1453     }
1454 }
1455 
1456 /// Reads and parses HTML from `input` and prepares a render tree.
parse(mut input: impl io::Read) -> RenderTree1457 pub fn parse(mut input: impl io::Read) -> RenderTree {
1458     let opts = ParseOpts {
1459         tree_builder: TreeBuilderOpts {
1460             drop_doctype: true,
1461             ..Default::default()
1462         },
1463         ..Default::default()
1464     };
1465     let dom = parse_document(RcDom::default(), opts)
1466         .from_utf8()
1467         .read_from(&mut input)
1468         .unwrap();
1469     let render_tree = dom_to_render_tree(dom.document.clone(), &mut Discard {}).unwrap();
1470     RenderTree(render_tree)
1471 }
1472 
1473 /// Reads HTML from `input`, decorates it using `decorator`, and
1474 /// returns a `String` with text wrapped to `width` columns.
from_read_with_decorator<R, D>(input: R, width: usize, decorator: D) -> String where R: io::Read, D: TextDecorator,1475 pub fn from_read_with_decorator<R, D>(input: R, width: usize, decorator: D) -> String
1476 where
1477     R: io::Read,
1478     D: TextDecorator,
1479 {
1480     parse(input).render(width, decorator).into_string()
1481 }
1482 
1483 /// Reads HTML from `input`, and returns a `String` with text wrapped to
1484 /// `width` columns.
from_read<R>(input: R, width: usize) -> String where R: io::Read,1485 pub fn from_read<R>(input: R, width: usize) -> String
1486 where
1487     R: io::Read,
1488 {
1489     let decorator = PlainDecorator::new();
1490     from_read_with_decorator(input, width, decorator)
1491 }
1492 
1493 /// Reads HTML from `input`, and returns text wrapped to `width` columns.
1494 /// The text is returned as a `Vec<TaggedLine<_>>`; the annotations are vectors
1495 /// of `RichAnnotation`.  The "outer" annotation comes first in the `Vec`.
from_read_rich<R>(input: R, width: usize) -> Vec<TaggedLine<Vec<RichAnnotation>>> where R: io::Read,1496 pub fn from_read_rich<R>(input: R, width: usize) -> Vec<TaggedLine<Vec<RichAnnotation>>>
1497 where
1498     R: io::Read,
1499 {
1500     parse(input)
1501         .render(width, RichDecorator::new())
1502         .into_lines()
1503 }
1504 
1505 #[cfg(test)]
1506 mod tests;
1507