1 //! Convert HTML to text formats.
2 //!
3 //! This crate renders HTML into a text format, wrapped to a specified width.
4 //! This can either be plain text or with extra annotations to (for example)
5 //! show in a terminal which supports colours.
6 //!
7 //! # Examples
8 //!
9 //! ```rust
10 //! # use html2text::from_read;
11 //! let html = b"
12 //! <ul>
13 //! <li>Item one</li>
14 //! <li>Item two</li>
15 //! <li>Item three</li>
16 //! </ul>";
17 //! assert_eq!(from_read(&html[..], 20),
18 //! "\
19 //! * Item one
20 //! * Item two
21 //! * Item three
22 //! ");
23 //! ```
24 //! A couple of simple demonstration programs are included as examples:
25 //!
26 //! ### html2text
27 //!
28 //! The simplest example uses `from_read` to convert HTML on stdin into plain
29 //! text:
30 //!
31 //! ```sh
32 //! $ cargo run --example html2text < foo.html
33 //! [...]
34 //! ```
35 //!
36 //! ### html2term
37 //!
38 //! A very simple example of using the rich interface (`from_read_rich`) for a
39 //! slightly interactive console HTML viewer is provided as `html2term`.
40 //!
41 //! ```sh
42 //! $ cargo run --example html2term foo.html
43 //! [...]
44 //! ```
45 //!
46 //! Note that this example takes the HTML file as a parameter so that it can
47 //! read keys from stdin.
48 //!
49
50 #![cfg_attr(feature = "clippy", feature(plugin))]
51 #![cfg_attr(feature = "clippy", plugin(clippy))]
52 #![deny(missing_docs)]
53
54 #[macro_use]
55 extern crate html5ever;
56 extern crate unicode_width;
57
58 #[macro_use]
59 mod macros;
60
61 pub mod render;
62
63 use render::text_renderer::{
64 PlainDecorator, RenderLine, RichAnnotation, RichDecorator, TaggedLine, TextDecorator,
65 TextRenderer,
66 };
67 use render::Renderer;
68
69 use html5ever::driver::ParseOpts;
70 use html5ever::parse_document;
71 use html5ever::tendril::TendrilSink;
72 use html5ever::tree_builder::TreeBuilderOpts;
73 use markup5ever_rcdom::{
74 self, Handle,
75 NodeData::{Comment, Document, Element},
76 RcDom,
77 };
78 use std::cell::Cell;
79 use std::cmp::max;
80 use std::io;
81 use std::io::Write;
82 use std::iter::{once, repeat};
83 use std::ops::{Deref, DerefMut};
84
/// A writer which accepts any input and throws it away.
struct Discard {}

impl Write for Discard {
    /// Pretend that the whole buffer was written.
    fn write(&mut self, bytes: &[u8]) -> io::Result<usize> {
        let consumed = bytes.len();
        Ok(consumed)
    }

    /// There is never anything buffered, so flushing always succeeds.
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}
95
/// Minimum width (in characters) assumed for any non-empty piece of text
/// when estimating how narrow a node can be rendered.
const MIN_WIDTH: usize = 5;
97
/// Size information/estimate.
///
/// The default value is the empty estimate (zero size, zero minimum width).
#[derive(Debug, Copy, Clone, Default)]
pub struct SizeEstimate {
    /// Rough overall size.
    size: usize,
    /// The narrowest possible width.
    min_width: usize,
}

impl SizeEstimate {
    /// Combine two estimates into one: the sizes are added together and the
    /// larger of the two minimum widths is kept.
    pub fn add(self, other: SizeEstimate) -> SizeEstimate {
        let SizeEstimate { size, min_width } = self;
        SizeEstimate {
            size: size + other.size,
            min_width: min_width.max(other.min_width),
        }
    }
}
123
124 #[derive(Clone, Debug)]
125 /// Render tree table cell
126 pub struct RenderTableCell {
127 colspan: usize,
128 content: Vec<RenderNode>,
129 size_estimate: Cell<Option<SizeEstimate>>,
130 col_width: Option<usize>, // Actual width to use
131 }
132
133 impl RenderTableCell {
134 /// Render this cell to a builder.
render<T: Write, R: Renderer>(&mut self, _builder: &mut R, _err_out: &mut T)135 pub fn render<T: Write, R: Renderer>(&mut self, _builder: &mut R, _err_out: &mut T) {
136 unimplemented!()
137 //render_tree_children_to_string(builder, &mut self.content, err_out)
138 }
139
140 /// Calculate or return the estimate size of the cell
get_size_estimate(&self) -> SizeEstimate141 pub fn get_size_estimate(&self) -> SizeEstimate {
142 if self.size_estimate.get().is_none() {
143 let size = self
144 .content
145 .iter()
146 .map(|node| node.get_size_estimate())
147 .fold(Default::default(), SizeEstimate::add);
148 self.size_estimate.set(Some(size));
149 }
150 self.size_estimate.get().unwrap()
151 }
152 }
153
154 #[derive(Clone, Debug)]
155 /// Render tree table row
156 pub struct RenderTableRow {
157 cells: Vec<RenderTableCell>,
158 col_sizes: Option<Vec<usize>>,
159 }
160
161 impl RenderTableRow {
162 /// Return a mutable iterator over the cells.
cells(&self) -> std::slice::Iter<RenderTableCell>163 pub fn cells(&self) -> std::slice::Iter<RenderTableCell> {
164 self.cells.iter()
165 }
166 /// Return a mutable iterator over the cells.
cells_mut(&mut self) -> std::slice::IterMut<RenderTableCell>167 pub fn cells_mut(&mut self) -> std::slice::IterMut<RenderTableCell> {
168 self.cells.iter_mut()
169 }
170 /// Count the number of cells in the row.
171 /// Takes into account colspan.
num_cells(&self) -> usize172 pub fn num_cells(&self) -> usize {
173 self.cells.iter().map(|cell| cell.colspan).sum()
174 }
175 /// Return an iterator over (column, &cell)s, which
176 /// takes into account colspan.
cell_columns(&mut self) -> Vec<(usize, &mut RenderTableCell)>177 pub fn cell_columns(&mut self) -> Vec<(usize, &mut RenderTableCell)> {
178 let mut result = Vec::new();
179 let mut colno = 0;
180 for cell in &mut self.cells {
181 let colspan = cell.colspan;
182 result.push((colno, cell));
183 colno += colspan;
184 }
185 result
186 }
187
188 /// Return the contained cells as RenderNodes, annotated with their
189 /// widths if available. Skips cells with no width allocated.
into_cells(self) -> Vec<RenderNode>190 pub fn into_cells(self) -> Vec<RenderNode> {
191 let mut result = Vec::new();
192 let mut colno = 0;
193 let col_sizes = self.col_sizes.unwrap();
194 for mut cell in self.cells {
195 let colspan = cell.colspan;
196 let col_width: usize = col_sizes[colno..colno + cell.colspan].iter().sum();
197 if col_width > 1 {
198 cell.col_width = Some(col_width - 1);
199 result.push(RenderNode::new(RenderNodeInfo::TableCell(cell)));
200 }
201 colno += colspan;
202 }
203 result
204 }
205 }
206
207 #[derive(Clone, Debug)]
208 /// A representation of a table render tree with metadata.
209 pub struct RenderTable {
210 rows: Vec<RenderTableRow>,
211 num_columns: usize,
212 size_estimate: Cell<Option<SizeEstimate>>,
213 }
214
215 impl RenderTable {
216 /// Create a new RenderTable with the given rows
new(rows: Vec<RenderTableRow>) -> RenderTable217 pub fn new(rows: Vec<RenderTableRow>) -> RenderTable {
218 let num_columns = rows.iter().map(|r| r.num_cells()).max().unwrap_or(0);
219 RenderTable {
220 rows: rows,
221 num_columns: num_columns,
222 size_estimate: Cell::new(None),
223 }
224 }
225
226 /// Return an iterator over the rows.
rows(&self) -> std::slice::Iter<RenderTableRow>227 pub fn rows(&self) -> std::slice::Iter<RenderTableRow> {
228 self.rows.iter()
229 }
230
231 /// Return an iterator over the rows.
rows_mut(&mut self) -> std::slice::IterMut<RenderTableRow>232 pub fn rows_mut(&mut self) -> std::slice::IterMut<RenderTableRow> {
233 self.rows.iter_mut()
234 }
235 /// Consume this and return a Vec<RenderNode> containing the children;
236 /// the children know the column sizes required.
into_rows(self, col_sizes: Vec<usize>) -> Vec<RenderNode>237 pub fn into_rows(self, col_sizes: Vec<usize>) -> Vec<RenderNode> {
238 self.rows
239 .into_iter()
240 .map(|mut tr| {
241 tr.col_sizes = Some(col_sizes.clone());
242 RenderNode::new(RenderNodeInfo::TableRow(tr))
243 })
244 .collect()
245 }
246
calc_size_estimate(&self)247 fn calc_size_estimate(&self) {
248 if self.num_columns == 0 {
249 self.size_estimate.set(Some(SizeEstimate {
250 size: 0,
251 min_width: 0,
252 }));
253 return;
254 }
255 let mut sizes: Vec<SizeEstimate> = vec![Default::default(); self.num_columns];
256
257 // For now, a simple estimate based on adding up sub-parts.
258 for row in self.rows() {
259 let mut colno = 0usize;
260 for cell in row.cells() {
261 let cellsize = cell.get_size_estimate();
262 for colnum in 0..cell.colspan {
263 sizes[colno + colnum].size += cellsize.size / cell.colspan;
264 sizes[colno + colnum].min_width = max(
265 sizes[colno + colnum].min_width / cell.colspan,
266 cellsize.min_width,
267 );
268 }
269 colno += cell.colspan;
270 }
271 }
272 let size = sizes.iter().map(|s| s.size).sum(); // Include borders?
273 let min_width = sizes.iter().map(|s| s.min_width).sum::<usize>() + self.num_columns - 1;
274 self.size_estimate.set(Some(SizeEstimate {
275 size: size,
276 min_width: min_width,
277 }));
278 }
279
280 /// Calculate and store (or return stored value) of estimated size
get_size_estimate(&self) -> SizeEstimate281 pub fn get_size_estimate(&self) -> SizeEstimate {
282 if self.size_estimate.get().is_none() {
283 self.calc_size_estimate();
284 }
285 self.size_estimate.get().unwrap()
286 }
287 }
288
/// The node-specific information distilled from the DOM.
#[derive(Clone, Debug)]
pub enum RenderNodeInfo {
    /// Some text.
    Text(String),
    /// A group of nodes collected together.
    Container(Vec<RenderNode>),
    /// A link (the target taken from `href`) with contained nodes
    Link(String, Vec<RenderNode>),
    /// An emphasised region
    Em(Vec<RenderNode>),
    /// A strong region
    Strong(Vec<RenderNode>),
    /// A struck out region
    Strikeout(Vec<RenderNode>),
    /// A code region
    Code(Vec<RenderNode>),
    /// An image (represented by its `alt` text)
    Img(String),
    /// A block element with children
    Block(Vec<RenderNode>),
    /// A header (h1, h2, ...) with its level and children
    Header(usize, Vec<RenderNode>),
    /// A Div element with children
    Div(Vec<RenderNode>),
    /// A preformatted region.
    Pre(Vec<RenderNode>),
    /// A blockquote
    BlockQuote(Vec<RenderNode>),
    /// An unordered list
    Ul(Vec<RenderNode>),
    /// An ordered list, with the value of its `start` attribute (1 if absent
    /// or unparseable)
    Ol(i64, Vec<RenderNode>),
    /// A description list (containing Dt or Dd)
    Dl(Vec<RenderNode>),
    /// A term (from a <dl>)
    Dt(Vec<RenderNode>),
    /// A definition (from a <dl>)
    Dd(Vec<RenderNode>),
    /// A line break
    Break,
    /// A table
    Table(RenderTable),
    /// A set of table rows (from either <thead> or <tbody>)
    TableBody(Vec<RenderTableRow>),
    /// Table row (must only appear within a table body)
    TableRow(RenderTableRow),
    /// Table cell (must only appear within a table row)
    TableCell(RenderTableCell),
    /// Start of a named HTML fragment (named by an `id` attribute, or `name`
    /// on an `<a>` element)
    FragStart(String),
}
341
342 /// Common fields from a node.
343 #[derive(Clone, Debug)]
344 pub struct RenderNode {
345 size_estimate: Cell<Option<SizeEstimate>>,
346 info: RenderNodeInfo,
347 }
348
349 impl RenderNode {
350 /// Create a node from the RenderNodeInfo.
new(info: RenderNodeInfo) -> RenderNode351 pub fn new(info: RenderNodeInfo) -> RenderNode {
352 RenderNode {
353 size_estimate: Cell::new(None),
354 info: info,
355 }
356 }
357
358 /// Get a size estimate (~characters)
get_size_estimate(&self) -> SizeEstimate359 pub fn get_size_estimate(&self) -> SizeEstimate {
360 // If it's already calculated, then just return the answer.
361 if let Some(s) = self.size_estimate.get() {
362 return s;
363 };
364
365 use RenderNodeInfo::*;
366
367 // Otherwise, make an estimate.
368 let estimate = match self.info {
369 Text(ref t) | Img(ref t) => {
370 let len = t.trim().len();
371 SizeEstimate {
372 size: len,
373 min_width: if len > 0 { MIN_WIDTH } else { 0 },
374 }
375 }
376
377 Container(ref v)
378 | Link(_, ref v)
379 | Em(ref v)
380 | Strong(ref v)
381 | Strikeout(ref v)
382 | Code(ref v)
383 | Block(ref v)
384 | Div(ref v)
385 | Pre(ref v)
386 | BlockQuote(ref v)
387 | Dl(ref v)
388 | Dt(ref v)
389 | Dd(ref v)
390 | Ul(ref v)
391 | Ol(_, ref v) => v
392 .iter()
393 .map(RenderNode::get_size_estimate)
394 .fold(Default::default(), SizeEstimate::add),
395 Header(level, ref v) => v
396 .iter()
397 .map(RenderNode::get_size_estimate)
398 .fold(Default::default(), SizeEstimate::add)
399 .add(SizeEstimate {
400 size: 0,
401 min_width: MIN_WIDTH + level + 2,
402 }),
403 Break => SizeEstimate {
404 size: 1,
405 min_width: 1,
406 },
407 Table(ref t) => t.get_size_estimate(),
408 TableRow(_) | TableBody(_) | TableCell(_) => unimplemented!(),
409 FragStart(_) => Default::default(),
410 };
411 self.size_estimate.set(Some(estimate));
412 estimate
413 }
414 }
415
precalc_size_estimate<'a>(node: &'a RenderNode) -> TreeMapResult<(), &'a RenderNode, ()>416 fn precalc_size_estimate<'a>(node: &'a RenderNode) -> TreeMapResult<(), &'a RenderNode, ()> {
417 use RenderNodeInfo::*;
418 if node.size_estimate.get().is_some() {
419 return TreeMapResult::Nothing;
420 }
421 match node.info {
422 Text(_) | Img(_) | Break | FragStart(_) => {
423 let _ = node.get_size_estimate();
424 TreeMapResult::Nothing
425 }
426
427 Container(ref v)
428 | Link(_, ref v)
429 | Em(ref v)
430 | Strong(ref v)
431 | Strikeout(ref v)
432 | Code(ref v)
433 | Block(ref v)
434 | Div(ref v)
435 | Pre(ref v)
436 | BlockQuote(ref v)
437 | Ul(ref v)
438 | Ol(_, ref v)
439 | Dl(ref v)
440 | Dt(ref v)
441 | Dd(ref v)
442 | Header(_, ref v) => TreeMapResult::PendingChildren {
443 children: v.iter().collect(),
444 cons: Box::new(move |_, _cs| {
445 node.get_size_estimate();
446 None
447 }),
448 prefn: None,
449 postfn: None,
450 },
451 Table(ref t) => {
452 /* Return all the indirect children which are RenderNodes. */
453 let mut children = Vec::new();
454 for row in &t.rows {
455 for cell in &row.cells {
456 children.extend(cell.content.iter());
457 }
458 }
459 TreeMapResult::PendingChildren {
460 children: children,
461 cons: Box::new(move |_, _cs| {
462 node.get_size_estimate();
463 None
464 }),
465 prefn: None,
466 postfn: None,
467 }
468 }
469 TableRow(_) | TableBody(_) | TableCell(_) => unimplemented!(),
470 }
471 }
472
473 /// Make a Vec of RenderNodes from the children of a node.
children_to_render_nodes<T: Write>(handle: Handle, err_out: &mut T) -> Vec<RenderNode>474 fn children_to_render_nodes<T: Write>(handle: Handle, err_out: &mut T) -> Vec<RenderNode> {
475 /* process children, but don't add anything */
476 let children = handle
477 .children
478 .borrow()
479 .iter()
480 .flat_map(|ch| dom_to_render_tree(ch.clone(), err_out))
481 .collect();
482 children
483 }
484
485 /// Make a Vec of RenderNodes from the <li> children of a node.
list_children_to_render_nodes<T: Write>(handle: Handle, err_out: &mut T) -> Vec<RenderNode>486 fn list_children_to_render_nodes<T: Write>(handle: Handle, err_out: &mut T) -> Vec<RenderNode> {
487 let mut children = Vec::new();
488
489 for child in handle.children.borrow().iter() {
490 match child.data {
491 Element { ref name, .. } => match name.expanded() {
492 expanded_name!(html "li") => {
493 let li_children = children_to_render_nodes(child.clone(), err_out);
494 children.push(RenderNode::new(RenderNodeInfo::Block(li_children)));
495 }
496 _ => {}
497 },
498 Comment { .. } => {}
499 _ => {
500 html_trace!("Unhandled in list: {:?}\n", child);
501 }
502 }
503 }
504 children
505 }
506
507 /// Make a Vec of DtElements from the <dt> and <dd> children of a node.
desc_list_children_to_render_nodes<T: Write>( handle: Handle, err_out: &mut T, ) -> Vec<RenderNode>508 fn desc_list_children_to_render_nodes<T: Write>(
509 handle: Handle,
510 err_out: &mut T,
511 ) -> Vec<RenderNode> {
512 let mut children = Vec::new();
513
514 for child in handle.children.borrow().iter() {
515 match child.data {
516 Element { ref name, .. } => match name.expanded() {
517 expanded_name!(html "dt") => {
518 let dt_children = children_to_render_nodes(child.clone(), err_out);
519 children.push(RenderNode::new(RenderNodeInfo::Dt(dt_children)));
520 }
521 expanded_name!(html "dd") => {
522 let dd_children = children_to_render_nodes(child.clone(), err_out);
523 children.push(RenderNode::new(RenderNodeInfo::Dd(dd_children)));
524 }
525 _ => {}
526 },
527 Comment { .. } => {}
528 _ => {
529 html_trace!("Unhandled in list: {:?}\n", child);
530 }
531 }
532 }
533 children
534 }
535
536 /// Convert a table into a RenderNode
table_to_render_tree<'a, 'b, T: Write>( handle: Handle, _err_out: &'b mut T, ) -> TreeMapResult<'a, (), Handle, RenderNode>537 fn table_to_render_tree<'a, 'b, T: Write>(
538 handle: Handle,
539 _err_out: &'b mut T,
540 ) -> TreeMapResult<'a, (), Handle, RenderNode> {
541 pending(handle, |_, rowset| {
542 let mut rows = vec![];
543 for bodynode in rowset {
544 if let RenderNodeInfo::TableBody(body) = bodynode.info {
545 rows.extend(body);
546 } else {
547 html_trace!("Found in table: {:?}", bodynode.info);
548 }
549 }
550 Some(RenderNode::new(RenderNodeInfo::Table(RenderTable::new(
551 rows,
552 ))))
553 })
554 }
555
556 /// Add rows from a thead or tbody.
tbody_to_render_tree<'a, 'b, T: Write>( handle: Handle, _err_out: &'b mut T, ) -> TreeMapResult<'a, (), Handle, RenderNode>557 fn tbody_to_render_tree<'a, 'b, T: Write>(
558 handle: Handle,
559 _err_out: &'b mut T,
560 ) -> TreeMapResult<'a, (), Handle, RenderNode> {
561 pending(handle, |_, rowchildren| {
562 let rows = rowchildren
563 .into_iter()
564 .flat_map(|rownode| {
565 if let RenderNodeInfo::TableRow(row) = rownode.info {
566 Some(row)
567 } else {
568 html_trace!(" [[tbody child: {:?}]]", rownode);
569 None
570 }
571 })
572 .collect();
573 Some(RenderNode::new(RenderNodeInfo::TableBody(rows)))
574 })
575 }
576
577 /// Convert a table row to a RenderTableRow
tr_to_render_tree<'a, 'b, T: Write>( handle: Handle, _err_out: &'b mut T, ) -> TreeMapResult<'a, (), Handle, RenderNode>578 fn tr_to_render_tree<'a, 'b, T: Write>(
579 handle: Handle,
580 _err_out: &'b mut T,
581 ) -> TreeMapResult<'a, (), Handle, RenderNode> {
582 pending(handle, |_, cellnodes| {
583 let cells = cellnodes
584 .into_iter()
585 .flat_map(|cellnode| {
586 if let RenderNodeInfo::TableCell(cell) = cellnode.info {
587 Some(cell)
588 } else {
589 html_trace!(" [[tr child: {:?}]]", cellnode);
590 None
591 }
592 })
593 .collect();
594 Some(RenderNode::new(RenderNodeInfo::TableRow(RenderTableRow {
595 cells,
596 col_sizes: None,
597 })))
598 })
599 }
600
601 /// Convert a single table cell to a render node.
td_to_render_tree<'a, 'b, T: Write>( handle: Handle, _err_out: &'b mut T, ) -> TreeMapResult<'a, (), Handle, RenderNode>602 fn td_to_render_tree<'a, 'b, T: Write>(
603 handle: Handle,
604 _err_out: &'b mut T,
605 ) -> TreeMapResult<'a, (), Handle, RenderNode> {
606 let mut colspan = 1;
607 if let Element { ref attrs, .. } = handle.data {
608 for attr in attrs.borrow().iter() {
609 if &attr.name.local == "colspan" {
610 let v: &str = &*attr.value;
611 colspan = v.parse().unwrap_or(1);
612 }
613 }
614 }
615 pending(handle, move |_, children| {
616 Some(RenderNode::new(RenderNodeInfo::TableCell(
617 RenderTableCell {
618 colspan: colspan,
619 content: children,
620 size_estimate: Cell::new(None),
621 col_width: None,
622 },
623 )))
624 })
625 }
626
/// A reducer which combines results from mapping children into
/// the result for the current node.  Takes a context and a
/// vector of results and returns a new result (or nothing).
type ResultReducer<'a, C, R> = dyn Fn(&mut C, Vec<R>) -> Option<R> + 'a;

/// A closure to call before processing a child node.
type ChildPreFn<C, N> = dyn Fn(&mut C, &N);

/// A closure to call after processing a child node,
/// before adding the result to the processed results
/// vector.
type ChildPostFn<C, R> = dyn Fn(&mut C, &R);

/// The result of trying to render one node.
enum TreeMapResult<'a, C, N, R> {
    /// A completed result.
    Finished(R),
    /// Deferred completion - can be turned into a result
    /// once the vector of children are processed.
    PendingChildren {
        /// The child nodes which still need to be processed.
        children: Vec<N>,
        /// Builds the result for this node from the children's results.
        cons: Box<ResultReducer<'a, C, R>>,
        /// Called before each child is processed.
        prefn: Option<Box<ChildPreFn<C, N>>>,
        /// Called with each child's result before it is recorded.
        postfn: Option<Box<ChildPostFn<C, R>>>,
    },
    /// Nothing (e.g. a comment or other ignored element).
    Nothing,
}

/// Map a tree of nodes into a single result without recursion.
///
/// `process_node` is called on `top` and then on every child it asks for
/// (via `TreeMapResult::PendingChildren`), depth first and in order.  When
/// all the children of a node have been processed, that node's reducer
/// turns their results into the node's own result.
///
/// Returns the result for `top`, or `None` if it produced nothing.
fn tree_map_reduce<'a, C, N, R, M>(context: &mut C, top: N, mut process_node: M) -> Option<R>
where
    M: for<'c> FnMut(&'c mut C, N) -> TreeMapResult<'a, C, N, R>,
{
    /// A node whose children are still being worked through.
    struct Frame<'a, C, R, N> {
        /// Builds the node's result once every child is done
        reduce: Box<ResultReducer<'a, C, R>>,
        /// Hook run before each child is processed
        before_child: Option<Box<ChildPreFn<C, N>>>,
        /// Hook run on each child's result
        after_child: Option<Box<ChildPostFn<C, R>>>,
        /// Results of the children processed so far
        done: Vec<R>,
        /// Children still waiting to be processed
        remaining: std::vec::IntoIter<N>,
    }

    // The bottom frame has `top` as its only child and simply hands that
    // child's result back.
    let mut stack = vec![Frame {
        reduce: Box::new(|_, mut cs| cs.pop()),
        before_child: None,
        after_child: None,
        done: Vec::new(),
        remaining: vec![top].into_iter(),
    }];

    loop {
        let next_child = stack.last_mut().unwrap().remaining.next();
        match next_child {
            Some(child) => {
                if let Some(hook) = stack.last().unwrap().before_child.as_ref() {
                    hook(context, &child);
                }
                match process_node(context, child) {
                    TreeMapResult::Finished(result) => {
                        let frame = stack.last_mut().unwrap();
                        if let Some(hook) = frame.after_child.as_ref() {
                            hook(context, &result);
                        }
                        frame.done.push(result);
                    }
                    TreeMapResult::PendingChildren {
                        children,
                        cons,
                        prefn,
                        postfn,
                    } => {
                        // Descend: the children are handled before we come
                        // back to finish this node.
                        stack.push(Frame {
                            reduce: cons,
                            before_child: prefn,
                            after_child: postfn,
                            done: Vec::new(),
                            remaining: children.into_iter(),
                        });
                    }
                    TreeMapResult::Nothing => {}
                }
            }
            None => {
                // Every child is done, so the node itself can be built.
                let Frame { reduce, done, .. } = stack.pop().unwrap();
                match reduce(context, done) {
                    Some(node) => match stack.last_mut() {
                        Some(parent) => {
                            if let Some(hook) = parent.after_child.as_ref() {
                                hook(context, &node);
                            }
                            parent.done.push(node);
                        }
                        // That was the bottom frame: we have the answer.
                        None => return Some(node),
                    },
                    None => {
                        // The bottom frame finished with nothing to return.
                        if stack.is_empty() {
                            return None;
                        }
                    }
                }
            }
        }
    }
}
740
741 /// Convert a DOM tree or subtree into a render tree.
dom_to_render_tree<T: Write>(handle: Handle, err_out: &mut T) -> Option<RenderNode>742 pub fn dom_to_render_tree<T: Write>(handle: Handle, err_out: &mut T) -> Option<RenderNode> {
743 html_trace!("### dom_to_render_tree: HTML: {:?}", handle);
744 let result = tree_map_reduce(&mut (), handle, |_, handle| {
745 process_dom_node(handle, err_out)
746 });
747
748 html_trace!("### dom_to_render_tree: out= {:#?}", result);
749 result
750 }
751
pending<'a, F>(handle: Handle, f: F) -> TreeMapResult<'a, (), Handle, RenderNode> where for<'r> F: Fn(&'r mut (), std::vec::Vec<RenderNode>) -> Option<RenderNode> + 'static,752 fn pending<'a, F>(handle: Handle, f: F) -> TreeMapResult<'a, (), Handle, RenderNode>
753 where
754 //for<'a> F: Fn(&'a mut C, Vec<RenderNode>) -> Option<RenderNode>+'static
755 for<'r> F: Fn(&'r mut (), std::vec::Vec<RenderNode>) -> Option<RenderNode> + 'static,
756 {
757 TreeMapResult::PendingChildren {
758 children: handle.children.borrow().clone(),
759 cons: Box::new(f),
760 prefn: None,
761 postfn: None,
762 }
763 }
764
765 /// Prepend a FragmentStart (or analogous) marker to an existing
766 /// RenderNode.
prepend_marker(prefix: RenderNode, mut orig: RenderNode) -> RenderNode767 fn prepend_marker(prefix: RenderNode, mut orig: RenderNode) -> RenderNode {
768 use RenderNodeInfo::*;
769 html_trace!("prepend_marker({:?}, {:?})", prefix, orig);
770
771 match orig.info {
772 // For block elements such as Block and Div, we need to insert
773 // the node at the front of their children array, otherwise
774 // the renderer is liable to drop the fragment start marker
775 // _before_ the new line indicating the end of the previous
776 // paragraph.
777 //
778 // For Container, we do the same thing just to make the data
779 // less pointlessly nested.
780 Block(ref mut children)
781 | Div(ref mut children)
782 | Pre(ref mut children)
783 | BlockQuote(ref mut children)
784 | Container(ref mut children)
785 | TableCell(RenderTableCell {
786 content: ref mut children,
787 ..
788 }) => {
789 children.insert(0, prefix);
790 // Now return orig, but we do that outside the match so
791 // that we've given back the borrowed ref 'children'.
792 }
793
794 // For table rows and tables, push down if there's any content.
795 TableRow(ref mut rrow) => {
796 // If the row is empty, then there isn't really anything
797 // to attach the fragment start to.
798 if rrow.cells.len() > 0 {
799 rrow.cells[0].content.insert(0, prefix);
800 }
801 }
802
803 TableBody(ref mut rows) | Table(RenderTable { ref mut rows, .. }) => {
804 // If the row is empty, then there isn't really anything
805 // to attach the fragment start to.
806 if rows.len() > 0 {
807 let rrow = &mut rows[0];
808 if rrow.cells.len() > 0 {
809 rrow.cells[0].content.insert(0, prefix);
810 }
811 }
812 }
813
814 // For anything else, just make a new Container with the
815 // prefix node and the original one.
816 _ => {
817 let result = RenderNode::new(Container(vec![prefix, orig]));
818 html_trace!("prepend_marker() -> {:?}", result);
819 return result;
820 }
821 }
822 html_trace!("prepend_marker() -> {:?}", &orig);
823 orig
824 }
825
process_dom_node<'a, 'b, T: Write>( handle: Handle, err_out: &'b mut T, ) -> TreeMapResult<'a, (), Handle, RenderNode>826 fn process_dom_node<'a, 'b, T: Write>(
827 handle: Handle,
828 err_out: &'b mut T,
829 ) -> TreeMapResult<'a, (), Handle, RenderNode> {
830 use RenderNodeInfo::*;
831 use TreeMapResult::*;
832
833 match handle.clone().data {
834 Document => pending(handle, |&mut (), cs| Some(RenderNode::new(Container(cs)))),
835 Comment { .. } => Nothing,
836 Element {
837 ref name,
838 ref attrs,
839 ..
840 } => {
841 let mut frag_from_name_attr = false;
842 let result = match name.expanded() {
843 expanded_name!(html "html")
844 | expanded_name!(html "span")
845 | expanded_name!(html "body") => {
846 /* process children, but don't add anything */
847 pending(handle, |_, cs| Some(RenderNode::new(Container(cs))))
848 }
849 expanded_name!(html "link")
850 | expanded_name!(html "meta")
851 | expanded_name!(html "hr")
852 | expanded_name!(html "script")
853 | expanded_name!(html "style")
854 | expanded_name!(html "head") => {
855 /* Ignore the head and its children */
856 Nothing
857 }
858 expanded_name!(html "a") => {
859 let borrowed = attrs.borrow();
860 let mut target = None;
861 frag_from_name_attr = true;
862 for attr in borrowed.iter() {
863 if &attr.name.local == "href" {
864 target = Some(&*attr.value);
865 break;
866 }
867 }
868 PendingChildren {
869 children: handle.children.borrow().clone(),
870 cons: if let Some(href) = target {
871 // We need the closure to own the string it's going to use.
872 // Unfortunately that means we ideally want FnOnce; but
873 // that doesn't yet work in a Box. Box<FnBox()> does, but
874 // is unstable. So we'll just move a string in and clone
875 // it on use.
876 let href: String = href.into();
877 Box::new(move |_, cs| Some(RenderNode::new(Link(href.clone(), cs))))
878 } else {
879 Box::new(|_, cs| Some(RenderNode::new(Container(cs))))
880 },
881 prefn: None,
882 postfn: None,
883 }
884 }
885 expanded_name!(html "em") => pending(handle, |_, cs| Some(RenderNode::new(Em(cs)))),
886 expanded_name!(html "strong") => {
887 pending(handle, |_, cs| Some(RenderNode::new(Strong(cs))))
888 }
889 expanded_name!(html "s") => {
890 pending(handle, |_, cs| Some(RenderNode::new(Strikeout(cs))))
891 }
892 expanded_name!(html "code") => {
893 pending(handle, |_, cs| Some(RenderNode::new(Code(cs))))
894 }
895 expanded_name!(html "img") => {
896 let borrowed = attrs.borrow();
897 let mut title = None;
898 for attr in borrowed.iter() {
899 if &attr.name.local == "alt" {
900 title = Some(&*attr.value);
901 break;
902 }
903 }
904 if let Some(title) = title {
905 Finished(RenderNode::new(Img(title.into())))
906 } else {
907 Nothing
908 }
909 }
910 expanded_name!(html "h1")
911 | expanded_name!(html "h2")
912 | expanded_name!(html "h3")
913 | expanded_name!(html "h4") => {
914 let level: usize = name.local[1..].parse().unwrap();
915 pending(handle, move |_, cs| {
916 Some(RenderNode::new(Header(level, cs)))
917 })
918 }
919 expanded_name!(html "p") => {
920 pending(handle, |_, cs| Some(RenderNode::new(Block(cs))))
921 }
922 expanded_name!(html "div") => {
923 pending(handle, |_, cs| Some(RenderNode::new(Div(cs))))
924 }
925 expanded_name!(html "pre") => {
926 pending(handle, |_, cs| Some(RenderNode::new(Pre(cs))))
927 }
928 expanded_name!(html "br") => Finished(RenderNode::new(Break)),
929 expanded_name!(html "table") => table_to_render_tree(handle.clone(), err_out),
930 expanded_name!(html "thead") | expanded_name!(html "tbody") => {
931 tbody_to_render_tree(handle.clone(), err_out)
932 }
933 expanded_name!(html "tr") => tr_to_render_tree(handle.clone(), err_out),
934 expanded_name!(html "th") | expanded_name!(html "td") => {
935 td_to_render_tree(handle.clone(), err_out)
936 }
937 expanded_name!(html "blockquote") => {
938 pending(handle, |_, cs| Some(RenderNode::new(BlockQuote(cs))))
939 }
940 expanded_name!(html "ul") => Finished(RenderNode::new(Ul(
941 list_children_to_render_nodes(handle.clone(), err_out),
942 ))),
943 expanded_name!(html "ol") => {
944 let borrowed = attrs.borrow();
945 let mut start = 1;
946 for attr in borrowed.iter() {
947 if &attr.name.local == "start" {
948 start = attr.value.parse().ok().unwrap_or(1);
949 break;
950 }
951 }
952
953 Finished(RenderNode::new(Ol(
954 start,
955 list_children_to_render_nodes(handle.clone(), err_out),
956 )))
957 }
958 expanded_name!(html "dl") => Finished(RenderNode::new(Dl(
959 desc_list_children_to_render_nodes(handle.clone(), err_out),
960 ))),
961 _ => {
962 html_trace!("Unhandled element: {:?}\n", name.local);
963 pending(handle, |_, cs| Some(RenderNode::new(Container(cs))))
964 //None
965 }
966 };
967
968 let mut fragment = None;
969 let borrowed = attrs.borrow();
970 for attr in borrowed.iter() {
971 if &attr.name.local == "id" || (frag_from_name_attr && &attr.name.local == "name") {
972 fragment = Some(attr.value.to_string());
973 break;
974 }
975 }
976
977 if let Some(fragname) = fragment {
978 match result {
979 Finished(node) => {
980 Finished(prepend_marker(RenderNode::new(FragStart(fragname)), node))
981 }
982 Nothing => Finished(RenderNode::new(FragStart(fragname))),
983 PendingChildren {
984 children,
985 cons,
986 prefn,
987 postfn,
988 } => {
989 let fragname: String = fragname.into();
990 PendingChildren {
991 children: children,
992 prefn: prefn,
993 postfn: postfn,
994 cons: Box::new(move |ctx, ch| {
995 let fragnode = RenderNode::new(FragStart(fragname.clone()));
996 match cons(ctx, ch) {
997 None => Some(fragnode),
998 Some(node) => Some(prepend_marker(fragnode, node)),
999 }
1000 }),
1001 }
1002 }
1003 }
1004 } else {
1005 result
1006 }
1007 }
1008 markup5ever_rcdom::NodeData::Text { contents: ref tstr } => {
1009 Finished(RenderNode::new(Text((&*tstr.borrow()).into())))
1010 }
1011 _ => {
1012 // NodeData doesn't have a Debug impl.
1013 write!(err_out, "Unhandled node type.\n").unwrap();
1014 Nothing
1015 }
1016 }
1017 }
1018
1019 /// Context to use during tree parsing.
1020 /// This mainly gives access to a Renderer, but needs to be able to push
1021 /// new ones on for nested structures.
1022 struct BuilderStack<R: Renderer> {
1023 builders: Vec<R>,
1024 }
1025
1026 impl<R: Renderer> BuilderStack<R> {
new(builder: R) -> BuilderStack<R>1027 pub fn new(builder: R) -> BuilderStack<R> {
1028 BuilderStack {
1029 builders: vec![builder],
1030 }
1031 }
1032
1033 /// Push a new builder onto the stack
push(&mut self, builder: R)1034 pub fn push(&mut self, builder: R) {
1035 self.builders.push(builder);
1036 }
1037
1038 /// Pop off the top builder and return it.
1039 /// Panics if empty
pop(&mut self) -> R1040 pub fn pop(&mut self) -> R {
1041 self.builders.pop().unwrap()
1042 }
1043
1044 /// Pop off the only builder and return it.
1045 /// panics if there aren't exactly 1 available.
into_inner(mut self) -> R1046 pub fn into_inner(mut self) -> R {
1047 assert_eq!(self.builders.len(), 1);
1048 self.builders.pop().unwrap()
1049 }
1050 }
1051
1052 impl<R: Renderer> Deref for BuilderStack<R> {
1053 type Target = R;
deref(&self) -> &R1054 fn deref(&self) -> &R {
1055 self.builders.last().expect("Underflow in BuilderStack")
1056 }
1057 }
1058
1059 impl<R: Renderer> DerefMut for BuilderStack<R> {
deref_mut(&mut self) -> &mut R1060 fn deref_mut(&mut self) -> &mut R {
1061 self.builders.last_mut().expect("Underflow in BuilderStack")
1062 }
1063 }
1064
render_tree_to_string<T: Write, R: Renderer>( builder: R, tree: RenderNode, err_out: &mut T, ) -> R1065 fn render_tree_to_string<T: Write, R: Renderer>(
1066 builder: R,
1067 tree: RenderNode,
1068 err_out: &mut T,
1069 ) -> R {
1070 /* Phase 1: get size estimates. */
1071 tree_map_reduce(&mut (), &tree, |_, node| precalc_size_estimate(&node));
1072
1073 /* Phase 2: actually render. */
1074 let mut bs = BuilderStack::new(builder);
1075 tree_map_reduce(&mut bs, tree, |builders, node| {
1076 do_render_node(builders, node, err_out)
1077 });
1078 bs.into_inner()
1079 }
1080
pending2< 'a, R: Renderer, F: Fn(&mut BuilderStack<R>, Vec<Option<R>>) -> Option<Option<R>> + 'static, >( children: Vec<RenderNode>, f: F, ) -> TreeMapResult<'a, BuilderStack<R>, RenderNode, Option<R>>1081 fn pending2<
1082 'a,
1083 R: Renderer,
1084 F: Fn(&mut BuilderStack<R>, Vec<Option<R>>) -> Option<Option<R>> + 'static,
1085 >(
1086 children: Vec<RenderNode>,
1087 f: F,
1088 ) -> TreeMapResult<'a, BuilderStack<R>, RenderNode, Option<R>> {
1089 TreeMapResult::PendingChildren {
1090 children: children,
1091 cons: Box::new(f),
1092 prefn: None,
1093 postfn: None,
1094 }
1095 }
1096
do_render_node<'a, 'b, T: Write, R: Renderer>( builder: &mut BuilderStack<R>, tree: RenderNode, err_out: &'b mut T, ) -> TreeMapResult<'static, BuilderStack<R>, RenderNode, Option<R>>1097 fn do_render_node<'a, 'b, T: Write, R: Renderer>(
1098 builder: &mut BuilderStack<R>,
1099 tree: RenderNode,
1100 err_out: &'b mut T,
1101 ) -> TreeMapResult<'static, BuilderStack<R>, RenderNode, Option<R>> {
1102 html_trace!("do_render_node({:?}", tree);
1103 use RenderNodeInfo::*;
1104 use TreeMapResult::*;
1105 match tree.info {
1106 Text(ref tstr) => {
1107 builder.add_inline_text(tstr);
1108 Finished(None)
1109 }
1110 Container(children) => pending2(children, |_, _| Some(None)),
1111 Link(href, children) => {
1112 builder.start_link(&href);
1113 pending2(children, |builder: &mut BuilderStack<R>, _| {
1114 builder.end_link();
1115 Some(None)
1116 })
1117 }
1118 Em(children) => {
1119 builder.start_emphasis();
1120 pending2(children, |builder: &mut BuilderStack<R>, _| {
1121 builder.end_emphasis();
1122 Some(None)
1123 })
1124 }
1125 Strong(children) => {
1126 builder.start_strong();
1127 pending2(children, |builder: &mut BuilderStack<R>, _| {
1128 builder.end_strong();
1129 Some(None)
1130 })
1131 }
1132 Strikeout(children) => {
1133 builder.start_strikeout();
1134 pending2(children, |builder: &mut BuilderStack<R>, _| {
1135 builder.end_strikeout();
1136 Some(None)
1137 })
1138 }
1139 Code(children) => {
1140 builder.start_code();
1141 pending2(children, |builder: &mut BuilderStack<R>, _| {
1142 builder.end_code();
1143 Some(None)
1144 })
1145 }
1146 Img(title) => {
1147 builder.add_image(&title);
1148 Finished(None)
1149 }
1150 Block(children) => {
1151 builder.start_block();
1152 pending2(children, |builder: &mut BuilderStack<R>, _| {
1153 builder.end_block();
1154 Some(None)
1155 })
1156 }
1157 Header(level, children) => {
1158 let min_width = max(builder.width(), 1 + level + 1);
1159 let sub_builder = builder.new_sub_renderer(min_width - (1 + level));
1160 builder.push(sub_builder);
1161 pending2(children, move |builder: &mut BuilderStack<R>, _| {
1162 let sub_builder = builder.pop();
1163
1164 let qs: String = "#".repeat(level) + " ";
1165
1166 builder.start_block();
1167 builder.append_subrender(sub_builder, repeat(&qs[..]));
1168 builder.end_block();
1169 Some(None)
1170 })
1171 }
1172 Div(children) => {
1173 builder.new_line();
1174 pending2(children, |builder: &mut BuilderStack<R>, _| {
1175 builder.new_line();
1176 Some(None)
1177 })
1178 }
1179 Pre(children) => {
1180 builder.new_line();
1181 builder.start_pre();
1182 pending2(children, |builder: &mut BuilderStack<R>, _| {
1183 builder.new_line();
1184 builder.end_pre();
1185 Some(None)
1186 })
1187 }
1188 BlockQuote(children) => {
1189 let sub_builder = builder.new_sub_renderer(builder.width() - 2);
1190 builder.push(sub_builder);
1191 pending2(children, |builder: &mut BuilderStack<R>, _| {
1192 let sub_builder = builder.pop();
1193
1194 builder.start_block();
1195 builder.append_subrender(sub_builder, repeat("> "));
1196 builder.end_block();
1197 Some(None)
1198 })
1199 }
1200 Ul(items) => {
1201 builder.start_block();
1202
1203 TreeMapResult::PendingChildren {
1204 children: items,
1205 cons: Box::new(|_, _| Some(None)),
1206 prefn: Some(Box::new(|builder: &mut BuilderStack<R>, _| {
1207 let sub_builder = builder.new_sub_renderer(builder.width() - 2);
1208 builder.push(sub_builder);
1209 })),
1210 postfn: Some(Box::new(|builder: &mut BuilderStack<R>, _| {
1211 let sub_builder = builder.pop();
1212 builder.append_subrender(sub_builder, once("* ").chain(repeat(" ")));
1213 })),
1214 }
1215 }
1216 Ol(start, items) => {
1217 builder.start_block();
1218
1219 let num_items = items.len();
1220
1221 // The prefix width could be at either end if the start is negative.
1222 let min_number = start;
1223 // Assumption: num_items can't overflow isize.
1224 let max_number = start + (num_items as i64) - 1;
1225 let prefix_width_min = format!("{}", min_number).len() + 2;
1226 let prefix_width_max = format!("{}", max_number).len() + 2;
1227 let prefix_width = max(prefix_width_min, prefix_width_max);
1228 let prefixn = format!("{: <width$}", "", width = prefix_width);
1229 let i: Cell<_> = Cell::new(start);
1230
1231 TreeMapResult::PendingChildren {
1232 children: items,
1233 cons: Box::new(|_, _| Some(None)),
1234 prefn: Some(Box::new(move |builder: &mut BuilderStack<R>, _| {
1235 let sub_builder = builder.new_sub_renderer(builder.width() - prefix_width);
1236 builder.push(sub_builder);
1237 })),
1238 postfn: Some(Box::new(move |builder: &mut BuilderStack<R>, _| {
1239 let sub_builder = builder.pop();
1240 let prefix1 = format!("{}.", i.get());
1241 let prefix1 = format!("{: <width$}", prefix1, width = prefix_width);
1242
1243 builder.append_subrender(
1244 sub_builder,
1245 once(prefix1.as_str()).chain(repeat(prefixn.as_str())),
1246 );
1247 i.set(i.get() + 1);
1248 })),
1249 }
1250 }
1251 Dl(items) => {
1252 builder.start_block();
1253
1254 TreeMapResult::PendingChildren {
1255 children: items,
1256 cons: Box::new(|_, _| Some(None)),
1257 prefn: None,
1258 postfn: None,
1259 }
1260 }
1261 Dt(children) => {
1262 builder.new_line();
1263 builder.start_emphasis();
1264 pending2(children, |builder: &mut BuilderStack<R>, _| {
1265 builder.end_emphasis();
1266 Some(None)
1267 })
1268 }
1269 Dd(children) => {
1270 let sub_builder = builder.new_sub_renderer(builder.width() - 2);
1271 builder.push(sub_builder);
1272 pending2(children, |builder: &mut BuilderStack<R>, _| {
1273 let sub_builder = builder.pop();
1274 builder.append_subrender(sub_builder, repeat(" "));
1275 Some(None)
1276 })
1277 }
1278 Break => {
1279 builder.new_line_hard();
1280 Finished(None)
1281 }
1282 Table(tab) => render_table_tree(builder.deref_mut(), tab, err_out),
1283 TableRow(row) => render_table_row(builder.deref_mut(), row, err_out),
1284 TableBody(_) => unimplemented!("Unexpected TableBody while rendering"),
1285 TableCell(cell) => render_table_cell(builder.deref_mut(), cell, err_out),
1286 FragStart(fragname) => {
1287 builder.record_frag_start(&fragname);
1288 Finished(None)
1289 }
1290 }
1291 }
1292
render_table_tree<T: Write, R: Renderer>( builder: &mut R, table: RenderTable, _err_out: &mut T, ) -> TreeMapResult<'static, BuilderStack<R>, RenderNode, Option<R>>1293 fn render_table_tree<T: Write, R: Renderer>(
1294 builder: &mut R,
1295 table: RenderTable,
1296 _err_out: &mut T,
1297 ) -> TreeMapResult<'static, BuilderStack<R>, RenderNode, Option<R>> {
1298 /* Now lay out the table. */
1299 let num_columns = table.num_columns;
1300
1301 /* Heuristic: scale the column widths according to how much content there is. */
1302 let mut col_sizes: Vec<SizeEstimate> = vec![Default::default(); num_columns];
1303
1304 for row in table.rows() {
1305 let mut colno = 0;
1306 for cell in row.cells() {
1307 // FIXME: get_size_estimate is still recursive.
1308 let mut estimate = cell.get_size_estimate();
1309 // If the cell has a colspan>1, then spread its size between the
1310 // columns.
1311 estimate.size /= cell.colspan;
1312 estimate.min_width /= cell.colspan;
1313 for i in 0..cell.colspan {
1314 col_sizes[colno + i] = (col_sizes[colno + i]).add(estimate);
1315 }
1316 colno += cell.colspan;
1317 }
1318 }
1319 let tot_size: usize = col_sizes.iter().map(|est| est.size).sum();
1320 let width = builder.width();
1321 let mut col_widths: Vec<usize> = col_sizes
1322 .iter()
1323 .map(|sz| {
1324 if sz.size == 0 {
1325 0
1326 } else {
1327 max(sz.size * width / tot_size, sz.min_width)
1328 }
1329 })
1330 .collect();
1331 /* The minimums may have put the total width too high */
1332 while col_widths.iter().cloned().sum::<usize>() > width {
1333 let (i, _) = col_widths
1334 .iter()
1335 .cloned()
1336 .enumerate()
1337 .max_by_key(|&(colno, width)| {
1338 (
1339 width.saturating_sub(col_sizes[colno].min_width),
1340 width,
1341 usize::max_value() - colno,
1342 )
1343 })
1344 .unwrap();
1345 col_widths[i] -= 1;
1346 }
1347 if !col_widths.is_empty() {
1348 // Slight fudge; we're not drawing extreme edges, so one of the columns
1349 // can gets a free character cell from not having a border.
1350 // make it the last.
1351 let last = col_widths.len() - 1;
1352 col_widths[last] += 1;
1353 }
1354
1355 builder.start_block();
1356
1357 builder.add_horizontal_border();
1358
1359 TreeMapResult::PendingChildren {
1360 children: table.into_rows(col_widths),
1361 cons: Box::new(|_, _| Some(None)),
1362 prefn: Some(Box::new(|_, _| {})),
1363 postfn: Some(Box::new(|_, _| {})),
1364 }
1365 }
1366
render_table_row<T: Write, R: Renderer>( _builder: &mut R, row: RenderTableRow, _err_out: &mut T, ) -> TreeMapResult<'static, BuilderStack<R>, RenderNode, Option<R>>1367 fn render_table_row<T: Write, R: Renderer>(
1368 _builder: &mut R,
1369 row: RenderTableRow,
1370 _err_out: &mut T,
1371 ) -> TreeMapResult<'static, BuilderStack<R>, RenderNode, Option<R>> {
1372 TreeMapResult::PendingChildren {
1373 children: row.into_cells(),
1374 cons: Box::new(|builders, children| {
1375 let children: Vec<_> = children.into_iter().map(Option::unwrap).collect();
1376 if children.iter().any(|c| !c.empty()) {
1377 builders.append_columns_with_borders(children, true);
1378 }
1379 Some(None)
1380 }),
1381 prefn: Some(Box::new(|builder: &mut BuilderStack<R>, node| {
1382 if let RenderNodeInfo::TableCell(ref cell) = node.info {
1383 let sub_builder = builder.new_sub_renderer(cell.col_width.unwrap());
1384 builder.push(sub_builder);
1385 } else {
1386 panic!()
1387 }
1388 })),
1389 postfn: Some(Box::new(|_builder: &mut BuilderStack<R>, _| {})),
1390 }
1391 }
1392
render_table_cell<T: Write, R: Renderer>( _builder: &mut R, cell: RenderTableCell, _err_out: &mut T, ) -> TreeMapResult<'static, BuilderStack<R>, RenderNode, Option<R>>1393 fn render_table_cell<T: Write, R: Renderer>(
1394 _builder: &mut R,
1395 cell: RenderTableCell,
1396 _err_out: &mut T,
1397 ) -> TreeMapResult<'static, BuilderStack<R>, RenderNode, Option<R>> {
1398 pending2(cell.content, |builder: &mut BuilderStack<R>, _| {
1399 let sub_builder = builder.pop();
1400 Some(Some(sub_builder))
1401 })
1402 }
1403
1404 /// The structure of an HTML document that can be rendered using a [`TextDecorator`][].
1405 ///
1406 /// [`TextDecorator`]: render/text_renderer/trait.TextDecorator.html
1407
1408 #[derive(Clone, Debug)]
1409 pub struct RenderTree(RenderNode);
1410
1411 impl RenderTree {
1412 /// Render this document using the given `decorator` and wrap it to `width` columns.
render<D: TextDecorator>(self, width: usize, decorator: D) -> RenderedText<D>1413 pub fn render<D: TextDecorator>(self, width: usize, decorator: D) -> RenderedText<D> {
1414 let builder = TextRenderer::new(width, decorator);
1415 let builder = render_tree_to_string(builder, self.0, &mut Discard {});
1416 RenderedText(builder)
1417 }
1418
1419 /// Render this document as plain text using the [`PlainDecorator`][] and wrap it to `width`
1420 /// columns.
1421 ///
1422 /// [`PlainDecorator`]: render/text_renderer/struct.PlainDecorator.html
render_plain(self, width: usize) -> RenderedText<PlainDecorator>1423 pub fn render_plain(self, width: usize) -> RenderedText<PlainDecorator> {
1424 self.render(width, PlainDecorator::new())
1425 }
1426
1427 /// Render this document as rich text using the [`RichDecorator`][] and wrap it to `width`
1428 /// columns.
1429 ///
1430 /// [`RichDecorator`]: render/text_renderer/struct.RichDecorator.html
render_rich(self, width: usize) -> RenderedText<RichDecorator>1431 pub fn render_rich(self, width: usize) -> RenderedText<RichDecorator> {
1432 self.render(width, RichDecorator::new())
1433 }
1434 }
1435
1436 /// A rendered HTML document.
1437 pub struct RenderedText<D: TextDecorator>(TextRenderer<D>);
1438
1439 impl<D: TextDecorator> RenderedText<D> {
1440 /// Convert the rendered HTML document to a string.
into_string(self) -> String1441 pub fn into_string(self) -> String {
1442 self.0.into_string()
1443 }
1444
1445 /// Convert the rendered HTML document to a vector of lines with the annotations created by the
1446 /// decorator.
into_lines(self) -> Vec<TaggedLine<Vec<D::Annotation>>>1447 pub fn into_lines(self) -> Vec<TaggedLine<Vec<D::Annotation>>> {
1448 self.0
1449 .into_lines()
1450 .into_iter()
1451 .map(RenderLine::into_tagged_line)
1452 .collect()
1453 }
1454 }
1455
1456 /// Reads and parses HTML from `input` and prepares a render tree.
parse(mut input: impl io::Read) -> RenderTree1457 pub fn parse(mut input: impl io::Read) -> RenderTree {
1458 let opts = ParseOpts {
1459 tree_builder: TreeBuilderOpts {
1460 drop_doctype: true,
1461 ..Default::default()
1462 },
1463 ..Default::default()
1464 };
1465 let dom = parse_document(RcDom::default(), opts)
1466 .from_utf8()
1467 .read_from(&mut input)
1468 .unwrap();
1469 let render_tree = dom_to_render_tree(dom.document.clone(), &mut Discard {}).unwrap();
1470 RenderTree(render_tree)
1471 }
1472
1473 /// Reads HTML from `input`, decorates it using `decorator`, and
1474 /// returns a `String` with text wrapped to `width` columns.
from_read_with_decorator<R, D>(input: R, width: usize, decorator: D) -> String where R: io::Read, D: TextDecorator,1475 pub fn from_read_with_decorator<R, D>(input: R, width: usize, decorator: D) -> String
1476 where
1477 R: io::Read,
1478 D: TextDecorator,
1479 {
1480 parse(input).render(width, decorator).into_string()
1481 }
1482
1483 /// Reads HTML from `input`, and returns a `String` with text wrapped to
1484 /// `width` columns.
from_read<R>(input: R, width: usize) -> String where R: io::Read,1485 pub fn from_read<R>(input: R, width: usize) -> String
1486 where
1487 R: io::Read,
1488 {
1489 let decorator = PlainDecorator::new();
1490 from_read_with_decorator(input, width, decorator)
1491 }
1492
1493 /// Reads HTML from `input`, and returns text wrapped to `width` columns.
1494 /// The text is returned as a `Vec<TaggedLine<_>>`; the annotations are vectors
1495 /// of `RichAnnotation`. The "outer" annotation comes first in the `Vec`.
from_read_rich<R>(input: R, width: usize) -> Vec<TaggedLine<Vec<RichAnnotation>>> where R: io::Read,1496 pub fn from_read_rich<R>(input: R, width: usize) -> Vec<TaggedLine<Vec<RichAnnotation>>>
1497 where
1498 R: io::Read,
1499 {
1500 parse(input)
1501 .render(width, RichDecorator::new())
1502 .into_lines()
1503 }
1504
1505 #[cfg(test)]
1506 mod tests;
1507