1 // Copyright 2014-2017 The html5ever Project Developers. See the
2 // COPYRIGHT file at the top-level directory of this distribution.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9 
10 extern crate html5ever;
11 
12 use std::default::Default;
13 use std::io;
14 
15 use html5ever::tendril::*;
16 use html5ever::tokenizer::BufferQueue;
17 use html5ever::tokenizer::{CharacterTokens, EndTag, NullCharacterToken, StartTag, TagToken};
18 use html5ever::tokenizer::{
19     ParseError, Token, TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts,
20 };
21 
/// Token sink state for pretty-printing a token stream to stdout.
///
/// Consecutive characters are grouped onto a single `CHAR : "..."` line;
/// `in_char_run` records whether such a line is currently open.
#[derive(Clone, Copy)]
struct TokenPrinter {
    /// `true` while a `CHAR : "` prefix has been printed without its closing quote.
    in_char_run: bool,
}

impl TokenPrinter {
    /// Records whether the next piece of output is a character, opening or
    /// closing the quoted character line when that status changes.
    ///
    /// * not in a run -> in a run: prints the `CHAR : "` prefix (no newline).
    /// * in a run -> not in a run: prints the closing quote and a newline.
    /// * otherwise nothing is printed.
    fn is_char(&mut self, is_char: bool) {
        let was_in_run = self.in_char_run;
        self.in_char_run = is_char;

        if is_char && !was_in_run {
            print!("CHAR : \"");
        } else if was_in_run && !is_char {
            println!("\"");
        }
    }

    /// Prints one character in its `escape_default` form, opening a
    /// character line first if none is open.
    fn do_char(&mut self, c: char) {
        self.is_char(true);
        // `EscapeDefault` implements `Display`, so it can be printed directly.
        print!("{}", c.escape_default());
    }
}
42 
43 impl TokenSink for TokenPrinter {
44     type Handle = ();
45 
process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()>46     fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
47         match token {
48             CharacterTokens(b) => {
49                 for c in b.chars() {
50                     self.do_char(c);
51                 }
52             },
53             NullCharacterToken => self.do_char('\0'),
54             TagToken(tag) => {
55                 self.is_char(false);
56                 // This is not proper HTML serialization, of course.
57                 match tag.kind {
58                     StartTag => print!("TAG  : <\x1b[32m{}\x1b[0m", tag.name),
59                     EndTag => print!("TAG  : <\x1b[31m/{}\x1b[0m", tag.name),
60                 }
61                 for attr in tag.attrs.iter() {
62                     print!(
63                         " \x1b[36m{}\x1b[0m='\x1b[34m{}\x1b[0m'",
64                         attr.name.local, attr.value
65                     );
66                 }
67                 if tag.self_closing {
68                     print!(" \x1b[31m/\x1b[0m");
69                 }
70                 println!(">");
71             },
72             ParseError(err) => {
73                 self.is_char(false);
74                 println!("ERROR: {}", err);
75             },
76             _ => {
77                 self.is_char(false);
78                 println!("OTHER: {:?}", token);
79             },
80         }
81         TokenSinkResult::Continue
82     }
83 }
84 
main()85 fn main() {
86     let mut sink = TokenPrinter { in_char_run: false };
87     let mut chunk = ByteTendril::new();
88     io::stdin().read_to_tendril(&mut chunk).unwrap();
89     let mut input = BufferQueue::new();
90     input.push_back(chunk.try_reinterpret().unwrap());
91 
92     let mut tok = Tokenizer::new(
93         sink,
94         TokenizerOpts {
95             profile: true,
96             ..Default::default()
97         },
98     );
99     let _ = tok.feed(&mut input);
100     assert!(input.is_empty());
101     tok.end();
102     sink.is_char(false);
103 }
104