1 #include <qpdf/ContentNormalizer.hh> 2 #include <qpdf/QUtil.hh> 3 ContentNormalizer()4ContentNormalizer::ContentNormalizer() : 5 any_bad_tokens(false), 6 last_token_was_bad(false) 7 { 8 } 9 ~ContentNormalizer()10ContentNormalizer::~ContentNormalizer() 11 { 12 } 13 14 void handleToken(QPDFTokenizer::Token const & token)15ContentNormalizer::handleToken(QPDFTokenizer::Token const& token) 16 { 17 std::string value = token.getRawValue(); 18 QPDFTokenizer::token_type_e token_type = token.getType(); 19 20 if (token_type == QPDFTokenizer::tt_bad) 21 { 22 this->any_bad_tokens = true; 23 this->last_token_was_bad = true; 24 } 25 else if (token_type != QPDFTokenizer::tt_eof) 26 { 27 this->last_token_was_bad = false; 28 } 29 30 switch (token_type) 31 { 32 case QPDFTokenizer::tt_space: 33 { 34 size_t len = value.length(); 35 for (size_t i = 0; i < len; ++i) 36 { 37 char ch = value.at(i); 38 if (ch == '\r') 39 { 40 if ((i + 1 < len) && (value.at(i + 1) == '\n')) 41 { 42 // ignore 43 } 44 else 45 { 46 write("\n"); 47 } 48 } 49 else 50 { 51 write(&ch, 1); 52 } 53 } 54 } 55 break; 56 57 case QPDFTokenizer::tt_string: 58 // Replacing string and name tokens in this way normalizes 59 // their representation as this will automatically handle 60 // quoting of unprintable characters, etc. 61 writeToken(QPDFTokenizer::Token( 62 QPDFTokenizer::tt_string, token.getValue())); 63 break; 64 65 case QPDFTokenizer::tt_name: 66 writeToken(QPDFTokenizer::Token( 67 QPDFTokenizer::tt_name, token.getValue())); 68 break; 69 70 default: 71 writeToken(token); 72 break; 73 } 74 75 value = token.getRawValue(); 76 if (((token_type == QPDFTokenizer::tt_string) || 77 (token_type == QPDFTokenizer::tt_name)) && 78 ((value.find('\r') != std::string::npos) || 79 (value.find('\n') != std::string::npos))) 80 { 81 write("\n"); 82 } 83 } 84 85 bool anyBadTokens() const86ContentNormalizer::anyBadTokens() const 87 { 88 return this->any_bad_tokens; 89 } 90 91 bool lastTokenWasBad() const92ContentNormalizer::lastTokenWasBad()const 93 { 94 return this->last_token_was_bad; 95 } 96