1 #include "catch.hpp" 2 #include "internal/catch_xmlwriter.h" 3 4 #include <sstream> 5 encode(std::string const & str,Catch::XmlEncode::ForWhat forWhat=Catch::XmlEncode::ForTextNodes)6inline std::string encode( std::string const& str, Catch::XmlEncode::ForWhat forWhat = Catch::XmlEncode::ForTextNodes ) { 7 std::ostringstream oss; 8 oss << Catch::XmlEncode( str, forWhat ); 9 return oss.str(); 10 } 11 12 TEST_CASE( "XmlEncode", "[XML]" ) { 13 SECTION( "normal string" ) { 14 REQUIRE( encode( "normal string" ) == "normal string" ); 15 } 16 SECTION( "empty string" ) { 17 REQUIRE( encode( "" ) == "" ); 18 } 19 SECTION( "string with ampersand" ) { 20 REQUIRE( encode( "smith & jones" ) == "smith & jones" ); 21 } 22 SECTION( "string with less-than" ) { 23 REQUIRE( encode( "smith < jones" ) == "smith < jones" ); 24 } 25 SECTION( "string with greater-than" ) { 26 REQUIRE( encode( "smith > jones" ) == "smith > jones" ); 27 REQUIRE( encode( "smith ]]> jones" ) == "smith ]]> jones" ); 28 } 29 SECTION( "string with quotes" ) { 30 std::string stringWithQuotes = "don't \"quote\" me on that"; 31 REQUIRE( encode( stringWithQuotes ) == stringWithQuotes ); 32 REQUIRE( encode( stringWithQuotes, Catch::XmlEncode::ForAttributes ) == "don't "quote" me on that" ); 33 } 34 SECTION( "string with control char (1)" ) { 35 REQUIRE( encode( "[\x01]" ) == "[\\x01]" ); 36 } 37 SECTION( "string with control char (x7F)" ) { 38 REQUIRE( encode( "[\x7F]" ) == "[\\x7F]" ); 39 } 40 } 41 42 // Thanks to Peter Bindels (dascandy) for some of the tests 43 TEST_CASE("XmlEncode: UTF-8", "[XML][UTF-8][approvals]") { 44 #define ESC(lit) (char*)(lit) 45 SECTION("Valid utf-8 strings") { 46 CHECK(encode(ESC(u8"Here be ")) == ESC(u8"Here be ")); 47 CHECK(encode(ESC(u8"šš")) == ESC(u8"šš")); 48 49 CHECK(encode("\xDF\xBF") == "\xDF\xBF"); // 0x7FF 50 CHECK(encode("\xE0\xA0\x80") == "\xE0\xA0\x80"); // 0x800 51 CHECK(encode("\xED\x9F\xBF") == "\xED\x9F\xBF"); // 0xD7FF 52 CHECK(encode("\xEE\x80\x80") == "\xEE\x80\x80"); // 0xE000 53 CHECK(encode("\xEF\xBF\xBF") == "\xEF\xBF\xBF"); // 0xFFFF 54 CHECK(encode("\xF0\x90\x80\x80") == "\xF0\x90\x80\x80"); // 0x10000 55 CHECK(encode("\xF4\x8F\xBF\xBF") == "\xF4\x8F\xBF\xBF"); // 0x10FFFF 56 } 57 SECTION("Invalid utf-8 strings") { 58 SECTION("Various broken strings") { 59 CHECK(encode(ESC("Here \xFF be \xF0\x9F\x91\xBE")) == ESC(u8"Here \\xFF be ")); 60 CHECK(encode("\xFF") == "\\xFF"); 61 CHECK(encode("\xC5\xC5\xA0") == ESC(u8"\\xC5Š")); 62 CHECK(encode("\xF4\x90\x80\x80") == ESC(u8"\\xF4\\x90\\x80\\x80")); // 0x110000 -- out of unicode range 63 } 64 65 SECTION("Overlong encodings") { 66 CHECK(encode("\xC0\x80") == "\\xC0\\x80"); // \0 67 CHECK(encode("\xF0\x80\x80\x80") == "\\xF0\\x80\\x80\\x80"); // Super-over-long \0 68 CHECK(encode("\xC1\xBF") == "\\xC1\\xBF"); // ASCII char as UTF-8 (0x7F) 69 CHECK(encode("\xE0\x9F\xBF") == "\\xE0\\x9F\\xBF"); // 0x7FF 70 CHECK(encode("\xF0\x8F\xBF\xBF") == "\\xF0\\x8F\\xBF\\xBF"); // 0xFFFF 71 } 72 73 // Note that we actually don't modify surrogate pairs, as we do not do strict checking 74 SECTION("Surrogate pairs") { 75 CHECK(encode("\xED\xA0\x80") == "\xED\xA0\x80"); // Invalid surrogate half 0xD800 76 CHECK(encode("\xED\xAF\xBF") == "\xED\xAF\xBF"); // Invalid surrogate half 0xDBFF 77 CHECK(encode("\xED\xB0\x80") == "\xED\xB0\x80"); // Invalid surrogate half 0xDC00 78 CHECK(encode("\xED\xBF\xBF") == "\xED\xBF\xBF"); // Invalid surrogate half 0xDFFF 79 } 80 81 SECTION("Invalid start byte") { 82 CHECK(encode("\x80") == "\\x80"); 83 CHECK(encode("\x81") == "\\x81"); 84 CHECK(encode("\xBC") == "\\xBC"); 85 CHECK(encode("\xBF") == "\\xBF"); 86 // Out of range 87 CHECK(encode("\xF5\x80\x80\x80") == "\\xF5\\x80\\x80\\x80"); 88 CHECK(encode("\xF6\x80\x80\x80") == "\\xF6\\x80\\x80\\x80"); 89 CHECK(encode("\xF7\x80\x80\x80") == "\\xF7\\x80\\x80\\x80"); 90 } 91 92 SECTION("Missing continuation byte(s)") { 93 // Missing first continuation byte 94 CHECK(encode("\xDE") == "\\xDE"); 95 CHECK(encode("\xDF") == "\\xDF"); 96 CHECK(encode("\xE0") == "\\xE0"); 97 CHECK(encode("\xEF") == "\\xEF"); 98 CHECK(encode("\xF0") == "\\xF0"); 99 CHECK(encode("\xF4") == "\\xF4"); 100 101 // Missing second continuation byte 102 CHECK(encode("\xE0\x80") == "\\xE0\\x80"); 103 CHECK(encode("\xE0\xBF") == "\\xE0\\xBF"); 104 CHECK(encode("\xE1\x80") == "\\xE1\\x80"); 105 CHECK(encode("\xF0\x80") == "\\xF0\\x80"); 106 CHECK(encode("\xF4\x80") == "\\xF4\\x80"); 107 108 // Missing third continuation byte 109 CHECK(encode("\xF0\x80\x80") == "\\xF0\\x80\\x80"); 110 CHECK(encode("\xF4\x80\x80") == "\\xF4\\x80\\x80"); 111 } 112 } 113 #undef ESC 114 } 115