1 #include "emitterutils.h" 2 #include "exp.h" 3 #include "indentation.h" 4 #include "yaml-cpp03/binary.h" 5 #include "yaml-cpp03/exceptions.h" 6 #include "stringsource.h" 7 #include <sstream> 8 #include <iomanip> 9 10 namespace YAML 11 { 12 namespace Utils 13 { 14 namespace { 15 enum {REPLACEMENT_CHARACTER = 0xFFFD}; 16 IsAnchorChar(int ch)17 bool IsAnchorChar(int ch) { // test for ns-anchor-char 18 switch (ch) { 19 case ',': case '[': case ']': case '{': case '}': // c-flow-indicator 20 case ' ': case '\t': // s-white 21 case 0xFEFF: // c-byte-order-mark 22 case 0xA: case 0xD: // b-char 23 return false; 24 case 0x85: 25 return true; 26 } 27 28 if (ch < 0x20) 29 return false; 30 31 if (ch < 0x7E) 32 return true; 33 34 if (ch < 0xA0) 35 return false; 36 if (ch >= 0xD800 && ch <= 0xDFFF) 37 return false; 38 if ((ch & 0xFFFE) == 0xFFFE) 39 return false; 40 if ((ch >= 0xFDD0) && (ch <= 0xFDEF)) 41 return false; 42 if (ch > 0x10FFFF) 43 return false; 44 45 return true; 46 } 47 Utf8BytesIndicated(char ch)48 int Utf8BytesIndicated(char ch) { 49 int byteVal = static_cast<unsigned char>(ch); 50 switch (byteVal >> 4) { 51 case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: 52 return 1; 53 case 12: case 13: 54 return 2; 55 case 14: 56 return 3; 57 case 15: 58 return 4; 59 default: 60 return -1; 61 } 62 } 63 IsTrailingByte(char ch)64 bool IsTrailingByte(char ch) { 65 return (ch & 0xC0) == 0x80; 66 } 67 GetNextCodePointAndAdvance(int & codePoint,std::string::const_iterator & first,std::string::const_iterator last)68 bool GetNextCodePointAndAdvance(int& codePoint, std::string::const_iterator& first, std::string::const_iterator last) { 69 if (first == last) 70 return false; 71 72 int nBytes = Utf8BytesIndicated(*first); 73 if (nBytes < 1) { 74 // Bad lead byte 75 ++first; 76 codePoint = REPLACEMENT_CHARACTER; 77 return true; 78 } 79 80 if (nBytes == 1) { 81 codePoint = *first++; 82 return true; 83 } 84 85 // Gather bits from trailing bytes 86 codePoint = static_cast<unsigned char>(*first) & ~(0xFF << (7 - nBytes)); 87 ++first; 88 --nBytes; 89 for (; nBytes > 0; ++first, --nBytes) { 90 if ((first == last) || !IsTrailingByte(*first)) { 91 codePoint = REPLACEMENT_CHARACTER; 92 break; 93 } 94 codePoint <<= 6; 95 codePoint |= *first & 0x3F; 96 } 97 98 // Check for illegal code points 99 if (codePoint > 0x10FFFF) 100 codePoint = REPLACEMENT_CHARACTER; 101 else if (codePoint >= 0xD800 && codePoint <= 0xDFFF) 102 codePoint = REPLACEMENT_CHARACTER; 103 else if ((codePoint & 0xFFFE) == 0xFFFE) 104 codePoint = REPLACEMENT_CHARACTER; 105 else if (codePoint >= 0xFDD0 && codePoint <= 0xFDEF) 106 codePoint = REPLACEMENT_CHARACTER; 107 return true; 108 } 109 WriteCodePoint(ostream & out,int codePoint)110 void WriteCodePoint(ostream& out, int codePoint) { 111 if (codePoint < 0 || codePoint > 0x10FFFF) { 112 codePoint = REPLACEMENT_CHARACTER; 113 } 114 if (codePoint < 0x7F) { 115 out << static_cast<char>(codePoint); 116 } else if (codePoint < 0x7FF) { 117 out << static_cast<char>(0xC0 | (codePoint >> 6)) 118 << static_cast<char>(0x80 | (codePoint & 0x3F)); 119 } else if (codePoint < 0xFFFF) { 120 out << static_cast<char>(0xE0 | (codePoint >> 12)) 121 << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F)) 122 << static_cast<char>(0x80 | (codePoint & 0x3F)); 123 } else { 124 out << static_cast<char>(0xF0 | (codePoint >> 18)) 125 << static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F)) 126 << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F)) 127 << static_cast<char>(0x80 | (codePoint & 0x3F)); 128 } 129 } 130 IsValidPlainScalar(const std::string & str,bool inFlow,bool allowOnlyAscii)131 bool IsValidPlainScalar(const std::string& str, bool inFlow, bool allowOnlyAscii) { 132 if(str.empty()) 133 return false; 134 135 // first check the start 136 const RegEx& start = (inFlow ? Exp::PlainScalarInFlow() : Exp::PlainScalar()); 137 if(!start.Matches(str)) 138 return false; 139 140 // and check the end for plain whitespace (which can't be faithfully kept in a plain scalar) 141 if(!str.empty() && *str.rbegin() == ' ') 142 return false; 143 144 // then check until something is disallowed 145 const RegEx& disallowed = (inFlow ? Exp::EndScalarInFlow() : Exp::EndScalar()) 146 || (Exp::BlankOrBreak() + Exp::Comment()) 147 || Exp::NotPrintable() 148 || Exp::Utf8_ByteOrderMark() 149 || Exp::Break() 150 || Exp::Tab(); 151 StringCharSource buffer(str.c_str(), str.size()); 152 while(buffer) { 153 if(disallowed.Matches(buffer)) 154 return false; 155 if(allowOnlyAscii && (0x7F < static_cast<unsigned char>(buffer[0]))) 156 return false; 157 ++buffer; 158 } 159 160 return true; 161 } 162 WriteDoubleQuoteEscapeSequence(ostream & out,int codePoint)163 void WriteDoubleQuoteEscapeSequence(ostream& out, int codePoint) { 164 static const char hexDigits[] = "0123456789abcdef"; 165 166 char escSeq[] = "\\U00000000"; 167 int digits = 8; 168 if (codePoint < 0xFF) { 169 escSeq[1] = 'x'; 170 digits = 2; 171 } else if (codePoint < 0xFFFF) { 172 escSeq[1] = 'u'; 173 digits = 4; 174 } 175 176 // Write digits into the escape sequence 177 int i = 2; 178 for (; digits > 0; --digits, ++i) { 179 escSeq[i] = hexDigits[(codePoint >> (4 * (digits - 1))) & 0xF]; 180 } 181 182 escSeq[i] = 0; // terminate with NUL character 183 out << escSeq; 184 } 185 WriteAliasName(ostream & out,const std::string & str)186 bool WriteAliasName(ostream& out, const std::string& str) { 187 int codePoint; 188 for(std::string::const_iterator i = str.begin(); 189 GetNextCodePointAndAdvance(codePoint, i, str.end()); 190 ) 191 { 192 if (!IsAnchorChar(codePoint)) 193 return false; 194 195 WriteCodePoint(out, codePoint); 196 } 197 return true; 198 } 199 } 200 WriteString(ostream & out,const std::string & str,bool inFlow,bool escapeNonAscii)201 bool WriteString(ostream& out, const std::string& str, bool inFlow, bool escapeNonAscii) 202 { 203 if(IsValidPlainScalar(str, inFlow, escapeNonAscii)) { 204 out << str; 205 return true; 206 } else 207 return WriteDoubleQuotedString(out, str, escapeNonAscii); 208 } 209 WriteSingleQuotedString(ostream & out,const std::string & str)210 bool WriteSingleQuotedString(ostream& out, const std::string& str) 211 { 212 out << "'"; 213 int codePoint; 214 for(std::string::const_iterator i = str.begin(); 215 GetNextCodePointAndAdvance(codePoint, i, str.end()); 216 ) 217 { 218 if (codePoint == '\n') 219 return false; // We can't handle a new line and the attendant indentation yet 220 221 if (codePoint == '\'') 222 out << "''"; 223 else 224 WriteCodePoint(out, codePoint); 225 } 226 out << "'"; 227 return true; 228 } 229 WriteDoubleQuotedString(ostream & out,const std::string & str,bool escapeNonAscii)230 bool WriteDoubleQuotedString(ostream& out, const std::string& str, bool escapeNonAscii) 231 { 232 out << "\""; 233 int codePoint; 234 for(std::string::const_iterator i = str.begin(); 235 GetNextCodePointAndAdvance(codePoint, i, str.end()); 236 ) 237 { 238 if (codePoint == '\"') 239 out << "\\\""; 240 else if (codePoint == '\\') 241 out << "\\\\"; 242 else if (codePoint < 0x20 || (codePoint >= 0x80 && codePoint <= 0xA0)) // Control characters and non-breaking space 243 WriteDoubleQuoteEscapeSequence(out, codePoint); 244 else if (codePoint == 0xFEFF) // Byte order marks (ZWNS) should be escaped (YAML 1.2, sec. 5.2) 245 WriteDoubleQuoteEscapeSequence(out, codePoint); 246 else if (escapeNonAscii && codePoint > 0x7E) 247 WriteDoubleQuoteEscapeSequence(out, codePoint); 248 else 249 WriteCodePoint(out, codePoint); 250 } 251 out << "\""; 252 return true; 253 } 254 WriteLiteralString(ostream & out,const std::string & str,int indent)255 bool WriteLiteralString(ostream& out, const std::string& str, int indent) 256 { 257 out << "|\n"; 258 out << IndentTo(indent); 259 int codePoint; 260 for(std::string::const_iterator i = str.begin(); 261 GetNextCodePointAndAdvance(codePoint, i, str.end()); 262 ) 263 { 264 if (codePoint == '\n') 265 out << "\n" << IndentTo(indent); 266 else 267 WriteCodePoint(out, codePoint); 268 } 269 return true; 270 } 271 WriteChar(ostream & out,char ch)272 bool WriteChar(ostream& out, char ch) 273 { 274 if(('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')) 275 out << ch; 276 else if((0x20 <= ch && ch <= 0x7e) || ch == ' ') 277 out << "\"" << ch << "\""; 278 else if(ch == '\t') 279 out << "\"\\t\""; 280 else if(ch == '\n') 281 out << "\"\\n\""; 282 else if(ch == '\b') 283 out << "\"\\b\""; 284 else { 285 out << "\""; 286 WriteDoubleQuoteEscapeSequence(out, ch); 287 out << "\""; 288 } 289 return true; 290 } 291 WriteComment(ostream & out,const std::string & str,int postCommentIndent)292 bool WriteComment(ostream& out, const std::string& str, int postCommentIndent) 293 { 294 const unsigned curIndent = out.col(); 295 out << "#" << Indentation(postCommentIndent); 296 int codePoint; 297 for(std::string::const_iterator i = str.begin(); 298 GetNextCodePointAndAdvance(codePoint, i, str.end()); 299 ) 300 { 301 if(codePoint == '\n') 302 out << "\n" << IndentTo(curIndent) << "#" << Indentation(postCommentIndent); 303 else 304 WriteCodePoint(out, codePoint); 305 } 306 return true; 307 } 308 WriteAlias(ostream & out,const std::string & str)309 bool WriteAlias(ostream& out, const std::string& str) 310 { 311 out << "*"; 312 return WriteAliasName(out, str); 313 } 314 WriteAnchor(ostream & out,const std::string & str)315 bool WriteAnchor(ostream& out, const std::string& str) 316 { 317 out << "&"; 318 return WriteAliasName(out, str); 319 } 320 WriteTag(ostream & out,const std::string & str,bool verbatim)321 bool WriteTag(ostream& out, const std::string& str, bool verbatim) 322 { 323 out << (verbatim ? "!<" : "!"); 324 StringCharSource buffer(str.c_str(), str.size()); 325 const RegEx& reValid = verbatim ? Exp::URI() : Exp::Tag(); 326 while(buffer) { 327 int n = reValid.Match(buffer); 328 if(n <= 0) 329 return false; 330 331 while(--n >= 0) { 332 out << buffer[0]; 333 ++buffer; 334 } 335 } 336 if (verbatim) 337 out << ">"; 338 return true; 339 } 340 WriteTagWithPrefix(ostream & out,const std::string & prefix,const std::string & tag)341 bool WriteTagWithPrefix(ostream& out, const std::string& prefix, const std::string& tag) 342 { 343 out << "!"; 344 StringCharSource prefixBuffer(prefix.c_str(), prefix.size()); 345 while(prefixBuffer) { 346 int n = Exp::URI().Match(prefixBuffer); 347 if(n <= 0) 348 return false; 349 350 while(--n >= 0) { 351 out << prefixBuffer[0]; 352 ++prefixBuffer; 353 } 354 } 355 356 out << "!"; 357 StringCharSource tagBuffer(tag.c_str(), tag.size()); 358 while(tagBuffer) { 359 int n = Exp::Tag().Match(tagBuffer); 360 if(n <= 0) 361 return false; 362 363 while(--n >= 0) { 364 out << tagBuffer[0]; 365 ++tagBuffer; 366 } 367 } 368 return true; 369 } 370 WriteBinary(ostream & out,const Binary & binary)371 bool WriteBinary(ostream& out, const Binary& binary) 372 { 373 WriteDoubleQuotedString(out, EncodeBase64(binary.data(), binary.size()), false); 374 return true; 375 } 376 } 377 } 378 379