1 #include "emitterutils.h"
2 #include "exp.h"
3 #include "indentation.h"
4 #include "yaml-cpp03/binary.h"
5 #include "yaml-cpp03/exceptions.h"
6 #include "stringsource.h"
7 #include <sstream>
8 #include <iomanip>
9 
10 namespace YAML
11 {
12 	namespace Utils
13 	{
14 		namespace {
			// U+FFFD REPLACEMENT CHARACTER: the code point substituted by the helpers
			// below for malformed UTF-8 input and for illegal or out-of-range code points.
			enum {REPLACEMENT_CHARACTER = 0xFFFD};
16 
IsAnchorChar(int ch)17 			bool IsAnchorChar(int ch) { // test for ns-anchor-char
18 				switch (ch) {
19 					case ',': case '[': case ']': case '{': case '}': // c-flow-indicator
20 					case ' ': case '\t': // s-white
21 					case 0xFEFF: // c-byte-order-mark
22 					case 0xA: case 0xD: // b-char
23 						return false;
24 					case 0x85:
25 						return true;
26 				}
27 
28 				if (ch < 0x20)
29 					return false;
30 
31 				if (ch < 0x7E)
32 					return true;
33 
34 				if (ch < 0xA0)
35 					return false;
36 				if (ch >= 0xD800 && ch <= 0xDFFF)
37 					return false;
38 				if ((ch & 0xFFFE) == 0xFFFE)
39 					return false;
40 				if ((ch >= 0xFDD0) && (ch <= 0xFDEF))
41 					return false;
42 				if (ch > 0x10FFFF)
43 					return false;
44 
45 				return true;
46 			}
47 
Utf8BytesIndicated(char ch)48 			int Utf8BytesIndicated(char ch) {
49 				int byteVal = static_cast<unsigned char>(ch);
50 				switch (byteVal >> 4) {
51 					case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
52 						return 1;
53 					case 12: case 13:
54 						return 2;
55 					case 14:
56 						return 3;
57 					case 15:
58 						return 4;
59 					default:
60 					  return -1;
61 				}
62 			}
63 
IsTrailingByte(char ch)64 			bool IsTrailingByte(char ch) {
65 				return (ch & 0xC0) == 0x80;
66 			}
67 
GetNextCodePointAndAdvance(int & codePoint,std::string::const_iterator & first,std::string::const_iterator last)68 			bool GetNextCodePointAndAdvance(int& codePoint, std::string::const_iterator& first, std::string::const_iterator last) {
69 				if (first == last)
70 					return false;
71 
72 				int nBytes = Utf8BytesIndicated(*first);
73 				if (nBytes < 1) {
74 					// Bad lead byte
75 					++first;
76 					codePoint = REPLACEMENT_CHARACTER;
77 					return true;
78 				}
79 
80 				if (nBytes == 1) {
81 					codePoint = *first++;
82 					return true;
83 				}
84 
85 				// Gather bits from trailing bytes
86 				codePoint = static_cast<unsigned char>(*first) & ~(0xFF << (7 - nBytes));
87 				++first;
88 				--nBytes;
89 				for (; nBytes > 0; ++first, --nBytes) {
90 					if ((first == last) || !IsTrailingByte(*first)) {
91 						codePoint = REPLACEMENT_CHARACTER;
92 						break;
93 					}
94 					codePoint <<= 6;
95 					codePoint |= *first & 0x3F;
96 				}
97 
98 				// Check for illegal code points
99 				if (codePoint > 0x10FFFF)
100 					codePoint = REPLACEMENT_CHARACTER;
101 				else if (codePoint >= 0xD800 && codePoint <= 0xDFFF)
102 					codePoint = REPLACEMENT_CHARACTER;
103 				else if ((codePoint & 0xFFFE) == 0xFFFE)
104 					codePoint = REPLACEMENT_CHARACTER;
105 				else if (codePoint >= 0xFDD0 && codePoint <= 0xFDEF)
106 					codePoint = REPLACEMENT_CHARACTER;
107 				return true;
108 			}
109 
WriteCodePoint(ostream & out,int codePoint)110 			void WriteCodePoint(ostream& out, int codePoint) {
111 				if (codePoint < 0 || codePoint > 0x10FFFF) {
112 					codePoint = REPLACEMENT_CHARACTER;
113 				}
114 				if (codePoint < 0x7F) {
115 					out << static_cast<char>(codePoint);
116 				} else if (codePoint < 0x7FF) {
117 					out << static_cast<char>(0xC0 | (codePoint >> 6))
118 					    << static_cast<char>(0x80 | (codePoint & 0x3F));
119 				} else if (codePoint < 0xFFFF) {
120 					out << static_cast<char>(0xE0 | (codePoint >> 12))
121 					    << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
122 					    << static_cast<char>(0x80 | (codePoint & 0x3F));
123 				} else {
124 					out << static_cast<char>(0xF0 | (codePoint >> 18))
125 					    << static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F))
126 					    << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
127 					    << static_cast<char>(0x80 | (codePoint & 0x3F));
128 				}
129 			}
130 
IsValidPlainScalar(const std::string & str,bool inFlow,bool allowOnlyAscii)131 			bool IsValidPlainScalar(const std::string& str, bool inFlow, bool allowOnlyAscii) {
132 				if(str.empty())
133 					return false;
134 
135 				// first check the start
136 				const RegEx& start = (inFlow ? Exp::PlainScalarInFlow() : Exp::PlainScalar());
137 				if(!start.Matches(str))
138 					return false;
139 
140 				// and check the end for plain whitespace (which can't be faithfully kept in a plain scalar)
141 				if(!str.empty() && *str.rbegin() == ' ')
142 					return false;
143 
144 				// then check until something is disallowed
145 				const RegEx& disallowed = (inFlow ? Exp::EndScalarInFlow() : Exp::EndScalar())
146 				                          || (Exp::BlankOrBreak() + Exp::Comment())
147 				                          || Exp::NotPrintable()
148 				                          || Exp::Utf8_ByteOrderMark()
149 				                          || Exp::Break()
150 				                          || Exp::Tab();
151 				StringCharSource buffer(str.c_str(), str.size());
152 				while(buffer) {
153 					if(disallowed.Matches(buffer))
154 						return false;
155 					if(allowOnlyAscii && (0x7F < static_cast<unsigned char>(buffer[0])))
156 						return false;
157 					++buffer;
158 				}
159 
160 				return true;
161 			}
162 
WriteDoubleQuoteEscapeSequence(ostream & out,int codePoint)163 			void WriteDoubleQuoteEscapeSequence(ostream& out, int codePoint) {
164 				static const char hexDigits[] = "0123456789abcdef";
165 
166 				char escSeq[] = "\\U00000000";
167 				int digits = 8;
168 				if (codePoint < 0xFF) {
169 					escSeq[1] = 'x';
170 					digits = 2;
171 				} else if (codePoint < 0xFFFF) {
172 					escSeq[1] = 'u';
173 					digits = 4;
174 				}
175 
176 				// Write digits into the escape sequence
177 				int i = 2;
178 				for (; digits > 0; --digits, ++i) {
179 					escSeq[i] = hexDigits[(codePoint >> (4 * (digits - 1))) & 0xF];
180 				}
181 
182 				escSeq[i] = 0; // terminate with NUL character
183 				out << escSeq;
184 			}
185 
WriteAliasName(ostream & out,const std::string & str)186 			bool WriteAliasName(ostream& out, const std::string& str) {
187 				int codePoint;
188 				for(std::string::const_iterator i = str.begin();
189 					GetNextCodePointAndAdvance(codePoint, i, str.end());
190 					)
191 				{
192 					if (!IsAnchorChar(codePoint))
193 						return false;
194 
195 					WriteCodePoint(out, codePoint);
196 				}
197 				return true;
198 			}
199 		}
200 
WriteString(ostream & out,const std::string & str,bool inFlow,bool escapeNonAscii)201 		bool WriteString(ostream& out, const std::string& str, bool inFlow, bool escapeNonAscii)
202 		{
203 			if(IsValidPlainScalar(str, inFlow, escapeNonAscii)) {
204 				out << str;
205 				return true;
206 			} else
207 				return WriteDoubleQuotedString(out, str, escapeNonAscii);
208 		}
209 
WriteSingleQuotedString(ostream & out,const std::string & str)210 		bool WriteSingleQuotedString(ostream& out, const std::string& str)
211 		{
212 			out << "'";
213 			int codePoint;
214 			for(std::string::const_iterator i = str.begin();
215 				GetNextCodePointAndAdvance(codePoint, i, str.end());
216 				)
217 			{
218 				if (codePoint == '\n')
219 					return false;  // We can't handle a new line and the attendant indentation yet
220 
221 				if (codePoint == '\'')
222 					out << "''";
223 				else
224 					WriteCodePoint(out, codePoint);
225 			}
226 			out << "'";
227 			return true;
228 		}
229 
WriteDoubleQuotedString(ostream & out,const std::string & str,bool escapeNonAscii)230 		bool WriteDoubleQuotedString(ostream& out, const std::string& str, bool escapeNonAscii)
231 		{
232 			out << "\"";
233 			int codePoint;
234 			for(std::string::const_iterator i = str.begin();
235 				GetNextCodePointAndAdvance(codePoint, i, str.end());
236 				)
237 			{
238 				if (codePoint == '\"')
239 					out << "\\\"";
240 				else if (codePoint == '\\')
241 					out << "\\\\";
242 				else if (codePoint < 0x20 || (codePoint >= 0x80 && codePoint <= 0xA0)) // Control characters and non-breaking space
243 					WriteDoubleQuoteEscapeSequence(out, codePoint);
244 				else if (codePoint == 0xFEFF) // Byte order marks (ZWNS) should be escaped (YAML 1.2, sec. 5.2)
245 					WriteDoubleQuoteEscapeSequence(out, codePoint);
246 				else if (escapeNonAscii && codePoint > 0x7E)
247 					WriteDoubleQuoteEscapeSequence(out, codePoint);
248 				else
249 					WriteCodePoint(out, codePoint);
250 			}
251 			out << "\"";
252 			return true;
253 		}
254 
WriteLiteralString(ostream & out,const std::string & str,int indent)255 		bool WriteLiteralString(ostream& out, const std::string& str, int indent)
256 		{
257 			out << "|\n";
258 			out << IndentTo(indent);
259 			int codePoint;
260 			for(std::string::const_iterator i = str.begin();
261 				GetNextCodePointAndAdvance(codePoint, i, str.end());
262 				)
263 			{
264 				if (codePoint == '\n')
265 				  out << "\n" << IndentTo(indent);
266 				else
267 				  WriteCodePoint(out, codePoint);
268 			}
269 			return true;
270 		}
271 
WriteChar(ostream & out,char ch)272 		bool WriteChar(ostream& out, char ch)
273 		{
274 			if(('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z'))
275 				out << ch;
276 			else if((0x20 <= ch && ch <= 0x7e) || ch == ' ')
277 				out << "\"" << ch << "\"";
278 			else if(ch == '\t')
279 				out << "\"\\t\"";
280 			else if(ch == '\n')
281 				out << "\"\\n\"";
282 			else if(ch == '\b')
283 				out << "\"\\b\"";
284 			else {
285 				out << "\"";
286 				WriteDoubleQuoteEscapeSequence(out, ch);
287 				out << "\"";
288 			}
289 			return true;
290 		}
291 
WriteComment(ostream & out,const std::string & str,int postCommentIndent)292 		bool WriteComment(ostream& out, const std::string& str, int postCommentIndent)
293 		{
294 			const unsigned curIndent = out.col();
295 			out << "#" << Indentation(postCommentIndent);
296 			int codePoint;
297 			for(std::string::const_iterator i = str.begin();
298 				GetNextCodePointAndAdvance(codePoint, i, str.end());
299 				)
300 			{
301 				if(codePoint == '\n')
302 					out << "\n" << IndentTo(curIndent) << "#" << Indentation(postCommentIndent);
303 				else
304 					WriteCodePoint(out, codePoint);
305 			}
306 			return true;
307 		}
308 
WriteAlias(ostream & out,const std::string & str)309 		bool WriteAlias(ostream& out, const std::string& str)
310 		{
311 			out << "*";
312 			return WriteAliasName(out, str);
313 		}
314 
WriteAnchor(ostream & out,const std::string & str)315 		bool WriteAnchor(ostream& out, const std::string& str)
316 		{
317 			out << "&";
318 			return WriteAliasName(out, str);
319 		}
320 
WriteTag(ostream & out,const std::string & str,bool verbatim)321 		bool WriteTag(ostream& out, const std::string& str, bool verbatim)
322 		{
323 			out << (verbatim ? "!<" : "!");
324 			StringCharSource buffer(str.c_str(), str.size());
325 			const RegEx& reValid = verbatim ? Exp::URI() : Exp::Tag();
326 			while(buffer) {
327 				int n = reValid.Match(buffer);
328 				if(n <= 0)
329 					return false;
330 
331 				while(--n >= 0) {
332 					out << buffer[0];
333 					++buffer;
334 				}
335 			}
336 			if (verbatim)
337 				out << ">";
338 			return true;
339 		}
340 
WriteTagWithPrefix(ostream & out,const std::string & prefix,const std::string & tag)341 		bool WriteTagWithPrefix(ostream& out, const std::string& prefix, const std::string& tag)
342 		{
343 			out << "!";
344 			StringCharSource prefixBuffer(prefix.c_str(), prefix.size());
345 			while(prefixBuffer) {
346 				int n = Exp::URI().Match(prefixBuffer);
347 				if(n <= 0)
348 					return false;
349 
350 				while(--n >= 0) {
351 					out << prefixBuffer[0];
352 					++prefixBuffer;
353 				}
354 			}
355 
356 			out << "!";
357 			StringCharSource tagBuffer(tag.c_str(), tag.size());
358 			while(tagBuffer) {
359 				int n = Exp::Tag().Match(tagBuffer);
360 				if(n <= 0)
361 					return false;
362 
363 				while(--n >= 0) {
364 					out << tagBuffer[0];
365 					++tagBuffer;
366 				}
367 			}
368 			return true;
369 		}
370 
WriteBinary(ostream & out,const Binary & binary)371 		bool WriteBinary(ostream& out, const Binary& binary)
372 		{
373             WriteDoubleQuotedString(out, EncodeBase64(binary.data(), binary.size()), false);
374             return true;
375 		}
376 	}
377 }
378 
379