/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* A JSON pretty-printer class. */

// A typical JSON-writing library requires you to first build up a data
// structure that represents a JSON object and then serialize it (to file, or
// somewhere else). This approach makes for a clean API, but building the data
// structure takes up memory. Sometimes that isn't desirable, such as when the
// JSON data is produced for memory reporting.
//
// The JSONWriter class instead allows JSON data to be written out
// incrementally without building up large data structures.
//
// The API is slightly uglier than you would see in a typical JSON-writing
// library, but still fairly easy to use. It's possible to generate invalid
// JSON with JSONWriter, but typically the most basic testing will identify any
// such problems.
//
// Similarly, there are no RAII facilities for automatically closing objects
// and arrays. These would be nice if you are generating all your code within
// nested functions, but in other cases you'd have to maintain an explicit
// stack of RAII objects and manually unwind it, which is no better than just
// calling "end" functions. Furthermore, the consequences of forgetting to
// close an object or array are obvious and, again, will be identified via
// basic testing, unlike other cases where RAII is typically used (e.g. smart
// pointers) and the consequences of defects are more subtle.
//
// Importantly, the class does solve the two hard problems of JSON
// pretty-printing, which are (a) correctly escaping strings, and (b) adding
// appropriate indentation and commas between items.
//
// By default, every property is placed on its own line. However, it is
// possible to request that objects and arrays be placed entirely on a single
// line, which can reduce output size significantly in some cases.
//
// Strings used (for property names and string property values) are passed as
// |Span<const char>| or as character arrays (e.g. string literals), and can be
// ASCII or UTF-8.
//
// EXAMPLE
// -------
// Assume that |MyWriteFunc| is a class that implements |JSONWriteFunc|. The
// following code:
//
//   JSONWriter w(MakeUnique<MyWriteFunc>());
//   w.Start();
//   {
//     w.NullProperty("null");
//     w.BoolProperty("bool", true);
//     w.IntProperty("int", 1);
//     w.StartArrayProperty("array");
//     {
//       w.StringElement("string");
//       w.StartObjectElement();
//       {
//         w.DoubleProperty("double", 3.4);
//         w.StartArrayProperty("single-line array", w.SingleLineStyle);
//         {
//           w.IntElement(1);
//           w.StartObjectElement();  // SingleLineStyle is inherited from
//           w.EndObject();           // above for this collection
//         }
//         w.EndArray();
//       }
//       w.EndObject();
//     }
//     w.EndArray();
//   }
//   w.End();
//
// will produce pretty-printed output for the following JSON object:
//
//  {
//   "null": null,
//   "bool": true,
//   "int": 1,
//   "array": [
//    "string",
//    {
//     "double": 3.4,
//     "single-line array": [1, {}]
//    }
//   ]
//  }
//
// The nesting in the example code is obviously optional, but can aid
// readability.
91 92 #ifndef mozilla_JSONWriter_h 93 #define mozilla_JSONWriter_h 94 95 #include "double-conversion/double-conversion.h" 96 #include "mozilla/Assertions.h" 97 #include "mozilla/IntegerPrintfMacros.h" 98 #include "mozilla/PodOperations.h" 99 #include "mozilla/Span.h" 100 #include "mozilla/Sprintf.h" 101 #include "mozilla/UniquePtr.h" 102 #include "mozilla/Vector.h" 103 104 #include <utility> 105 106 namespace mozilla { 107 108 // A quasi-functor for JSONWriter. We don't use a true functor because that 109 // requires templatizing JSONWriter, and the templatization seeps to lots of 110 // places we don't want it to. 111 class JSONWriteFunc { 112 public: 113 virtual void Write(const Span<const char>& aStr) = 0; 114 virtual ~JSONWriteFunc() = default; 115 }; 116 117 // Ideally this would be within |EscapedString| but when compiling with GCC 118 // on Linux that caused link errors, whereas this formulation didn't. 119 namespace detail { 120 extern MFBT_DATA const char gTwoCharEscapes[256]; 121 } // namespace detail 122 123 class JSONWriter { 124 // From http://www.ietf.org/rfc/rfc4627.txt: 125 // 126 // "All Unicode characters may be placed within the quotation marks except 127 // for the characters that must be escaped: quotation mark, reverse 128 // solidus, and the control characters (U+0000 through U+001F)." 129 // 130 // This implementation uses two-char escape sequences where possible, namely: 131 // 132 // \", \\, \b, \f, \n, \r, \t 133 // 134 // All control characters not in the above list are represented with a 135 // six-char escape sequence, e.g. '\u000b' (a.k.a. '\v'). 136 // 137 class EscapedString { 138 // `mStringSpan` initially points at the user-provided string. If that 139 // string needs escaping, `mStringSpan` will point at `mOwnedStr` below. 140 Span<const char> mStringSpan; 141 // String storage in case escaping is actually needed, null otherwise. 
142 UniquePtr<char[]> mOwnedStr; 143 CheckInvariants()144 void CheckInvariants() const { 145 // Either there was no escaping so `mOwnedStr` is null, or escaping was 146 // needed, in which case `mStringSpan` should point at `mOwnedStr`. 147 MOZ_ASSERT(!mOwnedStr || mStringSpan.data() == mOwnedStr.get()); 148 } 149 hexDigitToAsciiChar(uint8_t u)150 static char hexDigitToAsciiChar(uint8_t u) { 151 u = u & 0xf; 152 return u < 10 ? '0' + u : 'a' + (u - 10); 153 } 154 155 public: EscapedString(const Span<const char> & aStr)156 explicit EscapedString(const Span<const char>& aStr) : mStringSpan(aStr) { 157 // First, see if we need to modify the string. 158 size_t nExtra = 0; 159 for (const char& c : aStr) { 160 // ensure it can't be interpreted as negative 161 uint8_t u = static_cast<uint8_t>(c); 162 if (u == 0) { 163 // Null terminator within the span, assume we may have been given a 164 // span to a buffer that contains a null-terminated string in it. 165 // We need to truncate the Span so that it doesn't include this null 166 // terminator and anything past it; Either we will return it as-is, or 167 // processing should stop there. 168 mStringSpan = mStringSpan.First(&c - mStringSpan.data()); 169 break; 170 } 171 if (detail::gTwoCharEscapes[u]) { 172 nExtra += 1; 173 } else if (u <= 0x1f) { 174 nExtra += 5; 175 } 176 } 177 178 // Note: Don't use `aStr` anymore, as it could contain a null terminator; 179 // use the correctly-sized `mStringSpan` instead. 180 181 if (nExtra == 0) { 182 // No escapes needed. mStringSpan already points at the original string. 183 CheckInvariants(); 184 return; 185 } 186 187 // Escapes are needed. We'll create a new string. 
188 mOwnedStr = MakeUnique<char[]>(mStringSpan.Length() + nExtra); 189 190 size_t i = 0; 191 for (const char c : mStringSpan) { 192 // ensure it can't be interpreted as negative 193 uint8_t u = static_cast<uint8_t>(c); 194 MOZ_ASSERT(u != 0, "Null terminator should have been handled above"); 195 if (detail::gTwoCharEscapes[u]) { 196 mOwnedStr[i++] = '\\'; 197 mOwnedStr[i++] = detail::gTwoCharEscapes[u]; 198 } else if (u <= 0x1f) { 199 mOwnedStr[i++] = '\\'; 200 mOwnedStr[i++] = 'u'; 201 mOwnedStr[i++] = '0'; 202 mOwnedStr[i++] = '0'; 203 mOwnedStr[i++] = hexDigitToAsciiChar((u & 0x00f0) >> 4); 204 mOwnedStr[i++] = hexDigitToAsciiChar(u & 0x000f); 205 } else { 206 mOwnedStr[i++] = u; 207 } 208 } 209 MOZ_ASSERT(i == mStringSpan.Length() + nExtra); 210 mStringSpan = Span<const char>(mOwnedStr.get(), i); 211 CheckInvariants(); 212 } 213 214 explicit EscapedString(const char* aStr) = delete; 215 SpanRef()216 const Span<const char>& SpanRef() const { return mStringSpan; } 217 }; 218 219 public: 220 // Collections (objects and arrays) are printed in a multi-line style by 221 // default. This can be changed to a single-line style if SingleLineStyle is 222 // specified. If a collection is printed in single-line style, every nested 223 // collection within it is also printed in single-line style, even if 224 // multi-line style is requested. 
225 enum CollectionStyle { 226 MultiLineStyle, // the default 227 SingleLineStyle 228 }; 229 230 protected: 231 static constexpr Span<const char> scArrayBeginString = MakeStringSpan("["); 232 static constexpr Span<const char> scArrayEndString = MakeStringSpan("]"); 233 static constexpr Span<const char> scCommaString = MakeStringSpan(","); 234 static constexpr Span<const char> scEmptyString = MakeStringSpan(""); 235 static constexpr Span<const char> scFalseString = MakeStringSpan("false"); 236 static constexpr Span<const char> scNewLineString = MakeStringSpan("\n"); 237 static constexpr Span<const char> scNullString = MakeStringSpan("null"); 238 static constexpr Span<const char> scObjectBeginString = MakeStringSpan("{"); 239 static constexpr Span<const char> scObjectEndString = MakeStringSpan("}"); 240 static constexpr Span<const char> scPropertyBeginString = 241 MakeStringSpan("\""); 242 static constexpr Span<const char> scPropertyEndString = 243 MakeStringSpan("\": "); 244 static constexpr Span<const char> scQuoteString = MakeStringSpan("\""); 245 static constexpr Span<const char> scSpaceString = MakeStringSpan(" "); 246 static constexpr Span<const char> scTopObjectBeginString = 247 MakeStringSpan("{"); 248 static constexpr Span<const char> scTopObjectEndString = 249 MakeStringSpan("}\n"); 250 static constexpr Span<const char> scTrueString = MakeStringSpan("true"); 251 252 const UniquePtr<JSONWriteFunc> mWriter; 253 Vector<bool, 8> mNeedComma; // do we need a comma at depth N? 254 Vector<bool, 8> mNeedNewlines; // do we need newlines at depth N? 255 size_t mDepth; // the current nesting depth 256 Indent()257 void Indent() { 258 for (size_t i = 0; i < mDepth; i++) { 259 mWriter->Write(scSpaceString); 260 } 261 } 262 263 // Adds whatever is necessary (maybe a comma, and then a newline and 264 // whitespace) to separate an item (property or element) from what's come 265 // before. 
Separator()266 void Separator() { 267 if (mNeedComma[mDepth]) { 268 mWriter->Write(scCommaString); 269 } 270 if (mDepth > 0 && mNeedNewlines[mDepth]) { 271 mWriter->Write(scNewLineString); 272 Indent(); 273 } else if (mNeedComma[mDepth]) { 274 mWriter->Write(scSpaceString); 275 } 276 } 277 PropertyNameAndColon(const Span<const char> & aName)278 void PropertyNameAndColon(const Span<const char>& aName) { 279 mWriter->Write(scPropertyBeginString); 280 mWriter->Write(EscapedString(aName).SpanRef()); 281 mWriter->Write(scPropertyEndString); 282 } 283 Scalar(const Span<const char> & aMaybePropertyName,const Span<const char> & aStringValue)284 void Scalar(const Span<const char>& aMaybePropertyName, 285 const Span<const char>& aStringValue) { 286 Separator(); 287 if (!aMaybePropertyName.empty()) { 288 PropertyNameAndColon(aMaybePropertyName); 289 } 290 mWriter->Write(aStringValue); 291 mNeedComma[mDepth] = true; 292 } 293 QuotedScalar(const Span<const char> & aMaybePropertyName,const Span<const char> & aStringValue)294 void QuotedScalar(const Span<const char>& aMaybePropertyName, 295 const Span<const char>& aStringValue) { 296 Separator(); 297 if (!aMaybePropertyName.empty()) { 298 PropertyNameAndColon(aMaybePropertyName); 299 } 300 mWriter->Write(scQuoteString); 301 mWriter->Write(aStringValue); 302 mWriter->Write(scQuoteString); 303 mNeedComma[mDepth] = true; 304 } 305 NewVectorEntries()306 void NewVectorEntries() { 307 // If these tiny allocations OOM we might as well just crash because we 308 // must be in serious memory trouble. 
309 MOZ_RELEASE_ASSERT(mNeedComma.resizeUninitialized(mDepth + 1)); 310 MOZ_RELEASE_ASSERT(mNeedNewlines.resizeUninitialized(mDepth + 1)); 311 mNeedComma[mDepth] = false; 312 mNeedNewlines[mDepth] = true; 313 } 314 315 void StartCollection(const Span<const char>& aMaybePropertyName, 316 const Span<const char>& aStartChar, 317 CollectionStyle aStyle = MultiLineStyle) { 318 Separator(); 319 if (!aMaybePropertyName.empty()) { 320 PropertyNameAndColon(aMaybePropertyName); 321 } 322 mWriter->Write(aStartChar); 323 mNeedComma[mDepth] = true; 324 mDepth++; 325 NewVectorEntries(); 326 mNeedNewlines[mDepth] = 327 mNeedNewlines[mDepth - 1] && aStyle == MultiLineStyle; 328 } 329 330 // Adds the whitespace and closing char necessary to end a collection. EndCollection(const Span<const char> & aEndChar)331 void EndCollection(const Span<const char>& aEndChar) { 332 MOZ_ASSERT(mDepth > 0); 333 if (mNeedNewlines[mDepth]) { 334 mWriter->Write(scNewLineString); 335 mDepth--; 336 Indent(); 337 } else { 338 mDepth--; 339 } 340 mWriter->Write(aEndChar); 341 } 342 343 public: JSONWriter(UniquePtr<JSONWriteFunc> aWriter)344 explicit JSONWriter(UniquePtr<JSONWriteFunc> aWriter) 345 : mWriter(std::move(aWriter)), mNeedComma(), mNeedNewlines(), mDepth(0) { 346 NewVectorEntries(); 347 } 348 349 // Returns the JSONWriteFunc passed in at creation, for temporary use. The 350 // JSONWriter object still owns the JSONWriteFunc. WriteFunc()351 JSONWriteFunc* WriteFunc() const { return mWriter.get(); } 352 353 // For all the following functions, the "Prints:" comment indicates what the 354 // basic output looks like. However, it doesn't indicate the whitespace and 355 // trailing commas, which are automatically added as required. 356 // 357 // All property names and string properties are escaped as necessary. 358 359 // Prints: { 360 void Start(CollectionStyle aStyle = MultiLineStyle) { 361 StartCollection(scEmptyString, scTopObjectBeginString, aStyle); 362 } 363 364 // Prints: } and final newline. 
End()365 void End() { EndCollection(scTopObjectEndString); } 366 367 // Prints: "<aName>": null NullProperty(const Span<const char> & aName)368 void NullProperty(const Span<const char>& aName) { 369 Scalar(aName, scNullString); 370 } 371 372 template <size_t N> NullProperty(const char (& aName)[N])373 void NullProperty(const char (&aName)[N]) { 374 // Keep null terminator from literal strings, will be removed by 375 // EscapedString. This way C buffer arrays can be used as well. 376 NullProperty(Span<const char>(aName, N)); 377 } 378 379 // Prints: null NullElement()380 void NullElement() { NullProperty(scEmptyString); } 381 382 // Prints: "<aName>": <aBool> BoolProperty(const Span<const char> & aName,bool aBool)383 void BoolProperty(const Span<const char>& aName, bool aBool) { 384 Scalar(aName, aBool ? scTrueString : scFalseString); 385 } 386 387 template <size_t N> BoolProperty(const char (& aName)[N],bool aBool)388 void BoolProperty(const char (&aName)[N], bool aBool) { 389 // Keep null terminator from literal strings, will be removed by 390 // EscapedString. This way C buffer arrays can be used as well. 391 BoolProperty(Span<const char>(aName, N), aBool); 392 } 393 394 // Prints: <aBool> BoolElement(bool aBool)395 void BoolElement(bool aBool) { BoolProperty(scEmptyString, aBool); } 396 397 // Prints: "<aName>": <aInt> IntProperty(const Span<const char> & aName,int64_t aInt)398 void IntProperty(const Span<const char>& aName, int64_t aInt) { 399 char buf[64]; 400 int len = SprintfLiteral(buf, "%" PRId64, aInt); 401 MOZ_RELEASE_ASSERT(len > 0); 402 Scalar(aName, Span<const char>(buf, size_t(len))); 403 } 404 405 template <size_t N> IntProperty(const char (& aName)[N],int64_t aInt)406 void IntProperty(const char (&aName)[N], int64_t aInt) { 407 // Keep null terminator from literal strings, will be removed by 408 // EscapedString. This way C buffer arrays can be used as well. 
409 IntProperty(Span<const char>(aName, N), aInt); 410 } 411 412 // Prints: <aInt> IntElement(int64_t aInt)413 void IntElement(int64_t aInt) { IntProperty(scEmptyString, aInt); } 414 415 // Prints: "<aName>": <aDouble> DoubleProperty(const Span<const char> & aName,double aDouble)416 void DoubleProperty(const Span<const char>& aName, double aDouble) { 417 static const size_t buflen = 64; 418 char buf[buflen]; 419 const double_conversion::DoubleToStringConverter& converter = 420 double_conversion::DoubleToStringConverter::EcmaScriptConverter(); 421 double_conversion::StringBuilder builder(buf, buflen); 422 converter.ToShortest(aDouble, &builder); 423 // TODO: The builder should know the length?! 424 Scalar(aName, MakeStringSpan(builder.Finalize())); 425 } 426 427 template <size_t N> DoubleProperty(const char (& aName)[N],double aDouble)428 void DoubleProperty(const char (&aName)[N], double aDouble) { 429 // Keep null terminator from literal strings, will be removed by 430 // EscapedString. This way C buffer arrays can be used as well. 431 DoubleProperty(Span<const char>(aName, N), aDouble); 432 } 433 434 // Prints: <aDouble> DoubleElement(double aDouble)435 void DoubleElement(double aDouble) { DoubleProperty(scEmptyString, aDouble); } 436 437 // Prints: "<aName>": "<aStr>" StringProperty(const Span<const char> & aName,const Span<const char> & aStr)438 void StringProperty(const Span<const char>& aName, 439 const Span<const char>& aStr) { 440 QuotedScalar(aName, EscapedString(aStr).SpanRef()); 441 } 442 443 template <size_t NN> StringProperty(const char (& aName)[NN],const Span<const char> & aStr)444 void StringProperty(const char (&aName)[NN], const Span<const char>& aStr) { 445 // Keep null terminator from literal strings, will be removed by 446 // EscapedString. This way C buffer arrays can be used as well. 
447 StringProperty(Span<const char>(aName, NN), aStr); 448 } 449 450 template <size_t SN> StringProperty(const Span<const char> & aName,const char (& aStr)[SN])451 void StringProperty(const Span<const char>& aName, const char (&aStr)[SN]) { 452 // Keep null terminator from literal strings, will be removed by 453 // EscapedString. This way C buffer arrays can be used as well. 454 StringProperty(aName, Span<const char>(aStr, SN)); 455 } 456 457 template <size_t NN, size_t SN> StringProperty(const char (& aName)[NN],const char (& aStr)[SN])458 void StringProperty(const char (&aName)[NN], const char (&aStr)[SN]) { 459 // Keep null terminators from literal strings, will be removed by 460 // EscapedString. This way C buffer arrays can be used as well. 461 StringProperty(Span<const char>(aName, NN), Span<const char>(aStr, SN)); 462 } 463 464 // Prints: "<aStr>" StringElement(const Span<const char> & aStr)465 void StringElement(const Span<const char>& aStr) { 466 StringProperty(scEmptyString, aStr); 467 } 468 469 template <size_t N> StringElement(const char (& aName)[N])470 void StringElement(const char (&aName)[N]) { 471 // Keep null terminator from literal strings, will be removed by 472 // EscapedString. This way C buffer arrays can be used as well. 473 StringElement(Span<const char>(aName, N)); 474 } 475 476 // Prints: "<aName>": [ 477 void StartArrayProperty(const Span<const char>& aName, 478 CollectionStyle aStyle = MultiLineStyle) { 479 StartCollection(aName, scArrayBeginString, aStyle); 480 } 481 482 template <size_t N> 483 void StartArrayProperty(const char (&aName)[N], 484 CollectionStyle aStyle = MultiLineStyle) { 485 // Keep null terminator from literal strings, will be removed by 486 // EscapedString. This way C buffer arrays can be used as well. 
487 StartArrayProperty(Span<const char>(aName, N), aStyle); 488 } 489 490 // Prints: [ 491 void StartArrayElement(CollectionStyle aStyle = MultiLineStyle) { 492 StartArrayProperty(scEmptyString, aStyle); 493 } 494 495 // Prints: ] EndArray()496 void EndArray() { EndCollection(scArrayEndString); } 497 498 // Prints: "<aName>": { 499 void StartObjectProperty(const Span<const char>& aName, 500 CollectionStyle aStyle = MultiLineStyle) { 501 StartCollection(aName, scObjectBeginString, aStyle); 502 } 503 504 template <size_t N> 505 void StartObjectProperty(const char (&aName)[N], 506 CollectionStyle aStyle = MultiLineStyle) { 507 // Keep null terminator from literal strings, will be removed by 508 // EscapedString. This way C buffer arrays can be used as well. 509 StartObjectProperty(Span<const char>(aName, N), aStyle); 510 } 511 512 // Prints: { 513 void StartObjectElement(CollectionStyle aStyle = MultiLineStyle) { 514 StartObjectProperty(scEmptyString, aStyle); 515 } 516 517 // Prints: } EndObject()518 void EndObject() { EndCollection(scObjectEndString); } 519 }; 520 521 } // namespace mozilla 522 523 #endif /* mozilla_JSONWriter_h */ 524