1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 /* A JSON pretty-printer class. */ 8 9 // A typical JSON-writing library requires you to first build up a data 10 // structure that represents a JSON object and then serialize it (to file, or 11 // somewhere else). This approach makes for a clean API, but building the data 12 // structure takes up memory. Sometimes that isn't desirable, such as when the 13 // JSON data is produced for memory reporting. 14 // 15 // The JSONWriter class instead allows JSON data to be written out 16 // incrementally without building up large data structures. 17 // 18 // The API is slightly uglier than you would see in a typical JSON-writing 19 // library, but still fairly easy to use. It's possible to generate invalid 20 // JSON with JSONWriter, but typically the most basic testing will identify any 21 // such problems. 22 // 23 // Similarly, there are no RAII facilities for automatically closing objects 24 // and arrays. These would be nice if you are generating all your code within 25 // nested functions, but in other cases you'd have to maintain an explicit 26 // stack of RAII objects and manually unwind it, which is no better than just 27 // calling "end" functions. Furthermore, the consequences of forgetting to 28 // close an object or array are obvious and, again, will be identified via 29 // basic testing, unlike other cases where RAII is typically used (e.g. smart 30 // pointers) and the consequences of defects are more subtle. 31 // 32 // Importantly, the class does solve the two hard problems of JSON 33 // pretty-printing, which are (a) correctly escaping strings, and (b) adding 34 // appropriate indentation and commas between items. 35 // 36 // By default, every property is placed on its own line. However, it is 37 // possible to request that objects and arrays be placed entirely on a single 38 // line, which can reduce output size significantly in some cases. 39 // 40 // Strings used (for property names and string property values) are |const 41 // char*| throughout, and can be ASCII or UTF-8. 42 // 43 // EXAMPLE 44 // ------- 45 // Assume that |MyWriteFunc| is a class that implements |JSONWriteFunc|. The 46 // following code: 47 // 48 // JSONWriter w(MakeUnique<MyWriteFunc>()); 49 // w.Start(); 50 // { 51 // w.NullProperty("null"); 52 // w.BoolProperty("bool", true); 53 // w.IntProperty("int", 1); 54 // w.StartArrayProperty("array"); 55 // { 56 // w.StringElement("string"); 57 // w.StartObjectElement(); 58 // { 59 // w.DoubleProperty("double", 3.4); 60 // w.StartArrayProperty("single-line array", w.SingleLineStyle); 61 // { 62 // w.IntElement(1); 63 // w.StartObjectElement(); // SingleLineStyle is inherited from 64 // w.EndObjectElement(); // above for this collection 65 // } 66 // w.EndArray(); 67 // } 68 // w.EndObjectElement(); 69 // } 70 // w.EndArrayProperty(); 71 // } 72 // w.End(); 73 // 74 // will produce pretty-printed output for the following JSON object: 75 // 76 // { 77 // "null": null, 78 // "bool": true, 79 // "int": 1, 80 // "array": [ 81 // "string", 82 // { 83 // "double": 3.4, 84 // "single-line array": [1, {}] 85 // } 86 // ] 87 // } 88 // 89 // The nesting in the example code is obviously optional, but can aid 90 // readability. 91 92 #ifndef mozilla_JSONWriter_h 93 #define mozilla_JSONWriter_h 94 95 #include "mozilla/double-conversion.h" 96 #include "mozilla/IntegerPrintfMacros.h" 97 #include "mozilla/PodOperations.h" 98 #include "mozilla/Snprintf.h" 99 #include "mozilla/UniquePtr.h" 100 #include "mozilla/Vector.h" 101 102 #include <stdio.h> 103 104 namespace mozilla { 105 106 // A quasi-functor for JSONWriter. We don't use a true functor because that 107 // requires templatizing JSONWriter, and the templatization seeps to lots of 108 // places we don't want it to. 109 class JSONWriteFunc 110 { 111 public: 112 virtual void Write(const char* aStr) = 0; ~JSONWriteFunc()113 virtual ~JSONWriteFunc() {} 114 }; 115 116 // Ideally this would be within |EscapedString| but when compiling with GCC 117 // on Linux that caused link errors, whereas this formulation didn't. 118 namespace detail { 119 extern MFBT_DATA const char gTwoCharEscapes[256]; 120 } // namespace detail 121 122 class JSONWriter 123 { 124 // From http://www.ietf.org/rfc/rfc4627.txt: 125 // 126 // "All Unicode characters may be placed within the quotation marks except 127 // for the characters that must be escaped: quotation mark, reverse 128 // solidus, and the control characters (U+0000 through U+001F)." 129 // 130 // This implementation uses two-char escape sequences where possible, namely: 131 // 132 // \", \\, \b, \f, \n, \r, \t 133 // 134 // All control characters not in the above list are represented with a 135 // six-char escape sequence, e.g. '\u000b' (a.k.a. '\v'). 136 // 137 class EscapedString 138 { 139 // Only one of |mUnownedStr| and |mOwnedStr| are ever non-null. |mIsOwned| 140 // indicates which one is in use. They're not within a union because that 141 // wouldn't work with UniquePtr. 142 bool mIsOwned; 143 const char* mUnownedStr; 144 UniquePtr<char[]> mOwnedStr; 145 SanityCheck()146 void SanityCheck() const 147 { 148 MOZ_ASSERT_IF( mIsOwned, mOwnedStr.get() && !mUnownedStr); 149 MOZ_ASSERT_IF(!mIsOwned, !mOwnedStr.get() && mUnownedStr); 150 } 151 hexDigitToAsciiChar(uint8_t u)152 static char hexDigitToAsciiChar(uint8_t u) 153 { 154 u = u & 0xf; 155 return u < 10 ? '0' + u : 'a' + (u - 10); 156 } 157 158 public: EscapedString(const char * aStr)159 explicit EscapedString(const char* aStr) 160 : mUnownedStr(nullptr) 161 , mOwnedStr(nullptr) 162 { 163 const char* p; 164 165 // First, see if we need to modify the string. 166 size_t nExtra = 0; 167 p = aStr; 168 while (true) { 169 uint8_t u = *p; // ensure it can't be interpreted as negative 170 if (u == 0) { 171 break; 172 } 173 if (detail::gTwoCharEscapes[u]) { 174 nExtra += 1; 175 } else if (u <= 0x1f) { 176 nExtra += 5; 177 } 178 p++; 179 } 180 181 if (nExtra == 0) { 182 // No escapes needed. Easy. 183 mIsOwned = false; 184 mUnownedStr = aStr; 185 return; 186 } 187 188 // Escapes are needed. We'll create a new string. 189 mIsOwned = true; 190 size_t len = (p - aStr) + nExtra; 191 mOwnedStr = MakeUnique<char[]>(len + 1); 192 193 p = aStr; 194 size_t i = 0; 195 196 while (true) { 197 uint8_t u = *p; // ensure it can't be interpreted as negative 198 if (u == 0) { 199 mOwnedStr[i] = 0; 200 break; 201 } 202 if (detail::gTwoCharEscapes[u]) { 203 mOwnedStr[i++] = '\\'; 204 mOwnedStr[i++] = detail::gTwoCharEscapes[u]; 205 } else if (u <= 0x1f) { 206 mOwnedStr[i++] = '\\'; 207 mOwnedStr[i++] = 'u'; 208 mOwnedStr[i++] = '0'; 209 mOwnedStr[i++] = '0'; 210 mOwnedStr[i++] = hexDigitToAsciiChar((u & 0x00f0) >> 4); 211 mOwnedStr[i++] = hexDigitToAsciiChar(u & 0x000f); 212 } else { 213 mOwnedStr[i++] = u; 214 } 215 p++; 216 } 217 } 218 ~EscapedString()219 ~EscapedString() 220 { 221 SanityCheck(); 222 } 223 get()224 const char* get() const 225 { 226 SanityCheck(); 227 return mIsOwned ? mOwnedStr.get() : mUnownedStr; 228 } 229 }; 230 231 public: 232 // Collections (objects and arrays) are printed in a multi-line style by 233 // default. This can be changed to a single-line style if SingleLineStyle is 234 // specified. If a collection is printed in single-line style, every nested 235 // collection within it is also printed in single-line style, even if 236 // multi-line style is requested. 237 enum CollectionStyle { 238 MultiLineStyle, // the default 239 SingleLineStyle 240 }; 241 242 protected: 243 const UniquePtr<JSONWriteFunc> mWriter; 244 Vector<bool, 8> mNeedComma; // do we need a comma at depth N? 245 Vector<bool, 8> mNeedNewlines; // do we need newlines at depth N? 246 size_t mDepth; // the current nesting depth 247 Indent()248 void Indent() 249 { 250 for (size_t i = 0; i < mDepth; i++) { 251 mWriter->Write(" "); 252 } 253 } 254 255 // Adds whatever is necessary (maybe a comma, and then a newline and 256 // whitespace) to separate an item (property or element) from what's come 257 // before. Separator()258 void Separator() 259 { 260 if (mNeedComma[mDepth]) { 261 mWriter->Write(","); 262 } 263 if (mDepth > 0 && mNeedNewlines[mDepth]) { 264 mWriter->Write("\n"); 265 Indent(); 266 } else if (mNeedComma[mDepth]) { 267 mWriter->Write(" "); 268 } 269 } 270 PropertyNameAndColon(const char * aName)271 void PropertyNameAndColon(const char* aName) 272 { 273 EscapedString escapedName(aName); 274 mWriter->Write("\""); 275 mWriter->Write(escapedName.get()); 276 mWriter->Write("\": "); 277 } 278 Scalar(const char * aMaybePropertyName,const char * aStringValue)279 void Scalar(const char* aMaybePropertyName, const char* aStringValue) 280 { 281 Separator(); 282 if (aMaybePropertyName) { 283 PropertyNameAndColon(aMaybePropertyName); 284 } 285 mWriter->Write(aStringValue); 286 mNeedComma[mDepth] = true; 287 } 288 QuotedScalar(const char * aMaybePropertyName,const char * aStringValue)289 void QuotedScalar(const char* aMaybePropertyName, const char* aStringValue) 290 { 291 Separator(); 292 if (aMaybePropertyName) { 293 PropertyNameAndColon(aMaybePropertyName); 294 } 295 mWriter->Write("\""); 296 mWriter->Write(aStringValue); 297 mWriter->Write("\""); 298 mNeedComma[mDepth] = true; 299 } 300 NewVectorEntries()301 void NewVectorEntries() 302 { 303 // If these tiny allocations OOM we might as well just crash because we 304 // must be in serious memory trouble. 305 MOZ_RELEASE_ASSERT(mNeedComma.resizeUninitialized(mDepth + 1)); 306 MOZ_RELEASE_ASSERT(mNeedNewlines.resizeUninitialized(mDepth + 1)); 307 mNeedComma[mDepth] = false; 308 mNeedNewlines[mDepth] = true; 309 } 310 311 void StartCollection(const char* aMaybePropertyName, const char* aStartChar, 312 CollectionStyle aStyle = MultiLineStyle) 313 { 314 Separator(); 315 if (aMaybePropertyName) { 316 mWriter->Write("\""); 317 mWriter->Write(aMaybePropertyName); 318 mWriter->Write("\": "); 319 } 320 mWriter->Write(aStartChar); 321 mNeedComma[mDepth] = true; 322 mDepth++; 323 NewVectorEntries(); 324 mNeedNewlines[mDepth] = 325 mNeedNewlines[mDepth - 1] && aStyle == MultiLineStyle; 326 } 327 328 // Adds the whitespace and closing char necessary to end a collection. EndCollection(const char * aEndChar)329 void EndCollection(const char* aEndChar) 330 { 331 if (mNeedNewlines[mDepth]) { 332 mWriter->Write("\n"); 333 mDepth--; 334 Indent(); 335 } else { 336 mDepth--; 337 } 338 mWriter->Write(aEndChar); 339 } 340 341 public: JSONWriter(UniquePtr<JSONWriteFunc> aWriter)342 explicit JSONWriter(UniquePtr<JSONWriteFunc> aWriter) 343 : mWriter(Move(aWriter)) 344 , mNeedComma() 345 , mNeedNewlines() 346 , mDepth(0) 347 { 348 NewVectorEntries(); 349 } 350 351 // Returns the JSONWriteFunc passed in at creation, for temporary use. The 352 // JSONWriter object still owns the JSONWriteFunc. WriteFunc()353 JSONWriteFunc* WriteFunc() const { return mWriter.get(); } 354 355 // For all the following functions, the "Prints:" comment indicates what the 356 // basic output looks like. However, it doesn't indicate the whitespace and 357 // trailing commas, which are automatically added as required. 358 // 359 // All property names and string properties are escaped as necessary. 360 361 // Prints: { 362 void Start(CollectionStyle aStyle = MultiLineStyle) 363 { 364 StartCollection(nullptr, "{", aStyle); 365 } 366 367 // Prints: } End()368 void End() { EndCollection("}\n"); } 369 370 // Prints: "<aName>": null NullProperty(const char * aName)371 void NullProperty(const char* aName) 372 { 373 Scalar(aName, "null"); 374 } 375 376 // Prints: null NullElement()377 void NullElement() { NullProperty(nullptr); } 378 379 // Prints: "<aName>": <aBool> BoolProperty(const char * aName,bool aBool)380 void BoolProperty(const char* aName, bool aBool) 381 { 382 Scalar(aName, aBool ? "true" : "false"); 383 } 384 385 // Prints: <aBool> BoolElement(bool aBool)386 void BoolElement(bool aBool) { BoolProperty(nullptr, aBool); } 387 388 // Prints: "<aName>": <aInt> IntProperty(const char * aName,int64_t aInt)389 void IntProperty(const char* aName, int64_t aInt) 390 { 391 char buf[64]; 392 snprintf_literal(buf, "%" PRId64, aInt); 393 Scalar(aName, buf); 394 } 395 396 // Prints: <aInt> IntElement(int64_t aInt)397 void IntElement(int64_t aInt) { IntProperty(nullptr, aInt); } 398 399 // Prints: "<aName>": <aDouble> DoubleProperty(const char * aName,double aDouble)400 void DoubleProperty(const char* aName, double aDouble) 401 { 402 static const size_t buflen = 64; 403 char buf[buflen]; 404 const double_conversion::DoubleToStringConverter &converter = 405 double_conversion::DoubleToStringConverter::EcmaScriptConverter(); 406 double_conversion::StringBuilder builder(buf, buflen); 407 converter.ToShortest(aDouble, &builder); 408 Scalar(aName, builder.Finalize()); 409 } 410 411 // Prints: <aDouble> DoubleElement(double aDouble)412 void DoubleElement(double aDouble) { DoubleProperty(nullptr, aDouble); } 413 414 // Prints: "<aName>": "<aStr>" StringProperty(const char * aName,const char * aStr)415 void StringProperty(const char* aName, const char* aStr) 416 { 417 EscapedString escapedStr(aStr); 418 QuotedScalar(aName, escapedStr.get()); 419 } 420 421 // Prints: "<aStr>" StringElement(const char * aStr)422 void StringElement(const char* aStr) { StringProperty(nullptr, aStr); } 423 424 // Prints: "<aName>": [ 425 void StartArrayProperty(const char* aName, 426 CollectionStyle aStyle = MultiLineStyle) 427 { 428 StartCollection(aName, "[", aStyle); 429 } 430 431 // Prints: [ 432 void StartArrayElement(CollectionStyle aStyle = MultiLineStyle) 433 { 434 StartArrayProperty(nullptr, aStyle); 435 } 436 437 // Prints: ] EndArray()438 void EndArray() { EndCollection("]"); } 439 440 // Prints: "<aName>": { 441 void StartObjectProperty(const char* aName, 442 CollectionStyle aStyle = MultiLineStyle) 443 { 444 StartCollection(aName, "{", aStyle); 445 } 446 447 // Prints: { 448 void StartObjectElement(CollectionStyle aStyle = MultiLineStyle) 449 { 450 StartObjectProperty(nullptr, aStyle); 451 } 452 453 // Prints: } EndObject()454 void EndObject() { EndCollection("}"); } 455 }; 456 457 } // namespace mozilla 458 459 #endif /* mozilla_JSONWriter_h */ 460 461