1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 /* A JSON pretty-printer class. */
8 
9 // A typical JSON-writing library requires you to first build up a data
10 // structure that represents a JSON object and then serialize it (to file, or
11 // somewhere else). This approach makes for a clean API, but building the data
12 // structure takes up memory. Sometimes that isn't desirable, such as when the
13 // JSON data is produced for memory reporting.
14 //
15 // The JSONWriter class instead allows JSON data to be written out
16 // incrementally without building up large data structures.
17 //
18 // The API is slightly uglier than you would see in a typical JSON-writing
19 // library, but still fairly easy to use. It's possible to generate invalid
20 // JSON with JSONWriter, but typically the most basic testing will identify any
21 // such problems.
22 //
23 // Similarly, there are no RAII facilities for automatically closing objects
24 // and arrays. These would be nice if you are generating all your code within
25 // nested functions, but in other cases you'd have to maintain an explicit
26 // stack of RAII objects and manually unwind it, which is no better than just
27 // calling "end" functions. Furthermore, the consequences of forgetting to
28 // close an object or array are obvious and, again, will be identified via
29 // basic testing, unlike other cases where RAII is typically used (e.g. smart
30 // pointers) and the consequences of defects are more subtle.
31 //
32 // Importantly, the class does solve the two hard problems of JSON
33 // pretty-printing, which are (a) correctly escaping strings, and (b) adding
34 // appropriate indentation and commas between items.
35 //
36 // By default, every property is placed on its own line. However, it is
37 // possible to request that objects and arrays be placed entirely on a single
38 // line, which can reduce output size significantly in some cases.
39 //
40 // Strings used (for property names and string property values) are |const
41 // char*| throughout, and can be ASCII or UTF-8.
42 //
43 // EXAMPLE
44 // -------
45 // Assume that |MyWriteFunc| is a class that implements |JSONWriteFunc|. The
46 // following code:
47 //
48 //   JSONWriter w(MakeUnique<MyWriteFunc>());
49 //   w.Start();
50 //   {
51 //     w.NullProperty("null");
52 //     w.BoolProperty("bool", true);
53 //     w.IntProperty("int", 1);
54 //     w.StartArrayProperty("array");
55 //     {
56 //       w.StringElement("string");
57 //       w.StartObjectElement();
58 //       {
59 //         w.DoubleProperty("double", 3.4);
60 //         w.StartArrayProperty("single-line array", w.SingleLineStyle);
61 //         {
62 //           w.IntElement(1);
63 //           w.StartObjectElement();  // SingleLineStyle is inherited from
64 //           w.EndObject();           //   above for this collection
65 //         }
66 //         w.EndArray();
67 //       }
68 //       w.EndObject();
69 //     }
70 //     w.EndArray();
71 //   }
72 //   w.End();
73 //
74 // will produce pretty-printed output for the following JSON object:
75 //
76 //  {
77 //   "null": null,
78 //   "bool": true,
79 //   "int": 1,
80 //   "array": [
81 //    "string",
82 //    {
83 //     "double": 3.4,
84 //     "single-line array": [1, {}]
85 //    }
86 //   ]
87 //  }
88 //
89 // The nesting in the example code is obviously optional, but can aid
90 // readability.
91 
92 #ifndef mozilla_JSONWriter_h
93 #define mozilla_JSONWriter_h
94 
95 #include "mozilla/double-conversion.h"
96 #include "mozilla/IntegerPrintfMacros.h"
97 #include "mozilla/PodOperations.h"
98 #include "mozilla/Snprintf.h"
99 #include "mozilla/UniquePtr.h"
100 #include "mozilla/Vector.h"
101 
102 #include <stdio.h>
103 
104 namespace mozilla {
105 
// The output sink that JSONWriter pushes its text into. Implementations
// override Write(), which receives the output piece by piece as
// NUL-terminated strings.
//
// This is deliberately an abstract class rather than a true functor: a
// functor would force JSONWriter to become a template, and that
// templatization would then seep into lots of places we don't want it to.
class JSONWriteFunc
{
public:
  // Consumes the next piece of output.
  virtual void Write(const char* aStr) = 0;

  // Virtual so that implementations can be destroyed through a
  // |JSONWriteFunc| pointer.
  virtual ~JSONWriteFunc() = default;
};
115 
116 // Ideally this would be within |EscapedString| but when compiling with GCC
117 // on Linux that caused link errors, whereas this formulation didn't.
118 namespace detail {
119 extern MFBT_DATA const char gTwoCharEscapes[256];
120 } // namespace detail
121 
122 class JSONWriter
123 {
124   // From http://www.ietf.org/rfc/rfc4627.txt:
125   //
126   //   "All Unicode characters may be placed within the quotation marks except
127   //   for the characters that must be escaped: quotation mark, reverse
128   //   solidus, and the control characters (U+0000 through U+001F)."
129   //
130   // This implementation uses two-char escape sequences where possible, namely:
131   //
132   //   \", \\, \b, \f, \n, \r, \t
133   //
134   // All control characters not in the above list are represented with a
135   // six-char escape sequence, e.g. '\u000b' (a.k.a. '\v').
136   //
137   class EscapedString
138   {
139     // Only one of |mUnownedStr| and |mOwnedStr| are ever non-null. |mIsOwned|
140     // indicates which one is in use. They're not within a union because that
141     // wouldn't work with UniquePtr.
142     bool mIsOwned;
143     const char* mUnownedStr;
144     UniquePtr<char[]> mOwnedStr;
145 
SanityCheck()146     void SanityCheck() const
147     {
148       MOZ_ASSERT_IF( mIsOwned,  mOwnedStr.get() && !mUnownedStr);
149       MOZ_ASSERT_IF(!mIsOwned, !mOwnedStr.get() &&  mUnownedStr);
150     }
151 
hexDigitToAsciiChar(uint8_t u)152     static char hexDigitToAsciiChar(uint8_t u)
153     {
154       u = u & 0xf;
155       return u < 10 ? '0' + u : 'a' + (u - 10);
156     }
157 
158   public:
EscapedString(const char * aStr)159     explicit EscapedString(const char* aStr)
160       : mUnownedStr(nullptr)
161       , mOwnedStr(nullptr)
162     {
163       const char* p;
164 
165       // First, see if we need to modify the string.
166       size_t nExtra = 0;
167       p = aStr;
168       while (true) {
169         uint8_t u = *p;   // ensure it can't be interpreted as negative
170         if (u == 0) {
171           break;
172         }
173         if (detail::gTwoCharEscapes[u]) {
174           nExtra += 1;
175         } else if (u <= 0x1f) {
176           nExtra += 5;
177         }
178         p++;
179       }
180 
181       if (nExtra == 0) {
182         // No escapes needed. Easy.
183         mIsOwned = false;
184         mUnownedStr = aStr;
185         return;
186       }
187 
188       // Escapes are needed. We'll create a new string.
189       mIsOwned = true;
190       size_t len = (p - aStr) + nExtra;
191       mOwnedStr = MakeUnique<char[]>(len + 1);
192 
193       p = aStr;
194       size_t i = 0;
195 
196       while (true) {
197         uint8_t u = *p;   // ensure it can't be interpreted as negative
198         if (u == 0) {
199           mOwnedStr[i] = 0;
200           break;
201         }
202         if (detail::gTwoCharEscapes[u]) {
203           mOwnedStr[i++] = '\\';
204           mOwnedStr[i++] = detail::gTwoCharEscapes[u];
205         } else if (u <= 0x1f) {
206           mOwnedStr[i++] = '\\';
207           mOwnedStr[i++] = 'u';
208           mOwnedStr[i++] = '0';
209           mOwnedStr[i++] = '0';
210           mOwnedStr[i++] = hexDigitToAsciiChar((u & 0x00f0) >> 4);
211           mOwnedStr[i++] = hexDigitToAsciiChar(u & 0x000f);
212         } else {
213           mOwnedStr[i++] = u;
214         }
215         p++;
216       }
217     }
218 
~EscapedString()219     ~EscapedString()
220     {
221       SanityCheck();
222     }
223 
get()224     const char* get() const
225     {
226       SanityCheck();
227       return mIsOwned ? mOwnedStr.get() : mUnownedStr;
228     }
229   };
230 
231 public:
232   // Collections (objects and arrays) are printed in a multi-line style by
233   // default. This can be changed to a single-line style if SingleLineStyle is
234   // specified. If a collection is printed in single-line style, every nested
235   // collection within it is also printed in single-line style, even if
236   // multi-line style is requested.
237   enum CollectionStyle {
238     MultiLineStyle,   // the default
239     SingleLineStyle
240   };
241 
242 protected:
243   const UniquePtr<JSONWriteFunc> mWriter;
244   Vector<bool, 8> mNeedComma;     // do we need a comma at depth N?
245   Vector<bool, 8> mNeedNewlines;  // do we need newlines at depth N?
246   size_t mDepth;                  // the current nesting depth
247 
Indent()248   void Indent()
249   {
250     for (size_t i = 0; i < mDepth; i++) {
251       mWriter->Write(" ");
252     }
253   }
254 
255   // Adds whatever is necessary (maybe a comma, and then a newline and
256   // whitespace) to separate an item (property or element) from what's come
257   // before.
Separator()258   void Separator()
259   {
260     if (mNeedComma[mDepth]) {
261       mWriter->Write(",");
262     }
263     if (mDepth > 0 && mNeedNewlines[mDepth]) {
264       mWriter->Write("\n");
265       Indent();
266     } else if (mNeedComma[mDepth]) {
267       mWriter->Write(" ");
268     }
269   }
270 
PropertyNameAndColon(const char * aName)271   void PropertyNameAndColon(const char* aName)
272   {
273     EscapedString escapedName(aName);
274     mWriter->Write("\"");
275     mWriter->Write(escapedName.get());
276     mWriter->Write("\": ");
277   }
278 
Scalar(const char * aMaybePropertyName,const char * aStringValue)279   void Scalar(const char* aMaybePropertyName, const char* aStringValue)
280   {
281     Separator();
282     if (aMaybePropertyName) {
283       PropertyNameAndColon(aMaybePropertyName);
284     }
285     mWriter->Write(aStringValue);
286     mNeedComma[mDepth] = true;
287   }
288 
QuotedScalar(const char * aMaybePropertyName,const char * aStringValue)289   void QuotedScalar(const char* aMaybePropertyName, const char* aStringValue)
290   {
291     Separator();
292     if (aMaybePropertyName) {
293       PropertyNameAndColon(aMaybePropertyName);
294     }
295     mWriter->Write("\"");
296     mWriter->Write(aStringValue);
297     mWriter->Write("\"");
298     mNeedComma[mDepth] = true;
299   }
300 
NewVectorEntries()301   void NewVectorEntries()
302   {
303     // If these tiny allocations OOM we might as well just crash because we
304     // must be in serious memory trouble.
305     MOZ_RELEASE_ASSERT(mNeedComma.resizeUninitialized(mDepth + 1));
306     MOZ_RELEASE_ASSERT(mNeedNewlines.resizeUninitialized(mDepth + 1));
307     mNeedComma[mDepth] = false;
308     mNeedNewlines[mDepth] = true;
309   }
310 
311   void StartCollection(const char* aMaybePropertyName, const char* aStartChar,
312                        CollectionStyle aStyle = MultiLineStyle)
313   {
314     Separator();
315     if (aMaybePropertyName) {
316       mWriter->Write("\"");
317       mWriter->Write(aMaybePropertyName);
318       mWriter->Write("\": ");
319     }
320     mWriter->Write(aStartChar);
321     mNeedComma[mDepth] = true;
322     mDepth++;
323     NewVectorEntries();
324     mNeedNewlines[mDepth] =
325       mNeedNewlines[mDepth - 1] && aStyle == MultiLineStyle;
326   }
327 
328   // Adds the whitespace and closing char necessary to end a collection.
EndCollection(const char * aEndChar)329   void EndCollection(const char* aEndChar)
330   {
331     if (mNeedNewlines[mDepth]) {
332       mWriter->Write("\n");
333       mDepth--;
334       Indent();
335     } else {
336       mDepth--;
337     }
338     mWriter->Write(aEndChar);
339   }
340 
341 public:
JSONWriter(UniquePtr<JSONWriteFunc> aWriter)342   explicit JSONWriter(UniquePtr<JSONWriteFunc> aWriter)
343     : mWriter(Move(aWriter))
344     , mNeedComma()
345     , mNeedNewlines()
346     , mDepth(0)
347   {
348     NewVectorEntries();
349   }
350 
351   // Returns the JSONWriteFunc passed in at creation, for temporary use. The
352   // JSONWriter object still owns the JSONWriteFunc.
WriteFunc()353   JSONWriteFunc* WriteFunc() const { return mWriter.get(); }
354 
355   // For all the following functions, the "Prints:" comment indicates what the
356   // basic output looks like. However, it doesn't indicate the whitespace and
357   // trailing commas, which are automatically added as required.
358   //
359   // All property names and string properties are escaped as necessary.
360 
361   // Prints: {
362   void Start(CollectionStyle aStyle = MultiLineStyle)
363   {
364     StartCollection(nullptr, "{", aStyle);
365   }
366 
367   // Prints: }
End()368   void End() { EndCollection("}\n"); }
369 
370   // Prints: "<aName>": null
NullProperty(const char * aName)371   void NullProperty(const char* aName)
372   {
373     Scalar(aName, "null");
374   }
375 
376   // Prints: null
NullElement()377   void NullElement() { NullProperty(nullptr); }
378 
379   // Prints: "<aName>": <aBool>
BoolProperty(const char * aName,bool aBool)380   void BoolProperty(const char* aName, bool aBool)
381   {
382     Scalar(aName, aBool ? "true" : "false");
383   }
384 
385   // Prints: <aBool>
BoolElement(bool aBool)386   void BoolElement(bool aBool) { BoolProperty(nullptr, aBool); }
387 
388   // Prints: "<aName>": <aInt>
IntProperty(const char * aName,int64_t aInt)389   void IntProperty(const char* aName, int64_t aInt)
390   {
391     char buf[64];
392     snprintf_literal(buf, "%" PRId64, aInt);
393     Scalar(aName, buf);
394   }
395 
396   // Prints: <aInt>
IntElement(int64_t aInt)397   void IntElement(int64_t aInt) { IntProperty(nullptr, aInt); }
398 
399   // Prints: "<aName>": <aDouble>
DoubleProperty(const char * aName,double aDouble)400   void DoubleProperty(const char* aName, double aDouble)
401   {
402     static const size_t buflen = 64;
403     char buf[buflen];
404     const double_conversion::DoubleToStringConverter &converter =
405       double_conversion::DoubleToStringConverter::EcmaScriptConverter();
406     double_conversion::StringBuilder builder(buf, buflen);
407     converter.ToShortest(aDouble, &builder);
408     Scalar(aName, builder.Finalize());
409   }
410 
411   // Prints: <aDouble>
DoubleElement(double aDouble)412   void DoubleElement(double aDouble) { DoubleProperty(nullptr, aDouble); }
413 
414   // Prints: "<aName>": "<aStr>"
StringProperty(const char * aName,const char * aStr)415   void StringProperty(const char* aName, const char* aStr)
416   {
417     EscapedString escapedStr(aStr);
418     QuotedScalar(aName, escapedStr.get());
419   }
420 
421   // Prints: "<aStr>"
StringElement(const char * aStr)422   void StringElement(const char* aStr) { StringProperty(nullptr, aStr); }
423 
424   // Prints: "<aName>": [
425   void StartArrayProperty(const char* aName,
426                           CollectionStyle aStyle = MultiLineStyle)
427   {
428     StartCollection(aName, "[", aStyle);
429   }
430 
431   // Prints: [
432   void StartArrayElement(CollectionStyle aStyle = MultiLineStyle)
433   {
434     StartArrayProperty(nullptr, aStyle);
435   }
436 
437   // Prints: ]
EndArray()438   void EndArray() { EndCollection("]"); }
439 
440   // Prints: "<aName>": {
441   void StartObjectProperty(const char* aName,
442                            CollectionStyle aStyle = MultiLineStyle)
443   {
444     StartCollection(aName, "{", aStyle);
445   }
446 
447   // Prints: {
448   void StartObjectElement(CollectionStyle aStyle = MultiLineStyle)
449   {
450     StartObjectProperty(nullptr, aStyle);
451   }
452 
453   // Prints: }
EndObject()454   void EndObject() { EndCollection("}"); }
455 };
456 
457 } // namespace mozilla
458 
459 #endif /* mozilla_JSONWriter_h */
460 
461