1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 /* A JSON pretty-printer class. */
8 
9 // A typical JSON-writing library requires you to first build up a data
10 // structure that represents a JSON object and then serialize it (to file, or
11 // somewhere else). This approach makes for a clean API, but building the data
12 // structure takes up memory. Sometimes that isn't desirable, such as when the
13 // JSON data is produced for memory reporting.
14 //
15 // The JSONWriter class instead allows JSON data to be written out
16 // incrementally without building up large data structures.
17 //
18 // The API is slightly uglier than you would see in a typical JSON-writing
19 // library, but still fairly easy to use. It's possible to generate invalid
20 // JSON with JSONWriter, but typically the most basic testing will identify any
21 // such problems.
22 //
23 // Similarly, there are no RAII facilities for automatically closing objects
24 // and arrays. These would be nice if you are generating all your code within
25 // nested functions, but in other cases you'd have to maintain an explicit
26 // stack of RAII objects and manually unwind it, which is no better than just
27 // calling "end" functions. Furthermore, the consequences of forgetting to
28 // close an object or array are obvious and, again, will be identified via
29 // basic testing, unlike other cases where RAII is typically used (e.g. smart
30 // pointers) and the consequences of defects are more subtle.
31 //
32 // Importantly, the class does solve the two hard problems of JSON
33 // pretty-printing, which are (a) correctly escaping strings, and (b) adding
34 // appropriate indentation and commas between items.
35 //
36 // By default, every property is placed on its own line. However, it is
37 // possible to request that objects and arrays be placed entirely on a single
38 // line, which can reduce output size significantly in some cases.
39 //
// Strings used (for property names and string property values) are passed as
// |Span<const char>| or as character arrays / string literals (a plain |const
// char*| is not accepted), and can be ASCII or UTF-8.
42 //
43 // EXAMPLE
44 // -------
45 // Assume that |MyWriteFunc| is a class that implements |JSONWriteFunc|. The
46 // following code:
47 //
48 //   JSONWriter w(MakeUnique<MyWriteFunc>());
49 //   w.Start();
50 //   {
51 //     w.NullProperty("null");
52 //     w.BoolProperty("bool", true);
53 //     w.IntProperty("int", 1);
54 //     w.StartArrayProperty("array");
55 //     {
56 //       w.StringElement("string");
57 //       w.StartObjectElement();
58 //       {
59 //         w.DoubleProperty("double", 3.4);
60 //         w.StartArrayProperty("single-line array", w.SingleLineStyle);
61 //         {
62 //           w.IntElement(1);
//           w.StartObjectElement();  // SingleLineStyle is inherited from
//           w.EndObject();           //   above for this collection
//         }
//         w.EndArray();
//       }
//       w.EndObject();
//     }
//     w.EndArray();
71 //   }
72 //   w.End();
73 //
74 // will produce pretty-printed output for the following JSON object:
75 //
76 //  {
77 //   "null": null,
78 //   "bool": true,
79 //   "int": 1,
80 //   "array": [
81 //    "string",
82 //    {
83 //     "double": 3.4,
84 //     "single-line array": [1, {}]
85 //    }
86 //   ]
87 //  }
88 //
89 // The nesting in the example code is obviously optional, but can aid
90 // readability.
91 
92 #ifndef mozilla_JSONWriter_h
93 #define mozilla_JSONWriter_h
94 
95 #include "double-conversion/double-conversion.h"
96 #include "mozilla/Assertions.h"
97 #include "mozilla/IntegerPrintfMacros.h"
98 #include "mozilla/PodOperations.h"
99 #include "mozilla/Span.h"
100 #include "mozilla/Sprintf.h"
101 #include "mozilla/UniquePtr.h"
102 #include "mozilla/Vector.h"
103 
104 #include <utility>
105 
106 namespace mozilla {
107 
// A quasi-functor for JSONWriter. We don't use a true functor because that
// requires templatizing JSONWriter, and the templatization seeps to lots of
// places we don't want it to.
//
// JSONWriter hands every piece of its output (punctuation, whitespace,
// property names and values) to Write(), one span at a time and in output
// order. Where the characters end up is entirely up to the implementation.
class JSONWriteFunc {
 public:
  // Receives the next chunk of JSON text. JSONWriter passes spans that view
  // stack buffers and temporaries, so presumably the characters must be
  // consumed before returning -- TODO confirm for other callers.
  virtual void Write(const Span<const char>& aStr) = 0;
  // Virtual so that implementations are destroyed correctly through the
  // UniquePtr<JSONWriteFunc> that JSONWriter holds.
  virtual ~JSONWriteFunc() = default;
};
116 
// Ideally this would be within |EscapedString| but when compiling with GCC
// on Linux that caused link errors, whereas this formulation didn't.
namespace detail {
// Lookup table indexed by byte value. A non-zero entry is the second character
// of the two-char escape sequence for that byte (it is emitted right after a
// backslash); a zero entry means the byte has no two-char escape. Only
// declared here; the definition lives elsewhere.
extern MFBT_DATA const char gTwoCharEscapes[256];
}  // namespace detail
122 
123 class JSONWriter {
124   // From http://www.ietf.org/rfc/rfc4627.txt:
125   //
126   //   "All Unicode characters may be placed within the quotation marks except
127   //   for the characters that must be escaped: quotation mark, reverse
128   //   solidus, and the control characters (U+0000 through U+001F)."
129   //
130   // This implementation uses two-char escape sequences where possible, namely:
131   //
132   //   \", \\, \b, \f, \n, \r, \t
133   //
134   // All control characters not in the above list are represented with a
135   // six-char escape sequence, e.g. '\u000b' (a.k.a. '\v').
136   //
137   class EscapedString {
138     // `mStringSpan` initially points at the user-provided string. If that
139     // string needs escaping, `mStringSpan` will point at `mOwnedStr` below.
140     Span<const char> mStringSpan;
141     // String storage in case escaping is actually needed, null otherwise.
142     UniquePtr<char[]> mOwnedStr;
143 
CheckInvariants()144     void CheckInvariants() const {
145       // Either there was no escaping so `mOwnedStr` is null, or escaping was
146       // needed, in which case `mStringSpan` should point at `mOwnedStr`.
147       MOZ_ASSERT(!mOwnedStr || mStringSpan.data() == mOwnedStr.get());
148     }
149 
hexDigitToAsciiChar(uint8_t u)150     static char hexDigitToAsciiChar(uint8_t u) {
151       u = u & 0xf;
152       return u < 10 ? '0' + u : 'a' + (u - 10);
153     }
154 
155    public:
EscapedString(const Span<const char> & aStr)156     explicit EscapedString(const Span<const char>& aStr) : mStringSpan(aStr) {
157       // First, see if we need to modify the string.
158       size_t nExtra = 0;
159       for (const char& c : aStr) {
160         // ensure it can't be interpreted as negative
161         uint8_t u = static_cast<uint8_t>(c);
162         if (u == 0) {
163           // Null terminator within the span, assume we may have been given a
164           // span to a buffer that contains a null-terminated string in it.
165           // We need to truncate the Span so that it doesn't include this null
166           // terminator and anything past it; Either we will return it as-is, or
167           // processing should stop there.
168           mStringSpan = mStringSpan.First(&c - mStringSpan.data());
169           break;
170         }
171         if (detail::gTwoCharEscapes[u]) {
172           nExtra += 1;
173         } else if (u <= 0x1f) {
174           nExtra += 5;
175         }
176       }
177 
178       // Note: Don't use `aStr` anymore, as it could contain a null terminator;
179       // use the correctly-sized `mStringSpan` instead.
180 
181       if (nExtra == 0) {
182         // No escapes needed. mStringSpan already points at the original string.
183         CheckInvariants();
184         return;
185       }
186 
187       // Escapes are needed. We'll create a new string.
188       mOwnedStr = MakeUnique<char[]>(mStringSpan.Length() + nExtra);
189 
190       size_t i = 0;
191       for (const char c : mStringSpan) {
192         // ensure it can't be interpreted as negative
193         uint8_t u = static_cast<uint8_t>(c);
194         MOZ_ASSERT(u != 0, "Null terminator should have been handled above");
195         if (detail::gTwoCharEscapes[u]) {
196           mOwnedStr[i++] = '\\';
197           mOwnedStr[i++] = detail::gTwoCharEscapes[u];
198         } else if (u <= 0x1f) {
199           mOwnedStr[i++] = '\\';
200           mOwnedStr[i++] = 'u';
201           mOwnedStr[i++] = '0';
202           mOwnedStr[i++] = '0';
203           mOwnedStr[i++] = hexDigitToAsciiChar((u & 0x00f0) >> 4);
204           mOwnedStr[i++] = hexDigitToAsciiChar(u & 0x000f);
205         } else {
206           mOwnedStr[i++] = u;
207         }
208       }
209       MOZ_ASSERT(i == mStringSpan.Length() + nExtra);
210       mStringSpan = Span<const char>(mOwnedStr.get(), i);
211       CheckInvariants();
212     }
213 
214     explicit EscapedString(const char* aStr) = delete;
215 
SpanRef()216     const Span<const char>& SpanRef() const { return mStringSpan; }
217   };
218 
219  public:
  // Collections (objects and arrays) are printed in a multi-line style by
  // default. This can be changed to a single-line style if SingleLineStyle is
  // specified. If a collection is printed in single-line style, every nested
  // collection within it is also printed in single-line style, even if
  // multi-line style is requested.
  enum CollectionStyle {
    MultiLineStyle,  // the default: each item on its own, indented line
    SingleLineStyle  // all items on one line, separated by ", "
  };
229 
230  protected:
  // Fixed pieces of JSON syntax and whitespace, pre-wrapped as spans so they
  // can be handed straight to the JSONWriteFunc.
  static constexpr Span<const char> scArrayBeginString = MakeStringSpan("[");
  static constexpr Span<const char> scArrayEndString = MakeStringSpan("]");
  static constexpr Span<const char> scCommaString = MakeStringSpan(",");
  // Also serves as the "no property name" marker for element functions.
  static constexpr Span<const char> scEmptyString = MakeStringSpan("");
  static constexpr Span<const char> scFalseString = MakeStringSpan("false");
  static constexpr Span<const char> scNewLineString = MakeStringSpan("\n");
  static constexpr Span<const char> scNullString = MakeStringSpan("null");
  static constexpr Span<const char> scObjectBeginString = MakeStringSpan("{");
  static constexpr Span<const char> scObjectEndString = MakeStringSpan("}");
  // Written before and after a property name: "<name>": 
  static constexpr Span<const char> scPropertyBeginString =
      MakeStringSpan("\"");
  static constexpr Span<const char> scPropertyEndString =
      MakeStringSpan("\": ");
  static constexpr Span<const char> scQuoteString = MakeStringSpan("\"");
  static constexpr Span<const char> scSpaceString = MakeStringSpan(" ");
  static constexpr Span<const char> scTopObjectBeginString =
      MakeStringSpan("{");
  // Unlike nested objects, the top-level object ends with a newline.
  static constexpr Span<const char> scTopObjectEndString =
      MakeStringSpan("}\n");
  static constexpr Span<const char> scTrueString = MakeStringSpan("true");

  // Destination for all output; owned by this writer.
  const UniquePtr<JSONWriteFunc> mWriter;
  Vector<bool, 8> mNeedComma;     // do we need a comma at depth N?
  Vector<bool, 8> mNeedNewlines;  // do we need newlines at depth N?
  size_t mDepth;                  // the current nesting depth
256 
Indent()257   void Indent() {
258     for (size_t i = 0; i < mDepth; i++) {
259       mWriter->Write(scSpaceString);
260     }
261   }
262 
263   // Adds whatever is necessary (maybe a comma, and then a newline and
264   // whitespace) to separate an item (property or element) from what's come
265   // before.
Separator()266   void Separator() {
267     if (mNeedComma[mDepth]) {
268       mWriter->Write(scCommaString);
269     }
270     if (mDepth > 0 && mNeedNewlines[mDepth]) {
271       mWriter->Write(scNewLineString);
272       Indent();
273     } else if (mNeedComma[mDepth]) {
274       mWriter->Write(scSpaceString);
275     }
276   }
277 
PropertyNameAndColon(const Span<const char> & aName)278   void PropertyNameAndColon(const Span<const char>& aName) {
279     mWriter->Write(scPropertyBeginString);
280     mWriter->Write(EscapedString(aName).SpanRef());
281     mWriter->Write(scPropertyEndString);
282   }
283 
Scalar(const Span<const char> & aMaybePropertyName,const Span<const char> & aStringValue)284   void Scalar(const Span<const char>& aMaybePropertyName,
285               const Span<const char>& aStringValue) {
286     Separator();
287     if (!aMaybePropertyName.empty()) {
288       PropertyNameAndColon(aMaybePropertyName);
289     }
290     mWriter->Write(aStringValue);
291     mNeedComma[mDepth] = true;
292   }
293 
QuotedScalar(const Span<const char> & aMaybePropertyName,const Span<const char> & aStringValue)294   void QuotedScalar(const Span<const char>& aMaybePropertyName,
295                     const Span<const char>& aStringValue) {
296     Separator();
297     if (!aMaybePropertyName.empty()) {
298       PropertyNameAndColon(aMaybePropertyName);
299     }
300     mWriter->Write(scQuoteString);
301     mWriter->Write(aStringValue);
302     mWriter->Write(scQuoteString);
303     mNeedComma[mDepth] = true;
304   }
305 
NewVectorEntries()306   void NewVectorEntries() {
307     // If these tiny allocations OOM we might as well just crash because we
308     // must be in serious memory trouble.
309     MOZ_RELEASE_ASSERT(mNeedComma.resizeUninitialized(mDepth + 1));
310     MOZ_RELEASE_ASSERT(mNeedNewlines.resizeUninitialized(mDepth + 1));
311     mNeedComma[mDepth] = false;
312     mNeedNewlines[mDepth] = true;
313   }
314 
315   void StartCollection(const Span<const char>& aMaybePropertyName,
316                        const Span<const char>& aStartChar,
317                        CollectionStyle aStyle = MultiLineStyle) {
318     Separator();
319     if (!aMaybePropertyName.empty()) {
320       PropertyNameAndColon(aMaybePropertyName);
321     }
322     mWriter->Write(aStartChar);
323     mNeedComma[mDepth] = true;
324     mDepth++;
325     NewVectorEntries();
326     mNeedNewlines[mDepth] =
327         mNeedNewlines[mDepth - 1] && aStyle == MultiLineStyle;
328   }
329 
330   // Adds the whitespace and closing char necessary to end a collection.
EndCollection(const Span<const char> & aEndChar)331   void EndCollection(const Span<const char>& aEndChar) {
332     MOZ_ASSERT(mDepth > 0);
333     if (mNeedNewlines[mDepth]) {
334       mWriter->Write(scNewLineString);
335       mDepth--;
336       Indent();
337     } else {
338       mDepth--;
339     }
340     mWriter->Write(aEndChar);
341   }
342 
343  public:
JSONWriter(UniquePtr<JSONWriteFunc> aWriter)344   explicit JSONWriter(UniquePtr<JSONWriteFunc> aWriter)
345       : mWriter(std::move(aWriter)), mNeedComma(), mNeedNewlines(), mDepth(0) {
346     NewVectorEntries();
347   }
348 
349   // Returns the JSONWriteFunc passed in at creation, for temporary use. The
350   // JSONWriter object still owns the JSONWriteFunc.
WriteFunc()351   JSONWriteFunc* WriteFunc() const { return mWriter.get(); }
352 
353   // For all the following functions, the "Prints:" comment indicates what the
354   // basic output looks like. However, it doesn't indicate the whitespace and
355   // trailing commas, which are automatically added as required.
356   //
357   // All property names and string properties are escaped as necessary.
358 
359   // Prints: {
360   void Start(CollectionStyle aStyle = MultiLineStyle) {
361     StartCollection(scEmptyString, scTopObjectBeginString, aStyle);
362   }
363 
364   // Prints: } and final newline.
End()365   void End() { EndCollection(scTopObjectEndString); }
366 
367   // Prints: "<aName>": null
NullProperty(const Span<const char> & aName)368   void NullProperty(const Span<const char>& aName) {
369     Scalar(aName, scNullString);
370   }
371 
372   template <size_t N>
NullProperty(const char (& aName)[N])373   void NullProperty(const char (&aName)[N]) {
374     // Keep null terminator from literal strings, will be removed by
375     // EscapedString. This way C buffer arrays can be used as well.
376     NullProperty(Span<const char>(aName, N));
377   }
378 
379   // Prints: null
NullElement()380   void NullElement() { NullProperty(scEmptyString); }
381 
382   // Prints: "<aName>": <aBool>
BoolProperty(const Span<const char> & aName,bool aBool)383   void BoolProperty(const Span<const char>& aName, bool aBool) {
384     Scalar(aName, aBool ? scTrueString : scFalseString);
385   }
386 
387   template <size_t N>
BoolProperty(const char (& aName)[N],bool aBool)388   void BoolProperty(const char (&aName)[N], bool aBool) {
389     // Keep null terminator from literal strings, will be removed by
390     // EscapedString. This way C buffer arrays can be used as well.
391     BoolProperty(Span<const char>(aName, N), aBool);
392   }
393 
394   // Prints: <aBool>
BoolElement(bool aBool)395   void BoolElement(bool aBool) { BoolProperty(scEmptyString, aBool); }
396 
397   // Prints: "<aName>": <aInt>
IntProperty(const Span<const char> & aName,int64_t aInt)398   void IntProperty(const Span<const char>& aName, int64_t aInt) {
399     char buf[64];
400     int len = SprintfLiteral(buf, "%" PRId64, aInt);
401     MOZ_RELEASE_ASSERT(len > 0);
402     Scalar(aName, Span<const char>(buf, size_t(len)));
403   }
404 
405   template <size_t N>
IntProperty(const char (& aName)[N],int64_t aInt)406   void IntProperty(const char (&aName)[N], int64_t aInt) {
407     // Keep null terminator from literal strings, will be removed by
408     // EscapedString. This way C buffer arrays can be used as well.
409     IntProperty(Span<const char>(aName, N), aInt);
410   }
411 
412   // Prints: <aInt>
IntElement(int64_t aInt)413   void IntElement(int64_t aInt) { IntProperty(scEmptyString, aInt); }
414 
415   // Prints: "<aName>": <aDouble>
DoubleProperty(const Span<const char> & aName,double aDouble)416   void DoubleProperty(const Span<const char>& aName, double aDouble) {
417     static const size_t buflen = 64;
418     char buf[buflen];
419     const double_conversion::DoubleToStringConverter& converter =
420         double_conversion::DoubleToStringConverter::EcmaScriptConverter();
421     double_conversion::StringBuilder builder(buf, buflen);
422     converter.ToShortest(aDouble, &builder);
423     // TODO: The builder should know the length?!
424     Scalar(aName, MakeStringSpan(builder.Finalize()));
425   }
426 
427   template <size_t N>
DoubleProperty(const char (& aName)[N],double aDouble)428   void DoubleProperty(const char (&aName)[N], double aDouble) {
429     // Keep null terminator from literal strings, will be removed by
430     // EscapedString. This way C buffer arrays can be used as well.
431     DoubleProperty(Span<const char>(aName, N), aDouble);
432   }
433 
434   // Prints: <aDouble>
DoubleElement(double aDouble)435   void DoubleElement(double aDouble) { DoubleProperty(scEmptyString, aDouble); }
436 
437   // Prints: "<aName>": "<aStr>"
StringProperty(const Span<const char> & aName,const Span<const char> & aStr)438   void StringProperty(const Span<const char>& aName,
439                       const Span<const char>& aStr) {
440     QuotedScalar(aName, EscapedString(aStr).SpanRef());
441   }
442 
443   template <size_t NN>
StringProperty(const char (& aName)[NN],const Span<const char> & aStr)444   void StringProperty(const char (&aName)[NN], const Span<const char>& aStr) {
445     // Keep null terminator from literal strings, will be removed by
446     // EscapedString. This way C buffer arrays can be used as well.
447     StringProperty(Span<const char>(aName, NN), aStr);
448   }
449 
450   template <size_t SN>
StringProperty(const Span<const char> & aName,const char (& aStr)[SN])451   void StringProperty(const Span<const char>& aName, const char (&aStr)[SN]) {
452     // Keep null terminator from literal strings, will be removed by
453     // EscapedString. This way C buffer arrays can be used as well.
454     StringProperty(aName, Span<const char>(aStr, SN));
455   }
456 
457   template <size_t NN, size_t SN>
StringProperty(const char (& aName)[NN],const char (& aStr)[SN])458   void StringProperty(const char (&aName)[NN], const char (&aStr)[SN]) {
459     // Keep null terminators from literal strings, will be removed by
460     // EscapedString. This way C buffer arrays can be used as well.
461     StringProperty(Span<const char>(aName, NN), Span<const char>(aStr, SN));
462   }
463 
464   // Prints: "<aStr>"
StringElement(const Span<const char> & aStr)465   void StringElement(const Span<const char>& aStr) {
466     StringProperty(scEmptyString, aStr);
467   }
468 
469   template <size_t N>
StringElement(const char (& aName)[N])470   void StringElement(const char (&aName)[N]) {
471     // Keep null terminator from literal strings, will be removed by
472     // EscapedString. This way C buffer arrays can be used as well.
473     StringElement(Span<const char>(aName, N));
474   }
475 
476   // Prints: "<aName>": [
477   void StartArrayProperty(const Span<const char>& aName,
478                           CollectionStyle aStyle = MultiLineStyle) {
479     StartCollection(aName, scArrayBeginString, aStyle);
480   }
481 
482   template <size_t N>
483   void StartArrayProperty(const char (&aName)[N],
484                           CollectionStyle aStyle = MultiLineStyle) {
485     // Keep null terminator from literal strings, will be removed by
486     // EscapedString. This way C buffer arrays can be used as well.
487     StartArrayProperty(Span<const char>(aName, N), aStyle);
488   }
489 
490   // Prints: [
491   void StartArrayElement(CollectionStyle aStyle = MultiLineStyle) {
492     StartArrayProperty(scEmptyString, aStyle);
493   }
494 
495   // Prints: ]
EndArray()496   void EndArray() { EndCollection(scArrayEndString); }
497 
498   // Prints: "<aName>": {
499   void StartObjectProperty(const Span<const char>& aName,
500                            CollectionStyle aStyle = MultiLineStyle) {
501     StartCollection(aName, scObjectBeginString, aStyle);
502   }
503 
504   template <size_t N>
505   void StartObjectProperty(const char (&aName)[N],
506                            CollectionStyle aStyle = MultiLineStyle) {
507     // Keep null terminator from literal strings, will be removed by
508     // EscapedString. This way C buffer arrays can be used as well.
509     StartObjectProperty(Span<const char>(aName, N), aStyle);
510   }
511 
512   // Prints: {
513   void StartObjectElement(CollectionStyle aStyle = MultiLineStyle) {
514     StartObjectProperty(scEmptyString, aStyle);
515   }
516 
517   // Prints: }
EndObject()518   void EndObject() { EndCollection(scObjectEndString); }
519 };
520 
521 }  // namespace mozilla
522 
523 #endif /* mozilla_JSONWriter_h */
524