1 /*
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <folly/json.h>
18
19 #include <algorithm>
20 #include <functional>
21 #include <iterator>
22 #include <sstream>
23 #include <type_traits>
24
25 #include <boost/algorithm/string.hpp>
26 #include <glog/logging.h>
27
28 #include <folly/Conv.h>
29 #include <folly/Portability.h>
30 #include <folly/Range.h>
31 #include <folly/String.h>
32 #include <folly/Unicode.h>
33 #include <folly/Utility.h>
34 #include <folly/lang/Bits.h>
35 #include <folly/portability/Constexpr.h>
36
37 namespace folly {
38
39 //////////////////////////////////////////////////////////////////////
40
41 namespace json {
42
43 namespace {
44
make_parse_error(unsigned int line,std::string const & context,std::string const & expected)45 parse_error make_parse_error(
46 unsigned int line,
47 std::string const& context,
48 std::string const& expected) {
49 return parse_error(to<std::string>(
50 "json parse error on line ",
51 line,
52 !context.empty() ? to<std::string>(" near `", context, '\'') : "",
53 ": ",
54 expected));
55 }
56
57 struct Printer {
58 // Context class is allows to restore the path to element that we are about to
59 // print so that if error happens we can throw meaningful exception.
60 class Context {
61 public:
Context(const Context * parent_context,const dynamic & key)62 Context(const Context* parent_context, const dynamic& key)
63 : parent_context_(parent_context), key_(key), is_key_(false) {}
Context(const Context * parent_context,const dynamic & key,bool is_key)64 Context(const Context* parent_context, const dynamic& key, bool is_key)
65 : parent_context_(parent_context), key_(key), is_key_(is_key) {}
66
67 // Return location description of a context as a chain of keys
68 // ex., '"outherKey"->"innerKey"'.
locationDescription() const69 std::string locationDescription() const {
70 std::vector<std::string> keys;
71 const Context* ptr = parent_context_;
72 while (ptr) {
73 keys.push_back(ptr->getName());
74 ptr = ptr->parent_context_;
75 }
76 keys.push_back(getName());
77 std::ostringstream stream;
78 std::reverse_copy(
79 keys.begin(),
80 keys.end() - 1,
81 std::ostream_iterator<std::string>(stream, "->"));
82
83 // Add current key.
84 stream << keys.back();
85 return stream.str();
86 }
getName() const87 std::string getName() const {
88 return Printer::toStringOr(key_, "<unprintable>");
89 }
typeDescription() const90 std::string typeDescription() const { return is_key_ ? "key" : "value"; }
91
92 private:
93 const Context* const parent_context_;
94 const dynamic& key_;
95 bool is_key_;
96 };
97
Printerfolly::json::__anon3d94b9db0111::Printer98 explicit Printer(
99 std::string& out, unsigned* indentLevel, serialization_opts const* opts)
100 : out_(out), indentLevel_(indentLevel), opts_(*opts) {}
101
operator ()folly::json::__anon3d94b9db0111::Printer102 void operator()(dynamic const& v, const Context& context) const {
103 (*this)(v, &context);
104 }
operator ()folly::json::__anon3d94b9db0111::Printer105 void operator()(dynamic const& v, const Context* context) const {
106 switch (v.type()) {
107 case dynamic::DOUBLE:
108 if (!opts_.allow_nan_inf) {
109 if (std::isnan(v.asDouble())) {
110 throw json::print_error(
111 "folly::toJson: JSON object value was a NaN when serializing " +
112 contextDescription(context));
113 }
114 if (std::isinf(v.asDouble())) {
115 throw json::print_error(
116 "folly::toJson: JSON object value was an INF when serializing " +
117 contextDescription(context));
118 }
119 }
120 toAppend(
121 v.asDouble(), &out_, opts_.double_mode, opts_.double_num_digits);
122 break;
123 case dynamic::INT64: {
124 auto intval = v.asInt();
125 if (opts_.javascript_safe) {
126 // Use folly::to to check that this integer can be represented
127 // as a double without loss of precision.
128 intval = int64_t(to<double>(intval));
129 }
130 toAppend(intval, &out_);
131 break;
132 }
133 case dynamic::BOOL:
134 out_ += v.asBool() ? "true" : "false";
135 break;
136 case dynamic::NULLT:
137 out_ += "null";
138 break;
139 case dynamic::STRING:
140 escapeString(v.stringPiece(), out_, opts_);
141 break;
142 case dynamic::OBJECT:
143 printObject(v, context);
144 break;
145 case dynamic::ARRAY:
146 printArray(v, context);
147 break;
148 default:
149 CHECK(0) << "Bad type " << v.type();
150 }
151 }
152
153 private:
printKVfolly::json::__anon3d94b9db0111::Printer154 void printKV(
155 const std::pair<const dynamic, dynamic>& p,
156 const Context* context) const {
157 if (!opts_.allow_non_string_keys && !p.first.isString()) {
158 throw json::print_error(
159 "folly::toJson: JSON object key " +
160 toStringOr(p.first, "<unprintable key>") +
161 " was not a string when serializing key at " +
162 Context(context, p.first, true).locationDescription());
163 }
164 (*this)(p.first, Context(context, p.first, true)); // Key
165 mapColon();
166 (*this)(p.second, Context(context, p.first, false)); // Value
167 }
168
169 template <typename Iterator>
printKVPairsfolly::json::__anon3d94b9db0111::Printer170 void printKVPairs(
171 Iterator begin, Iterator end, const Context* context) const {
172 printKV(*begin, context);
173 for (++begin; begin != end; ++begin) {
174 out_ += ',';
175 newline();
176 printKV(*begin, context);
177 }
178 }
179
printObjectfolly::json::__anon3d94b9db0111::Printer180 void printObject(dynamic const& o, const Context* context) const {
181 if (o.empty()) {
182 out_ += "{}";
183 return;
184 }
185
186 out_ += '{';
187 indent();
188 newline();
189 if (opts_.sort_keys || opts_.sort_keys_by) {
190 using ref = std::reference_wrapper<decltype(o.items())::value_type const>;
191 auto sort_keys_by = [&](auto begin, auto end, const auto& comp) {
192 std::sort(begin, end, [&](ref a, ref b) {
193 // Only compare keys. No ordering among identical keys.
194 return comp(a.get().first, b.get().first);
195 });
196 };
197 std::vector<ref> refs(o.items().begin(), o.items().end());
198 if (opts_.sort_keys_by) {
199 sort_keys_by(refs.begin(), refs.end(), opts_.sort_keys_by);
200 } else {
201 sort_keys_by(refs.begin(), refs.end(), std::less<>());
202 }
203 printKVPairs(refs.cbegin(), refs.cend(), context);
204 } else {
205 printKVPairs(o.items().begin(), o.items().end(), context);
206 }
207 outdent();
208 newline();
209 out_ += '}';
210 }
211
toStringOrfolly::json::__anon3d94b9db0111::Printer212 static std::string toStringOr(dynamic const& v, const char* placeholder) {
213 try {
214 std::string result;
215 unsigned indentLevel = 0;
216 serialization_opts opts;
217 opts.allow_nan_inf = true;
218 opts.allow_non_string_keys = true;
219 Printer printer(result, &indentLevel, &opts);
220 printer(v, nullptr);
221 return result;
222 } catch (...) {
223 return placeholder;
224 }
225 }
226
contextDescriptionfolly::json::__anon3d94b9db0111::Printer227 static std::string contextDescription(const Context* context) {
228 if (!context) {
229 return "<undefined location>";
230 }
231 return context->typeDescription() + " at " + context->locationDescription();
232 }
233
printArrayfolly::json::__anon3d94b9db0111::Printer234 void printArray(dynamic const& a, const Context* context) const {
235 if (a.empty()) {
236 out_ += "[]";
237 return;
238 }
239
240 out_ += '[';
241 indent();
242 newline();
243 (*this)(a[0], Context(context, dynamic(0)));
244 for (auto it = std::next(a.begin()); it != a.end(); ++it) {
245 out_ += ',';
246 newline();
247 (*this)(*it, Context(context, dynamic(std::distance(a.begin(), it))));
248 }
249 outdent();
250 newline();
251 out_ += ']';
252 }
253
254 private:
outdentfolly::json::__anon3d94b9db0111::Printer255 void outdent() const {
256 if (indentLevel_) {
257 --*indentLevel_;
258 }
259 }
260
indentfolly::json::__anon3d94b9db0111::Printer261 void indent() const {
262 if (indentLevel_) {
263 ++*indentLevel_;
264 }
265 }
266
newlinefolly::json::__anon3d94b9db0111::Printer267 void newline() const {
268 if (indentLevel_) {
269 auto indent = *indentLevel_ * opts_.pretty_formatting_indent_width;
270 out_ += to<std::string>('\n', std::string(indent, ' '));
271 }
272 }
273
mapColonfolly::json::__anon3d94b9db0111::Printer274 void mapColon() const { out_ += indentLevel_ ? ": " : ":"; }
275
276 private:
277 std::string& out_;
278 unsigned* const indentLevel_;
279 serialization_opts const& opts_;
280 };
281
282 //////////////////////////////////////////////////////////////////////
283
284 // Wraps our input buffer with some helper functions.
285 struct Input {
Inputfolly::json::__anon3d94b9db0111::Input286 explicit Input(StringPiece range, json::serialization_opts const* opts)
287 : range_(range), opts_(*opts), lineNum_(0) {
288 storeCurrent();
289 }
290
291 Input(Input const&) = delete;
292 Input& operator=(Input const&) = delete;
293
beginfolly::json::__anon3d94b9db0111::Input294 char const* begin() const { return range_.begin(); }
295
getLineNumfolly::json::__anon3d94b9db0111::Input296 unsigned getLineNum() const { return lineNum_; }
297
298 // Parse ahead for as long as the supplied predicate is satisfied,
299 // returning a range of what was skipped.
300 template <class Predicate>
skipWhilefolly::json::__anon3d94b9db0111::Input301 StringPiece skipWhile(const Predicate& p) {
302 std::size_t skipped = 0;
303 for (; skipped < range_.size(); ++skipped) {
304 if (!p(range_[skipped])) {
305 break;
306 }
307 if (range_[skipped] == '\n') {
308 ++lineNum_;
309 }
310 }
311 auto ret = range_.subpiece(0, skipped);
312 range_.advance(skipped);
313 storeCurrent();
314 return ret;
315 }
316
skipDigitsfolly::json::__anon3d94b9db0111::Input317 StringPiece skipDigits() {
318 return skipWhile([](char c) { return c >= '0' && c <= '9'; });
319 }
320
skipMinusAndDigitsfolly::json::__anon3d94b9db0111::Input321 StringPiece skipMinusAndDigits() {
322 bool firstChar = true;
323 return skipWhile([&firstChar](char c) {
324 bool result = (c >= '0' && c <= '9') || (firstChar && c == '-');
325 firstChar = false;
326 return result;
327 });
328 }
329
skipWhitespacefolly::json::__anon3d94b9db0111::Input330 void skipWhitespace() {
331 unsigned index = 0;
332 while (true) {
333 while (index < range_.size() && range_[index] == ' ') {
334 index++;
335 }
336 if (index < range_.size()) {
337 if (range_[index] == '\n') {
338 index++;
339 ++lineNum_;
340 continue;
341 }
342 if (range_[index] == '\t' || range_[index] == '\r') {
343 index++;
344 continue;
345 }
346 }
347 break;
348 }
349 range_.advance(index);
350 storeCurrent();
351 }
352
expectfolly::json::__anon3d94b9db0111::Input353 void expect(char c) {
354 if (**this != c) {
355 throw json::make_parse_error(
356 lineNum_, context(), to<std::string>("expected '", c, '\''));
357 }
358 ++*this;
359 }
360
sizefolly::json::__anon3d94b9db0111::Input361 std::size_t size() const { return range_.size(); }
362
operator *folly::json::__anon3d94b9db0111::Input363 int operator*() const { return current_; }
364
operator ++folly::json::__anon3d94b9db0111::Input365 void operator++() {
366 range_.pop_front();
367 storeCurrent();
368 }
369
370 template <class T>
extractfolly::json::__anon3d94b9db0111::Input371 T extract() {
372 try {
373 return to<T>(&range_);
374 } catch (std::exception const& e) {
375 error(e.what());
376 }
377 }
378
consumefolly::json::__anon3d94b9db0111::Input379 bool consume(StringPiece str) {
380 if (boost::starts_with(range_, str)) {
381 range_.advance(str.size());
382 storeCurrent();
383 return true;
384 }
385 return false;
386 }
387
contextfolly::json::__anon3d94b9db0111::Input388 std::string context() const {
389 return range_.subpiece(0, 16 /* arbitrary */).toString();
390 }
391
errorfolly::json::__anon3d94b9db0111::Input392 dynamic error(char const* what) const {
393 throw json::make_parse_error(lineNum_, context(), what);
394 }
395
getOptsfolly::json::__anon3d94b9db0111::Input396 json::serialization_opts const& getOpts() { return opts_; }
397
incrementRecursionLevelfolly::json::__anon3d94b9db0111::Input398 void incrementRecursionLevel() {
399 if (currentRecursionLevel_ > opts_.recursion_limit) {
400 error("recursion limit exceeded");
401 }
402 currentRecursionLevel_++;
403 }
404
decrementRecursionLevelfolly::json::__anon3d94b9db0111::Input405 void decrementRecursionLevel() { currentRecursionLevel_--; }
406
407 private:
storeCurrentfolly::json::__anon3d94b9db0111::Input408 void storeCurrent() { current_ = range_.empty() ? EOF : range_.front(); }
409
410 private:
411 StringPiece range_;
412 json::serialization_opts const& opts_;
413 unsigned lineNum_;
414 int current_;
415 unsigned int currentRecursionLevel_{0};
416 };
417
418 class RecursionGuard {
419 public:
RecursionGuard(Input & in)420 explicit RecursionGuard(Input& in) : in_(in) {
421 in_.incrementRecursionLevel();
422 }
423
~RecursionGuard()424 ~RecursionGuard() { in_.decrementRecursionLevel(); }
425
426 private:
427 Input& in_;
428 };
429
// Forward declarations, allowing the parsing helpers below to call these
// functions ahead of their definitions.
dynamic parseValue(Input& in, json::metadata_map* map);
std::string parseString(Input& in);
dynamic parseNumber(Input& in);
433
434 template <class K>
parseObjectKeyValue(Input & in,dynamic & ret,K && key,json::metadata_map * map)435 void parseObjectKeyValue(
436 Input& in, dynamic& ret, K&& key, json::metadata_map* map) {
437 auto keyLineNumber = in.getLineNum();
438 in.skipWhitespace();
439 in.expect(':');
440 in.skipWhitespace();
441 K tmp;
442 if (map) {
443 tmp = K(key);
444 }
445 auto valueLineNumber = in.getLineNum();
446 ret.insert(std::forward<K>(key), parseValue(in, map));
447 if (map) {
448 auto val = ret.get_ptr(tmp);
449 // We just inserted it, so it should be there!
450 DCHECK(val != nullptr);
451 map->emplace(
452 val, json::parse_metadata{{{keyLineNumber}}, {{valueLineNumber}}});
453 }
454 }
455
parseObject(Input & in,json::metadata_map * map)456 dynamic parseObject(Input& in, json::metadata_map* map) {
457 DCHECK_EQ(*in, '{');
458 ++in;
459
460 dynamic ret = dynamic::object;
461
462 in.skipWhitespace();
463 if (*in == '}') {
464 ++in;
465 return ret;
466 }
467
468 for (;;) {
469 if (in.getOpts().allow_trailing_comma && *in == '}') {
470 break;
471 }
472 if (*in == '\"') { // string
473 auto key = parseString(in);
474 parseObjectKeyValue(in, ret, std::move(key), map);
475 } else if (!in.getOpts().allow_non_string_keys) {
476 in.error("expected string for object key name");
477 } else {
478 auto key = parseValue(in, map);
479 parseObjectKeyValue(in, ret, std::move(key), map);
480 }
481
482 in.skipWhitespace();
483 if (*in != ',') {
484 break;
485 }
486 ++in;
487 in.skipWhitespace();
488 }
489 in.expect('}');
490
491 return ret;
492 }
493
parseArray(Input & in,json::metadata_map * map)494 dynamic parseArray(Input& in, json::metadata_map* map) {
495 DCHECK_EQ(*in, '[');
496 ++in;
497
498 dynamic ret = dynamic::array;
499
500 in.skipWhitespace();
501 if (*in == ']') {
502 ++in;
503 return ret;
504 }
505
506 std::vector<uint32_t> lineNumbers;
507 for (;;) {
508 if (in.getOpts().allow_trailing_comma && *in == ']') {
509 break;
510 }
511 ret.push_back(parseValue(in, map));
512 if (map) {
513 lineNumbers.push_back(in.getLineNum());
514 }
515 in.skipWhitespace();
516 if (*in != ',') {
517 break;
518 }
519 ++in;
520 in.skipWhitespace();
521 }
522 if (map) {
523 for (size_t i = 0; i < ret.size(); i++) {
524 map->emplace(&ret[i], json::parse_metadata{{{0}}, {{lineNumbers[i]}}});
525 }
526 }
527 in.expect(']');
528
529 return ret;
530 }
531
parseNumber(Input & in)532 dynamic parseNumber(Input& in) {
533 bool const negative = (*in == '-');
534 if (negative && in.consume("-Infinity")) {
535 if (in.getOpts().parse_numbers_as_strings) {
536 return "-Infinity";
537 } else {
538 return -std::numeric_limits<double>::infinity();
539 }
540 }
541
542 auto integral = in.skipMinusAndDigits();
543 if (negative && integral.size() < 2) {
544 in.error("expected digits after `-'");
545 }
546
547 auto const wasE = *in == 'e' || *in == 'E';
548
549 constexpr const char* maxInt = "9223372036854775807";
550 constexpr const char* minInt = "-9223372036854775808";
551 constexpr auto maxIntLen = constexpr_strlen(maxInt);
552 constexpr auto minIntLen = constexpr_strlen(minInt);
553
554 if (*in != '.' && !wasE && in.getOpts().parse_numbers_as_strings) {
555 return integral;
556 }
557
558 if (*in != '.' && !wasE) {
559 if (LIKELY(!in.getOpts().double_fallback || integral.size() < maxIntLen) ||
560 (!negative && integral.size() == maxIntLen && integral <= maxInt) ||
561 (negative && integral.size() == minIntLen && integral <= minInt)) {
562 auto val = to<int64_t>(integral);
563 in.skipWhitespace();
564 return val;
565 } else {
566 auto val = to<double>(integral);
567 in.skipWhitespace();
568 return val;
569 }
570 }
571
572 auto end = !wasE ? (++in, in.skipDigits().end()) : in.begin();
573 if (*in == 'e' || *in == 'E') {
574 ++in;
575 if (*in == '+' || *in == '-') {
576 ++in;
577 }
578 auto expPart = in.skipDigits();
579 end = expPart.end();
580 }
581 auto fullNum = range(integral.begin(), end);
582 if (in.getOpts().parse_numbers_as_strings) {
583 return fullNum;
584 }
585 auto val = to<double>(fullNum);
586 return val;
587 }
588
decodeUnicodeEscape(Input & in)589 std::string decodeUnicodeEscape(Input& in) {
590 auto hexVal = [&](int c) -> uint16_t {
591 // clang-format off
592 return uint16_t(
593 c >= '0' && c <= '9' ? c - '0' :
594 c >= 'a' && c <= 'f' ? c - 'a' + 10 :
595 c >= 'A' && c <= 'F' ? c - 'A' + 10 :
596 (in.error("invalid hex digit"), 0));
597 // clang-format on
598 };
599
600 auto readHex = [&]() -> uint16_t {
601 if (in.size() < 4) {
602 in.error("expected 4 hex digits");
603 }
604
605 auto ret = uint16_t(hexVal(*in) * 4096);
606 ++in;
607 ret += hexVal(*in) * 256;
608 ++in;
609 ret += hexVal(*in) * 16;
610 ++in;
611 ret += hexVal(*in);
612 ++in;
613 return ret;
614 };
615
616 // If the value encoded is in the surrogate pair range, we need to make
617 // sure there is another escape that we can use also.
618 //
619 // See the explanation in folly/Unicode.h.
620 uint16_t prefix = readHex();
621 char32_t codePoint = prefix;
622 if (utf16_code_unit_is_high_surrogate(prefix)) {
623 if (!in.consume("\\u")) {
624 in.error(
625 "expected another unicode escape for second half of "
626 "surrogate pair");
627 }
628 uint16_t suffix = readHex();
629 if (!utf16_code_unit_is_low_surrogate(suffix)) {
630 in.error("second character in surrogate pair is invalid");
631 }
632 codePoint = unicode_code_point_from_utf16_surrogate_pair(prefix, suffix);
633 } else if (!utf16_code_unit_is_bmp(prefix)) {
634 in.error("invalid unicode code point (in range [0xdc00,0xdfff])");
635 }
636
637 return codePointToUtf8(codePoint);
638 }
639
parseString(Input & in)640 std::string parseString(Input& in) {
641 DCHECK_EQ(*in, '\"');
642 ++in;
643
644 std::string ret;
645 for (;;) {
646 auto range = in.skipWhile([](char c) { return c != '\"' && c != '\\'; });
647 ret.append(range.begin(), range.end());
648
649 if (*in == '\"') {
650 ++in;
651 break;
652 }
653 if (*in == '\\') {
654 ++in;
655 switch (*in) {
656 // clang-format off
657 case '\"': ret.push_back('\"'); ++in; break;
658 case '\\': ret.push_back('\\'); ++in; break;
659 case '/': ret.push_back('/'); ++in; break;
660 case 'b': ret.push_back('\b'); ++in; break;
661 case 'f': ret.push_back('\f'); ++in; break;
662 case 'n': ret.push_back('\n'); ++in; break;
663 case 'r': ret.push_back('\r'); ++in; break;
664 case 't': ret.push_back('\t'); ++in; break;
665 case 'u': ++in; ret += decodeUnicodeEscape(in); break;
666 // clang-format on
667 default:
668 in.error(
669 to<std::string>("unknown escape ", *in, " in string").c_str());
670 }
671 continue;
672 }
673 if (*in == EOF) {
674 in.error("unterminated string");
675 }
676 if (!*in) {
677 /*
678 * Apparently we're actually supposed to ban all control
679 * characters from strings. This seems unnecessarily
680 * restrictive, so we're only banning zero bytes. (Since the
681 * string is presumed to be UTF-8 encoded it's fine to just
682 * check this way.)
683 */
684 in.error("null byte in string");
685 }
686
687 ret.push_back(char(*in));
688 ++in;
689 }
690
691 return ret;
692 }
693
parseValue(Input & in,json::metadata_map * map)694 dynamic parseValue(Input& in, json::metadata_map* map) {
695 RecursionGuard guard(in);
696
697 in.skipWhitespace();
698 // clang-format off
699 return
700 *in == '[' ? parseArray(in, map) :
701 *in == '{' ? parseObject(in, map) :
702 *in == '\"' ? parseString(in) :
703 (*in == '-' || (*in >= '0' && *in <= '9')) ? parseNumber(in) :
704 in.consume("true") ? true :
705 in.consume("false") ? false :
706 in.consume("null") ? nullptr :
707 in.consume("Infinity") ?
708 (in.getOpts().parse_numbers_as_strings ? (dynamic)"Infinity" :
709 (dynamic)std::numeric_limits<double>::infinity()) :
710 in.consume("NaN") ?
711 (in.getOpts().parse_numbers_as_strings ? (dynamic)"NaN" :
712 (dynamic)std::numeric_limits<double>::quiet_NaN()) :
713 in.error("expected json value");
714 // clang-format on
715 }
716
717 } // namespace
718
719 //////////////////////////////////////////////////////////////////////
720
buildExtraAsciiToEscapeBitmap(StringPiece chars)721 std::array<uint64_t, 2> buildExtraAsciiToEscapeBitmap(StringPiece chars) {
722 std::array<uint64_t, 2> escapes{{0, 0}};
723 for (auto b : ByteRange(chars)) {
724 if (b >= 0x20 && b < 0x80) {
725 escapes[b / 64] |= uint64_t(1) << (b % 64);
726 }
727 }
728 return escapes;
729 }
730
serialize(dynamic const & dyn,serialization_opts const & opts)731 std::string serialize(dynamic const& dyn, serialization_opts const& opts) {
732 std::string ret;
733 unsigned indentLevel = 0;
734 Printer p(ret, opts.pretty_formatting ? &indentLevel : nullptr, &opts);
735 p(dyn, nullptr);
736 return ret;
737 }
738
739 // Fast path to determine the longest prefix that can be left
740 // unescaped in a string of sizeof(T) bytes packed in an integer of
741 // type T.
742 template <bool EnableExtraAsciiEscapes, class T>
firstEscapableInWord(T s,const serialization_opts & opts)743 size_t firstEscapableInWord(T s, const serialization_opts& opts) {
744 static_assert(std::is_unsigned<T>::value, "Unsigned integer required");
745 static constexpr T kOnes = ~T() / 255; // 0x...0101
746 static constexpr T kMsbs = kOnes * 0x80; // 0x...8080
747
748 // Sets the MSB of bytes < b. Precondition: b < 128.
749 auto isLess = [](T w, uint8_t b) {
750 // A byte is < b iff subtracting b underflows, so we check that
751 // the MSB wasn't set before and it's set after the subtraction.
752 return (w - kOnes * b) & ~w & kMsbs;
753 };
754
755 auto isChar = [&](uint8_t c) {
756 // A byte is == c iff it is 0 if xor'd with c.
757 return isLess(s ^ (kOnes * c), 1);
758 };
759
760 // The following masks have the MSB set for each byte of the word
761 // that satisfies the corresponding condition.
762 auto isHigh = s & kMsbs; // >= 128
763 auto isLow = isLess(s, 0x20); // <= 0x1f
764 auto needsEscape = isHigh | isLow | isChar('\\') | isChar('"');
765
766 if /* constexpr */ (EnableExtraAsciiEscapes) {
767 // Deal with optional bitmap for unicode escapes. Escapes can optionally be
768 // set for ascii characters 32 - 127, so the inner loop may run up to 96
769 // times. However, for the case where 0 or a handful of bits are set,
770 // looping will be minimal through use of findFirstSet.
771 for (size_t i = 0; i < opts.extra_ascii_to_escape_bitmap.size(); ++i) {
772 const auto offset = i * 64;
773 // Clear first 32 characters if this is the first index, since those are
774 // always escaped.
775 auto bitmap = opts.extra_ascii_to_escape_bitmap[i] &
776 (i == 0 ? uint64_t(-1) << 32 : ~0UL);
777 while (bitmap) {
778 auto bit = folly::findFirstSet(bitmap);
779 needsEscape |= isChar(static_cast<uint8_t>(offset + bit - 1));
780 bitmap &= bitmap - 1;
781 }
782 }
783 }
784
785 if (!needsEscape) {
786 return sizeof(T);
787 }
788
789 if (folly::kIsLittleEndian) {
790 return folly::findFirstSet(needsEscape) / 8 - 1;
791 } else {
792 return sizeof(T) - folly::findLastSet(needsEscape) / 8;
793 }
794 }
795
796 // Escape a string so that it is legal to print it in JSON text.
797 template <bool EnableExtraAsciiEscapes>
escapeStringImpl(StringPiece input,std::string & out,const serialization_opts & opts)798 void escapeStringImpl(
799 StringPiece input, std::string& out, const serialization_opts& opts) {
800 auto hexDigit = [](uint8_t c) -> char {
801 return c < 10 ? c + '0' : c - 10 + 'a';
802 };
803
804 out.push_back('\"');
805
806 auto* p = reinterpret_cast<const unsigned char*>(input.begin());
807 auto* q = reinterpret_cast<const unsigned char*>(input.begin());
808 auto* e = reinterpret_cast<const unsigned char*>(input.end());
809
810 while (p < e) {
811 // Find the longest prefix that does not need escaping, and copy
812 // it literally into the output string.
813 auto firstEsc = p;
814 while (firstEsc < e) {
815 auto avail = to_unsigned(e - firstEsc);
816 uint64_t word = 0;
817 if (avail >= 8) {
818 word = folly::loadUnaligned<uint64_t>(firstEsc);
819 } else {
820 word = folly::partialLoadUnaligned<uint64_t>(firstEsc, avail);
821 }
822 auto prefix = firstEscapableInWord<EnableExtraAsciiEscapes>(word, opts);
823 DCHECK_LE(prefix, avail);
824 firstEsc += prefix;
825 if (prefix < 8) {
826 break;
827 }
828 }
829 if (firstEsc > p) {
830 out.append(reinterpret_cast<const char*>(p), firstEsc - p);
831 p = firstEsc;
832 // We can't be in the middle of a multibyte sequence, so we can reset q.
833 q = p;
834 if (p == e) {
835 break;
836 }
837 }
838
839 // Handle the next byte that may need escaping.
840
841 // Since non-ascii encoding inherently does utf8 validation
842 // we explicitly validate utf8 only if non-ascii encoding is disabled.
843 if ((opts.validate_utf8 || opts.skip_invalid_utf8) &&
844 !opts.encode_non_ascii) {
845 // To achieve better spatial and temporal coherence
846 // we do utf8 validation progressively along with the
847 // string-escaping instead of two separate passes.
848
849 // As the encoding progresses, q will stay at or ahead of p.
850 CHECK_GE(q, p);
851
852 // As p catches up with q, move q forward.
853 if (q == p) {
854 // calling utf8_decode has the side effect of
855 // checking that utf8 encodings are valid
856 char32_t v = utf8ToCodePoint(q, e, opts.skip_invalid_utf8);
857 if (opts.skip_invalid_utf8 && v == U'\ufffd') {
858 out.append(reinterpret_cast<const char*>(u8"\ufffd"));
859 p = q;
860 continue;
861 }
862 }
863 }
864
865 auto encodeUnicode = opts.encode_non_ascii && (*p & 0x80);
866 if /* constexpr */ (EnableExtraAsciiEscapes) {
867 encodeUnicode = encodeUnicode ||
868 (*p >= 0x20 && *p < 0x80 &&
869 (opts.extra_ascii_to_escape_bitmap[*p / 64] &
870 (uint64_t(1) << (*p % 64))));
871 }
872
873 if (encodeUnicode) {
874 // note that this if condition captures utf8 chars
875 // with value > 127, so size > 1 byte (or they are whitelisted for
876 // Unicode encoding).
877 // NOTE: char32_t / char16_t are both unsigned.
878 char32_t cp = utf8ToCodePoint(p, e, opts.skip_invalid_utf8);
879 auto writeHex = [&](char16_t v) {
880 char buf[] = "\\u\0\0\0\0";
881 buf[2] = hexDigit((v >> 12) & 0x0f);
882 buf[3] = hexDigit((v >> 8) & 0x0f);
883 buf[4] = hexDigit((v >> 4) & 0x0f);
884 buf[5] = hexDigit(v & 0x0f);
885 out.append(buf, 6);
886 };
887 // From the ECMA-404 The JSON Data Interchange Syntax 2nd Edition Dec 2017
888 if (cp < 0x10000u) {
889 // If the code point is in the Basic Multilingual Plane (U+0000 through
890 // U+FFFF), then it may be represented as a six-character sequence:
891 // a reverse solidus, followed by the lowercase letter u, followed by
892 // four hexadecimal digits that encode the code point.
893 writeHex(static_cast<char16_t>(cp));
894 } else {
895 // To escape a code point that is not in the Basic Multilingual Plane,
896 // the character may be represented as a twelve-character sequence,
897 // encoding the UTF-16 surrogate pair corresponding to the code point.
898 writeHex(static_cast<char16_t>(
899 0xd800u + (((cp - 0x10000u) >> 10) & 0x3ffu)));
900 writeHex(static_cast<char16_t>(0xdc00u + ((cp - 0x10000u) & 0x3ffu)));
901 }
902 } else if (*p == '\\' || *p == '\"') {
903 char buf[] = "\\\0";
904 buf[1] = char(*p++);
905 out.append(buf, 2);
906 } else if (*p <= 0x1f) {
907 switch (*p) {
908 // clang-format off
909 case '\b': out.append("\\b"); p++; break;
910 case '\f': out.append("\\f"); p++; break;
911 case '\n': out.append("\\n"); p++; break;
912 case '\r': out.append("\\r"); p++; break;
913 case '\t': out.append("\\t"); p++; break;
914 // clang-format on
915 default:
916 // Note that this if condition captures non readable chars
917 // with value < 32, so size = 1 byte (e.g control chars).
918 char buf[] = "\\u00\0\0";
919 buf[4] = hexDigit(uint8_t((*p & 0xf0) >> 4));
920 buf[5] = hexDigit(uint8_t(*p & 0xf));
921 out.append(buf, 6);
922 p++;
923 }
924 } else {
925 out.push_back(char(*p++));
926 }
927 }
928
929 out.push_back('\"');
930 }
931
escapeString(StringPiece input,std::string & out,const serialization_opts & opts)932 void escapeString(
933 StringPiece input, std::string& out, const serialization_opts& opts) {
934 if (FOLLY_UNLIKELY(
935 opts.extra_ascii_to_escape_bitmap[0] ||
936 opts.extra_ascii_to_escape_bitmap[1])) {
937 escapeStringImpl<true>(input, out, opts);
938 } else {
939 escapeStringImpl<false>(input, out, opts);
940 }
941 }
942
stripComments(StringPiece jsonC)943 std::string stripComments(StringPiece jsonC) {
944 std::string result;
945 enum class State {
946 None,
947 InString,
948 InlineComment,
949 LineComment
950 } state = State::None;
951
952 for (size_t i = 0; i < jsonC.size(); ++i) {
953 auto s = jsonC.subpiece(i);
954 switch (state) {
955 case State::None:
956 if (s.startsWith("/*")) {
957 state = State::InlineComment;
958 ++i;
959 continue;
960 } else if (s.startsWith("//")) {
961 state = State::LineComment;
962 ++i;
963 continue;
964 } else if (s[0] == '\"') {
965 state = State::InString;
966 }
967 result.push_back(s[0]);
968 break;
969 case State::InString:
970 if (s[0] == '\\') {
971 if (UNLIKELY(s.size() == 1)) {
972 throw std::logic_error("Invalid JSONC: string is not terminated");
973 }
974 result.push_back(s[0]);
975 result.push_back(s[1]);
976 ++i;
977 continue;
978 } else if (s[0] == '\"') {
979 state = State::None;
980 }
981 result.push_back(s[0]);
982 break;
983 case State::InlineComment:
984 if (s.startsWith("*/")) {
985 state = State::None;
986 ++i;
987 }
988 break;
989 case State::LineComment:
990 if (s[0] == '\n') {
991 // skip the line break. It doesn't matter.
992 state = State::None;
993 }
994 break;
995 default:
996 throw std::logic_error("Unknown comment state");
997 }
998 }
999 return result;
1000 }
1001
1002 } // namespace json
1003
1004 //////////////////////////////////////////////////////////////////////
1005
parseJsonWithMetadata(StringPiece range,json::metadata_map * map)1006 dynamic parseJsonWithMetadata(StringPiece range, json::metadata_map* map) {
1007 return parseJsonWithMetadata(range, json::serialization_opts(), map);
1008 }
1009
parseJsonWithMetadata(StringPiece range,json::serialization_opts const & opts,json::metadata_map * map)1010 dynamic parseJsonWithMetadata(
1011 StringPiece range,
1012 json::serialization_opts const& opts,
1013 json::metadata_map* map) {
1014 json::Input in(range, &opts);
1015
1016 uint32_t n = in.getLineNum();
1017 auto ret = parseValue(in, map);
1018 if (map) {
1019 map->emplace(&ret, json::parse_metadata{{{0}}, {{n}}});
1020 }
1021
1022 in.skipWhitespace();
1023 if (in.size() && *in != '\0') {
1024 in.error("parsing didn't consume all input");
1025 }
1026 return ret;
1027 }
1028
parseJson(StringPiece range)1029 dynamic parseJson(StringPiece range) {
1030 return parseJson(range, json::serialization_opts());
1031 }
1032
parseJson(StringPiece range,json::serialization_opts const & opts)1033 dynamic parseJson(StringPiece range, json::serialization_opts const& opts) {
1034 json::Input in(range, &opts);
1035
1036 auto ret = parseValue(in, nullptr);
1037 in.skipWhitespace();
1038 if (in.size() && *in != '\0') {
1039 in.error("parsing didn't consume all input");
1040 }
1041 return ret;
1042 }
1043
toJson(dynamic const & dyn)1044 std::string toJson(dynamic const& dyn) {
1045 return json::serialize(dyn, json::serialization_opts());
1046 }
1047
toPrettyJson(dynamic const & dyn)1048 std::string toPrettyJson(dynamic const& dyn) {
1049 json::serialization_opts opts;
1050 opts.pretty_formatting = true;
1051 opts.sort_keys = true;
1052 return json::serialize(dyn, opts);
1053 }
1054
1055 //////////////////////////////////////////////////////////////////////
1056 // dynamic::print_as_pseudo_json() is implemented here for header
1057 // ordering reasons (most of the dynamic implementation is in
1058 // dynamic-inl.h, which we don't want to include json.h).
1059
print_as_pseudo_json(std::ostream & out) const1060 void dynamic::print_as_pseudo_json(std::ostream& out) const {
1061 json::serialization_opts opts;
1062 opts.allow_non_string_keys = true;
1063 opts.allow_nan_inf = true;
1064 out << json::serialize(*this, opts);
1065 }
1066
PrintTo(const dynamic & dyn,std::ostream * os)1067 void PrintTo(const dynamic& dyn, std::ostream* os) {
1068 json::serialization_opts opts;
1069 opts.allow_nan_inf = true;
1070 opts.allow_non_string_keys = true;
1071 opts.pretty_formatting = true;
1072 opts.sort_keys = true;
1073 *os << json::serialize(dyn, opts);
1074 }
1075
1076 //////////////////////////////////////////////////////////////////////
1077
1078 } // namespace folly
1079