1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "json.h"
6 
7 #include <array>
8 #include <clocale>
9 #include <cmath>
10 #include <cstdlib>
11 #include <cstring>
12 #include <iomanip>
13 #include <iostream>
14 #include <sstream>
15 #include <string>
16 
17 #include "cbor.h"
18 #include "parser_handler.h"
19 #include "span.h"
20 #include "status.h"
21 #include "status_test_support.h"
22 #include "test_platform.h"
23 
24 namespace crdtp {
25 namespace json {
26 // =============================================================================
27 // json::NewJSONEncoder - for encoding streaming parser events as JSON
28 // =============================================================================
29 
WriteUTF8AsUTF16(ParserHandler * writer,const std::string & utf8)30 void WriteUTF8AsUTF16(ParserHandler* writer, const std::string& utf8) {
31   writer->HandleString16(SpanFrom(UTF8ToUTF16(SpanFrom(utf8))));
32 }
33 
TEST(JsonEncoder, OverlongEncodings) {
  // 0x7f (the ASCII DEL character) needs only one byte in UTF-8. Below it is
  // spread over a 4 byte sequence instead, which is called an overlong
  // encoding. The encoder is expected to refuse it.
  const std::vector<uint8_t> overlong_del = {
      0xf0,  // Start byte of a 4 byte sequence, payload 000.
      0x80,  // Continuation byte, payload 000000.
      0x81,  // Continuation byte, payload 000001 (code point bit 6).
      0xbf,  // Continuation byte, payload 111111 (code point bits 0-5).
  };

  std::string out;
  Status status;
  std::unique_ptr<ParserHandler> writer = NewJSONEncoder(&out, &status);
  writer->HandleString8(SpanFrom(overlong_del));

  // Only the surrounding quotes are emitted: 0x7f was rejected (good).
  EXPECT_EQ("\"\"", out);
}
51 
TEST(JsonEncoder, NotAContinuationByte) {
  std::string out;
  Status status;
  std::unique_ptr<ParserHandler> writer = NewJSONEncoder(&out, &status);

  // |world| encodes the globe character (U+1F30E) as a 4 byte UTF8 sequence:
  // a start byte followed by three continuation bytes. The bytes are spelled
  // out as escapes so that the test does not depend on how this source file
  // is encoded or transcoded.
  const std::string world = "\xF0\x9F\x8C\x8E";
  ASSERT_EQ(4u, world.size());
  ASSERT_EQ(world[1] & 0xc0, 0x80);  // checks for continuation byte
  ASSERT_EQ(world[2] & 0xc0, 0x80);
  ASSERT_EQ(world[3] & 0xc0, 0x80);

  // Now create a corrupted UTF8 string, starting with the first two bytes from
  // |world|, followed by an ASCII message. Upon encountering '!', the decoder
  // realizes that it's not a continuation byte; it skips to the end of this
  // (4 byte) UTF8 sequence and continues with the next character. In this
  // case, that is the 'H' of "Hello".
  std::vector<uint8_t> chars(world.begin(), world.begin() + 2);
  const std::string ascii_tail = "!?Hello";
  chars.insert(chars.end(), ascii_tail.begin(), ascii_tail.end());

  writer->HandleString8(SpanFrom(chars));
  EXPECT_EQ("\"Hello\"", out);  // "Hello" shows we restarted at 'H'.
}
83 
TEST(JsonEncoder, EscapesLoneHighSurrogates) {
  // Lone high surrogates are the code units 0xD800 through 0xDBFF appearing
  // without a matching low surrogate. They cannot be written unescaped in
  // well-formed UTF-8 or UTF-16, so the encoder must emit them as \uXXXX.
  // The input covers the first, an interior, and the last value of the range.
  const std::vector<uint16_t> utf16 = {'a',    0xd800, 'b', 0xdada,
                                       'c',    0xdbff, 'd'};

  std::string out;
  Status status;
  std::unique_ptr<ParserHandler> writer = NewJSONEncoder(&out, &status);
  writer->HandleString16(SpanFrom(utf16));

  EXPECT_EQ("\"a\\ud800b\\udadac\\udbffd\"", out);
}
96 
TEST(JsonEncoder, EscapesLoneLowSurrogates) {
  // Lone low surrogates are the code units 0xDC00 through 0xDFFF appearing
  // without a preceding high surrogate. They cannot be written unescaped in
  // well-formed UTF-8 or UTF-16, so the encoder must emit them as \uXXXX.
  // The input covers the first, an interior, and the last value of the range.
  const std::vector<uint16_t> utf16 = {'a',    0xdc00, 'b', 0xdede,
                                       'c',    0xdfff, 'd'};

  std::string out;
  Status status;
  std::unique_ptr<ParserHandler> writer = NewJSONEncoder(&out, &status);
  writer->HandleString16(SpanFrom(utf16));

  EXPECT_EQ("\"a\\udc00b\\udedec\\udfffd\"", out);
}
109 
TEST(JsonEncoder, EscapesFFFF) {
  // Edge case: the largest 16 bit code unit, 0xffff, must come out of the
  // encoder as the escape \uffff.
  const std::vector<uint16_t> utf16 = {'a', 'b', 'c', 0xffff, 'd'};

  std::string out;
  Status status;
  std::unique_ptr<ParserHandler> writer = NewJSONEncoder(&out, &status);
  writer->HandleString16(SpanFrom(utf16));

  EXPECT_EQ("\"abc\\uffffd\"", out);
}
120 
TEST(JsonEncoder, IncompleteUtf8Sequence) {
  std::string out;
  Status status;
  std::unique_ptr<ParserHandler> writer = NewJSONEncoder(&out, &status);

  writer->HandleArrayBegin();  // This emits [, which starts an array.

  {  // The globe character (U+1F30E) takes four bytes to encode in UTF-8. We
    // test with the first three; this means we're trying to emit a string
    // that consists solely of an incomplete UTF-8 sequence. So the string in
    // the JSON output is empty. The bytes are written as escapes so that the
    // test does not depend on the encoding of this source file.
    const std::string world_utf8 = "\xF0\x9F\x8C\x8E";
    ASSERT_EQ(4u, world_utf8.size());
    std::vector<uint8_t> chars(world_utf8.begin(), world_utf8.begin() + 3);
    writer->HandleString8(SpanFrom(chars));
    EXPECT_EQ("[\"\"", out);  // Incomplete sequence rejected: empty string.
  }

  {  // This time, the incomplete sequence is at the end of the string.
    const std::string msg = "Hello, \xF0\x9F\x8C";
    std::vector<uint8_t> chars(msg.begin(), msg.end());
    writer->HandleString8(SpanFrom(chars));
    EXPECT_EQ("[\"\",\"Hello, \"", out);  // Incomplete sequence dropped at end.
  }
}
145 
TEST(JsonStdStringWriterTest, HelloWorld) {
  // Exercises every event type of the encoder in one nested document. The
  // greeting contains the globe character U+1F30E, given here as explicit
  // UTF-8 bytes so the test is independent of this source file's encoding.
  // The encoder is expected to emit it as the surrogate pair \ud83c\udf0e,
  // regardless of whether it arrives as UTF-16 or UTF-8.
  const std::string hello_globe = "Hello, \xF0\x9F\x8C\x8E.";

  std::string out;
  Status status;
  std::unique_ptr<ParserHandler> writer = NewJSONEncoder(&out, &status);
  writer->HandleMapBegin();
  WriteUTF8AsUTF16(writer.get(), "msg1");
  WriteUTF8AsUTF16(writer.get(), hello_globe);
  std::string key = "msg1-as-utf8";
  std::string value = hello_globe;
  writer->HandleString8(SpanFrom(key));
  writer->HandleString8(SpanFrom(value));
  WriteUTF8AsUTF16(writer.get(), "msg2");
  WriteUTF8AsUTF16(writer.get(), "\\\b\r\n\t\f\"");  // Needs JSON escapes.
  WriteUTF8AsUTF16(writer.get(), "nested");
  writer->HandleMapBegin();
  WriteUTF8AsUTF16(writer.get(), "double");
  writer->HandleDouble(3.1415);
  WriteUTF8AsUTF16(writer.get(), "int");
  writer->HandleInt32(-42);
  WriteUTF8AsUTF16(writer.get(), "bool");
  writer->HandleBool(false);
  WriteUTF8AsUTF16(writer.get(), "null");
  writer->HandleNull();
  writer->HandleMapEnd();
  WriteUTF8AsUTF16(writer.get(), "array");
  writer->HandleArrayBegin();
  writer->HandleInt32(1);
  writer->HandleInt32(2);
  writer->HandleInt32(3);
  writer->HandleArrayEnd();
  writer->HandleMapEnd();
  EXPECT_TRUE(status.ok());
  EXPECT_EQ(
      "{\"msg1\":\"Hello, \\ud83c\\udf0e.\","
      "\"msg1-as-utf8\":\"Hello, \\ud83c\\udf0e.\","
      "\"msg2\":\"\\\\\\b\\r\\n\\t\\f\\\"\","
      "\"nested\":{\"double\":3.1415,\"int\":-42,"
      "\"bool\":false,\"null\":null},\"array\":[1,2,3]}",
      out);
}
186 
TEST(JsonStdStringWriterTest, ScalarsAreRenderedAsInt) {
  // Javascript's Number is an IEEE 754 double, which has 53 bits to represent
  // integers. This checks that Number.MIN_SAFE_INTEGER and
  // Number.MAX_SAFE_INTEGER are rendered as integers (no decimal point, no
  // rounding), even though they are handed to the encoder as doubles.
  constexpr int64_t kMinSafeInteger = -9007199254740991;
  constexpr int64_t kMaxSafeInteger = 9007199254740991;
  EXPECT_EQ(-0x1fffffffffffff, kMinSafeInteger);  // 53 bits for integers.
  EXPECT_EQ(0x1fffffffffffff, kMaxSafeInteger);   // 53 bits for integers.

  std::string out;
  Status status;
  std::unique_ptr<ParserHandler> writer = NewJSONEncoder(&out, &status);
  writer->HandleMapBegin();

  // Note that both values go through HandleDouble, not HandleInt32.
  writer->HandleString8(SpanFrom("Number.MIN_SAFE_INTEGER"));
  writer->HandleDouble(static_cast<double>(kMinSafeInteger));

  writer->HandleString8(SpanFrom("Number.MAX_SAFE_INTEGER"));
  writer->HandleDouble(static_cast<double>(kMaxSafeInteger));

  writer->HandleMapEnd();
  EXPECT_TRUE(status.ok());

  const std::string expected_json =
      "{\"Number.MIN_SAFE_INTEGER\":-9007199254740991,"
      "\"Number.MAX_SAFE_INTEGER\":9007199254740991}";
  EXPECT_EQ(expected_json, out);
}
212 
TEST(JsonStdStringWriterTest, RepresentingNonFiniteValuesAsNull) {
  // JSON has no syntax for +Infinity, -Infinity, or NaN, so the encoder is
  // expected to write null for each of them.
  using DoubleLimits = std::numeric_limits<double>;
  const double positive_infinity = DoubleLimits::infinity();
  const double negative_infinity = -DoubleLimits::infinity();
  const double not_a_number = DoubleLimits::quiet_NaN();

  std::string out;
  Status status;
  std::unique_ptr<ParserHandler> writer = NewJSONEncoder(&out, &status);

  writer->HandleMapBegin();
  writer->HandleString8(SpanFrom("Infinity"));
  writer->HandleDouble(positive_infinity);
  writer->HandleString8(SpanFrom("-Infinity"));
  writer->HandleDouble(negative_infinity);
  writer->HandleString8(SpanFrom("NaN"));
  writer->HandleDouble(not_a_number);
  writer->HandleMapEnd();

  EXPECT_TRUE(status.ok());
  EXPECT_EQ("{\"Infinity\":null,\"-Infinity\":null,\"NaN\":null}", out);
}
230 
TEST(JsonStdStringWriterTest, BinaryEncodedAsJsonString) {
  // Bytes passed to ParserHandler::HandleBinary are emitted by the encoder as
  // a base64 JSON string. Each example is run against a fresh encoder.
  struct Example {
    std::vector<uint8_t> bytes;
    std::string expected_json;
  };
  const std::vector<Example> examples = {
      // The first three are taken from https://en.wikipedia.org/wiki/Base64;
      // they cover zero, one, and two padding characters.
      {{'M', 'a', 'n'}, "\"TWFu\""},
      {{'M', 'a'}, "\"TWE=\""},
      {{'M'}, "\"TQ==\""},
      // "Hello, world.", verified with base64decode.org.
      {{'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '.'},
       "\"SGVsbG8sIHdvcmxkLg==\""},
  };

  for (const Example& example : examples) {
    SCOPED_TRACE(example.expected_json);
    std::string out;
    Status status;
    std::unique_ptr<ParserHandler> writer = NewJSONEncoder(&out, &status);
    writer->HandleBinary(SpanFrom(example.bytes));
    EXPECT_TRUE(status.ok());
    EXPECT_EQ(example.expected_json, out);
  }
}
269 
TEST(JsonStdStringWriterTest, HandlesErrors) {
  // An error delivered through HandleError must end up in the status object
  // that was given to the encoder, and whatever was written so far must be
  // cleared from the output.
  std::string out;
  Status status;
  std::unique_ptr<ParserHandler> writer = NewJSONEncoder(&out, &status);

  // Produce some partial output first, so that clearing is observable.
  writer->HandleMapBegin();
  WriteUTF8AsUTF16(writer.get(), "msg1");

  const Status injected_error{Error::JSON_PARSER_VALUE_EXPECTED, 42};
  writer->HandleError(injected_error);

  EXPECT_THAT(status, StatusIs(Error::JSON_PARSER_VALUE_EXPECTED, 42u));
  EXPECT_EQ("", out);
}
282 
TEST(JsonStdStringWriterTest, DoubleToString_LeadingZero) {
  // JSON requires a digit before the decimal point: .1 has to be written as
  // 0.1, and -.7 as -0.7.
  const double positive_fraction = .1;
  const double negative_fraction = -.7;

  std::string out;
  Status status;
  std::unique_ptr<ParserHandler> writer = NewJSONEncoder(&out, &status);

  writer->HandleArrayBegin();
  writer->HandleDouble(positive_fraction);
  writer->HandleDouble(negative_fraction);
  writer->HandleArrayEnd();

  EXPECT_EQ("[0.1,-0.7]", out);
}
294 
295 // =============================================================================
296 // json::ParseJSON - for receiving streaming parser events for JSON
297 // =============================================================================
298 
299 class Log : public ParserHandler {
300  public:
HandleMapBegin()301   void HandleMapBegin() override { log_ << "map begin\n"; }
302 
HandleMapEnd()303   void HandleMapEnd() override { log_ << "map end\n"; }
304 
HandleArrayBegin()305   void HandleArrayBegin() override { log_ << "array begin\n"; }
306 
HandleArrayEnd()307   void HandleArrayEnd() override { log_ << "array end\n"; }
308 
HandleString8(span<uint8_t> chars)309   void HandleString8(span<uint8_t> chars) override {
310     log_ << "string8: " << std::string(chars.begin(), chars.end()) << "\n";
311   }
312 
HandleString16(span<uint16_t> chars)313   void HandleString16(span<uint16_t> chars) override {
314     raw_log_string16_.emplace_back(chars.begin(), chars.end());
315     log_ << "string16: " << UTF16ToUTF8(chars) << "\n";
316   }
317 
HandleBinary(span<uint8_t> bytes)318   void HandleBinary(span<uint8_t> bytes) override {
319     // JSON doesn't have native support for arbitrary bytes, so our parser will
320     // never call this.
321     CHECK(false);
322   }
323 
HandleDouble(double value)324   void HandleDouble(double value) override {
325     log_ << "double: " << value << "\n";
326   }
327 
HandleInt32(int32_t value)328   void HandleInt32(int32_t value) override { log_ << "int: " << value << "\n"; }
329 
HandleBool(bool value)330   void HandleBool(bool value) override { log_ << "bool: " << value << "\n"; }
331 
HandleNull()332   void HandleNull() override { log_ << "null\n"; }
333 
HandleError(Status status)334   void HandleError(Status status) override { status_ = status; }
335 
str() const336   std::string str() const { return status_.ok() ? log_.str() : ""; }
337 
raw_log_string16() const338   std::vector<std::vector<uint16_t>> raw_log_string16() const {
339     return raw_log_string16_;
340   }
341 
status() const342   Status status() const { return status_; }
343 
344  private:
345   std::ostringstream log_;
346   std::vector<std::vector<uint16_t>> raw_log_string16_;
347   Status status_;
348 };
349 
350 class JsonParserTest : public ::testing::Test {
351  protected:
352   Log log_;
353 };
354 
TEST_F(JsonParserTest, SimpleDictionary) {
  // A single key/value pair: the key is reported as string16, the value as
  // an int, both enclosed in map begin / map end.
  const std::string json = R"({"foo": 42})";
  const std::string expected_events =
      "map begin\n"
      "string16: foo\n"
      "int: 42\n"
      "map end\n";

  ParseJSON(SpanFrom(json), &log_);

  EXPECT_TRUE(log_.status().ok());
  EXPECT_EQ(expected_events, log_.str());
}
366 
TEST_F(JsonParserTest, UsAsciiDelCornerCase) {
  // DEL (0x7f) is a 7 bit US-ASCII character. Unicode classifies it as a
  // control character, but https://tools.ietf.org/html/rfc7159#section-7 does
  // not, so it may appear directly in a JSON string without being escaped.
  const std::string json = "{\"foo\": \"a\x7f\"}";
  const std::string expected_events =
      "map begin\n"
      "string16: foo\n"
      "string16: a\x7f\n"
      "map end\n";

  ParseJSON(SpanFrom(json), &log_);
  EXPECT_TRUE(log_.status().ok());
  EXPECT_EQ(expected_events, log_.str());

  // An implementation of UTF16ToUTF8 has been seen that replaced the DEL
  // character with ' '. This plain roundtrip through the UTF8ToUTF16 and
  // UTF16ToUTF8 test helpers makes a failure of the check above easier to
  // diagnose.
  const std::vector<uint16_t> as_utf16 = UTF8ToUTF16(SpanFrom(json));
  const std::string back_to_utf8 = UTF16ToUTF8(SpanFrom(as_utf16));
  EXPECT_EQ(json, back_to_utf8);
}
389 
TEST_F(JsonParserTest, Whitespace) {
  // Newlines, spaces, tabs, a vertical tab and a carriage return are placed
  // around and between the tokens; none of them may show up in the events.
  const std::string json = "\n  {\n\"msg\"\n: \v\"Hello, world.\"\t\r}\t";
  const std::string expected_events =
      "map begin\n"
      "string16: msg\n"
      "string16: Hello, world.\n"
      "map end\n";

  ParseJSON(SpanFrom(json), &log_);

  EXPECT_TRUE(log_.status().ok());
  EXPECT_EQ(expected_events, log_.str());
}
401 
TEST_F(JsonParserTest, NestedDictionary) {
  // Three levels of maps; after the innermost map closes, the middle map
  // continues with one more entry ("bar2").
  const std::string json = R"({"foo": {"bar": {"baz": 1}, "bar2": 2}})";
  const std::string expected_events =
      "map begin\n"
      "string16: foo\n"
      "map begin\n"
      "string16: bar\n"
      "map begin\n"
      "string16: baz\n"
      "int: 1\n"
      "map end\n"
      "string16: bar2\n"
      "int: 2\n"
      "map end\n"
      "map end\n";

  ParseJSON(SpanFrom(json), &log_);

  EXPECT_TRUE(log_.status().ok());
  EXPECT_EQ(expected_events, log_.str());
}
421 
TEST_F(JsonParserTest, Doubles) {
  // The same value written once in decimal and once in exponent notation;
  // both must be reported as the double 3.1415.
  const std::string json = R"({"foo": 3.1415, "bar": 31415e-4})";
  const std::string expected_events =
      "map begin\n"
      "string16: foo\n"
      "double: 3.1415\n"
      "string16: bar\n"
      "double: 3.1415\n"
      "map end\n";

  ParseJSON(SpanFrom(json), &log_);

  EXPECT_TRUE(log_.status().ok());
  EXPECT_EQ(expected_events, log_.str());
}
435 
TEST_F(JsonParserTest, Unicode) {
  // Globe character. 0xF0 0x9F 0x8C 0x8E in utf8, 0xD83C 0xDF0E in utf16.
  // The input carries it as a pair of \u escapes; the log shows it converted
  // to UTF-8. The expected bytes are written as escapes so that the test does
  // not depend on the encoding of this source file.
  std::string json = "{\"msg\": \"Hello, \\uD83C\\uDF0E.\"}";
  ParseJSON(SpanFrom(json), &log_);
  EXPECT_TRUE(log_.status().ok());
  EXPECT_EQ(
      "map begin\n"
      "string16: msg\n"
      "string16: Hello, \xF0\x9F\x8C\x8E.\n"
      "map end\n",
      log_.str());
}
448 
TEST_F(JsonParserTest, Unicode_ParseUtf16) {
  // Globe character. utf8: 0xF0 0x9F 0x8C 0x8E; utf16: 0xD83C 0xDF0E.
  // Crescent moon character. utf8: 0xF0 0x9F 0x8C 0x99; utf16: 0xD83C 0xDF19.

  // We provide the moon with json escape, but the earth as utf16 input.
  // Either way they arrive as utf8 (after decoding in log_.str()).
  // Both characters are written as byte escapes below so that the test does
  // not depend on the encoding of this source file.
  std::vector<uint16_t> json = UTF8ToUTF16(
      SpanFrom("{\"space\": \"\xF0\x9F\x8C\x8E \\uD83C\\uDF19.\"}"));
  ParseJSON(SpanFrom(json), &log_);
  EXPECT_TRUE(log_.status().ok());
  EXPECT_EQ(
      "map begin\n"
      "string16: space\n"
      "string16: \xF0\x9F\x8C\x8E \xF0\x9F\x8C\x99.\n"
      "map end\n",
      log_.str());
}
466 
TEST_F(JsonParserTest, Unicode_ParseUtf16_SingleEscapeUpToFFFF) {
  // 0xFFFF is the largest code point that fits into a single \u escape. It
  // can be written as \uffff, or as the 3 byte UTF-8 sequence 0xef 0xbf 0xbf.
  // The input below uses both spellings inside one JSON string.
  const std::string json = "{\"escape\": \"\xef\xbf\xbf or \\uffff\"}";

  ParseJSON(SpanFrom(json), &log_);
  EXPECT_TRUE(log_.status().ok());

  // After conversion to UTF-8 for the log, both spellings look the same.
  const std::string expected_events =
      "map begin\n"
      "string16: escape\n"
      "string16: \xEF\xBF\xBF or \xEF\xBF\xBF\n"
      "map end\n";
  EXPECT_EQ(expected_events, log_.str());

  // A stronger assertion: the parser hands over \uffff as a single UTF-16
  // code unit. Entry 0 of the raw log is the key, entry 1 is the value.
  ASSERT_EQ(2u, log_.raw_log_string16().size());
  const std::vector<uint16_t> expected_units = {0xffff, ' ', 'o',
                                                'r',    ' ', 0xffff};
  EXPECT_EQ(expected_units, log_.raw_log_string16()[1]);
}
491 
TEST_F(JsonParserTest, Unicode_ParseUtf8) {
  // Used below:
  // гласность - example for 2 byte utf8, Russian word "glasnost"
  // 屋 - example for 3 byte utf8, Chinese word for "house"
  // U+1F30E (globe) - example for 4 byte utf8: 0xF0 0x9F 0x8C 0x8E;
  //                   utf16: 0xD83C 0xDF0E.
  // U+1F319 (crescent moon) - example for escapes:
  //                   utf8: 0xF0 0x9F 0x8C 0x99; utf16: 0xD83C 0xDF19.

  // We provide the moon with json escape, but the earth as utf8 input.
  // Either way they arrive as utf8 (after decoding in log_.str()).
  // The two 4 byte characters are written as byte escapes so that they
  // survive any re-encoding of this source file.
  std::string json =
      "{"
      "\"escapes\": \"\\uD83C\\uDF19\","
      "\"2 byte\":\"гласность\","
      "\"3 byte\":\"屋\","
      "\"4 byte\":\"\xF0\x9F\x8C\x8E\""
      "}";
  ParseJSON(SpanFrom(json), &log_);
  EXPECT_TRUE(log_.status().ok());
  EXPECT_EQ(
      "map begin\n"
      "string16: escapes\n"
      "string16: \xF0\x9F\x8C\x99\n"
      "string16: 2 byte\n"
      "string16: гласность\n"
      "string16: 3 byte\n"
      "string16: 屋\n"
      "string16: 4 byte\n"
      "string16: \xF0\x9F\x8C\x8E\n"
      "map end\n",
      log_.str());
}
523 
TEST_F(JsonParserTest, UnprocessedInputRemainsError) {
  // A complete JSON value followed by trailing junk. The error position is
  // expected to be the offset at which the junk starts.
  const std::string json = R"({"foo": 3.1415} junk)";
  const size_t junk_idx = json.find("junk");
  EXPECT_NE(std::string::npos, junk_idx);

  ParseJSON(SpanFrom(json), &log_);

  EXPECT_THAT(log_.status(),
              StatusIs(Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS, junk_idx));
  EXPECT_TRUE(log_.str().empty());
}
534 
// Builds a JSON document in which the value 42 is wrapped in |depth| nested
// single-entry objects, e.g. {"foo":{"foo":42}} for a depth of 2. A depth of
// zero or less yields just "42".
std::string MakeNestedJson(int depth) {
  static const char kOpenLevel[] = "{\"foo\":";

  std::string nested;
  for (int level = depth; level > 0; --level)
    nested.append(kOpenLevel);
  nested.append("42");
  if (depth > 0)
    nested.append(static_cast<size_t>(depth), '}');
  return nested;
}
544 
TEST_F(JsonParserTest, StackLimitExceededError_BelowLimit) {
  // kStackLimit is 300 (see json_parser.cc). As a baseline, parse a document
  // that is only three levels deep and check its complete event stream.
  const std::string json_3 = MakeNestedJson(3);
  const std::string expected_events =
      "map begin\n"
      "string16: foo\n"
      "map begin\n"
      "string16: foo\n"
      "map begin\n"
      "string16: foo\n"
      "int: 42\n"
      "map end\n"
      "map end\n"
      "map end\n";

  ParseJSON(SpanFrom(json_3), &log_);

  EXPECT_TRUE(log_.status().ok());
  EXPECT_EQ(expected_events, log_.str());
}
564 
TEST_F(JsonParserTest, StackLimitExceededError_AtLimit) {
  // Now with kStackLimit (300): nesting exactly at the limit must still parse
  // without an error. The input is passed via SpanFrom, like in the other
  // tests of this fixture, rather than through a hand-built span that needs a
  // reinterpret_cast.
  const std::string json_limit = MakeNestedJson(300);
  ParseJSON(SpanFrom(json_limit), &log_);
  EXPECT_THAT(log_.status(), StatusIsOk());
}
573 
TEST_F(JsonParserTest, StackLimitExceededError_AboveLimit) {
  // Now with kStackLimit + 1 (301) - it exceeds in the innermost instance.
  // The expected error position is the offset directly after the 301st
  // {"foo": prefix.
  constexpr size_t kPrefixLength = sizeof("{\"foo\":") - 1;
  const size_t expected_pos = kPrefixLength * 301;

  const std::string exceeded = MakeNestedJson(301);
  ParseJSON(SpanFrom(exceeded), &log_);

  EXPECT_THAT(log_.status(), StatusIs(Error::JSON_PARSER_STACK_LIMIT_EXCEEDED,
                                      expected_pos));
}
581 
TEST_F(JsonParserTest, StackLimitExceededError_WayAboveLimit) {
  // Now way past the limit. Still, the point of exceeding is 301, so the
  // expected error position is the same as for a document of depth 301.
  constexpr size_t kPrefixLength = sizeof("{\"foo\":") - 1;
  const size_t expected_pos = kPrefixLength * 301;

  const std::string far_out = MakeNestedJson(320);
  ParseJSON(SpanFrom(far_out), &log_);

  EXPECT_THAT(log_.status(), StatusIs(Error::JSON_PARSER_STACK_LIMIT_EXCEEDED,
                                      expected_pos));
}
589 
TEST_F(JsonParserTest, NoInputError) {
  // Empty input is reported as an error at position 0, and nothing is logged.
  const std::string json;
  ParseJSON(SpanFrom(json), &log_);

  const Status result = log_.status();
  EXPECT_THAT(result, StatusIs(Error::JSON_PARSER_NO_INPUT, 0u));
  EXPECT_TRUE(log_.str().empty());
}
596 
TEST_F(JsonParserTest, InvalidTokenError) {
  // '|' does not start any JSON token; the error is reported at position 0.
  const std::string json = "|";
  ParseJSON(SpanFrom(json), &log_);

  const Status result = log_.status();
  EXPECT_THAT(result, StatusIs(Error::JSON_PARSER_INVALID_TOKEN, 0u));
  EXPECT_TRUE(log_.str().empty());
}
603 
TEST_F(JsonParserTest, InvalidNumberError) {
  // The exponent is too large (the constant used here is int64_t max), so
  // the token is reported as an invalid number at position 0.
  const std::string json = "1E9223372036854775807";
  ParseJSON(SpanFrom(json), &log_);

  const Status result = log_.status();
  EXPECT_THAT(result, StatusIs(Error::JSON_PARSER_INVALID_NUMBER, 0u));
  EXPECT_TRUE(log_.str().empty());
}
611 
TEST_F(JsonParserTest, InvalidStringError) {
  // \x22 is an unsupported escape sequence in a JSON string. The error is
  // reported at position 0, where the string token starts.
  const std::string json = R"("foo\x22")";
  ParseJSON(SpanFrom(json), &log_);

  const Status result = log_.status();
  EXPECT_THAT(result, StatusIs(Error::JSON_PARSER_INVALID_STRING, 0u));
  EXPECT_TRUE(log_.str().empty());
}
619 
TEST_F(JsonParserTest, UnexpectedArrayEndError) {
  // A trailing comma before ']' is an error; position 5 is the ']'.
  const std::string json = "[1,2,]";
  ParseJSON(SpanFrom(json), &log_);

  const Status result = log_.status();
  EXPECT_THAT(result, StatusIs(Error::JSON_PARSER_UNEXPECTED_ARRAY_END, 5u));
  EXPECT_TRUE(log_.str().empty());
}
627 
TEST_F(JsonParserTest, CommaOrArrayEndExpectedError) {
  // Two array elements separated only by a space; position 5 is the second
  // '2', where a ',' or ']' was required instead.
  const std::string json = "[1,2 2";
  ParseJSON(SpanFrom(json), &log_);

  const Status result = log_.status();
  EXPECT_THAT(result,
              StatusIs(Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED, 5u));
  EXPECT_TRUE(log_.str().empty());
}
635 
TEST_F(JsonParserTest, StringLiteralExpectedError) {
  // The key bar, a string, is missing its closing quote. Position 16 is the
  // opening quote of that key.
  const std::string json = R"({"foo": 3.1415, "bar: 31415e-4})";
  ParseJSON(SpanFrom(json), &log_);

  const Status result = log_.status();
  EXPECT_THAT(result,
              StatusIs(Error::JSON_PARSER_STRING_LITERAL_EXPECTED, 16u));
  EXPECT_TRUE(log_.str().empty());
}
644 
TEST_F(JsonParserTest, ColonExpectedError) {
  // A ',' follows the key where a ':' is required; position 6 is that ','.
  const std::string json = R"({"foo", 42})";
  ParseJSON(SpanFrom(json), &log_);

  const Status result = log_.status();
  EXPECT_THAT(result, StatusIs(Error::JSON_PARSER_COLON_EXPECTED, 6u));
  EXPECT_TRUE(log_.str().empty());
}
651 
TEST_F(JsonParserTest, UnexpectedMapEndError) {
  // A trailing comma before '}' is an error; position 12 is the '}'.
  const std::string json = R"({"foo": 42, })";
  ParseJSON(SpanFrom(json), &log_);

  const Status result = log_.status();
  EXPECT_THAT(result, StatusIs(Error::JSON_PARSER_UNEXPECTED_MAP_END, 12u));
  EXPECT_TRUE(log_.str().empty());
}
659 
TEST_F(JsonParserTest, CommaOrMapEndExpectedError) {
  // The second separator should be a comma, but is a ':' (at position 14).
  const std::string json = R"({"foo": 3.1415: "bar": 0})";
  ParseJSON(SpanFrom(json), &log_);

  const Status result = log_.status();
  EXPECT_THAT(result,
              StatusIs(Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED, 14u));
  EXPECT_TRUE(log_.str().empty());
}
668 
TEST_F(JsonParserTest, ValueExpectedError) {
  // A lone '}' appears where a value must start; the error is at position 0.
  const std::string json = "}";
  ParseJSON(SpanFrom(json), &log_);

  const Status result = log_.status();
  EXPECT_THAT(result, StatusIs(Error::JSON_PARSER_VALUE_EXPECTED, 0u));
  EXPECT_TRUE(log_.str().empty());
}
675 
676 template <typename T>
677 class ConvertJSONToCBORTest : public ::testing::Test {};
678 
679 using ContainerTestTypes = ::testing::Types<std::vector<uint8_t>, std::string>;
680 TYPED_TEST_SUITE(ConvertJSONToCBORTest, ContainerTestTypes);
681 
TYPED_TEST(ConvertJSONToCBORTest, RoundTripValidJson) {
  // Each example is converted from JSON to CBOR and back again; the result
  // must equal the input exactly. The examples therefore contain no optional
  // whitespace.
  const std::vector<std::string> examples = {
      R"({"msg":"Hello, world.","lst":[1,2,3]})",
      "3.1415",
      "false",
      "true",
      R"("Hello, world.")",
      "[1,2,3]",
      "[]",
  };

  for (const std::string& json_in : examples) {
    SCOPED_TRACE(json_in);
    const TypeParam json(json_in.begin(), json_in.end());

    std::vector<uint8_t> cbor;
    const Status to_cbor = ConvertJSONToCBOR(SpanFrom(json), &cbor);
    EXPECT_THAT(to_cbor, StatusIsOk());

    TypeParam roundtrip_json;
    const Status to_json = ConvertCBORToJSON(SpanFrom(cbor), &roundtrip_json);
    EXPECT_THAT(to_json, StatusIsOk());

    EXPECT_EQ(json, roundtrip_json);
  }
}
707 
TYPED_TEST(ConvertJSONToCBORTest, RoundTripValidJson16) {
  // The input is UTF-16 JSON whose "msg" string contains the surrogate pair
  // 0xd83c 0xdf0e. After the roundtrip through CBOR, the JSON output is
  // expected to carry that pair as the escapes \ud83c\udf0e.
  const std::vector<uint16_t> json16 = {
      '{', '"', 'm', 's',    'g',    '"', ':', '"', 'H', 'e', 'l', 'l',
      'o', ',', ' ', 0xd83c, 0xdf0e, '.', '"', ',', '"', 'l', 's', 't',
      '"', ':', '[', '1',    ',',    '2', ',', '3', ']', '}'};

  std::vector<uint8_t> cbor;
  const Status to_cbor = ConvertJSONToCBOR(SpanFrom(json16), &cbor);
  EXPECT_THAT(to_cbor, StatusIsOk());

  TypeParam roundtrip_json;
  const Status to_json = ConvertCBORToJSON(SpanFrom(cbor), &roundtrip_json);
  EXPECT_THAT(to_json, StatusIsOk());

  const std::string expected =
      "{\"msg\":\"Hello, \\ud83c\\udf0e.\",\"lst\":[1,2,3]}";
  const TypeParam expected_json(expected.begin(), expected.end());
  EXPECT_EQ(expected_json, roundtrip_json);
}
728 }  // namespace json
729 }  // namespace crdtp
730