1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 
5 #include "mozilla/ArrayUtils.h"
6 #include "mozilla/TextUtils.h"
7 #include "mozilla/Utf8.h"
8 
9 #include <cstring>
10 
11 #include "jsfriendapi.h"
12 
13 #include "js/CharacterEncoding.h"
14 #include "js/CompilationAndEvaluation.h"  // JS::Compile
15 #include "js/Exception.h"
16 #include "js/friend/ErrorMessages.h"  // JSMSG_*
17 #include "js/SourceText.h"
18 #include "jsapi-tests/tests.h"
19 #include "util/Text.h"
20 #include "vm/ErrorReporting.h"
21 
22 using mozilla::ArrayEqual;
23 using mozilla::IsAsciiHexDigit;
24 using mozilla::Utf8Unit;
25 
BEGIN_TEST(testUtf8BadBytes)26 BEGIN_TEST(testUtf8BadBytes) {
27   static const char badLeadingUnit[] = "var x = \x80";
28   CHECK(testBadUtf8(
29       badLeadingUnit, JSMSG_BAD_LEADING_UTF8_UNIT,
30       [this](JS::ConstUTF8CharsZ message) {
31         const char* chars = message.c_str();
32         CHECK(startsWith(chars, "0x80"));
33         CHECK(isBadLeadUnitMessage(chars));
34         return true;
35       },
36       "0x80"));
37 
38   static const char badSecondInTwoByte[] = "var x = \xDF\x20";
39   CHECK(testBadUtf8(
40       badSecondInTwoByte, JSMSG_BAD_TRAILING_UTF8_UNIT,
41       [this](JS::ConstUTF8CharsZ message) {
42         const char* chars = message.c_str();
43         CHECK(isBadTrailingBytesMessage(chars));
44         CHECK(contains(chars, "0x20"));
45         return true;
46       },
47       "0xDF 0x20"));
48 
49   static const char badSecondInThreeByte[] = "var x = \xEF\x17\xA7";
50   CHECK(testBadUtf8(
51       badSecondInThreeByte, JSMSG_BAD_TRAILING_UTF8_UNIT,
52       [this](JS::ConstUTF8CharsZ message) {
53         const char* chars = message.c_str();
54         CHECK(isBadTrailingBytesMessage(chars));
55         CHECK(contains(chars, "0x17"));
56         return true;
57       },
58       // Validating stops with the first invalid code unit and
59       // shouldn't go beyond that.
60       "0xEF 0x17"));
61 
62   static const char lengthTwoTooShort[] = "var x = \xDF";
63   CHECK(testBadUtf8(
64       lengthTwoTooShort, JSMSG_NOT_ENOUGH_CODE_UNITS,
65       [this](JS::ConstUTF8CharsZ message) {
66         const char* chars = message.c_str();
67         CHECK(isNotEnoughUnitsMessage(chars));
68         CHECK(contains(chars, "0xDF"));
69         CHECK(contains(chars, " 1 byte, but 0 bytes were present"));
70         return true;
71       },
72       "0xDF"));
73 
74   static const char forbiddenHighSurrogate[] = "var x = \xED\xA2\x87";
75   CHECK(testBadUtf8(
76       forbiddenHighSurrogate, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
77       [this](JS::ConstUTF8CharsZ message) {
78         const char* chars = message.c_str();
79         CHECK(isSurrogateMessage(chars));
80         CHECK(contains(chars, "0xD887"));
81         return true;
82       },
83       "0xED 0xA2 0x87"));
84 
85   static const char forbiddenLowSurrogate[] = "var x = \xED\xB7\xAF";
86   CHECK(testBadUtf8(
87       forbiddenLowSurrogate, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
88       [this](JS::ConstUTF8CharsZ message) {
89         const char* chars = message.c_str();
90         CHECK(isSurrogateMessage(chars));
91         CHECK(contains(chars, "0xDDEF"));
92         return true;
93       },
94       "0xED 0xB7 0xAF"));
95 
96   static const char oneTooBig[] = "var x = \xF4\x90\x80\x80";
97   CHECK(testBadUtf8(
98       oneTooBig, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
99       [this](JS::ConstUTF8CharsZ message) {
100         const char* chars = message.c_str();
101         CHECK(isTooBigMessage(chars));
102         CHECK(contains(chars, "0x110000"));
103         return true;
104       },
105       "0xF4 0x90 0x80 0x80"));
106 
107   static const char notShortestFormZero[] = "var x = \xC0\x80";
108   CHECK(testBadUtf8(
109       notShortestFormZero, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
110       [this](JS::ConstUTF8CharsZ message) {
111         const char* chars = message.c_str();
112         CHECK(isNotShortestFormMessage(chars));
113         CHECK(startsWith(chars, "0x0 isn't "));
114         return true;
115       },
116       "0xC0 0x80"));
117 
118   static const char notShortestFormNonzero[] = "var x = \xE0\x87\x80";
119   CHECK(testBadUtf8(
120       notShortestFormNonzero, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
121       [this](JS::ConstUTF8CharsZ message) {
122         const char* chars = message.c_str();
123         CHECK(isNotShortestFormMessage(chars));
124         CHECK(startsWith(chars, "0x1C0 isn't "));
125         return true;
126       },
127       "0xE0 0x87 0x80"));
128 
129   return true;
130 }
131 
// Number of characters in the textual form of one byte ("0x" plus two hex
// digits).  sizeof counts the literal's trailing NUL, hence the "- 1".
static constexpr size_t LengthOfByte = sizeof("0xFF") - 1;
133 
startsWithByte(const char * str)134 static bool startsWithByte(const char* str) {
135   return str[0] == '0' && str[1] == 'x' && IsAsciiHexDigit(str[2]) &&
136          IsAsciiHexDigit(str[3]);
137 }
138 
// Reports whether the NUL-terminated string |str| begins with |prefix|.  An
// empty |prefix| matches every string.
static bool startsWith(const char* str, const char* prefix) {
  const size_t prefixLength = std::strlen(prefix);
  return std::strncmp(str, prefix, prefixLength) == 0;
}
142 
// Reports whether |substr| occurs anywhere within the NUL-terminated string
// |str|.
static bool contains(const char* str, const char* substr) {
  const char* firstMatch = std::strstr(str, substr);
  return firstMatch != nullptr;
}
146 
// Reports whether the NUL-terminated strings |str| and |expected| hold
// exactly the same characters.
static bool equals(const char* str, const char* expected) {
  const int comparison = std::strcmp(str, expected);
  return comparison == 0;
}
150 
isBadLeadUnitMessage(const char * str)151 static bool isBadLeadUnitMessage(const char* str) {
152   return startsWithByte(str) &&
153          equals(str + LengthOfByte,
154                 " byte doesn't begin a valid UTF-8 code point");
155 }
156 
isBadTrailingBytesMessage(const char * str)157 static bool isBadTrailingBytesMessage(const char* str) {
158   return startsWith(str, "bad trailing UTF-8 byte ");
159 }
160 
isNotEnoughUnitsMessage(const char * str)161 static bool isNotEnoughUnitsMessage(const char* str) {
162   return startsWithByte(str) &&
163          startsWith(str + LengthOfByte, " byte in UTF-8 must be followed by ");
164 }
165 
isForbiddenCodePointMessage(const char * str)166 static bool isForbiddenCodePointMessage(const char* str) {
167   return contains(str, "isn't a valid code point because");
168 }
169 
isSurrogateMessage(const char * str)170 static bool isSurrogateMessage(const char* str) {
171   return isForbiddenCodePointMessage(str) &&
172          contains(str, " it's a UTF-16 surrogate");
173 }
174 
isTooBigMessage(const char * str)175 static bool isTooBigMessage(const char* str) {
176   return isForbiddenCodePointMessage(str) &&
177          contains(str, "the maximum code point is U+10FFFF");
178 }
179 
isNotShortestFormMessage(const char * str)180 static bool isNotShortestFormMessage(const char* str) {
181   return isForbiddenCodePointMessage(str) &&
182          contains(str, "it wasn't encoded in shortest possible form");
183 }
184 
185 template <size_t N, typename TestMessage>
testBadUtf8(const char (& chars)[N],unsigned errorNumber,TestMessage testMessage,const char * badBytes)186 bool testBadUtf8(const char (&chars)[N], unsigned errorNumber,
187                  TestMessage testMessage, const char* badBytes) {
188   JS::Rooted<JSScript*> script(cx);
189   {
190     JS::CompileOptions options(cx);
191 
192     JS::SourceText<mozilla::Utf8Unit> srcBuf;
193     CHECK(srcBuf.init(cx, chars, N - 1, JS::SourceOwnership::Borrowed));
194 
195     script = JS::Compile(cx, options, srcBuf);
196     CHECK(!script);
197   }
198 
199   JS::ExceptionStack exnStack(cx);
200   CHECK(JS::StealPendingExceptionStack(cx, &exnStack));
201 
202   JS::ErrorReportBuilder report(cx);
203   CHECK(report.init(cx, exnStack, JS::ErrorReportBuilder::WithSideEffects));
204 
205   const auto* errorReport = report.report();
206 
207   CHECK(errorReport->errorNumber == errorNumber);
208 
209   CHECK(testMessage(errorReport->message()));
210 
211   {
212     const auto& notes = errorReport->notes;
213     CHECK(notes != nullptr);
214 
215     auto iter = notes->begin();
216     CHECK(iter != notes->end());
217 
218     const char* noteMessage = (*iter)->message().c_str();
219 
220     // The prefix ought always be the same.
221     static constexpr char expectedPrefix[] =
222         "the code units comprising this invalid code point were: ";
223     constexpr size_t expectedPrefixLen = js_strlen(expectedPrefix);
224 
225     CHECK(startsWith(noteMessage, expectedPrefix));
226 
227     // The end of the prefix is the bad bytes.
228     CHECK(equals(noteMessage + expectedPrefixLen, badBytes));
229 
230     ++iter;
231     CHECK(iter == notes->end());
232   }
233 
234   static constexpr char16_t expectedContext[] = u"var x = ";
235   constexpr size_t expectedContextLen = js_strlen(expectedContext);
236 
237   const char16_t* lineOfContext = errorReport->linebuf();
238   size_t lineOfContextLength = errorReport->linebufLength();
239 
240   CHECK(lineOfContext[lineOfContextLength] == '\0');
241   CHECK(lineOfContextLength == expectedContextLen);
242 
243   CHECK(std::memcmp(lineOfContext, expectedContext,
244                     expectedContextLen * sizeof(char16_t)) == 0);
245 
246   return true;
247 }
248 END_TEST(testUtf8BadBytes)
249 
// Exercises the line of context attached to an error when the source holds
// multi-unit UTF-8 sequences.  Each case hands testContext a UTF-8 source
// together with the UTF-16 context that the error report is expected to
// carry.
BEGIN_TEST(testMultiUnitUtf8InWindow) {
  // The line opens with two two-byte sequences (\xCF\x80, i.e. 'π').  The
  // expected context stops before the trailing \x80\xFF\xFF bytes.
  static const char firstInWindowIsMultiUnit[] =
      "\xCF\x80\xCF\x80 = 6.283185307; @ bad starts HERE:\x80\xFF\xFF";
  CHECK(testContext(firstInWindowIsMultiUnit,
                    u"ππ = 6.283185307; @ bad starts HERE:"));

  // NOTE(review): the non-ASCII characters in the three cases below display
  // as U+FFFD replacement characters.  If that is the result of an encoding
  // mishap rather than the intended test data, restore the original
  // characters -- confirm against version control.

  // Non-ASCII text directly after "var z = ".
  static const char atTokenOffsetIsMulti[] = "var z = ��";
  CHECK(testContext(atTokenOffsetIsMulti, u"var z = ��"));

  // Non-ASCII text after an '@', followed by an ASCII 'X'.
  static const char afterTokenOffsetIsMulti[] = "var z = @������X";
  CHECK(testContext(afterTokenOffsetIsMulti, u"var z = @������X"));

  // Non-ASCII text at the very end of the source, after "@@".
  static const char atEndIsMulti[] = "var z = @@������";
  CHECK(testContext(atEndIsMulti, u"var z = @@������"));

  return true;
}
267 
268 template <size_t N, size_t ContextLenWithNull>
testContext(const char (& chars)[N],const char16_t (& expectedContext)[ContextLenWithNull])269 bool testContext(const char (&chars)[N],
270                  const char16_t (&expectedContext)[ContextLenWithNull]) {
271   JS::Rooted<JSScript*> script(cx);
272   {
273     JS::CompileOptions options(cx);
274 
275     JS::SourceText<mozilla::Utf8Unit> srcBuf;
276     CHECK(srcBuf.init(cx, chars, N - 1, JS::SourceOwnership::Borrowed));
277 
278     script = JS::Compile(cx, options, srcBuf);
279     CHECK(!script);
280   }
281 
282   JS::ExceptionStack exnStack(cx);
283   CHECK(JS::StealPendingExceptionStack(cx, &exnStack));
284 
285   JS::ErrorReportBuilder report(cx);
286   CHECK(report.init(cx, exnStack, JS::ErrorReportBuilder::WithSideEffects));
287 
288   const auto* errorReport = report.report();
289 
290   CHECK(errorReport->errorNumber == JSMSG_ILLEGAL_CHARACTER);
291 
292   const char16_t* lineOfContext = errorReport->linebuf();
293   size_t lineOfContextLength = errorReport->linebufLength();
294 
295   CHECK(lineOfContext[lineOfContextLength] == '\0');
296   CHECK(lineOfContextLength == ContextLenWithNull - 1);
297 
298   CHECK(ArrayEqual(lineOfContext, expectedContext, ContextLenWithNull));
299 
300   return true;
301 }
302 END_TEST(testMultiUnitUtf8InWindow)
303