1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 #include "mozilla/ArrayUtils.h"
6 #include "mozilla/TextUtils.h"
7 #include "mozilla/Utf8.h"
8
9 #include <cstring>
10
11 #include "jsfriendapi.h"
12
13 #include "js/CharacterEncoding.h"
14 #include "js/CompilationAndEvaluation.h" // JS::Compile
15 #include "js/Exception.h"
16 #include "js/friend/ErrorMessages.h" // JSMSG_*
17 #include "js/SourceText.h"
18 #include "jsapi-tests/tests.h"
19 #include "util/Text.h"
20 #include "vm/ErrorReporting.h"
21
22 using mozilla::ArrayEqual;
23 using mozilla::IsAsciiHexDigit;
24 using mozilla::Utf8Unit;
25
BEGIN_TEST(testUtf8BadBytes) {
  // Every case compiles "var x = " followed by one malformed UTF-8 sequence.
  // testBadUtf8() verifies the error number, runs the lambda against the
  // primary error message, and compares the final argument against the bytes
  // listed in the error's single note.

  // A lone 0x80 must be rejected as a leading unit.
  static const char badLeadingUnit[] = "var x = \x80";
  CHECK(testBadUtf8(
      badLeadingUnit, JSMSG_BAD_LEADING_UTF8_UNIT,
      [this](JS::ConstUTF8CharsZ msg) {
        const char* text = msg.c_str();
        CHECK(startsWith(text, "0x80"));
        CHECK(isBadLeadUnitMessage(text));
        return true;
      },
      "0x80"));

  // Two-byte lead followed by a byte that isn't a valid trailing unit.
  static const char badSecondInTwoByte[] = "var x = \xDF\x20";
  CHECK(testBadUtf8(
      badSecondInTwoByte, JSMSG_BAD_TRAILING_UTF8_UNIT,
      [this](JS::ConstUTF8CharsZ msg) {
        const char* text = msg.c_str();
        CHECK(isBadTrailingBytesMessage(text));
        CHECK(contains(text, "0x20"));
        return true;
      },
      "0xDF 0x20"));

  // Three-byte lead whose second unit is bad.  Validation stops at the first
  // invalid code unit and shouldn't look past it, so the third byte (0xA7)
  // must not show up in the note.
  static const char badSecondInThreeByte[] = "var x = \xEF\x17\xA7";
  CHECK(testBadUtf8(
      badSecondInThreeByte, JSMSG_BAD_TRAILING_UTF8_UNIT,
      [this](JS::ConstUTF8CharsZ msg) {
        const char* text = msg.c_str();
        CHECK(isBadTrailingBytesMessage(text));
        CHECK(contains(text, "0x17"));
        return true;
      },
      "0xEF 0x17"));

  // Two-byte lead at the very end of the source: the trailing unit is missing.
  static const char lengthTwoTooShort[] = "var x = \xDF";
  CHECK(testBadUtf8(
      lengthTwoTooShort, JSMSG_NOT_ENOUGH_CODE_UNITS,
      [this](JS::ConstUTF8CharsZ msg) {
        const char* text = msg.c_str();
        CHECK(isNotEnoughUnitsMessage(text));
        CHECK(contains(text, "0xDF"));
        CHECK(contains(text, " 1 byte, but 0 bytes were present"));
        return true;
      },
      "0xDF"));

  // Well-formed three-byte pattern that decodes to the surrogate 0xD887.
  static const char forbiddenHighSurrogate[] = "var x = \xED\xA2\x87";
  CHECK(testBadUtf8(
      forbiddenHighSurrogate, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
      [this](JS::ConstUTF8CharsZ msg) {
        const char* text = msg.c_str();
        CHECK(isSurrogateMessage(text));
        CHECK(contains(text, "0xD887"));
        return true;
      },
      "0xED 0xA2 0x87"));

  // Well-formed three-byte pattern that decodes to the surrogate 0xDDEF.
  static const char forbiddenLowSurrogate[] = "var x = \xED\xB7\xAF";
  CHECK(testBadUtf8(
      forbiddenLowSurrogate, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
      [this](JS::ConstUTF8CharsZ msg) {
        const char* text = msg.c_str();
        CHECK(isSurrogateMessage(text));
        CHECK(contains(text, "0xDDEF"));
        return true;
      },
      "0xED 0xB7 0xAF"));

  // Four-byte pattern that decodes to 0x110000, one past U+10FFFF.
  static const char oneTooBig[] = "var x = \xF4\x90\x80\x80";
  CHECK(testBadUtf8(
      oneTooBig, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
      [this](JS::ConstUTF8CharsZ msg) {
        const char* text = msg.c_str();
        CHECK(isTooBigMessage(text));
        CHECK(contains(text, "0x110000"));
        return true;
      },
      "0xF4 0x90 0x80 0x80"));

  // Overlong (two-byte) encoding of 0x0.
  static const char notShortestFormZero[] = "var x = \xC0\x80";
  CHECK(testBadUtf8(
      notShortestFormZero, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
      [this](JS::ConstUTF8CharsZ msg) {
        const char* text = msg.c_str();
        CHECK(isNotShortestFormMessage(text));
        CHECK(startsWith(text, "0x0 isn't "));
        return true;
      },
      "0xC0 0x80"));

  // Overlong (three-byte) encoding of 0x1C0.
  static const char notShortestFormNonzero[] = "var x = \xE0\x87\x80";
  CHECK(testBadUtf8(
      notShortestFormNonzero, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
      [this](JS::ConstUTF8CharsZ msg) {
        const char* text = msg.c_str();
        CHECK(isNotShortestFormMessage(text));
        CHECK(startsWith(text, "0x1C0 isn't "));
        return true;
      },
      "0xE0 0x87 0x80"));

  return true;
}
131
132 static constexpr size_t LengthOfByte = js_strlen("0xFF");
133
startsWithByte(const char * str)134 static bool startsWithByte(const char* str) {
135 return str[0] == '0' && str[1] == 'x' && IsAsciiHexDigit(str[2]) &&
136 IsAsciiHexDigit(str[3]);
137 }
138
// Returns true iff the NUL-terminated string |str| begins with |prefix|.
// An empty |prefix| matches every string.
//
// std::strncmp stops at the first NUL in either argument, so a |str| shorter
// than |prefix| compares unequal without being read past its terminator.
static bool startsWith(const char* str, const char* prefix) {
  // Use the std-qualified name: <cstring> only guarantees std::strlen, and
  // this matches the std::strncmp call.
  return std::strncmp(prefix, str, std::strlen(prefix)) == 0;
}
142
// Returns true iff |substr| occurs somewhere within |str|.  Both arguments
// are NUL-terminated strings.
static bool contains(const char* str, const char* substr) {
  const char* const firstMatch = std::strstr(str, substr);
  return firstMatch != nullptr;
}
146
// Returns true iff the NUL-terminated strings |str| and |expected| have
// identical contents.
static bool equals(const char* str, const char* expected) {
  const int ordering = std::strcmp(str, expected);
  return ordering == 0;
}
150
isBadLeadUnitMessage(const char * str)151 static bool isBadLeadUnitMessage(const char* str) {
152 return startsWithByte(str) &&
153 equals(str + LengthOfByte,
154 " byte doesn't begin a valid UTF-8 code point");
155 }
156
isBadTrailingBytesMessage(const char * str)157 static bool isBadTrailingBytesMessage(const char* str) {
158 return startsWith(str, "bad trailing UTF-8 byte ");
159 }
160
isNotEnoughUnitsMessage(const char * str)161 static bool isNotEnoughUnitsMessage(const char* str) {
162 return startsWithByte(str) &&
163 startsWith(str + LengthOfByte, " byte in UTF-8 must be followed by ");
164 }
165
isForbiddenCodePointMessage(const char * str)166 static bool isForbiddenCodePointMessage(const char* str) {
167 return contains(str, "isn't a valid code point because");
168 }
169
isSurrogateMessage(const char * str)170 static bool isSurrogateMessage(const char* str) {
171 return isForbiddenCodePointMessage(str) &&
172 contains(str, " it's a UTF-16 surrogate");
173 }
174
isTooBigMessage(const char * str)175 static bool isTooBigMessage(const char* str) {
176 return isForbiddenCodePointMessage(str) &&
177 contains(str, "the maximum code point is U+10FFFF");
178 }
179
isNotShortestFormMessage(const char * str)180 static bool isNotShortestFormMessage(const char* str) {
181 return isForbiddenCodePointMessage(str) &&
182 contains(str, "it wasn't encoded in shortest possible form");
183 }
184
185 template <size_t N, typename TestMessage>
testBadUtf8(const char (& chars)[N],unsigned errorNumber,TestMessage testMessage,const char * badBytes)186 bool testBadUtf8(const char (&chars)[N], unsigned errorNumber,
187 TestMessage testMessage, const char* badBytes) {
188 JS::Rooted<JSScript*> script(cx);
189 {
190 JS::CompileOptions options(cx);
191
192 JS::SourceText<mozilla::Utf8Unit> srcBuf;
193 CHECK(srcBuf.init(cx, chars, N - 1, JS::SourceOwnership::Borrowed));
194
195 script = JS::Compile(cx, options, srcBuf);
196 CHECK(!script);
197 }
198
199 JS::ExceptionStack exnStack(cx);
200 CHECK(JS::StealPendingExceptionStack(cx, &exnStack));
201
202 JS::ErrorReportBuilder report(cx);
203 CHECK(report.init(cx, exnStack, JS::ErrorReportBuilder::WithSideEffects));
204
205 const auto* errorReport = report.report();
206
207 CHECK(errorReport->errorNumber == errorNumber);
208
209 CHECK(testMessage(errorReport->message()));
210
211 {
212 const auto& notes = errorReport->notes;
213 CHECK(notes != nullptr);
214
215 auto iter = notes->begin();
216 CHECK(iter != notes->end());
217
218 const char* noteMessage = (*iter)->message().c_str();
219
220 // The prefix ought always be the same.
221 static constexpr char expectedPrefix[] =
222 "the code units comprising this invalid code point were: ";
223 constexpr size_t expectedPrefixLen = js_strlen(expectedPrefix);
224
225 CHECK(startsWith(noteMessage, expectedPrefix));
226
227 // The end of the prefix is the bad bytes.
228 CHECK(equals(noteMessage + expectedPrefixLen, badBytes));
229
230 ++iter;
231 CHECK(iter == notes->end());
232 }
233
234 static constexpr char16_t expectedContext[] = u"var x = ";
235 constexpr size_t expectedContextLen = js_strlen(expectedContext);
236
237 const char16_t* lineOfContext = errorReport->linebuf();
238 size_t lineOfContextLength = errorReport->linebufLength();
239
240 CHECK(lineOfContext[lineOfContextLength] == '\0');
241 CHECK(lineOfContextLength == expectedContextLen);
242
243 CHECK(std::memcmp(lineOfContext, expectedContext,
244 expectedContextLen * sizeof(char16_t)) == 0);
245
246 return true;
247 }
248 END_TEST(testUtf8BadBytes)
249
BEGIN_TEST(testMultiUnitUtf8InWindow) {
  // Each source below contains a character that should be reported as
  // JSMSG_ILLEGAL_CHARACTER; testContext() then checks that the report's line
  // of context matches the expected UTF-16 text.
  //
  // The code point that needs four UTF-8 code units (and a surrogate pair in
  // UTF-16) is spelled with explicit escapes so it cannot be lost to an
  // encoding-unaware tool: U+1F4AF is F0 9F 92 AF in UTF-8.
  // NOTE(review): the *IsMulti literals previously contained no multi-unit
  // character at all; U+1F4AF and the repeat counts were chosen to match the
  // variable names -- confirm against the intended test data.

  // The context starts with two-unit code points (U+03C0, CF 80 in UTF-8).
  static const char firstInWindowIsMultiUnit[] =
      "\xCF\x80\xCF\x80 = 6.283185307; @ bad starts HERE:\x80\xFF\xFF";
  CHECK(testContext(firstInWindowIsMultiUnit,
                    u"ππ = 6.283185307; @ bad starts HERE:"));

  // The offending character itself is a multi-unit code point.
  static const char atTokenOffsetIsMulti[] = "var z = \xF0\x9F\x92\xAF";
  CHECK(testContext(atTokenOffsetIsMulti, u"var z = \U0001F4AF"));

  // Multi-unit code points follow the offending '@' within the context.
  static const char afterTokenOffsetIsMulti[] =
      "var z = @\xF0\x9F\x92\xAF\xF0\x9F\x92\xAF\xF0\x9F\x92\xAFX";
  CHECK(testContext(afterTokenOffsetIsMulti,
                    u"var z = @\U0001F4AF\U0001F4AF\U0001F4AFX"));

  // Multi-unit code points are the last thing in the context.
  static const char atEndIsMulti[] =
      "var z = @@\xF0\x9F\x92\xAF\xF0\x9F\x92\xAF\xF0\x9F\x92\xAF";
  CHECK(testContext(atEndIsMulti,
                    u"var z = @@\U0001F4AF\U0001F4AF\U0001F4AF"));

  return true;
}
267
268 template <size_t N, size_t ContextLenWithNull>
testContext(const char (& chars)[N],const char16_t (& expectedContext)[ContextLenWithNull])269 bool testContext(const char (&chars)[N],
270 const char16_t (&expectedContext)[ContextLenWithNull]) {
271 JS::Rooted<JSScript*> script(cx);
272 {
273 JS::CompileOptions options(cx);
274
275 JS::SourceText<mozilla::Utf8Unit> srcBuf;
276 CHECK(srcBuf.init(cx, chars, N - 1, JS::SourceOwnership::Borrowed));
277
278 script = JS::Compile(cx, options, srcBuf);
279 CHECK(!script);
280 }
281
282 JS::ExceptionStack exnStack(cx);
283 CHECK(JS::StealPendingExceptionStack(cx, &exnStack));
284
285 JS::ErrorReportBuilder report(cx);
286 CHECK(report.init(cx, exnStack, JS::ErrorReportBuilder::WithSideEffects));
287
288 const auto* errorReport = report.report();
289
290 CHECK(errorReport->errorNumber == JSMSG_ILLEGAL_CHARACTER);
291
292 const char16_t* lineOfContext = errorReport->linebuf();
293 size_t lineOfContextLength = errorReport->linebufLength();
294
295 CHECK(lineOfContext[lineOfContextLength] == '\0');
296 CHECK(lineOfContextLength == ContextLenWithNull - 1);
297
298 CHECK(ArrayEqual(lineOfContext, expectedContext, ContextLenWithNull));
299
300 return true;
301 }
302 END_TEST(testMultiUnitUtf8InWindow)
303