1 //===- llvm/unittest/Support/ConvertUTFTest.cpp - ConvertUTF tests --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Support/ConvertUTF.h"
10 #include "llvm/ADT/ArrayRef.h"
11 #include "gtest/gtest.h"
12 #include <string>
13 #include <vector>
14 
15 using namespace llvm;
16 
TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) {
  // Input is "the look of disapproval" (U+0CA0 '_' U+0CA0) as UTF-16 code
  // units stored low byte first, preceded by the FF FE byte order mark.
  alignas(UTF16) static const char Utf16LE[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
  // The same three characters encoded as UTF-8, without any mark.
  const std::string Utf8Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");

  std::string Utf8Actual;
  // sizeof - 1 leaves out the string literal's implicit NUL terminator.
  EXPECT_TRUE(convertUTF16ToUTF8String(
      ArrayRef<char>(Utf16LE, sizeof(Utf16LE) - 1), Utf8Actual));
  EXPECT_EQ(Utf8Expected, Utf8Actual);
}
27 
TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) {
  // Input is "the look of disapproval" (U+0CA0 '_' U+0CA0) as UTF-16 code
  // units stored high byte first, preceded by the FE FF byte order mark.
  alignas(UTF16) static const char Utf16BE[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0";
  // The same three characters encoded as UTF-8, without any mark.
  const std::string Utf8Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");

  std::string Utf8Actual;
  // sizeof - 1 leaves out the string literal's implicit NUL terminator.
  EXPECT_TRUE(convertUTF16ToUTF8String(
      ArrayRef<char>(Utf16BE, sizeof(Utf16BE) - 1), Utf8Actual));
  EXPECT_EQ(Utf8Expected, Utf8Actual);
}
38 
TEST(ConvertUTFTest, ConvertUTF8ToUTF16String) {
  // Input is "the look of disapproval" (U+0CA0 '_' U+0CA0) encoded as UTF-8.
  static const char Utf8Bytes[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
  // sizeof - 1 leaves out the string literal's implicit NUL terminator.
  const StringRef Utf8(Utf8Bytes, sizeof(Utf8Bytes) - 1);

  SmallVector<UTF16, 5> Utf16;
  EXPECT_TRUE(convertUTF8ToUTF16String(Utf8, Utf16));

  // Only the first three entries are compared below; the trailing 0 is never
  // read by this test.
  static const UTF16 Expected[] = {0x0CA0, 0x005f, 0x0CA0, 0};
  // Abort on a size mismatch so the element-wise loop cannot index past the
  // end of the converted buffer.
  ASSERT_EQ(3u, Utf16.size());
  for (unsigned Idx = 0; Idx < 3; ++Idx)
    EXPECT_EQ(Expected[Idx], Utf16[Idx]);
}
51 
TEST(ConvertUTFTest, OddLengthInput) {
  // Five bytes cannot be split into whole 2-byte UTF-16 code units, so the
  // conversion is expected to report failure.
  const auto FiveBytes = makeArrayRef("xxxxx", 5);
  std::string Utf8;
  EXPECT_FALSE(convertUTF16ToUTF8String(FiveBytes, Utf8));
}
57 
TEST(ConvertUTFTest, Empty) {
  // Converting zero bytes is expected to succeed and produce no output.
  const llvm::ArrayRef<char> NoBytes(None);
  std::string Utf8;
  EXPECT_TRUE(convertUTF16ToUTF8String(NoBytes, Utf8));
  EXPECT_TRUE(Utf8.empty());
}
64 
TEST(ConvertUTFTest, HasUTF16BOM) {
  // The mark is expected to be recognised in either byte order.
  EXPECT_TRUE(hasUTF16ByteOrderMark(makeArrayRef("\xff\xfe", 2)));
  EXPECT_TRUE(hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff", 2)));

  // Don't care about odd lengths.
  EXPECT_TRUE(hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff ", 3)));

  // A mark followed by further payload is still a mark. Note that "\x00a"
  // lexes as a single hex escape (0x0A), so the literal holds exactly the six
  // bytes FE FF 0A 's' 'd' 'f' that are passed here.
  EXPECT_TRUE(hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff\x00asdf", 6)));

  // Inputs shorter than two bytes are expected to report no mark.
  EXPECT_FALSE(hasUTF16ByteOrderMark(None));
  EXPECT_FALSE(hasUTF16ByteOrderMark(makeArrayRef("\xfe", 1)));
}
80 
TEST(ConvertUTFTest, UTF16WrappersForConvertUTF16ToUTF8String) {
  // Input is "the look of disapproval" (U+0CA0 '_' U+0CA0) as little-endian
  // UTF-16 with a leading FF FE byte order mark. The buffer is aligned for
  // UTF16 so that it can be viewed as an array of code units.
  alignas(UTF16) static const char Utf16LE[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
  const std::string Utf8Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");

  // Four code units: the byte order mark plus the three characters.
  const ArrayRef<UTF16> CodeUnits =
      makeArrayRef(reinterpret_cast<const UTF16 *>(Utf16LE), 4);

  // Exercises the ArrayRef<UTF16> entry point rather than the raw-byte one.
  std::string Utf8Actual;
  EXPECT_TRUE(convertUTF16ToUTF8String(CodeUnits, Utf8Actual));
  EXPECT_EQ(Utf8Expected, Utf8Actual);
}
91 
TEST(ConvertUTFTest, ConvertUTF8toWide) {
  // Input is "the look of disapproval" (U+0CA0 '_' U+0CA0) encoded as UTF-8.
  static const char Utf8Bytes[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
  const std::wstring WideExpected(L"\x0ca0_\x0ca0");

  std::wstring WideActual;

  // First pass: hand over a NUL-terminated C string. The cast spells out that
  // a plain `const char *` argument is intended.
  EXPECT_TRUE(ConvertUTF8toWide(static_cast<const char *>(Utf8Bytes),
                                WideActual));
  EXPECT_EQ(WideExpected, WideActual);

  // Second pass: hand over an explicit-length StringRef covering the seven
  // payload bytes, starting again from an empty output string.
  WideActual.clear();
  EXPECT_TRUE(ConvertUTF8toWide(StringRef(Utf8Bytes, 7), WideActual));
  EXPECT_EQ(WideExpected, WideActual);
}
105 
TEST(ConvertUTFTest, convertWideToUTF8) {
  // Input is "the look of disapproval" (U+0CA0 '_' U+0CA0) as a wide string.
  static const wchar_t WideChars[] = L"\x0ca0_\x0ca0";
  // The same three characters encoded as UTF-8.
  const std::string Utf8Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");

  std::string Utf8Actual;
  EXPECT_TRUE(convertWideToUTF8(WideChars, Utf8Actual));
  EXPECT_EQ(Utf8Expected, Utf8Actual);
}
115 
116 struct ConvertUTFResultContainer {
117   ConversionResult ErrorCode;
118   std::vector<unsigned> UnicodeScalars;
119 
ConvertUTFResultContainerConvertUTFResultContainer120   ConvertUTFResultContainer(ConversionResult ErrorCode)
121       : ErrorCode(ErrorCode) {}
122 
123   ConvertUTFResultContainer
withScalarsConvertUTFResultContainer124   withScalars(unsigned US0 = 0x110000, unsigned US1 = 0x110000,
125               unsigned US2 = 0x110000, unsigned US3 = 0x110000,
126               unsigned US4 = 0x110000, unsigned US5 = 0x110000,
127               unsigned US6 = 0x110000, unsigned US7 = 0x110000) {
128     ConvertUTFResultContainer Result(*this);
129     if (US0 != 0x110000)
130       Result.UnicodeScalars.push_back(US0);
131     if (US1 != 0x110000)
132       Result.UnicodeScalars.push_back(US1);
133     if (US2 != 0x110000)
134       Result.UnicodeScalars.push_back(US2);
135     if (US3 != 0x110000)
136       Result.UnicodeScalars.push_back(US3);
137     if (US4 != 0x110000)
138       Result.UnicodeScalars.push_back(US4);
139     if (US5 != 0x110000)
140       Result.UnicodeScalars.push_back(US5);
141     if (US6 != 0x110000)
142       Result.UnicodeScalars.push_back(US6);
143     if (US7 != 0x110000)
144       Result.UnicodeScalars.push_back(US7);
145     return Result;
146   }
147 };
148 
149 std::pair<ConversionResult, std::vector<unsigned>>
ConvertUTF8ToUnicodeScalarsLenient(StringRef S)150 ConvertUTF8ToUnicodeScalarsLenient(StringRef S) {
151   const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
152 
153   const UTF8 *SourceNext = SourceStart;
154   std::vector<UTF32> Decoded(S.size(), 0);
155   UTF32 *TargetStart = Decoded.data();
156 
157   auto ErrorCode =
158       ConvertUTF8toUTF32(&SourceNext, SourceStart + S.size(), &TargetStart,
159                          Decoded.data() + Decoded.size(), lenientConversion);
160 
161   Decoded.resize(TargetStart - Decoded.data());
162 
163   return std::make_pair(ErrorCode, Decoded);
164 }
165 
166 std::pair<ConversionResult, std::vector<unsigned>>
ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S)167 ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) {
168   const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
169 
170   const UTF8 *SourceNext = SourceStart;
171   std::vector<UTF32> Decoded(S.size(), 0);
172   UTF32 *TargetStart = Decoded.data();
173 
174   auto ErrorCode = ConvertUTF8toUTF32Partial(
175       &SourceNext, SourceStart + S.size(), &TargetStart,
176       Decoded.data() + Decoded.size(), lenientConversion);
177 
178   Decoded.resize(TargetStart - Decoded.data());
179 
180   return std::make_pair(ErrorCode, Decoded);
181 }
182 
183 ::testing::AssertionResult
CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected,StringRef S,bool Partial=false)184 CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected,
185                                  StringRef S, bool Partial = false) {
186   ConversionResult ErrorCode;
187   std::vector<unsigned> Decoded;
188   if (!Partial)
189     std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsLenient(S);
190   else
191     std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsPartialLenient(S);
192 
193   if (Expected.ErrorCode != ErrorCode)
194     return ::testing::AssertionFailure() << "Expected error code "
195                                          << Expected.ErrorCode << ", actual "
196                                          << ErrorCode;
197 
198   if (Expected.UnicodeScalars != Decoded)
199     return ::testing::AssertionFailure()
200            << "Expected lenient decoded result:\n"
201            << ::testing::PrintToString(Expected.UnicodeScalars) << "\n"
202            << "Actual result:\n" << ::testing::PrintToString(Decoded);
203 
204   return ::testing::AssertionSuccess();
205 }
206 
TEST(ConvertUTFTest,UTF8ToUTF32Lenient)207 TEST(ConvertUTFTest, UTF8ToUTF32Lenient) {
208 
209   //
210   // 1-byte sequences
211   //
212 
213   // U+0041 LATIN CAPITAL LETTER A
214   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
215       ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41"));
216 
217   //
218   // 2-byte sequences
219   //
220 
221   // U+0283 LATIN SMALL LETTER ESH
222   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
223       ConvertUTFResultContainer(conversionOK).withScalars(0x0283),
224       "\xca\x83"));
225 
226   // U+03BA GREEK SMALL LETTER KAPPA
227   // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
228   // U+03C3 GREEK SMALL LETTER SIGMA
229   // U+03BC GREEK SMALL LETTER MU
230   // U+03B5 GREEK SMALL LETTER EPSILON
231   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
232       ConvertUTFResultContainer(conversionOK)
233           .withScalars(0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5),
234       "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5"));
235 
236   //
237   // 3-byte sequences
238   //
239 
240   // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
241   // U+6587 CJK UNIFIED IDEOGRAPH-6587
242   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
243       ConvertUTFResultContainer(conversionOK).withScalars(0x4f8b, 0x6587),
244       "\xe4\xbe\x8b\xe6\x96\x87"));
245 
246   // U+D55C HANGUL SYLLABLE HAN
247   // U+AE00 HANGUL SYLLABLE GEUL
248   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
249       ConvertUTFResultContainer(conversionOK).withScalars(0xd55c, 0xae00),
250       "\xed\x95\x9c\xea\xb8\x80"));
251 
252   // U+1112 HANGUL CHOSEONG HIEUH
253   // U+1161 HANGUL JUNGSEONG A
254   // U+11AB HANGUL JONGSEONG NIEUN
255   // U+1100 HANGUL CHOSEONG KIYEOK
256   // U+1173 HANGUL JUNGSEONG EU
257   // U+11AF HANGUL JONGSEONG RIEUL
258   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
259       ConvertUTFResultContainer(conversionOK)
260           .withScalars(0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af),
261       "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3"
262       "\xe1\x86\xaf"));
263 
264   //
265   // 4-byte sequences
266   //
267 
268   // U+E0100 VARIATION SELECTOR-17
269   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
270       ConvertUTFResultContainer(conversionOK).withScalars(0x000E0100),
271       "\xf3\xa0\x84\x80"));
272 
273   //
274   // First possible sequence of a certain length
275   //
276 
277   // U+0000 NULL
278   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
279       ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
280       StringRef("\x00", 1)));
281 
282   // U+0080 PADDING CHARACTER
283   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
284       ConvertUTFResultContainer(conversionOK).withScalars(0x0080),
285       "\xc2\x80"));
286 
287   // U+0800 SAMARITAN LETTER ALAF
288   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
289       ConvertUTFResultContainer(conversionOK).withScalars(0x0800),
290       "\xe0\xa0\x80"));
291 
292   // U+10000 LINEAR B SYLLABLE B008 A
293   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
294       ConvertUTFResultContainer(conversionOK).withScalars(0x10000),
295       "\xf0\x90\x80\x80"));
296 
297   // U+200000 (invalid)
298   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
299       ConvertUTFResultContainer(sourceIllegal)
300           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
301       "\xf8\x88\x80\x80\x80"));
302 
303   // U+4000000 (invalid)
304   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
305       ConvertUTFResultContainer(sourceIllegal)
306           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
307       "\xfc\x84\x80\x80\x80\x80"));
308 
309   //
310   // Last possible sequence of a certain length
311   //
312 
313   // U+007F DELETE
314   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
315       ConvertUTFResultContainer(conversionOK).withScalars(0x007f), "\x7f"));
316 
317   // U+07FF (unassigned)
318   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
319       ConvertUTFResultContainer(conversionOK).withScalars(0x07ff),
320       "\xdf\xbf"));
321 
322   // U+FFFF (noncharacter)
323   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
324       ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
325       "\xef\xbf\xbf"));
326 
327   // U+1FFFFF (invalid)
328   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
329       ConvertUTFResultContainer(sourceIllegal)
330           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
331       "\xf7\xbf\xbf\xbf"));
332 
333   // U+3FFFFFF (invalid)
334   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
335       ConvertUTFResultContainer(sourceIllegal)
336           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
337       "\xfb\xbf\xbf\xbf\xbf"));
338 
339   // U+7FFFFFFF (invalid)
340   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
341       ConvertUTFResultContainer(sourceIllegal)
342           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
343       "\xfd\xbf\xbf\xbf\xbf\xbf"));
344 
345   //
346   // Other boundary conditions
347   //
348 
349   // U+D7FF (unassigned)
350   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
351       ConvertUTFResultContainer(conversionOK).withScalars(0xd7ff),
352       "\xed\x9f\xbf"));
353 
354   // U+E000 (private use)
355   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
356       ConvertUTFResultContainer(conversionOK).withScalars(0xe000),
357       "\xee\x80\x80"));
358 
359   // U+FFFD REPLACEMENT CHARACTER
360   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
361       ConvertUTFResultContainer(conversionOK).withScalars(0xfffd),
362       "\xef\xbf\xbd"));
363 
364   // U+10FFFF (noncharacter)
365   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
366       ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
367       "\xf4\x8f\xbf\xbf"));
368 
369   // U+110000 (invalid)
370   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
371       ConvertUTFResultContainer(sourceIllegal)
372           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
373       "\xf4\x90\x80\x80"));
374 
375   //
376   // Unexpected continuation bytes
377   //
378 
379   // A sequence of unexpected continuation bytes that don't follow a first
380   // byte, every byte is a maximal subpart.
381 
382   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
383       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\x80"));
384   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
385       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xbf"));
386   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
387       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
388       "\x80\x80"));
389   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
390       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
391       "\x80\xbf"));
392   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
393       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
394       "\xbf\x80"));
395   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
396       ConvertUTFResultContainer(sourceIllegal)
397           .withScalars(0xfffd, 0xfffd, 0xfffd),
398       "\x80\xbf\x80"));
399   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
400       ConvertUTFResultContainer(sourceIllegal)
401           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
402       "\x80\xbf\x80\xbf"));
403   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
404       ConvertUTFResultContainer(sourceIllegal)
405           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
406       "\x80\xbf\x82\xbf\xaa"));
407   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
408       ConvertUTFResultContainer(sourceIllegal)
409           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
410       "\xaa\xb0\xbb\xbf\xaa\xa0"));
411   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
412       ConvertUTFResultContainer(sourceIllegal)
413           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
414       "\xaa\xb0\xbb\xbf\xaa\xa0\x8f"));
415 
416   // All continuation bytes (0x80--0xbf).
417   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
418       ConvertUTFResultContainer(sourceIllegal)
419           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
420                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
421           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
422                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
423           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
424                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
425           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
426                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
427           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
428                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
429           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
430                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
431           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
432                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
433           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
434                        0xfffd, 0xfffd, 0xfffd, 0xfffd),
435       "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
436       "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
437       "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
438       "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"));
439 
440   //
441   // Lonely start bytes
442   //
443 
444   // Start bytes of 2-byte sequences (0xc0--0xdf).
445   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
446       ConvertUTFResultContainer(sourceIllegal)
447           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
448                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
449           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
450                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
451           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
452                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
453           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
454                        0xfffd, 0xfffd, 0xfffd, 0xfffd),
455       "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
456       "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"));
457 
458   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
459       ConvertUTFResultContainer(sourceIllegal)
460           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
461                        0xfffd, 0x0020, 0xfffd, 0x0020)
462           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
463                        0xfffd, 0x0020, 0xfffd, 0x0020)
464           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
465                        0xfffd, 0x0020, 0xfffd, 0x0020)
466           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
467                        0xfffd, 0x0020, 0xfffd, 0x0020)
468           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
469                        0xfffd, 0x0020, 0xfffd, 0x0020)
470           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
471                        0xfffd, 0x0020, 0xfffd, 0x0020)
472           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
473                        0xfffd, 0x0020, 0xfffd, 0x0020)
474           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
475                        0xfffd, 0x0020, 0xfffd, 0x0020),
476       "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20"
477       "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20"
478       "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20"
479       "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20"));
480 
481   // Start bytes of 3-byte sequences (0xe0--0xef).
482   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
483       ConvertUTFResultContainer(sourceIllegal)
484           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
485                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
486           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
487                        0xfffd, 0xfffd, 0xfffd, 0xfffd),
488       "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"));
489 
490   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
491       ConvertUTFResultContainer(sourceIllegal)
492           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
493                        0xfffd, 0x0020, 0xfffd, 0x0020)
494           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
495                        0xfffd, 0x0020, 0xfffd, 0x0020)
496           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
497                        0xfffd, 0x0020, 0xfffd, 0x0020)
498           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
499                        0xfffd, 0x0020, 0xfffd, 0x0020),
500       "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20"
501       "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20"));
502 
503   // Start bytes of 4-byte sequences (0xf0--0xf7).
504   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
505       ConvertUTFResultContainer(sourceIllegal)
506           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
507                        0xfffd, 0xfffd, 0xfffd, 0xfffd),
508       "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"));
509 
510   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
511       ConvertUTFResultContainer(sourceIllegal)
512           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
513                        0xfffd, 0x0020, 0xfffd, 0x0020)
514           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
515                        0xfffd, 0x0020, 0xfffd, 0x0020),
516       "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20"));
517 
518   // Start bytes of 5-byte sequences (0xf8--0xfb).
519   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
520       ConvertUTFResultContainer(sourceIllegal)
521           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
522       "\xf8\xf9\xfa\xfb"));
523 
524   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
525       ConvertUTFResultContainer(sourceIllegal)
526           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
527                        0xfffd, 0x0020, 0xfffd, 0x0020),
528       "\xf8\x20\xf9\x20\xfa\x20\xfb\x20"));
529 
530   // Start bytes of 6-byte sequences (0xfc--0xfd).
531   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
532       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
533       "\xfc\xfd"));
534 
535   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
536       ConvertUTFResultContainer(sourceIllegal)
537           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020),
538       "\xfc\x20\xfd\x20"));
539 
540   //
541   // Other bytes (0xc0--0xc1, 0xfe--0xff).
542   //
543 
544   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
545       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc0"));
546   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
547       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc1"));
548   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
549       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfe"));
550   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
551       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xff"));
552 
553   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
554       ConvertUTFResultContainer(sourceIllegal)
555           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
556       "\xc0\xc1\xfe\xff"));
557 
558   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
559       ConvertUTFResultContainer(sourceIllegal)
560           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
561       "\xfe\xfe\xff\xff"));
562 
563   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
564       ConvertUTFResultContainer(sourceIllegal)
565           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
566       "\xfe\x80\x80\x80\x80\x80"));
567 
568   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
569       ConvertUTFResultContainer(sourceIllegal)
570           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
571       "\xff\x80\x80\x80\x80\x80"));
572 
573   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
574       ConvertUTFResultContainer(sourceIllegal)
575           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
576                        0xfffd, 0x0020, 0xfffd, 0x0020),
577       "\xc0\x20\xc1\x20\xfe\x20\xff\x20"));
578 
579   //
580   // Sequences with one continuation byte missing
581   //
582 
583   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
584       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc2"));
585   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
586       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xdf"));
587   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
588       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
589       "\xe0\xa0"));
590   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
591       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
592       "\xe0\xbf"));
593   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
594       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
595       "\xe1\x80"));
596   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
597       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
598       "\xec\xbf"));
599   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
600       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
601       "\xed\x80"));
602   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
603       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
604       "\xed\x9f"));
605   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
606       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
607       "\xee\x80"));
608   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
609       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
610       "\xef\xbf"));
611   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
612       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
613       "\xf0\x90\x80"));
614   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
615       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
616       "\xf0\xbf\xbf"));
617   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
618       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
619       "\xf1\x80\x80"));
620   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
621       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
622       "\xf3\xbf\xbf"));
623   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
624       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
625       "\xf4\x80\x80"));
626   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
627       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
628       "\xf4\x8f\xbf"));
629 
630   // Overlong sequences with one trailing byte missing.
631   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
632       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
633       "\xc0"));
634   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
635       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
636       "\xc1"));
637   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
638       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
639       "\xe0\x80"));
640   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
641       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
642       "\xe0\x9f"));
643   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
644       ConvertUTFResultContainer(sourceIllegal)
645           .withScalars(0xfffd, 0xfffd, 0xfffd),
646       "\xf0\x80\x80"));
647   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
648       ConvertUTFResultContainer(sourceIllegal)
649           .withScalars(0xfffd, 0xfffd, 0xfffd),
650       "\xf0\x8f\x80"));
651   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
652       ConvertUTFResultContainer(sourceIllegal)
653           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
654       "\xf8\x80\x80\x80"));
655   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
656       ConvertUTFResultContainer(sourceIllegal)
657           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
658       "\xfc\x80\x80\x80\x80"));
659 
660   // Sequences that represent surrogates with one trailing byte missing.
661   // High surrogates
662   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
663       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
664       "\xed\xa0"));
665   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
666       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
667       "\xed\xac"));
668   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
669       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
670       "\xed\xaf"));
671   // Low surrogates
672   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
673       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
674       "\xed\xb0"));
675   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
676       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
677       "\xed\xb4"));
678   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
679       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
680       "\xed\xbf"));
681 
682   // Ill-formed 4-byte sequences.
683   // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
684   // U+1100xx (invalid)
685   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
686       ConvertUTFResultContainer(sourceIllegal)
687           .withScalars(0xfffd, 0xfffd, 0xfffd),
688       "\xf4\x90\x80"));
689   // U+13FBxx (invalid)
690   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
691       ConvertUTFResultContainer(sourceIllegal)
692           .withScalars(0xfffd, 0xfffd, 0xfffd),
693       "\xf4\xbf\xbf"));
694   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
695       ConvertUTFResultContainer(sourceIllegal)
696           .withScalars(0xfffd, 0xfffd, 0xfffd),
697       "\xf5\x80\x80"));
698   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
699       ConvertUTFResultContainer(sourceIllegal)
700           .withScalars(0xfffd, 0xfffd, 0xfffd),
701       "\xf6\x80\x80"));
702   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
703       ConvertUTFResultContainer(sourceIllegal)
704           .withScalars(0xfffd, 0xfffd, 0xfffd),
705       "\xf7\x80\x80"));
706   // U+1FFBxx (invalid)
707   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
708       ConvertUTFResultContainer(sourceIllegal)
709           .withScalars(0xfffd, 0xfffd, 0xfffd),
710       "\xf7\xbf\xbf"));
711 
712   // Ill-formed 5-byte sequences.
713   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
714   // U+2000xx (invalid)
715   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
716       ConvertUTFResultContainer(sourceIllegal)
717           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
718       "\xf8\x88\x80\x80"));
719   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
720       ConvertUTFResultContainer(sourceIllegal)
721           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
722       "\xf8\xbf\xbf\xbf"));
723   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
724       ConvertUTFResultContainer(sourceIllegal)
725           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
726       "\xf9\x80\x80\x80"));
727   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
728       ConvertUTFResultContainer(sourceIllegal)
729           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
730       "\xfa\x80\x80\x80"));
731   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
732       ConvertUTFResultContainer(sourceIllegal)
733           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
734       "\xfb\x80\x80\x80"));
735   // U+3FFFFxx (invalid)
736   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
737       ConvertUTFResultContainer(sourceIllegal)
738           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
739       "\xfb\xbf\xbf\xbf"));
740 
741   // Ill-formed 6-byte sequences.
742   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
743   // U+40000xx (invalid)
744   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
745       ConvertUTFResultContainer(sourceIllegal)
746           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
747       "\xfc\x84\x80\x80\x80"));
748   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
749       ConvertUTFResultContainer(sourceIllegal)
750           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
751       "\xfc\xbf\xbf\xbf\xbf"));
752   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
753       ConvertUTFResultContainer(sourceIllegal)
754           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
755       "\xfd\x80\x80\x80\x80"));
756   // U+7FFFFFxx (invalid)
757   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
758       ConvertUTFResultContainer(sourceIllegal)
759           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
760       "\xfd\xbf\xbf\xbf\xbf"));
761 
762   //
763   // Sequences with two continuation bytes missing
764   //
765 
766   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
767       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
768       "\xf0\x90"));
769   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
770       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
771       "\xf0\xbf"));
772   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
773       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
774       "\xf1\x80"));
775   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
776       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
777       "\xf3\xbf"));
778   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
779       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
780       "\xf4\x80"));
781   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
782       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
783       "\xf4\x8f"));
784 
785   // Overlong sequences with two trailing bytes missing.
786   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
787       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xe0"));
788   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
789       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
790       "\xf0\x80"));
791   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
792       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
793       "\xf0\x8f"));
794   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
795       ConvertUTFResultContainer(sourceIllegal)
796           .withScalars(0xfffd, 0xfffd, 0xfffd),
797       "\xf8\x80\x80"));
798   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
799       ConvertUTFResultContainer(sourceIllegal)
800           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
801       "\xfc\x80\x80\x80"));
802 
803   // Sequences that represent surrogates with two trailing bytes missing.
804   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
805       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xed"));
806 
807   // Ill-formed 4-byte sequences.
808   // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
809   // U+110yxx (invalid)
810   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
811       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
812       "\xf4\x90"));
813   // U+13Fyxx (invalid)
814   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
815       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
816       "\xf4\xbf"));
817   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
818       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
819       "\xf5\x80"));
820   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
821       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
822       "\xf6\x80"));
823   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
824       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
825       "\xf7\x80"));
826   // U+1FFyxx (invalid)
827   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
828       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
829       "\xf7\xbf"));
830 
831   // Ill-formed 5-byte sequences.
832   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
833   // U+200yxx (invalid)
834   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
835       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
836       "\xf8\x88\x80"));
837   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
838       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
839       "\xf8\xbf\xbf"));
840   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
841       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
842       "\xf9\x80\x80"));
843   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
844       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
845       "\xfa\x80\x80"));
846   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
847       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
848       "\xfb\x80\x80"));
849   // U+3FFFyxx (invalid)
850   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
851       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
852       "\xfb\xbf\xbf"));
853 
854   // Ill-formed 6-byte sequences.
855   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
856   // U+4000yxx (invalid)
857   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
858       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
859       "\xfc\x84\x80\x80"));
860   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
861       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
862       "\xfc\xbf\xbf\xbf"));
863   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
864       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
865       "\xfd\x80\x80\x80"));
866   // U+7FFFFyxx (invalid)
867   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
868       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
869       "\xfd\xbf\xbf\xbf"));
870 
871   //
872   // Sequences with three continuation bytes missing
873   //
874 
875   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
876       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
877   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
878       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf1"));
879   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
880       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf2"));
881   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
882       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf3"));
883   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
884       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf4"));
885 
886   // Broken overlong sequences.
887   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
888       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
889   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
890       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
891       "\xf8\x80"));
892   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
893       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
894       "\xfc\x80\x80"));
895 
896   // Ill-formed 4-byte sequences.
897   // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
898   // U+14yyxx (invalid)
899   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
900       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf5"));
901   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
902       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf6"));
903   // U+1Cyyxx (invalid)
904   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
905       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf7"));
906 
907   // Ill-formed 5-byte sequences.
908   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
909   // U+20yyxx (invalid)
910   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
911       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
912       "\xf8\x88"));
913   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
914       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
915       "\xf8\xbf"));
916   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
917       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
918       "\xf9\x80"));
919   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
920       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
921       "\xfa\x80"));
922   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
923       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
924       "\xfb\x80"));
925   // U+3FCyyxx (invalid)
926   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
927       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
928       "\xfb\xbf"));
929 
930   // Ill-formed 6-byte sequences.
931   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
932   // U+400yyxx (invalid)
933   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
934       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
935       "\xfc\x84\x80"));
936   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
937       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
938       "\xfc\xbf\xbf"));
939   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
940       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
941       "\xfd\x80\x80"));
942   // U+7FFCyyxx (invalid)
943   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
944       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
945       "\xfd\xbf\xbf"));
946 
947   //
948   // Sequences with four continuation bytes missing
949   //
950 
951   // Ill-formed 5-byte sequences.
952   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
953   // U+uzyyxx (invalid)
954   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
955       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
956   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
957       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf9"));
958   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
959       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfa"));
960   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
961       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
962   // U+3zyyxx (invalid)
963   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
964       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
965 
966   // Broken overlong sequences.
967   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
968       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
969   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
970       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
971       "\xfc\x80"));
972 
973   // Ill-formed 6-byte sequences.
974   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
975   // U+uzzyyxx (invalid)
976   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
977       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
978       "\xfc\x84"));
979   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
980       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
981       "\xfc\xbf"));
982   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
983       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
984       "\xfd\x80"));
985   // U+7Fzzyyxx (invalid)
986   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
987       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
988       "\xfd\xbf"));
989 
990   //
991   // Sequences with five continuation bytes missing
992   //
993 
994   // Ill-formed 6-byte sequences.
995   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
996   // U+uzzyyxx (invalid)
997   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
998       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfc"));
999   // U+uuzzyyxx (invalid)
1000   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1001       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfd"));
1002 
1003   //
1004   // Consecutive sequences with trailing bytes missing
1005   //
1006 
1007   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1008       ConvertUTFResultContainer(sourceIllegal)
1009           .withScalars(0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
1010           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
1011           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd)
1012           .withScalars(0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
1013           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
1014           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1015       "\xc0" "\xe0\x80" "\xf0\x80\x80"
1016       "\xf8\x80\x80\x80"
1017       "\xfc\x80\x80\x80\x80"
1018       "\xdf" "\xef\xbf" "\xf7\xbf\xbf"
1019       "\xfb\xbf\xbf\xbf"
1020       "\xfd\xbf\xbf\xbf\xbf"));
1021 
1022   //
1023   // Overlong UTF-8 sequences
1024   //
1025 
1026   // U+002F SOLIDUS
1027   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1028       ConvertUTFResultContainer(conversionOK).withScalars(0x002f), "\x2f"));
1029 
1030   // Overlong sequences of the above.
1031   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1032       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1033       "\xc0\xaf"));
1034   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1035       ConvertUTFResultContainer(sourceIllegal)
1036           .withScalars(0xfffd, 0xfffd, 0xfffd),
1037       "\xe0\x80\xaf"));
1038   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1039       ConvertUTFResultContainer(sourceIllegal)
1040           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1041       "\xf0\x80\x80\xaf"));
1042   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1043       ConvertUTFResultContainer(sourceIllegal)
1044           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1045       "\xf8\x80\x80\x80\xaf"));
1046   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1047       ConvertUTFResultContainer(sourceIllegal)
1048           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1049       "\xfc\x80\x80\x80\x80\xaf"));
1050 
1051   // U+0000 NULL
1052   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1053       ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
1054       StringRef("\x00", 1)));
1055 
1056   // Overlong sequences of the above.
1057   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1058       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1059       "\xc0\x80"));
1060   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1061       ConvertUTFResultContainer(sourceIllegal)
1062           .withScalars(0xfffd, 0xfffd, 0xfffd),
1063       "\xe0\x80\x80"));
1064   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1065       ConvertUTFResultContainer(sourceIllegal)
1066           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1067       "\xf0\x80\x80\x80"));
1068   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1069       ConvertUTFResultContainer(sourceIllegal)
1070           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1071       "\xf8\x80\x80\x80\x80"));
1072   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1073       ConvertUTFResultContainer(sourceIllegal)
1074           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1075       "\xfc\x80\x80\x80\x80\x80"));
1076 
1077   // Other overlong sequences.
1078   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1079       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1080       "\xc0\xbf"));
1081   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1082       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1083       "\xc1\x80"));
1084   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1085       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1086       "\xc1\xbf"));
1087   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1088       ConvertUTFResultContainer(sourceIllegal)
1089           .withScalars(0xfffd, 0xfffd, 0xfffd),
1090       "\xe0\x9f\xbf"));
1091   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1092       ConvertUTFResultContainer(sourceIllegal)
1093           .withScalars(0xfffd, 0xfffd, 0xfffd),
1094       "\xed\xa0\x80"));
1095   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1096       ConvertUTFResultContainer(sourceIllegal)
1097           .withScalars(0xfffd, 0xfffd, 0xfffd),
1098       "\xed\xbf\xbf"));
1099   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1100       ConvertUTFResultContainer(sourceIllegal)
1101           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1102       "\xf0\x8f\x80\x80"));
1103   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1104       ConvertUTFResultContainer(sourceIllegal)
1105           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1106       "\xf0\x8f\xbf\xbf"));
1107   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1108       ConvertUTFResultContainer(sourceIllegal)
1109           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1110       "\xf8\x87\xbf\xbf\xbf"));
1111   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1112       ConvertUTFResultContainer(sourceIllegal)
1113           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1114       "\xfc\x83\xbf\xbf\xbf\xbf"));
1115 
1116   //
1117   // Isolated surrogates
1118   //
1119 
1120   // Unicode 6.3.0:
1121   //
1122   //    D71.  High-surrogate code point: A Unicode code point in the range
1123   //    U+D800 to U+DBFF.
1124   //
1125   //    D73.  Low-surrogate code point: A Unicode code point in the range
1126   //    U+DC00 to U+DFFF.
1127 
1128   // Note: U+E0100 is <DB40 DD00> in UTF16.
1129 
1130   // High surrogates
1131 
1132   // U+D800
1133   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1134       ConvertUTFResultContainer(sourceIllegal)
1135           .withScalars(0xfffd, 0xfffd, 0xfffd),
1136       "\xed\xa0\x80"));
1137 
1138   // U+DB40
1139   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1140       ConvertUTFResultContainer(sourceIllegal)
1141           .withScalars(0xfffd, 0xfffd, 0xfffd),
1142       "\xed\xac\xa0"));
1143 
1144   // U+DBFF
1145   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1146       ConvertUTFResultContainer(sourceIllegal)
1147           .withScalars(0xfffd, 0xfffd, 0xfffd),
1148       "\xed\xaf\xbf"));
1149 
1150   // Low surrogates
1151 
1152   // U+DC00
1153   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1154       ConvertUTFResultContainer(sourceIllegal)
1155           .withScalars(0xfffd, 0xfffd, 0xfffd),
1156       "\xed\xb0\x80"));
1157 
1158   // U+DD00
1159   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1160       ConvertUTFResultContainer(sourceIllegal)
1161           .withScalars(0xfffd, 0xfffd, 0xfffd),
1162       "\xed\xb4\x80"));
1163 
1164   // U+DFFF
1165   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1166       ConvertUTFResultContainer(sourceIllegal)
1167           .withScalars(0xfffd, 0xfffd, 0xfffd),
1168       "\xed\xbf\xbf"));
1169 
1170   // Surrogate pairs
1171 
1172   // U+D800 U+DC00
1173   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1174       ConvertUTFResultContainer(sourceIllegal)
1175           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1176       "\xed\xa0\x80\xed\xb0\x80"));
1177 
1178   // U+D800 U+DD00
1179   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1180       ConvertUTFResultContainer(sourceIllegal)
1181           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1182       "\xed\xa0\x80\xed\xb4\x80"));
1183 
1184   // U+D800 U+DFFF
1185   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1186       ConvertUTFResultContainer(sourceIllegal)
1187           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1188       "\xed\xa0\x80\xed\xbf\xbf"));
1189 
1190   // U+DB40 U+DC00
1191   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1192       ConvertUTFResultContainer(sourceIllegal)
1193           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1194       "\xed\xac\xa0\xed\xb0\x80"));
1195 
1196   // U+DB40 U+DD00
1197   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1198       ConvertUTFResultContainer(sourceIllegal)
1199           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1200       "\xed\xac\xa0\xed\xb4\x80"));
1201 
1202   // U+DB40 U+DFFF
1203   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1204       ConvertUTFResultContainer(sourceIllegal)
1205           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1206       "\xed\xac\xa0\xed\xbf\xbf"));
1207 
1208   // U+DBFF U+DC00
1209   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1210       ConvertUTFResultContainer(sourceIllegal)
1211           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1212       "\xed\xaf\xbf\xed\xb0\x80"));
1213 
1214   // U+DBFF U+DD00
1215   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1216       ConvertUTFResultContainer(sourceIllegal)
1217           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1218       "\xed\xaf\xbf\xed\xb4\x80"));
1219 
1220   // U+DBFF U+DFFF
1221   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1222       ConvertUTFResultContainer(sourceIllegal)
1223           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1224       "\xed\xaf\xbf\xed\xbf\xbf"));
1225 
1226   //
1227   // Noncharacters
1228   //
1229 
1230   // Unicode 6.3.0:
1231   //
1232   //    D14.  Noncharacter: A code point that is permanently reserved for
1233   //    internal use and that should never be interchanged. Noncharacters
1234   //    consist of the values U+nFFFE and U+nFFFF (where n is from 0 to 0x10)
1235   //    and the values U+FDD0..U+FDEF.
1236 
1237   // U+FFFE
1238   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1239       ConvertUTFResultContainer(conversionOK).withScalars(0xfffe),
1240       "\xef\xbf\xbe"));
1241 
1242   // U+FFFF
1243   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1244       ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
1245       "\xef\xbf\xbf"));
1246 
1247   // U+1FFFE
1248   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1249       ConvertUTFResultContainer(conversionOK).withScalars(0x1fffe),
1250       "\xf0\x9f\xbf\xbe"));
1251 
1252   // U+1FFFF
1253   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1254       ConvertUTFResultContainer(conversionOK).withScalars(0x1ffff),
1255       "\xf0\x9f\xbf\xbf"));
1256 
1257   // U+2FFFE
1258   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1259       ConvertUTFResultContainer(conversionOK).withScalars(0x2fffe),
1260       "\xf0\xaf\xbf\xbe"));
1261 
1262   // U+2FFFF
1263   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1264       ConvertUTFResultContainer(conversionOK).withScalars(0x2ffff),
1265       "\xf0\xaf\xbf\xbf"));
1266 
1267   // U+3FFFE
1268   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1269       ConvertUTFResultContainer(conversionOK).withScalars(0x3fffe),
1270       "\xf0\xbf\xbf\xbe"));
1271 
1272   // U+3FFFF
1273   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1274       ConvertUTFResultContainer(conversionOK).withScalars(0x3ffff),
1275       "\xf0\xbf\xbf\xbf"));
1276 
1277   // U+4FFFE
1278   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1279       ConvertUTFResultContainer(conversionOK).withScalars(0x4fffe),
1280       "\xf1\x8f\xbf\xbe"));
1281 
1282   // U+4FFFF
1283   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1284       ConvertUTFResultContainer(conversionOK).withScalars(0x4ffff),
1285       "\xf1\x8f\xbf\xbf"));
1286 
1287   // U+5FFFE
1288   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1289       ConvertUTFResultContainer(conversionOK).withScalars(0x5fffe),
1290       "\xf1\x9f\xbf\xbe"));
1291 
1292   // U+5FFFF
1293   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1294       ConvertUTFResultContainer(conversionOK).withScalars(0x5ffff),
1295       "\xf1\x9f\xbf\xbf"));
1296 
1297   // U+6FFFE
1298   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1299       ConvertUTFResultContainer(conversionOK).withScalars(0x6fffe),
1300       "\xf1\xaf\xbf\xbe"));
1301 
1302   // U+6FFFF
1303   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1304       ConvertUTFResultContainer(conversionOK).withScalars(0x6ffff),
1305       "\xf1\xaf\xbf\xbf"));
1306 
1307   // U+7FFFE
1308   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1309       ConvertUTFResultContainer(conversionOK).withScalars(0x7fffe),
1310       "\xf1\xbf\xbf\xbe"));
1311 
1312   // U+7FFFF
1313   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1314       ConvertUTFResultContainer(conversionOK).withScalars(0x7ffff),
1315       "\xf1\xbf\xbf\xbf"));
1316 
1317   // U+8FFFE
1318   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1319       ConvertUTFResultContainer(conversionOK).withScalars(0x8fffe),
1320       "\xf2\x8f\xbf\xbe"));
1321 
1322   // U+8FFFF
1323   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1324       ConvertUTFResultContainer(conversionOK).withScalars(0x8ffff),
1325       "\xf2\x8f\xbf\xbf"));
1326 
1327   // U+9FFFE
1328   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1329       ConvertUTFResultContainer(conversionOK).withScalars(0x9fffe),
1330       "\xf2\x9f\xbf\xbe"));
1331 
1332   // U+9FFFF
1333   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1334       ConvertUTFResultContainer(conversionOK).withScalars(0x9ffff),
1335       "\xf2\x9f\xbf\xbf"));
1336 
1337   // U+AFFFE
1338   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1339       ConvertUTFResultContainer(conversionOK).withScalars(0xafffe),
1340       "\xf2\xaf\xbf\xbe"));
1341 
1342   // U+AFFFF
1343   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1344       ConvertUTFResultContainer(conversionOK).withScalars(0xaffff),
1345       "\xf2\xaf\xbf\xbf"));
1346 
1347   // U+BFFFE
1348   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1349       ConvertUTFResultContainer(conversionOK).withScalars(0xbfffe),
1350       "\xf2\xbf\xbf\xbe"));
1351 
1352   // U+BFFFF
1353   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1354       ConvertUTFResultContainer(conversionOK).withScalars(0xbffff),
1355       "\xf2\xbf\xbf\xbf"));
1356 
1357   // U+CFFFE
1358   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1359       ConvertUTFResultContainer(conversionOK).withScalars(0xcfffe),
1360       "\xf3\x8f\xbf\xbe"));
1361 
1362   // U+CFFFF
1363   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1364       ConvertUTFResultContainer(conversionOK).withScalars(0xcfffF),
1365       "\xf3\x8f\xbf\xbf"));
1366 
1367   // U+DFFFE
1368   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1369       ConvertUTFResultContainer(conversionOK).withScalars(0xdfffe),
1370       "\xf3\x9f\xbf\xbe"));
1371 
1372   // U+DFFFF
1373   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1374       ConvertUTFResultContainer(conversionOK).withScalars(0xdffff),
1375       "\xf3\x9f\xbf\xbf"));
1376 
1377   // U+EFFFE
1378   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1379       ConvertUTFResultContainer(conversionOK).withScalars(0xefffe),
1380       "\xf3\xaf\xbf\xbe"));
1381 
1382   // U+EFFFF
1383   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1384       ConvertUTFResultContainer(conversionOK).withScalars(0xeffff),
1385       "\xf3\xaf\xbf\xbf"));
1386 
1387   // U+FFFFE
1388   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1389       ConvertUTFResultContainer(conversionOK).withScalars(0xffffe),
1390       "\xf3\xbf\xbf\xbe"));
1391 
1392   // U+FFFFF
1393   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1394       ConvertUTFResultContainer(conversionOK).withScalars(0xfffff),
1395       "\xf3\xbf\xbf\xbf"));
1396 
1397   // U+10FFFE
1398   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1399       ConvertUTFResultContainer(conversionOK).withScalars(0x10fffe),
1400       "\xf4\x8f\xbf\xbe"));
1401 
1402   // U+10FFFF
1403   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1404       ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
1405       "\xf4\x8f\xbf\xbf"));
1406 
1407   // U+FDD0
1408   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1409       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd0),
1410       "\xef\xb7\x90"));
1411 
1412   // U+FDD1
1413   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1414       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd1),
1415       "\xef\xb7\x91"));
1416 
1417   // U+FDD2
1418   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1419       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd2),
1420       "\xef\xb7\x92"));
1421 
1422   // U+FDD3
1423   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1424       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd3),
1425       "\xef\xb7\x93"));
1426 
1427   // U+FDD4
1428   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1429       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd4),
1430       "\xef\xb7\x94"));
1431 
1432   // U+FDD5
1433   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1434       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd5),
1435       "\xef\xb7\x95"));
1436 
1437   // U+FDD6
1438   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1439       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd6),
1440       "\xef\xb7\x96"));
1441 
1442   // U+FDD7
1443   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1444       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd7),
1445       "\xef\xb7\x97"));
1446 
1447   // U+FDD8
1448   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1449       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd8),
1450       "\xef\xb7\x98"));
1451 
1452   // U+FDD9
1453   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1454       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd9),
1455       "\xef\xb7\x99"));
1456 
1457   // U+FDDA
1458   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1459       ConvertUTFResultContainer(conversionOK).withScalars(0xfdda),
1460       "\xef\xb7\x9a"));
1461 
1462   // U+FDDB
1463   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1464       ConvertUTFResultContainer(conversionOK).withScalars(0xfddb),
1465       "\xef\xb7\x9b"));
1466 
1467   // U+FDDC
1468   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1469       ConvertUTFResultContainer(conversionOK).withScalars(0xfddc),
1470       "\xef\xb7\x9c"));
1471 
1472   // U+FDDD
1473   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1474       ConvertUTFResultContainer(conversionOK).withScalars(0xfddd),
1475       "\xef\xb7\x9d"));
1476 
1477   // U+FDDE
1478   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1479       ConvertUTFResultContainer(conversionOK).withScalars(0xfdde),
1480       "\xef\xb7\x9e"));
1481 
1482   // U+FDDF
1483   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1484       ConvertUTFResultContainer(conversionOK).withScalars(0xfddf),
1485       "\xef\xb7\x9f"));
1486 
1487   // U+FDE0
1488   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1489       ConvertUTFResultContainer(conversionOK).withScalars(0xfde0),
1490       "\xef\xb7\xa0"));
1491 
1492   // U+FDE1
1493   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1494       ConvertUTFResultContainer(conversionOK).withScalars(0xfde1),
1495       "\xef\xb7\xa1"));
1496 
1497   // U+FDE2
1498   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1499       ConvertUTFResultContainer(conversionOK).withScalars(0xfde2),
1500       "\xef\xb7\xa2"));
1501 
1502   // U+FDE3
1503   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1504       ConvertUTFResultContainer(conversionOK).withScalars(0xfde3),
1505       "\xef\xb7\xa3"));
1506 
1507   // U+FDE4
1508   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1509       ConvertUTFResultContainer(conversionOK).withScalars(0xfde4),
1510       "\xef\xb7\xa4"));
1511 
1512   // U+FDE5
1513   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1514       ConvertUTFResultContainer(conversionOK).withScalars(0xfde5),
1515       "\xef\xb7\xa5"));
1516 
1517   // U+FDE6
1518   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1519       ConvertUTFResultContainer(conversionOK).withScalars(0xfde6),
1520       "\xef\xb7\xa6"));
1521 
1522   // U+FDE7
1523   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1524       ConvertUTFResultContainer(conversionOK).withScalars(0xfde7),
1525       "\xef\xb7\xa7"));
1526 
1527   // U+FDE8
1528   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1529       ConvertUTFResultContainer(conversionOK).withScalars(0xfde8),
1530       "\xef\xb7\xa8"));
1531 
1532   // U+FDE9
1533   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1534       ConvertUTFResultContainer(conversionOK).withScalars(0xfde9),
1535       "\xef\xb7\xa9"));
1536 
1537   // U+FDEA
1538   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1539       ConvertUTFResultContainer(conversionOK).withScalars(0xfdea),
1540       "\xef\xb7\xaa"));
1541 
1542   // U+FDEB
1543   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1544       ConvertUTFResultContainer(conversionOK).withScalars(0xfdeb),
1545       "\xef\xb7\xab"));
1546 
1547   // U+FDEC
1548   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1549       ConvertUTFResultContainer(conversionOK).withScalars(0xfdec),
1550       "\xef\xb7\xac"));
1551 
1552   // U+FDED
1553   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1554       ConvertUTFResultContainer(conversionOK).withScalars(0xfded),
1555       "\xef\xb7\xad"));
1556 
1557   // U+FDEE
1558   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1559       ConvertUTFResultContainer(conversionOK).withScalars(0xfdee),
1560       "\xef\xb7\xae"));
1561 
1562   // U+FDEF
1563   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1564       ConvertUTFResultContainer(conversionOK).withScalars(0xfdef),
1565       "\xef\xb7\xaf"));
1566 
1567   // U+FDF0
1568   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1569       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf0),
1570       "\xef\xb7\xb0"));
1571 
1572   // U+FDF1
1573   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1574       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf1),
1575       "\xef\xb7\xb1"));
1576 
1577   // U+FDF2
1578   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1579       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf2),
1580       "\xef\xb7\xb2"));
1581 
1582   // U+FDF3
1583   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1584       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf3),
1585       "\xef\xb7\xb3"));
1586 
1587   // U+FDF4
1588   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1589       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf4),
1590       "\xef\xb7\xb4"));
1591 
1592   // U+FDF5
1593   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1594       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf5),
1595       "\xef\xb7\xb5"));
1596 
1597   // U+FDF6
1598   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1599       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf6),
1600       "\xef\xb7\xb6"));
1601 
1602   // U+FDF7
1603   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1604       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf7),
1605       "\xef\xb7\xb7"));
1606 
1607   // U+FDF8
1608   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1609       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf8),
1610       "\xef\xb7\xb8"));
1611 
1612   // U+FDF9
1613   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1614       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf9),
1615       "\xef\xb7\xb9"));
1616 
1617   // U+FDFA
1618   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1619       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfa),
1620       "\xef\xb7\xba"));
1621 
1622   // U+FDFB
1623   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1624       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfb),
1625       "\xef\xb7\xbb"));
1626 
1627   // U+FDFC
1628   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1629       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfc),
1630       "\xef\xb7\xbc"));
1631 
1632   // U+FDFD
1633   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1634       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfd),
1635       "\xef\xb7\xbd"));
1636 
1637   // U+FDFE
1638   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1639       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfe),
1640       "\xef\xb7\xbe"));
1641 
1642   // U+FDFF
1643   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1644       ConvertUTFResultContainer(conversionOK).withScalars(0xfdff),
1645       "\xef\xb7\xbf"));
1646 }
1647 
TEST(ConvertUTFTest,UTF8ToUTF32PartialLenient)1648 TEST(ConvertUTFTest, UTF8ToUTF32PartialLenient) {
1649   // U+0041 LATIN CAPITAL LETTER A
1650   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1651       ConvertUTFResultContainer(conversionOK).withScalars(0x0041),
1652       "\x41", true));
1653 
1654   //
1655   // Sequences with one continuation byte missing
1656   //
1657 
1658   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1659       ConvertUTFResultContainer(sourceExhausted),
1660       "\xc2", true));
1661   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1662       ConvertUTFResultContainer(sourceExhausted),
1663       "\xdf", true));
1664   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1665       ConvertUTFResultContainer(sourceExhausted),
1666       "\xe0\xa0", true));
1667   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1668       ConvertUTFResultContainer(sourceExhausted),
1669       "\xe0\xbf", true));
1670   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1671       ConvertUTFResultContainer(sourceExhausted),
1672       "\xe1\x80", true));
1673   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1674       ConvertUTFResultContainer(sourceExhausted),
1675       "\xec\xbf", true));
1676   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1677       ConvertUTFResultContainer(sourceExhausted),
1678       "\xed\x80", true));
1679   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1680       ConvertUTFResultContainer(sourceExhausted),
1681       "\xed\x9f", true));
1682   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1683       ConvertUTFResultContainer(sourceExhausted),
1684       "\xee\x80", true));
1685   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1686       ConvertUTFResultContainer(sourceExhausted),
1687       "\xef\xbf", true));
1688   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1689       ConvertUTFResultContainer(sourceExhausted),
1690       "\xf0\x90\x80", true));
1691   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1692       ConvertUTFResultContainer(sourceExhausted),
1693       "\xf0\xbf\xbf", true));
1694   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1695       ConvertUTFResultContainer(sourceExhausted),
1696       "\xf1\x80\x80", true));
1697   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1698       ConvertUTFResultContainer(sourceExhausted),
1699       "\xf3\xbf\xbf", true));
1700   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1701       ConvertUTFResultContainer(sourceExhausted),
1702       "\xf4\x80\x80", true));
1703   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1704       ConvertUTFResultContainer(sourceExhausted),
1705       "\xf4\x8f\xbf", true));
1706 
1707   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1708       ConvertUTFResultContainer(sourceExhausted).withScalars(0x0041),
1709       "\x41\xc2", true));
1710 }
1711 
1712