1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #ifndef utfstrings_h__
8 #define utfstrings_h__
9 
// One fixture entry: the same text spelled as UTF-16 code units (m16) and as
// UTF-8 bytes (m8). Each buffer has room for 16 elements. An aggregate
// initializer that lists fewer than 16 elements leaves the remainder
// zero-initialized, so such entries are implicitly NUL-terminated.
struct UTFStringsStringPair {
  // UTF-16 code units.
  char16_t m16[16];
  // UTF-8 bytes. Plain char may be signed, so initializers for bytes >= 0x80
  // need an explicit conversion to char.
  char m8[16];
};
14 
15 static const UTFStringsStringPair ValidStrings[] = {
16     {{'a', 'b', 'c', 'd'}, {'a', 'b', 'c', 'd'}},
17     {{'1', '2', '3', '4'}, {'1', '2', '3', '4'}},
18     {{0x7F, 'A', 0x80, 'B', 0x101, 0x200},
19      {0x7F, 'A', char(0xC2), char(0x80), 'B', char(0xC4), char(0x81),
20       char(0xC8), char(0x80)}},
21     {{0x7FF, 0x800, 0x1000},
22      {char(0xDF), char(0xBF), char(0xE0), char(0xA0), char(0x80), char(0xE1),
23       char(0x80), char(0x80)}},
24     {{0xD7FF, 0xE000, 0xF00F, 'A', 0xFFF0},
25      {char(0xED), char(0x9F), char(0xBF), char(0xEE), char(0x80), char(0x80),
26       char(0xEF), char(0x80), char(0x8F), 'A', char(0xEF), char(0xBF),
27       char(0xB0)}},
28     {{0xFFF7, 0xFFFC, 0xFFFD, 0xFFFD},
29      {char(0xEF), char(0xBF), char(0xB7), char(0xEF), char(0xBF), char(0xBC),
30       char(0xEF), char(0xBF), char(0xBD), char(0xEF), char(0xBF), char(0xBD)}},
31     {{0xD800, 0xDC00, 0xD800, 0xDCFF},
32      {char(0xF0), char(0x90), char(0x80), char(0x80), char(0xF0), char(0x90),
33       char(0x83), char(0xBF)}},
34     {{0xDBFF, 0xDFFF, 0xDBB7, 0xDCBA},
35      {char(0xF4), char(0x8F), char(0xBF), char(0xBF), char(0xF3), char(0xBD),
36       char(0xB2), char(0xBA)}},
37     {{0xFFFD, 0xFFFF},
38      {char(0xEF), char(0xBF), char(0xBD), char(0xEF), char(0xBF), char(0xBF)}},
39     {{0xFFFD, 0xFFFE, 0xFFFF},
40      {char(0xEF), char(0xBF), char(0xBD), char(0xEF), char(0xBF), char(0xBE),
41       char(0xEF), char(0xBF), char(0xBF)}},
42 };
43 
44 static const UTFStringsStringPair Invalid16Strings[] = {
45     {{'a', 'b', 0xD800}, {'a', 'b', char(0xEF), char(0xBF), char(0xBD)}},
46     {{0xD8FF, 'b'}, {char(0xEF), char(0xBF), char(0xBD), 'b'}},
47     {{0xD821}, {char(0xEF), char(0xBF), char(0xBD)}},
48     {{0xDC21}, {char(0xEF), char(0xBF), char(0xBD)}},
49     {{0xDC00, 0xD800, 'b'},
50      {char(0xEF), char(0xBF), char(0xBD), char(0xEF), char(0xBF), char(0xBD),
51       'b'}},
52     {{'b', 0xDC00, 0xD800},
53      {'b', char(0xEF), char(0xBF), char(0xBD), char(0xEF), char(0xBF),
54       char(0xBD)}},
55     {{0xDC00, 0xD800},
56      {char(0xEF), char(0xBF), char(0xBD), char(0xEF), char(0xBF), char(0xBD)}},
57     {{0xDC00, 0xD800, 0xDC00, 0xD800},
58      {char(0xEF), char(0xBF), char(0xBD), char(0xF0), char(0x90), char(0x80),
59       char(0x80), char(0xEF), char(0xBF), char(0xBD)}},
60     {{0xDC00, 0xD800, 0xD800, 0xDC00},
61      {char(0xEF), char(0xBF), char(0xBD), char(0xEF), char(0xBF), char(0xBD),
62       char(0xF0), char(0x90), char(0x80), char(0x80)}},
63 };
64 
65 static const UTFStringsStringPair Invalid8Strings[] = {
66     {{'a', 0xFFFD, 0xFFFD, 'b'}, {'a', char(0xC0), char(0x80), 'b'}},
67     {{0xFFFD, 0xFFFD, 0x80}, {char(0xC1), char(0xBF), char(0xC2), char(0x80)}},
68     {{0xFFFD, 0xFFFD}, {char(0xC1), char(0xBF)}},
69     {{0xFFFD, 0xFFFD, 0xFFFD, 'x', 0x0800},
70      {char(0xE0), char(0x80), char(0x80), 'x', char(0xE0), char(0xA0),
71       char(0x80)}},
72     {{0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 'x', 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD},
73      {char(0xF0), char(0x80), char(0x80), char(0x80), 'x', char(0xF0),
74       char(0x80), char(0x8F), char(0x80)}},
75     {{0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD},
76      {char(0xF4), char(0x90), char(0x80), char(0x80), char(0xF7), char(0xBF),
77       char(0xBF), char(0xBF)}},
78     {{0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 'x', 0xD800, 0xDC00, 0xFFFD, 0xFFFD,
79       0xFFFD, 0xFFFD},
80      {char(0xF0), char(0x8F), char(0xBF), char(0xBF), 'x', char(0xF0),
81       char(0x90), char(0x80), char(0x80), char(0xF0), char(0x8F), char(0xBF),
82       char(0xBF)}},
83     {{0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 'x', 0xFFFD, 0xFFFD, 0xFFFD,
84       0xFFFD, 0xFFFD},
85      {char(0xF8), char(0x80), char(0x80), char(0x80), char(0x80), 'x',
86       char(0xF8), char(0x88), char(0x80), char(0x80), char(0x80)}},
87     {{0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
88       0xFFFD, 0xFFFD},
89      {char(0xFB), char(0xBF), char(0xBF), char(0xBF), char(0xBF), char(0xFC),
90       char(0xA0), char(0x80), char(0x80), char(0x80), char(0x80)}},
91     {{0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
92       0xFFFD, 0xFFFD, 0xFFFD},
93      {char(0xFC), char(0x80), char(0x80), char(0x80), char(0x80), char(0x80),
94       char(0xFD), char(0xBF), char(0xBF), char(0xBF), char(0xBF), char(0xBF)}},
95 };
96 
97 static const UTFStringsStringPair Malformed8Strings[] = {
98     {{0xFFFD}, {char(0x80)}},
99     {{'a', 0xFFFD, 'c'}, {'a', char(0xC8), 'c'}},
100     {{'a', 0xFFFD}, {'a', char(0xC8)}},
101     {{'a', 0xFFFD, 'c'}, {'a', char(0xE8), 'c'}},
102     {{'a', 0xFFFD, 'c'}, {'a', char(0xE8), char(0x80), 'c'}},
103     {{'a', 0xFFFD}, {'a', char(0xE8), char(0x80)}},
104     {{0xFFFD, 0x7F, 0xFFFD}, {char(0xE8), 0x7F, char(0x80)}},
105     {{'a', 0xFFFD, 0xFFFD}, {'a', char(0xE8), char(0xE8), char(0x80)}},
106     {{'a', 0xFFFD}, {'a', char(0xF4)}},
107     {{'a', 0xFFFD, 'c', 'c'},
108      {'a', char(0xF4), char(0x80), char(0x80), 'c', 'c'}},
109     {{'a', 0xFFFD, 'x', 0xFFFD},
110      {'a', char(0xF4), char(0x80), 'x', char(0x80)}},
111     {{0xDBC0, 0xDC00, 0xFFFD},
112      {char(0xF4), char(0x80), char(0x80), char(0x80), char(0x80)}},
113     {{'a', 0xFFFD, 'c'}, {'a', char(0xFA), 'c'}},
114     {{'a', 0xFFFD, 0xFFFD, 0xFFFD, 0x7F, 0xFFFD, 'c'},
115      {'a', char(0xFA), char(0x80), char(0x80), 0x7F, char(0x80), 'c'}},
116     {{'a', 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 'c'},
117      {'a', char(0xFA), char(0x80), char(0x80), char(0x80), char(0x80),
118       char(0x80), 'c'}},
119     {{'a', 0xFFFD}, {'a', char(0xFD)}},
120     {{'a', 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 'c'},
121      {'a', char(0xFD), char(0x80), char(0x80), char(0x80), char(0x80), 'c'}},
122     {{'a', 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD},
123      {'a', char(0xFD), char(0x80), char(0x80), char(0x80), char(0x80),
124       char(0x80), char(0x80)}},
125     {{'a', 0xFFFD, 0xFFFD, 0xFFFD, 0x40, 0xFFFD, 0xFFFD, 'c'},
126      {'a', char(0xFD), char(0x80), char(0x80), 0x40, char(0x80), char(0x80),
127       'c'}},
128 };
129 
130 #endif
131