1 /*
2 * Copyright (C) 2013 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 * * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #ifndef THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_BIDI_TEST_HARNESS_H_
32 #define THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_BIDI_TEST_HARNESS_H_
33
34 #include <istream>
35 #include <map>
36 #include <stdio.h>
37 #include <string>
38
39 // FIXME: We don't have any business owning this code. We should try to
40 // upstream this to unicode.org if possible (for other implementations to use).
// Unicode.org provides a reference implementation, including parser:
42 // http://www.unicode.org/Public/PROGRAMS/BidiReferenceC/6.3.0/source/brtest.c
43 // But it, like the other implementations I've found, is rather tied to
44 // the algorithms it is testing. This file seeks to only implement the parser
45 // bits.
46
47 // Other C/C++ implementations of this parser:
48 // https://github.com/googlei18n/fribidi-vs-unicode/blob/master/test.c
49 // http://source.icu-project.org/repos/icu/icu/trunk/source/test/intltest/bidiconf.cpp
// Both of those are too tied to their respective projects to be of use to
// Blink.
51
// There are non-C implementations to parse BidiTest.txt as well, including:
53 // https://github.com/twitter/twitter-cldr-rb/blob/master/spec/bidi/bidi_spec.rb
54
55 // NOTE: None of this file is currently written to be thread-safe.
56
57 namespace bidi_test {
58
// Paragraph directions a test case can be run with. Each real direction is a
// distinct bit so that several of them can be OR-ed together into a mask;
// kDirectionNone is the "no / invalid direction" value.
enum ParagraphDirection {
  kDirectionNone = 0,
  kDirectionAutoLTR = 1 << 0,
  kDirectionLTR = 1 << 1,
  kDirectionRTL = 1 << 2,
};

// All direction bits set: the largest value a direction mask may take.
const int kMaxParagraphDirection =
    kDirectionRTL | kDirectionLTR | kDirectionAutoLTR;
67
68 // For error printing:
NameFromParagraphDirection(ParagraphDirection paragraph_direction)69 std::string NameFromParagraphDirection(ParagraphDirection paragraph_direction) {
70 switch (paragraph_direction) {
71 case bidi_test::kDirectionAutoLTR:
72 return "Auto-LTR";
73 case bidi_test::kDirectionLTR:
74 return "LTR";
75 case bidi_test::kDirectionRTL:
76 return "RTL";
77 default:
78 // This should never be reached.
79 return "";
80 }
81 }
82
// Line-oriented test-file parser that forwards test cases to a |Runner|.
// |Runner| is any type offering a RunTest() member compatible with the calls
// made from Parse().
template <typename Runner>
class Harness {
 public:
  // Only a reference to |runner| is stored, so the caller has to keep the
  // runner alive for as long as this harness is used.
  Harness(Runner& runner) : runner_(runner) {}

  // Reads |bidi_test_file| until getline() fails, running the tests it
  // describes.
  void Parse(std::istream& bidi_test_file);

 private:
  Runner& runner_;
};
92
// We could use boost::trim, but no other part of Blink uses boost yet.
//
// Removes every leading space and tab from |s|, in place.
inline void Ltrim(std::string& s) {
  const size_t first_kept = s.find_first_not_of(" \t");
  // When |s| is empty or all blanks, |first_kept| is npos and the erase below
  // clears the whole string.
  s.erase(0, first_kept);
}
98
// Removes every trailing space and tab from |s|, in place.
inline void Rtrim(std::string& s) {
  // find_last_not_of() returns npos when |s| is empty or all blanks; adding
  // one wraps that to 0, so the erase clears the string. When the last
  // character is not a blank the sum equals size() and the erase does nothing.
  const size_t trailing_blanks_start = s.find_last_not_of(" \t") + 1;
  s.erase(trailing_blanks_start);
}
112
Trim(std::string & s)113 inline void Trim(std::string& s) {
114 Rtrim(s);
115 Ltrim(s);
116 }
117
ParseStringList(const std::string & str)118 static Vector<std::string> ParseStringList(const std::string& str) {
119 Vector<std::string> strings;
120 static const std::string kSeparators(" \t");
121 size_t last_pos = str.find_first_not_of(kSeparators); // skip leading spaces
122 size_t pos = str.find_first_of(kSeparators, last_pos); // find next space
123
124 while (std::string::npos != pos || std::string::npos != last_pos) {
125 strings.push_back(str.substr(last_pos, pos - last_pos));
126 last_pos = str.find_first_not_of(kSeparators, pos);
127 pos = str.find_first_of(kSeparators, last_pos);
128 }
129 return strings;
130 }
131
// Converts the leading decimal number in |str| to an int. Text that does not
// start with a number (after optional whitespace and sign) yields 0; no
// error is reported.
static int ParseInt(const std::string& str) {
  return static_cast<int>(strtol(str.c_str(), nullptr, 10));
}
135
ParseIntList(const std::string & str)136 static Vector<int> ParseIntList(const std::string& str) {
137 Vector<int> ints;
138 Vector<std::string> strings = ParseStringList(str);
139 for (size_t x = 0; x < strings.size(); x++) {
140 int i = ParseInt(strings[x]);
141 ints.push_back(i);
142 }
143 return ints;
144 }
145
ParseLevels(const std::string & line)146 static Vector<int> ParseLevels(const std::string& line) {
147 Vector<int> levels;
148 Vector<std::string> strings = ParseStringList(line);
149 for (size_t x = 0; x < strings.size(); x++) {
150 const std::string& level_string = strings[x];
151 int i;
152 if (level_string == "x")
153 i = -1;
154 else
155 i = ParseInt(level_string);
156 levels.push_back(i);
157 }
158 return levels;
159 }
160
161 // This is not thread-safe as written.
ParseTestString(const std::string & line)162 static std::basic_string<UChar> ParseTestString(const std::string& line) {
163 std::basic_string<UChar> test_string;
164 static std::map<std::string, UChar> char_class_examples;
165 if (char_class_examples.empty()) {
166 char_class_examples = {{"L", 0x6c}, // 'l' for L
167 {"R", 0x05D0}, // HEBREW ALEF
168 {"EN", 0x33}, // '3' for EN
169 {"ES", 0x2d}, // '-' for ES
170 {"ET", 0x25}, // '%' for ET
171 {"AN", 0x0660}, // arabic 0
172 {"CS", 0x2c}, // ',' for CS
173 {"B", 0x0A}, // <control-000A>
174 {"S", 0x09}, // <control-0009>
175 {"WS", 0x20}, // ' ' for WS
176 {"ON", 0x3d}, // '=' for ON
177 {"NSM", 0x05BF}, // HEBREW POINT RAFE
178 {"AL", 0x0608}, // ARABIC RAY
179 {"BN", 0x00AD}, // SOFT HYPHEN
180 {"LRE", 0x202A}, {"RLE", 0x202B}, {"PDF", 0x202C},
181 {"LRO", 0x202D}, {"RLO", 0x202E}, {"LRI", 0x2066},
182 {"RLI", 0x2067}, {"FSI", 0x2068}, {"PDI", 0x2069}};
183 }
184
185 Vector<std::string> char_classes = ParseStringList(line);
186 for (size_t i = 0; i < char_classes.size(); i++) {
187 // FIXME: If the lookup failed we could return false for a parse error.
188 test_string.push_back(char_class_examples.find(char_classes[i])->second);
189 }
190 return test_string;
191 }
192
ParseParagraphDirectionMask(const std::string & line,int & mode_mask)193 static bool ParseParagraphDirectionMask(const std::string& line,
194 int& mode_mask) {
195 mode_mask = ParseInt(line);
196 return mode_mask >= 1 && mode_mask <= kMaxParagraphDirection;
197 }
198
// Reports a line that could not be parsed: writes |line_number| and the
// offending |line| to standard output.
static void ParseError(const std::string& line, size_t line_number) {
  // C stdio is used here to avoid the expense of std::cout.
  fprintf(stdout, "Parse error, line %zu : %s\n", line_number, line.c_str());
}
203
204 template <class Runner>
Parse(std::istream & bidi_test_file)205 void Harness<Runner>::Parse(std::istream& bidi_test_file) {
206 static const std::string kLevelsPrefix("@Levels");
207 static const std::string kReorderPrefix("@Reorder");
208
209 // FIXME: UChar is an ICU type and cheating a bit to use here.
210 // uint16_t might be more portable.
211 std::basic_string<UChar> test_string;
212 Vector<int> levels;
213 Vector<int> reorder;
214 int paragraph_direction_mask;
215
216 std::string line;
217 size_t line_number = 0;
218 while (std::getline(bidi_test_file, line)) {
219 line_number++;
220 const std::string original_line = line;
221 size_t comment_start = line.find_first_of('#');
222 if (comment_start != std::string::npos)
223 line = line.substr(0, comment_start);
224 Trim(line);
225 if (line.empty())
226 continue;
227 if (line[0] == '@') {
228 if (!line.find(kLevelsPrefix)) {
229 levels = ParseLevels(line.substr(kLevelsPrefix.length() + 1));
230 continue;
231 }
232 if (!line.find(kReorderPrefix)) {
233 reorder = ParseIntList(line.substr(kReorderPrefix.length() + 1));
234 continue;
235 }
236 } else {
237 // Assume it's a data line.
238 size_t seperator_index = line.find_first_of(';');
239 if (seperator_index == std::string::npos) {
240 ParseError(original_line, line_number);
241 continue;
242 }
243 test_string = ParseTestString(line.substr(0, seperator_index));
244 if (!ParseParagraphDirectionMask(line.substr(seperator_index + 1),
245 paragraph_direction_mask)) {
246 ParseError(original_line, line_number);
247 continue;
248 }
249
250 if (paragraph_direction_mask & kDirectionAutoLTR) {
251 runner_.RunTest(test_string, reorder, levels, kDirectionAutoLTR,
252 original_line, line_number);
253 }
254 if (paragraph_direction_mask & kDirectionLTR) {
255 runner_.RunTest(test_string, reorder, levels, kDirectionLTR,
256 original_line, line_number);
257 }
258 if (paragraph_direction_mask & kDirectionRTL) {
259 runner_.RunTest(test_string, reorder, levels, kDirectionRTL,
260 original_line, line_number);
261 }
262 }
263 }
264 }
265
// Line-oriented parser for ';'-separated, per-character test files; each
// accepted line is forwarded to a |Runner|. |Runner| is any type offering a
// RunTest() member compatible with the call made from Parse().
template <typename Runner>
class CharacterHarness {
 public:
  // Only a reference to |runner| is stored, so the caller has to keep the
  // runner alive for as long as this harness is used.
  CharacterHarness(Runner& runner) : runner_(runner) {}

  // Reads |bidi_test_file| until getline() fails, running one test per
  // well-formed line.
  void Parse(std::istream& bidi_test_file);

 private:
  Runner& runner_;
};
275
ParseUCharHexadecimalList(const std::string & str)276 static std::basic_string<UChar> ParseUCharHexadecimalList(
277 const std::string& str) {
278 std::basic_string<UChar> string;
279 Vector<std::string> strings = ParseStringList(str);
280 for (size_t x = 0; x < strings.size(); x++) {
281 int i = strtol(strings[x].c_str(), nullptr, 16);
282 string.push_back((UChar)i);
283 }
284 return string;
285 }
286
ParseParagraphDirection(const std::string & str)287 static ParagraphDirection ParseParagraphDirection(const std::string& str) {
288 int i = ParseInt(str);
289 switch (i) {
290 case 0:
291 return kDirectionLTR;
292 case 1:
293 return kDirectionRTL;
294 case 2:
295 return kDirectionAutoLTR;
296 default:
297 return kDirectionNone;
298 }
299 }
300
ParseSuppresedChars(const std::string & str)301 static int ParseSuppresedChars(const std::string& str) {
302 Vector<std::string> strings = ParseStringList(str);
303 int suppresed_chars = 0;
304 for (size_t x = 0; x < strings.size(); x++) {
305 if (strings[x] == "x")
306 suppresed_chars++;
307 }
308 return suppresed_chars;
309 }
310
311 template <class Runner>
Parse(std::istream & bidi_test_file)312 void CharacterHarness<Runner>::Parse(std::istream& bidi_test_file) {
313 std::string line;
314 size_t line_number = 0;
315 while (std::getline(bidi_test_file, line)) {
316 line_number++;
317
318 const std::string original_line = line;
319 size_t comment_start = line.find_first_of('#');
320 if (comment_start != std::string::npos)
321 line = line.substr(0, comment_start);
322 Trim(line);
323 if (line.empty())
324 continue;
325
326 // Field 0: list of uchars as 4 char strings
327 size_t separator_index = line.find_first_of(';');
328 if (separator_index == std::string::npos) {
329 ParseError(original_line, line_number);
330 continue;
331 }
332
333 std::basic_string<UChar> test_string =
334 ParseUCharHexadecimalList(line.substr(0, separator_index));
335 if (test_string.empty()) {
336 ParseError(original_line, line_number);
337 continue;
338 }
339 line = line.substr(separator_index + 1);
340
341 // Field 1: paragraph direction (0 LTR, 1 RTL, 2 AutoLTR)
342 separator_index = line.find_first_of(';');
343 if (separator_index == std::string::npos) {
344 ParseError(original_line, line_number);
345 continue;
346 }
347
348 ParagraphDirection paragraph_direction =
349 ParseParagraphDirection(line.substr(0, separator_index));
350 if (paragraph_direction == kDirectionNone) {
351 ParseError(original_line, line_number);
352 continue;
353 }
354 line = line.substr(separator_index + 1);
355
356 // Field 2: resolved paragraph embedding level
357 separator_index = line.find_first_of(';');
358 if (separator_index == std::string::npos) {
359 ParseError(original_line, line_number);
360 continue;
361 }
362
363 int paragraph_embedding_level = ParseInt(line.substr(0, separator_index));
364 if (paragraph_embedding_level < 0) {
365 ParseError(original_line, line_number);
366 continue;
367 }
368 line = line.substr(separator_index + 1);
369
370 // Field 3: List of resolved levels
371 separator_index = line.find_first_of(';');
372 if (separator_index == std::string::npos) {
373 ParseError(original_line, line_number);
374 continue;
375 }
376
377 int supressed_chars = ParseSuppresedChars(line.substr(0, separator_index));
378 Vector<int> levels = ParseLevels(line.substr(0, separator_index));
379 if (test_string.size() != levels.size()) {
380 ParseError(original_line, line_number);
381 continue;
382 }
383 line = line.substr(separator_index + 1);
384
385 // Field 4: visual ordering of characters
386 separator_index = line.find_first_of(';');
387 if (separator_index != std::string::npos) {
388 ParseError(original_line, line_number);
389 continue;
390 }
391
392 Vector<int> visual_ordering = ParseIntList(line);
393 if (test_string.size() - supressed_chars != visual_ordering.size()) {
394 ParseError(original_line, line_number);
395 continue;
396 }
397
398 runner_.RunTest(test_string, visual_ordering, levels, paragraph_direction,
399 original_line, line_number);
400 }
401 }
402
403 } // namespace bidi_test
404
405 #endif // THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_BIDI_TEST_HARNESS_H_
406