1 /*
2  * Copyright (C) 2013 Google Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are
6  * met:
7  *
8  *     * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *     * Redistributions in binary form must reproduce the above
11  * copyright notice, this list of conditions and the following disclaimer
12  * in the documentation and/or other materials provided with the
13  * distribution.
14  *     * Neither the name of Google Inc. nor the names of its
15  * contributors may be used to endorse or promote products derived from
16  * this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #ifndef THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_BIDI_TEST_HARNESS_H_
32 #define THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_BIDI_TEST_HARNESS_H_
33 
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#include <istream>
#include <map>
#include <string>
38 
39 // FIXME: We don't have any business owning this code. We should try to
40 // upstream this to unicode.org if possible (for other implementations to use).
// Unicode.org provides a reference implementation, including a parser:
42 // http://www.unicode.org/Public/PROGRAMS/BidiReferenceC/6.3.0/source/brtest.c
43 // But it, like the other implementations I've found, is rather tied to
44 // the algorithms it is testing. This file seeks to only implement the parser
45 // bits.
46 
47 // Other C/C++ implementations of this parser:
48 // https://github.com/googlei18n/fribidi-vs-unicode/blob/master/test.c
49 // http://source.icu-project.org/repos/icu/icu/trunk/source/test/intltest/bidiconf.cpp
// Both of those are too tied to their respective projects to be of use to
// Blink.
51 
// There are non-C implementations to parse BidiTest.txt as well, including:
53 // https://github.com/twitter/twitter-cldr-rb/blob/master/spec/bidi/bidi_spec.rb
54 
55 // NOTE: None of this file is currently written to be thread-safe.
56 
57 namespace bidi_test {
58 
// Paragraph directions a test can be run with. The non-zero values are
// distinct bits so that several directions can be OR-ed into one mask.
enum ParagraphDirection {
  kDirectionNone = 0,
  kDirectionAutoLTR = 1 << 0,
  kDirectionLTR = 1 << 1,
  kDirectionRTL = 1 << 2,
};

// Mask with every direction bit set; the largest valid direction mask.
constexpr int kMaxParagraphDirection =
    kDirectionRTL | kDirectionLTR | kDirectionAutoLTR;
67 
68 // For error printing:
NameFromParagraphDirection(ParagraphDirection paragraph_direction)69 std::string NameFromParagraphDirection(ParagraphDirection paragraph_direction) {
70   switch (paragraph_direction) {
71     case bidi_test::kDirectionAutoLTR:
72       return "Auto-LTR";
73     case bidi_test::kDirectionLTR:
74       return "LTR";
75     case bidi_test::kDirectionRTL:
76       return "RTL";
77     default:
78       // This should never be reached.
79       return "";
80   }
81 }
82 
// Parser front-end that reads a test stream and hands each parsed test case
// to a caller-supplied runner.
//
// Runner must provide a RunTest() member; see Parse() for the arguments it is
// called with. The harness keeps only a reference to the runner, so the
// runner has to outlive the harness.
template <class Runner>
class Harness {
 public:
  // explicit: a Runner should never silently convert into a Harness.
  explicit Harness(Runner& runner) : runner_(runner) {}

  // Reads |bidi_test_file| line by line and forwards each test to the runner.
  void Parse(std::istream& bidi_test_file);

 private:
  Runner& runner_;
};
92 
// Removes leading spaces and tabs from |s| in place.
// (boost::trim would do the same, but no other part of Blink uses boost yet.)
inline void Ltrim(std::string& s) {
  static const std::string kSeparators(" \t");
  const size_t first_kept = s.find_first_not_of(kSeparators);
  if (first_kept == std::string::npos) {
    // Nothing but separators (or already empty).
    s.clear();
    return;
  }
  s.erase(0, first_kept);
}
98 
// Removes trailing spaces and tabs from |s| in place.
inline void Rtrim(std::string& s) {
  static const std::string kSeparators(" \t");
  const size_t last_kept = s.find_last_not_of(kSeparators);
  // Keep everything up to and including the last non-separator character; a
  // string made only of separators (or an empty one) keeps nothing.
  const size_t kept_length =
      (last_kept == std::string::npos) ? 0 : last_kept + 1;
  s.resize(kept_length);
}
112 
// Removes both leading and trailing spaces and tabs from |s| in place.
inline void Trim(std::string& s) {
  static const std::string kSeparators(" \t");
  const size_t last_kept = s.find_last_not_of(kSeparators);
  if (last_kept == std::string::npos) {
    // Empty, or separators only.
    s.clear();
    return;
  }
  // Drop the tail first so the head offset computed below stays valid.
  s.erase(last_kept + 1);
  s.erase(0, s.find_first_not_of(kSeparators));
}
117 
ParseStringList(const std::string & str)118 static Vector<std::string> ParseStringList(const std::string& str) {
119   Vector<std::string> strings;
120   static const std::string kSeparators(" \t");
121   size_t last_pos = str.find_first_not_of(kSeparators);   // skip leading spaces
122   size_t pos = str.find_first_of(kSeparators, last_pos);  // find next space
123 
124   while (std::string::npos != pos || std::string::npos != last_pos) {
125     strings.push_back(str.substr(last_pos, pos - last_pos));
126     last_pos = str.find_first_not_of(kSeparators, pos);
127     pos = str.find_first_of(kSeparators, last_pos);
128   }
129   return strings;
130 }
131 
// Parses a base-10 integer from the start of |str|, skipping leading
// whitespace and stopping at the first character that is not part of the
// number. Returns 0 when |str| does not start with a number.
//
// Uses strtol rather than atoi because atoi has undefined behaviour when the
// value does not fit; here out-of-range values are clamped to
// [INT_MIN, INT_MAX] instead.
static int ParseInt(const std::string& str) {
  const long value = strtol(str.c_str(), nullptr, 10);
  if (value > INT_MAX)
    return INT_MAX;
  if (value < INT_MIN)
    return INT_MIN;
  return static_cast<int>(value);
}
135 
ParseIntList(const std::string & str)136 static Vector<int> ParseIntList(const std::string& str) {
137   Vector<int> ints;
138   Vector<std::string> strings = ParseStringList(str);
139   for (size_t x = 0; x < strings.size(); x++) {
140     int i = ParseInt(strings[x]);
141     ints.push_back(i);
142   }
143   return ints;
144 }
145 
ParseLevels(const std::string & line)146 static Vector<int> ParseLevels(const std::string& line) {
147   Vector<int> levels;
148   Vector<std::string> strings = ParseStringList(line);
149   for (size_t x = 0; x < strings.size(); x++) {
150     const std::string& level_string = strings[x];
151     int i;
152     if (level_string == "x")
153       i = -1;
154     else
155       i = ParseInt(level_string);
156     levels.push_back(i);
157   }
158   return levels;
159 }
160 
161 // This is not thread-safe as written.
ParseTestString(const std::string & line)162 static std::basic_string<UChar> ParseTestString(const std::string& line) {
163   std::basic_string<UChar> test_string;
164   static std::map<std::string, UChar> char_class_examples;
165   if (char_class_examples.empty()) {
166     char_class_examples = {{"L", 0x6c},      // 'l' for L
167                            {"R", 0x05D0},    // HEBREW ALEF
168                            {"EN", 0x33},     // '3' for EN
169                            {"ES", 0x2d},     // '-' for ES
170                            {"ET", 0x25},     // '%' for ET
171                            {"AN", 0x0660},   // arabic 0
172                            {"CS", 0x2c},     // ',' for CS
173                            {"B", 0x0A},      // <control-000A>
174                            {"S", 0x09},      // <control-0009>
175                            {"WS", 0x20},     // ' ' for WS
176                            {"ON", 0x3d},     // '=' for ON
177                            {"NSM", 0x05BF},  // HEBREW POINT RAFE
178                            {"AL", 0x0608},   // ARABIC RAY
179                            {"BN", 0x00AD},   // SOFT HYPHEN
180                            {"LRE", 0x202A}, {"RLE", 0x202B}, {"PDF", 0x202C},
181                            {"LRO", 0x202D}, {"RLO", 0x202E}, {"LRI", 0x2066},
182                            {"RLI", 0x2067}, {"FSI", 0x2068}, {"PDI", 0x2069}};
183   }
184 
185   Vector<std::string> char_classes = ParseStringList(line);
186   for (size_t i = 0; i < char_classes.size(); i++) {
187     // FIXME: If the lookup failed we could return false for a parse error.
188     test_string.push_back(char_class_examples.find(char_classes[i])->second);
189   }
190   return test_string;
191 }
192 
ParseParagraphDirectionMask(const std::string & line,int & mode_mask)193 static bool ParseParagraphDirectionMask(const std::string& line,
194                                         int& mode_mask) {
195   mode_mask = ParseInt(line);
196   return mode_mask >= 1 && mode_mask <= kMaxParagraphDirection;
197 }
198 
// Reports a line that could not be parsed by printing its 1-based
// |line_number| and its text to stdout.
static void ParseError(const std::string& line, size_t line_number) {
  // printf is used here to avoid the expense of std::cout.
  const char* const line_text = line.c_str();
  printf("Parse error, line %zu : %s\n", line_number, line_text);
}
203 
204 template <class Runner>
Parse(std::istream & bidi_test_file)205 void Harness<Runner>::Parse(std::istream& bidi_test_file) {
206   static const std::string kLevelsPrefix("@Levels");
207   static const std::string kReorderPrefix("@Reorder");
208 
209   // FIXME: UChar is an ICU type and cheating a bit to use here.
210   // uint16_t might be more portable.
211   std::basic_string<UChar> test_string;
212   Vector<int> levels;
213   Vector<int> reorder;
214   int paragraph_direction_mask;
215 
216   std::string line;
217   size_t line_number = 0;
218   while (std::getline(bidi_test_file, line)) {
219     line_number++;
220     const std::string original_line = line;
221     size_t comment_start = line.find_first_of('#');
222     if (comment_start != std::string::npos)
223       line = line.substr(0, comment_start);
224     Trim(line);
225     if (line.empty())
226       continue;
227     if (line[0] == '@') {
228       if (!line.find(kLevelsPrefix)) {
229         levels = ParseLevels(line.substr(kLevelsPrefix.length() + 1));
230         continue;
231       }
232       if (!line.find(kReorderPrefix)) {
233         reorder = ParseIntList(line.substr(kReorderPrefix.length() + 1));
234         continue;
235       }
236     } else {
237       // Assume it's a data line.
238       size_t seperator_index = line.find_first_of(';');
239       if (seperator_index == std::string::npos) {
240         ParseError(original_line, line_number);
241         continue;
242       }
243       test_string = ParseTestString(line.substr(0, seperator_index));
244       if (!ParseParagraphDirectionMask(line.substr(seperator_index + 1),
245                                        paragraph_direction_mask)) {
246         ParseError(original_line, line_number);
247         continue;
248       }
249 
250       if (paragraph_direction_mask & kDirectionAutoLTR) {
251         runner_.RunTest(test_string, reorder, levels, kDirectionAutoLTR,
252                          original_line, line_number);
253       }
254       if (paragraph_direction_mask & kDirectionLTR) {
255         runner_.RunTest(test_string, reorder, levels, kDirectionLTR,
256                          original_line, line_number);
257       }
258       if (paragraph_direction_mask & kDirectionRTL) {
259         runner_.RunTest(test_string, reorder, levels, kDirectionRTL,
260                          original_line, line_number);
261       }
262     }
263   }
264 }
265 
// Parser front-end for the per-character test format handled by
// CharacterHarness::Parse(); each parsed test case is handed to a
// caller-supplied runner.
//
// Runner must provide a RunTest() member; see Parse() for the arguments it is
// called with. The harness keeps only a reference to the runner, so the
// runner has to outlive the harness.
template <class Runner>
class CharacterHarness {
 public:
  // explicit: a Runner should never silently convert into a harness.
  explicit CharacterHarness(Runner& runner) : runner_(runner) {}

  // Reads |bidi_test_file| line by line and forwards each test to the runner.
  void Parse(std::istream& bidi_test_file);

 private:
  Runner& runner_;
};
275 
ParseUCharHexadecimalList(const std::string & str)276 static std::basic_string<UChar> ParseUCharHexadecimalList(
277     const std::string& str) {
278   std::basic_string<UChar> string;
279   Vector<std::string> strings = ParseStringList(str);
280   for (size_t x = 0; x < strings.size(); x++) {
281     int i = strtol(strings[x].c_str(), nullptr, 16);
282     string.push_back((UChar)i);
283   }
284   return string;
285 }
286 
ParseParagraphDirection(const std::string & str)287 static ParagraphDirection ParseParagraphDirection(const std::string& str) {
288   int i = ParseInt(str);
289   switch (i) {
290     case 0:
291       return kDirectionLTR;
292     case 1:
293       return kDirectionRTL;
294     case 2:
295       return kDirectionAutoLTR;
296     default:
297       return kDirectionNone;
298   }
299 }
300 
ParseSuppresedChars(const std::string & str)301 static int ParseSuppresedChars(const std::string& str) {
302   Vector<std::string> strings = ParseStringList(str);
303   int suppresed_chars = 0;
304   for (size_t x = 0; x < strings.size(); x++) {
305     if (strings[x] == "x")
306       suppresed_chars++;
307   }
308   return suppresed_chars;
309 }
310 
311 template <class Runner>
Parse(std::istream & bidi_test_file)312 void CharacterHarness<Runner>::Parse(std::istream& bidi_test_file) {
313   std::string line;
314   size_t line_number = 0;
315   while (std::getline(bidi_test_file, line)) {
316     line_number++;
317 
318     const std::string original_line = line;
319     size_t comment_start = line.find_first_of('#');
320     if (comment_start != std::string::npos)
321       line = line.substr(0, comment_start);
322     Trim(line);
323     if (line.empty())
324       continue;
325 
326     // Field 0: list of uchars as 4 char strings
327     size_t separator_index = line.find_first_of(';');
328     if (separator_index == std::string::npos) {
329       ParseError(original_line, line_number);
330       continue;
331     }
332 
333     std::basic_string<UChar> test_string =
334         ParseUCharHexadecimalList(line.substr(0, separator_index));
335     if (test_string.empty()) {
336       ParseError(original_line, line_number);
337       continue;
338     }
339     line = line.substr(separator_index + 1);
340 
341     // Field 1: paragraph direction (0 LTR, 1 RTL, 2 AutoLTR)
342     separator_index = line.find_first_of(';');
343     if (separator_index == std::string::npos) {
344       ParseError(original_line, line_number);
345       continue;
346     }
347 
348     ParagraphDirection paragraph_direction =
349         ParseParagraphDirection(line.substr(0, separator_index));
350     if (paragraph_direction == kDirectionNone) {
351       ParseError(original_line, line_number);
352       continue;
353     }
354     line = line.substr(separator_index + 1);
355 
356     // Field 2: resolved paragraph embedding level
357     separator_index = line.find_first_of(';');
358     if (separator_index == std::string::npos) {
359       ParseError(original_line, line_number);
360       continue;
361     }
362 
363     int paragraph_embedding_level = ParseInt(line.substr(0, separator_index));
364     if (paragraph_embedding_level < 0) {
365       ParseError(original_line, line_number);
366       continue;
367     }
368     line = line.substr(separator_index + 1);
369 
370     // Field 3: List of resolved levels
371     separator_index = line.find_first_of(';');
372     if (separator_index == std::string::npos) {
373       ParseError(original_line, line_number);
374       continue;
375     }
376 
377     int supressed_chars = ParseSuppresedChars(line.substr(0, separator_index));
378     Vector<int> levels = ParseLevels(line.substr(0, separator_index));
379     if (test_string.size() != levels.size()) {
380       ParseError(original_line, line_number);
381       continue;
382     }
383     line = line.substr(separator_index + 1);
384 
385     // Field 4: visual ordering of characters
386     separator_index = line.find_first_of(';');
387     if (separator_index != std::string::npos) {
388       ParseError(original_line, line_number);
389       continue;
390     }
391 
392     Vector<int> visual_ordering = ParseIntList(line);
393     if (test_string.size() - supressed_chars != visual_ordering.size()) {
394       ParseError(original_line, line_number);
395       continue;
396     }
397 
398     runner_.RunTest(test_string, visual_ordering, levels, paragraph_direction,
399                      original_line, line_number);
400   }
401 }
402 
403 }  // namespace bidi_test
404 
405 #endif  // THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_BIDI_TEST_HARNESS_H_
406