1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/blink/renderer/core/editing/state_machines/forward_grapheme_boundary_state_machine.h"
6
7 #include "testing/gtest/include/gtest/gtest.h"
8 #include "third_party/blink/renderer/core/editing/state_machines/state_machine_test_util.h"
9 #include "third_party/blink/renderer/platform/wtf/text/character_names.h"
10
11 namespace blink {
12
13 namespace forward_grapheme_boundary_state_machine_test {
14
// Notations:
// | indicates the initial offset position.
// SOT indicates start of text.
// EOT indicates end of text.
// [Lead] indicates a broken (lonely) lead surrogate.
// [Trail] indicates a broken (lonely) trail surrogate.
// [U] indicates regional indicator symbol U.
// [S] indicates regional indicator symbol S.
23
24 // kWatch kVS16, kEye kVS16 are valid standardized variants.
25 const UChar32 kWatch = 0x231A;
26 const UChar32 kEye = WTF::unicode::kEyeCharacter;
27 const UChar32 kVS16 = 0xFE0F;
28
29 // kHanBMP KVS17, kHanSIP kVS17 are valie IVD sequences.
30 const UChar32 kHanBMP = 0x845B;
31 const UChar32 kHanSIP = 0x20000;
32 const UChar32 kVS17 = 0xE0100;
33
34 // Following lead/trail values are used for invalid surrogate pairs.
35 const UChar kLead = 0xD83D;
36 const UChar kTrail = 0xDC66;
37
38 // U+1F1FA is REGIONAL INDICATOR SYMBOL LETTER U
39 const UChar32 kRisU = 0x1F1FA;
40 // U+1F1F8 is REGIONAL INDICATOR SYMBOL LETTER S
41 const UChar32 kRisS = 0x1F1F8;
42
43 class ForwardGraphemeBoundaryStatemachineTest
44 : public GraphemeStateMachineTestBase {
45 protected:
46 ForwardGraphemeBoundaryStatemachineTest() = default;
47 ~ForwardGraphemeBoundaryStatemachineTest() override = default;
48
49 private:
50 DISALLOW_COPY_AND_ASSIGN(ForwardGraphemeBoundaryStatemachineTest);
51 };
52
// A machine that was never fed any input reports a boundary offset of 0, and
// asking a second time gives the same answer.
TEST_F(ForwardGraphemeBoundaryStatemachineTest, DoNothingCase) {
  ForwardGraphemeBoundaryStateMachine machine;

  const auto first_offset = machine.FinalizeAndGetBoundaryOffset();
  EXPECT_EQ(0, first_offset);

  const auto second_offset = machine.FinalizeAndGetBoundaryOffset();
  EXPECT_EQ(0, second_offset);
}
59
// Varies the text before the initial offset while the text after it stays
// 'a' + 'a'. The preceding text changes the expected transition string but
// never the boundary offset, which is 1 in every case.
TEST_F(ForwardGraphemeBoundaryStatemachineTest, PrecedingText) {
  ForwardGraphemeBoundaryStateMachine machine;

  // Text after the initial offset, shared by every case below.
  const auto following = AsCodePoints('a', 'a');

  // Preceding text should not affect the result except for flags.
  // SOT + | + 'a' + 'a'
  EXPECT_EQ("SRF",
            ProcessSequenceForward(&machine, AsCodePoints(), following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());
  // SOT + [U] + | + 'a' + 'a'
  EXPECT_EQ("RRSRF",
            ProcessSequenceForward(&machine, AsCodePoints(kRisU), following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());
  // SOT + [U] + [S] + | + 'a' + 'a'
  EXPECT_EQ("RRRRSRF",
            ProcessSequenceForward(&machine, AsCodePoints(kRisU, kRisS),
                                   following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());

  // U+0000 + | + 'a' + 'a'
  EXPECT_EQ("SRF",
            ProcessSequenceForward(&machine, AsCodePoints(0), following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());
  // U+0000 + [U] + | + 'a' + 'a'
  EXPECT_EQ("RRSRF",
            ProcessSequenceForward(&machine, AsCodePoints(0, kRisU),
                                   following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());
  // U+0000 + [U] + [S] + | + 'a' + 'a'
  EXPECT_EQ("RRRRSRF",
            ProcessSequenceForward(&machine, AsCodePoints(0, kRisU, kRisS),
                                   following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());

  // 'a' + | + 'a' + 'a'
  EXPECT_EQ("SRF",
            ProcessSequenceForward(&machine, AsCodePoints('a'), following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());
  // 'a' + [U] + | + 'a' + 'a'
  EXPECT_EQ("RRSRF",
            ProcessSequenceForward(&machine, AsCodePoints('a', kRisU),
                                   following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());
  // 'a' + [U] + [S] + | + 'a' + 'a'
  EXPECT_EQ("RRRRSRF",
            ProcessSequenceForward(&machine, AsCodePoints('a', kRisU, kRisS),
                                   following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());

  // U+1F441 + | + 'a' + 'a'
  EXPECT_EQ("RSRF",
            ProcessSequenceForward(&machine, AsCodePoints(kEye), following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());
  // U+1F441 + [U] + | + 'a' + 'a'
  EXPECT_EQ("RRRSRF",
            ProcessSequenceForward(&machine, AsCodePoints(kEye, kRisU),
                                   following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());
  // U+1F441 + [U] + [S] + | + 'a' + 'a'
  EXPECT_EQ("RRRRRSRF",
            ProcessSequenceForward(&machine, AsCodePoints(kEye, kRisU, kRisS),
                                   following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());

  // Broken surrogates in preceding text.

  // [Lead] + | + 'a' + 'a'
  EXPECT_EQ("SRF",
            ProcessSequenceForward(&machine, AsCodePoints(kLead), following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());
  // [Lead] + [U] + | + 'a' + 'a'
  EXPECT_EQ("RRSRF",
            ProcessSequenceForward(&machine, AsCodePoints(kLead, kRisU),
                                   following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());
  // [Lead] + [U] + [S] + | + 'a' + 'a'
  EXPECT_EQ("RRRRSRF",
            ProcessSequenceForward(&machine, AsCodePoints(kLead, kRisU, kRisS),
                                   following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());

  // 'a' + [Trail] + | + 'a' + 'a'
  EXPECT_EQ("RSRF",
            ProcessSequenceForward(&machine, AsCodePoints('a', kTrail),
                                   following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());
  // 'a' + [Trail] + [U] + | + 'a' + 'a'
  EXPECT_EQ("RRRSRF",
            ProcessSequenceForward(&machine, AsCodePoints('a', kTrail, kRisU),
                                   following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());
  // 'a' + [Trail] + [U] + [S] + | + 'a' + 'a'
  EXPECT_EQ("RRRRRSRF",
            ProcessSequenceForward(&machine,
                                   AsCodePoints('a', kTrail, kRisU, kRisS),
                                   following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());

  // [Trail] + [Trail] + | + 'a' + 'a'
  EXPECT_EQ("RSRF",
            ProcessSequenceForward(&machine, AsCodePoints(kTrail, kTrail),
                                   following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());
  // [Trail] + [Trail] + [U] + | + 'a' + 'a'
  EXPECT_EQ("RRRSRF",
            ProcessSequenceForward(&machine,
                                   AsCodePoints(kTrail, kTrail, kRisU),
                                   following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());
  // [Trail] + [Trail] + [U] + [S] + | + 'a' + 'a'
  EXPECT_EQ("RRRRRSRF",
            ProcessSequenceForward(&machine,
                                   AsCodePoints(kTrail, kTrail, kRisU, kRisS),
                                   following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());

  // SOT + [Trail] + | + 'a' + 'a'
  EXPECT_EQ("RSRF",
            ProcessSequenceForward(&machine, AsCodePoints(kTrail), following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());
  // SOT + [Trail] + [U] + | + 'a' + 'a'
  EXPECT_EQ("RRRSRF",
            ProcessSequenceForward(&machine, AsCodePoints(kTrail, kRisU),
                                   following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());
  // SOT + [Trail] + [U] + [S] + | + 'a' + 'a'
  EXPECT_EQ("RRRRRSRF",
            ProcessSequenceForward(&machine,
                                   AsCodePoints(kTrail, kRisU, kRisS),
                                   following));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());
}
184
// Following text that begins with an unpaired surrogate: the expected
// boundary offset is 1 in every case, i.e. right after the lonely surrogate.
TEST_F(ForwardGraphemeBoundaryStatemachineTest, BrokenSurrogatePair) {
  ForwardGraphemeBoundaryStateMachine machine;

  // Every case starts at SOT, so there is no preceding text.
  const auto empty_preceding = AsCodePoints();

  // SOT + | + [Trail]
  EXPECT_EQ("SF", ProcessSequenceForward(&machine, empty_preceding,
                                         AsCodePoints(kTrail)));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());
  // SOT + | + [Lead] + 'a'
  EXPECT_EQ("SRF", ProcessSequenceForward(&machine, empty_preceding,
                                          AsCodePoints(kLead, 'a')));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());
  // SOT + | + [Lead] + [Lead]
  EXPECT_EQ("SRF", ProcessSequenceForward(&machine, empty_preceding,
                                          AsCodePoints(kLead, kLead)));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());
  // SOT + | + [Lead] + EOT
  EXPECT_EQ("SR", ProcessSequenceForward(&machine, empty_preceding,
                                         AsCodePoints(kLead)));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());
}
204
// Following text that begins with a single BMP character which does not
// combine with what comes next: the expected boundary offset is always 1.
TEST_F(ForwardGraphemeBoundaryStatemachineTest, BreakImmediately_BMP) {
  ForwardGraphemeBoundaryStateMachine machine;

  // Every case starts at SOT, so there is no preceding text.
  const auto empty_preceding = AsCodePoints();

  // SOT + | + U+0000 + U+0000
  EXPECT_EQ("SRF", ProcessSequenceForward(&machine, empty_preceding,
                                          AsCodePoints(0, 0)));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + 'a' + 'a'
  EXPECT_EQ("SRF", ProcessSequenceForward(&machine, empty_preceding,
                                          AsCodePoints('a', 'a')));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + 'a' + U+1F441
  EXPECT_EQ("SRRF", ProcessSequenceForward(&machine, empty_preceding,
                                           AsCodePoints('a', kEye)));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + 'a' + EOT
  EXPECT_EQ("SR", ProcessSequenceForward(&machine, empty_preceding,
                                         AsCodePoints('a')));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + 'a' + [Trail]
  EXPECT_EQ("SRF", ProcessSequenceForward(&machine, empty_preceding,
                                          AsCodePoints('a', kTrail)));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + 'a' + [Lead] + 'a'
  EXPECT_EQ("SRRF", ProcessSequenceForward(&machine, empty_preceding,
                                           AsCodePoints('a', kLead, 'a')));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + 'a' + [Lead] + [Lead]
  EXPECT_EQ("SRRF", ProcessSequenceForward(&machine, empty_preceding,
                                           AsCodePoints('a', kLead, kLead)));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + 'a' + [Lead] + EOT
  EXPECT_EQ("SRR", ProcessSequenceForward(&machine, empty_preceding,
                                          AsCodePoints('a', kLead)));
  EXPECT_EQ(1, machine.FinalizeAndGetBoundaryOffset());
}
248
// Following text that begins with a single supplementary character (U+1F441)
// which does not combine with what comes next: the expected boundary offset
// is always 2.
TEST_F(ForwardGraphemeBoundaryStatemachineTest,
       BreakImmediately_Supplementary) {
  ForwardGraphemeBoundaryStateMachine machine;

  // Every case starts at SOT, so there is no preceding text.
  const auto empty_preceding = AsCodePoints();

  // SOT + | + U+1F441 + 'a'
  EXPECT_EQ("SRRF", ProcessSequenceForward(&machine, empty_preceding,
                                           AsCodePoints(kEye, 'a')));
  EXPECT_EQ(2, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+1F441 + U+1F441
  EXPECT_EQ("SRRRF", ProcessSequenceForward(&machine, empty_preceding,
                                            AsCodePoints(kEye, kEye)));
  EXPECT_EQ(2, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+1F441 + EOT
  EXPECT_EQ("SRR", ProcessSequenceForward(&machine, empty_preceding,
                                          AsCodePoints(kEye)));
  EXPECT_EQ(2, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+1F441 + [Trail]
  EXPECT_EQ("SRRF", ProcessSequenceForward(&machine, empty_preceding,
                                           AsCodePoints(kEye, kTrail)));
  EXPECT_EQ(2, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+1F441 + [Lead] + 'a'
  EXPECT_EQ("SRRRF", ProcessSequenceForward(&machine, empty_preceding,
                                            AsCodePoints(kEye, kLead, 'a')));
  EXPECT_EQ(2, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+1F441 + [Lead] + [Lead]
  EXPECT_EQ("SRRRF", ProcessSequenceForward(&machine, empty_preceding,
                                            AsCodePoints(kEye, kLead, kLead)));
  EXPECT_EQ(2, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+1F441 + [Lead] + EOT
  EXPECT_EQ("SRRR", ProcessSequenceForward(&machine, empty_preceding,
                                           AsCodePoints(kEye, kLead)));
  EXPECT_EQ(2, machine.FinalizeAndGetBoundaryOffset());
}
288
// A BMP base (U+231A) followed by a BMP variation selector (U+FE0F) must stay
// together: the expected boundary offset is 2 whatever comes afterwards.
TEST_F(ForwardGraphemeBoundaryStatemachineTest,
       NotBreakImmediatelyAfter_BMP_BMP) {
  ForwardGraphemeBoundaryStateMachine machine;

  // Every case starts at SOT, so there is no preceding text.
  const auto empty_preceding = AsCodePoints();

  // SOT + | + U+231A + U+FE0F + 'a'
  EXPECT_EQ("SRRF", ProcessSequenceForward(&machine, empty_preceding,
                                           AsCodePoints(kWatch, kVS16, 'a')));
  EXPECT_EQ(2, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+231A + U+FE0F + U+1F441
  EXPECT_EQ("SRRRF",
            ProcessSequenceForward(&machine, empty_preceding,
                                   AsCodePoints(kWatch, kVS16, kEye)));
  EXPECT_EQ(2, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+231A + U+FE0F + EOT
  EXPECT_EQ("SRR", ProcessSequenceForward(&machine, empty_preceding,
                                          AsCodePoints(kWatch, kVS16)));
  EXPECT_EQ(2, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+231A + U+FE0F + [Trail]
  EXPECT_EQ("SRRF",
            ProcessSequenceForward(&machine, empty_preceding,
                                   AsCodePoints(kWatch, kVS16, kTrail)));
  EXPECT_EQ(2, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+231A + U+FE0F + [Lead] + 'a'
  EXPECT_EQ("SRRRF",
            ProcessSequenceForward(&machine, empty_preceding,
                                   AsCodePoints(kWatch, kVS16, kLead, 'a')));
  EXPECT_EQ(2, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+231A + U+FE0F + [Lead] + [Lead]
  EXPECT_EQ("SRRRF",
            ProcessSequenceForward(&machine, empty_preceding,
                                   AsCodePoints(kWatch, kVS16, kLead, kLead)));
  EXPECT_EQ(2, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+231A + U+FE0F + [Lead] + EOT
  EXPECT_EQ("SRRR",
            ProcessSequenceForward(&machine, empty_preceding,
                                   AsCodePoints(kWatch, kVS16, kLead)));
  EXPECT_EQ(2, machine.FinalizeAndGetBoundaryOffset());
}
331
// A supplementary base (U+1F441) followed by a BMP variation selector
// (U+FE0F) must stay together: the expected boundary offset is 3 whatever
// comes afterwards.
TEST_F(ForwardGraphemeBoundaryStatemachineTest,
       NotBreakImmediatelyAfter_Supplementary_BMP) {
  ForwardGraphemeBoundaryStateMachine machine;

  // Every case starts at SOT, so there is no preceding text.
  const auto empty_preceding = AsCodePoints();

  // SOT + | + U+1F441 + U+FE0F + 'a'
  EXPECT_EQ("SRRRF", ProcessSequenceForward(&machine, empty_preceding,
                                            AsCodePoints(kEye, kVS16, 'a')));
  EXPECT_EQ(3, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+1F441 + U+FE0F + U+1F441
  EXPECT_EQ("SRRRRF", ProcessSequenceForward(&machine, empty_preceding,
                                             AsCodePoints(kEye, kVS16, kEye)));
  EXPECT_EQ(3, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+1F441 + U+FE0F + EOT
  EXPECT_EQ("SRRR", ProcessSequenceForward(&machine, empty_preceding,
                                           AsCodePoints(kEye, kVS16)));
  EXPECT_EQ(3, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+1F441 + U+FE0F + [Trail]
  EXPECT_EQ("SRRRF",
            ProcessSequenceForward(&machine, empty_preceding,
                                   AsCodePoints(kEye, kVS16, kTrail)));
  EXPECT_EQ(3, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+1F441 + U+FE0F + [Lead] + 'a'
  EXPECT_EQ("SRRRRF",
            ProcessSequenceForward(&machine, empty_preceding,
                                   AsCodePoints(kEye, kVS16, kLead, 'a')));
  EXPECT_EQ(3, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+1F441 + U+FE0F + [Lead] + [Lead]
  EXPECT_EQ("SRRRRF",
            ProcessSequenceForward(&machine, empty_preceding,
                                   AsCodePoints(kEye, kVS16, kLead, kLead)));
  EXPECT_EQ(3, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+1F441 + U+FE0F + [Lead] + EOT
  EXPECT_EQ("SRRRR", ProcessSequenceForward(&machine, empty_preceding,
                                            AsCodePoints(kEye, kVS16, kLead)));
  EXPECT_EQ(3, machine.FinalizeAndGetBoundaryOffset());
}
373
// A BMP base (U+845B) followed by a supplementary variation selector
// (U+E0100) must stay together: the expected boundary offset is 3 whatever
// comes afterwards.
TEST_F(ForwardGraphemeBoundaryStatemachineTest,
       NotBreakImmediatelyAfter_BMP_Supplementary) {
  ForwardGraphemeBoundaryStateMachine machine;

  // Every case starts at SOT, so there is no preceding text.
  const auto empty_preceding = AsCodePoints();

  // SOT + | + U+845B + U+E0100 + 'a'
  EXPECT_EQ("SRRRF",
            ProcessSequenceForward(&machine, empty_preceding,
                                   AsCodePoints(kHanBMP, kVS17, 'a')));
  EXPECT_EQ(3, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+845B + U+E0100 + U+1F441
  EXPECT_EQ("SRRRRF",
            ProcessSequenceForward(&machine, empty_preceding,
                                   AsCodePoints(kHanBMP, kVS17, kEye)));
  EXPECT_EQ(3, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+845B + U+E0100 + EOT
  EXPECT_EQ("SRRR", ProcessSequenceForward(&machine, empty_preceding,
                                           AsCodePoints(kHanBMP, kVS17)));
  EXPECT_EQ(3, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+845B + U+E0100 + [Trail]
  EXPECT_EQ("SRRRF",
            ProcessSequenceForward(&machine, empty_preceding,
                                   AsCodePoints(kHanBMP, kVS17, kTrail)));
  EXPECT_EQ(3, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+845B + U+E0100 + [Lead] + 'a'
  EXPECT_EQ("SRRRRF",
            ProcessSequenceForward(&machine, empty_preceding,
                                   AsCodePoints(kHanBMP, kVS17, kLead, 'a')));
  EXPECT_EQ(3, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+845B + U+E0100 + [Lead] + [Lead]
  EXPECT_EQ("SRRRRF",
            ProcessSequenceForward(
                &machine, empty_preceding,
                AsCodePoints(kHanBMP, kVS17, kLead, kLead)));
  EXPECT_EQ(3, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+845B + U+E0100 + [Lead] + EOT
  EXPECT_EQ("SRRRR",
            ProcessSequenceForward(&machine, empty_preceding,
                                   AsCodePoints(kHanBMP, kVS17, kLead)));
  EXPECT_EQ(3, machine.FinalizeAndGetBoundaryOffset());
}
418
// A supplementary base (U+20000) followed by a supplementary variation
// selector (U+E0100) must stay together: the expected boundary offset is 4
// whatever comes afterwards.
TEST_F(ForwardGraphemeBoundaryStatemachineTest,
       NotBreakImmediatelyAfter_Supplementary_Supplementary) {
  ForwardGraphemeBoundaryStateMachine machine;

  // Every case starts at SOT, so there is no preceding text.
  const auto empty_preceding = AsCodePoints();

  // SOT + | + U+20000 + U+E0100 + 'a'
  EXPECT_EQ("SRRRRF",
            ProcessSequenceForward(&machine, empty_preceding,
                                   AsCodePoints(kHanSIP, kVS17, 'a')));
  EXPECT_EQ(4, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+20000 + U+E0100 + U+1F441
  EXPECT_EQ("SRRRRRF",
            ProcessSequenceForward(&machine, empty_preceding,
                                   AsCodePoints(kHanSIP, kVS17, kEye)));
  EXPECT_EQ(4, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+20000 + U+E0100 + EOT
  EXPECT_EQ("SRRRR", ProcessSequenceForward(&machine, empty_preceding,
                                            AsCodePoints(kHanSIP, kVS17)));
  EXPECT_EQ(4, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+20000 + U+E0100 + [Trail]
  EXPECT_EQ("SRRRRF",
            ProcessSequenceForward(&machine, empty_preceding,
                                   AsCodePoints(kHanSIP, kVS17, kTrail)));
  EXPECT_EQ(4, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+20000 + U+E0100 + [Lead] + 'a'
  EXPECT_EQ("SRRRRRF",
            ProcessSequenceForward(&machine, empty_preceding,
                                   AsCodePoints(kHanSIP, kVS17, kLead, 'a')));
  EXPECT_EQ(4, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+20000 + U+E0100 + [Lead] + [Lead]
  EXPECT_EQ("SRRRRRF",
            ProcessSequenceForward(
                &machine, empty_preceding,
                AsCodePoints(kHanSIP, kVS17, kLead, kLead)));
  EXPECT_EQ(4, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + U+20000 + U+E0100 + [Lead] + EOT
  EXPECT_EQ("SRRRRR",
            ProcessSequenceForward(&machine, empty_preceding,
                                   AsCodePoints(kHanSIP, kVS17, kLead)));
  EXPECT_EQ(4, machine.FinalizeAndGetBoundaryOffset());
}
464
// Exercises a long ZWJ emoji sequence. The expected boundary offset is 11 in
// every case, regardless of what follows the sequence or what precedes the
// initial offset.
TEST_F(ForwardGraphemeBoundaryStatemachineTest, MuchLongerCase) {
  ForwardGraphemeBoundaryStateMachine machine;

  const UChar32 kMan = WTF::unicode::kManCharacter;
  const UChar32 kZwj = WTF::unicode::kZeroWidthJoinerCharacter;
  const UChar32 kHeart = WTF::unicode::kHeavyBlackHeartCharacter;
  const UChar32 kKiss = WTF::unicode::kKissMarkCharacter;

  // U+1F468 U+200D U+2764 U+FE0F U+200D U+1F48B U+200D U+1F468 is a valid ZWJ
  // emoji sequence.

  // No preceding text (SOT).
  const auto empty_preceding = AsCodePoints();
  // The ZWJ emoji sequence followed by 'a'; reused by the first case and by
  // all of the preceding-text cases in the second half of this test.
  const auto sequence_then_a = AsCodePoints(kMan, kZwj, kHeart, kVS16, kZwj,
                                            kKiss, kZwj, kMan, 'a');

  // SOT + | + ZWJ Emoji Sequence + 'a'
  EXPECT_EQ("SRRRRRRRRRRRF",
            ProcessSequenceForward(&machine, empty_preceding,
                                   sequence_then_a));
  EXPECT_EQ(11, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + ZWJ Emoji Sequence + U+1F441
  EXPECT_EQ("SRRRRRRRRRRRRF",
            ProcessSequenceForward(&machine, empty_preceding,
                                   AsCodePoints(kMan, kZwj, kHeart, kVS16, kZwj,
                                                kKiss, kZwj, kMan, kEye)));
  EXPECT_EQ(11, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + ZWJ Emoji Sequence + EOT
  EXPECT_EQ("SRRRRRRRRRRR",
            ProcessSequenceForward(&machine, empty_preceding,
                                   AsCodePoints(kMan, kZwj, kHeart, kVS16, kZwj,
                                                kKiss, kZwj, kMan)));
  EXPECT_EQ(11, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + ZWJ Emoji Sequence + [Trail]
  EXPECT_EQ("SRRRRRRRRRRRF",
            ProcessSequenceForward(&machine, empty_preceding,
                                   AsCodePoints(kMan, kZwj, kHeart, kVS16, kZwj,
                                                kKiss, kZwj, kMan, kTrail)));
  EXPECT_EQ(11, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + ZWJ Emoji Sequence + [Lead] + 'a'
  EXPECT_EQ(
      "SRRRRRRRRRRRRF",
      ProcessSequenceForward(&machine, empty_preceding,
                             AsCodePoints(kMan, kZwj, kHeart, kVS16, kZwj,
                                          kKiss, kZwj, kMan, kLead, 'a')));
  EXPECT_EQ(11, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + ZWJ Emoji Sequence + [Lead] + [Lead]
  EXPECT_EQ(
      "SRRRRRRRRRRRRF",
      ProcessSequenceForward(&machine, empty_preceding,
                             AsCodePoints(kMan, kZwj, kHeart, kVS16, kZwj,
                                          kKiss, kZwj, kMan, kLead, kLead)));
  EXPECT_EQ(11, machine.FinalizeAndGetBoundaryOffset());

  // SOT + | + ZWJ Emoji Sequence + [Lead] + EOT
  EXPECT_EQ("SRRRRRRRRRRRR",
            ProcessSequenceForward(&machine, empty_preceding,
                                   AsCodePoints(kMan, kZwj, kHeart, kVS16, kZwj,
                                                kKiss, kZwj, kMan, kLead)));
  EXPECT_EQ(11, machine.FinalizeAndGetBoundaryOffset());

  // Preceding text should not affect the result except for flags.
  // All remaining cases search through "ZWJ Emoji Sequence + 'a'".

  // 'a' + | + ZWJ Emoji Sequence + 'a'
  EXPECT_EQ("SRRRRRRRRRRRF",
            ProcessSequenceForward(&machine, AsCodePoints('a'),
                                   sequence_then_a));
  EXPECT_EQ(11, machine.FinalizeAndGetBoundaryOffset());

  // U+1F441 + | + ZWJ Emoji Sequence + 'a'
  EXPECT_EQ("RSRRRRRRRRRRRF",
            ProcessSequenceForward(&machine, AsCodePoints(kEye),
                                   sequence_then_a));
  EXPECT_EQ(11, machine.FinalizeAndGetBoundaryOffset());

  // [Lead] + | + ZWJ Emoji Sequence + 'a'
  EXPECT_EQ("SRRRRRRRRRRRF",
            ProcessSequenceForward(&machine, AsCodePoints(kLead),
                                   sequence_then_a));
  EXPECT_EQ(11, machine.FinalizeAndGetBoundaryOffset());

  // 'a' + [Trail] + | + ZWJ Emoji Sequence + 'a'
  EXPECT_EQ("RSRRRRRRRRRRRF",
            ProcessSequenceForward(&machine, AsCodePoints('a', kTrail),
                                   sequence_then_a));
  EXPECT_EQ(11, machine.FinalizeAndGetBoundaryOffset());

  // [Trail] + [Trail] + | + ZWJ Emoji Sequence + 'a'
  EXPECT_EQ("RSRRRRRRRRRRRF",
            ProcessSequenceForward(&machine, AsCodePoints(kTrail, kTrail),
                                   sequence_then_a));
  EXPECT_EQ(11, machine.FinalizeAndGetBoundaryOffset());

  // SOT + [Trail] + | + ZWJ Emoji Sequence + 'a'
  EXPECT_EQ("RSRRRRRRRRRRRF",
            ProcessSequenceForward(&machine, AsCodePoints(kTrail),
                                   sequence_then_a));
  EXPECT_EQ(11, machine.FinalizeAndGetBoundaryOffset());

  // 'a' + [U] + | + ZWJ Emoji Sequence + 'a'
  EXPECT_EQ("RRSRRRRRRRRRRRF",
            ProcessSequenceForward(&machine, AsCodePoints('a', kRisU),
                                   sequence_then_a));
  EXPECT_EQ(11, machine.FinalizeAndGetBoundaryOffset());

  // 'a' + [U] + [S] + | + ZWJ Emoji Sequence + 'a'
  EXPECT_EQ("RRRRSRRRRRRRRRRRF",
            ProcessSequenceForward(&machine, AsCodePoints('a', kRisU, kRisS),
                                   sequence_then_a));
  EXPECT_EQ(11, machine.FinalizeAndGetBoundaryOffset());
}
582
// The following text is one flag, [U] + [S], and the preceding text contains
// no regional indicator. The expected boundary offset is 4 in every case,
// i.e. the two regional indicators stay together.
TEST_F(ForwardGraphemeBoundaryStatemachineTest, singleFlags) {
  ForwardGraphemeBoundaryStateMachine machine;

  // Text after the initial offset, shared by every case below.
  const auto flag = AsCodePoints(kRisU, kRisS);

  // SOT + | + [U] + [S]
  EXPECT_EQ("SRRRF", ProcessSequenceForward(&machine, AsCodePoints(), flag));
  EXPECT_EQ(4, machine.FinalizeAndGetBoundaryOffset());

  // 'a' + | + [U] + [S]
  EXPECT_EQ("SRRRF",
            ProcessSequenceForward(&machine, AsCodePoints('a'), flag));
  EXPECT_EQ(4, machine.FinalizeAndGetBoundaryOffset());

  // U+1F441 + | + [U] + [S]
  EXPECT_EQ("RSRRRF",
            ProcessSequenceForward(&machine, AsCodePoints(kEye), flag));
  EXPECT_EQ(4, machine.FinalizeAndGetBoundaryOffset());

  // [Lead] + | + [U] + [S]
  EXPECT_EQ("SRRRF",
            ProcessSequenceForward(&machine, AsCodePoints(kLead), flag));
  EXPECT_EQ(4, machine.FinalizeAndGetBoundaryOffset());

  // 'a' + [Trail] + | + [U] + [S]
  EXPECT_EQ("RSRRRF",
            ProcessSequenceForward(&machine, AsCodePoints('a', kTrail), flag));
  EXPECT_EQ(4, machine.FinalizeAndGetBoundaryOffset());

  // [Trail] + [Trail] + | + [U] + [S]
  EXPECT_EQ("RSRRRF",
            ProcessSequenceForward(&machine, AsCodePoints(kTrail, kTrail),
                                   flag));
  EXPECT_EQ(4, machine.FinalizeAndGetBoundaryOffset());

  // SOT + [Trail] + | + [U] + [S]
  EXPECT_EQ("RSRRRF",
            ProcessSequenceForward(&machine, AsCodePoints(kTrail), flag));
  EXPECT_EQ(4, machine.FinalizeAndGetBoundaryOffset());
}
623
// A complete flag, [U] + [S], sits immediately before the initial offset and
// another one follows it. The expected boundary offset is 4 in every case,
// i.e. the following flag is kept whole.
TEST_F(ForwardGraphemeBoundaryStatemachineTest, twoFlags) {
  ForwardGraphemeBoundaryStateMachine machine;

  // Text after the initial offset, shared by every case below.
  const auto flag = AsCodePoints(kRisU, kRisS);

  // SOT + [U] + [S] + | + [U] + [S]
  EXPECT_EQ("RRRRSRRRF",
            ProcessSequenceForward(&machine, AsCodePoints(kRisU, kRisS), flag));
  EXPECT_EQ(4, machine.FinalizeAndGetBoundaryOffset());

  // 'a' + [U] + [S] + | + [U] + [S]
  EXPECT_EQ("RRRRSRRRF",
            ProcessSequenceForward(&machine, AsCodePoints('a', kRisU, kRisS),
                                   flag));
  EXPECT_EQ(4, machine.FinalizeAndGetBoundaryOffset());

  // U+1F441 + [U] + [S] + | + [U] + [S]
  EXPECT_EQ("RRRRRSRRRF",
            ProcessSequenceForward(&machine, AsCodePoints(kEye, kRisU, kRisS),
                                   flag));
  EXPECT_EQ(4, machine.FinalizeAndGetBoundaryOffset());

  // [Lead] + [U] + [S] + | + [U] + [S]
  EXPECT_EQ("RRRRSRRRF",
            ProcessSequenceForward(&machine, AsCodePoints(kLead, kRisU, kRisS),
                                   flag));
  EXPECT_EQ(4, machine.FinalizeAndGetBoundaryOffset());

  // 'a' + [Trail] + [U] + [S] + | + [U] + [S]
  EXPECT_EQ("RRRRRSRRRF",
            ProcessSequenceForward(&machine,
                                   AsCodePoints('a', kTrail, kRisU, kRisS),
                                   flag));
  EXPECT_EQ(4, machine.FinalizeAndGetBoundaryOffset());

  // [Trail] + [Trail] + [U] + [S] + | + [U] + [S]
  EXPECT_EQ("RRRRRSRRRF",
            ProcessSequenceForward(&machine,
                                   AsCodePoints(kTrail, kTrail, kRisU, kRisS),
                                   flag));
  EXPECT_EQ(4, machine.FinalizeAndGetBoundaryOffset());

  // SOT + [Trail] + [U] + [S] + | + [U] + [S]
  EXPECT_EQ("RRRRRSRRRF",
            ProcessSequenceForward(&machine,
                                   AsCodePoints(kTrail, kRisU, kRisS), flag));
  EXPECT_EQ(4, machine.FinalizeAndGetBoundaryOffset());
}
670
// A single regional indicator, [U], sits immediately before the initial
// offset and [S] + [U] follows it. The expected boundary offset is 2 in every
// case, i.e. the boundary falls right after the first following indicator.
TEST_F(ForwardGraphemeBoundaryStatemachineTest, oddNumberedFlags) {
  ForwardGraphemeBoundaryStateMachine machine;

  // Text after the initial offset, shared by every case below.
  const auto following = AsCodePoints(kRisS, kRisU);

  // SOT + [U] + | + [S] + [U]
  EXPECT_EQ("RRSRRRF",
            ProcessSequenceForward(&machine, AsCodePoints(kRisU), following));
  EXPECT_EQ(2, machine.FinalizeAndGetBoundaryOffset());

  // 'a' + [U] + | + [S] + [U]
  EXPECT_EQ("RRSRRRF",
            ProcessSequenceForward(&machine, AsCodePoints('a', kRisU),
                                   following));
  EXPECT_EQ(2, machine.FinalizeAndGetBoundaryOffset());

  // U+1F441 + [U] + | + [S] + [U]
  EXPECT_EQ("RRRSRRRF",
            ProcessSequenceForward(&machine, AsCodePoints(kEye, kRisU),
                                   following));
  EXPECT_EQ(2, machine.FinalizeAndGetBoundaryOffset());

  // [Lead] + [U] + | + [S] + [U]
  EXPECT_EQ("RRSRRRF",
            ProcessSequenceForward(&machine, AsCodePoints(kLead, kRisU),
                                   following));
  EXPECT_EQ(2, machine.FinalizeAndGetBoundaryOffset());

  // 'a' + [Trail] + [U] + | + [S] + [U]
  EXPECT_EQ("RRRSRRRF",
            ProcessSequenceForward(&machine, AsCodePoints('a', kTrail, kRisU),
                                   following));
  EXPECT_EQ(2, machine.FinalizeAndGetBoundaryOffset());

  // [Trail] + [Trail] + [U] + | + [S] + [U]
  EXPECT_EQ("RRRSRRRF",
            ProcessSequenceForward(&machine,
                                   AsCodePoints(kTrail, kTrail, kRisU),
                                   following));
  EXPECT_EQ(2, machine.FinalizeAndGetBoundaryOffset());

  // SOT + [Trail] + [U] + | + [S] + [U]
  EXPECT_EQ("RRRSRRRF",
            ProcessSequenceForward(&machine, AsCodePoints(kTrail, kRisU),
                                   following));
  EXPECT_EQ(2, machine.FinalizeAndGetBoundaryOffset());
}
715
716 } // namespace forward_grapheme_boundary_state_machine_test
717
718 } // namespace blink
719