1 /*
2 * Implementation of line breaking algorithm for the Uniscribe Script Processor
3 *
4 * Copyright 2011 CodeWeavers, Aric Stewart
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 *
20 */
21
22 #include <stdarg.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25
26 #include "windef.h"
27 #include "winbase.h"
28 #include "winuser.h"
29 #include "wingdi.h"
30 #include "winnls.h"
31 #include "usp10.h"
32 #include "winternl.h"
33
34 #include "wine/debug.h"
35 #include "wine/heap.h"
36 #include "usp10_internal.h"
37
38 WINE_DEFAULT_DEBUG_CHANNEL(uniscribe);
39
40 extern const unsigned short wine_linebreak_table[] DECLSPEC_HIDDEN;
41
42 enum breaking_types {
43 b_BK=1, b_CR, b_LF, b_CM, b_SG, b_GL, b_CB, b_SP, b_ZW, b_NL, b_WJ, b_JL, b_JV, b_JT, b_H2, b_H3, b_XX, b_OP, b_CL,
44 b_CP, b_QU, b_NS, b_EX, b_SY, b_IS, b_PR, b_PO, b_NU, b_AL, b_ID, b_IN, b_HY, b_BB, b_BA, b_SA, b_AI, b_B2, b_HL,
45 b_CJ, b_RI, b_EB, b_EM, b_ZWJ
46 };
47
48 enum breaking_class {b_r=1, b_s, b_x};
49
debug_output_breaks(const short * breaks,int count)50 static void debug_output_breaks(const short* breaks, int count)
51 {
52 if (TRACE_ON(uniscribe))
53 {
54 int i;
55 TRACE("[");
56 for (i = 0; i < count && i < 200; i++)
57 {
58 switch (breaks[i])
59 {
60 case b_x: TRACE("x"); break;
61 case b_r: TRACE("!"); break;
62 case b_s: TRACE("+"); break;
63 default: TRACE("*");
64 }
65 }
66 if (i == 200)
67 TRACE("...");
68 TRACE("]\n");
69 }
70 }
71
else_break(short * before,short class)72 static inline void else_break(short* before, short class)
73 {
74 if (*before == 0) *before = class;
75 }
76
BREAK_line(const WCHAR * chars,int count,const SCRIPT_ANALYSIS * sa,SCRIPT_LOGATTR * la)77 void BREAK_line(const WCHAR *chars, int count, const SCRIPT_ANALYSIS *sa, SCRIPT_LOGATTR *la)
78 {
79 int i,j;
80 short *break_class;
81 short *break_before;
82
83 TRACE("In %s\n",debugstr_wn(chars,count));
84
85 break_class = heap_alloc(count * sizeof(*break_class));
86 break_before = heap_alloc(count * sizeof(*break_before));
87
88 for (i = 0; i < count; i++)
89 {
90 break_class[i] = get_table_entry( wine_linebreak_table, chars[i] );
91 break_before[i] = 0;
92
93 memset(&la[i],0,sizeof(SCRIPT_LOGATTR));
94
95 la[i].fCharStop = TRUE;
96 switch (break_class[i])
97 {
98 case b_BK:
99 case b_ZW:
100 case b_SP:
101 la[i].fWhiteSpace = TRUE;
102 break;
103 case b_CM:
104 la[i].fCharStop = FALSE;
105 break;
106 }
107 }
108
109 /* LB1 */
110 /* TODO: Have outside algorithms for these scripts */
111 for (i = 0; i < count; i++)
112 {
113 switch(break_class[i])
114 {
115 case b_AI:
116 case b_SA:
117 case b_SG:
118 case b_XX:
119 break_class[i] = b_AL;
120 break;
121 case b_CJ:
122 break_class[i] = b_NS;
123 break;
124 }
125 }
126
127 /* LB2 - LB3 */
128 break_before[0] = b_x;
129 for (i = 0; i < count; i++)
130 {
131 switch(break_class[i])
132 {
133 /* LB4 - LB6 */
134 case b_CR:
135 if (i < count-1 && break_class[i+1] == b_LF)
136 {
137 else_break(&break_before[i],b_x);
138 else_break(&break_before[i+1],b_x);
139 break;
140 }
141 case b_LF:
142 case b_NL:
143 case b_BK:
144 if (i < count-1) else_break(&break_before[i+1],b_r);
145 else_break(&break_before[i],b_x);
146 break;
147 /* LB7 */
148 case b_SP:
149 else_break(&break_before[i],b_x);
150 break;
151 case b_ZW:
152 else_break(&break_before[i],b_x);
153 /* LB8 */
154 while (i < count-1 && break_class[i+1] == b_SP)
155 i++;
156 else_break(&break_before[i],b_s);
157 break;
158 }
159 }
160
161 debug_output_breaks(break_before,count);
162
163 /* LB9 - LB10 */
164 for (i = 0; i < count; i++)
165 {
166 if (break_class[i] == b_CM)
167 {
168 if (i > 0)
169 {
170 switch (break_class[i-1])
171 {
172 case b_SP:
173 case b_BK:
174 case b_CR:
175 case b_LF:
176 case b_NL:
177 case b_ZW:
178 break_class[i] = b_AL;
179 break;
180 default:
181 break_class[i] = break_class[i-1];
182 }
183 }
184 else break_class[i] = b_AL;
185 }
186 }
187
188 for (i = 0; i < count; i++)
189 {
190 switch(break_class[i])
191 {
192 /* LB11 */
193 case b_WJ:
194 else_break(&break_before[i],b_x);
195 if (i < count-1)
196 else_break(&break_before[i+1],b_x);
197 break;
198 /* LB12 */
199 case b_GL:
200 if (i < count-1)
201 else_break(&break_before[i+1],b_x);
202 /* LB12a */
203 if (i > 0)
204 {
205 if (break_class[i-1] != b_SP &&
206 break_class[i-1] != b_BA &&
207 break_class[i-1] != b_HY)
208 else_break(&break_before[i],b_x);
209 }
210 break;
211 /* LB13 */
212 case b_CL:
213 case b_CP:
214 case b_EX:
215 case b_IS:
216 case b_SY:
217 else_break(&break_before[i],b_x);
218 break;
219 /* LB14 */
220 case b_OP:
221 while (i < count-1 && break_class[i+1] == b_SP)
222 {
223 else_break(&break_before[i+1],b_x);
224 i++;
225 }
226 else_break(&break_before[i+1],b_x);
227 break;
228 /* LB15 */
229 case b_QU:
230 j = i+1;
231 while (j < count-1 && break_class[j] == b_SP)
232 j++;
233 if (break_class[j] == b_OP)
234 {
235 for (; j > i; j--)
236 else_break(&break_before[j],b_x);
237 }
238 break;
239 /* LB16 */
240 case b_NS:
241 j = i-1;
242 while(j > 0 && break_class[j] == b_SP)
243 j--;
244 if (break_class[j] == b_CL || break_class[j] == b_CP)
245 {
246 for (j++; j <= i; j++)
247 else_break(&break_before[j],b_x);
248 }
249 break;
250 /* LB17 */
251 case b_B2:
252 j = i+1;
253 while (j < count && break_class[j] == b_SP)
254 j++;
255 if (break_class[j] == b_B2)
256 {
257 for (; j > i; j--)
258 else_break(&break_before[j],b_x);
259 }
260 break;
261 }
262 }
263
264 debug_output_breaks(break_before,count);
265
266 for (i = 0; i < count; i++)
267 {
268 switch(break_class[i])
269 {
270 /* LB18 */
271 case b_SP:
272 if (i < count-1)
273 else_break(&break_before[i+1],b_s);
274 break;
275 /* LB19 */
276 case b_QU:
277 else_break(&break_before[i],b_x);
278 if (i < count-1)
279 else_break(&break_before[i+1],b_x);
280 break;
281 /* LB20 */
282 case b_CB:
283 else_break(&break_before[i],b_s);
284 if (i < count-1)
285 else_break(&break_before[i+1],b_s);
286 break;
287 /* LB21 */
288 case b_BA:
289 case b_HY:
290 case b_NS:
291 else_break(&break_before[i],b_x);
292 break;
293 case b_BB:
294 if (i < count-1)
295 else_break(&break_before[i+1],b_x);
296 break;
297 /* LB21a */
298 case b_HL:
299 if (i < count-2)
300 switch (break_class[i+1])
301 {
302 case b_HY:
303 case b_BA:
304 else_break(&break_before[i+2], b_x);
305 }
306 break;
307 /* LB22 */
308 case b_IN:
309 if (i > 0)
310 {
311 switch (break_class[i-1])
312 {
313 case b_AL:
314 case b_HL:
315 case b_ID:
316 case b_IN:
317 case b_NU:
318 else_break(&break_before[i], b_x);
319 }
320 }
321 break;
322 }
323
324 if (i < count-1)
325 {
326 /* LB23 */
327 if ((break_class[i] == b_ID && break_class[i+1] == b_PO) ||
328 (break_class[i] == b_AL && break_class[i+1] == b_NU) ||
329 (break_class[i] == b_HL && break_class[i+1] == b_NU) ||
330 (break_class[i] == b_NU && break_class[i+1] == b_AL) ||
331 (break_class[i] == b_NU && break_class[i+1] == b_HL))
332 else_break(&break_before[i+1],b_x);
333 /* LB24 */
334 if ((break_class[i] == b_PR && break_class[i+1] == b_ID) ||
335 (break_class[i] == b_PR && break_class[i+1] == b_AL) ||
336 (break_class[i] == b_PR && break_class[i+1] == b_HL) ||
337 (break_class[i] == b_PO && break_class[i+1] == b_AL) ||
338 (break_class[i] == b_PO && break_class[i+1] == b_HL))
339 else_break(&break_before[i+1],b_x);
340
341 /* LB25 */
342 if ((break_class[i] == b_CL && break_class[i+1] == b_PO) ||
343 (break_class[i] == b_CP && break_class[i+1] == b_PO) ||
344 (break_class[i] == b_CL && break_class[i+1] == b_PR) ||
345 (break_class[i] == b_CP && break_class[i+1] == b_PR) ||
346 (break_class[i] == b_NU && break_class[i+1] == b_PO) ||
347 (break_class[i] == b_NU && break_class[i+1] == b_PR) ||
348 (break_class[i] == b_PO && break_class[i+1] == b_OP) ||
349 (break_class[i] == b_PO && break_class[i+1] == b_NU) ||
350 (break_class[i] == b_PR && break_class[i+1] == b_OP) ||
351 (break_class[i] == b_PR && break_class[i+1] == b_NU) ||
352 (break_class[i] == b_HY && break_class[i+1] == b_NU) ||
353 (break_class[i] == b_IS && break_class[i+1] == b_NU) ||
354 (break_class[i] == b_NU && break_class[i+1] == b_NU) ||
355 (break_class[i] == b_SY && break_class[i+1] == b_NU))
356 else_break(&break_before[i+1],b_x);
357
358 /* LB26 */
359 if (break_class[i] == b_JL)
360 {
361 switch (break_class[i+1])
362 {
363 case b_JL:
364 case b_JV:
365 case b_H2:
366 case b_H3:
367 else_break(&break_before[i+1],b_x);
368 }
369 }
370 if ((break_class[i] == b_JV || break_class[i] == b_H2) &&
371 (break_class[i+1] == b_JV || break_class[i+1] == b_JT))
372 else_break(&break_before[i+1],b_x);
373 if ((break_class[i] == b_JT || break_class[i] == b_H3) &&
374 break_class[i+1] == b_JT)
375 else_break(&break_before[i+1],b_x);
376
377 /* LB27 */
378 switch (break_class[i])
379 {
380 case b_JL:
381 case b_JV:
382 case b_JT:
383 case b_H2:
384 case b_H3:
385 if (break_class[i+1] == b_IN || break_class[i+1] == b_PO)
386 else_break(&break_before[i+1],b_x);
387 }
388 if (break_class[i] == b_PR)
389 {
390 switch (break_class[i+1])
391 {
392 case b_JL:
393 case b_JV:
394 case b_JT:
395 case b_H2:
396 case b_H3:
397 else_break(&break_before[i+1],b_x);
398 }
399 }
400
401 /* LB28 */
402 if ((break_class[i] == b_AL && break_class[i+1] == b_AL) ||
403 (break_class[i] == b_AL && break_class[i+1] == b_HL) ||
404 (break_class[i] == b_HL && break_class[i+1] == b_AL) ||
405 (break_class[i] == b_HL && break_class[i+1] == b_HL))
406 else_break(&break_before[i+1],b_x);
407
408 /* LB29 */
409 if ((break_class[i] == b_IS && break_class[i+1] == b_AL) ||
410 (break_class[i] == b_IS && break_class[i+1] == b_HL))
411 else_break(&break_before[i+1],b_x);
412
413 /* LB30 */
414 if ((break_class[i] == b_AL || break_class[i] == b_HL || break_class[i] == b_NU) &&
415 break_class[i+1] == b_OP)
416 else_break(&break_before[i+1],b_x);
417 if (break_class[i] == b_CP &&
418 (break_class[i+1] == b_AL || break_class[i+1] == b_HL || break_class[i+1] == b_NU))
419 else_break(&break_before[i+1],b_x);
420
421 /* LB30a */
422 if (break_class[i] == b_RI && break_class[i+1] == b_RI)
423 else_break(&break_before[i+1],b_x);
424 }
425 }
426 debug_output_breaks(break_before,count);
427
428 /* LB31 */
429 for (i = 0; i < count-1; i++)
430 else_break(&break_before[i+1],b_s);
431
432 debug_output_breaks(break_before,count);
433 for (i = 0; i < count; i++)
434 {
435 if (break_before[i] != b_x)
436 {
437 la[i].fSoftBreak = TRUE;
438 la[i].fWordStop = TRUE;
439 }
440 }
441
442 heap_free(break_before);
443 heap_free(break_class);
444 }
445