1 /* 2 * Implementation of line breaking algorithm for the Uniscribe Script Processor 3 * 4 * Copyright 2011 CodeWeavers, Aric Stewart 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 19 * 20 */ 21 22 #include <stdarg.h> 23 #include <stdio.h> 24 #include <stdlib.h> 25 26 #include "windef.h" 27 #include "winbase.h" 28 #include "winuser.h" 29 #include "wingdi.h" 30 #include "winnls.h" 31 #include "usp10.h" 32 #include "winternl.h" 33 34 #include "wine/debug.h" 35 #include "wine/heap.h" 36 #include "usp10_internal.h" 37 38 WINE_DEFAULT_DEBUG_CHANNEL(uniscribe); 39 40 extern const unsigned short wine_linebreak_table[] DECLSPEC_HIDDEN; 41 42 enum breaking_types { 43 b_BK=1, b_CR, b_LF, b_CM, b_SG, b_GL, b_CB, b_SP, b_ZW, b_NL, b_WJ, b_JL, b_JV, b_JT, b_H2, b_H3, b_XX, b_OP, b_CL, 44 b_CP, b_QU, b_NS, b_EX, b_SY, b_IS, b_PR, b_PO, b_NU, b_AL, b_ID, b_IN, b_HY, b_BB, b_BA, b_SA, b_AI, b_B2, b_HL, 45 b_CJ, b_RI, b_EB, b_EM, b_ZWJ 46 }; 47 48 enum breaking_class {b_r=1, b_s, b_x}; 49 50 static void debug_output_breaks(const short* breaks, int count) 51 { 52 if (TRACE_ON(uniscribe)) 53 { 54 int i; 55 TRACE("["); 56 for (i = 0; i < count && i < 200; i++) 57 { 58 switch (breaks[i]) 59 { 60 case b_x: TRACE("x"); break; 61 case b_r: TRACE("!"); break; 62 case b_s: TRACE("+"); break; 63 default: TRACE("*"); 64 } 65 } 66 if (i == 200) 67 TRACE("..."); 68 TRACE("]\n"); 69 } 70 } 71 72 static inline void else_break(short* before, short class) 73 { 74 if (*before == 0) *before = class; 75 } 76 77 void BREAK_line(const WCHAR *chars, int count, const SCRIPT_ANALYSIS *sa, SCRIPT_LOGATTR *la) 78 { 79 int i,j; 80 short *break_class; 81 short *break_before; 82 83 TRACE("In %s\n",debugstr_wn(chars,count)); 84 85 break_class = heap_alloc(count * sizeof(*break_class)); 86 break_before = heap_alloc(count * sizeof(*break_before)); 87 88 for (i = 0; i < count; i++) 89 { 90 break_class[i] = get_table_entry( wine_linebreak_table, chars[i] ); 91 break_before[i] = 0; 92 93 memset(&la[i],0,sizeof(SCRIPT_LOGATTR)); 94 95 la[i].fCharStop = TRUE; 96 switch (break_class[i]) 97 { 98 case b_BK: 99 case b_ZW: 100 case b_SP: 101 la[i].fWhiteSpace = TRUE; 102 break; 103 case b_CM: 104 la[i].fCharStop = FALSE; 105 break; 106 } 107 } 108 109 /* LB1 */ 110 /* TODO: Have outside algorithms for these scripts */ 111 for (i = 0; i < count; i++) 112 { 113 switch(break_class[i]) 114 { 115 case b_AI: 116 case b_SA: 117 case b_SG: 118 case b_XX: 119 break_class[i] = b_AL; 120 break; 121 case b_CJ: 122 break_class[i] = b_NS; 123 break; 124 } 125 } 126 127 /* LB2 - LB3 */ 128 break_before[0] = b_x; 129 for (i = 0; i < count; i++) 130 { 131 switch(break_class[i]) 132 { 133 /* LB4 - LB6 */ 134 case b_CR: 135 if (i < count-1 && break_class[i+1] == b_LF) 136 { 137 else_break(&break_before[i],b_x); 138 else_break(&break_before[i+1],b_x); 139 break; 140 } 141 case b_LF: 142 case b_NL: 143 case b_BK: 144 if (i < count-1) else_break(&break_before[i+1],b_r); 145 else_break(&break_before[i],b_x); 146 break; 147 /* LB7 */ 148 case b_SP: 149 else_break(&break_before[i],b_x); 150 break; 151 case b_ZW: 152 else_break(&break_before[i],b_x); 153 /* LB8 */ 154 while (i < count-1 && break_class[i+1] == b_SP) 155 i++; 156 else_break(&break_before[i],b_s); 157 break; 158 } 159 } 160 161 debug_output_breaks(break_before,count); 162 163 /* LB9 - LB10 */ 164 for (i = 0; i < count; i++) 165 { 166 if (break_class[i] == b_CM) 167 { 168 if (i > 0) 169 { 170 switch (break_class[i-1]) 171 { 172 case b_SP: 173 case b_BK: 174 case b_CR: 175 case b_LF: 176 case b_NL: 177 case b_ZW: 178 break_class[i] = b_AL; 179 break; 180 default: 181 break_class[i] = break_class[i-1]; 182 } 183 } 184 else break_class[i] = b_AL; 185 } 186 } 187 188 for (i = 0; i < count; i++) 189 { 190 switch(break_class[i]) 191 { 192 /* LB11 */ 193 case b_WJ: 194 else_break(&break_before[i],b_x); 195 if (i < count-1) 196 else_break(&break_before[i+1],b_x); 197 break; 198 /* LB12 */ 199 case b_GL: 200 if (i < count-1) 201 else_break(&break_before[i+1],b_x); 202 /* LB12a */ 203 if (i > 0) 204 { 205 if (break_class[i-1] != b_SP && 206 break_class[i-1] != b_BA && 207 break_class[i-1] != b_HY) 208 else_break(&break_before[i],b_x); 209 } 210 break; 211 /* LB13 */ 212 case b_CL: 213 case b_CP: 214 case b_EX: 215 case b_IS: 216 case b_SY: 217 else_break(&break_before[i],b_x); 218 break; 219 /* LB14 */ 220 case b_OP: 221 while (i < count-1 && break_class[i+1] == b_SP) 222 { 223 else_break(&break_before[i+1],b_x); 224 i++; 225 } 226 else_break(&break_before[i+1],b_x); 227 break; 228 /* LB15 */ 229 case b_QU: 230 j = i+1; 231 while (j < count-1 && break_class[j] == b_SP) 232 j++; 233 if (break_class[j] == b_OP) 234 { 235 for (; j > i; j--) 236 else_break(&break_before[j],b_x); 237 } 238 break; 239 /* LB16 */ 240 case b_NS: 241 j = i-1; 242 while(j > 0 && break_class[j] == b_SP) 243 j--; 244 if (break_class[j] == b_CL || break_class[j] == b_CP) 245 { 246 for (j++; j <= i; j++) 247 else_break(&break_before[j],b_x); 248 } 249 break; 250 /* LB17 */ 251 case b_B2: 252 j = i+1; 253 while (j < count && break_class[j] == b_SP) 254 j++; 255 if (break_class[j] == b_B2) 256 { 257 for (; j > i; j--) 258 else_break(&break_before[j],b_x); 259 } 260 break; 261 } 262 } 263 264 debug_output_breaks(break_before,count); 265 266 for (i = 0; i < count; i++) 267 { 268 switch(break_class[i]) 269 { 270 /* LB18 */ 271 case b_SP: 272 if (i < count-1) 273 else_break(&break_before[i+1],b_s); 274 break; 275 /* LB19 */ 276 case b_QU: 277 else_break(&break_before[i],b_x); 278 if (i < count-1) 279 else_break(&break_before[i+1],b_x); 280 break; 281 /* LB20 */ 282 case b_CB: 283 else_break(&break_before[i],b_s); 284 if (i < count-1) 285 else_break(&break_before[i+1],b_s); 286 break; 287 /* LB21 */ 288 case b_BA: 289 case b_HY: 290 case b_NS: 291 else_break(&break_before[i],b_x); 292 break; 293 case b_BB: 294 if (i < count-1) 295 else_break(&break_before[i+1],b_x); 296 break; 297 /* LB21a */ 298 case b_HL: 299 if (i < count-2) 300 switch (break_class[i+1]) 301 { 302 case b_HY: 303 case b_BA: 304 else_break(&break_before[i+2], b_x); 305 } 306 break; 307 /* LB22 */ 308 case b_IN: 309 if (i > 0) 310 { 311 switch (break_class[i-1]) 312 { 313 case b_AL: 314 case b_HL: 315 case b_ID: 316 case b_IN: 317 case b_NU: 318 else_break(&break_before[i], b_x); 319 } 320 } 321 break; 322 } 323 324 if (i < count-1) 325 { 326 /* LB23 */ 327 if ((break_class[i] == b_ID && break_class[i+1] == b_PO) || 328 (break_class[i] == b_AL && break_class[i+1] == b_NU) || 329 (break_class[i] == b_HL && break_class[i+1] == b_NU) || 330 (break_class[i] == b_NU && break_class[i+1] == b_AL) || 331 (break_class[i] == b_NU && break_class[i+1] == b_HL)) 332 else_break(&break_before[i+1],b_x); 333 /* LB24 */ 334 if ((break_class[i] == b_PR && break_class[i+1] == b_ID) || 335 (break_class[i] == b_PR && break_class[i+1] == b_AL) || 336 (break_class[i] == b_PR && break_class[i+1] == b_HL) || 337 (break_class[i] == b_PO && break_class[i+1] == b_AL) || 338 (break_class[i] == b_PO && break_class[i+1] == b_HL)) 339 else_break(&break_before[i+1],b_x); 340 341 /* LB25 */ 342 if ((break_class[i] == b_CL && break_class[i+1] == b_PO) || 343 (break_class[i] == b_CP && break_class[i+1] == b_PO) || 344 (break_class[i] == b_CL && break_class[i+1] == b_PR) || 345 (break_class[i] == b_CP && break_class[i+1] == b_PR) || 346 (break_class[i] == b_NU && break_class[i+1] == b_PO) || 347 (break_class[i] == b_NU && break_class[i+1] == b_PR) || 348 (break_class[i] == b_PO && break_class[i+1] == b_OP) || 349 (break_class[i] == b_PO && break_class[i+1] == b_NU) || 350 (break_class[i] == b_PR && break_class[i+1] == b_OP) || 351 (break_class[i] == b_PR && break_class[i+1] == b_NU) || 352 (break_class[i] == b_HY && break_class[i+1] == b_NU) || 353 (break_class[i] == b_IS && break_class[i+1] == b_NU) || 354 (break_class[i] == b_NU && break_class[i+1] == b_NU) || 355 (break_class[i] == b_SY && break_class[i+1] == b_NU)) 356 else_break(&break_before[i+1],b_x); 357 358 /* LB26 */ 359 if (break_class[i] == b_JL) 360 { 361 switch (break_class[i+1]) 362 { 363 case b_JL: 364 case b_JV: 365 case b_H2: 366 case b_H3: 367 else_break(&break_before[i+1],b_x); 368 } 369 } 370 if ((break_class[i] == b_JV || break_class[i] == b_H2) && 371 (break_class[i+1] == b_JV || break_class[i+1] == b_JT)) 372 else_break(&break_before[i+1],b_x); 373 if ((break_class[i] == b_JT || break_class[i] == b_H3) && 374 break_class[i+1] == b_JT) 375 else_break(&break_before[i+1],b_x); 376 377 /* LB27 */ 378 switch (break_class[i]) 379 { 380 case b_JL: 381 case b_JV: 382 case b_JT: 383 case b_H2: 384 case b_H3: 385 if (break_class[i+1] == b_IN || break_class[i+1] == b_PO) 386 else_break(&break_before[i+1],b_x); 387 } 388 if (break_class[i] == b_PR) 389 { 390 switch (break_class[i+1]) 391 { 392 case b_JL: 393 case b_JV: 394 case b_JT: 395 case b_H2: 396 case b_H3: 397 else_break(&break_before[i+1],b_x); 398 } 399 } 400 401 /* LB28 */ 402 if ((break_class[i] == b_AL && break_class[i+1] == b_AL) || 403 (break_class[i] == b_AL && break_class[i+1] == b_HL) || 404 (break_class[i] == b_HL && break_class[i+1] == b_AL) || 405 (break_class[i] == b_HL && break_class[i+1] == b_HL)) 406 else_break(&break_before[i+1],b_x); 407 408 /* LB29 */ 409 if ((break_class[i] == b_IS && break_class[i+1] == b_AL) || 410 (break_class[i] == b_IS && break_class[i+1] == b_HL)) 411 else_break(&break_before[i+1],b_x); 412 413 /* LB30 */ 414 if ((break_class[i] == b_AL || break_class[i] == b_HL || break_class[i] == b_NU) && 415 break_class[i+1] == b_OP) 416 else_break(&break_before[i+1],b_x); 417 if (break_class[i] == b_CP && 418 (break_class[i+1] == b_AL || break_class[i+1] == b_HL || break_class[i+1] == b_NU)) 419 else_break(&break_before[i+1],b_x); 420 421 /* LB30a */ 422 if (break_class[i] == b_RI && break_class[i+1] == b_RI) 423 else_break(&break_before[i+1],b_x); 424 } 425 } 426 debug_output_breaks(break_before,count); 427 428 /* LB31 */ 429 for (i = 0; i < count-1; i++) 430 else_break(&break_before[i+1],b_s); 431 432 debug_output_breaks(break_before,count); 433 for (i = 0; i < count; i++) 434 { 435 if (break_before[i] != b_x) 436 { 437 la[i].fSoftBreak = TRUE; 438 la[i].fWordStop = TRUE; 439 } 440 } 441 442 heap_free(break_before); 443 heap_free(break_class); 444 } 445