1 /*
2  * linebreak.c - implementation of Linebreak object.
3  *
4  * Copyright (C) 2009-2012 by Hatuka*nezumi - IKEDA Soji.
5  *
6  * This file is part of the Sombok Package.  This program is free
7  * software; you can redistribute it and/or modify it under the terms of
8  * either the GNU General Public License or the Artistic License, as
9  * specified in the README file.
10  *
11  */
12 
13 #include "sombok_constants.h"
14 #include "sombok.h"
15 
16 /** @defgroup linebreak linebreak
17  * @brief Handle linebreak object.
18  *
19  *@{*/
20 
21 /** Constructor
22  *
23  * Creates new linebreak object.
24  * Reference count of it will be set to 1.
25  * @param[in] ref_func function to handle reference count of external objects,
26  * or NULL.
27  * @return New linebreak object.
28  * If error occurred, errno is set then NULL is returned.
29  */
linebreak_new(linebreak_ref_func_t ref_func)30 linebreak_t *linebreak_new(linebreak_ref_func_t ref_func)
31 {
32     linebreak_t *obj;
33     if ((obj = malloc(sizeof(linebreak_t))) == NULL)
34 	return NULL;
35     memset(obj, 0, sizeof(linebreak_t));
36 
37 #ifdef USE_LIBTHAI
38     obj->options = LINEBREAK_OPTION_COMPLEX_BREAKING;
39 #endif				/* USE_LIBTHAI */
40     obj->ref_func = ref_func;
41     obj->refcount = 1UL;
42     return obj;
43 }
44 
45 /** Increase Reference Count
46  *
47  * Increse reference count of linebreak object.
48  * @param[in] obj linebreak object, must not be NULL.
49  * @return linebreak object itself.
50  * If error occurred, errno is set then NULL is returned.
51  */
linebreak_incref(linebreak_t * obj)52 linebreak_t *linebreak_incref(linebreak_t * obj)
53 {
54     obj->refcount += 1UL;
55     return obj;
56 }
57 
58 /** Copy Constructor
59  *
60  * Create deep copy of linebreak object.
61  * Reference count of new object will be set to 1.
62  * If ref_func member of object is not NULL, it will be executed to increase
63  * reference count of prep_data, format_data, sizing_data, urgent_data and
64  * stash members.
65  * @param[in] obj linebreak object, must not be NULL.
66  * @return New linebreak object.
67  * If error occurred, errno is set then NULL is returned.
68  */
linebreak_copy(linebreak_t * obj)69 linebreak_t *linebreak_copy(linebreak_t * obj)
70 {
71     linebreak_t *newobj;
72     mapent_t *newmap;
73     unichar_t *newstr;
74 
75     if (obj == NULL)
76 	return (errno = EINVAL), NULL;
77     if ((newobj = malloc(sizeof(linebreak_t))) == NULL)
78 	return NULL;
79     memcpy(newobj, obj, sizeof(linebreak_t));
80 
81     if (obj->map != NULL && obj->mapsiz) {
82 	if ((newmap = malloc(sizeof(mapent_t) * obj->mapsiz)) == NULL) {
83 	    free(newobj);
84 	    return NULL;
85 	}
86 	memcpy(newmap, obj->map, sizeof(mapent_t) * obj->mapsiz);
87 	newobj->map = newmap;
88     } else
89 	newobj->map = NULL;
90 
91     if (obj->newline.str != NULL && obj->newline.len) {
92 	if ((newstr =
93 	     malloc(sizeof(unichar_t) * obj->newline.len)) == NULL) {
94 	    free(newobj->map);
95 	    free(newobj);
96 	    return NULL;
97 	}
98 	memcpy(newstr, obj->newline.str,
99 	       sizeof(unichar_t) * obj->newline.len);
100 	newobj->newline.str = newstr;
101     } else
102 	newobj->newline.str = NULL;
103 
104     if (obj->bufstr.str != NULL && obj->bufstr.len) {
105 	if ((newstr = malloc(sizeof(unichar_t) * obj->bufstr.len)) == NULL) {
106 	    free(newobj->map);
107 	    free(newobj->newline.str);
108 	    free(newobj);
109 	    return NULL;
110 	}
111 	memcpy(newstr, obj->bufstr.str,
112 	       sizeof(unichar_t) * obj->bufstr.len);
113 	newobj->bufstr.str = newstr;
114     } else
115 	newobj->bufstr.str = NULL;
116 
117     if (obj->bufspc.str != NULL && obj->bufspc.len) {
118 	if ((newstr = malloc(sizeof(unichar_t) * obj->bufspc.len)) == NULL) {
119 	    free(newobj->map);
120 	    free(newobj->newline.str);
121 	    free(newobj->bufstr.str);
122 	    free(newobj);
123 	    return NULL;
124 	}
125 	memcpy(newstr, obj->bufspc.str,
126 	       sizeof(unichar_t) * obj->bufspc.len);
127 	newobj->bufspc.str = newstr;
128     } else
129 	newobj->bufspc.str = NULL;
130 
131     if (obj->unread.str != NULL && obj->unread.len) {
132 	if ((newstr = malloc(sizeof(unichar_t) * obj->unread.len)) == NULL) {
133 	    free(newobj->map);
134 	    free(newobj->newline.str);
135 	    free(newobj->bufstr.str);
136 	    free(newobj->bufspc.str);
137 	    free(newobj);
138 	    return NULL;
139 	}
140 	memcpy(newstr, obj->unread.str,
141 	       sizeof(unichar_t) * obj->unread.len);
142 	newobj->unread.str = newstr;
143     } else
144 	newobj->unread.str = NULL;
145 
146     if (obj->prep_func != NULL) {
147 	size_t i;
148 	for (i = 0; obj->prep_func[i] != NULL; i++);
149 	if ((newobj->prep_func =
150 	     malloc(sizeof(linebreak_prep_func_t) * (i + 1)))
151 	    == NULL) {
152 	    free(newobj->map);
153 	    free(newobj->newline.str);
154 	    free(newobj->bufstr.str);
155 	    free(newobj->bufspc.str);
156 	    free(newobj->unread.str);
157 	    free(newobj);
158 	    return NULL;
159 	}
160 	memcpy(newobj->prep_func, obj->prep_func,
161 	       sizeof(linebreak_prep_func_t) * (i + 1));
162 	if ((newobj->prep_data = malloc(sizeof(void *) * (i + 1))) == NULL) {
163 	    free(newobj->map);
164 	    free(newobj->newline.str);
165 	    free(newobj->bufstr.str);
166 	    free(newobj->bufspc.str);
167 	    free(newobj->unread.str);
168 	    free(newobj->prep_func);
169 	    free(newobj);
170 	    return NULL;
171 	}
172 	if (obj->prep_data == NULL)
173 	    memset(newobj->prep_data, 0, sizeof(void *) * (i + 1));
174 	else
175 	    memcpy(newobj->prep_data, obj->prep_data,
176 		   sizeof(void *) * (i + 1));
177     }
178 
179     if (newobj->ref_func != NULL) {
180 	if (newobj->stash != NULL)
181 	    (*newobj->ref_func) (newobj->stash, LINEBREAK_REF_STASH, +1);
182 	if (newobj->format_data != NULL)
183 	    (*newobj->ref_func) (newobj->format_data, LINEBREAK_REF_FORMAT,
184 				 +1);
185 	if (newobj->prep_data != NULL) {
186 	    size_t i;
187 	    for (i = 0; newobj->prep_func[i] != NULL; i++)
188 		if (newobj->prep_data[i] != NULL)
189 		    (*newobj->ref_func) (newobj->prep_data[i],
190 					 LINEBREAK_REF_PREP, +1);
191 	}
192 	if (newobj->sizing_data != NULL)
193 	    (*newobj->ref_func) (newobj->sizing_data, LINEBREAK_REF_SIZING,
194 				 +1);
195 	if (newobj->urgent_data != NULL)
196 	    (*newobj->ref_func) (newobj->urgent_data, LINEBREAK_REF_URGENT,
197 				 +1);
198 	if (newobj->user_data != NULL)
199 	    (*newobj->ref_func) (newobj->user_data, LINEBREAK_REF_USER,
200 				 +1);
201     }
202 
203     newobj->refcount = 1UL;
204     return newobj;
205 }
206 
207 /** Decrease Reference Count; Destructor
208  *
209  * Decrement reference count of linebreak object.
210  * When reference count becomes zero, free memories allocated for
211  * object and then, if ref_func member of object was not NULL,
212  * it will be executed to decrease reference count of prep_data, format_data,
213  * sizing_data, urgent_data and stash members.
214  * @param[in] obj linebreak object.
215  * @return none.
216  * If obj was NULL, do nothing.
217  */
linebreak_destroy(linebreak_t * obj)218 void linebreak_destroy(linebreak_t * obj)
219 {
220     if (obj == NULL)
221 	return;
222     if ((obj->refcount -= 1UL))
223 	return;
224     free(obj->map);
225     free(obj->newline.str);
226     free(obj->bufstr.str);
227     free(obj->bufspc.str);
228     free(obj->unread.str);
229     if (obj->ref_func != NULL) {
230 	if (obj->stash != NULL)
231 	    (*obj->ref_func) (obj->stash, LINEBREAK_REF_STASH, -1);
232 	if (obj->format_data != NULL)
233 	    (*obj->ref_func) (obj->format_data, LINEBREAK_REF_FORMAT, -1);
234 	if (obj->prep_func != NULL) {
235 	    size_t i;
236 	    for (i = 0; obj->prep_func[i] != NULL; i++)
237 		if (obj->prep_data[i] != NULL)
238 		    (*obj->ref_func) (obj->prep_data[i],
239 				      LINEBREAK_REF_PREP, -1);
240 	}
241 	if (obj->sizing_data != NULL)
242 	    (*obj->ref_func) (obj->sizing_data, LINEBREAK_REF_SIZING, -1);
243 	if (obj->urgent_data != NULL)
244 	    (*obj->ref_func) (obj->urgent_data, LINEBREAK_REF_URGENT, -1);
245 	if (obj->user_data != NULL)
246 	    (*obj->ref_func) (obj->user_data, LINEBREAK_REF_USER, -1);
247     }
248     free(obj->prep_func);
249     free(obj->prep_data);
250     free(obj);
251 }
252 
253 /** Setter: Update newline member
254  *
255  * @param[in] lbobj target linebreak object, must not be NULL.
256  * @param[in] newline pointer to Unicode string.
257  * @return none.
258  * Copy of newline is set.
259  * If error occurred, lbobj->errnum is set.
260  */
linebreak_set_newline(linebreak_t * lbobj,unistr_t * newline)261 void linebreak_set_newline(linebreak_t * lbobj, unistr_t * newline)
262 {
263     unichar_t *str;
264     size_t len;
265 
266     if (newline != NULL && newline->str != NULL && newline->len != 0) {
267 	if ((str = malloc(sizeof(unichar_t) * newline->len)) == NULL) {
268 	    lbobj->errnum = errno ? errno : ENOMEM;
269 	    return;
270 	}
271 	memcpy(str, newline->str, sizeof(unichar_t) * newline->len);
272 	len = newline->len;
273     } else {
274 	str = NULL;
275 	len = 0;
276     }
277     free(lbobj->newline.str);
278     lbobj->newline.str = str;
279     lbobj->newline.len = len;
280 }
281 
282 /** Setter: Update stash Member
283  *
284  * @param[in] lbobj target linebreak object, must not be NULL.
285  * @param[in] stash new stash value or NULL.
286  * @return none.
287  * New stash value is set.
288  * Reference count of stash member will be handled appropriately.
289  */
linebreak_set_stash(linebreak_t * lbobj,void * stash)290 void linebreak_set_stash(linebreak_t * lbobj, void *stash)
291 {
292     if (lbobj->ref_func != NULL) {
293 	if (stash != NULL)
294 	    (*(lbobj->ref_func)) (stash, LINEBREAK_REF_STASH, +1);
295 	if (lbobj->stash != NULL)
296 	    (*(lbobj->ref_func)) (lbobj->stash, LINEBREAK_REF_STASH, -1);
297     }
298     lbobj->stash = stash;
299 }
300 
301 /** Setter: Update format_func/format_data Member
302  *
303  * @param[in] lbobj target linebreak object.
304  * @param[in] format_func format callback function or NULL.
305  * @param[in] format_data new format_data value.
306  * @return none.
307  * New format callback is set.
308  * Reference count of format_data member will be handled appropriately.
309  */
linebreak_set_format(linebreak_t * lbobj,linebreak_format_func_t format_func,void * format_data)310 void linebreak_set_format(linebreak_t * lbobj,
311 			  linebreak_format_func_t format_func,
312 			  void *format_data)
313 {
314     if (lbobj->ref_func != NULL) {
315 	if (format_data != NULL)
316 	    (*(lbobj->ref_func)) (format_data, LINEBREAK_REF_FORMAT, +1);
317 	if (lbobj->format_data != NULL)
318 	    (*(lbobj->ref_func)) (lbobj->format_data, LINEBREAK_REF_FORMAT,
319 				  -1);
320     }
321     lbobj->format_func = format_func;
322     lbobj->format_data = format_data;
323 }
324 
325 /** Setter: Add/clear prep_func/prep_data Member
326  *
327  * @param[in] lbobj target linebreak object.
328  * @param[in] prep_func preprocessing callback function or NULL.
329  * @param[in] prep_data new prep_data value.
330  * @return none.
331  * New preprocessing callback is added.
332  * Reference count of prep_data item will be handled appropriately.
333  * if prep_func was NULL, all data are cleared.
334  */
linebreak_add_prep(linebreak_t * lbobj,linebreak_prep_func_t prep_func,void * prep_data)335 void linebreak_add_prep(linebreak_t * lbobj,
336 			linebreak_prep_func_t prep_func, void *prep_data)
337 {
338     size_t i;
339     linebreak_prep_func_t *p;
340     void **q;
341 
342     if (prep_func == NULL) {
343 	if (lbobj->prep_data != NULL) {
344 	    for (i = 0; lbobj->prep_func[i] != NULL; i++)
345 		if (lbobj->prep_data[i] != NULL)
346 		    (*lbobj->ref_func) (lbobj->prep_data[i],
347 					LINEBREAK_REF_PREP, -1);
348 	    free(lbobj->prep_data);
349 	    lbobj->prep_data = NULL;
350 	}
351 	free(lbobj->prep_func);
352 	lbobj->prep_func = NULL;
353 	return;
354     }
355 
356     if (lbobj->prep_func == NULL)
357 	i = 0;
358     else
359 	for (i = 0; lbobj->prep_func[i] != NULL; i++);
360 
361     if ((p =
362 	 realloc(lbobj->prep_func,
363 		 sizeof(linebreak_prep_func_t) * (i + 2)))
364 	== NULL) {
365 	lbobj->errnum = errno;
366 	return;
367     }
368     p[i] = NULL;
369     lbobj->prep_func = p;
370 
371     if ((q = realloc(lbobj->prep_data, sizeof(void *) * (i + 2))) == NULL) {
372 	lbobj->errnum = errno;
373 	return;
374     }
375     lbobj->prep_data = q;
376 
377     if (lbobj->ref_func != NULL && prep_data != NULL)
378 	(*(lbobj->ref_func)) (prep_data, LINEBREAK_REF_PREP, +1);
379     p[i] = prep_func;
380     p[i + 1] = NULL;
381     q[i] = prep_data;
382     q[i + 1] = NULL;
383 }
384 
385 /** Setter: Update sizing_func/sizing_data Member
386  *
387  * @param[in] lbobj target linebreak object.
388  * @param[in] sizing_func sizing callback function or NULL.
389  * @param[in] sizing_data new sizing_data value.
390  * @return none.
391  * New sizing callback is set.
392  * Reference count of sizing_data member will be handled appropriately.
393  */
linebreak_set_sizing(linebreak_t * lbobj,linebreak_sizing_func_t sizing_func,void * sizing_data)394 void linebreak_set_sizing(linebreak_t * lbobj,
395 			  linebreak_sizing_func_t sizing_func,
396 			  void *sizing_data)
397 {
398     if (lbobj->ref_func != NULL) {
399 	if (sizing_data != NULL)
400 	    (*(lbobj->ref_func)) (sizing_data, LINEBREAK_REF_SIZING, +1);
401 	if (lbobj->sizing_data != NULL)
402 	    (*(lbobj->ref_func)) (lbobj->sizing_data, LINEBREAK_REF_SIZING,
403 				  -1);
404     }
405     lbobj->sizing_func = sizing_func;
406     lbobj->sizing_data = sizing_data;
407 }
408 
409 /** Setter: Update urgent_func/urgent_data Member
410  *
411  * @param[in] lbobj target linebreak object.
412  * @param[in] urgent_func urgent breaking callback function or NULL.
413  * @param[in] urgent_data new urgent_data value.
414  * @return none.
415  * New urgent breaking callback is set.
416  * Reference count of urgent_data member will be handled appropriately.
417  */
linebreak_set_urgent(linebreak_t * lbobj,linebreak_urgent_func_t urgent_func,void * urgent_data)418 void linebreak_set_urgent(linebreak_t * lbobj,
419 			  linebreak_urgent_func_t urgent_func,
420 			  void *urgent_data)
421 {
422     if (lbobj->ref_func != NULL) {
423 	if (urgent_data != NULL)
424 	    (*(lbobj->ref_func)) (urgent_data, LINEBREAK_REF_URGENT, +1);
425 	if (lbobj->urgent_data != NULL)
426 	    (*(lbobj->ref_func)) (lbobj->urgent_data, LINEBREAK_REF_URGENT,
427 				  -1);
428     }
429     lbobj->urgent_func = urgent_func;
430     lbobj->urgent_data = urgent_data;
431 }
432 
433 /** Setter: Update user_func/user_data Member
434  * @deprecated Use linebreak_add_prep() instead.
435  *
436  * @param[in] lbobj target linebreak object.
437  * @param[in] user_func preprocessing callback function or NULL.
438  * @param[in] user_data new user_data value.
439  * @return none.
440  * New preprocessing callback is set.
441  * Reference count of user_data member will be handled appropriately.
442  */
linebreak_set_user(linebreak_t * lbobj,linebreak_obs_prep_func_t user_func,void * user_data)443 void linebreak_set_user(linebreak_t * lbobj,
444 			linebreak_obs_prep_func_t user_func,
445 			void *user_data)
446 {
447     if (lbobj->ref_func != NULL) {
448 	if (user_data != NULL)
449 	    (*(lbobj->ref_func)) (user_data, LINEBREAK_REF_USER, +1);
450 	if (lbobj->user_data != NULL)
451 	    (*(lbobj->ref_func)) (lbobj->user_data, LINEBREAK_REF_USER,
452 				  -1);
453     }
454     lbobj->user_func = user_func;
455     lbobj->user_data = user_data;
456 }
457 
458 /** Reset State
459  *
460  * Reset internal state of linebreak object.
461  * Internal state is set by linebreak_break_partial() function.
462  * @param[in] lbobj linebreak object.
463  * @return none.
464  * If lbobj was NULL, do nothing.
465  */
linebreak_reset(linebreak_t * lbobj)466 void linebreak_reset(linebreak_t * lbobj)
467 {
468     if (lbobj == NULL)
469 	return;
470     free(lbobj->unread.str);
471     lbobj->unread.str = NULL;
472     lbobj->unread.len = 0;
473     free(lbobj->bufstr.str);
474     lbobj->bufstr.str = NULL;
475     lbobj->bufstr.len = 0;
476     free(lbobj->bufspc.str);
477     lbobj->bufspc.str = NULL;
478     lbobj->bufspc.len = 0;
479     lbobj->bufcols = 0.0;
480     lbobj->state = LINEBREAK_STATE_NONE;
481     lbobj->errnum = 0;
482 }
483 
484 /** Get breaking rule between two classes
485  *
486  * From given two line breaking classes, get breaking rule determined by
487  * internal data.
488  * @param[in] obj linebreak object, must not be NULL.
489  * @param[in] albc line breaking class.
490  * @param[in] blbc line breaking class.
491  * @return line breaking action: MANDATORY, DIRECT, INDIRECT or PROHIBITED.
492  * If action was not determined, returns DIRECT.
493  *
494  * @note This method gives just approximate description of line breaking
495  * behavior.  Class AI and CJ will be resolved to approppriate classes.
496  * See also linebreak_lbrule().
497  *
498  * @note This method was introduced by Sombok 2.0.6.
499  * @note LEGACY_CM and HANGUL_AS_AL options are concerned as of Sombok 2.1.2.
500  * @note Only HANGUL_AS_AL is concerned as of Sombok 2.2.
501  *
502  */
linebreak_get_lbrule(linebreak_t * obj,propval_t blbc,propval_t albc)503 propval_t linebreak_get_lbrule(linebreak_t * obj, propval_t blbc,
504 			       propval_t albc)
505 {
506     switch (blbc) {
507     case LB_AI:
508 	blbc = (obj->options & LINEBREAK_OPTION_EASTASIAN_CONTEXT) ?
509 	    LB_ID : LB_AL;
510 	break;
511     case LB_CJ:
512 	blbc = (obj->options & LINEBREAK_OPTION_NONSTARTER_LOOSE) ?
513 	    LB_ID : LB_NS;
514 	break;
515     /* Optionally, treat hangul syllable as if it were AL. */
516     case LB_H2:
517     case LB_H3:
518     case LB_JL:
519     case LB_JV:
520     case LB_JT:
521 	if ((albc == LB_H2 || albc == LB_H3 || albc == LB_JL ||
522 	     albc == LB_JV || albc == LB_JT) &&
523 	    obj->options & LINEBREAK_OPTION_HANGUL_AS_AL)
524 	    return LINEBREAK_ACTION_INDIRECT;
525 	break;
526     }
527 
528     switch (albc) {
529     case LB_AI:
530 	albc = (obj->options & LINEBREAK_OPTION_EASTASIAN_CONTEXT) ?
531 	    LB_ID : LB_AL;
532 	break;
533     case LB_CJ:
534 	albc = (obj->options & LINEBREAK_OPTION_NONSTARTER_LOOSE) ?
535 	    LB_ID : LB_NS;
536 	break;
537     }
538 
539     return linebreak_lbrule(blbc, albc);
540 }
541 
542 /** Get Line Breaking Class
543  * @deprecated Use gcstring_lbclass() or gcstring_lbclass_ext() instead.
544  *
545  * Get UAX #14 line breaking class of Unicode character.
546  * Classes XX and SG will be resolved to AL.
547  * @param[in] obj linebreak object, must not be NULL.
548  * @param[in] c Unicode character.
549  * @return line breaking class property value.
550  */
linebreak_lbclass(linebreak_t * obj,unichar_t c)551 propval_t linebreak_lbclass(linebreak_t * obj, unichar_t c)
552 {
553     propval_t lbc, gcb, scr;
554 
555     linebreak_charprop(obj, c, &lbc, NULL, &gcb, &scr);
556     if (lbc == LB_AI)
557 	lbc = (obj->options & LINEBREAK_OPTION_EASTASIAN_CONTEXT) ?
558 	    LB_ID : LB_AL;
559     else if (lbc == LB_CJ)
560 	lbc = (obj->options & LINEBREAK_OPTION_NONSTARTER_LOOSE) ?
561 	    LB_ID : LB_NS;
562     else if (lbc == LB_SA) {
563 #ifdef USE_LIBTHAI
564 	if (scr != SC_Thai)
565 #endif				/* USE_LIBTHAI */
566 	    lbc = (gcb == GB_Extend || gcb == GB_SpacingMark
567 		   || gcb == GB_Virama) ? LB_CM : LB_AL;
568     }
569     return lbc;
570 }
571 
572 /** Get East_Asian_Width Property
573  * @deprecated Use gcstring_columns() instead.
574  *
575  * Get UAX #11 East_Asian_Width property value of Unicode character.
576  * Class A will be resolved to appropriate property F or N.
577  * @param[in] obj linebreak object, must not be NULL.
578  * @param[in] c Unicode character.
579  * @return East_Asian_Width property value.
580  */
linebreak_eawidth(linebreak_t * obj,unichar_t c)581 propval_t linebreak_eawidth(linebreak_t * obj, unichar_t c)
582 {
583     propval_t eaw;
584 
585     linebreak_charprop(obj, c, NULL, &eaw, NULL, NULL);
586     if (eaw == EA_A)
587 	eaw = (obj->options & LINEBREAK_OPTION_EASTASIAN_CONTEXT) ?
588 	    EA_F : EA_N;
589 
590     return eaw;
591 }
592