1 /*
2  * break.c - an implementation of Unicode line breaking algorithm.
3  *
4  * Copyright (C) 2009-2012 by Hatuka*nezumi - IKEDA Soji.
5  *
6  * This file is part of the Sombok Package.  This program is free
7  * software; you can redistribute it and/or modify it under the terms of
8  * either the GNU General Public License or the Artistic License, as
9  * specified in the README file.
10  *
11  */
12 
13 #include "sombok_constants.h"
14 #include "sombok.h"
15 
16 extern propval_t *linebreak_rules[];
17 extern size_t linebreak_rulessiz;
18 
19 /**
20  * @defgroup linebreak_break break
21  * @brief Perform line breaking algorithm
22  *@{*/
23 
24 static
_user(linebreak_t * lbobj,unistr_t * str)25 gcstring_t *_user(linebreak_t * lbobj, unistr_t * str)
26 {
27     gcstring_t *result;
28 
29     if (str == NULL)
30 	return NULL;
31     else if (lbobj->user_func == NULL ||
32 	     ((result = (*(lbobj->user_func)) (lbobj, str)) == NULL &&
33 	      !lbobj->errnum)) {
34 	if ((result = gcstring_newcopy(str, lbobj)) == NULL)
35 	    lbobj->errnum = errno ? errno : ENOMEM;
36     }
37     return result;
38 }
39 
40 static
_prep_sub(linebreak_t * lbobj,unistr_t * substr,unistr_t * text,size_t findex)41 gcstring_t *_prep_sub(linebreak_t * lbobj, unistr_t * substr,
42 		      unistr_t * text, size_t findex)
43 {
44     unistr_t unistr = { NULL, 0 };
45     gcstring_t *ret, *s;
46     unichar_t *prev_str;
47     size_t prev_len;
48     gcstring_t *(*func) (linebreak_t *, void *, unistr_t *, unistr_t *);
49     void *data;
50 
51     if ((func = lbobj->prep_func[findex]) == NULL) {
52 	if ((ret = gcstring_newcopy(substr, lbobj)) == NULL)
53 	    lbobj->errnum = errno ? errno : ENOMEM;
54 	return ret;
55     }
56     if (lbobj->prep_data == NULL)
57 	data = NULL;
58     else
59 	data = lbobj->prep_data[findex];
60 
61     if ((ret = gcstring_new(NULL, lbobj)) == NULL) {
62 	lbobj->errnum = errno ? errno : ENOMEM;
63 	return NULL;
64     }
65 
66     prev_str = substr->str;
67     prev_len = substr->len;
68     while (1) {
69 	/* Pass I: search. */
70 	unistr.str = prev_str;
71 	unistr.len = prev_len;
72 	gcstring_destroy((*func) (lbobj, data, &unistr, text));
73 	/* - no match: stop searching. */
74 	if (unistr.str == NULL)
75 	    break;
76 	/* - buffer may be modified: abort. */
77 	if (unistr.len < 0 ||
78 	    unistr.str < text->str ||
79 	    text->str + text->len < unistr.str + unistr.len) {
80 	    gcstring_destroy(ret);
81 	    lbobj->errnum = EINVAL;
82 	    return NULL;
83 	}
84 	/* - out of range: stop searching. */
85 	if (unistr.str < substr->str ||
86 	    substr->str + substr->len < unistr.str + unistr.len)
87 	    break;
88 
89 	/* apply next callback to unmatched part. */
90 	if (prev_str <= unistr.str) {
91 	    unistr_t us;
92 	    us.len = unistr.str - prev_str;
93 	    us.str = prev_str;
94 	    if ((s = _prep_sub(lbobj, &us, text, findex + 1)) == NULL) {
95 		gcstring_destroy(ret);
96 		return NULL;
97 	    }
98 	    if (gcstring_append(ret, s) == NULL) {
99 		gcstring_destroy(ret);
100 		gcstring_destroy(s);
101 		lbobj->errnum = errno ? errno : ENOMEM;
102 		return NULL;
103 	    }
104 	    gcstring_destroy(s);
105 	}
106 
107 	/* Pass II: process matched string. */
108 	if ((s = (*func) (lbobj, data, &unistr, NULL)) == NULL) {
109 	    if (lbobj->errnum != 0) {
110 		gcstring_destroy(ret);
111 		return NULL;
112 	    }
113 	    if ((s = gcstring_newcopy(&unistr, lbobj)) == NULL) {
114 		gcstring_destroy(ret);
115 		lbobj->errnum = errno ? errno : ENOMEM;
116 		return NULL;
117 	    }
118 	}
119 	if (gcstring_append(ret, s) == NULL) {
120 	    gcstring_destroy(ret);
121 	    gcstring_destroy(s);
122 	    lbobj->errnum = errno ? errno : ENOMEM;
123 	    return NULL;
124 	}
125 	gcstring_destroy(s);
126 
127 	/* skip zero length match to avoid infinite loop. */
128 	if (unistr.len == 0) {
129 	    if (substr->str + substr->len <= unistr.str) {
130 		prev_str = unistr.str;
131 		prev_len = 0;
132 		break;
133 	    } else {
134 		prev_str = unistr.str + 1;
135 		prev_len = substr->str + substr->len - prev_str;
136 		continue;
137 	    }
138 	}
139 	prev_str = unistr.str + unistr.len;
140 	prev_len = substr->str + substr->len - prev_str;
141     }
142 
143     /* apply next callback to the rest of string. */
144     if (prev_str < substr->str + substr->len) {
145 	unistr.str = prev_str;
146 	unistr.len = prev_len;
147 	if ((s = _prep_sub(lbobj, &unistr, text, findex + 1)) == NULL) {
148 	    gcstring_destroy(ret);
149 	    return NULL;
150 	}
151 	if (gcstring_append(ret, s) == NULL) {
152 	    gcstring_destroy(ret);
153 	    gcstring_destroy(s);
154 	    lbobj->errnum = errno ? errno : ENOMEM;
155 	    return NULL;
156 	}
157 	gcstring_destroy(s);
158     }
159 
160     return ret;
161 }
162 
163 static
_prep(linebreak_t * lbobj,unistr_t * text)164 gcstring_t *_prep(linebreak_t * lbobj, unistr_t * text)
165 {
166     gcstring_t *ret;
167 
168     if (lbobj->prep_func == NULL) {
169 	if ((ret = gcstring_newcopy(text, lbobj)) == NULL)
170 	    lbobj->errnum = errno ? errno : ENOMEM;
171 	return ret;
172     }
173     return _prep_sub(lbobj, text, text, 0);
174 }
175 
176 static
_format(linebreak_t * lbobj,linebreak_state_t action,gcstring_t * str)177 gcstring_t *_format(linebreak_t * lbobj, linebreak_state_t action,
178 		    gcstring_t * str)
179 {
180     gcstring_t *result;
181 
182     if (str == NULL)
183 	return NULL;
184     else if (lbobj->format_func == NULL ||
185 	     ((result =
186 	       (*(lbobj->format_func)) (lbobj, action, str)) == NULL &&
187 	      !lbobj->errnum)) {
188 	if ((result = gcstring_copy(str)) == NULL)
189 	    lbobj->errnum = errno ? errno : ENOMEM;
190     }
191     return result;
192 }
193 
194 static
_sizing(linebreak_t * lbobj,double len,gcstring_t * pre,gcstring_t * spc,gcstring_t * str)195 double _sizing(linebreak_t * lbobj, double len,
196 	       gcstring_t * pre, gcstring_t * spc, gcstring_t * str)
197 {
198     double ret;
199 
200     if (lbobj->sizing_func == NULL ||
201 	((ret = (*(lbobj->sizing_func)) (lbobj, len, pre, spc, str))
202 	 < 0.0 && !lbobj->errnum)) {
203 	if (spc != NULL)
204 	    len += (double) spc->gclen;
205 	if (str != NULL)
206 	    len += (double) str->gclen;
207 	return len;
208     }
209     return ret;
210 }
211 
212 static
_urgent_break(linebreak_t * lbobj,gcstring_t * str)213 gcstring_t *_urgent_break(linebreak_t * lbobj, gcstring_t * str)
214 {
215     gcstring_t *result;
216 
217     if (lbobj->urgent_func == NULL ||
218 	((result = (*(lbobj->urgent_func)) (lbobj, str)) == NULL &&
219 	 !lbobj->errnum)) {
220 	if ((result = gcstring_copy(str)) == NULL)
221 	    lbobj->errnum = errno ? errno : ENOMEM;
222     }
223     return result;
224 }
225 
/*
 * Destroy the gcstring_t held in variable GCSTR and reset the variable to
 * NULL so that a later cleanup does not destroy it a second time.
 *
 * NOTE: this expands to TWO statements and is intentionally NOT wrapped
 * in do { } while (0).  When used as the sole body of an unbraced
 * if/else, only the gcstring_destroy() call is conditional; the NULL
 * assignment always executes.  _break_partial() has such a use
 * ("else gcstring_DESTROY(fmt);" right after "beforeFrg = fmt;"), where
 * the unconditional reset keeps fmt from aliasing beforeFrg.  Audit every
 * call site before changing the shape of this expansion.
 */
#define gcstring_DESTROY(gcstr) \
    gcstring_destroy(gcstr); (gcstr) = NULL;

/*
 * Evaluate X; if it is NULL, release everything _break_partial() owns and
 * return NULL from the enclosing function.
 *
 * Expects these names in the expanding scope: lbobj, str, bufStr, bufSpc,
 * results, reslen, s, t, beforeFrg, fmt and broken.  If lbobj->errnum is
 * still 0 it is set to errno, or to EINVAL when errno is 0 as well.
 * The loop counter has a macro-private name so that it does not shadow a
 * variable of the caller.
 */
#define IF_NULL_THEN_ABORT(x)                                   \
    if ((x) == NULL) {                                          \
        size_t abort_idx_;                                      \
        if (lbobj->errnum == 0)                                 \
            lbobj->errnum = errno? errno: EINVAL;               \
        gcstring_destroy(str);                                  \
        gcstring_destroy(bufStr);                               \
        gcstring_destroy(bufSpc);                               \
        for (abort_idx_ = 0; abort_idx_ < reslen; abort_idx_++) \
            gcstring_destroy(results[abort_idx_]);              \
        free(results);                                          \
        gcstring_destroy(s);                                    \
        gcstring_destroy(t);                                    \
        gcstring_destroy(beforeFrg);                            \
        gcstring_destroy(fmt);                                  \
        gcstring_destroy(broken);                               \
        return NULL;                                            \
    }
247 
248 /** @fn propval_t linebreak_lbrule(propval_t b_idx, propval_t a_idx)
249  * @deprecated Use linebreak_get_lbrule().
250  *
251  * Get breaking rule between two classes
252  *
253  * From given two line breaking classes, get breaking rule determined by
254  * internal data.
255  * @param[in] a_idx line breaking class.
256  * @param[in] b_idx line breaking class.
257  * @return line breaking action: MANDATORY, DIRECT, INDIRECT or PROHIBITED.
258  * If action was not determined, returns DIRECT.
259  *
260  * @note This method gives just approximate description of line breaking
261  * behavior.  Especially, it won't give meaningful value related to classes
262  * AI and CJ.
263  * See also linebreak_get_lbrule().
264  *
265  */
266 static
_lbruleinfo(propval_t b_idx,propval_t a_idx)267 propval_t _lbruleinfo(propval_t b_idx, propval_t a_idx)
268 {
269     propval_t result = PROP_UNKNOWN;
270 
271     if (b_idx < 0 || linebreak_rulessiz <= b_idx ||
272 	a_idx < 0 || linebreak_rulessiz <= a_idx);
273     else
274 	result = linebreak_rules[b_idx][a_idx];
275     if (result == PROP_UNKNOWN)
276 	return LINEBREAK_ACTION_DIRECT;
277     return result;
278 }
279 
/*
 * Approximate breaking rule between a before-side class (b_idx) and an
 * after-side class (a_idx).
 *
 * Both classes are first resolved (LB1, LB9, LB10), then a simplified
 * LB25 is checked, and finally the rule table is consulted through
 * _lbruleinfo().  AI and CJ are not resolved, and HL is not mapped to AL.
 */
propval_t linebreak_lbrule(propval_t b_idx, propval_t a_idx)
{
    int b_closes_number, b_is_prefix, b_inside_number;

    /*
     * Before side.
     * LB1: SA, SG and XX resolve to AL.  LB10: CM resolves to AL.
     */
    if (b_idx == LB_SA || b_idx == LB_SG || b_idx == LB_XX ||
        b_idx == LB_CM)
        b_idx = LB_AL;

    /* After side. */
    if (a_idx == LB_SA || a_idx == LB_SG || a_idx == LB_XX) {
        /* LB1 */
        a_idx = LB_AL;
    } else if (a_idx == LB_CM) {
        /*
         * LB9: Treat X CM as if it were X, i.e. never break before CM,
         * unless X is one of BK, CR, LF, NL, SP or ZW.
         */
        if (b_idx != LB_BK && b_idx != LB_CR && b_idx != LB_LF &&
            b_idx != LB_NL && b_idx != LB_SP && b_idx != LB_ZW)
            return LINEBREAK_ACTION_PROHIBITED;

        /* XXX Legacy-CM rule cannot be applied. */

        /* LB10: Treat any remaining combining mark as AL. */
        a_idx = LB_AL;
        /* A before-side CM was already resolved above; safeguard only. */
        if (b_idx == LB_CM)
            b_idx = LB_AL;
    }

    /* LB25, simplified:
     * (CL|CP|NU) × (PO|PR)
     * (PO|PR) × (OP|NU)
     * (HY|IS|NU|SY) × NU
     */
    b_closes_number = (b_idx == LB_CL || b_idx == LB_CP ||
                       b_idx == LB_NU);
    b_is_prefix = (b_idx == LB_PO || b_idx == LB_PR);
    b_inside_number = (b_idx == LB_HY || b_idx == LB_IS ||
                       b_idx == LB_NU || b_idx == LB_SY);

    if (b_closes_number && (a_idx == LB_PO || a_idx == LB_PR))
        return LINEBREAK_ACTION_PROHIBITED;
    if (b_is_prefix && (a_idx == LB_OP || a_idx == LB_NU))
        return LINEBREAK_ACTION_PROHIBITED;
    if (b_inside_number && a_idx == LB_NU)
        return LINEBREAK_ACTION_PROHIBITED;

    return _lbruleinfo(b_idx, a_idx);
}
356 
357 /** @fn gcstring_t** linebreak_break_partial(linebreak_t *lbobj, unistr_t *input)
358  *
359  * Perform line breaking algorithm with incremental inputs.
360  *
361  * @param[in] lbobj linebreak object.
362  * @param[in] input Unicode string; give NULL to specify end of input.
363  * @return array of (partial) broken grapheme cluster strings terminated by NULL.
364  * If internal error occurred, lbobj->errnum is set then NULL is returned.
365  */
366 static
_break_partial(linebreak_t * lbobj,unistr_t * input,size_t * lenp,int eot)367 gcstring_t **_break_partial(linebreak_t * lbobj, unistr_t * input,
368 			    size_t * lenp, int eot)
369 {
370     int state;
371     gcstring_t *str = NULL, *bufStr = NULL, *bufSpc = NULL;
372     double bufCols;
373     size_t bBeg, bLen, bCM, bSpc, aCM, urgEnd;
374     gcstring_t **results = NULL;
375     size_t reslen = 0;
376 
377     gcstring_t *s = NULL, *t = NULL, *beforeFrg = NULL, *fmt = NULL,
378 	*broken = NULL;
379     unistr_t unistr;
380     size_t i;
381     gcstring_t empty = { NULL, 0, NULL, 0, 0, lbobj };
382 
383     /***
384      *** Unread and additional input.
385      ***/
386 
387     unistr.str = lbobj->unread.str;
388     unistr.len = lbobj->unread.len;
389     lbobj->unread.str = NULL;
390     lbobj->unread.len = 0;
391     if (input != NULL && input->len != 0) {
392 	unichar_t *_u;
393 	if ((_u = realloc(unistr.str,
394 			  sizeof(unichar_t) * (unistr.len + input->len)))
395 	    == NULL) {
396 	    lbobj->errnum = errno;
397 	    free(unistr.str);
398 	    return NULL;
399 	} else
400 	    unistr.str = _u;
401 	memcpy(unistr.str + unistr.len, input->str,
402 	       sizeof(unichar_t) * input->len);
403 	unistr.len += input->len;
404     }
405 
406     /***
407      *** Preprocessing.
408      ***/
409 
410     /* perform user breaking */
411     if (lbobj->user_func != NULL)
412 	str = _user(lbobj, &unistr);
413     else
414 	str = _prep(lbobj, &unistr);
415     free(unistr.str);
416     if (str == NULL)
417 	return NULL;
418 
419     /* South East Asian complex breaking. */
420     errno = 0;
421     linebreak_southeastasian_flagbreak(str);
422     if (errno) {
423 	lbobj->errnum = errno;
424 	gcstring_DESTROY(str);
425 	return NULL;
426     }
427 
428     /* LB21a (as of 6.1.0): HL (HY | BA) × [^ CB] */
429     if (str != NULL && str->gclen) {
430 	propval_t lbc;
431 
432 	for (i = 0; i < str->gclen; i++) {
433 	    /* HL */
434 	    if ((lbc = gcstring_lbclass(str, i)) == LB_HL &&
435 		gcstring_lbclass_ext(str, i) == lbc)
436 		/* avoid non-CM grapheme extenders */
437 		i++;
438 	    else
439 		continue;
440 	    /* CM* */
441 	    while (i < str->gclen && gcstring_lbclass(str, i) == LB_CM)
442 		i++;
443 	    if (str->gclen <= i)
444 		break;
445 
446 	    /* (HY|BA) */
447 	    if (((lbc = gcstring_lbclass(str, i)) == LB_HY ||
448 		 lbc == LB_BA) && gcstring_lbclass_ext(str, i) == lbc)
449 		/* avoid non-CM grapheme extenders */
450 		i++;
451 	    else
452 		continue;
453 	    /* CM* */
454 	    while (i < str->gclen && gcstring_lbclass(str, i) == LB_CM)
455 		i++;
456 	    if (str->gclen <= i)
457 		break;
458 
459 	    /* [^CB] */
460 	    switch (gcstring_lbclass(str, i)) {
461 	    /* prohibit break by default */
462 	    case LB_BK:	/* LB6 */
463 	    case LB_CR:
464 	    case LB_LF:
465 	    case LB_NL:
466 	    case LB_SP:	/* LB7 */
467 	    case LB_ZW:
468 	    case LB_CM:	/* LB9 */
469 	    case LB_WJ:	/* LB11 */
470 	    /* allow break by default */
471 	    case LB_CB:	/* LB20 */
472 		continue;
473 	    }
474 
475 	    if (!str->gcstr[i].flag)
476 		str->gcstr[i].flag = LINEBREAK_FLAG_PROHIBIT_BEFORE;
477 	}
478     }
479 
480     /* LB25: not break in (PR|PO)? (OP|HY)? NU (NU|SY|IS)* (CL|CP)? (PR|PO)? */
481     /* FIXME:Avoid non-CM grapheme extenders */
482     if (str != NULL && str->gclen) {
483 	size_t st, et;
484 
485 	for (i = 0; i < str->gclen; i++) {
486 	    st = et = (size_t) - 1;
487 
488 	    /* (PR|PO)? */
489 	    switch (gcstring_lbclass(str, i)) {
490 	    case LB_PR:
491 	    case LB_PO:
492 		if (st == (size_t) - 1)
493 		    st = i;
494 	      LB25_PRPO_PREFIX:
495 		i++;
496 		/* CM* */
497 		while (i < str->gclen && gcstring_lbclass(str, i) == LB_CM)
498 		    i++;
499 		if (str->gclen <= i)
500 		    goto LB25_BREAK;
501 	    }
502 
503 	    /* (OP|HY)? */
504 	    switch (gcstring_lbclass(str, i)) {
505 	    case LB_OP:
506 	    case LB_HY:
507 		if (st == (size_t) - 1)
508 		    st = i;
509 	      LB25_OPHY_PREFIX:
510 		i++;
511 		/* CM* */
512 		while (i < str->gclen && gcstring_lbclass(str, i) == LB_CM)
513 		    i++;
514 		if (str->gclen <= i) {
515 		    if (eot)
516 			goto LB25_BREAK;
517 		    else
518 			goto LB25_FOUND;	/* save possible partial sequence. */
519 		}
520 	    }
521 
522 	    /* NU (NU|SY|IS)* */
523 	    switch (gcstring_lbclass(str, i)) {
524 	    case LB_NU:
525 		if (st == (size_t) - 1)
526 		    st = i;
527 		i++;
528 		/* (NU|SY|IS|CM)* */
529 		while (i < str->gclen)
530 		    switch (gcstring_lbclass(str, i)) {
531 		    case LB_NU:
532 		    case LB_SY:
533 		    case LB_IS:
534 		    case LB_CM:
535 			i++;
536 			break;
537 
538 		    /* (CL|CP) */
539 		    case LB_CL:
540 		    case LB_CP:
541 			goto LB25_CLCP_SUFFIX;
542 
543 		    /* (PR|PO) */
544 		    case LB_PR:
545 		    case LB_PO:
546 			goto LB25_PRPO_SUFFIX;
547 
548 		    default:
549 			goto LB25_FOUND;
550 		    }
551 		if (str->gclen <= i)
552 		    goto LB25_FOUND;
553 		break;
554 
555 	    case LB_PR:
556 	    case LB_PO:
557 		st = i;
558 		goto LB25_PRPO_PREFIX;
559 
560 	    case LB_OP:
561 	    case LB_HY:
562 		st = i;
563 		goto LB25_OPHY_PREFIX;
564 
565 	    default:
566 		continue;
567 	    }
568 
569 	    /* (CL|CP)? */
570 	    switch (gcstring_lbclass(str, i)) {
571 	    case LB_CL:
572 	    case LB_CP:
573 	      LB25_CLCP_SUFFIX:
574 		i++;
575 		/* CM* */
576 		while (i < str->gclen && gcstring_lbclass(str, i) == LB_CM)
577 		    i++;
578 		if (str->gclen <= i)
579 		    goto LB25_FOUND;
580 	    }
581 
582 	    /* (PR|PO)? */
583 	    switch (gcstring_lbclass(str, i)) {
584 	    case LB_PR:
585 	    case LB_PO:
586 	      LB25_PRPO_SUFFIX:
587 		et = i;
588 		i++;
589 		/* CM* */
590 		while (i < str->gclen && gcstring_lbclass(str, i) == LB_CM)
591 		    i++;
592 		if (str->gclen <= i)
593 		    goto LB25_FOUND;
594 	    }
595 
596 	  LB25_FOUND:
597 	    for (st++; st < i; st++) {
598 		if (!str->gcstr[st].flag)
599 		    str->gcstr[st].flag = LINEBREAK_FLAG_PROHIBIT_BEFORE;
600 	    }
601 	    /* match may be overwrapped */
602 	    if (et != (size_t) - 1) {
603 		i = st = et;
604 		et = (size_t) - 1;
605 		goto LB25_PRPO_PREFIX;
606 	    }
607 	}
608       LB25_BREAK:
609 	;
610     }
611 
612     /***
613      *** Initialize status.
614      ***/
615 
616     str->pos = 0;
617 
618     /*
619      * Line buffer.
620      * bufStr: Unbreakable text fragment.
621      * bufSpc: Trailing spaces.
622      * bufCols: Columns of bufStr: can be differ from gcstring_columns().
623      * state: Start of text/paragraph status.
624      *   0: Start of text not done.
625      *   1: Start of text done while start of paragraph not done.
626      *   2: Start of paragraph done while end of paragraph not done.
627      */
628     state = lbobj->state;
629 
630     unistr.str = lbobj->bufstr.str;
631     unistr.len = lbobj->bufstr.len;
632     lbobj->bufstr.str = NULL;
633     lbobj->bufstr.len = 0;
634     IF_NULL_THEN_ABORT(bufStr = gcstring_new(&unistr, lbobj));
635 
636     unistr.str = lbobj->bufspc.str;
637     unistr.len = lbobj->bufspc.len;
638     lbobj->bufspc.str = NULL;
639     lbobj->bufspc.len = 0;
640     IF_NULL_THEN_ABORT(bufSpc = gcstring_new(&unistr, lbobj));
641 
642     bufCols = lbobj->bufcols;
643 
644     /*
645      * Indexes and flags
646      * bBeg:  Start of unbreakable text fragment.
647      * bLen:  Length of unbreakable text fragment.
648      * bSpc:  Length of trailing spaces.
649      * urgEnd: End of substring broken by urgent breaking.
650      *
651      * ...read...| before :CM |  spaces  | after :CM |...unread...|
652      *           ^       ->bCM<-         ^      ->aCM<-           ^
653      *           |<-- bLen -->|<- bSpc ->|           ^            |
654      *          bBeg                 candidate    str->pos     end of
655      *                                breaking                  input
656      *                                 point
657      * `read' positions shall never be read again.
658      */
659     bBeg = bLen = bCM = bSpc = aCM = urgEnd = 0;
660 
661     /* Result. */
662     IF_NULL_THEN_ABORT(results = malloc(sizeof(gcstring_t **)));
663     results[0] = NULL;
664 
665     while (1) {
666 	/***
667 	 *** Chop off a pair of unbreakable character clusters from text.
668 	 ***/
669 	int action = 0;
670 	propval_t lbc;
671 	double newcols;
672 
673 	/* Go ahead reading input. */
674 	while (!gcstring_eos(str)) {
675 	    lbc = gcstring_lbclass(str, str->pos);
676 
677 	    /**
678 	     ** Append SP/ZW/eop to ``before'' buffer.
679 	     **/
680 	    switch (lbc) {
681 	    /* - Explicit breaks and non-breaks */
682 
683 	    /* LB7(1): × SP+ */
684 	    case LB_SP:
685 		gcstring_next(str);
686 		bSpc++;
687 
688 		/* End of input. */
689 		continue;	/* while (!gcstring_eos(str)) */
690 
691 	    /* - Mandatory breaks */
692 
693 	    /* LB4 - LB7: × SP* (BK | CR LF | CR | LF | NL) ! */
694 	    case LB_BK:
695 	    case LB_CR:
696 	    case LB_LF:
697 	    case LB_NL:
698 		gcstring_next(str);
699 		bSpc++;
700 		goto last_CHARACTER_PAIR;	/* while (!gcstring_eos(str)) */
701 
702 	    /* - Explicit breaks and non-breaks */
703 
704 	    /* LB7(2): × (SP* ZW+)+ */
705 	    case LB_ZW:
706 		gcstring_next(str);
707 		bLen += bSpc + 1;
708 		bCM = 0;
709 		bSpc = 0;
710 
711 		/* End of input */
712 		continue;	/* while (!gcstring_eos(str)) */
713 	    }
714 
715 	    /**
716 	     ** Then fill ``after'' buffer.
717 	     **/
718 
719 	    gcstring_next(str);
720 
721 	    /* skip to end of unbreakable fragment by user/complex/urgent
722 	     * breaking. */
723 	    while (!gcstring_eos(str) && str->gcstr[str->pos].flag &
724 		   LINEBREAK_FLAG_PROHIBIT_BEFORE)
725 		gcstring_next(str);
726 
727 	    /* - Combining marks   */
728 	    /* LB9: Treat X CM+ as if it were X
729 	     * where X is anything except BK, CR, LF, NL, SP or ZW
730 	     * (NB: Some CM characters may be single grapheme cluster
731 	     * since they have Grapheme_Cluster_Break property Control.) */
732 	    while (!gcstring_eos(str) &&
733 		   gcstring_lbclass(str, str->pos) == LB_CM) {
734 		gcstring_next(str);
735 		aCM++;
736 	    }
737 
738 	    /* - Start of text */
739 
740 	    /* LB2: sot × */
741 	    if (0 < bLen || 0 < bSpc)
742 		break;		/* while (!gcstring_eos(str)) */
743 
744 	    /* shift buffers. */
745 	    bLen = str->pos - bBeg;
746 	    bSpc = 0;
747 	    bCM = aCM;
748 	    aCM = 0;
749 	}			/* while (!gcstring_eos(str)) */
750       last_CHARACTER_PAIR:
751 
752 	/***
753 	 *** Determin line breaking action by classes of adjacent characters.
754 	 ***/
755 
756 	/* Mandatory break. */
757 	if (0 < bSpc &&
758 	    (lbc = gcstring_lbclass(str, bBeg + bLen + bSpc - 1)) != LB_SP
759 	    && (lbc != LB_CR || eot || !gcstring_eos(str))) {
760 	    /* CR at end of input may be part of CR LF therefore not be eop. */
761 	    action = LINEBREAK_ACTION_MANDATORY;
762 	    /* LB11, LB12 and tailorable rules LB13 - LB31.
763 	     * Or urgent breaking. */
764 	} else if (bBeg + bLen + bSpc < str->pos) {
765 	    if (str->gcstr[bBeg + bLen + bSpc].flag &
766 		LINEBREAK_FLAG_ALLOW_BEFORE)
767 		action = LINEBREAK_ACTION_DIRECT;
768 	    else if (str->gcstr[bBeg + bLen + bSpc].flag &
769 		     LINEBREAK_FLAG_PROHIBIT_BEFORE)
770 		action = LINEBREAK_ACTION_PROHIBITED;
771 	    else if (lbobj->options & LINEBREAK_OPTION_BREAK_INDENT &&
772 		     bLen == 0 && 0 < bSpc)
773 		/* Allow break at sot or after breaking,
774 		 * although rules don't tell it obviously. */
775 		action = LINEBREAK_ACTION_DIRECT;
776 	    else {
777 		propval_t blbc, albc;
778 		size_t btail;
779 
780 		if (bLen == 0)
781 		    btail = bBeg + bSpc - 1;	/* before buffer is SP only. */
782 		else
783 		    btail = bBeg + bLen - bCM - 1;	/* LB9 */
784 
785 		blbc = gcstring_lbclass_ext(str, btail);
786 		switch (blbc) {
787 		/* (SG and XX are already resolved). */
788 		/* LB1: Resolve AI and CJ. */
789 		case LB_AI:
790 		    blbc = (lbobj->options &
791 			    LINEBREAK_OPTION_EASTASIAN_CONTEXT) ?
792 			LB_ID : LB_AL;
793 		    break;
794 		case LB_CJ:
795 		    blbc = (lbobj->options &
796 			    LINEBREAK_OPTION_NONSTARTER_LOOSE) ?
797 			LB_ID : LB_NS;
798 		    break;
799 		/* LB1: SA is resolved to AL. */
800 		case LB_SA:
801 		    blbc = LB_AL;
802 		    break;
803 		/* LB10: Treat any remaining CM+ as if it were AL. */
804 		case LB_CM:
805 		    blbc = LB_AL;
806 		    break;
807 #if 0
808 		/* (As of 6.1.0): Treat HL as AL. */
809 		case LB_HL:
810 		    blbc = LB_AL;
811 		    break;
812 #endif
813 		/* Optionally, treat hangul syllable as if it were AL. */
814 		case LB_H2:
815 		case LB_H3:
816 		case LB_JL:
817 		case LB_JV:
818 		case LB_JT:
819 		    if (lbobj->options & LINEBREAK_OPTION_HANGUL_AS_AL)
820 			blbc = LB_AL;
821 		    break;
822 		}
823 
824 		albc = gcstring_lbclass(str, bBeg + bLen + bSpc);
825 		switch (albc) {
826 		/* (SG and XX are already resolved). */
827 		/* LB1: Resolve AI and CJ. */
828 		case LB_AI:
829 		    albc = (lbobj->options &
830 			    LINEBREAK_OPTION_EASTASIAN_CONTEXT) ?
831 			LB_ID : LB_AL;
832 		    break;
833 		case LB_CJ:
834 		    albc = (lbobj->options &
835 			    LINEBREAK_OPTION_NONSTARTER_LOOSE) ?
836 			LB_ID : LB_NS;
837 		    break;
838 		/* LB1: SA is resolved to AL. */
839 		case LB_SA:
840 		    albc = LB_AL;
841 		    break;
842 		/* LB10: Treat any remaining CM+ as if it were AL. */
843 		case LB_CM:
844 		    albc = LB_AL;
845 		    break;
846 #if 0
847 		/* (As of 6.1.0): Treat HL as AL. */
848 		case LB_HL:
849 		    albc = LB_AL;
850 		    break;
851 #endif
852 		/* Optionally, treat hangul syllable as if it were AL. */
853 		case LB_H2:
854 		case LB_H3:
855 		case LB_JL:
856 		case LB_JV:
857 		case LB_JT:
858 		    if (lbobj->options & LINEBREAK_OPTION_HANGUL_AS_AL)
859 			albc = LB_AL;
860 		    break;
861 		}
862 
863 		action = _lbruleinfo(blbc, albc);
864 	    }
865 
866 	    /* Check prohibited break. */
867 	    if (action == LINEBREAK_ACTION_PROHIBITED ||
868 		(action == LINEBREAK_ACTION_INDIRECT && bSpc == 0)) {
869 		/* When conjunction is expected to exceed charmax,
870 		 * try urgent breaking. */
871 		if (urgEnd < bBeg + bLen + bSpc &&
872 		    0 < lbobj->charmax &&
873 		    lbobj->charmax < str->gcstr[str->pos - 1].idx +
874 		    str->gcstr[str->pos - 1].len - str->gcstr[bBeg].idx) {
875 		    size_t charmax, chars;
876 
877 		    IF_NULL_THEN_ABORT(s = gcstring_substr(str, bBeg,
878 							   str->pos -
879 							   bBeg));
880 		    IF_NULL_THEN_ABORT(broken = _urgent_break(lbobj, s));
881 		    gcstring_DESTROY(s);
882 
883 		    /* If any of urgently broken fragments still
884 		     * exceed CharactersMax, force chop them. */
885 		    charmax = lbobj->charmax;
886 		    broken->pos = 0;
887 		    chars = gcstring_next(broken)->len;
888 		    while (!gcstring_eos(broken)) {
889 			if (broken->gcstr[broken->pos].flag &
890 			    LINEBREAK_FLAG_ALLOW_BEFORE)
891 			    chars = 0;
892 			else if (charmax <
893 				 chars + broken->gcstr[broken->pos].len) {
894 			    broken->gcstr[broken->pos].flag |=
895 				LINEBREAK_FLAG_ALLOW_BEFORE;
896 			    chars = 0;
897 			} else
898 			    chars += broken->gcstr[broken->pos].len;
899 			gcstring_next(broken);
900 		    }		/* while (!gcstring_eos(broken)) */
901 
902 		    urgEnd = broken->gclen;
903 		    gcstring_replace(str, 0, str->pos, broken);
904 		    gcstring_DESTROY(broken);
905 		    str->pos = 0;
906 		    bBeg = bLen = bCM = bSpc = aCM = 0;
907 		    continue;	/* while (1) */
908 		}
909 
910 		/* if (urgEnd < ...) */
911 		/* Otherwise, fragments may be conjuncted safely. Read more. */
912 		bLen = str->pos - bBeg;
913 		bSpc = 0;
914 		bCM = aCM;
915 		aCM = 0;
916 		continue;	/* while (1) */
917 	    }			/* if (action == ...) */
918 	}			/* if (0 < bSpc && ...) */
919 	/***
920 	 *** Check end of input.
921 	 ***/
922 	if (!eot && str->gclen <= bBeg + bLen + bSpc) {
923 	    /* Save status then output partial result. */
924 	    lbobj->bufstr.str = bufStr->str;
925 	    lbobj->bufstr.len = bufStr->len;
926 	    bufStr->str = NULL;
927 	    bufStr->len = 0;
928 	    gcstring_DESTROY(bufStr);
929 
930 	    lbobj->bufspc.str = bufSpc->str;
931 	    lbobj->bufspc.len = bufSpc->len;
932 	    bufSpc->str = NULL;
933 	    bufSpc->len = 0;
934 	    gcstring_DESTROY(bufSpc);
935 
936 	    lbobj->bufcols = bufCols;
937 
938 	    s = gcstring_substr(str, bBeg, str->gclen - bBeg);
939 	    lbobj->unread.str = s->str;
940 	    lbobj->unread.len = s->len;
941 	    s->str = NULL;
942 	    s->len = 0;
943 	    gcstring_DESTROY(s);
944 
945 	    lbobj->state = state;
946 
947 	    /* clenup. */
948 	    gcstring_DESTROY(str);
949 
950 	    if (lenp != NULL)
951 		*lenp = reslen;
952 	    return results;
953 	}
954 
955 	/* After all, possible actions are MANDATORY and arbitrary. */
956 
957 	/***
958 	 *** Examine line breaking action
959 	 ***/
960 
961 	IF_NULL_THEN_ABORT(beforeFrg = gcstring_substr(str, bBeg, bLen));
962 
963 	if (state == LINEBREAK_STATE_NONE) {	/* sot undone. */
964 	    /* Process start of text. */
965 	    IF_NULL_THEN_ABORT(fmt = _format(lbobj, LINEBREAK_STATE_SOT,
966 					     beforeFrg));
967 	    if (gcstring_cmp(beforeFrg, fmt) != 0) {
968 		s = gcstring_substr(str, bBeg + bLen, bSpc);
969 		gcstring_append(fmt, s);
970 		gcstring_DESTROY(s);
971 		s = gcstring_substr(str, bBeg + bLen + bSpc,
972 				    str->pos - (bBeg + bLen + bSpc));
973 		gcstring_append(fmt, s);
974 		gcstring_DESTROY(s);
975 		gcstring_replace(str, 0, str->pos, fmt);
976 		str->pos = 0;
977 		bBeg = bLen = bCM = bSpc = aCM = 0;
978 		urgEnd = 0;
979 
980 		state = LINEBREAK_STATE_SOT_FORMAT;
981 		gcstring_DESTROY(fmt);
982 		gcstring_DESTROY(beforeFrg);
983 
984 		continue;	/* while (1) */
985 	    }
986 	    gcstring_DESTROY(fmt);
987 	    state = LINEBREAK_STATE_SOL;
988 	} else if (state == LINEBREAK_STATE_SOT_FORMAT)
989 	    state = LINEBREAK_STATE_SOL;
990 	else if (state == LINEBREAK_STATE_SOT) {	/* sop undone. */
991 	    /* Process start of paragraph. */
992 	    IF_NULL_THEN_ABORT(fmt = _format(lbobj, LINEBREAK_STATE_SOP,
993 					     beforeFrg));
994 	    if (gcstring_cmp(beforeFrg, fmt) != 0) {
995 		s = gcstring_substr(str, bBeg + bLen, bSpc);
996 		gcstring_append(fmt, s);
997 		gcstring_DESTROY(s);
998 		s = gcstring_substr(str, bBeg + bLen + bSpc,
999 				    str->pos - (bBeg + bLen + bSpc));
1000 		gcstring_append(fmt, s);
1001 		gcstring_DESTROY(s);
1002 		gcstring_replace(str, 0, str->pos, fmt);
1003 		str->pos = 0;
1004 		bBeg = bLen = bCM = bSpc = aCM = 0;
1005 		urgEnd = 0;
1006 
1007 		state = LINEBREAK_STATE_SOP_FORMAT;
1008 		gcstring_DESTROY(fmt);
1009 		gcstring_DESTROY(beforeFrg);
1010 
1011 		continue;	/* while (1) */
1012 	    }
1013 	    gcstring_DESTROY(fmt);
1014 	    state = LINEBREAK_STATE_SOP;
1015 	} else if (state == LINEBREAK_STATE_SOP_FORMAT)
1016 	    state = LINEBREAK_STATE_SOP;
1017 
1018 	/***
1019 	 *** Check if arbitrary break is needed.
1020 	 ***/
1021 	newcols = _sizing(lbobj, bufCols, bufStr, bufSpc, beforeFrg);
1022 	if (newcols < 0.0) {
1023 	    IF_NULL_THEN_ABORT(NULL);
1024 	}
1025 	if (0 < lbobj->colmax && lbobj->colmax < newcols) {
1026 	    newcols = _sizing(lbobj, 0.0, &empty, &empty, beforeFrg);
1027 	    if (newcols < 0.0) {
1028 		IF_NULL_THEN_ABORT(NULL);
1029 	    }
1030 
1031 	    /**
1032 	     ** When arbitrary break is expected to generate a line shorter
1033 	     ** than colmin or, beforeFrg will exceed colmax, try urgent
1034 	     ** breaking.
1035 	     **/
1036 	    if (urgEnd < bBeg + bLen + bSpc) {
1037 		broken = NULL;
1038 
1039 		if (0.0 < bufCols && bufCols < lbobj->colmin) {
1040 		    gcstring_replace(beforeFrg, 0, 0, bufSpc);
1041 		    gcstring_replace(beforeFrg, 0, 0, bufStr);
1042 		    gcstring_shrink(bufSpc, 0);
1043 		    gcstring_shrink(bufStr, 0);
1044 		    bufCols = 0.0;
1045 		    IF_NULL_THEN_ABORT(broken = _urgent_break(lbobj,
1046 							      beforeFrg));
1047 		} else if (lbobj->colmax < newcols) {
1048 		    IF_NULL_THEN_ABORT(broken = _urgent_break(lbobj,
1049 							      beforeFrg));
1050 		}
1051 
1052 		if (broken != NULL) {
1053 		    s = gcstring_substr(str, bBeg + bLen, bSpc);
1054 		    gcstring_append(broken, s);
1055 		    gcstring_DESTROY(s);
1056 		    gcstring_replace(str, 0, bBeg + bLen + bSpc, broken);
1057 		    str->pos = 0;
1058 		    urgEnd = broken->gclen;
1059 		    bBeg = bLen = bCM = bSpc = aCM = 0;
1060 		    gcstring_DESTROY(broken);
1061 
1062 		    gcstring_DESTROY(beforeFrg);
1063 		    continue;	/* while (1) */
1064 		}
1065 	    }
1066 
1067 	    /**
1068 	     ** Otherwise, process arbitrary break.
1069 	     **/
1070 	    if (bufStr->len || bufSpc->len) {
1071 		gcstring_t **r;
1072 
1073 		IF_NULL_THEN_ABORT(r = realloc(results,
1074 					       sizeof(gcstring_t *) *
1075 					       (reslen + 2)));
1076 		(results = r)[reslen + 1] = NULL;
1077 		IF_NULL_THEN_ABORT(s = _format(lbobj, LINEBREAK_STATE_LINE,
1078 					       bufStr));
1079 		IF_NULL_THEN_ABORT(t = _format(lbobj, LINEBREAK_STATE_EOL,
1080 					       bufSpc));
1081 		IF_NULL_THEN_ABORT(results[reslen] =
1082 				   gcstring_concat(s, t));
1083 		reslen++;
1084 		gcstring_DESTROY(s);
1085 		gcstring_DESTROY(t);
1086 
1087 		IF_NULL_THEN_ABORT(fmt =
1088 				   _format(lbobj, LINEBREAK_STATE_SOL,
1089 					   beforeFrg));
1090 		if (gcstring_cmp(beforeFrg, fmt) != 0) {
1091 		    gcstring_DESTROY(beforeFrg);
1092 		    beforeFrg = fmt;
1093 		    newcols =
1094 			_sizing(lbobj, 0.0, &empty, &empty, beforeFrg);
1095 		    if (newcols < 0.0) {
1096 			IF_NULL_THEN_ABORT(NULL);
1097 		    }
1098 		} else
1099 		    gcstring_DESTROY(fmt);
1100 	    }
1101 	    gcstring_shrink(bufStr, 0);
1102 	    gcstring_append(bufStr, beforeFrg);
1103 
1104 	    gcstring_shrink(bufSpc, 0);
1105 	    s = gcstring_substr(str, bBeg + bLen, bSpc);
1106 	    gcstring_append(bufSpc, s);
1107 	    gcstring_DESTROY(s);
1108 
1109 	    bufCols = newcols;
1110 	/***
1111 	 *** Arbitrary break is not needed.
1112 	 ***/
1113 	} else {
1114 	    gcstring_append(bufStr, bufSpc);
1115 	    gcstring_append(bufStr, beforeFrg);
1116 
1117 	    gcstring_shrink(bufSpc, 0);
1118 	    s = gcstring_substr(str, bBeg + bLen, bSpc);
1119 	    gcstring_append(bufSpc, s);
1120 	    gcstring_DESTROY(s);
1121 
1122 	    bufCols = newcols;
1123 	}			/* if (0 < lbobj->colmax ... ) */
1124 
1125 	gcstring_DESTROY(beforeFrg);
1126 
1127 	/***
1128 	 *** Mandatory break or end-of-text.
1129 	 ***/
1130 	if (eot && str->gclen <= bBeg + bLen + bSpc)
1131 	    break;		/* while (1) */
1132 
1133 	if (action == LINEBREAK_ACTION_MANDATORY) {
1134 	    /* Process mandatory break. */
1135 	    gcstring_t **r;
1136 
1137 	    IF_NULL_THEN_ABORT(r = realloc(results,
1138 					   sizeof(gcstring_t *) *
1139 					   (reslen + 2)));
1140 	    (results = r)[reslen + 1] = NULL;
1141 	    IF_NULL_THEN_ABORT(s = _format(lbobj, LINEBREAK_STATE_LINE,
1142 					   bufStr));
1143 	    IF_NULL_THEN_ABORT(t = _format(lbobj, LINEBREAK_STATE_EOP,
1144 					   bufSpc));
1145 	    IF_NULL_THEN_ABORT(results[reslen] = gcstring_concat(s, t));
1146 	    reslen++;
1147 	    gcstring_DESTROY(s);
1148 	    gcstring_DESTROY(t);
1149 
1150 	    /* eop done then sop must be carried out. */
1151 	    state = LINEBREAK_STATE_SOT;
1152 
1153 	    gcstring_shrink(bufStr, 0);
1154 	    gcstring_shrink(bufSpc, 0);
1155 	    bufCols = 0.0;
1156 	}
1157 
1158 	/***
1159 	 *** Shift buffers.
1160 	 ***/
1161 	bBeg += bLen + bSpc;
1162 	bLen = str->pos - bBeg;
1163 	bSpc = 0;
1164 	bCM = aCM;
1165 	aCM = 0;
1166     }				/* while (1) */
1167 
1168     /***
1169      *** Process end of text.
1170      ***/
1171     {
1172 	gcstring_t **r;
1173 
1174 	IF_NULL_THEN_ABORT(r = realloc(results,
1175 				       sizeof(gcstring_t *) * (reslen +
1176 							       2)));
1177 	(results = r)[reslen + 1] = NULL;
1178 	IF_NULL_THEN_ABORT(s =
1179 			   _format(lbobj, LINEBREAK_STATE_LINE, bufStr));
1180 	IF_NULL_THEN_ABORT(t =
1181 			   _format(lbobj, LINEBREAK_STATE_EOT, bufSpc));
1182 	IF_NULL_THEN_ABORT(results[reslen] = gcstring_concat(s, t));
1183 	reslen++;
1184 	gcstring_DESTROY(s);
1185 	gcstring_DESTROY(t);
1186     }
1187 
1188     /* clenup. */
1189     gcstring_DESTROY(str);
1190     gcstring_DESTROY(bufStr);
1191     gcstring_DESTROY(bufSpc);
1192 
1193     /* Reset status then return the rest of result. */
1194     linebreak_reset(lbobj);
1195 
1196     if (lenp != NULL)
1197 	*lenp = reslen;
1198     return results;
1199 }
1200 
linebreak_break_partial(linebreak_t * lbobj,unistr_t * input)1201 gcstring_t **linebreak_break_partial(linebreak_t * lbobj, unistr_t * input)
1202 {
1203     return _break_partial(lbobj, input, NULL, (input == NULL));
1204 }
1205 
1206 /**
1207  * Perform line breaking algorithm on complete input.
1208  *
1209  * This function will consume heap size proportional to input size.
1210  * linebreak_break() is highly recommended.
1211  *
1212  * @param[in] lbobj linebreak object.
1213  * @param[in] input Unicode string.
1214  * @return array of broken grapheme cluster strings terminated by NULL.
1215  * If internal error occurred, lbobj->errnum is set then NULL is returned.
1216  */
linebreak_break_fast(linebreak_t * lbobj,unistr_t * input)1217 gcstring_t **linebreak_break_fast(linebreak_t * lbobj, unistr_t * input)
1218 {
1219     gcstring_t **ret;
1220 
1221     if (input == NULL) {
1222 	if ((ret = malloc(sizeof(gcstring_t *))) == NULL)
1223 	    lbobj->errnum = errno ? errno : ENOMEM;
1224 	else
1225 	    ret[0] = NULL;
1226 	return ret;
1227     }
1228 
1229     return _break_partial(lbobj, input, NULL, 1);
1230 }
1231 
1232 #define PARTIAL_LENGTH (1000)
1233 
1234 /** Perform line breaking algorithm on complete input.
1235  *
1236  * This function will consume constant size of heap.
1237  *
1238  * @param[in] lbobj linebreak object.
1239  * @param[in] input Unicode string.
1240  * @return array of broken grapheme cluster strings terminated by NULL.
1241  * If internal error occurred, lbobj->errnum is set then NULL is returned.
1242  */
linebreak_break(linebreak_t * lbobj,unistr_t * input)1243 gcstring_t **linebreak_break(linebreak_t * lbobj, unistr_t * input)
1244 {
1245     unistr_t unistr = { NULL, 0 };
1246     gcstring_t **ret, **appe, **r;
1247     size_t i, j, k, retlen, appelen;
1248 
1249     if ((ret = malloc(sizeof(gcstring_t *))) == NULL) {
1250 	lbobj->errnum = errno ? errno : ENOMEM;
1251 	return NULL;
1252     } else
1253 	ret[0] = NULL;
1254     if (input == NULL)
1255 	return ret;
1256     retlen = 0;
1257 
1258     unistr.len = PARTIAL_LENGTH;
1259     for (k = 0; PARTIAL_LENGTH < input->len - k; k += PARTIAL_LENGTH) {
1260 	unistr.str = input->str + k;
1261 	if ((appe = _break_partial(lbobj, &unistr, &appelen, 0)) == NULL) {
1262 	    for (i = 0; i < retlen; i++)
1263 		gcstring_destroy(ret[i]);
1264 	    free(ret);
1265 	    return NULL;
1266 	}
1267 	if (appelen) {
1268 	    if ((r = realloc(ret,
1269 			     sizeof(gcstring_t *) *
1270 			     (retlen + appelen + 1))) == NULL) {
1271 		lbobj->errnum = errno ? errno : ENOMEM;
1272 		for (i = 0; i < retlen; i++)
1273 		    gcstring_destroy(ret[i]);
1274 		free(ret);
1275 		for (j = 0; j < appelen; j++)
1276 		    gcstring_destroy(appe[j]);
1277 		free(appe);
1278 		return NULL;
1279 	    } else
1280 		ret = r;
1281 	    memcpy(ret + retlen, appe,
1282 		   sizeof(gcstring_t *) * (appelen + 1));
1283 	    retlen += appelen;
1284 	}
1285 	free(appe);
1286     }
1287     unistr.len = input->len - k;
1288     unistr.str = input->str + k;
1289     if (k < input->len) {
1290 	if ((appe = _break_partial(lbobj, &unistr, &appelen, 1)) == NULL) {
1291 	    for (i = 0; i < retlen; i++)
1292 		gcstring_destroy(ret[i]);
1293 	    free(ret);
1294 	    return NULL;
1295 	}
1296 	if (appelen) {
1297 	    if ((r = realloc(ret,
1298 			     sizeof(gcstring_t *) *
1299 			     (retlen + appelen + 1))) == NULL) {
1300 		lbobj->errnum = errno ? errno : ENOMEM;
1301 		for (i = 0; i < retlen; i++)
1302 		    gcstring_destroy(ret[i]);
1303 		free(ret);
1304 		for (j = 0; j < appelen; j++)
1305 		    gcstring_destroy(appe[j]);
1306 		free(appe);
1307 		return NULL;
1308 	    } else
1309 		ret = r;
1310 	    memcpy(ret + retlen, appe,
1311 		   sizeof(gcstring_t *) * (appelen + 1));
1312 	    retlen += appelen;
1313 	}
1314 	free(appe);
1315     }
1316 
1317     return ret;
1318 }
1319 
1320 /** Perform line breaking algorithm on UTF-8 text
1321  *
1322  * This function will consume constant size of heap.
1323  *
1324  * @param[in] lbobj linebreak object.
1325  * @param[in] input UTF-8 string, must not be NULL.
1326  * @param[in] len length of UTF-8 string.
1327  * @param[in] check check input.  See sombok_decode_utf8().
1328  * @return array of broken grapheme cluster strings terminated by NULL.
1329  * If internal error occurred, lbobj->errnum is set then NULL is returned.
1330  */
linebreak_break_from_utf8(linebreak_t * lbobj,char * input,size_t len,int check)1331 gcstring_t **linebreak_break_from_utf8(linebreak_t * lbobj,
1332 				       char *input, size_t len, int check)
1333 {
1334     unistr_t unistr = { NULL, 0 };
1335     gcstring_t **ret;
1336 
1337     if (input == NULL) {
1338 	lbobj->errnum = EINVAL;
1339 	return NULL;
1340     }
1341     if (sombok_decode_utf8(&unistr, 0, input, len, check) == NULL)
1342 	return NULL;
1343 
1344     ret = linebreak_break(lbobj, &unistr);
1345     free(unistr.str);
1346     return ret;
1347 }
1348 
/**
 * Free a NULL-terminated array of grapheme cluster strings.
 *
 * @param[in] result array to be freed.  If NULL, nothing is done.
 * @param[in] deep if non-zero, every element before the terminating NULL
 * is destroyed with gcstring_destroy() before the array itself is freed;
 * if zero, only the array is freed.
 */
void linebreak_free_result(gcstring_t ** result, int deep)
{
    gcstring_t **cur;

    if (result == NULL)
	return;
    if (deep) {
	for (cur = result; *cur != NULL; cur++)
	    gcstring_destroy(*cur);
    }
    free(result);
}
1360