1 /* Copyright (C) 2005 Morten K. Poulsen <morten at afdelingp.dk>
2  *
3  * $Id$
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a copy
6  * of this software and associated documentation files (the "Software"), to
7  * deal in the Software without restriction, including without limitation the
8  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
9  * sell copies of the Software, and to permit persons to whom the Software is
10  * furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <unistd.h>
25 #include <string.h>
26 #include <stdio.h>
27 #include <fcntl.h>
28 #include <stdlib.h>
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <sys/wait.h>
32 #include <ctype.h>
33 #include <iconv.h>
34 
35 #include "mlmmj.h"
36 #include "unistr.h"
37 #include "log_error.h"
38 #include "memory.h"
39 
40 /* This is allocated on the stack, so it can't be too big. */
41 #define ICONV_BUFFER_SIZE 160
42 
43 
unistr_new(void)44 unistr *unistr_new(void)
45 {
46 	unistr *ret;
47 
48 	ret = mymalloc(sizeof(unistr));
49 	ret->len = 0;
50 	ret->alloc_len = 64;
51 	ret->chars = mymalloc(ret->alloc_len * sizeof(unistr_char));
52 
53 	return ret;
54 }
55 
56 
/* Releases a unistr and its character buffer. Passing NULL is a no-op. */
void unistr_free(unistr *str)
{
	if (str != NULL) {
		myfree(str->chars);
		myfree(str);
	}
}
64 
65 
unistr_cmp(const unistr * str1,const unistr * str2)66 int unistr_cmp(const unistr *str1, const unistr *str2)
67 {
68 	unsigned int i;
69 
70 	for (i=0; i<str1->len; i++) {
71 		if (str1->chars[i] < str2->chars[i]) {
72 			return -1;
73 		} else if (str1->chars[i] > str2->chars[i]) {
74 			return 1;
75 		}
76 	}
77 	if (str2->len > str1->len) {
78 		return 1;
79 	}
80 	return 0;
81 }
82 
83 
unistr_dup(const unistr * str)84 unistr *unistr_dup(const unistr *str)
85 {
86 	unistr *ret;
87 	unsigned int i;
88 
89 	ret = unistr_new();
90 	for (i=0; i<str->len; i++) {
91 		unistr_append_char(ret, str->chars[i]);
92 	}
93 
94 	return ret;
95 }
96 
97 
/* Appends one character to str, doubling the character buffer first when
 * it is already full. */
void unistr_append_char(unistr *str, unistr_char uc)
{
	int full = !(str->len < str->alloc_len);

	if (full) {
		str->alloc_len = str->alloc_len * 2;
		str->chars = myrealloc(str->chars,
				str->alloc_len * sizeof(unistr_char));
	}

	str->chars[str->len] = uc;
	str->len++;
}
106 
107 
/* Appends bin_len bytes of US-ASCII text to str. Bytes outside the 7-bit
 * range are not valid US-ASCII and are replaced by '?'. */
void unistr_append_usascii(unistr *str, const char *binary, size_t bin_len)
{
	const unsigned char *src = (const unsigned char *)binary;
	unsigned int pos;

	for (pos = 0; pos < bin_len; pos++) {
		unistr_append_char(str, (src[pos] > 0x7F) ? '?' : src[pos]);
	}
}
120 
121 
/* Decodes bin_len bytes of UTF-8 from binary and appends the resulting
 * characters to str.
 *
 * Sequences of one to six bytes (values up to 0x7FFFFFFF) are accepted.
 * Each malformed sequence is replaced by a single '?':
 *  - a byte that cannot start a sequence,
 *  - a sequence cut short by the end of the input or by a byte that is not
 *    a continuation byte (that byte is NOT consumed; decoding resumes on it),
 *  - an overlong encoding, i.e. a value that fits in a shorter sequence.
 */
void unistr_append_utf8(unistr *str, const char *binary, size_t bin_len)
{
	const unsigned char *bin = (const unsigned char *)binary;
	size_t i = 0;

	while (i < bin_len) {
		unsigned char lead = bin[i++];
		unsigned int need;	/* continuation bytes still expected */
		unistr_char min;	/* smallest value needing this length */
		unistr_char ch;

		if (lead <= 0x7F) {  /* 1 */
			unistr_append_char(str, lead);
			continue;
		}

		if ((lead & 0xE0) == 0xC0) {  /* 2 */
			ch = lead & 0x1F;
			need = 1;
			min = 0x80;
		} else if ((lead & 0xF0) == 0xE0) {  /* 3 */
			ch = lead & 0x0F;
			need = 2;
			min = 0x800;
		} else if ((lead & 0xF8) == 0xF0) {  /* 4 */
			ch = lead & 0x07;
			need = 3;
			min = 0x10000;
		} else if ((lead & 0xFC) == 0xF8) {  /* 5 */
			ch = lead & 0x03;
			need = 4;
			min = 0x200000;
		} else if ((lead & 0xFE) == 0xFC) {  /* 6 */
			ch = lead & 0x01;
			need = 5;
			min = 0x4000000;
		} else {
			/* stray continuation byte, or 0xFE/0xFF */
			unistr_append_char(str, '?');
			continue;
		}

		for (; need > 0; need--) {
			/* never read past the input, and leave a byte that is
			 * not a continuation for the next round */
			if (i >= bin_len || (bin[i] & 0xC0) != 0x80)
				break;
			ch = (ch << 6) | (bin[i] & 0x3F);
			i++;
		}

		/* need > 0: truncated sequence; ch < min: overlong encoding.
		 * A lead byte with no payload bits set is fine as long as the
		 * decoded value is large enough (e.g. E0 A4 85, F0 9F 98 80). */
		if (need > 0 || ch < min)
			ch = '?';

		unistr_append_char(str, ch);
	}
}
171 
172 
/* Appends bin_len bytes of ISO-8859-1 text to str. Every byte maps to the
 * character with the same value, except that a NUL byte is replaced
 * by '?'. */
void unistr_append_iso88591(unistr *str, const char *binary, size_t bin_len)
{
	unsigned int pos = 0;

	while (pos < bin_len) {
		if (binary[pos] != 0x00) {
			unistr_append_char(str, (unsigned char)binary[pos]);
		} else {
			unistr_append_char(str, '?');
		}
		pos++;
	}
}
185 
186 
/* Converts bin_len bytes of text in the given charset to UTF-8 with iconv
 * and appends the result to str.
 *
 * The input is converted in chunks through a small stack buffer. Problems
 * are reported in-band:
 *  - unknown charset: "???" is appended and nothing is converted,
 *  - illegal input sequence: '?' is appended, one input byte is skipped and
 *    conversion continues,
 *  - incomplete sequence at the end of the input: '?' is appended,
 *  - any other iconv error: "???" is appended and conversion stops.
 * Output produced before an error is always kept.
 */
void unistr_append_iconv(unistr *str, char *binary, size_t bin_len,
		const char * charset)
{
	char bytes[ICONV_BUFFER_SIZE];
	char *buffer;
	size_t bufferleft;
	iconv_t cd;
	int err;

	cd = iconv_open("UTF-8", charset);
	if (cd == (iconv_t)-1) {
		unistr_append_usascii(str, "???", 3);
		return;
	}

	while (bin_len > 0) {
		buffer = bytes;
		bufferleft = ICONV_BUFFER_SIZE;
		err = 0;
		if (iconv(cd, &binary, &bin_len, &buffer, &bufferleft) == (size_t)-1) {
			/* save errno before any other call can change it */
			err = errno;
		}

		/* iconv converts as much as it can before reporting an error,
		 * so whatever is in the buffer is valid output in every case */
		unistr_append_utf8(str, bytes, ICONV_BUFFER_SIZE - bufferleft);

		if (err == 0 || err == E2BIG) {
			/* success, or output buffer full: go around again */
			continue;
		}
		if (err == EILSEQ) {
			/* illegal sequence; try to recover */
			unistr_append_usascii(str, "?", 1);
			bin_len--;
			binary++;
			continue;
		}
		if (err == EINVAL) {
			/* incomplete sequence; we're done */
			unistr_append_usascii(str, "?", 1);
			break;
		}
		/* some other error; abort */
		unistr_append_usascii(str, "???", 3);
		break;
	}
	iconv_close(cd);
}
227 
228 
unistr_dump(const unistr * str)229 void unistr_dump(const unistr *str)
230 {
231 	unsigned int i;
232 
233 	printf("unistr_dump(%p)\n", (void *)str);
234 	printf(" ->len = %lu\n", (unsigned long)str->len);
235 	printf(" ->alloc_len = %lu\n", (unsigned long)str->alloc_len);
236 	printf(" ->chars [ ");
237 	for (i=0; i<str->len; i++) {
238 		if ((str->chars[i] <= 0x7F) && (str->chars[i] != '\n')) {
239 			printf("'%c' ", str->chars[i]);
240 		} else {
241 			printf("0x%02X ", str->chars[i]);
242 		}
243 	}
244 	printf("]\n");
245 }
246 
247 
unistr_to_utf8(const unistr * str)248 char *unistr_to_utf8(const unistr *str)
249 {
250 	unsigned int i;
251 	size_t len = 0;
252 	char *ret;
253 	char *p;
254 
255 	for (i=0; i<str->len; i++) {
256 		if (str->chars[i] <= 0x7F) {
257 			len++;
258 		} else if (str->chars[i] <= 0x7FF) {
259 			len += 2;
260 		} else if (str->chars[i] <= 0xFFFF) {
261 			len += 3;
262 		} else if (str->chars[i] <= 0x1FFFFF) {
263 			len += 4;
264 		} else if (str->chars[i] <= 0x3FFFFFF) {
265 			len += 5;
266 		} else if (str->chars[i] <= 0x7FFFFFFF) {
267 			len += 6;
268 		} else {
269 			errno = 0;
270 			log_error(LOG_ARGS, "unistr_to_utf8(): can not utf-8 encode"
271 					"U+%04X", str->chars[i]);
272 			return mystrdup("");
273 		}
274 	}
275 	len++;  /* NUL */
276 
277 	ret = mymalloc(len);
278 	p = ret;
279 
280 	for (i=0; i<str->len; i++) {
281 		if (str->chars[i] <= 0x7F) {  /* 1 */
282 			*(p++) = str->chars[i];
283 		} else if (str->chars[i] <= 0x7FF) {  /* 2 */
284 			*(p++) = 192 + ((str->chars[i] & 1984) >> 6);
285 			*(p++) = 128 + (str->chars[i] & 63);
286 		} else if (str->chars[i] <= 0xFFFF) {  /* 3 */
287 			*(p++) = 224 + ((str->chars[i] & 61440) >> 12);
288 			*(p++) = 128 + ((str->chars[i] & 4032) >> 6);
289 			*(p++) = 128 + (str->chars[i] & 63);
290 		} else if (str->chars[i] <= 0x1FFFFF) {  /* 4 */
291 			*(p++) = 240 + ((str->chars[i] & 1835008) >> 18);
292 			*(p++) = 128 + ((str->chars[i] & 258048) >> 12);
293 			*(p++) = 128 + ((str->chars[i] & 4032) >> 6);
294 			*(p++) = 128 + (str->chars[i] & 63);
295 		} else if (str->chars[i] <= 0x3FFFFFF) {  /* 5 */
296 			*(p++) = 248 + ((str->chars[i] & 50331648) >> 24);
297 			*(p++) = 128 + ((str->chars[i] & 16515072) >> 18);
298 			*(p++) = 128 + ((str->chars[i] & 258048) >> 12);
299 			*(p++) = 128 + ((str->chars[i] & 4032) >> 6);
300 			*(p++) = 128 + (str->chars[i] & 63);
301 		} else if (str->chars[i] <= 0x7FFFFFFF) {  /* 6 */
302 			*(p++) = 252 + ((str->chars[i] & 1073741824) >> 30);
303 			*(p++) = 128 + ((str->chars[i] & 1056964608) >> 24);
304 			*(p++) = 128 + ((str->chars[i] & 16515072) >> 18);
305 			*(p++) = 128 + ((str->chars[i] & 258048) >> 12);
306 			*(p++) = 128 + ((str->chars[i] & 4032) >> 6);
307 			*(p++) = 128 + (str->chars[i] & 63);
308 		} else {
309 			errno = 0;
310 			log_error(LOG_ARGS, "unistr_to_utf8(): can not utf-8 encode"
311 					"U+%04X", str->chars[i]);
312 		}
313 	}
314 	*(p++) = '\0';
315 
316 	return ret;
317 }
318 
319 
/* Returns the numeric value (0-15) of the hexadecimal digit ch, in either
 * letter case. Any other character yields 0, so callers that need to tell
 * "0" apart from garbage must validate the digit first. */
static int hexval(char ch)
{
	/* Explicit ranges instead of tolower(): no undefined behaviour for
	 * negative char values and no dependence on the locale. */
	if ((ch >= '0') && (ch <= '9')) {
		return ch - '0';
	}
	if ((ch >= 'a') && (ch <= 'f')) {
		return 10 + (ch - 'a');
	}
	if ((ch >= 'A') && (ch <= 'F')) {
		return 10 + (ch - 'A');
	}

	return 0;
}
334 
335 
/* Decodes the "Q" encoded text in str.
 *
 * "=XX" (two hex digits) becomes the byte 0xXX, '_' becomes a space, and
 * everything else is copied unchanged; an '=' that is not followed by two
 * hex digits is copied literally.
 *
 * On return *binary points to a newly allocated buffer (to be released by
 * the caller) holding *bin_len bytes. The buffer is NOT NUL-terminated.
 */
static void decode_qp(char *str, char **binary, size_t *bin_len)
{
	/* unsigned view: <ctype.h> functions are undefined for negative
	 * values, which plain char yields for bytes above 0x7F */
	const unsigned char *in = (const unsigned char *)str;
	char *out;
	size_t n = 0;

	/* decoded string will never be longer, and we don't include a NUL */
	out = mymalloc(strlen(str));

	while (*in) {
		/* isxdigit() is false for NUL, so the checks short-circuit
		 * before reading past the terminator */
		if ((in[0] == '=') && isxdigit(in[1]) && isxdigit(in[2])) {
			out[n++] = (char)((hexval((char)in[1]) << 4)
					+ hexval((char)in[2]));
			in += 3;
		} else if (in[0] == '_') {
			out[n++] = 0x20;
			in++;
		} else {
			out[n++] = (char)in[0];
			in++;
		}
	}

	*binary = out;
	*bin_len = n;
}
355 
356 
/* Decodes the base64 text in str.
 *
 * Characters outside the base64 alphabet (including '=' padding and
 * whitespace) are skipped. On return *binary points to a newly allocated
 * buffer (to be released by the caller) holding *bin_len bytes. The buffer
 * is NOT NUL-terminated.
 */
static void decode_base64(char *str, char **binary, size_t *bin_len)
{
	/* value of each input byte in the base64 alphabet, -1 if none */
	static const signed char tab[256] = {
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
		52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
		-1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
		15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
		-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
		41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
	};
	size_t len;
	size_t i;
	unsigned int out;
	int out_numbits;
	int val;

	/* decoded string will never be longer, and we don't include a NUL */
	len = strlen(str);
	*binary = mymalloc(len);
	*bin_len = 0;

	out = 0;
	out_numbits = 0;
	for (i = 0; i < len; i++) {
		val = tab[(unsigned char)str[i]];
		if (val == -1)
			continue;
		/* shift in six more bits; stale high bits are harmless
		 * because only the low out_numbits bits are ever read */
		out <<= 6;
		out |= (unsigned int)val;
		out_numbits += 6;
		if (out_numbits >= 8) {
			(*binary)[(*bin_len)++] = (char)((out >> (out_numbits - 8)) & 255);
			out_numbits -= 8;
		}
	}
}
403 
404 
405 /* wsp, if not NULL, is an earlier offset into the same string as word,
406  * to whitespace that should only be included if word is not encoded. */
header_decode_word(char * wsp,char * word,unistr * ret)407 static int header_decode_word(char *wsp, char *word, unistr *ret)
408 {
409 	char *my_word;
410 	char *charset, *encoding, *string, *end;
411 	char *binary;
412 	size_t bin_len;
413 
414 	if (wsp == NULL)
415 		wsp = word;
416 
417 	if ((word[0] != '=') || (word[1] != '?')) {
418 		unistr_append_usascii(ret, wsp, strlen(wsp));
419 		return 0;
420 	}
421 
422 	my_word = mystrdup(word);
423 
424 	charset = my_word + 2;
425 
426 	if ((encoding = strchr(charset, '?')) == NULL) {
427 		/* missing encoding */
428 		unistr_append_usascii(ret, wsp, word-wsp);
429 		unistr_append_usascii(ret, "???", 3);
430 		myfree(my_word);
431 		return 0;
432 	}
433 	*(encoding++) = '\0';
434 
435 	if ((string = strchr(encoding, '?')) == NULL) {
436 		/* missing string */
437 		unistr_append_usascii(ret, wsp, word-wsp);
438 		unistr_append_usascii(ret, "???", 3);
439 		myfree(my_word);
440 		return 0;
441 	}
442 	*(string++) = '\0';
443 
444 	if ((end = strchr(string, '?')) == NULL) {
445 		/* missing end */
446 		unistr_append_usascii(ret, wsp, word-wsp);
447 		unistr_append_usascii(ret, "???", 3);
448 		myfree(my_word);
449 		return 0;
450 	}
451 	*(end++) = '\0';
452 	if ((end[0] != '=') || (end[1] != '\0')) {
453 		/* broken end */
454 		unistr_append_usascii(ret, wsp, word-wsp);
455 		unistr_append_usascii(ret, "???", 3);
456 		myfree(my_word);
457 		return 0;
458 	}
459 
460 	if (tolower(encoding[0]) == 'q') {
461 		decode_qp(string, &binary, &bin_len);
462 	} else if (tolower(encoding[0]) == 'b') {
463 		decode_base64(string, &binary, &bin_len);
464 	} else {
465 		/* unknown encoding */
466 		unistr_append_usascii(ret, wsp, word-wsp);
467 		unistr_append_usascii(ret, "???", 3);
468 		myfree(my_word);
469 		return 0;
470 	}
471 
472 	if (strcasecmp(charset, "us-ascii") == 0) {
473 		unistr_append_usascii(ret, binary, bin_len);
474 	} else if (strcasecmp(charset, "utf-8") == 0) {
475 		unistr_append_utf8(ret, binary, bin_len);
476 	} else if (strcasecmp(charset, "iso-8859-1") == 0) {
477 		unistr_append_iso88591(ret, binary, bin_len);
478 	} else {
479 		unistr_append_iconv(ret, binary, bin_len, charset);
480 	}
481 
482 	myfree(my_word);
483 	myfree(binary);
484 
485 	return 1;
486 }
487 
488 
489 /* IN: "   =?iso-8859-1?Q?hyggem=F8de?= torsdag   "
490  * OUT: "hyggem\xC3\xB8de torsdag"
491  */
unistr_header_to_utf8(const char * str)492 char *unistr_header_to_utf8(const char *str)
493 {
494 	char *my_str;
495 	char *word;
496 	char *p;
497 	char c;
498 	char *wsp = NULL;
499 	int decoded = 0;
500 	unistr *us;
501 	char *ret;
502 
503 	my_str = mystrdup(str);
504 	us = unistr_new();
505 
506 	p = my_str + strspn(my_str, " \t\n");
507 	wsp = p;
508 	while (*p) {
509 		if (!decoded) {
510 			unistr_append_usascii(us, wsp, p-wsp);
511 			wsp = NULL;
512 		}
513 		word = p;
514 		p += strcspn(p, " \t\n");
515 		c = *p;
516 		*p = '\0';
517 		decoded = header_decode_word(wsp, word, us);
518 		*p = c;
519 		wsp = p;
520 		p += strspn(p, " \t\n");
521 	}
522 
523 	myfree(my_str);
524 
525 	ret = unistr_to_utf8(us);
526 	unistr_free(us);
527 
528 	return ret;
529 }
530 
531 
/* Returns 1 if ch may appear unencoded in a header value (letters, digits,
 * space and the punctuation . , : ; -), 0 otherwise. */
static int is_ok_in_header(char ch)
{
	int alnum = ((ch >= 'a') && (ch <= 'z'))
			|| ((ch >= 'A') && (ch <= 'Z'))
			|| ((ch >= '0') && (ch <= '9'));

	if (alnum)
		return 1;

	switch (ch) {
	case '.':
	case ',':
	case ':':
	case ';':
	case '-':
	case ' ':
		return 1;
	default:
		return 0;
	}
}
545 
546 
547 /* IN: "   hyggem\xC3\xB8de torsdag   "
548  * OUT: "=?utf-8?Q?hyggem=C3=B8de_torsdag?="
549  */
unistr_utf8_to_header(const char * str)550 char *unistr_utf8_to_header(const char *str)
551 {
552 	unistr *us;
553 	char *my_str;
554 	char *ret;
555 	char *wsp = NULL;
556 	char *p;
557 	int clean;
558 	char buf[4];
559 
560 	my_str = mystrdup(str);
561 
562 	/* trim whitespace and see if the header is clean */
563 
564 	ret = my_str + strspn(my_str, " \t\n");
565 
566 	clean = 1;
567 	for (p=ret; *p; p++) {
568 		if (*p == ' ' || *p == '\t' || *p == '\n') {
569 			if (wsp == NULL)
570 				wsp = p;
571 		} else {
572 			wsp = NULL;
573 		}
574 		if (clean && !is_ok_in_header(*p))
575 			clean = 0;
576 	}
577 	if (wsp != NULL)
578 		*wsp = '\0';
579 
580 	if (clean) {
581 		ret = mystrdup(ret);
582 		myfree(my_str);
583 		return ret;
584 	}
585 
586 	us = unistr_new();
587 
588 	unistr_append_usascii(us, "=?utf-8?q?", 10);
589 	for (p=ret; *p; p++) {
590 		if (*p == 0x20) {
591 			unistr_append_char(us, '_');
592 		} else if (is_ok_in_header(*p)) {
593 			unistr_append_char(us, *p);
594 		} else {
595 			snprintf(buf, sizeof(buf), "=%02X", (unsigned char)*p);
596 			unistr_append_usascii(us, buf, 3);
597 		}
598 	}
599 	unistr_append_usascii(us, "?=", 2);
600 
601 	ret = unistr_to_utf8(us);
602 	unistr_free(us);
603 	myfree(my_str);
604 
605 	return ret;
606 }
607 
608 
609 /* IN: "hyggem\\u00F8de torsdag"
610  * OUT: "hyggem\xC3\xB8de torsdag"
611  */
unistr_escaped_to_utf8(const char * str)612 char *unistr_escaped_to_utf8(const char *str)
613 {
614 	unistr_char ch;
615 	unistr *us;
616 	char *ret;
617 	char u[5];
618 	int len;
619 	int skip = 0;
620 
621 	us = unistr_new();
622 
623 	while (*str) {
624 		if (*str == '\\') {
625 			str++;
626 			if (*str == 'u' && !skip) {
627 				str++;
628 				if (!isxdigit(str[0]) ||
629 						!isxdigit(str[1]) ||
630 						!isxdigit(str[2]) ||
631 						!isxdigit(str[3])) {
632 					unistr_append_char(us, '?');
633 					continue;
634 				}
635 				u[0] = *str++;
636 				u[1] = *str++;
637 				u[2] = *str++;
638 				u[3] = *str++;
639 				u[4] = '\0';
640 				ch = strtol(u, NULL, 16);
641 				unistr_append_char(us, ch);
642 				continue;
643 			} else {
644 				unistr_append_char(us, '\\');
645 				/* Avoid processing the second backslash of a
646 				 * double-backslash; but if this was a such a
647 				 * one, go back to normal */
648 				skip = !skip;
649 				continue;
650 			}
651 		} else {
652 			u[0] = *str;
653 			len = 1;
654 			str++;
655 			while (*str && (unsigned char)u[0] > 0x7F) {
656 				u[0] = *str;
657 				len++;
658 				str++;
659 			}
660 			unistr_append_utf8(us, str - len, len);
661 		}
662 	}
663 
664 	ret = unistr_to_utf8(us);
665 	unistr_free(us);
666 
667 	return ret;
668 }
669