/*-------------------------------------------------------------------------
 *
 *	  Utility functions for conversion procs.
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/utils/mb/conv.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"
#include "mb/pg_wchar.h"
15 
16 
17 /*
18  * local2local: a generic single byte charset encoding
19  * conversion between two ASCII-superset encodings.
20  *
21  * l points to the source string of length len
22  * p is the output area (must be large enough!)
23  * src_encoding is the PG identifier for the source encoding
24  * dest_encoding is the PG identifier for the target encoding
25  * tab holds conversion entries for the source charset
26  * starting from 128 (0x80). each entry in the table holds the corresponding
27  * code point for the target charset, or 0 if there is no equivalent code.
28  */
29 void
local2local(const unsigned char * l,unsigned char * p,int len,int src_encoding,int dest_encoding,const unsigned char * tab)30 local2local(const unsigned char *l,
31 			unsigned char *p,
32 			int len,
33 			int src_encoding,
34 			int dest_encoding,
35 			const unsigned char *tab)
36 {
37 	unsigned char c1,
38 				c2;
39 
40 	while (len > 0)
41 	{
42 		c1 = *l;
43 		if (c1 == 0)
44 			report_invalid_encoding(src_encoding, (const char *) l, len);
45 		if (!IS_HIGHBIT_SET(c1))
46 			*p++ = c1;
47 		else
48 		{
49 			c2 = tab[c1 - HIGHBIT];
50 			if (c2)
51 				*p++ = c2;
52 			else
53 				report_untranslatable_char(src_encoding, dest_encoding,
54 										   (const char *) l, len);
55 		}
56 		l++;
57 		len--;
58 	}
59 	*p = '\0';
60 }
61 
/*
 * LATINn ---> MIC when the charset's local codes map directly to MIC
 *
 * l is the source string, len bytes long
 * p is the output area (must be large enough: up to two bytes are written
 *		per source byte, followed by a terminating zero byte)
 * lc is the mule character set id for the local encoding
 * encoding is the PG identifier for the local encoding
 *
 * A byte with the high bit set is emitted as the pair (lc, byte); any other
 * byte is emitted as-is.  A zero byte in the input is handed to
 * report_invalid_encoding().
 */
void
latin2mic(const unsigned char *l, unsigned char *p, int len,
		  int lc, int encoding)
{
	for (; len > 0; l++, len--)
	{
		int			ch = *l;

		if (ch == 0)
			report_invalid_encoding(encoding, (const char *) l, len);

		/* non-ASCII bytes are prefixed with the charset id */
		if (IS_HIGHBIT_SET(ch))
			*p++ = lc;
		*p++ = ch;
	}
	*p = '\0';
}
89 
90 /*
91  * MIC ---> LATINn when the charset's local codes map directly to MIC
92  *
93  * mic points to the source string of length len
94  * p is the output area (must be large enough!)
95  * lc is the mule character set id for the local encoding
96  * encoding is the PG identifier for the local encoding
97  */
98 void
mic2latin(const unsigned char * mic,unsigned char * p,int len,int lc,int encoding)99 mic2latin(const unsigned char *mic, unsigned char *p, int len,
100 		  int lc, int encoding)
101 {
102 	int			c1;
103 
104 	while (len > 0)
105 	{
106 		c1 = *mic;
107 		if (c1 == 0)
108 			report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
109 		if (!IS_HIGHBIT_SET(c1))
110 		{
111 			/* easy for ASCII */
112 			*p++ = c1;
113 			mic++;
114 			len--;
115 		}
116 		else
117 		{
118 			int			l = pg_mic_mblen(mic);
119 
120 			if (len < l)
121 				report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
122 										len);
123 			if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]))
124 				report_untranslatable_char(PG_MULE_INTERNAL, encoding,
125 										   (const char *) mic, len);
126 			*p++ = mic[1];
127 			mic += 2;
128 			len -= 2;
129 		}
130 	}
131 	*p = '\0';
132 }
133 
134 
135 /*
136  * ASCII ---> MIC
137  *
138  * While ordinarily SQL_ASCII encoding is forgiving of high-bit-set
139  * characters, here we must take a hard line because we don't know
140  * the appropriate MIC equivalent.
141  */
142 void
pg_ascii2mic(const unsigned char * l,unsigned char * p,int len)143 pg_ascii2mic(const unsigned char *l, unsigned char *p, int len)
144 {
145 	int			c1;
146 
147 	while (len > 0)
148 	{
149 		c1 = *l;
150 		if (c1 == 0 || IS_HIGHBIT_SET(c1))
151 			report_invalid_encoding(PG_SQL_ASCII, (const char *) l, len);
152 		*p++ = c1;
153 		l++;
154 		len--;
155 	}
156 	*p = '\0';
157 }
158 
159 /*
160  * MIC ---> ASCII
161  */
162 void
pg_mic2ascii(const unsigned char * mic,unsigned char * p,int len)163 pg_mic2ascii(const unsigned char *mic, unsigned char *p, int len)
164 {
165 	int			c1;
166 
167 	while (len > 0)
168 	{
169 		c1 = *mic;
170 		if (c1 == 0 || IS_HIGHBIT_SET(c1))
171 			report_untranslatable_char(PG_MULE_INTERNAL, PG_SQL_ASCII,
172 									   (const char *) mic, len);
173 		*p++ = c1;
174 		mic++;
175 		len--;
176 	}
177 	*p = '\0';
178 }
179 
180 /*
181  * latin2mic_with_table: a generic single byte charset encoding
182  * conversion from a local charset to the mule internal code.
183  *
184  * l points to the source string of length len
185  * p is the output area (must be large enough!)
186  * lc is the mule character set id for the local encoding
187  * encoding is the PG identifier for the local encoding
188  * tab holds conversion entries for the local charset
189  * starting from 128 (0x80). each entry in the table holds the corresponding
190  * code point for the mule encoding, or 0 if there is no equivalent code.
191  */
192 void
latin2mic_with_table(const unsigned char * l,unsigned char * p,int len,int lc,int encoding,const unsigned char * tab)193 latin2mic_with_table(const unsigned char *l,
194 					 unsigned char *p,
195 					 int len,
196 					 int lc,
197 					 int encoding,
198 					 const unsigned char *tab)
199 {
200 	unsigned char c1,
201 				c2;
202 
203 	while (len > 0)
204 	{
205 		c1 = *l;
206 		if (c1 == 0)
207 			report_invalid_encoding(encoding, (const char *) l, len);
208 		if (!IS_HIGHBIT_SET(c1))
209 			*p++ = c1;
210 		else
211 		{
212 			c2 = tab[c1 - HIGHBIT];
213 			if (c2)
214 			{
215 				*p++ = lc;
216 				*p++ = c2;
217 			}
218 			else
219 				report_untranslatable_char(encoding, PG_MULE_INTERNAL,
220 										   (const char *) l, len);
221 		}
222 		l++;
223 		len--;
224 	}
225 	*p = '\0';
226 }
227 
228 /*
229  * mic2latin_with_table: a generic single byte charset encoding
230  * conversion from the mule internal code to a local charset.
231  *
232  * mic points to the source string of length len
233  * p is the output area (must be large enough!)
234  * lc is the mule character set id for the local encoding
235  * encoding is the PG identifier for the local encoding
236  * tab holds conversion entries for the mule internal code's second byte,
237  * starting from 128 (0x80). each entry in the table holds the corresponding
238  * code point for the local charset, or 0 if there is no equivalent code.
239  */
240 void
mic2latin_with_table(const unsigned char * mic,unsigned char * p,int len,int lc,int encoding,const unsigned char * tab)241 mic2latin_with_table(const unsigned char *mic,
242 					 unsigned char *p,
243 					 int len,
244 					 int lc,
245 					 int encoding,
246 					 const unsigned char *tab)
247 {
248 	unsigned char c1,
249 				c2;
250 
251 	while (len > 0)
252 	{
253 		c1 = *mic;
254 		if (c1 == 0)
255 			report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
256 		if (!IS_HIGHBIT_SET(c1))
257 		{
258 			/* easy for ASCII */
259 			*p++ = c1;
260 			mic++;
261 			len--;
262 		}
263 		else
264 		{
265 			int			l = pg_mic_mblen(mic);
266 
267 			if (len < l)
268 				report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
269 										len);
270 			if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]) ||
271 				(c2 = tab[mic[1] - HIGHBIT]) == 0)
272 			{
273 				report_untranslatable_char(PG_MULE_INTERNAL, encoding,
274 										   (const char *) mic, len);
275 				break;			/* keep compiler quiet */
276 			}
277 			*p++ = c2;
278 			mic += 2;
279 			len -= 2;
280 		}
281 	}
282 	*p = '\0';
283 }
284 
285 /*
286  * comparison routine for bsearch()
287  * this routine is intended for combined UTF8 -> local code
288  */
289 static int
compare3(const void * p1,const void * p2)290 compare3(const void *p1, const void *p2)
291 {
292 	uint32		s1,
293 				s2,
294 				d1,
295 				d2;
296 
297 	s1 = *(const uint32 *) p1;
298 	s2 = *((const uint32 *) p1 + 1);
299 	d1 = ((const pg_utf_to_local_combined *) p2)->utf1;
300 	d2 = ((const pg_utf_to_local_combined *) p2)->utf2;
301 	return (s1 > d1 || (s1 == d1 && s2 > d2)) ? 1 : ((s1 == d1 && s2 == d2) ? 0 : -1);
302 }
303 
304 /*
305  * comparison routine for bsearch()
306  * this routine is intended for local code -> combined UTF8
307  */
308 static int
compare4(const void * p1,const void * p2)309 compare4(const void *p1, const void *p2)
310 {
311 	uint32		v1,
312 				v2;
313 
314 	v1 = *(const uint32 *) p1;
315 	v2 = ((const pg_local_to_utf_combined *) p2)->code;
316 	return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
317 }
318 
319 /*
320  * store 32bit character representation into multibyte stream
321  */
322 static inline unsigned char *
store_coded_char(unsigned char * dest,uint32 code)323 store_coded_char(unsigned char *dest, uint32 code)
324 {
325 	if (code & 0xff000000)
326 		*dest++ = code >> 24;
327 	if (code & 0x00ff0000)
328 		*dest++ = code >> 16;
329 	if (code & 0x0000ff00)
330 		*dest++ = code >> 8;
331 	if (code & 0x000000ff)
332 		*dest++ = code;
333 	return dest;
334 }
335 
336 /*
337  * Convert a character using a conversion radix tree.
338  *
339  * 'l' is the length of the input character in bytes, and b1-b4 are
340  * the input character's bytes.
341  */
342 static inline uint32
pg_mb_radix_conv(const pg_mb_radix_tree * rt,int l,unsigned char b1,unsigned char b2,unsigned char b3,unsigned char b4)343 pg_mb_radix_conv(const pg_mb_radix_tree *rt,
344 				 int l,
345 				 unsigned char b1,
346 				 unsigned char b2,
347 				 unsigned char b3,
348 				 unsigned char b4)
349 {
350 	if (l == 4)
351 	{
352 		/* 4-byte code */
353 
354 		/* check code validity */
355 		if (b1 < rt->b4_1_lower || b1 > rt->b4_1_upper ||
356 			b2 < rt->b4_2_lower || b2 > rt->b4_2_upper ||
357 			b3 < rt->b4_3_lower || b3 > rt->b4_3_upper ||
358 			b4 < rt->b4_4_lower || b4 > rt->b4_4_upper)
359 			return 0;
360 
361 		/* perform lookup */
362 		if (rt->chars32)
363 		{
364 			uint32		idx = rt->b4root;
365 
366 			idx = rt->chars32[b1 + idx - rt->b4_1_lower];
367 			idx = rt->chars32[b2 + idx - rt->b4_2_lower];
368 			idx = rt->chars32[b3 + idx - rt->b4_3_lower];
369 			return rt->chars32[b4 + idx - rt->b4_4_lower];
370 		}
371 		else
372 		{
373 			uint16		idx = rt->b4root;
374 
375 			idx = rt->chars16[b1 + idx - rt->b4_1_lower];
376 			idx = rt->chars16[b2 + idx - rt->b4_2_lower];
377 			idx = rt->chars16[b3 + idx - rt->b4_3_lower];
378 			return rt->chars16[b4 + idx - rt->b4_4_lower];
379 		}
380 	}
381 	else if (l == 3)
382 	{
383 		/* 3-byte code */
384 
385 		/* check code validity */
386 		if (b2 < rt->b3_1_lower || b2 > rt->b3_1_upper ||
387 			b3 < rt->b3_2_lower || b3 > rt->b3_2_upper ||
388 			b4 < rt->b3_3_lower || b4 > rt->b3_3_upper)
389 			return 0;
390 
391 		/* perform lookup */
392 		if (rt->chars32)
393 		{
394 			uint32		idx = rt->b3root;
395 
396 			idx = rt->chars32[b2 + idx - rt->b3_1_lower];
397 			idx = rt->chars32[b3 + idx - rt->b3_2_lower];
398 			return rt->chars32[b4 + idx - rt->b3_3_lower];
399 		}
400 		else
401 		{
402 			uint16		idx = rt->b3root;
403 
404 			idx = rt->chars16[b2 + idx - rt->b3_1_lower];
405 			idx = rt->chars16[b3 + idx - rt->b3_2_lower];
406 			return rt->chars16[b4 + idx - rt->b3_3_lower];
407 		}
408 	}
409 	else if (l == 2)
410 	{
411 		/* 2-byte code */
412 
413 		/* check code validity - first byte */
414 		if (b3 < rt->b2_1_lower || b3 > rt->b2_1_upper ||
415 			b4 < rt->b2_2_lower || b4 > rt->b2_2_upper)
416 			return 0;
417 
418 		/* perform lookup */
419 		if (rt->chars32)
420 		{
421 			uint32		idx = rt->b2root;
422 
423 			idx = rt->chars32[b3 + idx - rt->b2_1_lower];
424 			return rt->chars32[b4 + idx - rt->b2_2_lower];
425 		}
426 		else
427 		{
428 			uint16		idx = rt->b2root;
429 
430 			idx = rt->chars16[b3 + idx - rt->b2_1_lower];
431 			return rt->chars16[b4 + idx - rt->b2_2_lower];
432 		}
433 	}
434 	else if (l == 1)
435 	{
436 		/* 1-byte code */
437 
438 		/* check code validity - first byte */
439 		if (b4 < rt->b1_lower || b4 > rt->b1_upper)
440 			return 0;
441 
442 		/* perform lookup */
443 		if (rt->chars32)
444 			return rt->chars32[b4 + rt->b1root - rt->b1_lower];
445 		else
446 			return rt->chars16[b4 + rt->b1root - rt->b1_lower];
447 	}
448 	return 0;					/* shouldn't happen */
449 }
450 
451 /*
452  * UTF8 ---> local code
453  *
454  * utf: input string in UTF8 encoding (need not be null-terminated)
455  * len: length of input string (in bytes)
456  * iso: pointer to the output area (must be large enough!)
457 		  (output string will be null-terminated)
458  * map: conversion map for single characters
459  * cmap: conversion map for combined characters
460  *		  (optional, pass NULL if none)
461  * cmapsize: number of entries in the conversion map for combined characters
462  *		  (optional, pass 0 if none)
463  * conv_func: algorithmic encoding conversion function
464  *		  (optional, pass NULL if none)
465  * encoding: PG identifier for the local encoding
466  *
467  * For each character, the cmap (if provided) is consulted first; if no match,
468  * the map is consulted next; if still no match, the conv_func (if provided)
469  * is applied.  An error is raised if no match is found.
470  *
471  * See pg_wchar.h for more details about the data structures used here.
472  */
473 void
UtfToLocal(const unsigned char * utf,int len,unsigned char * iso,const pg_mb_radix_tree * map,const pg_utf_to_local_combined * cmap,int cmapsize,utf_local_conversion_func conv_func,int encoding)474 UtfToLocal(const unsigned char *utf, int len,
475 		   unsigned char *iso,
476 		   const pg_mb_radix_tree *map,
477 		   const pg_utf_to_local_combined *cmap, int cmapsize,
478 		   utf_local_conversion_func conv_func,
479 		   int encoding)
480 {
481 	uint32		iutf;
482 	int			l;
483 	const pg_utf_to_local_combined *cp;
484 
485 	if (!PG_VALID_ENCODING(encoding))
486 		ereport(ERROR,
487 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
488 				 errmsg("invalid encoding number: %d", encoding)));
489 
490 	for (; len > 0; len -= l)
491 	{
492 		unsigned char b1 = 0;
493 		unsigned char b2 = 0;
494 		unsigned char b3 = 0;
495 		unsigned char b4 = 0;
496 
497 		/* "break" cases all represent errors */
498 		if (*utf == '\0')
499 			break;
500 
501 		l = pg_utf_mblen(utf);
502 		if (len < l)
503 			break;
504 
505 		if (!pg_utf8_islegal(utf, l))
506 			break;
507 
508 		if (l == 1)
509 		{
510 			/* ASCII case is easy, assume it's one-to-one conversion */
511 			*iso++ = *utf++;
512 			continue;
513 		}
514 
515 		/* collect coded char of length l */
516 		if (l == 2)
517 		{
518 			b3 = *utf++;
519 			b4 = *utf++;
520 		}
521 		else if (l == 3)
522 		{
523 			b2 = *utf++;
524 			b3 = *utf++;
525 			b4 = *utf++;
526 		}
527 		else if (l == 4)
528 		{
529 			b1 = *utf++;
530 			b2 = *utf++;
531 			b3 = *utf++;
532 			b4 = *utf++;
533 		}
534 		else
535 		{
536 			elog(ERROR, "unsupported character length %d", l);
537 			iutf = 0;			/* keep compiler quiet */
538 		}
539 		iutf = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
540 
541 		/* First, try with combined map if possible */
542 		if (cmap && len > l)
543 		{
544 			const unsigned char *utf_save = utf;
545 			int			len_save = len;
546 			int			l_save = l;
547 
548 			/* collect next character, same as above */
549 			len -= l;
550 
551 			l = pg_utf_mblen(utf);
552 			if (len < l)
553 				break;
554 
555 			if (!pg_utf8_islegal(utf, l))
556 				break;
557 
558 			/* We assume ASCII character cannot be in combined map */
559 			if (l > 1)
560 			{
561 				uint32		iutf2;
562 				uint32		cutf[2];
563 
564 				if (l == 2)
565 				{
566 					iutf2 = *utf++ << 8;
567 					iutf2 |= *utf++;
568 				}
569 				else if (l == 3)
570 				{
571 					iutf2 = *utf++ << 16;
572 					iutf2 |= *utf++ << 8;
573 					iutf2 |= *utf++;
574 				}
575 				else if (l == 4)
576 				{
577 					iutf2 = *utf++ << 24;
578 					iutf2 |= *utf++ << 16;
579 					iutf2 |= *utf++ << 8;
580 					iutf2 |= *utf++;
581 				}
582 				else
583 				{
584 					elog(ERROR, "unsupported character length %d", l);
585 					iutf2 = 0;	/* keep compiler quiet */
586 				}
587 
588 				cutf[0] = iutf;
589 				cutf[1] = iutf2;
590 
591 				cp = bsearch(cutf, cmap, cmapsize,
592 							 sizeof(pg_utf_to_local_combined), compare3);
593 
594 				if (cp)
595 				{
596 					iso = store_coded_char(iso, cp->code);
597 					continue;
598 				}
599 			}
600 
601 			/* fail, so back up to reprocess second character next time */
602 			utf = utf_save;
603 			len = len_save;
604 			l = l_save;
605 		}
606 
607 		/* Now check ordinary map */
608 		if (map)
609 		{
610 			uint32		converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
611 
612 			if (converted)
613 			{
614 				iso = store_coded_char(iso, converted);
615 				continue;
616 			}
617 		}
618 
619 		/* if there's a conversion function, try that */
620 		if (conv_func)
621 		{
622 			uint32		converted = (*conv_func) (iutf);
623 
624 			if (converted)
625 			{
626 				iso = store_coded_char(iso, converted);
627 				continue;
628 			}
629 		}
630 
631 		/* failed to translate this character */
632 		report_untranslatable_char(PG_UTF8, encoding,
633 								   (const char *) (utf - l), len);
634 	}
635 
636 	/* if we broke out of loop early, must be invalid input */
637 	if (len > 0)
638 		report_invalid_encoding(PG_UTF8, (const char *) utf, len);
639 
640 	*iso = '\0';
641 }
642 
643 /*
644  * local code ---> UTF8
645  *
646  * iso: input string in local encoding (need not be null-terminated)
647  * len: length of input string (in bytes)
648  * utf: pointer to the output area (must be large enough!)
649 		  (output string will be null-terminated)
650  * map: conversion map for single characters
651  * cmap: conversion map for combined characters
652  *		  (optional, pass NULL if none)
653  * cmapsize: number of entries in the conversion map for combined characters
654  *		  (optional, pass 0 if none)
655  * conv_func: algorithmic encoding conversion function
656  *		  (optional, pass NULL if none)
657  * encoding: PG identifier for the local encoding
658  *
659  * For each character, the map is consulted first; if no match, the cmap
660  * (if provided) is consulted next; if still no match, the conv_func
661  * (if provided) is applied.  An error is raised if no match is found.
662  *
663  * See pg_wchar.h for more details about the data structures used here.
664  */
665 void
LocalToUtf(const unsigned char * iso,int len,unsigned char * utf,const pg_mb_radix_tree * map,const pg_local_to_utf_combined * cmap,int cmapsize,utf_local_conversion_func conv_func,int encoding)666 LocalToUtf(const unsigned char *iso, int len,
667 		   unsigned char *utf,
668 		   const pg_mb_radix_tree *map,
669 		   const pg_local_to_utf_combined *cmap, int cmapsize,
670 		   utf_local_conversion_func conv_func,
671 		   int encoding)
672 {
673 	uint32		iiso;
674 	int			l;
675 	const pg_local_to_utf_combined *cp;
676 
677 	if (!PG_VALID_ENCODING(encoding))
678 		ereport(ERROR,
679 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
680 				 errmsg("invalid encoding number: %d", encoding)));
681 
682 	for (; len > 0; len -= l)
683 	{
684 		unsigned char b1 = 0;
685 		unsigned char b2 = 0;
686 		unsigned char b3 = 0;
687 		unsigned char b4 = 0;
688 
689 		/* "break" cases all represent errors */
690 		if (*iso == '\0')
691 			break;
692 
693 		if (!IS_HIGHBIT_SET(*iso))
694 		{
695 			/* ASCII case is easy, assume it's one-to-one conversion */
696 			*utf++ = *iso++;
697 			l = 1;
698 			continue;
699 		}
700 
701 		l = pg_encoding_verifymb(encoding, (const char *) iso, len);
702 		if (l < 0)
703 			break;
704 
705 		/* collect coded char of length l */
706 		if (l == 1)
707 			b4 = *iso++;
708 		else if (l == 2)
709 		{
710 			b3 = *iso++;
711 			b4 = *iso++;
712 		}
713 		else if (l == 3)
714 		{
715 			b2 = *iso++;
716 			b3 = *iso++;
717 			b4 = *iso++;
718 		}
719 		else if (l == 4)
720 		{
721 			b1 = *iso++;
722 			b2 = *iso++;
723 			b3 = *iso++;
724 			b4 = *iso++;
725 		}
726 		else
727 		{
728 			elog(ERROR, "unsupported character length %d", l);
729 			iiso = 0;			/* keep compiler quiet */
730 		}
731 		iiso = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
732 
733 		if (map)
734 		{
735 			uint32		converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
736 
737 			if (converted)
738 			{
739 				utf = store_coded_char(utf, converted);
740 				continue;
741 			}
742 
743 			/* If there's a combined character map, try that */
744 			if (cmap)
745 			{
746 				cp = bsearch(&iiso, cmap, cmapsize,
747 							 sizeof(pg_local_to_utf_combined), compare4);
748 
749 				if (cp)
750 				{
751 					utf = store_coded_char(utf, cp->utf1);
752 					utf = store_coded_char(utf, cp->utf2);
753 					continue;
754 				}
755 			}
756 		}
757 
758 		/* if there's a conversion function, try that */
759 		if (conv_func)
760 		{
761 			uint32		converted = (*conv_func) (iiso);
762 
763 			if (converted)
764 			{
765 				utf = store_coded_char(utf, converted);
766 				continue;
767 			}
768 		}
769 
770 		/* failed to translate this character */
771 		report_untranslatable_char(encoding, PG_UTF8,
772 								   (const char *) (iso - l), len);
773 	}
774 
775 	/* if we broke out of loop early, must be invalid input */
776 	if (len > 0)
777 		report_invalid_encoding(encoding, (const char *) iso, len);
778 
779 	*utf = '\0';
780 }
781