1 /*-----------------------------------------------------------------------
2  * ascii.c
3  *	 The PostgreSQL routine for string to ascii conversion.
4  *
5  *	 Portions Copyright (c) 1999-2016, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  *	  src/backend/utils/adt/ascii.c
9  *
10  *-----------------------------------------------------------------------
11  */
12 #include "postgres.h"
13 
14 #include "mb/pg_wchar.h"
15 #include "utils/ascii.h"
16 
17 static void pg_to_ascii(unsigned char *src, unsigned char *src_end,
18 			unsigned char *dest, int enc);
19 static text *encode_to_ascii(text *data, int enc);
20 
21 
22 /* ----------
23  * to_ascii
24  * ----------
25  */
26 static void
pg_to_ascii(unsigned char * src,unsigned char * src_end,unsigned char * dest,int enc)27 pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
28 {
29 	unsigned char *x;
30 	const unsigned char *ascii;
31 	int			range;
32 
33 	/*
34 	 * relevant start for an encoding
35 	 */
36 #define RANGE_128	128
37 #define RANGE_160	160
38 
39 	if (enc == PG_LATIN1)
40 	{
41 		/*
42 		 * ISO-8859-1 <range: 160 -- 255>
43 		 */
44 		ascii = (const unsigned char *) "  cL Y  \"Ca  -R     'u .,      ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
45 		range = RANGE_160;
46 	}
47 	else if (enc == PG_LATIN2)
48 	{
49 		/*
50 		 * ISO-8859-2 <range: 160 -- 255>
51 		 */
52 		ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt.";
53 		range = RANGE_160;
54 	}
55 	else if (enc == PG_LATIN9)
56 	{
57 		/*
58 		 * ISO-8859-15 <range: 160 -- 255>
59 		 */
60 		ascii = (const unsigned char *) "  cL YS sCa  -R     Zu .z   EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
61 		range = RANGE_160;
62 	}
63 	else if (enc == PG_WIN1250)
64 	{
65 		/*
66 		 * Window CP1250 <range: 128 -- 255>
67 		 */
68 		ascii = (const unsigned char *) "  ' \"    %S<STZZ `'\"\".--  s>stzz   L A  \"CS  -RZ  ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt ";
69 		range = RANGE_128;
70 	}
71 	else
72 	{
73 		ereport(ERROR,
74 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
75 				 errmsg("encoding conversion from %s to ASCII not supported",
76 						pg_encoding_to_char(enc))));
77 		return;					/* keep compiler quiet */
78 	}
79 
80 	/*
81 	 * Encode
82 	 */
83 	for (x = src; x < src_end; x++)
84 	{
85 		if (*x < 128)
86 			*dest++ = *x;
87 		else if (*x < range)
88 			*dest++ = ' ';		/* bogus 128 to 'range' */
89 		else
90 			*dest++ = ascii[*x - range];
91 	}
92 }
93 
94 /* ----------
95  * encode text
96  *
97  * The text datum is overwritten in-place, therefore this coding method
98  * cannot support conversions that change the string length!
99  * ----------
100  */
101 static text *
encode_to_ascii(text * data,int enc)102 encode_to_ascii(text *data, int enc)
103 {
104 	pg_to_ascii((unsigned char *) VARDATA(data),		/* src */
105 				(unsigned char *) (data) + VARSIZE(data),		/* src end */
106 				(unsigned char *) VARDATA(data),		/* dest */
107 				enc);			/* encoding */
108 
109 	return data;
110 }
111 
112 /* ----------
113  * convert to ASCII - enc is set as 'name' arg.
114  * ----------
115  */
116 Datum
to_ascii_encname(PG_FUNCTION_ARGS)117 to_ascii_encname(PG_FUNCTION_ARGS)
118 {
119 	text	   *data = PG_GETARG_TEXT_P_COPY(0);
120 	char	   *encname = NameStr(*PG_GETARG_NAME(1));
121 	int			enc = pg_char_to_encoding(encname);
122 
123 	if (enc < 0)
124 		ereport(ERROR,
125 				(errcode(ERRCODE_UNDEFINED_OBJECT),
126 				 errmsg("%s is not a valid encoding name", encname)));
127 
128 	PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
129 }
130 
131 /* ----------
132  * convert to ASCII - enc is set as int4
133  * ----------
134  */
135 Datum
to_ascii_enc(PG_FUNCTION_ARGS)136 to_ascii_enc(PG_FUNCTION_ARGS)
137 {
138 	text	   *data = PG_GETARG_TEXT_P_COPY(0);
139 	int			enc = PG_GETARG_INT32(1);
140 
141 	if (!PG_VALID_ENCODING(enc))
142 		ereport(ERROR,
143 				(errcode(ERRCODE_UNDEFINED_OBJECT),
144 				 errmsg("%d is not a valid encoding code", enc)));
145 
146 	PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
147 }
148 
149 /* ----------
150  * convert to ASCII - current enc is DatabaseEncoding
151  * ----------
152  */
153 Datum
to_ascii_default(PG_FUNCTION_ARGS)154 to_ascii_default(PG_FUNCTION_ARGS)
155 {
156 	text	   *data = PG_GETARG_TEXT_P_COPY(0);
157 	int			enc = GetDatabaseEncoding();
158 
159 	PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
160 }
161 
/* ----------
 * ascii_safe_strlcpy
 *
 * Copy a string in an arbitrary backend-safe encoding, converting it to a
 * valid ASCII string: every byte that is neither printable ASCII (32 .. 126)
 * nor one of '\n', '\r', '\t' is replaced with '?'.  Otherwise the behavior
 * is identical to strlcpy(), except that we don't bother with a return
 * value.
 *
 * dest:	output buffer; receives at most destsiz - 1 bytes followed by a
 *			terminating nul.
 * src:		nul-terminated input string.
 * destsiz: size of dest in bytes; if zero, dest is not touched at all.
 *
 * This must not trigger ereport(ERROR), as it is called in postmaster.
 * ----------
 */
void
ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
{
	if (destsiz == 0)			/* corner case: no room for trailing nul */
		return;

	/* the pre-decrement reserves one byte for the terminating nul */
	while (--destsiz > 0)
	{
		/* use unsigned char here to avoid compiler warning */
		unsigned char ch = *src++;

		if (ch == '\0')
			break;

		/*
		 * Keep printable ASCII characters.  The upper bound is 126, not
		 * 127: DEL (0x7F) is a control character and must be replaced like
		 * the other control bytes.
		 */
		if (ch >= 32 && ch <= 126)
			*dest = ch;
		/* White-space is also OK */
		else if (ch == '\n' || ch == '\r' || ch == '\t')
			*dest = ch;
		/* Everything else is replaced with '?' */
		else
			*dest = '?';
		dest++;
	}

	*dest = '\0';
}
198