1 /*-----------------------------------------------------------------------
2  * ascii.c
3  *	 The PostgreSQL routine for string to ascii conversion.
4  *
5  *	 Portions Copyright (c) 1999-2018, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  *	  src/backend/utils/adt/ascii.c
9  *
10  *-----------------------------------------------------------------------
11  */
12 #include "postgres.h"
13 
14 #include "mb/pg_wchar.h"
15 #include "utils/ascii.h"
16 #include "utils/builtins.h"
17 
18 static void pg_to_ascii(unsigned char *src, unsigned char *src_end,
19 			unsigned char *dest, int enc);
20 static text *encode_to_ascii(text *data, int enc);
21 
22 
23 /* ----------
24  * to_ascii
25  * ----------
26  */
27 static void
pg_to_ascii(unsigned char * src,unsigned char * src_end,unsigned char * dest,int enc)28 pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
29 {
30 	unsigned char *x;
31 	const unsigned char *ascii;
32 	int			range;
33 
34 	/*
35 	 * relevant start for an encoding
36 	 */
37 #define RANGE_128	128
38 #define RANGE_160	160
39 
40 	if (enc == PG_LATIN1)
41 	{
42 		/*
43 		 * ISO-8859-1 <range: 160 -- 255>
44 		 */
45 		ascii = (const unsigned char *) "  cL Y  \"Ca  -R     'u .,      ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
46 		range = RANGE_160;
47 	}
48 	else if (enc == PG_LATIN2)
49 	{
50 		/*
51 		 * ISO-8859-2 <range: 160 -- 255>
52 		 */
53 		ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt.";
54 		range = RANGE_160;
55 	}
56 	else if (enc == PG_LATIN9)
57 	{
58 		/*
59 		 * ISO-8859-15 <range: 160 -- 255>
60 		 */
61 		ascii = (const unsigned char *) "  cL YS sCa  -R     Zu .z   EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
62 		range = RANGE_160;
63 	}
64 	else if (enc == PG_WIN1250)
65 	{
66 		/*
67 		 * Window CP1250 <range: 128 -- 255>
68 		 */
69 		ascii = (const unsigned char *) "  ' \"    %S<STZZ `'\"\".--  s>stzz   L A  \"CS  -RZ  ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt ";
70 		range = RANGE_128;
71 	}
72 	else
73 	{
74 		ereport(ERROR,
75 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
76 				 errmsg("encoding conversion from %s to ASCII not supported",
77 						pg_encoding_to_char(enc))));
78 		return;					/* keep compiler quiet */
79 	}
80 
81 	/*
82 	 * Encode
83 	 */
84 	for (x = src; x < src_end; x++)
85 	{
86 		if (*x < 128)
87 			*dest++ = *x;
88 		else if (*x < range)
89 			*dest++ = ' ';		/* bogus 128 to 'range' */
90 		else
91 			*dest++ = ascii[*x - range];
92 	}
93 }
94 
95 /* ----------
96  * encode text
97  *
98  * The text datum is overwritten in-place, therefore this coding method
99  * cannot support conversions that change the string length!
100  * ----------
101  */
102 static text *
encode_to_ascii(text * data,int enc)103 encode_to_ascii(text *data, int enc)
104 {
105 	pg_to_ascii((unsigned char *) VARDATA(data),	/* src */
106 				(unsigned char *) (data) + VARSIZE(data),	/* src end */
107 				(unsigned char *) VARDATA(data),	/* dest */
108 				enc);			/* encoding */
109 
110 	return data;
111 }
112 
113 /* ----------
114  * convert to ASCII - enc is set as 'name' arg.
115  * ----------
116  */
117 Datum
to_ascii_encname(PG_FUNCTION_ARGS)118 to_ascii_encname(PG_FUNCTION_ARGS)
119 {
120 	text	   *data = PG_GETARG_TEXT_P_COPY(0);
121 	char	   *encname = NameStr(*PG_GETARG_NAME(1));
122 	int			enc = pg_char_to_encoding(encname);
123 
124 	if (enc < 0)
125 		ereport(ERROR,
126 				(errcode(ERRCODE_UNDEFINED_OBJECT),
127 				 errmsg("%s is not a valid encoding name", encname)));
128 
129 	PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
130 }
131 
132 /* ----------
133  * convert to ASCII - enc is set as int4
134  * ----------
135  */
136 Datum
to_ascii_enc(PG_FUNCTION_ARGS)137 to_ascii_enc(PG_FUNCTION_ARGS)
138 {
139 	text	   *data = PG_GETARG_TEXT_P_COPY(0);
140 	int			enc = PG_GETARG_INT32(1);
141 
142 	if (!PG_VALID_ENCODING(enc))
143 		ereport(ERROR,
144 				(errcode(ERRCODE_UNDEFINED_OBJECT),
145 				 errmsg("%d is not a valid encoding code", enc)));
146 
147 	PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
148 }
149 
150 /* ----------
151  * convert to ASCII - current enc is DatabaseEncoding
152  * ----------
153  */
154 Datum
to_ascii_default(PG_FUNCTION_ARGS)155 to_ascii_default(PG_FUNCTION_ARGS)
156 {
157 	text	   *data = PG_GETARG_TEXT_P_COPY(0);
158 	int			enc = GetDatabaseEncoding();
159 
160 	PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
161 }
162 
/* ----------
 * Copy a string in an arbitrary backend-safe encoding, converting it to a
 * valid, printable ASCII string: every byte that is neither printable
 * ASCII (32..126) nor newline, carriage return or tab is replaced with '?'.
 * Otherwise the behavior is identical to strlcpy(), except that we don't
 * bother with a return value.
 *
 * At most destsiz - 1 bytes are copied and dest is always NUL-terminated,
 * unless destsiz is zero, in which case dest is not touched at all.
 *
 * This must not trigger ereport(ERROR), as it is called in postmaster.
 * ----------
 */
void
ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
{
	if (destsiz == 0)			/* corner case: no room for trailing nul */
		return;

	/* pre-decrement reserves one byte for the terminating nul */
	while (--destsiz > 0)
	{
		/* use unsigned char here to avoid compiler warning */
		unsigned char ch = *src++;

		if (ch == '\0')
			break;

		/*
		 * Keep printable ASCII characters.  The upper bound is 126, not 127:
		 * DEL (0x7F) is a control character and must be sanitized like the
		 * others.
		 */
		if (32 <= ch && ch <= 126)
			*dest = ch;
		/* White-space is also OK */
		else if (ch == '\n' || ch == '\r' || ch == '\t')
			*dest = ch;
		/* Everything else is replaced with '?' */
		else
			*dest = '?';
		dest++;
	}

	*dest = '\0';
}
199