1 /*-----------------------------------------------------------------------
2 * ascii.c
3 * The PostgreSQL routine for string to ascii conversion.
4 *
5 * Portions Copyright (c) 1999-2016, PostgreSQL Global Development Group
6 *
7 * IDENTIFICATION
8 * src/backend/utils/adt/ascii.c
9 *
10 *-----------------------------------------------------------------------
11 */
12 #include "postgres.h"
13
14 #include "mb/pg_wchar.h"
15 #include "utils/ascii.h"
16
17 static void pg_to_ascii(unsigned char *src, unsigned char *src_end,
18 unsigned char *dest, int enc);
19 static text *encode_to_ascii(text *data, int enc);
20
21
22 /* ----------
23 * to_ascii
24 * ----------
25 */
26 static void
pg_to_ascii(unsigned char * src,unsigned char * src_end,unsigned char * dest,int enc)27 pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
28 {
29 unsigned char *x;
30 const unsigned char *ascii;
31 int range;
32
33 /*
34 * relevant start for an encoding
35 */
36 #define RANGE_128 128
37 #define RANGE_160 160
38
39 if (enc == PG_LATIN1)
40 {
41 /*
42 * ISO-8859-1 <range: 160 -- 255>
43 */
44 ascii = (const unsigned char *) " cL Y \"Ca -R 'u ., ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
45 range = RANGE_160;
46 }
47 else if (enc == PG_LATIN2)
48 {
49 /*
50 * ISO-8859-2 <range: 160 -- 255>
51 */
52 ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt.";
53 range = RANGE_160;
54 }
55 else if (enc == PG_LATIN9)
56 {
57 /*
58 * ISO-8859-15 <range: 160 -- 255>
59 */
60 ascii = (const unsigned char *) " cL YS sCa -R Zu .z EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
61 range = RANGE_160;
62 }
63 else if (enc == PG_WIN1250)
64 {
65 /*
66 * Window CP1250 <range: 128 -- 255>
67 */
68 ascii = (const unsigned char *) " ' \" %S<STZZ `'\"\".-- s>stzz L A \"CS -RZ ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt ";
69 range = RANGE_128;
70 }
71 else
72 {
73 ereport(ERROR,
74 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
75 errmsg("encoding conversion from %s to ASCII not supported",
76 pg_encoding_to_char(enc))));
77 return; /* keep compiler quiet */
78 }
79
80 /*
81 * Encode
82 */
83 for (x = src; x < src_end; x++)
84 {
85 if (*x < 128)
86 *dest++ = *x;
87 else if (*x < range)
88 *dest++ = ' '; /* bogus 128 to 'range' */
89 else
90 *dest++ = ascii[*x - range];
91 }
92 }
93
94 /* ----------
95 * encode text
96 *
97 * The text datum is overwritten in-place, therefore this coding method
98 * cannot support conversions that change the string length!
99 * ----------
100 */
101 static text *
encode_to_ascii(text * data,int enc)102 encode_to_ascii(text *data, int enc)
103 {
104 pg_to_ascii((unsigned char *) VARDATA(data), /* src */
105 (unsigned char *) (data) + VARSIZE(data), /* src end */
106 (unsigned char *) VARDATA(data), /* dest */
107 enc); /* encoding */
108
109 return data;
110 }
111
112 /* ----------
113 * convert to ASCII - enc is set as 'name' arg.
114 * ----------
115 */
116 Datum
to_ascii_encname(PG_FUNCTION_ARGS)117 to_ascii_encname(PG_FUNCTION_ARGS)
118 {
119 text *data = PG_GETARG_TEXT_P_COPY(0);
120 char *encname = NameStr(*PG_GETARG_NAME(1));
121 int enc = pg_char_to_encoding(encname);
122
123 if (enc < 0)
124 ereport(ERROR,
125 (errcode(ERRCODE_UNDEFINED_OBJECT),
126 errmsg("%s is not a valid encoding name", encname)));
127
128 PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
129 }
130
131 /* ----------
132 * convert to ASCII - enc is set as int4
133 * ----------
134 */
135 Datum
to_ascii_enc(PG_FUNCTION_ARGS)136 to_ascii_enc(PG_FUNCTION_ARGS)
137 {
138 text *data = PG_GETARG_TEXT_P_COPY(0);
139 int enc = PG_GETARG_INT32(1);
140
141 if (!PG_VALID_ENCODING(enc))
142 ereport(ERROR,
143 (errcode(ERRCODE_UNDEFINED_OBJECT),
144 errmsg("%d is not a valid encoding code", enc)));
145
146 PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
147 }
148
149 /* ----------
150 * convert to ASCII - current enc is DatabaseEncoding
151 * ----------
152 */
153 Datum
to_ascii_default(PG_FUNCTION_ARGS)154 to_ascii_default(PG_FUNCTION_ARGS)
155 {
156 text *data = PG_GETARG_TEXT_P_COPY(0);
157 int enc = GetDatabaseEncoding();
158
159 PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
160 }
161
/* ----------
 * ascii_safe_strlcpy
 *
 * Copy a string in an arbitrary backend-safe encoding, converting it to a
 * valid ASCII string by replacing every byte that is neither printable
 * ASCII (32 .. 126) nor one of '\n', '\r', '\t' with '?'.  Note that DEL
 * (127) is a control character and is therefore replaced as well.
 *
 * At most destsiz - 1 bytes are copied and the result is always
 * nul-terminated, except that nothing at all is written when destsiz is 0.
 * Otherwise the behavior is identical to strlcpy(), except that we don't
 * bother with a return value.
 *
 * This must not trigger ereport(ERROR), as it is called in postmaster.
 * ----------
 */
void
ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
{
	if (destsiz == 0)			/* corner case: no room for trailing nul */
		return;

	/* pre-decrement reserves one byte of dest for the terminator */
	while (--destsiz > 0)
	{
		/* use unsigned char here so bytes >= 128 compare as positive */
		unsigned char ch = *src++;

		if (ch == '\0')
			break;

		if ((ch >= 32 && ch <= 126) ||
			ch == '\n' || ch == '\r' || ch == '\t')
			*dest = ch;			/* printable ASCII or allowed white-space */
		else
			*dest = '?';		/* control, DEL, or non-ASCII byte */
		dest++;
	}

	*dest = '\0';
}
198