xref: /linux/fs/smb/client/cifs_unicode.h (revision 84b9b44b)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  * cifs_unicode:  Unicode kernel case support
4  *
5  * Function:
6  *     Convert a unicode character to upper or lower case using
7  *     compressed tables.
8  *
9  *   Copyright (c) International Business Machines  Corp., 2000,2009
10  *
11  * Notes:
12  *     These APIs are based on the C library functions.  The semantics
13  *     should match the C functions but with expanded size operands.
14  *
15  *     The upper/lower functions are based on a table created by mkupr.
16  *     This is a compressed table of upper and lower case conversion.
17  */
18 #ifndef _CIFS_UNICODE_H
19 #define _CIFS_UNICODE_H
20 
21 #include <asm/byteorder.h>
22 #include <linux/types.h>
23 #include <linux/nls.h>
24 
25 #define  UNIUPR_NOLOWER		/* Example to not expand lower case tables */
26 
/*
 * Windows maps these to the user defined 16 bit Unicode range since they are
 * reserved symbols (along with \ and /), otherwise illegal to store
 * in filenames in NTFS.
 *
 * Each value is the ASCII code of the symbol plus 0xF000, truncated to
 * __u16.  The whole expansion is parenthesized (as the SFM_* constants
 * are) so that the cast cannot be split from its operand by sizeof or by
 * a postfix operator at the point of use.
 */
#define UNI_ASTERISK    ((__u16) ('*' + 0xF000))
#define UNI_QUESTION    ((__u16) ('?' + 0xF000))
#define UNI_COLON       ((__u16) (':' + 0xF000))
#define UNI_GRTRTHAN    ((__u16) ('>' + 0xF000))
#define UNI_LESSTHAN    ((__u16) ('<' + 0xF000))
#define UNI_PIPE        ((__u16) ('|' + 0xF000))
#define UNI_SLASH       ((__u16) ('\\' + 0xF000))
39 
/*
 * Macs use an older "SFM" mapping of the symbols above.  Fortunately it
 * does not conflict (although it almost does) with the mapping above.
 *
 * Listed in ascending code point order.
 */
#define SFM_DOUBLEQUOTE	((__u16) 0xF020)
#define SFM_ASTERISK	((__u16) 0xF021)
#define SFM_COLON	((__u16) 0xF022)
#define SFM_LESSTHAN	((__u16) 0xF023)
#define SFM_GRTRTHAN	((__u16) 0xF024)
#define SFM_QUESTION	((__u16) 0xF025)
#define SFM_SLASH	((__u16) 0xF026)
#define SFM_PIPE	((__u16) 0xF027)
#define SFM_SPACE	((__u16) 0xF028)
#define SFM_PERIOD	((__u16) 0xF029)
55 
/*
 * Selects how a reserved character is remapped when one is encountered.
 * Only one scheme can be in effect at a time; otherwise readdir could
 * return directory entries which we would not be able to open.
 *
 *   NO_MAP_UNI_RSVD   - leave the character alone, no remapping
 *   SFM_MAP_UNI_RSVD  - remap using the SFM scheme (Mac compatible)
 *   SFU_MAP_UNI_RSVD  - remap a la SFU (the "mapchars" option)
 */
#define NO_MAP_UNI_RSVD   0
#define SFM_MAP_UNI_RSVD  1
#define SFU_MAP_UNI_RSVD  2
70 
/*
 * Only the declarations wanted from uniupr.h are repeated here, so that
 * the tables themselves are not defined in each source file.
 */
#ifndef UNICASERANGE_DEFINED
/*
 * One entry of a case-conversion range list.  The conversion helpers in
 * this header treat [start, end] as inclusive, add table[c - start] to a
 * character c inside it, and stop scanning at an entry whose start is 0.
 */
struct UniCaseRange {
	wchar_t start, end;
	signed char *table;
};
#endif /* UNICASERANGE_DEFINED */

#ifndef UNIUPR_NOUPPER
/* Flat table of deltas, indexed directly by characters below its size */
extern signed char CifsUniUpperTable[512];
/* Range list consulted for characters beyond the flat table */
extern const struct UniCaseRange CifsUniUpperRange[];
#endif /* UNIUPR_NOUPPER */

#ifndef UNIUPR_NOLOWER
/* Lower case counterparts of the two tables above */
extern signed char CifsUniLowerTable[512];
extern const struct UniCaseRange CifsUniLowerRange[];
#endif /* UNIUPR_NOLOWER */
91 
#ifdef __KERNEL__
/*
 * Forward declaration: the type is only passed by pointer below, and
 * without it the first mention would be scoped to a parameter list.
 */
struct cifs_sb_info;

/* Conversion helpers implemented outside this header */
int cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
		    const struct nls_table *cp, int map_type);
int cifs_utf16_bytes(const __le16 *from, int maxbytes,
		     const struct nls_table *codepage);
int cifs_strtoUTF16(__le16 *to, const char *from, int len,
		    const struct nls_table *codepage);
char *cifs_strndup_from_utf16(const char *src, const int maxlen,
			      const bool is_unicode,
			      const struct nls_table *codepage);
int cifsConvertToUTF16(__le16 *target, const char *source, int maxlen,
		       const struct nls_table *cp, int mapChars);
int cifs_remap(struct cifs_sb_info *cifs_sb);
__le16 *cifs_strndup_to_utf16(const char *src, const int maxlen,
			      int *utf16_len, const struct nls_table *cp,
			      int remap);
#endif

wchar_t cifs_toupper(wchar_t in);
110 
111 /*
112  * UniStrcat:  Concatenate the second string to the first
113  *
114  * Returns:
115  *     Address of the first string
116  */
117 static inline __le16 *
118 UniStrcat(__le16 *ucs1, const __le16 *ucs2)
119 {
120 	__le16 *anchor = ucs1;	/* save a pointer to start of ucs1 */
121 
122 	while (*ucs1++) ;	/* To end of first string */
123 	ucs1--;			/* Return to the null */
124 	while ((*ucs1++ = *ucs2++)) ;	/* copy string 2 over */
125 	return anchor;
126 }
127 
/*
 * UniStrchr:  Locate a character in a string
 *
 * The terminating null counts as part of the string, so searching for 0
 * yields the address of the terminator.
 *
 * Returns:
 *     Address of first occurrence of character in string
 *     or NULL if the character is not in the string
 */
static inline wchar_t *
UniStrchr(const wchar_t *ucs, wchar_t uc)
{
	for (;; ucs++) {
		if (*ucs == uc)
			return (wchar_t *) ucs;
		if (!*ucs)	/* hit the end without a match */
			return NULL;
	}
}
145 
/*
 * UniStrcmp:  Compare two strings
 *
 * The result is the difference between the first pair of characters
 * that differ (or 0 when both strings end together).
 *
 * Returns:
 *     < 0:  First string is less than second
 *     = 0:  Strings are equal
 *     > 0:  First string is greater than second
 */
static inline int
UniStrcmp(const wchar_t *ucs1, const wchar_t *ucs2)
{
	size_t pos = 0;

	/* Skip the common prefix, stopping at the end of the first string */
	while (ucs1[pos] && ucs1[pos] == ucs2[pos])
		pos++;

	return (int) ucs1[pos] - (int) ucs2[pos];
}
163 
/*
 * UniStrcpy:  Copy a string, including its terminating null
 *
 * Returns:
 *     Address of the destination string
 */
static inline wchar_t *
UniStrcpy(wchar_t *ucs1, const wchar_t *ucs2)
{
	wchar_t *dst = ucs1;

	do {
		*dst++ = *ucs2;
	} while (*ucs2++);	/* stop once the null has been copied */

	return ucs1;
}
175 
/*
 * UniStrlen:  Return the length of a string (in 16 bit Unicode chars not bytes)
 *
 * The count excludes the terminating null.  It is kept in a size_t, the
 * same type as the return value, so it cannot overflow a signed int.
 */
static inline size_t
UniStrlen(const wchar_t *ucs1)
{
	size_t len = 0;

	while (ucs1[len])
		len++;
	return len;
}
188 
/*
 * UniStrnlen:  Return the length (in 16 bit Unicode chars not bytes) of a
 *		string (length limited)
 *
 * At most maxlen characters are examined.  The bound is tested before
 * each character is read, so nothing is dereferenced and 0 is returned
 * when maxlen is zero or negative.
 *
 * Returns:
 *     Number of characters before the terminating null, capped at maxlen
 */
static inline size_t
UniStrnlen(const wchar_t *ucs1, int maxlen)
{
	int len = 0;

	while (len < maxlen && ucs1[len])
		len++;
	return len;
}
205 
/*
 * UniStrncat:  Concatenate length limited string
 *
 * Appends at most n characters of ucs2 to ucs1 and then always writes a
 * terminating null.
 *
 * Returns:
 *     Address of the first string
 */
static inline wchar_t *
UniStrncat(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	wchar_t *tail = ucs1;
	size_t copied;

	/* Locate the terminating null of the first string */
	while (*tail)
		tail++;

	/* Append until n characters are copied or the source ends */
	for (copied = 0; copied < n && ucs2[copied]; copied++)
		tail[copied] = ucs2[copied];

	tail[copied] = 0;	/* Null terminate the result */
	return ucs1;
}
223 
/*
 * UniStrncmp:  Compare length limited string
 *
 * Compares at most n characters and stops early at the end of the first
 * string.
 *
 * Returns:
 *     Difference between the first pair of characters that differ, or 0
 *     if none differ within the compared span (including when n is 0)
 */
static inline int
UniStrncmp(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	for (; n; n--, ucs1++, ucs2++) {
		if (*ucs1 != *ucs2)
			return (int) *ucs1 - (int) *ucs2;
		if (!*ucs1)	/* both strings ended together */
			break;
	}
	return 0;
}
238 
/*
 * UniStrncmp_le:  Compare length limited string - native to little-endian
 *
 * ucs1 is compared as is; every character of ucs2 is passed through
 * __le16_to_cpu() before it is compared.  At most n characters are
 * examined and the comparison stops early at the end of the first string.
 *
 * Returns:
 *     Difference between the first pair of characters that differ, or 0
 *     if none differ within the compared span (including when n is 0)
 */
static inline int
UniStrncmp_le(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	for (; n; n--, ucs1++, ucs2++) {
		const int rhs = (int) __le16_to_cpu(*ucs2);

		if (*ucs1 != rhs)
			return (int) *ucs1 - rhs;
		if (!*ucs1)	/* both strings ended together */
			break;
	}
	return 0;
}
253 
/*
 * UniStrncpy:  Copy length limited string with pad
 *
 * Exactly n characters of ucs1 are written: the leading characters of
 * ucs2 (up to its terminating null) followed by nulls for the rest.  If
 * ucs2 holds n or more characters the result is not null terminated.
 *
 * Returns:
 *     Address of the destination string
 */
static inline wchar_t *
UniStrncpy(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	size_t pos = 0;

	/* Copy the string */
	for (; pos < n && ucs2[pos]; pos++)
		ucs1[pos] = ucs2[pos];

	/* Pad with nulls */
	for (; pos < n; pos++)
		ucs1[pos] = 0;

	return ucs1;
}
270 
/*
 * UniStrncpy_le:  Copy length limited string with pad, converting each
 *		   character with __le16_to_cpu()
 *
 * Exactly n characters of ucs1 are written: the converted leading
 * characters of ucs2 (up to its terminating null) followed by nulls for
 * the rest.  If ucs2 holds n or more characters the result is not null
 * terminated.
 *
 * Returns:
 *     Address of the destination string
 */
static inline wchar_t *
UniStrncpy_le(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	size_t pos = 0;

	/* Copy the string, converting as we go */
	for (; pos < n && ucs2[pos]; pos++)
		ucs1[pos] = __le16_to_cpu(ucs2[pos]);

	/* Pad with nulls */
	for (; pos < n; pos++)
		ucs1[pos] = 0;

	return ucs1;
}
287 
/*
 * UniStrstr:  Find a string in a string
 *
 * An empty ucs2 matches at the very start of ucs1.
 *
 * Returns:
 *     Address of first match found
 *     NULL if no matching string is found
 */
static inline wchar_t *
UniStrstr(const wchar_t *ucs1, const wchar_t *ucs2)
{
	/* Try every starting position in turn, the terminator included */
	for (;; ucs1++) {
		const wchar_t *hay = ucs1;
		const wchar_t *needle = ucs2;

		/* Consume the longest common run from this position */
		while (*needle && *hay == *needle) {
			hay++;
			needle++;
		}

		if (!*needle)		/* Whole of ucs2 matched */
			return (wchar_t *) ucs1;
		if (!*ucs1)		/* Nothing left to search */
			return NULL;
	}
}
318 
319 #ifndef UNIUPR_NOUPPER
320 /*
321  * UniToupper:  Convert a unicode character to upper case
322  */
323 static inline wchar_t
324 UniToupper(register wchar_t uc)
325 {
326 	register const struct UniCaseRange *rp;
327 
328 	if (uc < sizeof(CifsUniUpperTable)) {
329 		/* Latin characters */
330 		return uc + CifsUniUpperTable[uc];	/* Use base tables */
331 	} else {
332 		rp = CifsUniUpperRange;	/* Use range tables */
333 		while (rp->start) {
334 			if (uc < rp->start)	/* Before start of range */
335 				return uc;	/* Uppercase = input */
336 			if (uc <= rp->end)	/* In range */
337 				return uc + rp->table[uc - rp->start];
338 			rp++;	/* Try next range */
339 		}
340 	}
341 	return uc;		/* Past last range */
342 }
343 
344 /*
345  * UniStrupr:  Upper case a unicode string
346  */
347 static inline __le16 *
348 UniStrupr(register __le16 *upin)
349 {
350 	register __le16 *up;
351 
352 	up = upin;
353 	while (*up) {		/* For all characters */
354 		*up = cpu_to_le16(UniToupper(le16_to_cpu(*up)));
355 		up++;
356 	}
357 	return upin;		/* Return input pointer */
358 }
359 #endif				/* UNIUPR_NOUPPER */
360 
361 #ifndef UNIUPR_NOLOWER
362 /*
363  * UniTolower:  Convert a unicode character to lower case
364  */
365 static inline wchar_t
366 UniTolower(register wchar_t uc)
367 {
368 	register const struct UniCaseRange *rp;
369 
370 	if (uc < sizeof(CifsUniLowerTable)) {
371 		/* Latin characters */
372 		return uc + CifsUniLowerTable[uc];	/* Use base tables */
373 	} else {
374 		rp = CifsUniLowerRange;	/* Use range tables */
375 		while (rp->start) {
376 			if (uc < rp->start)	/* Before start of range */
377 				return uc;	/* Uppercase = input */
378 			if (uc <= rp->end)	/* In range */
379 				return uc + rp->table[uc - rp->start];
380 			rp++;	/* Try next range */
381 		}
382 	}
383 	return uc;		/* Past last range */
384 }
385 
/*
 * UniStrlwr:  Lower case a unicode string
 *
 * The string is converted in place by applying UniTolower() to each
 * character up to the terminating null.
 *
 * Returns:
 *     The input pointer
 */
static inline wchar_t *
UniStrlwr(register wchar_t *upin)
{
	wchar_t *cur;

	for (cur = upin; *cur; cur++)	/* For all characters */
		*cur = UniTolower(*cur);

	return upin;
}
401 
402 #endif
403 
404 #endif /* _CIFS_UNICODE_H */
405