xref: /freebsd/contrib/sendmail/libsm/lowercase.c (revision d39bd2c1)
/*
 * Copyright (c) 2020 Proofpoint, Inc. and its suppliers.
 *	All rights reserved.
 *
 * By using this file, you agree to the terms and conditions set
 * forth in the LICENSE file which can be found at the top level of
 * the sendmail distribution.
 *
 */
102fb4f839SGregory Neil Shapiro 
112fb4f839SGregory Neil Shapiro #include <sm/gen.h>
122fb4f839SGregory Neil Shapiro #include <sm/sendmail.h>
132fb4f839SGregory Neil Shapiro 
142fb4f839SGregory Neil Shapiro #include <ctype.h>
152fb4f839SGregory Neil Shapiro #include <sm/string.h>
162fb4f839SGregory Neil Shapiro #include <sm/heap.h>
172fb4f839SGregory Neil Shapiro #if USE_EAI
18*d39bd2c1SGregory Neil Shapiro # include <sm/limits.h>
192fb4f839SGregory Neil Shapiro # include <unicode/ucasemap.h>
202fb4f839SGregory Neil Shapiro # include <unicode/ustring.h>
212fb4f839SGregory Neil Shapiro # include <unicode/uchar.h>
22*d39bd2c1SGregory Neil Shapiro # include <sm/ixlen.h>
232fb4f839SGregory Neil Shapiro 
/*
**  ASCIISTR -- determine whether a string holds only printable ASCII
**
**	Parameters:
**		str -- NUL-terminated string to examine (may be NULL)
**
**	Returns:
**		true iff every byte of str is in the range 32..126;
**		a NULL pointer and the empty string both yield true.
*/

bool
asciistr(str)
	const char *str;
{
	const unsigned char *cur;

	if (str == NULL)
		return true;

	/* walk the bytes as unsigned so that values >= 128 compare correctly */
	for (cur = (const unsigned char *)str; *cur != '\0'; cur++)
	{
		/* control characters, DEL, and 8-bit bytes disqualify the string */
		if (*cur < 32 || *cur >= 127)
			return false;
	}
	return true;
}
46*d39bd2c1SGregory Neil Shapiro 
/*
**  ASCIINSTR -- determine whether the first len bytes of a string
**		are printable ASCII
**
**	Parameters:
**		str -- string to examine (may be NULL)
**		len -- maximum number of bytes to inspect;
**			must be less than INT_MAX
**
**	Returns:
**		true iff every byte inspected is in the range 32..126.
**		Inspection stops after len bytes or at the first NUL,
**		whichever comes first; a NULL pointer yields true.
*/

bool
asciinstr(str, len)
	const char *str;
	size_t len;
{
	int idx;
	unsigned char byte;

	if (str == NULL)
		return true;

	/* the counter below is an int, so len has to fit into one */
	SM_REQUIRE(len < INT_MAX);

	for (idx = 0; idx < len; idx++)
	{
		byte = (unsigned char)str[idx];

		/* string ended before len bytes: everything seen was fine */
		if (byte == '\0')
			return true;
		if (byte < 32 || byte >= 127)
			return false;
	}

	/* all len bytes were printable ASCII */
	return true;
}
782fb4f839SGregory Neil Shapiro #endif /* USE_EAI */
792fb4f839SGregory Neil Shapiro 
/*
**  MAKELOWER -- convert a string to lower case
**
**	Parameters:
**		p -- string to convert (rewritten in place if possible;
**			may be NULL). [A]
**
**	Returns:
**		lower cased string: p itself when p is NULL or when the
**		conversion was done in place.  With USE_EAI, a string that
**		is not printable ASCII is passed to sm_lowercase() and
**		whatever that function returns is returned instead.
**
**	Side Effects:
**		Upper case ASCII letters in p are replaced by their
**		lower case counterparts when the in-place path is taken.
*/

char *
makelower(p)
	char *p;
{
	char *cur;

	if (p == NULL)
		return p;
#if USE_EAI
	if (!asciistr(p))
		return (char *)sm_lowercase(p);
#endif
	for (cur = p; *cur != '\0'; cur++)
	{
		char c = *cur;

		/*
		**  Only 7-bit values are handed to the ctype functions;
		**  (c & ~0x7f) == 0 is the same test isascii() performs
		**  and also rejects negative values of a signed char.
		*/

		if ((c & ~0x7f) == 0 && isupper(c))
			*cur = tolower(c);
	}
	return p;
}
1122fb4f839SGregory Neil Shapiro 
#if USE_EAI
/*
**  SM_LOWERCASE -- lower case a UTF-8 string
**	Note: this should ONLY be applied to a UTF-8 string,
**	i.e., the caller should check first if it isn't an ASCII string.
**
**	Parameters:
**		str -- original string (must be neither NULL nor empty)
**
**	Returns:
**		lower case version of string [S]
**		The result lives in a buffer that is static to this
**		function and is reused by the next call.
**		NULL if opening the case map or the conversion fails.
**
**	How to return an error description due to failed unicode calls?
**	However, is that even relevant?
*/

char *
sm_lowercase(str)
	const char *str;
{
	int cap, srclen;
	int attempt;
	ssize_t needed;
	UErrorCode status;
	static UCaseMap *casemap = NULL;	/* opened once, then kept */
	static char *buf = NULL;		/* result buffer, grown on demand */
	static int bufsize = 0;			/* size last requested for buf */

# if SM_CHECK_REQUIRE
	if (sm_debug_active(&SmExpensiveRequire, 3))
		SM_REQUIRE(!asciistr(str));
# endif
	/* an empty string is always ASCII */
	SM_REQUIRE(NULL != str && '\0' != *str);

	/* open the case map on first use */
	if (casemap == NULL)
	{
		status = U_ZERO_ERROR;
		casemap = ucasemap_open("en_US", U_FOLD_CASE_DEFAULT, &status);
		if (!U_SUCCESS(status))
		{
			/* syserr("ucasemap_open error: %s", u_errorName(error)); */
			return NULL;
		}
	}

	/* first guess: the result is as long as the input, plus the NUL */
	srclen = strlen(str);
	cap = srclen + 1;
	if (bufsize < cap)
	{
		bufsize = cap;
		buf = sm_realloc_x(buf, bufsize);
	}

	for (attempt = 0; attempt < 3; attempt++)
	{
		status = U_ZERO_ERROR;
		needed = ucasemap_utf8FoldCase(casemap, buf, cap, str, srclen,
					       &status);

		if (status == U_BUFFER_OVERFLOW_ERROR)
		{
			/* too small: resize to the reported length and retry */
			bufsize = needed + 1;
			buf = sm_realloc_x(buf, bufsize);
			cap = bufsize;
			continue;
		}

		if (!U_SUCCESS(status))
		{
			/* syserr("conversion error for \"%s\": %s", str, u_errorName(error)); */
			return NULL;
		}

		/*
		**  Success, but the result used up the whole capacity
		**  (presumably ICU's "string not terminated" warning
		**  case -- confirm): make room for the NUL and add it.
		*/

		if (needed >= cap)
		{
			bufsize = needed + 1;
			buf = sm_realloc_x(buf, bufsize);
			buf[needed] = '\0';
		}
		break;
	}
	return buf;
}
#endif /* USE_EAI */
196