/*
 * Copyright (c) 2020 Proofpoint, Inc. and its suppliers.
 *	All rights reserved.
 *
 * By using this file, you agree to the terms and conditions set
 * forth in the LICENSE file which can be found at the top level of
 * the sendmail distribution.
 *
 */
102fb4f839SGregory Neil Shapiro
112fb4f839SGregory Neil Shapiro #include <sm/gen.h>
122fb4f839SGregory Neil Shapiro #include <sm/sendmail.h>
132fb4f839SGregory Neil Shapiro
142fb4f839SGregory Neil Shapiro #include <ctype.h>
152fb4f839SGregory Neil Shapiro #include <sm/string.h>
162fb4f839SGregory Neil Shapiro #include <sm/heap.h>
172fb4f839SGregory Neil Shapiro #if USE_EAI
18*d39bd2c1SGregory Neil Shapiro # include <sm/limits.h>
192fb4f839SGregory Neil Shapiro # include <unicode/ucasemap.h>
202fb4f839SGregory Neil Shapiro # include <unicode/ustring.h>
212fb4f839SGregory Neil Shapiro # include <unicode/uchar.h>
22*d39bd2c1SGregory Neil Shapiro # include <sm/ixlen.h>
232fb4f839SGregory Neil Shapiro
/*
**  ASCIISTR -- check whether a string is printable ASCII
**
**	Parameters:
**		str -- string (may be NULL)
**
**	Returns:
**		true iff every byte of str is in the printable ASCII
**		range 32..126; NULL and the empty string yield true.
*/

bool
asciistr(str)
	const char *str;
{
	unsigned char ch;

	if (str == NULL)
		return true;

	/* stop at the terminator or at the first byte outside 32..126 */
	while ((ch = (unsigned char)*str) != '\0' && ch >= 32 && ch < 127)
		str++;

	/* only reaching the terminator means that all bytes passed */
	return ch == '\0';
}
46*d39bd2c1SGregory Neil Shapiro
/*
**  ASCIINSTR -- check whether a string is printable ASCII up to len
**
**	Parameters:
**		str -- string (may be NULL)
**		len -- length to check (must be less than INT_MAX)
**
**	Returns:
**		true iff the first len bytes of str (or all bytes up to
**		the terminating NUL if that comes first) are in the
**		printable ASCII range 32..126;
**		NULL and len == 0 yield true.
*/

bool
asciinstr(str, len)
	const char *str;
	size_t len;
{
	unsigned char ch;
	size_t n;

	if (str == NULL)
		return true;
	SM_REQUIRE(len < INT_MAX);

	/* defined value for the len == 0 case in which no byte is read */
	ch = '\0';
	for (n = 0; n < len; n++)
	{
		ch = (unsigned char)str[n];
		if (ch == '\0' || ch < 32 || ch >= 127)
			break;
	}

	/* ok if len bytes were accepted or the string ended before that */
	return n == len || ch == '\0';
}
782fb4f839SGregory Neil Shapiro #endif /* USE_EAI */
792fb4f839SGregory Neil Shapiro
/*
**  MAKELOWER -- Translate a line into lower case
**
**	Parameters:
**		p -- string to translate (modified in place if possible). [A]
**
**	Returns:
**		lower cased string:
**		- p itself (NULL if p is NULL) when the translation
**		  was done in place;
**		- with USE_EAI and a string that is not printable ASCII:
**		  the result of sm_lowercase(), i.e., a pointer into a
**		  static buffer owned by that function, or NULL if
**		  the conversion failed; p is not modified in this case.
**
**	Side Effects:
**		String p is translated to lower case if possible.
*/

char *
makelower(p)
	char *p;
{
	unsigned char c;
	char *orig;

	if (p == NULL)
		return p;
	orig = p;
#if USE_EAI
	/* byte-wise mapping only works for ASCII; hand anything else to ICU */
	if (!asciistr(p))
		return (char *)sm_lowercase(p);
#endif
	for (; (c = (unsigned char)*p) != '\0'; p++)
	{
		/*
		**  Only touch 7-bit upper case letters; the range check
		**  replaces isascii() and keeps the <ctype.h> arguments
		**  within the range of unsigned char.
		*/

		if (c <= 0x7f && isupper(c))
			*p = (char)tolower(c);
	}
	return orig;
}
1122fb4f839SGregory Neil Shapiro
#if USE_EAI
/*
**  SM_LOWERCASE -- lower case a UTF-8 string
**	Note: this should ONLY be applied to a UTF-8 string,
**	i.e., the caller should check first if it isn't an ASCII string.
**
**	The conversion is done via ICU's ucasemap_utf8FoldCase() with a
**	case map opened for "en_US" and U_FOLD_CASE_DEFAULT, i.e., the
**	result is the case-folded form of the input.
**
**	Parameters:
**		str -- original string (must be neither NULL nor empty)
**
**	Returns:
**		lower case version of string [S]
**		NULL if the ICU case map cannot be opened, the string
**		is too long for ICU's int lengths, or the conversion fails.
**
**	Notes:
**		The result is stored in a static buffer which is reused
**		(and possibly reallocated) by the next call: it must not
**		be free()d by the caller and has to be copied if it is
**		needed after another invocation.
**
**	How to return an error description due to failed unicode calls?
**	However, is that even relevant?
*/

char *
sm_lowercase(str)
	const char *str;
{
	int olen, ilen;
	UErrorCode error;
	ssize_t req;
	int n;
	size_t slen;
	static UCaseMap *csm = NULL;
	static char *out = NULL;
	static int outlen = 0;

# if SM_CHECK_REQUIRE
	if (sm_debug_active(&SmExpensiveRequire, 3))
		SM_REQUIRE(!asciistr(str));
# endif
	/* an empty string is always ASCII */
	SM_REQUIRE(NULL != str && '\0' != *str);

	/* the case map is opened once and kept for all later calls */
	if (NULL == csm)
	{
		error = U_ZERO_ERROR;
		csm = ucasemap_open("en_US", U_FOLD_CASE_DEFAULT, &error);
		if (U_FAILURE(error))
		{
			/* syserr("ucasemap_open error: %s", u_errorName(error)); */
			return NULL;
		}
	}

	/* ICU uses int lengths; refuse input for which ilen + 1 won't fit */
	slen = strlen(str);
	if (slen >= (size_t)INT_MAX - 1)
		return NULL;
	ilen = (int)slen;

	olen = ilen + 1;
	if (olen > outlen)
	{
		outlen = olen;
		out = sm_realloc_x(out, outlen);
	}
	else
	{
		/* offer ICU the whole buffer that is already available */
		olen = outlen;
	}

	/*
	**  The result can be longer than the input, in which case ICU
	**  reports the required size and the call is repeated with a
	**  larger buffer; the number of attempts is limited.
	*/

	for (n = 0; n < 3; n++)
	{
		error = U_ZERO_ERROR;
		req = ucasemap_utf8FoldCase(csm, out, olen, str, ilen, &error);
		if (U_SUCCESS(error))
		{
			if (req >= olen)
			{
				/*
				**  The result fills the buffer completely,
				**  so ICU had no room for the terminating
				**  NUL: make room and add it.
				*/

				outlen = req + 1;
				out = sm_realloc_x(out, outlen);
				out[req] = '\0';
			}
			return out;
		}
		if (error != U_BUFFER_OVERFLOW_ERROR)
		{
			/* syserr("conversion error for \"%s\": %s", str, u_errorName(error)); */
			return NULL;
		}

		/* buffer too small: grow it to the size ICU asked for */
		outlen = req + 1;
		out = sm_realloc_x(out, outlen);
		olen = outlen;
	}

	/* all attempts overflowed: out does not contain a valid result */
	return NULL;
}
#endif /* USE_EAI */
196