1 /*
2     SPDX-FileCopyrightText: 2004 Matt Douhan <matt@fruitsalad.org>
3 
4     SPDX-License-Identifier: LGPL-2.0-or-later
5 */
6 
7 #include "kemailaddress.h"
8 #include "kcodecs.h"
9 #include "kcodecs_debug.h"
10 
11 #include <QRegularExpression>
12 
13 using namespace KEmailAddress;
14 
15 //-----------------------------------------------------------------------------
splitAddressList(const QString & aStr)16 QStringList KEmailAddress::splitAddressList(const QString &aStr)
17 {
18     // Features:
19     // - always ignores quoted characters
20     // - ignores everything (including parentheses and commas)
21     //   inside quoted strings
22     // - supports nested comments
23     // - ignores everything (including double quotes and commas)
24     //   inside comments
25 
26     QStringList list;
27 
28     if (aStr.isEmpty()) {
29         return list;
30     }
31 
32     QString addr;
33     uint addrstart = 0;
34     int commentlevel = 0;
35     bool insidequote = false;
36 
37     for (int index = 0; index < aStr.length(); index++) {
38         // the following conversion to latin1 is o.k. because
39         // we can safely ignore all non-latin1 characters
40         switch (aStr[index].toLatin1()) {
41         case '"': // start or end of quoted string
42             if (commentlevel == 0) {
43                 insidequote = !insidequote;
44             }
45             break;
46         case '(': // start of comment
47             if (!insidequote) {
48                 ++commentlevel;
49             }
50             break;
51         case ')': // end of comment
52             if (!insidequote) {
53                 if (commentlevel > 0) {
54                     --commentlevel;
55                 } else {
56                     return list;
57                 }
58             }
59             break;
60         case '\\': // quoted character
61             index++; // ignore the quoted character
62             break;
63         case ',':
64         case ';':
65             if (!insidequote && (commentlevel == 0)) {
66                 addr = aStr.mid(addrstart, index - addrstart);
67                 if (!addr.isEmpty()) {
68                     list += addr.simplified();
69                 }
70                 addrstart = index + 1;
71             }
72             break;
73         }
74     }
75     // append the last address to the list
76     if (!insidequote && (commentlevel == 0)) {
77         addr = aStr.mid(addrstart, aStr.length() - addrstart);
78         if (!addr.isEmpty()) {
79             list += addr.simplified();
80         }
81     }
82 
83     return list;
84 }
85 
86 //-----------------------------------------------------------------------------
87 // Used by KEmailAddress::splitAddress(...) and KEmailAddress::firstEmailAddress(...).
88 KEmailAddress::EmailParseResult
splitAddressInternal(const QByteArray & address,QByteArray & displayName,QByteArray & addrSpec,QByteArray & comment,bool allowMultipleAddresses)89 splitAddressInternal(const QByteArray &address, QByteArray &displayName, QByteArray &addrSpec, QByteArray &comment, bool allowMultipleAddresses)
90 {
91     //  qCDebug(KCODECS_LOG) << "address";
92     displayName = "";
93     addrSpec = "";
94     comment = "";
95 
96     if (address.isEmpty()) {
97         return AddressEmpty;
98     }
99 
100     // The following is a primitive parser for a mailbox-list (cf. RFC 2822).
101     // The purpose is to extract a displayable string from the mailboxes.
102     // Comments in the addr-spec are not handled. No error checking is done.
103 
104     enum {
105         TopLevel,
106         InComment,
107         InAngleAddress,
108     } context = TopLevel;
109     bool inQuotedString = false;
110     int commentLevel = 0;
111     bool stop = false;
112 
113     for (const char *p = address.data(); *p && !stop; ++p) {
114         switch (context) {
115         case TopLevel: {
116             switch (*p) {
117             case '"':
118                 inQuotedString = !inQuotedString;
119                 displayName += *p;
120                 break;
121             case '(':
122                 if (!inQuotedString) {
123                     context = InComment;
124                     commentLevel = 1;
125                 } else {
126                     displayName += *p;
127                 }
128                 break;
129             case '<':
130                 if (!inQuotedString) {
131                     context = InAngleAddress;
132                 } else {
133                     displayName += *p;
134                 }
135                 break;
136             case '\\': // quoted character
137                 displayName += *p;
138                 ++p; // skip the '\'
139                 if (*p) {
140                     displayName += *p;
141                 } else {
142                     return UnexpectedEnd;
143                 }
144                 break;
145             case ',':
146                 if (!inQuotedString) {
147                     if (allowMultipleAddresses) {
148                         stop = true;
149                     } else {
150                         return UnexpectedComma;
151                     }
152                 } else {
153                     displayName += *p;
154                 }
155                 break;
156             default:
157                 displayName += *p;
158             }
159             break;
160         }
161         case InComment: {
162             switch (*p) {
163             case '(':
164                 ++commentLevel;
165                 comment += *p;
166                 break;
167             case ')':
168                 --commentLevel;
169                 if (commentLevel == 0) {
170                     context = TopLevel;
171                     comment += ' '; // separate the text of several comments
172                 } else {
173                     comment += *p;
174                 }
175                 break;
176             case '\\': // quoted character
177                 comment += *p;
178                 ++p; // skip the '\'
179                 if (*p) {
180                     comment += *p;
181                 } else {
182                     return UnexpectedEnd;
183                 }
184                 break;
185             default:
186                 comment += *p;
187             }
188             break;
189         }
190         case InAngleAddress: {
191             switch (*p) {
192             case '"':
193                 inQuotedString = !inQuotedString;
194                 addrSpec += *p;
195                 break;
196             case '>':
197                 if (!inQuotedString) {
198                     context = TopLevel;
199                 } else {
200                     addrSpec += *p;
201                 }
202                 break;
203             case '\\': // quoted character
204                 addrSpec += *p;
205                 ++p; // skip the '\'
206                 if (*p) {
207                     addrSpec += *p;
208                 } else {
209                     return UnexpectedEnd;
210                 }
211                 break;
212             default:
213                 addrSpec += *p;
214             }
215             break;
216         }
217         } // switch ( context )
218     }
219     // check for errors
220     if (inQuotedString) {
221         return UnbalancedQuote;
222     }
223     if (context == InComment) {
224         return UnbalancedParens;
225     }
226     if (context == InAngleAddress) {
227         return UnclosedAngleAddr;
228     }
229 
230     displayName = displayName.trimmed();
231     comment = comment.trimmed();
232     addrSpec = addrSpec.trimmed();
233 
234     if (addrSpec.isEmpty()) {
235         if (displayName.isEmpty()) {
236             return NoAddressSpec;
237         } else {
238             addrSpec = displayName;
239             displayName.truncate(0);
240         }
241     }
242     /*
243       qCDebug(KCODECS_LOG) << "display-name : \"" << displayName << "\"";
244       qCDebug(KCODECS_LOG) << "comment      : \"" << comment << "\"";
245       qCDebug(KCODECS_LOG) << "addr-spec    : \"" << addrSpec << "\"";
246     */
247     return AddressOk;
248 }
249 
250 //-----------------------------------------------------------------------------
splitAddress(const QByteArray & address,QByteArray & displayName,QByteArray & addrSpec,QByteArray & comment)251 EmailParseResult KEmailAddress::splitAddress(const QByteArray &address, QByteArray &displayName, QByteArray &addrSpec, QByteArray &comment)
252 {
253     return splitAddressInternal(address, displayName, addrSpec, comment, false /* don't allow multiple addresses */);
254 }
255 
256 //-----------------------------------------------------------------------------
splitAddress(const QString & address,QString & displayName,QString & addrSpec,QString & comment)257 EmailParseResult KEmailAddress::splitAddress(const QString &address, QString &displayName, QString &addrSpec, QString &comment)
258 {
259     QByteArray d;
260     QByteArray a;
261     QByteArray c;
262     // FIXME: toUtf8() is probably not safe here, what if the second byte of a multi-byte character
263     //        has the same code as one of the ASCII characters that splitAddress uses as delimiters?
264     EmailParseResult result = splitAddress(address.toUtf8(), d, a, c);
265 
266     if (result == AddressOk) {
267         displayName = QString::fromUtf8(d);
268         addrSpec = QString::fromUtf8(a);
269         comment = QString::fromUtf8(c);
270     }
271     return result;
272 }
273 
274 //-----------------------------------------------------------------------------
isValidAddress(const QString & aStr)275 EmailParseResult KEmailAddress::isValidAddress(const QString &aStr)
276 {
277     // If we are passed an empty string bail right away no need to process
278     // further and waste resources
279     if (aStr.isEmpty()) {
280         return AddressEmpty;
281     }
282 
283     // count how many @'s are in the string that is passed to us
284     // if 0 or > 1 take action
285     // at this point to many @'s cannot bail out right away since
286     // @ is allowed in quotes, so we use a bool to keep track
287     // and then make a judgment further down in the parser
288 
289     bool tooManyAtsFlag = false;
290 
291     int atCount = aStr.count(QLatin1Char('@'));
292     if (atCount > 1) {
293         tooManyAtsFlag = true;
294     } else if (atCount == 0) {
295         return TooFewAts;
296     }
297 
298     int dotCount = aStr.count(QLatin1Char('.'));
299 
300     // The main parser, try and catch all weird and wonderful
301     // mistakes users and/or machines can create
302 
303     enum {
304         TopLevel,
305         InComment,
306         InAngleAddress,
307     } context = TopLevel;
308     bool inQuotedString = false;
309     int commentLevel = 0;
310 
311     unsigned int strlen = aStr.length();
312 
313     for (unsigned int index = 0; index < strlen; index++) {
314         switch (context) {
315         case TopLevel: {
316             switch (aStr[index].toLatin1()) {
317             case '"':
318                 inQuotedString = !inQuotedString;
319                 break;
320             case '(':
321                 if (!inQuotedString) {
322                     context = InComment;
323                     commentLevel = 1;
324                 }
325                 break;
326             case '[':
327                 if (!inQuotedString) {
328                     return InvalidDisplayName;
329                 }
330                 break;
331             case ']':
332                 if (!inQuotedString) {
333                     return InvalidDisplayName;
334                 }
335                 break;
336             case ':':
337                 if (!inQuotedString) {
338                     return DisallowedChar;
339                 }
340                 break;
341             case '<':
342                 if (!inQuotedString) {
343                     context = InAngleAddress;
344                 }
345                 break;
346             case '\\': // quoted character
347                 ++index; // skip the '\'
348                 if ((index + 1) > strlen) {
349                     return UnexpectedEnd;
350                 }
351                 break;
352             case ',':
353                 if (!inQuotedString) {
354                     return UnexpectedComma;
355                 }
356                 break;
357             case ')':
358                 if (!inQuotedString) {
359                     return UnbalancedParens;
360                 }
361                 break;
362             case '>':
363                 if (!inQuotedString) {
364                     return UnopenedAngleAddr;
365                 }
366                 break;
367             case '@':
368                 if (!inQuotedString) {
369                     if (index == 0) { // Missing local part
370                         return MissingLocalPart;
371                     } else if (index == strlen - 1) {
372                         return MissingDomainPart;
373                     }
374                 } else if (inQuotedString) {
375                     --atCount;
376                     if (atCount == 1) {
377                         tooManyAtsFlag = false;
378                     }
379                 }
380                 break;
381             case '.':
382                 if (inQuotedString) {
383                     --dotCount;
384                 }
385                 break;
386             }
387             break;
388         }
389         case InComment: {
390             switch (aStr[index].toLatin1()) {
391             case '(':
392                 ++commentLevel;
393                 break;
394             case ')':
395                 --commentLevel;
396                 if (commentLevel == 0) {
397                     context = TopLevel;
398                 }
399                 break;
400             case '\\': // quoted character
401                 ++index; // skip the '\'
402                 if ((index + 1) > strlen) {
403                     return UnexpectedEnd;
404                 }
405                 break;
406             }
407             break;
408         }
409 
410         case InAngleAddress: {
411             switch (aStr[index].toLatin1()) {
412             case ',':
413                 if (!inQuotedString) {
414                     return UnexpectedComma;
415                 }
416                 break;
417             case '"':
418                 inQuotedString = !inQuotedString;
419                 break;
420             case '@':
421                 if (inQuotedString) {
422                     --atCount;
423                 }
424                 if (atCount == 1) {
425                     tooManyAtsFlag = false;
426                 }
427                 break;
428             case '.':
429                 if (inQuotedString) {
430                     --dotCount;
431                 }
432                 break;
433             case '>':
434                 if (!inQuotedString) {
435                     context = TopLevel;
436                     break;
437                 }
438                 break;
439             case '\\': // quoted character
440                 ++index; // skip the '\'
441                 if ((index + 1) > strlen) {
442                     return UnexpectedEnd;
443                 }
444                 break;
445             }
446             break;
447         }
448         }
449     }
450 
451     if (dotCount == 0 && !inQuotedString) {
452         return TooFewDots;
453     }
454 
455     if (atCount == 0 && !inQuotedString) {
456         return TooFewAts;
457     }
458 
459     if (inQuotedString) {
460         return UnbalancedQuote;
461     }
462 
463     if (context == InComment) {
464         return UnbalancedParens;
465     }
466 
467     if (context == InAngleAddress) {
468         return UnclosedAngleAddr;
469     }
470 
471     if (tooManyAtsFlag) {
472         return TooManyAts;
473     }
474 
475     return AddressOk;
476 }
477 
478 //-----------------------------------------------------------------------------
isValidAddressList(const QString & aStr,QString & badAddr)479 KEmailAddress::EmailParseResult KEmailAddress::isValidAddressList(const QString &aStr, QString &badAddr)
480 {
481     if (aStr.isEmpty()) {
482         return AddressEmpty;
483     }
484 
485     const QStringList list = splitAddressList(aStr);
486     EmailParseResult errorCode = AddressOk;
487     auto it = std::find_if(list.cbegin(), list.cend(), [&errorCode](const QString &addr) {
488         qCDebug(KCODECS_LOG) << " address" << addr;
489         errorCode = isValidAddress(addr);
490         return errorCode != AddressOk;
491     });
492     if (it != list.cend()) {
493         badAddr = *it;
494     }
495     return errorCode;
496 }
497 
498 //-----------------------------------------------------------------------------
emailParseResultToString(EmailParseResult errorCode)499 QString KEmailAddress::emailParseResultToString(EmailParseResult errorCode)
500 {
501     switch (errorCode) {
502     case TooManyAts:
503         return QObject::tr(
504             "The email address you entered is not valid because it "
505             "contains more than one @.\n"
506             "You will not create valid messages if you do not "
507             "change your address.");
508     case TooFewAts:
509         return QObject::tr(
510             "The email address you entered is not valid because it "
511             "does not contain a @.\n"
512             "You will not create valid messages if you do not "
513             "change your address.");
514     case AddressEmpty:
515         return QObject::tr("You have to enter something in the email address field.");
516     case MissingLocalPart:
517         return QObject::tr(
518             "The email address you entered is not valid because it "
519             "does not contain a local part.");
520     case MissingDomainPart:
521         return QObject::tr(
522             "The email address you entered is not valid because it "
523             "does not contain a domain part.");
524     case UnbalancedParens:
525         return QObject::tr(
526             "The email address you entered is not valid because it "
527             "contains unclosed comments/brackets.");
528     case AddressOk:
529         return QObject::tr("The email address you entered is valid.");
530     case UnclosedAngleAddr:
531         return QObject::tr(
532             "The email address you entered is not valid because it "
533             "contains an unclosed angle bracket.");
534     case UnopenedAngleAddr:
535         return QObject::tr(
536             "The email address you entered is not valid because it "
537             "contains too many closing angle brackets.");
538     case UnexpectedComma:
539         return QObject::tr(
540             "The email address you have entered is not valid because it "
541             "contains an unexpected comma.");
542     case UnexpectedEnd:
543         return QObject::tr(
544             "The email address you entered is not valid because it ended "
545             "unexpectedly.\nThis probably means you have used an escaping "
546             "type character like a '\\' as the last character in your "
547             "email address.");
548     case UnbalancedQuote:
549         return QObject::tr(
550             "The email address you entered is not valid because it "
551             "contains quoted text which does not end.");
552     case NoAddressSpec:
553         return QObject::tr(
554             "The email address you entered is not valid because it "
555             "does not seem to contain an actual email address, i.e. "
556             "something of the form joe@example.org.");
557     case DisallowedChar:
558         return QObject::tr(
559             "The email address you entered is not valid because it "
560             "contains an illegal character.");
561     case InvalidDisplayName:
562         return QObject::tr(
563             "The email address you have entered is not valid because it "
564             "contains an invalid display name.");
565     case TooFewDots:
566         return QObject::tr(
567             "The email address you entered is not valid because it "
568             "does not contain a \'.\'.\n"
569             "You will not create valid messages if you do not "
570             "change your address.");
571     }
572     return QObject::tr("Unknown problem with email address");
573 }
574 
575 //-----------------------------------------------------------------------------
isValidSimpleAddress(const QString & aStr)576 bool KEmailAddress::isValidSimpleAddress(const QString &aStr)
577 {
578     // If we are passed an empty string bail right away no need to process further
579     // and waste resources
580     if (aStr.isEmpty()) {
581         return false;
582     }
583 
584     int atChar = aStr.lastIndexOf(QLatin1Char('@'));
585     QString domainPart = aStr.mid(atChar + 1);
586     QString localPart = aStr.left(atChar);
587 
588     // Both of these parts must be non empty
589     // after all we cannot have emails like:
590     // @kde.org, or  foo@
591     if (localPart.isEmpty() || domainPart.isEmpty()) {
592         return false;
593     }
594 
595     bool inQuotedString = false;
596     int atCount = localPart.count(QLatin1Char('@'));
597 
598     unsigned int strlen = localPart.length();
599     for (unsigned int index = 0; index < strlen; index++) {
600         switch (localPart[index].toLatin1()) {
601         case '"':
602             inQuotedString = !inQuotedString;
603             break;
604         case '@':
605             if (inQuotedString) {
606                 --atCount;
607             }
608             break;
609         }
610     }
611 
612     QString addrRx;
613 
614     if (localPart[0] == QLatin1Char('\"') || localPart[localPart.length() - 1] == QLatin1Char('\"')) {
615         addrRx = QStringLiteral("\"[a-zA-Z@]*[\\w.@-]*[a-zA-Z0-9@]\"@");
616     } else {
617         addrRx = QStringLiteral("[a-zA-Z]*[~|{}`\\^?=/+*'&%$#!_\\w.-]*[~|{}`\\^?=/+*'&%$#!_a-zA-Z0-9-]@");
618     }
619     if (domainPart[0] == QLatin1Char('[') || domainPart[domainPart.length() - 1] == QLatin1Char(']')) {
620         addrRx += QStringLiteral("\\[[0-9]{1,3}(\\.[0-9]{1,3}){3}\\]");
621     } else {
622         addrRx += QStringLiteral("[\\w#-]+(\\.[\\w#-]+)*");
623     }
624 
625     const QRegularExpression rx(QRegularExpression::anchoredPattern(addrRx), QRegularExpression::UseUnicodePropertiesOption);
626     return rx.match(aStr).hasMatch();
627 }
628 
629 //-----------------------------------------------------------------------------
simpleEmailAddressErrorMsg()630 QString KEmailAddress::simpleEmailAddressErrorMsg()
631 {
632     return QObject::tr(
633         "The email address you entered is not valid.\nIt "
634         "does not seem to contain an actual email address, i.e. "
635         "something of the form joe@example.org.");
636 }
637 
638 //-----------------------------------------------------------------------------
// Convenience overload for callers that do not care about the error text.
QByteArray KEmailAddress::extractEmailAddress(const QByteArray &address)
{
    QString ignoredError;
    return extractEmailAddress(address, ignoredError);
}
644 
// Returns the addr-spec of the single address given in address.
//
// On success errorMessage is cleared. On failure a null QByteArray is
// returned; errorMessage then receives a translated description of the
// problem, except for an empty input, where it is left untouched.
QByteArray KEmailAddress::extractEmailAddress(const QByteArray &address, QString &errorMessage)
{
    QByteArray unusedDisplayName;
    QByteArray unusedComment;
    QByteArray addrSpec;
    const EmailParseResult result = splitAddressInternal(address, unusedDisplayName, addrSpec, unusedComment, false /* don't allow multiple addresses */);

    if (result == AddressOk) {
        errorMessage.clear();
        return addrSpec;
    }

    if (result != AddressEmpty) {
        errorMessage = emailParseResultToString(result);
        qCDebug(KCODECS_LOG) << "Input:" << address << "\nError:" << errorMessage;
    }
    return QByteArray();
}
663 
664 //-----------------------------------------------------------------------------
// Convenience overload for callers that do not care about the error text.
QString KEmailAddress::extractEmailAddress(const QString &address)
{
    QString ignoredError;
    return extractEmailAddress(address, ignoredError);
}
670 
// QString variant: runs the QByteArray implementation on the UTF-8 encoding
// of address and decodes its result again.
QString KEmailAddress::extractEmailAddress(const QString &address, QString &errorMessage)
{
    const QByteArray encoded = address.toUtf8();
    const QByteArray addrSpec = extractEmailAddress(encoded, errorMessage);
    return QString::fromUtf8(addrSpec);
}
675 
676 //-----------------------------------------------------------------------------
// Convenience overload for callers that do not care about the error text.
QByteArray KEmailAddress::firstEmailAddress(const QByteArray &addresses)
{
    QString ignoredError;
    return firstEmailAddress(addresses, ignoredError);
}
682 
// Returns the addr-spec of the first address in the list addresses.
//
// Parsing stops at the first top-level, unquoted comma. On success
// errorMessage is cleared. On failure a null QByteArray is returned;
// errorMessage then receives a translated description of the problem,
// except for an empty input, where it is left untouched.
QByteArray KEmailAddress::firstEmailAddress(const QByteArray &addresses, QString &errorMessage)
{
    QByteArray dummy1;
    QByteArray dummy2;
    QByteArray addrSpec;
    const EmailParseResult result = splitAddressInternal(addresses, dummy1, addrSpec, dummy2, true /* allow multiple addresses */);
    if (result != AddressOk) {
        addrSpec = QByteArray();
        if (result != AddressEmpty) {
            errorMessage = emailParseResultToString(result);
            // Log the actual input (not a placeholder text), the same way
            // extractEmailAddress() does.
            qCDebug(KCODECS_LOG) << "Input:" << addresses << "\nError:" << errorMessage;
        }
    } else {
        errorMessage.clear();
    }

    return addrSpec;
}
701 
702 //-----------------------------------------------------------------------------
// Convenience overload for callers that do not care about the error text.
QString KEmailAddress::firstEmailAddress(const QString &addresses)
{
    QString ignoredError;
    return firstEmailAddress(addresses, ignoredError);
}
708 
// QString variant: runs the QByteArray implementation on the UTF-8 encoding
// of addresses and decodes its result again.
QString KEmailAddress::firstEmailAddress(const QString &addresses, QString &errorMessage)
{
    const QByteArray encoded = addresses.toUtf8();
    const QByteArray addrSpec = firstEmailAddress(encoded, errorMessage);
    return QString::fromUtf8(addrSpec);
}
713 
714 //-----------------------------------------------------------------------------
extractEmailAddressAndName(const QString & aStr,QString & mail,QString & name)715 bool KEmailAddress::extractEmailAddressAndName(const QString &aStr, QString &mail, QString &name)
716 {
717     name.clear();
718     mail.clear();
719 
720     const int len = aStr.length();
721     const char cQuotes = '"';
722 
723     bool bInComment = false;
724     bool bInQuotesOutsideOfEmail = false;
725     int i = 0;
726     int iAd = 0;
727     int iMailStart = 0;
728     int iMailEnd = 0;
729     QChar c;
730     unsigned int commentstack = 0;
731 
732     // Find the '@' of the email address
733     // skipping all '@' inside "(...)" comments:
734     while (i < len) {
735         c = aStr[i];
736         if (QLatin1Char('(') == c) {
737             ++commentstack;
738         }
739         if (QLatin1Char(')') == c) {
740             --commentstack;
741         }
742         bInComment = commentstack != 0;
743         if (QLatin1Char('"') == c && !bInComment) {
744             bInQuotesOutsideOfEmail = !bInQuotesOutsideOfEmail;
745         }
746 
747         if (!bInComment && !bInQuotesOutsideOfEmail) {
748             if (QLatin1Char('@') == c) {
749                 iAd = i;
750                 break; // found it
751             }
752         }
753         ++i;
754     }
755 
756     if (!iAd) {
757         // We suppose the user is typing the string manually and just
758         // has not finished typing the mail address part.
759         // So we take everything that's left of the '<' as name and the rest as mail
760         for (i = 0; len > i; ++i) {
761             c = aStr[i];
762             if (QLatin1Char('<') != c) {
763                 name.append(c);
764             } else {
765                 break;
766             }
767         }
768         mail = aStr.mid(i + 1);
769         if (mail.endsWith(QLatin1Char('>'))) {
770             mail.truncate(mail.length() - 1);
771         }
772 
773     } else {
774         // Loop backwards until we find the start of the string
775         // or a ',' that is outside of a comment
776         //          and outside of quoted text before the leading '<'.
777         bInComment = false;
778         bInQuotesOutsideOfEmail = false;
779         for (i = iAd - 1; 0 <= i; --i) {
780             c = aStr[i];
781             if (bInComment) {
782                 if (QLatin1Char('(') == c) {
783                     if (!name.isEmpty()) {
784                         name.prepend(QLatin1Char(' '));
785                     }
786                     bInComment = false;
787                 } else {
788                     name.prepend(c); // all comment stuff is part of the name
789                 }
790             } else if (bInQuotesOutsideOfEmail) {
791                 if (QLatin1Char(cQuotes) == c) {
792                     bInQuotesOutsideOfEmail = false;
793                 } else if (c != QLatin1Char('\\')) {
794                     name.prepend(c);
795                 }
796             } else {
797                 // found the start of this addressee ?
798                 if (QLatin1Char(',') == c) {
799                     break;
800                 }
801                 // stuff is before the leading '<' ?
802                 if (iMailStart) {
803                     if (QLatin1Char(cQuotes) == c) {
804                         bInQuotesOutsideOfEmail = true; // end of quoted text found
805                     } else {
806                         name.prepend(c);
807                     }
808                 } else {
809                     switch (c.toLatin1()) {
810                     case '<':
811                         iMailStart = i;
812                         break;
813                     case ')':
814                         if (!name.isEmpty()) {
815                             name.prepend(QLatin1Char(' '));
816                         }
817                         bInComment = true;
818                         break;
819                     default:
820                         if (QLatin1Char(' ') != c) {
821                             mail.prepend(c);
822                         }
823                     }
824                 }
825             }
826         }
827 
828         name = name.simplified();
829         mail = mail.simplified();
830 
831         if (mail.isEmpty()) {
832             return false;
833         }
834 
835         mail.append(QLatin1Char('@'));
836 
837         // Loop forward until we find the end of the string
838         // or a ',' that is outside of a comment
839         //          and outside of quoted text behind the trailing '>'.
840         bInComment = false;
841         bInQuotesOutsideOfEmail = false;
842         int parenthesesNesting = 0;
843         for (i = iAd + 1; len > i; ++i) {
844             c = aStr[i];
845             if (bInComment) {
846                 if (QLatin1Char(')') == c) {
847                     if (--parenthesesNesting == 0) {
848                         bInComment = false;
849                         if (!name.isEmpty()) {
850                             name.append(QLatin1Char(' '));
851                         }
852                     } else {
853                         // nested ")", add it
854                         name.append(QLatin1Char(')')); // name can't be empty here
855                     }
856                 } else {
857                     if (QLatin1Char('(') == c) {
858                         // nested "("
859                         ++parenthesesNesting;
860                     }
861                     name.append(c); // all comment stuff is part of the name
862                 }
863             } else if (bInQuotesOutsideOfEmail) {
864                 if (QLatin1Char(cQuotes) == c) {
865                     bInQuotesOutsideOfEmail = false;
866                 } else if (c != QLatin1Char('\\')) {
867                     name.append(c);
868                 }
869             } else {
870                 // found the end of this addressee ?
871                 if (QLatin1Char(',') == c) {
872                     break;
873                 }
874                 // stuff is behind the trailing '>' ?
875                 if (iMailEnd) {
876                     if (QLatin1Char(cQuotes) == c) {
877                         bInQuotesOutsideOfEmail = true; // start of quoted text found
878                     } else {
879                         name.append(c);
880                     }
881                 } else {
882                     switch (c.toLatin1()) {
883                     case '>':
884                         iMailEnd = i;
885                         break;
886                     case '(':
887                         if (!name.isEmpty()) {
888                             name.append(QLatin1Char(' '));
889                         }
890                         if (++parenthesesNesting > 0) {
891                             bInComment = true;
892                         }
893                         break;
894                     default:
895                         if (QLatin1Char(' ') != c) {
896                             mail.append(c);
897                         }
898                     }
899                 }
900             }
901         }
902     }
903 
904     name = name.simplified();
905     mail = mail.simplified();
906 
907     return !(name.isEmpty() || mail.isEmpty());
908 }
909 
910 //-----------------------------------------------------------------------------
compareEmail(const QString & email1,const QString & email2,bool matchName)911 bool KEmailAddress::compareEmail(const QString &email1, const QString &email2, bool matchName)
912 {
913     QString e1Name;
914     QString e1Email;
915     QString e2Name;
916     QString e2Email;
917 
918     extractEmailAddressAndName(email1, e1Email, e1Name);
919     extractEmailAddressAndName(email2, e2Email, e2Name);
920 
921     return e1Email == e2Email && (!matchName || (e1Name == e2Name));
922 }
923 
924 //-----------------------------------------------------------------------------
925 // Used internally by normalizedAddress()
removeBidiControlChars(const QString & input)926 QString removeBidiControlChars(const QString &input)
927 {
928     const int LRO = 0x202D;
929     const int RLO = 0x202E;
930     const int LRE = 0x202A;
931     const int RLE = 0x202B;
932     QString result = input;
933     result.remove(LRO);
934     result.remove(RLO);
935     result.remove(LRE);
936     result.remove(RLE);
937     return result;
938 }
939 
normalizedAddress(const QString & displayName,const QString & addrSpec,const QString & comment)940 QString KEmailAddress::normalizedAddress(const QString &displayName, const QString &addrSpec, const QString &comment)
941 {
942     const QString realDisplayName = removeBidiControlChars(displayName);
943     if (realDisplayName.isEmpty() && comment.isEmpty()) {
944         return addrSpec;
945     } else if (comment.isEmpty()) {
946         if (!realDisplayName.startsWith(QLatin1Char('\"'))) {
947             return quoteNameIfNecessary(realDisplayName) + QLatin1String(" <") + addrSpec + QLatin1Char('>');
948         } else {
949             return realDisplayName + QLatin1String(" <") + addrSpec + QLatin1Char('>');
950         }
951     } else if (realDisplayName.isEmpty()) {
952         return quoteNameIfNecessary(comment) + QLatin1String(" <") + addrSpec + QLatin1Char('>');
953     } else {
954         return realDisplayName + QLatin1String(" (") + comment + QLatin1String(") <") + addrSpec + QLatin1Char('>');
955     }
956 }
957 
958 //-----------------------------------------------------------------------------
fromIdn(const QString & addrSpec)959 QString KEmailAddress::fromIdn(const QString &addrSpec)
960 {
961     const int atPos = addrSpec.lastIndexOf(QLatin1Char('@'));
962     if (atPos == -1) {
963         return addrSpec;
964     }
965 
966     QString idn = QUrl::fromAce(addrSpec.mid(atPos + 1).toLatin1());
967     if (idn.isEmpty()) {
968         return QString();
969     }
970 
971     return addrSpec.left(atPos + 1) + idn;
972 }
973 
974 //-----------------------------------------------------------------------------
toIdn(const QString & addrSpec)975 QString KEmailAddress::toIdn(const QString &addrSpec)
976 {
977     const int atPos = addrSpec.lastIndexOf(QLatin1Char('@'));
978     if (atPos == -1) {
979         return addrSpec;
980     }
981 
982     QString idn = QLatin1String(QUrl::toAce(addrSpec.mid(atPos + 1)));
983     if (idn.isEmpty()) {
984         return addrSpec;
985     }
986 
987     return addrSpec.left(atPos + 1) + idn;
988 }
989 
990 //-----------------------------------------------------------------------------
normalizeAddressesAndDecodeIdn(const QString & str)991 QString KEmailAddress::normalizeAddressesAndDecodeIdn(const QString &str)
992 {
993     //  qCDebug(KCODECS_LOG) << str;
994     if (str.isEmpty()) {
995         return str;
996     }
997 
998     const QStringList addressList = splitAddressList(str);
999     QStringList normalizedAddressList;
1000 
1001     QByteArray displayName;
1002     QByteArray addrSpec;
1003     QByteArray comment;
1004 
1005     for (const auto &addr : addressList) {
1006         if (!addr.isEmpty()) {
1007             if (splitAddress(addr.toUtf8(), displayName, addrSpec, comment) == AddressOk) {
1008                 QByteArray cs;
1009                 displayName = KCodecs::decodeRFC2047String(displayName, &cs).toUtf8();
1010                 comment = KCodecs::decodeRFC2047String(comment, &cs).toUtf8();
1011 
1012                 normalizedAddressList << normalizedAddress(QString::fromUtf8(displayName), fromIdn(QString::fromUtf8(addrSpec)), QString::fromUtf8(comment));
1013             }
1014         }
1015     }
1016     /*
1017       qCDebug(KCODECS_LOG) << "normalizedAddressList: \""
1018                << normalizedAddressList.join( ", " )
1019                << "\"";
1020     */
1021     return normalizedAddressList.join(QStringLiteral(", "));
1022 }
1023 
1024 //-----------------------------------------------------------------------------
normalizeAddressesAndEncodeIdn(const QString & str)1025 QString KEmailAddress::normalizeAddressesAndEncodeIdn(const QString &str)
1026 {
1027     // qCDebug(KCODECS_LOG) << str;
1028     if (str.isEmpty()) {
1029         return str;
1030     }
1031 
1032     const QStringList addressList = splitAddressList(str);
1033     QStringList normalizedAddressList;
1034 
1035     QByteArray displayName;
1036     QByteArray addrSpec;
1037     QByteArray comment;
1038 
1039     for (const auto &addr : addressList) {
1040         if (!addr.isEmpty()) {
1041             if (splitAddress(addr.toUtf8(), displayName, addrSpec, comment) == AddressOk) {
1042                 normalizedAddressList << normalizedAddress(QString::fromUtf8(displayName), toIdn(QString::fromUtf8(addrSpec)), QString::fromUtf8(comment));
1043             }
1044         }
1045     }
1046 
1047     /*
1048       qCDebug(KCODECS_LOG) << "normalizedAddressList: \""
1049                << normalizedAddressList.join( ", " )
1050                << "\"";
1051     */
1052     return normalizedAddressList.join(QStringLiteral(", "));
1053 }
1054 
1055 //-----------------------------------------------------------------------------
1056 // Escapes unescaped doublequotes in str.
escapeQuotes(const QString & str)1057 static QString escapeQuotes(const QString &str)
1058 {
1059     if (str.isEmpty()) {
1060         return QString();
1061     }
1062 
1063     QString escaped;
1064     // reserve enough memory for the worst case ( """..."" -> \"\"\"...\"\" )
1065     escaped.reserve(2 * str.length());
1066     unsigned int len = 0;
1067     for (int i = 0, total = str.length(); i < total; ++i, ++len) {
1068         const QChar &c = str[i];
1069         if (c == QLatin1Char('"')) { // unescaped doublequote
1070             escaped.append(QLatin1Char('\\'));
1071             ++len;
1072         } else if (c == QLatin1Char('\\')) { // escaped character
1073             escaped.append(QLatin1Char('\\'));
1074             ++len;
1075             ++i;
1076             if (i >= str.length()) { // handle trailing '\' gracefully
1077                 break;
1078             }
1079         }
1080         // Keep str[i] as we increase i previously
1081         escaped.append(str[i]);
1082     }
1083     escaped.truncate(len);
1084     return escaped;
1085 }
1086 
1087 //-----------------------------------------------------------------------------
quoteNameIfNecessary(const QString & str)1088 QString KEmailAddress::quoteNameIfNecessary(const QString &str)
1089 {
1090     if (str.isEmpty()) {
1091         return str;
1092     }
1093     QString quoted = str;
1094 
1095     static const QRegularExpression needQuotes(QStringLiteral("[^ 0-9A-Za-z\\x{0080}-\\x{FFFF}]"));
1096     // avoid double quoting
1097     if ((quoted[0] == QLatin1Char('"')) && (quoted[quoted.length() - 1] == QLatin1Char('"'))) {
1098         quoted = QLatin1String("\"") + escapeQuotes(quoted.mid(1, quoted.length() - 2)) + QLatin1String("\"");
1099     } else if (quoted.indexOf(needQuotes) != -1) {
1100         quoted = QLatin1String("\"") + escapeQuotes(quoted) + QLatin1String("\"");
1101     }
1102 
1103     return quoted;
1104 }
1105 
encodeMailtoUrl(const QString & mailbox)1106 QUrl KEmailAddress::encodeMailtoUrl(const QString &mailbox)
1107 {
1108     const QByteArray encodedPath = KCodecs::encodeRFC2047String(mailbox, "utf-8");
1109     QUrl mailtoUrl;
1110     mailtoUrl.setScheme(QStringLiteral("mailto"));
1111     mailtoUrl.setPath(QLatin1String(encodedPath));
1112     return mailtoUrl;
1113 }
1114 
decodeMailtoUrl(const QUrl & mailtoUrl)1115 QString KEmailAddress::decodeMailtoUrl(const QUrl &mailtoUrl)
1116 {
1117     Q_ASSERT(mailtoUrl.scheme() == QLatin1String("mailto"));
1118     return KCodecs::decodeRFC2047String(mailtoUrl.path());
1119 }
1120