1 /*
2 This file is part of Choqok, the KDE micro-blogging client
3 
4 Copyright (C) 2008-2012 Mehrdad Momeny <mehrdad.momeny@gmail.com>
5 Copyright (C) 2014 Andrea Scarpino <scarpino@kde.org>
6 
7 This program is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public License as
9 published by the Free Software Foundation; either version 2 of
10 the License or (at your option) version 3 or any later version
11 accepted by the membership of KDE e.V. (or its successor approved
12 by the membership of KDE e.V.), which shall act as a proxy
13 defined in Section 14 of version 3 of the license.
14 
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19 
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, see http://www.gnu.org/licenses/
22 */
23 #include "urlutils.h"
24 
25 #include <QRegExp>
26 
27 const QString protocols = QLatin1String("((https?|ftps?)://)");
28 const QString subdomains = QLatin1String("(([a-z0-9\\-_]{1,}\\.)?)");
29 const QString auth = QLatin1String("(([a-z0-9\\-_]{1,})((:[\\S]{1,})?)@)");
30 const QString domains = QLatin1String("(([a-z0-9\\-\\x0080-\\xFFFF_]){1,63}\\.)+");
31 const QString port = QLatin1String("(:(6553[0-5]|655[0-2][0-9]|65[0-4][\\d]{2}|6[0-4][\\d]{3}|[1-5][\\d]{4}|[1-9][\\d]{0,3}))");
32 const QString zone = QLatin1String("((a[cdefgilmnoqrstuwxz])|(b[abdefghijlmnorstvwyz])|(c[acdfghiklmnoruvxyz])|(d[ejkmoz])|(e[ceghrstu])|\
33 (f[ijkmor])|(g[abdefghilmnpqrstuwy])|(h[kmnrtu])|(i[delmnoqrst])|(j[emop])|(k[eghimnprwyz])|(l[abcikrstuvy])|\
34 (m[acdefghklmnopqrstuvwxyz])|(n[acefgilopruz])|(om)|(p[aefghklnrstwy])|(qa)|(r[eosuw])|(s[abcdeghijklmnortuvyz])|\
35 (t[cdfghjkmnoprtvwz])|(u[agksyz])|(v[aceginu])|(w[fs])|(ye)|(z[amrw])\
36 |(asia|com|info|net|org|biz|name|pro|aero|cat|coop|edu|jobs|mobi|museum|tel|travel|gov|int|mil|local|xxx)|(中国)|(公司)|(网络)|(صر)|(امارات)|(рф))");
37 const QString ip = QLatin1String("(25[0-5]|[2][0-4][0-9]|[0-1]?[\\d]{1,2})(\\.(25[0-5]|[2][0-4][0-9]|[0-1]?[\\d]{1,2})){3}");
38 const QString params = QLatin1String("(((\\/)[\\w:/\\?#\\[\\]@!\\$&\\(\\)\\*%\\+,;=\\._~\\x0080-\\xFFFF\\-\\|]{1,}|%[0-9a-f]{2})?)");
39 const QString excludingCharacters = QStringLiteral("[^\\s`!()\\[\\]{};:'\".,<>?%1%2%3%4%5%6]")
40                                     .arg(QChar(0x00AB)).arg(QChar(0x00BB)).arg(QChar(0x201C)).arg(QChar(0x201D)).arg(QChar(0x2018)).arg(QChar(0x2019));
41 
42 const QRegExp UrlUtils::mUrlRegExp(QLatin1String("(((((") + protocols + auth + QLatin1String("?)?)") +
43                                    subdomains +
44                                    QLatin1Char('(') + domains +
45                                    zone + QLatin1String("(?!(\\w))))|(") + protocols + QLatin1Char('(') + ip + QLatin1String(")+))") +
46                                    QLatin1Char('(') + port + QLatin1String("?)") + QLatin1String("((\\/)?)")  +
47                                    params + QLatin1Char(')') + excludingCharacters, Qt::CaseInsensitive);
48 
49 const QRegExp UrlUtils::mEmailRegExp(QLatin1Char('^') + auth + subdomains + domains + zone);
50 const QString hrefTemplate = QLatin1String("<a href='%1' title='%1'>%2</a>");
51 
UrlUtils()52 UrlUtils::UrlUtils()
53 {
54 }
55 
~UrlUtils()56 UrlUtils::~UrlUtils()
57 {
58 }
59 
detectUrls(const QString & text)60 QStringList UrlUtils::detectUrls(const QString &text)
61 {
62     QStringList detectedUrls;
63 
64     int pos = 0;
65     while (((pos = mUrlRegExp.indexIn(text, pos)) != -1)) {
66         const QString link = mUrlRegExp.cap(0);
67         if ((pos - 1 > -1 && (text.at(pos - 1) != QLatin1Char('@') &&
68                               text.at(pos - 1) != QLatin1Char('#') && text.at(pos - 1) != QLatin1Char('!'))) ||
69                 (pos == 0)) {
70             detectedUrls << link;
71         }
72         pos += link.length();
73     }
74 
75     return detectedUrls;
76 }
77 
/**
 * Returns a copy of @p text in which matches of mEmailRegExp are replaced by
 * "mailto:" anchors built from hrefTemplate.
 *
 * A match that is directly preceded by '@', '#' or '!' is left untouched.
 * After each match the search resumes behind the text that now occupies its
 * place (the inserted anchor, or the untouched match).
 *
 * NOTE(review): the matched text is inserted into the HTML without escaping;
 * verify that callers pass text that is already HTML-safe.
 *
 * @param text the text to scan
 * @return the text with the accepted matches turned into HTML links
 */
QString UrlUtils::detectEmails(const QString &text)
{
    QString result(text);

    int offset = 0;
    while (true) {
        offset = mEmailRegExp.indexIn(result, offset);
        if (offset == -1) {
            break;
        }

        const QString address = mEmailRegExp.cap(0);

        // A match at the very beginning has no preceding character to inspect.
        bool rejected = false;
        if (offset > 0) {
            const QChar before = result.at(offset - 1);
            rejected = before == QLatin1Char('@') ||
                       before == QLatin1Char('#') ||
                       before == QLatin1Char('!');
        }

        if (rejected) {
            // Leave the text as it is and step over the match.
            offset += address.length();
            continue;
        }

        const QString target = QLatin1String("mailto:") + address;
        const QString anchor = hrefTemplate.arg(target, address);
        result.replace(offset, address.length(), anchor);

        // Step over the whole anchor so its contents are not scanned again.
        offset += anchor.length();
    }

    return result;
}
99 
100