1 // This file is part of Desktop App Toolkit,
2 // a set of libraries for developing nice desktop applications.
3 //
4 // For license and copyright information please follow this link:
5 // https://github.com/desktop-app/legal/blob/master/LEGAL
6 //
7 #include "ui/text/text_entity.h"
8
9 #include "base/qthelp_url.h"
10 #include "base/qthelp_regex.h"
11 #include "base/crc32hash.h"
12 #include "ui/text/text.h"
13 #include "ui/widgets/input_fields.h"
14 #include "ui/emoji_config.h"
15 #include "base/qt_adapters.h"
16
17 #include <QtCore/QStack>
18 #include <QtCore/QMimeData>
19 #include <QtGui/QGuiApplication>
20 #include <QtGui/QClipboard>
21
22 namespace TextUtilities {
23 namespace {
24
25 using namespace Ui::Text;
26
ExpressionMailNameAtEnd()27 QString ExpressionMailNameAtEnd() {
28 // Matches email first part (before '@') at the end of the string.
29 // First we find a domain without protocol (like "gmail.com"), then
30 // we find '@' before it and then we look for the name before '@'.
31 return QString::fromUtf8("[a-zA-Z\\-_\\.0-9]{1,256}$");
32 }
33
Quotes()34 QString Quotes() {
35 // UTF8 quotes and ellipsis
36 return QString::fromUtf8("\xC2\xAB\xC2\xBB\xE2\x80\x9C\xE2\x80\x9D\xE2\x80\x98\xE2\x80\x99\xE2\x80\xA6");
37 }
38
// Returns the list of separator characters written in regular expression
// syntax. The result is meant to be placed inside a character class: the
// pattern builders in this file wrap it in "[" and "]".
//
// The list contains the whitespace class, common punctuation, the \x10
// control character, the characters from Quotes() and finally `additional`.
// `additional` is appended verbatim, so it must already be escaped for use
// inside a character class.
QString ExpressionSeparators(const QString &additional) {
	// Decoded once on first use and shared by all later calls.
	static const QString kQuoteChars = Quotes();

	auto result = QString::fromUtf8("\\s\\.,:;<>|'\"\\[\\]\\{\\}\\~\\!\\?\\%\\^\\(\\)\\-\\+=\\x10");
	result += kQuoteChars;
	result += additional;
	return result;
}
43
// Returns the list of separator characters as a plain string (no regular
// expression escaping), in this order: space, the 0x10 control character,
// line breaks and tab, common punctuation, the two text frame markers, the
// paragraph and line separators, the characters from Quotes() and finally
// `additional`.
QString Separators(const QString &additional) {
	// Decoded once on first use and shared by all later calls.
	static const QString kQuoteChars = Quotes();

	auto result = QString::fromUtf8(" \x10\n\r\t.,:;<>|'\"[]{}!?%^()-+=");
	result += QChar(0xfdd0); // QTextBeginningOfFrame
	result += QChar(0xfdd1); // QTextEndOfFrame
	result += QChar(QChar::ParagraphSeparator);
	result += QChar(QChar::LineSeparator);
	result += kQuoteChars;
	result += additional;
	return result;
}
54
SeparatorsBold()55 QString SeparatorsBold() {
56 return Separators(QString::fromUtf8("`~/"));
57 }
58
SeparatorsItalic()59 QString SeparatorsItalic() {
60 return Separators(QString::fromUtf8("`*~/"));
61 }
62
SeparatorsStrikeOut()63 QString SeparatorsStrikeOut() {
64 return Separators(QString::fromUtf8("`*~/"));
65 }
66
SeparatorsMono()67 QString SeparatorsMono() {
68 return Separators(QString::fromUtf8("*~/"));
69 }
70
ExpressionHashtag()71 QString ExpressionHashtag() {
72 return QString::fromUtf8("(^|[") + ExpressionSeparators(QString::fromUtf8("`\\*/")) + QString::fromUtf8("])#[\\w]{2,64}([\\W]|$)");
73 }
74
ExpressionHashtagExclude()75 QString ExpressionHashtagExclude() {
76 return QString::fromUtf8("^#?\\d+$");
77 }
78
ExpressionMention()79 QString ExpressionMention() {
80 return QString::fromUtf8("(^|[") + ExpressionSeparators(QString::fromUtf8("`\\*/")) + QString::fromUtf8("])@[A-Za-z_0-9]{1,32}([\\W]|$)");
81 }
82
ExpressionBotCommand()83 QString ExpressionBotCommand() {
84 return QString::fromUtf8("(^|[") + ExpressionSeparators(QString::fromUtf8("`\\*")) + QString::fromUtf8("])/[A-Za-z_0-9]{1,64}(@[A-Za-z_0-9]{5,32})?([\\W]|$)");
85 }
86
CreateRegExp(const QString & expression)87 QRegularExpression CreateRegExp(const QString &expression) {
88 auto result = QRegularExpression(
89 expression,
90 QRegularExpression::UseUnicodePropertiesOption);
91 result.optimize();
92 return result;
93 }
94
CreateValidProtocols()95 base::flat_set<int32> CreateValidProtocols() {
96 auto result = base::flat_set<int32>();
97 const auto addOne = [&](const QString &string) {
98 result.insert(base::crc32(string.constData(), string.size() * sizeof(QChar)));
99 };
100 addOne(QString::fromLatin1("itmss")); // itunes
101 addOne(QString::fromLatin1("http"));
102 addOne(QString::fromLatin1("https"));
103 addOne(QString::fromLatin1("ftp"));
104 addOne(QString::fromLatin1("tg")); // local urls
105 return result;
106 }
107
CreateValidTopDomains()108 base::flat_set<int32> CreateValidTopDomains() {
109 auto result = base::flat_set<int32>();
110 auto addOne = [&result](const QString &string) {
111 result.insert(base::crc32(string.constData(), string.size() * sizeof(QChar)));
112 };
113 addOne(QString::fromLatin1("ac"));
114 addOne(QString::fromLatin1("ad"));
115 addOne(QString::fromLatin1("ae"));
116 addOne(QString::fromLatin1("af"));
117 addOne(QString::fromLatin1("ag"));
118 addOne(QString::fromLatin1("ai"));
119 addOne(QString::fromLatin1("al"));
120 addOne(QString::fromLatin1("am"));
121 addOne(QString::fromLatin1("an"));
122 addOne(QString::fromLatin1("ao"));
123 addOne(QString::fromLatin1("aq"));
124 addOne(QString::fromLatin1("ar"));
125 addOne(QString::fromLatin1("as"));
126 addOne(QString::fromLatin1("at"));
127 addOne(QString::fromLatin1("au"));
128 addOne(QString::fromLatin1("aw"));
129 addOne(QString::fromLatin1("ax"));
130 addOne(QString::fromLatin1("az"));
131 addOne(QString::fromLatin1("ba"));
132 addOne(QString::fromLatin1("bb"));
133 addOne(QString::fromLatin1("bd"));
134 addOne(QString::fromLatin1("be"));
135 addOne(QString::fromLatin1("bf"));
136 addOne(QString::fromLatin1("bg"));
137 addOne(QString::fromLatin1("bh"));
138 addOne(QString::fromLatin1("bi"));
139 addOne(QString::fromLatin1("bj"));
140 addOne(QString::fromLatin1("bm"));
141 addOne(QString::fromLatin1("bn"));
142 addOne(QString::fromLatin1("bo"));
143 addOne(QString::fromLatin1("br"));
144 addOne(QString::fromLatin1("bs"));
145 addOne(QString::fromLatin1("bt"));
146 addOne(QString::fromLatin1("bv"));
147 addOne(QString::fromLatin1("bw"));
148 addOne(QString::fromLatin1("by"));
149 addOne(QString::fromLatin1("bz"));
150 addOne(QString::fromLatin1("ca"));
151 addOne(QString::fromLatin1("cc"));
152 addOne(QString::fromLatin1("cd"));
153 addOne(QString::fromLatin1("cf"));
154 addOne(QString::fromLatin1("cg"));
155 addOne(QString::fromLatin1("ch"));
156 addOne(QString::fromLatin1("ci"));
157 addOne(QString::fromLatin1("ck"));
158 addOne(QString::fromLatin1("cl"));
159 addOne(QString::fromLatin1("cm"));
160 addOne(QString::fromLatin1("cn"));
161 addOne(QString::fromLatin1("co"));
162 addOne(QString::fromLatin1("cr"));
163 addOne(QString::fromLatin1("cu"));
164 addOne(QString::fromLatin1("cv"));
165 addOne(QString::fromLatin1("cx"));
166 addOne(QString::fromLatin1("cy"));
167 addOne(QString::fromLatin1("cz"));
168 addOne(QString::fromLatin1("de"));
169 addOne(QString::fromLatin1("dj"));
170 addOne(QString::fromLatin1("dk"));
171 addOne(QString::fromLatin1("dm"));
172 addOne(QString::fromLatin1("do"));
173 addOne(QString::fromLatin1("dz"));
174 addOne(QString::fromLatin1("ec"));
175 addOne(QString::fromLatin1("ee"));
176 addOne(QString::fromLatin1("eg"));
177 addOne(QString::fromLatin1("eh"));
178 addOne(QString::fromLatin1("er"));
179 addOne(QString::fromLatin1("es"));
180 addOne(QString::fromLatin1("et"));
181 addOne(QString::fromLatin1("eu"));
182 addOne(QString::fromLatin1("fi"));
183 addOne(QString::fromLatin1("fj"));
184 addOne(QString::fromLatin1("fk"));
185 addOne(QString::fromLatin1("fm"));
186 addOne(QString::fromLatin1("fo"));
187 addOne(QString::fromLatin1("fr"));
188 addOne(QString::fromLatin1("ga"));
189 addOne(QString::fromLatin1("gd"));
190 addOne(QString::fromLatin1("ge"));
191 addOne(QString::fromLatin1("gf"));
192 addOne(QString::fromLatin1("gg"));
193 addOne(QString::fromLatin1("gh"));
194 addOne(QString::fromLatin1("gi"));
195 addOne(QString::fromLatin1("gl"));
196 addOne(QString::fromLatin1("gm"));
197 addOne(QString::fromLatin1("gn"));
198 addOne(QString::fromLatin1("gp"));
199 addOne(QString::fromLatin1("gq"));
200 addOne(QString::fromLatin1("gr"));
201 addOne(QString::fromLatin1("gs"));
202 addOne(QString::fromLatin1("gt"));
203 addOne(QString::fromLatin1("gu"));
204 addOne(QString::fromLatin1("gw"));
205 addOne(QString::fromLatin1("gy"));
206 addOne(QString::fromLatin1("hk"));
207 addOne(QString::fromLatin1("hm"));
208 addOne(QString::fromLatin1("hn"));
209 addOne(QString::fromLatin1("hr"));
210 addOne(QString::fromLatin1("ht"));
211 addOne(QString::fromLatin1("hu"));
212 addOne(QString::fromLatin1("id"));
213 addOne(QString::fromLatin1("ie"));
214 addOne(QString::fromLatin1("il"));
215 addOne(QString::fromLatin1("im"));
216 addOne(QString::fromLatin1("in"));
217 addOne(QString::fromLatin1("io"));
218 addOne(QString::fromLatin1("iq"));
219 addOne(QString::fromLatin1("ir"));
220 addOne(QString::fromLatin1("is"));
221 addOne(QString::fromLatin1("it"));
222 addOne(QString::fromLatin1("je"));
223 addOne(QString::fromLatin1("jm"));
224 addOne(QString::fromLatin1("jo"));
225 addOne(QString::fromLatin1("jp"));
226 addOne(QString::fromLatin1("ke"));
227 addOne(QString::fromLatin1("kg"));
228 addOne(QString::fromLatin1("kh"));
229 addOne(QString::fromLatin1("ki"));
230 addOne(QString::fromLatin1("km"));
231 addOne(QString::fromLatin1("kn"));
232 addOne(QString::fromLatin1("kp"));
233 addOne(QString::fromLatin1("kr"));
234 addOne(QString::fromLatin1("kw"));
235 addOne(QString::fromLatin1("ky"));
236 addOne(QString::fromLatin1("kz"));
237 addOne(QString::fromLatin1("la"));
238 addOne(QString::fromLatin1("lb"));
239 addOne(QString::fromLatin1("lc"));
240 addOne(QString::fromLatin1("li"));
241 addOne(QString::fromLatin1("lk"));
242 addOne(QString::fromLatin1("lr"));
243 addOne(QString::fromLatin1("ls"));
244 addOne(QString::fromLatin1("lt"));
245 addOne(QString::fromLatin1("lu"));
246 addOne(QString::fromLatin1("lv"));
247 addOne(QString::fromLatin1("ly"));
248 addOne(QString::fromLatin1("ma"));
249 addOne(QString::fromLatin1("mc"));
250 addOne(QString::fromLatin1("md"));
251 addOne(QString::fromLatin1("me"));
252 addOne(QString::fromLatin1("mg"));
253 addOne(QString::fromLatin1("mh"));
254 addOne(QString::fromLatin1("mk"));
255 addOne(QString::fromLatin1("ml"));
256 addOne(QString::fromLatin1("mm"));
257 addOne(QString::fromLatin1("mn"));
258 addOne(QString::fromLatin1("mo"));
259 addOne(QString::fromLatin1("mp"));
260 addOne(QString::fromLatin1("mq"));
261 addOne(QString::fromLatin1("mr"));
262 addOne(QString::fromLatin1("ms"));
263 addOne(QString::fromLatin1("mt"));
264 addOne(QString::fromLatin1("mu"));
265 addOne(QString::fromLatin1("mv"));
266 addOne(QString::fromLatin1("mw"));
267 addOne(QString::fromLatin1("mx"));
268 addOne(QString::fromLatin1("my"));
269 addOne(QString::fromLatin1("mz"));
270 addOne(QString::fromLatin1("na"));
271 addOne(QString::fromLatin1("nc"));
272 addOne(QString::fromLatin1("ne"));
273 addOne(QString::fromLatin1("nf"));
274 addOne(QString::fromLatin1("ng"));
275 addOne(QString::fromLatin1("ni"));
276 addOne(QString::fromLatin1("nl"));
277 addOne(QString::fromLatin1("no"));
278 addOne(QString::fromLatin1("np"));
279 addOne(QString::fromLatin1("nr"));
280 addOne(QString::fromLatin1("nu"));
281 addOne(QString::fromLatin1("nz"));
282 addOne(QString::fromLatin1("om"));
283 addOne(QString::fromLatin1("pa"));
284 addOne(QString::fromLatin1("pe"));
285 addOne(QString::fromLatin1("pf"));
286 addOne(QString::fromLatin1("pg"));
287 addOne(QString::fromLatin1("ph"));
288 addOne(QString::fromLatin1("pk"));
289 addOne(QString::fromLatin1("pl"));
290 addOne(QString::fromLatin1("pm"));
291 addOne(QString::fromLatin1("pn"));
292 addOne(QString::fromLatin1("pr"));
293 addOne(QString::fromLatin1("ps"));
294 addOne(QString::fromLatin1("pt"));
295 addOne(QString::fromLatin1("pw"));
296 addOne(QString::fromLatin1("py"));
297 addOne(QString::fromLatin1("qa"));
298 addOne(QString::fromLatin1("re"));
299 addOne(QString::fromLatin1("ro"));
300 addOne(QString::fromLatin1("ru"));
301 addOne(QString::fromLatin1("rs"));
302 addOne(QString::fromLatin1("rw"));
303 addOne(QString::fromLatin1("sa"));
304 addOne(QString::fromLatin1("sb"));
305 addOne(QString::fromLatin1("sc"));
306 addOne(QString::fromLatin1("sd"));
307 addOne(QString::fromLatin1("se"));
308 addOne(QString::fromLatin1("sg"));
309 addOne(QString::fromLatin1("sh"));
310 addOne(QString::fromLatin1("si"));
311 addOne(QString::fromLatin1("sj"));
312 addOne(QString::fromLatin1("sk"));
313 addOne(QString::fromLatin1("sl"));
314 addOne(QString::fromLatin1("sm"));
315 addOne(QString::fromLatin1("sn"));
316 addOne(QString::fromLatin1("so"));
317 addOne(QString::fromLatin1("sr"));
318 addOne(QString::fromLatin1("ss"));
319 addOne(QString::fromLatin1("st"));
320 addOne(QString::fromLatin1("su"));
321 addOne(QString::fromLatin1("sv"));
322 addOne(QString::fromLatin1("sx"));
323 addOne(QString::fromLatin1("sy"));
324 addOne(QString::fromLatin1("sz"));
325 addOne(QString::fromLatin1("tc"));
326 addOne(QString::fromLatin1("td"));
327 addOne(QString::fromLatin1("tf"));
328 addOne(QString::fromLatin1("tg"));
329 addOne(QString::fromLatin1("th"));
330 addOne(QString::fromLatin1("tj"));
331 addOne(QString::fromLatin1("tk"));
332 addOne(QString::fromLatin1("tl"));
333 addOne(QString::fromLatin1("tm"));
334 addOne(QString::fromLatin1("tn"));
335 addOne(QString::fromLatin1("to"));
336 addOne(QString::fromLatin1("tp"));
337 addOne(QString::fromLatin1("tr"));
338 addOne(QString::fromLatin1("tt"));
339 addOne(QString::fromLatin1("tv"));
340 addOne(QString::fromLatin1("tw"));
341 addOne(QString::fromLatin1("tz"));
342 addOne(QString::fromLatin1("ua"));
343 addOne(QString::fromLatin1("ug"));
344 addOne(QString::fromLatin1("uk"));
345 addOne(QString::fromLatin1("um"));
346 addOne(QString::fromLatin1("us"));
347 addOne(QString::fromLatin1("uy"));
348 addOne(QString::fromLatin1("uz"));
349 addOne(QString::fromLatin1("va"));
350 addOne(QString::fromLatin1("vc"));
351 addOne(QString::fromLatin1("ve"));
352 addOne(QString::fromLatin1("vg"));
353 addOne(QString::fromLatin1("vi"));
354 addOne(QString::fromLatin1("vn"));
355 addOne(QString::fromLatin1("vu"));
356 addOne(QString::fromLatin1("wf"));
357 addOne(QString::fromLatin1("ws"));
358 addOne(QString::fromLatin1("ye"));
359 addOne(QString::fromLatin1("yt"));
360 addOne(QString::fromLatin1("yu"));
361 addOne(QString::fromLatin1("za"));
362 addOne(QString::fromLatin1("zm"));
363 addOne(QString::fromLatin1("zw"));
364 addOne(QString::fromLatin1("arpa"));
365 addOne(QString::fromLatin1("aero"));
366 addOne(QString::fromLatin1("asia"));
367 addOne(QString::fromLatin1("biz"));
368 addOne(QString::fromLatin1("cat"));
369 addOne(QString::fromLatin1("com"));
370 addOne(QString::fromLatin1("coop"));
371 addOne(QString::fromLatin1("info"));
372 addOne(QString::fromLatin1("int"));
373 addOne(QString::fromLatin1("jobs"));
374 addOne(QString::fromLatin1("mobi"));
375 addOne(QString::fromLatin1("museum"));
376 addOne(QString::fromLatin1("name"));
377 addOne(QString::fromLatin1("net"));
378 addOne(QString::fromLatin1("org"));
379 addOne(QString::fromLatin1("post"));
380 addOne(QString::fromLatin1("pro"));
381 addOne(QString::fromLatin1("tel"));
382 addOne(QString::fromLatin1("travel"));
383 addOne(QString::fromLatin1("xxx"));
384 addOne(QString::fromLatin1("edu"));
385 addOne(QString::fromLatin1("gov"));
386 addOne(QString::fromLatin1("mil"));
387 addOne(QString::fromLatin1("local"));
388 addOne(QString::fromLatin1("xn--lgbbat1ad8j"));
389 addOne(QString::fromLatin1("xn--54b7fta0cc"));
390 addOne(QString::fromLatin1("xn--fiqs8s"));
391 addOne(QString::fromLatin1("xn--fiqz9s"));
392 addOne(QString::fromLatin1("xn--wgbh1c"));
393 addOne(QString::fromLatin1("xn--node"));
394 addOne(QString::fromLatin1("xn--j6w193g"));
395 addOne(QString::fromLatin1("xn--h2brj9c"));
396 addOne(QString::fromLatin1("xn--mgbbh1a71e"));
397 addOne(QString::fromLatin1("xn--fpcrj9c3d"));
398 addOne(QString::fromLatin1("xn--gecrj9c"));
399 addOne(QString::fromLatin1("xn--s9brj9c"));
400 addOne(QString::fromLatin1("xn--xkc2dl3a5ee0h"));
401 addOne(QString::fromLatin1("xn--45brj9c"));
402 addOne(QString::fromLatin1("xn--mgba3a4f16a"));
403 addOne(QString::fromLatin1("xn--mgbayh7gpa"));
404 addOne(QString::fromLatin1("xn--80ao21a"));
405 addOne(QString::fromLatin1("xn--mgbx4cd0ab"));
406 addOne(QString::fromLatin1("xn--l1acc"));
407 addOne(QString::fromLatin1("xn--mgbc0a9azcg"));
408 addOne(QString::fromLatin1("xn--mgb9awbf"));
409 addOne(QString::fromLatin1("xn--mgbai9azgqp6j"));
410 addOne(QString::fromLatin1("xn--ygbi2ammx"));
411 addOne(QString::fromLatin1("xn--wgbl6a"));
412 addOne(QString::fromLatin1("xn--p1ai"));
413 addOne(QString::fromLatin1("xn--mgberp4a5d4ar"));
414 addOne(QString::fromLatin1("xn--90a3ac"));
415 addOne(QString::fromLatin1("xn--yfro4i67o"));
416 addOne(QString::fromLatin1("xn--clchc0ea0b2g2a9gcd"));
417 addOne(QString::fromLatin1("xn--3e0b707e"));
418 addOne(QString::fromLatin1("xn--fzc2c9e2c"));
419 addOne(QString::fromLatin1("xn--xkc2al3hye2a"));
420 addOne(QString::fromLatin1("xn--mgbtf8fl"));
421 addOne(QString::fromLatin1("xn--kprw13d"));
422 addOne(QString::fromLatin1("xn--kpry57d"));
423 addOne(QString::fromLatin1("xn--o3cw4h"));
424 addOne(QString::fromLatin1("xn--pgbs0dh"));
425 addOne(QString::fromLatin1("xn--j1amh"));
426 addOne(QString::fromLatin1("xn--mgbaam7a8h"));
427 addOne(QString::fromLatin1("xn--mgb2ddes"));
428 addOne(QString::fromLatin1("xn--ogbpf8fl"));
429 addOne(QString::fromUtf8("\xd1\x80\xd1\x84"));
430 return result;
431 }
432
433 // accent char list taken from https://github.com/aristus/accent-folding
RemoveOneAccent(uint32 code)434 inline QChar RemoveOneAccent(uint32 code) {
435 switch (code) {
436 case 7834: return QChar(97);
437 case 193: return QChar(97);
438 case 225: return QChar(97);
439 case 192: return QChar(97);
440 case 224: return QChar(97);
441 case 258: return QChar(97);
442 case 259: return QChar(97);
443 case 7854: return QChar(97);
444 case 7855: return QChar(97);
445 case 7856: return QChar(97);
446 case 7857: return QChar(97);
447 case 7860: return QChar(97);
448 case 7861: return QChar(97);
449 case 7858: return QChar(97);
450 case 7859: return QChar(97);
451 case 194: return QChar(97);
452 case 226: return QChar(97);
453 case 7844: return QChar(97);
454 case 7845: return QChar(97);
455 case 7846: return QChar(97);
456 case 7847: return QChar(97);
457 case 7850: return QChar(97);
458 case 7851: return QChar(97);
459 case 7848: return QChar(97);
460 case 7849: return QChar(97);
461 case 461: return QChar(97);
462 case 462: return QChar(97);
463 case 197: return QChar(97);
464 case 229: return QChar(97);
465 case 506: return QChar(97);
466 case 507: return QChar(97);
467 case 196: return QChar(97);
468 case 228: return QChar(97);
469 case 478: return QChar(97);
470 case 479: return QChar(97);
471 case 195: return QChar(97);
472 case 227: return QChar(97);
473 case 550: return QChar(97);
474 case 551: return QChar(97);
475 case 480: return QChar(97);
476 case 481: return QChar(97);
477 case 260: return QChar(97);
478 case 261: return QChar(97);
479 case 256: return QChar(97);
480 case 257: return QChar(97);
481 case 7842: return QChar(97);
482 case 7843: return QChar(97);
483 case 512: return QChar(97);
484 case 513: return QChar(97);
485 case 514: return QChar(97);
486 case 515: return QChar(97);
487 case 7840: return QChar(97);
488 case 7841: return QChar(97);
489 case 7862: return QChar(97);
490 case 7863: return QChar(97);
491 case 7852: return QChar(97);
492 case 7853: return QChar(97);
493 case 7680: return QChar(97);
494 case 7681: return QChar(97);
495 case 570: return QChar(97);
496 case 11365: return QChar(97);
497 case 508: return QChar(97);
498 case 509: return QChar(97);
499 case 482: return QChar(97);
500 case 483: return QChar(97);
501 case 7682: return QChar(98);
502 case 7683: return QChar(98);
503 case 7684: return QChar(98);
504 case 7685: return QChar(98);
505 case 7686: return QChar(98);
506 case 7687: return QChar(98);
507 case 579: return QChar(98);
508 case 384: return QChar(98);
509 case 7532: return QChar(98);
510 case 385: return QChar(98);
511 case 595: return QChar(98);
512 case 386: return QChar(98);
513 case 387: return QChar(98);
514 case 262: return QChar(99);
515 case 263: return QChar(99);
516 case 264: return QChar(99);
517 case 265: return QChar(99);
518 case 268: return QChar(99);
519 case 269: return QChar(99);
520 case 266: return QChar(99);
521 case 267: return QChar(99);
522 case 199: return QChar(99);
523 case 231: return QChar(99);
524 case 7688: return QChar(99);
525 case 7689: return QChar(99);
526 case 571: return QChar(99);
527 case 572: return QChar(99);
528 case 391: return QChar(99);
529 case 392: return QChar(99);
530 case 597: return QChar(99);
531 case 270: return QChar(100);
532 case 271: return QChar(100);
533 case 7690: return QChar(100);
534 case 7691: return QChar(100);
535 case 7696: return QChar(100);
536 case 7697: return QChar(100);
537 case 7692: return QChar(100);
538 case 7693: return QChar(100);
539 case 7698: return QChar(100);
540 case 7699: return QChar(100);
541 case 7694: return QChar(100);
542 case 7695: return QChar(100);
543 case 272: return QChar(100);
544 case 273: return QChar(100);
545 case 7533: return QChar(100);
546 case 393: return QChar(100);
547 case 598: return QChar(100);
548 case 394: return QChar(100);
549 case 599: return QChar(100);
550 case 395: return QChar(100);
551 case 396: return QChar(100);
552 case 545: return QChar(100);
553 case 240: return QChar(100);
554 case 201: return QChar(101);
555 case 399: return QChar(101);
556 case 398: return QChar(101);
557 case 477: return QChar(101);
558 case 233: return QChar(101);
559 case 200: return QChar(101);
560 case 232: return QChar(101);
561 case 276: return QChar(101);
562 case 277: return QChar(101);
563 case 202: return QChar(101);
564 case 234: return QChar(101);
565 case 7870: return QChar(101);
566 case 7871: return QChar(101);
567 case 7872: return QChar(101);
568 case 7873: return QChar(101);
569 case 7876: return QChar(101);
570 case 7877: return QChar(101);
571 case 7874: return QChar(101);
572 case 7875: return QChar(101);
573 case 282: return QChar(101);
574 case 283: return QChar(101);
575 case 203: return QChar(101);
576 case 235: return QChar(101);
577 case 7868: return QChar(101);
578 case 7869: return QChar(101);
579 case 278: return QChar(101);
580 case 279: return QChar(101);
581 case 552: return QChar(101);
582 case 553: return QChar(101);
583 case 7708: return QChar(101);
584 case 7709: return QChar(101);
585 case 280: return QChar(101);
586 case 281: return QChar(101);
587 case 274: return QChar(101);
588 case 275: return QChar(101);
589 case 7702: return QChar(101);
590 case 7703: return QChar(101);
591 case 7700: return QChar(101);
592 case 7701: return QChar(101);
593 case 7866: return QChar(101);
594 case 7867: return QChar(101);
595 case 516: return QChar(101);
596 case 517: return QChar(101);
597 case 518: return QChar(101);
598 case 519: return QChar(101);
599 case 7864: return QChar(101);
600 case 7865: return QChar(101);
601 case 7878: return QChar(101);
602 case 7879: return QChar(101);
603 case 7704: return QChar(101);
604 case 7705: return QChar(101);
605 case 7706: return QChar(101);
606 case 7707: return QChar(101);
607 case 582: return QChar(101);
608 case 583: return QChar(101);
609 case 602: return QChar(101);
610 case 605: return QChar(101);
611 case 7710: return QChar(102);
612 case 7711: return QChar(102);
613 case 7534: return QChar(102);
614 case 401: return QChar(102);
615 case 402: return QChar(102);
616 case 500: return QChar(103);
617 case 501: return QChar(103);
618 case 286: return QChar(103);
619 case 287: return QChar(103);
620 case 284: return QChar(103);
621 case 285: return QChar(103);
622 case 486: return QChar(103);
623 case 487: return QChar(103);
624 case 288: return QChar(103);
625 case 289: return QChar(103);
626 case 290: return QChar(103);
627 case 291: return QChar(103);
628 case 7712: return QChar(103);
629 case 7713: return QChar(103);
630 case 484: return QChar(103);
631 case 485: return QChar(103);
632 case 403: return QChar(103);
633 case 608: return QChar(103);
634 case 292: return QChar(104);
635 case 293: return QChar(104);
636 case 542: return QChar(104);
637 case 543: return QChar(104);
638 case 7718: return QChar(104);
639 case 7719: return QChar(104);
640 case 7714: return QChar(104);
641 case 7715: return QChar(104);
642 case 7720: return QChar(104);
643 case 7721: return QChar(104);
644 case 7716: return QChar(104);
645 case 7717: return QChar(104);
646 case 7722: return QChar(104);
647 case 7723: return QChar(104);
648 case 817: return QChar(104);
649 case 7830: return QChar(104);
650 case 294: return QChar(104);
651 case 295: return QChar(104);
652 case 11367: return QChar(104);
653 case 11368: return QChar(104);
654 case 205: return QChar(105);
655 case 237: return QChar(105);
656 case 204: return QChar(105);
657 case 236: return QChar(105);
658 case 300: return QChar(105);
659 case 301: return QChar(105);
660 case 206: return QChar(105);
661 case 238: return QChar(105);
662 case 463: return QChar(105);
663 case 464: return QChar(105);
664 case 207: return QChar(105);
665 case 239: return QChar(105);
666 case 7726: return QChar(105);
667 case 7727: return QChar(105);
668 case 296: return QChar(105);
669 case 297: return QChar(105);
670 case 304: return QChar(105);
671 case 302: return QChar(105);
672 case 303: return QChar(105);
673 case 298: return QChar(105);
674 case 299: return QChar(105);
675 case 7880: return QChar(105);
676 case 7881: return QChar(105);
677 case 520: return QChar(105);
678 case 521: return QChar(105);
679 case 522: return QChar(105);
680 case 523: return QChar(105);
681 case 7882: return QChar(105);
682 case 7883: return QChar(105);
683 case 7724: return QChar(105);
684 case 7725: return QChar(105);
685 case 305: return QChar(105);
686 case 407: return QChar(105);
687 case 616: return QChar(105);
688 case 308: return QChar(106);
689 case 309: return QChar(106);
690 case 780: return QChar(106);
691 case 496: return QChar(106);
692 case 567: return QChar(106);
693 case 584: return QChar(106);
694 case 585: return QChar(106);
695 case 669: return QChar(106);
696 case 607: return QChar(106);
697 case 644: return QChar(106);
698 case 7728: return QChar(107);
699 case 7729: return QChar(107);
700 case 488: return QChar(107);
701 case 489: return QChar(107);
702 case 310: return QChar(107);
703 case 311: return QChar(107);
704 case 7730: return QChar(107);
705 case 7731: return QChar(107);
706 case 7732: return QChar(107);
707 case 7733: return QChar(107);
708 case 408: return QChar(107);
709 case 409: return QChar(107);
710 case 11369: return QChar(107);
711 case 11370: return QChar(107);
712 case 313: return QChar(97);
713 case 314: return QChar(108);
714 case 317: return QChar(108);
715 case 318: return QChar(108);
716 case 315: return QChar(108);
717 case 316: return QChar(108);
718 case 7734: return QChar(108);
719 case 7735: return QChar(108);
720 case 7736: return QChar(108);
721 case 7737: return QChar(108);
722 case 7740: return QChar(108);
723 case 7741: return QChar(108);
724 case 7738: return QChar(108);
725 case 7739: return QChar(108);
726 case 321: return QChar(108);
727 case 322: return QChar(108);
728 case 803: return QChar(108);
729 case 319: return QChar(108);
730 case 320: return QChar(108);
731 case 573: return QChar(108);
732 case 410: return QChar(108);
733 case 11360: return QChar(108);
734 case 11361: return QChar(108);
735 case 11362: return QChar(108);
736 case 619: return QChar(108);
737 case 620: return QChar(108);
738 case 621: return QChar(108);
739 case 564: return QChar(108);
740 case 7742: return QChar(109);
741 case 7743: return QChar(109);
742 case 7744: return QChar(109);
743 case 7745: return QChar(109);
744 case 7746: return QChar(109);
745 case 7747: return QChar(109);
746 case 625: return QChar(109);
747 case 323: return QChar(110);
748 case 324: return QChar(110);
749 case 504: return QChar(110);
750 case 505: return QChar(110);
751 case 327: return QChar(110);
752 case 328: return QChar(110);
753 case 209: return QChar(110);
754 case 241: return QChar(110);
755 case 7748: return QChar(110);
756 case 7749: return QChar(110);
757 case 325: return QChar(110);
758 case 326: return QChar(110);
759 case 7750: return QChar(110);
760 case 7751: return QChar(110);
761 case 7754: return QChar(110);
762 case 7755: return QChar(110);
763 case 7752: return QChar(110);
764 case 7753: return QChar(110);
765 case 413: return QChar(110);
766 case 626: return QChar(110);
767 case 544: return QChar(110);
768 case 414: return QChar(110);
769 case 627: return QChar(110);
770 case 565: return QChar(110);
771 case 776: return QChar(116);
772 case 211: return QChar(111);
773 case 243: return QChar(111);
774 case 210: return QChar(111);
775 case 242: return QChar(111);
776 case 334: return QChar(111);
777 case 335: return QChar(111);
778 case 212: return QChar(111);
779 case 244: return QChar(111);
780 case 7888: return QChar(111);
781 case 7889: return QChar(111);
782 case 7890: return QChar(111);
783 case 7891: return QChar(111);
784 case 7894: return QChar(111);
785 case 7895: return QChar(111);
786 case 7892: return QChar(111);
787 case 7893: return QChar(111);
788 case 465: return QChar(111);
789 case 466: return QChar(111);
790 case 214: return QChar(111);
791 case 246: return QChar(111);
792 case 554: return QChar(111);
793 case 555: return QChar(111);
794 case 336: return QChar(111);
795 case 337: return QChar(111);
796 case 213: return QChar(111);
797 case 245: return QChar(111);
798 case 7756: return QChar(111);
799 case 7757: return QChar(111);
800 case 7758: return QChar(111);
801 case 7759: return QChar(111);
802 case 556: return QChar(111);
803 case 557: return QChar(111);
804 case 558: return QChar(111);
805 case 559: return QChar(111);
806 case 560: return QChar(111);
807 case 561: return QChar(111);
808 case 216: return QChar(111);
809 case 248: return QChar(111);
810 case 510: return QChar(111);
811 case 511: return QChar(111);
812 case 490: return QChar(111);
813 case 491: return QChar(111);
814 case 492: return QChar(111);
815 case 493: return QChar(111);
816 case 332: return QChar(111);
817 case 333: return QChar(111);
818 case 7762: return QChar(111);
819 case 7763: return QChar(111);
820 case 7760: return QChar(111);
821 case 7761: return QChar(111);
822 case 7886: return QChar(111);
823 case 7887: return QChar(111);
824 case 524: return QChar(111);
825 case 525: return QChar(111);
826 case 526: return QChar(111);
827 case 527: return QChar(111);
828 case 416: return QChar(111);
829 case 417: return QChar(111);
830 case 7898: return QChar(111);
831 case 7899: return QChar(111);
832 case 7900: return QChar(111);
833 case 7901: return QChar(111);
834 case 7904: return QChar(111);
835 case 7905: return QChar(111);
836 case 7902: return QChar(111);
837 case 7903: return QChar(111);
838 case 7906: return QChar(111);
839 case 7907: return QChar(111);
840 case 7884: return QChar(111);
841 case 7885: return QChar(111);
842 case 7896: return QChar(111);
843 case 7897: return QChar(111);
844 case 415: return QChar(111);
845 case 629: return QChar(111);
846 case 7764: return QChar(112);
847 case 7765: return QChar(112);
848 case 7766: return QChar(112);
849 case 7767: return QChar(112);
850 case 11363: return QChar(112);
851 case 420: return QChar(112);
852 case 421: return QChar(112);
853 case 771: return QChar(112);
854 case 672: return QChar(113);
855 case 586: return QChar(113);
856 case 587: return QChar(113);
857 case 340: return QChar(114);
858 case 341: return QChar(114);
859 case 344: return QChar(114);
860 case 345: return QChar(114);
861 case 7768: return QChar(114);
862 case 7769: return QChar(114);
863 case 342: return QChar(114);
864 case 343: return QChar(114);
865 case 528: return QChar(114);
866 case 529: return QChar(114);
867 case 530: return QChar(114);
868 case 531: return QChar(114);
869 case 7770: return QChar(114);
870 case 7771: return QChar(114);
871 case 7772: return QChar(114);
872 case 7773: return QChar(114);
873 case 7774: return QChar(114);
874 case 7775: return QChar(114);
875 case 588: return QChar(114);
876 case 589: return QChar(114);
877 case 7538: return QChar(114);
878 case 636: return QChar(114);
879 case 11364: return QChar(114);
880 case 637: return QChar(114);
881 case 638: return QChar(114);
882 case 7539: return QChar(114);
883 case 223: return QChar(115);
884 case 346: return QChar(115);
885 case 347: return QChar(115);
886 case 7780: return QChar(115);
887 case 7781: return QChar(115);
888 case 348: return QChar(115);
889 case 349: return QChar(115);
890 case 352: return QChar(115);
891 case 353: return QChar(115);
892 case 7782: return QChar(115);
893 case 7783: return QChar(115);
894 case 7776: return QChar(115);
895 case 7777: return QChar(115);
896 case 7835: return QChar(115);
897 case 350: return QChar(115);
898 case 351: return QChar(115);
899 case 7778: return QChar(115);
900 case 7779: return QChar(115);
901 case 7784: return QChar(115);
902 case 7785: return QChar(115);
903 case 536: return QChar(115);
904 case 537: return QChar(115);
905 case 642: return QChar(115);
906 case 809: return QChar(115);
907 case 222: return QChar(116);
908 case 254: return QChar(116);
909 case 356: return QChar(116);
910 case 357: return QChar(116);
911 case 7831: return QChar(116);
912 case 7786: return QChar(116);
913 case 7787: return QChar(116);
914 case 354: return QChar(116);
915 case 355: return QChar(116);
916 case 7788: return QChar(116);
917 case 7789: return QChar(116);
918 case 538: return QChar(116);
919 case 539: return QChar(116);
920 case 7792: return QChar(116);
921 case 7793: return QChar(116);
922 case 7790: return QChar(116);
923 case 7791: return QChar(116);
924 case 358: return QChar(116);
925 case 359: return QChar(116);
926 case 574: return QChar(116);
927 case 11366: return QChar(116);
928 case 7541: return QChar(116);
929 case 427: return QChar(116);
930 case 428: return QChar(116);
931 case 429: return QChar(116);
932 case 430: return QChar(116);
933 case 648: return QChar(116);
934 case 566: return QChar(116);
935 case 218: return QChar(117);
936 case 250: return QChar(117);
937 case 217: return QChar(117);
938 case 249: return QChar(117);
939 case 364: return QChar(117);
940 case 365: return QChar(117);
941 case 219: return QChar(117);
942 case 251: return QChar(117);
943 case 467: return QChar(117);
944 case 468: return QChar(117);
945 case 366: return QChar(117);
946 case 367: return QChar(117);
947 case 220: return QChar(117);
948 case 252: return QChar(117);
949 case 471: return QChar(117);
950 case 472: return QChar(117);
951 case 475: return QChar(117);
952 case 476: return QChar(117);
953 case 473: return QChar(117);
954 case 474: return QChar(117);
955 case 469: return QChar(117);
956 case 470: return QChar(117);
957 case 368: return QChar(117);
958 case 369: return QChar(117);
959 case 360: return QChar(117);
960 case 361: return QChar(117);
961 case 7800: return QChar(117);
962 case 7801: return QChar(117);
963 case 370: return QChar(117);
964 case 371: return QChar(117);
965 case 362: return QChar(117);
966 case 363: return QChar(117);
967 case 7802: return QChar(117);
968 case 7803: return QChar(117);
969 case 7910: return QChar(117);
970 case 7911: return QChar(117);
971 case 532: return QChar(117);
972 case 533: return QChar(117);
973 case 534: return QChar(117);
974 case 535: return QChar(117);
975 case 431: return QChar(117);
976 case 432: return QChar(117);
977 case 7912: return QChar(117);
978 case 7913: return QChar(117);
979 case 7914: return QChar(117);
980 case 7915: return QChar(117);
981 case 7918: return QChar(117);
982 case 7919: return QChar(117);
983 case 7916: return QChar(117);
984 case 7917: return QChar(117);
985 case 7920: return QChar(117);
986 case 7921: return QChar(117);
987 case 7908: return QChar(117);
988 case 7909: return QChar(117);
989 case 7794: return QChar(117);
990 case 7795: return QChar(117);
991 case 7798: return QChar(117);
992 case 7799: return QChar(117);
993 case 7796: return QChar(117);
994 case 7797: return QChar(117);
995 case 580: return QChar(117);
996 case 649: return QChar(117);
997 case 7804: return QChar(118);
998 case 7805: return QChar(118);
999 case 7806: return QChar(118);
1000 case 7807: return QChar(118);
1001 case 434: return QChar(118);
1002 case 651: return QChar(118);
1003 case 7810: return QChar(119);
1004 case 7811: return QChar(119);
1005 case 7808: return QChar(119);
1006 case 7809: return QChar(119);
1007 case 372: return QChar(119);
1008 case 373: return QChar(119);
1009 case 778: return QChar(121);
1010 case 7832: return QChar(119);
1011 case 7812: return QChar(119);
1012 case 7813: return QChar(119);
1013 case 7814: return QChar(119);
1014 case 7815: return QChar(119);
1015 case 7816: return QChar(119);
1016 case 7817: return QChar(119);
1017 case 7820: return QChar(120);
1018 case 7821: return QChar(120);
1019 case 7818: return QChar(120);
1020 case 7819: return QChar(120);
1021 case 221: return QChar(121);
1022 case 253: return QChar(121);
1023 case 7922: return QChar(121);
1024 case 7923: return QChar(121);
1025 case 374: return QChar(121);
1026 case 375: return QChar(121);
1027 case 7833: return QChar(121);
1028 case 376: return QChar(121);
1029 case 255: return QChar(121);
1030 case 7928: return QChar(121);
1031 case 7929: return QChar(121);
1032 case 7822: return QChar(121);
1033 case 7823: return QChar(121);
1034 case 562: return QChar(121);
1035 case 563: return QChar(121);
1036 case 7926: return QChar(121);
1037 case 7927: return QChar(121);
1038 case 7924: return QChar(121);
1039 case 7925: return QChar(121);
1040 case 655: return QChar(121);
1041 case 590: return QChar(121);
1042 case 591: return QChar(121);
1043 case 435: return QChar(121);
1044 case 436: return QChar(121);
1045 case 377: return QChar(122);
1046 case 378: return QChar(122);
1047 case 7824: return QChar(122);
1048 case 7825: return QChar(122);
1049 case 381: return QChar(122);
1050 case 382: return QChar(122);
1051 case 379: return QChar(122);
1052 case 380: return QChar(122);
1053 case 7826: return QChar(122);
1054 case 7827: return QChar(122);
1055 case 7828: return QChar(122);
1056 case 7829: return QChar(122);
1057 case 437: return QChar(122);
1058 case 438: return QChar(122);
1059 case 548: return QChar(122);
1060 case 549: return QChar(122);
1061 case 656: return QChar(122);
1062 case 657: return QChar(122);
1063 case 11371: return QChar(122);
1064 case 11372: return QChar(122);
1065 case 494: return QChar(122);
1066 case 495: return QChar(122);
1067 case 442: return QChar(122);
1068 case 65298: return QChar(50);
1069 case 65302: return QChar(54);
1070 case 65314: return QChar(66);
1071 case 65318: return QChar(70);
1072 case 65322: return QChar(74);
1073 case 65326: return QChar(78);
1074 case 65330: return QChar(82);
1075 case 65334: return QChar(86);
1076 case 65338: return QChar(90);
1077 case 65346: return QChar(98);
1078 case 65350: return QChar(102);
1079 case 65354: return QChar(106);
1080 case 65358: return QChar(110);
1081 case 65362: return QChar(114);
1082 case 65366: return QChar(118);
1083 case 65370: return QChar(122);
1084 case 65297: return QChar(49);
1085 case 65301: return QChar(53);
1086 case 65305: return QChar(57);
1087 case 65313: return QChar(65);
1088 case 65317: return QChar(69);
1089 case 65321: return QChar(73);
1090 case 65325: return QChar(77);
1091 case 65329: return QChar(81);
1092 case 65333: return QChar(85);
1093 case 65337: return QChar(89);
1094 case 65345: return QChar(97);
1095 case 65349: return QChar(101);
1096 case 65353: return QChar(105);
1097 case 65357: return QChar(109);
1098 case 65361: return QChar(113);
1099 case 65365: return QChar(117);
1100 case 65369: return QChar(121);
1101 case 65296: return QChar(48);
1102 case 65300: return QChar(52);
1103 case 65304: return QChar(56);
1104 case 65316: return QChar(68);
1105 case 65320: return QChar(72);
1106 case 65324: return QChar(76);
1107 case 65328: return QChar(80);
1108 case 65332: return QChar(84);
1109 case 65336: return QChar(88);
1110 case 65348: return QChar(100);
1111 case 65352: return QChar(104);
1112 case 65356: return QChar(108);
1113 case 65360: return QChar(112);
1114 case 65364: return QChar(116);
1115 case 65368: return QChar(120);
1116 case 65299: return QChar(51);
1117 case 65303: return QChar(55);
1118 case 65315: return QChar(67);
1119 case 65319: return QChar(71);
1120 case 65323: return QChar(75);
1121 case 65327: return QChar(79);
1122 case 65331: return QChar(83);
1123 case 65335: return QChar(87);
1124 case 65347: return QChar(99);
1125 case 65351: return QChar(103);
1126 case 65355: return QChar(107);
1127 case 65359: return QChar(111);
1128 case 65363: return QChar(115);
1129 case 65367: return QChar(119);
1130 case 1105: return QChar(1077);
1131 default:
1132 break;
1133 }
1134 return QChar(0);
1135 }
1136
RegExpWordSplit()1137 const QRegularExpression &RegExpWordSplit() {
1138 static const auto result = QRegularExpression(QString::fromLatin1("[\\@\\s\\-\\+\\(\\)\\[\\]\\{\\}\\<\\>\\,\\.\\:\\!\\_\\;\\\"\\'\\x0]"));
1139 return result;
1140 }
1141
ExpandCustomLinks(const TextWithTags & text)1142 [[nodiscard]] QString ExpandCustomLinks(const TextWithTags &text) {
1143 const auto entities = ConvertTextTagsToEntities(text.tags);
1144 auto &&urls = ranges::make_subrange(
1145 entities.begin(),
1146 entities.end()
1147 ) | ranges::views::filter([](const EntityInText &entity) {
1148 return entity.type() == EntityType::CustomUrl;
1149 });
1150 const auto &original = text.text;
1151 if (urls.begin() == urls.end()) {
1152 return original;
1153 }
1154 auto result = QString();
1155 auto offset = 0;
1156 for (const auto &entity : urls) {
1157 const auto till = entity.offset() + entity.length();
1158 if (till > offset) {
1159 result.append(base::StringViewMid(original, offset, till - offset));
1160 }
1161 result.append(qstr(" (")).append(entity.data()).append(')');
1162 offset = till;
1163 }
1164 if (original.size() > offset) {
1165 result.append(base::StringViewMid(original, offset));
1166 }
1167 return result;
1168 }
1169
MimeDataFromText(TextWithTags && text,const QString & expanded)1170 std::unique_ptr<QMimeData> MimeDataFromText(
1171 TextWithTags &&text,
1172 const QString &expanded) {
1173 if (expanded.isEmpty()) {
1174 return nullptr;
1175 }
1176
1177 auto result = std::make_unique<QMimeData>();
1178 result->setText(expanded);
1179 if (!text.tags.isEmpty()) {
1180 for (auto &tag : text.tags) {
1181 tag.id = Ui::Integration::Instance().convertTagToMimeTag(tag.id);
1182 }
1183 result->setData(
1184 TextUtilities::TagsTextMimeType(),
1185 text.text.toUtf8());
1186 result->setData(
1187 TextUtilities::TagsMimeType(),
1188 TextUtilities::SerializeTags(text.tags));
1189 }
1190 return result;
1191 }
1192
IsSentencePartEnd(QChar ch)1193 bool IsSentencePartEnd(QChar ch) {
1194 return (ch == ',')
1195 || (ch == ':')
1196 || (ch == ';');
1197 }
1198
IsSentenceEnd(QChar ch)1199 bool IsSentenceEnd(QChar ch) {
1200 return (ch == '.')
1201 || (ch == '?')
1202 || (ch == '!');
1203 }
1204
1205 } // namespace
1206
RegExpMailNameAtEnd()1207 const QRegularExpression &RegExpMailNameAtEnd() {
1208 static const auto result = CreateRegExp(ExpressionMailNameAtEnd());
1209 return result;
1210 }
1211
RegExpHashtag()1212 const QRegularExpression &RegExpHashtag() {
1213 static const auto result = CreateRegExp(ExpressionHashtag());
1214 return result;
1215 }
1216
RegExpHashtagExclude()1217 const QRegularExpression &RegExpHashtagExclude() {
1218 static const auto result = CreateRegExp(ExpressionHashtagExclude());
1219 return result;
1220 }
1221
RegExpMention()1222 const QRegularExpression &RegExpMention() {
1223 static const auto result = CreateRegExp(ExpressionMention());
1224 return result;
1225 }
1226
RegExpBotCommand()1227 const QRegularExpression &RegExpBotCommand() {
1228 static const auto result = CreateRegExp(ExpressionBotCommand());
1229 return result;
1230 }
1231
// Returns the character set built by SeparatorsBold().
// NOTE(review): presumably the characters allowed right before a bold
// markdown tag - confirm against the callers.
QString MarkdownBoldGoodBefore() {
	return SeparatorsBold();
}
1235
// Returns the string "*".
// NOTE(review): presumably the characters that must not follow a bold
// markdown tag - confirm against the callers.
QString MarkdownBoldBadAfter() {
	return QString::fromLatin1("*");
}
1239
// Returns the character set built by SeparatorsItalic().
// NOTE(review): presumably the characters allowed right before an italic
// markdown tag - confirm against the callers.
QString MarkdownItalicGoodBefore() {
	return SeparatorsItalic();
}
1243
// Returns the string "_".
// NOTE(review): presumably the characters that must not follow an italic
// markdown tag - confirm against the callers.
QString MarkdownItalicBadAfter() {
	return QString::fromLatin1("_");
}
1247
// Returns the character set built by SeparatorsStrikeOut().
// NOTE(review): presumably the characters allowed right before a
// strike-out markdown tag - confirm against the callers.
QString MarkdownStrikeOutGoodBefore() {
	return SeparatorsStrikeOut();
}
1251
// Returns the string "~".
// NOTE(review): presumably the characters that must not follow a
// strike-out markdown tag - confirm against the callers.
QString MarkdownStrikeOutBadAfter() {
	return QString::fromLatin1("~");
}
1255
// Returns the character set built by SeparatorsMono().
// NOTE(review): presumably the characters allowed right before an inline
// code markdown tag - confirm against the callers.
QString MarkdownCodeGoodBefore() {
	return SeparatorsMono();
}
1259
// Returns the backtick, '\n' and '\r' characters as a string.
// NOTE(review): presumably the characters that must not follow an inline
// code markdown tag - confirm against the callers.
QString MarkdownCodeBadAfter() {
	return QString::fromLatin1("`\n\r");
}
1263
// Returns the character set built by SeparatorsMono(), the same set as
// MarkdownCodeGoodBefore().
// NOTE(review): presumably the characters allowed right before a
// preformatted block markdown tag - confirm against the callers.
QString MarkdownPreGoodBefore() {
	return SeparatorsMono();
}
1267
// Returns a string with a single backtick.
// NOTE(review): presumably the characters that must not follow a
// preformatted block markdown tag - confirm against the callers.
QString MarkdownPreBadAfter() {
	return QString::fromLatin1("`");
}
1271
IsValidProtocol(const QString & protocol)1272 bool IsValidProtocol(const QString &protocol) {
1273 static const auto list = CreateValidProtocols();
1274 return list.contains(base::crc32(protocol.constData(), protocol.size() * sizeof(QChar)));
1275 }
1276
IsValidTopDomain(const QString & protocol)1277 bool IsValidTopDomain(const QString &protocol) {
1278 static const auto list = CreateValidTopDomains();
1279 return list.contains(base::crc32(protocol.constData(), protocol.size() * sizeof(QChar)));
1280 }
1281
// Returns a copy of the text with every TextCommand character
// replaced by a space. The length of the text does not change.
QString Clean(const QString &text) {
	auto cleaned = text;
	const auto chars = text.unicode();
	const auto count = int(text.size());
	for (auto index = 0; index != count; ++index) {
		if (chars[index] == TextCommand) {
			cleaned[index] = QChar::Space;
		}
	}
	return cleaned;
}
1291
// Prepares a plain text for the rich text parser:
// - every TextCommand character is replaced by a space,
// - every '\\' and '[' gets a '\\' inserted before it.
QString EscapeForRichParsing(const QString &text) {
	auto escaped = QString();
	escaped.reserve(text.size());

	const auto begin = text.constData();
	const auto end = begin + text.size();

	// Start of the run of characters that was not copied yet.
	auto pending = begin;
	const auto flushTill = [&](const QChar *till) {
		if (till > pending) {
			escaped.append(pending, till - pending);
		}
	};
	for (auto ch = begin; ch != end; ++ch) {
		if (*ch == TextCommand) {
			flushTill(ch);
			escaped.append(QChar::Space);
			pending = ch + 1; // The command character itself is dropped.
		} else if (ch->unicode() == '\\' || ch->unicode() == '[') {
			flushTill(ch);
			escaped.append('\\');
			pending = ch; // The escaped character is copied with the next run.
		}
	}
	flushTill(end);
	return escaped;
}
1313
// Returns the text trimmed from both sides (using IsTrimmed) with all
// newlines and TextCommand characters inside replaced by spaces.
QString SingleLine(const QString &text) {
	const auto begin = text.unicode();
	auto from = begin;
	auto till = begin + text.size();

	// Skip the trimmed characters at the start and at the end.
	while (from < till && IsTrimmed(*from)) {
		++from;
	}
	while (from < till && IsTrimmed(*(till - 1))) {
		--till;
	}

	auto line = (till - from != text.size())
		? text.mid(from - begin, till - from)
		: text;
	for (auto ch = from; ch != till; ++ch) {
		if (IsNewline(*ch) || *ch == TextCommand) {
			line[int(ch - from)] = QChar::Space;
		}
	}
	return line;
}
1336
// Single line version of a text with entities: a copy is trimmed by
// Trim() and its text is passed through SingleLine(const QString&).
// The entities of the trimmed copy are kept as they are.
TextWithEntities SingleLine(const TextWithEntities &text) {
	auto trimmed = text;
	Trim(trimmed);
	auto line = SingleLine(trimmed.text);
	return { std::move(line), std::move(trimmed.entities) };
}
1342
// Returns a copy of the text where characters are replaced using
// RemoveOneAccent() and characters reported by IsDiac() are dropped.
//
// The text is processed in place in a copy: `ch` is the read position
// in the original text and `i` is the write position in `result`.
// While nothing was dropped both positions match and only replaced
// characters are written. After `copying` is set every kept character
// is written to its (possibly shifted) position. In the end `result`
// is cut to the `i` written characters.
QString RemoveAccents(const QString &text) {
	auto result = text;
	auto copying = false;
	auto i = 0;
	for (auto s = text.unicode(), ch = s, e = text.unicode() + text.size(); ch != e; ++ch, ++i) {
		// ASCII characters are never changed.
		if (ch->unicode() < 128) {
			if (copying) result[i] = *ch;
			continue;
		}
		// Drop the character: `--i` cancels the `++i` of the loop,
		// so the write position stays where it was.
		if (IsDiac(*ch)) {
			copying = true;
			--i;
			continue;
		}
		if (ch->isHighSurrogate() && ch + 1 < e && (ch + 1)->isLowSurrogate()) {
			auto noAccent = RemoveOneAccent(QChar::surrogateToUcs4(*ch, *(ch + 1)));
			if (noAccent.unicode() > 0) {
				// NOTE(review): only the high surrogate is consumed here,
				// the low surrogate is handled by the next iteration as
				// a separate character - confirm that this is intended.
				copying = true;
				result[i] = noAccent;
			} else {
				// No replacement: keep both halves of the pair.
				if (copying) result[i] = *ch;
				++ch, ++i;
				if (copying) result[i] = *ch;
			}
		} else {
			auto noAccent = RemoveOneAccent(ch->unicode());
			if (noAccent.unicode() > 0 && noAccent != *ch) {
				result[i] = noAccent;
			} else if (copying) {
				result[i] = *ch;
			}
		}
	}
	return (i < result.size()) ? result.mid(0, i) : result;
}
1378
// Returns the text without the emoji sequences found by
// Ui::Emoji::Find(), all other characters are kept in order.
QString RemoveEmoji(const QString &text) {
	auto stripped = QString();
	stripped.reserve(text.size());

	auto ch = text.data();
	const auto till = ch + text.size();
	while (ch != till) {
		auto emojiLength = 0;
		if (!Ui::Emoji::Find(ch, till, &emojiLength)) {
			// Not an emoji start, keep this character.
			stripped.append(*ch);
			++ch;
			continue;
		}
		// Skip the whole emoji sequence.
		ch += emojiLength;
	}
	return stripped;
}
1395
PrepareSearchWords(const QString & query,const QRegularExpression * SplitterOverride)1396 QStringList PrepareSearchWords(
1397 const QString &query,
1398 const QRegularExpression *SplitterOverride) {
1399 auto clean = RemoveAccents(query.trimmed().toLower());
1400 auto result = QStringList();
1401 if (!clean.isEmpty()) {
1402 auto list = clean.split(SplitterOverride
1403 ? *SplitterOverride
1404 : RegExpWordSplit(),
1405 Qt::SkipEmptyParts);
1406 result.reserve(list.size());
1407 for (const auto &word : std::as_const(list)) {
1408 auto trimmed = word.trimmed();
1409 if (!trimmed.isEmpty()) {
1410 result.push_back(trimmed);
1411 }
1412 }
1413 }
1414 return result;
1415 }
1416
// Cuts a part of at most about `limit` characters from the start of
// `left` and moves it to `sending` (text together with entities).
//
// Returns false and changes nothing if `left` is empty or `limit` is
// zero. Otherwise returns true: either the whole `left` was moved to
// `sending` (it fits), or the text was split at the best found position.
//
// The split position is chosen in the second half of the limit. Each
// candidate position gets a "level", the candidate with the highest level
// wins, among equal levels the latest one wins.
bool CutPart(TextWithEntities &sending, TextWithEntities &left, int32 limit) {
	if (left.text.isEmpty() || !limit) return false;

	// currentEntity - the first entity that did not end before `ch`.
	// good* - the state remembered for the best split position `good`.
	int32 currentEntity = 0, goodEntity = currentEntity, entityCount = left.entities.size();
	bool goodInEntity = false, goodCanBreakEntity = false;

	// `s` counts the processed characters, a surrogate pair and a whole
	// emoji sequence advance it as shown in the emoji handling below.
	int32 s = 0, half = limit / 2, goodLevel = 0;
	for (const QChar *start = left.text.constData(), *ch = start, *end = left.text.constEnd(), *good = ch; ch != end; ++ch, ++s) {
		// Skip the entities that end at or before the current position.
		while (currentEntity < entityCount && ch >= start + left.entities[currentEntity].offset() + left.entities[currentEntity].length()) {
			++currentEntity;
		}

		// Split positions are considered only after half of the limit.
		if (s > half) {
			// Strictly inside of the current entity (not at its start).
			bool inEntity = (currentEntity < entityCount) && (ch > start + left.entities[currentEntity].offset()) && (ch < start + left.entities[currentEntity].offset() + left.entities[currentEntity].length());
			EntityType entityType = (currentEntity < entityCount) ? left.entities[currentEntity].type() : EntityType::Invalid;
			bool canBreakEntity = (entityType == EntityType::Pre || entityType == EntityType::Code); // #TODO entities
			int32 noEntityLevel = inEntity ? 0 : 1;

			// Remember the current position as the best one,
			// unless a position with a higher level is already known.
			auto markGoodAsLevel = [&](int newLevel) {
				if (goodLevel > newLevel) {
					return;
				}
				goodLevel = newLevel;
				good = ch;
				goodEntity = currentEntity;
				goodInEntity = inEntity;
				goodCanBreakEntity = canBreakEntity;
			};

			if (inEntity && !canBreakEntity) {
				// The worst case: inside of an entity that can't be split.
				markGoodAsLevel(0);
			} else {
				if (IsNewline(*ch)) {
					if (inEntity) {
						// A newline inside of a Pre / Code entity,
						// a double newline is better than a single one.
						if (ch + 1 < end && IsNewline(*(ch + 1))) {
							markGoodAsLevel(12);
						} else {
							markGoodAsLevel(11);
						}
					} else if (ch + 1 < end && IsNewline(*(ch + 1))) {
						// A double newline outside of entities is the best.
						markGoodAsLevel(15);
					} else if (currentEntity < entityCount && ch + 1 == start + left.entities[currentEntity].offset() && left.entities[currentEntity].type() == EntityType::Pre) {
						// A newline right before a Pre entity.
						markGoodAsLevel(14);
					} else if (currentEntity > 0 && ch == start + left.entities[currentEntity - 1].offset() + left.entities[currentEntity - 1].length() && left.entities[currentEntity - 1].type() == EntityType::Pre) {
						// A newline right after a Pre entity.
						markGoodAsLevel(14);
					} else {
						markGoodAsLevel(13);
					}
				} else if (IsSpace(*ch)) {
					// A space: the level depends on the previous character.
					// `ch - 1` is valid here, because s > half >= 0.
					if (IsSentenceEnd(*(ch - 1))) {
						markGoodAsLevel(9 + noEntityLevel);
					} else if (IsSentencePartEnd(*(ch - 1))) {
						markGoodAsLevel(7 + noEntityLevel);
					} else {
						markGoodAsLevel(5 + noEntityLevel);
					}
				} else if (IsWordSeparator(*(ch - 1))) {
					markGoodAsLevel(3 + noEntityLevel);
				} else {
					markGoodAsLevel(1 + noEntityLevel);
				}
			}
		}

		// Never split inside of an emoji or a surrogate pair: skip them
		// as a whole, leaving `ch` on the last code unit of the sequence.
		int elen = 0;
		if (Ui::Emoji::Find(ch, end, &elen)) {
			for (int i = 0; i < elen; ++i, ++ch, ++s) {
				if (ch->isHighSurrogate() && i + 1 < elen && (ch + 1)->isLowSurrogate()) {
					++ch;
					++i;
				}
			}
			// Compensate for the increments done by the outer loop.
			--ch;
			--s;
		} else if (ch->isHighSurrogate() && ch + 1 < end && (ch + 1)->isLowSurrogate()) {
			++ch;
		}
		if (s >= limit) {
			// The limit is reached: split at the best found position.
			sending.text = left.text.mid(0, good - start);
			left.text = left.text.mid(good - start);
			if (goodInEntity) {
				if (goodCanBreakEntity) {
					// The entity is split in two: it ends `sending`
					// (cut at the split position) and starts `left`.
					sending.entities = left.entities.mid(0, goodEntity + 1);
					sending.entities.back().updateTextEnd(good - start);
					left.entities = left.entities.mid(goodEntity);
					for (auto &entity : left.entities) {
						entity.shiftLeft(good - start);
					}
				} else {
					// The entity that can't be split is dropped from both
					// parts.
					// NOTE(review): the entities remaining in `left` are not
					// shifted in this branch, unlike in the other two - confirm.
					sending.entities = left.entities.mid(0, goodEntity);
					left.entities = left.entities.mid(goodEntity + 1);
				}
			} else {
				sending.entities = left.entities.mid(0, goodEntity);
				left.entities = left.entities.mid(goodEntity);
				for (auto &entity : left.entities) {
					entity.shiftLeft(good - start);
				}
			}
			return true;
		}
	}
	// The whole text fits in the limit: move everything to `sending`.
	sending.text = left.text;
	left.text = QString();
	sending.entities = left.entities;
	left.entities = EntitiesInText();
	return true;
}
1525
textcmdStartsLink(const QChar * start,int32 len,int32 commandOffset)1526 bool textcmdStartsLink(const QChar *start, int32 len, int32 commandOffset) {
1527 if (commandOffset + 2 < len) {
1528 if (*(start + commandOffset + 1) == TextCommandLinkIndex) {
1529 return (*(start + commandOffset + 2) != 0);
1530 }
1531 return (*(start + commandOffset + 1) != TextCommandLinkText);
1532 }
1533 return false;
1534 }
1535
// Checks whether a tag starting at `tagStart` lies inside of a text
// command, advancing the command scanning state up to the tag.
//
// start, len - the text buffer.
// tagStart - offset of the tag to check.
// commandOffset - in / out, offset of the next command to examine. On a
//   true result it is set to the end of the command containing the tag.
// commandIsLink - in / out, value of textcmdStartsLink() for the command
//   at `commandOffset`.
// inLink - in / out, set to the previous `commandIsLink` each time the
//   scanning moves past a command.
//
// Returns true if `tagStart` is before the end of a command that starts
// before it.
bool checkTagStartInCommand(const QChar *start, int32 len, int32 tagStart, int32 &commandOffset, bool &commandIsLink, bool &inLink) {
	bool inCommand = false;
	const QChar *commandEnd = start + commandOffset;
	while (commandOffset < len && tagStart > commandOffset) { // skip commands, evaluating are we in link or not
		commandEnd = textSkipCommand(start + commandOffset, start + len);
		// textSkipCommand() is defined elsewhere. Here it is treated as having
		// skipped a command only when it returns a position past its start.
		if (commandEnd > start + commandOffset) {
			if (tagStart < (commandEnd - start)) {
				inCommand = true;
				break;
			}
			// Look for the next command character after the skipped one.
			for (commandOffset = commandEnd - start; commandOffset < len; ++commandOffset) {
				if (*(start + commandOffset) == TextCommand) {
					inLink = commandIsLink;
					commandIsLink = textcmdStartsLink(start, len, commandOffset);
					break;
				}
			}
			// No more commands till the end of the text.
			if (commandOffset >= len) {
				inLink = commandIsLink;
				commandIsLink = false;
			}
		} else {
			break;
		}
	}
	if (inCommand) {
		commandOffset = commandEnd - start;
	}
	return inCommand;
}
1566
ParseEntities(const QString & text,int32 flags)1567 TextWithEntities ParseEntities(const QString &text, int32 flags) {
1568 const auto rich = ((flags & TextParseRichText) != 0);
1569 auto result = TextWithEntities{ text, EntitiesInText() };
1570 ParseEntities(result, flags, rich);
1571 return result;
1572 }
1573
1574 // Some code is duplicated in message_field.cpp!
ParseEntities(TextWithEntities & result,int32 flags,bool rich)1575 void ParseEntities(TextWithEntities &result, int32 flags, bool rich) {
1576 constexpr auto kNotFound = std::numeric_limits<int>::max();
1577
1578 auto newEntities = EntitiesInText();
1579 bool withHashtags = (flags & TextParseHashtags);
1580 bool withMentions = (flags & TextParseMentions);
1581 bool withBotCommands = (flags & TextParseBotCommands);
1582
1583 int existingEntityIndex = 0, existingEntitiesCount = result.entities.size();
1584 int existingEntityEnd = 0;
1585
1586 int32 len = result.text.size(), commandOffset = rich ? 0 : len;
1587 bool inLink = false, commandIsLink = false;
1588 const auto start = result.text.constData();
1589 const auto end = start + result.text.size();
1590 for (int32 offset = 0, matchOffset = offset, mentionSkip = 0; offset < len;) {
1591 if (commandOffset <= offset) {
1592 for (commandOffset = offset; commandOffset < len; ++commandOffset) {
1593 if (*(start + commandOffset) == TextCommand) {
1594 inLink = commandIsLink;
1595 commandIsLink = textcmdStartsLink(start, len, commandOffset);
1596 break;
1597 }
1598 }
1599 }
1600 auto mDomain = qthelp::RegExpDomain().match(result.text, matchOffset);
1601 auto mExplicitDomain = qthelp::RegExpDomainExplicit().match(result.text, matchOffset);
1602 auto mHashtag = withHashtags ? RegExpHashtag().match(result.text, matchOffset) : QRegularExpressionMatch();
1603 auto mMention = withMentions ? RegExpMention().match(result.text, qMax(mentionSkip, matchOffset)) : QRegularExpressionMatch();
1604 auto mBotCommand = withBotCommands ? RegExpBotCommand().match(result.text, matchOffset) : QRegularExpressionMatch();
1605
1606 auto lnkType = EntityType::Url;
1607 int32 lnkStart = 0, lnkLength = 0;
1608 auto domainStart = mDomain.hasMatch() ? mDomain.capturedStart() : kNotFound,
1609 domainEnd = mDomain.hasMatch() ? mDomain.capturedEnd() : kNotFound,
1610 explicitDomainStart = mExplicitDomain.hasMatch() ? mExplicitDomain.capturedStart() : kNotFound,
1611 explicitDomainEnd = mExplicitDomain.hasMatch() ? mExplicitDomain.capturedEnd() : kNotFound,
1612 hashtagStart = mHashtag.hasMatch() ? mHashtag.capturedStart() : kNotFound,
1613 hashtagEnd = mHashtag.hasMatch() ? mHashtag.capturedEnd() : kNotFound,
1614 mentionStart = mMention.hasMatch() ? mMention.capturedStart() : kNotFound,
1615 mentionEnd = mMention.hasMatch() ? mMention.capturedEnd() : kNotFound,
1616 botCommandStart = mBotCommand.hasMatch() ? mBotCommand.capturedStart() : kNotFound,
1617 botCommandEnd = mBotCommand.hasMatch() ? mBotCommand.capturedEnd() : kNotFound;
1618 auto hashtagIgnore = false;
1619 auto mentionIgnore = false;
1620
1621 if (mHashtag.hasMatch()) {
1622 if (!mHashtag.capturedView(1).isEmpty()) {
1623 ++hashtagStart;
1624 }
1625 if (!mHashtag.capturedView(2).isEmpty()) {
1626 --hashtagEnd;
1627 }
1628 if (RegExpHashtagExclude().match(
1629 result.text.mid(
1630 hashtagStart + 1,
1631 hashtagEnd - hashtagStart - 1)).hasMatch()) {
1632 hashtagIgnore = true;
1633 }
1634 }
1635 while (mMention.hasMatch()) {
1636 if (!mMention.capturedView(1).isEmpty()) {
1637 ++mentionStart;
1638 }
1639 if (!mMention.capturedView(2).isEmpty()) {
1640 --mentionEnd;
1641 }
1642 if (!(start + mentionStart + 1)->isLetter() || !(start + mentionEnd - 1)->isLetterOrNumber()) {
1643 mentionSkip = mentionEnd;
1644 if (mentionSkip < len
1645 && (start + mentionSkip)->isLowSurrogate()) {
1646 ++mentionSkip;
1647 }
1648 mMention = RegExpMention().match(result.text, qMax(mentionSkip, matchOffset));
1649 if (mMention.hasMatch()) {
1650 mentionStart = mMention.capturedStart();
1651 mentionEnd = mMention.capturedEnd();
1652 } else {
1653 mentionIgnore = true;
1654 }
1655 } else {
1656 break;
1657 }
1658 }
1659 if (mBotCommand.hasMatch()) {
1660 if (!mBotCommand.capturedView(1).isEmpty()) {
1661 ++botCommandStart;
1662 }
1663 if (!mBotCommand.capturedView(3).isEmpty()) {
1664 --botCommandEnd;
1665 }
1666 }
1667 if (!mDomain.hasMatch()
1668 && !mExplicitDomain.hasMatch()
1669 && !mHashtag.hasMatch()
1670 && !mMention.hasMatch()
1671 && !mBotCommand.hasMatch()) {
1672 break;
1673 }
1674
1675 if (explicitDomainStart < domainStart) {
1676 domainStart = explicitDomainStart;
1677 domainEnd = explicitDomainEnd;
1678 mDomain = mExplicitDomain;
1679 }
1680 if (mentionStart < hashtagStart
1681 && mentionStart < domainStart
1682 && mentionStart < botCommandStart) {
1683 if (mentionIgnore) {
1684 offset = matchOffset = mentionEnd;
1685 continue;
1686 }
1687 const auto inCommand = checkTagStartInCommand(
1688 start,
1689 len,
1690 mentionStart,
1691 commandOffset,
1692 commandIsLink,
1693 inLink);
1694 if (inCommand || inLink) {
1695 offset = matchOffset = commandOffset;
1696 continue;
1697 }
1698
1699 lnkType = EntityType::Mention;
1700 lnkStart = mentionStart;
1701 lnkLength = mentionEnd - mentionStart;
1702 } else if (hashtagStart < domainStart
1703 && hashtagStart < botCommandStart) {
1704 if (hashtagIgnore) {
1705 offset = matchOffset = hashtagEnd;
1706 continue;
1707 }
1708 const auto inCommand = checkTagStartInCommand(
1709 start,
1710 len,
1711 hashtagStart,
1712 commandOffset,
1713 commandIsLink,
1714 inLink);
1715 if (inCommand || inLink) {
1716 offset = matchOffset = commandOffset;
1717 continue;
1718 }
1719
1720 lnkType = EntityType::Hashtag;
1721 lnkStart = hashtagStart;
1722 lnkLength = hashtagEnd - hashtagStart;
1723 } else if (botCommandStart < domainStart) {
1724 const auto inCommand = checkTagStartInCommand(
1725 start,
1726 len,
1727 botCommandStart,
1728 commandOffset,
1729 commandIsLink,
1730 inLink);
1731 if (inCommand || inLink) {
1732 offset = matchOffset = commandOffset;
1733 continue;
1734 }
1735
1736 lnkType = EntityType::BotCommand;
1737 lnkStart = botCommandStart;
1738 lnkLength = botCommandEnd - botCommandStart;
1739 } else {
1740 const auto inCommand = checkTagStartInCommand(
1741 start,
1742 len,
1743 domainStart,
1744 commandOffset,
1745 commandIsLink,
1746 inLink);
1747 if (inCommand || inLink) {
1748 offset = matchOffset = commandOffset;
1749 continue;
1750 }
1751
1752 auto protocol = mDomain.captured(1).toLower();
1753 auto topDomain = mDomain.captured(3).toLower();
1754 auto isProtocolValid = protocol.isEmpty() || IsValidProtocol(protocol);
1755 auto isTopDomainValid = !protocol.isEmpty() || IsValidTopDomain(topDomain);
1756
1757 if (protocol.isEmpty() && domainStart > offset + 1 && *(start + domainStart - 1) == QChar('@')) {
1758 auto forMailName = result.text.mid(offset, domainStart - offset - 1);
1759 auto mMailName = RegExpMailNameAtEnd().match(forMailName);
1760 if (mMailName.hasMatch()) {
1761 auto mailStart = offset + mMailName.capturedStart();
1762 if (mailStart < offset) {
1763 mailStart = offset;
1764 }
1765 lnkType = EntityType::Email;
1766 lnkStart = mailStart;
1767 lnkLength = domainEnd - mailStart;
1768 }
1769 }
1770 if (lnkType == EntityType::Url && !lnkLength) {
1771 if (!isProtocolValid || !isTopDomainValid) {
1772 matchOffset = domainEnd;
1773 continue;
1774 }
1775 lnkStart = domainStart;
1776
1777 QStack<const QChar*> parenth;
1778 const QChar *domainEnd = start + mDomain.capturedEnd(), *p = domainEnd;
1779 for (; p < end; ++p) {
1780 QChar ch(*p);
1781 if (IsLinkEnd(ch)) {
1782 break; // link finished
1783 } else if (IsAlmostLinkEnd(ch)) {
1784 const QChar *endTest = p + 1;
1785 while (endTest < end && IsAlmostLinkEnd(*endTest)) {
1786 ++endTest;
1787 }
1788 if (endTest >= end || IsLinkEnd(*endTest)) {
1789 break; // link finished at p
1790 }
1791 p = endTest;
1792 ch = *p;
1793 }
1794 if (ch == '(' || ch == '[' || ch == '{' || ch == '<') {
1795 parenth.push(p);
1796 } else if (ch == ')' || ch == ']' || ch == '}' || ch == '>') {
1797 if (parenth.isEmpty()) break;
1798 const QChar *q = parenth.pop(), open(*q);
1799 if ((ch == ')' && open != '(') || (ch == ']' && open != '[') || (ch == '}' && open != '{') || (ch == '>' && open != '<')) {
1800 p = q;
1801 break;
1802 }
1803 }
1804 }
1805 if (p > domainEnd) { // check, that domain ended
1806 if (domainEnd->unicode() != '/' && domainEnd->unicode() != '?') {
1807 matchOffset = domainEnd - start;
1808 continue;
1809 }
1810 }
1811 lnkLength = (p - start) - lnkStart;
1812 }
1813 }
1814 for (; existingEntityIndex < existingEntitiesCount && result.entities[existingEntityIndex].offset() <= lnkStart; ++existingEntityIndex) {
1815 auto &entity = result.entities[existingEntityIndex];
1816 accumulate_max(existingEntityEnd, entity.offset() + entity.length());
1817 newEntities.push_back(entity);
1818 }
1819 if (lnkStart >= existingEntityEnd) {
1820 result.entities.push_back({ lnkType, lnkStart, lnkLength });
1821 }
1822
1823 offset = matchOffset = lnkStart + lnkLength;
1824 }
1825 if (!newEntities.isEmpty()) {
1826 for (; existingEntityIndex < existingEntitiesCount; ++existingEntityIndex) {
1827 auto &entity = result.entities[existingEntityIndex];
1828 newEntities.push_back(entity);
1829 }
1830 result.entities = newEntities;
1831 }
1832 }
1833
MoveStringPart(TextWithEntities & result,int to,int from,int count)1834 void MoveStringPart(TextWithEntities &result, int to, int from, int count) {
1835 if (!count) return;
1836 if (to != from) {
1837 auto start = result.text.data();
1838 memmove(start + to, start + from, count * sizeof(QChar));
1839
1840 for (auto &entity : result.entities) {
1841 if (entity.offset() >= from + count) break;
1842 if (entity.offset() + entity.length() <= from) continue;
1843 if (entity.offset() >= from) {
1844 entity.extendToLeft(from - to);
1845 }
1846 if (entity.offset() + entity.length() <= from + count) {
1847 entity.shrinkFromRight(from - to);
1848 }
1849 }
1850 }
1851 }
1852
MovePartAndGoForward(TextWithEntities & result,int & to,int & from,int count)1853 void MovePartAndGoForward(TextWithEntities &result, int &to, int &from, int count) {
1854 if (!count) return;
1855 MoveStringPart(result, to, from, count);
1856 to += count;
1857 from += count;
1858 }
1859
PrepareForSending(TextWithEntities & result,int32 flags)1860 void PrepareForSending(TextWithEntities &result, int32 flags) {
1861 ApplyServerCleaning(result);
1862
1863 if (flags) {
1864 ParseEntities(result, flags);
1865 }
1866
1867 Trim(result);
1868 }
1869
1870 // Replace bad symbols with space and remove '\r'.
ApplyServerCleaning(TextWithEntities & result)1871 void ApplyServerCleaning(TextWithEntities &result) {
1872 auto len = result.text.size();
1873
1874 // Replace tabs with two spaces.
1875 if (auto tabs = std::count(result.text.cbegin(), result.text.cend(), '\t')) {
1876 auto replacement = QString::fromLatin1(" ");
1877 auto replacementLength = replacement.size();
1878 auto shift = (replacementLength - 1);
1879 result.text.resize(len + shift * tabs);
1880 for (auto i = len, movedTill = len, to = result.text.size(); i > 0; --i) {
1881 if (result.text[i - 1] == '\t') {
1882 auto toMove = movedTill - i;
1883 to -= toMove;
1884 MoveStringPart(result, to, i, toMove);
1885 to -= replacementLength;
1886 memcpy(result.text.data() + to, replacement.constData(), replacementLength * sizeof(QChar));
1887 movedTill = i - 1;
1888 }
1889 }
1890 len = result.text.size();
1891 }
1892
1893 auto to = 0;
1894 auto from = 0;
1895 auto start = result.text.data();
1896 for (auto ch = start, end = start + len; ch < end; ++ch) {
1897 if (ch->unicode() == '\r') {
1898 MovePartAndGoForward(result, to, from, (ch - start) - from);
1899 ++from;
1900 } else if (IsReplacedBySpace(*ch)) {
1901 *ch = ' ';
1902 }
1903 }
1904 MovePartAndGoForward(result, to, from, len - from);
1905 if (to < len) result.text.resize(to);
1906 }
1907
Trim(TextWithEntities & result)1908 void Trim(TextWithEntities &result) {
1909 auto foundNotTrimmedChar = false;
1910
1911 // right trim
1912 for (auto s = result.text.data(), e = s + result.text.size(), ch = e; ch != s;) {
1913 --ch;
1914 if (!IsTrimmed(*ch)) {
1915 if (ch + 1 < e) {
1916 auto l = ch + 1 - s;
1917 for (auto &entity : result.entities) {
1918 entity.updateTextEnd(l);
1919 }
1920 result.text.resize(l);
1921 }
1922 foundNotTrimmedChar = true;
1923 break;
1924 }
1925 }
1926 if (!foundNotTrimmedChar) {
1927 result = TextWithEntities();
1928 return;
1929 }
1930
1931 const auto firstMonospaceOffset = EntityInText::FirstMonospaceOffset(
1932 result.entities,
1933 result.text.size());
1934
1935 // left trim
1936 for (auto s = result.text.data(), ch = s, e = s + result.text.size(); ch != e; ++ch) {
1937 if (!IsTrimmed(*ch) || (ch - s) == firstMonospaceOffset) {
1938 if (ch > s) {
1939 auto l = ch - s;
1940 for (auto &entity : result.entities) {
1941 entity.shiftLeft(l);
1942 }
1943 result.text = result.text.mid(l);
1944 }
1945 break;
1946 }
1947 }
1948 }
1949
SerializeTagsSize(const TextWithTags::Tags & tags)1950 int SerializeTagsSize(const TextWithTags::Tags &tags) {
1951 auto result = qint32(0);
1952 if (tags.isEmpty()) {
1953 return result;
1954 }
1955 result += sizeof(qint32);
1956 for (const auto &tag : tags) {
1957 result += 2 * sizeof(qint32) // offset, length
1958 + sizeof(quint32) // id.size
1959 + tag.id.size() * sizeof(ushort);
1960 }
1961 return result;
1962 }
1963
SerializeTags(const TextWithTags::Tags & tags)1964 QByteArray SerializeTags(const TextWithTags::Tags &tags) {
1965 if (tags.isEmpty()) {
1966 return QByteArray();
1967 }
1968
1969 QByteArray tagsSerialized;
1970 {
1971 QDataStream stream(&tagsSerialized, QIODevice::WriteOnly);
1972 stream.setVersion(QDataStream::Qt_5_1);
1973 stream << qint32(tags.size());
1974 for (const auto &tag : tags) {
1975 stream << qint32(tag.offset) << qint32(tag.length) << tag.id;
1976 }
1977 }
1978 return tagsSerialized;
1979 }
1980
DeserializeTags(QByteArray data,int textLength)1981 TextWithTags::Tags DeserializeTags(QByteArray data, int textLength) {
1982 auto result = TextWithTags::Tags();
1983 if (data.isEmpty()) {
1984 return result;
1985 }
1986
1987 QDataStream stream(data);
1988 stream.setVersion(QDataStream::Qt_5_1);
1989
1990 qint32 tagCount = 0;
1991 stream >> tagCount;
1992 if (stream.status() != QDataStream::Ok) {
1993 return result;
1994 }
1995 if (tagCount <= 0 || tagCount > textLength) {
1996 return result;
1997 }
1998
1999 for (auto i = 0; i != tagCount; ++i) {
2000 qint32 offset = 0, length = 0;
2001 QString id;
2002 stream >> offset >> length >> id;
2003 if (stream.status() != QDataStream::Ok) {
2004 return result;
2005 }
2006 if (offset < 0 || length <= 0 || offset + length > textLength) {
2007 return result;
2008 }
2009 result.push_back({ offset, length, id });
2010 }
2011 return result;
2012 }
2013
TagsMimeType()2014 QString TagsMimeType() {
2015 return QString::fromLatin1("application/x-td-field-tags");
2016 }
2017
TagsTextMimeType()2018 QString TagsTextMimeType() {
2019 return QString::fromLatin1("application/x-td-field-text");
2020 }
2021
IsMentionLink(QStringView link)2022 bool IsMentionLink(QStringView link) {
2023 return link.startsWith(kMentionTagStart);
2024 }
2025
IsSeparateTag(QStringView tag)2026 [[nodiscard]] bool IsSeparateTag(QStringView tag) {
2027 return (tag == Ui::InputField::kTagCode)
2028 || (tag == Ui::InputField::kTagPre);
2029 }
2030
JoinTag(const QList<QStringView> & list)2031 QString JoinTag(const QList<QStringView> &list) {
2032 if (list.isEmpty()) {
2033 return QString();
2034 }
2035 auto length = (list.size() - 1);
2036 for (const auto &entry : list) {
2037 length += entry.size();
2038 }
2039 auto result = QString();
2040 result.reserve(length);
2041 result.append(list.front());
2042 for (auto i = 1, count = int(list.size()); i != count; ++i) {
2043 if (!IsSeparateTag(list[i])) {
2044 result.append('|').append(list[i]);
2045 }
2046 }
2047 return result;
2048 }
2049
// Returns the '|'-separated `tag` without the `removed` part.
// The remaining parts are joined back using JoinTag().
QString TagWithRemoved(const QString &tag, const QString &removed) {
	if (tag == removed) {
		return QString();
	}
	const auto target = QStringView(removed);
	auto parts = QStringView(tag).split('|');
	const auto from = ranges::remove(parts, target);
	parts.erase(from, parts.end());
	return JoinTag(parts);
}
2058
// Returns the '|'-separated `tag` with the `added` part included.
//
// If `tag` already contains `added` it is returned unchanged. Otherwise the
// parts together with the new one are sorted and joined using JoinTag().
QString TagWithAdded(const QString &tag, const QString &added) {
	if (tag.isEmpty() || tag == added) {
		return added;
	}
	const auto extra = QStringView(added);
	auto parts = QStringView(tag).split('|');
	if (parts.contains(extra)) {
		return tag;
	}
	parts.push_back(extra);
	std::sort(parts.begin(), parts.end());
	return JoinTag(parts);
}
2072
ConvertTextTagsToEntities(const TextWithTags::Tags & tags)2073 EntitiesInText ConvertTextTagsToEntities(const TextWithTags::Tags &tags) {
2074 auto result = EntitiesInText();
2075 if (tags.isEmpty()) {
2076 return result;
2077 }
2078
2079 constexpr auto kInMaskTypes = std::array{
2080 EntityType::Bold,
2081 EntityType::Italic,
2082 EntityType::Underline,
2083 EntityType::StrikeOut,
2084 EntityType::Code,
2085 EntityType::Pre,
2086 };
2087 struct State {
2088 QString link;
2089 uint32 mask = 0;
2090
2091 void set(EntityType type) {
2092 mask |= (1 << int(type));
2093 }
2094 void remove(EntityType type) {
2095 mask &= ~(1 << int(type));
2096 }
2097 [[nodiscard]] bool has(EntityType type) const {
2098 return (mask & (1 << int(type)));
2099 }
2100 };
2101
2102 auto offset = 0;
2103 auto state = State();
2104 auto notClosedEntities = QVector<int>(); // Stack of indices.
2105 const auto closeOne = [&] {
2106 Expects(!notClosedEntities.isEmpty());
2107
2108 auto &entity = result[notClosedEntities.back()];
2109 entity = {
2110 entity.type(),
2111 entity.offset(),
2112 offset - entity.offset(),
2113 entity.data(),
2114 };
2115 if (ranges::contains(kInMaskTypes, entity.type())) {
2116 state.remove(entity.type());
2117 } else {
2118 state.link = QString();
2119 }
2120 notClosedEntities.pop_back();
2121 };
2122 const auto closeType = [&](EntityType type) {
2123 auto closeCount = 0;
2124 const auto notClosedCount = notClosedEntities.size();
2125 while (closeCount < notClosedCount) {
2126 const auto index = notClosedCount - closeCount - 1;
2127 if (result[notClosedEntities[index]].type() == type) {
2128 for (auto i = 0; i != closeCount + 1; ++i) {
2129 closeOne();
2130 }
2131 break;
2132 }
2133 ++closeCount;
2134 }
2135 };
2136 const auto openType = [&](EntityType type, const QString &data = {}) {
2137 notClosedEntities.push_back(result.size());
2138 result.push_back({ type, offset, -1, data });
2139 };
2140
2141 const auto processState = [&](State nextState) {
2142 const auto linkChanged = (nextState.link != state.link);
2143 if (linkChanged) {
2144 if (IsMentionLink(state.link)) {
2145 closeType(EntityType::MentionName);
2146 } else {
2147 closeType(EntityType::CustomUrl);
2148 }
2149 }
2150 for (const auto type : kInMaskTypes) {
2151 if (state.has(type) && !nextState.has(type)) {
2152 closeType(type);
2153 }
2154 }
2155 if (linkChanged && !nextState.link.isEmpty()) {
2156 if (IsMentionLink(nextState.link)) {
2157 const auto match = qthelp::regex_match(
2158 "^(\\d+\\.\\d+)(/|$)",
2159 base::StringViewMid(nextState.link, kMentionTagStart.size()));
2160 if (match) {
2161 openType(EntityType::MentionName, match->captured(1));
2162 }
2163 } else {
2164 openType(EntityType::CustomUrl, nextState.link);
2165 }
2166 }
2167 for (const auto type : kInMaskTypes) {
2168 if (nextState.has(type) && !state.has(type)) {
2169 openType(type);
2170 }
2171 }
2172 state = nextState;
2173 };
2174 const auto stateForTag = [&](const QString &tag) {
2175 auto result = State();
2176 const auto list = QStringView(tag).split('|');
2177 for (const auto &single : list) {
2178 if (single == Ui::InputField::kTagBold) {
2179 result.set(EntityType::Bold);
2180 } else if (single == Ui::InputField::kTagItalic) {
2181 result.set(EntityType::Italic);
2182 } else if (single == Ui::InputField::kTagUnderline) {
2183 result.set(EntityType::Underline);
2184 } else if (single == Ui::InputField::kTagStrikeOut) {
2185 result.set(EntityType::StrikeOut);
2186 } else if (single == Ui::InputField::kTagCode) {
2187 result.set(EntityType::Code);
2188 } else if (single == Ui::InputField::kTagPre) {
2189 result.set(EntityType::Pre);
2190 } else {
2191 result.link = single.toString();
2192 }
2193 }
2194 return result;
2195 };
2196 for (const auto &tag : tags) {
2197 if (tag.offset > offset) {
2198 processState(State());
2199 }
2200 offset = tag.offset;
2201 processState(stateForTag(tag.id));
2202 offset += tag.length;
2203 }
2204 processState(State());
2205
2206 result.erase(ranges::remove_if(result, [](const EntityInText &entity) {
2207 return (entity.length() <= 0);
2208 }), result.end());
2209
2210 return result;
2211 }
2212
// Converts a list of entities to input field tags.
//
// Each produced tag is { offset, length, id }, where id is the combined
// '|'-separated tag string (built by TagWithAdded() / TagWithRemoved())
// that is active on that part of the text. Only the entity types handled in
// the switch below produce tags, all the other types are skipped.
TextWithTags::Tags ConvertEntitiesToTextTags(
		const EntitiesInText &entities) {
	auto result = TextWithTags::Tags();
	if (entities.isEmpty()) {
		return result;
	}

	// Start of the current run and the tag string active in it.
	auto offset = 0;
	auto current = QString();

	// Switches the active tag string to `next` at `nextOffset`. If the
	// string really changes and the run is not empty, the finished run is
	// added to the result (unless no tag was active) and a new run starts.
	const auto updateCurrent = [&](int nextOffset, const QString &next) {
		if (next == current) {
			return;
		} else if (nextOffset > offset) {
			if (!current.isEmpty()) {
				result.push_back({ offset, nextOffset - offset, current });
			}
			offset = nextOffset;
		}
		current = next;
	};

	// Pending (end offset, tag) pairs, kept sorted, so the front one is
	// always the next tag to be removed from the active string.
	auto toRemove = std::vector<std::pair<int, QString>>();

	// Removes from the active string all the tags that end not later than
	// at `nextOffset`.
	const auto removeTill = [&](int nextOffset) {
		while (!toRemove.empty() && toRemove.front().first <= nextOffset) {
			updateCurrent(
				toRemove.front().first,
				TagWithRemoved(current, toRemove.front().second));
			toRemove.erase(toRemove.begin());
		}
	};
	for (const auto &entity : entities) {
		// Activates `tag` at the entity start and schedules its removal.
		const auto push = [&](const QString &tag) {
			removeTill(entity.offset());
			updateCurrent(entity.offset(), TagWithAdded(current, tag));
			// NOTE(review): `offset` is the start of the current run, it is
			// equal to entity.offset() only if updateCurrent() has moved it
			// (or it was already there). If the tag was already active the
			// end is counted from an earlier position - confirm that this
			// is intended.
			toRemove.push_back({ offset + entity.length(), tag });
			ranges::sort(toRemove);
		};
		switch (entity.type()) {
		case EntityType::MentionName: {
			// Only the data that looks like "<digits>.<digits>" is accepted.
			auto match = QRegularExpression(
				R"(^(\d+\.\d+)$)"
			).match(entity.data());
			if (match.hasMatch()) {
				push(kMentionTagStart + entity.data());
			}
		} break;
		case EntityType::CustomUrl: {
			// A url that looks like a mention link is not accepted here.
			const auto url = entity.data();
			if (Ui::InputField::IsValidMarkdownLink(url)
				&& !IsMentionLink(url)) {
				push(url);
			}
		} break;
		case EntityType::Bold: push(Ui::InputField::kTagBold); break;
		//case EntityType::Semibold: // Semibold is for UI parts only.
		//	push(Ui::InputField::kTagSemibold);
		//	break;
		case EntityType::Italic: push(Ui::InputField::kTagItalic); break;
		case EntityType::Underline:
			push(Ui::InputField::kTagUnderline);
			break;
		case EntityType::StrikeOut:
			push(Ui::InputField::kTagStrikeOut);
			break;
		case EntityType::Code: push(Ui::InputField::kTagCode); break; // #TODO entities
		case EntityType::Pre: push(Ui::InputField::kTagPre); break;
		}
	}
	// Finish all the tags that are still active.
	if (!toRemove.empty()) {
		removeTill(toRemove.back().first);
	}
	return result;
}
2285
MimeDataFromText(const TextForMimeData & text)2286 std::unique_ptr<QMimeData> MimeDataFromText(const TextForMimeData &text) {
2287 return MimeDataFromText(
2288 { text.rich.text, ConvertEntitiesToTextTags(text.rich.entities) },
2289 text.expanded);
2290 }
2291
MimeDataFromText(TextWithTags && text)2292 std::unique_ptr<QMimeData> MimeDataFromText(TextWithTags &&text) {
2293 const auto expanded = ExpandCustomLinks(text);
2294 return MimeDataFromText(std::move(text), expanded);
2295 }
2296
SetClipboardText(const TextForMimeData & text,QClipboard::Mode mode)2297 void SetClipboardText(
2298 const TextForMimeData &text,
2299 QClipboard::Mode mode) {
2300 if (auto data = MimeDataFromText(text)) {
2301 QGuiApplication::clipboard()->setMimeData(data.release(), mode);
2302 }
2303 }
2304
2305 } // namespace TextUtilities
2306
// Stores the entity type, its offset and length and the data string
// exactly as they are passed, no validation is done here.
EntityInText::EntityInText(
	EntityType type,
	int offset,
	int length,
	const QString &data)
: _type(type)
, _offset(offset)
, _length(length)
, _data(data) {
}
2317
FirstMonospaceOffset(const EntitiesInText & entities,int textLength)2318 int EntityInText::FirstMonospaceOffset(
2319 const EntitiesInText &entities,
2320 int textLength) {
2321 auto &&monospace = ranges::make_subrange(
2322 entities.begin(),
2323 entities.end()
2324 ) | ranges::views::filter([](const EntityInText & entity) {
2325 return (entity.type() == EntityType::Pre)
2326 || (entity.type() == EntityType::Code);
2327 });
2328 const auto i = ranges::max_element(
2329 monospace,
2330 std::greater<>(),
2331 &EntityInText::offset);
2332 return (i == monospace.end()) ? textLength : i->offset();
2333 }
2334