1 /***************************************************************************
2 * 2007-2021 by Peter Semiletov *
3 * peter.semiletov@gmail.com *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 3 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19 ***************************************************************************/
20
21
22 /***************************************************************************
23 some code is taken from Scribus::util.cpp:
24 -------------------
25 begin : Fri Sep 14 2001
26 copyright : (C) 2001 by Franz Schmid
27 email : Franz.Schmid@altmuehlnet.de
28 ***************************************************************************/
29 /* roman.c by Adam Rogoyski (apoc@laker.net) Temperanc on EFNet irc
30 * Copyright (C) 1998 Adam Rogoyski
 * Converts Decimal numbers to Roman Numerals and Roman Numerals to
32 * Decimals on the command line or in Interactive mode.
33 * Uses an expanded Roman Numeral set to handle numbers up to 999999999
34 */
35
36
37 #include <algorithm>
38
39 #include <QString>
40 #include <QMap>
41 #include <QDebug>
42
43
44 #if QT_VERSION < 0x050000
45 #include <QRegExp>
46 #else
47 #include <QRegularExpression>
48 #endif
49
50
51
52 #include "textproc.h"
53 #include "utils.h"
54
55 using namespace std;
56
57
58
qstring_length_less_than(const QString & v1,const QString & v2)59 bool qstring_length_less_than (const QString& v1, const QString& v2)
60 {
61 return v1.length() < v2.length();
62 }
63
64 /*
65 QString int_to_binary (int n)
66 {
67 std::bitset<sizeof (int)> bt (n);
68 return QString::fromStdString (bt.to_string<char,std::string::traits_type,std::string::allocator_type>());
69 }
70 */
71
72
str_fuzzy_search(const QString & s,const QString & text_to_find,int start_pos,double q)73 int str_fuzzy_search (const QString &s, const QString &text_to_find, int start_pos, double q)
74 {
75 if (s.isEmpty() || text_to_find.isEmpty())
76 return -1;
77
78 int counter;
79 int result = -1;
80
81 bool jump = false;
82
83 int end_pos = s.length() - 1;
84
85 for (int i = start_pos; i < end_pos; i++)
86 {
87 if (jump)
88 break;
89
90 counter = 0;
91 for (int j = 0; j < text_to_find.length(); j++)
92 {
93 if (s[i + j] == text_to_find[j])
94 counter++;
95
96 if (get_percent ((double)text_to_find.length(), (double)counter) >= q)
97 {
98 result = i;
99 jump = true;
100 break;
101 }
102 }
103 }
104
105 return result;
106 }
107
108
// Applies a replacement table to s and returns the result.
//
// The table is obtained from hash_load_keyval (fname); every key found in
// the text is replaced with its value. When use_regexp is true each key is
// treated as a regular expression (QRegExp on Qt 4, QRegularExpression on
// Qt 5+), otherwise as a literal string.
//
// Rules are applied in the hash's iteration order, which is unspecified,
// so rules that overlap each other may give order-dependent results.
QString apply_table (const QString &s, const QString &fname, bool use_regexp)
{
  const QHash<QString, QString> h = hash_load_keyval (fname);

  QString result = s;

  // Walk the hash directly: the former h.keys()[i] rebuilt the complete key
  // list on every iteration and then looked each value up again.
  for (QHash<QString, QString>::const_iterator it = h.constBegin(); it != h.constEnd(); ++it)
      {
       if (use_regexp)
          {
#if QT_VERSION < 0x050000
           result.replace (QRegExp (it.key()), it.value());
#else
           result.replace (QRegularExpression (it.key()), it.value());
#endif
          }
       else
           result.replace (it.key(), it.value());
      }

  return result;
}
132
133
// Returns source with everything between '<' and '>' removed.
//
// This is a plain character scan, not an HTML parser: comments, scripts and
// quoted '>' characters inside attributes get no special treatment. A '>'
// that appears outside of a tag is dropped as well.
QString strip_html (const QString &source)
{
  QString dest;
  bool inside_tag = false;

  for (int i = 0; i < source.length(); i++)
      {
       const QChar ch = source.at (i);

       if (ch == '<')
          inside_tag = true;
       else
       if (ch == '>')
          inside_tag = false;
       else
       if (! inside_tag)
          dest += ch;

       // Each character is classified on its own. The previous version
       // stepped over '>' and copied the next character unchecked, so the
       // '<' of an immediately following tag leaked into the output
       // ("<b><i>x" became "<ix").
      }

  return dest;
}
159
160
qstringlist_process(const QString & s,const QString & params,int mode)161 QString qstringlist_process (const QString &s, const QString ¶ms, int mode)
162 {
163 QStringList sl;
164 QStringList l;
165 QString result;
166
167 if (mode != QSTRL_PROC_FLT_WITH_SORTCASECARE_SEP && mode != QSTRL_PROC_LIST_FLIP_SEP)
168 sl = s.split (QChar::ParagraphSeparator);
169
170 switch (mode)
171 {
172 case QSTRL_PROC_FLT_WITH_SORTCASECARE_SEP:
173 {
174 if (s.indexOf (params) == -1)
175 return s;
176
177 QStringList t = s.split (params);
178 t.sort();
179 result = t.join (params);
180 return result;
181 };
182
183 case QSTRL_PROC_LIST_FLIP_SEP: {
184 if (s.indexOf (params) == -1)
185 return s;
186
187 QStringList t = s.split (params);
188 t.sort();
189
190 for (int i = 0; i < t.size(); i++)
191 l.prepend (t.at(i));
192
193 result = l.join (params);
194 return result;
195 };
196
197
198 case QSTRL_PROC_FLT_WITH_SORTNOCASECARE:
199 {
200 QMap <QString, QString> map;
201
202 for (int i = 0; i < sl.size(); i++)
203 map.insert (sl[i].toLower(), sl[i]);
204
205 for (QMap<QString, QString>::const_iterator i = map.constBegin();
206 i != map.constEnd();
207 ++i)
208 l.append (i.value());
209
210 break;
211 }
212
213 case QSTRL_PROC_FLT_WITH_SORTLEN:
214 {
215 l = sl;
216 std::sort (l.begin(), l.end(), qstring_length_less_than);
217 break;
218 }
219
220
221 case QSTRL_PROC_FLT_REMOVE_EMPTY:
222 {
223 for (QList <QString>::iterator i = sl.begin(); i != sl.end(); ++i)
224 if (! i->isEmpty())
225 l.append (*i);
226
227 break;
228 };
229
230
231 case QSTRL_PROC_FLT_REMOVE_DUPS:
232 {
233 for (QList <QString>::iterator i = sl.begin(); i != sl.end(); ++i)
234 if (! l.contains (*i))
235 l.append (*i);
236
237 break;
238 };
239
240 case QSTRL_PROC_REMOVE_FORMATTING:
241 {
242 for (QList <QString>::iterator i = sl.begin(); i != sl.end(); ++i)
243 l.append (i->simplified());
244
245 break;
246 };
247
248 case QSTRL_PROC_FLT_WITH_REGEXP:
249 {
250 #if QT_VERSION < 0x050000
251 l = sl.filter (QRegExp (params));
252 #else
253 l = sl.filter (QRegularExpression (params));
254 #endif
255 break;
256 }
257
258 case QSTRL_PROC_FLT_WITH_SORTCASECARE:
259 {
260 l = sl;
261 l.sort();
262 break;
263 }
264
265 case QSTRL_PROC_LIST_FLIP:
266 {
267 for (QList <QString>::iterator i = sl.begin(); i != sl.end(); ++i)
268 l.prepend (*i);
269
270 break;
271 }
272
273 case QSTRL_PROC_FLT_LESS:
274 {
275 int t = params.toInt();
276
277 for (QList <QString>::iterator i = sl.begin(); i != sl.end(); ++i)
278 if (i->size() > t)
279 l.append (*i);
280
281 break;
282 }
283
284 case QSTRL_PROC_FLT_GREATER:
285 {
286 int t = params.toInt();
287
288 for (QList <QString>::iterator i = sl.begin(); i != sl.end(); ++i)
289 if (i->size() < t)
290 l.append (*i);
291
292 break;
293 }
294 }
295
296 result = l.join ("\n");
297 return result;
298 }
299
300
// Returns s with its characters in reverse order.
//
// The reversal works on QChar (UTF-16 code unit) level, so the two halves
// of a surrogate pair end up swapped.
QString string_reverse (const QString &s)
{
  QString sn;
  sn.reserve (s.length());

  // Append instead of assigning through sn[x]: writing past the end of an
  // empty QString relied on the auto-growing QCharRef of Qt 4/5, which was
  // deprecated and is an out-of-bounds access on Qt 6.
  for (int i = s.length() - 1; i > -1; i--)
      sn.append (s.at (i));

  return sn;
}
313
314
// Replaces the straight double quotes in source with a pair of alternating
// strings: the 1st, 3rd, 5th... quote becomes c1 and the 2nd, 4th, 6th...
// becomes c2. Every other character is copied unchanged.
QString conv_quotes (const QString &source, const QString &c1, const QString &c2)
{
  QString dest;
  bool opening = true;   // true when the next quote mark gets c1

  for (QString::const_iterator it = source.constBegin(); it != source.constEnd(); ++it)
      {
       if (*it == '\"')
          {
           dest += opening ? c1 : c2;
           opening = ! opening;
          }
       else
           dest += *it;
      }

  return dest;
}
340
341
html_get_by_patt(const QString & s,const QString & spatt)342 QStringList html_get_by_patt (const QString &s, const QString &spatt)
343 {
344 QStringList result;
345
346 int c = s.size();
347 int i = 0;
348
349 while (i < c)
350 {
351 int start = s.indexOf (spatt, i, Qt::CaseInsensitive);
352
353 if (start == -1)
354 break;
355
356 int end = s.indexOf ('"', start + spatt.size());
357 if (end == -1)
358 break;
359
360 result.prepend (s.mid (start + spatt.size(), (end - start) - spatt.size()));
361
362 i = end + 1;
363 }
364
365 return result;
366 }
367
368
anagram(const QString & s)369 QStringList anagram (const QString &s)
370 {
371 QString input = s;
372 QStringList sl;
373
374 sort (input.begin(), input.end());
375
376 do
377 sl.append (input);
378 while (next_permutation (input.begin(), input.end()));
379
380 return sl;
381 }
382
383 /*
384 from:
385
386 * roman.c by Adam Rogoyski (apoc@laker.net) Temperanc on EFNet irc
387 * Copyright (C) 1998 Adam Rogoyski
 * Converts Decimal numbers to Roman Numerals and Roman Numerals to
389 * Decimals on the command line or in Interactive mode.
390 * Uses an expanded Roman Numeral set to handle numbers up to 999999999
391 */
392
// Values of the symbols of the expanded Roman numeral set.
#define FROM_ROMAN_I 1
#define FROM_ROMAN_V 5
#define FROM_ROMAN_X 10
#define FROM_ROMAN_L 50
#define FROM_ROMAN_C 100
#define FROM_ROMAN_D 500
#define FROM_ROMAN_M 1000
#define FROM_ROMAN_P 5000
#define FROM_ROMAN_Q 10000
#define FROM_ROMAN_R 50000
#define FROM_ROMAN_S 100000
#define FROM_ROMAN_T 500000
#define FROM_ROMAN_U 1000000
#define FROM_ROMAN_B 5000000
#define FROM_ROMAN_W 10000000
#define FROM_ROMAN_N 50000000
#define FROM_ROMAN_Y 100000000
#define FROM_ROMAN_Z 500000000

// Returns the numeric value of one upper-case symbol of the expanded Roman
// numeral set, or 0 for any other character (including '\0').
int value (char c)
{
  struct Numeral
  {
    char symbol;
    int amount;
  };

  static const Numeral numerals[] = {
    {'I', FROM_ROMAN_I}, {'V', FROM_ROMAN_V}, {'X', FROM_ROMAN_X},
    {'L', FROM_ROMAN_L}, {'C', FROM_ROMAN_C}, {'D', FROM_ROMAN_D},
    {'M', FROM_ROMAN_M}, {'P', FROM_ROMAN_P}, {'Q', FROM_ROMAN_Q},
    {'R', FROM_ROMAN_R}, {'S', FROM_ROMAN_S}, {'T', FROM_ROMAN_T},
    {'U', FROM_ROMAN_U}, {'B', FROM_ROMAN_B}, {'W', FROM_ROMAN_W},
    {'N', FROM_ROMAN_N}, {'Y', FROM_ROMAN_Y}, {'Z', FROM_ROMAN_Z}
  };

  const int count = static_cast<int> (sizeof (numerals) / sizeof (numerals[0]));

  for (int k = 0; k < count; k++)
      if (numerals[k].symbol == c)
         return numerals[k].amount;

  return 0;
}
456
457
romanToDecimal(const char * roman)458 int romanToDecimal (const char *roman)
459 {
460 int decimal = 0;
461 for (; *roman; roman++)
462 {
463 /* Check for four of a letter in a fow */
464 if ((*(roman + 1) && *(roman + 2) && *(roman + 3))
465 && (*roman == *(roman + 1))
466 && (*roman == *(roman + 2))
467 && (*roman == *(roman + 3)))
468 return 0;
469 /* Check for two five type numbers */
470 if ( ((*roman == 'V') && (*(roman + 1) == 'V'))
471 || ((*roman == 'L') && (*(roman + 1) == 'L'))
472 || ((*roman == 'D') && (*(roman + 1) == 'D'))
473 || ((*roman == 'P') && (*(roman + 1) == 'P'))
474 || ((*roman == 'R') && (*(roman + 1) == 'R'))
475 || ((*roman == 'T') && (*(roman + 1) == 'T'))
476 || ((*roman == 'B') && (*(roman + 1) == 'B'))
477 || ((*roman == 'N') && (*(roman + 1) == 'N'))
478 || ((*roman == 'Z') && (*(roman + 1) == 'Z')))
479 return 0;
480 /* Check for two lower characters before a larger one */
481 if ((value(*roman) == value(*(roman + 1))) && (*(roman + 2))
482 && (value(*(roman + 1)) < value(*(roman + 2))))
483 return 0;
484 /* Check for the same character on either side of a larger one */
485 if ((*(roman + 1) && *(roman + 2))
486 && (value(*roman) == value(*(roman + 2)))
487 && (value(*roman) < value(*(roman + 1))))
488 return 0;
489 /* Check for illegal nine type numbers */
490 if (!strncmp(roman, "LXL", 3) || !strncmp(roman, "DCD", 3)
491 || !strncmp(roman, "PMP", 3) || !strncmp(roman, "RQR", 3)
492 || !strncmp(roman, "TST", 3) || !strncmp(roman, "BUB", 3)
493 || !strncmp(roman, "NWN", 3) || !strncmp(roman, "VIV", 3))
494 return 0;
495 if (value(*roman) < value(*(roman + 1)))
496 {
497 /* check that subtracted value is at least 10% larger,
498 i.e. 1990 is not MXM, but MCMXC */
499 if ((10 * value(*roman)) < value(*(roman + 1)))
500 return 0;
501 /* check for double subtraction, i.e. IVX */
502 if (value(*(roman + 1)) <= value(*(roman + 2)))
503 return 0;
504 /* check for subtracting by a number starting with a 5
505 ie. VX, LD LM */
506 if (*roman == 'V' || *roman == 'L' || *roman == 'D'
507 || *roman == 'P' || *roman == 'R' || *roman == 'T'
508 || *roman == 'B' || *roman == 'N')
509 return 0;
510 decimal += value (*(roman + 1)) - value (*roman);
511 roman++;
512 }
513 else
514 {
515 decimal += value (*roman);
516 }
517 }
518 return decimal;
519 }
520
521
522 //this code is taken from Scribus::util.cpp:
arabicToRoman(int i)523 QString arabicToRoman (int i)
524 {
525 QString roman;
526
527 int arabic = i;
528
529 while (arabic - 1000000 >= 0){
530 roman += "m";
531 arabic -= 1000000;
532 }
533 while (arabic - 900000 >= 0){
534 roman += "cm";
535 arabic -= 900000;
536 }
537 while (arabic - 500000 >= 0){
538 roman += "d";
539 arabic -= 500000;
540 }
541 while (arabic - 400000 >= 0){
542 roman += "cd";
543 arabic -= 400000;
544 }
545 while (arabic - 100000 >= 0){
546 roman += "c";
547 arabic -= 100000;
548 }
549 while (arabic - 90000 >= 0){
550 roman += "xc";
551 arabic -= 90000;
552 }
553 while (arabic - 50000 >= 0){
554 roman += "l";
555 arabic -= 50000;
556 }
557 while (arabic - 40000 >= 0){
558 roman += "xl";
559 arabic -= 40000;
560 }
561 while (arabic - 10000 >= 0){
562 roman += "x";
563 arabic -= 10000;
564 }
565 while (arabic - 9000 >= 0){
566 roman += "Mx";
567 arabic -= 9000;
568 }
569 while (arabic - 5000 >= 0){
570 roman += "v";
571 arabic -= 5000;
572 }
573 while (arabic - 4000 >= 0){
574 roman += "Mv";
575 arabic -= 4000;
576 }
577 while (arabic - 1000 >= 0){
578 roman += "M";
579 arabic -= 1000;
580 }
581 while (arabic - 900 >= 0){
582 roman += "CM";
583 arabic -= 900;
584 }
585 while (arabic - 500 >= 0){
586 roman += "D";
587 arabic -= 500;
588 }
589 while (arabic - 400 >= 0){
590 roman += "CD";
591 arabic -= 400;
592 }
593 while (arabic - 100 >= 0){
594 roman += "C";
595 arabic -= 100;
596 }
597 while (arabic - 90 >= 0){
598 roman += "XC";
599 arabic -= 90;
600 }
601 while (arabic - 50 >= 0){
602 roman += "L";
603 arabic -= 50;
604 }
605 while (arabic - 40 >= 0){
606 roman += "XL";
607 arabic -= 40;
608 }
609 while (arabic - 10 >= 0){
610 roman += "X";
611 arabic -= 10;
612 }
613 while (arabic - 9 >= 0){
614 roman += "IX";
615 arabic -= 9;
616 }
617 while (arabic - 5 >= 0){
618 roman += "V";
619 arabic -= 5;
620 }
621 while (arabic - 4 >= 0){
622 roman += "IV";
623 arabic -= 4;
624 }
625 while (arabic - 1 >= 0){
626 roman += "I";
627 arabic -= 1;
628 }
629 return roman;
630 }
631
632
int_to_binary(int n)633 QString int_to_binary (int n)
634 {
635 QString result;
636 int sz = sizeof (n) * 8 - 1;
637
638 for (int i = sz; i > -1; i--)
639 {
640 if (n & (1 << i))
641 result.append ("1");
642 else
643 result.append ("0");
644
645 if (i % 4 == 0)
646 result.append (" ");
647 }
648
649 return result;
650 }
651
652
bin_to_decimal(const QString & s)653 unsigned int bin_to_decimal (const QString &s)
654 {
655 unsigned int table[31];
656 unsigned int c = 1;
657 unsigned int result = 0;
658 QString sn = string_reverse (s);
659
660 table[0] = 1;
661
662 for (int i = 1; i < 31; i++)
663 {
664 c *= 2;
665 table[i] = c;
666 }
667
668 for (int i = 0; i < sn.size(); i++)
669 if (sn[i] == '1')
670 result += table[i];
671
672 return result;
673 }
674
675
// Returns s with the five characters that are special in HTML/XML markup
// replaced by their named character entities.
QString str_to_entities (const QString &s)
{
  QString t = s;

  // The ampersand goes first; otherwise the '&' of the entities inserted
  // below would be escaped a second time.
  t.replace ("&", "&amp;");

  t.replace ("\"", "&quot;");
  t.replace ("'", "&apos;");

  t.replace ("<", "&lt;");
  t.replace (">", "&gt;");

  return t;
}
689
690
// Encodes s using the table loaded from ":/text-data/morse-" + lang.
//
// The text is upper-cased and looked up character by character; each
// character that has a non-empty table entry contributes that entry
// followed by a single space. Characters without an entry are skipped.
QString morse_from_lang (const QString &s, const QString &lang)
{
  const QHash<QString, QString> table = hash_load_keyval (":/text-data/morse-" + lang);
  const QString upper = s.toUpper();

  QString encoded;

  for (QString::const_iterator ch = upper.constBegin(); ch != upper.constEnd(); ++ch)
      {
       const QString code = table.value (QString (*ch));
       if (code.isEmpty())
          continue;

       encoded.append (code);
       encoded.append (" ");
      }

  return encoded;
}
708
709
// Decodes s using the table loaded from ":/text-data/morse-" + lang.
//
// The input is upper-cased and split on single spaces; every token is
// looked up among the table's values, and the key that maps to it is
// appended to the result. Tokens without a matching key are skipped.
QString morse_to_lang (const QString &s, const QString &lang)
{
  const QHash<QString, QString> table = hash_load_keyval (":/text-data/morse-" + lang);
  const QStringList codes = s.toUpper().split (" ");

  QString decoded;

  for (QStringList::const_iterator code = codes.constBegin(); code != codes.constEnd(); ++code)
      {
       // Reverse lookup: find the key whose value equals this token.
       const QString symbol = table.key (*code);
       if (! symbol.isEmpty())
          decoded.append (symbol);
      }

  return decoded;
}
727
728
729 //from http://www.cyberforum.ru/cpp-beginners/thread125615.html
// Sums the values of the Roman symbols I, V, X, L, C, D and M in rom_str.
//
// I, X and C are subtracted instead of added when the next character is one
// of the two symbols they may precede (V/X, L/C and D/M respectively).
// Any other character is ignored; the input is not validated.
int get_arab_num (std::string rom_str)
{
  int total = 0;
  const size_t count = rom_str.length();

  for (size_t pos = 0; pos < count; ++pos)
      {
       const char current = rom_str[pos];

       // The following symbol, or '\0' when current is the last one.
       const char next = (pos + 1 < count) ? rom_str[pos + 1] : '\0';

       if (current == 'M')
          total += 1000;
       else
       if (current == 'D')
          total += 500;
       else
       if (current == 'C')
          {
           if (next == 'D' || next == 'M')
              total -= 100;
           else
               total += 100;
          }
       else
       if (current == 'L')
          total += 50;
       else
       if (current == 'X')
          {
           if (next == 'L' || next == 'C')
              total -= 10;
           else
               total += 10;
          }
       else
       if (current == 'V')
          total += 5;
       else
       if (current == 'I')
          {
           if (next == 'V' || next == 'X')
              total -= 1;
           else
               total += 1;
          }
      }

  return total;
}
770