1 /***************************************************************************
2  *   2007-2021 by Peter Semiletov                                          *
3  *   peter.semiletov@gmail.com                                             *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 3 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU General Public License     *
16  *   along with this program; if not, write to the                         *
17  *   Free Software Foundation, Inc.,                                       *
18  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
19  ***************************************************************************/
20 
21 
22 /***************************************************************************
23 some code is taken from Scribus::util.cpp:
24                              -------------------
25     begin                : Fri Sep 14 2001
26     copyright            : (C) 2001 by Franz Schmid
27     email                : Franz.Schmid@altmuehlnet.de
28  ***************************************************************************/
29 /* roman.c by Adam Rogoyski (apoc@laker.net) Temperanc on EFNet irc
30  * Copyright (C) 1998 Adam Rogoyski
 * Converts Decimal numbers to Roman Numerals and Roman Numerals to
32  * Decimals on the command line or in Interactive mode.
33  * Uses an expanded Roman Numeral set to handle numbers up to 999999999
34 */
35 
36 
37 #include <algorithm>
38 
39 #include <QString>
40 #include <QMap>
41 #include <QDebug>
42 
43 
44 #if QT_VERSION < 0x050000
45 #include <QRegExp>
46 #else
47 #include <QRegularExpression>
48 #endif
49 
50 
51 
52 #include "textproc.h"
53 #include "utils.h"
54 
55 using namespace std;
56 
57 
58 
qstring_length_less_than(const QString & v1,const QString & v2)59 bool qstring_length_less_than (const QString& v1, const QString& v2)
60 {
61    return v1.length() < v2.length();
62 }
63 
64 /*
65 QString int_to_binary (int n)
66 {
67   std::bitset<sizeof (int)> bt (n);
68   return QString::fromStdString (bt.to_string<char,std::string::traits_type,std::string::allocator_type>());
69 }
70 */
71 
72 
str_fuzzy_search(const QString & s,const QString & text_to_find,int start_pos,double q)73 int str_fuzzy_search (const QString &s, const QString &text_to_find, int start_pos, double q)
74 {
75   if (s.isEmpty() || text_to_find.isEmpty())
76      return -1;
77 
78   int counter;
79   int result = -1;
80 
81   bool jump = false;
82 
83   int end_pos = s.length() - 1;
84 
85   for (int i = start_pos; i < end_pos; i++)
86       {
87        if (jump)
88           break;
89 
90        counter = 0;
91        for (int j = 0; j < text_to_find.length(); j++)
92            {
93             if (s[i + j] == text_to_find[j])
94                counter++;
95 
96             if (get_percent ((double)text_to_find.length(), (double)counter) >= q)
97                {
98                 result = i;
99                 jump = true;
100                 break;
101                }
102            }
103       }
104 
105   return result;
106 }
107 
108 
/*
 Applies a replacement table to s and returns the result.

 The table is loaded with hash_load_keyval (fname). Each key is replaced
 with its value, one key after another, in the order in which the hash
 lists its keys. When use_regexp is true the keys are treated as regular
 expressions (QRegExp before Qt 5, QRegularExpression otherwise), otherwise
 as plain substrings.
*/
QString apply_table (const QString &s, const QString &fname, bool use_regexp)
{
  const QHash<QString, QString> h = hash_load_keyval (fname);

  //build the key list once instead of on every loop iteration
  const QList<QString> keys = h.keys();

  QString result = s;

  for (int i = 0; i < keys.size(); i++)
      {
       const QString &key = keys.at (i);
       const QString replacement = h.value (key);

       if (use_regexp)
          {
#if QT_VERSION < 0x050000
           result.replace (QRegExp (key), replacement);
#else
           result.replace (QRegularExpression (key), replacement);
#endif
          }
       else
           result.replace (key, replacement);
      }

  return result;
}
132 
133 
/*
 Returns source with everything between '<' and '>' removed.

 This is a plain character scan: '<' switches copying off, '>' switches it
 back on, and neither of the two characters is ever copied. Entities are not
 decoded and comments or quoted attribute values get no special treatment.
*/
QString strip_html (const QString &source)
{
  bool do_copy = true;
  QString dest;

  for (int i = 0; i < source.length(); i++)
      {
       const QChar ch = source.at (i);

       if (ch == '<')
          do_copy = false;
       else
       if (ch == '>')
          //the character after '>' is examined by the next iteration,
          //so a tag that follows immediately is stripped as well
          do_copy = true;
       else
       if (do_copy)
          dest += ch;
      }

  return dest;
}
159 
160 
qstringlist_process(const QString & s,const QString & params,int mode)161 QString qstringlist_process (const QString &s, const QString &params, int mode)
162 {
163   QStringList sl;
164   QStringList l;
165   QString result;
166 
167   if (mode != QSTRL_PROC_FLT_WITH_SORTCASECARE_SEP && mode != QSTRL_PROC_LIST_FLIP_SEP)
168       sl = s.split (QChar::ParagraphSeparator);
169 
170   switch (mode)
171          {
172           case QSTRL_PROC_FLT_WITH_SORTCASECARE_SEP:
173                                                     {
174                                                      if (s.indexOf (params) == -1)
175                                                         return s;
176 
177                                                      QStringList t = s.split (params);
178                                                      t.sort();
179                                                      result = t.join (params);
180                                                      return result;
181                                                     };
182 
183           case QSTRL_PROC_LIST_FLIP_SEP:  {
184                                            if (s.indexOf (params) == -1)
185                                               return s;
186 
187                                            QStringList t = s.split (params);
188                                            t.sort();
189 
190                                            for (int i = 0; i < t.size(); i++)
191                                                 l.prepend (t.at(i));
192 
193                                            result = l.join (params);
194                                            return result;
195                                           };
196 
197 
198           case QSTRL_PROC_FLT_WITH_SORTNOCASECARE:
199                                                  {
200                                                   QMap <QString, QString> map;
201 
202                                                   for (int i = 0; i < sl.size(); i++)
203                                                       map.insert (sl[i].toLower(), sl[i]);
204 
205                                                   for (QMap<QString, QString>::const_iterator i = map.constBegin();
206                                                        i != map.constEnd();
207                                                        ++i)
208                                                        l.append (i.value());
209 
210                                                   break;
211                                                  }
212 
213           case QSTRL_PROC_FLT_WITH_SORTLEN:
214                                                  {
215                                                   l = sl;
216                                                   std::sort (l.begin(), l.end(), qstring_length_less_than);
217                                                   break;
218                                                  }
219 
220 
221           case QSTRL_PROC_FLT_REMOVE_EMPTY:
222                                            {
223                                             for (QList <QString>::iterator i = sl.begin(); i != sl.end(); ++i)
224                                                 if (! i->isEmpty())
225                                                    l.append (*i);
226 
227                                             break;
228                                            };
229 
230 
231           case QSTRL_PROC_FLT_REMOVE_DUPS:
232                                           {
233                                            for (QList <QString>::iterator i = sl.begin(); i != sl.end(); ++i)
234                                                if (! l.contains (*i))
235                                                   l.append (*i);
236 
237                                            break;
238                                           };
239 
240           case QSTRL_PROC_REMOVE_FORMATTING:
241                                            {
242                                             for (QList <QString>::iterator i = sl.begin(); i != sl.end(); ++i)
243                                                  l.append (i->simplified());
244 
245                                             break;
246                                            };
247 
248            case QSTRL_PROC_FLT_WITH_REGEXP:
249                                           {
250 #if QT_VERSION < 0x050000
251                                            l = sl.filter (QRegExp (params));
252 #else
253                                            l = sl.filter (QRegularExpression (params));
254 #endif
255                                            break;
256                                           }
257 
258            case QSTRL_PROC_FLT_WITH_SORTCASECARE:
259                                                  {
260                                                   l = sl;
261                                                   l.sort();
262                                                   break;
263                                                  }
264 
265            case QSTRL_PROC_LIST_FLIP:
266                                      {
267                                       for (QList <QString>::iterator i = sl.begin(); i != sl.end(); ++i)
268                                            l.prepend (*i);
269 
270                                       break;
271                                      }
272 
273            case QSTRL_PROC_FLT_LESS:
274                                     {
275                                      int t = params.toInt();
276 
277                                      for (QList <QString>::iterator i = sl.begin(); i != sl.end(); ++i)
278                                           if (i->size() > t)
279                                               l.append (*i);
280 
281                                      break;
282                                     }
283 
284            case QSTRL_PROC_FLT_GREATER:
285                                     {
286                                      int t = params.toInt();
287 
288                                      for (QList <QString>::iterator i = sl.begin(); i != sl.end(); ++i)
289                                           if (i->size() < t)
290                                               l.append (*i);
291 
292                                      break;
293                                     }
294          }
295 
296   result = l.join ("\n");
297   return result;
298 }
299 
300 
/*
 Returns s with its QChar code units in reverse order.

 The reversal works on single QChars, so a surrogate pair comes out with
 its two halves swapped.
*/
QString string_reverse (const QString &s)
{
  QString sn;
  sn.reserve (s.length());

  //append instead of assigning through operator[]: indexing past the end
  //of an empty string is not a valid way to grow it
  for (int i = s.length() - 1; i > -1; i--)
       sn.append (s.at (i));

  return sn;
}
313 
314 
/*
 Replaces the straight double quotes of source alternately with c1 and c2:
 the first, third, ... quote becomes c1, the second, fourth, ... becomes c2.
 All other characters are copied unchanged.
*/
QString conv_quotes (const QString &source, const QString &c1, const QString &c2)
{
  QString converted;
  bool opening = true; //true when the next quote mark gets c1

  for (int pos = 0; pos < source.size(); ++pos)
      {
       const QChar ch = source.at (pos);

       if (ch != '\"')
          {
           converted += ch;
           continue;
          }

       converted += opening ? c1 : c2;
       opening = ! opening;
      }

  return converted;
}
340 
341 
html_get_by_patt(const QString & s,const QString & spatt)342 QStringList html_get_by_patt (const QString &s, const QString &spatt)
343 {
344   QStringList result;
345 
346   int c = s.size();
347   int i = 0;
348 
349   while (i < c)
350         {
351          int start = s.indexOf (spatt, i, Qt::CaseInsensitive);
352 
353          if (start == -1)
354              break;
355 
356          int end = s.indexOf ('"', start + spatt.size());
357          if (end == -1)
358              break;
359 
360          result.prepend (s.mid (start + spatt.size(), (end - start) - spatt.size()));
361 
362          i = end + 1;
363         }
364 
365   return result;
366 }
367 
368 
anagram(const QString & s)369 QStringList anagram (const QString &s)
370 {
371   QString input = s;
372   QStringList sl;
373 
374   sort (input.begin(), input.end());
375 
376   do
377     sl.append (input);
378   while (next_permutation (input.begin(), input.end()));
379 
380   return sl;
381 }
382 
383 /*
384 from:
385 
386 * roman.c by Adam Rogoyski (apoc@laker.net) Temperanc on EFNet irc
387  * Copyright (C) 1998 Adam Rogoyski
 * Converts Decimal numbers to Roman Numerals and Roman Numerals to
389  * Decimals on the command line or in Interactive mode.
390  * Uses an expanded Roman Numeral set to handle numbers up to 999999999
391 */
392 
/* Values of the letters of the expanded Roman numeral set */
#define FROM_ROMAN_I 1
#define FROM_ROMAN_V 5
#define FROM_ROMAN_X 10
#define FROM_ROMAN_L 50
#define FROM_ROMAN_C 100
#define FROM_ROMAN_D 500
#define FROM_ROMAN_M 1000
#define FROM_ROMAN_P 5000
#define FROM_ROMAN_Q 10000
#define FROM_ROMAN_R 50000
#define FROM_ROMAN_S 100000
#define FROM_ROMAN_T 500000
#define FROM_ROMAN_U 1000000
#define FROM_ROMAN_B 5000000
#define FROM_ROMAN_W 10000000
#define FROM_ROMAN_N 50000000
#define FROM_ROMAN_Y 100000000
#define FROM_ROMAN_Z 500000000

/*
 Returns the value of the Roman numeral letter c, or 0 when c is not one of
 the (upper-case) letters of the set above.
*/
int value (char c)
{
  static const struct
    {
     char letter;
     int amount;
    }
  numerals[] =
    {
     {'I', FROM_ROMAN_I}, {'V', FROM_ROMAN_V}, {'X', FROM_ROMAN_X},
     {'L', FROM_ROMAN_L}, {'C', FROM_ROMAN_C}, {'D', FROM_ROMAN_D},
     {'M', FROM_ROMAN_M}, {'P', FROM_ROMAN_P}, {'Q', FROM_ROMAN_Q},
     {'R', FROM_ROMAN_R}, {'S', FROM_ROMAN_S}, {'T', FROM_ROMAN_T},
     {'U', FROM_ROMAN_U}, {'B', FROM_ROMAN_B}, {'W', FROM_ROMAN_W},
     {'N', FROM_ROMAN_N}, {'Y', FROM_ROMAN_Y}, {'Z', FROM_ROMAN_Z}
    };

  const unsigned int numerals_count = sizeof (numerals) / sizeof (numerals[0]);

  for (unsigned int k = 0; k < numerals_count; k++)
      if (numerals[k].letter == c)
         return numerals[k].amount;

  return 0;
}
456 
457 
romanToDecimal(const char * roman)458 int romanToDecimal (const char *roman)
459 {
460   int decimal = 0;
461   for (; *roman; roman++)
462       {
463       /* Check for four of a letter in a fow */
464       if ((*(roman + 1) && *(roman + 2) && *(roman + 3))
465          && (*roman == *(roman + 1))
466          && (*roman == *(roman + 2))
467          && (*roman == *(roman + 3)))
468          return 0;
469       /* Check for two five type numbers */
470       if (  ((*roman == 'V') && (*(roman + 1) == 'V'))
471          || ((*roman == 'L') && (*(roman + 1) == 'L'))
472          || ((*roman == 'D') && (*(roman + 1) == 'D'))
473          || ((*roman == 'P') && (*(roman + 1) == 'P'))
474          || ((*roman == 'R') && (*(roman + 1) == 'R'))
475          || ((*roman == 'T') && (*(roman + 1) == 'T'))
476          || ((*roman == 'B') && (*(roman + 1) == 'B'))
477          || ((*roman == 'N') && (*(roman + 1) == 'N'))
478          || ((*roman == 'Z') && (*(roman + 1) == 'Z')))
479          return 0;
480       /* Check for two lower characters before a larger one */
481       if ((value(*roman) == value(*(roman + 1))) && (*(roman + 2))
482          && (value(*(roman + 1)) < value(*(roman + 2))))
483          return 0;
484       /* Check for the same character on either side of a larger one */
485       if ((*(roman + 1) && *(roman + 2))
486          && (value(*roman) == value(*(roman + 2)))
487          && (value(*roman) < value(*(roman + 1))))
488          return 0;
489       /* Check for illegal nine type numbers */
490       if (!strncmp(roman, "LXL", 3) || !strncmp(roman, "DCD", 3)
491        || !strncmp(roman, "PMP", 3) || !strncmp(roman, "RQR", 3)
492        || !strncmp(roman, "TST", 3) || !strncmp(roman, "BUB", 3)
493        || !strncmp(roman, "NWN", 3) || !strncmp(roman, "VIV", 3))
494          return 0;
495       if (value(*roman) < value(*(roman + 1)))
496       {
497          /* check that subtracted value is at least 10% larger,
498             i.e. 1990 is not MXM, but MCMXC */
499          if ((10 * value(*roman)) < value(*(roman + 1)))
500             return 0;
501          /* check for double subtraction, i.e. IVX */
502          if (value(*(roman + 1)) <= value(*(roman + 2)))
503             return 0;
504          /* check for subtracting by a number starting with a 5
505             ie.  VX, LD LM */
506          if (*roman == 'V' || *roman == 'L' || *roman == 'D'
507           || *roman == 'P' || *roman == 'R' || *roman == 'T'
508           || *roman == 'B' || *roman == 'N')
509             return 0;
510          decimal += value (*(roman + 1)) - value (*roman);
511          roman++;
512       }
513       else
514       {
515          decimal += value (*roman);
516       }
517    }
518    return decimal;
519 }
520 
521 
522 //this code is taken from Scribus::util.cpp:
arabicToRoman(int i)523 QString arabicToRoman (int i)
524 {
525   QString roman;
526 
527   int arabic = i;
528 
529   while (arabic - 1000000 >= 0){
530   roman += "m";
531   arabic -= 1000000;
532   }
533   while (arabic - 900000 >= 0){
534   roman += "cm";
535   arabic -= 900000;
536   }
537   while (arabic - 500000 >= 0){
538   roman += "d";
539   arabic -= 500000;
540   }
541   while (arabic - 400000 >= 0){
542   roman += "cd";
543   arabic -= 400000;
544   }
545   while (arabic - 100000 >= 0){
546   roman += "c";
547   arabic -= 100000;
548   }
549   while (arabic - 90000 >= 0){
550   roman += "xc";
551   arabic -= 90000;
552   }
553   while (arabic - 50000 >= 0){
554   roman += "l";
555   arabic -= 50000;
556   }
557   while (arabic - 40000 >= 0){
558   roman += "xl";
559   arabic -= 40000;
560   }
561   while (arabic - 10000 >= 0){
562   roman += "x";
563   arabic -= 10000;
564   }
565   while (arabic - 9000 >= 0){
566   roman += "Mx";
567   arabic -= 9000;
568   }
569   while (arabic - 5000 >= 0){
570   roman += "v";
571   arabic -= 5000;
572   }
573   while (arabic - 4000 >= 0){
574   roman += "Mv";
575   arabic -= 4000;
576   }
577   while (arabic - 1000 >= 0){
578   roman += "M";
579   arabic -= 1000;
580   }
581   while (arabic - 900 >= 0){
582   roman += "CM";
583   arabic -= 900;
584   }
585   while (arabic - 500 >= 0){
586   roman += "D";
587   arabic -= 500;
588   }
589   while (arabic - 400 >= 0){
590   roman += "CD";
591   arabic -= 400;
592   }
593   while (arabic - 100 >= 0){
594   roman += "C";
595   arabic -= 100;
596   }
597   while (arabic - 90 >= 0){
598   roman += "XC";
599   arabic -= 90;
600   }
601   while (arabic - 50 >= 0){
602   roman += "L";
603   arabic -= 50;
604   }
605   while (arabic - 40 >= 0){
606   roman += "XL";
607   arabic -= 40;
608   }
609   while (arabic - 10 >= 0){
610   roman += "X";
611   arabic -= 10;
612   }
613   while (arabic - 9 >= 0){
614   roman += "IX";
615   arabic -= 9;
616   }
617   while (arabic - 5 >= 0){
618   roman += "V";
619   arabic -= 5;
620   }
621   while (arabic - 4 >= 0){
622   roman += "IV";
623   arabic -= 4;
624   }
625   while (arabic - 1 >= 0){
626   roman += "I";
627   arabic -= 1;
628   }
629   return roman;
630 }
631 
632 
int_to_binary(int n)633 QString int_to_binary (int n)
634 {
635   QString result;
636   int sz = sizeof (n) * 8 - 1;
637 
638   for (int i = sz; i > -1; i--)
639       {
640        if (n & (1 << i))
641            result.append ("1");
642        else
643            result.append ("0");
644 
645        if (i % 4 == 0)
646           result.append (" ");
647       }
648 
649   return result;
650 }
651 
652 
bin_to_decimal(const QString & s)653 unsigned int bin_to_decimal (const QString &s)
654 {
655   unsigned int table[31];
656   unsigned int c = 1;
657   unsigned int result = 0;
658   QString sn = string_reverse (s);
659 
660   table[0] = 1;
661 
662   for (int i = 1; i < 31; i++)
663       {
664        c *= 2;
665        table[i] = c;
666       }
667 
668   for (int i = 0; i < sn.size(); i++)
669       if (sn[i] == '1')
670          result += table[i];
671 
672   return result;
673 }
674 
675 
/*
 Returns s with the characters & " ' < > replaced with the entities
 &amp; &quot; &apos; &lt; &gt;.
*/
QString str_to_entities (const QString &s)
{
  QString escaped (s);

  //the ampersand goes first, otherwise the '&' of the entities inserted
  //below would be escaped once more
  escaped.replace ("&", "&amp;");

  escaped.replace ("\"", "&quot;");
  escaped.replace ("'", "&apos;");

  escaped.replace ("<", "&lt;");
  escaped.replace (">", "&gt;");

  return escaped;
}
689 
690 
/*
 Encodes s with the table loaded from ":/text-data/morse-" + lang.

 s is upper-cased and looked up character by character; each code found is
 followed by one space, and characters without a table entry are skipped.
*/
QString morse_from_lang (const QString &s, const QString &lang)
{
  QHash<QString, QString> table = hash_load_keyval (":/text-data/morse-" + lang);

  const QString upper = s.toUpper();
  QString encoded;

  for (int pos = 0; pos != upper.size(); ++pos)
      {
       const QString code = table.value (QString (upper.at (pos)));

       if (code.isEmpty())
          continue;

       encoded.append (code);
       encoded.append (" ");
      }

  return encoded;
}
708 
709 
/*
 Decodes s with the table loaded from ":/text-data/morse-" + lang.

 s is upper-cased and split on single spaces; each piece is looked up among
 the table values and the matching key is appended to the result. Pieces
 without a match are skipped.
*/
QString morse_to_lang (const QString &s, const QString &lang)
{
  QHash<QString, QString> table = hash_load_keyval (":/text-data/morse-" + lang);

  const QStringList codes = s.toUpper().split (" ");
  QString decoded;

  for (QStringList::const_iterator it = codes.constBegin(); it != codes.constEnd(); ++it)
      {
       const QString letter = table.key (*it);

       if (! letter.isEmpty())
          decoded += letter;
      }

  return decoded;
}
727 
728 
729 //from http://www.cyberforum.ru/cpp-beginners/thread125615.html
/*
 Converts a Roman numeral made of the upper-case letters M D C L X V I to
 its decimal value.

 C, X and I are subtracted when directly followed by one of the two next
 larger letters (D/M, L/C, V/X) and added otherwise; M, D, L and V are
 always added. Other characters are ignored. No validation is done.
*/
int get_arab_num (std::string rom_str)
{
  int total = 0;
  const size_t len = rom_str.length();

  for (size_t pos = 0; pos < len; ++pos)
      {
       //the letter that follows, '\0' after the last one
       const char next = (pos + 1 < len) ? rom_str[pos + 1] : '\0';

       int weight = 0;
       bool subtract = false;

       switch (rom_str[pos])
              {
               case 'M':
                        weight = 1000;
                        break;
               case 'D':
                        weight = 500;
                        break;
               case 'C':
                        weight = 100;
                        subtract = (next == 'D' || next == 'M');
                        break;
               case 'L':
                        weight = 50;
                        break;
               case 'X':
                        weight = 10;
                        subtract = (next == 'L' || next == 'C');
                        break;
               case 'V':
                        weight = 5;
                        break;
               case 'I':
                        weight = 1;
                        subtract = (next == 'V' || next == 'X');
                        break;
              }

       if (subtract)
          total -= weight;
       else
           total += weight;
      }

  return total;
}
770