1 /*
2  * Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU  General Public License as published by
6  * the Free Software Foundation; either version 3 of the License, or
7  * any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17  * MA 02110-1301, USA.
18  */
19 
20 #include "reader.h"
21 #include <time.h>
22 
23 #include <zim/search.h>
24 
25 #include "tools/otherTools.h"
26 
/* Return the lowercase hexadecimal digit of the high nibble (bits 4-7) of v. */
inline char hi(char v)
{
  static const char digits[] = "0123456789abcdef";
  const int highNibble = (v >> 4) & 0xf;
  return digits[highNibble];
}
32 
/* Return the lowercase hexadecimal digit of the low nibble (bits 0-3) of v. */
inline char lo(char v)
{
  static const char digits[] = "0123456789abcdef";
  const int lowNibble = v & 0xf;
  return digits[lowNibble];
}
38 
hexUUID(std::string in)39 std::string hexUUID(std::string in)
40 {
41   std::ostringstream out;
42   for (unsigned n = 0; n < 4; ++n) {
43     out << hi(in[n]) << lo(in[n]);
44   }
45   out << '-';
46   for (unsigned n = 4; n < 6; ++n) {
47     out << hi(in[n]) << lo(in[n]);
48   }
49   out << '-';
50   for (unsigned n = 6; n < 8; ++n) {
51     out << hi(in[n]) << lo(in[n]);
52   }
53   out << '-';
54   for (unsigned n = 8; n < 10; ++n) {
55     out << hi(in[n]) << lo(in[n]);
56   }
57   out << '-';
58   for (unsigned n = 10; n < 16; ++n) {
59     out << hi(in[n]) << lo(in[n]);
60   }
61   std::string op = out.str();
62   return op;
63 }
64 
65 namespace kiwix
66 {
67 /* Constructor */
Reader(const string zimFilePath)68 Reader::Reader(const string zimFilePath) : zimFileHandler(NULL)
69 {
70   string tmpZimFilePath = zimFilePath;
71 
72   /* Remove potential trailing zimaa */
73   size_t found = tmpZimFilePath.rfind("zimaa");
74   if (found != string::npos && tmpZimFilePath.size() > 5
75       && found == tmpZimFilePath.size() - 5) {
76     tmpZimFilePath.resize(tmpZimFilePath.size() - 2);
77   }
78 
79   this->zimFileHandler = new zim::File(tmpZimFilePath);
80 
81   if (this->zimFileHandler != NULL) {
82     this->firstArticleOffset
83         = this->zimFileHandler->getNamespaceBeginOffset('A');
84     this->lastArticleOffset = this->zimFileHandler->getNamespaceEndOffset('A');
85     this->nsACount = this->zimFileHandler->getNamespaceCount('A');
86     this->nsICount = this->zimFileHandler->getNamespaceCount('I');
87     this->zimFilePath = zimFilePath;
88   }
89 
90   /* initialize random seed: */
91   srand(time(NULL));
92 }
93 
94 /* Destructor */
~Reader()95 Reader::~Reader()
96 {
97   if (this->zimFileHandler != NULL) {
98     delete this->zimFileHandler;
99   }
100 }
101 
getZimFileHandler() const102 zim::File* Reader::getZimFileHandler() const
103 {
104   return this->zimFileHandler;
105 }
106 
parseCounterMetadata() const107 MimeCounterType Reader::parseCounterMetadata() const
108 {
109   zim::Article article = this->zimFileHandler->getArticle('M', "Counter");
110 
111   if (article.good()) {
112     return parseMimetypeCounter(article.getData());
113   }
114 
115   return MimeCounterType();
116 }
117 
118 /* Get the count of articles which can be indexed/displayed */
getArticleCount() const119 unsigned int Reader::getArticleCount() const
120 {
121   std::map<const std::string, unsigned int> counterMap
122       = this->parseCounterMetadata();
123   unsigned int counter = 0;
124 
125   if (counterMap.empty()) {
126     counter = this->nsACount;
127   } else {
128     for(auto &pair:counterMap) {
129       if (startsWith(pair.first, "text/html")) {
130         counter += pair.second;
131       }
132     }
133   }
134 
135   return counter;
136 }
137 
138 /* Get the count of medias content in the ZIM file */
getMediaCount() const139 unsigned int Reader::getMediaCount() const
140 {
141   std::map<const std::string, unsigned int> counterMap
142       = this->parseCounterMetadata();
143   unsigned int counter = 0;
144 
145   if (counterMap.empty()) {
146     counter = this->nsICount;
147   } else {
148     auto it = counterMap.find("image/jpeg");
149     if (it != counterMap.end()) {
150       counter += it->second;
151     }
152 
153     it = counterMap.find("image/gif");
154     if (it != counterMap.end()) {
155       counter += it->second;
156     }
157 
158     it = counterMap.find("image/png");
159     if (it != counterMap.end()) {
160       counter += it->second;
161     }
162   }
163   return counter;
164 }
165 
166 /* Get the total of all items of a ZIM file, redirects included */
getGlobalCount() const167 unsigned int Reader::getGlobalCount() const
168 {
169   return this->zimFileHandler->getCountArticles();
170 }
171 
172 /* Return the UID of the ZIM file */
getId() const173 string Reader::getId() const
174 {
175   std::ostringstream s;
176   s << this->zimFileHandler->getFileheader().getUuid();
177   return s.str();
178 }
179 
180 /* Return a page url from a title */
getPageUrlFromTitle(const string & title,string & url) const181 bool Reader::getPageUrlFromTitle(const string& title, string& url) const
182 {
183   try {
184     auto entry = getEntryFromTitle(title);
185     entry = entry.getFinalEntry();
186     url = entry.getPath();
187     return true;
188   } catch (NoEntry& e) {
189     return false;
190   }
191 }
192 
193 /* Return an URL from a title */
getRandomPageUrl() const194 string Reader::getRandomPageUrl() const
195 {
196   return getRandomPage().getPath();
197 }
198 
getRandomPage() const199 Entry Reader::getRandomPage() const
200 {
201   if (!this->zimFileHandler) {
202     throw NoEntry();
203   }
204 
205   zim::Article article;
206   std::string mainPagePath = this->getMainPage().getPath();
207   int watchdog = 42;
208 
209   do {
210     auto idx = this->firstArticleOffset
211           + (zim::size_type)((double)rand() / ((double)RAND_MAX + 1)
212                              * this->nsACount);
213     article = zimFileHandler->getArticle(idx);
214     if (!watchdog--) {
215       throw NoEntry();
216     }
217   } while (!article.good() && article.getLongUrl() == mainPagePath);
218 
219   return article;
220 }
221 
222 /* Return the welcome page URL */
getMainPageUrl() const223 string Reader::getMainPageUrl() const
224 {
225   return getMainPage().getPath();
226 }
227 
getMainPage() const228 Entry Reader::getMainPage() const
229 {
230   if (!this->zimFileHandler) {
231     throw NoEntry();
232   }
233 
234   zim::Article article;
235   if (this->zimFileHandler->getFileheader().hasMainPage())
236   {
237     article = zimFileHandler->getArticle(
238         this->zimFileHandler->getFileheader().getMainPage());
239   }
240 
241   if (!article.good())
242   {
243     return getFirstPage();
244   }
245 
246   return article;
247 }
248 
getFavicon(string & content,string & mimeType) const249 bool Reader::getFavicon(string& content, string& mimeType) const
250 {
251   static const char* const paths[] = {"-/favicon", "-/favicon.png", "I/favicon.png", "I/favicon"};
252 
253   for (auto &path: paths) {
254     try {
255       auto entry = getEntryFromPath(path);
256       entry = entry.getFinalEntry();
257       content = entry.getContent();
258       mimeType = entry.getMimetype();
259       return true;
260     } catch(NoEntry& e) {};
261   }
262 
263   return false;
264 }
265 
getZimFilePath() const266 string Reader::getZimFilePath() const
267 {
268   return this->zimFilePath;
269 }
270 /* Return a metatag value */
getMetadata(const string & name,string & value) const271 bool Reader::getMetadata(const string& name, string& value) const
272 {
273   try {
274     auto entry = getEntryFromPath("M/"+name);
275     value = entry.getContent();
276     return true;
277   } catch(NoEntry& e) {
278     return false;
279   }
280 }
281 
282 #define METADATA(NAME) std::string v; getMetadata(NAME, v); return v;
283 
getName() const284 string Reader::getName() const
285 {
286   METADATA("Name")
287 }
288 
getTitle() const289 string Reader::getTitle() const
290 {
291   string value;
292   this->getMetadata("Title", value);
293   if (value.empty()) {
294     value = getLastPathElement(zimFileHandler->getFilename());
295     std::replace(value.begin(), value.end(), '_', ' ');
296     size_t pos = value.find(".zim");
297     value = value.substr(0, pos);
298   }
299   return value;
300 }
301 
getCreator() const302 string Reader::getCreator() const
303 {
304   METADATA("Creator")
305 }
306 
getPublisher() const307 string Reader::getPublisher() const
308 {
309   METADATA("Publisher")
310 }
311 
getDate() const312 string Reader::getDate() const
313 {
314   METADATA("Date")
315 }
316 
getDescription() const317 string Reader::getDescription() const
318 {
319   string value;
320   this->getMetadata("Description", value);
321 
322   /* Mediawiki Collection tends to use the "Subtitle" name */
323   if (value.empty()) {
324     this->getMetadata("Subtitle", value);
325   }
326 
327   return value;
328 }
329 
getLongDescription() const330 string Reader::getLongDescription() const
331 {
332   METADATA("LongDescription")
333 }
334 
getLanguage() const335 string Reader::getLanguage() const
336 {
337   METADATA("Language")
338 }
339 
getLicense() const340 string Reader::getLicense() const
341 {
342   METADATA("License")
343 }
344 
getTags(bool original) const345 string Reader::getTags(bool original) const
346 {
347   string tags_str;
348   getMetadata("Tags", tags_str);
349   if (original) {
350     return tags_str;
351   }
352   auto tags = convertTags(tags_str);
353   return join(tags, ";");
354 }
355 
356 
getTagStr(const std::string & tagName) const357 string Reader::getTagStr(const std::string& tagName) const
358 {
359   string tags_str;
360   getMetadata("Tags", tags_str);
361   return getTagValueFromTagList(convertTags(tags_str), tagName);
362 }
363 
getTagBool(const std::string & tagName) const364 bool Reader::getTagBool(const std::string& tagName) const
365 {
366   return convertStrToBool(getTagStr(tagName));
367 }
368 
getRelation() const369 string Reader::getRelation() const
370 {
371   METADATA("Relation")
372 }
373 
getFlavour() const374 string Reader::getFlavour() const
375 {
376   METADATA("Flavour")
377 }
378 
getSource() const379 string Reader::getSource() const
380 {
381   METADATA("Source")
382 }
383 
getScraper() const384 string Reader::getScraper() const
385 {
386   METADATA("Scraper")
387 }
388 #undef METADATA
389 
getOrigId() const390 string Reader::getOrigId() const
391 {
392   string value;
393   this->getMetadata("startfileuid", value);
394   if (value.empty()) {
395     return "";
396   }
397   std::string id = value;
398   std::string origID;
399   std::string temp = "";
400   unsigned int k = 0;
401   char tempArray[16] = "";
402   for (unsigned int i = 0; i < id.size(); i++) {
403     if (id[i] == '\n') {
404       tempArray[k] = atoi(temp.c_str());
405       temp = "";
406       k++;
407     } else {
408       temp += id[i];
409     }
410   }
411   origID = hexUUID(tempArray);
412   return origID;
413 }
414 
415 /* Return the first page URL */
getFirstPageUrl() const416 string Reader::getFirstPageUrl() const
417 {
418   return getFirstPage().getPath();
419 }
420 
getFirstPage() const421 Entry Reader::getFirstPage() const
422 {
423   if (!this->zimFileHandler) {
424     throw NoEntry();
425   }
426 
427   auto firstPageOffset = zimFileHandler->getNamespaceBeginOffset('A');
428   auto article = zimFileHandler->getArticle(firstPageOffset);
429 
430   if (! article.good()) {
431     throw NoEntry();
432   }
433 
434   return article;
435 }
436 
/*
 * Split a url of the form "[/]<ns>/<title>" into its namespace character and
 * its title.
 *
 * Returns true only when both parts are present. `*ns` is written as soon as
 * a namespace character is found, even if false is returned afterwards;
 * `title` is written only on success.
 */
bool _parseUrl(const string& url, char* ns, string& title)
{
  const unsigned int length = url.size();

  /* Ignore the first '/' */
  unsigned int pos = (length > 0 && url[0] == '/') ? 1 : 0;

  /* A namespace character is required, and it cannot be '/' */
  if (pos >= length || url[pos] == '/') {
    return false;
  }
  *ns = url[pos];
  ++pos;

  /* The namespace must be followed by a '/' separator... */
  if (pos >= length || url[pos] != '/') {
    return false;
  }
  ++pos;

  /* ...and by a non-empty title */
  if (pos >= length) {
    return false;
  }

  title = url.substr(pos);
  return true;
}
466 
parseUrl(const string & url,char * ns,string & title) const467 bool Reader::parseUrl(const string& url, char* ns, string& title) const
468 {
469   return _parseUrl(url, ns, title);
470 }
471 
getEntryFromPath(const std::string & path) const472 Entry Reader::getEntryFromPath(const std::string& path) const
473 {
474   char ns = 0;
475   std::string short_url;
476 
477   if (!this->zimFileHandler) {
478     throw NoEntry();
479   }
480   _parseUrl(path, &ns, short_url);
481 
482   if (short_url.empty() && ns == 0) {
483     return getMainPage();
484   }
485 
486   auto article = zimFileHandler->getArticle(ns, short_url);
487   if (!article.good()) {
488     throw NoEntry();
489   }
490 
491   return article;
492 }
493 
getEntryFromEncodedPath(const std::string & path) const494 Entry Reader::getEntryFromEncodedPath(const std::string& path) const
495 {
496   return getEntryFromPath(urlDecode(path, true));
497 }
498 
getEntryFromTitle(const std::string & title) const499 Entry Reader::getEntryFromTitle(const std::string& title) const
500 {
501   if (!this->zimFileHandler) {
502     throw NoEntry();
503   }
504 
505   auto article = this->zimFileHandler->getArticleByTitle('A', title);
506   if (!article.good()) {
507     throw NoEntry();
508   }
509 
510   return article;
511 }
512 
513 /* Return article by url */
getArticleObjectByDecodedUrl(const string & url,zim::Article & article) const514 bool Reader::getArticleObjectByDecodedUrl(const string& url,
515                                           zim::Article& article) const
516 {
517   if (this->zimFileHandler == NULL) {
518     return false;
519   }
520 
521   /* Parse the url */
522   char ns = 0;
523   string urlStr;
524   _parseUrl(url, &ns, urlStr);
525 
526   /* Main page */
527   if (urlStr.empty() && ns == 0) {
528     _parseUrl(this->getMainPage().getPath(), &ns, urlStr);
529   }
530 
531   /* Extract the content from the zim file */
532   article = zimFileHandler->getArticle(ns, urlStr);
533   return article.good();
534 }
535 
536 /* Return the mimeType without the content */
getMimeTypeByUrl(const string & url,string & mimeType) const537 bool Reader::getMimeTypeByUrl(const string& url, string& mimeType) const
538 {
539   try {
540     auto entry = getEntryFromPath(url);
541     mimeType = entry.getMimetype();
542     return true;
543   } catch (NoEntry& e) {
544     mimeType = "";
545     return false;
546   }
547 }
548 
get_content_by_decoded_url(const Reader & reader,const string & url,string & content,string & title,unsigned int & contentLength,string & contentType,string & baseUrl)549 bool get_content_by_decoded_url(const Reader& reader,
550                                 const string& url,
551                                 string& content,
552                                 string& title,
553                                 unsigned int& contentLength,
554                                 string& contentType,
555                                 string& baseUrl)
556 {
557   content = "";
558   contentType = "";
559   contentLength = 0;
560 
561   try {
562     auto entry = reader.getEntryFromPath(url);
563     entry = entry.getFinalEntry();
564     baseUrl = entry.getPath();
565     contentType = entry.getMimetype();
566     content = entry.getContent();
567     contentLength = entry.getSize();
568     title = entry.getTitle();
569 
570     /* Try to set a stub HTML header/footer if necesssary */
571     if (contentType.find("text/html") != string::npos
572       && content.find("<body") == std::string::npos
573       && content.find("<BODY") == std::string::npos) {
574       content = "<html><head><title>" + title +
575               "</title><meta http-equiv=\"Content-Type\" content=\"text/html; "
576               "charset=utf-8\" /></head><body>" +
577               content + "</body></html>";
578     }
579     return true;
580   } catch (NoEntry& e) {
581     return false;
582   }
583 }
584 
585 
586 /* Get a content from a zim file */
getContentByUrl(const string & url,string & content,string & title,unsigned int & contentLength,string & contentType) const587 bool Reader::getContentByUrl(const string& url,
588                              string& content,
589                              string& title,
590                              unsigned int& contentLength,
591                              string& contentType) const
592 {
593   std::string stubRedirectUrl;
594   return get_content_by_decoded_url(*this,
595                                 kiwix::urlDecode(url),
596                                 content,
597                                 title,
598                                 contentLength,
599                                 contentType,
600                                 stubRedirectUrl);
601 }
602 
getContentByEncodedUrl(const string & url,string & content,string & title,unsigned int & contentLength,string & contentType,string & baseUrl) const603 bool Reader::getContentByEncodedUrl(const string& url,
604                                     string& content,
605                                     string& title,
606                                     unsigned int& contentLength,
607                                     string& contentType,
608                                     string& baseUrl) const
609 {
610   return get_content_by_decoded_url(*this,
611                                 kiwix::urlDecode(url),
612                                 content,
613                                 title,
614                                 contentLength,
615                                 contentType,
616                                 baseUrl);
617 }
618 
getContentByEncodedUrl(const string & url,string & content,string & title,unsigned int & contentLength,string & contentType) const619 bool Reader::getContentByEncodedUrl(const string& url,
620                                     string& content,
621                                     string& title,
622                                     unsigned int& contentLength,
623                                     string& contentType) const
624 {
625   std::string stubRedirectUrl;
626   return get_content_by_decoded_url(*this,
627                                 kiwix::urlDecode(url),
628                                 content,
629                                 title,
630                                 contentLength,
631                                 contentType,
632                                 stubRedirectUrl);
633 }
634 
getContentByDecodedUrl(const string & url,string & content,string & title,unsigned int & contentLength,string & contentType) const635 bool Reader::getContentByDecodedUrl(const string& url,
636                                     string& content,
637                                     string& title,
638                                     unsigned int& contentLength,
639                                     string& contentType) const
640 {
641   std::string stubRedirectUrl;
642   return get_content_by_decoded_url(*this,
643                                 url,
644                                 content,
645                                 title,
646                                 contentLength,
647                                 contentType,
648                                 stubRedirectUrl);
649 }
650 
getContentByDecodedUrl(const string & url,string & content,string & title,unsigned int & contentLength,string & contentType,string & baseUrl) const651 bool Reader::getContentByDecodedUrl(const string& url,
652                                     string& content,
653                                     string& title,
654                                     unsigned int& contentLength,
655                                     string& contentType,
656                                     string& baseUrl) const
657 {
658   return get_content_by_decoded_url(*this,
659                                 url,
660                                 content,
661                                 title,
662                                 contentLength,
663                                 contentType,
664                                 baseUrl);
665 }
666 
667 /* Check if an article exists */
urlExists(const string & url) const668 bool Reader::urlExists(const string& url) const
669 {
670   return pathExists(url);
671 }
672 
pathExists(const string & path) const673 bool Reader::pathExists(const string& path) const
674 {
675   if (!zimFileHandler)
676   {
677     return false;
678   }
679 
680   char ns = 0;
681   string titleStr;
682   _parseUrl(path, &ns, titleStr);
683   zim::File::const_iterator findItr = zimFileHandler->find(ns, titleStr);
684   return findItr != zimFileHandler->end() && findItr->getUrl() == titleStr;
685 }
686 
687 /* Does the ZIM file has a fulltext index */
hasFulltextIndex() const688 bool Reader::hasFulltextIndex() const
689 {
690   if (!zimFileHandler || zimFileHandler->is_multiPart() )
691   {
692     return false;
693   }
694 
695   return ( pathExists("Z//fulltextIndex/xapian")
696         || pathExists("X/fulltext/xapian"));
697 }
698 
699 /* Search titles by prefix */
700 
searchSuggestions(const string & prefix,unsigned int suggestionsCount,const bool reset)701 bool Reader::searchSuggestions(const string& prefix,
702                                unsigned int suggestionsCount,
703                                const bool reset)
704 {
705   /* Reset the suggestions otherwise check if the suggestions number is less
706    * than the suggestionsCount */
707   if (reset) {
708     this->suggestions.clear();
709     this->suggestionsOffset = this->suggestions.begin();
710   } else {
711     if (this->suggestions.size() > suggestionsCount) {
712       return false;
713     }
714   }
715 
716   auto ret =  searchSuggestions(prefix, suggestionsCount, this->suggestions);
717 
718   /* Set the cursor to the begining */
719   this->suggestionsOffset = this->suggestions.begin();
720 
721   return ret;
722 }
723 
724 
searchSuggestions(const string & prefix,unsigned int suggestionsCount,SuggestionsList_t & results)725 bool Reader::searchSuggestions(const string& prefix,
726                                unsigned int suggestionsCount,
727                                SuggestionsList_t& results)
728 {
729   bool retVal = false;
730 
731   /* Return if no prefix */
732   if (prefix.size() == 0) {
733     return false;
734   }
735 
736   for (auto articleItr = zimFileHandler->findByTitle('A', prefix);
737        articleItr != zimFileHandler->end()
738        && articleItr->getTitle().compare(0, prefix.size(), prefix) == 0
739        && results.size() < suggestionsCount;
740        ++articleItr) {
741     /* Extract the interesting part of article title & url */
742     std::string normalizedArticleTitle
743         = kiwix::normalize(articleItr->getTitle());
744     std::string articleFinalUrl = "/A/" + articleItr->getUrl();
745     if (articleItr->isRedirect()) {
746       zim::Article article = *articleItr;
747       unsigned int loopCounter = 0;
748       while (article.isRedirect() && loopCounter++ < 42) {
749         article = article.getRedirectArticle();
750       }
751       articleFinalUrl = "/A/" + article.getUrl();
752     }
753 
754     /* Go through all already found suggestions and skip if this
755        article is already in the suggestions list (with an other
756        title) */
757     bool insert = true;
758     std::vector<std::vector<std::string>>::iterator suggestionItr;
759     for (suggestionItr = results.begin();
760          suggestionItr != results.end();
761          suggestionItr++) {
762       int result = normalizedArticleTitle.compare((*suggestionItr)[2]);
763       if (result == 0 && articleFinalUrl.compare((*suggestionItr)[1]) == 0) {
764         insert = false;
765         break;
766       } else if (result < 0) {
767         break;
768       }
769     }
770 
771     /* Insert if possible */
772     if (insert) {
773       std::vector<std::string> suggestion;
774       suggestion.push_back(articleItr->getTitle());
775       suggestion.push_back(articleFinalUrl);
776       suggestion.push_back(normalizedArticleTitle);
777       results.insert(suggestionItr, suggestion);
778     }
779 
780     /* Suggestions where found */
781     retVal = true;
782   }
783 
784   return retVal;
785 }
786 
getTitleVariants(const std::string & title) const787 std::vector<std::string> Reader::getTitleVariants(
788     const std::string& title) const
789 {
790   std::vector<std::string> variants;
791   variants.push_back(title);
792   variants.push_back(kiwix::ucFirst(title));
793   variants.push_back(kiwix::lcFirst(title));
794   variants.push_back(kiwix::toTitle(title));
795   return variants;
796 }
797 
798 
searchSuggestionsSmart(const string & prefix,unsigned int suggestionsCount)799 bool Reader::searchSuggestionsSmart(const string& prefix,
800                                     unsigned int suggestionsCount)
801 {
802   this->suggestions.clear();
803   this->suggestionsOffset = this->suggestions.begin();
804 
805   auto ret = searchSuggestionsSmart(prefix, suggestionsCount, this->suggestions);
806 
807   this->suggestionsOffset = this->suggestions.begin();
808 
809   return ret;
810 }
811 
812 /* Try also a few variations of the prefix to have better results */
searchSuggestionsSmart(const string & prefix,unsigned int suggestionsCount,SuggestionsList_t & results)813 bool Reader::searchSuggestionsSmart(const string& prefix,
814                                     unsigned int suggestionsCount,
815                                     SuggestionsList_t& results)
816 {
817   std::vector<std::string> variants = this->getTitleVariants(prefix);
818   bool retVal = false;
819 
820   /* Try to search in the title using fulltext search database */
821   const auto suggestionSearch
822       = this->getZimFileHandler()->suggestions(prefix, 0, suggestionsCount);
823 
824   if (suggestionSearch->get_matches_estimated()) {
825     for (auto current = suggestionSearch->begin();
826          current != suggestionSearch->end();
827          current++) {
828       if (!current->good()) {
829           continue;
830       }
831       std::vector<std::string> suggestion;
832       suggestion.push_back(current->getTitle());
833       suggestion.push_back("/A/" + current->getUrl());
834       suggestion.push_back(kiwix::normalize(current->getTitle()));
835       results.push_back(suggestion);
836     }
837     retVal = true;
838   } else {
839     for (std::vector<std::string>::iterator variantsItr = variants.begin();
840          variantsItr != variants.end();
841          variantsItr++) {
842       retVal = this->searchSuggestions(*variantsItr, suggestionsCount, results)
843                || retVal;
844     }
845   }
846 
847   return retVal;
848 }
849 
850 /* Get next suggestion */
getNextSuggestion(string & title)851 bool Reader::getNextSuggestion(string& title)
852 {
853   if (this->suggestionsOffset != this->suggestions.end()) {
854     /* title */
855     title = (*(this->suggestionsOffset))[0];
856 
857     /* increment the cursor for the next call */
858     this->suggestionsOffset++;
859 
860     return true;
861   }
862 
863   return false;
864 }
865 
getNextSuggestion(string & title,string & url)866 bool Reader::getNextSuggestion(string& title, string& url)
867 {
868   if (this->suggestionsOffset != this->suggestions.end()) {
869     /* title */
870     title = (*(this->suggestionsOffset))[0];
871     url = (*(this->suggestionsOffset))[1];
872 
873     /* increment the cursor for the next call */
874     this->suggestionsOffset++;
875 
876     return true;
877   }
878 
879   return false;
880 }
881 
882 /* Check if the file has as checksum */
canCheckIntegrity() const883 bool Reader::canCheckIntegrity() const
884 {
885   return this->zimFileHandler->getChecksum() != "";
886 }
887 
888 /* Return true if corrupted, false otherwise */
isCorrupted() const889 bool Reader::isCorrupted() const
890 {
891   try {
892     if (this->zimFileHandler->verify() == true) {
893       return false;
894     }
895   } catch (exception& e) {
896     cerr << e.what() << endl;
897     return true;
898   }
899 
900   return true;
901 }
902 
903 /* Return the file size, works also for splitted files */
getFileSize() const904 unsigned int Reader::getFileSize() const
905 {
906   zim::File* file = this->getZimFileHandler();
907   zim::size_type size = 0;
908 
909   if (file != NULL) {
910     size = file->getFilesize();
911   }
912 
913   return (size / 1024);
914 }
915 }
916