1 /* Copyright (C) 2007-2019 J.F.Dockes
2  *   This program is free software; you can redistribute it and/or modify
3  *   it under the terms of the GNU General Public License as published by
4  *   the Free Software Foundation; either version 2 of the License, or
5  *   (at your option) any later version.
6  *
7  *   This program is distributed in the hope that it will be useful,
8  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
9  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  *   GNU General Public License for more details.
11  *
12  *   You should have received a copy of the GNU General Public License
13  *   along with this program; if not, write to the
14  *   Free Software Foundation, Inc.,
15  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16  */
17 #include "autoconfig.h"
18 
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <math.h>
22 #include <time.h>
23 #include <stdint.h>
24 
25 #include <sstream>
26 #include <iostream>
27 #include <list>
28 using std::ostringstream;
29 using std::endl;
30 using std::list;
31 
32 #include "cstr.h"
33 #include "reslistpager.h"
34 #include "log.h"
35 #include "rclconfig.h"
36 #include "smallut.h"
37 #include "rclutil.h"
38 #include "plaintorich.h"
39 #include "mimehandler.h"
40 #include "transcode.h"
41 
42 // Default highlighter. No need for locking, this is query-only.
43 static const string cstr_hlfontcolor("<span style='color: blue;'>");
44 static const string cstr_hlendfont("</span>");
45 class PlainToRichHtReslist : public PlainToRich {
46 public:
startMatch(unsigned int)47     virtual string startMatch(unsigned int) {
48         return cstr_hlfontcolor;
49     }
endMatch()50     virtual string endMatch() {
51         return cstr_hlendfont;
52     }
53 };
54 static PlainToRichHtReslist g_hiliter;
55 
ResListPager(int pagesize,bool alwaysSnippets)56 ResListPager::ResListPager(int pagesize, bool alwaysSnippets)
57     : m_pagesize(pagesize),
58       m_alwaysSnippets(alwaysSnippets),
59       m_newpagesize(pagesize),
60       m_resultsInCurrentPage(0),
61       m_winfirst(-1),
62       m_hasNext(true),
63       m_hiliter(&g_hiliter)
64 {
65 }
66 
resultPageNext()67 void ResListPager::resultPageNext()
68 {
69     if (!m_docSource) {
70         LOGDEB("ResListPager::resultPageNext: null source\n");
71         return;
72     }
73 
74     int resCnt = m_docSource->getResCnt();
75     LOGDEB("ResListPager::resultPageNext: rescnt " << resCnt <<
76            ", winfirst " << m_winfirst << "\n");
77 
78     if (m_winfirst < 0) {
79         m_winfirst = 0;
80     } else {
81         m_winfirst += int(m_respage.size());
82     }
83     // Get the next page of results. Note that we look ahead by one to
84     // determine if there is actually a next page
85     vector<ResListEntry> npage;
86     int pagelen = m_docSource->getSeqSlice(m_winfirst, m_pagesize + 1, npage);
87 
88     // If page was truncated, there is no next
89     m_hasNext = (pagelen == m_pagesize + 1);
90 
91     // Get rid of the possible excess result
92     if (pagelen == m_pagesize + 1) {
93         npage.resize(m_pagesize);
94         pagelen--;
95     }
96 
97     if (pagelen <= 0) {
98         // No results ? This can only happen on the first page or if the
99         // actual result list size is a multiple of the page pref (else
100         // there would have been no Next on the last page)
101         if (m_winfirst > 0) {
102             // Have already results. Let them show, just disable the
103             // Next button. We'd need to remove the Next link from the page
104             // too.
105             // Restore the m_winfirst value, let the current result vector alone
106             m_winfirst -= int(m_respage.size());
107         } else {
108             // No results at all (on first page)
109             m_winfirst = -1;
110         }
111         return;
112     }
113     m_resultsInCurrentPage = pagelen;
114     m_respage = npage;
115 }
maybeEscapeHtml(const string & fld)116 static string maybeEscapeHtml(const string& fld)
117 {
118     if (fld.compare(0, cstr_fldhtm.size(), cstr_fldhtm))
119         return escapeHtml(fld);
120     else
121         return fld.substr(cstr_fldhtm.size());
122 }
123 
124 
resultPageFor(int docnum)125 void ResListPager::resultPageFor(int docnum)
126 {
127     if (!m_docSource) {
128         LOGDEB("ResListPager::resultPageFor: null source\n");
129         return;
130     }
131 
132     int resCnt = m_docSource->getResCnt();
133     LOGDEB("ResListPager::resultPageFor(" << docnum << "): rescnt " <<
134            resCnt << ", winfirst " << m_winfirst << "\n");
135     m_winfirst = (docnum / m_pagesize) * m_pagesize;
136 
137     // Get the next page of results.
138     vector<ResListEntry> npage;
139     int pagelen = m_docSource->getSeqSlice(m_winfirst, m_pagesize, npage);
140 
141     // If page was truncated, there is no next
142     m_hasNext = (pagelen == m_pagesize);
143 
144     if (pagelen <= 0) {
145         m_winfirst = -1;
146         return;
147     }
148     m_respage = npage;
149 }
150 
displayDoc(RclConfig * config,int i,Rcl::Doc & doc,const HighlightData & hdata,const string & sh)151 void ResListPager::displayDoc(RclConfig *config, int i, Rcl::Doc& doc,
152                               const HighlightData& hdata, const string& sh)
153 {
154     ostringstream chunk;
155 
156     // Determine icon to display if any
157     string iconurl = iconUrl(config, doc);
158 
159     // Printable url: either utf-8 if transcoding succeeds, or url-encoded
160     string url;
161     printableUrl(config->getDefCharset(), doc.url, url);
162 
163     // Same as url, but with file:// possibly stripped. output by %u instead
164     // of %U.
165     string urlOrLocal;
166     urlOrLocal = fileurltolocalpath(url);
167     if (urlOrLocal.empty())
168         urlOrLocal = url;
169 
170     // Make title out of file name if none yet
171     string titleOrFilename;
172     string utf8fn;
173     doc.getmeta(Rcl::Doc::keytt, &titleOrFilename);
174     doc.getmeta(Rcl::Doc::keyfn, &utf8fn);
175     if (utf8fn.empty()) {
176         utf8fn = path_getsimple(url);
177     }
178     if (titleOrFilename.empty()) {
179         titleOrFilename = utf8fn;
180     }
181 
182     // Url for the parent directory. We strip the file:// part for local
183     // paths
184     string parenturl = url_parentfolder(url);
185     {
186         string localpath = fileurltolocalpath(parenturl);
187         if (!localpath.empty())
188             parenturl = localpath;
189     }
190 
191     // Result number
192     char numbuf[20];
193     int docnumforlinks = m_winfirst + 1 + i;
194     sprintf(numbuf, "%d", docnumforlinks);
195 
196     // Document date: either doc or file modification times
197     string datebuf;
198     if (!doc.dmtime.empty() || !doc.fmtime.empty()) {
199         time_t mtime = doc.dmtime.empty() ?
200             atoll(doc.fmtime.c_str()) : atoll(doc.dmtime.c_str());
201         struct tm *tm = localtime(&mtime);
202         datebuf = utf8datestring(dateFormat(), tm);
203     }
204 
205     // Size information. We print both doc and file if they differ a lot
206     int64_t fsize = -1, dsize = -1;
207     if (!doc.dbytes.empty())
208         dsize = static_cast<int64_t>(atoll(doc.dbytes.c_str()));
209     if (!doc.fbytes.empty())
210         fsize =  static_cast<int64_t>(atoll(doc.fbytes.c_str()));
211     string sizebuf;
212     if (dsize > 0) {
213         sizebuf = displayableBytes(dsize);
214         if (fsize > 10 * dsize && fsize - dsize > 1000)
215             sizebuf += string(" / ") + displayableBytes(fsize);
216     } else if (fsize >= 0) {
217         sizebuf = displayableBytes(fsize);
218     }
219 
220     string richabst;
221     bool needabstract = parFormat().find("%A") != string::npos;
222     if (needabstract && m_docSource) {
223         vector<string> vabs;
224         m_docSource->getAbstract(doc, vabs);
225         m_hiliter->set_inputhtml(false);
226 
227         for (vector<string>::const_iterator it = vabs.begin();
228              it != vabs.end(); it++) {
229             if (!it->empty()) {
230                 // No need to call escapeHtml(), plaintorich handles it
231                 list<string> lr;
232                 // There may be data like page numbers before the snippet text.
233                 // will be in brackets.
234                 string::size_type bckt = it->find("]");
235                 if (bckt == string::npos) {
236                     m_hiliter->plaintorich(*it, lr, hdata);
237                 } else {
238                     m_hiliter->plaintorich(it->substr(bckt), lr, hdata);
239                     lr.front() = it->substr(0, bckt) + lr.front();
240                 }
241                 richabst += lr.front();
242                 richabst += absSep();
243             }
244         }
245     }
246 
247     // Links; Uses utilities from mimehandler.h
248     ostringstream linksbuf;
249     if (canIntern(&doc, config)) {
250         linksbuf << "<a href=\""<< linkPrefix()<< "P" << docnumforlinks << "\">"
251                  << trans("Preview") << "</a>&nbsp;&nbsp;";
252     }
253     if (canOpen(&doc, config)) {
254         linksbuf << "<a href=\"" <<linkPrefix() + "E" <<docnumforlinks << "\">"
255                  << trans("Open") << "</a>";
256     }
257     ostringstream snipsbuf;
258     if (m_alwaysSnippets || doc.haspages) {
259         snipsbuf << "<a href=\"" <<linkPrefix()<<"A" << docnumforlinks << "\">"
260                  << trans("Snippets") << "</a>&nbsp;&nbsp;";
261         linksbuf << "&nbsp;&nbsp;" << snipsbuf.str();
262     }
263 
264     string collapscnt;
265     if (doc.getmeta(Rcl::Doc::keycc, &collapscnt) && !collapscnt.empty()) {
266         ostringstream collpsbuf;
267         int clc = atoi(collapscnt.c_str()) + 1;
268         collpsbuf << "<a href=\""<<linkPrefix()<<"D" << docnumforlinks << "\">"
269                   << trans("Dups") << "(" << clc << ")" << "</a>&nbsp;&nbsp;";
270         linksbuf << "&nbsp;&nbsp;" << collpsbuf.str();
271     }
272 
273     // Build the result list paragraph:
274 
275     // Subheader: this is used by history
276     if (!sh.empty())
277         chunk << "<p style='clear: both;'><b>" << sh << "</p>\n<p>";
278     else
279         chunk << "<p style='margin: 0px;padding: 0px;clear: both;'>";
280 
281     char xdocidbuf[100];
282     sprintf(xdocidbuf, "%lu", doc.xdocid);
283 
284     // Configurable stuff
285     map<string, string> subs;
286     subs["A"] = !richabst.empty() ? richabst : "";
287     subs["D"] = datebuf;
288     subs["E"] = snipsbuf.str();
289     subs["I"] = iconurl;
290     subs["i"] = doc.ipath;
291     subs["K"] = !doc.meta[Rcl::Doc::keykw].empty() ?
292         string("[") + maybeEscapeHtml(doc.meta[Rcl::Doc::keykw]) + "]" : "";
293     subs["L"] = linksbuf.str();
294     subs["N"] = numbuf;
295     subs["M"] = doc.mimetype;
296     subs["P"] = parenturl;
297     subs["R"] = doc.meta[Rcl::Doc::keyrr];
298     subs["S"] = sizebuf;
299     subs["T"] = maybeEscapeHtml(titleOrFilename);
300     subs["t"] = maybeEscapeHtml(doc.meta[Rcl::Doc::keytt]);
301     subs["U"] = url;
302     subs["u"] = urlOrLocal;
303     subs["x"] = xdocidbuf;
304 
305     // Let %(xx) access all metadata. HTML-neuter everything:
306     for (const auto& entry : doc.meta) {
307         if (!entry.first.empty())
308             subs[entry.first] = maybeEscapeHtml(entry.second);
309     }
310 
311     string formatted;
312     pcSubst(parFormat(), formatted, subs);
313     chunk << formatted;
314 
315     chunk << "</p>" << endl;
316     // This was to force qt 4.x to clear the margins (which it should do
317     // anyway because of the paragraph's style), but we finally took
318     // the table approach for 1.15 for now (in guiutils.cpp)
319 //      chunk << "<br style='clear:both;height:0;line-height:0;'>" << endl;
320 
321     LOGDEB2("Chunk: [" << chunk.rdbuf()->str() << "]\n");
322     append(chunk.rdbuf()->str(), i, doc);
323 }
324 
getDoc(int num,Rcl::Doc & doc)325 bool ResListPager::getDoc(int num, Rcl::Doc& doc)
326 {
327     if (m_winfirst < 0 || m_respage.size() == 0)
328         return false;
329     if (num < m_winfirst || num >= m_winfirst + int(m_respage.size()))
330         return false;
331     doc = m_respage[num-m_winfirst].doc;
332     return true;
333 }
334 
displayPage(RclConfig * config)335 void ResListPager::displayPage(RclConfig *config)
336 {
337     LOGDEB("ResListPager::displayPage. linkPrefix: " << linkPrefix() << "\n");
338     if (!m_docSource) {
339         LOGDEB("ResListPager::displayPage: null source\n");
340         return;
341     }
342     if (m_winfirst < 0 && !pageEmpty()) {
343         LOGDEB("ResListPager::displayPage: sequence error: winfirst < 0\n");
344         return;
345     }
346 
347     ostringstream chunk;
348 
349     // Display list header
350     // We could use a <title> but the textedit doesnt display
351     // it prominently
352     // Note: have to append text in chunks that make sense
353     // html-wise. If we break things up too much, the editor
354     // gets confused. Hence the use of the 'chunk' text
355     // accumulator
356     // Also note that there can be results beyond the estimated resCnt.
357     chunk << "<html><head>" << endl
358           << "<meta http-equiv=\"content-type\""
359           << " content=\"text/html; charset=utf-8\">" << endl
360           << headerContent()
361           << "</head><body " << bodyAttrs() << ">" << endl
362           << pageTop()
363           << "<p><span style=\"font-size:110%;\"><b>"
364           << m_docSource->title()
365           << "</b></span>&nbsp;&nbsp;&nbsp;";
366 
367     if (pageEmpty()) {
368         chunk << trans("<p><b>No results found</b><br>");
369         string reason = m_docSource->getReason();
370         if (!reason.empty()) {
371             chunk << "<blockquote>" << escapeHtml(reason) <<
372                 "</blockquote></p>";
373         } else {
374             HighlightData hldata;
375             m_docSource->getTerms(hldata);
376             vector<string> uterms(hldata.uterms.begin(), hldata.uterms.end());
377             if (!uterms.empty()) {
378                 map<string, vector<string> > spellings;
379                 suggest(uterms, spellings);
380                 if (!spellings.empty()) {
381                     if (o_index_stripchars) {
382                         chunk <<
383                             trans("<p><i>Alternate spellings (accents suppressed): </i>")
384                               << "<br /><blockquote>";
385                     } else {
386                         chunk <<
387                             trans("<p><i>Alternate spellings: </i>")
388                               << "<br /><blockquote>";
389 
390                     }
391 
392                     for (const auto& entry: spellings) {
393                         chunk << "<b>" << entry.first << "</b> : ";
394                         for (const auto& spelling : entry.second) {
395                             chunk << spelling << " ";
396                         }
397                         chunk << "<br />";
398                     }
399                     chunk << "</blockquote></p>";
400                 }
401             }
402         }
403     } else {
404         unsigned int resCnt = m_docSource->getResCnt();
405         if (m_winfirst + m_respage.size() < resCnt) {
406             chunk << trans("Documents") << " <b>" << m_winfirst + 1
407                   << "-" << m_winfirst + m_respage.size() << "</b> "
408                   << trans("out of at least") << " "
409                   << resCnt << " " << trans("for") << " " ;
410         } else {
411             chunk << trans("Documents") << " <b>"
412                   << m_winfirst + 1 << "-" << m_winfirst + m_respage.size()
413                   << "</b> " << trans("for") << " ";
414         }
415     }
416     chunk << detailsLink();
417     if (hasPrev() || hasNext()) {
418         chunk << "&nbsp;&nbsp;";
419         if (hasPrev()) {
420             chunk << "<a href=\"" << linkPrefix() + prevUrl() + "\"><b>"
421                   << trans("Previous")
422                   << "</b></a>&nbsp;&nbsp;&nbsp;";
423         }
424         if (hasNext()) {
425             chunk << "<a href=\"" << linkPrefix() + nextUrl() + "\"><b>"
426                   << trans("Next")
427                   << "</b></a>";
428         }
429     }
430     chunk << "</p>" << endl;
431 
432     append(chunk.rdbuf()->str());
433     chunk.rdbuf()->str("");
434     if (pageEmpty())
435         return;
436 
437     HighlightData hdata;
438     m_docSource->getTerms(hdata);
439 
440     // Emit data for result entry paragraph. Do it in chunks that make sense
441     // html-wise, else our client may get confused
442     for (int i = 0; i < (int)m_respage.size(); i++) {
443         Rcl::Doc& doc(m_respage[i].doc);
444         string& sh(m_respage[i].subHeader);
445         displayDoc(config, i, doc, hdata, sh);
446     }
447 
448     // Footer
449     chunk << "<p align=\"center\">";
450     if (hasPrev() || hasNext()) {
451         if (hasPrev()) {
452             chunk << "<a href=\"" + linkPrefix() + prevUrl() + "\"><b>"
453                   << trans("Previous")
454                   << "</b></a>&nbsp;&nbsp;&nbsp;";
455         }
456         if (hasNext()) {
457             chunk << "<a href=\"" << linkPrefix() + nextUrl() + "\"><b>"
458                   << trans("Next")
459                   << "</b></a>";
460         }
461     }
462     chunk << "</p>" << endl;
463     chunk << "</body></html>" << endl;
464     append(chunk.rdbuf()->str());
465     flush();
466 }
467 
displaySingleDoc(RclConfig * config,int idx,Rcl::Doc & doc,const HighlightData & hdata)468 void ResListPager::displaySingleDoc(RclConfig *config, int idx,
469                                     Rcl::Doc& doc,
470                                     const HighlightData& hdata)
471 {
472     ostringstream chunk;
473 
474     // Header
475     // Note: have to append text in chunks that make sense
476     // html-wise. If we break things up too much, the editor
477     // gets confused.
478     string bdtag("<body ");
479     bdtag += bodyAttrs();
480     rtrimstring(bdtag, " ");
481     bdtag += ">";
482     chunk << "<html><head>\n"
483           << "<meta http-equiv=\"content-type\""
484           << " content=\"text/html; charset=utf-8\">\n"
485           << headerContent()
486           << "</head>\n" << bdtag << "\n";
487     append(chunk.rdbuf()->str());
488     // Document
489     displayDoc(config, idx, doc, hdata, string());
490     // Footer
491     append("</body></html>\n");
492     flush();
493 }
494 
495 
496 // Default implementations for things that should be implemented by
497 // specializations
nextUrl()498 string ResListPager::nextUrl()
499 {
500     return "n-1";
501 }
502 
prevUrl()503 string ResListPager::prevUrl()
504 {
505     return "p-1";
506 }
507 
iconUrl(RclConfig * config,Rcl::Doc & doc)508 string ResListPager::iconUrl(RclConfig *config, Rcl::Doc& doc)
509 {
510     // If this is a top level doc, check for a thumbnail image
511     if (doc.ipath.empty()) {
512         vector<Rcl::Doc> docs;
513         docs.push_back(doc);
514         vector<string> paths;
515         Rcl::docsToPaths(docs, paths);
516         if (!paths.empty()) {
517             string path;
518             LOGDEB2("ResList::iconUrl: source path [" << paths[0] << "]\n");
519             if (thumbPathForUrl(cstr_fileu + paths[0], 128, path)) {
520                 LOGDEB2("ResList::iconUrl: icon path [" << path << "]\n");
521                 return cstr_fileu + path;
522             } else {
523                 LOGDEB2("ResList::iconUrl: no icon: path [" << path << "]\n");
524             }
525         } else {
526             LOGDEB("ResList::iconUrl: docsToPaths failed\n");
527         }
528     }
529 
530     // No thumbnail, look for the MIME type icon.
531     string apptag;
532     doc.getmeta(Rcl::Doc::keyapptg, &apptag);
533     return path_pathtofileurl(config->getMimeIconPath(doc.mimetype, apptag));
534 }
535 
append(const string & data)536 bool ResListPager::append(const string& data)
537 {
538     fprintf(stderr, "%s", data.c_str());
539     return true;
540 }
541 
trans(const string & in)542 string ResListPager::trans(const string& in)
543 {
544     return in;
545 }
546 
detailsLink()547 string ResListPager::detailsLink()
548 {
549     string chunk = string("<a href=\"") + linkPrefix() + "H-1\">";
550     chunk += trans("(show query)") + "</a>";
551     return chunk;
552 }
553 
parFormat()554 const string &ResListPager::parFormat()
555 {
556     static const string cstr_format("<img src=\"%I\" align=\"left\">"
557                                     "%R %S %L &nbsp;&nbsp;<b>%T</b><br>"
558                                     "%M&nbsp;%D&nbsp;&nbsp;&nbsp;<i>%U</i><br>"
559                                     "%A %K");
560     return cstr_format;
561 }
562 
dateFormat()563 const string &ResListPager::dateFormat()
564 {
565     static const string cstr_format("&nbsp;%Y-%m-%d&nbsp;%H:%M:%S&nbsp;%z");
566     return cstr_format;
567 }
568