1 /*
2  * BibTeX Converter
3  * Copyright (C) 2010-2021 by Thomas Dreibholz
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9 
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Contact: dreibh@iem.uni-due.de
19  */
20 
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <unistd.h>
24 #include <string.h>
25 #include <assert.h>
26 #include <errno.h>
27 #include <iostream>
28 #include <sys/stat.h>
29 #include <sys/types.h>
30 #include <curl/curl.h>
31 #include <curl/easy.h>
32 #include <openssl/md5.h>
33 
34 #include "node.h"
35 #include "publicationset.h"
36 #include "stringhandling.h"
37 
38 
39 extern int   yyparse();
40 extern FILE* yyin;
41 extern Node* bibTeXFile;
42 
43 
44 // ###### Get current timer #################################################
// Returns the current wall-clock time, as reported by gettimeofday(),
// expressed in microseconds.
unsigned long long getMicroTime()
{
   struct timeval now;
   gettimeofday(&now, NULL);

   // Widen both fields first, so that the multiplication below is carried
   // out in unsigned long long.
   const unsigned long long wholeSeconds = (unsigned long long)now.tv_sec;
   const unsigned long long microSeconds = (unsigned long long)now.tv_usec;
   return((wholeSeconds * 1000000ULL) + microSeconds);
}
52 
53 
54 // ###### Download file via libcurl #########################################
downloadFile(CURL * curl,const char * url,FILE * headerFH,FILE * downloadFH,unsigned int & errors)55 static bool downloadFile(CURL*         curl,
56                          const char*   url,
57                          FILE*         headerFH,
58                          FILE*         downloadFH,
59                          unsigned int& errors)
60 {
61    if( (ftruncate(fileno(headerFH), 0) != 0) ||
62        (ftruncate(fileno(downloadFH), 0) != 0) ) {
63       perror("Unable to truncate output files");
64       return(false);
65    }
66 
67    curl_easy_setopt(curl, CURLOPT_URL,            url);
68    curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 1L);
69    curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 1L);
70    curl_easy_setopt(curl, CURLOPT_WRITEDATA,      downloadFH);
71    curl_easy_setopt(curl, CURLOPT_WRITEHEADER,    headerFH);
72    curl_easy_setopt(curl, CURLOPT_USERAGENT,      "bibtexconv/1.1 (AmigaOS; MC680x0)");
73    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);   // follow redirects
74    curl_easy_setopt(curl, CURLOPT_AUTOREFERER,    1L);   // set referer on redirect
75    curl_easy_setopt(curl, CURLOPT_COOKIEFILE,     "");   // enable cookies
76    curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 30);   // 30s connect timeout
77 
78    bool  resultIsGood    = false;
79    const CURLcode result = curl_easy_perform(curl);
80    if(result == CURLE_OK) {
81       rewind(headerFH);
82       rewind(downloadFH);
83 
84       // ====== Check HTTP result =========================
85       if( (strncmp(url, "http", 4)) == 0) {
86          unsigned httpErrorCode = 999;
87          char     header[8192];
88          while(!feof(headerFH)) {
89             // The actual result will be of the last request
90             // in the header file (may have been redirected!)
91             if(!fgets((char*)&header, sizeof(header) - 1, headerFH)) {
92                break;
93             }
94             sscanf(header, "HTTP/%*[^ ] %u ", &httpErrorCode);
95          }
96          if(httpErrorCode == 200) {
97              resultIsGood = true;
98          }
99          if(httpErrorCode != 200) {
100             fprintf(stderr, "FAILED %s - HTTP returns code %u!\n",
101                     url, httpErrorCode);
102 
103             /*
104             rewind(headerFH);
105             size_t r = fread((char*)&header, 1, sizeof(header) - 1, headerFH);
106             if(r > 0) {
107                header[r] = 0x00;
108                fputs(header, stderr);
109             }
110             */
111 
112             errors++;
113          }
114       }
115    }
116    else {
117       fprintf(stderr, "FAILED %s: %s!\n", url, curl_easy_strerror(result));
118       errors++;
119    }
120 
121    return(resultIsGood);
122 }
123 
124 
125 // ###### Dynamic URL handling ##############################################
handleDynamicURL(CURL * curl,const std::string url,FILE * headerFH,FILE * downloadFH,unsigned int & errors)126 static bool handleDynamicURL(CURL*             curl,
127                              const std::string url,
128                              FILE*             headerFH,
129                              FILE*             downloadFH,
130                              unsigned int&     errors)
131 {
132    std::string rest;
133    std::string newURL = "";
134 
135    // ====== IEEExplore database ============================================
136    if( (hasPrefix(url, "http://ieeexplore.ieee.org/", rest)) ||
137        (hasPrefix(url, "https://ieeexplore.ieee.org/", rest)) ) {
138       char buffer[65536];
139       size_t r = fread((char*)&buffer, 1, sizeof(buffer) - 1, downloadFH);
140       if((r > 0) && (r < sizeof(buffer) - 1)) {
141          buffer[r] = 0x00;
142 
143          fprintf(stderr, "[IEEExplore");
144 
145          const std::string inputString(buffer);
146          const size_t      framePos = inputString.rfind("<frame src=\"");
147          // fputs(inputString.c_str(),stderr);
148          if(framePos != std::string::npos) {
149             const size_t a = inputString.find("\"", framePos);
150             const size_t b = inputString.find("\"", a + 1);
151             if( (a != std::string::npos) && (b != std::string::npos) ) {
152                newURL = inputString.substr(a + 1, b - a - 1);
153                fprintf(stderr, "->%s", newURL.c_str());
154             }
155          }
156 
157          fprintf(stderr, "] ");
158       }
159    }
160 
161    rewind(headerFH);
162    rewind(downloadFH);
163    if(newURL.size() > 0) {
164       // printf("NEW=<%s>\n", newURL.c_str());
165       return(downloadFile(curl, newURL.c_str(), headerFH, downloadFH, errors));
166    }
167 
168    return(true);
169 }
170 
171 
172 // ###### Check URLs ########################################################
checkAllURLs(PublicationSet * publicationSet,const char * downloadDirectory,const bool checkNewURLsOnly,const bool ignoreUpdatesForHTML)173 unsigned int checkAllURLs(PublicationSet* publicationSet,
174                           const char*     downloadDirectory,
175                           const bool      checkNewURLsOnly,
176                           const bool      ignoreUpdatesForHTML)
177 {
178    if(downloadDirectory != NULL) {
179       if( (mkdir(downloadDirectory, S_IRWXU|S_IXGRP|S_IRGRP|S_IXOTH|S_IROTH) < 0) &&
180           (errno != EEXIST) ) {
181          fprintf(stderr, "ERROR: Failed to create download directory: %s!\n",
182                  strerror(errno));
183          exit(1);
184       }
185    }
186 
187    CURL* curl = curl_easy_init();
188    if(curl == NULL) {
189       fputs("ERROR: Failed to initialize libcurl!\n", stderr);
190       exit(1);
191    }
192 
193    unsigned int errors = 0;
194    for(size_t index = 0; index < publicationSet->size(); index++) {
195       // ====== Get prev, current and next publications =====================
196       if(publicationSet->get(index)->value == "Comment") {
197          continue;
198       }
199       Node* publication = publicationSet->get(index);
200       Node* url         = findChildNode(publication, "url");
201       if(url != NULL) {
202          const Node* urlSize    = findChildNode(publication, "url.size");
203          const Node* urlMime    = findChildNode(publication, "url.mime");
204          const Node* urlChecked = findChildNode(publication, "url.checked");
205          if( (urlSize != NULL) && (urlMime != NULL) && (urlChecked != NULL) ) {
206             if(downloadDirectory != NULL) {
207                const std::string downloadFileName =
208                   PublicationSet::makeDownloadFileName(downloadDirectory,
209                                                        publication->keyword,
210                                                        urlMime->value);
211                FILE* downloadFH = fopen(downloadFileName.c_str(), "rb");
212                if(downloadFH != NULL) {
213                   fclose(downloadFH);
214                   fprintf(stderr, "Skipping URL of %s (already available as %s).\n",
215                           publication->keyword.c_str(),
216                           downloadFileName.c_str());
217                   continue;
218                }
219             }
220             else if(checkNewURLsOnly == true) {
221                fprintf(stderr, "Skipping URL of %s (not a new entry).\n", publication->keyword.c_str());
222                continue;
223             }
224          }
225 
226          fprintf(stderr, "Checking URL of %s ... ", publication->keyword.c_str());
227 
228          char downloadFileName[256];
229          char mimeFileName[256];
230          char metaFileName[256];
231          if(downloadDirectory != NULL) {
232             snprintf((char*)&downloadFileName, sizeof(downloadFileName), "%s/%s", downloadDirectory, "/bibtexconv-dXXXXXX");
233          }
234          else {
235             snprintf((char*)&downloadFileName, sizeof(downloadFileName), "%s", "/tmp/bibtexconv-dXXXXXX");
236          }
237          snprintf((char*)&mimeFileName, sizeof(mimeFileName), "%s",      "/tmp/bibtexconv-mXXXXXX");
238          snprintf((char*)&metaFileName, sizeof(metaFileName), "%s",      "/tmp/bibtexconv-pXXXXXX");
239 
240          const int dfd = mkstemp((char*)&downloadFileName);
241          const int mfd = mkstemp((char*)&mimeFileName);
242          if( (dfd > 0) && (mfd > 0) ) {
243             FILE* downloadFH = fopen(downloadFileName, "w+b");
244             if(downloadFH != NULL) {
245                FILE* headerFH = tmpfile();
246                if(headerFH != NULL) {
247                   bool resultIsGood = downloadFile(curl, url->value.c_str(), headerFH, downloadFH, errors);
248                   if(resultIsGood) {
249                      // Special handling for dynamic URLs of some publishers
250                      resultIsGood = handleDynamicURL(curl, url->value, headerFH, downloadFH, errors);
251                   }
252                   if(resultIsGood) {
253                      unsigned long long totalSize = 0;
254                      unsigned char      md5[MD5_DIGEST_LENGTH];
255                      MD5_CTX md5_ctx;
256                      MD5_Init(&md5_ctx);
257 
258                      // ====== Compute size and MD5 =========================
259                      while(!feof(downloadFH)) {
260                         char input[16384];
261                         const size_t bytesRead = fread(&input, 1, sizeof(input), downloadFH);
262                         if(bytesRead > 0) {
263                            totalSize += (unsigned long long)bytesRead;
264                            MD5_Update(&md5_ctx, &input, bytesRead);
265                         }
266                      }
267 
268                      if(totalSize > 0) {
269                         // ====== Compute mime type (using "file") =======
270                         std::string mimeString;
271                         std::string command = format("/usr/bin/file --mime-type -b %s >%s", downloadFileName, mimeFileName);
272                         if(system(command.c_str()) == 0) {
273                            FILE* mimeFH = fopen(mimeFileName, "r");
274                            if(mimeFH != NULL) {
275                               char input[1024];
276                               if(fgets((char*)&input, sizeof(input) - 1, mimeFH) != NULL) {
277                                  mimeString = std::string(input);
278                                  if( (mimeString.size() > 0) &&
279                                      (mimeString[mimeString.size() - 1] == '\n') ) {
280                                     mimeString = mimeString.substr(0, mimeString.size() - 1);
281                                  }
282 
283                                  // RFCs/I-Ds are sometimes misidentified as source code:
284                                  if( (mimeString == "text/x-pascal") ||
285                                      (mimeString == "text/x-c") ||
286                                      (mimeString == "text/x-c++") ) {
287                                     mimeString = "text/plain";
288                                  }
289                               }
290                               fclose(mimeFH);
291                            }
292                         }
293                         else {
294                            fprintf(stderr, "WARNING %s: failed to obtain mime type of download file!\n",
295                                    url->value.c_str());
296                         }
297 
298                         // ====== Compare size, mime type and MD5 ===========
299                         std::string sizeString = format("%llu", totalSize);
300                         std::string md5String;
301                         MD5_Final((unsigned char*)&md5, &md5_ctx);
302                         for(unsigned int i = 0; i < MD5_DIGEST_LENGTH; i++) {
303                            md5String += format("%02x", (unsigned int)md5[i]);
304                         }
305                         const Node* urlMimeNode = findChildNode(publication, "url.mime");
306                         const Node* urlSizeNode = findChildNode(publication, "url.size");
307                         const Node* urlMD5Node  = findChildNode(publication, "url.md5");
308 
309                         bool failed = false;
310                         if((urlMimeNode != NULL) && (urlMimeNode->value != mimeString)) {
311                            if( (urlMimeNode->value == "text/html") &&
312                                (mimeString == "application/pdf") ) {
313                               fprintf(stderr, "\nNOTE: change from HTML to PDF -> just updating entry\n");
314                               urlSizeNode = NULL;
315                               urlMD5Node  = NULL;
316                            }
317                            else {
318                               fprintf(stderr, "UPDATED %s: old mime type has been %s, new type mime is %s\n",
319                                       url->value.c_str(),
320                                       urlMimeNode->value.c_str(), mimeString.c_str());
321                            }
322                         }
323                         if( (!failed) && (urlSizeNode != NULL) && (urlSizeNode->value != sizeString) ) {
324                             if( (ignoreUpdatesForHTML == true) &&
325                                 ((urlMimeNode != NULL) &&
326                                  ((urlMimeNode->value == "text/html") ||
327                                   (urlMimeNode->value == "application/xml"))) ) {
328                                md5String = "ignore";
329                                fprintf(stderr, "[Size change for HTML/XML document -> setting url.md5=\"ignore\"] ");
330                             }
331                             else {
332                               fprintf(stderr, "UPDATED %s: old size has been %s, new size is %s\n",
333                                       url->value.c_str(),
334                                       urlSizeNode->value.c_str(), sizeString.c_str());
335                             }
336                         }
337                         if( (!failed) && (urlMD5Node != NULL) && (urlMD5Node->value != "ignore") &&
338                            (urlMD5Node->value != md5String)) {
339                             if( (ignoreUpdatesForHTML == true) &&
340                                 ((urlMimeNode != NULL) &&
341                                  ((urlMimeNode->value == "text/html") ||
342                                   (urlMimeNode->value == "application/xml"))) ) {
343                                md5String = "ignore";
344                                fprintf(stderr, "[MD5 change for HTML/XML document -> setting url.md5=\"ignore\"] ");
345                             }
346                             else {
347                                fprintf(stderr, "UPDATED %s: old MD5 has been %s, new MD5 is %s\n",
348                                        url->value.c_str(),
349                                        urlMD5Node->value.c_str(), md5String.c_str());
350                             }
351                         }
352 
353                         // ====== Check PDF metadata ========================
354                         if(mimeString == "application/pdf") {
355                            std::string command = format("/usr/bin/pdfinfo %s >%s", downloadFileName, metaFileName);
356                            if(system(command.c_str()) == 0) {
357                               FILE* metaFH = fopen(metaFileName, "r");
358                               if(metaFH != NULL) {
359                                  while(!feof(metaFH)) {
360                                     char input[1024];
361                                     if(fgets((char*)&input, sizeof(input) - 1, metaFH) != NULL) {
362                                        // printf("IN=%s",input);
363                                        if(strncmp(input, "Pages:          ", 16) == 0) {
364                                           addOrUpdateChildNode(publication, "numpages", format("%u", atol((const char*)&input[16])).c_str());
365                                        }
366                                        else if(strncmp(input, "Keywords:       ", 16) == 0) {
367                                           Node* keywords = findChildNode(publication, "keywords");
368                                           if(keywords == NULL) {
369                                              // If there are no "keywords", add "url.keywords".
370                                              // They can be renamed manually after a check.
371                                              addOrUpdateChildNode(publication, "url.keywords",
372                                                                   string2utf8(std::string((const char*)&input[16]), "~", "").c_str());
373                                           }
374                                        }
375                                        else if(strncmp(input, "Page size:      ", 16) == 0) {
376                                           addOrUpdateChildNode(publication, "url.pagesize",
377                                                                string2utf8(std::string((const char*)&input[16]), "~", "").c_str());
378                                        }
379                                     }
380                                  }
381                                  fclose(metaFH);
382                               }
383                            }
384 
385                         }
386 
387                         // ====== Update metadata ===========================
388                         if(!failed) {
389                            // ====== Update size, mime type and MD5 =========
390                            addOrUpdateChildNode(publication, "url.size", sizeString.c_str());
391                            addOrUpdateChildNode(publication, "url.mime", mimeString.c_str());
392                            if( (urlMD5Node == NULL) || (urlMD5Node->value != "ignore")) {
393                               addOrUpdateChildNode(publication, "url.md5",  md5String.c_str());
394                            }
395 
396                            // ====== Update check time ======================
397                            const unsigned long long microTime = getMicroTime();
398                            const time_t             timeStamp = microTime / 1000000;
399                            const tm*                timeptr   = localtime(&timeStamp);
400                            char  checkTime[128];
401                            strftime((char*)&checkTime, sizeof(checkTime), "%Y-%m-%d %H:%M:%S %Z", timeptr);
402                            addOrUpdateChildNode(publication, "url.checked", checkTime);
403 
404                            fprintf(stderr, "OK: size=%sB;\ttype=%s;\tMD5=%s\n",
405                                    sizeString.c_str(), mimeString.c_str(), md5String.c_str());
406 
407                            // ====== Move downloaded file ===================
408                            if(downloadDirectory != NULL) {
409                               fclose(downloadFH);
410                               downloadFH = NULL;
411                               const std::string newFileName =
412                                  PublicationSet::makeDownloadFileName(downloadDirectory, publication->keyword, mimeString);
413                               if(rename(downloadFileName, newFileName.c_str()) < 0) {
414                                  unlink(downloadFileName);
415                                  fprintf(stderr, "\nFAILED to store download file %s: %s!\n",
416                                          newFileName.c_str(), strerror(errno));
417                                  errors++;
418                               }
419                            }
420                         }
421                      }
422                      else {
423                         fprintf(stderr, "\nFAILED %s: size is zero!\n", url->value.c_str());
424                         errors++;
425                      }
426                   }
427                   fclose(headerFH);
428                   headerFH = NULL;
429                }
430                else {
431                   fputs("ERROR: Failed to create temporary header file!\n", stderr);
432                   errors++;
433                }
434                if(downloadFH != NULL) {
435                   fclose(downloadFH);
436                   downloadFH = NULL;
437                   unlink(downloadFileName);
438                }
439                unlink(mimeFileName);
440             }
441             else {
442                fputs("ERROR: Failed to create temporary download file!\n", stderr);
443                errors++;
444             }
445          }
446          else {
447             fputs("ERROR: Failed to create temporary file name!\n", stderr);
448             errors++;
449          }
450          if(dfd >= 0) {
451             close(dfd);
452          }
453          if(mfd >= 0) {
454             close(mfd);
455          }
456       }
457    }
458 
459    curl_easy_cleanup(curl);
460    curl = NULL;
461 
462    return(errors);
463 }
464 
465 
466 // ###### Handle interactive input ##########################################
// File-scope settings used by handleInput(): several of its script commands
// assign to them, and its export command passes them on to the custom
// exporter.

// Set to true by "xmlStyle" and back to false by "utf8Style".
static bool useXMLStyle(false);

// Replacement strings, settable by the "nbsp" and "linebreak" commands.
static std::string nbsp(" ");
static std::string lineBreak("\n");

// Text given by the "header" and "trailer" commands; both are empty at start.
static std::string customPrintingHeader;
static std::string customPrintingTrailer;

// Printing template; replaced by "template", extended by "template+" and
// emptied by "templatenew".
// NOTE(review): a longer variant of this default had been left behind as a
// comment; it appears to continue the string with:
//   ", \"%T\"[, %B][, %J][, %?][, %$][, Volume~%V][, Number~%N][, pp.~%P][, %I][, %i][, %@][, [[%m, %D, |%m~]%Y].\\nURL: %U.\\n\\n"
static std::string customPrintingTemplate(
   "\\[%C\\] %L\n %a\tAUTHOR: [[%fFIRST|%lLAST|%nNOT-FIRST]: initials=%g given=%G full=%F]\n%A\n");

// Month names, handed over to the custom exporter; empty at start.
static std::vector<std::string> monthNames;
475 
handleInput(FILE * fh,PublicationSet & publicationSet,const char * downloadDirectory,const bool checkURLs,const bool checkNewURLsOnly,const bool ignoreUpdatesForHTML,const char * exportToBibTeX,const char * exportToSeparateBibTeXs,const char * exportToXML,const char * exportToSeparateXMLs,const bool skipNotesWithISBNandISSN,const bool addNotesWithISBNandISSN,const bool addUrlCommand,unsigned int recursionLevel=0)476 static int handleInput(FILE*           fh,
477                        PublicationSet& publicationSet,
478                        const char*     downloadDirectory,
479                        const bool      checkURLs,
480                        const bool      checkNewURLsOnly,
481                        const bool      ignoreUpdatesForHTML,
482                        const char*     exportToBibTeX,
483                        const char*     exportToSeparateBibTeXs,
484                        const char*     exportToXML,
485                        const char*     exportToSeparateXMLs,
486                        const bool      skipNotesWithISBNandISSN,
487                        const bool      addNotesWithISBNandISSN,
488                        const bool      addUrlCommand,
489                        unsigned int    recursionLevel = 0)
490 {
491    int result = 0;
492    while(!feof(fh)) {
493       char input[65536];
494       if(fgets((char*)&input, sizeof(input), fh)) {
495          // ====== Remove newline =====================================
496          const size_t length = strlen(input);
497          if(length > 0) {
498             input[length - 1] = 0x00;
499          }
500 
501          // ====== Handle commands ====================================
502          if(input[0] == 0x00) {
503             // Empty line
504          }
505          else if(input[0] == '#') {
506             // Comment
507          }
508          else if(strncmp(input, "citeAll", 7) == 0) {
509             publicationSet.addAll(bibTeXFile);
510          }
511          else if(strncmp(input, "cite ", 5) == 0) {
512             std::string arguments = (const char*)&input[5];
513             const std::string keyword = extractToken(trim(arguments), " \t");
514             const std::string anchor  = extractToken(trim(arguments), " \t");
515             Node* publication = findNode(bibTeXFile, keyword.c_str());
516             if(publication) {
517                if(anchor.size() > 0) {
518                   publication->anchor = anchor;
519                }
520                else {
521                   char number[16];
522                   snprintf((char*)&number, sizeof(number), "%u",
523                            (unsigned int)publicationSet.size());
524                   publication->anchor = number;
525                }
526                if(!publicationSet.add(publication)) {
527                   fprintf(stderr, "ERROR: Publication '%s' has already been added!\n",
528                           (const char*)&input[5]);
529                   result++;
530                }
531                for(size_t i = 0; i < NODE_CUSTOM_ENTRIES; i++) {
532                   publication->custom[i] = extractToken(trim(arguments), " \t");
533                }
534             }
535             else {
536                fprintf(stderr, "ERROR: Publication '%s' not found!\n", keyword.c_str());
537                result++;
538             }
539          }
540          else if((strncmp(input, "sort ", 5)) == 0) {
541             const size_t maxSortLevels = 8;
542             std::string sortKey[maxSortLevels];
543             bool        sortAscending[maxSortLevels];
544             std::string arguments = (const char*)&input[5];
545             size_t sortLevels = 0;
546             for(size_t i = 0; i < maxSortLevels; i++) {
547                bool isAscending = true;
548                 std::string token = extractToken(trim(arguments), " \t");
549                 const size_t slash = token.find('/');
550                 if(slash != std::string::npos) {
551                    const std::string order = token.substr(slash + 1, token.size() - slash - 1);
552                    token = token.substr(0, slash);
553                    if( (order == "ascending") || (order == "A") ) {
554                      isAscending = true;
555                    }
556                    else if( (order == "descending") || (order == "D") ) {
557                      isAscending = false;
558                    }
559                    else {
560                       fprintf(stderr, "ERROR: Bad sorting order '%s' for key '%s'!\n",
561                               order.c_str(), token.c_str());
562                       result++;
563                       break;
564                    }
565                 }
566                 if(token != "") {
567                    sortKey[sortLevels]       = token;
568                    sortAscending[sortLevels] = isAscending;
569                    sortLevels++;
570                 }
571             }
572             publicationSet.sort((const std::string*)&sortKey,
573                                 (const bool*)&sortAscending,
574                                 sortLevels);
575          }
576          else if((strncmp(input, "export", 5)) == 0) {
577             if(checkURLs) {
578                result += checkAllURLs(&publicationSet, downloadDirectory, checkNewURLsOnly, ignoreUpdatesForHTML);
579             }
580             const char* namingTemplate = "%u";
581             if(input[6] == ' ') {
582                namingTemplate = (const char*)&input[7];
583             }
584 
585             // ====== Export all to custom ==================================
586             if(PublicationSet::exportPublicationSetToCustom(
587                   &publicationSet, namingTemplate,
588                   customPrintingHeader, customPrintingTrailer,
589                   customPrintingTemplate, monthNames, nbsp, lineBreak, useXMLStyle,
590                   downloadDirectory, stdout) == false) {
591                result++;
592             }
593 
594             // ====== Export all to BibTeX ==================================
595             if(exportToBibTeX) {
596                if(PublicationSet::exportPublicationSetToBibTeX(
597                   &publicationSet, exportToBibTeX, false,
598                   skipNotesWithISBNandISSN, addNotesWithISBNandISSN, addUrlCommand) == false) {
599                   exit(1);
600                }
601             }
602             if(exportToSeparateBibTeXs) {
603                if(PublicationSet::exportPublicationSetToBibTeX(
604                   &publicationSet, exportToSeparateBibTeXs, true,
605                   skipNotesWithISBNandISSN, addNotesWithISBNandISSN, addUrlCommand) == false) {
606                   exit(1);
607                }
608             }
609 
610             // ====== Export all to XML =====================================
611             if(exportToXML) {
612                if(PublicationSet::exportPublicationSetToXML(
613                   &publicationSet, exportToXML, false) == false) {
614                   exit(1);
615                }
616             }
617             if(exportToSeparateXMLs) {
618                if(PublicationSet::exportPublicationSetToXML(
619                   &publicationSet, exportToSeparateXMLs, true) == false) {
620                   exit(1);
621                }
622             }
623          }
624          else if((strncmp(input, "clear", 5)) == 0) {
625             publicationSet.clearAll();
626          }
627          else if((strncmp(input, "echo ", 5)) == 0) {
628             fputs(processBackslash(std::string((const char*)&input[5])).c_str(), stdout);
629          }
630          else if((strncmp(input, "header ", 7)) == 0) {
631             customPrintingHeader = (const char*)&input[7];
632          }
633          else if((strncmp(input, "trailer ", 8)) == 0) {
634             customPrintingTrailer = (const char*)&input[8];
635          }
636          else if((strncmp(input, "nbsp ", 5)) == 0) {
637             nbsp = (const char*)&input[5];
638          }
639          else if((strncmp(input, "linebreak ", 10)) == 0) {
640             lineBreak = (const char*)&input[10];
641          }
642          else if((strncmp(input, "utf8Style", 8)) == 0) {
643             useXMLStyle = false;
644          }
645          else if((strncmp(input, "xmlStyle", 8)) == 0) {
646             useXMLStyle = true;
647          }
648          else if((strncmp(input, "templatenew", 11)) == 0) {
649             customPrintingTemplate = "";
650          }
651          else if((strncmp(input, "template ", 9)) == 0) {
652             customPrintingTemplate = (const char*)&input[9];
653          }
654          else if((strncmp(input, "template+ ", 10)) == 0) {
655             customPrintingTemplate += (const char*)&input[10];
656          }
657          else if((strncmp(input, "include ", 8)) == 0) {
658             if(recursionLevel <= 9) {
659                const char* includeFileName = (const char*)&input[8];
660                FILE* includeFH = fopen(includeFileName, "r");
661                if(includeFH != NULL) {
662                   result += handleInput(includeFH, publicationSet,
663                                         downloadDirectory, checkURLs, checkNewURLsOnly, ignoreUpdatesForHTML,
664                                         exportToBibTeX, exportToSeparateBibTeXs,
665                                         exportToXML, exportToSeparateXMLs,
666                                         skipNotesWithISBNandISSN, addNotesWithISBNandISSN,
667                                         addUrlCommand,
668                                         recursionLevel + 1);
669                   fclose(includeFH);
670                }
671                else {
672                   fprintf(stderr, "ERROR: Unable to open include file '%s'!\n", includeFileName);
673                   exit(1);
674                }
675             }
676             else {
677                fprintf(stderr, "ERROR: Include file nesting level limit reached!\n");
678                exit(1);
679             }
680          }
681          else if((strncmp(input, "monthNames ", 11)) == 0) {
682             std::string  s = (const char*)&input[11];
683             unsigned int i = 0;
684             while(s != "") {
685                const std::string token = extractToken(trim(s), " ");
686                monthNames[i] = token;
687                if(i > 11) {
688                   fputs("ERROR: There are only 12 month names possible in monthNames!\n", stderr);
689                   exit(1);
690                }
691                i++;
692             }
693          }
694          else {
695             fprintf(stderr, "ERROR: Bad command '%s'!\n", input);
696             exit(1);
697          }
698       }
699    }
700    return(result);
701 }
702 
703 
704 
705 // ###### Main program ######################################################
main(int argc,char ** argv)706 int main(int argc, char** argv)
707 {
708    bool        interactive              = true;
709    bool        checkURLs                = false;
710    bool        checkNewURLsOnly         = false;
711    bool        ignoreUpdatesForHTML     = false;
712    bool        skipNotesWithISBNandISSN = false;
713    bool        addNotesWithISBNandISSN  = false;
714    bool        addUrlCommand            = false;
715    const char* exportToBibTeX           = NULL;
716    const char* exportToSeparateBibTeXs  = NULL;
717    const char* exportToXML              = NULL;
718    const char* exportToSeparateXMLs     = NULL;
719    const char* exportToCustom           = NULL;
720    const char* downloadDirectory        = NULL;
721 
722    monthNames.push_back("January");
723    monthNames.push_back("February");
724    monthNames.push_back("March");
725    monthNames.push_back("April");
726    monthNames.push_back("May");
727    monthNames.push_back("June");
728    monthNames.push_back("July");
729    monthNames.push_back("August");
730    monthNames.push_back("September");
731    monthNames.push_back("October");
732    monthNames.push_back("November");
733    monthNames.push_back("December");
734 
735    if(argc < 2) {
736       fprintf(stderr, "Usage: %s BibTeX_file {-export-to-bibtex=file} {-export-to-separate-bibtexs=prefix} {-export-to-xml=file} {-export-to-separate-xmls=prefix} {-export-to-custom=file} {-non-interactive} {-nbsp=string} {-linebreak=string} {-check-urls} {-only-check-new-urls} {-ignore-updates-for-html} {-add-url-command} {-skip-notes-with-isbn-and-issn} {-add-notes-with-isbn-and-issn} {-store-downloads=directory}\n", argv[0]);
737       exit(1);
738    }
739    for(int i = 2; i < argc; i++) {
740       if( strncmp(argv[i], "-export-to-bibtex=", 18) == 0 ) {
741          exportToBibTeX = (const char*)&argv[i][18];
742       }
743       else if( strncmp(argv[i], "-export-to-separate-bibtexs=", 28) == 0 ) {
744          exportToSeparateBibTeXs = (const char*)&argv[i][28];
745       }
746       else if( strncmp(argv[i], "-export-to-xml=", 15) == 0 ) {
747          exportToXML = (const char*)&argv[i][15];
748       }
749       else if( strncmp(argv[i], "-export-to-separate-xmls=", 25) == 0 ) {
750          exportToSeparateXMLs = (const char*)&argv[i][25];
751       }
752       else if( strncmp(argv[i], "-export-to-custom=", 18) == 0 ) {
753          exportToCustom = (const char*)&argv[i][18];
754       }
755       else if( strncmp(argv[i], "-store-downloads=", 17) == 0 ) {
756          downloadDirectory = (const char*)&argv[i][17];
757       }
758       else if( strncmp(argv[i], "-nbsp=", 5) == 0 ) {
759          nbsp = (const char*)&argv[i][5];
760       }
761       else if( strncmp(argv[i], "-linebreak=", 11) == 0 ) {
762          lineBreak = (const char*)&argv[i][11];
763       }
764       else if( strcmp(argv[i], "-non-interactive") == 0 ) {
765          interactive = false;
766       }
767       else if( strcmp(argv[i], "-check-urls") == 0 ) {
768          checkURLs = true;
769       }
770       else if( strcmp(argv[i], "-only-check-new-urls") == 0 ) {
771          checkNewURLsOnly = true;
772       }
773       else if( strcmp(argv[i], "-ignore-updates-for-html") == 0 ) {
774          ignoreUpdatesForHTML = true;
775       }
776       else if( strcmp(argv[i], "-add-url-command") == 0 ) {
777          addUrlCommand = true;
778       }
779       else if( strcmp(argv[i], "-skip-notes-with-isbn-and-issn") == 0 ) {
780          skipNotesWithISBNandISSN = true;
781       }
782       else if( strcmp(argv[i], "-add-notes-with-isbn-and-issn") == 0 ) {
783          skipNotesWithISBNandISSN = true;   // Drop old ones, if there are any
784          addNotesWithISBNandISSN  = true;   // Compute new ones
785       }
786       else {
787          fprintf(stderr, "ERROR: Bad argument %s!\n", argv[i]);
788          exit(1);
789       }
790    }
791 
792    yyin = fopen(argv[1], "r");
793    if(yyin == NULL) {
794       fprintf(stderr, "ERROR: Unable to open BibTeX input file %s!\n", argv[1]);
795       exit(1);
796    }
797    int result = yyparse();
798    fclose(yyin);
799 
800    if(result == 0) {
801       PublicationSet publicationSet(countNodes(bibTeXFile));
802       if(!interactive) {
803          publicationSet.addAll(bibTeXFile);
804          if(checkURLs) {
805             result += checkAllURLs(&publicationSet, downloadDirectory, checkNewURLsOnly, ignoreUpdatesForHTML);
806          }
807 
808          // ====== Export all to BibTeX =====================================
809          if(exportToBibTeX) {
810             if(PublicationSet::exportPublicationSetToBibTeX(
811                &publicationSet, exportToBibTeX, false,
812                skipNotesWithISBNandISSN, addNotesWithISBNandISSN, addUrlCommand) == false) {
813                exit(1);
814             }
815          }
816          if(exportToSeparateBibTeXs) {
817             if(PublicationSet::exportPublicationSetToBibTeX(
818                &publicationSet, exportToSeparateBibTeXs, true,
819                skipNotesWithISBNandISSN, addNotesWithISBNandISSN, addUrlCommand) == false) {
820                exit(1);
821             }
822          }
823 
824          // ====== Export all to XML ========================================
825          if(exportToXML) {
826             if(PublicationSet::exportPublicationSetToXML(
827                &publicationSet, exportToXML, false) == false) {
828                exit(1);
829             }
830          }
831          if(exportToSeparateXMLs) {
832             if(PublicationSet::exportPublicationSetToXML(
833                &publicationSet, exportToSeparateXMLs, true) == false) {
834                exit(1);
835             }
836          }
837 
838          // ====== Export all to custom format ==============================
839          if(exportToCustom) {
840             if(PublicationSet::exportPublicationSetToCustom(
841                   &publicationSet, "%u",
842                   customPrintingHeader, customPrintingTrailer,
843                   customPrintingTemplate, monthNames,
844                   nbsp, lineBreak, useXMLStyle, downloadDirectory,
845                   stdout) == false) {
846                exit(1);
847             }
848          }
849       }
850       else {
851          fprintf(stderr, "Got %u publications from BibTeX file.\n",
852                  (unsigned int)publicationSet.maxSize());
853          result = handleInput(stdin, publicationSet,
854                               downloadDirectory, checkURLs, checkNewURLsOnly, ignoreUpdatesForHTML,
855                               exportToBibTeX, exportToSeparateBibTeXs,
856                               exportToXML, exportToSeparateXMLs,
857                               skipNotesWithISBNandISSN, addNotesWithISBNandISSN,
858                               addUrlCommand);
859          fprintf(stderr, "Done. %u errors have occurred.\n", result);
860       }
861    }
862    if(bibTeXFile) {
863       freeNode(bibTeXFile);
864       bibTeXFile = NULL;
865    }
866 
867    return result;
868 }
869