1 /*
2 * BibTeX Converter
3 * Copyright (C) 2010-2021 by Thomas Dreibholz
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 * Contact: dreibh@iem.uni-due.de
19 */
20
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <unistd.h>
24 #include <string.h>
25 #include <assert.h>
26 #include <errno.h>
27 #include <iostream>
28 #include <sys/stat.h>
29 #include <sys/types.h>
30 #include <curl/curl.h>
31 #include <curl/easy.h>
32 #include <openssl/md5.h>
33
34 #include "node.h"
35 #include "publicationset.h"
36 #include "stringhandling.h"
37
38
39 extern int yyparse();
40 extern FILE* yyin;
41 extern Node* bibTeXFile;
42
43
44 // ###### Get current timer #################################################
// Returns the current time of day, as reported by gettimeofday(),
// converted into a single microsecond count.
unsigned long long getMicroTime()
{
   struct timeval now;
   gettimeofday(&now, NULL);

   const unsigned long long seconds      = (unsigned long long)now.tv_sec;
   const unsigned long long microSeconds = (unsigned long long)now.tv_usec;
   return((seconds * 1000000ULL) + microSeconds);
}
52
53
54 // ###### Download file via libcurl #########################################
downloadFile(CURL * curl,const char * url,FILE * headerFH,FILE * downloadFH,unsigned int & errors)55 static bool downloadFile(CURL* curl,
56 const char* url,
57 FILE* headerFH,
58 FILE* downloadFH,
59 unsigned int& errors)
60 {
61 if( (ftruncate(fileno(headerFH), 0) != 0) ||
62 (ftruncate(fileno(downloadFH), 0) != 0) ) {
63 perror("Unable to truncate output files");
64 return(false);
65 }
66
67 curl_easy_setopt(curl, CURLOPT_URL, url);
68 curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 1L);
69 curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 1L);
70 curl_easy_setopt(curl, CURLOPT_WRITEDATA, downloadFH);
71 curl_easy_setopt(curl, CURLOPT_WRITEHEADER, headerFH);
72 curl_easy_setopt(curl, CURLOPT_USERAGENT, "bibtexconv/1.1 (AmigaOS; MC680x0)");
73 curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); // follow redirects
74 curl_easy_setopt(curl, CURLOPT_AUTOREFERER, 1L); // set referer on redirect
75 curl_easy_setopt(curl, CURLOPT_COOKIEFILE, ""); // enable cookies
76 curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 30); // 30s connect timeout
77
78 bool resultIsGood = false;
79 const CURLcode result = curl_easy_perform(curl);
80 if(result == CURLE_OK) {
81 rewind(headerFH);
82 rewind(downloadFH);
83
84 // ====== Check HTTP result =========================
85 if( (strncmp(url, "http", 4)) == 0) {
86 unsigned httpErrorCode = 999;
87 char header[8192];
88 while(!feof(headerFH)) {
89 // The actual result will be of the last request
90 // in the header file (may have been redirected!)
91 if(!fgets((char*)&header, sizeof(header) - 1, headerFH)) {
92 break;
93 }
94 sscanf(header, "HTTP/%*[^ ] %u ", &httpErrorCode);
95 }
96 if(httpErrorCode == 200) {
97 resultIsGood = true;
98 }
99 if(httpErrorCode != 200) {
100 fprintf(stderr, "FAILED %s - HTTP returns code %u!\n",
101 url, httpErrorCode);
102
103 /*
104 rewind(headerFH);
105 size_t r = fread((char*)&header, 1, sizeof(header) - 1, headerFH);
106 if(r > 0) {
107 header[r] = 0x00;
108 fputs(header, stderr);
109 }
110 */
111
112 errors++;
113 }
114 }
115 }
116 else {
117 fprintf(stderr, "FAILED %s: %s!\n", url, curl_easy_strerror(result));
118 errors++;
119 }
120
121 return(resultIsGood);
122 }
123
124
125 // ###### Dynamic URL handling ##############################################
handleDynamicURL(CURL * curl,const std::string url,FILE * headerFH,FILE * downloadFH,unsigned int & errors)126 static bool handleDynamicURL(CURL* curl,
127 const std::string url,
128 FILE* headerFH,
129 FILE* downloadFH,
130 unsigned int& errors)
131 {
132 std::string rest;
133 std::string newURL = "";
134
135 // ====== IEEExplore database ============================================
136 if( (hasPrefix(url, "http://ieeexplore.ieee.org/", rest)) ||
137 (hasPrefix(url, "https://ieeexplore.ieee.org/", rest)) ) {
138 char buffer[65536];
139 size_t r = fread((char*)&buffer, 1, sizeof(buffer) - 1, downloadFH);
140 if((r > 0) && (r < sizeof(buffer) - 1)) {
141 buffer[r] = 0x00;
142
143 fprintf(stderr, "[IEEExplore");
144
145 const std::string inputString(buffer);
146 const size_t framePos = inputString.rfind("<frame src=\"");
147 // fputs(inputString.c_str(),stderr);
148 if(framePos != std::string::npos) {
149 const size_t a = inputString.find("\"", framePos);
150 const size_t b = inputString.find("\"", a + 1);
151 if( (a != std::string::npos) && (b != std::string::npos) ) {
152 newURL = inputString.substr(a + 1, b - a - 1);
153 fprintf(stderr, "->%s", newURL.c_str());
154 }
155 }
156
157 fprintf(stderr, "] ");
158 }
159 }
160
161 rewind(headerFH);
162 rewind(downloadFH);
163 if(newURL.size() > 0) {
164 // printf("NEW=<%s>\n", newURL.c_str());
165 return(downloadFile(curl, newURL.c_str(), headerFH, downloadFH, errors));
166 }
167
168 return(true);
169 }
170
171
172 // ###### Check URLs ########################################################
checkAllURLs(PublicationSet * publicationSet,const char * downloadDirectory,const bool checkNewURLsOnly,const bool ignoreUpdatesForHTML)173 unsigned int checkAllURLs(PublicationSet* publicationSet,
174 const char* downloadDirectory,
175 const bool checkNewURLsOnly,
176 const bool ignoreUpdatesForHTML)
177 {
178 if(downloadDirectory != NULL) {
179 if( (mkdir(downloadDirectory, S_IRWXU|S_IXGRP|S_IRGRP|S_IXOTH|S_IROTH) < 0) &&
180 (errno != EEXIST) ) {
181 fprintf(stderr, "ERROR: Failed to create download directory: %s!\n",
182 strerror(errno));
183 exit(1);
184 }
185 }
186
187 CURL* curl = curl_easy_init();
188 if(curl == NULL) {
189 fputs("ERROR: Failed to initialize libcurl!\n", stderr);
190 exit(1);
191 }
192
193 unsigned int errors = 0;
194 for(size_t index = 0; index < publicationSet->size(); index++) {
195 // ====== Get prev, current and next publications =====================
196 if(publicationSet->get(index)->value == "Comment") {
197 continue;
198 }
199 Node* publication = publicationSet->get(index);
200 Node* url = findChildNode(publication, "url");
201 if(url != NULL) {
202 const Node* urlSize = findChildNode(publication, "url.size");
203 const Node* urlMime = findChildNode(publication, "url.mime");
204 const Node* urlChecked = findChildNode(publication, "url.checked");
205 if( (urlSize != NULL) && (urlMime != NULL) && (urlChecked != NULL) ) {
206 if(downloadDirectory != NULL) {
207 const std::string downloadFileName =
208 PublicationSet::makeDownloadFileName(downloadDirectory,
209 publication->keyword,
210 urlMime->value);
211 FILE* downloadFH = fopen(downloadFileName.c_str(), "rb");
212 if(downloadFH != NULL) {
213 fclose(downloadFH);
214 fprintf(stderr, "Skipping URL of %s (already available as %s).\n",
215 publication->keyword.c_str(),
216 downloadFileName.c_str());
217 continue;
218 }
219 }
220 else if(checkNewURLsOnly == true) {
221 fprintf(stderr, "Skipping URL of %s (not a new entry).\n", publication->keyword.c_str());
222 continue;
223 }
224 }
225
226 fprintf(stderr, "Checking URL of %s ... ", publication->keyword.c_str());
227
228 char downloadFileName[256];
229 char mimeFileName[256];
230 char metaFileName[256];
231 if(downloadDirectory != NULL) {
232 snprintf((char*)&downloadFileName, sizeof(downloadFileName), "%s/%s", downloadDirectory, "/bibtexconv-dXXXXXX");
233 }
234 else {
235 snprintf((char*)&downloadFileName, sizeof(downloadFileName), "%s", "/tmp/bibtexconv-dXXXXXX");
236 }
237 snprintf((char*)&mimeFileName, sizeof(mimeFileName), "%s", "/tmp/bibtexconv-mXXXXXX");
238 snprintf((char*)&metaFileName, sizeof(metaFileName), "%s", "/tmp/bibtexconv-pXXXXXX");
239
240 const int dfd = mkstemp((char*)&downloadFileName);
241 const int mfd = mkstemp((char*)&mimeFileName);
242 if( (dfd > 0) && (mfd > 0) ) {
243 FILE* downloadFH = fopen(downloadFileName, "w+b");
244 if(downloadFH != NULL) {
245 FILE* headerFH = tmpfile();
246 if(headerFH != NULL) {
247 bool resultIsGood = downloadFile(curl, url->value.c_str(), headerFH, downloadFH, errors);
248 if(resultIsGood) {
249 // Special handling for dynamic URLs of some publishers
250 resultIsGood = handleDynamicURL(curl, url->value, headerFH, downloadFH, errors);
251 }
252 if(resultIsGood) {
253 unsigned long long totalSize = 0;
254 unsigned char md5[MD5_DIGEST_LENGTH];
255 MD5_CTX md5_ctx;
256 MD5_Init(&md5_ctx);
257
258 // ====== Compute size and MD5 =========================
259 while(!feof(downloadFH)) {
260 char input[16384];
261 const size_t bytesRead = fread(&input, 1, sizeof(input), downloadFH);
262 if(bytesRead > 0) {
263 totalSize += (unsigned long long)bytesRead;
264 MD5_Update(&md5_ctx, &input, bytesRead);
265 }
266 }
267
268 if(totalSize > 0) {
269 // ====== Compute mime type (using "file") =======
270 std::string mimeString;
271 std::string command = format("/usr/bin/file --mime-type -b %s >%s", downloadFileName, mimeFileName);
272 if(system(command.c_str()) == 0) {
273 FILE* mimeFH = fopen(mimeFileName, "r");
274 if(mimeFH != NULL) {
275 char input[1024];
276 if(fgets((char*)&input, sizeof(input) - 1, mimeFH) != NULL) {
277 mimeString = std::string(input);
278 if( (mimeString.size() > 0) &&
279 (mimeString[mimeString.size() - 1] == '\n') ) {
280 mimeString = mimeString.substr(0, mimeString.size() - 1);
281 }
282
283 // RFCs/I-Ds are sometimes misidentified as source code:
284 if( (mimeString == "text/x-pascal") ||
285 (mimeString == "text/x-c") ||
286 (mimeString == "text/x-c++") ) {
287 mimeString = "text/plain";
288 }
289 }
290 fclose(mimeFH);
291 }
292 }
293 else {
294 fprintf(stderr, "WARNING %s: failed to obtain mime type of download file!\n",
295 url->value.c_str());
296 }
297
298 // ====== Compare size, mime type and MD5 ===========
299 std::string sizeString = format("%llu", totalSize);
300 std::string md5String;
301 MD5_Final((unsigned char*)&md5, &md5_ctx);
302 for(unsigned int i = 0; i < MD5_DIGEST_LENGTH; i++) {
303 md5String += format("%02x", (unsigned int)md5[i]);
304 }
305 const Node* urlMimeNode = findChildNode(publication, "url.mime");
306 const Node* urlSizeNode = findChildNode(publication, "url.size");
307 const Node* urlMD5Node = findChildNode(publication, "url.md5");
308
309 bool failed = false;
310 if((urlMimeNode != NULL) && (urlMimeNode->value != mimeString)) {
311 if( (urlMimeNode->value == "text/html") &&
312 (mimeString == "application/pdf") ) {
313 fprintf(stderr, "\nNOTE: change from HTML to PDF -> just updating entry\n");
314 urlSizeNode = NULL;
315 urlMD5Node = NULL;
316 }
317 else {
318 fprintf(stderr, "UPDATED %s: old mime type has been %s, new type mime is %s\n",
319 url->value.c_str(),
320 urlMimeNode->value.c_str(), mimeString.c_str());
321 }
322 }
323 if( (!failed) && (urlSizeNode != NULL) && (urlSizeNode->value != sizeString) ) {
324 if( (ignoreUpdatesForHTML == true) &&
325 ((urlMimeNode != NULL) &&
326 ((urlMimeNode->value == "text/html") ||
327 (urlMimeNode->value == "application/xml"))) ) {
328 md5String = "ignore";
329 fprintf(stderr, "[Size change for HTML/XML document -> setting url.md5=\"ignore\"] ");
330 }
331 else {
332 fprintf(stderr, "UPDATED %s: old size has been %s, new size is %s\n",
333 url->value.c_str(),
334 urlSizeNode->value.c_str(), sizeString.c_str());
335 }
336 }
337 if( (!failed) && (urlMD5Node != NULL) && (urlMD5Node->value != "ignore") &&
338 (urlMD5Node->value != md5String)) {
339 if( (ignoreUpdatesForHTML == true) &&
340 ((urlMimeNode != NULL) &&
341 ((urlMimeNode->value == "text/html") ||
342 (urlMimeNode->value == "application/xml"))) ) {
343 md5String = "ignore";
344 fprintf(stderr, "[MD5 change for HTML/XML document -> setting url.md5=\"ignore\"] ");
345 }
346 else {
347 fprintf(stderr, "UPDATED %s: old MD5 has been %s, new MD5 is %s\n",
348 url->value.c_str(),
349 urlMD5Node->value.c_str(), md5String.c_str());
350 }
351 }
352
353 // ====== Check PDF metadata ========================
354 if(mimeString == "application/pdf") {
355 std::string command = format("/usr/bin/pdfinfo %s >%s", downloadFileName, metaFileName);
356 if(system(command.c_str()) == 0) {
357 FILE* metaFH = fopen(metaFileName, "r");
358 if(metaFH != NULL) {
359 while(!feof(metaFH)) {
360 char input[1024];
361 if(fgets((char*)&input, sizeof(input) - 1, metaFH) != NULL) {
362 // printf("IN=%s",input);
363 if(strncmp(input, "Pages: ", 16) == 0) {
364 addOrUpdateChildNode(publication, "numpages", format("%u", atol((const char*)&input[16])).c_str());
365 }
366 else if(strncmp(input, "Keywords: ", 16) == 0) {
367 Node* keywords = findChildNode(publication, "keywords");
368 if(keywords == NULL) {
369 // If there are no "keywords", add "url.keywords".
370 // They can be renamed manually after a check.
371 addOrUpdateChildNode(publication, "url.keywords",
372 string2utf8(std::string((const char*)&input[16]), "~", "").c_str());
373 }
374 }
375 else if(strncmp(input, "Page size: ", 16) == 0) {
376 addOrUpdateChildNode(publication, "url.pagesize",
377 string2utf8(std::string((const char*)&input[16]), "~", "").c_str());
378 }
379 }
380 }
381 fclose(metaFH);
382 }
383 }
384
385 }
386
387 // ====== Update metadata ===========================
388 if(!failed) {
389 // ====== Update size, mime type and MD5 =========
390 addOrUpdateChildNode(publication, "url.size", sizeString.c_str());
391 addOrUpdateChildNode(publication, "url.mime", mimeString.c_str());
392 if( (urlMD5Node == NULL) || (urlMD5Node->value != "ignore")) {
393 addOrUpdateChildNode(publication, "url.md5", md5String.c_str());
394 }
395
396 // ====== Update check time ======================
397 const unsigned long long microTime = getMicroTime();
398 const time_t timeStamp = microTime / 1000000;
399 const tm* timeptr = localtime(&timeStamp);
400 char checkTime[128];
401 strftime((char*)&checkTime, sizeof(checkTime), "%Y-%m-%d %H:%M:%S %Z", timeptr);
402 addOrUpdateChildNode(publication, "url.checked", checkTime);
403
404 fprintf(stderr, "OK: size=%sB;\ttype=%s;\tMD5=%s\n",
405 sizeString.c_str(), mimeString.c_str(), md5String.c_str());
406
407 // ====== Move downloaded file ===================
408 if(downloadDirectory != NULL) {
409 fclose(downloadFH);
410 downloadFH = NULL;
411 const std::string newFileName =
412 PublicationSet::makeDownloadFileName(downloadDirectory, publication->keyword, mimeString);
413 if(rename(downloadFileName, newFileName.c_str()) < 0) {
414 unlink(downloadFileName);
415 fprintf(stderr, "\nFAILED to store download file %s: %s!\n",
416 newFileName.c_str(), strerror(errno));
417 errors++;
418 }
419 }
420 }
421 }
422 else {
423 fprintf(stderr, "\nFAILED %s: size is zero!\n", url->value.c_str());
424 errors++;
425 }
426 }
427 fclose(headerFH);
428 headerFH = NULL;
429 }
430 else {
431 fputs("ERROR: Failed to create temporary header file!\n", stderr);
432 errors++;
433 }
434 if(downloadFH != NULL) {
435 fclose(downloadFH);
436 downloadFH = NULL;
437 unlink(downloadFileName);
438 }
439 unlink(mimeFileName);
440 }
441 else {
442 fputs("ERROR: Failed to create temporary download file!\n", stderr);
443 errors++;
444 }
445 }
446 else {
447 fputs("ERROR: Failed to create temporary file name!\n", stderr);
448 errors++;
449 }
450 if(dfd >= 0) {
451 close(dfd);
452 }
453 if(mfd >= 0) {
454 close(mfd);
455 }
456 }
457 }
458
459 curl_easy_cleanup(curl);
460 curl = NULL;
461
462 return(errors);
463 }
464
465
466 // ###### Handle interactive input ##########################################
// Settings of the custom export. They are modified by the commands
// processed in handleInput() and by command-line options in main().
static bool                     useXMLStyle = false;   // set by "xmlStyle", reset by "utf8Style"
static std::string              nbsp(" ");
static std::string              lineBreak("\n");
static std::string              customPrintingHeader;    // initially empty
static std::string              customPrintingTrailer;   // initially empty
static std::string              customPrintingTemplate(
   "\\[%C\\] %L\n %a\tAUTHOR: [[%fFIRST|%lLAST|%nNOT-FIRST]: initials=%g given=%G full=%F]\n%A\n");
   // ", \"%T\"[, %B][, %J][, %?][, %$][, Volume~%V][, Number~%N][, pp.~%P][, %I][, %i][, %@][, [[%m, %D, |%m~]%Y].\\nURL: %U.\\n\\n";
static std::vector<std::string> monthNames;              // filled by main()
475
handleInput(FILE * fh,PublicationSet & publicationSet,const char * downloadDirectory,const bool checkURLs,const bool checkNewURLsOnly,const bool ignoreUpdatesForHTML,const char * exportToBibTeX,const char * exportToSeparateBibTeXs,const char * exportToXML,const char * exportToSeparateXMLs,const bool skipNotesWithISBNandISSN,const bool addNotesWithISBNandISSN,const bool addUrlCommand,unsigned int recursionLevel=0)476 static int handleInput(FILE* fh,
477 PublicationSet& publicationSet,
478 const char* downloadDirectory,
479 const bool checkURLs,
480 const bool checkNewURLsOnly,
481 const bool ignoreUpdatesForHTML,
482 const char* exportToBibTeX,
483 const char* exportToSeparateBibTeXs,
484 const char* exportToXML,
485 const char* exportToSeparateXMLs,
486 const bool skipNotesWithISBNandISSN,
487 const bool addNotesWithISBNandISSN,
488 const bool addUrlCommand,
489 unsigned int recursionLevel = 0)
490 {
491 int result = 0;
492 while(!feof(fh)) {
493 char input[65536];
494 if(fgets((char*)&input, sizeof(input), fh)) {
495 // ====== Remove newline =====================================
496 const size_t length = strlen(input);
497 if(length > 0) {
498 input[length - 1] = 0x00;
499 }
500
501 // ====== Handle commands ====================================
502 if(input[0] == 0x00) {
503 // Empty line
504 }
505 else if(input[0] == '#') {
506 // Comment
507 }
508 else if(strncmp(input, "citeAll", 7) == 0) {
509 publicationSet.addAll(bibTeXFile);
510 }
511 else if(strncmp(input, "cite ", 5) == 0) {
512 std::string arguments = (const char*)&input[5];
513 const std::string keyword = extractToken(trim(arguments), " \t");
514 const std::string anchor = extractToken(trim(arguments), " \t");
515 Node* publication = findNode(bibTeXFile, keyword.c_str());
516 if(publication) {
517 if(anchor.size() > 0) {
518 publication->anchor = anchor;
519 }
520 else {
521 char number[16];
522 snprintf((char*)&number, sizeof(number), "%u",
523 (unsigned int)publicationSet.size());
524 publication->anchor = number;
525 }
526 if(!publicationSet.add(publication)) {
527 fprintf(stderr, "ERROR: Publication '%s' has already been added!\n",
528 (const char*)&input[5]);
529 result++;
530 }
531 for(size_t i = 0; i < NODE_CUSTOM_ENTRIES; i++) {
532 publication->custom[i] = extractToken(trim(arguments), " \t");
533 }
534 }
535 else {
536 fprintf(stderr, "ERROR: Publication '%s' not found!\n", keyword.c_str());
537 result++;
538 }
539 }
540 else if((strncmp(input, "sort ", 5)) == 0) {
541 const size_t maxSortLevels = 8;
542 std::string sortKey[maxSortLevels];
543 bool sortAscending[maxSortLevels];
544 std::string arguments = (const char*)&input[5];
545 size_t sortLevels = 0;
546 for(size_t i = 0; i < maxSortLevels; i++) {
547 bool isAscending = true;
548 std::string token = extractToken(trim(arguments), " \t");
549 const size_t slash = token.find('/');
550 if(slash != std::string::npos) {
551 const std::string order = token.substr(slash + 1, token.size() - slash - 1);
552 token = token.substr(0, slash);
553 if( (order == "ascending") || (order == "A") ) {
554 isAscending = true;
555 }
556 else if( (order == "descending") || (order == "D") ) {
557 isAscending = false;
558 }
559 else {
560 fprintf(stderr, "ERROR: Bad sorting order '%s' for key '%s'!\n",
561 order.c_str(), token.c_str());
562 result++;
563 break;
564 }
565 }
566 if(token != "") {
567 sortKey[sortLevels] = token;
568 sortAscending[sortLevels] = isAscending;
569 sortLevels++;
570 }
571 }
572 publicationSet.sort((const std::string*)&sortKey,
573 (const bool*)&sortAscending,
574 sortLevels);
575 }
576 else if((strncmp(input, "export", 5)) == 0) {
577 if(checkURLs) {
578 result += checkAllURLs(&publicationSet, downloadDirectory, checkNewURLsOnly, ignoreUpdatesForHTML);
579 }
580 const char* namingTemplate = "%u";
581 if(input[6] == ' ') {
582 namingTemplate = (const char*)&input[7];
583 }
584
585 // ====== Export all to custom ==================================
586 if(PublicationSet::exportPublicationSetToCustom(
587 &publicationSet, namingTemplate,
588 customPrintingHeader, customPrintingTrailer,
589 customPrintingTemplate, monthNames, nbsp, lineBreak, useXMLStyle,
590 downloadDirectory, stdout) == false) {
591 result++;
592 }
593
594 // ====== Export all to BibTeX ==================================
595 if(exportToBibTeX) {
596 if(PublicationSet::exportPublicationSetToBibTeX(
597 &publicationSet, exportToBibTeX, false,
598 skipNotesWithISBNandISSN, addNotesWithISBNandISSN, addUrlCommand) == false) {
599 exit(1);
600 }
601 }
602 if(exportToSeparateBibTeXs) {
603 if(PublicationSet::exportPublicationSetToBibTeX(
604 &publicationSet, exportToSeparateBibTeXs, true,
605 skipNotesWithISBNandISSN, addNotesWithISBNandISSN, addUrlCommand) == false) {
606 exit(1);
607 }
608 }
609
610 // ====== Export all to XML =====================================
611 if(exportToXML) {
612 if(PublicationSet::exportPublicationSetToXML(
613 &publicationSet, exportToXML, false) == false) {
614 exit(1);
615 }
616 }
617 if(exportToSeparateXMLs) {
618 if(PublicationSet::exportPublicationSetToXML(
619 &publicationSet, exportToSeparateXMLs, true) == false) {
620 exit(1);
621 }
622 }
623 }
624 else if((strncmp(input, "clear", 5)) == 0) {
625 publicationSet.clearAll();
626 }
627 else if((strncmp(input, "echo ", 5)) == 0) {
628 fputs(processBackslash(std::string((const char*)&input[5])).c_str(), stdout);
629 }
630 else if((strncmp(input, "header ", 7)) == 0) {
631 customPrintingHeader = (const char*)&input[7];
632 }
633 else if((strncmp(input, "trailer ", 8)) == 0) {
634 customPrintingTrailer = (const char*)&input[8];
635 }
636 else if((strncmp(input, "nbsp ", 5)) == 0) {
637 nbsp = (const char*)&input[5];
638 }
639 else if((strncmp(input, "linebreak ", 10)) == 0) {
640 lineBreak = (const char*)&input[10];
641 }
642 else if((strncmp(input, "utf8Style", 8)) == 0) {
643 useXMLStyle = false;
644 }
645 else if((strncmp(input, "xmlStyle", 8)) == 0) {
646 useXMLStyle = true;
647 }
648 else if((strncmp(input, "templatenew", 11)) == 0) {
649 customPrintingTemplate = "";
650 }
651 else if((strncmp(input, "template ", 9)) == 0) {
652 customPrintingTemplate = (const char*)&input[9];
653 }
654 else if((strncmp(input, "template+ ", 10)) == 0) {
655 customPrintingTemplate += (const char*)&input[10];
656 }
657 else if((strncmp(input, "include ", 8)) == 0) {
658 if(recursionLevel <= 9) {
659 const char* includeFileName = (const char*)&input[8];
660 FILE* includeFH = fopen(includeFileName, "r");
661 if(includeFH != NULL) {
662 result += handleInput(includeFH, publicationSet,
663 downloadDirectory, checkURLs, checkNewURLsOnly, ignoreUpdatesForHTML,
664 exportToBibTeX, exportToSeparateBibTeXs,
665 exportToXML, exportToSeparateXMLs,
666 skipNotesWithISBNandISSN, addNotesWithISBNandISSN,
667 addUrlCommand,
668 recursionLevel + 1);
669 fclose(includeFH);
670 }
671 else {
672 fprintf(stderr, "ERROR: Unable to open include file '%s'!\n", includeFileName);
673 exit(1);
674 }
675 }
676 else {
677 fprintf(stderr, "ERROR: Include file nesting level limit reached!\n");
678 exit(1);
679 }
680 }
681 else if((strncmp(input, "monthNames ", 11)) == 0) {
682 std::string s = (const char*)&input[11];
683 unsigned int i = 0;
684 while(s != "") {
685 const std::string token = extractToken(trim(s), " ");
686 monthNames[i] = token;
687 if(i > 11) {
688 fputs("ERROR: There are only 12 month names possible in monthNames!\n", stderr);
689 exit(1);
690 }
691 i++;
692 }
693 }
694 else {
695 fprintf(stderr, "ERROR: Bad command '%s'!\n", input);
696 exit(1);
697 }
698 }
699 }
700 return(result);
701 }
702
703
704
705 // ###### Main program ######################################################
main(int argc,char ** argv)706 int main(int argc, char** argv)
707 {
708 bool interactive = true;
709 bool checkURLs = false;
710 bool checkNewURLsOnly = false;
711 bool ignoreUpdatesForHTML = false;
712 bool skipNotesWithISBNandISSN = false;
713 bool addNotesWithISBNandISSN = false;
714 bool addUrlCommand = false;
715 const char* exportToBibTeX = NULL;
716 const char* exportToSeparateBibTeXs = NULL;
717 const char* exportToXML = NULL;
718 const char* exportToSeparateXMLs = NULL;
719 const char* exportToCustom = NULL;
720 const char* downloadDirectory = NULL;
721
722 monthNames.push_back("January");
723 monthNames.push_back("February");
724 monthNames.push_back("March");
725 monthNames.push_back("April");
726 monthNames.push_back("May");
727 monthNames.push_back("June");
728 monthNames.push_back("July");
729 monthNames.push_back("August");
730 monthNames.push_back("September");
731 monthNames.push_back("October");
732 monthNames.push_back("November");
733 monthNames.push_back("December");
734
735 if(argc < 2) {
736 fprintf(stderr, "Usage: %s BibTeX_file {-export-to-bibtex=file} {-export-to-separate-bibtexs=prefix} {-export-to-xml=file} {-export-to-separate-xmls=prefix} {-export-to-custom=file} {-non-interactive} {-nbsp=string} {-linebreak=string} {-check-urls} {-only-check-new-urls} {-ignore-updates-for-html} {-add-url-command} {-skip-notes-with-isbn-and-issn} {-add-notes-with-isbn-and-issn} {-store-downloads=directory}\n", argv[0]);
737 exit(1);
738 }
739 for(int i = 2; i < argc; i++) {
740 if( strncmp(argv[i], "-export-to-bibtex=", 18) == 0 ) {
741 exportToBibTeX = (const char*)&argv[i][18];
742 }
743 else if( strncmp(argv[i], "-export-to-separate-bibtexs=", 28) == 0 ) {
744 exportToSeparateBibTeXs = (const char*)&argv[i][28];
745 }
746 else if( strncmp(argv[i], "-export-to-xml=", 15) == 0 ) {
747 exportToXML = (const char*)&argv[i][15];
748 }
749 else if( strncmp(argv[i], "-export-to-separate-xmls=", 25) == 0 ) {
750 exportToSeparateXMLs = (const char*)&argv[i][25];
751 }
752 else if( strncmp(argv[i], "-export-to-custom=", 18) == 0 ) {
753 exportToCustom = (const char*)&argv[i][18];
754 }
755 else if( strncmp(argv[i], "-store-downloads=", 17) == 0 ) {
756 downloadDirectory = (const char*)&argv[i][17];
757 }
758 else if( strncmp(argv[i], "-nbsp=", 5) == 0 ) {
759 nbsp = (const char*)&argv[i][5];
760 }
761 else if( strncmp(argv[i], "-linebreak=", 11) == 0 ) {
762 lineBreak = (const char*)&argv[i][11];
763 }
764 else if( strcmp(argv[i], "-non-interactive") == 0 ) {
765 interactive = false;
766 }
767 else if( strcmp(argv[i], "-check-urls") == 0 ) {
768 checkURLs = true;
769 }
770 else if( strcmp(argv[i], "-only-check-new-urls") == 0 ) {
771 checkNewURLsOnly = true;
772 }
773 else if( strcmp(argv[i], "-ignore-updates-for-html") == 0 ) {
774 ignoreUpdatesForHTML = true;
775 }
776 else if( strcmp(argv[i], "-add-url-command") == 0 ) {
777 addUrlCommand = true;
778 }
779 else if( strcmp(argv[i], "-skip-notes-with-isbn-and-issn") == 0 ) {
780 skipNotesWithISBNandISSN = true;
781 }
782 else if( strcmp(argv[i], "-add-notes-with-isbn-and-issn") == 0 ) {
783 skipNotesWithISBNandISSN = true; // Drop old ones, if there are any
784 addNotesWithISBNandISSN = true; // Compute new ones
785 }
786 else {
787 fprintf(stderr, "ERROR: Bad argument %s!\n", argv[i]);
788 exit(1);
789 }
790 }
791
792 yyin = fopen(argv[1], "r");
793 if(yyin == NULL) {
794 fprintf(stderr, "ERROR: Unable to open BibTeX input file %s!\n", argv[1]);
795 exit(1);
796 }
797 int result = yyparse();
798 fclose(yyin);
799
800 if(result == 0) {
801 PublicationSet publicationSet(countNodes(bibTeXFile));
802 if(!interactive) {
803 publicationSet.addAll(bibTeXFile);
804 if(checkURLs) {
805 result += checkAllURLs(&publicationSet, downloadDirectory, checkNewURLsOnly, ignoreUpdatesForHTML);
806 }
807
808 // ====== Export all to BibTeX =====================================
809 if(exportToBibTeX) {
810 if(PublicationSet::exportPublicationSetToBibTeX(
811 &publicationSet, exportToBibTeX, false,
812 skipNotesWithISBNandISSN, addNotesWithISBNandISSN, addUrlCommand) == false) {
813 exit(1);
814 }
815 }
816 if(exportToSeparateBibTeXs) {
817 if(PublicationSet::exportPublicationSetToBibTeX(
818 &publicationSet, exportToSeparateBibTeXs, true,
819 skipNotesWithISBNandISSN, addNotesWithISBNandISSN, addUrlCommand) == false) {
820 exit(1);
821 }
822 }
823
824 // ====== Export all to XML ========================================
825 if(exportToXML) {
826 if(PublicationSet::exportPublicationSetToXML(
827 &publicationSet, exportToXML, false) == false) {
828 exit(1);
829 }
830 }
831 if(exportToSeparateXMLs) {
832 if(PublicationSet::exportPublicationSetToXML(
833 &publicationSet, exportToSeparateXMLs, true) == false) {
834 exit(1);
835 }
836 }
837
838 // ====== Export all to custom format ==============================
839 if(exportToCustom) {
840 if(PublicationSet::exportPublicationSetToCustom(
841 &publicationSet, "%u",
842 customPrintingHeader, customPrintingTrailer,
843 customPrintingTemplate, monthNames,
844 nbsp, lineBreak, useXMLStyle, downloadDirectory,
845 stdout) == false) {
846 exit(1);
847 }
848 }
849 }
850 else {
851 fprintf(stderr, "Got %u publications from BibTeX file.\n",
852 (unsigned int)publicationSet.maxSize());
853 result = handleInput(stdin, publicationSet,
854 downloadDirectory, checkURLs, checkNewURLsOnly, ignoreUpdatesForHTML,
855 exportToBibTeX, exportToSeparateBibTeXs,
856 exportToXML, exportToSeparateXMLs,
857 skipNotesWithISBNandISSN, addNotesWithISBNandISSN,
858 addUrlCommand);
859 fprintf(stderr, "Done. %u errors have occurred.\n", result);
860 }
861 }
862 if(bibTeXFile) {
863 freeNode(bibTeXFile);
864 bibTeXFile = NULL;
865 }
866
867 return result;
868 }
869