1 /*
2  * BibTeX Converter
3  * Copyright (C) 2010-2021 by Thomas Dreibholz
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9 
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Contact: dreibh@iem.uni-due.de
19  */
20 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <ctype.h>
#include <new>
#include <set>
#include <string>
#include <vector>

#include "node.h"
#include "unification.h"
#include "stringhandling.h"
30 
31 
32 // ###### Allocate node #####################################################
createNode(const char * label)33 static Node* createNode(const char* label)
34 {
35    Node* node = new Node;
36    if(node == NULL) {
37       yyerror("out of memory");
38    }
39    node->keyword  = label;
40    node->number   = 0;
41    node->prev     = NULL;
42    node->next     = NULL;
43    node->child    = NULL;
44    node->priority = 0;
45    return(node);
46 }
47 
48 
49 // ###### Free nodes ########################################################
freeNode(Node * node)50 void freeNode(Node* node)
51 {
52    Node* next;
53    Node* child;
54    Node* nextChild;
55 
56    while(node != NULL) {
57       next = node->next;
58       child = node->child;
59       while(child != NULL) {
60          nextChild = child->next;
61          delete child;
62          child = nextChild;
63       }
64       delete node;
65       node = next;
66    }
67 }
68 
69 
70 // ###### Count nodes in chain ##############################################
countNodes(const Node * node)71 size_t countNodes(const Node* node)
72 {
73    size_t count = 0;
74    while(node != NULL) {
75       count++;
76       node = node->next;
77    }
78    return(count);
79 }
80 
81 
82 // ###### Dump nodes ########################################################
dumpNode(Node * node)83 void dumpNode(Node* node)
84 {
85    Node* child;
86 
87    puts("---- DUMP ----");
88    do {
89       printf("[%s] %s:\n", node->value.c_str(), node->keyword.c_str());
90       child = node->child;
91       while(child != NULL) {
92          printf("\t%s = %s\n", child->keyword.c_str(), child->value.c_str());
93          child = child->next;
94       }
95       node = node->next;
96    } while(node != NULL);
97    puts("--------------");
98 }
99 
100 
101 // ###### Find node #########################################################
findNode(Node * node,const char * keyword)102 Node* findNode(Node* node, const char* keyword)
103 {
104    const std::string keywordToFind(keyword);
105 
106    while(node != NULL) {
107       if(node->keyword == keywordToFind) {
108          return(node);
109       }
110       node = node->next;
111    }
112    return(NULL);
113 }
114 
115 
116 // ###### Find child node ###################################################
findChildNode(Node * node,const char * childKeyword)117 Node* findChildNode(Node* node, const char* childKeyword)
118 {
119    Node*             child;
120    const std::string keywordToFind(childKeyword);
121 
122    child = node->child;
123    while(child != NULL) {
124       if(child->keyword == keywordToFind) {
125          return(child);
126       }
127       child = child->next;
128    }
129    return(NULL);
130 }
131 
132 
133 // ###### Count child nodes #################################################
countChildNodes(const Node * node,const char * childKeyword)134 size_t countChildNodes(const Node* node, const char* childKeyword)
135 {
136    const Node*       child;
137    const std::string keywordToFind(childKeyword);
138    size_t            count = 0;
139 
140    child = node->child;
141    while(child != NULL) {
142       if(child->keyword == keywordToFind) {
143          count++;
144       }
145       child = child->next;
146    }
147    return(count);
148 }
149 
150 
151 // ###### Make publication collection #######################################
makePublicationCollection(Node * node1,Node * node2)152 Node* makePublicationCollection(Node* node1, Node* node2)
153 {
154    // ====== If there is already an existing node, clear and use it =========
155    Node* n = node2;
156    while(n != NULL) {
157       if(n->keyword == node1->keyword) {
158          // fprintf(stderr, "NOTE: Duplicate: %s\n", n->keyword.c_str());
159 
160          const Node* oldTitle = findChildNode(node1, "title");
161          Node*       newTitle = findChildNode(n, "title");
162          if( (oldTitle != NULL) && (newTitle != NULL) && (oldTitle->value != newTitle->value) ) {
163             fprintf(stderr, "NOTE: Keeping old title:\nOld = \"%s\"\nNew = \"%s\"\n",
164                     oldTitle->value.c_str(),
165                     newTitle->value.c_str());
166             newTitle->value = oldTitle->value;
167          }
168 
169          // node1 is old. Remove its contents, but reuse it for newer data.
170          freeNode(node1->child);
171          node1->child = n->child;
172          n->child     = NULL;
173 
174          // Get rid of old node n.
175          if(n->prev) {
176             n->prev->next = n->next;
177          }
178          if(n->next) {
179             n->next->prev = n->prev;
180          }
181          delete n;
182          break;
183       }
184       n = n->next;
185    }
186 
187    // ====== Add a new node =================================================
188    node2->prev = node1;
189    node1->next = node2;
190    return(node1);
191 }
192 
193 
194 // ###### Node comparison function ##########################################
nodeComparisonFunction(const void * node1ptr,const void * node2ptr)195 int nodeComparisonFunction(const void* node1ptr, const void* node2ptr)
196 {
197    const Node* node1 = *((Node**)node1ptr);
198    const Node* node2 = *((Node**)node2ptr);
199    if(node1->priority > node2->priority) {
200       return(-1);
201    }
202    else if(node1->priority < node2->priority) {
203       return(1);
204    }
205    if(node1->keyword < node2->keyword) {
206       return(-1);
207    }
208    else if(node1->keyword > node2->keyword) {
209       return(1);
210    }
211    return(0);
212 }
213 
214 
215 // ###### Sort children of node #############################################
sortChildren(Node * node)216 static void sortChildren(Node* node)
217 {
218    Node* child = node->child;
219    if(child) {
220       const size_t children = countNodes(child);
221       Node*        sortedChildrenSet[children];
222       size_t       i = 0;
223       while(child != NULL) {
224          sortedChildrenSet[i++] = child;
225          child = child->next;
226       }
227 
228       qsort((void*)&sortedChildrenSet[0], children, sizeof(sortedChildrenSet[0]), nodeComparisonFunction);
229 
230       for(i = 0; i < children; i++) {
231          if(i < children - 1) {
232             sortedChildrenSet[i]->next = sortedChildrenSet[i + 1];
233          }
234          else {
235             sortedChildrenSet[i]->next = NULL;
236          }
237          if(i > 0) {
238             sortedChildrenSet[i]->prev = sortedChildrenSet[i - 1];
239          }
240          else {
241             sortedChildrenSet[i]->prev = NULL;
242          }
243       }
244       node->child = sortedChildrenSet[0];
245    }
246 }
247 
248 
249 // ###### Find existing or create new child node ############################
addOrUpdateChildNode(Node * node,const char * childKeyword,const char * value)250 Node* addOrUpdateChildNode(Node* node, const char* childKeyword, const char* value)
251 {
252    Node* child = findChildNode(node, childKeyword);
253    if(child == NULL) {
254       child = makePublicationInfoItem(childKeyword, value);
255       assert(child != NULL);
256       child->next = node->child;
257       node->child = child;
258       sortChildren(node);
259    }
260    else {
261       child->value = value;
262    }
263    return(child);
264 }
265 
266 
267 // ###### Check number of occurrences for a field ###########################
requiresField(const Node * publication,const char * field,const size_t minimum,const size_t maximum)268 static bool requiresField(const Node* publication,
269                           const char* field,
270                           const size_t minimum,
271                           const size_t maximum)
272 {
273    const size_t count = countChildNodes(publication, field);
274    if(count < minimum) {
275       fprintf(stderr, "WARNING: Entry %s has no \"%s\" section!\n",
276               publication->keyword.c_str(),
277               field);
278       return(false);
279    }
280    else if(count > maximum) {
281       fprintf(stderr, "WARNING: Entry %s has %u \"%s\" sections!\n",
282               publication->keyword.c_str(),
283               (unsigned int)count, field);
284       return(false);
285    }
286    return(true);
287 }
288 
289 
290 // ###### Make publication ##################################################
makePublication(const char * type,const char * label,Node * publicationInfo)291 Node* makePublication(const char* type, const char* label, Node* publicationInfo)
292 {
293    Node* publication = createNode(label);
294    publication->child = publicationInfo;
295    publication->value = type;
296 
297    sortChildren(publication);
298 
299    if(publication->value != "Comment") {
300       requiresField(publication, "title",        1, 1);
301       requiresField(publication, "author",       1, 1);
302       requiresField(publication, "year",         1, 1);
303       requiresField(publication, "isbn",         0, 1);
304       requiresField(publication, "issn",         0, 1);
305       requiresField(publication, "doi",          0, 1);
306       requiresField(publication, "url",          0, 1);
307       requiresField(publication, "url.size",     0, 1);
308       requiresField(publication, "url.mime",     0, 1);
309       requiresField(publication, "url.md5",      0, 1);
310       requiresField(publication, "url.checked",  0, 1);
311       requiresField(publication, "urn",          0, 1);
312       requiresField(publication, "pages",        0, 1);
313       requiresField(publication, "numpages",     0, 1);
314       requiresField(publication, "day",          0, 1);
315       requiresField(publication, "month",        0, 1);
316       requiresField(publication, "address",      0, 1);
317       requiresField(publication, "location",     0, 1);
318       requiresField(publication, "note",         0, 1);
319       requiresField(publication, "howpublished", 0, 1);
320       requiresField(publication, "publisher",    0, 1);
321       requiresField(publication, "school",       0, 1);
322       requiresField(publication, "institution",  0, 1);
323       requiresField(publication, "type",         0, 1);
324       requiresField(publication, "number",       0, 1);
325       requiresField(publication, "issue",        0, 1);
326       requiresField(publication, "volume",       0, 1);
327       requiresField(publication, "abstract",     0, 1);
328       requiresField(publication, "keywords",     0, 1);
329       if(publication->value == "Article") {
330          requiresField(publication, "journal", 1, 1);
331       }
332       else if(publication->value == "Book") {
333          requiresField(publication, "publisher", 1, 1);
334       }
335       else if(publication->value == "InProceedings") {
336          requiresField(publication, "booktitle", 1, 1);
337       }
338       else if(publication->value == "TechReport") {
339          requiresField(publication, "institution", 1, 1);
340       }
341 
342       Node* author = findChildNode(publication, "author");
343       if(author != NULL) {
344          unifyAuthor(publication, author);
345       }
346       else {
347          fprintf(stderr, "WARNING: Entry %s has no \"author\" section!\n" , label);
348       }
349 
350       Node* booktitle = findChildNode(publication, "booktitle");
351       if(booktitle != NULL) {
352          unifyBookTitle(publication, booktitle);
353       }
354       Node* howPublished = findChildNode(publication, "howPublished");
355       if(howPublished != NULL) {
356          unifyBookTitle(publication, howPublished);
357       }
358       Node* journal = findChildNode(publication, "journal");
359       if(journal != NULL) {
360          unifyBookTitle(publication, journal);   // Same as for booktitle!
361       }
362       Node* pages = findChildNode(publication, "pages");
363       if(pages != NULL) {
364          unifyPages(publication, pages);
365       }
366       Node* numpages = findChildNode(publication, "numpages");
367       if(numpages != NULL) {
368          unifyNumPages(publication, numpages);
369       }
370 
371       Node* isbn = findChildNode(publication, "isbn");
372       if(isbn != NULL) {
373          unifyISBN(publication, isbn);
374       }
375       Node* issn = findChildNode(publication, "issn");
376       if(issn != NULL) {
377          unifyISSN(publication, issn);
378       }
379 
380       Node* year  = findChildNode(publication, "year");
381       Node* month = findChildNode(publication, "month");
382       Node* day   = findChildNode(publication, "day");
383       if( (year != NULL) || (month != NULL) || (day != NULL) ) {
384          unifyDate(publication, year, month, day);
385       }
386 
387       Node* url = findChildNode(publication, "url");
388       if(url != NULL) {
389          unifyURL(publication, url);
390       }
391    }
392 
393    return(publication);
394 }
395 
396 
397 // ###### Make publication info #############################################
makePublicationInfo(Node * node1,Node * node2)398 Node* makePublicationInfo(Node* node1, Node* node2)
399 {
400    if(node1 != NULL) {
401       node2->prev = node1;
402       node1->next = node2;
403       return(node1);
404    }
405    else {
406       return(node2);
407    }
408 }
409 
410 
411 // ###### Make publication info item ########################################
makePublicationInfoItem(const char * keyword,const char * value)412 Node* makePublicationInfoItem(const char* keyword, const char* value)
413 {
414    Node*        node          = createNode("PublicationInfoItem");
415    const size_t keywordLength = strlen(keyword);
416    char         keywordString[keywordLength + 1];
417    size_t       i;
418 
419    // ====== Create new entry ===============================================
420    for(i = 0;i < keywordLength;i++) {
421       keywordString[i] = tolower(keyword[i]);
422    }
423    keywordString[keywordLength] = 0x00;
424 
425    node->keyword = keywordString;
426    node->value   = value;
427    if( (node->keyword != "author") ) {   // Brackets must remain for author string!
428       removeBrackets(node->value);
429       trim(node->value);
430    }
431 
432    if(node->value == "") {   // Empty content -> This item is useless
433       node->keyword = "removeme";
434    }
435 
436    // ====== Set priorities for well-known keyword fields ===================
437    if(node->keyword == "author") {
438       node->priority = 255;
439    }
440    else if(node->keyword == "title") {
441       node->priority = 254;
442    }
443 
444    else if(node->keyword == "howpublished") {
445       node->priority = 252;
446    }
447    else if(node->keyword == "booktitle") {
448       node->priority = 251;
449    }
450    else if(node->keyword == "series") {
451       node->priority = 250;
452    }
453    else if(node->keyword == "journal") {
454       node->priority = 249;
455    }
456    else if(node->keyword == "type") {
457       node->priority = 248;
458    }
459    else if(node->keyword == "volume") {
460       node->priority = 247;
461    }
462    else if(node->keyword == "issue") {
463       node->priority = 246;
464    }
465    else if(node->keyword == "number") {
466       node->priority = 245;
467    }
468    else if(node->keyword == "edition") {
469       node->priority = 244;
470    }
471    else if(node->keyword == "editor") {
472       node->priority = 243;
473    }
474    else if(node->keyword == "pages") {
475       node->priority = 242;
476    }
477    else if(node->keyword == "numpages") {
478       node->priority = 241;
479    }
480 
481    else if(node->keyword == "day") {
482       node->priority = 239;
483    }
484    else if(node->keyword == "month") {
485       node->priority = 238;
486    }
487    else if(node->keyword == "year") {
488       node->priority = 237;
489    }
490 
491    else if(node->keyword == "organization") {
492       node->priority = 235;
493    }
494    else if(node->keyword == "school") {
495       node->priority = 234;
496    }
497    else if(node->keyword == "institution") {
498       node->priority = 233;
499    }
500    else if(node->keyword == "location") {
501       node->priority = 232;
502    }
503    else if(node->keyword == "publisher") {
504       node->priority = 231;
505    }
506    else if(node->keyword == "address") {
507       node->priority = 230;
508    }
509 
510    else if(node->keyword == "language") {
511       node->priority = 226;
512    }
513    else if(node->keyword == "content-language") {
514       node->priority = 225;
515    }
516    else if(node->keyword == "isbn") {
517       node->priority = 224;
518    }
519    else if(node->keyword == "issn") {
520       node->priority = 223;
521    }
522    else if(node->keyword == "urn") {
523       node->priority = 222;
524    }
525    else if(node->keyword == "doi") {
526       node->priority = 221;
527    }
528    else if(node->keyword == "note") {
529       node->priority = 220;
530    }
531 
532    else if(node->keyword == "keywords") {
533       node->priority = 211;
534    }
535    else if(node->keyword == "abstract") {
536       node->priority = 210;
537    }
538 
539    else if(node->keyword == "url") {
540       node->priority = 199;
541    }
542    else if(node->keyword == "url.size") {
543       node->priority = 198;
544    }
545    else if(node->keyword == "url.md5") {
546       node->priority = 197;
547    }
548    else if(node->keyword == "url.mime") {
549       node->priority = 196;
550    }
551    else if(node->keyword == "url.pagesize") {
552       node->priority = 195;
553    }
554    else if(node->keyword == "url.checked") {
555       node->priority = 194;
556    }
557    else if(node->keyword == "url.keywords") {
558       node->priority = 193;
559    }
560 
561    else {
562       // printf("UNKNOWN=<%s>\n", node->keyword.c_str());
563    }
564 
565    return(node);
566 }
567