1 /*
2 * BibTeX Converter
3 * Copyright (C) 2010-2021 by Thomas Dreibholz
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 * Contact: dreibh@iem.uni-due.de
19 */
20
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <set>
#include <string>
#include <vector>

#include "node.h"
#include "unification.h"
#include "stringhandling.h"
30
31
32 // ###### Allocate node #####################################################
createNode(const char * label)33 static Node* createNode(const char* label)
34 {
35 Node* node = new Node;
36 if(node == NULL) {
37 yyerror("out of memory");
38 }
39 node->keyword = label;
40 node->number = 0;
41 node->prev = NULL;
42 node->next = NULL;
43 node->child = NULL;
44 node->priority = 0;
45 return(node);
46 }
47
48
49 // ###### Free nodes ########################################################
freeNode(Node * node)50 void freeNode(Node* node)
51 {
52 Node* next;
53 Node* child;
54 Node* nextChild;
55
56 while(node != NULL) {
57 next = node->next;
58 child = node->child;
59 while(child != NULL) {
60 nextChild = child->next;
61 delete child;
62 child = nextChild;
63 }
64 delete node;
65 node = next;
66 }
67 }
68
69
70 // ###### Count nodes in chain ##############################################
countNodes(const Node * node)71 size_t countNodes(const Node* node)
72 {
73 size_t count = 0;
74 while(node != NULL) {
75 count++;
76 node = node->next;
77 }
78 return(count);
79 }
80
81
82 // ###### Dump nodes ########################################################
dumpNode(Node * node)83 void dumpNode(Node* node)
84 {
85 Node* child;
86
87 puts("---- DUMP ----");
88 do {
89 printf("[%s] %s:\n", node->value.c_str(), node->keyword.c_str());
90 child = node->child;
91 while(child != NULL) {
92 printf("\t%s = %s\n", child->keyword.c_str(), child->value.c_str());
93 child = child->next;
94 }
95 node = node->next;
96 } while(node != NULL);
97 puts("--------------");
98 }
99
100
101 // ###### Find node #########################################################
findNode(Node * node,const char * keyword)102 Node* findNode(Node* node, const char* keyword)
103 {
104 const std::string keywordToFind(keyword);
105
106 while(node != NULL) {
107 if(node->keyword == keywordToFind) {
108 return(node);
109 }
110 node = node->next;
111 }
112 return(NULL);
113 }
114
115
116 // ###### Find child node ###################################################
findChildNode(Node * node,const char * childKeyword)117 Node* findChildNode(Node* node, const char* childKeyword)
118 {
119 Node* child;
120 const std::string keywordToFind(childKeyword);
121
122 child = node->child;
123 while(child != NULL) {
124 if(child->keyword == keywordToFind) {
125 return(child);
126 }
127 child = child->next;
128 }
129 return(NULL);
130 }
131
132
133 // ###### Count child nodes #################################################
countChildNodes(const Node * node,const char * childKeyword)134 size_t countChildNodes(const Node* node, const char* childKeyword)
135 {
136 const Node* child;
137 const std::string keywordToFind(childKeyword);
138 size_t count = 0;
139
140 child = node->child;
141 while(child != NULL) {
142 if(child->keyword == keywordToFind) {
143 count++;
144 }
145 child = child->next;
146 }
147 return(count);
148 }
149
150
151 // ###### Make publication collection #######################################
makePublicationCollection(Node * node1,Node * node2)152 Node* makePublicationCollection(Node* node1, Node* node2)
153 {
154 // ====== If there is already an existing node, clear and use it =========
155 Node* n = node2;
156 while(n != NULL) {
157 if(n->keyword == node1->keyword) {
158 // fprintf(stderr, "NOTE: Duplicate: %s\n", n->keyword.c_str());
159
160 const Node* oldTitle = findChildNode(node1, "title");
161 Node* newTitle = findChildNode(n, "title");
162 if( (oldTitle != NULL) && (newTitle != NULL) && (oldTitle->value != newTitle->value) ) {
163 fprintf(stderr, "NOTE: Keeping old title:\nOld = \"%s\"\nNew = \"%s\"\n",
164 oldTitle->value.c_str(),
165 newTitle->value.c_str());
166 newTitle->value = oldTitle->value;
167 }
168
169 // node1 is old. Remove its contents, but reuse it for newer data.
170 freeNode(node1->child);
171 node1->child = n->child;
172 n->child = NULL;
173
174 // Get rid of old node n.
175 if(n->prev) {
176 n->prev->next = n->next;
177 }
178 if(n->next) {
179 n->next->prev = n->prev;
180 }
181 delete n;
182 break;
183 }
184 n = n->next;
185 }
186
187 // ====== Add a new node =================================================
188 node2->prev = node1;
189 node1->next = node2;
190 return(node1);
191 }
192
193
194 // ###### Node comparison function ##########################################
nodeComparisonFunction(const void * node1ptr,const void * node2ptr)195 int nodeComparisonFunction(const void* node1ptr, const void* node2ptr)
196 {
197 const Node* node1 = *((Node**)node1ptr);
198 const Node* node2 = *((Node**)node2ptr);
199 if(node1->priority > node2->priority) {
200 return(-1);
201 }
202 else if(node1->priority < node2->priority) {
203 return(1);
204 }
205 if(node1->keyword < node2->keyword) {
206 return(-1);
207 }
208 else if(node1->keyword > node2->keyword) {
209 return(1);
210 }
211 return(0);
212 }
213
214
215 // ###### Sort children of node #############################################
sortChildren(Node * node)216 static void sortChildren(Node* node)
217 {
218 Node* child = node->child;
219 if(child) {
220 const size_t children = countNodes(child);
221 Node* sortedChildrenSet[children];
222 size_t i = 0;
223 while(child != NULL) {
224 sortedChildrenSet[i++] = child;
225 child = child->next;
226 }
227
228 qsort((void*)&sortedChildrenSet[0], children, sizeof(sortedChildrenSet[0]), nodeComparisonFunction);
229
230 for(i = 0; i < children; i++) {
231 if(i < children - 1) {
232 sortedChildrenSet[i]->next = sortedChildrenSet[i + 1];
233 }
234 else {
235 sortedChildrenSet[i]->next = NULL;
236 }
237 if(i > 0) {
238 sortedChildrenSet[i]->prev = sortedChildrenSet[i - 1];
239 }
240 else {
241 sortedChildrenSet[i]->prev = NULL;
242 }
243 }
244 node->child = sortedChildrenSet[0];
245 }
246 }
247
248
249 // ###### Find existing or create new child node ############################
addOrUpdateChildNode(Node * node,const char * childKeyword,const char * value)250 Node* addOrUpdateChildNode(Node* node, const char* childKeyword, const char* value)
251 {
252 Node* child = findChildNode(node, childKeyword);
253 if(child == NULL) {
254 child = makePublicationInfoItem(childKeyword, value);
255 assert(child != NULL);
256 child->next = node->child;
257 node->child = child;
258 sortChildren(node);
259 }
260 else {
261 child->value = value;
262 }
263 return(child);
264 }
265
266
267 // ###### Check number of occurrences for a field ###########################
requiresField(const Node * publication,const char * field,const size_t minimum,const size_t maximum)268 static bool requiresField(const Node* publication,
269 const char* field,
270 const size_t minimum,
271 const size_t maximum)
272 {
273 const size_t count = countChildNodes(publication, field);
274 if(count < minimum) {
275 fprintf(stderr, "WARNING: Entry %s has no \"%s\" section!\n",
276 publication->keyword.c_str(),
277 field);
278 return(false);
279 }
280 else if(count > maximum) {
281 fprintf(stderr, "WARNING: Entry %s has %u \"%s\" sections!\n",
282 publication->keyword.c_str(),
283 (unsigned int)count, field);
284 return(false);
285 }
286 return(true);
287 }
288
289
290 // ###### Make publication ##################################################
makePublication(const char * type,const char * label,Node * publicationInfo)291 Node* makePublication(const char* type, const char* label, Node* publicationInfo)
292 {
293 Node* publication = createNode(label);
294 publication->child = publicationInfo;
295 publication->value = type;
296
297 sortChildren(publication);
298
299 if(publication->value != "Comment") {
300 requiresField(publication, "title", 1, 1);
301 requiresField(publication, "author", 1, 1);
302 requiresField(publication, "year", 1, 1);
303 requiresField(publication, "isbn", 0, 1);
304 requiresField(publication, "issn", 0, 1);
305 requiresField(publication, "doi", 0, 1);
306 requiresField(publication, "url", 0, 1);
307 requiresField(publication, "url.size", 0, 1);
308 requiresField(publication, "url.mime", 0, 1);
309 requiresField(publication, "url.md5", 0, 1);
310 requiresField(publication, "url.checked", 0, 1);
311 requiresField(publication, "urn", 0, 1);
312 requiresField(publication, "pages", 0, 1);
313 requiresField(publication, "numpages", 0, 1);
314 requiresField(publication, "day", 0, 1);
315 requiresField(publication, "month", 0, 1);
316 requiresField(publication, "address", 0, 1);
317 requiresField(publication, "location", 0, 1);
318 requiresField(publication, "note", 0, 1);
319 requiresField(publication, "howpublished", 0, 1);
320 requiresField(publication, "publisher", 0, 1);
321 requiresField(publication, "school", 0, 1);
322 requiresField(publication, "institution", 0, 1);
323 requiresField(publication, "type", 0, 1);
324 requiresField(publication, "number", 0, 1);
325 requiresField(publication, "issue", 0, 1);
326 requiresField(publication, "volume", 0, 1);
327 requiresField(publication, "abstract", 0, 1);
328 requiresField(publication, "keywords", 0, 1);
329 if(publication->value == "Article") {
330 requiresField(publication, "journal", 1, 1);
331 }
332 else if(publication->value == "Book") {
333 requiresField(publication, "publisher", 1, 1);
334 }
335 else if(publication->value == "InProceedings") {
336 requiresField(publication, "booktitle", 1, 1);
337 }
338 else if(publication->value == "TechReport") {
339 requiresField(publication, "institution", 1, 1);
340 }
341
342 Node* author = findChildNode(publication, "author");
343 if(author != NULL) {
344 unifyAuthor(publication, author);
345 }
346 else {
347 fprintf(stderr, "WARNING: Entry %s has no \"author\" section!\n" , label);
348 }
349
350 Node* booktitle = findChildNode(publication, "booktitle");
351 if(booktitle != NULL) {
352 unifyBookTitle(publication, booktitle);
353 }
354 Node* howPublished = findChildNode(publication, "howPublished");
355 if(howPublished != NULL) {
356 unifyBookTitle(publication, howPublished);
357 }
358 Node* journal = findChildNode(publication, "journal");
359 if(journal != NULL) {
360 unifyBookTitle(publication, journal); // Same as for booktitle!
361 }
362 Node* pages = findChildNode(publication, "pages");
363 if(pages != NULL) {
364 unifyPages(publication, pages);
365 }
366 Node* numpages = findChildNode(publication, "numpages");
367 if(numpages != NULL) {
368 unifyNumPages(publication, numpages);
369 }
370
371 Node* isbn = findChildNode(publication, "isbn");
372 if(isbn != NULL) {
373 unifyISBN(publication, isbn);
374 }
375 Node* issn = findChildNode(publication, "issn");
376 if(issn != NULL) {
377 unifyISSN(publication, issn);
378 }
379
380 Node* year = findChildNode(publication, "year");
381 Node* month = findChildNode(publication, "month");
382 Node* day = findChildNode(publication, "day");
383 if( (year != NULL) || (month != NULL) || (day != NULL) ) {
384 unifyDate(publication, year, month, day);
385 }
386
387 Node* url = findChildNode(publication, "url");
388 if(url != NULL) {
389 unifyURL(publication, url);
390 }
391 }
392
393 return(publication);
394 }
395
396
397 // ###### Make publication info #############################################
makePublicationInfo(Node * node1,Node * node2)398 Node* makePublicationInfo(Node* node1, Node* node2)
399 {
400 if(node1 != NULL) {
401 node2->prev = node1;
402 node1->next = node2;
403 return(node1);
404 }
405 else {
406 return(node2);
407 }
408 }
409
410
411 // ###### Make publication info item ########################################
makePublicationInfoItem(const char * keyword,const char * value)412 Node* makePublicationInfoItem(const char* keyword, const char* value)
413 {
414 Node* node = createNode("PublicationInfoItem");
415 const size_t keywordLength = strlen(keyword);
416 char keywordString[keywordLength + 1];
417 size_t i;
418
419 // ====== Create new entry ===============================================
420 for(i = 0;i < keywordLength;i++) {
421 keywordString[i] = tolower(keyword[i]);
422 }
423 keywordString[keywordLength] = 0x00;
424
425 node->keyword = keywordString;
426 node->value = value;
427 if( (node->keyword != "author") ) { // Brackets must remain for author string!
428 removeBrackets(node->value);
429 trim(node->value);
430 }
431
432 if(node->value == "") { // Empty content -> This item is useless
433 node->keyword = "removeme";
434 }
435
436 // ====== Set priorities for well-known keyword fields ===================
437 if(node->keyword == "author") {
438 node->priority = 255;
439 }
440 else if(node->keyword == "title") {
441 node->priority = 254;
442 }
443
444 else if(node->keyword == "howpublished") {
445 node->priority = 252;
446 }
447 else if(node->keyword == "booktitle") {
448 node->priority = 251;
449 }
450 else if(node->keyword == "series") {
451 node->priority = 250;
452 }
453 else if(node->keyword == "journal") {
454 node->priority = 249;
455 }
456 else if(node->keyword == "type") {
457 node->priority = 248;
458 }
459 else if(node->keyword == "volume") {
460 node->priority = 247;
461 }
462 else if(node->keyword == "issue") {
463 node->priority = 246;
464 }
465 else if(node->keyword == "number") {
466 node->priority = 245;
467 }
468 else if(node->keyword == "edition") {
469 node->priority = 244;
470 }
471 else if(node->keyword == "editor") {
472 node->priority = 243;
473 }
474 else if(node->keyword == "pages") {
475 node->priority = 242;
476 }
477 else if(node->keyword == "numpages") {
478 node->priority = 241;
479 }
480
481 else if(node->keyword == "day") {
482 node->priority = 239;
483 }
484 else if(node->keyword == "month") {
485 node->priority = 238;
486 }
487 else if(node->keyword == "year") {
488 node->priority = 237;
489 }
490
491 else if(node->keyword == "organization") {
492 node->priority = 235;
493 }
494 else if(node->keyword == "school") {
495 node->priority = 234;
496 }
497 else if(node->keyword == "institution") {
498 node->priority = 233;
499 }
500 else if(node->keyword == "location") {
501 node->priority = 232;
502 }
503 else if(node->keyword == "publisher") {
504 node->priority = 231;
505 }
506 else if(node->keyword == "address") {
507 node->priority = 230;
508 }
509
510 else if(node->keyword == "language") {
511 node->priority = 226;
512 }
513 else if(node->keyword == "content-language") {
514 node->priority = 225;
515 }
516 else if(node->keyword == "isbn") {
517 node->priority = 224;
518 }
519 else if(node->keyword == "issn") {
520 node->priority = 223;
521 }
522 else if(node->keyword == "urn") {
523 node->priority = 222;
524 }
525 else if(node->keyword == "doi") {
526 node->priority = 221;
527 }
528 else if(node->keyword == "note") {
529 node->priority = 220;
530 }
531
532 else if(node->keyword == "keywords") {
533 node->priority = 211;
534 }
535 else if(node->keyword == "abstract") {
536 node->priority = 210;
537 }
538
539 else if(node->keyword == "url") {
540 node->priority = 199;
541 }
542 else if(node->keyword == "url.size") {
543 node->priority = 198;
544 }
545 else if(node->keyword == "url.md5") {
546 node->priority = 197;
547 }
548 else if(node->keyword == "url.mime") {
549 node->priority = 196;
550 }
551 else if(node->keyword == "url.pagesize") {
552 node->priority = 195;
553 }
554 else if(node->keyword == "url.checked") {
555 node->priority = 194;
556 }
557 else if(node->keyword == "url.keywords") {
558 node->priority = 193;
559 }
560
561 else {
562 // printf("UNKNOWN=<%s>\n", node->keyword.c_str());
563 }
564
565 return(node);
566 }
567