1 /*
2 * bibtexin.c
3 *
4 * Copyright (c) Chris Putnam 2003-2020
5 * Copyright (c) Georgi N. Boshnakov 2020
6 *
7 * Program and source code released under the GPL version 2
8 *
9 */
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <ctype.h>
14 #include "is_ws.h"
15 #include "intlist.h"
16 #include "str.h"
17 #include "utf8.h"
18 #include "str_conv.h"
19 #include "fields.h"
20 #include "slist.h"
21 #include "name.h"
22 #include "title.h"
23 #include "url.h"
24 #include "reftypes.h"
25 #include "latex_parse.h"
26 #include "bibformats.h"
27 #include "generic.h"
28
29 static slist find = { 0, 0, 0, NULL };
30 static slist replace = { 0, 0, 0, NULL };
31
32 extern variants bibtex_all[];
33 extern int bibtex_nall;
34
35 /*****************************************************
PUBLIC: int bibtexin_initparams()
37 *****************************************************/
38
39 static int bibtexin_convertf( fields *bibin, fields *info, int reftype, param *p );
40 static int bibtexin_processf( fields *bibin, const char *data, const char *filename, long nref, param *p );
41 static int bibtexin_cleanf( bibl *bin, param *p );
42 static int bibtexin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset );
43 static int bibtexin_typef( fields *bibin, const char *filename, int nrefs, param *p );
44
45 int
bibtexin_initparams(param * pm,const char * progname)46 bibtexin_initparams( param *pm, const char *progname )
47 {
48 pm->readformat = BIBL_BIBTEXIN;
49 pm->charsetin = BIBL_CHARSET_DEFAULT;
50 pm->charsetin_src = BIBL_SRC_DEFAULT;
51 pm->latexin = 1;
52 pm->xmlin = 0;
53 pm->utf8in = 0;
54 pm->nosplittitle = 0;
55 pm->verbose = 0;
56 pm->addcount = 0;
57 pm->output_raw = 0;
58
59 pm->readf = bibtexin_readf;
60 pm->processf = bibtexin_processf;
61 pm->cleanf = bibtexin_cleanf;
62 pm->typef = bibtexin_typef;
63 pm->convertf = bibtexin_convertf;
64 pm->all = bibtex_all;
65 pm->nall = bibtex_nall;
66
67 slist_init( &(pm->asis) );
68 slist_init( &(pm->corps) );
69
70 // TODO: these probably should be made parameters, as the others above;
71 // note that 'find' and 'replace' work in tandem, so both need to be cleared.
72 slist_free( &find );
73 slist_free( &replace );
74
75 if ( !progname ) pm->progname = NULL;
76 else {
77 pm->progname = strdup( progname );
78 if ( pm->progname==NULL ) return BIBL_ERR_MEMERR;
79 }
80
81 return BIBL_OK;
82 }
83
84 /*****************************************************
85 PUBLIC: int bibtexin_readf()
86 *****************************************************/
87
88 /*
89 * readf can "read too far", so we store this information in line, thus
90 * the next new text is in line, either from having read too far or
91 * from the next chunk obtained via str_fget()
92 *
93 * return 1 on success, 0 on error/end-of-file
94 *
95 */
96 static int
readmore(FILE * fp,char * buf,int bufsize,int * bufpos,str * line)97 readmore( FILE *fp, char *buf, int bufsize, int *bufpos, str *line )
98 {
99 if ( line->len ) return 1;
100 else return str_fget( fp, buf, bufsize, bufpos, line );
101 }
102
103 /*
104 * readf()
105 *
106 * returns zero if cannot get reference and hit end of-file
107 * returns 1 if last reference in file, 2 if reference within file
108 */
109 static int
bibtexin_readf(FILE * fp,char * buf,int bufsize,int * bufpos,str * line,str * reference,int * fcharset)110 bibtexin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset )
111 {
112 int haveref = 0;
113 const char *p;
114 *fcharset = CHARSET_UNKNOWN;
115 while ( haveref!=2 && readmore( fp, buf, bufsize, bufpos, line ) ) {
116 if ( line->len == 0 ) continue; /* blank line */
117 p = &(line->data[0]);
118 /* Recognize UTF8 BOM */
119 if ( line->len > 2 &&
120 (unsigned char)(p[0])==0xEF &&
121 (unsigned char)(p[1])==0xBB &&
122 (unsigned char)(p[2])==0xBF ) {
123 *fcharset = CHARSET_UNICODE;
124 p += 3;
125 }
126 p = skip_ws( p );
127 if ( *p == '%' ) { /* commented out line */
128 str_empty( line );
129 continue;
130 }
131 if ( *p == '@' ) haveref++;
132 if ( haveref && haveref<2 ) {
133 str_strcatc( reference, p );
134 str_addchar( reference, '\n' );
135 str_empty( line );
136 } else if ( !haveref ) str_empty( line );
137
138 }
139 return haveref;
140 }
141
142 /*****************************************************
143 PUBLIC: int bibtexin_processf()
144 *****************************************************/
145
/* Where the parser currently is; used when reporting problems. */
struct loc {
	const char *progname;   /* program name to prefix messages with */
	const char *filename;   /* file being read */
	long nref;              /* number of the reference being processed */
};
typedef struct loc loc;
151
152 /* process_bibtextype()
153 *
154 * extract 'article', 'book', etc. from:
155 *
156 * @article{...}
157 * @book(...)
158 *
159 * return pointer after '{' or '(' character
160 */
161 static const char*
process_bibtextype(const char * p,str * type)162 process_bibtextype( const char *p, str *type )
163 {
164 str tmp;
165
166 str_init( &tmp );
167
168 if ( *p=='@' ) p++;
169 p = skip_ws( p );
170
171 p = str_cpytodelim( &tmp, p, "{( \t\r\n", 0 );
172 p = skip_ws( p );
173
174 if ( *p=='{' || *p=='(' ) p++;
175 p = skip_ws( p );
176
177 if ( str_has_value( &tmp ) ) str_strcpy( type, &tmp );
178 else str_empty( type );
179
180 str_free( &tmp );
181
182 return p;
183 }
184
185 char *dummy_id = "dummyid";
186
187 static const char *
process_bibtexid(const char * p,str * id)188 process_bibtexid( const char *p, str *id )
189 {
190 const char *start_p = p;
191 str tmp;
192
193 str_init( &tmp );
194 p = str_cpytodelim( &tmp, p, ",", 1 );
195
196 if ( str_has_value( &tmp ) ) {
197 if ( strchr( tmp.data, '=' ) ) {
198 /* Endnote writes bibtex files w/o fields, try to
199 * distinguish via presence of an equal sign.... if
200 * it's there, assume that it's a tag/data pair instead
201 * and roll back.
202 */
203 p = start_p;
204 str_empty( id );
205 } else {
206 str_strcpy( id, &tmp );
207 }
208 } else {
209 // Georgi was: str_empty( id );
210 str_strcpyc( id, dummy_id );
211 }
212
213 str_free( &tmp );
214 return skip_ws( p );
215 }
216
217 /* bibtex_tag()
218 *
219 * returns NULL on memory error, else position after tag+whitespace
220 */
221 static const char *
bibtex_tag(const char * p,str * tag)222 bibtex_tag( const char *p, str *tag )
223 {
224 p = str_cpytodelim( tag, p, "= \t\r\n", 0 );
225 if ( str_memerr( tag ) ) return NULL;
226 return skip_ws( p );
227 }
228
/* quotation_mark_is_escaped()
 *
 * A quotation mark at 'p' does not delimit a string when it is inside
 * braces or directly preceded by a backslash. 'startp' is the start of
 * the buffer, so nothing before it is inspected.
 *
 * returns 1 if escaped, 0 otherwise
 */
static int
quotation_mark_is_escaped( int nbraces, const char *p, const char *startp )
{
	return ( nbraces != 0 || ( p != startp && p[-1] == '\\' ) ) ? 1 : 0;
}
236
/* brace_is_escaped()
 *
 * A brace at 'p' does not count towards nesting when it is inside a
 * quoted string or directly preceded by a backslash. 'startp' is the
 * start of the buffer, so nothing before it is inspected.
 *
 * returns 1 if escaped, 0 otherwise
 */
static int
brace_is_escaped( int nquotes, const char *p, const char *startp )
{
	return ( nquotes != 0 || ( p != startp && p[-1] == '\\' ) ) ? 1 : 0;
}
244
/* char_is_escaped()
 *
 * returns 1 while inside a quoted string or a braced group, else 0
 */
static int
char_is_escaped( int nquotes, int nbraces )
{
	return ( nquotes == 0 && nbraces == 0 ) ? 0 : 1;
}
251
252 static int
add_token(slist * tokens,str * token)253 add_token( slist *tokens, str *token )
254 {
255 int status;
256
257 if ( str_memerr( token ) ) return BIBL_ERR_MEMERR;
258
259 status = slist_add( tokens, token );
260 if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
261
262 str_empty( token );
263
264 return BIBL_OK;
265 }
266
267 static const char *
bibtex_data(const char * p,slist * tokens,loc * currloc)268 bibtex_data( const char *p, slist *tokens, loc *currloc )
269 {
270 int nbraces = 0, nquotes = 0;
271 const char *startp = p;
272 int status;
273 str token;
274
275 str_init( &token );
276
277 while ( p && *p ) {
278
279 /* ...have we reached end-of-data? */
280 if ( nquotes==0 && nbraces==0 ) {
281 if ( *p==',' || *p=='=' || *p=='}' || *p==')' ) goto out;
282 }
283
284 if ( *p=='\"' ) {
285 str_addchar( &token, *p );
286 if ( !quotation_mark_is_escaped( nbraces, p, startp ) ) {
287 nquotes = !nquotes;
288 if ( nquotes==0 ) {
289 status = add_token( tokens, &token );
290 if ( status!=BIBL_OK ) { p=NULL; goto out0; }
291 }
292 }
293 }
294
295 else if ( *p=='{' ) {
296 str_addchar( &token, *p );
297 if ( !brace_is_escaped( nquotes, p, startp ) ) {
298 nbraces++;
299 }
300 }
301
302 else if ( *p=='}' ) {
303 str_addchar( &token, *p );
304 if ( !brace_is_escaped( nquotes, p, startp ) ) {
305 nbraces--;
306 if ( nbraces==0 ) {
307 status = add_token( tokens, &token );
308 if ( status!=BIBL_OK ) { p=NULL; goto out0; }
309 }
310 if ( nbraces<0 ) {
311 goto out;
312 }
313 }
314 }
315
316 else if ( *p=='#' ) {
317 if ( char_is_escaped( nquotes, nbraces ) ) {
318 str_addchar( &token, *p );
319 }
320 /* ...this is a bibtex string concatentation token */
321 else {
322 if ( str_has_value( &token ) ) {
323 status = add_token( tokens, &token );
324 if ( status!=BIBL_OK ) { p=NULL; goto out0; }
325 }
326 status = slist_addc( tokens, "#" );
327 if ( status!=SLIST_OK ) { p=NULL; goto out0; }
328 }
329 }
330
331 /* ...add escaped white-space and non-white-space to current token */
332 else if ( !is_ws( *p ) || char_is_escaped( nquotes, nbraces ) ) {
333 /* always add non-whitespace characters */
334 if ( !is_ws( *p ) ) {
335 str_addchar( &token, *p );
336 }
337 /* only add whitespace if token is non-empty; convert CR/LF to space */
338 else if ( token.len!=0 ) {
339 if ( *p!='\n' && *p!='\r' )
340 str_addchar( &token, *p );
341 else {
342 str_addchar( &token, ' ' );
343 while ( is_ws( *(p+1) ) ) p++;
344 }
345 }
346 }
347
348 /* ...unescaped white-space marks the end of a token */
349 else if ( is_ws( *p ) ) {
350 if ( token.len ) {
351 status = add_token( tokens, &token );
352 if ( status!=BIBL_OK ) { p=NULL; goto out0; }
353 }
354 }
355
356 p++;
357 }
358 out:
359 if ( nbraces!=0 ) {
360 REprintf( "%s: Mismatch in number of braces in file %s reference %ld.\n", currloc->progname, currloc->filename, currloc->nref );
361 }
362 if ( nquotes!=0 ) {
363 REprintf( "%s: Mismatch in number of quotes in file %s reference %ld.\n", currloc->progname, currloc->filename, currloc->nref );
364 }
365 if ( str_has_value( &token ) ) {
366 if ( str_memerr( &token ) ) { p = NULL; goto out; }
367 status = slist_add( tokens, &token );
368 if ( status!=SLIST_OK ) p = NULL;
369 }
370 out0:
371 str_free( &token );
372 return p;
373 }
374
375 #define NOT_ESCAPED (0)
376 #define ESCAPED_QUOTES (1)
377 #define ESCAPED_BRACES (2)
378
379 static int
token_is_escaped(str * s)380 token_is_escaped( str *s )
381 {
382 if ( s->data[0]=='\"' && s->data[s->len-1]=='\"' ) return ESCAPED_QUOTES;
383 if ( s->data[0]=='{' && s->data[s->len-1]=='}' ) return ESCAPED_BRACES;
384 return NOT_ESCAPED;
385 }
386
387 /* replace_strings()
388 *
389 * do bibtex string replacement for data tokens
390 */
391 static int
replace_strings(slist * tokens)392 replace_strings( slist *tokens )
393 {
394 int i, n;
395 str *s;
396
397 for ( i=0; i<tokens->n; ++i ) {
398
399 s = slist_str( tokens, i );
400
401 /* ...skip if token is protected by quotation marks or braces */
402 if ( token_is_escaped( s ) ) continue;
403
404 /* ...skip if token is string concatentation symbol */
405 if ( !str_strcmpc( s, "#" ) ) continue;
406
407 n = slist_find( &find, s );
408 if ( slist_wasnotfound( &find, n ) ) continue;
409
410 str_strcpy( s, slist_str( &replace, n ) );
411 if ( str_memerr( s ) ) return BIBL_ERR_MEMERR;
412
413 }
414
415 return BIBL_OK;
416 }
417
418 static int
string_concatenate(slist * tokens,loc * currloc)419 string_concatenate( slist *tokens, loc *currloc )
420 {
421 int i, status, esc_s, esc_t;
422 str *s, *t;
423
424 i = 0;
425 while ( i < tokens->n ) {
426
427 s = slist_str( tokens, i );
428 if ( str_strcmpc( s, "#" ) ) {
429 i++;
430 continue;
431 }
432
433 if ( i==0 || i==tokens->n-1 ) {
434 REprintf( "%s: Warning: Stray string concatenation ('#' character) in file %s reference %ld\n",
435 currloc->progname, currloc->filename, currloc->nref );
436 status = slist_remove( tokens, i );
437 if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
438 continue;
439 }
440
441 s = slist_str( tokens, i-1 );
442 t = slist_str( tokens, i+1 );
443
444 esc_s = token_is_escaped( s );
445 esc_t = token_is_escaped( t );
446
447 if ( esc_s != NOT_ESCAPED ) str_trimend( s, 1 );
448 if ( esc_t != NOT_ESCAPED ) str_trimbegin( t, 1 );
449 if ( esc_s != esc_t ) {
450 if ( esc_s == NOT_ESCAPED ) {
451 if ( esc_t == ESCAPED_QUOTES ) str_prepend( s, "\"" );
452 else str_prepend( s, "{" );
453 }
454 else {
455 if ( esc_t != NOT_ESCAPED ) str_trimend( t, 1 );
456 if ( esc_s == ESCAPED_QUOTES ) str_addchar( t, '\"' );
457 else str_addchar( t, '}' );
458 }
459 }
460
461 str_strcat( s, t );
462 if ( str_memerr( s ) ) return BIBL_ERR_MEMERR;
463
464 /* ...remove concatenated string t */
465 status = slist_remove( tokens, i+1 );
466 if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
467
468 /* ...remove concatentation token '#' */
469 status = slist_remove( tokens, i );
470 if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
471
472 }
473
474 return BIBL_OK;
475 }
476
477 #define KEEP_QUOTES (0)
478 #define STRIP_QUOTES (1)
479
480 static int
merge_tokens_into_data(str * data,slist * tokens,int stripquotes)481 merge_tokens_into_data( str *data, slist *tokens, int stripquotes )
482 {
483 int i, esc_s;
484 str *s;
485
486 for ( i=0; i<tokens->n; i++ ) {
487
488 s = slist_str( tokens, i );
489 esc_s = token_is_escaped( s );
490
491 if ( ( esc_s == ESCAPED_BRACES ) ||
492 ( stripquotes == STRIP_QUOTES && esc_s == ESCAPED_QUOTES ) ) {
493 str_trimbegin( s, 1 );
494 str_trimend( s, 1 );
495 }
496
497 str_strcat( data, s );
498
499 }
500
501 if ( str_memerr( data ) ) return BIBL_ERR_MEMERR;
502 else return BIBL_OK;
503 }
504
505 /* return NULL on memory error */
506 static const char *
process_bibtexline(const char * p,str * tag,str * data,uchar stripquotes,loc * currloc)507 process_bibtexline( const char *p, str *tag, str *data, uchar stripquotes, loc *currloc )
508 {
509 slist tokens;
510 int status;
511
512 str_empty( data );
513
514 slist_init( &tokens );
515
516 p = bibtex_tag( skip_ws( p ), tag );
517 if ( p ) {
518 if ( str_is_empty( tag ) ) {
519 p = skip_line( p );
520 goto out;
521 }
522 }
523
524 if ( p && *p=='=' ) {
525 p = bibtex_data( p+1, &tokens, currloc );
526 }
527
528 if ( p ) {
529 status = replace_strings( &tokens );
530 if ( status!=BIBL_OK ) p = NULL;
531 }
532
533 if ( p ) {
534 status = string_concatenate( &tokens, currloc );
535 if ( status!=BIBL_OK ) p = NULL;
536 }
537
538 if ( p ) {
539 status = merge_tokens_into_data( data, &tokens, stripquotes );
540 if ( status!=BIBL_OK ) p = NULL;
541 }
542
543 out:
544 slist_free( &tokens );
545 return p;
546 }
547
548 /* process_ref()
549 *
550 */
551 static int
process_ref(fields * bibin,const char * p,loc * currloc)552 process_ref( fields *bibin, const char *p, loc *currloc )
553 {
554 int fstatus, status = BIBL_OK;
555 str type, id, tag, data;
556
557 strs_init( &type, &id, &tag, &data, NULL );
558
559 p = process_bibtextype( p, &type );
560 p = process_bibtexid( p, &id );
561
562 if ( str_is_empty( &type ) || str_is_empty( &id ) ) goto out;
563
564 fstatus = fields_add( bibin, "INTERNAL_TYPE", str_cstr( &type ), LEVEL_MAIN );
565 if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
566
567 fstatus = fields_add( bibin, "REFNUM", str_cstr( &id ), LEVEL_MAIN );
568 if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
569
570 while ( *p ) {
571
572 p = process_bibtexline( p, &tag, &data, STRIP_QUOTES, currloc );
573 if ( p==NULL ) { status = BIBL_ERR_MEMERR; goto out; }
574
575 if ( !str_has_value( &tag ) || !str_has_value( &data ) ) continue;
576
577 fstatus = fields_add( bibin, str_cstr( &tag ), str_cstr( &data ), LEVEL_MAIN );
578 if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
579
580 }
581 out:
582 strs_free( &type, &id, &tag, &data, NULL );
583 return status;
584 }
585
586 /* process_string()
587 *
588 * Handle lines like:
589 *
590 * '@STRING{TL = {Tetrahedron Lett.}}'
591 *
592 * p should point to just after '@STRING'
593 *
594 * In BibTeX, if a string is defined several times, the last one is kept.
595 *
596 */
597 static int
process_string(const char * p,loc * currloc)598 process_string( const char *p, loc *currloc )
599 {
600 int n, status = BIBL_OK;
601 str s1, s2, *t;
602
603 strs_init( &s1, &s2, NULL );
604
605 while ( *p && *p!='{' && *p!='(' ) p++;
606 if ( *p=='{' || *p=='(' ) p++;
607
608 p = process_bibtexline( skip_ws( p ), &s1, &s2, KEEP_QUOTES, currloc );
609 if ( p==NULL ) { status = BIBL_ERR_MEMERR; goto out; }
610
611 if ( str_has_value( &s2 ) ) {
612 str_findreplace( &s2, "\\ ", " " );
613 } else {
614 str_strcpyc( &s2, "" );
615 }
616
617 if ( str_has_value( &s1 ) ) {
618 n = slist_find( &find, &s1 );
619 if ( n==-1 ) {
620 status = slist_add_ret( &find, &s1, BIBL_OK, BIBL_ERR_MEMERR );
621 if ( status!=BIBL_OK ) goto out;
622 status = slist_add_ret( &replace, &s2, BIBL_OK, BIBL_ERR_MEMERR );
623 if ( status!=BIBL_OK ) goto out;
624 } else {
625 t = slist_set( &replace, n, &s2 );
626 if ( t==NULL ) { status = BIBL_ERR_MEMERR; goto out; }
627 }
628 }
629
630 out:
631 strs_free( &s1, &s2, NULL );
632 return status;
633 }
634
635 /* bibtexin_processf()
636 *
637 * Handle '@STRING', '@reftype', and ignore '@COMMENT'
638 * Georgi: also ignore @PREAMBLE
639 */
640 static int
bibtexin_processf(fields * bibin,const char * data,const char * filename,long nref,param * pm)641 bibtexin_processf( fields *bibin, const char *data, const char *filename, long nref, param *pm )
642 {
643 loc currloc;
644
645 currloc.progname = pm->progname;
646 currloc.filename = filename;
647 currloc.nref = nref;
648
649 if ( !strncasecmp( data, "@STRING", 7 ) ) {
650 process_string( data+7, &currloc );
651 return 0;
652 } else if ( !strncasecmp( data, "@COMMENT", 8 ) || !strncasecmp( data, "@PREAMBLE", 9 )) {
653 // Georgi: added @PREAMBLE
654 // todo: It could make sense to keep it for output to bibtex (or TeX related)
655
656 /* Not sure if these are real Bibtex, but not references */
657 return 0;
658 } else {
659 process_ref( bibin, data, &currloc );
660 return 1;
661 }
662 }
663
664 /*****************************************************
PUBLIC: int bibtexin_cleanf()
666 *****************************************************/
667
668 static int
is_url_tag(str * tag)669 is_url_tag( str *tag )
670 {
671 if ( str_has_value( tag ) ) {
672 if ( !strcasecmp( str_cstr( tag ), "url" ) ) return 1;
673 if ( !strcasecmp( str_cstr( tag ), "file" ) ) return 1;
674 if ( !strcasecmp( str_cstr( tag ), "doi" ) ) return 1;
675 if ( !strcasecmp( str_cstr( tag ), "sentelink" ) ) return 1;
676 }
677 return 0;
678 }
679
680 static int
is_name_tag(str * tag)681 is_name_tag( str *tag )
682 {
683 if ( str_has_value( tag ) ) {
684 if ( !strcasecmp( str_cstr( tag ), "author" ) ) return 1;
685 if ( !strcasecmp( str_cstr( tag ), "editor" ) ) return 1;
686 if ( !strcasecmp( str_cstr( tag ), "translator" ) ) return 1;
687 }
688 return 0;
689 }
690
691 static int
bibtex_cleanvalue(str * value)692 bibtex_cleanvalue( str *value )
693 {
694 int status;
695 str parsed;
696
697 str_init( &parsed );
698 // REprintf("before clean: %s\n", value->data);
699
700 status = latex_parse( value, &parsed );
701 if ( status!=BIBL_OK ) goto out;
702
703 str_strcpy( value, &parsed );
704 if ( str_memerr( value ) ) status = BIBL_ERR_MEMERR;
705
706 // REprintf("after clean: %s\n", value->data);
707
708 out:
709 str_free( &parsed );
710 return status;
711 }
712
713 static int
bibtex_matches_list(fields * bibout,char * tag,char * suffix,str * data,int level,slist * names,int * match)714 bibtex_matches_list( fields *bibout, char *tag, char *suffix, str *data, int level, slist *names, int *match )
715 {
716 int n, fstatus;
717 str mergedtag;
718
719 *match = 0;
720
721 n = slist_find( names, data );
722 if ( slist_wasfound( names, n ) ) {
723 str_initstrsc( &mergedtag, tag, suffix, NULL );
724 fstatus = fields_add( bibout, str_cstr( &mergedtag ), str_cstr( data ), level );
725 str_free( &mergedtag );
726 if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
727 *match = 1;
728 }
729
730 return BIBL_OK;
731 }
732
733 static int
bibtex_matches_asis_or_corps(fields * bibin,int m,param * pm,int * match)734 bibtex_matches_asis_or_corps( fields *bibin, int m, param *pm, int *match )
735 {
736 int status;
737
738 status = bibtex_matches_list( bibin, fields_tag( bibin, m, FIELDS_STRP ), ":ASIS", fields_value( bibin, m, FIELDS_STRP ), LEVEL_MAIN, &(pm->asis), match );
739 if ( *match==1 || status!=BIBL_OK ) return status;
740
741 status = bibtex_matches_list( bibin, fields_tag( bibin, m, FIELDS_STRP ), ":CORP", fields_value( bibin, m, FIELDS_STRP ), LEVEL_MAIN, &(pm->corps), match );
742 if ( *match==1 || status!=BIBL_OK ) return status;
743
744 return BIBL_OK;
745 }
746
747 /* We need to:
748 * (1) break names into LaTeX tokens (e.g. respect "{van der Hoff}" as a single name element)
749 * (2) clean the values by removing brackets and things
750 * (3) convert the character set before any name processing happens (else things like "\"O" get split up)
751 */
752 static int
bibtex_person_tokenize(fields * bibin,int m,param * pm,slist * tokens)753 bibtex_person_tokenize( fields *bibin, int m, param *pm, slist *tokens )
754 {
755 int i, ok, status;
756 str *s;
757
758 // REprintf("person!\n");
759 status = latex_tokenize( tokens, fields_value( bibin, m, FIELDS_STRP ) );
760 if ( status!=BIBL_OK ) return status;
761
762
763 for ( i=0; i<tokens->n; ++i ) {
764
765 s = slist_str( tokens, i );
766
767 // Georgi: removing since changes latex characters to unicode
768 // in names, see comments in bibtexin_cleanref() (in bibtexin.c
769 // TODO: check if this causes bad side effects, ideally correct
770 //
771 // Reinstating this, bad side effects
772 status = bibtex_cleanvalue( s );
773 if ( status!=BIBL_OK ) return status;
774
775 // !!! Georgi: conversion is here!
776 // !!!
777 // REprintf("\ns before str_convert: %s\n", s->data);
778 ok = str_convert( s, pm->charsetin, 1, pm->utf8in, pm->xmlin,
779 // Georgi: change arg. latexout to 1
780 // TODO: make it argument to this function?
781 // it should depend on --no-latex
782 // v1.3 - restoring latexout to 0
783 pm->charsetout, 0, pm->utf8out, pm->xmlout );
784 // REprintf("s after str_convert: %s\n", s->data);
785 if ( !ok ) return BIBL_ERR_MEMERR;
786
787 }
788
789 return BIBL_OK;
790 }
791
792 /* We need to:
793 * (1) Build individual names
794 * (2) Add them to the end of fields *bibin -- because of this, we have to look up the tag/data every time
795 * because we can reallocate the raw data and make any pointers stale
796 */
797 static int
bibtex_person_add_names(fields * bibin,int m,slist * tokens)798 bibtex_person_add_names( fields *bibin, int m, slist *tokens )
799 {
800 int begin, end, ok, n, etal;
801
802 etal = name_findetal( tokens );
803 // REprintf("person_add_names!\n");
804
805 begin = 0;
806 n = tokens->n - etal;
807 while ( begin < n ) {
808
809 end = begin + 1;
810
811 while ( end < n && strcasecmp( slist_cstr( tokens, end ), "and" ) )
812 end++;
813
814
815 if ( end - begin == 1 ) {
816 ok = name_addsingleelement( bibin, fields_tag( bibin,m,FIELDS_CHRP), slist_cstr( tokens, begin ), LEVEL_MAIN, NAME_ASIS );
817 if ( !ok ) return BIBL_ERR_MEMERR;
818 } else {
819 ok = name_addmultielement( bibin, fields_tag(bibin,m,FIELDS_CHRP), tokens, begin, end, LEVEL_MAIN );
820 if ( !ok ) return BIBL_ERR_MEMERR;
821 }
822
823 begin = end + 1;
824
825 /* Handle repeated 'and' errors: authors="G. F. Author and and B. K. Author" */
826 while ( begin < n && !strcasecmp( slist_cstr( tokens, begin ), "and" ) )
827 begin++;
828
829 }
830
831 if ( etal ) {
832 ok = name_addsingleelement( bibin, fields_tag(bibin,m,FIELDS_CHRP), "et al.", LEVEL_MAIN, NAME_ASIS );
833 if ( !ok ) return BIBL_ERR_MEMERR;
834 }
835
836 return BIBL_OK;
837 }
838
839 /* Keep looking up tag values--we can reallocate when we add new names here */
840 static int
bibtexin_person(fields * bibin,int m,param * pm)841 bibtexin_person( fields *bibin, int m, param *pm )
842 {
843 int status, match = 0;
844 slist tokens;
845 // REprintf("bibtexin_person!\n");
846
847 status = bibtex_matches_asis_or_corps( bibin, m, pm, &match );
848 if ( status!=BIBL_OK || match==1 ) return status;
849
850 slist_init( &tokens );
851
852 status = bibtex_person_tokenize( bibin, m, pm, &tokens );
853 if ( status!=BIBL_OK ) goto out;
854
855 status = bibtex_person_add_names( bibin, m, &tokens );
856 if ( status!=BIBL_OK ) goto out;
857
858 out:
859 slist_free( &tokens );
860 return status;
861
862 }
863
864 static int
bibtexin_cleanref(fields * bibin,param * pm)865 bibtexin_cleanref( fields *bibin, param *pm )
866 {
867 int i, n, fstatus, status = BIBL_OK;
868 str *tag, *value;
869 intlist toremove;
870
871 intlist_init( &toremove );
872
873 n = fields_num( bibin );
874 // REprintf("n = %d\n", n);
875
876 // REprintf("n = %d\n" , n);
877 // for(i = 0; i < n; i++) {
878 // REprintf("i = %d, value = %s\n", i, (bibin->value[i]).data);
879 // }
880
881
882
883 for ( i=0; i<n; ++i ) {
884
885 tag = fields_tag( bibin, i, FIELDS_STRP_NOUSE );
886 // REprintf("\ntag = %s\n", tag->data);
887 if ( is_url_tag( tag ) ) continue; /* protect url from parsing */
888
889 /* Georgi: protecting names, otherwise havoc ensues if the input is
890 in a different encoding;
891 TODO: test side effects of doing this.
892 delay names from undergoing any parsing */
893 /* 2020-09-26: but names need parsing since there may be more than one!
894 Commenting out to process properly names fields
895
896 TODO: return to this and check again!
897 I commented this out because of encodings - do tests!
898 Amendment: run the nex two lines but only if tag is not names tag
899 (actually, moved them to the else part)
900 */
901 // if ( is_name_tag( tag ) ) return BIBL_OK;
902 // if ( !is_name_tag( tag ) ){
903 value = fields_value( bibin, i, FIELDS_STRP_NOUSE );
904 if ( str_is_empty( value ) ) continue;
905
906 // }
907 if ( is_name_tag( tag ) ) {
908 status = bibtexin_person( bibin, i, pm );
909 // REprintf("i = %d\n", i);
910 // REprintf("value = %s\n", (bibin->value[i]).data);
911
912 if ( status!=BIBL_OK ) goto out;
913
914 fstatus = intlist_add( &toremove, i );
915 if ( fstatus!=INTLIST_OK ) { status = BIBL_ERR_MEMERR; goto out; }
916 // REprintf("nout = %d\n" , fields_num( bibin ));
917 // goto out;
918 }
919
920 // else {
921 // // // REprintf("i = %d, value = %s\n", i, (bibin->value[i]).data);
922 // //
923 // value = fields_value( bibin, i, FIELDS_STRP_NOUSE );
924 // if ( str_is_empty( value ) ) continue;
925 // //
926 // // // Georgi: bibtex_cleanvalue() drops $, {, }, for now just skip it
927 // // // TODO: fix bibtex_cleanvalue() to not do that when not necessary
928 // // // // REprintf("i = %d, value = %s\n", i, value->data);
929 // // // status = bibtex_cleanvalue( value );
930 // // // // REprintf("i = %d, value = %s\n", i, (bibin->value[i]).data);
931 // // // if ( status!=BIBL_OK ) goto out;
932 // }
933
934 }
935
936
937 // int nout = fields_num( bibin );
938 // if(nout > n) {
939 // REprintf("nout = %d\n" , nout);
940 // for(i = 0; i < nout; i++) {
941 // REprintf("i = %d, value = %s\n", i, (bibin->value[i]).data);
942 // }
943 //
944 // }
945
946
947 for ( i=toremove.n-1; i>=0; i-- ) {
948 fstatus = fields_remove( bibin, intlist_get( &toremove, i ) );
949 if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
950 }
951
952
953 out:
954
955 intlist_free( &toremove );
956
957
958 // nout = fields_num( bibin );
959 // if(nout > n) {
960 // REprintf("nout = %d\n" , nout);
961 // for(i = 0; i < nout; i++) {
962 // REprintf("i = %d, value = %s\n", i, (bibin->value[i]).data);
963 // }
964 //
965 // }
966
967 return status;
968 }
969
970 static void
bibtexin_nocrossref(bibl * bin,long i,int n,param * p)971 bibtexin_nocrossref( bibl *bin, long i, int n, param *p )
972 {
973 int n1 = fields_find( bin->ref[i], "REFNUM", LEVEL_ANY );
974 if ( p->progname ) REprintf( "%s: ", p->progname );
975 REprintf( "Cannot find cross-reference '%s'", (char*) fields_value( bin->ref[i], n, FIELDS_CHRP_NOUSE ) );
976 if ( n1!=FIELDS_NOTFOUND ) REprintf( " for reference '%s'\n", (char*) fields_value( bin->ref[i], n1, FIELDS_CHRP_NOUSE ) );
977 REprintf( "\n" );
978 }
979
980 static int
bibtexin_crossref_oneref(fields * bibref,fields * bibcross)981 bibtexin_crossref_oneref( fields *bibref, fields *bibcross )
982 {
983 int i, n, newlevel, ntype, fstatus;
984 char *type, *newtag, *newvalue;
985
986 ntype = fields_find( bibref, "INTERNAL_TYPE", LEVEL_ANY );
987 type = ( char * ) fields_value( bibref, ntype, FIELDS_CHRP_NOUSE );
988
989 n = fields_num( bibcross );
990
991 for ( i=0; i<n; ++i ) {
992
993 newtag = ( char * ) fields_tag( bibcross, i, FIELDS_CHRP_NOUSE );
994 if ( !strcasecmp( newtag, "INTERNAL_TYPE" ) ) continue;
995 if ( !strcasecmp( newtag, "REFNUM" ) ) continue;
996 if ( !strcasecmp( newtag, "TITLE" ) ) {
997 if ( !strcasecmp( type, "Inproceedings" ) ||
998 !strcasecmp( type, "Incollection" ) )
999 newtag = "booktitle";
1000 }
1001
1002 newvalue = ( char * ) fields_value( bibcross, i, FIELDS_CHRP_NOUSE );
1003
1004 newlevel = fields_level( bibcross, i ) + 1;
1005
1006 fstatus = fields_add( bibref, newtag, newvalue, newlevel );
1007 if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
1008 }
1009
1010 return BIBL_OK;
1011 }
1012
1013 static int
bibtexin_crossref(bibl * bin,param * p)1014 bibtexin_crossref( bibl *bin, param *p )
1015 {
1016 int i, n, ncross, status = BIBL_OK;
1017 fields *bibref, *bibcross;
1018
1019 for ( i=0; i<bin->n; ++i ) {
1020 bibref = bin->ref[i];
1021 n = fields_find( bibref, "CROSSREF", LEVEL_ANY );
1022 if ( n==FIELDS_NOTFOUND ) continue;
1023 fields_set_used( bibref, n );
1024 ncross = bibl_findref( bin, (char*) fields_value( bibref, n, FIELDS_CHRP_NOUSE ) );
1025 if ( ncross==-1 ) {
1026 bibtexin_nocrossref( bin, i, n, p );
1027 continue;
1028 }
1029 bibcross = bin->ref[ncross];
1030 status = bibtexin_crossref_oneref( bibref, bibcross );
1031 if ( status!=BIBL_OK ) goto out;
1032 }
1033 out:
1034 return status;
1035 }
1036
1037 static int
bibtexin_cleanf(bibl * bin,param * p)1038 bibtexin_cleanf( bibl *bin, param *p )
1039 {
1040 int status;
1041 long i;
1042
1043 for ( i=0; i<bin->n; ++i ) {
1044 status = bibtexin_cleanref( bin->ref[i], p );
1045 if ( status!=BIBL_OK ) return status;
1046 }
1047 status = bibtexin_crossref( bin, p );
1048 return status;
1049 }
1050
1051 /*****************************************************
1052 PUBLIC: int bibtexin_typef()
1053 *****************************************************/
1054
1055 static int
bibtexin_typef(fields * bibin,const char * filename,int nrefs,param * p)1056 bibtexin_typef( fields *bibin, const char *filename, int nrefs, param *p )
1057 {
1058 int ntypename, nrefname, is_default;
1059 char *refname = "", *typename = "";
1060
1061 ntypename = fields_find( bibin, "INTERNAL_TYPE", LEVEL_MAIN );
1062 nrefname = fields_find( bibin, "REFNUM", LEVEL_MAIN );
1063 if ( nrefname!=FIELDS_NOTFOUND ) refname = fields_value( bibin, nrefname, FIELDS_CHRP_NOUSE );
1064 if ( ntypename!=FIELDS_NOTFOUND ) typename = fields_value( bibin, ntypename, FIELDS_CHRP_NOUSE );
1065
1066 return get_reftype( typename, nrefs, p->progname, p->all, p->nall, refname, &is_default, REFTYPE_CHATTY );
1067 }
1068
1069 /*****************************************************
1070 PUBLIC: int bibtexin_convertf(), returns BIBL_OK or BIBL_ERR_MEMERR
1071 *****************************************************/
1072
1073 /**** bibtexin_btorg ****/
1074
1075 /*
1076 * BibTeX uses 'organization' in lieu of publisher if that field is missing.
1077 * Otherwise output as
1078 * <name type="corporate">
1079 * <namePart>The organization</namePart>
1080 * <role>
1081 * <roleTerm authority="marcrelator" type="text">organizer of meeting</roleTerm>
1082 * </role>
1083 * </name>
1084 */
1085
1086 static int
bibtexin_btorg(fields * bibin,int m,str * intag,str * invalue,int level,param * pm,char * outtag,fields * bibout)1087 bibtexin_btorg( fields *bibin, int m, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
1088 {
1089 int n, fstatus;
1090 n = fields_find( bibin, "publisher", LEVEL_ANY );
1091 if ( n==FIELDS_NOTFOUND )
1092 fstatus = fields_add( bibout, "PUBLISHER", str_cstr( invalue ), level );
1093 else
1094 fstatus = fields_add( bibout, "ORGANIZER:CORP", str_cstr( invalue ), level );
1095 if ( fstatus==FIELDS_OK ) return BIBL_OK;
1096 else return BIBL_ERR_MEMERR;
1097 }
1098
1099 /**** bibtexin_btsente() ****/
1100
1101 /*
1102 * sentelink = {file://localhost/full/path/to/file.pdf,Sente,PDF}
1103 *
1104 * Sente is an academic reference manager for MacOSX and Apple iPad.
1105 */
1106
1107 static int
bibtexin_btsente(fields * bibin,int n,str * intag,str * invalue,int level,param * pm,char * outtag,fields * bibout)1108 bibtexin_btsente( fields *bibin, int n, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
1109 {
1110 int fstatus, status = BIBL_OK;
1111 str link;
1112
1113 str_init( &link );
1114 str_cpytodelim( &link, skip_ws( invalue->data ), ",", 0 );
1115 str_trimendingws( &link );
1116 if ( str_memerr( &link ) ) status = BIBL_ERR_MEMERR;
1117
1118 if ( status==BIBL_OK && link.len ) {
1119 fstatus = fields_add( bibout, "FILEATTACH", str_cstr( &link ), level );
1120 if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
1121 }
1122
1123 str_free( &link );
1124 return status;
1125 }
1126
1127 /**** bibtexin_linkedfile() ****/
1128
/* Return how many ':' characters occur in the NUL-terminated string 'p'. */
static int
count_colons( char *p )
{
	int total = 0;
	char *q;

	for ( q = p; *q != '\0'; ++q ) {
		if ( *q == ':' ) total += 1;
	}

	return total;
}
1139
/*
 * Return the index of the first ':' in 'p', or the length of 'p'
 * when the string contains no colon.
 */
static int
first_colon( char *p )
{
	/* strcspn() yields the length of the leading run free of ':' */
	return (int) strcspn( p, ":" );
}
1147
/*
 * Return the index of the last ':' in 'p'.
 *
 * The backward scan stops at index 0 without testing that character, so
 * 0 is returned both for a colon at the start and for no colon at all.
 * An empty string gives -1.
 */
static int
last_colon( char *p )
{
	int pos = strlen( p ) - 1;

	for ( ; pos > 0; --pos ) {
		if ( p[pos] == ':' ) break;
	}

	return pos;
}
1155
1156 /*
1157 * file={Description:/full/path/to/file.pdf:PDF}
1158 */
1159 static int
bibtexin_linkedfile(fields * bibin,int m,str * intag,str * invalue,int level,param * pm,char * outtag,fields * bibout)1160 bibtexin_linkedfile( fields *bibin, int m, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
1161 {
1162 int fstatus, status = BIBL_OK;
1163 char *p = str_cstr( invalue );
1164 int i, n, n1, n2;
1165 str link;
1166
1167 n = count_colons( p );
1168 if ( n > 1 ) {
1169 /* A DOS file can contain a colon ":C:/....pdf:PDF" */
1170 /* Extract after 1st and up to last colons */
1171 n1 = first_colon( p ) + 1;
1172 n2 = last_colon( p );
1173 str_init( &link );
1174 for ( i=n1; i<n2; ++i ) {
1175 str_addchar( &link, p[i] );
1176 }
1177 str_trimstartingws( &link );
1178 str_trimendingws( &link );
1179 if ( str_memerr( &link ) ) {
1180 status = BIBL_ERR_MEMERR;
1181 goto out;
1182 }
1183 if ( link.len ) {
1184 fstatus = fields_add( bibout, "FILEATTACH", str_cstr( &link ), level );
1185 if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
1186 }
1187 out:
1188 str_free( &link );
1189 } else {
1190 /* This field isn't formatted properly, so just copy directly */
1191 fstatus = fields_add( bibout, "FILEATTACH", p, level );
1192 if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
1193 }
1194 return status;
1195
1196 }
1197
1198 /**** bibtexin_howpublished() ****/
1199
1200 /* howpublished={},
1201 *
1202 * Normally indicates the manner in which something was
1203 * published in lieu of a formal publisher, so typically
1204 * 'howpublished' and 'publisher' will never be in the
1205 * same reference.
1206 *
 * Occasionally, people put Diploma thesis information
1208 * into the field, so check that first.
1209 *
1210 * Returns BIBL_OK or BIBL_ERR_MEMERR
1211 */
1212
1213 static int
bibtexin_howpublished(fields * bibin,int n,str * intag,str * invalue,int level,param * pm,char * outtag,fields * bibout)1214 bibtexin_howpublished( fields *bibin, int n, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
1215 {
1216 int fstatus, status = BIBL_OK;
1217 if ( !strncasecmp( str_cstr( invalue ), "Diplom", 6 ) ) {
1218 fstatus = fields_replace_or_add( bibout, "GENRE:BIBUTILS", "Diploma thesis", level );
1219 if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
1220 }
1221 else if ( !strncasecmp( str_cstr( invalue ), "HSabilitation", 13 ) ) {
1222 fstatus = fields_replace_or_add( bibout, "GENRE:BIBUTILS", "Habilitation thesis", level );
1223 if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
1224 }
1225 else if ( !strncasecmp( str_cstr( invalue ), "Licentiate", 10 ) ) {
1226 fstatus = fields_replace_or_add( bibout, "GENRE:BIBUTILS", "Licentiate thesis", level );
1227 if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
1228 }
1229 else if ( is_embedded_link( str_cstr( invalue ) ) ) {
1230 status = urls_split_and_add( str_cstr( invalue ), bibout, level );
1231 }
1232 else {
1233 fstatus = fields_add( bibout, "PUBLISHER", str_cstr( invalue ), level );
1234 if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
1235 }
1236 return status;
1237 }
1238
1239 /**** bibtexin_eprint() ****/
1240
1241 /* Try to capture situations like
1242 *
1243 * eprint="1605.02026",
1244 * archivePrefix="arXiv",
1245 *
1246 * or
1247 *
1248 * eprint="13211131",
1249 * eprinttype="medline",
1250 *
1251 * If we don't know anything, concatenate archivePrefix:eprint
1252 * and push into URL. (Could be wrong)
1253 *
1254 * If no info, just push eprint into URL. (Could be wrong)
1255 */
1256 static int
process_eprint_with_prefix(fields * bibout,char * prefix,str * value,int level)1257 process_eprint_with_prefix( fields *bibout, char *prefix, str *value, int level )
1258 {
1259 int fstatus, status = BIBL_OK;
1260 str merge;
1261
1262 if ( !strcmp( prefix, "arXiv" ) ) {
1263 fstatus = fields_add( bibout, "ARXIV", str_cstr( value ), level );
1264 if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
1265 }
1266
1267 else if ( !strcmp( prefix, "jstor" ) ) {
1268 fstatus = fields_add( bibout, "JSTOR", str_cstr( value ), level );
1269 if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
1270 }
1271
1272 else if ( !strcmp( prefix, "medline" ) ) {
1273 fstatus = fields_add( bibout, "MEDLINE", str_cstr( value ), level );
1274 if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
1275 }
1276
1277 else if ( !strcmp( prefix, "pubmed" ) ) {
1278 fstatus = fields_add( bibout, "PMID", str_cstr( value ), level );
1279 if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
1280 }
1281
1282 /* ...if this is unknown prefix, merge prefix & eprint */
1283 else {
1284 str_init( &merge );
1285 str_mergestrs( &merge, prefix, ":", str_cstr( value ), NULL );
1286 fstatus = fields_add( bibout, "URL", str_cstr( &merge ), level );
1287 if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
1288 str_free( &merge );
1289 }
1290
1291 return status;
1292 }
1293 static int
process_eprint_without_prefix(fields * bibout,str * value,int level)1294 process_eprint_without_prefix( fields *bibout, str *value, int level )
1295 {
1296 int fstatus;
1297
1298 /* ...no archivePrefix, need to handle just 'eprint' tag */
1299 fstatus = fields_add( bibout, "URL", str_cstr( value ), level );
1300
1301 if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
1302 else return BIBL_OK;
1303 }
1304
1305 static int
bibtexin_eprint(fields * bibin,int m,str * intag,str * invalue,int level,param * pm,char * outtag,fields * bibout)1306 bibtexin_eprint( fields *bibin, int m, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
1307 {
1308 char *prefix;
1309 int n;
1310
1311 /* ...do we have an archivePrefix too? */
1312 n = fields_find( bibin, "ARCHIVEPREFIX", level );
1313 if ( n==FIELDS_NOTFOUND ) n = fields_find( bibin, "EPRINTTYPE", level );
1314 if ( n!=FIELDS_NOTFOUND ) {
1315 prefix = fields_value( bibin, n, FIELDS_CHRP );
1316 return process_eprint_with_prefix( bibout, prefix, invalue, level );
1317 }
1318
1319 /* ...no we don't */
1320 return process_eprint_without_prefix( bibout, invalue, level );
1321 }
1322
1323 /**** bibtexin_keyword() ****/
1324
1325 /* Split keywords="" with semicolons.
1326 * Commas are also frequently used, but will break
1327 * entries like:
1328 * keywords="Microscopy, Confocal"
1329 * Returns BIBL_OK or BIBL_ERR_MEMERR
1330 */
1331
1332 static int
bibtexin_keyword(fields * bibin,int m,str * intag,str * invalue,int level,param * pm,char * outtag,fields * bibout)1333 bibtexin_keyword( fields *bibin, int m, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
1334 {
1335 int fstatus, status = BIBL_OK;
1336 const char *p;
1337 str keyword;
1338
1339 p = str_cstr( invalue );
1340 str_init( &keyword );
1341
1342 while ( *p ) {
1343 p = str_cpytodelim( &keyword, skip_ws( p ), ";", 1 );
1344 str_trimendingws( &keyword );
1345 if ( str_memerr( &keyword ) ) {
1346 status = BIBL_ERR_MEMERR;
1347 goto out;
1348 }
1349 if ( keyword.len ) {
1350 fstatus = fields_add( bibout, "KEYWORD", str_cstr( &keyword ), level );
1351 if ( fstatus!=FIELDS_OK ) {
1352 status = BIBL_ERR_MEMERR;
1353 goto out;
1354 }
1355 }
1356 }
1357 out:
1358 str_free( &keyword );
1359 return status;
1360 }
1361
1362 /**** bibtexin_title() ****/
1363
1364 /* bibtexin_titleinbook_isbooktitle()
1365 *
1366 * Normally, the title field of inbook refers to the book. The
1367 * section in a @inbook reference is untitled. If it's titled,
1368 * the @incollection should be used. For example, in:
1369 *
1370 * @inbook{
1371 * title="xxx"
1372 * }
1373 *
1374 * the booktitle is "xxx".
1375 *
1376 * However, @inbook is frequently abused (and treated like
1377 * @incollection) so that title and booktitle are present
1378 * and title is now 'supposed' to refer to the section. For example:
1379 *
1380 * @inbook{
1381 * title="yyy",
1382 * booktitle="xxx"
1383 * }
1384 *
1385 * Therefore report whether or not booktitle is present as well
1386 * as title in @inbook references. If not, then make 'title'
1387 * correspond to the title of the book, not the section.
1388 *
1389 */
1390 static int
bibtexin_titleinbook_isbooktitle(fields * bibin,char * intag)1391 bibtexin_titleinbook_isbooktitle( fields *bibin, char *intag )
1392 {
1393 int n;
1394
1395 /* ...look only at 'title="xxx"' elements */
1396 if ( strcasecmp( intag, "TITLE" ) ) return 0;
1397
1398 /* ...look only at '@inbook' references */
1399 n = fields_find( bibin, "INTERNAL_TYPE", LEVEL_ANY );
1400 if ( n==FIELDS_NOTFOUND ) return 0;
1401 if ( strcasecmp( fields_value( bibin, n, FIELDS_CHRP ), "INBOOK" ) ) return 0;
1402
1403 /* ...look to see if 'booktitle="yyy"' exists */
1404 n = fields_find( bibin, "BOOKTITLE", LEVEL_ANY );
1405 if ( n==FIELDS_NOTFOUND ) return 0;
1406 else return 1;
1407 }
1408
1409 static int
bibtexin_title(fields * bibin,int n,str * intag,str * invalue,int level,param * pm,char * outtag,fields * bibout)1410 bibtexin_title( fields *bibin, int n, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
1411 {
1412 int ok;
1413
1414 if ( bibtexin_titleinbook_isbooktitle( bibin, intag->data ) ) level=LEVEL_MAIN;
1415 ok = title_process( bibout, "TITLE", invalue->data, level, pm->nosplittitle );
1416 if ( ok ) return BIBL_OK;
1417 else return BIBL_ERR_MEMERR;
1418 }
1419
1420 static void
bibtexin_notag(param * p,char * tag)1421 bibtexin_notag( param *p, char *tag )
1422 {
1423 if ( p->verbose && strcmp( tag, "INTERNAL_TYPE" ) ) {
1424 if ( p->progname ) REprintf( "%s: ", p->progname );
1425 REprintf( "Cannot find tag '%s'\n", tag );
1426 }
1427 }
1428
1429 static int
bibtexin_convertf(fields * bibin,fields * bibout,int reftype,param * p)1430 bibtexin_convertf( fields *bibin, fields *bibout, int reftype, param *p )
1431 {
1432 static int (*convertfns[NUM_REFTYPES])(fields *, int, str *, str *, int, param *, char *, fields *) = {
1433 // [ 0 ... NUM_REFTYPES-1 ] = generic_null,
1434 // [ SIMPLE ] = generic_simple,
1435 // [ TITLE ] = bibtexin_title,
1436 // [ PERSON ] = generic_simple,
1437 // [ PAGES ] = generic_pages,
1438 // [ KEYWORD ] = bibtexin_keyword,
1439 // [ EPRINT ] = bibtexin_eprint,
1440 // [ HOWPUBLISHED ] = bibtexin_howpublished,
1441 // [ LINKEDFILE ] = bibtexin_linkedfile,
1442 // [ NOTES ] = generic_notes,
1443 // [ GENRE ] = generic_genre,
1444 // [ BT_SENTE ] = bibtexin_btsente,
1445 // [ BT_ORG ] = bibtexin_btorg,
1446 // [ URL ] = generic_url
1447
1448 [ ALWAYS ] = generic_null, // (0)
1449 [ DEFAULT ] = generic_null, // (1)
1450 [ SKIP ] = generic_null, // (2)
1451 [ SIMPLE ] = generic_simple, // (3)
1452 [ TYPE ] = generic_null, // (4)
1453 [ PERSON ] = generic_simple, // (5)
1454 [ DATE ] = generic_null, // (6)
1455 [ PAGES ] = generic_pages, // (7)
1456 [ SERIALNO ] = generic_null, // (8)
1457 [ TITLE ] = bibtexin_title, // (9)
1458 [ NOTES ] = generic_notes, // (10)
1459 [ DOI ] = generic_null, // (11)
1460 [ HOWPUBLISHED ] = bibtexin_howpublished, // (12)
1461 [ LINKEDFILE ] = bibtexin_linkedfile, // (13)
1462 [ KEYWORD ] = bibtexin_keyword, // (14)
1463 [ URL ] = generic_url, // (15)
1464 [ GENRE ] = generic_genre, // (16)
1465 [ BT_SENTE ] = bibtexin_btsente, // (17) /* Bibtex 'Sente' */
1466 [ BT_EPRINT ] = generic_null, // (18) /* Bibtex 'Eprint' */
1467 [ BT_ORG ] = bibtexin_btorg, // (19) /* Bibtex Organization */
1468 [ BLT_THESIS_TYPE ] = generic_null, // (20) /* Biblatex Thesis Type */
1469 [ BLT_SCHOOL ] = generic_null, // (21) /* Biblatex School */
1470 [ BLT_EDITOR ] = generic_null, // (22) /* Biblatex Editor */
1471 [ BLT_SUBTYPE ] = generic_null, // (23) /* Biblatex entrysubtype */
1472 [ BLT_SKIP ] = generic_skip, // (24) /* Biblatex Skip Entry */
1473 [ EPRINT ] = bibtexin_eprint // (25)
1474 };
1475
1476 int process, level, i, nfields, status = BIBL_OK;
1477 str *intag, *invalue;
1478 char *outtag;
1479
1480 nfields = fields_num( bibin );
1481 for ( i=0; i<nfields; ++i ) {
1482
1483 if ( fields_used( bibin, i ) ) continue; /* e.g. successful crossref */
1484 if ( fields_no_tag( bibin, i ) ) continue;
1485 if ( fields_no_value( bibin, i ) ) continue;
1486
1487 intag = fields_tag( bibin, i, FIELDS_STRP );
1488 invalue = fields_value( bibin, i, FIELDS_STRP );
1489
1490 if ( !translate_oldtag( str_cstr( intag ), reftype, p->all, p->nall, &process, &level, &outtag ) ) {
1491 bibtexin_notag( p, str_cstr( intag ) );
1492 continue;
1493 }
1494
1495 status = convertfns[ process ] ( bibin, i, intag, invalue, level, p, outtag, bibout );
1496 if ( status!=BIBL_OK ) return status;
1497 }
1498
1499 if ( status==BIBL_OK && p->verbose ) fields_report_stderr( bibout );
1500
1501 return status;
1502 }
1503