1 /*
2 * bibcore.c
3 *
4 * Copyright (c) Chris Putnam 2005-2021
5 *
6 * Source code released under the GPL version 2
7 *
8 */
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include "bibutils.h"
12
13 /* internal includes */
14 #include "reftypes.h"
15 #include "charsets.h"
16 #include "str_conv.h"
17 #include "is_ws.h"
18
/* Format codes that are illegal for callers to pass in (they lie just past
 * BIBL_LASTIN/BIBL_LASTOUT); they are only set internally, for consistency. */
20 #define BIBL_INTERNALIN (BIBL_LASTIN+1)
21 #define BIBL_INTERNALOUT (BIBL_LASTOUT+1)
22
23 #define debug_set( p ) ( (p)->verbose > 1 )
24 #define verbose_set( p ) ( (p)->verbose )
25
26 static void
report_params(FILE * fp,const char * f,param * p)27 report_params( FILE *fp, const char *f, param *p )
28 {
29 fflush( NULL );
30
31 fprintf( fp, "-------------------params start for %s\n", f );
32 fprintf( fp, "\tprogname='%s'\n\n", p->progname );
33
34 fprintf( fp, "\treadformat=%d", p->readformat );
35 switch ( p->readformat ) {
36 case BIBL_INTERNALIN: fprintf( fp, " (BIBL_INTERNALIN)\n" ); break;
37 case BIBL_MODSIN: fprintf( fp, " (BIBL_MODSIN)\n" ); break;
38 case BIBL_BIBTEXIN: fprintf( fp, " (BIBL_BIBTEXIN)\n" ); break;
39 case BIBL_RISIN: fprintf( fp, " (BIBL_RISIN)\n" ); break;
40 case BIBL_ENDNOTEIN: fprintf( fp, " (BIBL_ENDNOTEIN)\n" ); break;
41 case BIBL_COPACIN: fprintf( fp, " (BIBL_COPACIN)\n" ); break;
42 case BIBL_ISIIN: fprintf( fp, " (BIBL_ISIIN)\n" ); break;
43 case BIBL_MEDLINEIN: fprintf( fp, " (BIBL_MEDLINEIN)\n" ); break;
44 case BIBL_ENDNOTEXMLIN: fprintf( fp, " (BIBL_ENDNOTEXMLIN)\n" ); break;
45 case BIBL_BIBLATEXIN: fprintf( fp, " (BIBL_BIBLATEXIN)\n" ); break;
46 case BIBL_EBIIN: fprintf( fp, " (BIBL_EBIIN)\n" ); break;
47 case BIBL_WORDIN: fprintf( fp, " (BIBL_WORDIN)\n" ); break;
48 case BIBL_NBIBIN: fprintf( fp, " (BIBL_NBIBIN)\n" ); break;
49 default: fprintf( fp, " (Illegal value)\n" ); break;
50 }
51 fprintf( fp, "\tcharsetin=%d\n", p->charsetin );
52 fprintf( fp, "\tcharsetin_src=%d", p->charsetin_src );
53 switch ( p->charsetin_src ) {
54 case BIBL_SRC_DEFAULT: fprintf( fp, " (BIBL_SRC_DEFAULT)\n" ); break;
55 case BIBL_SRC_FILE: fprintf( fp, " (BIBL_SRC_FILE)\n" ); break;
56 case BIBL_SRC_USER: fprintf( fp, " (BIBL_SRC_USER)\n" ); break;
57 default: fprintf( fp, " (Illegal value)\n" ); break;
58 }
59 fprintf( fp, "\tutf8in=%d\n", p->utf8in );
60 fprintf( fp, "\tlatexin=%d\n", p->latexin );
61 fprintf( fp, "\txmlin=%d\n\n", p->xmlin );
62
63 fprintf( fp, "\twriteformat=%d", p->writeformat );
64 switch ( p->writeformat ) {
65 case BIBL_INTERNALOUT: fprintf( fp, " (BIBL_INTERNALOUT)\n" ); break;
66 case BIBL_ADSABSOUT: fprintf( fp, " (BIBL_ADSABSOUT)\n" ); break;
67 case BIBL_BIBTEXOUT: fprintf( fp, " (BIBL_BIBTEXOUT)\n" ); break;
68 case BIBL_ENDNOTEOUT: fprintf( fp, " (BIBL_ENDNOTEOUT)\n" ); break;
69 case BIBL_ISIOUT: fprintf( fp, " (BIBL_ISIOUT)\n" ); break;
70 case BIBL_MODSOUT: fprintf( fp, " (BIBL_MODSOUT)\n" ); break;
71 case BIBL_NBIBOUT: fprintf( fp, " (BIBL_NBIBOUT)\n" ); break;
72 case BIBL_RISOUT: fprintf( fp, " (BIBL_RISOUT)\n" ); break;
73 case BIBL_WORD2007OUT: fprintf( fp, " (BIBL_WORD2007OUT)\n" ); break;
74 default: fprintf( fp, " (Illegal value)\n"); break;
75 }
76 fprintf( fp, "\tcharsetout=%d\n", p->charsetout );
77 fprintf( fp, "\tcharsetout_src=%d", p->charsetout_src );
78 switch ( p->charsetout_src ) {
79 case BIBL_SRC_DEFAULT: fprintf( fp, " (BIBL_SRC_DEFAULT)\n" ); break;
80 case BIBL_SRC_FILE: fprintf( fp, " (BIBL_SRC_FILE)\n" ); break;
81 case BIBL_SRC_USER: fprintf( fp, " (BIBL_SRC_USER)\n" ); break;
82 default: fprintf( fp, " (Illegal value)\n" ); break;
83 }
84 fprintf( fp, "\tutf8out=%d\n", p->utf8out );
85 fprintf( fp, "\tutf8bom=%d\n", p->utf8bom );
86 fprintf( fp, "\tlatexout=%d\n", p->latexout );
87 fprintf( fp, "\txmlout=%d\n", p->xmlout );
88 fprintf( fp, "-------------------params end for %s\n", f );
89
90 fflush( fp );
91 }
92
93 /* bibl_duplicateparams()
94 *
95 * Returns status of BIBL_OK or BIBL_ERR_MEMERR
96 */
97 static int
bibl_duplicateparams(param * np,param * op)98 bibl_duplicateparams( param *np, param *op )
99 {
100 int status;
101
102 slist_init( &(np->asis) );
103 status = slist_copy( &(np->asis), &(op->asis ) );
104 if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
105
106 slist_init( &(np->corps) );
107 status = slist_copy( &(np->corps), &(op->corps ) );
108 if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
109
110 if ( !op->progname ) np->progname = NULL;
111 else {
112 np->progname = strdup( op->progname );
113 if ( !np->progname ) return BIBL_ERR_MEMERR;
114 }
115
116 np->readformat = op->readformat;
117 np->charsetin = op->charsetin;
118 np->charsetin_src = op->charsetin_src;
119 np->utf8in = op->utf8in;
120 np->latexin = op->latexin;
121 np->xmlin = op->xmlin;
122
123 np->writeformat = op->writeformat;
124 np->charsetout = op->charsetout;
125 np->charsetout_src = op->charsetout_src;
126 np->utf8out = op->utf8out;
127 np->utf8bom = op->utf8bom;
128 np->latexout = op->latexout;
129 np->xmlout = op->xmlout;
130 np->nosplittitle = op->nosplittitle;
131
132 np->verbose = op->verbose;
133 np->format_opts = op->format_opts;
134 np->addcount = op->addcount;
135 np->output_raw = op->output_raw;
136 np->singlerefperfile = op->singlerefperfile;
137
138 np->readf = op->readf;
139 np->processf = op->processf;
140 np->cleanf = op->cleanf;
141 np->typef = op->typef;
142 np->convertf = op->convertf;
143 np->headerf = op->headerf;
144 np->footerf = op->footerf;
145 np->assemblef = op->assemblef;
146 np->writef = op->writef;
147
148 np->all = op->all;
149 np->nall = op->nall;
150
151 return BIBL_OK;
152 }
153
154 /* bibl_setreadparams()
155 *
156 * Returns status of BIBL_OK or BIBL_ERR_MEMERR
157 */
158 static int
bibl_setreadparams(param * np,param * op)159 bibl_setreadparams( param *np, param *op )
160 {
161 int status;
162 status = bibl_duplicateparams( np, op );
163 if ( status == BIBL_OK ) {
164 np->utf8out = 1;
165 np->charsetout = BIBL_CHARSET_UNICODE;
166 np->charsetout_src = BIBL_SRC_DEFAULT;
167 np->xmlout = BIBL_XMLOUT_FALSE;
168 np->latexout = 0;
169 np->writeformat = BIBL_INTERNALOUT;
170 }
171 return status;
172 }
173
174 /* bibl_setwriteparams()
175 *
176 * Returns status of BIBL_OK or BIBL_ERR_MEMERR
177 */
178 static int
bibl_setwriteparams(param * np,param * op)179 bibl_setwriteparams( param *np, param *op )
180 {
181 int status;
182 status = bibl_duplicateparams( np, op );
183 if ( status == BIBL_OK ) {
184 np->xmlin = 0;
185 np->latexin = 0;
186 np->utf8in = 1;
187 np->charsetin = BIBL_CHARSET_UNICODE;
188 np->charsetin_src = BIBL_SRC_DEFAULT;
189 np->readformat = BIBL_INTERNALIN;
190 }
191 return status;
192 }
193
194 void
bibl_freeparams(param * p)195 bibl_freeparams( param *p )
196 {
197 if ( p ) {
198 slist_free( &(p->asis) );
199 slist_free( &(p->corps) );
200 if ( p->progname ) free( p->progname );
201 }
202 }
203
204 int
bibl_readasis(param * p,char * f)205 bibl_readasis( param *p, char *f )
206 {
207 int status;
208
209 if ( !p ) return BIBL_ERR_BADINPUT;
210 if ( !f ) return BIBL_ERR_BADINPUT;
211
212 status = slist_fill( &(p->asis), f, 1 );
213
214 if ( status == SLIST_ERR_CANTOPEN ) return BIBL_ERR_CANTOPEN;
215 else if ( status == SLIST_ERR_MEMERR ) return BIBL_ERR_MEMERR;
216 return BIBL_OK;
217 }
218
219 int
bibl_readcorps(param * p,char * f)220 bibl_readcorps( param *p, char *f )
221 {
222 int status;
223
224 if ( !p ) return BIBL_ERR_BADINPUT;
225 if ( !f ) return BIBL_ERR_BADINPUT;
226
227 status = slist_fill( &(p->corps), f, 1 );
228
229 if ( status == SLIST_ERR_CANTOPEN ) return BIBL_ERR_CANTOPEN;
230 else if ( status == 0 ) return BIBL_ERR_MEMERR;
231 return BIBL_OK;
232 }
233
234 /* bibl_addtoasis()
235 *
236 * Returns BIBL_OK or BIBL_ERR_MEMERR
237 */
238 int
bibl_addtoasis(param * p,char * d)239 bibl_addtoasis( param *p, char *d )
240 {
241 int status;
242
243 if ( !p ) return BIBL_ERR_BADINPUT;
244 if ( !d ) return BIBL_ERR_BADINPUT;
245
246 status = slist_addc( &(p->asis), d );
247
248 return ( status==SLIST_OK )? BIBL_OK : BIBL_ERR_MEMERR;
249 }
250
251 /* bibl_addtocorps()
252 *
253 * Returns BIBL_OK or BIBL_ERR_MEMERR
254 */
255 int
bibl_addtocorps(param * p,char * d)256 bibl_addtocorps( param *p, char *d )
257 {
258 int status;
259
260 if ( !p ) return BIBL_ERR_BADINPUT;
261 if ( !d ) return BIBL_ERR_BADINPUT;
262
263 status = slist_addc( &(p->corps), d );
264
265 return ( status==SLIST_OK )? BIBL_OK : BIBL_ERR_MEMERR;
266 }
267
268 void
bibl_reporterr(int err)269 bibl_reporterr( int err )
270 {
271 fprintf( stderr, "Bibutils: " );
272 switch( err ) {
273 case BIBL_OK:
274 fprintf( stderr, "No error." ); break;
275 case BIBL_ERR_BADINPUT:
276 fprintf( stderr, "Bad input." ); break;
277 case BIBL_ERR_MEMERR:
278 fprintf( stderr, "Memory error." ); break;
279 case BIBL_ERR_CANTOPEN:
280 fprintf( stderr, "Can't open." ); break;
281 default:
282 fprintf( stderr, "Cannot identify error code %d.", err ); break;
283 }
284 fprintf( stderr, "\n" );
285 }
286
287 static int
bibl_illegalinmode(int mode)288 bibl_illegalinmode( int mode )
289 {
290 if ( mode < BIBL_FIRSTIN || mode > BIBL_LASTIN ) return 1;
291 else return 0;
292 }
293
294 static int
bibl_illegaloutmode(int mode)295 bibl_illegaloutmode( int mode )
296 {
297 if ( mode < BIBL_FIRSTOUT || mode > BIBL_LASTOUT ) return 1;
298 else return 0;
299 }
300
301 static void
bibl_verbose_reference(fields * f,char * filename,long refnum)302 bibl_verbose_reference( fields *f, char *filename, long refnum )
303 {
304 int i, n;
305 n = fields_num( f );
306 fprintf( stderr, "======== %s %ld : converted\n", filename, refnum );
307 for ( i=0; i<n; ++i ) {
308 fprintf( stderr, "'%s'='%s' level=%d\n",
309 (char*) fields_tag( f, i, FIELDS_CHRP_NOUSE ),
310 (char*) fields_value( f, i, FIELDS_CHRP_NOUSE ),
311 fields_level( f, i ) );
312 }
313 fprintf( stderr, "\n" );
314 }
315
316 static void
bibl_verbose(bibl * bin,const char * msg1,const char * msg2)317 bibl_verbose( bibl *bin, const char *msg1, const char *msg2 )
318 {
319 long i;
320 fflush( stdout );
321 fprintf( stderr, "-------------------%s begin %s\n", msg1, msg2);
322 for ( i=0; i<bin->n; ++i )
323 bibl_verbose_reference( bin->ref[i], "", i+1 );
324 fprintf( stderr, "-------------------%s end %s\n", msg1, msg2);
325 fflush( stderr );
326 }
327
328
329 /* extract_tag_value
330 *
331 * Extract the tag and the value for ALWAYS/DEFAULT
332 * entries like: "GENRE:BIBUTILS|Masters thesis"
333 *
334 * tag = "GENRE:BIBUTILS"
335 * value = "Masters thesis"
336 */
337 static int
extract_tag_value(str * tag,str * value,char * p)338 extract_tag_value( str *tag, str *value, char *p )
339 {
340 str_empty( tag );
341 while ( p && *p && *p!='|' ) {
342 str_addchar( tag, *p );
343 p++;
344 }
345 if ( str_memerr( tag ) ) return BIBL_ERR_MEMERR;
346
347 if ( p && *p=='|' ) p++;
348
349 str_empty( value );
350 while ( p && *p ) {
351 str_addchar( value, *p );
352 p++;
353 }
354 if ( str_memerr( tag ) ) return BIBL_ERR_MEMERR;
355
356 return BIBL_OK;
357 }
358
359 /* process_defaultadd()
360 *
361 * Add tag/value pairs that have "DEFAULT" processing
362 * unless a tag/value pair with the same tag has already
363 * been adding during reference processing.
364 */
365 static int
process_defaultadd(fields * f,int reftype,param * r)366 process_defaultadd( fields *f, int reftype, param *r )
367 {
368 int i, n, process, level, status, ret = BIBL_OK;
369 str tag, value;
370 char *p;
371
372 strs_init( &tag, &value, NULL );
373
374 for ( i=0; i<r->all[reftype].ntags; ++i ) {
375
376 process = ((r->all[reftype]).tags[i]).processingtype;
377 if ( process!=DEFAULT ) continue;
378
379 level = ((r->all[reftype]).tags[i]).level;
380 p = ((r->all[reftype]).tags[i]).newstr;
381
382 status = extract_tag_value( &tag, &value, p );
383 if ( status!=BIBL_OK ) {
384 ret = status;
385 goto out;
386 }
387
388 n = fields_find( f, tag.data, level );
389 if ( n==FIELDS_NOTFOUND ) {
390 status = fields_add( f, tag.data, value.data, level );
391 if ( status!=FIELDS_OK ) {
392 ret = BIBL_ERR_MEMERR;
393 goto out;
394 }
395 }
396
397 }
398 out:
399 strs_free( &tag, &value, NULL );
400
401 return ret;
402 }
403
404 /* process_alwaysadd()
405 *
406 * Add tag/value pair to reference from the ALWAYS
407 * processing type without exception (the difference from
408 * DEFAULT processing).
409 */
410 static int
process_alwaysadd(fields * f,int reftype,param * r)411 process_alwaysadd( fields *f, int reftype, param *r )
412 {
413 int i, process, level, status, ret = BIBL_OK;
414 str tag, value;
415 char *p;
416
417 strs_init( &tag, &value, NULL );
418
419 for ( i=0; i<r->all[reftype].ntags; ++i ) {
420
421 process = ((r->all[reftype]).tags[i]).processingtype;
422 if ( process!=ALWAYS ) continue;
423
424 level = ((r->all[reftype]).tags[i]).level;
425 p = ((r->all[reftype]).tags[i]).newstr;
426
427 status = extract_tag_value( &tag, &value, p );
428 if ( status!=BIBL_OK ) {
429 ret = status;
430 goto out;
431 }
432
433 status = fields_add( f, tag.data, value.data, level );
434 if ( status!=FIELDS_OK ) {
435 ret = BIBL_ERR_MEMERR;
436 goto out;
437 }
438 }
439
440 out:
441 strs_free( &tag, &value, NULL );
442
443 return ret;
444 }
445
446 static int
read_refs(FILE * fp,bibl * bin,char * filename,param * p)447 read_refs( FILE *fp, bibl *bin, char *filename, param *p )
448 {
449 int refnum = 0, bufpos = 0, ret=BIBL_OK, fcharset;/* = CHARSET_UNKNOWN;*/
450 str reference, line;
451 char buf[256]="";
452 fields *ref;
453
454 str_init( &reference );
455 str_init( &line );
456 while ( p->readf( fp, buf, sizeof(buf), &bufpos, &line, &reference, &fcharset ) ) {
457 if ( reference.len==0 ) continue;
458 ref = fields_new();
459 if ( !ref ) {
460 ret = BIBL_ERR_MEMERR;
461 bibl_free( bin );
462 goto out;
463 }
464 if ( p->processf( ref, reference.data, filename, refnum+1, p )){
465 ret = bibl_addref( bin, ref );
466 if ( ret!=BIBL_OK ) {
467 bibl_free( bin );
468 fields_delete( ref );
469 goto out;
470 }
471 refnum += 1;
472 } else {
473 fields_delete( ref );
474 }
475 str_empty( &reference );
476 if ( fcharset!=CHARSET_UNKNOWN ) {
477 /* charset from file takes priority over default, but
478 * not user-specified */
479 if ( p->charsetin_src!=BIBL_SRC_USER ) {
480 p->charsetin_src = BIBL_SRC_FILE;
481 p->charsetin = fcharset;
482 if ( fcharset!=CHARSET_UNICODE ) p->utf8in = 0;
483 }
484 }
485 }
486 if ( p->charsetin==CHARSET_UNICODE ) p->utf8in = 1;
487 out:
488 str_free( &line );
489 str_free( &reference );
490 return ret;
491 }
492
/* bibl_notexify()
 *
 * Don't manipulate latex for URL's and the like.
 *
 * Returns 1 if tag matches (ignoring case) one of the tags whose
 * values must be left alone, 0 otherwise.
 */
static int
bibl_notexify( char *tag )
{
	static const char * const exempt[] = {
		"DOI", "URL", "REFNUM", "FILEATTACH", "FILE"
	};
	const char * const *cur = exempt;
	const char * const *end = exempt + sizeof( exempt ) / sizeof( exempt[0] );

	while ( cur != end ) {
		if ( strcasecmp( tag, *cur ) == 0 ) return 1;
		cur++;
	}

	return 0;
}
503
504 /* bibl_fixcharsetdata()
505 *
506 * returns BIBL_OK or BIBL_ERR_MEMERR
507 */
508 static int
bibl_fixcharsetdata(fields * ref,param * p)509 bibl_fixcharsetdata( fields *ref, param *p )
510 {
511 str *data;
512 char *tag;
513 long i, n;
514 int ok;
515
516 n = fields_num( ref );
517
518 for ( i=0; i<n; ++i ) {
519
520 tag = fields_tag( ref, i, FIELDS_CHRP_NOUSE );
521 data = fields_value( ref, i, FIELDS_STRP_NOUSE );
522
523 if ( bibl_notexify( tag ) ) {
524 ok = str_convert( data,
525 p->charsetin, 0, p->utf8in, p->xmlin,
526 p->charsetout, 0, p->utf8out, p->xmlout );
527 } else {
528 ok = str_convert( data,
529 p->charsetin, p->latexin, p->utf8in, p->xmlin,
530 p->charsetout, p->latexout, p->utf8out, p->xmlout );
531 }
532
533 if ( !ok ) return BIBL_ERR_MEMERR;
534 }
535
536 return BIBL_OK;
537 }
538
539 /* bibl_fixcharsets()
540 *
541 * returns BIBL_OK or BIBL_ERR_MEMERR
542 */
543 static int
bibl_fixcharsets(bibl * b,param * p)544 bibl_fixcharsets( bibl *b, param *p )
545 {
546 int status;
547 long i;
548
549 for ( i=0; i<b->n; ++i ) {
550 status = bibl_fixcharsetdata( b->ref[i], p );
551 if ( status!=BIBL_OK ) return status;
552 }
553
554 return BIBL_OK;
555 }
556
557 static int
bibl_addcount(bibl * b)558 bibl_addcount( bibl *b )
559 {
560 char buf[512];
561 fields *ref;
562 long i;
563 int n;
564
565 for ( i=0; i<b->n; ++i ) {
566
567 ref = b->ref[i];
568
569 n = fields_find( ref, "REFNUM", LEVEL_MAIN );
570 if ( n==FIELDS_NOTFOUND ) continue;
571
572 sprintf( buf, "_%ld", i+1 );
573 str_strcatc( fields_value( ref, n, FIELDS_STRP_NOUSE ), buf );
574 if ( str_memerr( fields_value( ref, n, FIELDS_STRP_NOUSE ) ) ) {
575 return BIBL_ERR_MEMERR;
576 }
577
578 }
579
580 return BIBL_OK;
581 }
582
583 static int
generate_citekey(fields * f,long nref)584 generate_citekey( fields *f, long nref )
585 {
586 int n1, n2, status, ret;
587 char *p, buf[100];
588 str citekey;
589
590 str_init( &citekey );
591
592 n1 = fields_find( f, "AUTHOR", LEVEL_MAIN );
593 if ( n1==FIELDS_NOTFOUND ) n1 = fields_find( f, "AUTHOR:ASIS", LEVEL_MAIN );
594 if ( n1==FIELDS_NOTFOUND ) n1 = fields_find( f, "AUTHOR:CORP", LEVEL_MAIN );
595 if ( n1==FIELDS_NOTFOUND ) n1 = fields_find( f, "AUTHOR", LEVEL_ANY );
596 if ( n1==FIELDS_NOTFOUND ) n1 = fields_find( f, "AUTHOR:ASIS", LEVEL_ANY );
597 if ( n1==FIELDS_NOTFOUND ) n1 = fields_find( f, "AUTHOR:CORP", LEVEL_ANY );
598
599 n2 = fields_find( f, "DATE:YEAR", LEVEL_MAIN );
600 if ( n2==FIELDS_NOTFOUND ) n2 = fields_find( f, "DATE:YEAR", LEVEL_ANY );
601 if ( n2==FIELDS_NOTFOUND ) n2 = fields_find( f, "PARTDATE:YEAR", LEVEL_MAIN );
602 if ( n2==FIELDS_NOTFOUND ) n2 = fields_find( f, "PARTDATE:YEAR", LEVEL_ANY );
603
604 if ( n1!=FIELDS_NOTFOUND && n2!=FIELDS_NOTFOUND ) {
605
606 p = fields_value( f, n1, FIELDS_CHRP_NOUSE );
607 while ( p && *p && *p!='|' ) {
608 if ( !is_ws( *p ) ) str_addchar( &citekey, *p );
609 p++;
610 }
611
612 p = fields_value( f, n2, FIELDS_CHRP_NOUSE );
613 while ( p && *p ) {
614 if ( !is_ws( *p ) ) str_addchar( &citekey, *p );
615 p++;
616 }
617
618 }
619
620 else {
621 sprintf( buf, "ref%ld", nref );
622 str_strcpyc( &citekey, buf );
623 }
624
625 if ( str_memerr( &citekey ) ) {
626 ret = -1;
627 goto out;
628 }
629
630 status = fields_add( f, "REFNUM", str_cstr( &citekey ), LEVEL_MAIN );
631 if ( status!=FIELDS_OK ) {
632 ret = -1;
633 goto out;
634 }
635
636 ret = fields_find( f, "REFNUM", LEVEL_MAIN );
637 out:
638 str_free( &citekey );
639 return ret;
640 }
641
642 static int
get_citekeys(bibl * bin,slist * citekeys)643 get_citekeys( bibl *bin, slist *citekeys )
644 {
645 int n, status;
646 fields *f;
647 long i;
648
649 for ( i=0; i<bin->n; ++i ) {
650 f = bin->ref[i];
651 n = fields_find( f, "REFNUM", LEVEL_ANY );
652 if ( n==FIELDS_NOTFOUND ) n = generate_citekey( f, i+1 );
653 if ( n!=FIELDS_NOTFOUND && fields_has_value( f, n ) ) {
654 status = slist_add( citekeys, fields_value( f, n, FIELDS_STRP_NOUSE ) );
655 if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
656 } else {
657 status = slist_addc( citekeys, "" );
658 if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
659 }
660 }
661
662 return BIBL_OK;
663 }
664
665 static int
identify_duplicates(slist * citekeys,int * dup)666 identify_duplicates( slist *citekeys, int *dup )
667 {
668 int i, j, ndup = 0;
669
670 for ( i=0; i<citekeys->n; ++i ) dup[i] = -1;
671
672 for ( i=0; i<citekeys->n-1; ++i ) {
673 if ( dup[i]!=-1 ) continue;
674 for ( j=i+1; j<citekeys->n; ++j ) {
675 if ( !strcmp( slist_cstr( citekeys, i ),
676 slist_cstr( citekeys, j ) ) ) {
677 dup[i] = i;
678 dup[j] = i;
679 ndup++;
680 }
681 }
682 }
683
684 return ndup;
685 }
686
687 static int
build_new_citekey(int nsame,str * old_citekey,str * new_citekey)688 build_new_citekey( int nsame, str *old_citekey, str *new_citekey )
689 {
690 const char abc[]="abcdefghijklmnopqrstuvwxyz";
691
692 str_strcpy( new_citekey, old_citekey );
693
694 while ( nsame >= 26 ) {
695 str_addchar( new_citekey, 'a' );
696 nsame -= 26;
697 }
698
699 if ( nsame>=0 ) str_addchar( new_citekey, abc[nsame] );
700
701 return ( str_memerr( new_citekey ) ) ? BIBL_ERR_MEMERR : BIBL_OK;
702 }
703
704 static int
resolve_duplicates(bibl * b,slist * citekeys,int * dup)705 resolve_duplicates( bibl *b, slist *citekeys, int *dup )
706 {
707 int nsame, n, i, j, status = BIBL_OK;
708 str new_citekey, *ref_citekey;
709
710 str_init( &new_citekey );
711
712 for ( i=0; i<citekeys->n; ++i ) {
713
714 if ( dup[i]==-1 ) continue;
715
716 nsame = 0;
717
718 for ( j=i; j<citekeys->n; ++j ) {
719
720 if ( dup[j]!=i ) continue;
721
722 dup[j] = -1;
723
724 status = build_new_citekey( nsame, slist_str( citekeys, j ), &new_citekey );
725 if ( status!=BIBL_OK ) goto out;
726
727 n = fields_find( b->ref[j], "REFNUM", LEVEL_ANY );
728 if ( n==FIELDS_NOTFOUND ) continue;
729
730 ref_citekey = fields_value( b->ref[j], n, FIELDS_STRP_NOUSE );
731
732 str_strcpy( ref_citekey, &new_citekey );
733 if ( str_memerr( ref_citekey ) ) { status = BIBL_ERR_MEMERR; goto out; }
734
735 nsame++;
736 }
737 }
738 out:
739 str_free( &new_citekey );
740 return status;
741 }
742
743 static int
identify_and_resolve_duplicate_citekeys(bibl * b,slist * citekeys)744 identify_and_resolve_duplicate_citekeys( bibl *b, slist *citekeys )
745 {
746 int *dup, ndup, status=BIBL_OK;
747
748 dup = ( int * ) malloc( sizeof( int ) * citekeys->n );
749 if ( !dup ) return BIBL_ERR_MEMERR;
750
751 ndup = identify_duplicates( citekeys, dup );
752
753 if ( ndup ) status = resolve_duplicates( b, citekeys, dup );
754
755 free( dup );
756 return status;
757 }
758
759 static int
uniqueify_citekeys(bibl * bin)760 uniqueify_citekeys( bibl *bin )
761 {
762 slist citekeys;
763 int status;
764
765 slist_init( &citekeys );
766
767 status = get_citekeys( bin, &citekeys );
768 if ( status!=BIBL_OK ) goto out;
769
770 status = identify_and_resolve_duplicate_citekeys( bin, &citekeys );
771 out:
772 slist_free( &citekeys );
773 return status;
774 }
775
776 static int
clean_refs(bibl * bin,param * p)777 clean_refs( bibl *bin, param *p )
778 {
779 if ( p->cleanf ) return p->cleanf( bin, p );
780 else return BIBL_OK;
781 }
782
783 static int
convert_refs(bibl * bin,char * fname,bibl * bout,param * p)784 convert_refs( bibl *bin, char *fname, bibl *bout, param *p )
785 {
786 int reftype = 0, status;
787 fields *rin, *rout;
788 long i;
789
790 for ( i=0; i<bin->n; ++i ) {
791
792 rin = bin->ref[i];
793
794 rout = fields_new();
795 if ( !rout ) return BIBL_ERR_MEMERR;
796
797 if ( p->typef ) reftype = p->typef( rin, fname, i+1, p );
798
799 status = p->convertf( rin, rout, reftype, p );
800 if ( status!=BIBL_OK ) return status;
801
802 if ( p->all ) {
803 status = process_alwaysadd( rout, reftype, p );
804 if ( status!=BIBL_OK ) return status;
805 status = process_defaultadd( rout, reftype, p );
806 if ( status!=BIBL_OK ) return status;
807 }
808
809 status = bibl_addref( bout, rout );
810 if ( status!=BIBL_OK ) return status;
811 }
812
813 return BIBL_OK;
814 }
815
816 int
bibl_read(bibl * b,FILE * fp,char * filename,param * p)817 bibl_read( bibl *b, FILE *fp, char *filename, param *p )
818 {
819 int status = BIBL_OK;
820 param read_params;
821 bibl bin;
822
823 if ( !b ) return BIBL_ERR_BADINPUT;
824 if ( !fp ) return BIBL_ERR_BADINPUT;
825 if ( !p ) return BIBL_ERR_BADINPUT;
826
827 if ( bibl_illegalinmode( p->readformat ) ) {
828 if ( debug_set( p ) ) report_params( stderr, "bibl_read", p );
829 return BIBL_ERR_BADINPUT;
830 }
831
832 status = bibl_setreadparams( &read_params, p );
833 if ( status!=BIBL_OK ) {
834 if ( debug_set( p ) ) report_params( stderr, "bibl_read", p );
835 return status;
836 }
837
838 if ( debug_set( &read_params ) ) {
839 report_params( stderr, "bibl_read", &read_params );
840 }
841
842 bibl_init( &bin );
843
844 status = read_refs( fp, &bin, filename, &read_params );
845 if ( status!=BIBL_OK ) {
846 if ( debug_set( &read_params ) ) report_params( stderr, "bibl_read", &read_params );
847 bibl_freeparams( &read_params );
848 return status;
849 }
850
851 if ( debug_set( &read_params ) ) {
852 bibl_verbose( &bin, "raw_input", "for bibl_read" );
853 }
854
855 if ( !read_params.output_raw ) {
856 status = clean_refs( &bin, &read_params );
857 if ( status!=BIBL_OK ) goto out;
858 if ( debug_set( &read_params ) ) bibl_verbose( &bin, "post_clean_refs", "for bibl_read" );
859 }
860
861 if ( ( !read_params.output_raw ) || ( read_params.output_raw & BIBL_RAW_WITHCHARCONVERT ) ) {
862 status = bibl_fixcharsets( &bin, &read_params );
863 if ( status!=BIBL_OK ) goto out;
864 if ( debug_set( &read_params ) ) bibl_verbose( &bin, "post_fixcharsets", "for bibl_read" );
865 }
866
867 if ( !read_params.output_raw ) {
868 status = convert_refs( &bin, filename, b, &read_params );
869 if ( status!=BIBL_OK ) goto out;
870 if ( debug_set( &read_params ) ) bibl_verbose( b, "post_convert_refs", "for bibl_read" );
871 }
872
873 else {
874 status = bibl_copy( b, &bin );
875 if ( status!=BIBL_OK ) goto out;
876 if ( debug_set( &read_params ) ) bibl_verbose( b, "post_bibl_copy", "for bibl_read" );
877 }
878
879 if ( ( !read_params.output_raw ) || ( read_params.output_raw & BIBL_RAW_WITHMAKEREFID ) ) {
880 status = uniqueify_citekeys( b );
881 if ( status!=BIBL_OK ) goto out;
882 if ( read_params.addcount ) {
883 status = bibl_addcount( b );
884 if ( status!=BIBL_OK ) goto out;
885 }
886 if ( debug_set( &read_params ) ) bibl_verbose( &bin, "post_uniqueify_citekeys", "for bibl_read" );
887 }
888
889 out:
890 bibl_free( &bin );
891 bibl_freeparams( &read_params );
892
893 return status;
894 }
895
896 static FILE *
singlerefname(fields * reffields,long nref,int mode)897 singlerefname( fields *reffields, long nref, int mode )
898 {
899 char outfile[2048];
900 char suffix[5] = "xml";
901 FILE *fp;
902 long count;
903 int found;
904 if ( mode==BIBL_ADSABSOUT ) strcpy( suffix, "ads" );
905 else if ( mode==BIBL_BIBTEXOUT ) strcpy( suffix, "bib" );
906 else if ( mode==BIBL_ENDNOTEOUT ) strcpy( suffix, "end" );
907 else if ( mode==BIBL_ISIOUT ) strcpy( suffix, "isi" );
908 else if ( mode==BIBL_MODSOUT ) strcpy( suffix, "xml" );
909 else if ( mode==BIBL_RISOUT ) strcpy( suffix, "ris" );
910 else if ( mode==BIBL_WORD2007OUT ) strcpy( suffix, "xml" );
911 found = fields_find( reffields, "REFNUM", LEVEL_MAIN );
912 /* find new filename based on reference */
913 if ( found!=-1 ) {
914 sprintf( outfile,"%s.%s",(char*)fields_value(reffields,found,FIELDS_CHRP_NOUSE), suffix );
915 } else sprintf( outfile,"%ld.%s",nref, suffix );
916 count = 0;
917 fp = fopen( outfile, "r" );
918 while ( fp ) {
919 fclose(fp);
920 count++;
921 if ( count==60000 ) return NULL;
922 if ( found!=-1 )
923 sprintf( outfile, "%s_%ld.%s", (char*)fields_value( reffields, found, FIELDS_CHRP_NOUSE ), count, suffix );
924 else sprintf( outfile,"%ld_%ld.%s", nref, count, suffix );
925 fp = fopen( outfile, "r" );
926 }
927 return fopen( outfile, "w" );
928 }
929
930 static int
bibl_writeeachfp(FILE * fp,bibl * b,param * p)931 bibl_writeeachfp( FILE *fp, bibl *b, param *p )
932 {
933 fields out, *use = &out;
934 int status;
935 long i;
936
937 fields_init( &out );
938
939 for ( i=0; i<b->n; ++i ) {
940
941 fp = singlerefname( b->ref[i], i, p->writeformat );
942 if ( !fp ) return BIBL_ERR_CANTOPEN;
943
944 if ( p->headerf ) p->headerf( fp, p );
945
946 if ( p->assemblef ) {
947 fields_free( &out );
948 status = p->assemblef( b->ref[i], &out, p, i );
949 if ( status!=BIBL_OK ) break;
950 } else {
951 use = b->ref[i];
952 }
953
954 status = p->writef( use, fp, p, i );
955
956 if ( p->footerf ) p->footerf( fp );
957 fclose( fp );
958
959 if ( status!=BIBL_OK ) return status;
960 }
961
962 return BIBL_OK;
963 }
964
965 static int
bibl_writefp(FILE * fp,bibl * b,param * p)966 bibl_writefp( FILE *fp, bibl *b, param *p )
967 {
968 int status = BIBL_OK;
969 fields out, *use = &out;
970 long i;
971
972 fields_init( &out );
973
974 if ( debug_set( p ) && p->assemblef ) {
975 fprintf( stderr, "-------------------assemblef start for bibl_write\n");
976 }
977
978 if ( p->headerf ) p->headerf( fp, p );
979 for ( i=0; i<b->n; ++i ) {
980
981 if ( p->assemblef ) {
982 fields_free( &out );
983 status = p->assemblef( b->ref[i], &out, p, i );
984 if ( status!=BIBL_OK ) break;
985 if ( debug_set( p ) ) bibl_verbose_reference( &out, "", i+1 );
986 } else {
987 use = b->ref[i];
988 }
989
990 status = p->writef( use, fp, p, i );
991 if ( status!=BIBL_OK ) break;
992
993 }
994
995 if ( debug_set( p ) && p->assemblef ) {
996 fprintf( stderr, "-------------------assemblef end for bibl_write\n");
997 }
998
999 if ( p->footerf ) p->footerf( fp );
1000 return status;
1001 }
1002
1003 int
bibl_write(bibl * b,FILE * fp,param * p)1004 bibl_write( bibl *b, FILE *fp, param *p )
1005 {
1006 int status;
1007 param lp;
1008
1009 if ( !b ) return BIBL_ERR_BADINPUT;
1010 if ( !p ) return BIBL_ERR_BADINPUT;
1011 if ( bibl_illegaloutmode( p->writeformat ) ) return BIBL_ERR_BADINPUT;
1012 if ( !fp && !p->singlerefperfile ) return BIBL_ERR_BADINPUT;
1013
1014 status = bibl_setwriteparams( &lp, p );
1015 if ( status!=BIBL_OK ) return status;
1016
1017 if ( debug_set( p ) ) {
1018 report_params( stderr, "bibl_write", &lp );
1019 fflush( stdout );
1020 }
1021
1022 if ( debug_set( p ) ) bibl_verbose( b, "raw_input", "for bibl_write" );
1023
1024 status = bibl_fixcharsets( b, &lp );
1025 if ( status!=BIBL_OK ) goto out;
1026
1027 if ( debug_set( p ) ) bibl_verbose( b, "post-fixcharsets", "for bibl_write" );
1028
1029 if ( p->singlerefperfile ) status = bibl_writeeachfp( fp, b, &lp );
1030 else status = bibl_writefp( fp, b, &lp );
1031
1032 out:
1033 bibl_freeparams( &lp );
1034 return status;
1035 }
1036