1 /*
2  * bibcore.c
3  *
4  * Copyright (c) Chris Putnam 2005-2021
5  *
6  * Source code released under the GPL version 2
7  *
8  */
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include "bibutils.h"
12 
13 /* internal includes */
14 #include "reftypes.h"
15 #include "charsets.h"
16 #include "str_conv.h"
17 #include "is_ws.h"
18 
/* illegal modes to pass in, but use internally for consistency */
/* Each is one past the last public format value, so it falls outside the
 * range accepted by bibl_illegalinmode()/bibl_illegaloutmode(). */
#define BIBL_INTERNALIN   (BIBL_LASTIN+1)
#define BIBL_INTERNALOUT  (BIBL_LASTOUT+1)

/* debug_set(): true when the verbosity level of p is greater than 1 */
#define debug_set( p ) ( (p)->verbose > 1 )
/* verbose_set(): true for any non-zero verbosity level of p */
#define verbose_set( p ) ( (p)->verbose )
25 
26 static void
report_params(FILE * fp,const char * f,param * p)27 report_params( FILE *fp, const char *f, param *p )
28 {
29 	fflush( NULL );
30 
31 	fprintf( fp, "-------------------params start for %s\n", f );
32 	fprintf( fp, "\tprogname='%s'\n\n", p->progname );
33 
34 	fprintf( fp, "\treadformat=%d", p->readformat );
35 	switch ( p->readformat ) {
36 		case BIBL_INTERNALIN:   fprintf( fp, " (BIBL_INTERNALIN)\n" );   break;
37 		case BIBL_MODSIN:       fprintf( fp, " (BIBL_MODSIN)\n" );       break;
38 		case BIBL_BIBTEXIN:     fprintf( fp, " (BIBL_BIBTEXIN)\n" );     break;
39 		case BIBL_RISIN:        fprintf( fp, " (BIBL_RISIN)\n" );        break;
40 		case BIBL_ENDNOTEIN:    fprintf( fp, " (BIBL_ENDNOTEIN)\n" );    break;
41 		case BIBL_COPACIN:      fprintf( fp, " (BIBL_COPACIN)\n" );      break;
42 		case BIBL_ISIIN:        fprintf( fp, " (BIBL_ISIIN)\n" );        break;
43 		case BIBL_MEDLINEIN:    fprintf( fp, " (BIBL_MEDLINEIN)\n" );    break;
44 		case BIBL_ENDNOTEXMLIN: fprintf( fp, " (BIBL_ENDNOTEXMLIN)\n" ); break;
45 		case BIBL_BIBLATEXIN:   fprintf( fp, " (BIBL_BIBLATEXIN)\n" );   break;
46 		case BIBL_EBIIN:        fprintf( fp, " (BIBL_EBIIN)\n" );        break;
47 		case BIBL_WORDIN:       fprintf( fp, " (BIBL_WORDIN)\n" );       break;
48 		case BIBL_NBIBIN:       fprintf( fp, " (BIBL_NBIBIN)\n" );       break;
49 		default:                fprintf( fp, " (Illegal value)\n" );     break;
50 	}
51 	fprintf( fp, "\tcharsetin=%d\n", p->charsetin );
52 	fprintf( fp, "\tcharsetin_src=%d", p->charsetin_src );
53 	switch ( p->charsetin_src ) {
54 		case BIBL_SRC_DEFAULT:  fprintf( fp, " (BIBL_SRC_DEFAULT)\n" ); break;
55 		case BIBL_SRC_FILE:     fprintf( fp, " (BIBL_SRC_FILE)\n" );    break;
56 		case BIBL_SRC_USER:     fprintf( fp, " (BIBL_SRC_USER)\n" );    break;
57 		default:                fprintf( fp, " (Illegal value)\n" );    break;
58 	}
59 	fprintf( fp, "\tutf8in=%d\n", p->utf8in );
60 	fprintf( fp, "\tlatexin=%d\n", p->latexin );
61 	fprintf( fp, "\txmlin=%d\n\n", p->xmlin );
62 
63 	fprintf( fp, "\twriteformat=%d", p->writeformat );
64 	switch ( p->writeformat ) {
65 		case BIBL_INTERNALOUT:  fprintf( fp, " (BIBL_INTERNALOUT)\n" );  break;
66 		case BIBL_ADSABSOUT:    fprintf( fp, " (BIBL_ADSABSOUT)\n" );    break;
67 		case BIBL_BIBTEXOUT:    fprintf( fp, " (BIBL_BIBTEXOUT)\n" );    break;
68 		case BIBL_ENDNOTEOUT:   fprintf( fp, " (BIBL_ENDNOTEOUT)\n" );   break;
69 		case BIBL_ISIOUT:       fprintf( fp, " (BIBL_ISIOUT)\n" );       break;
70 		case BIBL_MODSOUT:      fprintf( fp, " (BIBL_MODSOUT)\n" );      break;
71 		case BIBL_NBIBOUT:      fprintf( fp, " (BIBL_NBIBOUT)\n" );      break;
72 		case BIBL_RISOUT:       fprintf( fp, " (BIBL_RISOUT)\n" );       break;
73 		case BIBL_WORD2007OUT:  fprintf( fp, " (BIBL_WORD2007OUT)\n" );  break;
74 		default:                fprintf( fp, " (Illegal value)\n");      break;
75 	}
76 	fprintf( fp, "\tcharsetout=%d\n", p->charsetout );
77 	fprintf( fp, "\tcharsetout_src=%d", p->charsetout_src );
78 	switch ( p->charsetout_src ) {
79 		case BIBL_SRC_DEFAULT:  fprintf( fp, " (BIBL_SRC_DEFAULT)\n" ); break;
80 		case BIBL_SRC_FILE:     fprintf( fp, " (BIBL_SRC_FILE)\n" );    break;
81 		case BIBL_SRC_USER:     fprintf( fp, " (BIBL_SRC_USER)\n" );    break;
82 		default:                fprintf( fp, " (Illegal value)\n" );    break;
83 	}
84 	fprintf( fp, "\tutf8out=%d\n", p->utf8out );
85 	fprintf( fp, "\tutf8bom=%d\n", p->utf8bom );
86 	fprintf( fp, "\tlatexout=%d\n", p->latexout );
87 	fprintf( fp, "\txmlout=%d\n", p->xmlout );
88 	fprintf( fp, "-------------------params end for %s\n", f );
89 
90 	fflush( fp );
91 }
92 
93 /* bibl_duplicateparams()
94  *
95  * Returns status of BIBL_OK or BIBL_ERR_MEMERR
96  */
97 static int
bibl_duplicateparams(param * np,param * op)98 bibl_duplicateparams( param *np, param *op )
99 {
100 	int status;
101 
102 	slist_init( &(np->asis) );
103 	status = slist_copy( &(np->asis), &(op->asis ) );
104 	if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
105 
106 	slist_init( &(np->corps) );
107 	status = slist_copy( &(np->corps), &(op->corps ) );
108 	if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
109 
110 	if ( !op->progname ) np->progname = NULL;
111 	else {
112 		np->progname = strdup( op->progname );
113 		if ( !np->progname ) return BIBL_ERR_MEMERR;
114 	}
115 
116 	np->readformat    = op->readformat;
117 	np->charsetin     = op->charsetin;
118 	np->charsetin_src = op->charsetin_src;
119 	np->utf8in        = op->utf8in;
120 	np->latexin       = op->latexin;
121 	np->xmlin         = op->xmlin;
122 
123 	np->writeformat    = op->writeformat;
124 	np->charsetout     = op->charsetout;
125 	np->charsetout_src = op->charsetout_src;
126 	np->utf8out        = op->utf8out;
127 	np->utf8bom        = op->utf8bom;
128 	np->latexout       = op->latexout;
129 	np->xmlout         = op->xmlout;
130 	np->nosplittitle   = op->nosplittitle;
131 
132 	np->verbose          = op->verbose;
133 	np->format_opts      = op->format_opts;
134 	np->addcount         = op->addcount;
135 	np->output_raw       = op->output_raw;
136 	np->singlerefperfile = op->singlerefperfile;
137 
138 	np->readf     = op->readf;
139 	np->processf  = op->processf;
140 	np->cleanf    = op->cleanf;
141 	np->typef     = op->typef;
142 	np->convertf  = op->convertf;
143 	np->headerf   = op->headerf;
144 	np->footerf   = op->footerf;
145 	np->assemblef = op->assemblef;
146 	np->writef    = op->writef;
147 
148 	np->all       = op->all;
149 	np->nall      = op->nall;
150 
151 	return BIBL_OK;
152 }
153 
154 /* bibl_setreadparams()
155  *
156  * Returns status of BIBL_OK or BIBL_ERR_MEMERR
157  */
158 static int
bibl_setreadparams(param * np,param * op)159 bibl_setreadparams( param *np, param *op )
160 {
161 	int status;
162 	status = bibl_duplicateparams( np, op );
163 	if ( status == BIBL_OK ) {
164 		np->utf8out        = 1;
165 		np->charsetout     = BIBL_CHARSET_UNICODE;
166 		np->charsetout_src = BIBL_SRC_DEFAULT;
167 		np->xmlout         = BIBL_XMLOUT_FALSE;
168 		np->latexout       = 0;
169 		np->writeformat    = BIBL_INTERNALOUT;
170 	}
171 	return status;
172 }
173 
174 /* bibl_setwriteparams()
175  *
176  * Returns status of BIBL_OK or BIBL_ERR_MEMERR
177  */
178 static int
bibl_setwriteparams(param * np,param * op)179 bibl_setwriteparams( param *np, param *op )
180 {
181 	int status;
182 	status = bibl_duplicateparams( np, op );
183 	if ( status == BIBL_OK ) {
184 		np->xmlin         = 0;
185 		np->latexin       = 0;
186 		np->utf8in        = 1;
187 		np->charsetin     = BIBL_CHARSET_UNICODE;
188 		np->charsetin_src = BIBL_SRC_DEFAULT;
189 		np->readformat    = BIBL_INTERNALIN;
190 	}
191 	return status;
192 }
193 
194 void
bibl_freeparams(param * p)195 bibl_freeparams( param *p )
196 {
197 	if ( p ) {
198 		slist_free( &(p->asis) );
199 		slist_free( &(p->corps) );
200 		if ( p->progname ) free( p->progname );
201 	}
202 }
203 
204 int
bibl_readasis(param * p,char * f)205 bibl_readasis( param *p, char *f )
206 {
207 	int status;
208 
209 	if ( !p ) return BIBL_ERR_BADINPUT;
210 	if ( !f ) return BIBL_ERR_BADINPUT;
211 
212 	status = slist_fill( &(p->asis), f, 1 );
213 
214 	if ( status == SLIST_ERR_CANTOPEN ) return BIBL_ERR_CANTOPEN;
215 	else if ( status == SLIST_ERR_MEMERR ) return BIBL_ERR_MEMERR;
216 	return BIBL_OK;
217 }
218 
219 int
bibl_readcorps(param * p,char * f)220 bibl_readcorps( param *p, char *f )
221 {
222 	int status;
223 
224 	if ( !p ) return BIBL_ERR_BADINPUT;
225 	if ( !f ) return BIBL_ERR_BADINPUT;
226 
227 	status = slist_fill( &(p->corps), f, 1 );
228 
229 	if ( status == SLIST_ERR_CANTOPEN ) return BIBL_ERR_CANTOPEN;
230 	else if ( status == 0 ) return BIBL_ERR_MEMERR;
231 	return BIBL_OK;
232 }
233 
234 /* bibl_addtoasis()
235  *
236  * Returns BIBL_OK or BIBL_ERR_MEMERR
237  */
238 int
bibl_addtoasis(param * p,char * d)239 bibl_addtoasis( param *p, char *d )
240 {
241 	int status;
242 
243 	if ( !p ) return BIBL_ERR_BADINPUT;
244 	if ( !d ) return BIBL_ERR_BADINPUT;
245 
246 	status = slist_addc( &(p->asis), d );
247 
248 	return ( status==SLIST_OK )? BIBL_OK : BIBL_ERR_MEMERR;
249 }
250 
251 /* bibl_addtocorps()
252  *
253  * Returns BIBL_OK or BIBL_ERR_MEMERR
254  */
255 int
bibl_addtocorps(param * p,char * d)256 bibl_addtocorps( param *p, char *d )
257 {
258 	int status;
259 
260 	if ( !p ) return BIBL_ERR_BADINPUT;
261 	if ( !d ) return BIBL_ERR_BADINPUT;
262 
263 	status = slist_addc( &(p->corps), d );
264 
265 	return ( status==SLIST_OK )? BIBL_OK : BIBL_ERR_MEMERR;
266 }
267 
268 void
bibl_reporterr(int err)269 bibl_reporterr( int err )
270 {
271 	fprintf( stderr, "Bibutils: " );
272 	switch( err ) {
273 		case BIBL_OK:
274 			fprintf( stderr, "No error." ); break;
275 		case BIBL_ERR_BADINPUT:
276 			fprintf( stderr, "Bad input." ); break;
277 		case BIBL_ERR_MEMERR:
278 			fprintf( stderr, "Memory error." ); break;
279 		case BIBL_ERR_CANTOPEN:
280 			fprintf( stderr, "Can't open." ); break;
281 		default:
282 			fprintf( stderr, "Cannot identify error code %d.", err ); break;
283 	}
284 	fprintf( stderr, "\n" );
285 }
286 
287 static int
bibl_illegalinmode(int mode)288 bibl_illegalinmode( int mode )
289 {
290 	if ( mode < BIBL_FIRSTIN || mode > BIBL_LASTIN ) return 1;
291 	else return 0;
292 }
293 
294 static int
bibl_illegaloutmode(int mode)295 bibl_illegaloutmode( int mode )
296 {
297 	if ( mode < BIBL_FIRSTOUT || mode > BIBL_LASTOUT ) return 1;
298 	else return 0;
299 }
300 
301 static void
bibl_verbose_reference(fields * f,char * filename,long refnum)302 bibl_verbose_reference( fields *f, char *filename, long refnum )
303 {
304 	int i, n;
305 	n = fields_num( f );
306 	fprintf( stderr, "======== %s %ld : converted\n", filename, refnum );
307 	for ( i=0; i<n; ++i ) {
308 		fprintf( stderr, "'%s'='%s' level=%d\n",
309 			(char*) fields_tag( f, i, FIELDS_CHRP_NOUSE ),
310 			(char*) fields_value( f, i, FIELDS_CHRP_NOUSE ),
311 			fields_level( f, i ) );
312 	}
313 	fprintf( stderr, "\n" );
314 }
315 
316 static void
bibl_verbose(bibl * bin,const char * msg1,const char * msg2)317 bibl_verbose( bibl *bin, const char *msg1, const char *msg2 )
318 {
319 	long i;
320 	fflush( stdout );
321 	fprintf( stderr, "-------------------%s begin %s\n", msg1, msg2);
322 	for ( i=0; i<bin->n; ++i )
323 		bibl_verbose_reference( bin->ref[i], "", i+1 );
324 	fprintf( stderr, "-------------------%s end %s\n", msg1, msg2);
325 	fflush( stderr );
326 }
327 
328 
329 /* extract_tag_value
330  *
331  * Extract the tag and the value for ALWAYS/DEFAULT
332  * entries like: "GENRE:BIBUTILS|Masters thesis"
333  *
334  * tag = "GENRE:BIBUTILS"
335  * value = "Masters thesis"
336  */
337 static int
extract_tag_value(str * tag,str * value,char * p)338 extract_tag_value( str *tag, str *value, char *p )
339 {
340 	str_empty( tag );
341 	while ( p && *p && *p!='|' ) {
342 		str_addchar( tag, *p );
343 		p++;
344 	}
345 	if ( str_memerr( tag ) ) return BIBL_ERR_MEMERR;
346 
347 	if ( p && *p=='|' ) p++;
348 
349 	str_empty( value );
350 	while ( p && *p ) {
351 		str_addchar( value, *p );
352 		p++;
353 	}
354 	if ( str_memerr( tag ) ) return BIBL_ERR_MEMERR;
355 
356 	return BIBL_OK;
357 }
358 
359 /* process_defaultadd()
360  *
361  * Add tag/value pairs that have "DEFAULT" processing
362  * unless a tag/value pair with the same tag has already
363  * been adding during reference processing.
364  */
365 static int
process_defaultadd(fields * f,int reftype,param * r)366 process_defaultadd( fields *f, int reftype, param *r )
367 {
368 	int i, n, process, level, status, ret = BIBL_OK;
369 	str tag, value;
370 	char *p;
371 
372 	strs_init( &tag, &value, NULL );
373 
374 	for ( i=0; i<r->all[reftype].ntags; ++i ) {
375 
376 		process = ((r->all[reftype]).tags[i]).processingtype;
377 		if ( process!=DEFAULT ) continue;
378 
379 		level   = ((r->all[reftype]).tags[i]).level;
380 		p       = ((r->all[reftype]).tags[i]).newstr;
381 
382 		status = extract_tag_value( &tag, &value, p );
383 		if ( status!=BIBL_OK ) {
384 			ret = status;
385 			goto out;
386 		}
387 
388 		n = fields_find( f, tag.data, level );
389 		if ( n==FIELDS_NOTFOUND ) {
390 			status = fields_add( f, tag.data, value.data, level );
391 			if ( status!=FIELDS_OK ) {
392 				ret = BIBL_ERR_MEMERR;
393 				goto out;
394 			}
395 		}
396 
397 	}
398 out:
399 	strs_free( &tag, &value, NULL );
400 
401 	return ret;
402 }
403 
404 /* process_alwaysadd()
405  *
406  * Add tag/value pair to reference from the ALWAYS
407  * processing type without exception (the difference from
408  * DEFAULT processing).
409  */
410 static int
process_alwaysadd(fields * f,int reftype,param * r)411 process_alwaysadd( fields *f, int reftype, param *r )
412 {
413 	int i, process, level, status, ret = BIBL_OK;
414 	str tag, value;
415 	char *p;
416 
417 	strs_init( &tag, &value, NULL );
418 
419 	for ( i=0; i<r->all[reftype].ntags; ++i ) {
420 
421 		process = ((r->all[reftype]).tags[i]).processingtype;
422 		if ( process!=ALWAYS ) continue;
423 
424 		level   = ((r->all[reftype]).tags[i]).level;
425 		p       = ((r->all[reftype]).tags[i]).newstr;
426 
427 		status = extract_tag_value( &tag, &value, p );
428 		if ( status!=BIBL_OK ) {
429 			ret = status;
430 			goto out;
431 		}
432 
433 		status = fields_add( f, tag.data, value.data, level );
434 		if ( status!=FIELDS_OK ) {
435 			ret = BIBL_ERR_MEMERR;
436 			goto out;
437 		}
438 	}
439 
440 out:
441 	strs_free( &tag, &value, NULL );
442 
443 	return ret;
444 }
445 
446 static int
read_refs(FILE * fp,bibl * bin,char * filename,param * p)447 read_refs( FILE *fp, bibl *bin, char *filename, param *p )
448 {
449 	int refnum = 0, bufpos = 0, ret=BIBL_OK, fcharset;/* = CHARSET_UNKNOWN;*/
450 	str reference, line;
451 	char buf[256]="";
452 	fields *ref;
453 
454 	str_init( &reference );
455 	str_init( &line );
456 	while ( p->readf( fp, buf, sizeof(buf), &bufpos, &line, &reference, &fcharset ) ) {
457 		if ( reference.len==0 ) continue;
458 		ref = fields_new();
459 		if ( !ref ) {
460 			ret = BIBL_ERR_MEMERR;
461 			bibl_free( bin );
462 			goto out;
463 		}
464 		if ( p->processf( ref, reference.data, filename, refnum+1, p )){
465 			ret = bibl_addref( bin, ref );
466 			if ( ret!=BIBL_OK ) {
467 				bibl_free( bin );
468 				fields_delete( ref );
469 				goto out;
470 			}
471 			refnum += 1;
472 		} else {
473 			fields_delete( ref );
474 		}
475 		str_empty( &reference );
476 		if ( fcharset!=CHARSET_UNKNOWN ) {
477 			/* charset from file takes priority over default, but
478 			 * not user-specified */
479 			if ( p->charsetin_src!=BIBL_SRC_USER ) {
480 				p->charsetin_src = BIBL_SRC_FILE;
481 				p->charsetin = fcharset;
482 				if ( fcharset!=CHARSET_UNICODE ) p->utf8in = 0;
483 			}
484 		}
485 	}
486 	if ( p->charsetin==CHARSET_UNICODE ) p->utf8in = 1;
487 out:
488 	str_free( &line );
489 	str_free( &reference );
490 	return ret;
491 }
492 
/* bibl_notexify()
 *
 * Don't manipulate latex for URL's and the like.
 *
 * Returns 1 when tag matches, ignoring case, one of the tags whose
 * values skip LaTeX conversion; returns 0 otherwise.
 */
static int
bibl_notexify( char *tag )
{
	static const char *const exempt[] = {
		"DOI", "URL", "REFNUM", "FILEATTACH", "FILE"
	};
	const int nexempt = (int)( sizeof( exempt ) / sizeof( exempt[0] ) );
	int k = 0;

	while ( k < nexempt ) {
		if ( strcasecmp( tag, exempt[k] ) == 0 ) return 1;
		k++;
	}

	return 0;
}
503 
504 /* bibl_fixcharsetdata()
505  *
506  * returns BIBL_OK or BIBL_ERR_MEMERR
507  */
508 static int
bibl_fixcharsetdata(fields * ref,param * p)509 bibl_fixcharsetdata( fields *ref, param *p )
510 {
511 	str *data;
512 	char *tag;
513 	long i, n;
514 	int ok;
515 
516 	n = fields_num( ref );
517 
518 	for ( i=0; i<n; ++i ) {
519 
520 		tag  = fields_tag( ref, i, FIELDS_CHRP_NOUSE );
521 		data = fields_value( ref, i, FIELDS_STRP_NOUSE );
522 
523 		if ( bibl_notexify( tag ) ) {
524 			ok = str_convert( data,
525 				p->charsetin,  0, p->utf8in,  p->xmlin,
526 				p->charsetout, 0, p->utf8out, p->xmlout );
527 		} else {
528 			ok = str_convert( data,
529 				p->charsetin,  p->latexin,  p->utf8in,  p->xmlin,
530 				p->charsetout, p->latexout, p->utf8out, p->xmlout );
531 		}
532 
533 		if ( !ok ) return BIBL_ERR_MEMERR;
534 	}
535 
536 	return BIBL_OK;
537 }
538 
539 /* bibl_fixcharsets()
540  *
541  * returns BIBL_OK or BIBL_ERR_MEMERR
542  */
543 static int
bibl_fixcharsets(bibl * b,param * p)544 bibl_fixcharsets( bibl *b, param *p )
545 {
546 	int status;
547 	long i;
548 
549 	for ( i=0; i<b->n; ++i ) {
550 		status = bibl_fixcharsetdata( b->ref[i], p );
551 		if ( status!=BIBL_OK ) return status;
552 	}
553 
554 	return BIBL_OK;
555 }
556 
557 static int
bibl_addcount(bibl * b)558 bibl_addcount( bibl *b )
559 {
560 	char buf[512];
561 	fields *ref;
562 	long i;
563 	int n;
564 
565 	for ( i=0; i<b->n; ++i ) {
566 
567 		ref = b->ref[i];
568 
569 		n = fields_find( ref, "REFNUM", LEVEL_MAIN );
570 		if ( n==FIELDS_NOTFOUND ) continue;
571 
572 		sprintf( buf, "_%ld", i+1 );
573 		str_strcatc( fields_value( ref, n, FIELDS_STRP_NOUSE ), buf );
574 		if ( str_memerr( fields_value( ref, n, FIELDS_STRP_NOUSE ) ) ) {
575 			return BIBL_ERR_MEMERR;
576 		}
577 
578 	}
579 
580 	return BIBL_OK;
581 }
582 
583 static int
generate_citekey(fields * f,long nref)584 generate_citekey( fields *f, long nref )
585 {
586 	int n1, n2, status, ret;
587 	char *p, buf[100];
588 	str citekey;
589 
590 	str_init( &citekey );
591 
592 	n1 = fields_find( f, "AUTHOR", LEVEL_MAIN );
593 	if ( n1==FIELDS_NOTFOUND ) n1 = fields_find( f, "AUTHOR:ASIS", LEVEL_MAIN );
594 	if ( n1==FIELDS_NOTFOUND ) n1 = fields_find( f, "AUTHOR:CORP", LEVEL_MAIN );
595 	if ( n1==FIELDS_NOTFOUND ) n1 = fields_find( f, "AUTHOR", LEVEL_ANY );
596 	if ( n1==FIELDS_NOTFOUND ) n1 = fields_find( f, "AUTHOR:ASIS", LEVEL_ANY );
597 	if ( n1==FIELDS_NOTFOUND ) n1 = fields_find( f, "AUTHOR:CORP", LEVEL_ANY );
598 
599 	n2 = fields_find( f, "DATE:YEAR", LEVEL_MAIN );
600 	if ( n2==FIELDS_NOTFOUND ) n2 = fields_find( f, "DATE:YEAR", LEVEL_ANY );
601 	if ( n2==FIELDS_NOTFOUND ) n2 = fields_find( f, "PARTDATE:YEAR", LEVEL_MAIN );
602 	if ( n2==FIELDS_NOTFOUND ) n2 = fields_find( f, "PARTDATE:YEAR", LEVEL_ANY );
603 
604 	if ( n1!=FIELDS_NOTFOUND && n2!=FIELDS_NOTFOUND ) {
605 
606 		p = fields_value( f, n1, FIELDS_CHRP_NOUSE );
607 		while ( p && *p && *p!='|' ) {
608 			if ( !is_ws( *p ) ) str_addchar( &citekey, *p );
609 			p++;
610 		}
611 
612 		p = fields_value( f, n2, FIELDS_CHRP_NOUSE );
613 		while ( p && *p ) {
614 			if ( !is_ws( *p ) ) str_addchar( &citekey, *p );
615 			p++;
616 		}
617 
618 	}
619 
620 	else {
621 		sprintf( buf, "ref%ld", nref );
622 		str_strcpyc( &citekey, buf );
623 	}
624 
625 	if ( str_memerr( &citekey ) ) {
626 		ret = -1;
627 		goto out;
628 	}
629 
630 	status = fields_add( f, "REFNUM", str_cstr( &citekey ), LEVEL_MAIN );
631 	if ( status!=FIELDS_OK ) {
632 		ret = -1;
633 		goto out;
634 	}
635 
636 	ret = fields_find( f, "REFNUM", LEVEL_MAIN );
637 out:
638 	str_free( &citekey );
639 	return ret;
640 }
641 
642 static int
get_citekeys(bibl * bin,slist * citekeys)643 get_citekeys( bibl *bin, slist *citekeys )
644 {
645 	int n, status;
646 	fields *f;
647 	long i;
648 
649 	for ( i=0; i<bin->n; ++i ) {
650 		f = bin->ref[i];
651 		n = fields_find( f, "REFNUM", LEVEL_ANY );
652 		if ( n==FIELDS_NOTFOUND ) n = generate_citekey( f, i+1 );
653 		if ( n!=FIELDS_NOTFOUND && fields_has_value( f, n ) ) {
654 			status = slist_add( citekeys, fields_value( f, n, FIELDS_STRP_NOUSE ) );
655 			if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
656 		} else {
657 			status = slist_addc( citekeys, "" );
658 			if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
659 		}
660 	}
661 
662 	return BIBL_OK;
663 }
664 
665 static int
identify_duplicates(slist * citekeys,int * dup)666 identify_duplicates( slist *citekeys, int *dup )
667 {
668 	int i, j, ndup = 0;
669 
670 	for ( i=0; i<citekeys->n; ++i ) dup[i] = -1;
671 
672 	for ( i=0; i<citekeys->n-1; ++i ) {
673 		if ( dup[i]!=-1 ) continue;
674 		for ( j=i+1; j<citekeys->n; ++j ) {
675 			if ( !strcmp( slist_cstr( citekeys, i ),
676 			              slist_cstr( citekeys, j ) ) ) {
677 					dup[i] = i;
678 					dup[j] = i;
679 					ndup++;
680 			}
681 		}
682 	}
683 
684 	return ndup;
685 }
686 
687 static int
build_new_citekey(int nsame,str * old_citekey,str * new_citekey)688 build_new_citekey( int nsame, str *old_citekey, str *new_citekey )
689 {
690 	const char abc[]="abcdefghijklmnopqrstuvwxyz";
691 
692 	str_strcpy( new_citekey, old_citekey );
693 
694 	while ( nsame >= 26 ) {
695 		str_addchar( new_citekey, 'a' );
696 		nsame -= 26;
697 	}
698 
699 	if ( nsame>=0 ) str_addchar( new_citekey, abc[nsame] );
700 
701 	return ( str_memerr( new_citekey ) ) ? BIBL_ERR_MEMERR : BIBL_OK;
702 }
703 
704 static int
resolve_duplicates(bibl * b,slist * citekeys,int * dup)705 resolve_duplicates( bibl *b, slist *citekeys, int *dup )
706 {
707 	int nsame, n, i, j, status = BIBL_OK;
708 	str new_citekey, *ref_citekey;
709 
710 	str_init( &new_citekey );
711 
712 	for ( i=0; i<citekeys->n; ++i ) {
713 
714 		if ( dup[i]==-1 ) continue;
715 
716 		nsame = 0;
717 
718 		for ( j=i; j<citekeys->n; ++j ) {
719 
720 			if ( dup[j]!=i ) continue;
721 
722 			dup[j] = -1;
723 
724 			status = build_new_citekey( nsame, slist_str( citekeys, j ), &new_citekey );
725 			if ( status!=BIBL_OK ) goto out;
726 
727 			n = fields_find( b->ref[j], "REFNUM", LEVEL_ANY );
728 			if ( n==FIELDS_NOTFOUND ) continue;
729 
730 			ref_citekey = fields_value( b->ref[j], n, FIELDS_STRP_NOUSE );
731 
732 			str_strcpy( ref_citekey, &new_citekey );
733 			if ( str_memerr( ref_citekey ) ) { status = BIBL_ERR_MEMERR; goto out; }
734 
735 			nsame++;
736 		}
737 	}
738 out:
739 	str_free( &new_citekey );
740 	return status;
741 }
742 
743 static int
identify_and_resolve_duplicate_citekeys(bibl * b,slist * citekeys)744 identify_and_resolve_duplicate_citekeys( bibl *b, slist *citekeys )
745 {
746 	int *dup, ndup, status=BIBL_OK;
747 
748 	dup = ( int * ) malloc( sizeof( int ) * citekeys->n );
749 	if ( !dup ) return BIBL_ERR_MEMERR;
750 
751 	ndup = identify_duplicates( citekeys, dup );
752 
753 	if ( ndup ) status = resolve_duplicates( b, citekeys, dup );
754 
755 	free( dup );
756 	return status;
757 }
758 
759 static int
uniqueify_citekeys(bibl * bin)760 uniqueify_citekeys( bibl *bin )
761 {
762 	slist citekeys;
763 	int status;
764 
765 	slist_init( &citekeys );
766 
767 	status = get_citekeys( bin, &citekeys );
768 	if ( status!=BIBL_OK ) goto out;
769 
770 	status = identify_and_resolve_duplicate_citekeys( bin, &citekeys );
771 out:
772 	slist_free( &citekeys );
773 	return status;
774 }
775 
776 static int
clean_refs(bibl * bin,param * p)777 clean_refs( bibl *bin, param *p )
778 {
779 	if ( p->cleanf ) return p->cleanf( bin, p );
780 	else return BIBL_OK;
781 }
782 
783 static int
convert_refs(bibl * bin,char * fname,bibl * bout,param * p)784 convert_refs( bibl *bin, char *fname, bibl *bout, param *p )
785 {
786 	int reftype = 0, status;
787 	fields *rin, *rout;
788 	long i;
789 
790 	for ( i=0; i<bin->n; ++i ) {
791 
792 		rin = bin->ref[i];
793 
794 		rout = fields_new();
795 		if ( !rout ) return BIBL_ERR_MEMERR;
796 
797 		if ( p->typef ) reftype = p->typef( rin, fname, i+1, p );
798 
799 		status = p->convertf( rin, rout, reftype, p );
800 		if ( status!=BIBL_OK ) return status;
801 
802 		if ( p->all ) {
803 			status = process_alwaysadd( rout, reftype, p );
804 			if ( status!=BIBL_OK ) return status;
805 			status = process_defaultadd( rout, reftype, p );
806 			if ( status!=BIBL_OK ) return status;
807 		}
808 
809 		status = bibl_addref( bout, rout );
810 		if ( status!=BIBL_OK ) return status;
811 	}
812 
813 	return BIBL_OK;
814 }
815 
816 int
bibl_read(bibl * b,FILE * fp,char * filename,param * p)817 bibl_read( bibl *b, FILE *fp, char *filename, param *p )
818 {
819 	int status = BIBL_OK;
820 	param read_params;
821 	bibl bin;
822 
823 	if ( !b )  return BIBL_ERR_BADINPUT;
824 	if ( !fp ) return BIBL_ERR_BADINPUT;
825 	if ( !p )  return BIBL_ERR_BADINPUT;
826 
827 	if ( bibl_illegalinmode( p->readformat ) ) {
828 		if ( debug_set( p ) ) report_params( stderr, "bibl_read", p );
829 		return BIBL_ERR_BADINPUT;
830 	}
831 
832 	status = bibl_setreadparams( &read_params, p );
833 	if ( status!=BIBL_OK ) {
834 		if ( debug_set( p ) ) report_params( stderr, "bibl_read", p );
835 		return status;
836 	}
837 
838 	if ( debug_set( &read_params ) ) {
839 		report_params( stderr, "bibl_read", &read_params );
840 	}
841 
842 	bibl_init( &bin );
843 
844 	status = read_refs( fp, &bin, filename, &read_params );
845 	if ( status!=BIBL_OK ) {
846 		if ( debug_set( &read_params ) ) report_params( stderr, "bibl_read", &read_params );
847 		bibl_freeparams( &read_params );
848 		return status;
849 	}
850 
851 	if ( debug_set( &read_params ) ) {
852 		bibl_verbose( &bin, "raw_input", "for bibl_read" );
853 	}
854 
855 	if ( !read_params.output_raw ) {
856 		status = clean_refs( &bin, &read_params );
857 		if ( status!=BIBL_OK ) goto out;
858 		if ( debug_set( &read_params ) ) bibl_verbose( &bin, "post_clean_refs", "for bibl_read" );
859 	}
860 
861 	if ( ( !read_params.output_raw ) || ( read_params.output_raw & BIBL_RAW_WITHCHARCONVERT ) ) {
862 		status = bibl_fixcharsets( &bin, &read_params );
863 		if ( status!=BIBL_OK ) goto out;
864 		if ( debug_set( &read_params ) ) bibl_verbose( &bin, "post_fixcharsets", "for bibl_read" );
865 	}
866 
867 	if ( !read_params.output_raw ) {
868 		status = convert_refs( &bin, filename, b, &read_params );
869 		if ( status!=BIBL_OK ) goto out;
870 		if ( debug_set( &read_params ) ) bibl_verbose( b, "post_convert_refs", "for bibl_read" );
871 	}
872 
873 	else {
874 		status = bibl_copy( b, &bin );
875 		if ( status!=BIBL_OK ) goto out;
876 		if ( debug_set( &read_params ) ) bibl_verbose( b, "post_bibl_copy", "for bibl_read" );
877 	}
878 
879 	if ( ( !read_params.output_raw ) || ( read_params.output_raw & BIBL_RAW_WITHMAKEREFID ) ) {
880 		status = uniqueify_citekeys( b );
881 		if ( status!=BIBL_OK ) goto out;
882 		if ( read_params.addcount ) {
883 			status = bibl_addcount( b );
884 			if ( status!=BIBL_OK ) goto out;
885 		}
886 		if ( debug_set( &read_params ) ) bibl_verbose( &bin, "post_uniqueify_citekeys", "for bibl_read" );
887 	}
888 
889 out:
890 	bibl_free( &bin );
891 	bibl_freeparams( &read_params );
892 
893 	return status;
894 }
895 
896 static FILE *
singlerefname(fields * reffields,long nref,int mode)897 singlerefname( fields *reffields, long nref, int mode )
898 {
899 	char outfile[2048];
900 	char suffix[5] = "xml";
901 	FILE *fp;
902 	long count;
903 	int  found;
904 	if      ( mode==BIBL_ADSABSOUT )     strcpy( suffix, "ads" );
905 	else if ( mode==BIBL_BIBTEXOUT )     strcpy( suffix, "bib" );
906 	else if ( mode==BIBL_ENDNOTEOUT )    strcpy( suffix, "end" );
907 	else if ( mode==BIBL_ISIOUT )        strcpy( suffix, "isi" );
908 	else if ( mode==BIBL_MODSOUT )       strcpy( suffix, "xml" );
909 	else if ( mode==BIBL_RISOUT )        strcpy( suffix, "ris" );
910 	else if ( mode==BIBL_WORD2007OUT )   strcpy( suffix, "xml" );
911 	found = fields_find( reffields, "REFNUM", LEVEL_MAIN );
912 	/* find new filename based on reference */
913 	if ( found!=-1 ) {
914 		sprintf( outfile,"%s.%s",(char*)fields_value(reffields,found,FIELDS_CHRP_NOUSE), suffix );
915 	} else  sprintf( outfile,"%ld.%s",nref, suffix );
916 	count = 0;
917 	fp = fopen( outfile, "r" );
918 	while ( fp ) {
919 		fclose(fp);
920 		count++;
921 		if ( count==60000 ) return NULL;
922 		if ( found!=-1 )
923 			sprintf( outfile, "%s_%ld.%s", (char*)fields_value( reffields, found, FIELDS_CHRP_NOUSE ), count, suffix );
924 		else sprintf( outfile,"%ld_%ld.%s", nref, count, suffix );
925 		fp = fopen( outfile, "r" );
926 	}
927 	return fopen( outfile, "w" );
928 }
929 
930 static int
bibl_writeeachfp(FILE * fp,bibl * b,param * p)931 bibl_writeeachfp( FILE *fp, bibl *b, param *p )
932 {
933 	fields out, *use = &out;
934 	int status;
935 	long i;
936 
937 	fields_init( &out );
938 
939 	for ( i=0; i<b->n; ++i ) {
940 
941 		fp = singlerefname( b->ref[i], i, p->writeformat );
942 		if ( !fp ) return BIBL_ERR_CANTOPEN;
943 
944 		if ( p->headerf ) p->headerf( fp, p );
945 
946 		if ( p->assemblef ) {
947 			fields_free( &out );
948 			status = p->assemblef( b->ref[i], &out, p, i );
949 			if ( status!=BIBL_OK ) break;
950 		} else {
951 			use = b->ref[i];
952 		}
953 
954 		status = p->writef( use, fp, p, i );
955 
956 		if ( p->footerf ) p->footerf( fp );
957 		fclose( fp );
958 
959 		if ( status!=BIBL_OK ) return status;
960 	}
961 
962 	return BIBL_OK;
963 }
964 
965 static int
bibl_writefp(FILE * fp,bibl * b,param * p)966 bibl_writefp( FILE *fp, bibl *b, param *p )
967 {
968 	int status = BIBL_OK;
969 	fields out, *use = &out;
970 	long i;
971 
972 	fields_init( &out );
973 
974 	if ( debug_set( p ) && p->assemblef ) {
975 		fprintf( stderr, "-------------------assemblef start for bibl_write\n");
976 	}
977 
978 	if ( p->headerf ) p->headerf( fp, p );
979 	for ( i=0; i<b->n; ++i ) {
980 
981 		if ( p->assemblef ) {
982 			fields_free( &out );
983 			status = p->assemblef( b->ref[i], &out, p, i );
984 			if ( status!=BIBL_OK ) break;
985 			if ( debug_set( p ) ) bibl_verbose_reference( &out, "", i+1 );
986 		} else {
987 			use = b->ref[i];
988 		}
989 
990 		status = p->writef( use, fp, p, i );
991 		if ( status!=BIBL_OK ) break;
992 
993 	}
994 
995 	if ( debug_set( p ) && p->assemblef ) {
996 		fprintf( stderr, "-------------------assemblef end for bibl_write\n");
997 	}
998 
999 	if ( p->footerf ) p->footerf( fp );
1000 	return status;
1001 }
1002 
1003 int
bibl_write(bibl * b,FILE * fp,param * p)1004 bibl_write( bibl *b, FILE *fp, param *p )
1005 {
1006 	int status;
1007 	param lp;
1008 
1009 	if ( !b ) return BIBL_ERR_BADINPUT;
1010 	if ( !p ) return BIBL_ERR_BADINPUT;
1011 	if ( bibl_illegaloutmode( p->writeformat ) ) return BIBL_ERR_BADINPUT;
1012 	if ( !fp && !p->singlerefperfile ) return BIBL_ERR_BADINPUT;
1013 
1014 	status = bibl_setwriteparams( &lp, p );
1015 	if ( status!=BIBL_OK ) return status;
1016 
1017 	if ( debug_set( p ) ) {
1018 		report_params( stderr, "bibl_write", &lp );
1019 		fflush( stdout );
1020 	}
1021 
1022 	if ( debug_set( p ) ) bibl_verbose( b, "raw_input", "for bibl_write" );
1023 
1024 	status = bibl_fixcharsets( b, &lp );
1025 	if ( status!=BIBL_OK ) goto out;
1026 
1027 	if ( debug_set( p ) ) bibl_verbose( b, "post-fixcharsets", "for bibl_write" );
1028 
1029 	if ( p->singlerefperfile ) status = bibl_writeeachfp( fp, b, &lp );
1030 	else status = bibl_writefp( fp, b, &lp );
1031 
1032 out:
1033 	bibl_freeparams( &lp );
1034 	return status;
1035 }
1036