1 /*
2  * isiin.c
3  *
4  * Copyright (c) Chris Putnam 2004-2021
5  *
6  * Program and source code released under the GPL version 2
7  *
8  */
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <ctype.h>
13 #include "is_ws.h"
14 #include "str.h"
15 #include "str_conv.h"
16 #include "month.h"
17 #include "name.h"
18 #include "fields.h"
19 #include "reftypes.h"
20 #include "bibformats.h"
21 #include "generic.h"
22 
/* ISI tag-translation tables; defined outside this file and handed to
 * the parameter block by isiin_initparams() (pm->all / pm->nall).
 */
extern variants isi_all[];
extern int isi_nall;

/* File-local callbacks that isiin_initparams() installs as the
 * readf, typef, convertf and processf members of a param structure.
 */
static int isiin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset );
static int isiin_typef( fields *isiin, const char *filename, int nref, param *p );
static int isiin_convertf( fields *isiin, fields *info, int reftype, param *p );
static int isiin_processf( fields *isiin, const char *p, const char *filename, long nref, param *pm );
30 
31 
/*****************************************************
 PUBLIC: int isiin_initparams(), returns BIBL_OK or BIBL_ERR_MEMERR
*****************************************************/
35 int
isiin_initparams(param * pm,const char * progname)36 isiin_initparams( param *pm, const char *progname )
37 {
38 	pm->readformat       = BIBL_ISIIN;
39 	pm->charsetin        = BIBL_CHARSET_DEFAULT;
40 	pm->charsetin_src    = BIBL_SRC_DEFAULT;
41 	pm->latexin          = 0;
42 	pm->xmlin            = 0;
43 	pm->utf8in           = 0;
44 	pm->nosplittitle     = 0;
45 	pm->verbose          = 0;
46 	pm->addcount         = 0;
47 	pm->output_raw       = 0;
48 
49 	pm->readf    = isiin_readf;
50 	pm->processf = isiin_processf;
51 	pm->cleanf   = NULL;
52 	pm->typef    = isiin_typef;
53 	pm->convertf = isiin_convertf;
54 	pm->all      = isi_all;
55 	pm->nall     = isi_nall;
56 
57 	slist_init( &(pm->asis) );
58 	slist_init( &(pm->corps) );
59 
60 	if ( !progname ) pm->progname = NULL;
61 	else {
62 		pm->progname = strdup( progname );
63 		if ( !pm->progname ) return BIBL_ERR_MEMERR;
64 	}
65 
66 	return BIBL_OK;
67 }
68 
69 /*****************************************************
70  PUBLIC: int isiin_readf()
71 *****************************************************/
72 
/* The ISI notion of a tag is strict and exactly two characters long:
 *   char 1 = uppercase alphabetic character
 *   char 2 = uppercase alphabetic character or digit
 *
 * Returns 1 if buf starts with such a tag, 0 otherwise.  The second
 * character is only inspected once the first has passed, so an empty
 * string is never read past its terminator.
 */
static int
is_isi_tag( const char *buf )
{
	unsigned char second;

	if ( !isupper( (unsigned char) buf[0] ) ) return 0;

	second = (unsigned char) buf[1];
	if ( isupper( second ) ) return 1;
	if ( isdigit( second ) ) return 1;

	return 0;
}
85 
86 static int
readmore(FILE * fp,char * buf,int bufsize,int * bufpos,str * line)87 readmore( FILE *fp, char *buf, int bufsize, int *bufpos, str *line )
88 {
89 	if ( line->len ) return 1;
90 	else return str_fget( fp, buf, bufsize, bufpos, line );
91 }
92 
93 static int
isiin_readf(FILE * fp,char * buf,int bufsize,int * bufpos,str * line,str * reference,int * fcharset)94 isiin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset )
95 {
96 	int haveref = 0, inref = 0;
97 	char *p;
98 
99 	*fcharset = CHARSET_UNKNOWN;
100 
101 	while ( !haveref && readmore( fp, buf, bufsize, bufpos, line ) ) {
102 
103 		if ( str_is_empty( line ) ) continue;
104 
105 		p = str_cstr( line );
106 
107 		/* Recognize UTF8 BOM */
108 		if ( line->len > 2 &&
109 				(unsigned char)(p[0])==0xEF &&
110 				(unsigned char)(p[1])==0xBB &&
111 				(unsigned char)(p[2])==0xBF ) {
112 			*fcharset = CHARSET_UNICODE;
113 			p += 3;
114 		}
115 
116 		/* Each reference ends with 'ER ' */
117 		if ( is_isi_tag( p ) ) {
118 			if ( !strncmp( p, "FN ", 3 ) ) {
119 				if (strncasecmp( p, "FN ISI Export Format",20)){
120 					fprintf( stderr, ": warning file FN type not '%s' not recognized.\n", /*r->progname,*/ p );
121 				}
122 			} else if ( !strncmp( p, "VR ", 3 ) ) {
123 				if ( strncasecmp( p, "VR 1.0", 6 ) ) {
124 					fprintf(stderr,": warning file version number '%s' not recognized, expected 'VR 1.0'\n", /*r->progname,*/ p );
125 				}
126 			} else if ( !strncmp( p, "ER", 2 ) ) haveref = 1;
127 			else {
128 				str_addchar( reference, '\n' );
129 				str_strcatc( reference, p );
130 				inref = 1;
131 			}
132 			str_empty( line );
133 		}
134 		/* not a tag, but we'll append to the last values */
135 		else if ( inref ) {
136 			str_addchar( reference, '\n' );
137 			str_strcatc( reference, p );
138 			str_empty( line );
139 		}
140 		else {
141 			str_empty( line );
142 		}
143 	}
144 	return haveref;
145 }
146 
147 /*****************************************************
148  PUBLIC: int isiin_processf()
149 *****************************************************/
150 
151 static const char *
process_tagged_line(str * tag,str * value,const char * p)152 process_tagged_line( str *tag, str *value, const char *p )
153 {
154 	int i = 0;
155 
156 	/* collect tag and skip past it */
157 	while ( i<2 && *p && *p!='\r' && *p!='\n') {
158 		str_addchar( tag, *p );
159 		p++;
160 		i++;
161 	}
162 
163 	while ( *p==' ' || *p=='\t' ) p++;
164 
165 	while ( *p && *p!='\r' && *p!='\n' ) {
166 		str_addchar( value, *p );
167 		p++;
168 	}
169 
170 	str_trimendingws( value );
171 
172 	while ( *p=='\r' || *p=='\n' ) p++;
173 
174 	return p;
175 }
176 
177 static const char *
process_untagged_line(str * value,const char * p)178 process_untagged_line( str *value, const char *p )
179 {
180 	while ( *p==' ' || *p=='\t' ) p++;
181 
182 	while ( *p && *p!='\r' && *p!='\n' ) {
183 		str_addchar( value, *p );
184 		p++;
185 	}
186 
187 	str_trimendingws( value );
188 
189 	while ( *p=='\r' || *p=='\n' ) p++;
190 
191 	return p;
192 }
193 
194 static int
add_tag_value(fields * isiin,str * tag,str * value,int * tag_added)195 add_tag_value( fields *isiin, str *tag, str *value, int *tag_added )
196 {
197 	int status;
198 
199 	if ( str_has_value( tag ) && str_has_value( value ) ) {
200 		status = fields_add( isiin, str_cstr( tag ), str_cstr( value ), LEVEL_MAIN );
201 		if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
202 		*tag_added = 1;
203 	}
204 
205 	else {
206 		*tag_added = 0;
207 	}
208 
209 	return BIBL_OK;
210 }
211 
212 static int
merge_tag_value(fields * isiin,str * tag,str * value,int * tag_added)213 merge_tag_value( fields *isiin, str *tag, str *value, int *tag_added )
214 {
215 	int n, status;
216 	str *oldvalue;
217 
218 	if ( str_has_value( value ) ) {
219 
220 		if ( *tag_added==1 ) {
221 
222 			n = fields_num( isiin );
223 			if ( n==0 ) return BIBL_OK;
224 
225 			/* only one AU or AF for list of authors */
226 			if ( !strcmp( str_cstr( tag ), "AU" ) ) {
227 				status = fields_add( isiin, "AU", str_cstr( value ), LEVEL_MAIN );
228 				if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
229 			} else if ( !strcmp( str_cstr( tag ), "AF" ) ) {
230 				status = fields_add( isiin, "AF", str_cstr( value ), LEVEL_MAIN );
231 				if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
232 			}
233 			/* otherwise append multiline data */
234 			else {
235 				oldvalue = fields_value( isiin, n-1, FIELDS_STRP_NOUSE );
236 				str_addchar( oldvalue, ' ' );
237 				str_strcat( oldvalue, value );
238 				if ( str_memerr( oldvalue ) ) return BIBL_ERR_MEMERR;
239 			}
240 		}
241 
242 		else {
243                         status = fields_add( isiin, str_cstr( tag ), str_cstr( value ), LEVEL_MAIN );
244                         if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
245                         *tag_added = 1;
246 		}
247 	}
248 
249 	return BIBL_OK;
250 }
251 
252 static int
isiin_processf(fields * isiin,const char * p,const char * filename,long nref,param * pm)253 isiin_processf( fields *isiin, const char *p, const char *filename, long nref, param *pm )
254 {
255 	int status, tag_added = 0, ret = 1;
256 	str tag, value;
257 
258 	strs_init( &tag, &value, NULL );
259 
260 	while ( *p ) {
261 
262 		/* ...with tag, add */
263 		if ( is_isi_tag( p ) ) {
264 			str_empty( &tag );
265 			str_empty( &value );
266 			p = process_tagged_line( &tag, &value, p );
267 			status = add_tag_value( isiin, &tag, &value, &tag_added );
268 			if ( status!=BIBL_OK ) {
269 				ret = 0;
270 				goto out;
271 			}
272 		}
273 
274 		/* ...untagged, merge -- one AU or AF for list of authors */
275 		else {
276 			str_empty( &value );
277 			p = process_untagged_line( &value, p );
278 			status = merge_tag_value( isiin, &tag, &value, &tag_added );
279 			if ( status!=BIBL_OK ) {
280 				ret = 0;
281 				goto out;
282 			}
283 		}
284 
285 	}
286 out:
287 	strs_free( &value, &tag, NULL );
288 	return ret;
289 }
290 
291 /*****************************************************
292  PUBLIC: int isiin_typef()
293 *****************************************************/
294 static int
isiin_typef(fields * isiin,const char * filename,int nref,param * p)295 isiin_typef( fields *isiin, const char *filename, int nref, param *p )
296 {
297 	int ntypename, nrefname, is_default;
298 	char *refname = "", *typename="";
299 
300 	ntypename = fields_find( isiin, "PT", LEVEL_MAIN );
301 	nrefname  = fields_find( isiin, "UT", LEVEL_MAIN );
302 
303 	if ( nrefname!=FIELDS_NOTFOUND )  refname  = fields_value( isiin, nrefname,  FIELDS_CHRP_NOUSE );
304 	if ( ntypename!=FIELDS_NOTFOUND ) typename = fields_value( isiin, ntypename, FIELDS_CHRP_NOUSE );
305 
306 	return get_reftype( typename, nref, p->progname, p->all, p->nall, refname, &is_default, REFTYPE_CHATTY );
307 }
308 
309 /*****************************************************
310  PUBLIC: int isiin_convertf(), returns BIBL_OK or BIBL_ERR_MEMERR
311 *****************************************************/
312 
313 /* pull off authors first--use AF before AU */
314 static int
isiin_addauthors(fields * isiin,fields * info,int reftype,variants * all,int nall,slist * asis,slist * corps)315 isiin_addauthors( fields *isiin, fields *info, int reftype, variants *all, int nall, slist *asis, slist *corps )
316 {
317 	char *newtag, *authortype, use_af[]="AF", use_au[]="AU";
318 	int level, i, n, has_af=0, has_au=0, nfields, status;
319 	str *t, *d;
320 
321 	nfields = fields_num( isiin );
322 	for ( i=0; i<nfields && has_af==0; ++i ) {
323 		t = fields_tag( isiin, i, FIELDS_STRP );
324 		if ( !strcasecmp( t->data, "AU" ) ) has_au++;
325 		if ( !strcasecmp( t->data, "AF" ) ) has_af++;
326 	}
327 	if ( has_af ) authortype = use_af;
328 	else if ( has_au ) authortype = use_au;
329 	else return BIBL_OK; /* no authors */
330 
331 	for ( i=0; i<nfields; ++i ) {
332 		t = fields_tag( isiin, i, FIELDS_STRP );
333 		if ( strcasecmp( t->data, authortype ) ) continue;
334 		d = fields_value( isiin, i, FIELDS_STRP );
335 		n = process_findoldtag( authortype, reftype, all, nall );
336 		level = ((all[reftype]).tags[n]).level;
337 		newtag = all[reftype].tags[n].newstr;
338 		status = add_name( info, newtag, d->data, level, asis, corps );
339 		if ( status!=BIBL_OK ) return status;
340 	}
341 	return BIBL_OK;
342 }
343 
344 /* PD APR 16 */
345 static int
isiin_date(fields * bibin,int n,str * intag,str * invalue,int level,param * pm,char * outtag,fields * bibout)346 isiin_date( fields *bibin, int n, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
347 {
348 	int fstatus, sstatus, status = BIBL_OK;
349 	const char *monthtag = outtag;
350 	const char *daytag;
351 	const char *use;
352 	slist tokens;
353 	str *day;
354 
355 	slist_init( &tokens );
356 
357 	if ( !strcmp( monthtag, "DATE:MONTH" ) ) daytag = "DATE:DAY";
358 	else daytag = "PARTDATE:DAY";
359 
360 	sstatus = slist_tokenize( &tokens, invalue, " ", 1 );
361 	if ( sstatus!=SLIST_OK ) { status = BIBL_ERR_MEMERR; goto out; }
362 
363 	/* month */
364 	if ( tokens.n > 0 ) {
365 		(void) month_to_number( slist_cstr( &tokens, 0 ), &use );
366 		fstatus = fields_add( bibout, monthtag, use, level );
367 		if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
368 	}
369 
370 	/* day */
371 	if ( tokens.n > 1 ) {
372 		day = slist_str( &tokens, 1 );
373 		if ( str_strlen( day ) == 1 ) str_prepend( day, "0" );
374 		fstatus = fields_add( bibout, daytag, str_cstr( day ), level );
375 		if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
376 	}
377 
378 out:
379 	slist_free( &tokens );
380 	return status;
381 }
382 
383 static int
isiin_keyword(fields * bibin,int n,str * intag,str * invalue,int level,param * pm,char * outtag,fields * bibout)384 isiin_keyword( fields *bibin, int n, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
385 {
386 	const char *p = str_cstr( invalue );
387 	int fstatus, status = BIBL_OK;
388 	str keyword;
389 
390 	str_init( &keyword );
391 	while ( *p ) {
392 		p = str_cpytodelim( &keyword, skip_ws( p ), ";", 1 );
393 		if ( str_memerr( &keyword ) ) { status = BIBL_ERR_MEMERR; goto out; }
394 		if ( str_has_value( &keyword ) ) {
395 			fstatus = fields_add( bibout, outtag, keyword.data, level );
396 			if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
397 		}
398 	}
399 out:
400 	str_free( &keyword );
401 	return status;
402 }
403 
404 static void
isiin_report_notag(param * p,char * tag)405 isiin_report_notag( param *p, char *tag )
406 {
407 	if ( p->verbose && strcmp( tag, "PT" ) ) {
408 		if ( p->progname ) fprintf( stderr, "%s: ", p->progname );
409 		fprintf( stderr, "Did not identify ISI tag '%s'\n", tag );
410 	}
411 }
412 
413 static int
isiin_convertf(fields * bibin,fields * bibout,int reftype,param * p)414 isiin_convertf( fields *bibin, fields *bibout, int reftype, param *p )
415 {
416 	static int (*convertfns[NUM_REFTYPES])(fields *, int, str *, str *, int, param *, char *, fields *) = {
417 		[ 0 ... NUM_REFTYPES-1 ] = generic_null,
418 		[ SIMPLE       ] = generic_simple,
419 		[ TITLE        ] = generic_title,
420 		[ PERSON       ] = generic_person,
421 		[ SERIALNO     ] = generic_serialno,
422 		[ DATE         ] = isiin_date,
423 		[ NOTES        ] = generic_notes,
424 		[ KEYWORD      ] = isiin_keyword,
425 	};
426 
427 	int process, level, i, nfields, status;
428 	str *intag, *invalue;
429 	char *outtag;
430 
431 	status = isiin_addauthors( bibin, bibout, reftype, p->all, p->nall, &(p->asis), &(p->corps) );
432 	if ( status!=BIBL_OK ) return status;
433 
434 	nfields = fields_num( bibin );
435 	for ( i=0; i<nfields; ++i ) {
436 
437 		intag = fields_tag( bibin, i, FIELDS_STRP );
438 		if ( !strcasecmp( str_cstr( intag ), "AU" ) || !strcasecmp( str_cstr( intag ), "AF" ) )
439 			continue;
440 
441 		if ( !translate_oldtag( str_cstr( intag ), reftype, p->all, p->nall, &process, &level, &outtag ) ) {
442 			isiin_report_notag( p, str_cstr( intag ) );
443 			continue;
444 		}
445 
446 		invalue = fields_value( bibin, i, FIELDS_STRP );
447 
448 		status = convertfns[ process ] ( bibin, i, intag, invalue, level, p, outtag, bibout );
449 		if ( status!=BIBL_OK ) return status;
450 	}
451 
452 	if ( status==BIBL_OK && p->verbose ) fields_report( bibout, stderr );
453 
454 	return status;
455 }
456