1 /*
2  * isiin.c
3  *
4  * Copyright (c) Chris Putnam 2004-2020
5  *
6  * Program and source code released under the GPL version 2
7  *
8  */
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <ctype.h>
13 #include "is_ws.h"
14 #include "str.h"
15 #include "str_conv.h"
16 #include "name.h"
17 #include "fields.h"
18 #include "reftypes.h"
19 #include "bibformats.h"
20 #include "generic.h"
21 
/*
 * Tables defined in another translation unit. isiin_initparams() stores
 * them in pm->all / pm->nall, from where they are later passed to
 * get_reftype(), translate_oldtag() and process_findoldtag().
 */
extern variants isi_all[];
extern int isi_nall;

/*
 * Forward declarations of the file-local callbacks that
 * isiin_initparams() installs into a param structure.
 */
static int isiin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset );
static int isiin_typef( fields *isiin, const char *filename, int nref, param *p );
static int isiin_convertf( fields *isiin, fields *info, int reftype, param *p );
static int isiin_processf( fields *isiin, const char *p, const char *filename, long nref, param *pm );
29 
30 
31 /*****************************************************
32  PUBLIC: void isiin_initparams()
33 *****************************************************/
34 int
isiin_initparams(param * pm,const char * progname)35 isiin_initparams( param *pm, const char *progname )
36 {
37 	pm->readformat       = BIBL_ISIIN;
38 	pm->charsetin        = BIBL_CHARSET_DEFAULT;
39 	pm->charsetin_src    = BIBL_SRC_DEFAULT;
40 	pm->latexin          = 0;
41 	pm->xmlin            = 0;
42 	pm->utf8in           = 0;
43 	pm->nosplittitle     = 0;
44 	pm->verbose          = 0;
45 	pm->addcount         = 0;
46 	pm->output_raw       = 0;
47 
48 	pm->readf    = isiin_readf;
49 	pm->processf = isiin_processf;
50 	pm->cleanf   = NULL;
51 	pm->typef    = isiin_typef;
52 	pm->convertf = isiin_convertf;
53 	pm->all      = isi_all;
54 	pm->nall     = isi_nall;
55 
56 	slist_init( &(pm->asis) );
57 	slist_init( &(pm->corps) );
58 
59 	if ( !progname ) pm->progname = NULL;
60 	else {
61 		pm->progname = strdup( progname );
62 		if ( !pm->progname ) return BIBL_ERR_MEMERR;
63 	}
64 
65 	return BIBL_OK;
66 }
67 
68 /*****************************************************
69  PUBLIC: int isiin_readf()
70 *****************************************************/
71 
/*
 * is_isi_tag()
 *
 * The ISI definition of a tag is strict:
 *   char 1 = uppercase alphabetic character
 *   char 2 = uppercase alphabetic character or digit
 *
 * Returns 1 if 'buf' starts with such a two-character tag, 0 otherwise.
 * The second character is only examined once the first one has been
 * accepted, so an empty string is handled without reading past its
 * terminator.
 */
static int
is_isi_tag( const char *buf )
{
	unsigned char first, second;

	first = (unsigned char) buf[0];
	if ( !isupper( first ) ) return 0;

	second = (unsigned char) buf[1];
	if ( isupper( second ) ) return 1;
	if ( isdigit( second ) ) return 1;

	return 0;
}
84 
85 static int
readmore(FILE * fp,char * buf,int bufsize,int * bufpos,str * line)86 readmore( FILE *fp, char *buf, int bufsize, int *bufpos, str *line )
87 {
88 	if ( line->len ) return 1;
89 	else return str_fget( fp, buf, bufsize, bufpos, line );
90 }
91 
92 static int
isiin_readf(FILE * fp,char * buf,int bufsize,int * bufpos,str * line,str * reference,int * fcharset)93 isiin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset )
94 {
95 	int haveref = 0, inref = 0;
96 	char *p;
97 
98 	*fcharset = CHARSET_UNKNOWN;
99 
100 	while ( !haveref && readmore( fp, buf, bufsize, bufpos, line ) ) {
101 
102 		if ( str_is_empty( line ) ) continue;
103 
104 		p = str_cstr( line );
105 
106 		/* Recognize UTF8 BOM */
107 		if ( line->len > 2 &&
108 				(unsigned char)(p[0])==0xEF &&
109 				(unsigned char)(p[1])==0xBB &&
110 				(unsigned char)(p[2])==0xBF ) {
111 			*fcharset = CHARSET_UNICODE;
112 			p += 3;
113 		}
114 
115 		/* Each reference ends with 'ER ' */
116 		if ( is_isi_tag( p ) ) {
117 			if ( !strncmp( p, "FN ", 3 ) ) {
118 				if (strncasecmp( p, "FN ISI Export Format",20)){
119 					// Patch: Disable output logging
120 				}
121 			} else if ( !strncmp( p, "VR ", 3 ) ) {
122 				if ( strncasecmp( p, "VR 1.0", 6 ) ) {
123 					// Patch: Disable output logging
124 				}
125 			} else if ( !strncmp( p, "ER", 2 ) ) haveref = 1;
126 			else {
127 				str_addchar( reference, '\n' );
128 				str_strcatc( reference, p );
129 				inref = 1;
130 			}
131 			str_empty( line );
132 		}
133 		/* not a tag, but we'll append to the last values */
134 		else if ( inref ) {
135 			str_addchar( reference, '\n' );
136 			str_strcatc( reference, p );
137 			str_empty( line );
138 		}
139 		else {
140 			str_empty( line );
141 		}
142 	}
143 	return haveref;
144 }
145 
146 /*****************************************************
147  PUBLIC: int isiin_processf()
148 *****************************************************/
149 
150 static const char *
process_tagged_line(str * tag,str * value,const char * p)151 process_tagged_line( str *tag, str *value, const char *p )
152 {
153 	int i = 0;
154 
155 	/* collect tag and skip past it */
156 	while ( i<2 && *p && *p!='\r' && *p!='\n') {
157 		str_addchar( tag, *p );
158 		p++;
159 		i++;
160 	}
161 
162 	while ( *p==' ' || *p=='\t' ) p++;
163 
164 	while ( *p && *p!='\r' && *p!='\n' ) {
165 		str_addchar( value, *p );
166 		p++;
167 	}
168 
169 	str_trimendingws( value );
170 
171 	while ( *p=='\r' || *p=='\n' ) p++;
172 
173 	return p;
174 }
175 
176 static const char *
process_untagged_line(str * value,const char * p)177 process_untagged_line( str *value, const char *p )
178 {
179 	while ( *p==' ' || *p=='\t' ) p++;
180 
181 	while ( *p && *p!='\r' && *p!='\n' ) {
182 		str_addchar( value, *p );
183 		p++;
184 	}
185 
186 	str_trimendingws( value );
187 
188 	while ( *p=='\r' || *p=='\n' ) p++;
189 
190 	return p;
191 }
192 
193 static int
add_tag_value(fields * isiin,str * tag,str * value,int * tag_added)194 add_tag_value( fields *isiin, str *tag, str *value, int *tag_added )
195 {
196 	int status;
197 
198 	if ( str_has_value( tag ) && str_has_value( value ) ) {
199 		status = fields_add( isiin, str_cstr( tag ), str_cstr( value ), LEVEL_MAIN );
200 		if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
201 		*tag_added = 1;
202 	}
203 
204 	else {
205 		*tag_added = 0;
206 	}
207 
208 	return BIBL_OK;
209 }
210 
211 static int
merge_tag_value(fields * isiin,str * tag,str * value,int * tag_added)212 merge_tag_value( fields *isiin, str *tag, str *value, int *tag_added )
213 {
214 	int n, status;
215 	str *oldvalue;
216 
217 	if ( str_has_value( value ) ) {
218 
219 		if ( *tag_added==1 ) {
220 
221 			n = fields_num( isiin );
222 			if ( n==0 ) return BIBL_OK;
223 
224 			/* only one AU or AF for list of authors */
225 			if ( !strcmp( str_cstr( tag ), "AU" ) ) {
226 				status = fields_add( isiin, "AU", str_cstr( value ), LEVEL_MAIN );
227 				if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
228 			} else if ( !strcmp( str_cstr( tag ), "AF" ) ) {
229 				status = fields_add( isiin, "AF", str_cstr( value ), LEVEL_MAIN );
230 				if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
231 			}
232 			/* otherwise append multiline data */
233 			else {
234 				oldvalue = fields_value( isiin, n-1, FIELDS_STRP_NOUSE );
235 				str_addchar( oldvalue, ' ' );
236 				str_strcat( oldvalue, value );
237 				if ( str_memerr( oldvalue ) ) return BIBL_ERR_MEMERR;
238 			}
239 		}
240 
241 		else {
242                         status = fields_add( isiin, str_cstr( tag ), str_cstr( value ), LEVEL_MAIN );
243                         if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
244                         *tag_added = 1;
245 		}
246 	}
247 
248 	return BIBL_OK;
249 }
250 
251 static int
isiin_processf(fields * isiin,const char * p,const char * filename,long nref,param * pm)252 isiin_processf( fields *isiin, const char *p, const char *filename, long nref, param *pm )
253 {
254 	int status, tag_added = 0, ret = 1;
255 	str tag, value;
256 
257 	strs_init( &tag, &value, NULL );
258 
259 	while ( *p ) {
260 
261 		/* ...with tag, add */
262 		if ( is_isi_tag( p ) ) {
263 			str_empty( &tag );
264 			str_empty( &value );
265 			p = process_tagged_line( &tag, &value, p );
266 			status = add_tag_value( isiin, &tag, &value, &tag_added );
267 			if ( status!=BIBL_OK ) {
268 				ret = 0;
269 				goto out;
270 			}
271 		}
272 
273 		/* ...untagged, merge -- one AU or AF for list of authors */
274 		else {
275 			str_empty( &value );
276 			p = process_untagged_line( &value, p );
277 			status = merge_tag_value( isiin, &tag, &value, &tag_added );
278 			if ( status!=BIBL_OK ) {
279 				ret = 0;
280 				goto out;
281 			}
282 		}
283 
284 	}
285 out:
286 	strs_free( &value, &tag, NULL );
287 	return ret;
288 }
289 
290 /*****************************************************
291  PUBLIC: int isiin_typef()
292 *****************************************************/
293 static int
isiin_typef(fields * isiin,const char * filename,int nref,param * p)294 isiin_typef( fields *isiin, const char *filename, int nref, param *p )
295 {
296 	int ntypename, nrefname, is_default;
297 	char *refname = "", *typename="";
298 
299 	ntypename = fields_find( isiin, "PT", LEVEL_MAIN );
300 	nrefname  = fields_find( isiin, "UT", LEVEL_MAIN );
301 
302 	if ( nrefname!=FIELDS_NOTFOUND )  refname  = fields_value( isiin, nrefname,  FIELDS_CHRP_NOUSE );
303 	if ( ntypename!=FIELDS_NOTFOUND ) typename = fields_value( isiin, ntypename, FIELDS_CHRP_NOUSE );
304 
305 	return get_reftype( typename, nref, p->progname, p->all, p->nall, refname, &is_default, REFTYPE_CHATTY );
306 }
307 
308 /*****************************************************
309  PUBLIC: int isiin_convertf(), returns BIBL_OK or BIBL_ERR_MEMERR
310 *****************************************************/
311 
312 /* pull off authors first--use AF before AU */
313 static int
isiin_addauthors(fields * isiin,fields * info,int reftype,variants * all,int nall,slist * asis,slist * corps)314 isiin_addauthors( fields *isiin, fields *info, int reftype, variants *all, int nall, slist *asis, slist *corps )
315 {
316 	char *newtag, *authortype, use_af[]="AF", use_au[]="AU";
317 	int level, i, n, has_af=0, has_au=0, nfields, ok;
318 	str *t, *d;
319 
320 	nfields = fields_num( isiin );
321 	for ( i=0; i<nfields && has_af==0; ++i ) {
322 		t = fields_tag( isiin, i, FIELDS_STRP );
323 		if ( !strcasecmp( t->data, "AU" ) ) has_au++;
324 		if ( !strcasecmp( t->data, "AF" ) ) has_af++;
325 	}
326 	if ( has_af ) authortype = use_af;
327 	else if ( has_au ) authortype = use_au;
328 	else return BIBL_OK; /* no authors */
329 
330 	for ( i=0; i<nfields; ++i ) {
331 		t = fields_tag( isiin, i, FIELDS_STRP );
332 		if ( strcasecmp( t->data, authortype ) ) continue;
333 		d = fields_value( isiin, i, FIELDS_STRP );
334 		n = process_findoldtag( authortype, reftype, all, nall );
335 		level = ((all[reftype]).tags[n]).level;
336 		newtag = all[reftype].tags[n].newstr;
337 		ok = name_add( info, newtag, d->data, level, asis, corps );
338 		if ( !ok ) return BIBL_ERR_MEMERR;
339 	}
340 	return BIBL_OK;
341 }
342 
343 static int
isiin_keyword(fields * bibin,int n,str * intag,str * invalue,int level,param * pm,char * outtag,fields * bibout)344 isiin_keyword( fields *bibin, int n, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
345 {
346 	const char *p = str_cstr( invalue );
347 	int fstatus, status = BIBL_OK;
348 	str keyword;
349 
350 	str_init( &keyword );
351 	while ( *p ) {
352 		p = str_cpytodelim( &keyword, skip_ws( p ), ";", 1 );
353 		if ( str_memerr( &keyword ) ) { status = BIBL_ERR_MEMERR; goto out; }
354 		if ( str_has_value( &keyword ) ) {
355 			fstatus = fields_add( bibout, outtag, keyword.data, level );
356 			if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
357 		}
358 	}
359 out:
360 	str_free( &keyword );
361 	return status;
362 }
363 
364 static void
isiin_report_notag(param * p,char * tag)365 isiin_report_notag( param *p, char *tag )
366 {
367 	// Patch: Disable output logging
368 }
369 
370 static int
isiin_convertf(fields * bibin,fields * bibout,int reftype,param * p)371 isiin_convertf( fields *bibin, fields *bibout, int reftype, param *p )
372 {
373 	static int (*convertfns[NUM_REFTYPES])(fields *, int, str *, str *, int, param *, char *, fields *) = {
374 		// Patch: Remove GCC extension to allow building on MSVC
375 		// [ 0 ... NUM_REFTYPES-1 ] = generic_null,
376 		[ 0            ] = generic_null,
377 		[ 1            ] = generic_null,
378 		[ 2            ] = generic_null,
379 		[ 3            ] = generic_null,
380 		[ 4            ] = generic_null,
381 		[ 5            ] = generic_null,
382 		[ 6            ] = generic_null,
383 		[ 7            ] = generic_null,
384 		[ 8            ] = generic_null,
385 		[ 9            ] = generic_null,
386 		[ 10           ] = generic_null,
387 		[ 11           ] = generic_null,
388 		[ 12           ] = generic_null,
389 		[ 13           ] = generic_null,
390 		[ 14           ] = generic_null,
391 		[ 15           ] = generic_null,
392 		[ 16           ] = generic_null,
393 		[ 17           ] = generic_null,
394 		[ 18           ] = generic_null,
395 		[ 19           ] = generic_null,
396 		[ 20           ] = generic_null,
397 		[ 21           ] = generic_null,
398 		[ 22           ] = generic_null,
399 		[ 23           ] = generic_null,
400 		[ 24           ] = generic_null,
401 		[ 25           ] = generic_null,
402 		[ SIMPLE       ] = generic_simple,
403 		[ TITLE        ] = generic_title,
404 		[ PERSON       ] = generic_person,
405 		[ SERIALNO     ] = generic_serialno,
406 		[ DATE         ] = generic_simple,
407 		[ NOTES        ] = generic_notes,
408 		[ KEYWORD      ] = isiin_keyword,
409 	};
410 
411 	int process, level, i, nfields, status;
412 	str *intag, *invalue;
413 	char *outtag;
414 
415 	status = isiin_addauthors( bibin, bibout, reftype, p->all, p->nall, &(p->asis), &(p->corps) );
416 	if ( status!=BIBL_OK ) return status;
417 
418 	nfields = fields_num( bibin );
419 	for ( i=0; i<nfields; ++i ) {
420 
421 		intag = fields_tag( bibin, i, FIELDS_STRP );
422 		if ( !strcasecmp( str_cstr( intag ), "AU" ) || !strcasecmp( str_cstr( intag ), "AF" ) )
423 			continue;
424 
425 		if ( !translate_oldtag( str_cstr( intag ), reftype, p->all, p->nall, &process, &level, &outtag ) ) {
426 			isiin_report_notag( p, str_cstr( intag ) );
427 			continue;
428 		}
429 
430 		invalue = fields_value( bibin, i, FIELDS_STRP );
431 
432 		status = convertfns[ process ] ( bibin, i, intag, invalue, level, p, outtag, bibout );
433 		if ( status!=BIBL_OK ) return status;
434 	}
435 
436 	// Patch: Disable output logging
437 
438 	return status;
439 }
440