1 /*
2 * isiin.c
3 *
4 * Copyright (c) Chris Putnam 2004-2021
5 *
6 * Program and source code released under the GPL version 2
7 *
8 */
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <ctype.h>
13 #include "is_ws.h"
14 #include "str.h"
15 #include "str_conv.h"
16 #include "month.h"
17 #include "name.h"
18 #include "fields.h"
19 #include "reftypes.h"
20 #include "bibformats.h"
21 #include "generic.h"
22
23 extern variants isi_all[];
24 extern int isi_nall;
25
26 static int isiin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset );
27 static int isiin_typef( fields *isiin, const char *filename, int nref, param *p );
28 static int isiin_convertf( fields *isiin, fields *info, int reftype, param *p );
29 static int isiin_processf( fields *isiin, const char *p, const char *filename, long nref, param *pm );
30
31
32 /*****************************************************
PUBLIC: int isiin_initparams()
34 *****************************************************/
35 int
isiin_initparams(param * pm,const char * progname)36 isiin_initparams( param *pm, const char *progname )
37 {
38 pm->readformat = BIBL_ISIIN;
39 pm->charsetin = BIBL_CHARSET_DEFAULT;
40 pm->charsetin_src = BIBL_SRC_DEFAULT;
41 pm->latexin = 0;
42 pm->xmlin = 0;
43 pm->utf8in = 0;
44 pm->nosplittitle = 0;
45 pm->verbose = 0;
46 pm->addcount = 0;
47 pm->output_raw = 0;
48
49 pm->readf = isiin_readf;
50 pm->processf = isiin_processf;
51 pm->cleanf = NULL;
52 pm->typef = isiin_typef;
53 pm->convertf = isiin_convertf;
54 pm->all = isi_all;
55 pm->nall = isi_nall;
56
57 slist_init( &(pm->asis) );
58 slist_init( &(pm->corps) );
59
60 if ( !progname ) pm->progname = NULL;
61 else {
62 pm->progname = strdup( progname );
63 if ( !pm->progname ) return BIBL_ERR_MEMERR;
64 }
65
66 return BIBL_OK;
67 }
68
69 /*****************************************************
70 PUBLIC: int isiin_readf()
71 *****************************************************/
72
73 /* ISI definition of a tag is strict:
74 * char 1 = uppercase alphabetic character
75 * char 2 = uppercase alphabetic character or digit
76 */
77
/*
 * is_isi_tag()
 *
 * Returns 1 if the first two characters of buf look like an ISI tag
 * (uppercase letter, then uppercase letter or digit), 0 otherwise.
 *
 * buf[1] is only examined once buf[0] is known to be an uppercase
 * letter, so an empty string is never read past its terminator.
 */
static int
is_isi_tag( const char *buf )
{
	unsigned char first, second;

	first = (unsigned char) buf[0];
	if ( !isupper( first ) ) return 0;

	second = (unsigned char) buf[1];
	return ( isupper( second ) || isdigit( second ) ) ? 1 : 0;
}
85
86 static int
readmore(FILE * fp,char * buf,int bufsize,int * bufpos,str * line)87 readmore( FILE *fp, char *buf, int bufsize, int *bufpos, str *line )
88 {
89 if ( line->len ) return 1;
90 else return str_fget( fp, buf, bufsize, bufpos, line );
91 }
92
93 static int
isiin_readf(FILE * fp,char * buf,int bufsize,int * bufpos,str * line,str * reference,int * fcharset)94 isiin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset )
95 {
96 int haveref = 0, inref = 0;
97 char *p;
98
99 *fcharset = CHARSET_UNKNOWN;
100
101 while ( !haveref && readmore( fp, buf, bufsize, bufpos, line ) ) {
102
103 if ( str_is_empty( line ) ) continue;
104
105 p = str_cstr( line );
106
107 /* Recognize UTF8 BOM */
108 if ( line->len > 2 &&
109 (unsigned char)(p[0])==0xEF &&
110 (unsigned char)(p[1])==0xBB &&
111 (unsigned char)(p[2])==0xBF ) {
112 *fcharset = CHARSET_UNICODE;
113 p += 3;
114 }
115
116 /* Each reference ends with 'ER ' */
117 if ( is_isi_tag( p ) ) {
118 if ( !strncmp( p, "FN ", 3 ) ) {
119 if (strncasecmp( p, "FN ISI Export Format",20)){
120 fprintf( stderr, ": warning file FN type not '%s' not recognized.\n", /*r->progname,*/ p );
121 }
122 } else if ( !strncmp( p, "VR ", 3 ) ) {
123 if ( strncasecmp( p, "VR 1.0", 6 ) ) {
124 fprintf(stderr,": warning file version number '%s' not recognized, expected 'VR 1.0'\n", /*r->progname,*/ p );
125 }
126 } else if ( !strncmp( p, "ER", 2 ) ) haveref = 1;
127 else {
128 str_addchar( reference, '\n' );
129 str_strcatc( reference, p );
130 inref = 1;
131 }
132 str_empty( line );
133 }
134 /* not a tag, but we'll append to the last values */
135 else if ( inref ) {
136 str_addchar( reference, '\n' );
137 str_strcatc( reference, p );
138 str_empty( line );
139 }
140 else {
141 str_empty( line );
142 }
143 }
144 return haveref;
145 }
146
147 /*****************************************************
148 PUBLIC: int isiin_processf()
149 *****************************************************/
150
151 static const char *
process_tagged_line(str * tag,str * value,const char * p)152 process_tagged_line( str *tag, str *value, const char *p )
153 {
154 int i = 0;
155
156 /* collect tag and skip past it */
157 while ( i<2 && *p && *p!='\r' && *p!='\n') {
158 str_addchar( tag, *p );
159 p++;
160 i++;
161 }
162
163 while ( *p==' ' || *p=='\t' ) p++;
164
165 while ( *p && *p!='\r' && *p!='\n' ) {
166 str_addchar( value, *p );
167 p++;
168 }
169
170 str_trimendingws( value );
171
172 while ( *p=='\r' || *p=='\n' ) p++;
173
174 return p;
175 }
176
177 static const char *
process_untagged_line(str * value,const char * p)178 process_untagged_line( str *value, const char *p )
179 {
180 while ( *p==' ' || *p=='\t' ) p++;
181
182 while ( *p && *p!='\r' && *p!='\n' ) {
183 str_addchar( value, *p );
184 p++;
185 }
186
187 str_trimendingws( value );
188
189 while ( *p=='\r' || *p=='\n' ) p++;
190
191 return p;
192 }
193
194 static int
add_tag_value(fields * isiin,str * tag,str * value,int * tag_added)195 add_tag_value( fields *isiin, str *tag, str *value, int *tag_added )
196 {
197 int status;
198
199 if ( str_has_value( tag ) && str_has_value( value ) ) {
200 status = fields_add( isiin, str_cstr( tag ), str_cstr( value ), LEVEL_MAIN );
201 if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
202 *tag_added = 1;
203 }
204
205 else {
206 *tag_added = 0;
207 }
208
209 return BIBL_OK;
210 }
211
212 static int
merge_tag_value(fields * isiin,str * tag,str * value,int * tag_added)213 merge_tag_value( fields *isiin, str *tag, str *value, int *tag_added )
214 {
215 int n, status;
216 str *oldvalue;
217
218 if ( str_has_value( value ) ) {
219
220 if ( *tag_added==1 ) {
221
222 n = fields_num( isiin );
223 if ( n==0 ) return BIBL_OK;
224
225 /* only one AU or AF for list of authors */
226 if ( !strcmp( str_cstr( tag ), "AU" ) ) {
227 status = fields_add( isiin, "AU", str_cstr( value ), LEVEL_MAIN );
228 if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
229 } else if ( !strcmp( str_cstr( tag ), "AF" ) ) {
230 status = fields_add( isiin, "AF", str_cstr( value ), LEVEL_MAIN );
231 if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
232 }
233 /* otherwise append multiline data */
234 else {
235 oldvalue = fields_value( isiin, n-1, FIELDS_STRP_NOUSE );
236 str_addchar( oldvalue, ' ' );
237 str_strcat( oldvalue, value );
238 if ( str_memerr( oldvalue ) ) return BIBL_ERR_MEMERR;
239 }
240 }
241
242 else {
243 status = fields_add( isiin, str_cstr( tag ), str_cstr( value ), LEVEL_MAIN );
244 if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
245 *tag_added = 1;
246 }
247 }
248
249 return BIBL_OK;
250 }
251
252 static int
isiin_processf(fields * isiin,const char * p,const char * filename,long nref,param * pm)253 isiin_processf( fields *isiin, const char *p, const char *filename, long nref, param *pm )
254 {
255 int status, tag_added = 0, ret = 1;
256 str tag, value;
257
258 strs_init( &tag, &value, NULL );
259
260 while ( *p ) {
261
262 /* ...with tag, add */
263 if ( is_isi_tag( p ) ) {
264 str_empty( &tag );
265 str_empty( &value );
266 p = process_tagged_line( &tag, &value, p );
267 status = add_tag_value( isiin, &tag, &value, &tag_added );
268 if ( status!=BIBL_OK ) {
269 ret = 0;
270 goto out;
271 }
272 }
273
274 /* ...untagged, merge -- one AU or AF for list of authors */
275 else {
276 str_empty( &value );
277 p = process_untagged_line( &value, p );
278 status = merge_tag_value( isiin, &tag, &value, &tag_added );
279 if ( status!=BIBL_OK ) {
280 ret = 0;
281 goto out;
282 }
283 }
284
285 }
286 out:
287 strs_free( &value, &tag, NULL );
288 return ret;
289 }
290
291 /*****************************************************
292 PUBLIC: int isiin_typef()
293 *****************************************************/
294 static int
isiin_typef(fields * isiin,const char * filename,int nref,param * p)295 isiin_typef( fields *isiin, const char *filename, int nref, param *p )
296 {
297 int ntypename, nrefname, is_default;
298 char *refname = "", *typename="";
299
300 ntypename = fields_find( isiin, "PT", LEVEL_MAIN );
301 nrefname = fields_find( isiin, "UT", LEVEL_MAIN );
302
303 if ( nrefname!=FIELDS_NOTFOUND ) refname = fields_value( isiin, nrefname, FIELDS_CHRP_NOUSE );
304 if ( ntypename!=FIELDS_NOTFOUND ) typename = fields_value( isiin, ntypename, FIELDS_CHRP_NOUSE );
305
306 return get_reftype( typename, nref, p->progname, p->all, p->nall, refname, &is_default, REFTYPE_CHATTY );
307 }
308
309 /*****************************************************
310 PUBLIC: int isiin_convertf(), returns BIBL_OK or BIBL_ERR_MEMERR
311 *****************************************************/
312
313 /* pull off authors first--use AF before AU */
314 static int
isiin_addauthors(fields * isiin,fields * info,int reftype,variants * all,int nall,slist * asis,slist * corps)315 isiin_addauthors( fields *isiin, fields *info, int reftype, variants *all, int nall, slist *asis, slist *corps )
316 {
317 char *newtag, *authortype, use_af[]="AF", use_au[]="AU";
318 int level, i, n, has_af=0, has_au=0, nfields, status;
319 str *t, *d;
320
321 nfields = fields_num( isiin );
322 for ( i=0; i<nfields && has_af==0; ++i ) {
323 t = fields_tag( isiin, i, FIELDS_STRP );
324 if ( !strcasecmp( t->data, "AU" ) ) has_au++;
325 if ( !strcasecmp( t->data, "AF" ) ) has_af++;
326 }
327 if ( has_af ) authortype = use_af;
328 else if ( has_au ) authortype = use_au;
329 else return BIBL_OK; /* no authors */
330
331 for ( i=0; i<nfields; ++i ) {
332 t = fields_tag( isiin, i, FIELDS_STRP );
333 if ( strcasecmp( t->data, authortype ) ) continue;
334 d = fields_value( isiin, i, FIELDS_STRP );
335 n = process_findoldtag( authortype, reftype, all, nall );
336 level = ((all[reftype]).tags[n]).level;
337 newtag = all[reftype].tags[n].newstr;
338 status = add_name( info, newtag, d->data, level, asis, corps );
339 if ( status!=BIBL_OK ) return status;
340 }
341 return BIBL_OK;
342 }
343
344 /* PD APR 16 */
345 static int
isiin_date(fields * bibin,int n,str * intag,str * invalue,int level,param * pm,char * outtag,fields * bibout)346 isiin_date( fields *bibin, int n, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
347 {
348 int fstatus, sstatus, status = BIBL_OK;
349 const char *monthtag = outtag;
350 const char *daytag;
351 const char *use;
352 slist tokens;
353 str *day;
354
355 slist_init( &tokens );
356
357 if ( !strcmp( monthtag, "DATE:MONTH" ) ) daytag = "DATE:DAY";
358 else daytag = "PARTDATE:DAY";
359
360 sstatus = slist_tokenize( &tokens, invalue, " ", 1 );
361 if ( sstatus!=SLIST_OK ) { status = BIBL_ERR_MEMERR; goto out; }
362
363 /* month */
364 if ( tokens.n > 0 ) {
365 (void) month_to_number( slist_cstr( &tokens, 0 ), &use );
366 fstatus = fields_add( bibout, monthtag, use, level );
367 if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
368 }
369
370 /* day */
371 if ( tokens.n > 1 ) {
372 day = slist_str( &tokens, 1 );
373 if ( str_strlen( day ) == 1 ) str_prepend( day, "0" );
374 fstatus = fields_add( bibout, daytag, str_cstr( day ), level );
375 if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
376 }
377
378 out:
379 slist_free( &tokens );
380 return status;
381 }
382
383 static int
isiin_keyword(fields * bibin,int n,str * intag,str * invalue,int level,param * pm,char * outtag,fields * bibout)384 isiin_keyword( fields *bibin, int n, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
385 {
386 const char *p = str_cstr( invalue );
387 int fstatus, status = BIBL_OK;
388 str keyword;
389
390 str_init( &keyword );
391 while ( *p ) {
392 p = str_cpytodelim( &keyword, skip_ws( p ), ";", 1 );
393 if ( str_memerr( &keyword ) ) { status = BIBL_ERR_MEMERR; goto out; }
394 if ( str_has_value( &keyword ) ) {
395 fstatus = fields_add( bibout, outtag, keyword.data, level );
396 if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
397 }
398 }
399 out:
400 str_free( &keyword );
401 return status;
402 }
403
404 static void
isiin_report_notag(param * p,char * tag)405 isiin_report_notag( param *p, char *tag )
406 {
407 if ( p->verbose && strcmp( tag, "PT" ) ) {
408 if ( p->progname ) fprintf( stderr, "%s: ", p->progname );
409 fprintf( stderr, "Did not identify ISI tag '%s'\n", tag );
410 }
411 }
412
413 static int
isiin_convertf(fields * bibin,fields * bibout,int reftype,param * p)414 isiin_convertf( fields *bibin, fields *bibout, int reftype, param *p )
415 {
416 static int (*convertfns[NUM_REFTYPES])(fields *, int, str *, str *, int, param *, char *, fields *) = {
417 [ 0 ... NUM_REFTYPES-1 ] = generic_null,
418 [ SIMPLE ] = generic_simple,
419 [ TITLE ] = generic_title,
420 [ PERSON ] = generic_person,
421 [ SERIALNO ] = generic_serialno,
422 [ DATE ] = isiin_date,
423 [ NOTES ] = generic_notes,
424 [ KEYWORD ] = isiin_keyword,
425 };
426
427 int process, level, i, nfields, status;
428 str *intag, *invalue;
429 char *outtag;
430
431 status = isiin_addauthors( bibin, bibout, reftype, p->all, p->nall, &(p->asis), &(p->corps) );
432 if ( status!=BIBL_OK ) return status;
433
434 nfields = fields_num( bibin );
435 for ( i=0; i<nfields; ++i ) {
436
437 intag = fields_tag( bibin, i, FIELDS_STRP );
438 if ( !strcasecmp( str_cstr( intag ), "AU" ) || !strcasecmp( str_cstr( intag ), "AF" ) )
439 continue;
440
441 if ( !translate_oldtag( str_cstr( intag ), reftype, p->all, p->nall, &process, &level, &outtag ) ) {
442 isiin_report_notag( p, str_cstr( intag ) );
443 continue;
444 }
445
446 invalue = fields_value( bibin, i, FIELDS_STRP );
447
448 status = convertfns[ process ] ( bibin, i, intag, invalue, level, p, outtag, bibout );
449 if ( status!=BIBL_OK ) return status;
450 }
451
452 if ( status==BIBL_OK && p->verbose ) fields_report( bibout, stderr );
453
454 return status;
455 }
456