1 /*
2 * isiin.c
3 *
4 * Copyright (c) Chris Putnam 2004-2020
5 *
6 * Program and source code released under the GPL version 2
7 *
8 */
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <ctype.h>
13 #include "is_ws.h"
14 #include "str.h"
15 #include "str_conv.h"
16 #include "name.h"
17 #include "fields.h"
18 #include "reftypes.h"
19 #include "bibformats.h"
20 #include "generic.h"
21
22 extern variants isi_all[];
23 extern int isi_nall;
24
25 static int isiin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset );
26 static int isiin_typef( fields *isiin, const char *filename, int nref, param *p );
27 static int isiin_convertf( fields *isiin, fields *info, int reftype, param *p );
28 static int isiin_processf( fields *isiin, const char *p, const char *filename, long nref, param *pm );
29
30
31 /*****************************************************
PUBLIC: int isiin_initparams()
33 *****************************************************/
34 int
isiin_initparams(param * pm,const char * progname)35 isiin_initparams( param *pm, const char *progname )
36 {
37 pm->readformat = BIBL_ISIIN;
38 pm->charsetin = BIBL_CHARSET_DEFAULT;
39 pm->charsetin_src = BIBL_SRC_DEFAULT;
40 pm->latexin = 0;
41 pm->xmlin = 0;
42 pm->utf8in = 0;
43 pm->nosplittitle = 0;
44 pm->verbose = 0;
45 pm->addcount = 0;
46 pm->output_raw = 0;
47
48 pm->readf = isiin_readf;
49 pm->processf = isiin_processf;
50 pm->cleanf = NULL;
51 pm->typef = isiin_typef;
52 pm->convertf = isiin_convertf;
53 pm->all = isi_all;
54 pm->nall = isi_nall;
55
56 slist_init( &(pm->asis) );
57 slist_init( &(pm->corps) );
58
59 if ( !progname ) pm->progname = NULL;
60 else {
61 pm->progname = strdup( progname );
62 if ( !pm->progname ) return BIBL_ERR_MEMERR;
63 }
64
65 return BIBL_OK;
66 }
67
68 /*****************************************************
69 PUBLIC: int isiin_readf()
70 *****************************************************/
71
72 /* ISI definition of a tag is strict:
73 * char 1 = uppercase alphabetic character
74 * char 2 = uppercase alphabetic character or digit
75 */
76
/*
 * Return 1 when buf starts with a two-character ISI tag (an uppercase
 * letter followed by an uppercase letter or a digit), otherwise 0.
 *
 * buf[1] is only inspected after buf[0] has been accepted, so an empty
 * string is never read past its terminator.
 */
static int
is_isi_tag( const char *buf )
{
	unsigned char first, second;

	first = (unsigned char) buf[0];
	if ( !isupper( first ) ) return 0;

	second = (unsigned char) buf[1];
	return ( isupper( second ) || isdigit( second ) ) ? 1 : 0;
}
84
85 static int
readmore(FILE * fp,char * buf,int bufsize,int * bufpos,str * line)86 readmore( FILE *fp, char *buf, int bufsize, int *bufpos, str *line )
87 {
88 if ( line->len ) return 1;
89 else return str_fget( fp, buf, bufsize, bufpos, line );
90 }
91
92 static int
isiin_readf(FILE * fp,char * buf,int bufsize,int * bufpos,str * line,str * reference,int * fcharset)93 isiin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset )
94 {
95 int haveref = 0, inref = 0;
96 char *p;
97
98 *fcharset = CHARSET_UNKNOWN;
99
100 while ( !haveref && readmore( fp, buf, bufsize, bufpos, line ) ) {
101
102 if ( str_is_empty( line ) ) continue;
103
104 p = str_cstr( line );
105
106 /* Recognize UTF8 BOM */
107 if ( line->len > 2 &&
108 (unsigned char)(p[0])==0xEF &&
109 (unsigned char)(p[1])==0xBB &&
110 (unsigned char)(p[2])==0xBF ) {
111 *fcharset = CHARSET_UNICODE;
112 p += 3;
113 }
114
115 /* Each reference ends with 'ER ' */
116 if ( is_isi_tag( p ) ) {
117 if ( !strncmp( p, "FN ", 3 ) ) {
118 if (strncasecmp( p, "FN ISI Export Format",20)){
119 // Patch: Disable output logging
120 }
121 } else if ( !strncmp( p, "VR ", 3 ) ) {
122 if ( strncasecmp( p, "VR 1.0", 6 ) ) {
123 // Patch: Disable output logging
124 }
125 } else if ( !strncmp( p, "ER", 2 ) ) haveref = 1;
126 else {
127 str_addchar( reference, '\n' );
128 str_strcatc( reference, p );
129 inref = 1;
130 }
131 str_empty( line );
132 }
133 /* not a tag, but we'll append to the last values */
134 else if ( inref ) {
135 str_addchar( reference, '\n' );
136 str_strcatc( reference, p );
137 str_empty( line );
138 }
139 else {
140 str_empty( line );
141 }
142 }
143 return haveref;
144 }
145
146 /*****************************************************
147 PUBLIC: int isiin_processf()
148 *****************************************************/
149
150 static const char *
process_tagged_line(str * tag,str * value,const char * p)151 process_tagged_line( str *tag, str *value, const char *p )
152 {
153 int i = 0;
154
155 /* collect tag and skip past it */
156 while ( i<2 && *p && *p!='\r' && *p!='\n') {
157 str_addchar( tag, *p );
158 p++;
159 i++;
160 }
161
162 while ( *p==' ' || *p=='\t' ) p++;
163
164 while ( *p && *p!='\r' && *p!='\n' ) {
165 str_addchar( value, *p );
166 p++;
167 }
168
169 str_trimendingws( value );
170
171 while ( *p=='\r' || *p=='\n' ) p++;
172
173 return p;
174 }
175
176 static const char *
process_untagged_line(str * value,const char * p)177 process_untagged_line( str *value, const char *p )
178 {
179 while ( *p==' ' || *p=='\t' ) p++;
180
181 while ( *p && *p!='\r' && *p!='\n' ) {
182 str_addchar( value, *p );
183 p++;
184 }
185
186 str_trimendingws( value );
187
188 while ( *p=='\r' || *p=='\n' ) p++;
189
190 return p;
191 }
192
193 static int
add_tag_value(fields * isiin,str * tag,str * value,int * tag_added)194 add_tag_value( fields *isiin, str *tag, str *value, int *tag_added )
195 {
196 int status;
197
198 if ( str_has_value( tag ) && str_has_value( value ) ) {
199 status = fields_add( isiin, str_cstr( tag ), str_cstr( value ), LEVEL_MAIN );
200 if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
201 *tag_added = 1;
202 }
203
204 else {
205 *tag_added = 0;
206 }
207
208 return BIBL_OK;
209 }
210
211 static int
merge_tag_value(fields * isiin,str * tag,str * value,int * tag_added)212 merge_tag_value( fields *isiin, str *tag, str *value, int *tag_added )
213 {
214 int n, status;
215 str *oldvalue;
216
217 if ( str_has_value( value ) ) {
218
219 if ( *tag_added==1 ) {
220
221 n = fields_num( isiin );
222 if ( n==0 ) return BIBL_OK;
223
224 /* only one AU or AF for list of authors */
225 if ( !strcmp( str_cstr( tag ), "AU" ) ) {
226 status = fields_add( isiin, "AU", str_cstr( value ), LEVEL_MAIN );
227 if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
228 } else if ( !strcmp( str_cstr( tag ), "AF" ) ) {
229 status = fields_add( isiin, "AF", str_cstr( value ), LEVEL_MAIN );
230 if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
231 }
232 /* otherwise append multiline data */
233 else {
234 oldvalue = fields_value( isiin, n-1, FIELDS_STRP_NOUSE );
235 str_addchar( oldvalue, ' ' );
236 str_strcat( oldvalue, value );
237 if ( str_memerr( oldvalue ) ) return BIBL_ERR_MEMERR;
238 }
239 }
240
241 else {
242 status = fields_add( isiin, str_cstr( tag ), str_cstr( value ), LEVEL_MAIN );
243 if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
244 *tag_added = 1;
245 }
246 }
247
248 return BIBL_OK;
249 }
250
251 static int
isiin_processf(fields * isiin,const char * p,const char * filename,long nref,param * pm)252 isiin_processf( fields *isiin, const char *p, const char *filename, long nref, param *pm )
253 {
254 int status, tag_added = 0, ret = 1;
255 str tag, value;
256
257 strs_init( &tag, &value, NULL );
258
259 while ( *p ) {
260
261 /* ...with tag, add */
262 if ( is_isi_tag( p ) ) {
263 str_empty( &tag );
264 str_empty( &value );
265 p = process_tagged_line( &tag, &value, p );
266 status = add_tag_value( isiin, &tag, &value, &tag_added );
267 if ( status!=BIBL_OK ) {
268 ret = 0;
269 goto out;
270 }
271 }
272
273 /* ...untagged, merge -- one AU or AF for list of authors */
274 else {
275 str_empty( &value );
276 p = process_untagged_line( &value, p );
277 status = merge_tag_value( isiin, &tag, &value, &tag_added );
278 if ( status!=BIBL_OK ) {
279 ret = 0;
280 goto out;
281 }
282 }
283
284 }
285 out:
286 strs_free( &value, &tag, NULL );
287 return ret;
288 }
289
290 /*****************************************************
291 PUBLIC: int isiin_typef()
292 *****************************************************/
293 static int
isiin_typef(fields * isiin,const char * filename,int nref,param * p)294 isiin_typef( fields *isiin, const char *filename, int nref, param *p )
295 {
296 int ntypename, nrefname, is_default;
297 char *refname = "", *typename="";
298
299 ntypename = fields_find( isiin, "PT", LEVEL_MAIN );
300 nrefname = fields_find( isiin, "UT", LEVEL_MAIN );
301
302 if ( nrefname!=FIELDS_NOTFOUND ) refname = fields_value( isiin, nrefname, FIELDS_CHRP_NOUSE );
303 if ( ntypename!=FIELDS_NOTFOUND ) typename = fields_value( isiin, ntypename, FIELDS_CHRP_NOUSE );
304
305 return get_reftype( typename, nref, p->progname, p->all, p->nall, refname, &is_default, REFTYPE_CHATTY );
306 }
307
308 /*****************************************************
309 PUBLIC: int isiin_convertf(), returns BIBL_OK or BIBL_ERR_MEMERR
310 *****************************************************/
311
312 /* pull off authors first--use AF before AU */
313 static int
isiin_addauthors(fields * isiin,fields * info,int reftype,variants * all,int nall,slist * asis,slist * corps)314 isiin_addauthors( fields *isiin, fields *info, int reftype, variants *all, int nall, slist *asis, slist *corps )
315 {
316 char *newtag, *authortype, use_af[]="AF", use_au[]="AU";
317 int level, i, n, has_af=0, has_au=0, nfields, ok;
318 str *t, *d;
319
320 nfields = fields_num( isiin );
321 for ( i=0; i<nfields && has_af==0; ++i ) {
322 t = fields_tag( isiin, i, FIELDS_STRP );
323 if ( !strcasecmp( t->data, "AU" ) ) has_au++;
324 if ( !strcasecmp( t->data, "AF" ) ) has_af++;
325 }
326 if ( has_af ) authortype = use_af;
327 else if ( has_au ) authortype = use_au;
328 else return BIBL_OK; /* no authors */
329
330 for ( i=0; i<nfields; ++i ) {
331 t = fields_tag( isiin, i, FIELDS_STRP );
332 if ( strcasecmp( t->data, authortype ) ) continue;
333 d = fields_value( isiin, i, FIELDS_STRP );
334 n = process_findoldtag( authortype, reftype, all, nall );
335 level = ((all[reftype]).tags[n]).level;
336 newtag = all[reftype].tags[n].newstr;
337 ok = name_add( info, newtag, d->data, level, asis, corps );
338 if ( !ok ) return BIBL_ERR_MEMERR;
339 }
340 return BIBL_OK;
341 }
342
343 static int
isiin_keyword(fields * bibin,int n,str * intag,str * invalue,int level,param * pm,char * outtag,fields * bibout)344 isiin_keyword( fields *bibin, int n, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
345 {
346 const char *p = str_cstr( invalue );
347 int fstatus, status = BIBL_OK;
348 str keyword;
349
350 str_init( &keyword );
351 while ( *p ) {
352 p = str_cpytodelim( &keyword, skip_ws( p ), ";", 1 );
353 if ( str_memerr( &keyword ) ) { status = BIBL_ERR_MEMERR; goto out; }
354 if ( str_has_value( &keyword ) ) {
355 fstatus = fields_add( bibout, outtag, keyword.data, level );
356 if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
357 }
358 }
359 out:
360 str_free( &keyword );
361 return status;
362 }
363
364 static void
isiin_report_notag(param * p,char * tag)365 isiin_report_notag( param *p, char *tag )
366 {
367 // Patch: Disable output logging
368 }
369
370 static int
isiin_convertf(fields * bibin,fields * bibout,int reftype,param * p)371 isiin_convertf( fields *bibin, fields *bibout, int reftype, param *p )
372 {
373 static int (*convertfns[NUM_REFTYPES])(fields *, int, str *, str *, int, param *, char *, fields *) = {
374 // Patch: Remove GCC extension to allow building on MSVC
375 // [ 0 ... NUM_REFTYPES-1 ] = generic_null,
376 [ 0 ] = generic_null,
377 [ 1 ] = generic_null,
378 [ 2 ] = generic_null,
379 [ 3 ] = generic_null,
380 [ 4 ] = generic_null,
381 [ 5 ] = generic_null,
382 [ 6 ] = generic_null,
383 [ 7 ] = generic_null,
384 [ 8 ] = generic_null,
385 [ 9 ] = generic_null,
386 [ 10 ] = generic_null,
387 [ 11 ] = generic_null,
388 [ 12 ] = generic_null,
389 [ 13 ] = generic_null,
390 [ 14 ] = generic_null,
391 [ 15 ] = generic_null,
392 [ 16 ] = generic_null,
393 [ 17 ] = generic_null,
394 [ 18 ] = generic_null,
395 [ 19 ] = generic_null,
396 [ 20 ] = generic_null,
397 [ 21 ] = generic_null,
398 [ 22 ] = generic_null,
399 [ 23 ] = generic_null,
400 [ 24 ] = generic_null,
401 [ 25 ] = generic_null,
402 [ SIMPLE ] = generic_simple,
403 [ TITLE ] = generic_title,
404 [ PERSON ] = generic_person,
405 [ SERIALNO ] = generic_serialno,
406 [ DATE ] = generic_simple,
407 [ NOTES ] = generic_notes,
408 [ KEYWORD ] = isiin_keyword,
409 };
410
411 int process, level, i, nfields, status;
412 str *intag, *invalue;
413 char *outtag;
414
415 status = isiin_addauthors( bibin, bibout, reftype, p->all, p->nall, &(p->asis), &(p->corps) );
416 if ( status!=BIBL_OK ) return status;
417
418 nfields = fields_num( bibin );
419 for ( i=0; i<nfields; ++i ) {
420
421 intag = fields_tag( bibin, i, FIELDS_STRP );
422 if ( !strcasecmp( str_cstr( intag ), "AU" ) || !strcasecmp( str_cstr( intag ), "AF" ) )
423 continue;
424
425 if ( !translate_oldtag( str_cstr( intag ), reftype, p->all, p->nall, &process, &level, &outtag ) ) {
426 isiin_report_notag( p, str_cstr( intag ) );
427 continue;
428 }
429
430 invalue = fields_value( bibin, i, FIELDS_STRP );
431
432 status = convertfns[ process ] ( bibin, i, intag, invalue, level, p, outtag, bibout );
433 if ( status!=BIBL_OK ) return status;
434 }
435
436 // Patch: Disable output logging
437
438 return status;
439 }
440