1 /*
2  * wordin.c
3  *
4  * Copyright (c) Chris Putnam 2010-2021
5  *
6  * Source code released under the GPL version 2
7  *
8  */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "is_ws.h"
#include "str.h"
#include "str_conv.h"
#include "fields.h"
#include "pages.h"
#include "xml.h"
#include "xml_encoding.h"
#include "bibformats.h"
19 
20 static int wordin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset );
21 static int wordin_processf( fields *wordin, const char *data, const char *filename, long nref, param *p );
22 
23 
/*****************************************************
 PUBLIC: int wordin_initparams()
*****************************************************/
27 
28 int
wordin_initparams(param * pm,const char * progname)29 wordin_initparams( param *pm, const char *progname )
30 {
31 	pm->readformat       = BIBL_WORDIN;
32 	pm->charsetin        = BIBL_CHARSET_DEFAULT;
33 	pm->charsetin_src    = BIBL_SRC_DEFAULT;
34 	pm->latexin          = 0;
35 	pm->xmlin            = 1;
36 	pm->utf8in           = 1;
37 	pm->nosplittitle     = 0;
38 	pm->verbose          = 0;
39 	pm->addcount         = 0;
40 	pm->output_raw       = BIBL_RAW_WITHMAKEREFID |
41 	                      BIBL_RAW_WITHCHARCONVERT;
42 
43 	pm->readf    = wordin_readf;
44 	pm->processf = wordin_processf;
45 	pm->cleanf   = NULL;
46 	pm->typef    = NULL;
47 	pm->convertf = NULL;
48 	pm->all      = NULL;
49 	pm->nall     = 0;
50 
51 	slist_init( &(pm->asis) );
52 	slist_init( &(pm->corps) );
53 
54 	if ( !progname ) pm->progname = NULL;
55 	else {
56 		pm->progname = strdup( progname );
57 		if ( !pm->progname ) return BIBL_ERR_MEMERR;
58 	}
59 
60 	return BIBL_OK;
61 }
62 
63 /*****************************************************
64  PUBLIC: int wordin_readf()
65 *****************************************************/
66 
/* wordin_findstartwrapper()
 *
 * Search buf for the opening "b:Source" element with xml_find_start()
 * and return whatever that call returns.
 *
 * ntype is accepted but is neither read nor written here.
 */
static char *
wordin_findstartwrapper( char *buf, int *ntype )
{
	char *start;

	(void) ntype;
	start = xml_find_start( buf, "b:Source" );
	return start;
}
72 
/* wordin_findendwrapper()
 *
 * Search buf for the closing "b:Source" element with xml_find_end()
 * and return whatever that call returns.
 *
 * ntype is accepted but is not used here.
 */
static char *
wordin_findendwrapper( char *buf, int ntype )
{
	char *end;

	(void) ntype;
	end = xml_find_end( buf, "b:Source" );
	return end;
}
78 
79 static int
wordin_readf(FILE * fp,char * buf,int bufsize,int * bufpos,str * line,str * reference,int * fcharset)80 wordin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset )
81 {
82 	str tmp;
83 	char *startptr = NULL, *endptr;
84 	int haveref = 0, inref = 0, file_charset = CHARSET_UNKNOWN, m, type = 1;
85 	str_init( &tmp );
86 	while ( !haveref && str_fget( fp, buf, bufsize, bufpos, line ) ) {
87 		if ( str_cstr( line ) ) {
88 			m = xml_getencoding( line );
89 			if ( m!=CHARSET_UNKNOWN ) file_charset = m;
90 		}
91 		if ( str_cstr( line ) ) {
92 			startptr = wordin_findstartwrapper( str_cstr( line ), &type );
93 		}
94 		if ( startptr || inref ) {
95 			if ( inref ) str_strcat( &tmp, line );
96 			else {
97 				str_strcatc( &tmp, startptr );
98 				inref = 1;
99 			}
100 			endptr = wordin_findendwrapper( str_cstr( &tmp ), type );
101 			if ( endptr ) {
102 				str_segcpy( reference, str_cstr( &tmp ), endptr );
103 				haveref = 1;
104 			}
105 		}
106 	}
107 	str_free( &tmp );
108 	*fcharset = file_charset;
109 	return haveref;
110 }
111 
112 /*****************************************************
113  PUBLIC: int wordin_processf()
114 *****************************************************/
115 
/* xml_convert
 *
 * One entry describing how an input XML tag, optionally qualified by
 * an attribute="attribute_value" pair, corresponds to an output tag.
 */
typedef struct xml_convert {
	char *in;    /* input tag */
	char *a;     /* attribute name, if needed */
	char *aval;  /* attribute value that goes with a, if needed */
	char *out;   /* output tag */
	int level;   /* NOTE(review): presumably the level stored with the output tag -- confirm */
} xml_convert;
122 
123 /* wordin_person_last()
124  *
125  * From an xml list, extract the value from the first entry
126  * of <b:Last>xxxx</b:Last> and copy into name
127  *
128  * Additional <b:Last>yyyyy</b:Last> will be ignored.
129  *
130  * Returns BIBL_ERR_MEMERR on memory error, BIBL_OK otherwise.
131  */
132 static int
wordin_person_last(xml * node,str * name)133 wordin_person_last( xml *node, str *name )
134 {
135 	while ( node && !xml_tag_matches( node, "b:Last" ) )
136 		node = node->next;
137 	if ( xml_has_value( node ) ) {
138 		str_strcpy( name, xml_value( node ) );
139 		if ( str_memerr( name ) ) return BIBL_ERR_MEMERR;
140 	}
141 	return BIBL_OK;
142 }
143 
144 /* wordin_person_first()
145  *
146  * From an xml list, extract the value of any
147  * <b:First>xxxx</b:First> and append "|xxxx" to name.
148  *
149  * Returns BIBL_ERR_MEMERR on memory error, BIBL_OK otherwise
150  */
151 static int
wordin_person_first(xml * node,str * name)152 wordin_person_first( xml *node, str *name )
153 {
154 	for ( ; node; node=node->next ) {
155 		if ( !xml_tag_matches( node, "b:First" ) ) continue;
156 		if ( xml_has_value( node ) ) {
157 			if ( str_has_value( name ) ) str_addchar( name, '|' );
158 			str_strcat( name, xml_value( node ) );
159 			if ( str_memerr( name ) ) return BIBL_ERR_MEMERR;
160 		}
161 	}
162 	return BIBL_OK;
163 }
164 
165 static int
wordin_person(xml * node,fields * info,char * type)166 wordin_person( xml *node, fields *info, char *type )
167 {
168 	int status, ret = BIBL_OK;
169 	str name;
170 
171 	str_init( &name );
172 
173 	status = wordin_person_last( node, &name );
174 	if ( status!=BIBL_OK ) {
175 		ret = status;
176 		goto out;
177 	}
178 
179 	status = wordin_person_first( node, &name );
180 	if ( status!=BIBL_OK ) {
181 		ret = status;
182 		goto out;
183 	}
184 
185 	status = fields_add( info, type, str_cstr( &name ), 0 );
186 	if ( status != FIELDS_OK ) ret = BIBL_ERR_MEMERR;
187 out:
188 	str_free( &name );
189 	return ret;
190 }
191 
192 static int
wordin_people(xml * node,fields * info,char * type)193 wordin_people( xml *node, fields *info, char *type )
194 {
195 	int ret = BIBL_OK;
196 	if ( xml_tag_matches( node, "b:Author" ) && node->down ) {
197 		ret = wordin_people( node->down, info, type );
198 	} else if ( xml_tag_matches( node, "b:NameList" ) && node->down ) {
199 		ret = wordin_people( node->down, info, type );
200 	} else if ( xml_tag_matches( node, "b:Person" ) ) {
201 		if ( node->down ) ret = wordin_person( node->down, info, type );
202 		if ( ret!=BIBL_OK ) return ret;
203 		if ( node->next ) ret = wordin_people( node->next, info, type );
204 	}
205 	return ret;
206 }
207 
208 static int
wordin_reference(xml * node,fields * info)209 wordin_reference( xml *node, fields *info )
210 {
211 	int status, ret = BIBL_OK;
212 	if ( xml_has_value( node ) ) {
213 		if ( xml_tag_matches( node, "b:Tag" ) ) {
214 			status = fields_add( info, "REFNUM", xml_value_cstr( node ), 0 );
215 			if ( status!=FIELDS_OK ) ret = BIBL_ERR_MEMERR;
216 		} else if ( xml_tag_matches( node, "b:SourceType" ) ) {
217 		} else if ( xml_tag_matches( node, "b:City" ) ) {
218 			status = fields_add( info, "ADDRESS", xml_value_cstr( node ), 0 );
219 			if ( status!=FIELDS_OK ) ret = BIBL_ERR_MEMERR;
220 		} else if ( xml_tag_matches( node, "b:Publisher" ) ) {
221 			status = fields_add( info, "PUBLISHER", xml_value_cstr( node ), 0 );
222 			if ( status!=FIELDS_OK ) ret = BIBL_ERR_MEMERR;
223 		} else if ( xml_tag_matches( node, "b:Title" ) ) {
224 			status = fields_add( info, "TITLE", xml_value_cstr( node ), 0 );
225 			if ( status!=FIELDS_OK ) ret = BIBL_ERR_MEMERR;
226 		} else if ( xml_tag_matches( node, "b:JournalName" ) ) {
227 			status = fields_add( info, "TITLE", xml_value_cstr( node ), 1 );
228 			if ( status!=FIELDS_OK ) ret = BIBL_ERR_MEMERR;
229 		} else if ( xml_tag_matches( node, "b:Volume" ) ) {
230 			status = fields_add( info, "VOLUME", xml_value_cstr( node ), 1 );
231 			if ( status!=FIELDS_OK ) ret = BIBL_ERR_MEMERR;
232 		} else if ( xml_tag_matches( node, "b:Comments" ) ) {
233 			status = fields_add( info, "NOTES", xml_value_cstr( node ), 0 );
234 			if ( status!=FIELDS_OK ) ret = BIBL_ERR_MEMERR;
235 		} else if ( xml_tag_matches( node, "b:Pages" ) ) {
236 			ret = add_pages( info, xml_value( node ), 1 );
237 		} else if ( xml_tag_matches( node, "b:Author" ) && node->down ) {
238 			ret = wordin_people( node->down, info, "AUTHOR" );
239 		} else if ( xml_tag_matches( node, "b:Editor" ) && node->down ) {
240 			ret = wordin_people( node->down, info, "EDITOR" );
241 		}
242 	}
243 	if ( ret==BIBL_OK && node->next ) wordin_reference( node->next, info );
244 	return ret;
245 }
246 
247 static int
wordin_assembleref(xml * node,fields * info)248 wordin_assembleref( xml *node, fields *info )
249 {
250 	int ret = BIBL_OK;
251 	if ( xml_tag_matches( node, "b:Source" ) ) {
252 		if ( node->down ) ret = wordin_reference( node->down, info );
253 	} else if ( str_is_empty( &(node->tag) ) && node->down ) {
254 		ret = wordin_assembleref( node->down, info );
255 	}
256 	return ret;
257 }
258 
259 static int
wordin_processf(fields * wordin,const char * data,const char * filename,long nref,param * p)260 wordin_processf( fields *wordin, const char *data, const char *filename, long nref, param *p )
261 {
262 	int status, ret = 1;
263 	xml top;
264 
265 	xml_init( &top );
266 	xml_parse( data, &top );
267 	status = wordin_assembleref( &top, wordin );
268 	xml_free( &top );
269 
270 	if ( status==BIBL_ERR_MEMERR ) ret = 0;
271 	return ret;
272 }
273