1 /*
2 * International Chemical Identifier (InChI)
3 * Version 1
4 * Software version 1.04
5 * September 9, 2011
6 *
7 * The InChI library and programs are free software developed under the
8 * auspices of the International Union of Pure and Applied Chemistry (IUPAC).
9 * Originally developed at NIST. Modifications and additions by IUPAC
10 * and the InChI Trust.
11 *
12 * IUPAC/InChI-Trust Licence for the International Chemical Identifier (InChI)
13 * Software version 1.0.
14 * Copyright (C) IUPAC and InChI Trust Limited
15 *
16 * This library is free software; you can redistribute it and/or modify it under the
17 * terms of the IUPAC/InChI Trust Licence for the International Chemical Identifier
18 * (InChI) Software version 1.0; either version 1.0 of the License, or
19 * (at your option) any later version.
20 *
21 * This library is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
24 * See the IUPAC/InChI Trust Licence for the International Chemical Identifier (InChI)
25 * Software version 1.0 for more details.
26 *
27 * You should have received a copy of the IUPAC/InChI Trust Licence for the
28 * International Chemical Identifier (InChI) Software version 1.0 along with
29 * this library; if not, write to:
30 *
31 * The InChI Trust
32 * c/o FIZ CHEMIE Berlin
33 * Franklinstrasse 11
34 * 10587 Berlin
35 * GERMANY
36 *
37 */
38
39
40 /* local prototypes */
41 int bypass_sdf_data_items( FILE* inp, long *cas_reg_no, char* comment, int lcomment, char *name, int lname, int prev_err,
42 const char *pSdfLabel, char *pSdfValue, char *pStrErr );
43 MOL_DATA* read_mol_file( FILE* inp, MOL_HEADER_BLOCK *OnlyHeaderBlock, MOL_CTAB *OnlyCtab,
44 int bGetOrigCoord, int *err, char *pStrErr );
45
46
47 static int mol_read_hdr(MOL_HEADER_BLOCK *hdr, FILE* inp, char *pStrErr);
48 static int mol_read_counts_line( MOL_CTAB* ctab, FILE *inp, char *pStrErr );
49 static int read_atom_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr );
50 static int read_bonds_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr );
51 static int read_stext_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr );
52 static int read_properties_block( MOL_CTAB* ctab, MOL_HEADER_BLOCK *pHdr, FILE *inp, int err, char *pStrErr );
53
54 static int identify_sdf_label( char* inp_line, const char *pSdfLabel );
55 static long extract_cas_rn( char *line );
56 static int mol_copy_check_empty( char* dest, char* source, int len, char **first_space );
57 static int mol_read_datum(void* data, int field_len, int data_type, char** line_ptr);
58
59 static int RemoveNonPrintable( char *line );
60
61
62 /******/
/*
 * Error-reporting helpers used by the MOLfile readers in this file.
 *
 * Both macros expect a variable named 'pStrErr' to be in scope at the
 * point of use; it is passed to AddMOLfileError() together with 'msg'.
 *
 *   MOLFILE_ERR_SET(err, new_err, msg)
 *       If 'err' is still zero and 'new_err' is non-zero, store 'new_err'
 *       in 'err'.  The message is appended in every case, so passing
 *       new_err == 0 records a message without raising an error code.
 *
 *   MOLFILE_ERR_FIN(err, new_err, err_fin, msg)
 *       Same as MOLFILE_ERR_SET, then jumps to the label 'err_fin'.
 *
 * The bodies are wrapped in do { ... } while (0) so that each macro
 * expands to exactly one statement.  Without the wrapper, writing
 *       if (cond) MOLFILE_ERR_SET(err, 1, "msg");
 * would execute the AddMOLfileError() call (and, for MOLFILE_ERR_FIN,
 * the goto) unconditionally, and an 'else' after it would not compile.
 * Every use must be terminated with a semicolon.
 */
#ifndef MOLFILE_ERR_FIN
#define MOLFILE_ERR_FIN(err, new_err, err_fin, msg) \
    do { \
        if ( !(err) && (new_err) ) { (err) = (new_err); } \
        AddMOLfileError(pStrErr, (msg)); \
        goto err_fin; \
    } while (0)
#endif
#ifndef MOLFILE_ERR_SET
#define MOLFILE_ERR_SET(err, new_err, msg) \
    do { \
        if ( !(err) && (new_err) ) { (err) = (new_err); } \
        AddMOLfileError(pStrErr, (msg)); \
    } while (0)
#endif
71
72 /*************************************************************************/
AddMOLfileError(char * pStrErr,const char * szMsg)73 int AddMOLfileError( char *pStrErr, const char *szMsg )
74 {
75 if ( pStrErr && szMsg && szMsg[0] ) {
76 int lenStrErr = strlen( pStrErr );
77 int lenMsg = strlen( szMsg );
78 char *p = strstr( pStrErr, szMsg );
79 if ( p && (p==pStrErr || *(p-1) == ' ' && (*(p-2) == ';' || *(p-2) == ':' )) &&
80 (p+lenMsg == pStrErr+lenStrErr ||
81 p[lenMsg] == ';' && p[lenMsg+1] == ' ' ||
82 p[lenMsg-1]==':' && p[lenMsg]==' ') ) {
83 return 1; /* reject duplicates */
84 }
85 if ( lenStrErr + lenMsg + 2*(lenStrErr > 0) < STR_ERR_LEN ) {
86 /* enough room to add */
87 if (lenStrErr > 0) {
88 if ( pStrErr[lenStrErr-1] != ':' ) {
89 strcat( pStrErr, ";" );
90 }
91 strcat( pStrErr, " " );
92 }
93 strcat( pStrErr, szMsg );
94 return 1;
95 }
96 /* no room */
97 if ( strstr( pStrErr, "..." ) ) {
98 return 0; /* no room mark has already been set */
99 }
100 if ( lenStrErr + 3 < STR_ERR_LEN ) {
101 strcat( pStrErr, "..." );
102 }
103 }
104 return 0;
105 }
106 /*************** static **********************************************************/
/*
 * Copies at most 'len' bytes of 'source' into 'dest', zero-terminates the
 * copy and locates the end of its non-blank part.
 *
 *   dest         must have room for at least len+1 bytes
 *   source       input text; copying stops early at its terminating zero
 *   len          maximum number of bytes to copy; a value <= 0 copies
 *                nothing and leaves an empty string in dest
 *   first_space  receives a pointer into dest: the first byte of the
 *                trailing whitespace run, or the terminating zero if the
 *                copy has no trailing whitespace (equals dest when the
 *                copy is empty or all blank)
 *
 * Returns the number of bytes copied, not counting the terminating zero.
 *
 * A negative len used to store the terminator at dest[len], i.e. before
 * the start of the buffer; it is now handled like len == 0.
 */
int mol_copy_check_empty( char* dest, char* source, int len, char **first_space )
{
    int copied = 0;
    int end;

    if ( len > 0 ) {
        strncpy( dest, source, len );
        dest[len] = '\0';
        copied = (int)strlen( dest );
    } else {
        dest[0] = '\0';
    }

    /* step back over trailing whitespace; the cast keeps isspace() defined for bytes >= 0x80 */
    for ( end = copied; end > 0 && isspace( (unsigned char)dest[end-1] ); end-- )
        ;

    *first_space = dest + end;
    return copied;
}
119 /************* static ************************************************************/
mol_read_datum(void * data,int field_len,int data_type,char ** line_ptr)120 int mol_read_datum(void* data, int field_len, int data_type, char** line_ptr)
121 {
122 /* 1. 'field_len' for MOL_STRING_DATA does not include trailing zero,
123 * that is actual length of the string pointed by 'data'
124 * should be at least field_len+1 bytes.
125 * For numerical data 'field_len' is length of input data field
126 * For numerical integral data field_len <= 0 means read up to first
127 * non-numeric character as strtod() does ("free format")
128 * 2. return value: for MOL_STRING_DATA: number of bytes excluding trailing zero
129 * for all others: 1=success; 0 = empty; -1= error
130 * 3. on exit *line_ptr points to the next byte after the last entered
131 */
132 char *p = *line_ptr, *q, *p_end;
133 int i, ret=1, c, len;
134 long ldata;
135 double ddata;
136
137 switch( data_type ) {
138 case MOL_STRING_DATA:
139 for ( i= 0; i < field_len && 0 != (c = p[i]) && isspace(UCINT c); i++ ) /* pass by all leading spaces */
140 ;
141 len = mol_copy_check_empty( (char*)data, &p[i], field_len-i, &q );
142 ret = ( q - (char*)data );/* actual data length */
143 *q = '\0'; /* add zero termination to data if it is not there yet*/
144 *line_ptr += (len+i); /* ptr to the 1st byte of the next input field or to zero termination */
145 break;
146
147 case MOL_CHAR_INT_DATA:
148 case MOL_SHORT_INT_DATA:
149 case MOL_LONG_INT_DATA:
150 { /* block start */
151 char str[MOL_MAX_VALUE_LEN+1];
152 ldata = 0L;
153 if ( field_len > MOL_MAX_VALUE_LEN ) {
154 ret = -1;
155 }else
156 if ( field_len > 0 ) { /* fixed length */
157 *line_ptr += ( len = mol_copy_check_empty( str, p, field_len, &q ) );
158 *q = '\0';
159 if ( !len || !(q-str) ) { /* empty string */
160 ret = 0;
161 }else
162 if ( (ldata=strtol(str,&p_end,10), p_end != q) ){ /* wrong data: incompletely interpreted */
163 ret = -1;
164 }
165 }else{ /* free format: field_len <= 0 */
166 ldata = strtol( p, &p_end, 10 );
167 *line_ptr += ( len = p_end - p );
168 if ( len == 0 ){
169 ret = 0;
170 }
171 }
172
173 switch( data_type ) {
174 case MOL_CHAR_INT_DATA:
175 if ( SCHAR_MIN <= ldata && ldata <= SCHAR_MAX ){ /* from || to &&: 11-19-96 */
176 *(S_CHAR*)data = (S_CHAR)ldata;
177 }else{
178 *(S_CHAR*)data = (S_CHAR)0;
179 ret = -1;
180 }
181 break;
182 case MOL_SHORT_INT_DATA:
183 if ( SHRT_MIN <= ldata && ldata <= SHRT_MAX ){
184 *(S_SHORT*)data = (S_SHORT)ldata;
185 }else{
186 *(S_SHORT*)data = (S_SHORT)0;
187 ret = -1;
188 }
189 break;
190 case MOL_LONG_INT_DATA:
191 if ( LONG_MIN < ldata && ldata < LONG_MAX ){
192 *(long*)data = (long)ldata;
193 }else{
194 *(long*)data = 0L;
195 ret = -1;
196 }
197 break;
198 default:
199 ret=-1;
200 }
201
202 } /* block end */
203 break;
204 case MOL_DOUBLE_DATA:
205 case MOL_FLOAT_DATA:
206 { /* block start */
207 char str[MOL_MAX_VALUE_LEN+1];
208 if ( field_len > MOL_MAX_VALUE_LEN ) {
209 ret = -1;
210 ddata = 0.0;
211 }else
212 if ( field_len > 0 ) {
213 *line_ptr += (len = mol_copy_check_empty( str, p, field_len, &q ));
214 *q = '\0';
215 if ( !len || !(q-str) ) { /* empty string */
216 ddata = 0.0;
217 ret = 0;
218 }else
219 if ( (ddata=strtod(str,&p_end), p_end != q) ){ /* wrong data */
220 ret = -1;
221 }
222 }else{ /* free format */
223 ddata = strtod( p, &p_end );
224 *line_ptr += ( len = p_end - p );
225 if ( len == 0 ){
226 ret = 0;
227 }
228 }
229 switch(data_type){
230 case MOL_DOUBLE_DATA:
231 if ( ddata != HUGE_VAL && /*ldata*/ ddata != -HUGE_VAL ){ /* replaced ldata with ddata 6-30-98 DCh */
232 *(double*)data = ddata;
233 }else{
234 *(double*)data = 0.0;
235 ret = -1;
236 }
237 break;
238 case MOL_FLOAT_DATA:
239 if ( fabs(ddata) <= (double)FLT_MIN ) {
240 *(float*)data = 0.0;
241 }else
242 if ( fabs(ddata) >= (double)FLT_MAX ) {
243 *(float*)data = 0.0;
244 ret = -1;
245 }else{
246 *(float*)data = (float)ddata;
247 }
248 break;
249 }
250 } /* block end */
251 break;
252 case MOL_JUMP_TO_RIGHT:
253 for ( i = 0; i < field_len && p[i]; i++ )
254 ;
255 *line_ptr += i;
256 ret = i;
257 break;
258 default:
259 ret = -1;
260 }
261 return ret;
262 }
263 /************* static ************************************************************/
/*
 * Reads the three lines of a MOLfile header block from 'inp' into 'hdr'.
 *
 *   line 1: molecule name
 *   line 2: user initials, program name, date/time, dimensional code,
 *           scaling factors, energy, internal registry number; the whole
 *           line is additionally kept in hdr->szMoleculeLine2
 *   line 3: comment
 *
 * Returns 0 on success, or 1, 3 or 7 when line 1, 2 or 3 respectively
 * could not be read.  Lines longer than the nominal MOLfile width are
 * accepted, and the results of parsing the individual fields are
 * deliberately not checked (relaxed strictness).
 * 'pStrErr' is not used here; no message is added on failure.
 */
int mol_read_hdr(MOL_HEADER_BLOCK *hdr, FILE* inp, char *pStrErr)
{
    char      buf[MOLFILEINPLINELEN];   /* + cr + lf + zero termination + reserve */
    const int buf_size = sizeof(buf);
    char     *cur;

    (void)pStrErr;

    /*------------ header line #1: name ----------------*/
    cur = inchi_fgetsLf( buf, buf_size, inp );
    if ( !cur ) {
        return 1;   /* can't read the input file line */
    }
    remove_one_lf( buf );
    mol_read_datum( hdr->szMoleculeName, sizeof(hdr->szMoleculeName)-1, MOL_STRING_DATA, &cur );

    /*------------ header line #2 ----------------------*/
    cur = inchi_fgetsLf( buf, buf_size, inp );
    if ( !cur ) {
        return 3;   /* can't read the input file line */
    }
    remove_one_lf( buf );
    mol_read_datum( hdr->szUserInitials, sizeof(hdr->szUserInitials)-1, MOL_STRING_DATA, &cur );
    mol_read_datum( hdr->szProgramName,  sizeof(hdr->szProgramName)-1,  MOL_STRING_DATA, &cur );
    /* date and time: five two-character fields */
    mol_read_datum( &hdr->cMonth,  2, MOL_CHAR_INT_DATA, &cur );
    mol_read_datum( &hdr->cDay,    2, MOL_CHAR_INT_DATA, &cur );
    mol_read_datum( &hdr->cYear,   2, MOL_CHAR_INT_DATA, &cur );
    mol_read_datum( &hdr->cHour,   2, MOL_CHAR_INT_DATA, &cur );
    mol_read_datum( &hdr->cMinute, 2, MOL_CHAR_INT_DATA, &cur );
    mol_read_datum( hdr->szDimCode, sizeof(hdr->szDimCode)-1, MOL_STRING_DATA, &cur );
    mol_read_datum( &hdr->nScalingFactor1,          2, MOL_SHORT_INT_DATA, &cur );
    mol_read_datum( &hdr->dScalingFactor2,         10, MOL_DOUBLE_DATA,    &cur );
    mol_read_datum( &hdr->dEnergy,                 12, MOL_DOUBLE_DATA,    &cur );
    mol_read_datum( &hdr->lInternalRegistryNumber,  6, MOL_LONG_INT_DATA,  &cur );

    /* rewind to the start of the buffer and save the whole line 2 */
    cur = buf;
    mol_read_datum( hdr->szMoleculeLine2, sizeof(hdr->szMoleculeLine2)-1, MOL_STRING_DATA, &cur );

    /*------------ header line #3: comment -------------*/
    cur = inchi_fgetsLf( buf, buf_size, inp );
    if ( !cur ) {
        return 7;   /* can't read the line */
    }
    remove_one_lf( buf );
    mol_read_datum( hdr->szComment, sizeof(hdr->szComment)-1, MOL_STRING_DATA, &cur );

    return 0;
}
340 /********** static *****************************************************/
/*
 * Replaces, in place, every byte of 'line' that lies outside the printable
 * ASCII range 0x20..0x7E with '.'.
 *
 * Returns the number of bytes replaced; 0 if 'line' is NULL.
 */
int RemoveNonPrintable( char *line )
{
    int   replaced = 0;
    char *s;

    if ( !line ) {
        return 0;
    }
    for ( s = line; *s != '\0'; s++ ) {
        /* assuming ASCII charset */
        unsigned char ch = (unsigned char)*s;
        if ( ch < ' ' || ch >= 0x7F ) {
            *s = '.';
            replaced++;
        }
    }
    return replaced;
}
355 /************** static *************************************************/
/*
 * Reads the counts line of a connection table from 'inp' into 'ctab':
 * numbers of atoms and bonds, chiral flag, number of STEXT entries,
 * number of property lines and the Ctab version string.  Which of the
 * remaining three-character fields are stored and which are skipped
 * depends on the MOL_QUERY and MOL_CPSS build settings.
 *
 * Returns 0 on success, 1 if the line could not be read, 3 if one of the
 * numeric fields could not be interpreted.  An over-long line only adds a
 * message to 'pStrErr'; it does not change the return value.  On an
 * interpretation error the offending line, with non-printable characters
 * replaced, is appended to 'pStrErr' as well.
 */
int mol_read_counts_line( MOL_CTAB* ctab, FILE *inp, char *pStrErr )
{
    char  buf[MOLFILEINPLINELEN];
    char *cur;
    int   err = 0;
    int   bad_field;

    cur = inchi_fgetsLf( buf, (int)sizeof(buf), inp );
    if ( !cur ) {
        /* can't read the input file line */
        MOLFILE_ERR_FIN (err, 1, err_fin, "Cannot read counts line");
    }
    remove_one_lf( buf );
    if ( buf[MOLFILEMAXLINELEN] ) {
        /* too long input file line: message only, error code stays 0 */
        MOLFILE_ERR_SET (err, 0, "Too long counts line");
    }

    /* fields are read strictly left to right; reading stops at the first bad one */
    bad_field =
           mol_read_datum( &ctab->nNumberOfAtoms, 3, MOL_SHORT_INT_DATA, &cur ) < 0
        || mol_read_datum( &ctab->nNumberOfBonds, 3, MOL_SHORT_INT_DATA, &cur ) < 0
#if ( MOL_QUERY == MOL_PRESENT )
        || mol_read_datum( &ctab->nNumberOfAtomsLists, 3, MOL_SHORT_INT_DATA, &cur ) < 0
#else
        || mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur ) < 0
#endif
        || mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur ) < 0   /* obsolete field */
        || mol_read_datum( &ctab->cChiralFlag, 3, MOL_CHAR_INT_DATA, &cur ) < 0
        || mol_read_datum( &ctab->nNumberOfStextEntries, 3, MOL_SHORT_INT_DATA, &cur ) < 0
#if ( MOL_CPSS == MOL_PRESENT )
        || mol_read_datum( &ctab->nNumberOfReactionComponentsPlus1, 3, MOL_SHORT_INT_DATA, &cur ) < 0
        || mol_read_datum( &ctab->nNumberOfReactants, 3, MOL_SHORT_INT_DATA, &cur ) < 0
        || mol_read_datum( &ctab->nNumberOfProducts, 3, MOL_SHORT_INT_DATA, &cur ) < 0
        || mol_read_datum( &ctab->nNumberOfIntermediates, 3, MOL_SHORT_INT_DATA, &cur ) < 0
#else
        || mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur ) < 0
        || mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur ) < 0
        || mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur ) < 0
        || mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur ) < 0
#endif
        || mol_read_datum( &ctab->nNumberOfPropertyLines, 3, MOL_SHORT_INT_DATA, &cur ) < 0;

    if ( !bad_field ) {
        mol_read_datum( ctab->csCurrentCtabVersion, sizeof(ctab->csCurrentCtabVersion)-1, MOL_STRING_DATA, &cur );
    } else {
        /* can't interpret counts line: report it together with the line text */
        err = 3;
        MOLFILE_ERR_SET (err, 3, "Cannot interpret counts line:");
        RemoveNonPrintable( buf );
        AddMOLfileError( pStrErr, buf );
    }

err_fin:
    return err;
}
403
404 /************ static *************************************************************/
/*
 * Reads ctab->nNumberOfAtoms atom block lines from 'inp'.
 *
 *   err      error state on entry.  If it is non-zero, or becomes non-zero
 *            while reading, the remaining lines are read but not
 *            interpreted, so that the input stays positioned after the
 *            atom block.
 *   pStrErr  receives error and warning messages.
 *
 * If ctab->szCoord is not NULL, the start of each line (via mystrncpy with
 * a length argument of 31) is saved there as the original coordinates.
 * If ctab->MolAtom is NULL the lines are read without being interpreted.
 *
 * The charge code from the atom line is translated through charge_val[]:
 * codes 0..7 map to 0, +3, +2, +1, doublet radical, -1, -2, -3; any other
 * code c is stored as charge 4-c.
 *
 * Returns the resulting error state: 0 on success, 2 if a line could not
 * be read, 4 or 5 if the first or second half of a line could not be
 * interpreted.  The value is negated when the SDF_END_OF_DATA line is met
 * while an error is pending.
 */
int read_atom_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr )
{
    /*  charge code:                 0  1  2  3   4    5   6   7 */
    static S_SHORT charge_val[] = {  0, 3, 2, 1, 'R', -1, -2, -3 };

    char     buf[MOLFILEINPLINELEN];
    char    *cur;
    S_SHORT  idx, code;
    int      fail_code;

    for ( idx = 0; idx < ctab->nNumberOfAtoms; idx++ ) {
        MOL_ATOM *at;
        char     *sym;

        cur = inchi_fgetsLf( buf, (int)sizeof(buf), inp );
        if ( !cur ) {
            if ( !err ) {
                MOLFILE_ERR_SET (err, 2, "Cannot read atom block line");
            }
            break;
        }
        remove_one_lf( buf );
        if ( buf[MOLFILEMAXLINELEN] ) {
            /* message only: the error code is left unchanged */
            MOLFILE_ERR_SET (err, 0, "Too long atom block line");
        }
        if ( err ) {
            /* an error is pending: just bypass the rest of the atom block */
            if ( 0 == strcmp( buf, SDF_END_OF_DATA ) ) {
                err = -abs(err);
                break;
            }
            continue;
        }
        if ( ctab->szCoord ) {
            mystrncpy( ctab->szCoord[idx], cur, 31 );   /* original coordinates */
        }
        if ( !ctab->MolAtom ) {
            continue;
        }

        at        = &ctab->MolAtom[idx];
        fail_code = 0;

        /* first half of the line: coordinates, symbol, mass difference, charge, ... */
        if (   mol_read_datum( &at->fX, 10, MOL_DOUBLE_DATA, &cur ) < 0
            || mol_read_datum( &at->fY, 10, MOL_DOUBLE_DATA, &cur ) < 0
            || mol_read_datum( &at->fZ, 10, MOL_DOUBLE_DATA, &cur ) < 0
            || mol_read_datum( NULL, 1, MOL_JUMP_TO_RIGHT, &cur ) < 0
            || mol_read_datum( at->szAtomSymbol, 3, MOL_STRING_DATA, &cur ) == 0   /* empty symbol is an error */
#ifdef TARGET_EXE_USING_API
            || mol_read_datum( &at->cMassDifference, 2, MOL_SHORT_INT_DATA, &cur ) < 0
#else
            || mol_read_datum( &at->cMassDifference, 2, MOL_CHAR_INT_DATA, &cur ) < 0
#endif
            || mol_read_datum( &at->cCharge, 3, MOL_CHAR_INT_DATA, &cur ) < 0
            || mol_read_datum( &at->cStereoParity, 3, MOL_CHAR_INT_DATA, &cur ) < 0
#if ( MOL_QUERY == MOL_PRESENT )
            || mol_read_datum( &at->cH_countPlus1, 3, MOL_CHAR_INT_DATA, &cur ) < 0
            || mol_read_datum( &at->cStereoCare, 3, MOL_CHAR_INT_DATA, &cur ) < 0
#else
            || mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur ) < 0
            || mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur ) < 0
#endif
            || mol_read_datum( &at->cValence, 3, MOL_CHAR_INT_DATA, &cur ) < 0 ) {

            fail_code = 4;   /* can't interpret the first half of the line */

        } else {
            /* normalize a two-letter symbol such as "CL" to "Cl" */
            sym = at->szAtomSymbol;
            if ( 2 == strlen( sym ) && isupper( UCINT sym[1] ) ) {
                sym[1] = (char)tolower( UCINT sym[1] );
            }

            /* translate the charge code into charge and radical */
            code = (S_SHORT) at->cCharge;
            if ( code < 0 || code >= (int)(sizeof(charge_val) / sizeof(charge_val[0])) ) {
                /* outside the table: allow greater charges */
                at->cCharge  = (S_CHAR)(4 - code);
                at->cRadical = 0;
            } else {
                code = charge_val[code];
                if ( 'R' == code ) {
                    at->cCharge  = 0;
                    at->cRadical = RADICAL_DOUBLET;
                } else {
                    at->cCharge  = (S_CHAR)code;   /* actual charge value */
                    at->cRadical = 0;
                }
            }
#ifdef TARGET_EXE_USING_API
            if ( at->cMassDifference ) {
                at->cMassDifference += ISOTOPIC_SHIFT_FLAG;
            }
#endif

            /* second half of the line */
            if (
#if ( MOL_CPSS == MOL_PRESENT )
                   mol_read_datum( &at->cH0_designator, 3, MOL_CHAR_INT_DATA, &cur ) < 0
                || mol_read_datum( &at->cReactionComponentType, 3, MOL_CHAR_INT_DATA, &cur ) < 0
                || mol_read_datum( &at->cReactionComponentNumber, 3, MOL_CHAR_INT_DATA, &cur ) < 0
#else
                   mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur ) < 0
                || mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur ) < 0
                || mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur ) < 0
#endif
#if ( MOL_REACT == MOL_PRESENT )
                || mol_read_datum( &at->nAtomAtomMappingNumber, 3, MOL_SHORT_INT_DATA, &cur ) < 0
                || mol_read_datum( &at->cReactionComponentType, 3, MOL_CHAR_INT_DATA, &cur ) < 0
#else
                || mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur ) < 0
                || mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur ) < 0
#endif
#if ( MOL_REACT == MOL_PRESENT || MOL_QUERY == MOL_PRESENT )
                || mol_read_datum( &at->cExactChargeFlag, 3, MOL_CHAR_INT_DATA, &cur ) < 0
#else
                || mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur ) < 0
#endif
               ) {
                fail_code = 5;   /* can't interpret the second half of the line */
            }
        }

        if ( fail_code ) {
            /* report the failure together with the text of the line */
            err = fail_code;
            MOLFILE_ERR_SET (err, fail_code, "Cannot interpret atom block line:");
            RemoveNonPrintable( buf );
            AddMOLfileError( pStrErr, buf );
            if ( 0 == strcmp( buf, SDF_END_OF_DATA ) ) {
                err = -abs(err);
                break;
            }
        }
    }
    return err;
}
534 /************ static *************************************************************/
/*
 * Reads ctab->nNumberOfBonds bond block lines from 'inp'.
 *
 *   err      error state on entry.  Once it is non-zero the remaining lines
 *            are read but not interpreted.
 *   pStrErr  receives error messages.
 *
 * If ctab->MolBond is NULL the lines are read without being interpreted.
 * Which optional fields are stored and which are skipped depends on the
 * MOL_QUERY and MOL_REACT build settings.
 *
 * Returns the resulting error state: 0 on success, 2 if a line could not
 * be read, 3 for an over-long line (no message is added for it), 4 if a
 * line could not be interpreted.  The value is negated when the
 * SDF_END_OF_DATA line is met while an error is pending.
 */
int read_bonds_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr )
{
    char     buf[MOLFILEINPLINELEN];
    char    *cur;
    S_SHORT  idx;

    for ( idx = 0; idx < ctab->nNumberOfBonds; idx++ ) {

        cur = inchi_fgetsLf( buf, (int)sizeof(buf), inp );
        if ( !cur ) {
            if ( !err ) {
                MOLFILE_ERR_SET (err, 2, "Cannot read bond block line");
            }
            break;
        }
        remove_one_lf( buf );
        if ( buf[MOLFILEMAXLINELEN] && !err ) {
            err = 3;   /* too long input file line */
        }
        if ( err ) {
            /* an error is pending: just bypass the rest of the bond block */
            if ( 0 == strcmp( buf, SDF_END_OF_DATA ) ) {
                err = -abs(err);
                break;
            }
            continue;
        }
        if ( !ctab->MolBond ) {
            continue;
        }

        if (   mol_read_datum( &ctab->MolBond[idx].nAtomNo1, 3, MOL_SHORT_INT_DATA, &cur ) < 0
            || mol_read_datum( &ctab->MolBond[idx].nAtomNo2, 3, MOL_SHORT_INT_DATA, &cur ) < 0
            || mol_read_datum( &ctab->MolBond[idx].cBondType, 3, MOL_CHAR_INT_DATA, &cur ) < 0
            || mol_read_datum( &ctab->MolBond[idx].cBondStereo, 3, MOL_CHAR_INT_DATA, &cur ) < 0
#if ( MOL_QUERY == MOL_PRESENT )
            || mol_read_datum( &ctab->MolBond[idx].cBondTopology, 3, MOL_CHAR_INT_DATA, &cur ) < 0   /* ring/chain */
#else
            || mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur ) < 0
#endif
#if ( MOL_REACT == MOL_PRESENT )
            || mol_read_datum( &ctab->MolBond[idx].cReactingCenterStatus, 3, MOL_CHAR_INT_DATA, &cur ) < 0
#else
            || mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur ) < 0
#endif
           ) {
            if ( !err ) {
                /* can't interpret bonds block line: report it with the line text */
                MOLFILE_ERR_SET (err, 4, "Cannot interpret bond block line:");
                RemoveNonPrintable( buf );
                AddMOLfileError( pStrErr, buf );
            }
            if ( 0 == strcmp( buf, SDF_END_OF_DATA ) ) {
                err = -abs(err);
                break;
            }
        }
    }
    return err;
}
599 /********** static ***************************************************************/
/*
 * Skips the STEXT block: reads 2 * ctab->nNumberOfStextEntries lines from
 * 'inp' without attempting to interpret them.
 *
 *   err      error state on entry; returned unchanged unless a line cannot
 *            be read while it is still zero
 *   pStrErr  receives the error message in that case
 *
 * Returns the resulting error state: the incoming 'err', or 2 if a line
 * could not be read and no error was pending.
 */
int read_stext_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr )
{
    char     buf[MOLFILEINPLINELEN];
    S_SHORT  idx;

    for ( idx = 0; idx < 2*ctab->nNumberOfStextEntries; idx++ ) {
        if ( NULL != inchi_fgetsLf( buf, (int)sizeof(buf), inp ) ) {
            continue;   /* line read and discarded */
        }
        /* can't read the input file line */
        if ( !err ) {
            MOLFILE_ERR_FIN (err, 2, err_fin, "Cannot read STEXT block line");
        }
        break;
    }
err_fin:
    return err;
}
628 /************ static *************************************************************/
read_properties_block(MOL_CTAB * ctab,MOL_HEADER_BLOCK * pHdr,FILE * inp,int err,char * pStrErr)629 int read_properties_block( MOL_CTAB* ctab, MOL_HEADER_BLOCK *pHdr, FILE *inp, int err, char *pStrErr )
630 {
631 enum { MULTI_LINE_MODE_NO_MODE, MULTI_LINE_MODE_ISIS_ALIAS };
632 char *p;
633 char line[MOLFILEINPLINELEN];
634 const int line_len = sizeof(line);
635 int nMultiLineMode = MULTI_LINE_MODE_NO_MODE, nAtomNumber=0;
636 S_SHORT i, j;
637 char charM[2];
638 char szBlank[3];
639 char szType[4];
640 S_SHORT skip_lines=0;
641 S_SHORT num_entries;
642 S_SHORT num_atoms = ctab->nNumberOfAtoms;
643
644 int charge_encountered = 0;
645 int radical_encountered = 0;
646 int isotope_encountered = 0;
647 /*
648 if ( NULL == ctab->MolAtom ){
649 err = 1;
650 goto err_fin; internal error: memory has not been allocated for MolAtom structure
651 }
652 */
653 for ( i = 0; ctab->csCurrentCtabVersion[0]? 1 : (i < ctab->nNumberOfPropertyLines); i++ ) { /* the last line should be M END */
654 /* ctab->csCurrentCtabVersion[0] == 0:
655 exactly ctab->nNumberOfPropertyLines lines including M END */
656 /* ctab->csCurrentCtabVersion[0] != 0:
657 read until M END line was encountered */
658 if ( NULL == ( p = inchi_fgetsLf( line, line_len, inp ) ) ){
659 if ( !err ) {
660 MOLFILE_ERR_SET (err, 2, "Cannot read properties block line");
661 }
662 goto err_fin;
663 }
664 remove_one_lf( line );
665 if ( line[MOLFILEMAXLINELEN] ){
666 MOLFILE_ERR_SET (err, 3, "Too long properties block line");
667 continue;
668 }
669 if ( skip_lines > 0 ) {
670 skip_lines --;
671 continue;
672 }
673 /* alias. */
674 if ( nMultiLineMode == MULTI_LINE_MODE_ISIS_ALIAS && nAtomNumber ) {
675 int len;
676 nMultiLineMode = MULTI_LINE_MODE_NO_MODE;
677 if ( 0 >= (len=normalize_name( p )) ) {
678 nAtomNumber = 0;
679 continue;
680 }
681 if( 0 < len && len < (int)(sizeof(ctab->MolAtom->szAtomSymbol)) ) {
682 int nCharge, nRad;
683 MOL_ATOM* MolAtom = ctab->MolAtom + nAtomNumber-1;
684 /* ctab->MolAtom[nAtomNumber-1].cAtomAliasedFlag = 1; */
685 /* extract radicals & charges */
686 extract_ChargeRadical( p, &nRad, &nCharge );
687 /* Aliased atom cannot have charge, radical & mass difference */
688 /* in the atom table or "M CHG", "M RAD", "M ISO" */
689 /* if ( nCharge ) */
690 MolAtom->cCharge = (S_CHAR)nCharge;
691 /* if ( nRad ) */
692 MolAtom->cRadical = (char)nRad;
693
694 if ( 1 == len && 'D' == p[0] ) {
695 /* H isotope */
696 p[0] = 'H';
697 #ifdef TARGET_EXE_USING_API
698 MolAtom->cMassDifference=(1 + ISOTOPIC_SHIFT_FLAG);
699 #else
700 MolAtom->cMassDifference=1;
701 #endif
702 } else
703 if ( 1 == len && 'T' == p[0] ) {
704 /* H isotope */
705 p[0] = 'H';
706 #ifdef TARGET_EXE_USING_API
707 MolAtom->cMassDifference=(2 + ISOTOPIC_SHIFT_FLAG);
708 #else
709 MolAtom->cMassDifference=2;
710 #endif
711 } else
712 MolAtom->cMassDifference=0;
713 if ( strlen(p) < sizeof(ctab->MolAtom[0].szAtomSymbol) ) {
714 strcpy(MolAtom->szAtomSymbol, p);
715 } else {
716 strcpy(MolAtom->szAtomSymbol, "???");
717 }
718 MolAtom->cAtomAliasedFlag ++;
719 }
720 skip_lines = 0;
721 nAtomNumber = 0;
722 continue;
723 }
724
725 if ( 1 != mol_read_datum( charM, sizeof(charM) - 1, MOL_STRING_DATA, &p )
726 || 0 != mol_read_datum( szBlank, sizeof(szBlank) - 1, MOL_STRING_DATA, &p ) /* must contain 0 bytes */
727 || 0 >= mol_read_datum( szType, sizeof(szType) - 1, MOL_STRING_DATA, &p ) /* must contain 3 bytes */
728 ) {
729 if ( !strcmp( line, SDF_END_OF_DATA ) ) {
730 err = err? -abs(err): -4;
731 break;
732 }
733 continue; /* ignore because cannot recognize */
734 }
735 if ( charM[0] == 'V' ){
736 skip_lines = 0; /* ISIS/Desktop Atom Value: one-line property */
737 continue;
738 }
739 if ( charM[0] == 'G' ){
740 skip_lines = 1; /* ISIS/Desktop Group abbreviation: two-line property */
741 continue;
742 }
743 if ( charM[0] == 'A' ) {
744 if ( NULL != ctab->MolAtom &&
745 0 < ( nAtomNumber = (int)strtol(szType, NULL, 10) ) &&
746 nAtomNumber <= ctab->nNumberOfAtoms ){
747 /* Atom Alias [ISIS/Desktop] two-line property */
748 nMultiLineMode = MULTI_LINE_MODE_ISIS_ALIAS;
749 continue;
750 } else {
751 nAtomNumber = 0;
752 skip_lines = 1;
753 continue;
754 }
755 }
756 if ( charM[0] == 'S' && !strcmp( szType, "SKP" ) ){ /* skip lines */
757 if ( 0 >= mol_read_datum( &skip_lines, 3, MOL_SHORT_INT_DATA, &p ) ) {
758 skip_lines = 0;
759 }
760 continue;
761 }
762 if ( charM[0] != 'M' ) {/* cannot recognize a line */
763 continue;
764 }
765 if ( !strcmp( szType, "REG" ) ) {
766 int len;
767 p = p + strspn( p, " " );
768 len = strcspn( p, " " );
769 len = inchi_min( len, MOL_MAX_VALUE_LEN );
770 mol_read_datum( &pHdr->lInternalRegistryNumber, len, MOL_LONG_INT_DATA, &p );
771 continue;
772 }
773
774 if ( !strcmp( szType, "END" ) ){
775 if ( ctab->csCurrentCtabVersion[0] )
776 break; /* end of property lines */
777 continue;
778 }
779
780 if ( NULL == ctab->MolAtom )
781 continue; /* ignore because the user requested to bypass all this stuff */
782
783 /*----------------------------------- charge: Generic */
784 if ( !strcmp( szType, "CHG" ) &&
785 0 < mol_read_datum( &num_entries, 3, MOL_SHORT_INT_DATA, &p ) &&
786 1 <= num_entries && num_entries <= 8 ) {
787 S_SHORT atoms[8];
788 S_SHORT charges[8];
789 if ( !charge_encountered && !radical_encountered ) {
790 /* first charge or radical record clears all Atom Block */
791 /* entered charge and radical data to zeroes */
792 charge_encountered = -1;
793 }
794 for ( j = 0; j < num_entries; j++ ) {
795 if ( 0 > mol_read_datum( &atoms[j], 0, MOL_SHORT_INT_DATA, &p ) ||
796 0 > mol_read_datum( &charges[j], 0, MOL_SHORT_INT_DATA, &p ) ||
797 atoms[j] <= 0 || atoms[j] > num_atoms ||
798 charges[j] < -15 || charges[j] > 15 ) {
799 goto charge_error;
800 }
801 }
802 if ( charge_encountered == -1 ) {
803 for ( j = 0; j < num_atoms; j++ ) {
804 if ( !ctab->MolAtom[j].cAtomAliasedFlag ) /* do not clear aliased atoms.*/
805 ctab->MolAtom[j].cCharge = ctab->MolAtom[j].cRadical = '\0';
806 }
807 charge_encountered = 1;
808 }
809 for ( j = 0; j < num_entries; j++ ) {
810 if ( !ctab->MolAtom[atoms[j]-1].cAtomAliasedFlag ) /* do not change aliased atoms.*/
811 ctab->MolAtom[atoms[j]-1].cCharge = (S_CHAR)charges[j];
812 }
813 continue;
814 charge_error:
815 MOLFILE_ERR_SET (err, 0, "Charge not recognized:");
816 RemoveNonPrintable( line );
817 AddMOLfileError(pStrErr, line);
818 continue; /* ignore for now */
819 }
820 /*-------------------------------------- radical: Generic */
821 if ( !strcmp( szType, "RAD" ) &&
822 0 < mol_read_datum( &num_entries, 3, MOL_SHORT_INT_DATA, &p ) &&
823 1 <= num_entries && num_entries <= 8 ) {
824 S_SHORT atoms[8];
825 S_SHORT radicals[8];
826 if ( !charge_encountered && !radical_encountered ) {
827 /* first charge or radical record clears all Atom Block */
828 /* entered charge and radical data to zeroes */
829 radical_encountered = -1;
830 }
831 for ( j = 0; j < num_entries; j++ ) {
832 if ( 0 > mol_read_datum( &atoms[j], 0, MOL_SHORT_INT_DATA, &p ) ||
833 0 > mol_read_datum( &radicals[j], 0, MOL_SHORT_INT_DATA, &p ) ||
834 atoms[j] <= 0 || atoms[j] > num_atoms ||
835 radicals[j] < 0 || radicals[j] > 3 ) {
836 goto radical_error;
837 }
838 }
839 if ( radical_encountered == -1 ) {
840 for ( j = 0; j < num_atoms; j++ ) {
841 if ( !ctab->MolAtom[j].cAtomAliasedFlag ) /* do not clear aliased atoms. 5-3-99 DCh */
842 ctab->MolAtom[j].cCharge = ctab->MolAtom[j].cRadical = '\0';
843 }
844 radical_encountered = 1;
845 }
846 for ( j = 0; j < num_entries; j++ ) {
847 if ( !ctab->MolAtom[atoms[j]-1].cAtomAliasedFlag ) { /* do not change aliased atoms. 5-3-99 DCh */
848 ctab->MolAtom[atoms[j]-1].cRadical = (S_CHAR)radicals[j];
849 }
850 }
851 continue;
852 radical_error:
853 MOLFILE_ERR_SET (err, 0, "Radical not recognized:");
854 RemoveNonPrintable( line );
855 AddMOLfileError(pStrErr, line);
856 continue; /* ignore error for now */
857 }
858 /*-------------------------------------- isotope: Generic */
859 if ( !strcmp( szType, "ISO" ) &&
860 0 < mol_read_datum( &num_entries, 3, MOL_SHORT_INT_DATA, &p ) &&
861 1 <= num_entries && num_entries <= 8 ) {
862 S_SHORT atoms[8];
863 S_SHORT iso_mass[8]; /* contains istotope mass number, not difference. 7-14-00 DCh. */
864 if ( !isotope_encountered ) {
865 /* first charge or radical record clears all Atom Block */
866 /* entered charge and radical data to zeroes */
867 isotope_encountered = -1;
868 }
869 for ( j = 0; j < num_entries; j++ ) {
870 if ( 0 > mol_read_datum( &atoms[j], 0, MOL_SHORT_INT_DATA, &p ) ||
871 0 > mol_read_datum( &iso_mass[j], 0, MOL_SHORT_INT_DATA, &p ) ||
872 atoms[j] <= 0 || atoms[j] > num_atoms
873 /*|| iso_mass[j] < -18 || iso_mass[j] > 12*/ ) {
874 /* goto isotope_error; */
875 atoms[j] = -1; /* flag error */
876 MOLFILE_ERR_SET (err, 0, "Isotopic data not recognized:");
877 RemoveNonPrintable( line );
878 AddMOLfileError(pStrErr, line);
879 continue; /* ignore isotopic error for now */
880 }
881 }
882 if ( isotope_encountered == -1 ) {
883 for ( j = 0; j < num_atoms; j++ ) {
884 /*if ( !ctab->MolAtom[j].cAtomAliasedFlag )*/ /* clear even aliased atoms */
885 ctab->MolAtom[j].cMassDifference = 0;
886 }
887 isotope_encountered = 1;
888 }
889 for ( j = 0; j < num_entries; j++ ) {
890 if ( atoms[j] <= 0 )
891 continue; /* ignore isotopic error for now */
892 if ( 1 /* !ctab->MolAtom[atoms[j]-1].cAtomAliasedFlag */) {
893 char *at = ctab->MolAtom[atoms[j]-1].szAtomSymbol;
894 if ( at[1] || at[0] != 'D' && at[0] != 'T' ) { /* D & T cannot have ISO */
895 /* need atomic weight to calculate isotope difference. 7-14-00 DCh. */
896 #ifdef TARGET_EXE_USING_API
897 /*^^^ Check added 5-10-2008 - IPl */
898 if (iso_mass[j] > 0)
899 /* According to MDL specification, p.12, only a positive
900 integer is allowed. And yes, there appeared some MOL/SD
901 files contaning here a negative value. This manifested
902 in mismatch in InChI_MAIN vs. cInChI-1/stdinchi-1 results.
903 */
904 ctab->MolAtom[atoms[j]-1].cMassDifference = iso_mass[j]; /* mass, not difference */
905
906 #else
907 int atw, atw_diff;
908 /*^^^
909 NB: According to MDL specification, difference should be in
910 [-18; +12] range, not in [-19; +19] as is checked below. */
911 if ( (atw = get_atw( at )) && abs( atw_diff = (int)iso_mass[j] - atw ) < 20 ) {
912 ctab->MolAtom[atoms[j]-1].cMassDifference = (char)(atw_diff? atw_diff : ZERO_ATW_DIFF);
913 }
914 #endif
915 }
916 }
917 }
918 continue;
919 }
920 }
921 err_fin:
922 return err;
923 }
924 /************ global *************************************************************/
delete_mol_data(MOL_DATA * mol_data)925 MOL_DATA* delete_mol_data( MOL_DATA* mol_data )
926 {
927 if ( mol_data ) {
928 if ( mol_data->ctab.MolAtom )
929 inchi_free( mol_data->ctab.MolAtom );
930 if ( mol_data->ctab.MolBond )
931 inchi_free( mol_data->ctab.MolBond );
932 if ( mol_data->ctab.szCoord )
933 inchi_free( mol_data->ctab.szCoord );
934 inchi_free( mol_data );
935 mol_data = NULL;
936 }
937 return mol_data;
938 }
939 /************* global ************************************************************/
940 /* Comletely ingnore STEXT block, queries, and 3D features
941 */
read_mol_file(FILE * inp,MOL_HEADER_BLOCK * OnlyHeaderBlock,MOL_CTAB * OnlyCtab,int bGetOrigCoord,int * err,char * pStrErr)942 MOL_DATA* read_mol_file( FILE* inp, MOL_HEADER_BLOCK *OnlyHeaderBlock, MOL_CTAB *OnlyCtab,
943 int bGetOrigCoord, int *err, char *pStrErr )
944 {
945 MOL_DATA* mol_data = NULL;
946 int ret = 0, prev_ret, bEndOfData = 0;
947 int bReadAll = ( OnlyHeaderBlock == NULL );
948 MOL_CTAB ctab, *pCtab = NULL;
949 MOL_HEADER_BLOCK *pHdr = NULL;
950
951 *err = 0;
952 if ( bReadAll ) {
953 if ( NULL == ( mol_data = ( MOL_DATA* )inchi_calloc( 1, sizeof(MOL_DATA) ) ) ){
954 ret = 1; /* can't allocate mol_data structure */
955 AddMOLfileError( pStrErr, "Out of RAM" );
956 goto err_fin;
957 }
958 pHdr = &mol_data->hdr;
959 pCtab = &mol_data->ctab;
960 } else {
961 pHdr = OnlyHeaderBlock;
962 pCtab = OnlyCtab? OnlyCtab : &ctab;
963 memset( pHdr, 0, sizeof( MOL_HEADER_BLOCK ) );
964 memset( pCtab, 0, sizeof( MOL_CTAB ) );
965 }
966 pCtab->MolBond = NULL;
967 pCtab->MolAtom = NULL;
968 pCtab->szCoord = NULL;
969
970 if ( 0 != ( ret = mol_read_hdr(pHdr, inp, pStrErr) ) ){
971 ret += 10;
972 goto err_fin; /* most probably end of file */
973 }
974 if ( 0 != ( ret = mol_read_counts_line( pCtab , inp, pStrErr) ) ){
975 ret += 20;
976 goto err_fin;
977 }
978
979 if ( bReadAll ) {
980 if ( NULL == ( mol_data->ctab.MolAtom = (MOL_ATOM*)inchi_calloc(inchi_max(mol_data->ctab.nNumberOfAtoms,1), sizeof(MOL_ATOM)) ) ){
981 ret = 2; /* can't allocate MolAtom structure */
982 MOLFILE_ERR_FIN (ret, 2, err_fin, "Out of RAM");
983 }
984 if ( bGetOrigCoord &&
985 NULL == ( mol_data->ctab.szCoord = (MOL_COORD*)inchi_calloc(inchi_max(mol_data->ctab.nNumberOfAtoms,1), sizeof(MOL_COORD)) ) ){
986 ret = 2; /* can't allocate MolAtom structure */
987 MOLFILE_ERR_FIN (ret, 2, err_fin, "Out of RAM");
988 }
989 }
990 if ( 0 != ( ret = read_atom_block(pCtab, inp, ret, pStrErr) ) ){
991 if ( ret < 0 ) {
992 ret = -ret;
993 bEndOfData = 1;
994 }
995 ret += 30;
996 /* goto err_fin; */
997 }
998
999 if ( bReadAll && ret < 30 ) {
1000 if ( !bEndOfData && NULL == ( mol_data->ctab.MolBond = (MOL_BONDS*)inchi_calloc(inchi_max(mol_data->ctab.nNumberOfBonds,1), sizeof(MOL_BONDS)) ) ){
1001 ret = 3; /* can't allocate MolBond structure */
1002 MOLFILE_ERR_FIN (ret, 3, err_fin, "Out of RAM");
1003 }
1004 }
1005 prev_ret = ret;
1006 if ( !bEndOfData && 0 != ( ret = read_bonds_block(pCtab, inp, ret, pStrErr) ) ){
1007 if ( ret < 0 ) {
1008 ret = -ret;
1009 bEndOfData = 1;
1010 }
1011 ret = prev_ret? prev_ret : ret + 40;
1012 }
1013 prev_ret = ret;
1014 if ( !bEndOfData && 0 != ( ret = read_stext_block(pCtab, inp, ret, pStrErr) ) ){
1015 ret = prev_ret? prev_ret : ret + 50;
1016 }
1017 prev_ret = ret;
1018 if ( !bEndOfData && 0 != ( ret = read_properties_block(pCtab, pHdr, inp, ret, pStrErr) ) ){
1019 if ( ret < 0 ) {
1020 ret = -ret;
1021 bEndOfData = 1;
1022 }
1023 ret = prev_ret? prev_ret : ret + 60;
1024 }
1025
1026 err_fin:
1027 *err = bEndOfData? -ret : ret;
1028 if ( bReadAll ) {
1029 if ( ret )
1030 mol_data = delete_mol_data( mol_data ); /* delete all results */
1031 return mol_data;
1032 } else {
1033 if ( ret )
1034 return NULL;
1035 else
1036 return (MOL_DATA*)OnlyHeaderBlock;
1037 }
1038 }
1039
/******************************************************************/
/* SData field labels that identify_sdf_label() recognizes by name */
static const char sdf_data_hdr_name[] = "NAME";
static const char sdf_data_hdr_comm[] = "COMMENT";

/* States of the SData scanner in bypass_sdf_data_items() */
enum {
    SDF_START = 0,             /* nothing has been read yet                          */
    SDF_DATA_HEADER,           /* '>' header line with a label of no interest        */
    SDF_DATA_HEADER_NAME,      /* header line whose label is "NAME"                  */
    SDF_DATA_HEADER_COMMENT,   /* header line whose label is "COMMENT"               */
    SDF_DATA_HEADER_CAS,       /* header line whose label starts with "CAS"          */
    SDF_DATA_HEADER_USER,      /* header line whose label is the user-supplied one   */
    SDF_DATA_LINE,             /* inside the data lines of an item                   */
    SDF_END_OF_DATA_ITEM,      /* empty line that closed a data item                 */
    SDF_EMPTY_LINE,            /* empty line seen while waiting for the next header  */
    SDF_END_OF_DATA_BLOCK      /* the SDF_END_OF_DATA delimiter line was read        */
};
1047 /********** static ********************************************************/
extract_cas_rn(char * line)1048 long extract_cas_rn( char *line )
1049 {
1050 int i, j;
1051 i = line[0] == '-'? 1 : 0;
1052 for ( j = i; line[i]; i ++ ) {
1053 if ( isdigit( UCINT line[i] ) ) {
1054 line[j++] = line[i];
1055 } else
1056 if ( line[i] != '-' ) {
1057 break;
1058 }
1059 }
1060 line[j] = '\0';
1061 return strtol( line, NULL, 10 );
1062 }
1063 /********** static ********************************************************/
identify_sdf_label(char * inp_line,const char * pSdfLabel)1064 int identify_sdf_label( char* inp_line, const char *pSdfLabel )
1065 {
1066 char line[MOLFILEMAXLINELEN];
1067 char *p, *q;
1068 int i, j, len;
1069 if ( (p = strchr( inp_line, '<' )) &&
1070 (q = strchr( p, '>' )) &&
1071 (len = q-p-1) > 0 && len < (int)sizeof(line) ) {
1072 memcpy( line, p+1, len );
1073 line[len] = '\0';
1074 for ( i = 0; isspace( UCINT line[i] ); i ++ )
1075 ;
1076 for ( j = len-1; j >= i && isspace( UCINT line[i] ); j -- )
1077 ;
1078 len = j-i+1;
1079 p = line+i;
1080 if ( pSdfLabel && pSdfLabel[0] && len == (int)strlen(pSdfLabel) && !memicmp( p, pSdfLabel, len ) )
1081 return SDF_DATA_HEADER_USER;
1082 if ( len == sizeof(sdf_data_hdr_name)-1 && !memicmp( p, sdf_data_hdr_name, len ) )
1083 return SDF_DATA_HEADER_NAME;
1084 if ( len == sizeof(sdf_data_hdr_comm)-1 && !memicmp( p, sdf_data_hdr_comm, len ) )
1085 return SDF_DATA_HEADER_COMMENT;
1086 if ( !memicmp( p, "CAS", 3 ) )
1087 return SDF_DATA_HEADER_CAS;
1088 }
1089 return SDF_DATA_HEADER;
1090 }
1091 /************* global *****************************************************/
bypass_sdf_data_items(FILE * inp,long * cas_reg_no,char * comment,int lcomment,char * name,int lname,int prev_err,const char * pSdfLabel,char * pSdfValue,char * pStrErr)1092 int bypass_sdf_data_items( FILE* inp, long *cas_reg_no, char* comment,
1093 int lcomment, char *name, int lname, int prev_err,
1094 const char *pSdfLabel, char *pSdfValue, char *pStrErr )
1095 {
1096 char line[MOLFILEINPLINELEN];
1097 const int line_len = sizeof(line);
1098 int err = 0;
1099 int current_state = SDF_START;
1100 int n_blank_lines = 0;
1101 int n_lines = 0;
1102 char* p = NULL;
1103 int bNeedsName = name && lname > 0 && !name[0];
1104 int bNeedsComm = comment && lcomment > 0 && !comment[0];
1105 int bNeedsUser = pSdfLabel && pSdfLabel[0] && pSdfValue;
1106 int bNeedsCASrn = 0;
1107 int bCASrnIsUser = 0;
1108
1109 if ( cas_reg_no != NULL ) {
1110 bNeedsCASrn = 1;
1111 *cas_reg_no = 0;
1112 bCASrnIsUser = (bNeedsUser && !memicmp(pSdfLabel,"CAS", 3));
1113 }
1114
1115 while ( err == 0 &&
1116 current_state !=SDF_END_OF_DATA_BLOCK &&
1117 NULL != ( p = inchi_fgetsLf( line, line_len, inp ) ) ) {
1118
1119 if ( !n_lines && !memcmp(line, "M END", 6) ) {
1120 continue; /* allow subtle errors */
1121 }
1122 n_lines++;
1123
1124 remove_trailing_spaces( line );
1125 if ( line[MOLFILEMAXLINELEN] ){
1126 if ( current_state != SDF_DATA_HEADER &&
1127 current_state != SDF_DATA_LINE &&
1128 current_state != SDF_DATA_HEADER_NAME &&
1129 current_state != SDF_DATA_HEADER_USER &&
1130 current_state != SDF_DATA_HEADER_COMMENT ) {
1131 line[MOLFILEMAXLINELEN] = '\0';
1132 if ( !prev_err ) {
1133 MOLFILE_ERR_SET (err, 0, "Too long SData line truncated");
1134 }
1135 } else {
1136 /* allow long lines in SDF data. 9-29-00 DCh */
1137 line[MOLFILEMAXLINELEN] = '\0';
1138 }
1139 }
1140
1141 n_blank_lines += ( *line == '\0' );
1142
1143 switch( current_state ) {
1144
1145 case SDF_START:
1146 case SDF_END_OF_DATA_ITEM:
1147 case SDF_EMPTY_LINE: /* Added 9-25-97 DCh */
1148
1149 if ( 0 == strcmp( line, SDF_END_OF_DATA ) ) {
1150 current_state = SDF_END_OF_DATA_BLOCK;
1151 }
1152 else
1153 if ( '>' == *line ) {
1154 current_state = ( bNeedsName || bNeedsComm || bNeedsCASrn || bNeedsUser )? identify_sdf_label(line, pSdfLabel) : SDF_DATA_HEADER;
1155 }else
1156 if ( *line == '\0' ) { /* Added 9-25-97 DCh */
1157 /* Relax the strictness: Allow more than 1 empty line. */
1158 current_state=SDF_EMPTY_LINE;
1159 } else
1160 if ( !prev_err ) {
1161 MOLFILE_ERR_SET (err, 3, "Unexpected SData header line:");
1162 RemoveNonPrintable( line );
1163 AddMOLfileError(pStrErr, line);
1164 /* unexpected contents of data header line */
1165 } else {
1166 err = 3;
1167 }
1168 break;
1169
1170 case SDF_DATA_HEADER_NAME:
1171 if ( bNeedsName && 0 < normalize_name( line ) ) {
1172 bNeedsName = 0;
1173 mystrncpy( name, line, lname );
1174 }
1175 goto got_data_line;
1176
1177 case SDF_DATA_HEADER_COMMENT:
1178 if ( bNeedsComm && 0 < normalize_name( line ) ) {
1179 bNeedsComm = 0;
1180 mystrncpy( comment, line, lcomment );
1181 }
1182 goto got_data_line;
1183
1184 case SDF_DATA_HEADER_USER:
1185 if ( bNeedsUser && 0 < normalize_name( line ) ) {
1186 bNeedsUser = 0;
1187 mystrncpy( pSdfValue, line, MAX_SDF_VALUE+1 );
1188 if ( bCASrnIsUser && bNeedsCASrn ) {
1189 *cas_reg_no = extract_cas_rn( line );
1190 bNeedsCASrn = (0 == *cas_reg_no);
1191 }
1192 }
1193 goto got_data_line;
1194
1195 case SDF_DATA_HEADER_CAS:
1196 if ( bNeedsCASrn && 0 < normalize_name( line ) ) {
1197 *cas_reg_no = extract_cas_rn( line );
1198 bNeedsCASrn = (0 == *cas_reg_no);
1199 }
1200 goto got_data_line;
1201
1202 case SDF_DATA_HEADER:
1203 case SDF_DATA_LINE:
1204 got_data_line:
1205 current_state = *line? SDF_DATA_LINE : SDF_END_OF_DATA_ITEM;
1206 break;
1207
1208 }
1209 }
1210 if ( 0 == err && SDF_END_OF_DATA_BLOCK != current_state && NULL == p )
1211 ; /* err = 4; */ /* unexpected end of file: missing $$$$ */
1212 else
1213 if (err && ( n_blank_lines == n_lines && *line == '\0' ) )
1214 err = 5; /* empty lines -- do not know when this can happen */
1215
1216 if ( err && err != 5 && current_state != SDF_END_OF_DATA_BLOCK && p ) {
1217 /* bypass up to $$$$ */
1218 while ( ( p = inchi_fgetsLf( line, line_len, inp ) ) && memcmp( line, SDF_END_OF_DATA, 4 ) )
1219 ;
1220 if ( p ) {
1221 err = 9; /* bypassed to $$$$; non-fatal */
1222 AddMOLfileError(pStrErr, "Bypassing to next structure");
1223 }
1224
1225 }
1226
1227 return err;
1228 }
1229 /**************** global **************************************************/
read_sdfile_segment(FILE * inp,MOL_HEADER_BLOCK * OnlyHeaderBlock,MOL_CTAB * OnlyCtab,int bGetOrigCoord,char * pname,int lname,long * Id,const char * pSdfLabel,char * pSdfValue,int * err,char * pStrErr)1230 MOL_DATA* read_sdfile_segment(FILE* inp, MOL_HEADER_BLOCK *OnlyHeaderBlock, MOL_CTAB *OnlyCtab,
1231 int bGetOrigCoord,
1232 char *pname, int lname,
1233 long *Id, const char *pSdfLabel, char *pSdfValue,
1234 int *err, char *pStrErr )
1235 {
1236 MOL_DATA* mol_data = read_mol_file( inp, OnlyHeaderBlock, OnlyCtab, bGetOrigCoord, err, pStrErr );
1237 int err_bypass_sdf = 0;
1238
1239 if ( pname && lname ) {
1240 pname[0] = '\0';
1241 }
1242 if ( Id ) {
1243 *Id = 0L; /* ignore for now */
1244 }
1245 /* if ( mol_data && !*err ) { */
1246 if ( *err < 0 ) {
1247 *err = -*err; /* end of data encountered */
1248 } else {
1249 err_bypass_sdf = bypass_sdf_data_items( inp, Id, NULL, 0, pname, lname, *err, pSdfLabel, pSdfValue, pStrErr );
1250 if ( err_bypass_sdf ) {
1251 *err = err_bypass_sdf; /* important to continue to the next good structure */
1252 }
1253 }
1254 /* } */
1255 return mol_data;
1256 }
1257 /******************* global *********************************************************/
CopyMOLfile(FILE * inp_file,long fPtrStart,long fPtrEnd,FILE * prb_file,long lNumb)1258 int CopyMOLfile(FILE *inp_file, long fPtrStart, long fPtrEnd, FILE *prb_file, long lNumb)
1259 {
1260 char line[MOLFILEINPLINELEN], *p;
1261 long fPtr;
1262 int ret = 1;
1263 char szNumber[32];
1264
1265 if ( inp_file && prb_file && fPtrStart >= 0L &&
1266 fPtrEnd > fPtrStart &&
1267 0 == fseek( inp_file, fPtrStart, SEEK_SET ) ) {
1268
1269 while ( fPtrEnd > (fPtr = ftell(inp_file)) && fPtr >= 0L &&
1270 inchi_fgetsLf( line, sizeof(line)-1, inp_file ) ) {
1271 line[sizeof(line)-1] = '\0'; /* unnecessary extra precaution */
1272 if ( fPtr == fPtrStart && lNumb ) {
1273 int len;
1274 LtrimRtrim( line, &len );
1275 len = sprintf( szNumber, "#%ld%s", lNumb, len?"/":"" );
1276 mystrncpy( line+len, line, sizeof(line)-len-1 );
1277 memcpy( line, szNumber, len );
1278 }
1279 if ( !strchr(line, '\n') ) {
1280 p = line+strlen(line);
1281 p[0] = '\n';
1282 p[1] = '\0';
1283 }
1284 fputs( line, prb_file );
1285 }
1286 ret = fseek( inp_file, fPtrEnd, SEEK_SET );
1287 }
1288 return ret;
1289 }
1290