1 /*
2  * International Chemical Identifier (InChI)
3  * Version 1
4  * Software version 1.04
5  * September 9, 2011
6  *
7  * The InChI library and programs are free software developed under the
8  * auspices of the International Union of Pure and Applied Chemistry (IUPAC).
9  * Originally developed at NIST. Modifications and additions by IUPAC
10  * and the InChI Trust.
11  *
12  * IUPAC/InChI-Trust Licence for the International Chemical Identifier (InChI)
13  * Software version 1.0.
14  * Copyright (C) IUPAC and InChI Trust Limited
15  *
16  * This library is free software; you can redistribute it and/or modify it under the
17  * terms of the IUPAC/InChI Trust Licence for the International Chemical Identifier
18  * (InChI) Software version 1.0; either version 1.0 of the License, or
19  * (at your option) any later version.
20  *
21  * This library is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
24  * See the IUPAC/InChI Trust Licence for the International Chemical Identifier (InChI)
25  * Software version 1.0 for more details.
26  *
27  * You should have received a copy of the IUPAC/InChI Trust Licence for the
28  * International Chemical Identifier (InChI) Software version 1.0 along with
29  * this library; if not, write to:
30  *
31  * The InChI Trust
32  * c/o FIZ CHEMIE Berlin
33  * Franklinstrasse 11
34  * 10587 Berlin
35  * GERMANY
36  *
37  */
38 
39 
40 /* local prototypes */
41 int bypass_sdf_data_items( FILE* inp, long *cas_reg_no, char* comment, int lcomment, char *name, int lname, int prev_err,
42                            const char *pSdfLabel, char *pSdfValue, char *pStrErr );
43 MOL_DATA* read_mol_file( FILE* inp, MOL_HEADER_BLOCK *OnlyHeaderBlock, MOL_CTAB *OnlyCtab,
44                          int bGetOrigCoord, int *err, char *pStrErr );
45 
46 
47 static int mol_read_hdr(MOL_HEADER_BLOCK *hdr, FILE* inp, char *pStrErr);
48 static int mol_read_counts_line( MOL_CTAB* ctab, FILE *inp, char *pStrErr );
49 static int read_atom_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr );
50 static int read_bonds_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr );
51 static int read_stext_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr );
52 static int read_properties_block( MOL_CTAB* ctab, MOL_HEADER_BLOCK *pHdr, FILE *inp, int err, char *pStrErr );
53 
54 static int identify_sdf_label( char* inp_line, const char *pSdfLabel );
55 static long extract_cas_rn( char *line );
56 static int mol_copy_check_empty( char* dest, char* source, int len, char **first_space );
57 static int mol_read_datum(void* data, int field_len, int  data_type, char** line_ptr);
58 
59 static int RemoveNonPrintable( char *line );
60 
61 
62 /******/
/*
 * Error-reporting helpers.  Both macros expect a variable named pStrErr
 * (the error-text buffer passed to AddMOLfileError) to be in scope at the
 * point of expansion.
 *
 * The bodies are wrapped in do { } while (0) so that each invocation is a
 * single statement and behaves correctly inside an unbraced if/else.
 */
#ifndef MOLFILE_ERR_FIN
/* If no error is recorded yet and new_err is non-zero, store new_err in err;
 * in all cases append msg to pStrErr and jump to the label err_fin. */
#define MOLFILE_ERR_FIN(err, new_err, err_fin, msg) \
        do { \
            if ( !(err) && (new_err) ) { (err) = (new_err); } \
            AddMOLfileError(pStrErr, (msg)); \
            goto err_fin; \
        } while (0)
#endif
#ifndef MOLFILE_ERR_SET
/* Same as MOLFILE_ERR_FIN but without the jump; with new_err == 0 it only
 * appends the message (used for warnings). */
#define MOLFILE_ERR_SET(err, new_err, msg) \
        do { \
            if ( !(err) && (new_err) ) { (err) = (new_err); } \
            AddMOLfileError(pStrErr, (msg)); \
        } while (0)
#endif
71 
72 /*************************************************************************/
AddMOLfileError(char * pStrErr,const char * szMsg)73 int AddMOLfileError( char *pStrErr, const char *szMsg )
74 {
75     if ( pStrErr && szMsg && szMsg[0] ) {
76         int lenStrErr = strlen( pStrErr );
77         int lenMsg    = strlen( szMsg );
78         char *p = strstr( pStrErr, szMsg );
79         if ( p && (p==pStrErr || *(p-1) == ' ' && (*(p-2) == ';' || *(p-2) == ':' )) &&
80                   (p+lenMsg == pStrErr+lenStrErr ||
81                   p[lenMsg] == ';' && p[lenMsg+1] == ' ' ||
82                   p[lenMsg-1]==':' && p[lenMsg]==' ') ) {
83             return 1; /*  reject duplicates */
84         }
85         if ( lenStrErr + lenMsg + 2*(lenStrErr > 0) < STR_ERR_LEN ) {
86             /*  enough room to add */
87             if (lenStrErr > 0) {
88                 if ( pStrErr[lenStrErr-1] != ':' ) {
89                     strcat( pStrErr, ";" );
90                 }
91                 strcat( pStrErr, " " );
92             }
93             strcat( pStrErr, szMsg );
94             return 1;
95         }
96         /*  no room */
97         if ( strstr( pStrErr, "..." ) ) {
98             return 0; /*  no room mark has already been set */
99         }
100         if ( lenStrErr + 3 < STR_ERR_LEN ) {
101             strcat( pStrErr, "..." );
102         }
103     }
104     return 0;
105 }
106 /*************** static **********************************************************/
/*
 * Copy at most len bytes of source into dest, zero-terminate dest, and
 * locate the start of its trailing whitespace.
 *
 * dest         receives the copy; must have room for len+1 bytes
 * source       input text; copying stops early at its terminating zero
 * len          maximum number of bytes to copy; a negative value is treated as 0
 * first_space  on return points into dest at the first byte of the trailing
 *              whitespace run, or at the terminating zero if there is none
 *
 * Returns the number of bytes actually copied (terminating zero not counted).
 */
int mol_copy_check_empty( char* dest, char* source, int len, char **first_space )
{
    int i;

    if ( len < 0 ) {
        len = 0;    /* never index dest[] with a negative offset */
    }
    if ( len > 0 ) {
        strncpy( dest, source, len );
    }
    dest[len] = '\0';           /* strncpy does not terminate a full-length copy */
    len = (int)strlen( dest );  /* source may have been shorter than len */

    /* walk back over trailing whitespace */
    for ( i = len - 1; i >= 0 && isspace( (unsigned char)dest[i] ); i-- )
        ;
    *first_space = dest + (i+1);
    return len;
}
119 /************* static ************************************************************/
mol_read_datum(void * data,int field_len,int data_type,char ** line_ptr)120 int mol_read_datum(void* data, int field_len, int  data_type, char** line_ptr)
121 {
122 /* 1. 'field_len' for MOL_STRING_DATA does not include trailing zero,
123  *     that is actual length of the string pointed by 'data'
124  *     should be at least field_len+1 bytes.
125  *     For numerical data 'field_len' is length of input data field
126  *     For numerical integral data field_len <= 0 means read up to first
127  *     non-numeric character as strtod() does ("free format")
128  * 2.  return value: for MOL_STRING_DATA: number of bytes excluding trailing zero
129  *                   for all others:  1=success; 0 = empty; -1= error
130  * 3.  on exit *line_ptr points to the next byte after the last entered
131  */
132     char *p = *line_ptr, *q, *p_end;
133     int  i, ret=1, c, len;
134     long   ldata;
135     double ddata;
136 
137     switch( data_type ) {
138     case MOL_STRING_DATA:
139         for ( i= 0; i < field_len && 0 != (c = p[i]) && isspace(UCINT c); i++ ) /* pass by all leading spaces */
140             ;
141         len = mol_copy_check_empty( (char*)data, &p[i], field_len-i, &q );
142         ret = ( q - (char*)data );/* actual data length */
143         *q = '\0';                /* add zero termination to data if it is not there yet*/
144         *line_ptr += (len+i);     /* ptr to the 1st byte of the next input field or to zero termination */
145         break;
146 
147     case MOL_CHAR_INT_DATA:
148     case MOL_SHORT_INT_DATA:
149     case MOL_LONG_INT_DATA:
150         { /* block start */
151             char str[MOL_MAX_VALUE_LEN+1];
152             ldata = 0L;
153             if ( field_len > MOL_MAX_VALUE_LEN ) {
154                 ret = -1;
155             }else
156             if ( field_len > 0 ) { /* fixed length */
157                 *line_ptr += ( len = mol_copy_check_empty( str, p, field_len, &q ) );
158                 *q = '\0';
159                 if ( !len || !(q-str) ) { /* empty string */
160                     ret = 0;
161                 }else
162                 if ( (ldata=strtol(str,&p_end,10), p_end != q) ){ /* wrong data: incompletely interpreted */
163                     ret = -1;
164                 }
165             }else{  /* free format: field_len <= 0 */
166                 ldata = strtol( p, &p_end, 10 );
167                 *line_ptr += ( len = p_end - p );
168                 if ( len == 0 ){
169                     ret = 0;
170                 }
171             }
172 
173             switch( data_type ) {
174             case MOL_CHAR_INT_DATA:
175                 if ( SCHAR_MIN <= ldata  && ldata <= SCHAR_MAX ){ /* from || to &&: 11-19-96 */
176                     *(S_CHAR*)data = (S_CHAR)ldata;
177                 }else{
178                     *(S_CHAR*)data = (S_CHAR)0;
179                     ret = -1;
180                 }
181                 break;
182             case MOL_SHORT_INT_DATA:
183                 if ( SHRT_MIN <= ldata && ldata <= SHRT_MAX ){
184                     *(S_SHORT*)data = (S_SHORT)ldata;
185                 }else{
186                     *(S_SHORT*)data = (S_SHORT)0;
187                     ret = -1;
188                 }
189                 break;
190             case MOL_LONG_INT_DATA:
191                 if ( LONG_MIN < ldata && ldata < LONG_MAX ){
192                     *(long*)data = (long)ldata;
193                 }else{
194                     *(long*)data = 0L;
195                     ret = -1;
196                 }
197                 break;
198             default:
199                 ret=-1;
200             }
201 
202         } /* block end */
203         break;
204     case MOL_DOUBLE_DATA:
205     case MOL_FLOAT_DATA:
206         { /* block start */
207             char str[MOL_MAX_VALUE_LEN+1];
208             if ( field_len > MOL_MAX_VALUE_LEN ) {
209                 ret = -1;
210                 ddata = 0.0;
211             }else
212             if ( field_len > 0 ) {
213                 *line_ptr += (len = mol_copy_check_empty( str, p, field_len, &q ));
214                 *q = '\0';
215                 if ( !len || !(q-str) ) { /* empty string */
216                     ddata = 0.0;
217                     ret   = 0;
218                 }else
219                 if ( (ddata=strtod(str,&p_end), p_end != q) ){ /* wrong data */
220                     ret = -1;
221                 }
222             }else{ /* free format */
223                 ddata = strtod( p, &p_end );
224                 *line_ptr += ( len = p_end - p );
225                 if ( len == 0 ){
226                     ret = 0;
227                 }
228             }
229             switch(data_type){
230             case MOL_DOUBLE_DATA:
231                 if ( ddata != HUGE_VAL && /*ldata*/ ddata != -HUGE_VAL ){ /* replaced ldata with ddata 6-30-98 DCh */
232                     *(double*)data = ddata;
233                 }else{
234                     *(double*)data = 0.0;
235                     ret = -1;
236                 }
237                 break;
238             case MOL_FLOAT_DATA:
239                 if ( fabs(ddata) <= (double)FLT_MIN ) {
240                     *(float*)data = 0.0;
241                 }else
242                 if ( fabs(ddata) >= (double)FLT_MAX ) {
243                     *(float*)data = 0.0;
244                     ret = -1;
245                 }else{
246                     *(float*)data = (float)ddata;
247                 }
248                 break;
249             }
250         } /* block end */
251         break;
252     case MOL_JUMP_TO_RIGHT:
253         for ( i = 0; i < field_len && p[i]; i++ )
254             ;
255         *line_ptr += i;
256         ret = i;
257         break;
258     default:
259         ret = -1;
260     }
261     return ret;
262 }
263 /************* static ************************************************************/
/*
 * Read the three-line MOL file header block into *hdr.
 *
 * Line 1 is stored as the molecule name; line 2 is split into its fixed-width
 * fields and also saved whole in szMoleculeLine2; line 3 is the comment.
 * Field conversion results are deliberately ignored and line lengths are not
 * checked, so malformed or over-long header lines do not cause an error.
 *
 * pStrErr is not used by this function.
 *
 * Returns 0 on success, or 1, 3 or 7 when header line 1, 2 or 3
 * respectively cannot be read.
 */
int mol_read_hdr(MOL_HEADER_BLOCK *hdr, FILE* inp, char *pStrErr)
{
    char  szLine[MOLFILEINPLINELEN]; /* + cr +lf +zero termination + reserve */
    char *pos;

    /*------------ header line #1: name ----------------*/
    pos = inchi_fgetsLf( szLine, (int)sizeof(szLine), inp );
    if ( NULL == pos ) {
        return 1;
    }
    remove_one_lf( szLine );
    (void) mol_read_datum( hdr->szMoleculeName, sizeof(hdr->szMoleculeName)-1, MOL_STRING_DATA, &pos );

    /*------------ header line #2: fixed-width fields ----------------*/
    pos = inchi_fgetsLf( szLine, (int)sizeof(szLine), inp );
    if ( NULL == pos ) {
        return 3;
    }
    remove_one_lf( szLine );
    (void) mol_read_datum( hdr->szUserInitials, sizeof(hdr->szUserInitials)-1, MOL_STRING_DATA,    &pos );
    (void) mol_read_datum( hdr->szProgramName,  sizeof(hdr->szProgramName)-1,  MOL_STRING_DATA,    &pos );
    (void) mol_read_datum( &hdr->cMonth,                   2,                  MOL_CHAR_INT_DATA,  &pos );
    (void) mol_read_datum( &hdr->cDay,                     2,                  MOL_CHAR_INT_DATA,  &pos );
    (void) mol_read_datum( &hdr->cYear,                    2,                  MOL_CHAR_INT_DATA,  &pos );
    (void) mol_read_datum( &hdr->cHour,                    2,                  MOL_CHAR_INT_DATA,  &pos );
    (void) mol_read_datum( &hdr->cMinute,                  2,                  MOL_CHAR_INT_DATA,  &pos );
    (void) mol_read_datum( hdr->szDimCode,      sizeof(hdr->szDimCode)-1,      MOL_STRING_DATA,    &pos );
    (void) mol_read_datum( &hdr->nScalingFactor1,          2,                  MOL_SHORT_INT_DATA, &pos );
    (void) mol_read_datum( &hdr->dScalingFactor2,         10,                  MOL_DOUBLE_DATA,    &pos );
    (void) mol_read_datum( &hdr->dEnergy,                 12,                  MOL_DOUBLE_DATA,    &pos );
    (void) mol_read_datum( &hdr->lInternalRegistryNumber,  6,                  MOL_LONG_INT_DATA,  &pos );

    /* rewind to the start of the buffer and save the whole line 2 */
    pos = szLine;
    (void) mol_read_datum( hdr->szMoleculeLine2, sizeof(hdr->szMoleculeLine2)-1, MOL_STRING_DATA, &pos );

    /*------------ header line #3: comment ----------------*/
    pos = inchi_fgetsLf( szLine, (int)sizeof(szLine), inp );
    if ( NULL == pos ) {
        return 7;
    }
    remove_one_lf( szLine );
    (void) mol_read_datum( hdr->szComment, sizeof(hdr->szComment)-1, MOL_STRING_DATA, &pos );

    return 0;
}
340 /********** static *****************************************************/
/*
 * Replace, in place, every byte of line that lies outside the printable
 * ASCII range 0x20..0x7E with '.'.
 *
 * Returns the number of bytes replaced; 0 if line is NULL.
 */
int RemoveNonPrintable( char *line )
{
    unsigned char *s;
    int            nReplaced = 0;

    if ( !line ) {
        return 0;
    }
    /* assuming ASCII charset */
    for ( s = (unsigned char *)line; *s; s++ ) {
        if ( *s < ' ' || *s >= 0x7F ) {
            *s = '.';
            nReplaced ++;
        }
    }
    return nReplaced;
}
355 /************** static *************************************************/
/*
 * Read and interpret the counts line of the connection table.
 *
 * Fills the counters in *ctab from the 3-character fields of the line and
 * stores the trailing version string in ctab->csCurrentCtabVersion.  Fields
 * that are not compiled in (see MOL_QUERY, MOL_CPSS) are skipped.
 *
 * An over-long line only adds a message to pStrErr; it is not an error.
 *
 * Returns 0 on success, 1 if the line cannot be read, 3 if any numeric
 * field cannot be interpreted (the offending line is added to pStrErr).
 */
int mol_read_counts_line( MOL_CTAB* ctab, FILE *inp, char *pStrErr )
{
    char *p;
    char line[MOLFILEINPLINELEN];
    const int line_len = sizeof(line);
    int   err = 0;

    if ( NULL == ( p = inchi_fgetsLf( line, line_len, inp ) ) ){
        /* can't read the input file line */
        MOLFILE_ERR_FIN (err, 1, err_fin, "Cannot read counts line");
    }
    remove_one_lf( line );
    /* Measure the string instead of peeking at line[MOLFILEMAXLINELEN]:
       for a shorter line that byte is meaningful only if the reader
       pre-clears the buffer, which cannot be relied upon from here. */
    if ( (int)strlen( line ) > MOLFILEMAXLINELEN ){
        MOLFILE_ERR_SET (err, 0, "Too long counts line");  /* warning only: new_err is 0 */
    }
    if ( 0 > mol_read_datum( &ctab->nNumberOfAtoms,         3,  MOL_SHORT_INT_DATA, &p )
         || 0 > mol_read_datum( &ctab->nNumberOfBonds,         3,  MOL_SHORT_INT_DATA, &p )
#if ( MOL_QUERY == MOL_PRESENT )
         || 0 > mol_read_datum( &ctab->nNumberOfAtomsLists,    3,  MOL_SHORT_INT_DATA, &p )
#else
         || 0 > mol_read_datum( NULL,                          3,  MOL_JUMP_TO_RIGHT,  &p )
#endif
         || 0 > mol_read_datum( NULL, /*obsolete*/             3,  MOL_JUMP_TO_RIGHT,  &p )
         || 0 > mol_read_datum( &ctab->cChiralFlag,            3,  MOL_CHAR_INT_DATA,  &p )
         || 0 > mol_read_datum( &ctab->nNumberOfStextEntries,  3,  MOL_SHORT_INT_DATA, &p )
#if ( MOL_CPSS == MOL_PRESENT )
         || 0 > mol_read_datum( &ctab->nNumberOfReactionComponentsPlus1, 3, MOL_SHORT_INT_DATA, &p )
         || 0 > mol_read_datum( &ctab->nNumberOfReactants,     3,  MOL_SHORT_INT_DATA, &p )
         || 0 > mol_read_datum( &ctab->nNumberOfProducts,      3,  MOL_SHORT_INT_DATA, &p )
         || 0 > mol_read_datum( &ctab->nNumberOfIntermediates, 3,  MOL_SHORT_INT_DATA, &p )
#else
         || 0 > mol_read_datum( NULL,                          3,  MOL_JUMP_TO_RIGHT,  &p )
         || 0 > mol_read_datum( NULL,                          3,  MOL_JUMP_TO_RIGHT,  &p )
         || 0 > mol_read_datum( NULL,                          3,  MOL_JUMP_TO_RIGHT,  &p )
         || 0 > mol_read_datum( NULL,                          3,  MOL_JUMP_TO_RIGHT,  &p )
#endif
         || 0 > mol_read_datum( &ctab->nNumberOfPropertyLines, 3,  MOL_SHORT_INT_DATA, &p ) ){
        err = 3;  /* can't interpret counts line */
        MOLFILE_ERR_SET (err, 3, "Cannot interpret counts line:");
        /* show the offending line, made safe for display */
        RemoveNonPrintable( line );
        AddMOLfileError(pStrErr, line);
        goto err_fin;
    }
    /* version string; an empty one is acceptable, so the result is ignored */
    (void) mol_read_datum( ctab->csCurrentCtabVersion, sizeof(ctab->csCurrentCtabVersion)-1, MOL_STRING_DATA, &p );
err_fin:
    return err;
}
403 
404 /************ static *************************************************************/
/*
 * Read the atom block: ctab->nNumberOfAtoms lines, one per atom.
 *
 * ctab     connection table; ctab->szCoord[i] (if szCoord is not NULL)
 *          receives the leading part of line i via mystrncpy(), and
 *          ctab->MolAtom[i] (if MolAtom is not NULL) receives the parsed fields
 * inp      input stream
 * err      error state on entry; when non-zero the lines are only read and
 *          discarded
 * pStrErr  receives error messages
 *
 * Once an error is set, the remaining atom lines are still consumed so the
 * stream stays positioned after the block; if a line equal to
 * SDF_END_OF_DATA shows up while doing so, reading stops and the error code
 * is returned negated.
 *
 * Returns the resulting error code: 0 = success, 2 = cannot read a line,
 * 4 / 5 = cannot interpret the first / second half of an atom line.
 */
int read_atom_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr )
{
    char *p;
    char line[MOLFILEINPLINELEN];
    const int line_len = sizeof(line);
    S_SHORT i, chg;
    /* charge code found in the atom line -> actual charge; 'R' marks a doublet radical */
    static S_SHORT charge_val[] = {0, 3, 2, 1, 'R', -1, -2, -3};
    /*                             0  1  2  3   4    5   6   7 */

    for ( i = 0; i < ctab->nNumberOfAtoms; i++ ) {

        if ( NULL == ( p = inchi_fgetsLf( line, line_len, inp ) ) ){
            if ( !err ) {
                MOLFILE_ERR_SET (err, 2, "Cannot read atom block line");
            }
            break;
        }
        remove_one_lf( line );
        /* Measure the string instead of peeking at line[MOLFILEMAXLINELEN]:
           for a shorter line that byte is meaningful only if the reader
           pre-clears the buffer, which cannot be relied upon from here. */
        if ( (int)strlen( line ) > MOLFILEMAXLINELEN ){
            MOLFILE_ERR_SET (err, 0, "Too long atom block line"); /* warning only: new_err is 0 */
        }
        if ( err ) {
            if ( !strcmp( line, SDF_END_OF_DATA ) ) {
                err = -abs(err);
                break;
            }
            continue; /* bypass the rest of the Atom block */
        }
        if ( NULL != ctab->szCoord ) {
            mystrncpy( ctab->szCoord[i], p, 31 ); /* original coordinates */
        }

        if ( NULL != ctab->MolAtom ) {
            /* first half of the line: coordinates, symbol, mass difference, charge, parity, valence */
            if ( 0 > mol_read_datum( &ctab->MolAtom[i].fX,   10,  MOL_DOUBLE_DATA, &p )
                || 0 > mol_read_datum( &ctab->MolAtom[i].fY,   10,  MOL_DOUBLE_DATA, &p )
                || 0 > mol_read_datum( &ctab->MolAtom[i].fZ,   10,  MOL_DOUBLE_DATA, &p )
                || 0 > mol_read_datum( NULL, /* undescribed in article*/    1,  MOL_JUMP_TO_RIGHT, &p )
                /* an empty atom symbol is an error, hence "0 ==" */
                || 0 == mol_read_datum( &ctab->MolAtom[i].szAtomSymbol,     3,  MOL_STRING_DATA, &p ) /* was sizeof(ctab->MolAtom[0].szAtomSymbol)-1 */
#ifdef TARGET_EXE_USING_API
                || 0 > mol_read_datum( &ctab->MolAtom[i].cMassDifference,   2,  MOL_SHORT_INT_DATA, &p )
#else
                || 0 > mol_read_datum( &ctab->MolAtom[i].cMassDifference,   2,  MOL_CHAR_INT_DATA, &p )
#endif
                || 0 > mol_read_datum( &ctab->MolAtom[i].cCharge,           3,  MOL_CHAR_INT_DATA, &p )
                || 0 > mol_read_datum( &ctab->MolAtom[i].cStereoParity,     3,  MOL_CHAR_INT_DATA, &p )
#if ( MOL_QUERY == MOL_PRESENT )
                || 0 > mol_read_datum( &ctab->MolAtom[i].cH_countPlus1,     3,  MOL_CHAR_INT_DATA, &p )
                || 0 > mol_read_datum( &ctab->MolAtom[i].cStereoCare,       3,  MOL_CHAR_INT_DATA, &p )
#else
                || 0 > mol_read_datum( NULL,                                3,  MOL_JUMP_TO_RIGHT,  &p )
                || 0 > mol_read_datum( NULL,                                3,  MOL_JUMP_TO_RIGHT,  &p )
#endif
                || 0 > mol_read_datum( &ctab->MolAtom[i].cValence,          3,  MOL_CHAR_INT_DATA, &p ) ) {

                err = 4;
                MOLFILE_ERR_SET (err, 4, "Cannot interpret atom block line:");
                RemoveNonPrintable( line );
                AddMOLfileError(pStrErr, line);
                if ( !strcmp( line, SDF_END_OF_DATA ) ) {
                    err = -abs(err);
                    break;
                }
                continue; /* can't interpret a first half of atom block line */
            }
            /* normalize a two-letter symbol to "Xx" */
            if ( 2 == strlen(ctab->MolAtom[i].szAtomSymbol) && isupper(UCINT ctab->MolAtom[i].szAtomSymbol[1]))
                ctab->MolAtom[i].szAtomSymbol[1] = (char)tolower(UCINT ctab->MolAtom[i].szAtomSymbol[1]); /* 5-4-99 DCh*/

            /* translate the charge code into charge and radical */
            if ( (chg = (S_SHORT) ctab->MolAtom[i].cCharge)< 0 || chg >= (int)(sizeof ( charge_val ) / sizeof( charge_val[0] )) ) {
                /* code outside the table: continue the 3,2,1,...,-3 progression as 4 - code */
                ctab->MolAtom[i].cCharge  = (S_CHAR)(4 - chg); /*  allow greater charges to accommodate NCI structures. 8-20-2002 */
                ctab->MolAtom[i].cRadical = 0;
            }else
            if ( 'R' == (chg = charge_val[chg]) ){
                ctab->MolAtom[i].cCharge  = 0;
                ctab->MolAtom[i].cRadical = RADICAL_DOUBLET;
            }else{
                ctab->MolAtom[i].cCharge  = (S_CHAR)chg; /* actual charge value */
                ctab->MolAtom[i].cRadical = 0;
            }
#ifdef TARGET_EXE_USING_API
            if ( ctab->MolAtom[i].cMassDifference ) { /* e_ReadMOL.c specific */
                ctab->MolAtom[i].cMassDifference += ISOTOPIC_SHIFT_FLAG;
            }
#endif

            /* second half of the line: reaction / query related fields */
            if (
#if ( MOL_CPSS == MOL_PRESENT )
                   0 > mol_read_datum( &ctab->MolAtom[i].cH0_designator,           3,  MOL_CHAR_INT_DATA, &p )
                || 0 > mol_read_datum( &ctab->MolAtom[i].cReactionComponentType,   3,  MOL_CHAR_INT_DATA, &p )
                || 0 > mol_read_datum( &ctab->MolAtom[i].cReactionComponentNumber, 3,  MOL_CHAR_INT_DATA, &p )
#else
                   0 > mol_read_datum( NULL,                                       3,  MOL_JUMP_TO_RIGHT,  &p )
                || 0 > mol_read_datum( NULL,                                       3,  MOL_JUMP_TO_RIGHT,  &p )
                || 0 > mol_read_datum( NULL,                                       3,  MOL_JUMP_TO_RIGHT,  &p )
#endif
#if ( MOL_REACT == MOL_PRESENT )
                || 0 > mol_read_datum( &ctab->MolAtom[i].nAtomAtomMappingNumber,   3,  MOL_SHORT_INT_DATA, &p )
                || 0 > mol_read_datum( &ctab->MolAtom[i].cReactionComponentType,   3,  MOL_CHAR_INT_DATA, &p )
#else
                || 0 > mol_read_datum( NULL,                                       3,  MOL_JUMP_TO_RIGHT,  &p )
                || 0 > mol_read_datum( NULL,                                       3,  MOL_JUMP_TO_RIGHT,  &p )
#endif
#if ( MOL_REACT == MOL_PRESENT || MOL_QUERY == MOL_PRESENT )
                || 0 > mol_read_datum( &ctab->MolAtom[i].cExactChargeFlag,         3,  MOL_CHAR_INT_DATA, &p )
#else
                || 0 > mol_read_datum( NULL,                                       3,  MOL_JUMP_TO_RIGHT,  &p )
#endif
            ){
                err = 5; /* can't interpret a second half of atom block line */
                MOLFILE_ERR_SET (err, 5, "Cannot interpret atom block line:");
                RemoveNonPrintable( line );
                AddMOLfileError(pStrErr, line);
                if ( !strcmp( line, SDF_END_OF_DATA ) ) {
                    err = -abs(err);
                    break;
                }
                continue;
            }
        }
    }
    return err;
}
534 /************ static *************************************************************/
/*
 * Read the bond block: ctab->nNumberOfBonds lines, one per bond.
 *
 * ctab     connection table; ctab->MolBond[i] (if MolBond is not NULL)
 *          receives the parsed fields of line i
 * inp      input stream
 * err      error state on entry; when non-zero the lines are only read and
 *          discarded
 * pStrErr  receives error messages
 *
 * Once an error is set, the remaining bond lines are still consumed; if a
 * line equal to SDF_END_OF_DATA shows up while doing so, reading stops and
 * the error code is returned negated.
 *
 * Returns the resulting error code: 0 = success, 2 = cannot read a line,
 * 3 = too long line, 4 = cannot interpret a bond line.
 */
int read_bonds_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr )
{
    char *p;
    char line[MOLFILEINPLINELEN];
    const int line_len = sizeof(line);
    S_SHORT i;

    for ( i = 0; i < ctab->nNumberOfBonds; i++ ) {

        if ( NULL == ( p = inchi_fgetsLf( line, line_len, inp ) ) ){
            if ( !err ) {
                MOLFILE_ERR_SET (err, 2, "Cannot read bond block line");
            }
            break;
        }
        remove_one_lf( line );
        /* Measure the string instead of peeking at line[MOLFILEMAXLINELEN]:
           for a shorter line that byte is meaningful only if the reader
           pre-clears the buffer, which cannot be relied upon from here. */
        if ( (int)strlen( line ) > MOLFILEMAXLINELEN ){
            if ( !err ) {
                /* too long input file line: report it together with the error code */
                MOLFILE_ERR_SET (err, 3, "Too long bond block line");
            }
        }
        if ( err ) {
            if ( !strcmp( line, SDF_END_OF_DATA ) ) {
                err = -abs(err);
                break;
            }
            continue; /* bypass the rest of the bond block */
        }

        if ( ctab->MolBond ) {
            if ( 0 > mol_read_datum( &ctab->MolBond[i].nAtomNo1,      3,  MOL_SHORT_INT_DATA, &p )
                || 0 > mol_read_datum( &ctab->MolBond[i].nAtomNo2,      3,  MOL_SHORT_INT_DATA, &p )
                || 0 > mol_read_datum( &ctab->MolBond[i].cBondType,     3,  MOL_CHAR_INT_DATA,  &p )
                || 0 > mol_read_datum( &ctab->MolBond[i].cBondStereo,   3,  MOL_CHAR_INT_DATA,  &p )
#if ( MOL_QUERY == MOL_PRESENT )
                || 0 > mol_read_datum( &ctab->MolBond[i].cBondTopology, 3,  MOL_CHAR_INT_DATA,  &p ) /* ring/chain */
#else
                || 0 > mol_read_datum( NULL,                            3,  MOL_JUMP_TO_RIGHT,  &p )
#endif
#if ( MOL_REACT == MOL_PRESENT )
                || 0 > mol_read_datum( &ctab->MolBond[i].cReactingCenterStatus, 3,  MOL_CHAR_INT_DATA,  &p )
#else
                || 0 > mol_read_datum( NULL,                            3,  MOL_JUMP_TO_RIGHT,  &p )
#endif
            ){
                if ( !err ) {
                    /* can't interpret bonds block line */
                    MOLFILE_ERR_SET (err, 4, "Cannot interpret bond block line:");
                    RemoveNonPrintable( line );
                    AddMOLfileError(pStrErr, line);
                }
                if ( !strcmp( line, SDF_END_OF_DATA ) ) {
                    err = -abs(err);
                    break;
                }
                /* no explicit continue: err is now set, so the following
                   iterations only consume the remaining lines */
            }
        }
    }
    return err;
}
599 /********** static ***************************************************************/
/*
 * Skip the STEXT block without interpreting it.
 *
 * Reads 2 * ctab->nNumberOfStextEntries lines from inp and discards them.
 * If a line cannot be read, reading stops; when no earlier error is pending,
 * err becomes 2 and a message is added to pStrErr.
 *
 * Returns the resulting error code (the incoming err if nothing failed).
 */
int read_stext_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr )
{
    char    szSkipped[MOLFILEINPLINELEN];
    S_SHORT nLinesRead = 0;

    while ( nLinesRead < 2*ctab->nNumberOfStextEntries ) {
        if ( NULL == inchi_fgetsLf( szSkipped, (int)sizeof(szSkipped), inp ) ) {
            /* can't read the input file line */
            if ( !err ) {
                MOLFILE_ERR_FIN (err, 2, err_fin, "Cannot read STEXT block line");
            }
            goto err_fin;
        }
        /* line length is intentionally not checked here */
        nLinesRead ++;
    }
err_fin:
    return err;
}
628 /************ static *************************************************************/
read_properties_block(MOL_CTAB * ctab,MOL_HEADER_BLOCK * pHdr,FILE * inp,int err,char * pStrErr)629 int read_properties_block( MOL_CTAB* ctab, MOL_HEADER_BLOCK *pHdr, FILE *inp, int err, char *pStrErr )
630 {
631     enum { MULTI_LINE_MODE_NO_MODE, MULTI_LINE_MODE_ISIS_ALIAS };
632     char *p;
633     char line[MOLFILEINPLINELEN];
634     const int line_len = sizeof(line);
635     int   nMultiLineMode = MULTI_LINE_MODE_NO_MODE, nAtomNumber=0;
636     S_SHORT i, j;
637     char  charM[2];
638     char  szBlank[3];
639     char  szType[4];
640     S_SHORT  skip_lines=0;
641     S_SHORT  num_entries;
642     S_SHORT  num_atoms = ctab->nNumberOfAtoms;
643 
644     int  charge_encountered  = 0;
645     int  radical_encountered = 0;
646     int  isotope_encountered = 0;
647     /*
648       if ( NULL == ctab->MolAtom ){
649           err = 1;
650           goto err_fin;    internal error: memory has not been allocated for MolAtom structure
651       }
652      */
653     for ( i = 0; ctab->csCurrentCtabVersion[0]? 1 : (i < ctab->nNumberOfPropertyLines); i++ ) { /* the last line should be M END */
654         /* ctab->csCurrentCtabVersion[0] == 0:
655               exactly ctab->nNumberOfPropertyLines lines including M END */
656         /* ctab->csCurrentCtabVersion[0] != 0:
657               read until M END line was encountered */
658         if ( NULL == ( p = inchi_fgetsLf( line, line_len, inp ) ) ){
659             if ( !err ) {
660                 MOLFILE_ERR_SET (err, 2, "Cannot read properties block line");
661             }
662             goto err_fin;
663         }
664         remove_one_lf( line );
665         if ( line[MOLFILEMAXLINELEN] ){
666             MOLFILE_ERR_SET (err, 3, "Too long properties block line");
667             continue;
668         }
669         if ( skip_lines > 0 ) {
670             skip_lines --;
671             continue;
672         }
673         /* alias. */
674         if ( nMultiLineMode == MULTI_LINE_MODE_ISIS_ALIAS && nAtomNumber ) {
675             int  len;
676             nMultiLineMode = MULTI_LINE_MODE_NO_MODE;
677             if ( 0 >= (len=normalize_name( p )) ) {
678                 nAtomNumber = 0;
679                 continue;
680             }
681             if( 0 < len && len < (int)(sizeof(ctab->MolAtom->szAtomSymbol)) ) {
682                 int  nCharge, nRad;
683                 MOL_ATOM*  MolAtom = ctab->MolAtom + nAtomNumber-1;
684                 /* ctab->MolAtom[nAtomNumber-1].cAtomAliasedFlag = 1; */
685                 /*  extract radicals & charges */
686                 extract_ChargeRadical( p, &nRad, &nCharge );
687                 /*  Aliased atom cannot have charge, radical & mass difference */
688                 /*  in the atom table or "M  CHG", "M  RAD", "M  ISO" */
689                 /* if ( nCharge ) */
690                     MolAtom->cCharge = (S_CHAR)nCharge;
691                 /* if ( nRad ) */
692                     MolAtom->cRadical = (char)nRad;
693 
694                 if ( 1 == len && 'D' == p[0]    ) {
695                     /*  H isotope */
696                     p[0] = 'H';
697 #ifdef TARGET_EXE_USING_API
698                     MolAtom->cMassDifference=(1 + ISOTOPIC_SHIFT_FLAG);
699 #else
700                     MolAtom->cMassDifference=1;
701 #endif
702                 } else
703                 if ( 1 == len && 'T' == p[0]    ) {
704                     /*  H isotope */
705                     p[0] = 'H';
706 #ifdef TARGET_EXE_USING_API
707                     MolAtom->cMassDifference=(2 + ISOTOPIC_SHIFT_FLAG);
708 #else
709                     MolAtom->cMassDifference=2;
710 #endif
711                 } else
712                     MolAtom->cMassDifference=0;
713                 if ( strlen(p) < sizeof(ctab->MolAtom[0].szAtomSymbol) ) {
714                     strcpy(MolAtom->szAtomSymbol, p);
715                 } else {
716                     strcpy(MolAtom->szAtomSymbol, "???");
717                 }
718                 MolAtom->cAtomAliasedFlag ++;
719             }
720             skip_lines = 0;
721             nAtomNumber = 0;
722             continue;
723         }
724 
725         if ( 1 != mol_read_datum( charM,     sizeof(charM)   - 1,  MOL_STRING_DATA, &p )
726             || 0 != mol_read_datum( szBlank,   sizeof(szBlank) - 1,  MOL_STRING_DATA, &p ) /* must contain 0 bytes */
727             || 0 >= mol_read_datum( szType,    sizeof(szType)  - 1,  MOL_STRING_DATA, &p ) /* must contain 3 bytes */
728         ) {
729             if ( !strcmp( line, SDF_END_OF_DATA ) ) {
730                 err = err? -abs(err): -4;
731                 break;
732             }
733             continue;  /* ignore because cannot recognize */
734         }
735         if ( charM[0] == 'V' ){
736             skip_lines = 0;   /* ISIS/Desktop Atom Value: one-line property */
737             continue;
738         }
739         if ( charM[0] == 'G' ){
740             skip_lines = 1;   /* ISIS/Desktop Group abbreviation: two-line property */
741             continue;
742         }
743         if ( charM[0] == 'A' ) {
744             if ( NULL != ctab->MolAtom &&
745                  0 < ( nAtomNumber = (int)strtol(szType, NULL, 10) ) &&
746                  nAtomNumber <= ctab->nNumberOfAtoms  ){
747                 /* Atom Alias [ISIS/Desktop] two-line property */
748                 nMultiLineMode = MULTI_LINE_MODE_ISIS_ALIAS;
749                 continue;
750             } else {
751                 nAtomNumber = 0;
752                 skip_lines = 1;
753                 continue;
754             }
755         }
756         if ( charM[0] == 'S' && !strcmp( szType, "SKP" ) ){  /* skip lines */
757             if ( 0 >= mol_read_datum( &skip_lines, 3, MOL_SHORT_INT_DATA, &p ) ) {
758                 skip_lines = 0;
759             }
760             continue;
761         }
762         if ( charM[0] != 'M' ) {/* cannot recognize a line */
763             continue;
764         }
765         if ( !strcmp( szType, "REG" ) ) {
766             int len;
767             p = p + strspn( p, " " );
768             len = strcspn( p, " " );
769             len = inchi_min( len, MOL_MAX_VALUE_LEN );
770             mol_read_datum( &pHdr->lInternalRegistryNumber, len, MOL_LONG_INT_DATA, &p );
771             continue;
772         }
773 
774         if ( !strcmp( szType, "END" ) ){
775             if ( ctab->csCurrentCtabVersion[0] )
776                 break;  /* end of property lines */
777             continue;
778         }
779 
780         if ( NULL == ctab->MolAtom )
781             continue; /* ignore because the user requested to bypass all this stuff */
782 
783         /*----------------------------------- charge: Generic */
784         if ( !strcmp( szType, "CHG" ) &&
785              0 < mol_read_datum( &num_entries, 3, MOL_SHORT_INT_DATA, &p ) &&
786              1 <= num_entries && num_entries <= 8 ) {
787             S_SHORT atoms[8];
788             S_SHORT charges[8];
789             if ( !charge_encountered && !radical_encountered ) {
790                 /* first charge or radical record clears all Atom Block */
791                 /* entered charge and radical data to zeroes            */
792                 charge_encountered = -1;
793             }
794             for ( j = 0; j < num_entries; j++ ) {
795                 if ( 0 > mol_read_datum( &atoms[j],    0, MOL_SHORT_INT_DATA, &p ) ||
796                      0 > mol_read_datum( &charges[j],  0, MOL_SHORT_INT_DATA, &p ) ||
797                      atoms[j]   <=  0 || atoms[j]    > num_atoms ||
798                      charges[j] < -15 || charges[j]  > 15 ) {
799                     goto charge_error;
800                 }
801             }
802             if ( charge_encountered == -1 ) {
803                 for ( j = 0; j < num_atoms; j++ ) {
804                     if ( !ctab->MolAtom[j].cAtomAliasedFlag ) /* do not clear aliased atoms.*/
805                         ctab->MolAtom[j].cCharge = ctab->MolAtom[j].cRadical = '\0';
806                 }
807                 charge_encountered = 1;
808             }
809             for ( j = 0; j < num_entries; j++ ) {
810                 if ( !ctab->MolAtom[atoms[j]-1].cAtomAliasedFlag ) /* do not change aliased atoms.*/
811                     ctab->MolAtom[atoms[j]-1].cCharge = (S_CHAR)charges[j];
812             }
813             continue;
814         charge_error:
815             MOLFILE_ERR_SET (err, 0, "Charge not recognized:");
816             RemoveNonPrintable( line );
817             AddMOLfileError(pStrErr, line);
818             continue; /* ignore for now */
819         }
820         /*-------------------------------------- radical: Generic */
821         if ( !strcmp( szType, "RAD" ) &&
822              0 < mol_read_datum( &num_entries, 3, MOL_SHORT_INT_DATA, &p ) &&
823              1 <= num_entries && num_entries <= 8 ) {
824             S_SHORT atoms[8];
825             S_SHORT radicals[8];
826             if ( !charge_encountered && !radical_encountered ) {
827                 /* first charge or radical record clears all Atom Block */
828                 /* entered charge and radical data to zeroes            */
829                 radical_encountered = -1;
830             }
831             for ( j = 0; j < num_entries; j++ ) {
832                 if ( 0 > mol_read_datum( &atoms[j],     0, MOL_SHORT_INT_DATA, &p ) ||
833                      0 > mol_read_datum( &radicals[j],  0, MOL_SHORT_INT_DATA, &p ) ||
834                      atoms[j]    <=  0 || atoms[j]    > num_atoms ||
835                      radicals[j] <   0 || radicals[j]  > 3 ) {
836                     goto radical_error;
837                 }
838             }
839             if ( radical_encountered == -1 ) {
840                 for ( j = 0; j < num_atoms; j++ ) {
841                     if ( !ctab->MolAtom[j].cAtomAliasedFlag )  /* do not clear aliased atoms. 5-3-99 DCh */
842                         ctab->MolAtom[j].cCharge = ctab->MolAtom[j].cRadical = '\0';
843                 }
844                 radical_encountered = 1;
845             }
846             for ( j = 0; j < num_entries; j++ ) {
847                 if ( !ctab->MolAtom[atoms[j]-1].cAtomAliasedFlag ) { /* do not change aliased atoms. 5-3-99 DCh */
848                     ctab->MolAtom[atoms[j]-1].cRadical = (S_CHAR)radicals[j];
849                 }
850             }
851             continue;
852         radical_error:
853             MOLFILE_ERR_SET (err, 0, "Radical not recognized:");
854             RemoveNonPrintable( line );
855             AddMOLfileError(pStrErr, line);
856             continue; /* ignore error for now */
857         }
858         /*-------------------------------------- isotope: Generic */
859         if ( !strcmp( szType, "ISO" ) &&
860              0 < mol_read_datum( &num_entries, 3, MOL_SHORT_INT_DATA, &p ) &&
861              1 <= num_entries && num_entries <= 8 ) {
862             S_SHORT atoms[8];
863             S_SHORT iso_mass[8]; /*  contains istotope mass number, not difference. 7-14-00 DCh. */
864             if ( !isotope_encountered  ) {
865                 /* first charge or radical record clears all Atom Block */
866                 /* entered charge and radical data to zeroes            */
867                 isotope_encountered = -1;
868             }
869             for ( j = 0; j < num_entries; j++ ) {
870                 if ( 0 > mol_read_datum( &atoms[j],     0, MOL_SHORT_INT_DATA, &p ) ||
871                      0 > mol_read_datum( &iso_mass[j],  0, MOL_SHORT_INT_DATA, &p ) ||
872                      atoms[j]    <=  0 || atoms[j]    > num_atoms
873                      /*|| iso_mass[j] < -18 || iso_mass[j]  > 12*/ ) {
874                     /* goto isotope_error; */
875                     atoms[j] = -1; /*  flag error */
876                     MOLFILE_ERR_SET (err, 0, "Isotopic data not recognized:");
877                     RemoveNonPrintable( line );
878                     AddMOLfileError(pStrErr, line);
879                     continue; /* ignore isotopic error for now */
880                 }
881             }
882             if ( isotope_encountered == -1 ) {
883                 for ( j = 0; j < num_atoms; j++ ) {
884                     /*if ( !ctab->MolAtom[j].cAtomAliasedFlag )*/  /* clear even aliased atoms */
885                         ctab->MolAtom[j].cMassDifference = 0;
886                 }
887                 isotope_encountered = 1;
888             }
889             for ( j = 0; j < num_entries; j++ ) {
890                 if ( atoms[j] <= 0 )
891                     continue; /* ignore isotopic error for now */
892                 if ( 1 /* !ctab->MolAtom[atoms[j]-1].cAtomAliasedFlag */) {
893                     char *at =  ctab->MolAtom[atoms[j]-1].szAtomSymbol;
894                     if ( at[1] || at[0] != 'D' && at[0] != 'T' ) {  /*  D & T cannot have ISO */
895                         /*  need atomic weight to calculate isotope difference. 7-14-00 DCh. */
896 #ifdef TARGET_EXE_USING_API
897                         /*^^^ Check added 5-10-2008 - IPl */
898                         if (iso_mass[j] > 0)
899                             /* According to MDL specification, p.12, only a positive
900                             integer is allowed. And yes, there appeared some MOL/SD
901                             files contaning here a negative value. This manifested
902                             in mismatch in InChI_MAIN vs. cInChI-1/stdinchi-1 results.
903                             */
904                             ctab->MolAtom[atoms[j]-1].cMassDifference = iso_mass[j]; /* mass, not difference */
905 
906 #else
907                         int  atw, atw_diff;
908                         /*^^^
909                         NB: According to MDL specification, difference should be in
910                         [-18; +12] range, not in [-19; +19] as is checked below. */
911                         if ( (atw = get_atw( at )) && abs( atw_diff = (int)iso_mass[j] - atw ) < 20 ) {
912                             ctab->MolAtom[atoms[j]-1].cMassDifference = (char)(atw_diff? atw_diff : ZERO_ATW_DIFF);
913                         }
914 #endif
915                     }
916                 }
917             }
918             continue;
919         }
920     }
921 err_fin:
922     return err;
923 }
924 /************ global *************************************************************/
/*
 * Release a MOL_DATA record together with the atom, bond and coordinate
 * arrays attached to its connection table.
 *
 * mol_data  record to destroy; NULL is accepted and nothing is freed.
 *
 * Returns NULL in every case, so a caller may write
 *     p = delete_mol_data( p );
 */
MOL_DATA* delete_mol_data( MOL_DATA* mol_data )
{
    MOL_CTAB *pCtab;

    if ( NULL == mol_data ) {
        return NULL;
    }

    pCtab = &mol_data->ctab;
    if ( NULL != pCtab->MolAtom ) {
        inchi_free( pCtab->MolAtom );
    }
    if ( NULL != pCtab->MolBond ) {
        inchi_free( pCtab->MolBond );
    }
    if ( NULL != pCtab->szCoord ) {
        inchi_free( pCtab->szCoord );
    }
    inchi_free( mol_data );

    return NULL;
}
939 /************* global ************************************************************/
/*  Completely ignore the STEXT block, queries, and 3D features
 */
read_mol_file(FILE * inp,MOL_HEADER_BLOCK * OnlyHeaderBlock,MOL_CTAB * OnlyCtab,int bGetOrigCoord,int * err,char * pStrErr)942 MOL_DATA* read_mol_file( FILE* inp, MOL_HEADER_BLOCK *OnlyHeaderBlock, MOL_CTAB *OnlyCtab,
943                          int bGetOrigCoord, int *err, char *pStrErr )
944 {
945     MOL_DATA* mol_data = NULL;
946     int       ret      = 0, prev_ret, bEndOfData = 0;
947     int       bReadAll = ( OnlyHeaderBlock == NULL );
948     MOL_CTAB  ctab,  *pCtab = NULL;
949     MOL_HEADER_BLOCK *pHdr  = NULL;
950 
951     *err = 0;
952     if ( bReadAll ) {
953         if ( NULL == ( mol_data = ( MOL_DATA* )inchi_calloc( 1, sizeof(MOL_DATA) ) ) ){
954             ret = 1; /* can't allocate mol_data structure */
955             AddMOLfileError( pStrErr, "Out of RAM" );
956             goto err_fin;
957         }
958         pHdr  = &mol_data->hdr;
959         pCtab = &mol_data->ctab;
960     } else {
961         pHdr  = OnlyHeaderBlock;
962         pCtab = OnlyCtab? OnlyCtab : &ctab;
963         memset( pHdr,  0, sizeof( MOL_HEADER_BLOCK ) );
964         memset( pCtab, 0, sizeof( MOL_CTAB ) );
965     }
966     pCtab->MolBond = NULL;
967     pCtab->MolAtom = NULL;
968     pCtab->szCoord = NULL;
969 
970     if ( 0 != ( ret = mol_read_hdr(pHdr, inp, pStrErr) ) ){
971         ret += 10;
972         goto err_fin; /*  most probably end of file */
973     }
974     if ( 0 != ( ret = mol_read_counts_line( pCtab , inp, pStrErr) ) ){
975         ret += 20;
976         goto err_fin;
977     }
978 
979     if ( bReadAll ) {
980         if ( NULL == ( mol_data->ctab.MolAtom = (MOL_ATOM*)inchi_calloc(inchi_max(mol_data->ctab.nNumberOfAtoms,1), sizeof(MOL_ATOM)) ) ){
981             ret = 2; /* can't allocate MolAtom structure */
982             MOLFILE_ERR_FIN (ret, 2, err_fin, "Out of RAM");
983         }
984         if ( bGetOrigCoord &&
985              NULL == ( mol_data->ctab.szCoord = (MOL_COORD*)inchi_calloc(inchi_max(mol_data->ctab.nNumberOfAtoms,1), sizeof(MOL_COORD)) ) ){
986             ret = 2; /* can't allocate MolAtom structure */
987             MOLFILE_ERR_FIN (ret, 2, err_fin, "Out of RAM");
988         }
989     }
990     if ( 0 != ( ret = read_atom_block(pCtab, inp, ret, pStrErr) ) ){
991         if ( ret < 0 ) {
992             ret = -ret;
993             bEndOfData = 1;
994         }
995         ret += 30;
996         /* goto err_fin; */
997     }
998 
999     if ( bReadAll && ret < 30 ) {
1000         if ( !bEndOfData && NULL == ( mol_data->ctab.MolBond = (MOL_BONDS*)inchi_calloc(inchi_max(mol_data->ctab.nNumberOfBonds,1), sizeof(MOL_BONDS)) ) ){
1001             ret = 3; /* can't allocate MolBond structure */
1002             MOLFILE_ERR_FIN (ret, 3, err_fin, "Out of RAM");
1003         }
1004     }
1005     prev_ret = ret;
1006     if ( !bEndOfData && 0 != ( ret = read_bonds_block(pCtab, inp, ret, pStrErr) ) ){
1007         if ( ret < 0 ) {
1008             ret = -ret;
1009             bEndOfData = 1;
1010         }
1011         ret = prev_ret? prev_ret : ret + 40;
1012     }
1013     prev_ret = ret;
1014     if ( !bEndOfData && 0 != ( ret = read_stext_block(pCtab, inp, ret, pStrErr) ) ){
1015         ret = prev_ret? prev_ret : ret + 50;
1016     }
1017     prev_ret = ret;
1018     if ( !bEndOfData && 0 != ( ret = read_properties_block(pCtab, pHdr, inp, ret, pStrErr) ) ){
1019         if ( ret < 0 ) {
1020             ret = -ret;
1021             bEndOfData = 1;
1022         }
1023         ret = prev_ret? prev_ret : ret + 60;
1024     }
1025 
1026 err_fin:
1027     *err = bEndOfData? -ret : ret;
1028     if ( bReadAll ) {
1029         if ( ret )
1030             mol_data = delete_mol_data( mol_data ); /* delete all results */
1031         return mol_data;
1032     } else {
1033         if ( ret )
1034             return NULL;
1035         else
1036             return (MOL_DATA*)OnlyHeaderBlock;
1037     }
1038 }
1039 
1040 /******************************************************************/
/* SDF data header labels that identify_sdf_label() recognizes by exact match */
static const char sdf_data_hdr_name[] = "NAME";
static const char sdf_data_hdr_comm[] = "COMMENT";

/* States of the SDF data item parser in bypass_sdf_data_items() */
enum {
    SDF_START,                 /* nothing has been read yet                        */
    SDF_DATA_HEADER,           /* data header with a label of no special interest  */
    SDF_DATA_HEADER_NAME,      /* data header labelled "NAME"                      */
    SDF_DATA_HEADER_COMMENT,   /* data header labelled "COMMENT"                   */
    SDF_DATA_HEADER_CAS,       /* data header whose label starts with "CAS"        */
    SDF_DATA_HEADER_USER,      /* data header carrying the user-requested label    */
    SDF_DATA_LINE,             /* a non-empty data line has been read              */
    SDF_END_OF_DATA_ITEM,      /* an empty line terminated the current data item   */
    SDF_EMPTY_LINE,            /* additional empty line between data items         */
    SDF_END_OF_DATA_BLOCK      /* the SDF_END_OF_DATA line has been read           */
};
1047 /********** static ********************************************************/
/*
 * Convert a CAS registry number written with hyphens (e.g. "50-00-0")
 * into a long.
 *
 * line  text to parse; it is rewritten IN PLACE: digits are packed to the
 *       left, hyphens are dropped (a '-' in the very first position is
 *       kept as a sign) and the string is cut at the first character that
 *       is neither a digit nor a hyphen.
 *
 * Returns strtol() of the packed string; 0 if no digits were found.
 */
long extract_cas_rn( char *line )
{
    char *src = line;   /* next character to examine        */
    char *dst = line;   /* where the next digit is stored   */

    if ( '-' == *line ) {
        /* keep a leading minus where it is */
        src ++;
        dst ++;
    }
    while ( *src ) {
        if ( isdigit( (int)(unsigned char)*src ) ) {
            *dst++ = *src;
        } else
        if ( '-' != *src ) {
            break;  /* anything but a digit or a hyphen ends the number */
        }
        src ++;
    }
    *dst = '\0';

    return strtol( line, NULL, 10 );
}
1063 /********** static ********************************************************/
identify_sdf_label(char * inp_line,const char * pSdfLabel)1064 int identify_sdf_label( char* inp_line, const char *pSdfLabel )
1065 {
1066     char line[MOLFILEMAXLINELEN];
1067     char *p, *q;
1068     int  i, j, len;
1069     if ( (p = strchr( inp_line, '<' )) &&
1070          (q = strchr( p,        '>' )) &&
1071          (len = q-p-1) > 0 && len < (int)sizeof(line) ) {
1072         memcpy( line, p+1, len );
1073         line[len] = '\0';
1074         for ( i = 0; isspace( UCINT line[i] ); i ++ )
1075             ;
1076         for ( j = len-1; j >= i && isspace( UCINT line[i] ); j -- )
1077             ;
1078         len = j-i+1;
1079         p = line+i;
1080         if ( pSdfLabel && pSdfLabel[0] && len == (int)strlen(pSdfLabel) && !memicmp( p, pSdfLabel, len ) )
1081             return SDF_DATA_HEADER_USER;
1082         if ( len == sizeof(sdf_data_hdr_name)-1 && !memicmp( p, sdf_data_hdr_name, len ) )
1083             return SDF_DATA_HEADER_NAME;
1084         if ( len == sizeof(sdf_data_hdr_comm)-1 && !memicmp( p, sdf_data_hdr_comm, len ) )
1085             return SDF_DATA_HEADER_COMMENT;
1086         if ( !memicmp( p, "CAS", 3 ) )
1087             return SDF_DATA_HEADER_CAS;
1088     }
1089     return SDF_DATA_HEADER;
1090 }
1091 /************* global *****************************************************/
bypass_sdf_data_items(FILE * inp,long * cas_reg_no,char * comment,int lcomment,char * name,int lname,int prev_err,const char * pSdfLabel,char * pSdfValue,char * pStrErr)1092 int bypass_sdf_data_items( FILE* inp, long *cas_reg_no, char* comment,
1093                            int lcomment, char *name, int lname, int prev_err,
1094                            const char *pSdfLabel, char *pSdfValue, char *pStrErr )
1095 {
1096     char line[MOLFILEINPLINELEN];
1097     const int line_len = sizeof(line);
1098     int   err = 0;
1099     int   current_state = SDF_START;
1100     int   n_blank_lines = 0;
1101     int   n_lines       = 0;
1102     char* p = NULL;
1103     int   bNeedsName   = name && lname > 0 && !name[0];
1104     int   bNeedsComm   = comment && lcomment > 0 && !comment[0];
1105     int   bNeedsUser   = pSdfLabel && pSdfLabel[0] && pSdfValue;
1106     int   bNeedsCASrn  = 0;
1107     int   bCASrnIsUser = 0;
1108 
1109     if ( cas_reg_no != NULL ) {
1110         bNeedsCASrn = 1;
1111         *cas_reg_no = 0;
1112         bCASrnIsUser = (bNeedsUser && !memicmp(pSdfLabel,"CAS", 3));
1113     }
1114 
1115     while ( err           == 0                    &&
1116             current_state !=SDF_END_OF_DATA_BLOCK &&
1117             NULL != ( p = inchi_fgetsLf( line, line_len, inp ) ) ) {
1118 
1119         if ( !n_lines && !memcmp(line, "M  END", 6) ) {
1120             continue; /*  allow subtle errors */
1121         }
1122         n_lines++;
1123 
1124         remove_trailing_spaces( line );
1125         if ( line[MOLFILEMAXLINELEN] ){
1126             if ( current_state != SDF_DATA_HEADER &&
1127                  current_state != SDF_DATA_LINE   &&
1128                  current_state != SDF_DATA_HEADER_NAME &&
1129                  current_state != SDF_DATA_HEADER_USER &&
1130                  current_state != SDF_DATA_HEADER_COMMENT ) {
1131                 line[MOLFILEMAXLINELEN] = '\0';
1132                 if ( !prev_err ) {
1133                     MOLFILE_ERR_SET (err, 0, "Too long SData line truncated");
1134                 }
1135             } else {
1136                 /* allow long lines in SDF data. 9-29-00 DCh */
1137                 line[MOLFILEMAXLINELEN] = '\0';
1138             }
1139         }
1140 
1141         n_blank_lines += ( *line == '\0' );
1142 
1143         switch( current_state ) {
1144 
1145         case SDF_START:
1146         case SDF_END_OF_DATA_ITEM:
1147         case SDF_EMPTY_LINE:              /* Added 9-25-97 DCh */
1148 
1149             if ( 0 == strcmp( line, SDF_END_OF_DATA ) ) {
1150                 current_state = SDF_END_OF_DATA_BLOCK;
1151             }
1152             else
1153             if ( '>' == *line ) {
1154                 current_state = ( bNeedsName || bNeedsComm || bNeedsCASrn || bNeedsUser )? identify_sdf_label(line, pSdfLabel) : SDF_DATA_HEADER;
1155             }else
1156             if ( *line == '\0' ) { /* Added 9-25-97 DCh */
1157                 /* Relax the strictness: Allow more than 1 empty line. */
1158                 current_state=SDF_EMPTY_LINE;
1159             } else
1160             if ( !prev_err ) {
1161                 MOLFILE_ERR_SET (err, 3, "Unexpected SData header line:");
1162                 RemoveNonPrintable( line );
1163                 AddMOLfileError(pStrErr, line);
1164                 /* unexpected contents of data header line */
1165             } else {
1166                 err = 3;
1167             }
1168             break;
1169 
1170         case SDF_DATA_HEADER_NAME:
1171              if ( bNeedsName && 0 < normalize_name( line ) ) {
1172                 bNeedsName = 0;
1173                 mystrncpy( name, line, lname );
1174              }
1175              goto got_data_line;
1176 
1177         case SDF_DATA_HEADER_COMMENT:
1178              if ( bNeedsComm && 0 < normalize_name( line ) ) {
1179                 bNeedsComm = 0;
1180                 mystrncpy( comment, line, lcomment );
1181              }
1182              goto got_data_line;
1183 
1184         case SDF_DATA_HEADER_USER:
1185              if ( bNeedsUser && 0 < normalize_name( line ) ) {
1186                  bNeedsUser = 0;
1187                  mystrncpy( pSdfValue, line, MAX_SDF_VALUE+1 );
1188                  if ( bCASrnIsUser && bNeedsCASrn ) {
1189                      *cas_reg_no = extract_cas_rn( line );
1190                      bNeedsCASrn = (0 == *cas_reg_no);
1191                  }
1192              }
1193              goto got_data_line;
1194 
1195         case SDF_DATA_HEADER_CAS:
1196              if ( bNeedsCASrn && 0 < normalize_name( line ) ) {
1197                  *cas_reg_no = extract_cas_rn( line );
1198                  bNeedsCASrn = (0 == *cas_reg_no);
1199              }
1200              goto got_data_line;
1201 
1202         case SDF_DATA_HEADER:
1203         case SDF_DATA_LINE:
1204 got_data_line:
1205             current_state = *line? SDF_DATA_LINE : SDF_END_OF_DATA_ITEM;
1206             break;
1207 
1208         }
1209     }
1210     if ( 0 == err && SDF_END_OF_DATA_BLOCK != current_state && NULL == p )
1211         ; /* err = 4; */ /* unexpected end of file: missing $$$$ */
1212     else
1213     if (err && ( n_blank_lines == n_lines && *line == '\0' ) )
1214         err = 5; /* empty lines -- do not know when this can happen */
1215 
1216     if ( err && err != 5 && current_state != SDF_END_OF_DATA_BLOCK && p ) {
1217         /*  bypass up to $$$$ */
1218         while ( ( p = inchi_fgetsLf( line, line_len, inp ) ) && memcmp( line, SDF_END_OF_DATA, 4 ) )
1219             ;
1220         if ( p ) {
1221             err = 9; /*  bypassed to $$$$; non-fatal */
1222             AddMOLfileError(pStrErr, "Bypassing to next structure");
1223         }
1224 
1225     }
1226 
1227     return err;
1228 }
1229 /**************** global **************************************************/
read_sdfile_segment(FILE * inp,MOL_HEADER_BLOCK * OnlyHeaderBlock,MOL_CTAB * OnlyCtab,int bGetOrigCoord,char * pname,int lname,long * Id,const char * pSdfLabel,char * pSdfValue,int * err,char * pStrErr)1230 MOL_DATA* read_sdfile_segment(FILE* inp, MOL_HEADER_BLOCK *OnlyHeaderBlock, MOL_CTAB *OnlyCtab,
1231                               int bGetOrigCoord,
1232                               char *pname, int lname,
1233                               long *Id, const char *pSdfLabel, char *pSdfValue,
1234                               int *err, char *pStrErr )
1235 {
1236     MOL_DATA* mol_data = read_mol_file( inp, OnlyHeaderBlock, OnlyCtab, bGetOrigCoord, err, pStrErr );
1237     int       err_bypass_sdf = 0;
1238 
1239     if ( pname && lname ) {
1240         pname[0] = '\0';
1241     }
1242     if ( Id ) {
1243         *Id = 0L;  /* ignore for now */
1244     }
1245     /* if ( mol_data && !*err ) { */
1246     if ( *err < 0 ) {
1247         *err = -*err; /* end of data encountered */
1248     } else {
1249         err_bypass_sdf = bypass_sdf_data_items( inp, Id, NULL, 0, pname, lname, *err, pSdfLabel, pSdfValue, pStrErr );
1250         if ( err_bypass_sdf ) {
1251             *err = err_bypass_sdf; /* important to continue to the next good structure */
1252         }
1253     }
1254     /* } */
1255     return mol_data;
1256 }
1257 /******************* global *********************************************************/
CopyMOLfile(FILE * inp_file,long fPtrStart,long fPtrEnd,FILE * prb_file,long lNumb)1258 int CopyMOLfile(FILE *inp_file, long fPtrStart, long fPtrEnd, FILE *prb_file, long lNumb)
1259 {
1260     char line[MOLFILEINPLINELEN], *p;
1261     long fPtr;
1262     int  ret = 1;
1263     char szNumber[32];
1264 
1265     if ( inp_file && prb_file && fPtrStart >= 0L &&
1266          fPtrEnd > fPtrStart &&
1267          0 == fseek( inp_file, fPtrStart, SEEK_SET ) ) {
1268 
1269         while ( fPtrEnd > (fPtr = ftell(inp_file)) && fPtr >= 0L &&
1270                 inchi_fgetsLf( line, sizeof(line)-1, inp_file ) ) {
1271             line[sizeof(line)-1] = '\0'; /*  unnecessary extra precaution */
1272             if ( fPtr == fPtrStart && lNumb ) {
1273                 int len;
1274                 LtrimRtrim( line, &len );
1275                 len = sprintf( szNumber, "#%ld%s", lNumb, len?"/":"" );
1276                 mystrncpy( line+len, line, sizeof(line)-len-1 );
1277                 memcpy( line, szNumber, len );
1278             }
1279             if ( !strchr(line, '\n') ) {
1280                 p = line+strlen(line);
1281                 p[0] = '\n';
1282                 p[1] = '\0';
1283             }
1284             fputs( line, prb_file );
1285         }
1286         ret = fseek( inp_file, fPtrEnd, SEEK_SET );
1287     }
1288     return ret;
1289 }
1290