1 /*
2  * International Chemical Identifier (InChI)
3  * Version 1
4  * Software version 1.04
5  * September 9, 2011
6  *
7  * The InChI library and programs are free software developed under the
8  * auspices of the International Union of Pure and Applied Chemistry (IUPAC).
9  * Originally developed at NIST. Modifications and additions by IUPAC
10  * and the InChI Trust.
11  *
12  * IUPAC/InChI-Trust Licence for the International Chemical Identifier (InChI)
13  * Software version 1.0.
14  * Copyright (C) IUPAC and InChI Trust Limited
15  *
16  * This library is free software; you can redistribute it and/or modify it under the
17  * terms of the IUPAC/InChI Trust Licence for the International Chemical Identifier
18  * (InChI) Software version 1.0; either version 1.0 of the License, or
19  * (at your option) any later version.
20  *
21  * This library is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
24  * See the IUPAC/InChI Trust Licence for the International Chemical Identifier (InChI)
25  * Software version 1.0 for more details.
26  *
27  * You should have received a copy of the IUPAC/InChI Trust Licence for the
28  * International Chemical Identifier (InChI) Software version 1.0 along with
29  * this library; if not, write to:
30  *
31  * The InChI Trust
32  * c/o FIZ CHEMIE Berlin
33  * Franklinstrasse 11
34  * 10587 Berlin
35  * GERMANY
36  *
37  */
38 
39 
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 
44 #include "mode.h"
45 
46 #include "inpdef.h"
47 #include "extr_ct.h"
48 #include "ichitaut.h"
49 #include "ichinorm.h"
50 #include "ichierr.h"
51 #include "util.h"
52 
53 #include "ichicomp.h"
54 
55 #ifndef COMPILE_ALL_CPP
56 #ifdef __cplusplus
57 extern "C" {
58 #endif
59 #endif
60 
61 /* defined in ichisort.c, prototype in ichicomn.h */
62 int insertions_sort_AT_RANK( AT_RANK *base, int num );
63 
64 #ifndef COMPILE_ALL_CPP
65 #ifdef __cplusplus
66 }
67 #endif
68 #endif
69 
70 /* local prototypes */
71 int cmp_iso_atw_diff_component_no( const void *a1, const void *a2 );
72 
73 /********************************************************************************/
cmp_iso_atw_diff_component_no(const void * a1,const void * a2)74 int cmp_iso_atw_diff_component_no( const void *a1, const void *a2 )
75 {
76     int ret = (int)((const inp_ATOM*)a1)->iso_atw_diff - (int)((const inp_ATOM*)a2)->iso_atw_diff;
77     if ( !ret ) /*  make the sort stable */
78         ret = (int)((const inp_ATOM*)a1)->component - (int)((const inp_ATOM*)a2)->component;
79     return ret;
80 }
81 typedef struct tagTreeAtom {
82     AT_NUMB    neighbor[MAXVAL];        /* positions (from 0) of the neighbors in the inp_ATOM array */
83     S_CHAR     valence;                 /* number of bonds = number of neighbors */
84     AT_NUMB    nRingSystem;
85     AT_NUMB    nBlockSystem;
86     S_CHAR     bCutVertex;
87 } tre_ATOM;
88 
89 #if ( FIND_RING_SYSTEMS == 1 ) /* { */
90 /********************************************************************************/
MarkRingSystemsInp(inp_ATOM * at,int num_atoms,int start)91 int MarkRingSystemsInp( inp_ATOM *at, int num_atoms, int start )
92 {
93     AT_NUMB   *nStackAtom = NULL;
94     int        nTopStackAtom=-1;
95     AT_NUMB   *nRingStack = NULL;
96     int        nTopRingStack=-1; /* was AT_NUMB */
97     AT_NUMB   *nDfsNumber = NULL;
98     AT_NUMB   *nLowNumber = NULL;
99     S_CHAR    *cNeighNumb = NULL;
100     AT_NUMB    nDfs;
101 #if ( FIND_RINS_SYSTEMS_DISTANCES == 1 )
102     AT_NUMB    nRs, *nRsConnect = NULL;
103     int        k;
104     AT_NUMB   *tree = NULL;
105     int        nNumConnect, nMaxNumConnect, nLenConnect;
106 #endif
107     AT_NUMB    nNumAtInRingSystem;
108     int        i, j, u, /*start,*/ nNumRingSystems, nNumStartChildren;
109 
110     /*  allocate arrays */
111     nStackAtom = (AT_NUMB *) inchi_malloc(num_atoms*sizeof(nStackAtom[0]));
112     nRingStack = (AT_NUMB *) inchi_malloc(num_atoms*sizeof(nRingStack[0]));
113     nDfsNumber = (AT_NUMB *) inchi_malloc(num_atoms*sizeof(nDfsNumber[0]));
114     nLowNumber = (AT_NUMB *) inchi_malloc(num_atoms*sizeof(nLowNumber[0]));
115     cNeighNumb = (S_CHAR  *) inchi_malloc(num_atoms*sizeof(cNeighNumb[0]));
116 #if ( FIND_RINS_SYSTEMS_DISTANCES == 1 )
117     nRsConnect = (AT_NUMB *)inchi_calloc(3*num_atoms+3,sizeof(nRsConnect[0]));
118 #endif
119     /*  check allocation */
120     if ( !nStackAtom || !nRingStack || !nDfsNumber || !nLowNumber || !cNeighNumb
121 #if ( FIND_RINS_SYSTEMS_DISTANCES == 1 )
122         || !nRsConnect
123 #endif
124         ) {
125         nNumRingSystems = CT_OUT_OF_RAM;  /*  program error */ /*   <BRKPT> */
126         goto exit_function;
127     }
128 
129     /********************************************
130      *
131      * Find Cut-vertices & Blocks
132      *
133      ********************************************/
134 
135     /*  initiation */
136     /*start           = 0;*/
137     nNumRingSystems = 0;
138     u               = start; /*  start atom */
139     nDfs            = 0;
140     nTopStackAtom   =-1;
141     nTopRingStack   =-1;
142     memset( nDfsNumber, 0, num_atoms*sizeof(nDfsNumber[0]));
143     memset( cNeighNumb, 0, num_atoms*sizeof(cNeighNumb[0]));
144     /*  push the start atom on the stack */
145     nLowNumber[u] = nDfsNumber[u] = ++nDfs;
146     nStackAtom[++nTopStackAtom] = (AT_NUMB)u;
147     nRingStack[++nTopRingStack] = (AT_NUMB)u;
148 
149     nNumStartChildren = 0;
150 
151     do {
152         /* advance */
153 advance_block:
154         /*if ( (int)at[i=nStackAtom[nTopStackAtom]].valence > (j = (int)cNeighNumb[i]) )*/
155         /* replaced due to missing sequence point */
156         if ( i=(int)nStackAtom[nTopStackAtom], j = (int)cNeighNumb[i], (int)at[i].valence > j )
157         {
158             cNeighNumb[i] ++;
159             u = (int)at[i].neighbor[j];
160             if ( !nDfsNumber[u] ) {
161                 /* tree edge, 1st visit -- advance */
162                 nStackAtom[++nTopStackAtom] = (AT_NUMB)u;
163                 nRingStack[++nTopRingStack] = (AT_NUMB)u;
164                 nLowNumber[u] = nDfsNumber[u] = ++nDfs;
165                 nNumStartChildren += (i == start);
166             } else
167             if ( !nTopStackAtom || u != (int)nStackAtom[nTopStackAtom-1] ) { /*  may comment out ? */
168                 /* back edge: u is not a predecessor of i */
169                 if ( nDfsNumber[u] < nDfsNumber[i] ) {
170                     /* Back edge, 1st visit: u is an ancestor of i. Compare */
171                     if ( nLowNumber[i] > nDfsNumber[u] ) {
172                         nLowNumber[i] = nDfsNumber[u];
173                     }
174                 }
175             }                                                                /*  may comment out ? */
176             goto advance_block;
177         } else {
178             cNeighNumb[i] = 0;
179         }
180 
181         /* back up */
182         if ( i != start ) {
183             u = (int)nStackAtom[nTopStackAtom-1]; /* predecessor of i */
184             if ( nLowNumber[i] >= nDfsNumber[u] ) {
185                 /* output the block */
186                 nNumRingSystems ++;
187                 at[u].nBlockSystem = nNumRingSystems;
188                 if ( u != start || nNumStartChildren > 1 ) {
189                     at[u].bCutVertex += 1;
190                 }
191                 while ( nTopRingStack >= 0 ) {
192                     j = nRingStack[nTopRingStack--];
193                     at[j].nBlockSystem = nNumRingSystems; /*  mark the atom */
194                     if ( i == j ) {
195                         break;
196                     }
197                 }
198             } else
199             if ( nLowNumber[u] > nLowNumber[i] ) {
200                 /* inherit */
201                 nLowNumber[u] = nLowNumber[i];
202             }
203         }
204     } while ( --nTopStackAtom >= 0 );
205 
206 
207     /********************************************
208      *
209      * Find Ring Systems
210      * Including chain atoms X: A-X-B, where the bonds (of any kind) are bridges.
211      *
212      ********************************************/
213 
214     /*  initiation */
215     /*start           = 0;*/
216     nNumRingSystems = 0;
217     u               = start; /*  start atom */
218     nDfs            = 0;
219     nTopStackAtom   =-1;
220     nTopRingStack   =-1;
221     memset( nDfsNumber, 0, num_atoms*sizeof(nDfsNumber[0]));
222     memset( cNeighNumb, 0, num_atoms*sizeof(cNeighNumb[0]));
223     /*  push the start atom on the stack */
224     nLowNumber[u] = nDfsNumber[u] = ++nDfs;
225     nStackAtom[++nTopStackAtom] = (AT_NUMB)u;
226     nRingStack[++nTopRingStack] = (AT_NUMB)u;
227 #if ( FIND_RINS_SYSTEMS_DISTANCES == 1 )
228     nNumConnect = nLenConnect = nMaxNumConnect = 0;
229 #endif
230 
231     do {
232         /* advance */
233 advance_ring:
234         /*if ( (int)at[i=nStackAtom[nTopStackAtom]].valence > (j = (int)cNeighNumb[i]) )*/
235         /* replaced due to missing sequence point */
236         if ( i=(int)nStackAtom[nTopStackAtom], j = (int)cNeighNumb[i], (int)at[i].valence > j )
237         {
238             cNeighNumb[i] ++;
239             u = (int)at[i].neighbor[j];
240             if ( !nDfsNumber[u] ) {
241                 /* tree edge, 1st visit -- advance */
242                 nStackAtom[++nTopStackAtom] = (AT_NUMB)u;
243                 nRingStack[++nTopRingStack] = (AT_NUMB)u;
244                 nLowNumber[u] = nDfsNumber[u] = ++nDfs;
245             } else
246             if ( !nTopStackAtom || u != (int)nStackAtom[nTopStackAtom-1] ) {
247                 /* back edge: u is not a predecessor of i */
248                 if ( nDfsNumber[u] < nDfsNumber[i] ) {
249                     /* Back edge, 1st visit: u is ancestor of i. Compare */
250                     if ( nLowNumber[i] > nDfsNumber[u] ) {
251                         nLowNumber[i] = nDfsNumber[u];
252                     }
253                 }
254             }
255             goto advance_ring;
256         } else {
257             cNeighNumb[i] = 0;
258         }
259 
260         /* back up */
261         if ( nDfsNumber[i] == nLowNumber[i] ) {
262             /*  found a ring system */
263             nNumRingSystems ++;
264             /*  unwind nRingStack[] down to i */
265 #if ( FIND_RINS_SYSTEMS_DISTANCES == 1 )
266             nNumConnect = 2;
267             /* data structure: for each ring system nRsConnect[] contains:
268              * 1) nNumConnect+1 = (number of already discovered neighboring "ring systems" + 1)+1
269              * 2) nNumAtInRingSystem
270              * 3) (nNumConnect-1) numbers (IDs) of neighboring ring systems.
271              * BFS guarantees that each neighboring ring system is encountered only one time
272              * Number of all neighboring ring systems = (nNumConnect-1)+1 = nNumConnect
273              * (One additional ring system is where the BFS retracts from the vertex #i,
274              * except when i=DFS root node. In the latter case there is/are only (nNumConnect-1)
275              * neighboring ring system(s).
276              */
277 #endif
278             /*  count atoms in a ring system */
279             for ( nNumAtInRingSystem = 0, j =  nTopRingStack; 0 <= j; j -- ) {
280                 nNumAtInRingSystem ++;
281                 if ( i == (int)nRingStack[j] ) {
282                     break;
283                 }
284             }
285             while ( nTopRingStack >= 0 ) {
286                 j = (int)nRingStack[nTopRingStack--];
287                 at[j].nRingSystem        = (AT_NUMB)nNumRingSystems; /*  ring system id */
288                 at[j].nNumAtInRingSystem = nNumAtInRingSystem;
289 #if ( FIND_RINS_SYSTEMS_DISTANCES == 1 )
290                 for ( k = 0; k < at[j].valence; k ++ ) {
291                     if ( (nRs = at[at[j].neighbor[k]].nRingSystem) && (int)nRs != nNumRingSystems ) {
292                         nRsConnect[nLenConnect + (nNumConnect++)] = nRs; /*  adjacent ring system id */
293                     }
294                 }
295 #endif
296                 if ( i == j ) {
297                     /*  reached atom on the top of nStackAtom[] stack  */
298                     break;
299                 }
300             }
301 #if ( FIND_RINS_SYSTEMS_DISTANCES == 1 )
302             nRsConnect[nLenConnect] = nNumConnect;
303             nRsConnect[nLenConnect+1] = nNumAtInRingSystem;
304             nLenConnect += nNumConnect;
305             if ( nMaxNumConnect < nNumConnect ) {
306                 /*  max number of neighboring ring systems */
307                 nMaxNumConnect = nNumConnect;
308             }
309 #endif
310         } else
311         if ( nTopStackAtom > 0 ) {
312             j = (int)nStackAtom[nTopStackAtom-1];
313             /* inherit nLowNumber */
314             if ( nLowNumber[j] > nLowNumber[i] ) {
315                 nLowNumber[j] = nLowNumber[i];
316             }
317         }
318     } while ( --nTopStackAtom >= 0 );
319 
320 #if ( FIND_RINS_SYSTEMS_DISTANCES == 1 ) /*  normally disabled */
321     nMaxNumConnect ++;
322     if ( nNumRingSystems > 1 ) {
323         int nCol      = nMaxNumConnect+1;
324         int nNumInSyst= nMaxNumConnect;
325         int nMaxNeigh = nMaxNumConnect-1;
326 #define T(a,b) tree[(a)*nCol+b]
327         if ( tree = (AT_NUMB *)inchi_calloc( nCol * (nNumRingSystems+1), sizeof(tree[0])) ) {
328             int len, neigh;
329             /*  reuse previous allocations */
330             AT_NUMB *nNumVisitedNeighbors  = nStackAtom;
331             AT_NUMB *nDistanceFromTerminal = nRingStack;
332             AT_NUMB *nCurrActiveRingSystem = nDfsNumber;
333             AT_NUMB *nNextActiveRingSystem = nLowNumber;
334             int        nNumCurrActiveRingSystems, nNumNextActiveRingSystems, pass;
335             /* build a "condensation graph (actually, a tree)" in which
336              * each vertex corresponds to a ring system T(row, col) = T(ring syst, neighbors)
337              * Number of rows = column length = max. number of ring system neighbors + 2
338              * Number of cols = row length    = number of ring systems + 1
339              * Neighboring ring systems are contiguously stored in a row
340              * T(i,0) = number of neighbors,  1 <= i <= nNumRingSystems;
341              * T(i,k) = number of a neighboring ring system, 1 <= k <= T(i,0)
342              * T(i,nCol-1) = number of atoms in the system #i
343              */
344             for ( i = 1, j = 0; len=nRsConnect[j]; i ++ ) {
345                 T(i, nNumInSyst) = nRsConnect[j+1];
346                 for ( k = 2; k < len; k ++ ) {
347                     neigh = nRsConnect[j+k];
348                     if ( T(i,0) < nMaxNeigh && T(neigh,0) < nMaxNeigh ) {
349                         T(i,0) ++;
350                         T(neigh,0) ++;
351                         T(i,T(i,0))         = neigh;
352                         T(neigh,T(neigh,0)) = i;
353                     } else {
354                         nNumRingSystems = CT_OVERFLOW;  /*  program error */ /*   <BRKPT> */
355                         goto exit_function;
356                     }
357                 }
358                 j += len;
359             }
360             /*  clear memory */
361             memset( nNumVisitedNeighbors,  0, nNumRingSystems*sizeof(nNumVisitedNeighbors[0]) );
362             memset( nDistanceFromTerminal, 0, nNumRingSystems*sizeof(nDistanceFromTerminal[0]) );
363             memset( nCurrActiveRingSystem, 0, nNumRingSystems*sizeof(nCurrActiveRingSystem[0]) );
364             memset( nNextActiveRingSystem, 0, nNumRingSystems*sizeof(nNextActiveRingSystem[0]) );
365             nNumNextActiveRingSystems = 0;
366             for ( i = 0; i < nNumRingSystems; i ++ ) {
367                 if ( 1 == T(i+1,0) ) {
368                     nNextActiveRingSystem[i] = 1; /*  number of traversed neighbors + 1 */
369                     nDistanceFromTerminal[i] = 1;
370                     nNumNextActiveRingSystems ++;
371                 } else {
372                     nNextActiveRingSystem[i] = 0;
373                     nDistanceFromTerminal[i] = 0;
374                 }
375                 nNumVisitedNeighbors[i]  = 0;
376             }
377 
378             /* nCurrActiveRingSystem[i] = a sum of:
379              * 1) +1 if it is or was active
380              * 2) +(number of neighbors from which it was reached)
381              * 3) +1 if it was left and not active anymore
382              */
383             pass = 0;
384             do {
385                 nNumCurrActiveRingSystems = nNumNextActiveRingSystems;
386                 nNumNextActiveRingSystems = 0;
387                 memcpy( nCurrActiveRingSystem, nNextActiveRingSystem,
388                         nNumRingSystems*sizeof(nNextActiveRingSystem[0]));
389                 for ( i = 0; i < nNumRingSystems; i ++ ) {
390                     if ( T(i+1,0) == nCurrActiveRingSystem[i] ) {
391                         /* on the previous pass currently active ring system i+1 bas been reached
392                          * from all neighbors except one;
393                          * the neighbors from which it was reached have
394                          * T(neigh,0)+1 == nCurrActiveRingSystem[i]
395                          * this ring system has not been left yet
396                          */
397                         for ( k = 1, len=T(i+1,0); k <= len; k ++ ) {
398                             neigh = (int)T(i+1,k);
399                             if ( T(neigh,0) >= nCurrActiveRingSystem[neigh-1] ) {
400                                 if ( 0 == pass ) {
401                                     nDistanceFromTerminal[i] = 1;
402                                 }
403                                 break;
404                             }
405                         }
406                         if ( k <= len ) {
407                             /* neigh was not reached from at least 2 neighbors
408                              * walk along -R- chain (T(neigh,0)==2) up to
409                              * 1)  a terminal system, not including it or
410                              * 2)  a branching point.
411                              *
412                              * pass = 0: started from terminal systems:
413                              *     reach the branching point.
414                              * If chain system next to a terminal system has already been reached
415                              * then walk along it according to Note below
416                              *
417                              * pass > 0: started from branching points
418                              * 2a) If the branching point has not been reached from 2 or more neighbors,
419                              *     then include it
420                              * 2b) If the branching point has not been reached from 1 neighbor only,
421                              *     then do not include it: it will be a starting point later
422                              * Note: if a chain atom already has nDistanceFromTerminal[i] > 0, then
423                              *     the last atom should be the one such that
424                              *     its nDistanceFromTerminal[]+1>= nDistanceFromTerminal[] of the
425                              *     next in the chain
426                              */
427                             int bOk = 0;
428                             k = i+1; /*  starting point */
429                             if ( 0 == pass && T(k,nNumInSyst) > 1 ) {
430                                 nNumNextActiveRingSystems ++; /*  request next pass */
431                                 continue; /*  stop a the terminal ring system */
432                             }
433                             while( 2 == T(neigh,0) ) {
434                                 /*  walk along a chain */
435                                 if ( !nNextActiveRingSystem[neigh-1] ) {
436                                     nNextActiveRingSystem[neigh-1] = 1; /*  make neighbor active */
437                                 } else
438                                 if ( nDistanceFromTerminal[k-1]+1 <= nDistanceFromTerminal[neigh-1] ) {
439                                     /*  walking along the chain; already have had a walk */
440                                     /*  in the opposite direction at this pass */
441                                 } else {
442                                      /*  k is the last; neigh (it is a bridge -X-) has not been reached */
443                                     bOk = 1;
444                                     break;
445                                 }
446                                 nNextActiveRingSystem[k-1] ++; /*  leave system k */
447                                 if ( nNextActiveRingSystem[neigh-1] < T(neigh,0) ) {
448                                     nNextActiveRingSystem[neigh-1] ++; /*  add one connection to neigh */
449                                 }
450                                 nDistanceFromTerminal[neigh-1] = nDistanceFromTerminal[k-1]+1;
451                                 j = (T(neigh,1)==k)? 2:1;
452                                 k = neigh;
453                                 neigh = T(k,j); /*  next in the chain */
454                                 nNumNextActiveRingSystems ++;
455                                 if ( T(k,nNumInSyst) > 1 ) {
456                                     bOk = 1;
457                                     break; /*  stop on a ring system */
458                                 }
459                             }
460                             /*  neigh is a terminal or a bridge or a branching point */
461                             if ( 2 > T(neigh,0) ) {
462                                 /*  neighbor is a terminal atom */
463                                 if ( 1 < pass ) {
464                                     nNumRingSystems = CT_UNKNOWN_ERR; /*  error (debug only) */ /*   <BRKPT> */
465                                     goto exit_function;
466                                 }
467                                 continue;
468                             }
469                             if ( 2 == T(neigh,0) ) {
470                                 /*  neighbor is a bridge */
471                                 continue;
472                             }
473                             /*  neighbor is a branching point */
474                             if ( T(neigh,0) > nCurrActiveRingSystem[neigh-1] ) {
475                                 /*  move to the neigh (make neigh active): on previous pass it */
476                                 /*  has not been reached from 2 or more neighbors */
477                                 if ( !nNextActiveRingSystem[neigh-1] ) {
478                                     nNextActiveRingSystem[neigh-1] = 1;
479                                 }
480                                 if ( nDistanceFromTerminal[neigh-1] < nDistanceFromTerminal[k-1]+1 ) {
481                                     nDistanceFromTerminal[neigh-1] = nDistanceFromTerminal[k-1]+1;
482                                 }
483                                 nNextActiveRingSystem[k-1] ++; /*  leave system k */
484                                 if ( nNextActiveRingSystem[neigh-1] < T(neigh,0) ) {
485                                     nNextActiveRingSystem[neigh-1] ++; /*  add one connection to neigh */
486                                 }
487                                 nNumNextActiveRingSystems ++;
488                             }
489                         }
490                     }
491                 }
492                 pass ++;
493             } while ( nNumNextActiveRingSystems );
494 
495             for ( i = 0; i < num_atoms; i ++ ) {
496                 at[i].nDistanceFromTerminal = nDistanceFromTerminal[(int)at[i].nRingSystem-1];
497             }
498 
499             inchi_free( tree );
500             tree = NULL;
501 #undef T
502         } else {
503             nNumRingSystems = CT_OUT_OF_RAM; /*  error */ /*   <BRKPT> */
504             goto exit_function;
505         }
506     }
507 #endif
508 
509 
510 exit_function:
511     if ( nStackAtom )
512         inchi_free( nStackAtom );
513     if ( nRingStack )
514         inchi_free( nRingStack );
515     if ( nDfsNumber )
516         inchi_free( nDfsNumber );
517     if ( nLowNumber )
518         inchi_free( nLowNumber );
519     if ( cNeighNumb )
520         inchi_free( cNeighNumb );
521 #if ( FIND_RINS_SYSTEMS_DISTANCES == 1 )
522     if ( nRsConnect )
523         inchi_free( nRsConnect );
524     if ( tree )
525         inchi_free( tree );
526 #endif
527     return nNumRingSystems;
528 }
529 
530 
531 #endif /* } FIND_RING_SYSTEMS */
532 
533 /********************************************************************************/
534 /*  Return value: new number of atoms > 0 or -1=out of RAM */
remove_terminal_HDT(int num_atoms,inp_ATOM * at,int bFixTermHChrg)535 int remove_terminal_HDT( int num_atoms, inp_ATOM *at, int bFixTermHChrg )
536 {
537     AT_NUMB   *new_ord;
538     inp_ATOM  *new_at;
539     char *p;
540     const static char szHDT[]="HDT";
541     const static int  kMax = sizeof(szHDT); /*  = 4 */
542     int ret = -1;
543     int num_hydrogens=0, num_H = 0;  /*  number of terminal H, D, T */
544     int i, j, k, n, m;
545     int val;
546     AT_RANK new_HydrogenAt_order[NUM_H_ISOTOPES+1];
547     AT_RANK new_OtherNeigh_order[MAXVAL];
548     S_CHAR  old_trans[MAX_NUM_STEREO_BONDS];
549 
550     int  num_OtherNeigh, num_HydrogenAt;
551 
552     new_ord=(AT_NUMB *)inchi_calloc(num_atoms, sizeof(new_ord[0])); /* changed malloc to calloc 9-11-2003 */
553     new_at =(inp_ATOM  *) inchi_malloc(sizeof(new_at[0]) *num_atoms);
554     if (!new_ord || !new_at)
555         goto exit_function;
556 
557     /*  move H. D, T to the end of the list of atoms */
558     for ( i = 0; i < num_atoms; i ++ )
559     {
560         at[i].component = i; /*  temporarily save original numbering */
561         /*  get k = temp. hydrogen isotope/non-hydrogen atom type: */
562         /*  k=0:H, k=2:D, k=3:T, k=4=kMax: not a hydrogen */
563         k = at[i].elname[1]? kMax : (p=(char*)strchr(szHDT, at[i].elname[0]))? p-szHDT : kMax;
564         /*  set hydrogen isotope atw differences */
565         /*  Notes: k-value of isotopic H is incremented to correct iso_atw_diff value later. */
566         /*         1H isotope cannot be detected here. */
567         if ( k == ATW_H || k == ATW_H+1 )
568         {
569             /* D or T, k = 1 or 2 */
570             at[i].elname[0]    = 'H'; /*  hydrogen isotope */
571             at[i].iso_atw_diff = ++k; /*  increment k to make k = iso_atw_diff ( 2 for D, 3 for T ) */
572         }
573         num_H += (k != kMax && at[i].valence == 1 && at[i].chem_bonds_valence == 1 && !NUMH(at,i) );
574     }
575 
576     /* special case: HD, HT, DT, HH: the only non-isotopic H or
577      * the lightest isotopic H out of two is removed
578      * to become implicit (make the heavier H the "central atom").
579      * Note: This must be consistent with mol_to_atom()
580      * treatment of isotopic Hn aliases.
581      */
582     if ( 2 == num_H && 2 == num_atoms && !NUMH(at,0) && !NUMH(at,1) )
583     {
584 
585         if ( at[0].iso_atw_diff >= at[1].iso_atw_diff ) {
586             new_ord[0] = 0;
587             new_ord[1] = 1;
588         } else {
589             new_ord[0] = 1;
590             new_ord[1] = 0;
591         }
592         if ( at[new_ord[1]].charge ) {
593             at[new_ord[0]].charge += at[new_ord[1]].charge;
594             at[new_ord[1]].charge = 0;
595         }
596         new_at[new_ord[0]] = at[0];
597         new_at[new_ord[1]] = at[1];
598         num_hydrogens = 1;
599 
600     }
601     else
602     {
603         /* general case except H-H */
604         for ( i = 0; i < num_atoms; i ++ )
605         {
606             k = (at[i].elname[1] || NUMH(at,i))? kMax : (at[i].elname[0]=='H')? at[i].iso_atw_diff : kMax;
607             if ( k < kMax && at[i].valence == 1 && at[i].chem_bonds_valence == 1 &&
608                  /*  the order of comparison is important */
609                  ((n=(int)at[i].neighbor[0]) > i               /* at[n] has not been encountered yet*/ ||
610                   (int)new_ord[n] < num_atoms - num_hydrogens) /* at[n] might have been encountered; it has not been moved */ )
611             {
612                 /*  found an explicit terminal hydrogen */
613                 num_hydrogens ++;
614                 if ( k==0 && ATW_H <= at[i].iso_atw_diff && at[i].iso_atw_diff < ATW_H+NUM_H_ISOTOPES )
615                 {
616                     k = at[i].iso_atw_diff; /*  H isotope has already been marked above or elsewhere */
617                 }
618                 if ( at[i].charge )
619                 {
620                     /*  transfer charge from the hydrogen */
621                     at[n].charge += at[i].charge;
622                     at[i].charge = 0;
623                     if (bFixTermHChrg)
624                     {
625                         /*^^^^^ Fixed bug (July 6, 2008 IPl) :
626                                 if terminal H was charged (not neutralized before call of remove_terminal_HDT)
627                                 and had an ordering number > than that of heavy-atom neighbour, then
628                                 charge on neighbour atom was not adjusted (though charge on H was removed).
629                         ^^^^^ */
630                         if ( i > n )
631                             /* new_at[new_ord[n]] has been created and filled already */
632                             new_at[new_ord[n]].charge = at[n].charge;
633                     }
634                     /*^^^^^ */
635                 }
636                 new_ord[i] = num_atoms - num_hydrogens;  /*  move hydrogens to the end of the list */
637             }
638             else
639             {
640                 /* atom is not an explicit terminal hydrogen */
641                 new_ord[i] = i - num_hydrogens;  /*  adjust non-hydrogens positions */
642             }
643 
644             /*  copy atom to the new position */
645             new_at[new_ord[i]] = at[i];
646 
647         } /* i */
648 
649     } /* general case except H-H */
650 
651     if ( num_hydrogens ) {
652         int num_others = num_atoms-num_hydrogens; /*  atoms which are not terminal H, D, T */
653         if ( num_hydrogens > 1 ) {
654             /*  sort hydrogen isotopes in ascending order, */
655             /*  orig, numbers being the secondary sorting key */
656             qsort( new_at+num_others, num_hydrogens, sizeof(new_at[0]), cmp_iso_atw_diff_component_no );
657         }
658         /*  save new numbering of hydrogen atoms using temporarily saved orig numbering */
659         for ( i = num_others; i < num_atoms; i ++ ) {
660             new_ord[(int)new_at[i].component] = i;
661         }
662 
663         /*  renumber neighbors according to new_ord[] and detach terminal hydrogens */
664         for ( i = 0; i < num_others; i++ ) {
665             memset( new_HydrogenAt_order, 0, sizeof(new_HydrogenAt_order) );
666             memset( new_OtherNeigh_order, 0, sizeof(new_OtherNeigh_order) );
667             num_OtherNeigh = 0;
668             num_HydrogenAt = 0;
669             num_H          = 0;
670 
671             for ( m = 0; m < MAX_NUM_STEREO_BONDS && new_at[i].sb_parity[m]; m ++ ) {
672                 old_trans[m] = 2 - (new_at[i].sn_ord[m] + new_at[i].sb_ord[m] + (new_at[i].sn_ord[m] > new_at[i].sb_ord[m]))%2;
673             }
674 
675             for ( k = j = val = 0; k < new_at[i].valence; k++ ) {
676                 if ( num_others <= ( n = new_ord[new_at[i].neighbor[k]] ) ) {
677                     /*  discovered neighbor = disconnected explicit hydrogen
678                      *  i = new atom new_at[i] ordering number
679                      *  n = new number of the explicit H
680                      *  k = ordering number of the explicit H in new_at[i] adjacency list
681                      */
682                     if ( 0 < new_at[n].iso_atw_diff && new_at[n].iso_atw_diff < ATW_H+NUM_H_ISOTOPES ) {
683                         /* make explicit isotopic H implicit */
684                         new_at[i].num_iso_H[new_at[n].iso_atw_diff-1] ++; /*  isotopic H */
685                         num_HydrogenAt += !new_HydrogenAt_order[new_at[n].iso_atw_diff];
686                         new_HydrogenAt_order[new_at[n].iso_atw_diff] = k+1;
687                     } else {
688                         /* make explicit non-isotopic H implicit */
689                         new_at[i].num_H ++; /*  non-isotopic H */
690                         num_HydrogenAt += !num_H;
691                         num_H ++;
692                         new_HydrogenAt_order[0] = k+1;
693                     }
694                     /*  decrement chem. bonds valence because one bond is removed */
695                     new_at[i].chem_bonds_valence  = inchi_max( 0, new_at[i].chem_bonds_valence-1 );
696                     new_at[n].neighbor[0]         = i; /*  update removed hydrogen neighbor number */
697                     if ( new_at[i].sb_parity[0] ) {
698                         /* if the removed H is an SB neighbor then mark it as removed */
699                         for ( m = 0; m < MAX_NUM_STEREO_BONDS && new_at[i].sb_parity[m]; m ++ ) {
700                             if ( k == (int)new_at[i].sn_ord[m] ) {
701                                 new_at[i].sn_ord[m] = -(new_at[n].iso_atw_diff+1);
702                                 /* means the SB neighbor has been removed; (-4)=H, (-3)=1H, (-2)=D, (-1)=T */
703                             }
704                         }
705                     }
706                 } else {
707                     /* discovered a regular (not an explicit H) neighbor */
708                     if ( new_at[i].sb_parity[0] ) {
709                         if ( num_OtherNeigh < MAX_NUM_STEREO_BONDS ) {
710                             new_OtherNeigh_order[num_OtherNeigh] = k+1;
711                         }
712                         num_OtherNeigh ++; /* increment outside of if() to detect overflow */
713                         if ( val != k ) {
714                             /* store new stereobond and sb-neighbor ordering numbers */
715                             for ( m = 0; m < MAX_NUM_STEREO_BONDS && new_at[i].sb_parity[m]; m ++ ) {
716                                 if ( k == (int)new_at[i].sb_ord[m] )
717                                     new_at[i].sb_ord[m] = val;
718                                 else
719                                 if ( k == (int)new_at[i].sn_ord[m] )
720                                     new_at[i].sn_ord[m] = val;
721                             }
722                         }
723                     }
724                     new_at[i].neighbor[val]       = new_ord[new_at[i].neighbor[k]];
725                     new_at[i].bond_type[val]      = new_at[i].bond_type[k];
726                     new_at[i].bond_stereo[val]    = new_at[i].bond_stereo[k];
727                     val ++;
728                 }
729             }
730             if ( new_at[i].valence > val && new_at[i].sb_parity[0] ) {
731                 if ( num_HydrogenAt == new_at[i].valence - val && num_HydrogenAt + num_OtherNeigh <= MAXVAL ) {
732                     /* recalculate parity so that it would describe neighbor sequence H,1H,D,T,neigh[0],neigh[1]... */
733                     memmove( new_OtherNeigh_order + num_HydrogenAt, new_OtherNeigh_order, num_OtherNeigh*sizeof(new_OtherNeigh_order[0]));
734                     for ( k = 0, j = 1; k <= NUM_H_ISOTOPES; k ++ ) {
735                         if ( new_HydrogenAt_order[k] ) {
736                             new_OtherNeigh_order[num_HydrogenAt - j] = new_HydrogenAt_order[k];
737                             for ( m = 0; m < MAX_NUM_STEREO_BONDS && new_at[i].sb_parity[m]; m ++ ) {
738                                 if ( (int)new_at[i].sn_ord[m] == -(k+1) ) {
739                                     new_at[i].sn_ord[m] = -j;
740                                     /* negative means explicit H isotope ord are
741                                        (contiguously) in front of the adjacency list */
742                                 }
743                             }
744                             j ++;
745                         }
746                     }
747                     /* at this point new_OtherNeigh_order[] contains
748                        incremented old ordering numbers in new order */
749                     k = insertions_sort_AT_RANK( new_OtherNeigh_order, num_HydrogenAt + num_OtherNeigh );
750                     k = k%2; /* seems to be of no use */
751                     /*if ( k ) {*/
752                     /*
753                     for ( m = 0; m < MAX_NUM_STEREO_BONDS && new_at[i].sb_parity[m]; m ++ ) {
754                         if ( PARITY_WELL_DEF(new_at[i].sb_parity[m]) ) {
755                             if ( old_trans[m] != 2 - (4 + new_at[i].sn_ord[m] + new_at[i].sb_ord[m] + (new_at[i].sn_ord[m] > new_at[i].sb_ord[m]))%2 ) {
756                                 new_at[i].sb_parity[m] = 3 - new_at[i].sb_parity[m];
757                             }
758                         }
759                     }
760                     */
761                     /*}*/
762                 }
763 #ifdef _DEBUG
764                 else {
765                     /* error */
766                     int stop = 1;
767                 }
768 #endif
769             }
770             new_at[i].valence = val;
771         }
772         memcpy( at, new_at, sizeof(at[0])*num_atoms );
773         ret = num_others;
774     }  else {
775         ret = num_atoms;
776     }
777 exit_function:
778     if ( new_ord )
779         inchi_free ( new_ord );
780     if ( new_at )
781         inchi_free ( new_at );
782     return ret;
783 }
784 /************************************************************************/
add_DT_to_num_H(int num_atoms,inp_ATOM * at)785 int add_DT_to_num_H( int num_atoms, inp_ATOM *at )
786 /*  assume num_1H, num_D and num_T are not included in num_H */
787 {
788     int i, j;
789     for ( i = 0; i < num_atoms; i ++ ) {
790         for ( j = 0; j < NUM_H_ISOTOPES; j ++ )
791             at[i].num_H += at[i].num_iso_H[j];
792     }
793     return 0;
794 }
795 /***************************************************************/
796 /* not used ---
797 int FixAromaticOxygenAndSulfur( inp_ATOM *atom )
798 {
799     if ( !atom->elname[1] && (atom->elname[0]=='O' || atom->elname[0]=='S') &&
800          atom->valence==2 && !atom->charge && !atom->radical &&
801          atom->bond_type[0] + atom->bond_type[1] == 3 ) {
802         atom->charge = 1;
803         return 1; // fixed
804     }
805     return 0;
806 }
807 */
808 /********************************************************************
809 
810              InChI post-version 1.01 features implementation
811              (v. 1.04 : underivatize is still experimental and for engineering mode)
812 
813  ********************************************************************/
814 #if ( RING2CHAIN == 1 || UNDERIVATIZE == 1 )
815 
816 static U_CHAR el_number_O;
817 static U_CHAR el_number_C;
818 static U_CHAR el_number_N;
819 static U_CHAR el_number_P;
820 static U_CHAR el_number_S;
821 static U_CHAR el_number_Si;
822 static U_CHAR el_number_F;
823 static U_CHAR el_number_Cl;
824 static U_CHAR el_number_Br;
825 static U_CHAR el_number_I;
826 static U_CHAR el_number_B;
827 
828 typedef struct tagAtPair {
829     AT_NUMB at[2];  /* at[0] < at[1] */
830 } R2C_ATPAIR;
831 
832 int DisconnectInpAtBond( inp_ATOM *at, AT_NUMB *nOldCompNumber, int iat, int neigh_ord );
833 int ExtractConnectedComponent(  inp_ATOM *at, int num_at, int component_number, inp_ATOM *component_at );
834 int mark_arom_bonds( inp_ATOM *at, int num_atoms );
835 
836 void set_R2C_el_numbers( void );
837 int UnMarkDisconnectedComponents( ORIG_ATOM_DATA *orig_inp_data );
838 int UnMarkRingSystemsInp( inp_ATOM *at, int num_atoms );
839 int UnMarkOtherIndicators( inp_ATOM *at, int num_atoms );
840 int UnMarkOneComponent( inp_ATOM *at, int num_atoms );
841 int subtract_DT_from_num_H( int num_atoms, inp_ATOM *at );
842 int add_inp_ATOM( inp_ATOM *at, int len_at, int len_cur, inp_ATOM *add, int len_add );
843 int cmp_r2c_atpair( const void *p1, const void *p2 );
844 int has_atom_pair( R2C_ATPAIR *ap, int num_ap, AT_NUMB at1, AT_NUMB at2 );
845 int mark_atoms_ap( inp_ATOM *at, AT_NUMB start, R2C_ATPAIR *ap, int num_ap, int num, AT_NUMB cFlags );
846 
847 /********************************************************************/
set_R2C_el_numbers(void)848 void set_R2C_el_numbers( void )
849 {
850     if ( !el_number_O ) {
851         el_number_O  = (U_CHAR)get_periodic_table_number( "O" );
852         el_number_C  = (U_CHAR)get_periodic_table_number( "C" );
853         el_number_N  = (U_CHAR)get_periodic_table_number( "N" );
854         el_number_P  = (U_CHAR)get_periodic_table_number( "P" );
855         el_number_S  = (U_CHAR)get_periodic_table_number( "S" );
856         el_number_Si = (U_CHAR)get_periodic_table_number( "Si" );
857         el_number_F  = (U_CHAR)get_periodic_table_number( "F" );
858         el_number_Cl = (U_CHAR)get_periodic_table_number( "Cl" );
859         el_number_Br = (U_CHAR)get_periodic_table_number( "Br" );
860         el_number_I  = (U_CHAR)get_periodic_table_number( "I" );
861         el_number_B  = (U_CHAR)get_periodic_table_number( "B" );
862     }
863 }
864 /***************************************************************/
UnMarkDisconnectedComponents(ORIG_ATOM_DATA * orig_inp_data)865 int UnMarkDisconnectedComponents( ORIG_ATOM_DATA *orig_inp_data )
866 {
867     int i;
868     for ( i = 0; i < orig_inp_data->num_inp_atoms; i ++ ) {
869         orig_inp_data->at[i].orig_compt_at_numb = 0;
870         orig_inp_data->at[i].component          = 0;
871     }
872     if ( orig_inp_data->nCurAtLen ) {
873         inchi_free( orig_inp_data->nCurAtLen );
874         orig_inp_data->nCurAtLen = NULL;
875     }
876     if ( orig_inp_data->nOldCompNumber ) {
877         inchi_free( orig_inp_data->nOldCompNumber );
878         orig_inp_data->nOldCompNumber = NULL;
879     }
880     orig_inp_data->num_components = 0;
881     return 0;
882 }
883 /***************************************************************/
UnMarkRingSystemsInp(inp_ATOM * at,int num_atoms)884 int UnMarkRingSystemsInp( inp_ATOM *at, int num_atoms )
885 {
886     int i;
887     for ( i = 0; i < num_atoms; i ++ ) {
888         at[i].bCutVertex         = 0;
889         at[i].nRingSystem        = 0;
890         at[i].nNumAtInRingSystem = 0;
891         at[i].nBlockSystem       = 0;
892     }
893     return 0;
894 }
895 /***************************************************************/
UnMarkOtherIndicators(inp_ATOM * at,int num_atoms)896 int UnMarkOtherIndicators( inp_ATOM *at, int num_atoms )
897 {
898     int i;
899     for ( i = 0; i < num_atoms; i ++ ) {
900         at[i].at_type         = 0;
901         at[i].cFlags          = 0;
902     }
903     return 0;
904 }
905 /***************************************************************/
UnMarkOneComponent(inp_ATOM * at,int num_atoms)906 int UnMarkOneComponent( inp_ATOM *at, int num_atoms )
907 {
908     int i;
909     for ( i = 0; i < num_atoms; i ++ ) {
910         at[i].orig_compt_at_numb = 0;
911         at[i].component          = 0;
912     }
913     return 0;
914 }
915 /***************************************************************/
subtract_DT_from_num_H(int num_atoms,inp_ATOM * at)916 int subtract_DT_from_num_H( int num_atoms, inp_ATOM *at )
917 /*  assume num_1H, num_D and num_T are included in num_H */
918 {
919     int i, j;
920     for ( i = 0; i < num_atoms; i ++ ) {
921         for ( j = 0; j < NUM_H_ISOTOPES; j ++ )
922             at[i].num_H -= at[i].num_iso_H[j];
923     }
924     return 0;
925 }
926 /***************************************************************/
add_inp_ATOM(inp_ATOM * at,int len_at,int len_cur,inp_ATOM * add,int len_add)927 int add_inp_ATOM( inp_ATOM *at, int len_at, int len_cur, inp_ATOM *add, int len_add )
928 {
929     int i, j;
930     inp_ATOM *a;
931     /* chack correctness */
932     if ( len_cur < 0 )
933         return len_cur;
934     if ( len_add < 0 )
935         return len_add;
936     if ( len_cur + len_add > len_at )
937         return -1;
938     /* copy */
939     memcpy( at+len_cur, add, len_add*sizeof(at[0]) );
940     /* modify */
941     if ( len_cur ) {
942         a = at + len_cur;
943         for ( i = 0; i < len_add; i ++ ) {
944             for ( j = 0; j < a[i].valence; j ++ ) {
945                 a[i].neighbor[j] += len_cur;
946             }
947         }
948     }
949     return len_cur + len_add;
950 }
951 /****************************************************************************/
mark_arom_bonds(inp_ATOM * at,int num_atoms)952 int mark_arom_bonds( inp_ATOM *at, int num_atoms )
953 {
954     INCHI_MODE bTautFlags=0, bTautFlagsDone = 0;
955     inp_ATOM *at_fixed_bonds_out = NULL;
956     T_GROUP_INFO *t_group_info   = NULL;
957     int ret;
958     ret = mark_alt_bonds_and_taut_groups ( at, at_fixed_bonds_out, num_atoms,
959                                            t_group_info, &bTautFlags, &bTautFlagsDone );
960     return ret;
961 
962 }
963 
964 /********************************************************************/
cmp_r2c_atpair(const void * p1,const void * p2)965 int cmp_r2c_atpair( const void *p1, const void *p2 )
966 {
967     const R2C_ATPAIR *ap1 = (const R2C_ATPAIR *)p1;
968     const R2C_ATPAIR *ap2 = (const R2C_ATPAIR *)p2;
969     int diff = (int)ap1->at[0] - (int)ap2->at[0];
970     if ( !diff ) {
971         diff = (int)ap1->at[1] - (int)ap2->at[1];
972     }
973     return diff;
974 }
975 /***************************************************************/
has_atom_pair(R2C_ATPAIR * ap,int num_ap,AT_NUMB at1,AT_NUMB at2)976 int has_atom_pair( R2C_ATPAIR *ap, int num_ap, AT_NUMB at1, AT_NUMB at2 )
977 {
978     R2C_ATPAIR ap1;
979     int i1, i2, i3, diff;
980     int n = at1 > at2;
981 
982     ap1.at[n]   = at1;
983     ap1.at[1-n] = at2;
984     i1 = 0;
985     i2 = num_ap-1;
986     /* search for ap1 by simple bisections */
987     do {
988         i3 = (i1 + i2)/2;
989         if ( !(diff = cmp_r2c_atpair(&ap1, ap+i3) ) ) {
990             return i3+1;  /* found => positive number */
991         } else
992         if ( diff > 0 ) {
993             i1 = i3 + 1;
994         } else {
995             i2 = i3 - 1;
996         }
997     }while ( i2 >= i1 );
998     return 0; /* not found */
999 }
1000 
1001 /***************************************************************/
1002 /* DFS search for atoms that do not have a flag */
mark_atoms_ap(inp_ATOM * at,AT_NUMB start,R2C_ATPAIR * ap,int num_ap,int num,AT_NUMB cFlags)1003 int mark_atoms_ap( inp_ATOM *at, AT_NUMB start, R2C_ATPAIR *ap, int num_ap, int num, AT_NUMB cFlags )
1004 {
1005     if ( !at[start].at_type ) {
1006         int i;
1007         AT_NUMB neigh;
1008         at[start].at_type = cFlags;
1009         num ++;
1010         for ( i = 0; i < at[start].valence; i ++ ) {
1011             neigh = at[start].neighbor[i];
1012             if ( has_atom_pair( ap, num_ap, start, neigh ) )
1013                 continue;
1014             num = mark_atoms_ap( at, neigh, ap, num_ap, num, cFlags );
1015         }
1016     }
1017     return num; /* number of atoms traversed forward from at[start] */
1018 }
1019 
1020 #endif /* RING2CHAIN || UNDERIVATIZE */
1021 
1022 #if ( UNDERIVATIZE == 1 )
1023 /***************************************************************/
1024 
1025 #ifdef NEVER
1026 typedef struct tagAtTypeBitmap {
1027     AT_NUMB ord1 : 5; /* up to 2^5-1 = 31 = 0x0037 */
1028     AT_NUMB ord2 : 5;
1029     AT_NUMB type : 6; /* up to 2^6-1 = 63 = 0x0077 */
1030 } AtTypeBitmap;
1031 typedef union tagAtTypeUnion {
1032     AT_NUMB num;
1033     AtTypeBitmap bits;
1034 } AtTypeUnion;
1035 #endif
1036 
1037 #define DERIV_AT_LEN  4
1038 typedef struct tagDerivAttachment {
1039     char typ[DERIV_AT_LEN];
1040     char ord[DERIV_AT_LEN];
1041     char num[DERIV_AT_LEN];
1042 } DERIV_AT;
1043 
1044 
1045 #define DERIV_BRIDGE_O  0x0001   /* R1-O-R2 => R1-OH + HO-R2 */
1046 #define DERIV_BRIDGE_NH 0x0002   /* R1-NH-R2  amine */
1047 #define DERIV_AMINE_tN  0x0004   /* R1-N(-R2)-R3  tertiary amine */
1048 #define DERIV_RING_O    0x0008   /* -O- in a ring */
1049 #define DERIV_RING_NH   0x0010   /* -NH- in a ring */
1050 #define DERIV_UNMARK    0x0040   /* unmark the cut */
1051 #define DERIV_NOT       0x1000   /* cannot be a derivatization agent atom */
1052 
1053 #define DERIV_DUPLIC    0x0080   /* duplicated disconnection */
1054 
1055 #define DERIV_RING     (DERIV_RING_O | DERIV_RING_NH)
1056 
1057 #define MAX_AT_DERIV      12
1058 #define NOT_AT_DERIV      99
1059 #define MIN_AT_LEFT_DERIV 3
1060 #define NO_ORD_VAL        0x0037
1061 
1062 #define CFLAG_MARK_BRANCH      1          /* for main derivative traversal */
1063 #define CFLAG_MARK_BLOCK       2          /* for block detection */
1064 #define CFLAG_MARK_BLOCK_INV   ((char)~(CFLAG_MARK_BLOCK)) /* for block detection */
1065 #define COUNT_ALL_NOT_DERIV    1      /* 1=> count ALL atoms that are not in deriv. agents */
1066                                       /* 0=> only atoms that are not in DERIV_RING */
1067 
1068 int mark_atoms_cFlags( inp_ATOM *at, int start, int num, char cFlags );
1069 int un_mark_atoms_cFlags( inp_ATOM *at, int start, int num, char cFlags, char cInvFlags );
1070 int is_C_or_S_DB_O( inp_ATOM *at, int i );
1071 int is_C_unsat_not_arom( inp_ATOM *at, int i );
1072 int is_C_Alk( inp_ATOM *at, int i, char cFlags );
1073 int is_Si_IV( inp_ATOM *at, int i );
1074 int is_P_TB_N( inp_ATOM *at, int i );
1075 int is_possibly_deriv_neigh( inp_ATOM *at, int iat, int iord, int type, char cFlags );
1076 int get_traversed_deriv_type( inp_ATOM *at, DERIV_AT *da, int k, DERIV_AT *da1, char cFlags );
1077 int add_to_da( DERIV_AT *da, DERIV_AT *add );
1078 int mark_atoms_deriv( inp_ATOM *at, DERIV_AT *da, int start, int num, char cFlags, int *pbFound );
1079 int count_one_bond_atoms( inp_ATOM *at, DERIV_AT *da, int start, int ord, char cFlags, int *bFound );
1080 int is_silyl( inp_ATOM *at, int start, int ord_prev );
1081 int is_Me_or_Et( inp_ATOM *at, int start, int ord_prev );
1082 int is_CF3_or_linC3F7( inp_ATOM *at, int start, int ord_prev );
1083 int is_phenyl( inp_ATOM *at, int start, int ord_prev );
1084 int is_deriv_ring( inp_ATOM *at, int start, int num_atoms, DERIV_AT *da1, int idrv );
1085 int is_deriv_chain( inp_ATOM *at, int start, int num_atoms, DERIV_AT *da1, int idrv );
1086 int is_deriv_chain_or_ring( inp_ATOM *at, int start, int num_atoms, DERIV_AT *da1, int *idrv );
1087 int remove_deriv( DERIV_AT *da1, int idrv );
1088 int remove_deriv_mark( DERIV_AT *da1, int idrv );
1089 int EliminateDerivNotInList( inp_ATOM *at, DERIV_AT *da, int num_atoms );
1090 int make_single_cut( inp_ATOM *at, DERIV_AT *da, int iat, int icut );
1091 
1092 /***************************************************************/
1093 /* DFS search for atoms that do not have a flag */
mark_atoms_cFlags(inp_ATOM * at,int start,int num,char cFlags)1094 int mark_atoms_cFlags( inp_ATOM *at, int start, int num, char cFlags )
1095 {
1096     if ( !(at[start].cFlags & cFlags) ) {
1097         int i;
1098         at[start].cFlags |= cFlags;
1099         num ++;
1100         for ( i = 0; i < at[start].valence; i ++ ) {
1101             num = mark_atoms_cFlags( at, at[start].neighbor[i], num, cFlags );
1102         }
1103     }
1104     return num; /* number of atoms traversed forward from at[start] */
1105 }
1106 /***************************************************************/
1107 /* DFS search for atoms that do have a flag */
un_mark_atoms_cFlags(inp_ATOM * at,int start,int num,char cFlags,char cInvFlags)1108 int un_mark_atoms_cFlags( inp_ATOM *at, int start, int num, char cFlags, char cInvFlags )
1109 {
1110     if ( at[start].cFlags & cFlags ) {
1111         int i;
1112         at[start].cFlags &= cInvFlags;
1113         num ++;
1114         for ( i = 0; i < at[start].valence; i ++ ) {
1115             num = un_mark_atoms_cFlags( at, at[start].neighbor[i], num, cFlags, cInvFlags );
1116         }
1117     }
1118     return num; /* number of atoms traversed forward from at[start] */
1119 }
1120 /***************************************************************/
is_C_or_S_DB_O(inp_ATOM * at,int i)1121 int is_C_or_S_DB_O( inp_ATOM *at, int i )
1122 {
1123     int j, neigh;
1124     if ( at[i].el_number != el_number_C &&
1125          at[i].el_number != el_number_S ||
1126          at[i].charge || at[i].radical )
1127         return 0;
1128     for ( j = 0; j < at[i].valence; j ++ ) {
1129         neigh = at[i].neighbor[j];
1130         if ( (at[neigh].el_number == el_number_O ||
1131               at[neigh].el_number == el_number_S ) &&
1132               !at[neigh].num_H && 1 == at[neigh].valence &&
1133               2 == at[neigh].chem_bonds_valence ) {
1134             return 1;
1135         }
1136     }
1137     return 0;
1138 }
1139 /***************************************************************/
is_C_unsat_not_arom(inp_ATOM * at,int i)1140 int is_C_unsat_not_arom( inp_ATOM *at, int i )
1141 {
1142     int j, neigh, num_arom, num_DB;
1143     if ( at[i].el_number != el_number_C ||
1144          at[i].valence   == at[i].chem_bonds_valence ||
1145          at[i].valence+1 < at[i].chem_bonds_valence  ||
1146          at[i].chem_bonds_valence + at[i].num_H != 4 ||
1147          at[i].charge || at[i].radical )
1148         return 0;
1149     num_arom = num_DB = 0;
1150     for ( j = 0; j < at[i].valence; j ++ ) {
1151         neigh = at[i].neighbor[j];
1152         num_arom += at[i].bond_type[j] == BOND_TYPE_ALTERN;
1153         if ( (at[neigh].el_number == el_number_O ||
1154               at[neigh].el_number == el_number_S ) &&
1155               !at[neigh].num_H && 1 == at[neigh].valence &&
1156               2 == at[neigh].chem_bonds_valence ) {
1157             continue;
1158         }
1159         num_DB += at[i].bond_type[j] == BOND_TYPE_DOUBLE;
1160     }
1161     return num_DB && !num_arom;
1162 }
1163 /***************************************************************/
is_C_Alk(inp_ATOM * at,int i,char cFlags)1164 int is_C_Alk( inp_ATOM *at, int i, char cFlags )
1165 {
1166     if ( at[i].el_number == el_number_C &&
1167          at[i].valence == at[i].chem_bonds_valence ) {
1168         int j, k;
1169         U_CHAR el;
1170         for ( j = 0; j < at[i].valence; j ++ ) {
1171             k = at[i].neighbor[j];
1172             if ( at[k].cFlags & cFlags )
1173                 continue;
1174             el = at[k].el_number;
1175             if ( el != el_number_C &&
1176                  el != el_number_F &&
1177                  el != el_number_Cl &&
1178                  el != el_number_Br &&
1179                  el != el_number_I ) {
1180                 return 0;
1181             }
1182         }
1183         return 1;
1184     }
1185     return 0;
1186 }
1187 /***************************************************************/
is_Si_IV(inp_ATOM * at,int i)1188 int is_Si_IV( inp_ATOM *at, int i )
1189 {
1190     if ( at[i].el_number != el_number_Si ||
1191          at[i].charge || at[i].radical || at[i].valence != 4 || at[i].chem_bonds_valence != 4 )
1192         return 0;
1193     return 1;
1194 }
1195 /***************************************************************/
is_P_TB_N(inp_ATOM * at,int i)1196 int is_P_TB_N( inp_ATOM *at, int i )
1197 {
1198     int j, k;
1199     if ( at[i].el_number != el_number_P || at[i].chem_bonds_valence - at[i].valence != 2 )
1200         return 0;
1201     for ( j = 0; j < at[i].valence; j ++ ) {
1202         k = at[i].neighbor[j];
1203         if ( at[k].el_number == el_number_N && at[k].valence == 1 && at[k].chem_bonds_valence == 3 )
1204             return 1;
1205     }
1206     return 0;
1207 }
1208 /***************************************************************/
is_possibly_deriv_neigh(inp_ATOM * at,int iat,int iord,int type,char cFlags)1209 int is_possibly_deriv_neigh( inp_ATOM *at, int iat, int iord, int type, char cFlags )
1210 {
1211     int neigh = at[iat].neighbor[iord];
1212     int neigh_from = -1;
1213     U_CHAR el = at[neigh].el_number;
1214     int    bOk = 0;
1215     switch ( type ) {
1216     case DERIV_BRIDGE_O:
1217         neigh_from = at[iat].neighbor[!iord];
1218         /* -> A--O--B -> traversing from A(neigh_from) to B(neigh); may we cut O--B bond? */
1219         /* do not cut bond "---" in A=Si(IV), B(=O), B=C: Si(IV)-O---B(=O) */
1220         if ( !(is_C_or_S_DB_O( at, neigh ) && is_Si_IV( at, neigh_from )) &&
1221              !is_C_unsat_not_arom( at, neigh ) ) {
1222             bOk = ( el == el_number_C ||
1223                     el == el_number_Si ||
1224                     el == el_number_S ||
1225                     el == el_number_P );
1226         }
1227         break;
1228     case DERIV_BRIDGE_NH:
1229         /* -> A--NH--B -> traversing from A(neigh_from) to B(neigh); may we cut NH--B bond? */
1230         bOk = ( is_C_or_S_DB_O( at, neigh ) ||
1231                 is_C_Alk( at, neigh, cFlags ) ||
1232                 is_Si_IV( at, neigh ) ||
1233                 is_P_TB_N( at, neigh ) ) && !(is_C_unsat_not_arom( at, neigh ));
1234         break;
1235     case DERIV_AMINE_tN:
1236         bOk = ( is_C_or_S_DB_O( at, neigh ) ||
1237                 is_C_Alk( at, neigh, cFlags ) ||
1238                 is_Si_IV( at, neigh ) ||
1239                 is_P_TB_N( at, neigh ) ) && !(is_C_unsat_not_arom( at, neigh ));
1240         break;
1241     }
1242     return bOk;
1243 }
1244 /***************************************************************/
1245 /* determines derivative type on the forward step of the DFS   */
1246 /***************************************************************/
get_traversed_deriv_type(inp_ATOM * at,DERIV_AT * da,int k,DERIV_AT * da1,char cFlags)1247 int get_traversed_deriv_type( inp_ATOM *at, DERIV_AT *da, int k, DERIV_AT *da1, char cFlags )
1248 {
1249     int i, j, m, nBlockSystemFrom, nOrdBack1, nOrdBack2, nOrdBack3, nBackType1, nBackType2;
1250     memset( da1, 0, sizeof(*da1) );
1251     if ( at[k].cFlags & cFlags ) {
1252         return 0;
1253     }
1254     for ( m = 0; m < at[k].valence && !(at[(int)at[k].neighbor[m]].cFlags & cFlags); m ++ )
1255         ;
1256     if ( m == at[k].valence )
1257         return -1; /* error: at least one neighbor must have cFlags */
1258     if ( at[k].valence == 1 && at[k].num_H && (
1259            at[k].el_number == el_number_O ||
1260            at[k].el_number == el_number_N ||
1261            at[k].el_number == el_number_S ||
1262            at[k].el_number == el_number_P ) ) {
1263         return DERIV_NOT;
1264     }
1265     if ( is_el_a_metal( at[k].el_number ) ) {
1266         return DERIV_NOT;
1267     }
1268 #ifdef NEVER
1269     if ( at[k].el_number == el_number_N && at[k].valence >= 2 && at[k].chem_bonds_valence <= 3 ) {
1270         return DERIV_NOT; /* prohibit -N-, -N=, allow -N# as in isocyano -N#C or NO2 */
1271     }
1272 #endif
1273     /* m is the ord of the bond from which at[k] was reached first time */
1274     if ( at[k].nNumAtInRingSystem == 1 && (at[k].el_number == el_number_O || at[k].el_number == el_number_S) &&
1275          at[k].valence == 2 && at[k].chem_bonds_valence == 2 &&
1276          !at[k].num_H && !at[k].charge && !at[k].radical) {
1277         /* -> A--O--B -> traversing from A to B; cut O--B */
1278         /* check for carboxy A(=O)-O-B and A--O--B(=O) */
1279         /* int has_A_CO   = is_C_or_S_DB_O( at, at[k].neighbor[m] ); */
1280         int has_B_CO   = is_C_or_S_DB_O( at, at[k].neighbor[!m] );
1281         int is_A_Si_IV = is_Si_IV( at, at[k].neighbor[m] );
1282         /* int is_B_Si_IV = is_Si_IV( at, at[k].neighbor[!m] );*/
1283         if ( is_A_Si_IV && has_B_CO ) {
1284             ; /* do not cut bond --- in A=Si(IV), B(=O), B=C: Si(IV)-O---B(=O) */
1285         } else
1286         if ( is_possibly_deriv_neigh( at, k, !m, DERIV_BRIDGE_O, cFlags ) ) {
1287             da1->ord[0] =  !m;         /* ord of neighbor B, not traversed yet */
1288             da1->typ[0] =  DERIV_BRIDGE_O;   /* type */
1289             return DERIV_BRIDGE_O;   /* R-O-R */
1290         }
1291     }
1292     if ( at[k].bCutVertex && at[k].el_number == el_number_N &&
1293          at[k].valence == 2 && at[k].chem_bonds_valence == at[k].valence &&
1294          at[k].valence+at[k].num_H == 3 && !at[k].charge && !at[k].radical) {
1295         da1->ord[0] =  !m;         /* ord of neighbor B, not traversed yet */
1296         da1->typ[0] =  DERIV_BRIDGE_NH;   /* type */
1297         return DERIV_BRIDGE_NH;   /* R1-N(-R2)-R3 or R1-NH-R2  amine */
1298     }
1299     if ( at[k].bCutVertex && at[k].el_number == el_number_N &&
1300          at[k].valence == 3 && at[k].chem_bonds_valence == at[k].valence &&
1301          at[k].valence+at[k].num_H == 3 && !at[k].charge && !at[k].radical) {
1302         int rm1 = ( at[at[k].neighbor[m]].nRingSystem == at[at[k].neighbor[(m+1)%3]].nRingSystem );
1303         int rm2 = ( at[at[k].neighbor[m]].nRingSystem == at[at[k].neighbor[(m+2)%3]].nRingSystem );
1304         int r12 = ( at[at[k].neighbor[(m+1)%3]].nRingSystem == at[at[k].neighbor[(m+2)%3]].nRingSystem );
1305         int ord[2]= {-1, -1};
1306         i = 0; /* type: tertriary amine */
1307         switch( rm1 + rm2 + r12 ) {
1308         case 0:
1309             /* -N< has no ring bonds */
1310             if ( is_possibly_deriv_neigh( at, k, (m+1)%3, DERIV_AMINE_tN, cFlags ) ) {
1311                 ord[i ++] = (m+1)%3; /* ord of a non-ring neighbor, not traversed yet */
1312             }
1313             if ( is_possibly_deriv_neigh( at, k, (m+2)%3, DERIV_AMINE_tN, cFlags ) ) {
1314                 ord[i ++] = (m+2)%3; /* ord of another non-ring neighbor, not traversed yet */
1315             }
1316             if ( i == 2 && ord[0] > ord[1] ) {
1317                 int tmp = ord[0];
1318                 ord[0] = ord[1];
1319                 ord[1] = tmp;
1320             }
1321             break;
1322 
1323         case 1:
1324             if ( rm1 && is_possibly_deriv_neigh( at, k, (m+2)%3, DERIV_AMINE_tN, cFlags ) ) {
1325                 ord[i++] = (m+2)%3;   /* ord of a single non-ring neighbor, not traversed yet */
1326             } else
1327             if ( rm2 && is_possibly_deriv_neigh( at, k, (m+1)%3, DERIV_AMINE_tN, cFlags ) ) {
1328                 ord[i++] = (m+1)%3; /* ord of a single non-ring neighbor, not traversed yet */
1329             }
1330         }
1331         for ( j = 0; j < i; j ++ ) {
1332             da1->ord[j] = ord[j];
1333             da1->typ[j] = DERIV_AMINE_tN;
1334         }
1335         if ( i ) {
1336             return DERIV_AMINE_tN;
1337         }
1338         return 0; /* all neighbors or two untraversed edges are in one ring system */
1339     }
1340     if ( at[k].bCutVertex && /* DD */
1341          at[k].valence == at[k].chem_bonds_valence &&
1342          (!at[k].num_H || at[k].el_number == el_number_C && 1 == at[k].num_H) &&
1343          !at[k].charge && !at[k].radical &&
1344          (at[k].el_number == el_number_C  && at[k].valence+at[k].num_H == 4 ||
1345           at[k].el_number == el_number_Si && at[k].valence == 4 ||
1346           at[k].el_number == el_number_B  && at[k].valence == 3) ) {
1347 
1348         /*                  entering path: ->X--O--DD
1349             --X--O
1350               |    \   /    DD = C, Si, B
1351               |     DD
1352               |    /   \     O = O, NH
1353             --Y--O
1354                             X, Y -- ignored for now
1355          */
1356         nBlockSystemFrom = 0;
1357         nBackType1 = nBackType2 = 0;
1358         nOrdBack1 = nOrdBack2 = nOrdBack3 = -1;
1359         j=(int)at[k].neighbor[m];
1360         if ( (at[j].el_number == el_number_O || at[j].el_number == el_number_S) && at[j].valence == 2 &&
1361              at[j].chem_bonds_valence == at[j].valence &&
1362              at[j].nNumAtInRingSystem >= 5 &&
1363              !at[j].num_H && !at[j].charge && !at[j].radical ) {
1364             nBackType1 = DERIV_RING_O;
1365             nBlockSystemFrom = at[j].nBlockSystem;
1366             nOrdBack1 = m; /* predecessor */
1367         } else
1368         if ( at[j].el_number == el_number_N && at[j].valence == 2 &&
1369              at[j].chem_bonds_valence == at[j].valence &&
1370              at[j].nNumAtInRingSystem >= 5 &&
1371              1==at[j].num_H && !at[j].charge && !at[j].radical ) {
1372             nBackType1 = DERIV_RING_NH;
1373             nBlockSystemFrom = at[j].nBlockSystem;
1374             nOrdBack1 = m; /* predecessor */
1375         }
1376         if ( nBlockSystemFrom ) {
1377             int num1, num2, bFound;
1378             at[k].cFlags |= CFLAG_MARK_BLOCK;
1379             num1 = mark_atoms_cFlags( at, at[k].neighbor[nOrdBack1], 1, CFLAG_MARK_BLOCK );
1380             for ( i = 0; i < at[k].valence; i ++ ) {
1381                 if ( i == nOrdBack1 )
1382                     continue;
1383                 j=(int)at[k].neighbor[i];
1384                 bFound = 0;
1385                 if ( at[j].cFlags & CFLAG_MARK_BLOCK ) {
1386                     if ( (at[j].el_number == el_number_O || at[j].el_number == el_number_S) && at[j].valence == 2 &&
1387                          at[j].chem_bonds_valence == at[j].valence &&
1388                          at[j].nNumAtInRingSystem >= 5 &&
1389                          !at[j].num_H && !at[j].charge && !at[j].radical ) {
1390                        bFound = 1;
1391                        if ( nOrdBack2 < 0 ) {
1392                             nOrdBack2 = i; /* predecessor #2 */
1393                             nBackType2 = DERIV_RING_O;
1394                         } else {
1395                             nOrdBack3 = i; /* predecessor #3 -- should not happen */
1396                         }
1397                     }
1398                     if ( at[j].el_number == el_number_N && at[j].valence == 2 &&
1399                          at[j].chem_bonds_valence == at[j].valence &&
1400                          at[j].nNumAtInRingSystem >= 5 &&
1401                          1==at[j].num_H && !at[j].charge && !at[j].radical ) {
1402                         bFound = 1;
1403                         if ( nOrdBack2 < 0 ) {
1404                             nOrdBack2 = i; /* predecessor #2 */
1405                             nBackType2 = DERIV_RING_NH;
1406                         } else {
1407                             nOrdBack3 = i; /* predecessor #3 -- should not happen */
1408                         }
1409                     }
1410                     if ( !bFound ) {
1411                         nOrdBack3 = 99; /* reject: wrong neighboring atom in the same block */
1412                         break;
1413                     }
1414                 }
1415             }
1416             num2 = un_mark_atoms_cFlags( at, k, 0, CFLAG_MARK_BLOCK, CFLAG_MARK_BLOCK_INV );
1417             if ( num1 != num2 ) {
1418                 return -1; /* mark_atoms_cFlags() program error */
1419             }
1420             if ( nOrdBack2 >= 0 && nOrdBack3 < 0 ) {
1421                 if ( nOrdBack1 < nOrdBack2 ) {
1422                     da1->ord[0] = nOrdBack1;  /* ord of a ring neighbor, traversed */
1423                     da1->typ[0] = nBackType1;
1424                     da1->ord[1] = nOrdBack2;  /* ord of another ring neighbor, not traversed yet */
1425                     da1->typ[1] = nBackType2;
1426                 } else {
1427                     da1->ord[0] = nOrdBack2;  /* ord of a ring neighbor, traversed */
1428                     da1->typ[0] = nBackType2;
1429                     da1->ord[1] = nOrdBack1;  /* ord of another ring neighbor, not traversed yet */
1430                     da1->typ[1] = nBackType1;
1431                 }
1432                 return nBackType1 | nBackType2;
1433             }
1434         }
1435     }
1436     return 0;
1437 }
1438 /***************************************************************/
add_to_da(DERIV_AT * da,DERIV_AT * add)1439 int add_to_da( DERIV_AT *da, DERIV_AT *add )
1440 {
1441     /* if add has more than 1 element the elements are in ascending add.ord[] order */
1442     int i, j, len;
1443     for ( len = 0; len < DERIV_AT_LEN && da->typ[len]; len ++ )
1444         ;
1445     for ( j = 0; j < DERIV_AT_LEN && add->typ[j]; j ++ ) {
1446         for ( i = 0; i < len; i ++ ) {
1447             if ( add->ord[j] == da->ord[i] ) {
1448                 if ( add->typ[j] != da->typ[i] ) {
1449                     return -1; /* error, should not happen */
1450                 }
1451                 if ( add->num[j] != da->num[i] ) {
1452                     return -2; /* error, should not happen */
1453                 }
1454                 break;
1455             }
1456         }
1457         if ( i == len ) {
1458             if ( len < DERIV_AT_LEN ) {
1459                 da->ord[i] = add->ord[j];
1460                 da->typ[i] = add->typ[j];
1461                 da->num[i] = add->num[j];
1462                 len ++;
1463             } else {
1464                 return -3; /* overflow, should not happen */
1465             }
1466         }
1467     }
1468     return 0;
1469 }
1470 /***************************************************************/
1471 /* DFS search for atoms that do not have a flag */
mark_atoms_deriv(inp_ATOM * at,DERIV_AT * da,int start,int num,char cFlags,int * pbFound)1472 int mark_atoms_deriv( inp_ATOM *at, DERIV_AT *da, int start, int num, char cFlags, int *pbFound )
1473 {
1474     int i, nFound=0;
1475     DERIV_AT da1;
1476     if ( !(at[start].cFlags & cFlags) ) {
1477         if ( DERIV_NOT == get_traversed_deriv_type( at, da, start, &da1, cFlags ) ) {
1478             nFound ++; /* at[start] cannot belong to a derivatizing agent */
1479         }
1480         num ++;
1481         at[start].cFlags |= cFlags;
1482         if ( da1.typ[0] ) {
1483             /* possibly a derivatization agent attachment point. */
1484             /* check neighbors that have not been traversed yet */
1485             int n1=0, n2=0, i1=-1, i2=-1, nFound1=0, nFound2=0;
1486             switch( da1.typ[0] ) {
1487             case DERIV_BRIDGE_O:
1488             case DERIV_BRIDGE_NH:
1489                 n1 = mark_atoms_deriv( at, da, at[start].neighbor[(int)da1.ord[0]], 0, cFlags, &nFound1 );
1490                 if ( n1 > MAX_AT_DERIV || nFound1 ) {
1491                     da1.typ[0] = 0;
1492                 } else {
1493                     da1.num[0] = n1;
1494                     nFound ++;
1495                 }
1496                 break;
1497             case DERIV_AMINE_tN:
1498                 n1 = mark_atoms_deriv( at, da, at[start].neighbor[i1=da1.ord[0]], 0, cFlags, &nFound1 );
1499                 if ( da1.typ[1] ) {
1500                     n2 = mark_atoms_deriv( at, da, at[start].neighbor[i2=da1.ord[1]], 0, cFlags, &nFound2 );
1501                 }
1502                 if ( 0 < n1 && n1 <= MAX_AT_DERIV && !nFound1 ) {
1503                     da1.num[0] = n1;
1504                     i = 1;
1505                     nFound ++;
1506                 } else {
1507                     da1.ord[0] = da1.ord[1];
1508                     da1.num[0] = da1.num[1];
1509                     da1.typ[0] = da1.typ[1];
1510                     da1.typ[1] = 0;
1511                     i = 0;
1512                 }
1513                 if ( 0 < n2 && n2 <= MAX_AT_DERIV && !nFound2 ) {
1514                     da1.num[i] = n2;
1515                     nFound ++;
1516                 } else {
1517                     da1.typ[i] = 0;
1518                 }
1519                 break;
1520             case DERIV_RING_O:
1521             case DERIV_RING_NH:
1522                 for ( i = 0; i < at[start].valence; i ++ ) {
1523                     if ( i != da1.ord[0] && i != da1.ord[1] && !(at[at[start].neighbor[i]].cFlags & cFlags) ) {
1524                         if ( !n1 )
1525                             n1 = mark_atoms_deriv( at, da, at[start].neighbor[i1=i], 0, cFlags, &nFound1 );
1526                         else
1527                             n2 = mark_atoms_deriv( at, da, at[start].neighbor[i2=i], 0, cFlags, &nFound2 );
1528                     }
1529                 }
1530                 if ( n1+n2+1 > MAX_AT_DERIV || nFound1 || nFound2 ) {
1531                     da1.typ[1] = da1.typ[0] = 0;
1532                 } else {
1533                     da1.num[0] = n1;
1534                     da1.num[1] = n2;
1535                     nFound ++;
1536                 }
1537                 break;
1538             }
1539             if ( n1 < 0 )
1540                 return n1;
1541             if ( n2 < 0 )
1542                 return n2; /* errors */
1543 
1544             if ( i = add_to_da( da+start, &da1 ) ) {
1545                 return i;  /* error */
1546             }
1547 
1548             *pbFound += nFound1 + nFound2 + nFound;
1549             num      += n1 + n2;
1550         } else {
1551             *pbFound += nFound;
1552         }
1553         for ( i = 0; i < at[start].valence; i ++ ) {
1554             num = mark_atoms_deriv( at, da, at[start].neighbor[i], num, cFlags, pbFound );
1555             if ( num < 0 )
1556                 return num;
1557         }
1558     }
1559     /* *pbFound =  number of derivatizer attachment points traversed forward from at[start] */
1560     return num; /* number of atoms traversed forward from at[start] */
1561 }
1562 /***************************************************************/
count_one_bond_atoms(inp_ATOM * at,DERIV_AT * da,int start,int ord,char cFlags,int * bFound)1563 int count_one_bond_atoms( inp_ATOM *at, DERIV_AT *da, int start, int ord, char cFlags, int *bFound )
1564 {
1565     int num = 0;
1566     if ( !(at[at[start].neighbor[ord]].cFlags & cFlags) ) {
1567         at[at[start].neighbor[ord]].cFlags |= cFlags;
1568         num ++;
1569         num = mark_atoms_deriv( at, da, start, num, cFlags, bFound );
1570     }
1571     return num;
1572 }
1573 /***************************************************************
1574   List of allowed derivatives
1575 
1576   Legend:
1577 
1578   ->- marks the bond to be disconnexted: X->-Y => XD + TY
1579       where TY is a derivatizing agent eventually to be discarded
1580 
1581   Allowed Derivative Types List
1582   =============================
1583 
1584   DERIV_BRIDGE_O, DERIV_BRIDGE_NH, DERIV_AMINE_tN
1585   -----------------------------------------------
1586      CH3                CH3  CH3           CH3   CH3
1587      |                  |    |             |     |
1588  X->-Si--CH3        X->-Si---Si--CH3   X->-Si----C--CH3  X= O, NH, N
1589      |                  |    |             |     |
1590      CH3                CH3  CH3           CH3   CH3
1591 
1592      4 atoms           7 atoms             7 atoms        is_silyl()
1593 
1594 
1595 
1596 
1597      O                 O                     O      F         O
1598      ||                ||                    ||     |         ||
1599   R--C--O->-CH3     R--C--O->-CH2--CH3    R--C--O->-C--F   R--C--O->-CF2-CF2-CF3
1600                                                     |
1601                                                     F
1602 
1603 
1604 
1605            1 atom             2 atoms            3 atoms          10 atoms
1606            is_Me_or_Et()      is_Me_or_Et()         is_CF3_or_linC3F7()
1607 
1608 
1609  A. DERIV_BRIDGE_NH, DERIV_AMINE_tN
1610  -----------------------------------
1611 
1612 
1613      O                 O                     O   F             O
1614      ||                ||                    ||  |             ||
1615  N->-C--CH3        N->-C--CH2--CH3       N->-C---C--F      N->-C--CF2-CF2-CF3
1616                                                  |
1617                                                  F
1618 
1619 
1620 
1621     3 atoms           5 atoms              8 atoms                12 atoms
1622     is_Me_or_Et()     is_Me_or_Et()              is_CF3_or_linC3F7()
1623 
1624  DERIV_RING_O (da contains >B- or >C< or >CH- atom)
1625  ------------
1626 
1627   C----O               R----O                  R----O
1628   |     \              |     \     CH3         |     \
1629   |      >             |      >   /            |      >
1630   |       \            |       \ /             |       \
1631   |        B--CH3      |        C              |        CH--Ph
1632   |       /            |       / \             |       /
1633   |      >             |      >   \            |      >
1634   |     /              |     /     CH3         |     /
1635   C----O               R----O                  R----O
1636 
1637   5-member             5 or 6-member           5 or 6-member
1638 
1639 
1640            2 atoms              3 atoms                 7 atoms
1641 
1642  DERIV_RING_NH
1643  ------------
1644 
1645  None in the list
1646 
1647 ***************************************************************/
is_silyl(inp_ATOM * at,int start,int ord_prev)1648 int is_silyl( inp_ATOM *at, int start, int ord_prev )
1649 {
1650     int i, neigh, num_Me=0, iC_IV=-1, iSi_IV=-1, i_C_or_Si_IV;
1651 
1652     if ( at[start].el_number != el_number_Si || at[start].valence != 4 ||
1653          at[start].valence != at[start].chem_bonds_valence ||
1654          at[start].charge ||  at[start].radical )
1655         return 0;
1656     for ( i = 0; i < at[start].valence; i ++ ) {
1657         if ( i == ord_prev )
1658             continue;
1659         neigh = at[start].neighbor[i];
1660         if ( at[neigh].charge || at[neigh].radical ||
1661              at[neigh].valence != at[neigh].chem_bonds_valence)
1662             return 0;
1663         if ( at[neigh].valence == 4 ) {
1664             if ( at[neigh].el_number == el_number_C && iC_IV < 0 && iSi_IV < 0 )
1665                 iC_IV = neigh;
1666             else
1667             if ( at[neigh].el_number == el_number_Si && iC_IV < 0 && iSi_IV < 0 )
1668                 iSi_IV = neigh;
1669             else
1670                 return 0;
1671         } else
1672         if ( at[neigh].valence == 1 &&
1673              at[neigh].valence == at[neigh].chem_bonds_valence &&
1674              at[neigh].el_number == el_number_C && at[neigh].num_H == 3 ) {
1675             num_Me ++;
1676         } else {
1677             return 0;
1678         }
1679     }
1680     if ( num_Me == 3 && iC_IV < 0 && iSi_IV < 0 )
1681         return 1; /* Si(CH3)3 */
1682 
1683     /* next C(IV) or Si(IV) */
1684     i_C_or_Si_IV = iC_IV >= 0? iC_IV : iSi_IV >= 0? iSi_IV : -1;
1685     if ( num_Me != 2 || i_C_or_Si_IV < 0 )
1686         return 0;
1687 
1688     num_Me = 0;
1689     for ( i = 0; i < at[i_C_or_Si_IV].valence; i ++ ) {
1690         neigh = at[i_C_or_Si_IV].neighbor[i];
1691         if ( neigh == start )
1692             continue;
1693         if ( at[neigh].charge || at[neigh].radical ||
1694              at[neigh].valence != at[neigh].chem_bonds_valence)
1695             return 0;
1696         if ( at[neigh].valence == 1 &&
1697              at[neigh].valence == at[neigh].chem_bonds_valence &&
1698              at[neigh].el_number == el_number_C && at[neigh].num_H == 3 ) {
1699             num_Me ++;
1700         } else {
1701             return 0;
1702         }
1703     }
1704     if ( num_Me == 3 )
1705         return 2; /* Si(CH3)2Si/C(CH3)3 */
1706     return 0;
1707 }
1708 /****************************************************************/
is_Me_or_Et(inp_ATOM * at,int start,int ord_prev)1709 int is_Me_or_Et( inp_ATOM *at, int start, int ord_prev )
1710 {
1711     int i, neigh = -1;
1712     if ( at[start].el_number != el_number_C || at[start].valence > 2 ||
1713          at[start].valence != at[start].chem_bonds_valence ||
1714          at[start].chem_bonds_valence + at[start].num_H != 4 ||
1715          at[start].charge ||  at[start].radical )
1716         return 0;
1717     for ( i = 0; i < at[start].valence; i ++ ) {
1718         if ( i == ord_prev )
1719             continue;
1720         if ( neigh >= 0 )
1721             return 0;
1722 
1723         neigh = at[start].neighbor[i];
1724         if ( at[neigh].el_number != el_number_C || at[neigh].valence > 1 ||
1725              at[neigh].valence != at[neigh].chem_bonds_valence ||
1726              at[neigh].chem_bonds_valence + at[neigh].num_H != 4 ||
1727              at[neigh].charge ||  at[neigh].radical )
1728             return 0;
1729     }
1730     return 1 + (neigh >= 0);
1731 }
1732 #ifdef NEVER
1733 /****************************************************************
1734               CF3
1735  -CF3  or -CF<
1736               CF3
1737 *****************************************************************/
is_CF3_or_isoC3F7(inp_ATOM * at,int start,int ord_prev)1738 int is_CF3_or_isoC3F7( inp_ATOM *at, int start, int ord_prev )
1739 {
1740     int i, k, num_C_IV, iC_IV[2], neigh, num_F, iC;
1741     if ( at[start].el_number != el_number_C || at[start].valence != 4 ||
1742          at[start].valence != at[start].chem_bonds_valence ||
1743          at[start].chem_bonds_valence + at[start].num_H != 4 ||
1744          at[start].charge ||  at[start].radical )
1745         return 0;
1746 
1747     iC_IV[0] = iC_IV[1] = num_F = 0;
1748 
1749     for ( i = num_C_IV = 0; i < at[start].valence; i ++ ) {
1750         if ( i == ord_prev )
1751             continue;
1752 
1753         neigh = at[start].neighbor[i];
1754         if ( at[neigh].valence != at[neigh].chem_bonds_valence ||
1755              at[neigh].charge ||  at[neigh].radical )
1756             return 0;
1757         if ( at[neigh].el_number == el_number_F ) {
1758             if ( at[neigh].chem_bonds_valence + at[neigh].num_H != 1 )
1759                 return 0;
1760             num_F ++;
1761         } else
1762         if ( at[neigh].el_number == el_number_C &&
1763              at[neigh].valence == 4 &&
1764              !at[neigh].num_H && !at[neigh].charge && !at[neigh].radical && num_C_IV < 2 ) {
1765 
1766             if ( num_C_IV > 1 )
1767                 return 0;
1768 
1769             iC_IV[num_C_IV++] = neigh;
1770         }
1771     }
1772     if ( !num_C_IV && 3 == num_F )
1773         return 1; /* -CF3 */
1774     if ( 2 != num_C_IV || 1 != num_F )
1775         return 0;
1776 
1777     /* detect iso-C3F7 */
1778     for ( k = 0; k < num_C_IV; k ++ ) {
1779         iC = iC_IV[k];
1780         num_F = 0;
1781         for ( i = 0; i < at[iC].valence; i ++ ) {
1782             neigh = at[iC].neighbor[i];
1783             if ( neigh == start )
1784                 continue;
1785             if ( at[neigh].valence != at[neigh].chem_bonds_valence ||
1786                  at[neigh].charge ||  at[neigh].radical )
1787                 return 0;
1788             if ( at[neigh].el_number == el_number_F ) {
1789                 if ( at[neigh].chem_bonds_valence + at[neigh].num_H != 1 )
1790                     return 0;
1791                 num_F ++;
1792             } else {
1793                 return 0;
1794             }
1795         }
1796         if ( num_F != 3 )
1797             return 0;
1798     }
1799     return 2; /* iso-C3F7 */
1800 }
1801 #endif
1802 /**************************************************************/
is_CF3_or_linC3F7(inp_ATOM * at,int start,int ord_prev)1803 int is_CF3_or_linC3F7( inp_ATOM *at, int start, int ord_prev )
1804 {
1805     int i, num_C_IV, iC_IV, neigh, num_F, num_C=0;
1806     AT_NUMB *p;
1807 
1808     while( num_C < 4 ) {
1809 
1810         if ( at[start].el_number != el_number_C || at[start].valence != 4 ||
1811              at[start].valence != at[start].chem_bonds_valence ||
1812              at[start].chem_bonds_valence + at[start].num_H != 4 ||
1813              at[start].charge ||  at[start].radical )
1814             return 0;
1815 
1816         iC_IV = num_F = 0;
1817 
1818         for ( i = num_C_IV = 0; i < at[start].valence; i ++ ) {
1819             if ( i == ord_prev )
1820                 continue;
1821 
1822             neigh = at[start].neighbor[i];
1823             if ( at[neigh].valence != at[neigh].chem_bonds_valence ||
1824                  at[neigh].charge ||  at[neigh].radical )
1825                 return 0;
1826             if ( at[neigh].el_number == el_number_F ) {
1827                 if ( at[neigh].chem_bonds_valence + at[neigh].num_H != 1 )
1828                     return 0;
1829                 num_F ++;
1830             } else
1831             if ( at[neigh].el_number == el_number_C &&
1832                  at[neigh].valence == 4 &&
1833                  !at[neigh].num_H && !at[neigh].charge && !at[neigh].radical && num_C_IV < 2 ) {
1834 
1835                 if ( num_C_IV )
1836                     return 0;
1837 
1838                 iC_IV = neigh;
1839                 num_C_IV++;
1840             }
1841         }
1842         if ( num_C_IV + num_F != 3 )
1843             return 0;
1844 
1845         num_C ++; /* found -CF2-C or -CF3 */
1846         if ( !num_C_IV )
1847             break; /* -CF3 */
1848 
1849         /* treat next C(IV) as a new start atom */
1850         if ( p = is_in_the_list( at[iC_IV].neighbor, (AT_NUMB) start, at[iC_IV].valence ) ) {
1851             start = iC_IV;
1852             ord_prev = p - at[iC_IV].neighbor;
1853         } else {
1854             return -1; /* program error */
1855         }
1856     }
1857     return num_C == 1? 1 : num_C == 3? 2 : 0;
1858 }
1859 /****************************************************************/
is_phenyl(inp_ATOM * at,int start,int ord_prev)1860 int is_phenyl( inp_ATOM *at, int start, int ord_prev )
1861 {
1862     int k, neigh, cur_at, ord;
1863     if ( at[start].el_number != el_number_C || at[start].valence != 3 ||
1864          at[start].valence+1 != at[start].chem_bonds_valence ||
1865          at[start].chem_bonds_valence + at[start].num_H != 4 ||
1866          at[start].charge ||  at[start].radical )
1867         return 0;
1868 
1869     ord = (ord_prev + 1) % 3;
1870     cur_at = start;
1871 
1872     for ( k = 0; k < 5; k ++ ) {
1873         neigh = at[cur_at].neighbor[ord];
1874         if ( at[neigh].el_number != el_number_C || at[neigh].valence != 2 ||
1875              at[neigh].valence+1 != at[neigh].chem_bonds_valence ||
1876              at[neigh].chem_bonds_valence + at[neigh].num_H != 4 ||
1877              at[neigh].charge ||  at[neigh].radical )
1878             return 0;
1879         ord = (at[neigh].neighbor[0] == cur_at);
1880         cur_at = neigh;
1881     }
1882     return (at[cur_at].neighbor[ord] == start);
1883 }
1884 /****************************************************************/
is_deriv_ring(inp_ATOM * at,int start,int num_atoms,DERIV_AT * da1,int idrv)1885 int is_deriv_ring( inp_ATOM *at, int start, int num_atoms, DERIV_AT *da1, int idrv )
1886 {
1887     int i, k, neigh_at[2], prev_ord[2], neigh, is_B=0, is_C=0;
1888     AT_NUMB *p;
1889     if ( da1->typ[idrv] != DERIV_RING_O || da1->typ[idrv+1] != DERIV_RING_O )
1890         return 0;
1891     if ( at[start].charge || at[start].radical || at[start].valence != at[start].chem_bonds_valence )
1892         return 0;
1893     if ( at[start].el_number == el_number_B && at[start].valence == 3 )
1894         is_B = 1;
1895     else
1896     if ( at[start].el_number == el_number_C && (at[start].valence == 3 || at[start].valence == 4) &&
1897          at[start].chem_bonds_valence == at[start].valence &&
1898          at[start].num_H + at[start].chem_bonds_valence == 4 )
1899         is_C = 1;
1900     else
1901         return 0;
1902     /* locate bonds connecting >B- or >C< or >C- to the rest of the derivative */
1903     for ( i = k = 0; i < at[start].valence; i ++ ) {
1904         if ( i != da1->ord[idrv] && i != da1->ord[idrv+1] ) {
1905             neigh = at[start].neighbor[i];
1906             if ( k < 2 && (p = is_in_the_list( at[neigh].neighbor, (AT_NUMB) start, at[neigh].valence)) ) {
1907                 neigh_at[k] = neigh;
1908                 prev_ord[k] = p - at[neigh].neighbor;
1909                 k ++;
1910             } else {
1911                 return -1; /* program error */
1912             }
1913         }
1914     }
1915     if ( is_B && k == 1 && is_Me_or_Et( at, neigh_at[0], prev_ord[0]) )
1916         return 1;
1917     if ( is_C && k == 1 && at[start].num_H == 1 && is_phenyl( at, neigh_at[0], prev_ord[0]) )
1918         return 1;
1919     if ( is_C && k == 2 && is_Me_or_Et( at, neigh_at[0], prev_ord[0]) &&
1920                            is_Me_or_Et( at, neigh_at[1], prev_ord[1]) )
1921         return 1;
1922 
1923     return 0;
1924 }
1925 /****************************************************************/
is_deriv_chain(inp_ATOM * at,int start,int num_atoms,DERIV_AT * da1,int idrv)1926 int is_deriv_chain( inp_ATOM *at, int start, int num_atoms, DERIV_AT *da1, int idrv )
1927 {
1928     int i, k, prev_ord, neigh, iC, iO, iNxt;
1929     AT_NUMB *p;
1930     if ( !da1->typ[idrv] || (da1->typ[idrv] & DERIV_RING) )
1931         return 0;
1932     if ( at[start].charge || at[start].radical || at[start].valence != at[start].chem_bonds_valence )
1933         return 0;
1934 
1935     neigh = at[start].neighbor[(int)da1->ord[idrv]];
1936     p = is_in_the_list( at[neigh].neighbor, (AT_NUMB) start, at[neigh].valence);
1937     if ( !p )
1938         return -1; /* program error */
1939     prev_ord = p - at[neigh].neighbor;
1940 
1941     /* eliminate silyl possibility */
1942     if ( is_silyl( at, neigh, prev_ord ) )
1943         return 1;
1944 
1945     if ( da1->typ[idrv] == DERIV_BRIDGE_O ) {
1946         /* check acetyl */
1947         iC = at[start].neighbor[!da1->ord[idrv]];
1948         if ( at[iC].charge || at[iC].radical || at[iC].num_H ||
1949              at[iC].el_number != el_number_C || at[iC].valence != 3 ||
1950              at[iC].valence+1 != at[iC].chem_bonds_valence )
1951             return 0;
1952         for ( i = k = 0; i < at[iC].valence; i ++ ) {
1953             iO = at[iC].neighbor[i];
1954             if ( at[iO].charge || at[iO].radical || at[iO].num_H ||
1955                  at[iO].el_number != el_number_O || at[iO].valence != 1 ||
1956                  at[iO].valence+1 != at[iO].chem_bonds_valence )
1957                 continue;
1958             k ++; /* number of =O */
1959         }
1960         if ( k != 1 )
1961             return 0;
1962         /* check derivative */
1963         return ( is_Me_or_Et( at, neigh, prev_ord ) ||
1964                  is_CF3_or_linC3F7( at, neigh, prev_ord ) );
1965     }
1966 
1967     if ( da1->typ[idrv] == DERIV_BRIDGE_NH || da1->typ[idrv] == DERIV_AMINE_tN ) {
1968         /* check acetyl */
1969         iNxt = -1;
1970         iC = at[start].neighbor[(int)da1->ord[idrv]];
1971         if ( at[iC].charge || at[iC].radical || at[iC].num_H ||
1972              at[iC].el_number != el_number_C || at[iC].valence != 3 ||
1973              at[iC].valence+1 != at[iC].chem_bonds_valence )
1974             return 0;
1975         for ( i = k = 0; i < at[iC].valence; i ++ ) {
1976             iO = at[iC].neighbor[i];
1977             if ( at[iO].charge || at[iO].radical || at[iO].num_H ||
1978                  at[iO].el_number != el_number_O || at[iO].valence != 1 ||
1979                  at[iO].valence+1 != at[iO].chem_bonds_valence ) {
1980                 if ( iO != start ) {
1981                     if ( iNxt < 0 )
1982                         iNxt = iO;
1983                     else
1984                         return 0;
1985                 }
1986                 continue;
1987             }
1988             k ++; /* number of =O */
1989         }
1990         if ( k != 1 || iNxt < 0 )
1991             return 0;
1992         /* find bond from iNxt to iC */
1993         p = is_in_the_list( at[iNxt].neighbor, (AT_NUMB) iC, at[iNxt].valence);
1994         if ( !p )
1995             return -1; /* program error */
1996         prev_ord = p - at[iNxt].neighbor;
1997         /* check derivative */
1998         return ( is_Me_or_Et( at, iNxt, prev_ord ) ||
1999                  is_CF3_or_linC3F7( at, iNxt, prev_ord ) );
2000     }
2001     return 0;
2002 }
2003 /****************************************************************/
is_deriv_chain_or_ring(inp_ATOM * at,int start,int num_atoms,DERIV_AT * da1,int * idrv)2004 int is_deriv_chain_or_ring( inp_ATOM *at, int start, int num_atoms, DERIV_AT *da1, int *idrv )
2005 {
2006     int i, ret = -1;
2007     if ( da1->typ[*idrv] & DERIV_RING ) {
2008         /* find the first ord of this derivative; ord of ring derivatives are in pairs */
2009         int j = -1;
2010         for ( i = 0; i < DERIV_AT_LEN && da1->typ[i]; i ++ ) {
2011             if ( da1->typ[i] & DERIV_RING ) {
2012                 if ( i == *idrv || i+1 == *idrv ) {
2013                     *idrv = j = i;
2014                     break;
2015                 }
2016                 i ++; /* bypass the second bond to the same derivatization agent */
2017             }
2018         }
2019         /* check consistency */
2020         if ( j == -1 || j+1 >= DERIV_AT_LEN ||
2021              !(da1->typ[j] & DERIV_RING) || !(da1->typ[j+1] & DERIV_RING) ) {
2022             ret = -1; /* program error */
2023         } else {
2024             ret = is_deriv_ring( at, start, num_atoms, da1, j );
2025         }
2026     } else
2027     if ( da1->typ[*idrv] ) {
2028         ret = is_deriv_ring( at, start, num_atoms, da1, *idrv );
2029     }
2030     return ret;
2031 }
2032 /******************************************************/
remove_deriv(DERIV_AT * da1,int idrv)2033 int remove_deriv( DERIV_AT *da1, int idrv )
2034 {
2035     int i, j, ret = -1;
2036     if ( da1->typ[idrv] & DERIV_RING ) {
2037         /* find the first ord of this derivative; ord of ring derivatives are in pairs */
2038         j = -1;
2039         for ( i = 0; i < DERIV_AT_LEN && da1->typ[i]; i ++ ) {
2040             if ( da1->typ[i] & DERIV_RING ) {
2041                 if ( i == idrv || i+1 == idrv ) {
2042                     j = i;
2043                     break;
2044                 }
2045                 i ++; /* bypass the second bond to the same derivatization agent */
2046             }
2047         }
2048         /* delete if data are consistent */
2049         if ( j >= 0 && j+1 < DERIV_AT_LEN && (da1->typ[j] & DERIV_RING) && (da1->typ[j+1] & DERIV_RING) ) {
2050             for ( ; j < DERIV_AT_LEN-2 && da1->typ[j+2]; j ++ ) {
2051                 da1->typ[j] = da1->typ[j+2];
2052                 da1->num[j] = da1->num[j+2];
2053                 da1->ord[j] = da1->ord[j+2];
2054             }
2055             for ( ; j < DERIV_AT_LEN; j ++ ) {
2056                 da1->typ[j] = 0;
2057                 da1->num[j] = 0;
2058                 da1->ord[j] = 0;
2059             }
2060             ret = 0;
2061         }
2062     } else {
2063         j = idrv;
2064 
2065         for ( ; j < DERIV_AT_LEN-1 && da1->typ[j+1]; j ++ ) {
2066             da1->typ[j] = da1->typ[j+1];
2067             da1->num[j] = da1->num[j+1];
2068             da1->ord[j] = da1->ord[j+1];
2069         }
2070         for ( ; j < DERIV_AT_LEN; j ++ ) {
2071             da1->typ[j] = 0;
2072             da1->num[j] = 0;
2073             da1->ord[j] = 0;
2074         }
2075         ret = 0;
2076     }
2077     return ret;
2078 }
2079 /******************************************************/
remove_deriv_mark(DERIV_AT * da1,int idrv)2080 int remove_deriv_mark( DERIV_AT *da1, int idrv )
2081 {
2082     int i, j, ret = -1;
2083     if ( da1->typ[idrv] & DERIV_RING ) {
2084         /* find the first ord of this derivative; ord of ring derivatives are in pairs */
2085         j = -1;
2086         for ( i = 0; i < DERIV_AT_LEN && da1->typ[i]; i ++ ) {
2087             if ( da1->typ[i] & DERIV_RING ) {
2088                 if ( i == idrv || i+1 == idrv ) {
2089                     j = i;
2090                     break;
2091                 }
2092                 i ++; /* bypass the second bond to the same derivatization agent */
2093             }
2094         }
2095         /* delete if data are consistent */
2096         if ( j >= 0 && j+1 < DERIV_AT_LEN && (da1->typ[j] & DERIV_RING) && (da1->typ[j+1] & DERIV_RING) ) {
2097             da1->typ[j] |= DERIV_DUPLIC;
2098             da1->typ[j+1] |= DERIV_DUPLIC;
2099             ret = 0;
2100         }
2101     } else {
2102         j = idrv;
2103         da1->typ[j] |= DERIV_DUPLIC;
2104         ret = 0;
2105     }
2106     return ret;
2107 }
2108 /****************************************************************/
EliminateDerivNotInList(inp_ATOM * at,DERIV_AT * da,int num_atoms)2109 int EliminateDerivNotInList( inp_ATOM *at, DERIV_AT *da, int num_atoms )
2110 {
2111     int i, j, num_da, num_cuts=0, ret=0;
2112     for ( i = 0; i < num_atoms; i ++ ) {
2113         if ( !da[i].typ[0] )
2114             continue;
2115         /* count deriative attachments */
2116         for ( num_da = 0; num_da < DERIV_AT_LEN && da[i].typ[num_da]; num_da ++ )
2117             ;
2118         if ( num_da > 2 )
2119             return -1; /* should not happen */
2120         if ( num_da == 2 && da[i].typ[0] != da[i].typ[1] ) {
2121             da[i].typ[0] = da[i].typ[1] = 0; /* do not allow */
2122             continue;
2123         }
2124         if ( da[i].typ[0] & DERIV_RING ) {
2125             ret = 0;
2126             if ( num_da == 2 && 1 + da[i].num[0] + da[i].num[1] <= MAX_AT_DERIV &&
2127                  0 < (ret=is_deriv_ring( at, i, num_atoms, da+i, 0 ) ) ) {
2128                 num_cuts += 2;
2129             } else
2130             if ( ret < 0 ) {
2131                 return ret;
2132             } else {
2133                 da[i].typ[0] = da[i].typ[1] = 0; /* not a derivative */
2134             }
2135         } else {
2136             ret = 0;
2137             if ( da[i].num[0] <= MAX_AT_DERIV && 0 < (ret = is_deriv_chain( at, i, num_atoms, da+i, 0 )) ) {
2138                 num_cuts ++;
2139                 j = 1;
2140             } else
2141             if ( ret < 0 ) {
2142                 return ret;
2143             } else {
2144                 da[i].ord[0] = da[i].ord[1];
2145                 da[i].num[0] = da[i].num[1];
2146                 da[i].typ[0] = da[i].typ[1];
2147                 da[i].typ[1] = 0;
2148                 j = 0;
2149             }
2150             if ( da[i].typ[j] && da[i].num[j] <= MAX_AT_DERIV &&
2151                  0 < (ret = is_deriv_chain( at, i, num_atoms, da+i, j )) ) {
2152                 num_cuts ++;
2153             } else
2154             if ( ret < 0 ) {
2155                 return ret;
2156             } else {
2157                 da[i].typ[j] = 0;
2158             }
2159         }
2160     }
2161     return num_cuts;
2162 }
2163 /***************************************************************/
make_single_cut(inp_ATOM * at,DERIV_AT * da,int iat,int icut)2164 int make_single_cut( inp_ATOM *at, DERIV_AT *da, int iat, int icut )
2165 {
2166     int ret = -1; /* error flag */
2167     int iord = (int)da[iat].ord[icut]; /* ord of the bond in iat */
2168     if ( da[iat].typ[icut] & DERIV_DUPLIC ) {
2169         return 0;
2170     } else
2171     if ( iord < 0 ) {
2172         return -1; /* program error */
2173     } else {
2174         /* find other da[] that affect at[iat] */
2175         int jat  = at[iat].neighbor[iord];  /* opposite atom */
2176         AT_NUMB *p = is_in_the_list( at[jat].neighbor, (AT_NUMB) iat, at[jat].valence );
2177         int jord = p? (p - at[jat].neighbor) : -1;
2178         int i, iD=1, iT=2;
2179         if ( jord < 0 ) {
2180             return -1;  /* program error */
2181         }
2182         ret = DisconnectInpAtBond( at, NULL, iat, iord );
2183         if ( ret == 1 ) {
2184             if ( da[iat].typ[icut] & DERIV_RING ) {
2185                 /* at[jat] belongs to the main structure */
2186                 at[jat].num_H ++;        /* add D to the main structure */
2187                 at[jat].num_iso_H[iD] ++;
2188                 at[iat].num_H ++;        /* add T to the derivatizing fragment */
2189                 at[iat].num_iso_H[iT] ++;
2190             } else {
2191                 at[iat].num_H ++;        /* add D to the main structure */
2192                 at[iat].num_iso_H[iD] ++;
2193                 at[jat].num_H ++;        /* add T to the derivatizing fragment */
2194                 at[jat].num_iso_H[iT] ++;
2195             }
2196             /* adjust ord for other bonds */
2197             for ( i = 0; i < DERIV_AT_LEN && da[iat].typ[i]; i ++ ) {
2198                 if ( da[iat].ord[i] == iord ) {
2199                     da[iat].ord[i] = -(1+da[iat].ord[i]); /* mark the bond being disconnected */
2200                 } else
2201                 if ( da[iat].ord[i] > iord ) {
2202                     da[iat].ord[i] --;
2203                 }
2204             }
2205             for ( i = 0; i < DERIV_AT_LEN && da[jat].typ[i]; i ++ ) {
2206                 if ( da[jat].ord[i] == jord ) {
2207                     /* opposite atom needs the same bond to be disconnected */
2208                     if ( da[iat].num[icut] == da[jat].num[i] ) {
2209                         iD=2; /* mark both as fragments */
2210                     } else
2211                     if ( da[iat].num[icut] > da[jat].num[i] ) {
2212                         iD = 2; /* greater as a main structure */
2213                         iT = 1; /* mark smaller as a derivatizing fragment */
2214                     }
2215                     da[jat].ord[i]  = -(1+da[jat].ord[i]);
2216                     da[jat].typ[i] |= DERIV_DUPLIC;
2217                 } else
2218                 if ( da[jat].ord[i] > jord ) {
2219                     da[jat].ord[i] --;
2220                 }
2221             }
2222         }
2223     }
2224     return ret;
2225 }
2226 /***************************************************************/
underivatize(ORIG_ATOM_DATA * orig_inp_data)2227 int underivatize( ORIG_ATOM_DATA *orig_inp_data )
2228 {
2229 #define REMOVE_CUT_DERIV 1  /* remove disconnected derivatizing agents */
2230     int ret = 0, i, j, k, m, n, num_atoms, num_components, i_component, nFound, num, cur_num_at, len;
2231     int num_cuts, num_ring_cuts, num_cut_pieces, num_cuts_to_check;
2232     inp_ATOM *at = orig_inp_data->at;
2233     INP_ATOM_DATA *inp_cur_data = NULL;
2234     DERIV_AT      *da           = NULL;
2235     int  nTotNumCuts = 0;
2236 
2237     set_R2C_el_numbers( );
2238     /* prepare */
2239     num_atoms = remove_terminal_HDT( orig_inp_data->num_inp_atoms, at, 1 );
2240                 /*^^^^^ always accomodate accomodate FIX_TERM_H_CHRG_BUG - IPl, July 2008*/
2241     orig_inp_data->num_inp_atoms = num_atoms;
2242 
2243     /* initialize */
2244     UnMarkDisconnectedComponents( orig_inp_data );
2245     num_cuts = 0;
2246     /* mark */
2247     num_components = MarkDisconnectedComponents( orig_inp_data, 0 );
2248     inp_cur_data = (INP_ATOM_DATA *)inchi_calloc( num_components, sizeof(inp_cur_data[0]) );
2249     for ( i_component = 0; i_component < num_components; i_component ++ ) {
2250         CreateInpAtomData( inp_cur_data+i_component, orig_inp_data->nCurAtLen[i_component], 0 );
2251         inp_cur_data[i_component].num_at = ExtractConnectedComponent( orig_inp_data->at, orig_inp_data->num_inp_atoms, i_component+1, inp_cur_data[i_component].at );
2252         /*  error processing */
2253         if ( inp_cur_data[i_component].num_at <= 0 || orig_inp_data->nCurAtLen[i_component] != inp_cur_data[i_component].num_at ) {
2254             ret = -(i_component+1); /* severe error */
2255             goto exit_function;
2256         }
2257         /* initialize */
2258         num_atoms = inp_cur_data[i_component].num_at;
2259         at        = inp_cur_data[i_component].at;
2260         add_DT_to_num_H( num_atoms, at );
2261 
2262         UnMarkRingSystemsInp( at, num_atoms );
2263         UnMarkOtherIndicators( at, num_atoms );
2264         UnMarkOneComponent( at, num_atoms );
2265         MarkRingSystemsInp( at, num_atoms, 0 );
2266         ret = mark_arom_bonds( at, num_atoms );
2267         if ( ret < 0 ) {
2268             goto exit_function;
2269         }
2270         ret = 0;
2271         if ( da ) inchi_free( da );
2272         da = (DERIV_AT *)inchi_calloc( num_atoms, sizeof(da[0]) );
2273 
2274         /* detect derivatives */
2275         nFound = 0;
2276         for ( i = 0; i < num_atoms; i ++ ) {
2277             if ( at[i].bCutVertex && !da[i].typ[0] ) {
2278                 for ( k = 0; k < at[i].valence; k ++ ) {
2279                     num = count_one_bond_atoms( at, da, i, k, CFLAG_MARK_BRANCH, &nFound );
2280                     UnMarkOtherIndicators( at, num_atoms );
2281                     if ( num < 0 ) {
2282                         ret = num; /* severe error */
2283                         goto exit_function;
2284                     }
2285                 }
2286             }
2287         }
2288         /* prepare cuts: remove cuts that are not to be done */
2289         /* in addition, count ring cuts DERIV_RING */
2290         num_ring_cuts  = 0;
2291         num_cuts       = 0;
2292         num_cut_pieces = 0;
2293         for ( i = num = 0; i < num_atoms; i ++ ) {
2294             for ( len = 0; len < MAX_AT_DERIV && da[i].typ[len]; len ++ )
2295                 ;
2296             switch( len ) {
2297             case 0:
2298                 continue;
2299             case 1:
2300                 /* single cut: unconditional */
2301                 num_cuts       += 1;
2302                 num_cut_pieces += 1;
2303                 continue;
2304             case 2:
2305                 if ( (da[i].typ[0] & DERIV_RING) && (da[i].typ[1] & DERIV_RING) ) {
2306                     /* double cut, unconditional */
2307                     num_ring_cuts  += 2;
2308                     num_cuts       += 2;
2309                     num_cut_pieces += 1;
2310                     continue;
2311                 } else
2312                 if ( da[i].typ[0] == DERIV_AMINE_tN && da[i].typ[1] == DERIV_AMINE_tN ) {
2313                     /* double cut, unconditional */
2314                     num_cuts       += 2;
2315                     num_cut_pieces += 2;
2316                     continue;
2317                 }
2318                 if ( da[i].typ[0] == da[i].typ[1] ) {
2319                     /* DERIV_BRIDGE_O or DERIV_BRIDGE_NH; cut off the smallest */
2320                     if ( 0 == is_deriv_chain( at, i, num_atoms, da+i, 0 ) ) {
2321                         da[i].num[0] = NOT_AT_DERIV;
2322                     }
2323                     if ( 0 == is_deriv_chain( at, i, num_atoms, da+i, 1 ) ) {
2324                         da[i].num[1] = NOT_AT_DERIV;
2325                     }
2326                     if ( da[i].num[0] > da[i].num[1] ) {
2327                         da[i].num[0] = da[i].num[1];
2328                         da[i].ord[0] = da[i].ord[1];
2329                         da[i].typ[0] = da[i].typ[1];
2330                         da[i].typ[1] = 0;
2331                         num_cuts       += 1;
2332                         num_cut_pieces += 1;
2333                     } else
2334                     if ( da[i].num[0] < da[i].num[1] ) {
2335                         da[i].typ[1] = 0;
2336                         num_cuts       += 1;
2337                         num_cut_pieces += 1;
2338                     } else {
2339                         /* attachments have same size: ignore both */
2340                         /* ??? check for standard derivatizations ??? */
2341                         da[i].typ[0] = 0;
2342                         da[i].typ[1] = 0;
2343                     }
2344                     continue;
2345                 }
2346                 ret = -88;
2347                 goto exit_function; /* unexpected */
2348             case 3:
2349                 if ( da[i].typ[0] == da[i].typ[1] &&
2350                      da[i].typ[0] == da[i].typ[2] &&
2351                      da[i].typ[0] == DERIV_AMINE_tN ) {
2352                     int x, y, z;
2353                     if ( 0 == is_deriv_chain( at, i, num_atoms, da+i, 0 ) ) {
2354                         da[i].num[0] = NOT_AT_DERIV;
2355                     }
2356                     if ( 0 == is_deriv_chain( at, i, num_atoms, da+i, 1 ) ) {
2357                         da[i].num[1] = NOT_AT_DERIV;
2358                     }
2359                     if ( 0 == is_deriv_chain( at, i, num_atoms, da+i, 2 ) ) {
2360                         da[i].num[2] = NOT_AT_DERIV;
2361                     }
2362 
2363                     x = (da[i].num[0] < da[i].num[1])? 0 : 1;
2364                     x = (da[i].num[x] < da[i].num[2])? x : 2; /* min */
2365                     z = (da[i].num[0] < da[i].num[1])? 1 : 0;
2366                     z = (da[i].num[x] < da[i].num[2])? 2 : z; /* max */
2367                     y = ((x+1)^(z+1))-1;                      /* median */
2368 
2369 
2370                     if (da[i].num[x] == da[i].num[z] ) {
2371                         /* no cuts */
2372                         da[i].typ[0] = 0;
2373                         da[i].typ[1] = 0;
2374                         da[i].typ[2] = 0;
2375                         continue; /* all deriv. agents have same size, ignore */
2376                                   /* ??? check for standard derivatizations ??? */
2377                     } else
2378                     if ( da[i].num[x] == da[i].num[y] ) {
2379                         /* two smallest */
2380                         switch( z ) {
2381                         case 0:
2382                             da[i].num[0] = da[i].num[1];
2383                             da[i].ord[0] = da[i].ord[1];
2384                             da[i].typ[0] = da[i].typ[1];
2385 
2386                             da[i].num[1] = da[i].num[2];
2387                             da[i].ord[1] = da[i].ord[2];
2388                             da[i].typ[1] = da[i].typ[2];
2389                             break;
2390                         case 1:
2391                             da[i].num[1] = da[i].num[2];
2392                             da[i].ord[1] = da[i].ord[2];
2393                             da[i].typ[1] = da[i].typ[2];
2394                             break;
2395                         case 2:
2396                             break;
2397                         }
2398                         da[i].typ[2] = 0;
2399                         num_cuts       += 2;
2400                         num_cut_pieces += 2;
2401                     } else {
2402                         /* one smallest */
2403                         if ( x ) {
2404                             da[i].num[0] = da[i].num[x];
2405                             da[i].ord[0] = da[i].ord[x];
2406                             da[i].typ[0] = da[i].typ[x];
2407                         }
2408                         da[i].typ[1] = 0;
2409                         da[i].typ[2] = 0;
2410                         num_cuts       += 1;
2411                         num_cut_pieces += 1;
2412                     }
2413                     continue;
2414                 }
2415                 ret = -88;
2416                 goto exit_function; /* unexpected */
2417             case 4:
2418                 if ( (da[i].typ[0] & DERIV_RING) && (da[i].typ[1] & DERIV_RING) &&
2419                      (da[i].typ[2] & DERIV_RING) && (da[i].typ[3] & DERIV_RING) ) {
2420                     int n01 = inchi_max( da[i].num[0], da[i].num[1] );
2421                     int n23 = inchi_max( da[i].num[2], da[i].num[3] );
2422                     if ( n01 < n23 && 0 < is_deriv_ring( at, i, num_atoms, da+i, 0) ) {
2423                         da[i].typ[2] = 0;
2424                         da[i].typ[3] = 0;
2425                         num_cuts       += 2;
2426                         num_ring_cuts  += 2;
2427                         num_cut_pieces += 1;
2428                     } else
2429                     if ( n01 > n23 && 0 < is_deriv_ring( at, i, num_atoms, da+i, 2) ) {
2430                         da[i].num[0] = da[i].num[2];
2431                         da[i].ord[0] = da[i].ord[2];
2432                         da[i].typ[0] = da[i].typ[2];
2433 
2434                         da[i].num[1] = da[i].num[3];
2435                         da[i].ord[1] = da[i].ord[3];
2436                         da[i].typ[1] = da[i].typ[3];
2437 
2438                         da[i].typ[2] = 0;
2439                         da[i].typ[3] = 0;
2440                         num_cuts       += 2;
2441                         num_ring_cuts  += 2;
2442                         num_cut_pieces += 1;
2443                     } else {
2444                         da[i].typ[0] = 0;
2445                         da[i].typ[1] = 0;
2446                         da[i].typ[2] = 0;
2447                         da[i].typ[3] = 0;
2448                     }
2449                     continue;
2450                 }
2451                 ret = -88;
2452                 goto exit_function; /* unexpected */
2453             }
2454         }
2455 
2456         /* eliminate cases when
2457              da[i1].typ[j1] && da[i2].typ[j2] &&
2458              at[i1].neighbor[da[i1].ord[j1]] == i2 && at[i2].neighbor[da[i2].ord[j2]] == i1
2459            that is, same bond is in the da[] elements of the adjacent atoms */
2460         nFound = 0; /* number of cuts to remove */
2461         for ( i = 0; i < num_atoms; i ++ ) {
2462             for ( j = 0; j < MAX_AT_DERIV && da[i].typ[j]; j ++ ) {
2463                 if ( da[i].typ[j] & DERIV_DUPLIC )
2464                     continue;
2465                 n = at[i].neighbor[(int)da[i].ord[j]];
2466                 if ( n < i )
2467                     continue;
2468                 for ( k = 0; k < MAX_AT_DERIV && da[n].typ[k]; k ++ ) {
2469                     if ( da[n].typ[k] & DERIV_DUPLIC )
2470                         continue;
2471                     m = at[n].neighbor[(int)da[n].ord[k]];
2472                     if ( m == i ) {
2473                         /* same bond in da[i].typ[j] and da[n].typ[k] */
2474                         /* check whether both derivatives are acceptable */
2475                         int k1=k, j1=j;
2476                         int ret_i = is_deriv_chain_or_ring( at, i, num_atoms, da+i, &j1 );
2477                         int ret_n = is_deriv_chain_or_ring( at, n, num_atoms, da+n, &k1 );
2478                         if ( ret_i < 0 ) {
2479                             ret = ret_i;
2480                             goto exit_function;
2481                         }
2482                         if ( ret_n < 0 ) {
2483                             ret = ret_n;
2484                             goto exit_function;
2485                         }
2486                         if ( !ret_i || ret_i && ret_n ) {
2487                             if ( da[i].typ[j1] & DERIV_RING ) {
2488                                 num_cuts       -= 2;
2489                                 num_ring_cuts  -= 2;
2490                             } else {
2491                                 num_cuts       -= 1;
2492                             }
2493                             num_cut_pieces -= 1;
2494                             if (ret = remove_deriv_mark( da+i, j1 )) {
2495                                 goto exit_function;
2496                             }
2497                             nFound ++;
2498                         }
2499                         if ( !ret_n || ret_i && ret_n ) {
2500                             if ( da[n].typ[k1] & DERIV_RING ) {
2501                                 num_cuts       -= 2;
2502                                 num_ring_cuts  -= 2;
2503                             } else {
2504                                 num_cuts       -= 1;
2505                             }
2506                             num_cut_pieces -= 1;
2507                             if ( ret = remove_deriv_mark( da+n, k1 ) ) {
2508                                 goto exit_function;
2509                             }
2510                             nFound ++;
2511                         }
2512                     }
2513                 }
2514             }
2515         }
2516         if ( nFound ) {
2517             for ( i = 0; i < num_atoms; i ++ ) {
2518                 for ( j = 0; j < MAX_AT_DERIV && da[i].typ[j]; j ++ ) {
2519                    /* attn: j is changed inside the cycle body */
2520                    if ( da[i].typ[j] & DERIV_DUPLIC ) {
2521                         if (ret = remove_deriv( da+i, j )) {
2522                             goto exit_function;
2523                         }
2524                         j --;
2525                     }
2526                 }
2527             }
2528         }
2529 
2530         /* make sure DERIV_RING type disconnections increase */
2531         /* number of components by the number of disconnected derivateves */
2532         /* Avoid cases like these:
2533 
2534                  O--R--O             DO--R--OD
2535                 /       \
2536             R--X         Y--R => R--XT2     T2Y--R
2537                 \       /
2538                  O--R--O             DO--R--OD
2539 
2540 
2541 
2542                  O--O                 DO--OD
2543                 /    \
2544             R--X--O---Y--R    =>  R--X  OD2 Y--R
2545                                      T2     T2
2546 
2547         */
2548         /* count DERIV_RING-type attachments */
2549 #if ( COUNT_ALL_NOT_DERIV == 1 )
2550         num_cuts_to_check = num_cuts;
2551 #else
2552         num_cuts_to_check = num_ring_cuts;
2553 #endif
2554         if ( num_cuts_to_check >= 2 )
2555         {
2556             /* check */
2557             R2C_ATPAIR *ap = (R2C_ATPAIR *) inchi_malloc( num_cuts_to_check * sizeof(ap[0]) );
2558             AT_NUMB    comp_num;
2559             int        /*n,*/ m_at, m_ord;
2560             AT_NUMB at1, at2;
2561             if ( !ap ) {
2562                 ret = -1;
2563                 goto exit_function; /* malloc failure */
2564             }
2565 repeat_without_deriv_ring:
2566             comp_num = 0;
2567             /* fill out the array of bonds to be cut */
2568             for ( i = j = 0; i < num_atoms; i ++ ) {
2569                 if ( (da[i].typ[0] & DERIV_RING)   && (da[i].typ[1] & DERIV_RING) &&
2570                       da[i].num[0] <= MAX_AT_DERIV && da[i].num[1] <= MAX_AT_DERIV ) {
2571                     if ( j+1 >= num_cuts_to_check ) {
2572                         ret = -2;
2573                         goto exit_r2c_num; /* wrong number of cuts = num */
2574                     }
2575                     for ( k = 0; k < 2; k ++ ) {
2576                         at1 = i;
2577                         at2 = at[i].neighbor[(int)da[i].ord[k]];
2578                         n = ( at1 > at2 );
2579                         ap[j].at[n] = at1;
2580                         ap[j].at[1-n] = at2; /* ap[j].at[0] < ap[j].at[1] */
2581                         j ++;
2582                     }
2583                     if ( 0 < cmp_r2c_atpair( ap+j-2, ap+j-1 ) ) {
2584                         R2C_ATPAIR ap1 = ap[j-2];
2585                         ap[j-2] = ap[j-1];
2586                         ap[j-1] = ap1; /* sort each pair */
2587                     }
2588                 }
2589 #if ( COUNT_ALL_NOT_DERIV == 1 )
2590                 else {
2591                     for ( k = 0; k < DERIV_AT_LEN && da[i].typ[k]; k ++ ) {
2592                         if ( j >= num_cuts_to_check || (da[i].typ[k] & DERIV_RING) ) {
2593                             ret = -2;
2594                             goto exit_r2c_num; /* wrong number of cuts = num or wrong type */
2595                         }
2596                         at1 = i;
2597                         at2 = at[i].neighbor[(int)da[i].ord[k]];
2598                         n = ( at1 > at2 );
2599                         ap[j].at[n] = at1;
2600                         ap[j].at[1-n] = at2; /* ap[j].at[0] < ap[j].at[1] */
2601                         j ++;
2602                     }
2603                 }
2604 #endif
2605 
2606             }
2607             if ( j != num_cuts_to_check ) {
2608                 ret = -3;
2609                 goto exit_r2c_num; /* wrong number of cuts = num */
2610             }
2611             /* !!!!!!!! check that there are no derivatives inside a derivative */
2612             comp_num = 0; /* here it is the number of removed cuts */
2613             for ( i = 0; i < num_cuts_to_check; i += j ) {
2614                 for ( j = n = 0; j < 2; j ++ ) {
2615                     int atj = (int)ap[i].at[j];
2616                     if ( da[atj].typ[0] && at[atj].neighbor[(int)da[atj].ord[0]] == ap[i].at[1-j] ) {
2617                         k = j;
2618                         n ++;
2619                         m_at = atj;
2620                         m_ord = 0;
2621                     } else
2622                     if ( da[atj].typ[1] && at[atj].neighbor[(int)da[atj].ord[1]] == ap[i].at[1-j] ) {
2623                         k = j;
2624                         n ++;
2625                         m_at = atj;
2626                         m_ord = 1;
2627                     }
2628                 }
2629                 if ( n != 1 ) {
2630                     ret = -3;
2631                     goto exit_r2c_num; /* wrong atom pair */
2632                 }
2633                 if ( (da[m_at].typ[m_ord] & DERIV_RING) ) {
2634                     n = (int)ap[i].at[k];   /* atom inside the derivation attachment */
2635                     j = 2;             /* number of bonds to cut */
2636                     if ( i+j > num_cuts_to_check || (int)ap[i+1].at[0] != n && (int)ap[i+1].at[1] != n ) {
2637                         ret = -3;
2638                         goto exit_r2c_num; /* wrong atom pair */
2639                     }
2640                 } else {
2641                     n = ap[i].at[1-k]; /* atom inside the derivation attachment */
2642                     j = 1;             /* number of bonds to cut */
2643                 }
2644 
2645                 /* at[n] belongs to the derivation agent */
2646                 cur_num_at = mark_atoms_ap( at, n, ap+i, j, 0, 1 );
2647                 for ( k = 0; k < num_cuts_to_check; k ++ ) {
2648                     if ( k == i || k==i+j-1 )
2649                         continue;
2650                     if ( at[(int)ap[k].at[0]].at_type || at[(int)ap[k].at[1]].at_type ) {
2651                         /* unmark the cut: found a cut inside the derivatizing agent */
2652                         da[m_at].typ[m_ord] |= DERIV_UNMARK;
2653                         num_cuts       -= 1;
2654                         num_cut_pieces -= 1;
2655                         if ( j == 2 ) {
2656                             da[m_at].typ[1-m_ord] |= DERIV_UNMARK;
2657                             num_cuts       -= 1;
2658                             num_ring_cuts  -= 2;
2659                         }
2660                         comp_num ++;
2661                         break;
2662                     }
2663                 }
2664                 UnMarkOtherIndicators( at, num_atoms );
2665             }
2666             if ( comp_num ) {
2667                 for ( i = 0; i < num_atoms; i ++ ) {
2668                     if ( da[i].typ[0] & DERIV_UNMARK ) {
2669                         da[i].num[0] = da[i].num[1];
2670                         da[i].ord[0] = da[i].ord[1];
2671                         da[i].typ[0] = da[i].typ[1];
2672                         da[i].typ[1] = 0;
2673                         j = 0;
2674                     } else {
2675                         j = 1;
2676                     }
2677                     if ( da[i].typ[j] & DERIV_UNMARK ) {
2678                         da[i].typ[j] = 0;
2679                     }
2680                 }
2681 #if ( COUNT_ALL_NOT_DERIV == 1 )
2682                 num_cuts_to_check = num_cuts;
2683 #else
2684                 num_cuts_to_check = num_ring_cuts;
2685 #endif
2686                 if ( num_cuts < 0 || num_ring_cuts < 0 || num_cut_pieces < 0 ) {
2687                     ret = -3;
2688                     goto exit_r2c_num; /* wrong number of cuts = num */
2689                 }
2690                 goto repeat_without_deriv_ring;
2691             }
2692 
2693             /* sort the bonds for subsequent searching by bisections */
2694             if ( num_cuts_to_check > 1 ) {
2695                 qsort( ap, num_cuts_to_check, sizeof(ap[0]), cmp_r2c_atpair);
2696             }
2697             /* mark components to be disconnected */
2698             comp_num = 0;   /* number of components */
2699             cur_num_at = 0; /* number of atoms left after disconnecting the derivatizing agent */
2700             UnMarkOtherIndicators( at, num_atoms );
2701             for ( i = 0; i < num_cuts_to_check; i ++ ) {
2702                 n =  0;
2703                 for ( j = 0; j < 2; j ++ ) {
2704                     if ( da[(int)ap[i].at[j]].typ[0] ) {
2705                         k = j;
2706                         n ++;
2707                     }
2708                 }
2709                 if ( n != 1 ) {
2710                     ret = -3;
2711                     goto exit_r2c_num; /* wrong atom pair */
2712                 }
2713                 n = ap[i].at[k]; /* marked atom */
2714                 if ( (da[n].typ[0] & DERIV_RING) ) {
2715                     n = ap[i].at[1-k];
2716                 }
2717                 /* at[n] belongs to the derivation agent */
2718                 if ( !at[n].at_type ) {
2719                     comp_num ++;
2720                     cur_num_at = mark_atoms_ap( at, n, ap, num_cuts_to_check, cur_num_at, comp_num );
2721                 }
2722             }
2723             if ( comp_num > 1 ) {
2724                 /* eliminate offending DERIV_RING type derivatives */
2725                 if ( num_ring_cuts <= 2 ) {
2726                     ret = -99;
2727                     goto exit_r2c_num;
2728                 }
2729                 n = 0;
2730                 for ( i = j = 0; i < num_atoms; i ++ ) {
2731                     if ( (da[i].typ[0] & DERIV_RING) && (da[i].typ[1] & DERIV_RING) ) {
2732                         int at1a = at[i].neighbor[(int)da[i].ord[0]];
2733                         int at2a = at[i].neighbor[(int)da[i].ord[1]];
2734                         if ( at[at1a].at_type != at[at2a].at_type ) {
2735                             da[i].typ[0] = 0; /* eliminate this cut */
2736                             da[i].typ[1] = 0;
2737                             n ++;
2738                             num_cuts_to_check -= 2;
2739                             num_cuts          -= 2;
2740                             num_ring_cuts     -= 2;
2741                             num_cut_pieces    -= 1;
2742                         }
2743                     }
2744                 }
2745                 if ( n > 0 && num_cuts_to_check > 2 ) {
2746                     goto repeat_without_deriv_ring;
2747                 }
2748             }
2749             ret = 0;
2750 exit_r2c_num:
2751             inchi_free( ap );
2752             UnMarkOtherIndicators( at, num_atoms );
2753             if ( ret < 0 || num_cuts_to_check >= 2 && cur_num_at < MIN_AT_LEFT_DERIV ) {
2754                 goto exit_function; /* unexpected  error or nothing left */
2755             }
2756         }
2757 
2758         if ( !num_cuts ) {
2759             continue; /*goto exit_function;*/
2760         }
2761         /* eliminate derivatives that are not in the list */
2762         num_cuts = EliminateDerivNotInList( at, da, num_atoms );
2763         if ( num_cuts < 0 ) {
2764             ret = num_cuts;
2765             goto exit_function;
2766         }
2767 
2768 
2769         /* make cuts */
2770         num_cuts      = 0;
2771         for ( i = num = 0; i < num_atoms; i ++ ) {
2772             for ( len = 0; len < MAX_AT_DERIV && da[i].typ[len]; len ++ )
2773                 ;
2774             switch( len ) {
2775             case 0:
2776                 continue;
2777             case 1:
2778                 /* single cut: unconditional */
2779                 make_single_cut( at, da, i, 0 );
2780                 num_cuts += 1;
2781                 continue;
2782             case 2:
2783                 if ( (da[i].typ[0] & DERIV_RING)    && (da[i].typ[1] & DERIV_RING) ||
2784                      da[i].typ[0] == DERIV_AMINE_tN && da[i].typ[1] == DERIV_AMINE_tN ) {
2785                     /* double cut, unconditional */
2786                     make_single_cut( at, da, i, 1 );
2787                     make_single_cut( at, da, i, 0 );
2788                     num_cuts += 1;
2789                     continue;
2790                 }
2791                 if ( da[i].typ[0] == da[i].typ[1] ) {
2792                     /* DERIV_BRIDGE_O or DERIV_BRIDGE_NH; cut off the smallest */
2793                     if ( da[i].num[0] > da[i].num[1] ) {
2794                         make_single_cut( at, da, i, 1 );
2795                         num_cuts += 1;
2796                     } else
2797                     if ( da[i].num[0] < da[i].num[1] ) {
2798                         make_single_cut( at, da, i, 0 );
2799                         num_cuts += 1;
2800                     }
2801                     continue;
2802                 }
2803                 ret = -88;
2804                 goto exit_function; /* unexpected */
2805             case 3:
2806                 if ( da[i].typ[0] == da[i].typ[1] &&
2807                      da[i].typ[0] == da[i].typ[2] &&
2808                      da[i].typ[0] == DERIV_AMINE_tN ) {
2809                     int x, y, z;
2810                     x = (da[i].num[0] < da[i].num[1])? 0 : 1;
2811                     x = (da[i].num[x] < da[i].num[2])? x : 2; /* min */
2812                     z = (da[i].num[0] < da[i].num[1])? 1 : 0;
2813                     z = (da[i].num[x] < da[i].num[2])? 2 : z; /* max */
2814                     y = ((x+1)^(z+1))-1;                      /* median */
2815                     if (da[i].num[x] == da[i].num[z] )
2816                         continue; /* all deriv. agents have same size */
2817                     /* two smallest */
2818                     if ( da[i].num[x] == da[i].num[y] && x < y ) {
2819                         int t = x; /* first cut x > y */
2820                         x = y;
2821                         y = t;
2822                     }
2823                     make_single_cut( at, da, i, x );
2824                     num_cuts += 1;
2825                     if ( da[i].num[x] == da[i].num[y] ) {
2826                         /* equally small */
2827                         make_single_cut( at, da, i, y );
2828                         num_cuts += 1;
2829                     }
2830                     continue;
2831                 }
2832                 ret = -88;
2833                 goto exit_function; /* unexpected */
2834             case 4:
2835                 if ( (da[i].typ[0] & DERIV_RING) && (da[i].typ[1] & DERIV_RING) &&
2836                      (da[i].typ[2] & DERIV_RING) && (da[i].typ[3] & DERIV_RING) ) {
2837                     int n01 = inchi_max( da[i].num[0], da[i].num[1] );
2838                     int n23 = inchi_max( da[i].num[2], da[i].num[3] );
2839                     if ( n01 < n23 ) {
2840                         make_single_cut( at, da, i, 1 );
2841                         make_single_cut( at, da, i, 0 );
2842                         num_cuts += 1;
2843                     } else
2844                     if ( n01 > n23 ) {
2845                         make_single_cut( at, da, i, 3 );
2846                         make_single_cut( at, da, i, 2 );
2847                         num_cuts += 1;
2848                     }
2849                     continue;
2850                 }
2851             }
2852         }
2853         nTotNumCuts += num_cuts;
2854 #if ( REMOVE_CUT_DERIV == 1 )  /* normally YES */
2855         if ( num_cuts ) {
2856             /* remove marked with Tritium disconnected derivative attachments */
2857             ORIG_ATOM_DATA Orig_inp_data1, *orig_inp_data1;
2858             INP_ATOM_DATA *inp_cur_data1 = NULL;
2859             int num_components1, i_component1, num_component_left=0;
2860             orig_inp_data1 = &Orig_inp_data1;
2861             memset( orig_inp_data1, 0, sizeof(orig_inp_data1[0]) );
2862             UnMarkRingSystemsInp( at, num_atoms );
2863             UnMarkOtherIndicators( at, num_atoms );
2864             UnMarkOneComponent( at, num_atoms );
2865             for (i = 0; i < num_atoms; i ++ ) {
2866                 orig_inp_data1->num_inp_bonds += at[i].valence;
2867             }
2868             orig_inp_data1->num_inp_bonds /= 2;
2869             orig_inp_data1->num_inp_atoms  = num_atoms;
2870             orig_inp_data1->at = at; /* = from inp_cur_data[i_component].at */
2871             num_components1 = MarkDisconnectedComponents( orig_inp_data1, 0 );
2872             inp_cur_data1 = (INP_ATOM_DATA *)inchi_calloc( num_components1, sizeof(inp_cur_data1[0]) );
2873             /* extract components and discard disconnected derivatizing agents */
2874             for ( i_component1 = 0; i_component1 < num_components1; i_component1 ++ ) {
2875                 CreateInpAtomData( inp_cur_data1+i_component1, orig_inp_data1->nCurAtLen[i_component1], 0 );
2876                 inp_cur_data1[i_component1].num_at = ExtractConnectedComponent( orig_inp_data1->at, orig_inp_data1->num_inp_atoms,
2877                                                                                 i_component1+1, inp_cur_data1[i_component1].at );
2878                 /*  error processing */
2879                 if ( inp_cur_data1[i_component1].num_at <= 0 || orig_inp_data1->nCurAtLen[i_component1] != inp_cur_data1[i_component1].num_at ) {
2880                     ret = -(i_component1+1); /* severe error */
2881                     break;
2882                 }
2883                 /* if the component has tritium then discard: it is a derivatizing agent */
2884                 for (i = 0; i < inp_cur_data1[i_component1].num_at; i ++ ) {
2885                     if ( inp_cur_data1[i_component1].at[i].num_iso_H[1] ) {
2886                         inp_cur_data1[i_component1].at[i].num_iso_H[1] = 0; /* remove deuterium */
2887                     }
2888                     if ( inp_cur_data1[i_component1].at[i].num_iso_H[2] ) {
2889                         FreeInpAtomData( inp_cur_data1+i_component1 );
2890                         break;
2891                     }
2892                 }
2893             }
2894             /* merge components into one -- must be only one */
2895             for ( i_component1 = 0, num_atoms = 0; i_component1 < num_components1; i_component1 ++ ) {
2896                 num_atoms += inp_cur_data1[i_component1].num_at;
2897             }
2898             at = (inp_ATOM *) inchi_calloc( num_atoms, sizeof(at[0]) );
2899             cur_num_at = 0;
2900             for ( i_component1 = 0; i_component1 < num_components1; i_component1 ++ ) {
2901                 /* clean and prepare */
2902                 if ( !inp_cur_data1[i_component1].num_at )
2903                     continue; /* removed derivatizing object */
2904                 /*UnMarkOneComponent( inp_cur_data1[i_component1].at, inp_cur_data1[i_component1].num_at );*/
2905                 /* merge one by one */
2906                 cur_num_at = add_inp_ATOM( at, num_atoms, cur_num_at, inp_cur_data1[i_component1].at, inp_cur_data1[i_component1].num_at );
2907                 FreeInpAtomData( inp_cur_data1+i_component1 ); /* cleanup */
2908                 num_component_left ++;
2909             }
2910             /* replace the component */
2911             /* order of the following two statements is critically important */
2912             UnMarkDisconnectedComponents( orig_inp_data1 ); /* orig_inp_data1->at is same as inp_cur_data[i_component].at */
2913             FreeInpAtomData( inp_cur_data+i_component ); /* cleanup the original component */
2914             inp_cur_data[i_component].at = at;
2915             inp_cur_data[i_component].num_at = cur_num_at;
2916             inchi_free( inp_cur_data1 );
2917         }
2918 #endif
2919     }
2920     if ( nTotNumCuts ) {
2921         /* merge components into one */
2922         for ( i = 0, num_atoms = 0; i < num_components; i ++ ) {
2923             num_atoms += inp_cur_data[i].num_at;
2924         }
2925         at = (inp_ATOM *) inchi_calloc( num_atoms, sizeof(at[0]) );
2926         cur_num_at = 0;
2927         for ( i = 0; i < num_components; i ++ ) {
2928             /* clean and prepare */
2929             UnMarkRingSystemsInp( inp_cur_data[i].at, inp_cur_data[i].num_at );
2930             UnMarkOtherIndicators( inp_cur_data[i].at, inp_cur_data[i].num_at );
2931             UnMarkOneComponent( inp_cur_data[i].at, inp_cur_data[i].num_at );
2932             subtract_DT_from_num_H( inp_cur_data[i].num_at, inp_cur_data[i].at );
2933             /* merge one by one */
2934             cur_num_at = add_inp_ATOM( at, num_atoms, cur_num_at, inp_cur_data[i].at, inp_cur_data[i].num_at );
2935         }
2936         /* replace orig_inp_data */
2937         if ( cur_num_at == num_atoms ) {
2938             inchi_free( orig_inp_data->at );
2939             orig_inp_data->at = at;
2940             orig_inp_data->num_inp_atoms = cur_num_at;
2941             if ( orig_inp_data->szCoord ) {
2942                 inchi_free( orig_inp_data->szCoord );
2943                 orig_inp_data->szCoord = NULL;
2944             }
2945             UnMarkDisconnectedComponents( orig_inp_data );
2946         } else {
2947             if ( at ) {
2948                 inchi_free( at );
2949                 at = NULL;
2950             }
2951             ret = -999; /* num atoms mismatch */
2952         }
2953     }
2954 exit_function:
2955     if ( da ) {
2956         inchi_free( da );
2957         da = NULL;
2958     }
2959     for ( i_component = 0; i_component < num_components; i_component ++ ) {
2960         FreeInpAtomData( inp_cur_data+i_component );
2961     }
2962     inchi_free( inp_cur_data );
2963     inp_cur_data = NULL;
2964 
2965     return ret? ret : nTotNumCuts;
2966 }
2967 
2968 #endif /* UNDERIVATIZE */
2969 /********************************************************************/
2970 #if ( RING2CHAIN == 1 )
2971 /*
2972     type=1  (incl sugars: W=O, A=C(sat), Z=C(sat), Y=O, B=C(sat)-OH
2973 
2974      A---W               A---WH
2975     /    |              /
2976    |     |        ---> |
2977     \    |              \
2978      B---Z---YH          B---Z===Y
2979          |                   |
2980          |                   |
2981          C(opt)              C(opt)
2982 
2983     type=2 [not implemented]
2984 
2985      R---W               R---WH
2986     /     \             /
2987    |       Z      ---> |        Z
2988     \     /             \     //
2989      R---YH              R---Y
2990 
2991 */
2992 #define R2C_EMPTY  127
2993 typedef struct tagRing2Chain {  /* atom Z */
2994     char type; /* 1 => sugar-like */
2995     char ordW; /* ordering number of W-neighbor; bond to break; H to add */
2996     char ordY; /* ordering number of YH-neighbor; bond to increment; H to remove */
2997     char ordC; /* atom B = C(sat) */
2998     char ordCopt; /* if exists, saturated C connected by a chain-bond to Z */
2999 } R2C_AT;
3000 
3001 int detect_r2c_Zatom( inp_ATOM *at, R2C_AT *da, int iZ );
3002 int cut_ring_to_chain( inp_ATOM *at, R2C_AT *da, int iZ );
3003 
3004 /********************************************************************/
detect_r2c_Zatom(inp_ATOM * at,R2C_AT * da,int iZ)3005 int detect_r2c_Zatom( inp_ATOM *at, R2C_AT *da, int iZ )
3006 {
3007     int i, j, neigh, neighneigh, nRingSystem, num_found;
3008     R2C_AT da1;
3009     if ( at[iZ].valence > 4 )
3010         return 0;
3011     if ( at[iZ].valence != at[iZ].chem_bonds_valence )
3012         return 0; /* approach limitation: no double bonds */
3013 
3014     if ( at[iZ].el_number != el_number_C )
3015         return 0; /* sugar-specific */
3016 
3017     if ( at[iZ].nNumAtInRingSystem < 5 )
3018         return 0; /* not in a suitable ring */
3019 
3020     if ( !at[iZ].bCutVertex )
3021         return 0;  /* recognize only type 1 for now */
3022 
3023     nRingSystem = at[iZ].nRingSystem;
3024     memset ( &da1, R2C_EMPTY, sizeof(da1) );
3025 
3026     for ( i = 0, num_found = 0; i < at[iZ].valence; i ++ ) {
3027         neigh = at[iZ].neighbor[i];
3028         if ( at[neigh].charge || at[neigh].radical )
3029             return 0;
3030         if ( at[neigh].el_number == el_number_O &&
3031              at[neigh].valence   == 1 &&
3032              at[neigh].chem_bonds_valence == 1 &&
3033              at[neigh].num_H     == 1 ) {
3034             /* found Z-OH, i.e. Z-YH */
3035             if ( da1.ordY == R2C_EMPTY ) {
3036                 da1.ordY = i;
3037                 num_found ++;
3038                 continue;
3039             } else {
3040                 return 0;
3041             }
3042         }
3043         if ( at[neigh].el_number == el_number_O &&
3044              at[neigh].valence   == 2 &&
3045              at[neigh].chem_bonds_valence == 2 &&
3046              at[neigh].num_H     == 0 &&
3047              at[neigh].nRingSystem == nRingSystem ) {
3048             /* found Z-O-, i.e. Z-W- */
3049             if ( da1.ordW == R2C_EMPTY ) {
3050                  /* j = index of the oppozite to at[iZ] neighbor of at[neigh] */
3051                 j = (at[neigh].neighbor[0] == iZ);
3052                 neighneigh = at[neigh].neighbor[j];
3053                 if ( at[neighneigh].valence != at[neighneigh].chem_bonds_valence ||
3054                      at[neighneigh].el_number != el_number_C )
3055                     return 0; /* sugar-specific */
3056                 da1.ordW = i;
3057                 num_found ++;
3058                 continue;
3059             } else {
3060                 return 0;
3061             }
3062         }
3063         if ( at[neigh].el_number == el_number_C &&
3064              at[neigh].valence   > 2 &&
3065              at[neigh].chem_bonds_valence == at[neigh].valence &&
3066              at[neigh].num_H     <= 1 &&
3067              at[neigh].nRingSystem == nRingSystem ) {
3068             /* sugar-specfic: carbon in the ring should have -OH neighbor */
3069             int iOH;
3070             for ( j = 0; j < at[neigh].valence; j ++ ) {
3071                 iOH = at[neigh].neighbor[j];
3072                 if ( at[iOH].el_number == el_number_O &&
3073                      at[iOH].valence == 1 &&
3074                      at[iOH].chem_bonds_valence == 1 &&
3075                      at[iOH].num_H == 1 &&
3076                      !at[iOH].charge && !at[iOH].radical ) {
3077                     if ( da1.ordC == R2C_EMPTY ) {
3078                         da1.ordC = i;
3079                         num_found ++;
3080                         break;
3081                     } else {
3082                         return 0;
3083                     }
3084                 }
3085             }
3086             if ( j < at[neigh].valence )
3087                 continue;
3088         }
3089         if ( at[neigh].el_number == el_number_C &&
3090              at[neigh].chem_bonds_valence == at[neigh].valence &&
3091              at[neigh].nRingSystem != nRingSystem ) {
3092             /* extra carbon neighbor of Z */
3093             if ( da1.ordCopt == R2C_EMPTY ) {
3094                 da1.ordCopt = i;
3095                 continue;
3096             }
3097         }
3098         return 0; /* unexpectd neighbor */
3099     }
3100     if (num_found == 3) {
3101         da1.type = 1;
3102         da[iZ] = da1;
3103         return 1; /* disconnection found */
3104     }
3105     return 0;
3106 }
3107 /********************************************************************/
cut_ring_to_chain(inp_ATOM * at,R2C_AT * da,int iZ)3108 int cut_ring_to_chain( inp_ATOM *at, R2C_AT *da, int iZ )
3109 {
3110     int ret = -1; /* error flag */
3111     int iordW = (int)da[iZ].ordW; /* ord of the bond in iZ */
3112     int iordY = (int)da[iZ].ordY; /* ord of the bond in iZ */
3113     int iordC = (int)da[iZ].ordC;
3114     int iW, iY, num_iso_H, i, jordZ;
3115     AT_NUMB *p;
3116 
3117     if ( da[iZ].type != 1 ) {
3118         return 0;
3119     }
3120     if ( 0 > iordW || iordW >= at[iZ].valence ||
3121          0 > iordY || iordY >= at[iZ].valence ||
3122          0 > iordC || iordC >= at[iZ].valence /* suger-specific*/) {
3123         return -1; /* program error */
3124     }
3125     /* find other da[] that affect at[iZ] */
3126     iW  = at[iZ].neighbor[iordW];  /* opposite atom to disconnect and add H */
3127     iY  = at[iZ].neighbor[iordY];  /* opposite atom to increment the bond and remove H*/
3128     if ( !at[iY].num_H || at[iZ].bond_type[iordY] != BOND_TYPE_SINGLE ) {
3129         return -2; /* program error */
3130     }
3131     /* increment at[iZ]--at[iY] bond */
3132     p = is_in_the_list( at[iY].neighbor, (AT_NUMB) iZ, at[iY].valence );
3133     if ( !p ) {
3134         return -3; /* program error */
3135     }
3136     jordZ = p - at[iY].neighbor;
3137     at[iZ].bond_type[iordY] ++;
3138     at[iZ].chem_bonds_valence ++;
3139     at[iY].bond_type[jordZ] ++;
3140     at[iY].chem_bonds_valence ++;
3141 
3142     /* disconnect at[iZ]--at[iW] bond */
3143     ret = DisconnectInpAtBond( at, NULL, iZ, iordW );
3144     if ( ret != 1 ) {
3145         return -4; /* program error */
3146     }
3147     /* disconnection succeeded */
3148     /* transfer H from at[iY] to at[iW] */
3149     num_iso_H = NUM_ISO_H(at, iY);
3150     if ( at[iY].num_H == num_iso_H ) {
3151         for ( i = 0; i < NUM_H_ISOTOPES; i ++ ) {
3152             if ( at[iY].num_iso_H[i] ) {
3153                 at[iY].num_iso_H[i] --;
3154                 at[iW].num_iso_H[i] ++;
3155             }
3156         }
3157     }
3158     at[iY].num_H --;
3159     at[iW].num_H ++;
3160     return 1;
3161 }
3162 /********************************************************************/
Ring2Chain(ORIG_ATOM_DATA * orig_inp_data)3163 int Ring2Chain( ORIG_ATOM_DATA *orig_inp_data )
3164 {
3165     int ret = 0, i, j, n, num_atoms, num_components, nFound, num, num_cuts, iZ, cur_num_at;
3166     inp_ATOM *at = orig_inp_data->at;
3167     INP_ATOM_DATA *inp_cur_data = NULL;
3168     R2C_AT        *da           = NULL;
3169 
3170     set_R2C_el_numbers( );
3171     /* prepare */
3172     num_atoms = remove_terminal_HDT( orig_inp_data->num_inp_atoms, at, 1 );
3173                 /*^^^^^ always accomodate accomodate FIX_TERM_H_CHRG_BUG - IPl, July 2008*/
3174     orig_inp_data->num_inp_atoms = num_atoms;
3175 
3176     /* initialize */
3177     UnMarkDisconnectedComponents( orig_inp_data );
3178     num_cuts = 0;
3179     /* mark */
3180     num_components = MarkDisconnectedComponents( orig_inp_data, 0 );
3181     inp_cur_data = (INP_ATOM_DATA *)inchi_calloc( num_components, sizeof(inp_cur_data[0]) );
3182     iZ = -1;
3183     for ( j = 0; j < num_components; j ++ ) {
3184         CreateInpAtomData( inp_cur_data+j, orig_inp_data->nCurAtLen[j], 0 );
3185         inp_cur_data[j].num_at = ExtractConnectedComponent( orig_inp_data->at, orig_inp_data->num_inp_atoms, j+1, inp_cur_data[j].at );
3186         /*  error processing */
3187         if ( inp_cur_data[j].num_at <= 0 || orig_inp_data->nCurAtLen[j] != inp_cur_data[j].num_at ) {
3188             ret = -(j+1); /* severe error */
3189             goto exit_function;
3190         }
3191         /* initialize */
3192         num_atoms = inp_cur_data[j].num_at;
3193         at        = inp_cur_data[j].at;
3194         add_DT_to_num_H( num_atoms, at );
3195 
3196         UnMarkRingSystemsInp( at, num_atoms );
3197         UnMarkOtherIndicators( at, num_atoms );
3198         UnMarkOneComponent( at, num_atoms );
3199         MarkRingSystemsInp( at, num_atoms, 0 );
3200         ret = mark_arom_bonds( at, num_atoms );
3201         if ( ret < 0 ) {
3202             goto exit_function;
3203         }
3204         ret = 0;
3205         if ( da ) inchi_free( da );
3206         da = (R2C_AT *)inchi_calloc( num_atoms, sizeof(da[0]) );
3207 
3208         /* detect ring-to-chain possibilities */
3209         nFound = 0;
3210         for ( i = 0, num=0; i < num_atoms; i ++ ) {
3211             if ( at[i].bCutVertex /* type 1 specific*/ && !da[i].type ) {
3212                 num += (n=detect_r2c_Zatom( at, da, i ));
3213                 if ( n == 1 )
3214                     iZ = i;
3215                 UnMarkOtherIndicators( at, num_atoms );
3216             }
3217         }
3218 
3219         if ( num == 1 ) {
3220             /* convert ring to chain: make single cut */
3221             ret = cut_ring_to_chain( at, da, iZ );
3222             if ( ret < 0 ) {
3223                 goto exit_function;
3224             }
3225             num_cuts += (ret == 1);
3226         } else
3227         if ( num ) {
3228             /* allocate an array of bonds to be cut */
3229             R2C_ATPAIR *ap = (R2C_ATPAIR *) inchi_malloc( sizeof(ap[0]) * num );
3230             AT_NUMB    comp_num = 0;
3231             if ( !ap ) {
3232                 ret = -1; /* malloc failure */
3233                 goto exit_function;
3234             }
3235             /* fill out the array of bonds to be cut */
3236             for ( i = j = 0; i < num_atoms; i ++ ) {
3237                 if ( da[i].type == 1 ) {
3238                     AT_NUMB at1 = i;
3239                     AT_NUMB at2 = at[i].neighbor[(int)da[i].ordW];
3240                     if ( j >= num ) {
3241                         ret = -2;
3242                         goto exit_r2c_num; /* wrong number of cuts = num */
3243                     }
3244                     n = ( at1 > at2 );
3245                     ap[j].at[n] = at1;
3246                     ap[j].at[1-n] = at2; /* ap[j].at[0] < ap[j].at[1] */
3247                     j ++;
3248                 }
3249             }
3250             if ( j != num ) {
3251                 ret = -3;
3252                 goto exit_r2c_num; /* wrong number of cuts = num */
3253             }
3254             /* sort the bonds for subsequent searching by bisections */
3255             qsort( ap, num, sizeof(ap[0]), cmp_r2c_atpair);
3256             /* mark components to be disconnected */
3257             for ( i = 0; i < num; i ++ ) {
3258                 for ( j = 0; j < 2; j ++ ) {
3259                     if ( !at[ap[i].at[j]].at_type ) {
3260                         comp_num ++;
3261                         mark_atoms_ap( at, (int)ap[i].at[j], ap, num, 0, comp_num );
3262                     }
3263                 }
3264             }
3265             /* convert ring to chain */
3266             for ( i = 0; i < num; i ++ ) {
3267                 int i1 = ap[i].at[0];
3268                 int i2 = ap[i].at[1];
3269                 iZ = -1;
3270                 if ( at[i1].at_type == at[i2].at_type ) {
3271                     /* by definition, there are no adjacent iZ atoms; one iZ atom per bond */
3272                     if ( da[i1].type == 1 && at[i1].neighbor[(int)da[i1].ordW] == i2 ) {
3273                         iZ = i1;
3274                     } else
3275                     if ( da[i2].type == 1 && at[i2].neighbor[(int)da[i2].ordW] == i1 ) {
3276                         iZ = i2;
3277                     } else {
3278                         ret = -3;
3279                         goto exit_r2c_num;
3280                     }
3281                     ret = cut_ring_to_chain( at, da, iZ );
3282                     if ( ret < 0 ) {
3283                         goto exit_r2c_num;
3284                     }
3285                     num_cuts += (ret == 1);
3286                 }
3287             }
3288             ret = 0;
3289 exit_r2c_num:
3290             inchi_free( ap );
3291             UnMarkOtherIndicators( at, num_atoms );
3292             if ( ret < 0 ) {
3293                 goto exit_function;
3294             }
3295         }
3296     }
3297     if ( num_cuts ) {
3298         /* merge components into one */
3299         for ( i = 0, num_atoms = 0; i < num_components; i ++ ) {
3300             num_atoms += inp_cur_data[i].num_at;
3301         }
3302         at = (inp_ATOM *) inchi_calloc( num_atoms, sizeof(at[0]) );
3303         cur_num_at = 0;
3304         for ( i = 0; i < num_components; i ++ ) {
3305             /* clean and prepare */
3306             UnMarkRingSystemsInp( inp_cur_data[i].at, inp_cur_data[i].num_at );
3307             UnMarkOtherIndicators( inp_cur_data[i].at, inp_cur_data[i].num_at );
3308             UnMarkOneComponent( inp_cur_data[i].at, inp_cur_data[i].num_at );
3309             subtract_DT_from_num_H( inp_cur_data[i].num_at, inp_cur_data[i].at );
3310             /* merge one by one */
3311             cur_num_at = add_inp_ATOM( at, num_atoms, cur_num_at, inp_cur_data[i].at, inp_cur_data[i].num_at );
3312         }
3313         /* replace orig_inp_data */
3314         if ( cur_num_at == num_atoms ) {
3315             inchi_free( orig_inp_data->at );
3316             orig_inp_data->at = at;
3317             orig_inp_data->num_inp_atoms = cur_num_at;
3318             if ( orig_inp_data->szCoord ) {
3319                 inchi_free( orig_inp_data->szCoord );
3320                 orig_inp_data->szCoord = NULL;
3321             }
3322             UnMarkDisconnectedComponents( orig_inp_data );
3323         } else {
3324             if ( at ) {
3325                 inchi_free( at );
3326                 at = NULL;
3327             }
3328             ret = -999; /* num atoms mismatch */
3329         }
3330     }
3331 exit_function:
3332     if ( da ) {
3333         inchi_free( da );
3334         da = NULL;
3335     }
3336     for ( j = 0; j < num_components; j ++ ) {
3337         FreeInpAtomData( inp_cur_data+j );
3338     }
3339     inchi_free( inp_cur_data );
3340     inp_cur_data = NULL;
3341 
3342     return ret? ret : num_cuts;
3343 }
3344 #endif /* RING2CHAIN */
3345 
3346