1 /*
2 * International Chemical Identifier (InChI)
3 * Version 1
4 * Software version 1.04
5 * September 9, 2011
6 *
7 * The InChI library and programs are free software developed under the
8 * auspices of the International Union of Pure and Applied Chemistry (IUPAC).
9 * Originally developed at NIST. Modifications and additions by IUPAC
10 * and the InChI Trust.
11 *
12 * IUPAC/InChI-Trust Licence for the International Chemical Identifier (InChI)
13 * Software version 1.0.
14 * Copyright (C) IUPAC and InChI Trust Limited
15 *
16 * This library is free software; you can redistribute it and/or modify it under the
17 * terms of the IUPAC/InChI Trust Licence for the International Chemical Identifier
18 * (InChI) Software version 1.0; either version 1.0 of the License, or
19 * (at your option) any later version.
20 *
21 * This library is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
24 * See the IUPAC/InChI Trust Licence for the International Chemical Identifier (InChI)
25 * Software version 1.0 for more details.
26 *
27 * You should have received a copy of the IUPAC/InChI Trust Licence for the
28 * International Chemical Identifier (InChI) Software version 1.0 along with
29 * this library; if not, write to:
30 *
31 * The InChI Trust
32 * c/o FIZ CHEMIE Berlin
33 * Franklinstrasse 11
34 * 10587 Berlin
35 * GERMANY
36 *
37 */
38
39
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43
44 #include "mode.h"
45
46 #include "inpdef.h"
47 #include "extr_ct.h"
48 #include "ichitaut.h"
49 #include "ichinorm.h"
50 #include "ichierr.h"
51 #include "util.h"
52
53 #include "ichicomp.h"
54
55 #ifndef COMPILE_ALL_CPP
56 #ifdef __cplusplus
57 extern "C" {
58 #endif
59 #endif
60
61 /* defined in ichisort.c, prototype in ichicomn.h */
62 int insertions_sort_AT_RANK( AT_RANK *base, int num );
63
64 #ifndef COMPILE_ALL_CPP
65 #ifdef __cplusplus
66 }
67 #endif
68 #endif
69
70 /* local prototypes */
71 int cmp_iso_atw_diff_component_no( const void *a1, const void *a2 );
72
73 /********************************************************************************/
cmp_iso_atw_diff_component_no(const void * a1,const void * a2)74 int cmp_iso_atw_diff_component_no( const void *a1, const void *a2 )
75 {
76 int ret = (int)((const inp_ATOM*)a1)->iso_atw_diff - (int)((const inp_ATOM*)a2)->iso_atw_diff;
77 if ( !ret ) /* make the sort stable */
78 ret = (int)((const inp_ATOM*)a1)->component - (int)((const inp_ATOM*)a2)->component;
79 return ret;
80 }
81 typedef struct tagTreeAtom {
82 AT_NUMB neighbor[MAXVAL]; /* positions (from 0) of the neighbors in the inp_ATOM array */
83 S_CHAR valence; /* number of bonds = number of neighbors */
84 AT_NUMB nRingSystem;
85 AT_NUMB nBlockSystem;
86 S_CHAR bCutVertex;
87 } tre_ATOM;
88
89 #if ( FIND_RING_SYSTEMS == 1 ) /* { */
90 /********************************************************************************/
MarkRingSystemsInp(inp_ATOM * at,int num_atoms,int start)91 int MarkRingSystemsInp( inp_ATOM *at, int num_atoms, int start )
92 {
93 AT_NUMB *nStackAtom = NULL;
94 int nTopStackAtom=-1;
95 AT_NUMB *nRingStack = NULL;
96 int nTopRingStack=-1; /* was AT_NUMB */
97 AT_NUMB *nDfsNumber = NULL;
98 AT_NUMB *nLowNumber = NULL;
99 S_CHAR *cNeighNumb = NULL;
100 AT_NUMB nDfs;
101 #if ( FIND_RINS_SYSTEMS_DISTANCES == 1 )
102 AT_NUMB nRs, *nRsConnect = NULL;
103 int k;
104 AT_NUMB *tree = NULL;
105 int nNumConnect, nMaxNumConnect, nLenConnect;
106 #endif
107 AT_NUMB nNumAtInRingSystem;
108 int i, j, u, /*start,*/ nNumRingSystems, nNumStartChildren;
109
110 /* allocate arrays */
111 nStackAtom = (AT_NUMB *) inchi_malloc(num_atoms*sizeof(nStackAtom[0]));
112 nRingStack = (AT_NUMB *) inchi_malloc(num_atoms*sizeof(nRingStack[0]));
113 nDfsNumber = (AT_NUMB *) inchi_malloc(num_atoms*sizeof(nDfsNumber[0]));
114 nLowNumber = (AT_NUMB *) inchi_malloc(num_atoms*sizeof(nLowNumber[0]));
115 cNeighNumb = (S_CHAR *) inchi_malloc(num_atoms*sizeof(cNeighNumb[0]));
116 #if ( FIND_RINS_SYSTEMS_DISTANCES == 1 )
117 nRsConnect = (AT_NUMB *)inchi_calloc(3*num_atoms+3,sizeof(nRsConnect[0]));
118 #endif
119 /* check allocation */
120 if ( !nStackAtom || !nRingStack || !nDfsNumber || !nLowNumber || !cNeighNumb
121 #if ( FIND_RINS_SYSTEMS_DISTANCES == 1 )
122 || !nRsConnect
123 #endif
124 ) {
125 nNumRingSystems = CT_OUT_OF_RAM; /* program error */ /* <BRKPT> */
126 goto exit_function;
127 }
128
129 /********************************************
130 *
131 * Find Cut-vertices & Blocks
132 *
133 ********************************************/
134
135 /* initiation */
136 /*start = 0;*/
137 nNumRingSystems = 0;
138 u = start; /* start atom */
139 nDfs = 0;
140 nTopStackAtom =-1;
141 nTopRingStack =-1;
142 memset( nDfsNumber, 0, num_atoms*sizeof(nDfsNumber[0]));
143 memset( cNeighNumb, 0, num_atoms*sizeof(cNeighNumb[0]));
144 /* push the start atom on the stack */
145 nLowNumber[u] = nDfsNumber[u] = ++nDfs;
146 nStackAtom[++nTopStackAtom] = (AT_NUMB)u;
147 nRingStack[++nTopRingStack] = (AT_NUMB)u;
148
149 nNumStartChildren = 0;
150
151 do {
152 /* advance */
153 advance_block:
154 /*if ( (int)at[i=nStackAtom[nTopStackAtom]].valence > (j = (int)cNeighNumb[i]) )*/
155 /* replaced due to missing sequence point */
156 if ( i=(int)nStackAtom[nTopStackAtom], j = (int)cNeighNumb[i], (int)at[i].valence > j )
157 {
158 cNeighNumb[i] ++;
159 u = (int)at[i].neighbor[j];
160 if ( !nDfsNumber[u] ) {
161 /* tree edge, 1st visit -- advance */
162 nStackAtom[++nTopStackAtom] = (AT_NUMB)u;
163 nRingStack[++nTopRingStack] = (AT_NUMB)u;
164 nLowNumber[u] = nDfsNumber[u] = ++nDfs;
165 nNumStartChildren += (i == start);
166 } else
167 if ( !nTopStackAtom || u != (int)nStackAtom[nTopStackAtom-1] ) { /* may comment out ? */
168 /* back edge: u is not a predecessor of i */
169 if ( nDfsNumber[u] < nDfsNumber[i] ) {
170 /* Back edge, 1st visit: u is an ancestor of i. Compare */
171 if ( nLowNumber[i] > nDfsNumber[u] ) {
172 nLowNumber[i] = nDfsNumber[u];
173 }
174 }
175 } /* may comment out ? */
176 goto advance_block;
177 } else {
178 cNeighNumb[i] = 0;
179 }
180
181 /* back up */
182 if ( i != start ) {
183 u = (int)nStackAtom[nTopStackAtom-1]; /* predecessor of i */
184 if ( nLowNumber[i] >= nDfsNumber[u] ) {
185 /* output the block */
186 nNumRingSystems ++;
187 at[u].nBlockSystem = nNumRingSystems;
188 if ( u != start || nNumStartChildren > 1 ) {
189 at[u].bCutVertex += 1;
190 }
191 while ( nTopRingStack >= 0 ) {
192 j = nRingStack[nTopRingStack--];
193 at[j].nBlockSystem = nNumRingSystems; /* mark the atom */
194 if ( i == j ) {
195 break;
196 }
197 }
198 } else
199 if ( nLowNumber[u] > nLowNumber[i] ) {
200 /* inherit */
201 nLowNumber[u] = nLowNumber[i];
202 }
203 }
204 } while ( --nTopStackAtom >= 0 );
205
206
207 /********************************************
208 *
209 * Find Ring Systems
210 * Including chain atoms X: A-X-B, where the bonds (of any kind) are bridges.
211 *
212 ********************************************/
213
214 /* initiation */
215 /*start = 0;*/
216 nNumRingSystems = 0;
217 u = start; /* start atom */
218 nDfs = 0;
219 nTopStackAtom =-1;
220 nTopRingStack =-1;
221 memset( nDfsNumber, 0, num_atoms*sizeof(nDfsNumber[0]));
222 memset( cNeighNumb, 0, num_atoms*sizeof(cNeighNumb[0]));
223 /* push the start atom on the stack */
224 nLowNumber[u] = nDfsNumber[u] = ++nDfs;
225 nStackAtom[++nTopStackAtom] = (AT_NUMB)u;
226 nRingStack[++nTopRingStack] = (AT_NUMB)u;
227 #if ( FIND_RINS_SYSTEMS_DISTANCES == 1 )
228 nNumConnect = nLenConnect = nMaxNumConnect = 0;
229 #endif
230
231 do {
232 /* advance */
233 advance_ring:
234 /*if ( (int)at[i=nStackAtom[nTopStackAtom]].valence > (j = (int)cNeighNumb[i]) )*/
235 /* replaced due to missing sequence point */
236 if ( i=(int)nStackAtom[nTopStackAtom], j = (int)cNeighNumb[i], (int)at[i].valence > j )
237 {
238 cNeighNumb[i] ++;
239 u = (int)at[i].neighbor[j];
240 if ( !nDfsNumber[u] ) {
241 /* tree edge, 1st visit -- advance */
242 nStackAtom[++nTopStackAtom] = (AT_NUMB)u;
243 nRingStack[++nTopRingStack] = (AT_NUMB)u;
244 nLowNumber[u] = nDfsNumber[u] = ++nDfs;
245 } else
246 if ( !nTopStackAtom || u != (int)nStackAtom[nTopStackAtom-1] ) {
247 /* back edge: u is not a predecessor of i */
248 if ( nDfsNumber[u] < nDfsNumber[i] ) {
249 /* Back edge, 1st visit: u is ancestor of i. Compare */
250 if ( nLowNumber[i] > nDfsNumber[u] ) {
251 nLowNumber[i] = nDfsNumber[u];
252 }
253 }
254 }
255 goto advance_ring;
256 } else {
257 cNeighNumb[i] = 0;
258 }
259
260 /* back up */
261 if ( nDfsNumber[i] == nLowNumber[i] ) {
262 /* found a ring system */
263 nNumRingSystems ++;
264 /* unwind nRingStack[] down to i */
265 #if ( FIND_RINS_SYSTEMS_DISTANCES == 1 )
266 nNumConnect = 2;
267 /* data structure: for each ring system nRsConnect[] contains:
268 * 1) nNumConnect+1 = (number of already discovered neighboring "ring systems" + 1)+1
269 * 2) nNumAtInRingSystem
270 * 3) (nNumConnect-1) numbers (IDs) of neighboring ring systems.
271 * BFS guarantees that each neighboring ring system is encountered only one time
272 * Number of all neighboring ring systems = (nNumConnect-1)+1 = nNumConnect
273 * (One additional ring system is where the BFS retracts from the vertex #i,
274 * except when i=DFS root node. In the latter case there is/are only (nNumConnect-1)
275 * neighboring ring system(s).
276 */
277 #endif
278 /* count atoms in a ring system */
279 for ( nNumAtInRingSystem = 0, j = nTopRingStack; 0 <= j; j -- ) {
280 nNumAtInRingSystem ++;
281 if ( i == (int)nRingStack[j] ) {
282 break;
283 }
284 }
285 while ( nTopRingStack >= 0 ) {
286 j = (int)nRingStack[nTopRingStack--];
287 at[j].nRingSystem = (AT_NUMB)nNumRingSystems; /* ring system id */
288 at[j].nNumAtInRingSystem = nNumAtInRingSystem;
289 #if ( FIND_RINS_SYSTEMS_DISTANCES == 1 )
290 for ( k = 0; k < at[j].valence; k ++ ) {
291 if ( (nRs = at[at[j].neighbor[k]].nRingSystem) && (int)nRs != nNumRingSystems ) {
292 nRsConnect[nLenConnect + (nNumConnect++)] = nRs; /* adjacent ring system id */
293 }
294 }
295 #endif
296 if ( i == j ) {
297 /* reached atom on the top of nStackAtom[] stack */
298 break;
299 }
300 }
301 #if ( FIND_RINS_SYSTEMS_DISTANCES == 1 )
302 nRsConnect[nLenConnect] = nNumConnect;
303 nRsConnect[nLenConnect+1] = nNumAtInRingSystem;
304 nLenConnect += nNumConnect;
305 if ( nMaxNumConnect < nNumConnect ) {
306 /* max number of neighboring ring systems */
307 nMaxNumConnect = nNumConnect;
308 }
309 #endif
310 } else
311 if ( nTopStackAtom > 0 ) {
312 j = (int)nStackAtom[nTopStackAtom-1];
313 /* inherit nLowNumber */
314 if ( nLowNumber[j] > nLowNumber[i] ) {
315 nLowNumber[j] = nLowNumber[i];
316 }
317 }
318 } while ( --nTopStackAtom >= 0 );
319
320 #if ( FIND_RINS_SYSTEMS_DISTANCES == 1 ) /* normally disabled */
321 nMaxNumConnect ++;
322 if ( nNumRingSystems > 1 ) {
323 int nCol = nMaxNumConnect+1;
324 int nNumInSyst= nMaxNumConnect;
325 int nMaxNeigh = nMaxNumConnect-1;
326 #define T(a,b) tree[(a)*nCol+b]
327 if ( tree = (AT_NUMB *)inchi_calloc( nCol * (nNumRingSystems+1), sizeof(tree[0])) ) {
328 int len, neigh;
329 /* reuse previous allocations */
330 AT_NUMB *nNumVisitedNeighbors = nStackAtom;
331 AT_NUMB *nDistanceFromTerminal = nRingStack;
332 AT_NUMB *nCurrActiveRingSystem = nDfsNumber;
333 AT_NUMB *nNextActiveRingSystem = nLowNumber;
334 int nNumCurrActiveRingSystems, nNumNextActiveRingSystems, pass;
335 /* build a "condensation graph (actually, a tree)" in which
336 * each vertex corresponds to a ring system T(row, col) = T(ring syst, neighbors)
337 * Number of rows = column length = max. number of ring system neighbors + 2
338 * Number of cols = row length = number of ring systems + 1
339 * Neighboring ring systems are contiguously stored in a row
340 * T(i,0) = number of neighbors, 1 <= i <= nNumRingSystems;
341 * T(i,k) = number of a neighboring ring system, 1 <= k <= T(i,0)
342 * T(i,nCol-1) = number of atoms in the system #i
343 */
344 for ( i = 1, j = 0; len=nRsConnect[j]; i ++ ) {
345 T(i, nNumInSyst) = nRsConnect[j+1];
346 for ( k = 2; k < len; k ++ ) {
347 neigh = nRsConnect[j+k];
348 if ( T(i,0) < nMaxNeigh && T(neigh,0) < nMaxNeigh ) {
349 T(i,0) ++;
350 T(neigh,0) ++;
351 T(i,T(i,0)) = neigh;
352 T(neigh,T(neigh,0)) = i;
353 } else {
354 nNumRingSystems = CT_OVERFLOW; /* program error */ /* <BRKPT> */
355 goto exit_function;
356 }
357 }
358 j += len;
359 }
360 /* clear memory */
361 memset( nNumVisitedNeighbors, 0, nNumRingSystems*sizeof(nNumVisitedNeighbors[0]) );
362 memset( nDistanceFromTerminal, 0, nNumRingSystems*sizeof(nDistanceFromTerminal[0]) );
363 memset( nCurrActiveRingSystem, 0, nNumRingSystems*sizeof(nCurrActiveRingSystem[0]) );
364 memset( nNextActiveRingSystem, 0, nNumRingSystems*sizeof(nNextActiveRingSystem[0]) );
365 nNumNextActiveRingSystems = 0;
366 for ( i = 0; i < nNumRingSystems; i ++ ) {
367 if ( 1 == T(i+1,0) ) {
368 nNextActiveRingSystem[i] = 1; /* number of traversed neighbors + 1 */
369 nDistanceFromTerminal[i] = 1;
370 nNumNextActiveRingSystems ++;
371 } else {
372 nNextActiveRingSystem[i] = 0;
373 nDistanceFromTerminal[i] = 0;
374 }
375 nNumVisitedNeighbors[i] = 0;
376 }
377
378 /* nCurrActiveRingSystem[i] = a sum of:
379 * 1) +1 if it is or was active
380 * 2) +(number of neighbors from which it was reached)
381 * 3) +1 if it was left and not active anymore
382 */
383 pass = 0;
384 do {
385 nNumCurrActiveRingSystems = nNumNextActiveRingSystems;
386 nNumNextActiveRingSystems = 0;
387 memcpy( nCurrActiveRingSystem, nNextActiveRingSystem,
388 nNumRingSystems*sizeof(nNextActiveRingSystem[0]));
389 for ( i = 0; i < nNumRingSystems; i ++ ) {
390 if ( T(i+1,0) == nCurrActiveRingSystem[i] ) {
391 /* on the previous pass currently active ring system i+1 bas been reached
392 * from all neighbors except one;
393 * the neighbors from which it was reached have
394 * T(neigh,0)+1 == nCurrActiveRingSystem[i]
395 * this ring system has not been left yet
396 */
397 for ( k = 1, len=T(i+1,0); k <= len; k ++ ) {
398 neigh = (int)T(i+1,k);
399 if ( T(neigh,0) >= nCurrActiveRingSystem[neigh-1] ) {
400 if ( 0 == pass ) {
401 nDistanceFromTerminal[i] = 1;
402 }
403 break;
404 }
405 }
406 if ( k <= len ) {
407 /* neigh was not reached from at least 2 neighbors
408 * walk along -R- chain (T(neigh,0)==2) up to
409 * 1) a terminal system, not including it or
410 * 2) a branching point.
411 *
412 * pass = 0: started from terminal systems:
413 * reach the branching point.
414 * If chain system next to a terminal system has already been reached
415 * then walk along it according to Note below
416 *
417 * pass > 0: started from branching points
418 * 2a) If the branching point has not been reached from 2 or more neighbors,
419 * then include it
420 * 2b) If the branching point has not been reached from 1 neighbor only,
421 * then do not include it: it will be a starting point later
422 * Note: if a chain atom already has nDistanceFromTerminal[i] > 0, then
423 * the last atom should be the one such that
424 * its nDistanceFromTerminal[]+1>= nDistanceFromTerminal[] of the
425 * next in the chain
426 */
427 int bOk = 0;
428 k = i+1; /* starting point */
429 if ( 0 == pass && T(k,nNumInSyst) > 1 ) {
430 nNumNextActiveRingSystems ++; /* request next pass */
431 continue; /* stop a the terminal ring system */
432 }
433 while( 2 == T(neigh,0) ) {
434 /* walk along a chain */
435 if ( !nNextActiveRingSystem[neigh-1] ) {
436 nNextActiveRingSystem[neigh-1] = 1; /* make neighbor active */
437 } else
438 if ( nDistanceFromTerminal[k-1]+1 <= nDistanceFromTerminal[neigh-1] ) {
439 /* walking along the chain; already have had a walk */
440 /* in the opposite direction at this pass */
441 } else {
442 /* k is the last; neigh (it is a bridge -X-) has not been reached */
443 bOk = 1;
444 break;
445 }
446 nNextActiveRingSystem[k-1] ++; /* leave system k */
447 if ( nNextActiveRingSystem[neigh-1] < T(neigh,0) ) {
448 nNextActiveRingSystem[neigh-1] ++; /* add one connection to neigh */
449 }
450 nDistanceFromTerminal[neigh-1] = nDistanceFromTerminal[k-1]+1;
451 j = (T(neigh,1)==k)? 2:1;
452 k = neigh;
453 neigh = T(k,j); /* next in the chain */
454 nNumNextActiveRingSystems ++;
455 if ( T(k,nNumInSyst) > 1 ) {
456 bOk = 1;
457 break; /* stop on a ring system */
458 }
459 }
460 /* neigh is a terminal or a bridge or a branching point */
461 if ( 2 > T(neigh,0) ) {
462 /* neighbor is a terminal atom */
463 if ( 1 < pass ) {
464 nNumRingSystems = CT_UNKNOWN_ERR; /* error (debug only) */ /* <BRKPT> */
465 goto exit_function;
466 }
467 continue;
468 }
469 if ( 2 == T(neigh,0) ) {
470 /* neighbor is a bridge */
471 continue;
472 }
473 /* neighbor is a branching point */
474 if ( T(neigh,0) > nCurrActiveRingSystem[neigh-1] ) {
475 /* move to the neigh (make neigh active): on previous pass it */
476 /* has not been reached from 2 or more neighbors */
477 if ( !nNextActiveRingSystem[neigh-1] ) {
478 nNextActiveRingSystem[neigh-1] = 1;
479 }
480 if ( nDistanceFromTerminal[neigh-1] < nDistanceFromTerminal[k-1]+1 ) {
481 nDistanceFromTerminal[neigh-1] = nDistanceFromTerminal[k-1]+1;
482 }
483 nNextActiveRingSystem[k-1] ++; /* leave system k */
484 if ( nNextActiveRingSystem[neigh-1] < T(neigh,0) ) {
485 nNextActiveRingSystem[neigh-1] ++; /* add one connection to neigh */
486 }
487 nNumNextActiveRingSystems ++;
488 }
489 }
490 }
491 }
492 pass ++;
493 } while ( nNumNextActiveRingSystems );
494
495 for ( i = 0; i < num_atoms; i ++ ) {
496 at[i].nDistanceFromTerminal = nDistanceFromTerminal[(int)at[i].nRingSystem-1];
497 }
498
499 inchi_free( tree );
500 tree = NULL;
501 #undef T
502 } else {
503 nNumRingSystems = CT_OUT_OF_RAM; /* error */ /* <BRKPT> */
504 goto exit_function;
505 }
506 }
507 #endif
508
509
510 exit_function:
511 if ( nStackAtom )
512 inchi_free( nStackAtom );
513 if ( nRingStack )
514 inchi_free( nRingStack );
515 if ( nDfsNumber )
516 inchi_free( nDfsNumber );
517 if ( nLowNumber )
518 inchi_free( nLowNumber );
519 if ( cNeighNumb )
520 inchi_free( cNeighNumb );
521 #if ( FIND_RINS_SYSTEMS_DISTANCES == 1 )
522 if ( nRsConnect )
523 inchi_free( nRsConnect );
524 if ( tree )
525 inchi_free( tree );
526 #endif
527 return nNumRingSystems;
528 }
529
530
531 #endif /* } FIND_RING_SYSTEMS */
532
533 /********************************************************************************/
534 /* Return value: new number of atoms > 0 or -1=out of RAM */
remove_terminal_HDT(int num_atoms,inp_ATOM * at,int bFixTermHChrg)535 int remove_terminal_HDT( int num_atoms, inp_ATOM *at, int bFixTermHChrg )
536 {
537 AT_NUMB *new_ord;
538 inp_ATOM *new_at;
539 char *p;
540 const static char szHDT[]="HDT";
541 const static int kMax = sizeof(szHDT); /* = 4 */
542 int ret = -1;
543 int num_hydrogens=0, num_H = 0; /* number of terminal H, D, T */
544 int i, j, k, n, m;
545 int val;
546 AT_RANK new_HydrogenAt_order[NUM_H_ISOTOPES+1];
547 AT_RANK new_OtherNeigh_order[MAXVAL];
548 S_CHAR old_trans[MAX_NUM_STEREO_BONDS];
549
550 int num_OtherNeigh, num_HydrogenAt;
551
552 new_ord=(AT_NUMB *)inchi_calloc(num_atoms, sizeof(new_ord[0])); /* changed malloc to calloc 9-11-2003 */
553 new_at =(inp_ATOM *) inchi_malloc(sizeof(new_at[0]) *num_atoms);
554 if (!new_ord || !new_at)
555 goto exit_function;
556
557 /* move H. D, T to the end of the list of atoms */
558 for ( i = 0; i < num_atoms; i ++ )
559 {
560 at[i].component = i; /* temporarily save original numbering */
561 /* get k = temp. hydrogen isotope/non-hydrogen atom type: */
562 /* k=0:H, k=2:D, k=3:T, k=4=kMax: not a hydrogen */
563 k = at[i].elname[1]? kMax : (p=(char*)strchr(szHDT, at[i].elname[0]))? p-szHDT : kMax;
564 /* set hydrogen isotope atw differences */
565 /* Notes: k-value of isotopic H is incremented to correct iso_atw_diff value later. */
566 /* 1H isotope cannot be detected here. */
567 if ( k == ATW_H || k == ATW_H+1 )
568 {
569 /* D or T, k = 1 or 2 */
570 at[i].elname[0] = 'H'; /* hydrogen isotope */
571 at[i].iso_atw_diff = ++k; /* increment k to make k = iso_atw_diff ( 2 for D, 3 for T ) */
572 }
573 num_H += (k != kMax && at[i].valence == 1 && at[i].chem_bonds_valence == 1 && !NUMH(at,i) );
574 }
575
576 /* special case: HD, HT, DT, HH: the only non-isotopic H or
577 * the lightest isotopic H out of two is removed
578 * to become implicit (make the heavier H the "central atom").
579 * Note: This must be consistent with mol_to_atom()
580 * treatment of isotopic Hn aliases.
581 */
582 if ( 2 == num_H && 2 == num_atoms && !NUMH(at,0) && !NUMH(at,1) )
583 {
584
585 if ( at[0].iso_atw_diff >= at[1].iso_atw_diff ) {
586 new_ord[0] = 0;
587 new_ord[1] = 1;
588 } else {
589 new_ord[0] = 1;
590 new_ord[1] = 0;
591 }
592 if ( at[new_ord[1]].charge ) {
593 at[new_ord[0]].charge += at[new_ord[1]].charge;
594 at[new_ord[1]].charge = 0;
595 }
596 new_at[new_ord[0]] = at[0];
597 new_at[new_ord[1]] = at[1];
598 num_hydrogens = 1;
599
600 }
601 else
602 {
603 /* general case except H-H */
604 for ( i = 0; i < num_atoms; i ++ )
605 {
606 k = (at[i].elname[1] || NUMH(at,i))? kMax : (at[i].elname[0]=='H')? at[i].iso_atw_diff : kMax;
607 if ( k < kMax && at[i].valence == 1 && at[i].chem_bonds_valence == 1 &&
608 /* the order of comparison is important */
609 ((n=(int)at[i].neighbor[0]) > i /* at[n] has not been encountered yet*/ ||
610 (int)new_ord[n] < num_atoms - num_hydrogens) /* at[n] might have been encountered; it has not been moved */ )
611 {
612 /* found an explicit terminal hydrogen */
613 num_hydrogens ++;
614 if ( k==0 && ATW_H <= at[i].iso_atw_diff && at[i].iso_atw_diff < ATW_H+NUM_H_ISOTOPES )
615 {
616 k = at[i].iso_atw_diff; /* H isotope has already been marked above or elsewhere */
617 }
618 if ( at[i].charge )
619 {
620 /* transfer charge from the hydrogen */
621 at[n].charge += at[i].charge;
622 at[i].charge = 0;
623 if (bFixTermHChrg)
624 {
625 /*^^^^^ Fixed bug (July 6, 2008 IPl) :
626 if terminal H was charged (not neutralized before call of remove_terminal_HDT)
627 and had an ordering number > than that of heavy-atom neighbour, then
628 charge on neighbour atom was not adjusted (though charge on H was removed).
629 ^^^^^ */
630 if ( i > n )
631 /* new_at[new_ord[n]] has been created and filled already */
632 new_at[new_ord[n]].charge = at[n].charge;
633 }
634 /*^^^^^ */
635 }
636 new_ord[i] = num_atoms - num_hydrogens; /* move hydrogens to the end of the list */
637 }
638 else
639 {
640 /* atom is not an explicit terminal hydrogen */
641 new_ord[i] = i - num_hydrogens; /* adjust non-hydrogens positions */
642 }
643
644 /* copy atom to the new position */
645 new_at[new_ord[i]] = at[i];
646
647 } /* i */
648
649 } /* general case except H-H */
650
651 if ( num_hydrogens ) {
652 int num_others = num_atoms-num_hydrogens; /* atoms which are not terminal H, D, T */
653 if ( num_hydrogens > 1 ) {
654 /* sort hydrogen isotopes in ascending order, */
655 /* orig, numbers being the secondary sorting key */
656 qsort( new_at+num_others, num_hydrogens, sizeof(new_at[0]), cmp_iso_atw_diff_component_no );
657 }
658 /* save new numbering of hydrogen atoms using temporarily saved orig numbering */
659 for ( i = num_others; i < num_atoms; i ++ ) {
660 new_ord[(int)new_at[i].component] = i;
661 }
662
663 /* renumber neighbors according to new_ord[] and detach terminal hydrogens */
664 for ( i = 0; i < num_others; i++ ) {
665 memset( new_HydrogenAt_order, 0, sizeof(new_HydrogenAt_order) );
666 memset( new_OtherNeigh_order, 0, sizeof(new_OtherNeigh_order) );
667 num_OtherNeigh = 0;
668 num_HydrogenAt = 0;
669 num_H = 0;
670
671 for ( m = 0; m < MAX_NUM_STEREO_BONDS && new_at[i].sb_parity[m]; m ++ ) {
672 old_trans[m] = 2 - (new_at[i].sn_ord[m] + new_at[i].sb_ord[m] + (new_at[i].sn_ord[m] > new_at[i].sb_ord[m]))%2;
673 }
674
675 for ( k = j = val = 0; k < new_at[i].valence; k++ ) {
676 if ( num_others <= ( n = new_ord[new_at[i].neighbor[k]] ) ) {
677 /* discovered neighbor = disconnected explicit hydrogen
678 * i = new atom new_at[i] ordering number
679 * n = new number of the explicit H
680 * k = ordering number of the explicit H in new_at[i] adjacency list
681 */
682 if ( 0 < new_at[n].iso_atw_diff && new_at[n].iso_atw_diff < ATW_H+NUM_H_ISOTOPES ) {
683 /* make explicit isotopic H implicit */
684 new_at[i].num_iso_H[new_at[n].iso_atw_diff-1] ++; /* isotopic H */
685 num_HydrogenAt += !new_HydrogenAt_order[new_at[n].iso_atw_diff];
686 new_HydrogenAt_order[new_at[n].iso_atw_diff] = k+1;
687 } else {
688 /* make explicit non-isotopic H implicit */
689 new_at[i].num_H ++; /* non-isotopic H */
690 num_HydrogenAt += !num_H;
691 num_H ++;
692 new_HydrogenAt_order[0] = k+1;
693 }
694 /* decrement chem. bonds valence because one bond is removed */
695 new_at[i].chem_bonds_valence = inchi_max( 0, new_at[i].chem_bonds_valence-1 );
696 new_at[n].neighbor[0] = i; /* update removed hydrogen neighbor number */
697 if ( new_at[i].sb_parity[0] ) {
698 /* if the removed H is an SB neighbor then mark it as removed */
699 for ( m = 0; m < MAX_NUM_STEREO_BONDS && new_at[i].sb_parity[m]; m ++ ) {
700 if ( k == (int)new_at[i].sn_ord[m] ) {
701 new_at[i].sn_ord[m] = -(new_at[n].iso_atw_diff+1);
702 /* means the SB neighbor has been removed; (-4)=H, (-3)=1H, (-2)=D, (-1)=T */
703 }
704 }
705 }
706 } else {
707 /* discovered a regular (not an explicit H) neighbor */
708 if ( new_at[i].sb_parity[0] ) {
709 if ( num_OtherNeigh < MAX_NUM_STEREO_BONDS ) {
710 new_OtherNeigh_order[num_OtherNeigh] = k+1;
711 }
712 num_OtherNeigh ++; /* increment outside of if() to detect overflow */
713 if ( val != k ) {
714 /* store new stereobond and sb-neighbor ordering numbers */
715 for ( m = 0; m < MAX_NUM_STEREO_BONDS && new_at[i].sb_parity[m]; m ++ ) {
716 if ( k == (int)new_at[i].sb_ord[m] )
717 new_at[i].sb_ord[m] = val;
718 else
719 if ( k == (int)new_at[i].sn_ord[m] )
720 new_at[i].sn_ord[m] = val;
721 }
722 }
723 }
724 new_at[i].neighbor[val] = new_ord[new_at[i].neighbor[k]];
725 new_at[i].bond_type[val] = new_at[i].bond_type[k];
726 new_at[i].bond_stereo[val] = new_at[i].bond_stereo[k];
727 val ++;
728 }
729 }
730 if ( new_at[i].valence > val && new_at[i].sb_parity[0] ) {
731 if ( num_HydrogenAt == new_at[i].valence - val && num_HydrogenAt + num_OtherNeigh <= MAXVAL ) {
732 /* recalculate parity so that it would describe neighbor sequence H,1H,D,T,neigh[0],neigh[1]... */
733 memmove( new_OtherNeigh_order + num_HydrogenAt, new_OtherNeigh_order, num_OtherNeigh*sizeof(new_OtherNeigh_order[0]));
734 for ( k = 0, j = 1; k <= NUM_H_ISOTOPES; k ++ ) {
735 if ( new_HydrogenAt_order[k] ) {
736 new_OtherNeigh_order[num_HydrogenAt - j] = new_HydrogenAt_order[k];
737 for ( m = 0; m < MAX_NUM_STEREO_BONDS && new_at[i].sb_parity[m]; m ++ ) {
738 if ( (int)new_at[i].sn_ord[m] == -(k+1) ) {
739 new_at[i].sn_ord[m] = -j;
740 /* negative means explicit H isotope ord are
741 (contiguously) in front of the adjacency list */
742 }
743 }
744 j ++;
745 }
746 }
747 /* at this point new_OtherNeigh_order[] contains
748 incremented old ordering numbers in new order */
749 k = insertions_sort_AT_RANK( new_OtherNeigh_order, num_HydrogenAt + num_OtherNeigh );
750 k = k%2; /* seems to be of no use */
751 /*if ( k ) {*/
752 /*
753 for ( m = 0; m < MAX_NUM_STEREO_BONDS && new_at[i].sb_parity[m]; m ++ ) {
754 if ( PARITY_WELL_DEF(new_at[i].sb_parity[m]) ) {
755 if ( old_trans[m] != 2 - (4 + new_at[i].sn_ord[m] + new_at[i].sb_ord[m] + (new_at[i].sn_ord[m] > new_at[i].sb_ord[m]))%2 ) {
756 new_at[i].sb_parity[m] = 3 - new_at[i].sb_parity[m];
757 }
758 }
759 }
760 */
761 /*}*/
762 }
763 #ifdef _DEBUG
764 else {
765 /* error */
766 int stop = 1;
767 }
768 #endif
769 }
770 new_at[i].valence = val;
771 }
772 memcpy( at, new_at, sizeof(at[0])*num_atoms );
773 ret = num_others;
774 } else {
775 ret = num_atoms;
776 }
777 exit_function:
778 if ( new_ord )
779 inchi_free ( new_ord );
780 if ( new_at )
781 inchi_free ( new_at );
782 return ret;
783 }
784 /************************************************************************/
add_DT_to_num_H(int num_atoms,inp_ATOM * at)785 int add_DT_to_num_H( int num_atoms, inp_ATOM *at )
786 /* assume num_1H, num_D and num_T are not included in num_H */
787 {
788 int i, j;
789 for ( i = 0; i < num_atoms; i ++ ) {
790 for ( j = 0; j < NUM_H_ISOTOPES; j ++ )
791 at[i].num_H += at[i].num_iso_H[j];
792 }
793 return 0;
794 }
795 /***************************************************************/
796 /* not used ---
797 int FixAromaticOxygenAndSulfur( inp_ATOM *atom )
798 {
799 if ( !atom->elname[1] && (atom->elname[0]=='O' || atom->elname[0]=='S') &&
800 atom->valence==2 && !atom->charge && !atom->radical &&
801 atom->bond_type[0] + atom->bond_type[1] == 3 ) {
802 atom->charge = 1;
803 return 1; // fixed
804 }
805 return 0;
806 }
807 */
808 /********************************************************************
809
810 InChI post-version 1.01 features implementation
811 (v. 1.04 : underivatize is still experimental and for engineering mode)
812
813 ********************************************************************/
814 #if ( RING2CHAIN == 1 || UNDERIVATIZE == 1 )
815
816 static U_CHAR el_number_O;
817 static U_CHAR el_number_C;
818 static U_CHAR el_number_N;
819 static U_CHAR el_number_P;
820 static U_CHAR el_number_S;
821 static U_CHAR el_number_Si;
822 static U_CHAR el_number_F;
823 static U_CHAR el_number_Cl;
824 static U_CHAR el_number_Br;
825 static U_CHAR el_number_I;
826 static U_CHAR el_number_B;
827
828 typedef struct tagAtPair {
829 AT_NUMB at[2]; /* at[0] < at[1] */
830 } R2C_ATPAIR;
831
832 int DisconnectInpAtBond( inp_ATOM *at, AT_NUMB *nOldCompNumber, int iat, int neigh_ord );
833 int ExtractConnectedComponent( inp_ATOM *at, int num_at, int component_number, inp_ATOM *component_at );
834 int mark_arom_bonds( inp_ATOM *at, int num_atoms );
835
836 void set_R2C_el_numbers( void );
837 int UnMarkDisconnectedComponents( ORIG_ATOM_DATA *orig_inp_data );
838 int UnMarkRingSystemsInp( inp_ATOM *at, int num_atoms );
839 int UnMarkOtherIndicators( inp_ATOM *at, int num_atoms );
840 int UnMarkOneComponent( inp_ATOM *at, int num_atoms );
841 int subtract_DT_from_num_H( int num_atoms, inp_ATOM *at );
842 int add_inp_ATOM( inp_ATOM *at, int len_at, int len_cur, inp_ATOM *add, int len_add );
843 int cmp_r2c_atpair( const void *p1, const void *p2 );
844 int has_atom_pair( R2C_ATPAIR *ap, int num_ap, AT_NUMB at1, AT_NUMB at2 );
845 int mark_atoms_ap( inp_ATOM *at, AT_NUMB start, R2C_ATPAIR *ap, int num_ap, int num, AT_NUMB cFlags );
846
847 /********************************************************************/
set_R2C_el_numbers(void)848 void set_R2C_el_numbers( void )
849 {
850 if ( !el_number_O ) {
851 el_number_O = (U_CHAR)get_periodic_table_number( "O" );
852 el_number_C = (U_CHAR)get_periodic_table_number( "C" );
853 el_number_N = (U_CHAR)get_periodic_table_number( "N" );
854 el_number_P = (U_CHAR)get_periodic_table_number( "P" );
855 el_number_S = (U_CHAR)get_periodic_table_number( "S" );
856 el_number_Si = (U_CHAR)get_periodic_table_number( "Si" );
857 el_number_F = (U_CHAR)get_periodic_table_number( "F" );
858 el_number_Cl = (U_CHAR)get_periodic_table_number( "Cl" );
859 el_number_Br = (U_CHAR)get_periodic_table_number( "Br" );
860 el_number_I = (U_CHAR)get_periodic_table_number( "I" );
861 el_number_B = (U_CHAR)get_periodic_table_number( "B" );
862 }
863 }
864 /***************************************************************/
UnMarkDisconnectedComponents(ORIG_ATOM_DATA * orig_inp_data)865 int UnMarkDisconnectedComponents( ORIG_ATOM_DATA *orig_inp_data )
866 {
867 int i;
868 for ( i = 0; i < orig_inp_data->num_inp_atoms; i ++ ) {
869 orig_inp_data->at[i].orig_compt_at_numb = 0;
870 orig_inp_data->at[i].component = 0;
871 }
872 if ( orig_inp_data->nCurAtLen ) {
873 inchi_free( orig_inp_data->nCurAtLen );
874 orig_inp_data->nCurAtLen = NULL;
875 }
876 if ( orig_inp_data->nOldCompNumber ) {
877 inchi_free( orig_inp_data->nOldCompNumber );
878 orig_inp_data->nOldCompNumber = NULL;
879 }
880 orig_inp_data->num_components = 0;
881 return 0;
882 }
883 /***************************************************************/
UnMarkRingSystemsInp(inp_ATOM * at,int num_atoms)884 int UnMarkRingSystemsInp( inp_ATOM *at, int num_atoms )
885 {
886 int i;
887 for ( i = 0; i < num_atoms; i ++ ) {
888 at[i].bCutVertex = 0;
889 at[i].nRingSystem = 0;
890 at[i].nNumAtInRingSystem = 0;
891 at[i].nBlockSystem = 0;
892 }
893 return 0;
894 }
895 /***************************************************************/
UnMarkOtherIndicators(inp_ATOM * at,int num_atoms)896 int UnMarkOtherIndicators( inp_ATOM *at, int num_atoms )
897 {
898 int i;
899 for ( i = 0; i < num_atoms; i ++ ) {
900 at[i].at_type = 0;
901 at[i].cFlags = 0;
902 }
903 return 0;
904 }
905 /***************************************************************/
UnMarkOneComponent(inp_ATOM * at,int num_atoms)906 int UnMarkOneComponent( inp_ATOM *at, int num_atoms )
907 {
908 int i;
909 for ( i = 0; i < num_atoms; i ++ ) {
910 at[i].orig_compt_at_numb = 0;
911 at[i].component = 0;
912 }
913 return 0;
914 }
915 /***************************************************************/
subtract_DT_from_num_H(int num_atoms,inp_ATOM * at)916 int subtract_DT_from_num_H( int num_atoms, inp_ATOM *at )
917 /* assume num_1H, num_D and num_T are included in num_H */
918 {
919 int i, j;
920 for ( i = 0; i < num_atoms; i ++ ) {
921 for ( j = 0; j < NUM_H_ISOTOPES; j ++ )
922 at[i].num_H -= at[i].num_iso_H[j];
923 }
924 return 0;
925 }
926 /***************************************************************/
add_inp_ATOM(inp_ATOM * at,int len_at,int len_cur,inp_ATOM * add,int len_add)927 int add_inp_ATOM( inp_ATOM *at, int len_at, int len_cur, inp_ATOM *add, int len_add )
928 {
929 int i, j;
930 inp_ATOM *a;
931 /* chack correctness */
932 if ( len_cur < 0 )
933 return len_cur;
934 if ( len_add < 0 )
935 return len_add;
936 if ( len_cur + len_add > len_at )
937 return -1;
938 /* copy */
939 memcpy( at+len_cur, add, len_add*sizeof(at[0]) );
940 /* modify */
941 if ( len_cur ) {
942 a = at + len_cur;
943 for ( i = 0; i < len_add; i ++ ) {
944 for ( j = 0; j < a[i].valence; j ++ ) {
945 a[i].neighbor[j] += len_cur;
946 }
947 }
948 }
949 return len_cur + len_add;
950 }
951 /****************************************************************************/
mark_arom_bonds(inp_ATOM * at,int num_atoms)952 int mark_arom_bonds( inp_ATOM *at, int num_atoms )
953 {
954 INCHI_MODE bTautFlags=0, bTautFlagsDone = 0;
955 inp_ATOM *at_fixed_bonds_out = NULL;
956 T_GROUP_INFO *t_group_info = NULL;
957 int ret;
958 ret = mark_alt_bonds_and_taut_groups ( at, at_fixed_bonds_out, num_atoms,
959 t_group_info, &bTautFlags, &bTautFlagsDone );
960 return ret;
961
962 }
963
964 /********************************************************************/
cmp_r2c_atpair(const void * p1,const void * p2)965 int cmp_r2c_atpair( const void *p1, const void *p2 )
966 {
967 const R2C_ATPAIR *ap1 = (const R2C_ATPAIR *)p1;
968 const R2C_ATPAIR *ap2 = (const R2C_ATPAIR *)p2;
969 int diff = (int)ap1->at[0] - (int)ap2->at[0];
970 if ( !diff ) {
971 diff = (int)ap1->at[1] - (int)ap2->at[1];
972 }
973 return diff;
974 }
975 /***************************************************************/
has_atom_pair(R2C_ATPAIR * ap,int num_ap,AT_NUMB at1,AT_NUMB at2)976 int has_atom_pair( R2C_ATPAIR *ap, int num_ap, AT_NUMB at1, AT_NUMB at2 )
977 {
978 R2C_ATPAIR ap1;
979 int i1, i2, i3, diff;
980 int n = at1 > at2;
981
982 ap1.at[n] = at1;
983 ap1.at[1-n] = at2;
984 i1 = 0;
985 i2 = num_ap-1;
986 /* search for ap1 by simple bisections */
987 do {
988 i3 = (i1 + i2)/2;
989 if ( !(diff = cmp_r2c_atpair(&ap1, ap+i3) ) ) {
990 return i3+1; /* found => positive number */
991 } else
992 if ( diff > 0 ) {
993 i1 = i3 + 1;
994 } else {
995 i2 = i3 - 1;
996 }
997 }while ( i2 >= i1 );
998 return 0; /* not found */
999 }
1000
1001 /***************************************************************/
1002 /* DFS search for atoms that do not have a flag */
mark_atoms_ap(inp_ATOM * at,AT_NUMB start,R2C_ATPAIR * ap,int num_ap,int num,AT_NUMB cFlags)1003 int mark_atoms_ap( inp_ATOM *at, AT_NUMB start, R2C_ATPAIR *ap, int num_ap, int num, AT_NUMB cFlags )
1004 {
1005 if ( !at[start].at_type ) {
1006 int i;
1007 AT_NUMB neigh;
1008 at[start].at_type = cFlags;
1009 num ++;
1010 for ( i = 0; i < at[start].valence; i ++ ) {
1011 neigh = at[start].neighbor[i];
1012 if ( has_atom_pair( ap, num_ap, start, neigh ) )
1013 continue;
1014 num = mark_atoms_ap( at, neigh, ap, num_ap, num, cFlags );
1015 }
1016 }
1017 return num; /* number of atoms traversed forward from at[start] */
1018 }
1019
1020 #endif /* RING2CHAIN || UNDERIVATIZE */
1021
1022 #if ( UNDERIVATIZE == 1 )
1023 /***************************************************************/
1024
1025 #ifdef NEVER
1026 typedef struct tagAtTypeBitmap {
1027 AT_NUMB ord1 : 5; /* up to 2^5-1 = 31 = 0x0037 */
1028 AT_NUMB ord2 : 5;
1029 AT_NUMB type : 6; /* up to 2^6-1 = 63 = 0x0077 */
1030 } AtTypeBitmap;
1031 typedef union tagAtTypeUnion {
1032 AT_NUMB num;
1033 AtTypeBitmap bits;
1034 } AtTypeUnion;
1035 #endif
1036
1037 #define DERIV_AT_LEN 4
1038 typedef struct tagDerivAttachment {
1039 char typ[DERIV_AT_LEN];
1040 char ord[DERIV_AT_LEN];
1041 char num[DERIV_AT_LEN];
1042 } DERIV_AT;
1043
1044
1045 #define DERIV_BRIDGE_O 0x0001 /* R1-O-R2 => R1-OH + HO-R2 */
1046 #define DERIV_BRIDGE_NH 0x0002 /* R1-NH-R2 amine */
1047 #define DERIV_AMINE_tN 0x0004 /* R1-N(-R2)-R3 tertiary amine */
1048 #define DERIV_RING_O 0x0008 /* -O- in a ring */
1049 #define DERIV_RING_NH 0x0010 /* -NH- in a ring */
1050 #define DERIV_UNMARK 0x0040 /* unmark the cut */
1051 #define DERIV_NOT 0x1000 /* cannot be a derivatization agent atom */
1052
1053 #define DERIV_DUPLIC 0x0080 /* duplicated disconnection */
1054
1055 #define DERIV_RING (DERIV_RING_O | DERIV_RING_NH)
1056
1057 #define MAX_AT_DERIV 12
1058 #define NOT_AT_DERIV 99
1059 #define MIN_AT_LEFT_DERIV 3
1060 #define NO_ORD_VAL 0x0037
1061
1062 #define CFLAG_MARK_BRANCH 1 /* for main derivative traversal */
1063 #define CFLAG_MARK_BLOCK 2 /* for block detection */
1064 #define CFLAG_MARK_BLOCK_INV ((char)~(CFLAG_MARK_BLOCK)) /* for block detection */
1065 #define COUNT_ALL_NOT_DERIV 1 /* 1=> count ALL atoms that are not in deriv. agents */
1066 /* 0=> only atoms that are not in DERIV_RING */
1067
1068 int mark_atoms_cFlags( inp_ATOM *at, int start, int num, char cFlags );
1069 int un_mark_atoms_cFlags( inp_ATOM *at, int start, int num, char cFlags, char cInvFlags );
1070 int is_C_or_S_DB_O( inp_ATOM *at, int i );
1071 int is_C_unsat_not_arom( inp_ATOM *at, int i );
1072 int is_C_Alk( inp_ATOM *at, int i, char cFlags );
1073 int is_Si_IV( inp_ATOM *at, int i );
1074 int is_P_TB_N( inp_ATOM *at, int i );
1075 int is_possibly_deriv_neigh( inp_ATOM *at, int iat, int iord, int type, char cFlags );
1076 int get_traversed_deriv_type( inp_ATOM *at, DERIV_AT *da, int k, DERIV_AT *da1, char cFlags );
1077 int add_to_da( DERIV_AT *da, DERIV_AT *add );
1078 int mark_atoms_deriv( inp_ATOM *at, DERIV_AT *da, int start, int num, char cFlags, int *pbFound );
1079 int count_one_bond_atoms( inp_ATOM *at, DERIV_AT *da, int start, int ord, char cFlags, int *bFound );
1080 int is_silyl( inp_ATOM *at, int start, int ord_prev );
1081 int is_Me_or_Et( inp_ATOM *at, int start, int ord_prev );
1082 int is_CF3_or_linC3F7( inp_ATOM *at, int start, int ord_prev );
1083 int is_phenyl( inp_ATOM *at, int start, int ord_prev );
1084 int is_deriv_ring( inp_ATOM *at, int start, int num_atoms, DERIV_AT *da1, int idrv );
1085 int is_deriv_chain( inp_ATOM *at, int start, int num_atoms, DERIV_AT *da1, int idrv );
1086 int is_deriv_chain_or_ring( inp_ATOM *at, int start, int num_atoms, DERIV_AT *da1, int *idrv );
1087 int remove_deriv( DERIV_AT *da1, int idrv );
1088 int remove_deriv_mark( DERIV_AT *da1, int idrv );
1089 int EliminateDerivNotInList( inp_ATOM *at, DERIV_AT *da, int num_atoms );
1090 int make_single_cut( inp_ATOM *at, DERIV_AT *da, int iat, int icut );
1091
1092 /***************************************************************/
1093 /* DFS search for atoms that do not have a flag */
mark_atoms_cFlags(inp_ATOM * at,int start,int num,char cFlags)1094 int mark_atoms_cFlags( inp_ATOM *at, int start, int num, char cFlags )
1095 {
1096 if ( !(at[start].cFlags & cFlags) ) {
1097 int i;
1098 at[start].cFlags |= cFlags;
1099 num ++;
1100 for ( i = 0; i < at[start].valence; i ++ ) {
1101 num = mark_atoms_cFlags( at, at[start].neighbor[i], num, cFlags );
1102 }
1103 }
1104 return num; /* number of atoms traversed forward from at[start] */
1105 }
1106 /***************************************************************/
1107 /* DFS search for atoms that do have a flag */
un_mark_atoms_cFlags(inp_ATOM * at,int start,int num,char cFlags,char cInvFlags)1108 int un_mark_atoms_cFlags( inp_ATOM *at, int start, int num, char cFlags, char cInvFlags )
1109 {
1110 if ( at[start].cFlags & cFlags ) {
1111 int i;
1112 at[start].cFlags &= cInvFlags;
1113 num ++;
1114 for ( i = 0; i < at[start].valence; i ++ ) {
1115 num = un_mark_atoms_cFlags( at, at[start].neighbor[i], num, cFlags, cInvFlags );
1116 }
1117 }
1118 return num; /* number of atoms traversed forward from at[start] */
1119 }
1120 /***************************************************************/
is_C_or_S_DB_O(inp_ATOM * at,int i)1121 int is_C_or_S_DB_O( inp_ATOM *at, int i )
1122 {
1123 int j, neigh;
1124 if ( at[i].el_number != el_number_C &&
1125 at[i].el_number != el_number_S ||
1126 at[i].charge || at[i].radical )
1127 return 0;
1128 for ( j = 0; j < at[i].valence; j ++ ) {
1129 neigh = at[i].neighbor[j];
1130 if ( (at[neigh].el_number == el_number_O ||
1131 at[neigh].el_number == el_number_S ) &&
1132 !at[neigh].num_H && 1 == at[neigh].valence &&
1133 2 == at[neigh].chem_bonds_valence ) {
1134 return 1;
1135 }
1136 }
1137 return 0;
1138 }
1139 /***************************************************************/
is_C_unsat_not_arom(inp_ATOM * at,int i)1140 int is_C_unsat_not_arom( inp_ATOM *at, int i )
1141 {
1142 int j, neigh, num_arom, num_DB;
1143 if ( at[i].el_number != el_number_C ||
1144 at[i].valence == at[i].chem_bonds_valence ||
1145 at[i].valence+1 < at[i].chem_bonds_valence ||
1146 at[i].chem_bonds_valence + at[i].num_H != 4 ||
1147 at[i].charge || at[i].radical )
1148 return 0;
1149 num_arom = num_DB = 0;
1150 for ( j = 0; j < at[i].valence; j ++ ) {
1151 neigh = at[i].neighbor[j];
1152 num_arom += at[i].bond_type[j] == BOND_TYPE_ALTERN;
1153 if ( (at[neigh].el_number == el_number_O ||
1154 at[neigh].el_number == el_number_S ) &&
1155 !at[neigh].num_H && 1 == at[neigh].valence &&
1156 2 == at[neigh].chem_bonds_valence ) {
1157 continue;
1158 }
1159 num_DB += at[i].bond_type[j] == BOND_TYPE_DOUBLE;
1160 }
1161 return num_DB && !num_arom;
1162 }
1163 /***************************************************************/
is_C_Alk(inp_ATOM * at,int i,char cFlags)1164 int is_C_Alk( inp_ATOM *at, int i, char cFlags )
1165 {
1166 if ( at[i].el_number == el_number_C &&
1167 at[i].valence == at[i].chem_bonds_valence ) {
1168 int j, k;
1169 U_CHAR el;
1170 for ( j = 0; j < at[i].valence; j ++ ) {
1171 k = at[i].neighbor[j];
1172 if ( at[k].cFlags & cFlags )
1173 continue;
1174 el = at[k].el_number;
1175 if ( el != el_number_C &&
1176 el != el_number_F &&
1177 el != el_number_Cl &&
1178 el != el_number_Br &&
1179 el != el_number_I ) {
1180 return 0;
1181 }
1182 }
1183 return 1;
1184 }
1185 return 0;
1186 }
1187 /***************************************************************/
is_Si_IV(inp_ATOM * at,int i)1188 int is_Si_IV( inp_ATOM *at, int i )
1189 {
1190 if ( at[i].el_number != el_number_Si ||
1191 at[i].charge || at[i].radical || at[i].valence != 4 || at[i].chem_bonds_valence != 4 )
1192 return 0;
1193 return 1;
1194 }
1195 /***************************************************************/
is_P_TB_N(inp_ATOM * at,int i)1196 int is_P_TB_N( inp_ATOM *at, int i )
1197 {
1198 int j, k;
1199 if ( at[i].el_number != el_number_P || at[i].chem_bonds_valence - at[i].valence != 2 )
1200 return 0;
1201 for ( j = 0; j < at[i].valence; j ++ ) {
1202 k = at[i].neighbor[j];
1203 if ( at[k].el_number == el_number_N && at[k].valence == 1 && at[k].chem_bonds_valence == 3 )
1204 return 1;
1205 }
1206 return 0;
1207 }
1208 /***************************************************************/
is_possibly_deriv_neigh(inp_ATOM * at,int iat,int iord,int type,char cFlags)1209 int is_possibly_deriv_neigh( inp_ATOM *at, int iat, int iord, int type, char cFlags )
1210 {
1211 int neigh = at[iat].neighbor[iord];
1212 int neigh_from = -1;
1213 U_CHAR el = at[neigh].el_number;
1214 int bOk = 0;
1215 switch ( type ) {
1216 case DERIV_BRIDGE_O:
1217 neigh_from = at[iat].neighbor[!iord];
1218 /* -> A--O--B -> traversing from A(neigh_from) to B(neigh); may we cut O--B bond? */
1219 /* do not cut bond "---" in A=Si(IV), B(=O), B=C: Si(IV)-O---B(=O) */
1220 if ( !(is_C_or_S_DB_O( at, neigh ) && is_Si_IV( at, neigh_from )) &&
1221 !is_C_unsat_not_arom( at, neigh ) ) {
1222 bOk = ( el == el_number_C ||
1223 el == el_number_Si ||
1224 el == el_number_S ||
1225 el == el_number_P );
1226 }
1227 break;
1228 case DERIV_BRIDGE_NH:
1229 /* -> A--NH--B -> traversing from A(neigh_from) to B(neigh); may we cut NH--B bond? */
1230 bOk = ( is_C_or_S_DB_O( at, neigh ) ||
1231 is_C_Alk( at, neigh, cFlags ) ||
1232 is_Si_IV( at, neigh ) ||
1233 is_P_TB_N( at, neigh ) ) && !(is_C_unsat_not_arom( at, neigh ));
1234 break;
1235 case DERIV_AMINE_tN:
1236 bOk = ( is_C_or_S_DB_O( at, neigh ) ||
1237 is_C_Alk( at, neigh, cFlags ) ||
1238 is_Si_IV( at, neigh ) ||
1239 is_P_TB_N( at, neigh ) ) && !(is_C_unsat_not_arom( at, neigh ));
1240 break;
1241 }
1242 return bOk;
1243 }
1244 /***************************************************************/
1245 /* determines derivative type on the forward step of the DFS */
1246 /***************************************************************/
get_traversed_deriv_type(inp_ATOM * at,DERIV_AT * da,int k,DERIV_AT * da1,char cFlags)1247 int get_traversed_deriv_type( inp_ATOM *at, DERIV_AT *da, int k, DERIV_AT *da1, char cFlags )
1248 {
1249 int i, j, m, nBlockSystemFrom, nOrdBack1, nOrdBack2, nOrdBack3, nBackType1, nBackType2;
1250 memset( da1, 0, sizeof(*da1) );
1251 if ( at[k].cFlags & cFlags ) {
1252 return 0;
1253 }
1254 for ( m = 0; m < at[k].valence && !(at[(int)at[k].neighbor[m]].cFlags & cFlags); m ++ )
1255 ;
1256 if ( m == at[k].valence )
1257 return -1; /* error: at least one neighbor must have cFlags */
1258 if ( at[k].valence == 1 && at[k].num_H && (
1259 at[k].el_number == el_number_O ||
1260 at[k].el_number == el_number_N ||
1261 at[k].el_number == el_number_S ||
1262 at[k].el_number == el_number_P ) ) {
1263 return DERIV_NOT;
1264 }
1265 if ( is_el_a_metal( at[k].el_number ) ) {
1266 return DERIV_NOT;
1267 }
1268 #ifdef NEVER
1269 if ( at[k].el_number == el_number_N && at[k].valence >= 2 && at[k].chem_bonds_valence <= 3 ) {
1270 return DERIV_NOT; /* prohibit -N-, -N=, allow -N# as in isocyano -N#C or NO2 */
1271 }
1272 #endif
1273 /* m is the ord of the bond from which at[k] was reached first time */
1274 if ( at[k].nNumAtInRingSystem == 1 && (at[k].el_number == el_number_O || at[k].el_number == el_number_S) &&
1275 at[k].valence == 2 && at[k].chem_bonds_valence == 2 &&
1276 !at[k].num_H && !at[k].charge && !at[k].radical) {
1277 /* -> A--O--B -> traversing from A to B; cut O--B */
1278 /* check for carboxy A(=O)-O-B and A--O--B(=O) */
1279 /* int has_A_CO = is_C_or_S_DB_O( at, at[k].neighbor[m] ); */
1280 int has_B_CO = is_C_or_S_DB_O( at, at[k].neighbor[!m] );
1281 int is_A_Si_IV = is_Si_IV( at, at[k].neighbor[m] );
1282 /* int is_B_Si_IV = is_Si_IV( at, at[k].neighbor[!m] );*/
1283 if ( is_A_Si_IV && has_B_CO ) {
1284 ; /* do not cut bond --- in A=Si(IV), B(=O), B=C: Si(IV)-O---B(=O) */
1285 } else
1286 if ( is_possibly_deriv_neigh( at, k, !m, DERIV_BRIDGE_O, cFlags ) ) {
1287 da1->ord[0] = !m; /* ord of neighbor B, not traversed yet */
1288 da1->typ[0] = DERIV_BRIDGE_O; /* type */
1289 return DERIV_BRIDGE_O; /* R-O-R */
1290 }
1291 }
1292 if ( at[k].bCutVertex && at[k].el_number == el_number_N &&
1293 at[k].valence == 2 && at[k].chem_bonds_valence == at[k].valence &&
1294 at[k].valence+at[k].num_H == 3 && !at[k].charge && !at[k].radical) {
1295 da1->ord[0] = !m; /* ord of neighbor B, not traversed yet */
1296 da1->typ[0] = DERIV_BRIDGE_NH; /* type */
1297 return DERIV_BRIDGE_NH; /* R1-N(-R2)-R3 or R1-NH-R2 amine */
1298 }
1299 if ( at[k].bCutVertex && at[k].el_number == el_number_N &&
1300 at[k].valence == 3 && at[k].chem_bonds_valence == at[k].valence &&
1301 at[k].valence+at[k].num_H == 3 && !at[k].charge && !at[k].radical) {
1302 int rm1 = ( at[at[k].neighbor[m]].nRingSystem == at[at[k].neighbor[(m+1)%3]].nRingSystem );
1303 int rm2 = ( at[at[k].neighbor[m]].nRingSystem == at[at[k].neighbor[(m+2)%3]].nRingSystem );
1304 int r12 = ( at[at[k].neighbor[(m+1)%3]].nRingSystem == at[at[k].neighbor[(m+2)%3]].nRingSystem );
1305 int ord[2]= {-1, -1};
1306 i = 0; /* type: tertriary amine */
1307 switch( rm1 + rm2 + r12 ) {
1308 case 0:
1309 /* -N< has no ring bonds */
1310 if ( is_possibly_deriv_neigh( at, k, (m+1)%3, DERIV_AMINE_tN, cFlags ) ) {
1311 ord[i ++] = (m+1)%3; /* ord of a non-ring neighbor, not traversed yet */
1312 }
1313 if ( is_possibly_deriv_neigh( at, k, (m+2)%3, DERIV_AMINE_tN, cFlags ) ) {
1314 ord[i ++] = (m+2)%3; /* ord of another non-ring neighbor, not traversed yet */
1315 }
1316 if ( i == 2 && ord[0] > ord[1] ) {
1317 int tmp = ord[0];
1318 ord[0] = ord[1];
1319 ord[1] = tmp;
1320 }
1321 break;
1322
1323 case 1:
1324 if ( rm1 && is_possibly_deriv_neigh( at, k, (m+2)%3, DERIV_AMINE_tN, cFlags ) ) {
1325 ord[i++] = (m+2)%3; /* ord of a single non-ring neighbor, not traversed yet */
1326 } else
1327 if ( rm2 && is_possibly_deriv_neigh( at, k, (m+1)%3, DERIV_AMINE_tN, cFlags ) ) {
1328 ord[i++] = (m+1)%3; /* ord of a single non-ring neighbor, not traversed yet */
1329 }
1330 }
1331 for ( j = 0; j < i; j ++ ) {
1332 da1->ord[j] = ord[j];
1333 da1->typ[j] = DERIV_AMINE_tN;
1334 }
1335 if ( i ) {
1336 return DERIV_AMINE_tN;
1337 }
1338 return 0; /* all neighbors or two untraversed edges are in one ring system */
1339 }
1340 if ( at[k].bCutVertex && /* DD */
1341 at[k].valence == at[k].chem_bonds_valence &&
1342 (!at[k].num_H || at[k].el_number == el_number_C && 1 == at[k].num_H) &&
1343 !at[k].charge && !at[k].radical &&
1344 (at[k].el_number == el_number_C && at[k].valence+at[k].num_H == 4 ||
1345 at[k].el_number == el_number_Si && at[k].valence == 4 ||
1346 at[k].el_number == el_number_B && at[k].valence == 3) ) {
1347
1348 /* entering path: ->X--O--DD
1349 --X--O
1350 | \ / DD = C, Si, B
1351 | DD
1352 | / \ O = O, NH
1353 --Y--O
1354 X, Y -- ignored for now
1355 */
1356 nBlockSystemFrom = 0;
1357 nBackType1 = nBackType2 = 0;
1358 nOrdBack1 = nOrdBack2 = nOrdBack3 = -1;
1359 j=(int)at[k].neighbor[m];
1360 if ( (at[j].el_number == el_number_O || at[j].el_number == el_number_S) && at[j].valence == 2 &&
1361 at[j].chem_bonds_valence == at[j].valence &&
1362 at[j].nNumAtInRingSystem >= 5 &&
1363 !at[j].num_H && !at[j].charge && !at[j].radical ) {
1364 nBackType1 = DERIV_RING_O;
1365 nBlockSystemFrom = at[j].nBlockSystem;
1366 nOrdBack1 = m; /* predecessor */
1367 } else
1368 if ( at[j].el_number == el_number_N && at[j].valence == 2 &&
1369 at[j].chem_bonds_valence == at[j].valence &&
1370 at[j].nNumAtInRingSystem >= 5 &&
1371 1==at[j].num_H && !at[j].charge && !at[j].radical ) {
1372 nBackType1 = DERIV_RING_NH;
1373 nBlockSystemFrom = at[j].nBlockSystem;
1374 nOrdBack1 = m; /* predecessor */
1375 }
1376 if ( nBlockSystemFrom ) {
1377 int num1, num2, bFound;
1378 at[k].cFlags |= CFLAG_MARK_BLOCK;
1379 num1 = mark_atoms_cFlags( at, at[k].neighbor[nOrdBack1], 1, CFLAG_MARK_BLOCK );
1380 for ( i = 0; i < at[k].valence; i ++ ) {
1381 if ( i == nOrdBack1 )
1382 continue;
1383 j=(int)at[k].neighbor[i];
1384 bFound = 0;
1385 if ( at[j].cFlags & CFLAG_MARK_BLOCK ) {
1386 if ( (at[j].el_number == el_number_O || at[j].el_number == el_number_S) && at[j].valence == 2 &&
1387 at[j].chem_bonds_valence == at[j].valence &&
1388 at[j].nNumAtInRingSystem >= 5 &&
1389 !at[j].num_H && !at[j].charge && !at[j].radical ) {
1390 bFound = 1;
1391 if ( nOrdBack2 < 0 ) {
1392 nOrdBack2 = i; /* predecessor #2 */
1393 nBackType2 = DERIV_RING_O;
1394 } else {
1395 nOrdBack3 = i; /* predecessor #3 -- should not happen */
1396 }
1397 }
1398 if ( at[j].el_number == el_number_N && at[j].valence == 2 &&
1399 at[j].chem_bonds_valence == at[j].valence &&
1400 at[j].nNumAtInRingSystem >= 5 &&
1401 1==at[j].num_H && !at[j].charge && !at[j].radical ) {
1402 bFound = 1;
1403 if ( nOrdBack2 < 0 ) {
1404 nOrdBack2 = i; /* predecessor #2 */
1405 nBackType2 = DERIV_RING_NH;
1406 } else {
1407 nOrdBack3 = i; /* predecessor #3 -- should not happen */
1408 }
1409 }
1410 if ( !bFound ) {
1411 nOrdBack3 = 99; /* reject: wrong neighboring atom in the same block */
1412 break;
1413 }
1414 }
1415 }
1416 num2 = un_mark_atoms_cFlags( at, k, 0, CFLAG_MARK_BLOCK, CFLAG_MARK_BLOCK_INV );
1417 if ( num1 != num2 ) {
1418 return -1; /* mark_atoms_cFlags() program error */
1419 }
1420 if ( nOrdBack2 >= 0 && nOrdBack3 < 0 ) {
1421 if ( nOrdBack1 < nOrdBack2 ) {
1422 da1->ord[0] = nOrdBack1; /* ord of a ring neighbor, traversed */
1423 da1->typ[0] = nBackType1;
1424 da1->ord[1] = nOrdBack2; /* ord of another ring neighbor, not traversed yet */
1425 da1->typ[1] = nBackType2;
1426 } else {
1427 da1->ord[0] = nOrdBack2; /* ord of a ring neighbor, traversed */
1428 da1->typ[0] = nBackType2;
1429 da1->ord[1] = nOrdBack1; /* ord of another ring neighbor, not traversed yet */
1430 da1->typ[1] = nBackType1;
1431 }
1432 return nBackType1 | nBackType2;
1433 }
1434 }
1435 }
1436 return 0;
1437 }
1438 /***************************************************************/
add_to_da(DERIV_AT * da,DERIV_AT * add)1439 int add_to_da( DERIV_AT *da, DERIV_AT *add )
1440 {
1441 /* if add has more than 1 element the elements are in ascending add.ord[] order */
1442 int i, j, len;
1443 for ( len = 0; len < DERIV_AT_LEN && da->typ[len]; len ++ )
1444 ;
1445 for ( j = 0; j < DERIV_AT_LEN && add->typ[j]; j ++ ) {
1446 for ( i = 0; i < len; i ++ ) {
1447 if ( add->ord[j] == da->ord[i] ) {
1448 if ( add->typ[j] != da->typ[i] ) {
1449 return -1; /* error, should not happen */
1450 }
1451 if ( add->num[j] != da->num[i] ) {
1452 return -2; /* error, should not happen */
1453 }
1454 break;
1455 }
1456 }
1457 if ( i == len ) {
1458 if ( len < DERIV_AT_LEN ) {
1459 da->ord[i] = add->ord[j];
1460 da->typ[i] = add->typ[j];
1461 da->num[i] = add->num[j];
1462 len ++;
1463 } else {
1464 return -3; /* overflow, should not happen */
1465 }
1466 }
1467 }
1468 return 0;
1469 }
1470 /***************************************************************/
1471 /* DFS search for atoms that do not have a flag */
mark_atoms_deriv(inp_ATOM * at,DERIV_AT * da,int start,int num,char cFlags,int * pbFound)1472 int mark_atoms_deriv( inp_ATOM *at, DERIV_AT *da, int start, int num, char cFlags, int *pbFound )
1473 {
1474 int i, nFound=0;
1475 DERIV_AT da1;
1476 if ( !(at[start].cFlags & cFlags) ) {
1477 if ( DERIV_NOT == get_traversed_deriv_type( at, da, start, &da1, cFlags ) ) {
1478 nFound ++; /* at[start] cannot belong to a derivatizing agent */
1479 }
1480 num ++;
1481 at[start].cFlags |= cFlags;
1482 if ( da1.typ[0] ) {
1483 /* possibly a derivatization agent attachment point. */
1484 /* check neighbors that have not been traversed yet */
1485 int n1=0, n2=0, i1=-1, i2=-1, nFound1=0, nFound2=0;
1486 switch( da1.typ[0] ) {
1487 case DERIV_BRIDGE_O:
1488 case DERIV_BRIDGE_NH:
1489 n1 = mark_atoms_deriv( at, da, at[start].neighbor[(int)da1.ord[0]], 0, cFlags, &nFound1 );
1490 if ( n1 > MAX_AT_DERIV || nFound1 ) {
1491 da1.typ[0] = 0;
1492 } else {
1493 da1.num[0] = n1;
1494 nFound ++;
1495 }
1496 break;
1497 case DERIV_AMINE_tN:
1498 n1 = mark_atoms_deriv( at, da, at[start].neighbor[i1=da1.ord[0]], 0, cFlags, &nFound1 );
1499 if ( da1.typ[1] ) {
1500 n2 = mark_atoms_deriv( at, da, at[start].neighbor[i2=da1.ord[1]], 0, cFlags, &nFound2 );
1501 }
1502 if ( 0 < n1 && n1 <= MAX_AT_DERIV && !nFound1 ) {
1503 da1.num[0] = n1;
1504 i = 1;
1505 nFound ++;
1506 } else {
1507 da1.ord[0] = da1.ord[1];
1508 da1.num[0] = da1.num[1];
1509 da1.typ[0] = da1.typ[1];
1510 da1.typ[1] = 0;
1511 i = 0;
1512 }
1513 if ( 0 < n2 && n2 <= MAX_AT_DERIV && !nFound2 ) {
1514 da1.num[i] = n2;
1515 nFound ++;
1516 } else {
1517 da1.typ[i] = 0;
1518 }
1519 break;
1520 case DERIV_RING_O:
1521 case DERIV_RING_NH:
1522 for ( i = 0; i < at[start].valence; i ++ ) {
1523 if ( i != da1.ord[0] && i != da1.ord[1] && !(at[at[start].neighbor[i]].cFlags & cFlags) ) {
1524 if ( !n1 )
1525 n1 = mark_atoms_deriv( at, da, at[start].neighbor[i1=i], 0, cFlags, &nFound1 );
1526 else
1527 n2 = mark_atoms_deriv( at, da, at[start].neighbor[i2=i], 0, cFlags, &nFound2 );
1528 }
1529 }
1530 if ( n1+n2+1 > MAX_AT_DERIV || nFound1 || nFound2 ) {
1531 da1.typ[1] = da1.typ[0] = 0;
1532 } else {
1533 da1.num[0] = n1;
1534 da1.num[1] = n2;
1535 nFound ++;
1536 }
1537 break;
1538 }
1539 if ( n1 < 0 )
1540 return n1;
1541 if ( n2 < 0 )
1542 return n2; /* errors */
1543
1544 if ( i = add_to_da( da+start, &da1 ) ) {
1545 return i; /* error */
1546 }
1547
1548 *pbFound += nFound1 + nFound2 + nFound;
1549 num += n1 + n2;
1550 } else {
1551 *pbFound += nFound;
1552 }
1553 for ( i = 0; i < at[start].valence; i ++ ) {
1554 num = mark_atoms_deriv( at, da, at[start].neighbor[i], num, cFlags, pbFound );
1555 if ( num < 0 )
1556 return num;
1557 }
1558 }
1559 /* *pbFound = number of derivatizer attachment points traversed forward from at[start] */
1560 return num; /* number of atoms traversed forward from at[start] */
1561 }
1562 /***************************************************************/
count_one_bond_atoms(inp_ATOM * at,DERIV_AT * da,int start,int ord,char cFlags,int * bFound)1563 int count_one_bond_atoms( inp_ATOM *at, DERIV_AT *da, int start, int ord, char cFlags, int *bFound )
1564 {
1565 int num = 0;
1566 if ( !(at[at[start].neighbor[ord]].cFlags & cFlags) ) {
1567 at[at[start].neighbor[ord]].cFlags |= cFlags;
1568 num ++;
1569 num = mark_atoms_deriv( at, da, start, num, cFlags, bFound );
1570 }
1571 return num;
1572 }
1573 /***************************************************************
1574 List of allowed derivatives
1575
1576 Legend:
1577
1578 ->- marks the bond to be disconnexted: X->-Y => XD + TY
1579 where TY is a derivatizing agent eventually to be discarded
1580
1581 Allowed Derivative Types List
1582 =============================
1583
1584 DERIV_BRIDGE_O, DERIV_BRIDGE_NH, DERIV_AMINE_tN
1585 -----------------------------------------------
1586 CH3 CH3 CH3 CH3 CH3
1587 | | | | |
1588 X->-Si--CH3 X->-Si---Si--CH3 X->-Si----C--CH3 X= O, NH, N
1589 | | | | |
1590 CH3 CH3 CH3 CH3 CH3
1591
1592 4 atoms 7 atoms 7 atoms is_silyl()
1593
1594
1595
1596
1597 O O O F O
1598 || || || | ||
1599 R--C--O->-CH3 R--C--O->-CH2--CH3 R--C--O->-C--F R--C--O->-CF2-CF2-CF3
1600 |
1601 F
1602
1603
1604
1605 1 atom 2 atoms 3 atoms 10 atoms
1606 is_Me_or_Et() is_Me_or_Et() is_CF3_or_linC3F7()
1607
1608
1609 A. DERIV_BRIDGE_NH, DERIV_AMINE_tN
1610 -----------------------------------
1611
1612
1613 O O O F O
1614 || || || | ||
1615 N->-C--CH3 N->-C--CH2--CH3 N->-C---C--F N->-C--CF2-CF2-CF3
1616 |
1617 F
1618
1619
1620
1621 3 atoms 5 atoms 8 atoms 12 atoms
1622 is_Me_or_Et() is_Me_or_Et() is_CF3_or_linC3F7()
1623
1624 DERIV_RING_O (da contains >B- or >C< or >CH- atom)
1625 ------------
1626
1627 C----O R----O R----O
1628 | \ | \ CH3 | \
1629 | > | > / | >
1630 | \ | \ / | \
1631 | B--CH3 | C | CH--Ph
1632 | / | / \ | /
1633 | > | > \ | >
1634 | / | / CH3 | /
1635 C----O R----O R----O
1636
1637 5-member 5 or 6-member 5 or 6-member
1638
1639
1640 2 atoms 3 atoms 7 atoms
1641
1642 DERIV_RING_NH
1643 ------------
1644
1645 None in the list
1646
1647 ***************************************************************/
is_silyl(inp_ATOM * at,int start,int ord_prev)1648 int is_silyl( inp_ATOM *at, int start, int ord_prev )
1649 {
1650 int i, neigh, num_Me=0, iC_IV=-1, iSi_IV=-1, i_C_or_Si_IV;
1651
1652 if ( at[start].el_number != el_number_Si || at[start].valence != 4 ||
1653 at[start].valence != at[start].chem_bonds_valence ||
1654 at[start].charge || at[start].radical )
1655 return 0;
1656 for ( i = 0; i < at[start].valence; i ++ ) {
1657 if ( i == ord_prev )
1658 continue;
1659 neigh = at[start].neighbor[i];
1660 if ( at[neigh].charge || at[neigh].radical ||
1661 at[neigh].valence != at[neigh].chem_bonds_valence)
1662 return 0;
1663 if ( at[neigh].valence == 4 ) {
1664 if ( at[neigh].el_number == el_number_C && iC_IV < 0 && iSi_IV < 0 )
1665 iC_IV = neigh;
1666 else
1667 if ( at[neigh].el_number == el_number_Si && iC_IV < 0 && iSi_IV < 0 )
1668 iSi_IV = neigh;
1669 else
1670 return 0;
1671 } else
1672 if ( at[neigh].valence == 1 &&
1673 at[neigh].valence == at[neigh].chem_bonds_valence &&
1674 at[neigh].el_number == el_number_C && at[neigh].num_H == 3 ) {
1675 num_Me ++;
1676 } else {
1677 return 0;
1678 }
1679 }
1680 if ( num_Me == 3 && iC_IV < 0 && iSi_IV < 0 )
1681 return 1; /* Si(CH3)3 */
1682
1683 /* next C(IV) or Si(IV) */
1684 i_C_or_Si_IV = iC_IV >= 0? iC_IV : iSi_IV >= 0? iSi_IV : -1;
1685 if ( num_Me != 2 || i_C_or_Si_IV < 0 )
1686 return 0;
1687
1688 num_Me = 0;
1689 for ( i = 0; i < at[i_C_or_Si_IV].valence; i ++ ) {
1690 neigh = at[i_C_or_Si_IV].neighbor[i];
1691 if ( neigh == start )
1692 continue;
1693 if ( at[neigh].charge || at[neigh].radical ||
1694 at[neigh].valence != at[neigh].chem_bonds_valence)
1695 return 0;
1696 if ( at[neigh].valence == 1 &&
1697 at[neigh].valence == at[neigh].chem_bonds_valence &&
1698 at[neigh].el_number == el_number_C && at[neigh].num_H == 3 ) {
1699 num_Me ++;
1700 } else {
1701 return 0;
1702 }
1703 }
1704 if ( num_Me == 3 )
1705 return 2; /* Si(CH3)2Si/C(CH3)3 */
1706 return 0;
1707 }
1708 /****************************************************************/
is_Me_or_Et(inp_ATOM * at,int start,int ord_prev)1709 int is_Me_or_Et( inp_ATOM *at, int start, int ord_prev )
1710 {
1711 int i, neigh = -1;
1712 if ( at[start].el_number != el_number_C || at[start].valence > 2 ||
1713 at[start].valence != at[start].chem_bonds_valence ||
1714 at[start].chem_bonds_valence + at[start].num_H != 4 ||
1715 at[start].charge || at[start].radical )
1716 return 0;
1717 for ( i = 0; i < at[start].valence; i ++ ) {
1718 if ( i == ord_prev )
1719 continue;
1720 if ( neigh >= 0 )
1721 return 0;
1722
1723 neigh = at[start].neighbor[i];
1724 if ( at[neigh].el_number != el_number_C || at[neigh].valence > 1 ||
1725 at[neigh].valence != at[neigh].chem_bonds_valence ||
1726 at[neigh].chem_bonds_valence + at[neigh].num_H != 4 ||
1727 at[neigh].charge || at[neigh].radical )
1728 return 0;
1729 }
1730 return 1 + (neigh >= 0);
1731 }
1732 #ifdef NEVER
1733 /****************************************************************
1734 CF3
1735 -CF3 or -CF<
1736 CF3
1737 *****************************************************************/
is_CF3_or_isoC3F7(inp_ATOM * at,int start,int ord_prev)1738 int is_CF3_or_isoC3F7( inp_ATOM *at, int start, int ord_prev )
1739 {
1740 int i, k, num_C_IV, iC_IV[2], neigh, num_F, iC;
1741 if ( at[start].el_number != el_number_C || at[start].valence != 4 ||
1742 at[start].valence != at[start].chem_bonds_valence ||
1743 at[start].chem_bonds_valence + at[start].num_H != 4 ||
1744 at[start].charge || at[start].radical )
1745 return 0;
1746
1747 iC_IV[0] = iC_IV[1] = num_F = 0;
1748
1749 for ( i = num_C_IV = 0; i < at[start].valence; i ++ ) {
1750 if ( i == ord_prev )
1751 continue;
1752
1753 neigh = at[start].neighbor[i];
1754 if ( at[neigh].valence != at[neigh].chem_bonds_valence ||
1755 at[neigh].charge || at[neigh].radical )
1756 return 0;
1757 if ( at[neigh].el_number == el_number_F ) {
1758 if ( at[neigh].chem_bonds_valence + at[neigh].num_H != 1 )
1759 return 0;
1760 num_F ++;
1761 } else
1762 if ( at[neigh].el_number == el_number_C &&
1763 at[neigh].valence == 4 &&
1764 !at[neigh].num_H && !at[neigh].charge && !at[neigh].radical && num_C_IV < 2 ) {
1765
1766 if ( num_C_IV > 1 )
1767 return 0;
1768
1769 iC_IV[num_C_IV++] = neigh;
1770 }
1771 }
1772 if ( !num_C_IV && 3 == num_F )
1773 return 1; /* -CF3 */
1774 if ( 2 != num_C_IV || 1 != num_F )
1775 return 0;
1776
1777 /* detect iso-C3F7 */
1778 for ( k = 0; k < num_C_IV; k ++ ) {
1779 iC = iC_IV[k];
1780 num_F = 0;
1781 for ( i = 0; i < at[iC].valence; i ++ ) {
1782 neigh = at[iC].neighbor[i];
1783 if ( neigh == start )
1784 continue;
1785 if ( at[neigh].valence != at[neigh].chem_bonds_valence ||
1786 at[neigh].charge || at[neigh].radical )
1787 return 0;
1788 if ( at[neigh].el_number == el_number_F ) {
1789 if ( at[neigh].chem_bonds_valence + at[neigh].num_H != 1 )
1790 return 0;
1791 num_F ++;
1792 } else {
1793 return 0;
1794 }
1795 }
1796 if ( num_F != 3 )
1797 return 0;
1798 }
1799 return 2; /* iso-C3F7 */
1800 }
1801 #endif
1802 /**************************************************************/
is_CF3_or_linC3F7(inp_ATOM * at,int start,int ord_prev)1803 int is_CF3_or_linC3F7( inp_ATOM *at, int start, int ord_prev )
1804 {
1805 int i, num_C_IV, iC_IV, neigh, num_F, num_C=0;
1806 AT_NUMB *p;
1807
1808 while( num_C < 4 ) {
1809
1810 if ( at[start].el_number != el_number_C || at[start].valence != 4 ||
1811 at[start].valence != at[start].chem_bonds_valence ||
1812 at[start].chem_bonds_valence + at[start].num_H != 4 ||
1813 at[start].charge || at[start].radical )
1814 return 0;
1815
1816 iC_IV = num_F = 0;
1817
1818 for ( i = num_C_IV = 0; i < at[start].valence; i ++ ) {
1819 if ( i == ord_prev )
1820 continue;
1821
1822 neigh = at[start].neighbor[i];
1823 if ( at[neigh].valence != at[neigh].chem_bonds_valence ||
1824 at[neigh].charge || at[neigh].radical )
1825 return 0;
1826 if ( at[neigh].el_number == el_number_F ) {
1827 if ( at[neigh].chem_bonds_valence + at[neigh].num_H != 1 )
1828 return 0;
1829 num_F ++;
1830 } else
1831 if ( at[neigh].el_number == el_number_C &&
1832 at[neigh].valence == 4 &&
1833 !at[neigh].num_H && !at[neigh].charge && !at[neigh].radical && num_C_IV < 2 ) {
1834
1835 if ( num_C_IV )
1836 return 0;
1837
1838 iC_IV = neigh;
1839 num_C_IV++;
1840 }
1841 }
1842 if ( num_C_IV + num_F != 3 )
1843 return 0;
1844
1845 num_C ++; /* found -CF2-C or -CF3 */
1846 if ( !num_C_IV )
1847 break; /* -CF3 */
1848
1849 /* treat next C(IV) as a new start atom */
1850 if ( p = is_in_the_list( at[iC_IV].neighbor, (AT_NUMB) start, at[iC_IV].valence ) ) {
1851 start = iC_IV;
1852 ord_prev = p - at[iC_IV].neighbor;
1853 } else {
1854 return -1; /* program error */
1855 }
1856 }
1857 return num_C == 1? 1 : num_C == 3? 2 : 0;
1858 }
1859 /****************************************************************/
is_phenyl(inp_ATOM * at,int start,int ord_prev)1860 int is_phenyl( inp_ATOM *at, int start, int ord_prev )
1861 {
1862 int k, neigh, cur_at, ord;
1863 if ( at[start].el_number != el_number_C || at[start].valence != 3 ||
1864 at[start].valence+1 != at[start].chem_bonds_valence ||
1865 at[start].chem_bonds_valence + at[start].num_H != 4 ||
1866 at[start].charge || at[start].radical )
1867 return 0;
1868
1869 ord = (ord_prev + 1) % 3;
1870 cur_at = start;
1871
1872 for ( k = 0; k < 5; k ++ ) {
1873 neigh = at[cur_at].neighbor[ord];
1874 if ( at[neigh].el_number != el_number_C || at[neigh].valence != 2 ||
1875 at[neigh].valence+1 != at[neigh].chem_bonds_valence ||
1876 at[neigh].chem_bonds_valence + at[neigh].num_H != 4 ||
1877 at[neigh].charge || at[neigh].radical )
1878 return 0;
1879 ord = (at[neigh].neighbor[0] == cur_at);
1880 cur_at = neigh;
1881 }
1882 return (at[cur_at].neighbor[ord] == start);
1883 }
1884 /****************************************************************/
is_deriv_ring(inp_ATOM * at,int start,int num_atoms,DERIV_AT * da1,int idrv)1885 int is_deriv_ring( inp_ATOM *at, int start, int num_atoms, DERIV_AT *da1, int idrv )
1886 {
1887 int i, k, neigh_at[2], prev_ord[2], neigh, is_B=0, is_C=0;
1888 AT_NUMB *p;
1889 if ( da1->typ[idrv] != DERIV_RING_O || da1->typ[idrv+1] != DERIV_RING_O )
1890 return 0;
1891 if ( at[start].charge || at[start].radical || at[start].valence != at[start].chem_bonds_valence )
1892 return 0;
1893 if ( at[start].el_number == el_number_B && at[start].valence == 3 )
1894 is_B = 1;
1895 else
1896 if ( at[start].el_number == el_number_C && (at[start].valence == 3 || at[start].valence == 4) &&
1897 at[start].chem_bonds_valence == at[start].valence &&
1898 at[start].num_H + at[start].chem_bonds_valence == 4 )
1899 is_C = 1;
1900 else
1901 return 0;
1902 /* locate bonds connecting >B- or >C< or >C- to the rest of the derivative */
1903 for ( i = k = 0; i < at[start].valence; i ++ ) {
1904 if ( i != da1->ord[idrv] && i != da1->ord[idrv+1] ) {
1905 neigh = at[start].neighbor[i];
1906 if ( k < 2 && (p = is_in_the_list( at[neigh].neighbor, (AT_NUMB) start, at[neigh].valence)) ) {
1907 neigh_at[k] = neigh;
1908 prev_ord[k] = p - at[neigh].neighbor;
1909 k ++;
1910 } else {
1911 return -1; /* program error */
1912 }
1913 }
1914 }
1915 if ( is_B && k == 1 && is_Me_or_Et( at, neigh_at[0], prev_ord[0]) )
1916 return 1;
1917 if ( is_C && k == 1 && at[start].num_H == 1 && is_phenyl( at, neigh_at[0], prev_ord[0]) )
1918 return 1;
1919 if ( is_C && k == 2 && is_Me_or_Et( at, neigh_at[0], prev_ord[0]) &&
1920 is_Me_or_Et( at, neigh_at[1], prev_ord[1]) )
1921 return 1;
1922
1923 return 0;
1924 }
1925 /****************************************************************/
is_deriv_chain(inp_ATOM * at,int start,int num_atoms,DERIV_AT * da1,int idrv)1926 int is_deriv_chain( inp_ATOM *at, int start, int num_atoms, DERIV_AT *da1, int idrv )
1927 {
1928 int i, k, prev_ord, neigh, iC, iO, iNxt;
1929 AT_NUMB *p;
1930 if ( !da1->typ[idrv] || (da1->typ[idrv] & DERIV_RING) )
1931 return 0;
1932 if ( at[start].charge || at[start].radical || at[start].valence != at[start].chem_bonds_valence )
1933 return 0;
1934
1935 neigh = at[start].neighbor[(int)da1->ord[idrv]];
1936 p = is_in_the_list( at[neigh].neighbor, (AT_NUMB) start, at[neigh].valence);
1937 if ( !p )
1938 return -1; /* program error */
1939 prev_ord = p - at[neigh].neighbor;
1940
1941 /* eliminate silyl possibility */
1942 if ( is_silyl( at, neigh, prev_ord ) )
1943 return 1;
1944
1945 if ( da1->typ[idrv] == DERIV_BRIDGE_O ) {
1946 /* check acetyl */
1947 iC = at[start].neighbor[!da1->ord[idrv]];
1948 if ( at[iC].charge || at[iC].radical || at[iC].num_H ||
1949 at[iC].el_number != el_number_C || at[iC].valence != 3 ||
1950 at[iC].valence+1 != at[iC].chem_bonds_valence )
1951 return 0;
1952 for ( i = k = 0; i < at[iC].valence; i ++ ) {
1953 iO = at[iC].neighbor[i];
1954 if ( at[iO].charge || at[iO].radical || at[iO].num_H ||
1955 at[iO].el_number != el_number_O || at[iO].valence != 1 ||
1956 at[iO].valence+1 != at[iO].chem_bonds_valence )
1957 continue;
1958 k ++; /* number of =O */
1959 }
1960 if ( k != 1 )
1961 return 0;
1962 /* check derivative */
1963 return ( is_Me_or_Et( at, neigh, prev_ord ) ||
1964 is_CF3_or_linC3F7( at, neigh, prev_ord ) );
1965 }
1966
1967 if ( da1->typ[idrv] == DERIV_BRIDGE_NH || da1->typ[idrv] == DERIV_AMINE_tN ) {
1968 /* check acetyl */
1969 iNxt = -1;
1970 iC = at[start].neighbor[(int)da1->ord[idrv]];
1971 if ( at[iC].charge || at[iC].radical || at[iC].num_H ||
1972 at[iC].el_number != el_number_C || at[iC].valence != 3 ||
1973 at[iC].valence+1 != at[iC].chem_bonds_valence )
1974 return 0;
1975 for ( i = k = 0; i < at[iC].valence; i ++ ) {
1976 iO = at[iC].neighbor[i];
1977 if ( at[iO].charge || at[iO].radical || at[iO].num_H ||
1978 at[iO].el_number != el_number_O || at[iO].valence != 1 ||
1979 at[iO].valence+1 != at[iO].chem_bonds_valence ) {
1980 if ( iO != start ) {
1981 if ( iNxt < 0 )
1982 iNxt = iO;
1983 else
1984 return 0;
1985 }
1986 continue;
1987 }
1988 k ++; /* number of =O */
1989 }
1990 if ( k != 1 || iNxt < 0 )
1991 return 0;
1992 /* find bond from iNxt to iC */
1993 p = is_in_the_list( at[iNxt].neighbor, (AT_NUMB) iC, at[iNxt].valence);
1994 if ( !p )
1995 return -1; /* program error */
1996 prev_ord = p - at[iNxt].neighbor;
1997 /* check derivative */
1998 return ( is_Me_or_Et( at, iNxt, prev_ord ) ||
1999 is_CF3_or_linC3F7( at, iNxt, prev_ord ) );
2000 }
2001 return 0;
2002 }
2003 /****************************************************************/
is_deriv_chain_or_ring(inp_ATOM * at,int start,int num_atoms,DERIV_AT * da1,int * idrv)2004 int is_deriv_chain_or_ring( inp_ATOM *at, int start, int num_atoms, DERIV_AT *da1, int *idrv )
2005 {
2006 int i, ret = -1;
2007 if ( da1->typ[*idrv] & DERIV_RING ) {
2008 /* find the first ord of this derivative; ord of ring derivatives are in pairs */
2009 int j = -1;
2010 for ( i = 0; i < DERIV_AT_LEN && da1->typ[i]; i ++ ) {
2011 if ( da1->typ[i] & DERIV_RING ) {
2012 if ( i == *idrv || i+1 == *idrv ) {
2013 *idrv = j = i;
2014 break;
2015 }
2016 i ++; /* bypass the second bond to the same derivatization agent */
2017 }
2018 }
2019 /* check consistency */
2020 if ( j == -1 || j+1 >= DERIV_AT_LEN ||
2021 !(da1->typ[j] & DERIV_RING) || !(da1->typ[j+1] & DERIV_RING) ) {
2022 ret = -1; /* program error */
2023 } else {
2024 ret = is_deriv_ring( at, start, num_atoms, da1, j );
2025 }
2026 } else
2027 if ( da1->typ[*idrv] ) {
2028 ret = is_deriv_ring( at, start, num_atoms, da1, *idrv );
2029 }
2030 return ret;
2031 }
2032 /******************************************************/
remove_deriv(DERIV_AT * da1,int idrv)2033 int remove_deriv( DERIV_AT *da1, int idrv )
2034 {
2035 int i, j, ret = -1;
2036 if ( da1->typ[idrv] & DERIV_RING ) {
2037 /* find the first ord of this derivative; ord of ring derivatives are in pairs */
2038 j = -1;
2039 for ( i = 0; i < DERIV_AT_LEN && da1->typ[i]; i ++ ) {
2040 if ( da1->typ[i] & DERIV_RING ) {
2041 if ( i == idrv || i+1 == idrv ) {
2042 j = i;
2043 break;
2044 }
2045 i ++; /* bypass the second bond to the same derivatization agent */
2046 }
2047 }
2048 /* delete if data are consistent */
2049 if ( j >= 0 && j+1 < DERIV_AT_LEN && (da1->typ[j] & DERIV_RING) && (da1->typ[j+1] & DERIV_RING) ) {
2050 for ( ; j < DERIV_AT_LEN-2 && da1->typ[j+2]; j ++ ) {
2051 da1->typ[j] = da1->typ[j+2];
2052 da1->num[j] = da1->num[j+2];
2053 da1->ord[j] = da1->ord[j+2];
2054 }
2055 for ( ; j < DERIV_AT_LEN; j ++ ) {
2056 da1->typ[j] = 0;
2057 da1->num[j] = 0;
2058 da1->ord[j] = 0;
2059 }
2060 ret = 0;
2061 }
2062 } else {
2063 j = idrv;
2064
2065 for ( ; j < DERIV_AT_LEN-1 && da1->typ[j+1]; j ++ ) {
2066 da1->typ[j] = da1->typ[j+1];
2067 da1->num[j] = da1->num[j+1];
2068 da1->ord[j] = da1->ord[j+1];
2069 }
2070 for ( ; j < DERIV_AT_LEN; j ++ ) {
2071 da1->typ[j] = 0;
2072 da1->num[j] = 0;
2073 da1->ord[j] = 0;
2074 }
2075 ret = 0;
2076 }
2077 return ret;
2078 }
2079 /******************************************************/
remove_deriv_mark(DERIV_AT * da1,int idrv)2080 int remove_deriv_mark( DERIV_AT *da1, int idrv )
2081 {
2082 int i, j, ret = -1;
2083 if ( da1->typ[idrv] & DERIV_RING ) {
2084 /* find the first ord of this derivative; ord of ring derivatives are in pairs */
2085 j = -1;
2086 for ( i = 0; i < DERIV_AT_LEN && da1->typ[i]; i ++ ) {
2087 if ( da1->typ[i] & DERIV_RING ) {
2088 if ( i == idrv || i+1 == idrv ) {
2089 j = i;
2090 break;
2091 }
2092 i ++; /* bypass the second bond to the same derivatization agent */
2093 }
2094 }
2095 /* delete if data are consistent */
2096 if ( j >= 0 && j+1 < DERIV_AT_LEN && (da1->typ[j] & DERIV_RING) && (da1->typ[j+1] & DERIV_RING) ) {
2097 da1->typ[j] |= DERIV_DUPLIC;
2098 da1->typ[j+1] |= DERIV_DUPLIC;
2099 ret = 0;
2100 }
2101 } else {
2102 j = idrv;
2103 da1->typ[j] |= DERIV_DUPLIC;
2104 ret = 0;
2105 }
2106 return ret;
2107 }
2108 /****************************************************************/
EliminateDerivNotInList(inp_ATOM * at,DERIV_AT * da,int num_atoms)2109 int EliminateDerivNotInList( inp_ATOM *at, DERIV_AT *da, int num_atoms )
2110 {
2111 int i, j, num_da, num_cuts=0, ret=0;
2112 for ( i = 0; i < num_atoms; i ++ ) {
2113 if ( !da[i].typ[0] )
2114 continue;
2115 /* count deriative attachments */
2116 for ( num_da = 0; num_da < DERIV_AT_LEN && da[i].typ[num_da]; num_da ++ )
2117 ;
2118 if ( num_da > 2 )
2119 return -1; /* should not happen */
2120 if ( num_da == 2 && da[i].typ[0] != da[i].typ[1] ) {
2121 da[i].typ[0] = da[i].typ[1] = 0; /* do not allow */
2122 continue;
2123 }
2124 if ( da[i].typ[0] & DERIV_RING ) {
2125 ret = 0;
2126 if ( num_da == 2 && 1 + da[i].num[0] + da[i].num[1] <= MAX_AT_DERIV &&
2127 0 < (ret=is_deriv_ring( at, i, num_atoms, da+i, 0 ) ) ) {
2128 num_cuts += 2;
2129 } else
2130 if ( ret < 0 ) {
2131 return ret;
2132 } else {
2133 da[i].typ[0] = da[i].typ[1] = 0; /* not a derivative */
2134 }
2135 } else {
2136 ret = 0;
2137 if ( da[i].num[0] <= MAX_AT_DERIV && 0 < (ret = is_deriv_chain( at, i, num_atoms, da+i, 0 )) ) {
2138 num_cuts ++;
2139 j = 1;
2140 } else
2141 if ( ret < 0 ) {
2142 return ret;
2143 } else {
2144 da[i].ord[0] = da[i].ord[1];
2145 da[i].num[0] = da[i].num[1];
2146 da[i].typ[0] = da[i].typ[1];
2147 da[i].typ[1] = 0;
2148 j = 0;
2149 }
2150 if ( da[i].typ[j] && da[i].num[j] <= MAX_AT_DERIV &&
2151 0 < (ret = is_deriv_chain( at, i, num_atoms, da+i, j )) ) {
2152 num_cuts ++;
2153 } else
2154 if ( ret < 0 ) {
2155 return ret;
2156 } else {
2157 da[i].typ[j] = 0;
2158 }
2159 }
2160 }
2161 return num_cuts;
2162 }
2163 /***************************************************************/
make_single_cut(inp_ATOM * at,DERIV_AT * da,int iat,int icut)2164 int make_single_cut( inp_ATOM *at, DERIV_AT *da, int iat, int icut )
2165 {
2166 int ret = -1; /* error flag */
2167 int iord = (int)da[iat].ord[icut]; /* ord of the bond in iat */
2168 if ( da[iat].typ[icut] & DERIV_DUPLIC ) {
2169 return 0;
2170 } else
2171 if ( iord < 0 ) {
2172 return -1; /* program error */
2173 } else {
2174 /* find other da[] that affect at[iat] */
2175 int jat = at[iat].neighbor[iord]; /* opposite atom */
2176 AT_NUMB *p = is_in_the_list( at[jat].neighbor, (AT_NUMB) iat, at[jat].valence );
2177 int jord = p? (p - at[jat].neighbor) : -1;
2178 int i, iD=1, iT=2;
2179 if ( jord < 0 ) {
2180 return -1; /* program error */
2181 }
2182 ret = DisconnectInpAtBond( at, NULL, iat, iord );
2183 if ( ret == 1 ) {
2184 if ( da[iat].typ[icut] & DERIV_RING ) {
2185 /* at[jat] belongs to the main structure */
2186 at[jat].num_H ++; /* add D to the main structure */
2187 at[jat].num_iso_H[iD] ++;
2188 at[iat].num_H ++; /* add T to the derivatizing fragment */
2189 at[iat].num_iso_H[iT] ++;
2190 } else {
2191 at[iat].num_H ++; /* add D to the main structure */
2192 at[iat].num_iso_H[iD] ++;
2193 at[jat].num_H ++; /* add T to the derivatizing fragment */
2194 at[jat].num_iso_H[iT] ++;
2195 }
2196 /* adjust ord for other bonds */
2197 for ( i = 0; i < DERIV_AT_LEN && da[iat].typ[i]; i ++ ) {
2198 if ( da[iat].ord[i] == iord ) {
2199 da[iat].ord[i] = -(1+da[iat].ord[i]); /* mark the bond being disconnected */
2200 } else
2201 if ( da[iat].ord[i] > iord ) {
2202 da[iat].ord[i] --;
2203 }
2204 }
2205 for ( i = 0; i < DERIV_AT_LEN && da[jat].typ[i]; i ++ ) {
2206 if ( da[jat].ord[i] == jord ) {
2207 /* opposite atom needs the same bond to be disconnected */
2208 if ( da[iat].num[icut] == da[jat].num[i] ) {
2209 iD=2; /* mark both as fragments */
2210 } else
2211 if ( da[iat].num[icut] > da[jat].num[i] ) {
2212 iD = 2; /* greater as a main structure */
2213 iT = 1; /* mark smaller as a derivatizing fragment */
2214 }
2215 da[jat].ord[i] = -(1+da[jat].ord[i]);
2216 da[jat].typ[i] |= DERIV_DUPLIC;
2217 } else
2218 if ( da[jat].ord[i] > jord ) {
2219 da[jat].ord[i] --;
2220 }
2221 }
2222 }
2223 }
2224 return ret;
2225 }
2226 /***************************************************************/
underivatize(ORIG_ATOM_DATA * orig_inp_data)2227 int underivatize( ORIG_ATOM_DATA *orig_inp_data )
2228 {
2229 #define REMOVE_CUT_DERIV 1 /* remove disconnected derivatizing agents */
2230 int ret = 0, i, j, k, m, n, num_atoms, num_components, i_component, nFound, num, cur_num_at, len;
2231 int num_cuts, num_ring_cuts, num_cut_pieces, num_cuts_to_check;
2232 inp_ATOM *at = orig_inp_data->at;
2233 INP_ATOM_DATA *inp_cur_data = NULL;
2234 DERIV_AT *da = NULL;
2235 int nTotNumCuts = 0;
2236
2237 set_R2C_el_numbers( );
2238 /* prepare */
2239 num_atoms = remove_terminal_HDT( orig_inp_data->num_inp_atoms, at, 1 );
2240 /*^^^^^ always accomodate accomodate FIX_TERM_H_CHRG_BUG - IPl, July 2008*/
2241 orig_inp_data->num_inp_atoms = num_atoms;
2242
2243 /* initialize */
2244 UnMarkDisconnectedComponents( orig_inp_data );
2245 num_cuts = 0;
2246 /* mark */
2247 num_components = MarkDisconnectedComponents( orig_inp_data, 0 );
2248 inp_cur_data = (INP_ATOM_DATA *)inchi_calloc( num_components, sizeof(inp_cur_data[0]) );
2249 for ( i_component = 0; i_component < num_components; i_component ++ ) {
2250 CreateInpAtomData( inp_cur_data+i_component, orig_inp_data->nCurAtLen[i_component], 0 );
2251 inp_cur_data[i_component].num_at = ExtractConnectedComponent( orig_inp_data->at, orig_inp_data->num_inp_atoms, i_component+1, inp_cur_data[i_component].at );
2252 /* error processing */
2253 if ( inp_cur_data[i_component].num_at <= 0 || orig_inp_data->nCurAtLen[i_component] != inp_cur_data[i_component].num_at ) {
2254 ret = -(i_component+1); /* severe error */
2255 goto exit_function;
2256 }
2257 /* initialize */
2258 num_atoms = inp_cur_data[i_component].num_at;
2259 at = inp_cur_data[i_component].at;
2260 add_DT_to_num_H( num_atoms, at );
2261
2262 UnMarkRingSystemsInp( at, num_atoms );
2263 UnMarkOtherIndicators( at, num_atoms );
2264 UnMarkOneComponent( at, num_atoms );
2265 MarkRingSystemsInp( at, num_atoms, 0 );
2266 ret = mark_arom_bonds( at, num_atoms );
2267 if ( ret < 0 ) {
2268 goto exit_function;
2269 }
2270 ret = 0;
2271 if ( da ) inchi_free( da );
2272 da = (DERIV_AT *)inchi_calloc( num_atoms, sizeof(da[0]) );
2273
2274 /* detect derivatives */
2275 nFound = 0;
2276 for ( i = 0; i < num_atoms; i ++ ) {
2277 if ( at[i].bCutVertex && !da[i].typ[0] ) {
2278 for ( k = 0; k < at[i].valence; k ++ ) {
2279 num = count_one_bond_atoms( at, da, i, k, CFLAG_MARK_BRANCH, &nFound );
2280 UnMarkOtherIndicators( at, num_atoms );
2281 if ( num < 0 ) {
2282 ret = num; /* severe error */
2283 goto exit_function;
2284 }
2285 }
2286 }
2287 }
2288 /* prepare cuts: remove cuts that are not to be done */
2289 /* in addition, count ring cuts DERIV_RING */
2290 num_ring_cuts = 0;
2291 num_cuts = 0;
2292 num_cut_pieces = 0;
2293 for ( i = num = 0; i < num_atoms; i ++ ) {
2294 for ( len = 0; len < MAX_AT_DERIV && da[i].typ[len]; len ++ )
2295 ;
2296 switch( len ) {
2297 case 0:
2298 continue;
2299 case 1:
2300 /* single cut: unconditional */
2301 num_cuts += 1;
2302 num_cut_pieces += 1;
2303 continue;
2304 case 2:
2305 if ( (da[i].typ[0] & DERIV_RING) && (da[i].typ[1] & DERIV_RING) ) {
2306 /* double cut, unconditional */
2307 num_ring_cuts += 2;
2308 num_cuts += 2;
2309 num_cut_pieces += 1;
2310 continue;
2311 } else
2312 if ( da[i].typ[0] == DERIV_AMINE_tN && da[i].typ[1] == DERIV_AMINE_tN ) {
2313 /* double cut, unconditional */
2314 num_cuts += 2;
2315 num_cut_pieces += 2;
2316 continue;
2317 }
2318 if ( da[i].typ[0] == da[i].typ[1] ) {
2319 /* DERIV_BRIDGE_O or DERIV_BRIDGE_NH; cut off the smallest */
2320 if ( 0 == is_deriv_chain( at, i, num_atoms, da+i, 0 ) ) {
2321 da[i].num[0] = NOT_AT_DERIV;
2322 }
2323 if ( 0 == is_deriv_chain( at, i, num_atoms, da+i, 1 ) ) {
2324 da[i].num[1] = NOT_AT_DERIV;
2325 }
2326 if ( da[i].num[0] > da[i].num[1] ) {
2327 da[i].num[0] = da[i].num[1];
2328 da[i].ord[0] = da[i].ord[1];
2329 da[i].typ[0] = da[i].typ[1];
2330 da[i].typ[1] = 0;
2331 num_cuts += 1;
2332 num_cut_pieces += 1;
2333 } else
2334 if ( da[i].num[0] < da[i].num[1] ) {
2335 da[i].typ[1] = 0;
2336 num_cuts += 1;
2337 num_cut_pieces += 1;
2338 } else {
2339 /* attachments have same size: ignore both */
2340 /* ??? check for standard derivatizations ??? */
2341 da[i].typ[0] = 0;
2342 da[i].typ[1] = 0;
2343 }
2344 continue;
2345 }
2346 ret = -88;
2347 goto exit_function; /* unexpected */
2348 case 3:
2349 if ( da[i].typ[0] == da[i].typ[1] &&
2350 da[i].typ[0] == da[i].typ[2] &&
2351 da[i].typ[0] == DERIV_AMINE_tN ) {
2352 int x, y, z;
2353 if ( 0 == is_deriv_chain( at, i, num_atoms, da+i, 0 ) ) {
2354 da[i].num[0] = NOT_AT_DERIV;
2355 }
2356 if ( 0 == is_deriv_chain( at, i, num_atoms, da+i, 1 ) ) {
2357 da[i].num[1] = NOT_AT_DERIV;
2358 }
2359 if ( 0 == is_deriv_chain( at, i, num_atoms, da+i, 2 ) ) {
2360 da[i].num[2] = NOT_AT_DERIV;
2361 }
2362
2363 x = (da[i].num[0] < da[i].num[1])? 0 : 1;
2364 x = (da[i].num[x] < da[i].num[2])? x : 2; /* min */
2365 z = (da[i].num[0] < da[i].num[1])? 1 : 0;
2366 z = (da[i].num[x] < da[i].num[2])? 2 : z; /* max */
2367 y = ((x+1)^(z+1))-1; /* median */
2368
2369
2370 if (da[i].num[x] == da[i].num[z] ) {
2371 /* no cuts */
2372 da[i].typ[0] = 0;
2373 da[i].typ[1] = 0;
2374 da[i].typ[2] = 0;
2375 continue; /* all deriv. agents have same size, ignore */
2376 /* ??? check for standard derivatizations ??? */
2377 } else
2378 if ( da[i].num[x] == da[i].num[y] ) {
2379 /* two smallest */
2380 switch( z ) {
2381 case 0:
2382 da[i].num[0] = da[i].num[1];
2383 da[i].ord[0] = da[i].ord[1];
2384 da[i].typ[0] = da[i].typ[1];
2385
2386 da[i].num[1] = da[i].num[2];
2387 da[i].ord[1] = da[i].ord[2];
2388 da[i].typ[1] = da[i].typ[2];
2389 break;
2390 case 1:
2391 da[i].num[1] = da[i].num[2];
2392 da[i].ord[1] = da[i].ord[2];
2393 da[i].typ[1] = da[i].typ[2];
2394 break;
2395 case 2:
2396 break;
2397 }
2398 da[i].typ[2] = 0;
2399 num_cuts += 2;
2400 num_cut_pieces += 2;
2401 } else {
2402 /* one smallest */
2403 if ( x ) {
2404 da[i].num[0] = da[i].num[x];
2405 da[i].ord[0] = da[i].ord[x];
2406 da[i].typ[0] = da[i].typ[x];
2407 }
2408 da[i].typ[1] = 0;
2409 da[i].typ[2] = 0;
2410 num_cuts += 1;
2411 num_cut_pieces += 1;
2412 }
2413 continue;
2414 }
2415 ret = -88;
2416 goto exit_function; /* unexpected */
2417 case 4:
2418 if ( (da[i].typ[0] & DERIV_RING) && (da[i].typ[1] & DERIV_RING) &&
2419 (da[i].typ[2] & DERIV_RING) && (da[i].typ[3] & DERIV_RING) ) {
2420 int n01 = inchi_max( da[i].num[0], da[i].num[1] );
2421 int n23 = inchi_max( da[i].num[2], da[i].num[3] );
2422 if ( n01 < n23 && 0 < is_deriv_ring( at, i, num_atoms, da+i, 0) ) {
2423 da[i].typ[2] = 0;
2424 da[i].typ[3] = 0;
2425 num_cuts += 2;
2426 num_ring_cuts += 2;
2427 num_cut_pieces += 1;
2428 } else
2429 if ( n01 > n23 && 0 < is_deriv_ring( at, i, num_atoms, da+i, 2) ) {
2430 da[i].num[0] = da[i].num[2];
2431 da[i].ord[0] = da[i].ord[2];
2432 da[i].typ[0] = da[i].typ[2];
2433
2434 da[i].num[1] = da[i].num[3];
2435 da[i].ord[1] = da[i].ord[3];
2436 da[i].typ[1] = da[i].typ[3];
2437
2438 da[i].typ[2] = 0;
2439 da[i].typ[3] = 0;
2440 num_cuts += 2;
2441 num_ring_cuts += 2;
2442 num_cut_pieces += 1;
2443 } else {
2444 da[i].typ[0] = 0;
2445 da[i].typ[1] = 0;
2446 da[i].typ[2] = 0;
2447 da[i].typ[3] = 0;
2448 }
2449 continue;
2450 }
2451 ret = -88;
2452 goto exit_function; /* unexpected */
2453 }
2454 }
2455
2456 /* eliminate cases when
2457 da[i1].typ[j1] && da[i2].typ[j2] &&
2458 at[i1].neighbor[da[i1].ord[j1]] == i2 && at[i2].neighbor[da[i2].ord[j2]] == i1
2459 that is, same bond is in the da[] elements of the adjacent atoms */
2460 nFound = 0; /* number of cuts to remove */
2461 for ( i = 0; i < num_atoms; i ++ ) {
2462 for ( j = 0; j < MAX_AT_DERIV && da[i].typ[j]; j ++ ) {
2463 if ( da[i].typ[j] & DERIV_DUPLIC )
2464 continue;
2465 n = at[i].neighbor[(int)da[i].ord[j]];
2466 if ( n < i )
2467 continue;
2468 for ( k = 0; k < MAX_AT_DERIV && da[n].typ[k]; k ++ ) {
2469 if ( da[n].typ[k] & DERIV_DUPLIC )
2470 continue;
2471 m = at[n].neighbor[(int)da[n].ord[k]];
2472 if ( m == i ) {
2473 /* same bond in da[i].typ[j] and da[n].typ[k] */
2474 /* check whether both derivatives are acceptable */
2475 int k1=k, j1=j;
2476 int ret_i = is_deriv_chain_or_ring( at, i, num_atoms, da+i, &j1 );
2477 int ret_n = is_deriv_chain_or_ring( at, n, num_atoms, da+n, &k1 );
2478 if ( ret_i < 0 ) {
2479 ret = ret_i;
2480 goto exit_function;
2481 }
2482 if ( ret_n < 0 ) {
2483 ret = ret_n;
2484 goto exit_function;
2485 }
2486 if ( !ret_i || ret_i && ret_n ) {
2487 if ( da[i].typ[j1] & DERIV_RING ) {
2488 num_cuts -= 2;
2489 num_ring_cuts -= 2;
2490 } else {
2491 num_cuts -= 1;
2492 }
2493 num_cut_pieces -= 1;
2494 if (ret = remove_deriv_mark( da+i, j1 )) {
2495 goto exit_function;
2496 }
2497 nFound ++;
2498 }
2499 if ( !ret_n || ret_i && ret_n ) {
2500 if ( da[n].typ[k1] & DERIV_RING ) {
2501 num_cuts -= 2;
2502 num_ring_cuts -= 2;
2503 } else {
2504 num_cuts -= 1;
2505 }
2506 num_cut_pieces -= 1;
2507 if ( ret = remove_deriv_mark( da+n, k1 ) ) {
2508 goto exit_function;
2509 }
2510 nFound ++;
2511 }
2512 }
2513 }
2514 }
2515 }
2516 if ( nFound ) {
2517 for ( i = 0; i < num_atoms; i ++ ) {
2518 for ( j = 0; j < MAX_AT_DERIV && da[i].typ[j]; j ++ ) {
2519 /* attn: j is changed inside the cycle body */
2520 if ( da[i].typ[j] & DERIV_DUPLIC ) {
2521 if (ret = remove_deriv( da+i, j )) {
2522 goto exit_function;
2523 }
2524 j --;
2525 }
2526 }
2527 }
2528 }
2529
2530 /* make sure DERIV_RING type disconnections increase */
2531 /* number of components by the number of disconnected derivateves */
2532 /* Avoid cases like these:
2533
2534 O--R--O DO--R--OD
2535 / \
2536 R--X Y--R => R--XT2 T2Y--R
2537 \ /
2538 O--R--O DO--R--OD
2539
2540
2541
2542 O--O DO--OD
2543 / \
2544 R--X--O---Y--R => R--X OD2 Y--R
2545 T2 T2
2546
2547 */
2548 /* count DERIV_RING-type attachments */
2549 #if ( COUNT_ALL_NOT_DERIV == 1 )
2550 num_cuts_to_check = num_cuts;
2551 #else
2552 num_cuts_to_check = num_ring_cuts;
2553 #endif
2554 if ( num_cuts_to_check >= 2 )
2555 {
2556 /* check */
2557 R2C_ATPAIR *ap = (R2C_ATPAIR *) inchi_malloc( num_cuts_to_check * sizeof(ap[0]) );
2558 AT_NUMB comp_num;
2559 int /*n,*/ m_at, m_ord;
2560 AT_NUMB at1, at2;
2561 if ( !ap ) {
2562 ret = -1;
2563 goto exit_function; /* malloc failure */
2564 }
2565 repeat_without_deriv_ring:
2566 comp_num = 0;
2567 /* fill out the array of bonds to be cut */
2568 for ( i = j = 0; i < num_atoms; i ++ ) {
2569 if ( (da[i].typ[0] & DERIV_RING) && (da[i].typ[1] & DERIV_RING) &&
2570 da[i].num[0] <= MAX_AT_DERIV && da[i].num[1] <= MAX_AT_DERIV ) {
2571 if ( j+1 >= num_cuts_to_check ) {
2572 ret = -2;
2573 goto exit_r2c_num; /* wrong number of cuts = num */
2574 }
2575 for ( k = 0; k < 2; k ++ ) {
2576 at1 = i;
2577 at2 = at[i].neighbor[(int)da[i].ord[k]];
2578 n = ( at1 > at2 );
2579 ap[j].at[n] = at1;
2580 ap[j].at[1-n] = at2; /* ap[j].at[0] < ap[j].at[1] */
2581 j ++;
2582 }
2583 if ( 0 < cmp_r2c_atpair( ap+j-2, ap+j-1 ) ) {
2584 R2C_ATPAIR ap1 = ap[j-2];
2585 ap[j-2] = ap[j-1];
2586 ap[j-1] = ap1; /* sort each pair */
2587 }
2588 }
2589 #if ( COUNT_ALL_NOT_DERIV == 1 )
2590 else {
2591 for ( k = 0; k < DERIV_AT_LEN && da[i].typ[k]; k ++ ) {
2592 if ( j >= num_cuts_to_check || (da[i].typ[k] & DERIV_RING) ) {
2593 ret = -2;
2594 goto exit_r2c_num; /* wrong number of cuts = num or wrong type */
2595 }
2596 at1 = i;
2597 at2 = at[i].neighbor[(int)da[i].ord[k]];
2598 n = ( at1 > at2 );
2599 ap[j].at[n] = at1;
2600 ap[j].at[1-n] = at2; /* ap[j].at[0] < ap[j].at[1] */
2601 j ++;
2602 }
2603 }
2604 #endif
2605
2606 }
2607 if ( j != num_cuts_to_check ) {
2608 ret = -3;
2609 goto exit_r2c_num; /* wrong number of cuts = num */
2610 }
2611 /* !!!!!!!! check that there are no derivatives inside a derivative */
2612 comp_num = 0; /* here it is the number of removed cuts */
2613 for ( i = 0; i < num_cuts_to_check; i += j ) {
2614 for ( j = n = 0; j < 2; j ++ ) {
2615 int atj = (int)ap[i].at[j];
2616 if ( da[atj].typ[0] && at[atj].neighbor[(int)da[atj].ord[0]] == ap[i].at[1-j] ) {
2617 k = j;
2618 n ++;
2619 m_at = atj;
2620 m_ord = 0;
2621 } else
2622 if ( da[atj].typ[1] && at[atj].neighbor[(int)da[atj].ord[1]] == ap[i].at[1-j] ) {
2623 k = j;
2624 n ++;
2625 m_at = atj;
2626 m_ord = 1;
2627 }
2628 }
2629 if ( n != 1 ) {
2630 ret = -3;
2631 goto exit_r2c_num; /* wrong atom pair */
2632 }
2633 if ( (da[m_at].typ[m_ord] & DERIV_RING) ) {
2634 n = (int)ap[i].at[k]; /* atom inside the derivation attachment */
2635 j = 2; /* number of bonds to cut */
2636 if ( i+j > num_cuts_to_check || (int)ap[i+1].at[0] != n && (int)ap[i+1].at[1] != n ) {
2637 ret = -3;
2638 goto exit_r2c_num; /* wrong atom pair */
2639 }
2640 } else {
2641 n = ap[i].at[1-k]; /* atom inside the derivation attachment */
2642 j = 1; /* number of bonds to cut */
2643 }
2644
2645 /* at[n] belongs to the derivation agent */
2646 cur_num_at = mark_atoms_ap( at, n, ap+i, j, 0, 1 );
2647 for ( k = 0; k < num_cuts_to_check; k ++ ) {
2648 if ( k == i || k==i+j-1 )
2649 continue;
2650 if ( at[(int)ap[k].at[0]].at_type || at[(int)ap[k].at[1]].at_type ) {
2651 /* unmark the cut: found a cut inside the derivatizing agent */
2652 da[m_at].typ[m_ord] |= DERIV_UNMARK;
2653 num_cuts -= 1;
2654 num_cut_pieces -= 1;
2655 if ( j == 2 ) {
2656 da[m_at].typ[1-m_ord] |= DERIV_UNMARK;
2657 num_cuts -= 1;
2658 num_ring_cuts -= 2;
2659 }
2660 comp_num ++;
2661 break;
2662 }
2663 }
2664 UnMarkOtherIndicators( at, num_atoms );
2665 }
2666 if ( comp_num ) {
2667 for ( i = 0; i < num_atoms; i ++ ) {
2668 if ( da[i].typ[0] & DERIV_UNMARK ) {
2669 da[i].num[0] = da[i].num[1];
2670 da[i].ord[0] = da[i].ord[1];
2671 da[i].typ[0] = da[i].typ[1];
2672 da[i].typ[1] = 0;
2673 j = 0;
2674 } else {
2675 j = 1;
2676 }
2677 if ( da[i].typ[j] & DERIV_UNMARK ) {
2678 da[i].typ[j] = 0;
2679 }
2680 }
2681 #if ( COUNT_ALL_NOT_DERIV == 1 )
2682 num_cuts_to_check = num_cuts;
2683 #else
2684 num_cuts_to_check = num_ring_cuts;
2685 #endif
2686 if ( num_cuts < 0 || num_ring_cuts < 0 || num_cut_pieces < 0 ) {
2687 ret = -3;
2688 goto exit_r2c_num; /* wrong number of cuts = num */
2689 }
2690 goto repeat_without_deriv_ring;
2691 }
2692
2693 /* sort the bonds for subsequent searching by bisections */
2694 if ( num_cuts_to_check > 1 ) {
2695 qsort( ap, num_cuts_to_check, sizeof(ap[0]), cmp_r2c_atpair);
2696 }
2697 /* mark components to be disconnected */
2698 comp_num = 0; /* number of components */
2699 cur_num_at = 0; /* number of atoms left after disconnecting the derivatizing agent */
2700 UnMarkOtherIndicators( at, num_atoms );
2701 for ( i = 0; i < num_cuts_to_check; i ++ ) {
2702 n = 0;
2703 for ( j = 0; j < 2; j ++ ) {
2704 if ( da[(int)ap[i].at[j]].typ[0] ) {
2705 k = j;
2706 n ++;
2707 }
2708 }
2709 if ( n != 1 ) {
2710 ret = -3;
2711 goto exit_r2c_num; /* wrong atom pair */
2712 }
2713 n = ap[i].at[k]; /* marked atom */
2714 if ( (da[n].typ[0] & DERIV_RING) ) {
2715 n = ap[i].at[1-k];
2716 }
2717 /* at[n] belongs to the derivation agent */
2718 if ( !at[n].at_type ) {
2719 comp_num ++;
2720 cur_num_at = mark_atoms_ap( at, n, ap, num_cuts_to_check, cur_num_at, comp_num );
2721 }
2722 }
2723 if ( comp_num > 1 ) {
2724 /* eliminate offending DERIV_RING type derivatives */
2725 if ( num_ring_cuts <= 2 ) {
2726 ret = -99;
2727 goto exit_r2c_num;
2728 }
2729 n = 0;
2730 for ( i = j = 0; i < num_atoms; i ++ ) {
2731 if ( (da[i].typ[0] & DERIV_RING) && (da[i].typ[1] & DERIV_RING) ) {
2732 int at1a = at[i].neighbor[(int)da[i].ord[0]];
2733 int at2a = at[i].neighbor[(int)da[i].ord[1]];
2734 if ( at[at1a].at_type != at[at2a].at_type ) {
2735 da[i].typ[0] = 0; /* eliminate this cut */
2736 da[i].typ[1] = 0;
2737 n ++;
2738 num_cuts_to_check -= 2;
2739 num_cuts -= 2;
2740 num_ring_cuts -= 2;
2741 num_cut_pieces -= 1;
2742 }
2743 }
2744 }
2745 if ( n > 0 && num_cuts_to_check > 2 ) {
2746 goto repeat_without_deriv_ring;
2747 }
2748 }
2749 ret = 0;
2750 exit_r2c_num:
2751 inchi_free( ap );
2752 UnMarkOtherIndicators( at, num_atoms );
2753 if ( ret < 0 || num_cuts_to_check >= 2 && cur_num_at < MIN_AT_LEFT_DERIV ) {
2754 goto exit_function; /* unexpected error or nothing left */
2755 }
2756 }
2757
2758 if ( !num_cuts ) {
2759 continue; /*goto exit_function;*/
2760 }
2761 /* eliminate derivatives that are not in the list */
2762 num_cuts = EliminateDerivNotInList( at, da, num_atoms );
2763 if ( num_cuts < 0 ) {
2764 ret = num_cuts;
2765 goto exit_function;
2766 }
2767
2768
2769 /* make cuts */
2770 num_cuts = 0;
2771 for ( i = num = 0; i < num_atoms; i ++ ) {
2772 for ( len = 0; len < MAX_AT_DERIV && da[i].typ[len]; len ++ )
2773 ;
2774 switch( len ) {
2775 case 0:
2776 continue;
2777 case 1:
2778 /* single cut: unconditional */
2779 make_single_cut( at, da, i, 0 );
2780 num_cuts += 1;
2781 continue;
2782 case 2:
2783 if ( (da[i].typ[0] & DERIV_RING) && (da[i].typ[1] & DERIV_RING) ||
2784 da[i].typ[0] == DERIV_AMINE_tN && da[i].typ[1] == DERIV_AMINE_tN ) {
2785 /* double cut, unconditional */
2786 make_single_cut( at, da, i, 1 );
2787 make_single_cut( at, da, i, 0 );
2788 num_cuts += 1;
2789 continue;
2790 }
2791 if ( da[i].typ[0] == da[i].typ[1] ) {
2792 /* DERIV_BRIDGE_O or DERIV_BRIDGE_NH; cut off the smallest */
2793 if ( da[i].num[0] > da[i].num[1] ) {
2794 make_single_cut( at, da, i, 1 );
2795 num_cuts += 1;
2796 } else
2797 if ( da[i].num[0] < da[i].num[1] ) {
2798 make_single_cut( at, da, i, 0 );
2799 num_cuts += 1;
2800 }
2801 continue;
2802 }
2803 ret = -88;
2804 goto exit_function; /* unexpected */
2805 case 3:
2806 if ( da[i].typ[0] == da[i].typ[1] &&
2807 da[i].typ[0] == da[i].typ[2] &&
2808 da[i].typ[0] == DERIV_AMINE_tN ) {
2809 int x, y, z;
2810 x = (da[i].num[0] < da[i].num[1])? 0 : 1;
2811 x = (da[i].num[x] < da[i].num[2])? x : 2; /* min */
2812 z = (da[i].num[0] < da[i].num[1])? 1 : 0;
2813 z = (da[i].num[x] < da[i].num[2])? 2 : z; /* max */
2814 y = ((x+1)^(z+1))-1; /* median */
2815 if (da[i].num[x] == da[i].num[z] )
2816 continue; /* all deriv. agents have same size */
2817 /* two smallest */
2818 if ( da[i].num[x] == da[i].num[y] && x < y ) {
2819 int t = x; /* first cut x > y */
2820 x = y;
2821 y = t;
2822 }
2823 make_single_cut( at, da, i, x );
2824 num_cuts += 1;
2825 if ( da[i].num[x] == da[i].num[y] ) {
2826 /* equally small */
2827 make_single_cut( at, da, i, y );
2828 num_cuts += 1;
2829 }
2830 continue;
2831 }
2832 ret = -88;
2833 goto exit_function; /* unexpected */
2834 case 4:
2835 if ( (da[i].typ[0] & DERIV_RING) && (da[i].typ[1] & DERIV_RING) &&
2836 (da[i].typ[2] & DERIV_RING) && (da[i].typ[3] & DERIV_RING) ) {
2837 int n01 = inchi_max( da[i].num[0], da[i].num[1] );
2838 int n23 = inchi_max( da[i].num[2], da[i].num[3] );
2839 if ( n01 < n23 ) {
2840 make_single_cut( at, da, i, 1 );
2841 make_single_cut( at, da, i, 0 );
2842 num_cuts += 1;
2843 } else
2844 if ( n01 > n23 ) {
2845 make_single_cut( at, da, i, 3 );
2846 make_single_cut( at, da, i, 2 );
2847 num_cuts += 1;
2848 }
2849 continue;
2850 }
2851 }
2852 }
2853 nTotNumCuts += num_cuts;
2854 #if ( REMOVE_CUT_DERIV == 1 ) /* normally YES */
2855 if ( num_cuts ) {
2856 /* remove marked with Tritium disconnected derivative attachments */
2857 ORIG_ATOM_DATA Orig_inp_data1, *orig_inp_data1;
2858 INP_ATOM_DATA *inp_cur_data1 = NULL;
2859 int num_components1, i_component1, num_component_left=0;
2860 orig_inp_data1 = &Orig_inp_data1;
2861 memset( orig_inp_data1, 0, sizeof(orig_inp_data1[0]) );
2862 UnMarkRingSystemsInp( at, num_atoms );
2863 UnMarkOtherIndicators( at, num_atoms );
2864 UnMarkOneComponent( at, num_atoms );
2865 for (i = 0; i < num_atoms; i ++ ) {
2866 orig_inp_data1->num_inp_bonds += at[i].valence;
2867 }
2868 orig_inp_data1->num_inp_bonds /= 2;
2869 orig_inp_data1->num_inp_atoms = num_atoms;
2870 orig_inp_data1->at = at; /* = from inp_cur_data[i_component].at */
2871 num_components1 = MarkDisconnectedComponents( orig_inp_data1, 0 );
2872 inp_cur_data1 = (INP_ATOM_DATA *)inchi_calloc( num_components1, sizeof(inp_cur_data1[0]) );
2873 /* extract components and discard disconnected derivatizing agents */
2874 for ( i_component1 = 0; i_component1 < num_components1; i_component1 ++ ) {
2875 CreateInpAtomData( inp_cur_data1+i_component1, orig_inp_data1->nCurAtLen[i_component1], 0 );
2876 inp_cur_data1[i_component1].num_at = ExtractConnectedComponent( orig_inp_data1->at, orig_inp_data1->num_inp_atoms,
2877 i_component1+1, inp_cur_data1[i_component1].at );
2878 /* error processing */
2879 if ( inp_cur_data1[i_component1].num_at <= 0 || orig_inp_data1->nCurAtLen[i_component1] != inp_cur_data1[i_component1].num_at ) {
2880 ret = -(i_component1+1); /* severe error */
2881 break;
2882 }
2883 /* if the component has tritium then discard: it is a derivatizing agent */
2884 for (i = 0; i < inp_cur_data1[i_component1].num_at; i ++ ) {
2885 if ( inp_cur_data1[i_component1].at[i].num_iso_H[1] ) {
2886 inp_cur_data1[i_component1].at[i].num_iso_H[1] = 0; /* remove deuterium */
2887 }
2888 if ( inp_cur_data1[i_component1].at[i].num_iso_H[2] ) {
2889 FreeInpAtomData( inp_cur_data1+i_component1 );
2890 break;
2891 }
2892 }
2893 }
2894 /* merge components into one -- must be only one */
2895 for ( i_component1 = 0, num_atoms = 0; i_component1 < num_components1; i_component1 ++ ) {
2896 num_atoms += inp_cur_data1[i_component1].num_at;
2897 }
2898 at = (inp_ATOM *) inchi_calloc( num_atoms, sizeof(at[0]) );
2899 cur_num_at = 0;
2900 for ( i_component1 = 0; i_component1 < num_components1; i_component1 ++ ) {
2901 /* clean and prepare */
2902 if ( !inp_cur_data1[i_component1].num_at )
2903 continue; /* removed derivatizing object */
2904 /*UnMarkOneComponent( inp_cur_data1[i_component1].at, inp_cur_data1[i_component1].num_at );*/
2905 /* merge one by one */
2906 cur_num_at = add_inp_ATOM( at, num_atoms, cur_num_at, inp_cur_data1[i_component1].at, inp_cur_data1[i_component1].num_at );
2907 FreeInpAtomData( inp_cur_data1+i_component1 ); /* cleanup */
2908 num_component_left ++;
2909 }
2910 /* replace the component */
2911 /* order of the following two statements is critically important */
2912 UnMarkDisconnectedComponents( orig_inp_data1 ); /* orig_inp_data1->at is same as inp_cur_data[i_component].at */
2913 FreeInpAtomData( inp_cur_data+i_component ); /* cleanup the original component */
2914 inp_cur_data[i_component].at = at;
2915 inp_cur_data[i_component].num_at = cur_num_at;
2916 inchi_free( inp_cur_data1 );
2917 }
2918 #endif
2919 }
2920 if ( nTotNumCuts ) {
2921 /* merge components into one */
2922 for ( i = 0, num_atoms = 0; i < num_components; i ++ ) {
2923 num_atoms += inp_cur_data[i].num_at;
2924 }
2925 at = (inp_ATOM *) inchi_calloc( num_atoms, sizeof(at[0]) );
2926 cur_num_at = 0;
2927 for ( i = 0; i < num_components; i ++ ) {
2928 /* clean and prepare */
2929 UnMarkRingSystemsInp( inp_cur_data[i].at, inp_cur_data[i].num_at );
2930 UnMarkOtherIndicators( inp_cur_data[i].at, inp_cur_data[i].num_at );
2931 UnMarkOneComponent( inp_cur_data[i].at, inp_cur_data[i].num_at );
2932 subtract_DT_from_num_H( inp_cur_data[i].num_at, inp_cur_data[i].at );
2933 /* merge one by one */
2934 cur_num_at = add_inp_ATOM( at, num_atoms, cur_num_at, inp_cur_data[i].at, inp_cur_data[i].num_at );
2935 }
2936 /* replace orig_inp_data */
2937 if ( cur_num_at == num_atoms ) {
2938 inchi_free( orig_inp_data->at );
2939 orig_inp_data->at = at;
2940 orig_inp_data->num_inp_atoms = cur_num_at;
2941 if ( orig_inp_data->szCoord ) {
2942 inchi_free( orig_inp_data->szCoord );
2943 orig_inp_data->szCoord = NULL;
2944 }
2945 UnMarkDisconnectedComponents( orig_inp_data );
2946 } else {
2947 if ( at ) {
2948 inchi_free( at );
2949 at = NULL;
2950 }
2951 ret = -999; /* num atoms mismatch */
2952 }
2953 }
2954 exit_function:
2955 if ( da ) {
2956 inchi_free( da );
2957 da = NULL;
2958 }
2959 for ( i_component = 0; i_component < num_components; i_component ++ ) {
2960 FreeInpAtomData( inp_cur_data+i_component );
2961 }
2962 inchi_free( inp_cur_data );
2963 inp_cur_data = NULL;
2964
2965 return ret? ret : nTotNumCuts;
2966 }
2967
2968 #endif /* UNDERIVATIZE */
2969 /********************************************************************/
2970 #if ( RING2CHAIN == 1 )
2971 /*
2972 type=1 (incl sugars: W=O, A=C(sat), Z=C(sat), Y=O, B=C(sat)-OH
2973
2974 A---W A---WH
2975 / | /
2976 | | ---> |
2977 \ | \
2978 B---Z---YH B---Z===Y
2979 | |
2980 | |
2981 C(opt) C(opt)
2982
2983 type=2 [not implemented]
2984
2985 R---W R---WH
2986 / \ /
2987 | Z ---> | Z
2988 \ / \ //
2989 R---YH R---Y
2990
2991 */
2992 #define R2C_EMPTY 127
2993 typedef struct tagRing2Chain { /* atom Z */
2994 char type; /* 1 => sugar-like */
2995 char ordW; /* ordering number of W-neighbor; bond to break; H to add */
2996 char ordY; /* ordering number of YH-neighbor; bond to increment; H to remove */
2997 char ordC; /* atom B = C(sat) */
2998 char ordCopt; /* if exists, saturated C connected by a chain-bond to Z */
2999 } R2C_AT;
3000
3001 int detect_r2c_Zatom( inp_ATOM *at, R2C_AT *da, int iZ );
3002 int cut_ring_to_chain( inp_ATOM *at, R2C_AT *da, int iZ );
3003
3004 /********************************************************************/
detect_r2c_Zatom(inp_ATOM * at,R2C_AT * da,int iZ)3005 int detect_r2c_Zatom( inp_ATOM *at, R2C_AT *da, int iZ )
3006 {
3007 int i, j, neigh, neighneigh, nRingSystem, num_found;
3008 R2C_AT da1;
3009 if ( at[iZ].valence > 4 )
3010 return 0;
3011 if ( at[iZ].valence != at[iZ].chem_bonds_valence )
3012 return 0; /* approach limitation: no double bonds */
3013
3014 if ( at[iZ].el_number != el_number_C )
3015 return 0; /* sugar-specific */
3016
3017 if ( at[iZ].nNumAtInRingSystem < 5 )
3018 return 0; /* not in a suitable ring */
3019
3020 if ( !at[iZ].bCutVertex )
3021 return 0; /* recognize only type 1 for now */
3022
3023 nRingSystem = at[iZ].nRingSystem;
3024 memset ( &da1, R2C_EMPTY, sizeof(da1) );
3025
3026 for ( i = 0, num_found = 0; i < at[iZ].valence; i ++ ) {
3027 neigh = at[iZ].neighbor[i];
3028 if ( at[neigh].charge || at[neigh].radical )
3029 return 0;
3030 if ( at[neigh].el_number == el_number_O &&
3031 at[neigh].valence == 1 &&
3032 at[neigh].chem_bonds_valence == 1 &&
3033 at[neigh].num_H == 1 ) {
3034 /* found Z-OH, i.e. Z-YH */
3035 if ( da1.ordY == R2C_EMPTY ) {
3036 da1.ordY = i;
3037 num_found ++;
3038 continue;
3039 } else {
3040 return 0;
3041 }
3042 }
3043 if ( at[neigh].el_number == el_number_O &&
3044 at[neigh].valence == 2 &&
3045 at[neigh].chem_bonds_valence == 2 &&
3046 at[neigh].num_H == 0 &&
3047 at[neigh].nRingSystem == nRingSystem ) {
3048 /* found Z-O-, i.e. Z-W- */
3049 if ( da1.ordW == R2C_EMPTY ) {
3050 /* j = index of the oppozite to at[iZ] neighbor of at[neigh] */
3051 j = (at[neigh].neighbor[0] == iZ);
3052 neighneigh = at[neigh].neighbor[j];
3053 if ( at[neighneigh].valence != at[neighneigh].chem_bonds_valence ||
3054 at[neighneigh].el_number != el_number_C )
3055 return 0; /* sugar-specific */
3056 da1.ordW = i;
3057 num_found ++;
3058 continue;
3059 } else {
3060 return 0;
3061 }
3062 }
3063 if ( at[neigh].el_number == el_number_C &&
3064 at[neigh].valence > 2 &&
3065 at[neigh].chem_bonds_valence == at[neigh].valence &&
3066 at[neigh].num_H <= 1 &&
3067 at[neigh].nRingSystem == nRingSystem ) {
3068 /* sugar-specfic: carbon in the ring should have -OH neighbor */
3069 int iOH;
3070 for ( j = 0; j < at[neigh].valence; j ++ ) {
3071 iOH = at[neigh].neighbor[j];
3072 if ( at[iOH].el_number == el_number_O &&
3073 at[iOH].valence == 1 &&
3074 at[iOH].chem_bonds_valence == 1 &&
3075 at[iOH].num_H == 1 &&
3076 !at[iOH].charge && !at[iOH].radical ) {
3077 if ( da1.ordC == R2C_EMPTY ) {
3078 da1.ordC = i;
3079 num_found ++;
3080 break;
3081 } else {
3082 return 0;
3083 }
3084 }
3085 }
3086 if ( j < at[neigh].valence )
3087 continue;
3088 }
3089 if ( at[neigh].el_number == el_number_C &&
3090 at[neigh].chem_bonds_valence == at[neigh].valence &&
3091 at[neigh].nRingSystem != nRingSystem ) {
3092 /* extra carbon neighbor of Z */
3093 if ( da1.ordCopt == R2C_EMPTY ) {
3094 da1.ordCopt = i;
3095 continue;
3096 }
3097 }
3098 return 0; /* unexpectd neighbor */
3099 }
3100 if (num_found == 3) {
3101 da1.type = 1;
3102 da[iZ] = da1;
3103 return 1; /* disconnection found */
3104 }
3105 return 0;
3106 }
3107 /********************************************************************/
cut_ring_to_chain(inp_ATOM * at,R2C_AT * da,int iZ)3108 int cut_ring_to_chain( inp_ATOM *at, R2C_AT *da, int iZ )
3109 {
3110 int ret = -1; /* error flag */
3111 int iordW = (int)da[iZ].ordW; /* ord of the bond in iZ */
3112 int iordY = (int)da[iZ].ordY; /* ord of the bond in iZ */
3113 int iordC = (int)da[iZ].ordC;
3114 int iW, iY, num_iso_H, i, jordZ;
3115 AT_NUMB *p;
3116
3117 if ( da[iZ].type != 1 ) {
3118 return 0;
3119 }
3120 if ( 0 > iordW || iordW >= at[iZ].valence ||
3121 0 > iordY || iordY >= at[iZ].valence ||
3122 0 > iordC || iordC >= at[iZ].valence /* suger-specific*/) {
3123 return -1; /* program error */
3124 }
3125 /* find other da[] that affect at[iZ] */
3126 iW = at[iZ].neighbor[iordW]; /* opposite atom to disconnect and add H */
3127 iY = at[iZ].neighbor[iordY]; /* opposite atom to increment the bond and remove H*/
3128 if ( !at[iY].num_H || at[iZ].bond_type[iordY] != BOND_TYPE_SINGLE ) {
3129 return -2; /* program error */
3130 }
3131 /* increment at[iZ]--at[iY] bond */
3132 p = is_in_the_list( at[iY].neighbor, (AT_NUMB) iZ, at[iY].valence );
3133 if ( !p ) {
3134 return -3; /* program error */
3135 }
3136 jordZ = p - at[iY].neighbor;
3137 at[iZ].bond_type[iordY] ++;
3138 at[iZ].chem_bonds_valence ++;
3139 at[iY].bond_type[jordZ] ++;
3140 at[iY].chem_bonds_valence ++;
3141
3142 /* disconnect at[iZ]--at[iW] bond */
3143 ret = DisconnectInpAtBond( at, NULL, iZ, iordW );
3144 if ( ret != 1 ) {
3145 return -4; /* program error */
3146 }
3147 /* disconnection succeeded */
3148 /* transfer H from at[iY] to at[iW] */
3149 num_iso_H = NUM_ISO_H(at, iY);
3150 if ( at[iY].num_H == num_iso_H ) {
3151 for ( i = 0; i < NUM_H_ISOTOPES; i ++ ) {
3152 if ( at[iY].num_iso_H[i] ) {
3153 at[iY].num_iso_H[i] --;
3154 at[iW].num_iso_H[i] ++;
3155 }
3156 }
3157 }
3158 at[iY].num_H --;
3159 at[iW].num_H ++;
3160 return 1;
3161 }
3162 /********************************************************************/
Ring2Chain(ORIG_ATOM_DATA * orig_inp_data)3163 int Ring2Chain( ORIG_ATOM_DATA *orig_inp_data )
3164 {
3165 int ret = 0, i, j, n, num_atoms, num_components, nFound, num, num_cuts, iZ, cur_num_at;
3166 inp_ATOM *at = orig_inp_data->at;
3167 INP_ATOM_DATA *inp_cur_data = NULL;
3168 R2C_AT *da = NULL;
3169
3170 set_R2C_el_numbers( );
3171 /* prepare */
3172 num_atoms = remove_terminal_HDT( orig_inp_data->num_inp_atoms, at, 1 );
3173 /*^^^^^ always accomodate accomodate FIX_TERM_H_CHRG_BUG - IPl, July 2008*/
3174 orig_inp_data->num_inp_atoms = num_atoms;
3175
3176 /* initialize */
3177 UnMarkDisconnectedComponents( orig_inp_data );
3178 num_cuts = 0;
3179 /* mark */
3180 num_components = MarkDisconnectedComponents( orig_inp_data, 0 );
3181 inp_cur_data = (INP_ATOM_DATA *)inchi_calloc( num_components, sizeof(inp_cur_data[0]) );
3182 iZ = -1;
3183 for ( j = 0; j < num_components; j ++ ) {
3184 CreateInpAtomData( inp_cur_data+j, orig_inp_data->nCurAtLen[j], 0 );
3185 inp_cur_data[j].num_at = ExtractConnectedComponent( orig_inp_data->at, orig_inp_data->num_inp_atoms, j+1, inp_cur_data[j].at );
3186 /* error processing */
3187 if ( inp_cur_data[j].num_at <= 0 || orig_inp_data->nCurAtLen[j] != inp_cur_data[j].num_at ) {
3188 ret = -(j+1); /* severe error */
3189 goto exit_function;
3190 }
3191 /* initialize */
3192 num_atoms = inp_cur_data[j].num_at;
3193 at = inp_cur_data[j].at;
3194 add_DT_to_num_H( num_atoms, at );
3195
3196 UnMarkRingSystemsInp( at, num_atoms );
3197 UnMarkOtherIndicators( at, num_atoms );
3198 UnMarkOneComponent( at, num_atoms );
3199 MarkRingSystemsInp( at, num_atoms, 0 );
3200 ret = mark_arom_bonds( at, num_atoms );
3201 if ( ret < 0 ) {
3202 goto exit_function;
3203 }
3204 ret = 0;
3205 if ( da ) inchi_free( da );
3206 da = (R2C_AT *)inchi_calloc( num_atoms, sizeof(da[0]) );
3207
3208 /* detect ring-to-chain possibilities */
3209 nFound = 0;
3210 for ( i = 0, num=0; i < num_atoms; i ++ ) {
3211 if ( at[i].bCutVertex /* type 1 specific*/ && !da[i].type ) {
3212 num += (n=detect_r2c_Zatom( at, da, i ));
3213 if ( n == 1 )
3214 iZ = i;
3215 UnMarkOtherIndicators( at, num_atoms );
3216 }
3217 }
3218
3219 if ( num == 1 ) {
3220 /* convert ring to chain: make single cut */
3221 ret = cut_ring_to_chain( at, da, iZ );
3222 if ( ret < 0 ) {
3223 goto exit_function;
3224 }
3225 num_cuts += (ret == 1);
3226 } else
3227 if ( num ) {
3228 /* allocate an array of bonds to be cut */
3229 R2C_ATPAIR *ap = (R2C_ATPAIR *) inchi_malloc( sizeof(ap[0]) * num );
3230 AT_NUMB comp_num = 0;
3231 if ( !ap ) {
3232 ret = -1; /* malloc failure */
3233 goto exit_function;
3234 }
3235 /* fill out the array of bonds to be cut */
3236 for ( i = j = 0; i < num_atoms; i ++ ) {
3237 if ( da[i].type == 1 ) {
3238 AT_NUMB at1 = i;
3239 AT_NUMB at2 = at[i].neighbor[(int)da[i].ordW];
3240 if ( j >= num ) {
3241 ret = -2;
3242 goto exit_r2c_num; /* wrong number of cuts = num */
3243 }
3244 n = ( at1 > at2 );
3245 ap[j].at[n] = at1;
3246 ap[j].at[1-n] = at2; /* ap[j].at[0] < ap[j].at[1] */
3247 j ++;
3248 }
3249 }
3250 if ( j != num ) {
3251 ret = -3;
3252 goto exit_r2c_num; /* wrong number of cuts = num */
3253 }
3254 /* sort the bonds for subsequent searching by bisections */
3255 qsort( ap, num, sizeof(ap[0]), cmp_r2c_atpair);
3256 /* mark components to be disconnected */
3257 for ( i = 0; i < num; i ++ ) {
3258 for ( j = 0; j < 2; j ++ ) {
3259 if ( !at[ap[i].at[j]].at_type ) {
3260 comp_num ++;
3261 mark_atoms_ap( at, (int)ap[i].at[j], ap, num, 0, comp_num );
3262 }
3263 }
3264 }
3265 /* convert ring to chain */
3266 for ( i = 0; i < num; i ++ ) {
3267 int i1 = ap[i].at[0];
3268 int i2 = ap[i].at[1];
3269 iZ = -1;
3270 if ( at[i1].at_type == at[i2].at_type ) {
3271 /* by definition, there are no adjacent iZ atoms; one iZ atom per bond */
3272 if ( da[i1].type == 1 && at[i1].neighbor[(int)da[i1].ordW] == i2 ) {
3273 iZ = i1;
3274 } else
3275 if ( da[i2].type == 1 && at[i2].neighbor[(int)da[i2].ordW] == i1 ) {
3276 iZ = i2;
3277 } else {
3278 ret = -3;
3279 goto exit_r2c_num;
3280 }
3281 ret = cut_ring_to_chain( at, da, iZ );
3282 if ( ret < 0 ) {
3283 goto exit_r2c_num;
3284 }
3285 num_cuts += (ret == 1);
3286 }
3287 }
3288 ret = 0;
3289 exit_r2c_num:
3290 inchi_free( ap );
3291 UnMarkOtherIndicators( at, num_atoms );
3292 if ( ret < 0 ) {
3293 goto exit_function;
3294 }
3295 }
3296 }
3297 if ( num_cuts ) {
3298 /* merge components into one */
3299 for ( i = 0, num_atoms = 0; i < num_components; i ++ ) {
3300 num_atoms += inp_cur_data[i].num_at;
3301 }
3302 at = (inp_ATOM *) inchi_calloc( num_atoms, sizeof(at[0]) );
3303 cur_num_at = 0;
3304 for ( i = 0; i < num_components; i ++ ) {
3305 /* clean and prepare */
3306 UnMarkRingSystemsInp( inp_cur_data[i].at, inp_cur_data[i].num_at );
3307 UnMarkOtherIndicators( inp_cur_data[i].at, inp_cur_data[i].num_at );
3308 UnMarkOneComponent( inp_cur_data[i].at, inp_cur_data[i].num_at );
3309 subtract_DT_from_num_H( inp_cur_data[i].num_at, inp_cur_data[i].at );
3310 /* merge one by one */
3311 cur_num_at = add_inp_ATOM( at, num_atoms, cur_num_at, inp_cur_data[i].at, inp_cur_data[i].num_at );
3312 }
3313 /* replace orig_inp_data */
3314 if ( cur_num_at == num_atoms ) {
3315 inchi_free( orig_inp_data->at );
3316 orig_inp_data->at = at;
3317 orig_inp_data->num_inp_atoms = cur_num_at;
3318 if ( orig_inp_data->szCoord ) {
3319 inchi_free( orig_inp_data->szCoord );
3320 orig_inp_data->szCoord = NULL;
3321 }
3322 UnMarkDisconnectedComponents( orig_inp_data );
3323 } else {
3324 if ( at ) {
3325 inchi_free( at );
3326 at = NULL;
3327 }
3328 ret = -999; /* num atoms mismatch */
3329 }
3330 }
3331 exit_function:
3332 if ( da ) {
3333 inchi_free( da );
3334 da = NULL;
3335 }
3336 for ( j = 0; j < num_components; j ++ ) {
3337 FreeInpAtomData( inp_cur_data+j );
3338 }
3339 inchi_free( inp_cur_data );
3340 inp_cur_data = NULL;
3341
3342 return ret? ret : num_cuts;
3343 }
3344 #endif /* RING2CHAIN */
3345
3346