1 /*
2  * International Chemical Identifier (InChI)
3  * Version 1
4  * Software version 1.04
5  * September 9, 2011
6  *
7  * The InChI library and programs are free software developed under the
8  * auspices of the International Union of Pure and Applied Chemistry (IUPAC).
9  * Originally developed at NIST. Modifications and additions by IUPAC
10  * and the InChI Trust.
11  *
12  * IUPAC/InChI-Trust Licence for the International Chemical Identifier (InChI)
13  * Software version 1.0.
14  * Copyright (C) IUPAC and InChI Trust Limited
15  *
16  * This library is free software; you can redistribute it and/or modify it under the
17  * terms of the IUPAC/InChI Trust Licence for the International Chemical Identifier
18  * (InChI) Software version 1.0; either version 1.0 of the License, or
19  * (at your option) any later version.
20  *
21  * This library is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
24  * See the IUPAC/InChI Trust Licence for the International Chemical Identifier (InChI)
25  * Software version 1.0 for more details.
26  *
27  * You should have received a copy of the IUPAC/InChI Trust Licence for the
28  * International Chemical Identifier (InChI) Software version 1.0 along with
29  * this library; if not, write to:
30  *
31  * The InChI Trust
32  * c/o FIZ CHEMIE Berlin
33  * Franklinstrasse 11
34  * 10587 Berlin
35  * GERMANY
36  *
37  */
38 
39 
40 #ifndef __INCHICANT_H__
41 #define __INCHICANT_H__
42 
43 /******************************************************/
44 /*                                                    */
45 /*                 Canonicalization definitions       */
46 /*                                                    */
47 /******************************************************/
48 #include "ichisize.h"
49 
50 
/* Short integer aliases. INCHI_US_SHORT_DEF records that the two typedefs
   have been made, so they are not declared again under the same macro. */
#if !defined(INCHI_US_SHORT_DEF)
#define INCHI_US_SHORT_DEF
typedef unsigned short U_SHORT;
typedef signed short   S_SHORT;
#endif
56 
57 /*typedef unsigned long  INCHI_MODE;*/
58 
59 typedef union tagSplitLong {
60     unsigned long  ul;
61     U_SHORT        us[2];
62 }SU_LONG;
63 
64 #define _HI 1                            /* Intel platform */
65 #define _LO 0
66 
/* Length constants; neither is referenced elsewhere in this header. */
#define U_LONG_LEN     2
#define NEIGH_LIST_LEN 4
69 
70 #ifndef defined_NEIGH_LIST
71 typedef AT_RANK  *NEIGH_LIST;
72 #define defined_NEIGH_LIST
73 #endif
74 
/* Description of sets of equivalent atoms attached to cut-vertex atoms.
   Presumably each pointer addresses an array with one entry per set or per
   atom -- the allocation sizes are not visible here; confirm with callers. */
struct tagEQUIV_INFO {
    int  nNumSets;        /* number of sets */
    int *nCutVertexAtom;  /* the cut-vertex atom of a set of equivalent atoms */
    int *nFirstInSet;     /* first of the equivalent atoms in the parts of
                             the structure connected to the cut-vertex atom */
    int *nNumInSet;       /* how many equivalent atoms are connected to the
                             cut-vertex atom */
    int *nAtomNo;         /* equivalent atom number */
    int *nAddToRank;      /* value added to the rank to normalize it */
};
typedef struct tagEQUIV_INFO EQUIV_INFO;
83 
/* Unsigned int mask with every bit set except the three lowest ones. */
#define MOL_PART_MASK  (~0x07U)
85 
86 
/* Pairs a three-character element buffer with an integer valence.
   Presumably `element` holds a NUL-terminated element symbol -- confirm. */
struct tagAtData_dch {
    char element[3];
    int  valence;
};
typedef struct tagAtData_dch AT_DATA;
91 
92 
/* MAXVAL: maximum valence.
   ATOM_EL_LEN: presumably the buffer length reserved for an element name;
   it is not used inside this header -- TODO confirm. */
#define ATOM_EL_LEN 6
#define MAXVAL      20
96 
97 typedef struct tagAtomInvariantBytes {
98     S_CHAR cNotExactlyHillOrderNumber;
99     S_CHAR cNumberOfConnections;
100     /* S_CHAR cNumberOfNonHydrogenBonds; */
101     S_CHAR cAtomicNumber;
102 #if ( HYDROGENS_IN_INIT_RANKS == 1 )
103     S_CHAR cNumberOfAttachedHydrogens;
104 #endif
105 } ATOM_INVARIANT_BYTES;
106 
107 typedef struct tagAtomInvariant {
108     /* non-isotopic part */
109 #if ( USE_DISTANCES_FOR_RANKING == 1 )
110     AT_RANK         nDistanceFromTerminal;
111 #endif
112     ATOM_INVARIANT_BYTES b;
113     AT_RANK         cNum_tautomer;        /* 0 or for tautomer endpoint: number of endpoints in the group */
114     AT_RANK         cNum_tautomer_num[T_NUM_NO_ISOTOPIC]; /* 0 or numbers from t_gtroup */
115     /* isotopic part */
116     AT_ISO_SORT_KEY iso_sort_key;
117     AT_RANK         cNum_tautomer_iso[T_NUM_ISOTOPIC]; /* 0 or numbers from t_group */
118 } ATOM_INVARIANT;
119 /**********************************/
120 typedef enum tagAtInvariantIndexes {
121     AT_INV_HILL_ORDER,
122     AT_INV_NUM_CONNECTIONS,
123     AT_INV_NUM_H,
124     /* for endpoint + undirected graph, otherwise 0 */
125     AT_INV_NUM_TG_ENDPOINTS,
126     AT_INV_TG_NUMBERS,       /* num H, num (-) */
127     AT_INV_NUM_H_FIX = AT_INV_TG_NUMBERS+T_NUM_NO_ISOTOPIC,
128     AT_INV_BREAK1,
129     /* here compare iso sort key */
130     AT_INV_TAUT_ISO = AT_INV_BREAK1,
131     AT_INV_LENGTH   = AT_INV_TAUT_ISO + T_NUM_ISOTOPIC
132 } AT_INV_INDEXES;
133 
134 typedef struct tagAtomInvariant2 {
135     AT_NUMB         val[AT_INV_LENGTH];
136     AT_ISO_SORT_KEY iso_sort_key;
137     S_CHAR          iso_aux_key;
138 } ATOM_INVARIANT2;
139 
140 /******************* Partition **********************************/
141 typedef struct tagPartition {
142     AT_RANK *Rank;
143     AT_NUMB *AtNumber;
144 } Partition;
145 
146 /********************* BFCN *************************************/
147 typedef struct tagFixHOrTautCanonNumbering {
148 
149     int             num_at_tg;  /* = num_atoms for non-taut */
150     int             num_atoms;
151     int             nCanonFlags;
152     NEIGH_LIST     *NeighList;  /* length = num_at_tg */
153     /****************************/
154     /*     base structure       */
155     /****************************/
156     AT_RANK        *LinearCt;      /* connection table atoms (+taut. groups, directed graph)*/
157     int             nLenLinearCtAtOnly;
158     int             nLenLinearCt;
159     int             nMaxLenLinearCt;
160 
161     Partition       PartitionCt;  /* canonical numbering */
162     AT_RANK        *nSymmRankCt;  /* orbits */
163 
164     /* orig. fixed by tautomerism H positions */
165     NUM_H          *nNumHOrig;  /* original  H atoms positions + taut. info, excluding tautomeric H */
166     NUM_H          *nNumH;      /* canonical H atoms positions + taut. info, excluding tautomeric H */
167     int             nLenNumH;   /* length = num_atoms + 2*num_taut_groups */
168 
169     /* fixed H: original positions of tautomeric H; exists obly for tautomeric structures */
170     NUM_H          *nNumHOrigFixH;  /* original fixed positions of tautomeric H */
171     NUM_H          *nNumHFixH;      /* canonical fixed positions of tautomeric H */
172     int             nLenNumHFixH;   /* length = num_atoms */
173 
174     /*******************************************************************************/
175     /* the following exists only if isotopic and isotopic results requested        */
176     /*******************************************************************************/
177     Partition       PartitionCtIso;     /* canonical numbering of isotopic base structure, defined later */
178     AT_RANK        *nSymmRankCtIso;     /* orbits of isotopic structure */
179     AT_ISO_SORT_KEY *iso_sort_keys;     /* original isotopic sort keys for atoms and taut groups */
180     AT_ISO_SORT_KEY *iso_sort_keysOrig; /* canonical isotopic sort keys for atoms and taut groups */
181     int              len_iso_sort_keys;
182     S_CHAR          *iso_exchg_atnos;     /* canonical: 0=> tautomeric or may have isotopic H exchanged */
183     S_CHAR          *iso_exchg_atnosOrig; /* original: 0=> tautomeric or may have isotopic H exchanged */
184 
185 } FTCN;
186 
187 /******************** BCN *************************************/
188 typedef struct tagBaseCanonNumbering {
189 
190     AT_RANK            **pRankStack;
191     int                  nMaxLenRankStack;
192     int                  num_max;        /* allocated nRank[] arrays lengths in pRankStack */
193     int                  num_at_tg;  /* all of the following arrays have this length */
194     int                  num_atoms;
195     struct tagInchiTime *ulTimeOutTime;
196     FTCN                 ftcn[TAUT_NUM];
197 
198 } BCN;
199 
200 /***********************************
201  *
202  *  CANON_STAT
203  */
204 typedef struct tagCanonStat {
205     /*  statistics */
206     long                 lNumBreakTies;
207     long                 lNumNeighListIter;
208     long                 lNumTotCT;
209     long                 lNumDecreasedCT;
210     long                 lNumRejectedCT;
211     long                 lNumEqualCT;
212     struct tagInchiTime *ulTimeOutTime;
213     long                 lTotalTime;
214 
215     /* control */
216     int                  bFirstCT;
217     int                  bKeepSymmRank;
218     int                  bStereoIsBetter;
219 
220     int nCanonFlags;
221 
222     /* data : */
223 
224     AT_NUMB          *LinearCT;        /* connection table only */
225     AT_ISOTOPIC      *LinearCTIsotopic;
226     AT_ISO_TGROUP    *LinearCTIsotopicTautomer;
227     AT_STEREO_DBLE   *LinearCTStereoDble;
228     AT_STEREO_CARB   *LinearCTStereoCarb;
229     AT_STEREO_DBLE   *LinearCTStereoDbleInv;
230     AT_STEREO_CARB   *LinearCTStereoCarbInv;
231     AT_STEREO_DBLE   *LinearCTIsotopicStereoDble;
232     AT_STEREO_CARB   *LinearCTIsotopicStereoCarb;
233     AT_STEREO_DBLE   *LinearCTIsotopicStereoDbleInv;
234     AT_STEREO_CARB   *LinearCTIsotopicStereoCarbInv;
235     AT_TAUTOMER      *LinearCTTautomer;  /*  minimal */
236 
237 /* second copies of line notation arrays */
238 
239     AT_NUMB          *LinearCT2;   /* to save non-isotopic CT */
240 
241     int               nLenLinearCTStereoDble;
242     int               nLenLinearCTStereoDbleInv;
243     int               nMaxLenLinearCTStereoDble;  /* new */
244 
245     int               bCmpStereo;         /* 0 => no stereo to invert;
246                                              1 => StereoCtInv < StereoCt;
247                                              2 => StereoCtInv = StereoCt;
248                                              3 => StereoCtInv > StereoCt;
249                                            */
250     int               nLenLinearCTStereoCarb;
251     int               nLenLinearCTStereoCarbInv;
252     int               nMaxLenLinearCTStereoCarb;  /* new */
253 
254     int               nLenLinearCTIsotopic;
255     int               nMaxLenLinearCTIsotopic;
256 
257     int               nLenLinearCTIsotopicTautomer;
258     int               nMaxLenLinearCTIsotopicTautomer;
259 
260     int               nLenLinearCT;         /* connection table only  */
261     int               nLenLinearCT2;        /* connection table only, non-isotopic result  */
262     int               nLenLinearCTAtOnly;   /* connection table only without tautomeric pseudoatoms  */
263     int               nLenLinearCTAtOnly2;  /* connection table only, non-isotopic result without tautomeric pseudoatoms  */
264     int               nMaxLenLinearCT;      /* connection table only  */
265 
266     int               nLenLinearCTTautomer;
267     int               nMaxLenLinearCTTautomer;
268 
269     int               bCmpIsotopicStereo; /* 0 => no stereo to invert;
270                                              1 => StereoCtInv < StereoCt;
271                                              2 => StereoCtInv = StereoCt;
272                                              3 => StereoCtInv > StereoCt;
273                                            */
274     int               nLenLinearCTIsotopicStereoDble;
275     int               nLenLinearCTIsotopicStereoDbleInv;
276     int               nMaxLenLinearCTIsotopicStereoDble;
277 
278     int               nLenLinearCTIsotopicStereoCarb; /*  new */
279     int               nLenLinearCTIsotopicStereoCarbInv; /*  new */
280     int               nMaxLenLinearCTIsotopicStereoCarb;
281     S_CHAR           *bRankUsedForStereo;  /* canon. rank used for stereo mapping */
282     S_CHAR           *bAtomUsedForStereo;  /* 0 if not a stereo atom or during a canon. rank being mapped on this atom; */
283                                            /* STEREO_AT_MARK if an unpapped stereogenic atom */
284                                            /* or a number of stereogenic bonds adjacent to an atom */
285 
286     AT_RANK          *nPrevAtomNumber;
287 
288     AT_RANK          *nCanonOrd;       /* atom numbers in order of increasing canon. ranks  */
289     AT_RANK          *nSymmRank;       /* symmetry numbers in order of atoms  */
290     AT_RANK          *nCanonOrdTaut;   /* t-group numbers numbers in order of increasing canon. ranks  */
291     AT_RANK          *nSymmRankTaut;   /* t-group symmetry numbers in order of t-groups  */
292 
293     AT_RANK          *nCanonOrdStereo;     /* atom numbers in order of increasing canon. ranks */
294     AT_RANK          *nCanonOrdStereoInv;     /* atom numbers in order of increasing canon. ranks */
295     AT_RANK          *nCanonOrdStereoTaut; /* t-group numbers in order of increasing canon. ranks */
296 
297     AT_RANK          *nSymmRankIsotopic;
298     AT_RANK          *nCanonOrdIsotopic;        /* atom numbers in order of increasing canon. ranks */
299     AT_RANK          *nSymmRankIsotopicTaut;    /* !!! */
300     AT_RANK          *nCanonOrdIsotopicTaut;    /*/ t-group numbers in order of increasing canon. ranks */
301 
302     AT_RANK          *nCanonOrdIsotopicStereo;
303     AT_RANK          *nCanonOrdIsotopicStereoInv;
304     AT_RANK          *nCanonOrdIsotopicStereoTaut;    /*  !!! */
305 
306                       /* actual lengths if successfully calculated */
307 
308     int               nLenCanonOrd;               /* Superceded by any of the following > 0 */
309     int               nLenCanonOrdTaut;           /* !!! Superceded by any of the following > 0 */
310     int               nLenCanonOrdIsotopic;
311     int               nLenCanonOrdIsotopicTaut;   /* !!! */
312     int               nLenCanonOrdStereo;
313     int               nLenCanonOrdStereoTaut;     /* !!! */
314     int               nLenCanonOrdIsotopicStereo;
315     int               nLenCanonOrdIsotopicStereoTaut; /* !!! */
316 
317                       /*  other */
318 
319     int               bHasIsotopicInTautomerGroups;
320     T_GROUP_INFO     *t_group_info;
321     int               bIgnoreIsotopic;
322     int               bDoubleBondSquare; /* 0 or 2 */
323     INCHI_MODE         nMode;
324 #if ( bRELEASE_VERSION == 0 )
325     int               bExtract;          /* for debug only */
326 #endif
327     NEIGH_LIST       *NeighList;
328     BCN              *pBCN;
329     S_CHAR    *nNum_H;      /* number of terminal hydrogen atoms on each atom except tautomeric [num_atoms], in order of canonical numbers */
330     S_CHAR    *nNum_H_fixed;/* number of terminal hydrogen atoms on tautomeric atoms (for non-atautomeric representation) [num_atoms] */
331     S_CHAR    *nExchgIsoH;
332 } CANON_STAT;
333 
334 /**************************************************/
335 typedef struct tagCanonData {
336 
337     /* same names/types as in ConTable; here the order is from original numbering */
338 
339     AT_NUMB *LinearCT;  /* output ?? */
340 
341     int      nMaxLenLinearCT;
342     int      nLenLinearCT;
343     int      nLenCTAtOnly;
344     int      nCanonFlags;
345     /* hydrogen atoms fixed in tautomeric representation:
346        compare before diff sign inversion: (+) <=> Ct1->() > Ct2->() */
347     NUM_H          *NumH;
348     int             lenNumH;    /* used length */
349     int             maxlenNumH; /*  n + T_NUM_NO_ISOTOPIC*(n_tg-n) + 1 */
350 
351     /* hydrogen atoms fixed in non-tautomeric representation only:
352        compare before diff sign inversion: (+) <=> Ct1->() > Ct2->() */
353     NUM_H           *NumHfixed;
354     int              lenNumHfixed;       /* used length */
355     int              maxlenNumHfixed;    /* max length = n+1  */
356 
357     /* isotopic atoms (without tautomeric H) and isotopic tautomeric groups */
358     /* note: AT_ISO_SORT_KEY and T_GROUP_ISOWT are identical types: long    */
359     AT_ISO_SORT_KEY *iso_sort_key;
360     int              len_iso_sort_key;    /* used length */
361     int              maxlen_iso_sort_key; /* max length = n_tg+1 */
362     S_CHAR          *iso_exchg_atnos;
363     int              len_iso_exchg_atnos;    /* used length */
364     int              maxlen_iso_exchg_atnos;
365 
366     /* isotopic hydrogen atoms fixed in non-tautomeric representation only */
367 #if ( USE_ISO_SORT_KEY_HFIXED == 1 )
368     AT_ISO_SORT_KEY *iso_sort_key_Hfixed;
369     int              len_iso_sort_key_Hfixed;    /* used length */
370     int              maxlen_iso_sort_key_Hfixed; /* max length = n+1  */
371 #endif
372     /* auxiliary ranking */
373 
374     AT_RANK  *nAuxRank;
375 
376     struct tagInchiTime *ulTimeOutTime;  /* timeout */
377 
378 } CANON_DATA;
379 /**************************************************/
380 
/* Canonicalization counters. The first five members share their names with
   the statistics counters of CANON_STAT; dGroupSize is the only
   floating-point member. */
struct tagCanonCounts {
    long    lNumBreakTies;
    long    lNumDecreasedCT;
    long    lNumRejectedCT;
    long    lNumEqualCT;
    long    lNumTotCT;
    double  dGroupSize;
    long    lNumGenerators;
    long    lNumStoredIsomorphisms;
};
typedef struct tagCanonCounts CANON_COUNTS;
392 /***********************************************
393  tree structure: one segment
394 
395    canon. rank
396    at.no          orig. atom numbers on which the canon. rank has been successfully mapped
397    ...
398    at.no          except the last at.no: it is not known if it has been mapped until all atoms are mapped
399    num.at+1       number of atoms in this segment
400 */
401 
402 typedef struct tagCurTree {
403     AT_NUMB   *tree;
404     int       max_len;  /* allocated length of tree in sizeof(tree[0]) units */
405     int       cur_len;  /* currently used length */
406     int       incr_len; /* reallocation increment */
407 } CUR_TREE;
408 
409 #endif /* __INCHICANT_H__ */
410