1 /**********************************************************************
2 Copyright (C) 2005,2006,2007 Chris Morley
3 
4 Based on the IUPAC InChI reference software, which is distributed
5 under the GNU LGPL:
6 Copyright (C) 2005 The International Union of Pure and Applied Chemistry
7 IUPAC International Chemical Identifier (InChI) (contact:secretariat@iupac.org)
8 
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation version 2 of the License.
12 
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 ***********************************************************************/
18 #include <openbabel/babelconfig.h>
19 #include <openbabel/mol.h>
20 #include <openbabel/atom.h>
21 #include <openbabel/bond.h>
22 #include <openbabel/obiter.h>
23 #include <openbabel/obconversion.h>
24 #include <openbabel/obmolecformat.h>
25 #include <openbabel/generic.h>
26 
27 #include "inchi_api.h"
28 #include <sstream>
29 #include <set>
30 #include <vector>
31 #include <iterator>
32 #include <openbabel/inchiformat.h>
33 #include <openbabel/stereo/tetrahedral.h>
34 #include <openbabel/stereo/cistrans.h>
35 #include <openbabel/elements.h>
36 
37 using namespace std;
38 namespace OpenBabel
39 {
40 extern string GetInChI(istream& is);
41 
42 //Make an instance of the format class
43 InChIFormat theInChIFormat;
44 
45 // The average molecular masses used by InChI are listed in util.c or the InChI Technical Manual Appendix 1
46 const unsigned int MAX_AVG_MASS = 134;
47 const unsigned int inchi_avg_mass[MAX_AVG_MASS+1] = {0, 1, 4, 7, 9, 11, 12, 14, 16, 19, 20, 23, 24, 27, 28, 31, 32, 35, 40, 39, 40, 45, 48,
48    51, 52, 55, 56, 59, 59, 64, 65, 70, 73, 75, 79, 80, 84, 85, 88, 89, 91, 93, 96, 98, 101, 103, 106, 108, 112, 115, 119, 122, 128,
49    127, 131, 133, 137, 139, 140, 141, 144, 145, 150, 152, 157, 159, 163, 165, 167, 169, 173, 175, 178, 181, 184, 186, 190, 192, 195,
50    197, 201, 204, 207, 209, 209, 210, 222, 223, 226, 227, 232, 231, 238, 237, 244, 243, 247, 247, 251, 252, 257, 258, 259, 260, 261,
51    268, 271, 267, 277, 276, 281, 280, 285};
52 
GetInChIAtomicMass(unsigned int atomicnum)53 static unsigned int GetInChIAtomicMass(unsigned int atomicnum)
54 {
55   if (atomicnum < MAX_AVG_MASS)
56     return inchi_avg_mass[atomicnum]; // the correct value
57   else // fallback to our internal values
58     return (unsigned int)(OBElements::GetMass(atomicnum) + 0.5);
59 }
60 
61 /////////////////////////////////////////////////////////////////
ReadMolecule(OBBase * pOb,OBConversion * pConv)62 bool InChIFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv)
63 {
64   OBMol* pmol = pOb->CastAndClear<OBMol>();
65   if (pmol == nullptr) return false;
66   istream &ifs = *pConv->GetInStream();
67 
68   //Extract InChI from input stream even if it is split
69   string inchi;
70   do
71   {
72     inchi = GetInChI(ifs);
73     if(inchi.empty())
74       return false; //eof
75   }while(inchi.size()<9); //ignore empty "InChI=" or "InChI=1/"
76 
77   // Save the original inchi to be used on output,
78   // avoiding conversion to and from OBMol
79   SaveInchi(pmol, inchi);
80 
81   //Set up input struct
82   inchi_InputINCHI inp;
83 
84   char* opts= GetInChIOptions(pConv, true);
85   inp.szOptions = opts;
86 
87   char* nonconstinchi = new char[inchi.size()+1];
88   inp.szInChI = strcpy(nonconstinchi, inchi.c_str());
89 
90   inchi_OutputStruct out;
91   memset(&out, 0, sizeof(out));
92 
93   //Call the conversion routine in InChI code
94   int ret = GetStructFromINCHI( &inp, &out );
95   delete[] nonconstinchi;
96   delete[] opts;
97 
98   if (ret!=inchi_Ret_OKAY)
99   {
100     string mes = out.szMessage;
101     if (!mes.empty()) {
102       Trim(mes);
103       obErrorLog.ThrowError("InChI code", "For " + inchi + "\n  " + mes, obWarning);
104     }
105     if (ret!=inchi_Ret_WARNING)
106     {
107       obErrorLog.ThrowError("InChI code", "Reading InChI failed", obError);
108       return false;
109     }
110   }
111 
112   //Read name if requested e.g InChI=1/CH4/h1H4 methane
113   //OR InChI=1/CH4/h1H4 "First alkane"  Quote can be any punct char and
114   //uses up to the end of the line if second quote is not found
115   if(pConv->IsOption("n",OBConversion::INOPTIONS))
116   {
117     string name;
118     if(getline(ifs, name))
119       pmol->SetTitle(Trim(name));
120   }
121 
122   //Option to add InChI text to title
123   if(pConv->IsOption("a",OBConversion::INOPTIONS))
124   {
125     string title(pmol->GetTitle());
126     title += ' ' + inchi;
127     pmol->SetTitle(title);
128   }
129 
130   //Translate the returned structure into OBMol
131   pmol->SetDimension(0);
132   pmol->BeginModify();
133   int i;
134   //Make all atoms first because we need pointers to later ones
135   for(i=0;i<out.num_atoms;++i)
136   {
137     pmol->NewAtom();
138   }
139   for(i=0;i<out.num_atoms;++i)
140   {
141     OBAtom* patom = pmol->GetAtom(i+1); //index starts at 1
142     inchi_Atom* piat = &out.atom[i];
143     unsigned int atomicnum = OBElements::GetAtomicNum(piat->elname);
144     patom->SetAtomicNum(atomicnum);
145     if(piat->isotopic_mass)
146       patom->SetIsotope(piat->isotopic_mass - ISOTOPIC_SHIFT_FLAG + GetInChIAtomicMass(atomicnum));
147 
148     patom->SetSpinMultiplicity(piat->radical);
149     patom->SetFormalCharge(piat->charge);
150 //    patom->SetVector(piat->x,piat->y,piat->z);
151 
152     int j;
153     for(j=0;j<piat->num_bonds;++j)
154     {
155       if (i < piat->neighbor[j]) // Only add the bond in one direction
156         pmol->AddBond(i+1, piat->neighbor[j]+1, piat->bond_type[j]);
157     }
158 
159     //Now use the implicit H info provided by InChI code to make explicit H in OBMol,
160     //assign spinMultiplicity, then remove the hydrogens to be consistent with old way.
161     //Add implicit hydrogen. m=0 is non-istopic H m=1,2,3 are isotope specified
162     patom->SetImplicitHCount(piat->num_iso_H[0]);
163     for (int m=1; m<=3; ++m) {
164       if (piat->num_iso_H[m]) {
165         for (int k=0; k<piat->num_iso_H[m]; ++k) {
166           OBAtom* DorT = pmol->NewAtom();
167           DorT->SetAtomicNum(1);
168           DorT->SetIsotope(m);
169           pmol->AddBond(i+1, pmol->NumAtoms(), 1);
170         }
171       }
172     }
173   }
174   // pmol->AssignSpinMultiplicity(true); //true means no implicit H (TODO - Spin stuff)
175 
176   //***@todo implicit H isotopes
177   //Stereochemistry
178   for(i=0;i<out.num_stereo0D;++i)
179   {
180     inchi_Stereo0D& stereo = out.stereo0D[i];
181 
182     switch(stereo.type)
183     {
184     case INCHI_StereoType_DoubleBond:
185     {
186       OBCisTransStereo::Config *ct = new OBCisTransStereo::Config;
187 
188       ct->begin = stereo.neighbor[1];
189       ct->end = stereo.neighbor[2];
190       unsigned long start = OBStereo::ImplicitRef;
191       unsigned long end = OBStereo::ImplicitRef;
192       FOR_NBORS_OF_ATOM(a, pmol->GetAtom(ct->begin + 1)) {
193         if ( !(a->GetId() == ct->end || a->GetId() == stereo.neighbor[0] ) ) {
194           start = a->GetId();
195           break;
196         }
197       }
198       FOR_NBORS_OF_ATOM(b, pmol->GetAtom(ct->end + 1)) {
199         if ( !(b->GetId() == ct->begin || b->GetId() == stereo.neighbor[3] ) ) {
200           end = b->GetId();
201           break;
202         }
203       }
204       ct->refs = OBStereo::MakeRefs(stereo.neighbor[0], start, stereo.neighbor[3], end);
205 
206       if(stereo.parity==INCHI_PARITY_EVEN)
207         ct->shape = OBStereo::ShapeU;
208       else if(stereo.parity==INCHI_PARITY_ODD)
209         ct->shape = OBStereo::ShapeZ;
210       else
211         ct->specified = false;
212 
213       OBCisTransStereo *obct = new OBCisTransStereo(pmol);
214       obct->SetConfig(*ct);
215       pmol->SetData(obct);
216 
217       //InChI seems to prefer to define double bond stereo using H atoms.
218 
219       break;
220     }
221     case INCHI_StereoType_Tetrahedral:
222     {
223       OBTetrahedralStereo::Config *ts = new OBTetrahedralStereo::Config;
224       ts->center = stereo.central_atom;
225       ts->from = stereo.neighbor[0];
226       if (ts->from == ts->center) // Handle the case where there are only three neighbours
227         ts->from = OBStereo::ImplicitRef;
228       ts->refs = OBStereo::MakeRefs(stereo.neighbor[1], stereo.neighbor[2],
229                                     stereo.neighbor[3]);
230 
231       if(stereo.parity==INCHI_PARITY_EVEN)
232         ts->winding = OBStereo::Clockwise;
233       else if(stereo.parity==INCHI_PARITY_ODD)
234         ts->winding = OBStereo::AntiClockwise;
235       else
236         ts->specified = false;
237 
238       OBTetrahedralStereo *obts = new OBTetrahedralStereo(pmol);
239       obts->SetConfig(*ts);
240       pmol->SetData(obts);
241 
242       break;
243     }
244 
245     case INCHI_StereoType_Allene:
246     default:
247       obErrorLog.ThrowError("InChI code", "Unsupported stereo type has been ignored.", obWarning);
248     }
249   }
250 
251   pmol->DeleteHydrogens(); // Explicit H included for stereo H
252 
253   pmol->EndModify();
254   pmol->SetChiralityPerceived();
255 
256   FreeStructFromINCHI( &out );
257   return true;
258 }
259 
SkipObjects(int n,OBConversion * pConv)260 int InChIFormat::SkipObjects(int n, OBConversion* pConv)
261 {
262   istream& ifs = *pConv->GetInStream();
263   string inchi;
264   while(ifs.good() && n)
265   {
266     inchi = GetInChI(ifs);
267     if(inchi.size()>=8)//ignore empty "InChI=" or "InChI=1/"
268       --n;
269   }
270   return ifs.good() ? 1 : -1;
271 }
272 
273 // Convert the atom Ids used by the stereorefs to inchi atom ids
OBAtomIdToInChIAtomId(OBMol & mol,OBStereo::Ref atomid)274 static AT_NUM  OBAtomIdToInChIAtomId(OBMol &mol, OBStereo::Ref atomid)
275 {
276   return (AT_NUM)(mol.GetAtomById(atomid)->GetIdx() - 1);
277 }
278 
279 /////////////////////////////////////////////////////////////////
WriteMolecule(OBBase * pOb,OBConversion * pConv)280 bool InChIFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
281 {
282   //Although the OBMol may be altered, it is restored before exit.
283   OBMol* pmol = dynamic_cast<OBMol*>(pOb);
284   if (pmol == nullptr) return false;
285     OBMol& mol = *pmol;
286 
287   string ostring; //the inchi string
288   inchi_Output inout;
289   inout.szInChI = nullptr; // We are going to test this value later
290 
291   stringstream molID;
292   if(strlen(mol.GetTitle())==0)
293     molID << '#' << pConv->GetOutputIndex() << ' ';
294   else
295     molID << mol.GetTitle() << ' ';
296   if(pConv->GetOutputIndex()==1)
297     firstID=molID.str();
298 
299   //Use any existing InChI, probably from an InChIformat input,
300   //in preference to determining it from the structure.
301   //but only if no InChI output option has been specified that would
302   //modify a standard InChI
303   if (pmol->HasData("inchi") && pConv->IsOption("r") == nullptr && pConv->IsOption("a") == nullptr &&
304     pConv->IsOption("s") == nullptr && pConv->IsOption("X") == nullptr && pConv->IsOption("F") == nullptr &&
305     pConv->IsOption("M") == nullptr)
306   {
307     //All origins for the data are currently acceptable.
308     //Possibly this may need to be restricted to data with a local origin.
309     ostring = pmol->GetData("inchi")->GetValue();
310   }
311   else
312   {
313     //Determine InChI from the chemical structure
314     if(pmol->NumAtoms()==0) return true; // PR#2864334
315 
316     inchi_Input inp;
317     memset(&inp,0,sizeof(inchi_Input));
318 
319     // Prepare stereo information for 2D, 3D
320     map<OBBond*, OBStereo::BondDirection> updown;
321     map<OBBond*, OBStereo::Ref> from;
322     map<OBBond*, OBStereo::Ref>::const_iterator from_cit;
323     if (mol.GetDimension() == 3 || (mol.GetDimension() == 2 && pConv->IsOption("s", pConv->OUTOPTIONS) != nullptr))
324       TetStereoToWedgeHash(mol, updown, from);
325     set<OBBond*> unspec_ctstereo = GetUnspecifiedCisTrans(mol);
326 
327     OBAtom* patom;
328     vector<inchi_Atom> inchiAtoms(mol.NumAtoms());
329     vector<OBNodeBase*>::iterator itr;
330     for (patom = mol.BeginAtom(itr);patom;patom = mol.NextAtom(itr))
331     {
332       //OB atom index starts at 1; inchi atom index starts at 0
333       inchi_Atom& iat = inchiAtoms[patom->GetIdx()-1];
334       memset(&iat,0,sizeof(inchi_Atom));
335 
336       iat.x = patom->GetX();
337       iat.y = patom->GetY();
338       iat.z = patom->GetZ();
339 
340       int nbonds = 0;
341       vector<OBBond*>::iterator itr;
342       OBBond *pbond;
343       for (pbond = patom->BeginBond(itr);pbond;pbond = patom->NextBond(itr))
344       {
345         from_cit = from.find(pbond);
346         // Do each bond only once. If the bond is a stereobond
347         // ensure that the BeginAtom is the 'from' atom.
348         if( (from_cit==from.end() && patom->GetIdx() != pbond->GetBeginAtomIdx()) ||
349             (from_cit!=from.end() && from_cit->second != patom->GetId()) )
350           continue;
351 
352         iat.neighbor[nbonds]      = pbond->GetNbrAtomIdx(patom)-1;
353         int bo = pbond->GetBondOrder();
354         if(bo==5)
355           bo=4;
356         iat.bond_type[nbonds]     = bo;
357 
358         OBStereo::BondDirection stereo = OBStereo::NotStereo;
359         if (mol.GetDimension()==2 && pConv->IsOption("s", pConv->OUTOPTIONS) == nullptr) {
360           if (pbond->IsWedge())
361             stereo = OBStereo::UpBond;
362           else if (pbond->IsHash())
363             stereo = OBStereo::DownBond;
364           else if (pbond->IsWedgeOrHash())
365             stereo = OBStereo::UnknownDir;
366         }
367         else if (from_cit!=from.end()) { // It's a stereo bond
368           stereo = updown[pbond];
369         }
370 
371         if (mol.GetDimension() != 0) {
372           inchi_BondStereo2D bondstereo2D = INCHI_BOND_STEREO_NONE;
373           if (stereo != OBStereo::NotStereo) {
374             switch (stereo) {
375               case OBStereo::UpBond:
376                 bondstereo2D = INCHI_BOND_STEREO_SINGLE_1UP;
377                 break;
378               case OBStereo::DownBond:
379                 bondstereo2D = INCHI_BOND_STEREO_SINGLE_1DOWN;
380                 break;
381               case OBStereo::UnknownDir:
382                 bondstereo2D = INCHI_BOND_STEREO_SINGLE_1EITHER;
383                 break;
384               default:
385                 ; // INCHI_BOND_STEREO_NONE
386             }
387           }
388           // Is it a double bond with unspecified stereochemistry?
389           if (unspec_ctstereo.find(pbond)!=unspec_ctstereo.end())
390             bondstereo2D = INCHI_BOND_STEREO_DOUBLE_EITHER;
391           iat.bond_stereo[nbonds] = bondstereo2D;
392         }
393         nbonds++;
394       }
395 
396       strcpy(iat.elname,OBElements::GetSymbol(patom->GetAtomicNum()));
397       iat.num_bonds = nbonds;
398       iat.num_iso_H[0] = patom->GetImplicitHCount();
399       if(patom->GetIsotope())
400       {
401         iat.isotopic_mass = ISOTOPIC_SHIFT_FLAG +
402           patom->GetIsotope() - GetInChIAtomicMass(patom->GetAtomicNum());
403       }
404       else
405         iat.isotopic_mass = 0 ;
406       iat.radical = patom->GetSpinMultiplicity();
407       //InChI doesn't recognize spin miltiplicity of 4 or 5 (as used in OB for CH and C atom)
408       if(iat.radical>=4)
409         iat.radical=0;
410       iat.charge  = patom->GetFormalCharge();
411     }
412 
413     inp.atom = &inchiAtoms[0];
414 
415     vector<inchi_Stereo0D> stereoVec;
416 
417     if(mol.GetDimension()==0)
418     {
419       std::vector<OBGenericData*>::iterator data;
420       std::vector<OBGenericData*> stereoData = mol.GetAllData(OBGenericDataType::StereoData);
421       for (data = stereoData.begin(); data != stereoData.end(); ++data) {
422         if (static_cast<OBStereoBase*>(*data)->GetType() == OBStereo::Tetrahedral) {
423           OBTetrahedralStereo *ts = dynamic_cast<OBTetrahedralStereo*>(*data);
424           OBTetrahedralStereo::Config config = ts->GetConfig();
425 
426           if(config.specified) {
427             inchi_Stereo0D stereo;
428             stereo.type = INCHI_StereoType_Tetrahedral;
429             stereo.central_atom = OBAtomIdToInChIAtomId(mol, config.center);
430 
431             // count number of implicit refs
432             int num_implicit = 0;
433             if (config.from == OBStereo::ImplicitRef)
434               num_implicit = 1;
435             num_implicit += std::count(config.refs.begin(), config.refs.end(), OBStereo::ImplicitRef);
436 
437             // ignore invalid cases with more than 1 implicit ref
438             if (num_implicit > 1)
439               continue;
440 
441             if (num_implicit) {
442               config = ts->GetConfig(OBStereo::ImplicitRef); // Make the 'from' atom the implicit ref
443               stereo.neighbor[0] = stereo.central_atom;
444             } else
445               stereo.neighbor[0] = OBAtomIdToInChIAtomId(mol, config.from);
446 
447             for(int i=0; i<3; ++i)
448               stereo.neighbor[i + 1] = OBAtomIdToInChIAtomId(mol, config.refs[i]);
449 
450             if (config.winding == OBStereo::Clockwise)
451               stereo.parity = INCHI_PARITY_EVEN;
452             else
453               stereo.parity = INCHI_PARITY_ODD;
454 
455             stereoVec.push_back(stereo);
456           }
457         }
458       }
459 
460       //Double bond stereo (still inside 0D section)
461       //Currently does not handle cumulenes
462       for (data = stereoData.begin(); data != stereoData.end(); ++data) {
463         if (static_cast<OBStereoBase*>(*data)->GetType() == OBStereo::CisTrans) {
464           OBCisTransStereo *ts = dynamic_cast<OBCisTransStereo*>(*data);
465           OBCisTransStereo::Config config = ts->GetConfig();
466 
467           if(config.specified) {
468             inchi_Stereo0D stereo;
469             stereo.central_atom = NO_ATOM;
470             stereo.type = INCHI_StereoType_DoubleBond;
471             OBStereo::Refs refs = config.refs;
472 
473             // ignore invalid cases with 2 implicit refs on one side
474             if (refs[0] == OBStereo::ImplicitRef && refs[1] == OBStereo::ImplicitRef)
475               continue;
476             if (refs[2] == OBStereo::ImplicitRef && refs[3] == OBStereo::ImplicitRef)
477               continue;
478 
479             unsigned long start = refs[0];
480             if (refs[0]==OBStereo::ImplicitRef)
481               start = refs[1];
482             unsigned long end = refs[3];
483             if (refs[3]==OBStereo::ImplicitRef)
484               end = refs[2];
485 
486             stereo.neighbor[0] = OBAtomIdToInChIAtomId(mol, start);
487             stereo.neighbor[1] = OBAtomIdToInChIAtomId(mol, config.begin);
488             stereo.neighbor[2] = OBAtomIdToInChIAtomId(mol, config.end);
489             stereo.neighbor[3] = OBAtomIdToInChIAtomId(mol, end);
490 
491             if (ts->IsTrans(start, end))
492               stereo.parity = INCHI_PARITY_EVEN;
493             else
494               stereo.parity = INCHI_PARITY_ODD;
495 
496             stereoVec.push_back(stereo);
497           }
498         }
499       }
500     }
501 
502     char* opts = GetInChIOptions(pConv, false);
503     inp.szOptions = opts;
504 
505     inp.num_atoms = mol.NumAtoms();
506     inp.num_stereo0D = (AT_NUM) stereoVec.size();
507     if(inp.num_stereo0D>0)
508       inp.stereo0D = &stereoVec[0];
509 
510     //inchi_Output inout; now declared in block above
511     memset(&inout,0,sizeof(inchi_Output));
512 
513     int ret = GetINCHI(&inp, &inout);
514 
515     delete[] opts;
516     if(ret!=inchi_Ret_OKAY)
517     {
518       if(inout.szMessage)
519       {
520         string mes(inout.szMessage);
521         if(pConv->IsOption("w"))
522         {
523           string::size_type pos;
524           string targ[4];
525           targ[0] = "Omitted undefined stereo";
526           targ[1] = "Charges were rearranged";
527           targ[2] = "Proton(s) added/removed";
528           targ[3] = "Metal was disconnected";
529           for(int i=0;i<4;++i)
530           {
531             pos = mes.find(targ[i]);
532             if(pos!=string::npos)
533             {
534               mes.erase(pos,targ[i].size());
535               if(pos<mes.size() && mes[pos]==';')
536                 mes[pos]=' ';
537             }
538           }
539         }
540         Trim(mes);
541         if(!mes.empty())
542           obErrorLog.ThrowError("InChI code", molID.str() + ':' + mes, obWarning);
543       }
544 
545       if(ret!=inchi_Ret_WARNING)
546       {
547         obErrorLog.ThrowError("InChI code", "InChI generation failed", obError);
548         FreeStdINCHI(&inout);
549         return false;
550       }
551     }
552     ostring = inout.szInChI;
553   }
554 
555   //Truncate the InChI if requested
556   const char* truncspec = pConv->IsOption("T");
557   if(truncspec)
558   {
559     string trunc(truncspec);
560     EditInchi(ostring, trunc);
561   }
562 
563   if(pConv->IsOption("K")) //Generate InChIKey and add after InChI on same line
564   {
565     char szINCHIKey[28];
566     GetINCHIKeyFromINCHI(ostring.c_str(), 0 ,0, szINCHIKey, nullptr, nullptr);
567     ostring = szINCHIKey;
568   }
569 
570   if(pConv->IsOption("t"))
571   {
572     ostring += ' ';
573     ostring +=  pmol->GetTitle();
574   }
575 
576   ostream &ofs = *pConv->GetOutStream();
577 
578   if(pConv->IsOption("U"))
579   {
580     if(pConv->GetOutputIndex()==1)
581       allInchi.clear();
582     //Just add to set and don't output, except at the end
583     allInchi.insert(ostring);
584 
585     if(pConv->IsLast())
586     {
587       nSet::iterator itr;
588       for(itr=allInchi.begin();itr!=allInchi.end();++itr)
589         ofs << *itr << endl;
590     }
591     return true;
592   }
593   else if(pConv->IsOption("u"))
594   {
595     if(pConv->GetOutputIndex()==1)
596       allInchi.clear();
597     if(!allInchi.insert(ostring).second)
598       return true; //no output if already in set
599   }
600 
601   ofs << ostring << endl;
602 
603   // Note that inout.szInChI will still be NULL if this is an InChI->InChIKey conversion
604   // and so the following section will not apply.
605   if (inout.szInChI != nullptr) {
606     if (pConv->IsOption("a"))
607       ofs << inout.szAuxInfo << endl;
608 
609     if(pConv->IsOption("l"))
610       //Display InChI log message. With multiple molecules, it appears only once
611       obErrorLog.ThrowError("InChI log", inout.szLog, obError, onceOnly);
612 
613     if(pConv->IsOption("e"))
614     {
615       if(pConv->GetOutputIndex()==1)
616         firstInchi = inout.szInChI;
617       else
618       {
619         ofs << "Molecules " << firstID << "and " << molID.str();
620         ofs << InChIErrorMessage(CompareInchi(firstInchi.c_str(), inout.szInChI)) << endl;
621       }
622     }
623     FreeStdINCHI(&inout);
624   }
625 
626   return true;
627 }
628 
629 //////////////////////////////////////////////////////////
CompareInchi(const string & Inchi1,const string & Inchi2)630 char InChIFormat::CompareInchi(const string& Inchi1, const string& Inchi2)
631 {
632   //Returns 0 if identical or an char identifying the layer where they first differed
633   string s1(Inchi1), s2(Inchi2);
634 
635   if(s1.size()<s2.size())
636     s1.swap(s2);
637   string::size_type pos;
638   for(pos=0;pos<s1.size();++pos)
639   {
640     if(pos==s2.size() || s1[pos]!=s2[pos])
641       return s1[s1.rfind('/',pos)+1];
642   }
643   return 0;
644 
645 /*  //Remove anything after the end of the Inchi
646   string::size_type pos;
647   pos = s1.find_first_of(" \t\n");
648   if(pos!=string::npos)
649     s1.erase(pos);
650   pos = s2.find_first_of(" \t\n");
651   if(pos!=string::npos)
652     s2.erase(pos);
653 
654   vector<string> layers1, layers2;
655   tokenize(layers1,s1,"/\n");
656   tokenize(layers2,s2,"/\n");
657   unsigned int i;
658   if(layers1.size()<layers2.size())
659     layers1.swap(layers2); //layers1 is the longest
660 
661   for(i=1;i<layers2.size();++i)
662   {
663     if(layers1[i]!=layers2[i])
664     {
665       char ch = '+';
666       if(i>1) //not formula layer
667         ch=layers1[i][0];
668       return ch;
669     }
670   }
671   if(layers1.size()==layers2.size())
672     return 0;
673   else
674     return layers1[i][0];
675 */
676 }
677 
InChIErrorMessage(const char ch)678 string InChIFormat::InChIErrorMessage(const char ch)
679 {
680   string s;
681   switch (ch)
682   {
683   case 0:
684     s = " are identical";
685     break;
686   case '+':
687     s = " have different formulae";
688     break;
689   case 'c':
690     s = " have different connection tables";
691     break;
692   case 'h':
693     s = " have different bond orders, or radical character";
694     break;
695   case 'q':
696     s = " have different charges";
697     break;
698   case 'p':
699     s = " have different numbers of attached protons";
700     break;
701   case 'b':
702     s = " have different double bond stereochemistry";
703     break;
704   case 'm':
705   case 't':
706     s = " have different sp3 stereochemistry";
707     break;
708   case 'i':
709     s = " have different isotopic composition";
710     break;
711   default:
712     s = " are different";
713   }
714   return s;
715 }
716 
GetCommonAtom(OBBond * pb1,OBBond * pb2)717 OBAtom* InChIFormat::GetCommonAtom(OBBond* pb1, OBBond* pb2)
718 {
719   OBAtom* pa1 = pb1->GetBeginAtom();
720   if(pa1==pb2->GetBeginAtom() || pa1==pb2->GetEndAtom())
721     return pa1;
722   pa1 = pb1->GetEndAtom();
723   if(pa1==pb2->GetBeginAtom() || pa1==pb2->GetEndAtom())
724     return pa1;
725   return nullptr; //not adjacent bonds
726 }
727 
728 
729 //Returns pointer to InChI options string, which needs to be deleted with delete[]
730 //If there are no options returns an empty string
GetInChIOptions(OBConversion * pConv,bool Reading)731 char* InChIFormat::GetInChIOptions(OBConversion* pConv, bool Reading)
732 {
733   vector<string> optsvec; //the InChi options
734   /* In Standard InChI these are not used
735   if(!Reading && !pConv->IsOption("n"))
736     //without -xn option, the default is to write using these 'recommended' options
737     tokenize(optsvec, "FixedH RecMet SPXYZ SAsXYZ Newps Fb Fnud");
738   */
739   char* opts;
740   OBConversion::Option_type opttyp = Reading ? OBConversion::INOPTIONS : OBConversion::OUTOPTIONS;
741   const char* copts = pConv->IsOption("X", opttyp);
742   if(copts)
743   {
744     string tmp(copts); // GCC doesn't like passing string temporaries to functions
745     vector<string> useropts;
746     tokenize(useropts, tmp);
747     copy(useropts.begin(), useropts.end(), back_inserter(optsvec));
748   }
749 
750   //Add a couple InChI options built in to OB
751   if(opttyp==OBConversion::OUTOPTIONS)
752   {
753     if(pConv->IsOption("F", opttyp))
754     {
755       string tmp2("FixedH");
756       optsvec.push_back(tmp2);
757     }
758     if(pConv->IsOption("M", opttyp))
759     {
760       string tmp2("RecMet");
761       optsvec.push_back(tmp2);
762     }
763   }
764 
765 
766 #ifdef WIN32
767     string ch(" /");
768 #else
769     string ch(" -");
770 #endif
771 
772   string sopts;
773   for (unsigned int i = 0; i < optsvec.size(); ++i)
774     sopts += ch + optsvec[i];
775   opts = new char[strlen(sopts.c_str())+1]; //has to be char, not const char
776   return strcpy(opts, sopts.c_str());
777   opts = new char[1];
778   *opts = '\0';
779   return opts;
780 }
781 
EditInchi(std::string & inchi,std::string & spec)782 bool InChIFormat::EditInchi(std::string& inchi, std::string& spec)
783 {
784   std::vector<std::string> vec;
785   std::vector<std::string>::iterator itr;
786   tokenize(vec, spec, " \t/");
787   for(itr=vec.begin();itr!=vec.end();++itr)
788   {
789     if(*itr=="formula")
790     {
791       std::string::size_type pos = inchi.find('/', inchi.find('/')+1); //2nd /
792       if(pos!=string::npos)
793         inchi.erase(pos);
794     }
795     else if(*itr=="connect")
796       RemoveLayer(inchi,"/h",true);
797     else if(*itr=="nochg")
798     {
799       RemoveLayer(inchi,"/p");
800       RemoveLayer(inchi,"/q");
801     }
802     else if(*itr=="nosp3")
803     {
804       RemoveLayer(inchi,"/t");
805       RemoveLayer(inchi,"/m");
806       RemoveLayer(inchi,"/s");
807     }
808     else if(*itr=="noEZ")
809       RemoveLayer(inchi,"/b");
810     else if(*itr=="noiso")
811       RemoveLayer(inchi,"/i");
812     else if(*itr=="nostereo")
813     {
814       RemoveLayer(inchi,"/t");
815       RemoveLayer(inchi,"/m");
816       RemoveLayer(inchi,"/s");
817       RemoveLayer(inchi,"/b");
818     }
819     else if(!(*itr).empty())
820     {
821       obErrorLog.ThrowError(__FUNCTION__,
822       spec + " not recognized as a truncation specification",obError, onceOnly);
823       return false;
824     }
825   }
826   return true;
827 }
828 
RemoveLayer(std::string & inchi,const std::string & str,bool all)829 void InChIFormat::RemoveLayer (std::string& inchi, const std::string& str, bool all)
830 {
831   std::string::size_type pos = inchi.find(str);
832   if(pos!=string::npos)
833     inchi.erase(pos, (all ? string::npos : inchi.find('/', pos+1) - pos));
834 }
835 
SaveInchi(OBMol * pmol,const std::string & s)836 void InChIFormat::SaveInchi(OBMol* pmol, const std::string& s)
837 {
838   OBPairData* dp = new OBPairData;
839   dp->SetAttribute("inchi");
840   dp->SetValue(s);
841   dp->SetOrigin(local);
842   pmol->SetData(dp);
843 }
844 
845 //************************************************************************
846 InChICompareFormat theInChICompareFormat;
847 
WriteMolecule(OBBase * pOb,OBConversion * pConv)848 bool InChICompareFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
849 {
850   pConv->AddOption("e",OBConversion::OUTOPTIONS);
851   pConv->AddOption("t",OBConversion::OUTOPTIONS);
852   return theInChIFormat.WriteMolecule(pOb,pConv);
853 }
854 
855 //************************************************************************
856 InChIKeyFormat theInChIKeyFormat;
857 
WriteMolecule(OBBase * pOb,OBConversion * pConv)858 bool InChIKeyFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
859 {
860   pConv->AddOption("K",OBConversion::OUTOPTIONS);
861   return theInChIFormat.WriteMolecule(pOb,pConv);
862 }
863 
864 
865 }//namespace OpenBabel
866 
867