1 /**********************************************************************
2 Copyright (C) 2005,2006,2007 Chris Morley
3
4 Based on the IUPAC InChI reference software, which is distributed
5 under the GNU LGPL:
6 Copyright (C) 2005 The International Union of Pure and Applied Chemistry
7 IUPAC International Chemical Identifier (InChI) (contact:secretariat@iupac.org)
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation version 2 of the License.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17 ***********************************************************************/
18 #include <openbabel/babelconfig.h>
19 #include <openbabel/mol.h>
20 #include <openbabel/atom.h>
21 #include <openbabel/bond.h>
22 #include <openbabel/obiter.h>
23 #include <openbabel/obconversion.h>
24 #include <openbabel/obmolecformat.h>
25 #include <openbabel/generic.h>
26
27 #include "inchi_api.h"
28 #include <sstream>
29 #include <set>
30 #include <vector>
31 #include <iterator>
32 #include <openbabel/inchiformat.h>
33 #include <openbabel/stereo/tetrahedral.h>
34 #include <openbabel/stereo/cistrans.h>
35 #include <openbabel/elements.h>
36
37 using namespace std;
38 namespace OpenBabel
39 {
40 extern string GetInChI(istream& is);
41
42 //Make an instance of the format class
43 InChIFormat theInChIFormat;
44
45 // The average molecular masses used by InChI are listed in util.c or the InChI Technical Manual Appendix 1
46 const unsigned int MAX_AVG_MASS = 134;
47 const unsigned int inchi_avg_mass[MAX_AVG_MASS+1] = {0, 1, 4, 7, 9, 11, 12, 14, 16, 19, 20, 23, 24, 27, 28, 31, 32, 35, 40, 39, 40, 45, 48,
48 51, 52, 55, 56, 59, 59, 64, 65, 70, 73, 75, 79, 80, 84, 85, 88, 89, 91, 93, 96, 98, 101, 103, 106, 108, 112, 115, 119, 122, 128,
49 127, 131, 133, 137, 139, 140, 141, 144, 145, 150, 152, 157, 159, 163, 165, 167, 169, 173, 175, 178, 181, 184, 186, 190, 192, 195,
50 197, 201, 204, 207, 209, 209, 210, 222, 223, 226, 227, 232, 231, 238, 237, 244, 243, 247, 247, 251, 252, 257, 258, 259, 260, 261,
51 268, 271, 267, 277, 276, 281, 280, 285};
52
GetInChIAtomicMass(unsigned int atomicnum)53 static unsigned int GetInChIAtomicMass(unsigned int atomicnum)
54 {
55 if (atomicnum < MAX_AVG_MASS)
56 return inchi_avg_mass[atomicnum]; // the correct value
57 else // fallback to our internal values
58 return (unsigned int)(OBElements::GetMass(atomicnum) + 0.5);
59 }
60
61 /////////////////////////////////////////////////////////////////
ReadMolecule(OBBase * pOb,OBConversion * pConv)62 bool InChIFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv)
63 {
64 OBMol* pmol = pOb->CastAndClear<OBMol>();
65 if (pmol == nullptr) return false;
66 istream &ifs = *pConv->GetInStream();
67
68 //Extract InChI from input stream even if it is split
69 string inchi;
70 do
71 {
72 inchi = GetInChI(ifs);
73 if(inchi.empty())
74 return false; //eof
75 }while(inchi.size()<9); //ignore empty "InChI=" or "InChI=1/"
76
77 // Save the original inchi to be used on output,
78 // avoiding conversion to and from OBMol
79 SaveInchi(pmol, inchi);
80
81 //Set up input struct
82 inchi_InputINCHI inp;
83
84 char* opts= GetInChIOptions(pConv, true);
85 inp.szOptions = opts;
86
87 char* nonconstinchi = new char[inchi.size()+1];
88 inp.szInChI = strcpy(nonconstinchi, inchi.c_str());
89
90 inchi_OutputStruct out;
91 memset(&out, 0, sizeof(out));
92
93 //Call the conversion routine in InChI code
94 int ret = GetStructFromINCHI( &inp, &out );
95 delete[] nonconstinchi;
96 delete[] opts;
97
98 if (ret!=inchi_Ret_OKAY)
99 {
100 string mes = out.szMessage;
101 if (!mes.empty()) {
102 Trim(mes);
103 obErrorLog.ThrowError("InChI code", "For " + inchi + "\n " + mes, obWarning);
104 }
105 if (ret!=inchi_Ret_WARNING)
106 {
107 obErrorLog.ThrowError("InChI code", "Reading InChI failed", obError);
108 return false;
109 }
110 }
111
112 //Read name if requested e.g InChI=1/CH4/h1H4 methane
113 //OR InChI=1/CH4/h1H4 "First alkane" Quote can be any punct char and
114 //uses up to the end of the line if second quote is not found
115 if(pConv->IsOption("n",OBConversion::INOPTIONS))
116 {
117 string name;
118 if(getline(ifs, name))
119 pmol->SetTitle(Trim(name));
120 }
121
122 //Option to add InChI text to title
123 if(pConv->IsOption("a",OBConversion::INOPTIONS))
124 {
125 string title(pmol->GetTitle());
126 title += ' ' + inchi;
127 pmol->SetTitle(title);
128 }
129
130 //Translate the returned structure into OBMol
131 pmol->SetDimension(0);
132 pmol->BeginModify();
133 int i;
134 //Make all atoms first because we need pointers to later ones
135 for(i=0;i<out.num_atoms;++i)
136 {
137 pmol->NewAtom();
138 }
139 for(i=0;i<out.num_atoms;++i)
140 {
141 OBAtom* patom = pmol->GetAtom(i+1); //index starts at 1
142 inchi_Atom* piat = &out.atom[i];
143 unsigned int atomicnum = OBElements::GetAtomicNum(piat->elname);
144 patom->SetAtomicNum(atomicnum);
145 if(piat->isotopic_mass)
146 patom->SetIsotope(piat->isotopic_mass - ISOTOPIC_SHIFT_FLAG + GetInChIAtomicMass(atomicnum));
147
148 patom->SetSpinMultiplicity(piat->radical);
149 patom->SetFormalCharge(piat->charge);
150 // patom->SetVector(piat->x,piat->y,piat->z);
151
152 int j;
153 for(j=0;j<piat->num_bonds;++j)
154 {
155 if (i < piat->neighbor[j]) // Only add the bond in one direction
156 pmol->AddBond(i+1, piat->neighbor[j]+1, piat->bond_type[j]);
157 }
158
159 //Now use the implicit H info provided by InChI code to make explicit H in OBMol,
160 //assign spinMultiplicity, then remove the hydrogens to be consistent with old way.
161 //Add implicit hydrogen. m=0 is non-istopic H m=1,2,3 are isotope specified
162 patom->SetImplicitHCount(piat->num_iso_H[0]);
163 for (int m=1; m<=3; ++m) {
164 if (piat->num_iso_H[m]) {
165 for (int k=0; k<piat->num_iso_H[m]; ++k) {
166 OBAtom* DorT = pmol->NewAtom();
167 DorT->SetAtomicNum(1);
168 DorT->SetIsotope(m);
169 pmol->AddBond(i+1, pmol->NumAtoms(), 1);
170 }
171 }
172 }
173 }
174 // pmol->AssignSpinMultiplicity(true); //true means no implicit H (TODO - Spin stuff)
175
176 //***@todo implicit H isotopes
177 //Stereochemistry
178 for(i=0;i<out.num_stereo0D;++i)
179 {
180 inchi_Stereo0D& stereo = out.stereo0D[i];
181
182 switch(stereo.type)
183 {
184 case INCHI_StereoType_DoubleBond:
185 {
186 OBCisTransStereo::Config *ct = new OBCisTransStereo::Config;
187
188 ct->begin = stereo.neighbor[1];
189 ct->end = stereo.neighbor[2];
190 unsigned long start = OBStereo::ImplicitRef;
191 unsigned long end = OBStereo::ImplicitRef;
192 FOR_NBORS_OF_ATOM(a, pmol->GetAtom(ct->begin + 1)) {
193 if ( !(a->GetId() == ct->end || a->GetId() == stereo.neighbor[0] ) ) {
194 start = a->GetId();
195 break;
196 }
197 }
198 FOR_NBORS_OF_ATOM(b, pmol->GetAtom(ct->end + 1)) {
199 if ( !(b->GetId() == ct->begin || b->GetId() == stereo.neighbor[3] ) ) {
200 end = b->GetId();
201 break;
202 }
203 }
204 ct->refs = OBStereo::MakeRefs(stereo.neighbor[0], start, stereo.neighbor[3], end);
205
206 if(stereo.parity==INCHI_PARITY_EVEN)
207 ct->shape = OBStereo::ShapeU;
208 else if(stereo.parity==INCHI_PARITY_ODD)
209 ct->shape = OBStereo::ShapeZ;
210 else
211 ct->specified = false;
212
213 OBCisTransStereo *obct = new OBCisTransStereo(pmol);
214 obct->SetConfig(*ct);
215 pmol->SetData(obct);
216
217 //InChI seems to prefer to define double bond stereo using H atoms.
218
219 break;
220 }
221 case INCHI_StereoType_Tetrahedral:
222 {
223 OBTetrahedralStereo::Config *ts = new OBTetrahedralStereo::Config;
224 ts->center = stereo.central_atom;
225 ts->from = stereo.neighbor[0];
226 if (ts->from == ts->center) // Handle the case where there are only three neighbours
227 ts->from = OBStereo::ImplicitRef;
228 ts->refs = OBStereo::MakeRefs(stereo.neighbor[1], stereo.neighbor[2],
229 stereo.neighbor[3]);
230
231 if(stereo.parity==INCHI_PARITY_EVEN)
232 ts->winding = OBStereo::Clockwise;
233 else if(stereo.parity==INCHI_PARITY_ODD)
234 ts->winding = OBStereo::AntiClockwise;
235 else
236 ts->specified = false;
237
238 OBTetrahedralStereo *obts = new OBTetrahedralStereo(pmol);
239 obts->SetConfig(*ts);
240 pmol->SetData(obts);
241
242 break;
243 }
244
245 case INCHI_StereoType_Allene:
246 default:
247 obErrorLog.ThrowError("InChI code", "Unsupported stereo type has been ignored.", obWarning);
248 }
249 }
250
251 pmol->DeleteHydrogens(); // Explicit H included for stereo H
252
253 pmol->EndModify();
254 pmol->SetChiralityPerceived();
255
256 FreeStructFromINCHI( &out );
257 return true;
258 }
259
SkipObjects(int n,OBConversion * pConv)260 int InChIFormat::SkipObjects(int n, OBConversion* pConv)
261 {
262 istream& ifs = *pConv->GetInStream();
263 string inchi;
264 while(ifs.good() && n)
265 {
266 inchi = GetInChI(ifs);
267 if(inchi.size()>=8)//ignore empty "InChI=" or "InChI=1/"
268 --n;
269 }
270 return ifs.good() ? 1 : -1;
271 }
272
273 // Convert the atom Ids used by the stereorefs to inchi atom ids
OBAtomIdToInChIAtomId(OBMol & mol,OBStereo::Ref atomid)274 static AT_NUM OBAtomIdToInChIAtomId(OBMol &mol, OBStereo::Ref atomid)
275 {
276 return (AT_NUM)(mol.GetAtomById(atomid)->GetIdx() - 1);
277 }
278
279 /////////////////////////////////////////////////////////////////
WriteMolecule(OBBase * pOb,OBConversion * pConv)280 bool InChIFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
281 {
282 //Although the OBMol may be altered, it is restored before exit.
283 OBMol* pmol = dynamic_cast<OBMol*>(pOb);
284 if (pmol == nullptr) return false;
285 OBMol& mol = *pmol;
286
287 string ostring; //the inchi string
288 inchi_Output inout;
289 inout.szInChI = nullptr; // We are going to test this value later
290
291 stringstream molID;
292 if(strlen(mol.GetTitle())==0)
293 molID << '#' << pConv->GetOutputIndex() << ' ';
294 else
295 molID << mol.GetTitle() << ' ';
296 if(pConv->GetOutputIndex()==1)
297 firstID=molID.str();
298
299 //Use any existing InChI, probably from an InChIformat input,
300 //in preference to determining it from the structure.
301 //but only if no InChI output option has been specified that would
302 //modify a standard InChI
303 if (pmol->HasData("inchi") && pConv->IsOption("r") == nullptr && pConv->IsOption("a") == nullptr &&
304 pConv->IsOption("s") == nullptr && pConv->IsOption("X") == nullptr && pConv->IsOption("F") == nullptr &&
305 pConv->IsOption("M") == nullptr)
306 {
307 //All origins for the data are currently acceptable.
308 //Possibly this may need to be restricted to data with a local origin.
309 ostring = pmol->GetData("inchi")->GetValue();
310 }
311 else
312 {
313 //Determine InChI from the chemical structure
314 if(pmol->NumAtoms()==0) return true; // PR#2864334
315
316 inchi_Input inp;
317 memset(&inp,0,sizeof(inchi_Input));
318
319 // Prepare stereo information for 2D, 3D
320 map<OBBond*, OBStereo::BondDirection> updown;
321 map<OBBond*, OBStereo::Ref> from;
322 map<OBBond*, OBStereo::Ref>::const_iterator from_cit;
323 if (mol.GetDimension() == 3 || (mol.GetDimension() == 2 && pConv->IsOption("s", pConv->OUTOPTIONS) != nullptr))
324 TetStereoToWedgeHash(mol, updown, from);
325 set<OBBond*> unspec_ctstereo = GetUnspecifiedCisTrans(mol);
326
327 OBAtom* patom;
328 vector<inchi_Atom> inchiAtoms(mol.NumAtoms());
329 vector<OBNodeBase*>::iterator itr;
330 for (patom = mol.BeginAtom(itr);patom;patom = mol.NextAtom(itr))
331 {
332 //OB atom index starts at 1; inchi atom index starts at 0
333 inchi_Atom& iat = inchiAtoms[patom->GetIdx()-1];
334 memset(&iat,0,sizeof(inchi_Atom));
335
336 iat.x = patom->GetX();
337 iat.y = patom->GetY();
338 iat.z = patom->GetZ();
339
340 int nbonds = 0;
341 vector<OBBond*>::iterator itr;
342 OBBond *pbond;
343 for (pbond = patom->BeginBond(itr);pbond;pbond = patom->NextBond(itr))
344 {
345 from_cit = from.find(pbond);
346 // Do each bond only once. If the bond is a stereobond
347 // ensure that the BeginAtom is the 'from' atom.
348 if( (from_cit==from.end() && patom->GetIdx() != pbond->GetBeginAtomIdx()) ||
349 (from_cit!=from.end() && from_cit->second != patom->GetId()) )
350 continue;
351
352 iat.neighbor[nbonds] = pbond->GetNbrAtomIdx(patom)-1;
353 int bo = pbond->GetBondOrder();
354 if(bo==5)
355 bo=4;
356 iat.bond_type[nbonds] = bo;
357
358 OBStereo::BondDirection stereo = OBStereo::NotStereo;
359 if (mol.GetDimension()==2 && pConv->IsOption("s", pConv->OUTOPTIONS) == nullptr) {
360 if (pbond->IsWedge())
361 stereo = OBStereo::UpBond;
362 else if (pbond->IsHash())
363 stereo = OBStereo::DownBond;
364 else if (pbond->IsWedgeOrHash())
365 stereo = OBStereo::UnknownDir;
366 }
367 else if (from_cit!=from.end()) { // It's a stereo bond
368 stereo = updown[pbond];
369 }
370
371 if (mol.GetDimension() != 0) {
372 inchi_BondStereo2D bondstereo2D = INCHI_BOND_STEREO_NONE;
373 if (stereo != OBStereo::NotStereo) {
374 switch (stereo) {
375 case OBStereo::UpBond:
376 bondstereo2D = INCHI_BOND_STEREO_SINGLE_1UP;
377 break;
378 case OBStereo::DownBond:
379 bondstereo2D = INCHI_BOND_STEREO_SINGLE_1DOWN;
380 break;
381 case OBStereo::UnknownDir:
382 bondstereo2D = INCHI_BOND_STEREO_SINGLE_1EITHER;
383 break;
384 default:
385 ; // INCHI_BOND_STEREO_NONE
386 }
387 }
388 // Is it a double bond with unspecified stereochemistry?
389 if (unspec_ctstereo.find(pbond)!=unspec_ctstereo.end())
390 bondstereo2D = INCHI_BOND_STEREO_DOUBLE_EITHER;
391 iat.bond_stereo[nbonds] = bondstereo2D;
392 }
393 nbonds++;
394 }
395
396 strcpy(iat.elname,OBElements::GetSymbol(patom->GetAtomicNum()));
397 iat.num_bonds = nbonds;
398 iat.num_iso_H[0] = patom->GetImplicitHCount();
399 if(patom->GetIsotope())
400 {
401 iat.isotopic_mass = ISOTOPIC_SHIFT_FLAG +
402 patom->GetIsotope() - GetInChIAtomicMass(patom->GetAtomicNum());
403 }
404 else
405 iat.isotopic_mass = 0 ;
406 iat.radical = patom->GetSpinMultiplicity();
407 //InChI doesn't recognize spin miltiplicity of 4 or 5 (as used in OB for CH and C atom)
408 if(iat.radical>=4)
409 iat.radical=0;
410 iat.charge = patom->GetFormalCharge();
411 }
412
413 inp.atom = &inchiAtoms[0];
414
415 vector<inchi_Stereo0D> stereoVec;
416
417 if(mol.GetDimension()==0)
418 {
419 std::vector<OBGenericData*>::iterator data;
420 std::vector<OBGenericData*> stereoData = mol.GetAllData(OBGenericDataType::StereoData);
421 for (data = stereoData.begin(); data != stereoData.end(); ++data) {
422 if (static_cast<OBStereoBase*>(*data)->GetType() == OBStereo::Tetrahedral) {
423 OBTetrahedralStereo *ts = dynamic_cast<OBTetrahedralStereo*>(*data);
424 OBTetrahedralStereo::Config config = ts->GetConfig();
425
426 if(config.specified) {
427 inchi_Stereo0D stereo;
428 stereo.type = INCHI_StereoType_Tetrahedral;
429 stereo.central_atom = OBAtomIdToInChIAtomId(mol, config.center);
430
431 // count number of implicit refs
432 int num_implicit = 0;
433 if (config.from == OBStereo::ImplicitRef)
434 num_implicit = 1;
435 num_implicit += std::count(config.refs.begin(), config.refs.end(), OBStereo::ImplicitRef);
436
437 // ignore invalid cases with more than 1 implicit ref
438 if (num_implicit > 1)
439 continue;
440
441 if (num_implicit) {
442 config = ts->GetConfig(OBStereo::ImplicitRef); // Make the 'from' atom the implicit ref
443 stereo.neighbor[0] = stereo.central_atom;
444 } else
445 stereo.neighbor[0] = OBAtomIdToInChIAtomId(mol, config.from);
446
447 for(int i=0; i<3; ++i)
448 stereo.neighbor[i + 1] = OBAtomIdToInChIAtomId(mol, config.refs[i]);
449
450 if (config.winding == OBStereo::Clockwise)
451 stereo.parity = INCHI_PARITY_EVEN;
452 else
453 stereo.parity = INCHI_PARITY_ODD;
454
455 stereoVec.push_back(stereo);
456 }
457 }
458 }
459
460 //Double bond stereo (still inside 0D section)
461 //Currently does not handle cumulenes
462 for (data = stereoData.begin(); data != stereoData.end(); ++data) {
463 if (static_cast<OBStereoBase*>(*data)->GetType() == OBStereo::CisTrans) {
464 OBCisTransStereo *ts = dynamic_cast<OBCisTransStereo*>(*data);
465 OBCisTransStereo::Config config = ts->GetConfig();
466
467 if(config.specified) {
468 inchi_Stereo0D stereo;
469 stereo.central_atom = NO_ATOM;
470 stereo.type = INCHI_StereoType_DoubleBond;
471 OBStereo::Refs refs = config.refs;
472
473 // ignore invalid cases with 2 implicit refs on one side
474 if (refs[0] == OBStereo::ImplicitRef && refs[1] == OBStereo::ImplicitRef)
475 continue;
476 if (refs[2] == OBStereo::ImplicitRef && refs[3] == OBStereo::ImplicitRef)
477 continue;
478
479 unsigned long start = refs[0];
480 if (refs[0]==OBStereo::ImplicitRef)
481 start = refs[1];
482 unsigned long end = refs[3];
483 if (refs[3]==OBStereo::ImplicitRef)
484 end = refs[2];
485
486 stereo.neighbor[0] = OBAtomIdToInChIAtomId(mol, start);
487 stereo.neighbor[1] = OBAtomIdToInChIAtomId(mol, config.begin);
488 stereo.neighbor[2] = OBAtomIdToInChIAtomId(mol, config.end);
489 stereo.neighbor[3] = OBAtomIdToInChIAtomId(mol, end);
490
491 if (ts->IsTrans(start, end))
492 stereo.parity = INCHI_PARITY_EVEN;
493 else
494 stereo.parity = INCHI_PARITY_ODD;
495
496 stereoVec.push_back(stereo);
497 }
498 }
499 }
500 }
501
502 char* opts = GetInChIOptions(pConv, false);
503 inp.szOptions = opts;
504
505 inp.num_atoms = mol.NumAtoms();
506 inp.num_stereo0D = (AT_NUM) stereoVec.size();
507 if(inp.num_stereo0D>0)
508 inp.stereo0D = &stereoVec[0];
509
510 //inchi_Output inout; now declared in block above
511 memset(&inout,0,sizeof(inchi_Output));
512
513 int ret = GetINCHI(&inp, &inout);
514
515 delete[] opts;
516 if(ret!=inchi_Ret_OKAY)
517 {
518 if(inout.szMessage)
519 {
520 string mes(inout.szMessage);
521 if(pConv->IsOption("w"))
522 {
523 string::size_type pos;
524 string targ[4];
525 targ[0] = "Omitted undefined stereo";
526 targ[1] = "Charges were rearranged";
527 targ[2] = "Proton(s) added/removed";
528 targ[3] = "Metal was disconnected";
529 for(int i=0;i<4;++i)
530 {
531 pos = mes.find(targ[i]);
532 if(pos!=string::npos)
533 {
534 mes.erase(pos,targ[i].size());
535 if(pos<mes.size() && mes[pos]==';')
536 mes[pos]=' ';
537 }
538 }
539 }
540 Trim(mes);
541 if(!mes.empty())
542 obErrorLog.ThrowError("InChI code", molID.str() + ':' + mes, obWarning);
543 }
544
545 if(ret!=inchi_Ret_WARNING)
546 {
547 obErrorLog.ThrowError("InChI code", "InChI generation failed", obError);
548 FreeStdINCHI(&inout);
549 return false;
550 }
551 }
552 ostring = inout.szInChI;
553 }
554
555 //Truncate the InChI if requested
556 const char* truncspec = pConv->IsOption("T");
557 if(truncspec)
558 {
559 string trunc(truncspec);
560 EditInchi(ostring, trunc);
561 }
562
563 if(pConv->IsOption("K")) //Generate InChIKey and add after InChI on same line
564 {
565 char szINCHIKey[28];
566 GetINCHIKeyFromINCHI(ostring.c_str(), 0 ,0, szINCHIKey, nullptr, nullptr);
567 ostring = szINCHIKey;
568 }
569
570 if(pConv->IsOption("t"))
571 {
572 ostring += ' ';
573 ostring += pmol->GetTitle();
574 }
575
576 ostream &ofs = *pConv->GetOutStream();
577
578 if(pConv->IsOption("U"))
579 {
580 if(pConv->GetOutputIndex()==1)
581 allInchi.clear();
582 //Just add to set and don't output, except at the end
583 allInchi.insert(ostring);
584
585 if(pConv->IsLast())
586 {
587 nSet::iterator itr;
588 for(itr=allInchi.begin();itr!=allInchi.end();++itr)
589 ofs << *itr << endl;
590 }
591 return true;
592 }
593 else if(pConv->IsOption("u"))
594 {
595 if(pConv->GetOutputIndex()==1)
596 allInchi.clear();
597 if(!allInchi.insert(ostring).second)
598 return true; //no output if already in set
599 }
600
601 ofs << ostring << endl;
602
603 // Note that inout.szInChI will still be NULL if this is an InChI->InChIKey conversion
604 // and so the following section will not apply.
605 if (inout.szInChI != nullptr) {
606 if (pConv->IsOption("a"))
607 ofs << inout.szAuxInfo << endl;
608
609 if(pConv->IsOption("l"))
610 //Display InChI log message. With multiple molecules, it appears only once
611 obErrorLog.ThrowError("InChI log", inout.szLog, obError, onceOnly);
612
613 if(pConv->IsOption("e"))
614 {
615 if(pConv->GetOutputIndex()==1)
616 firstInchi = inout.szInChI;
617 else
618 {
619 ofs << "Molecules " << firstID << "and " << molID.str();
620 ofs << InChIErrorMessage(CompareInchi(firstInchi.c_str(), inout.szInChI)) << endl;
621 }
622 }
623 FreeStdINCHI(&inout);
624 }
625
626 return true;
627 }
628
629 //////////////////////////////////////////////////////////
CompareInchi(const string & Inchi1,const string & Inchi2)630 char InChIFormat::CompareInchi(const string& Inchi1, const string& Inchi2)
631 {
632 //Returns 0 if identical or an char identifying the layer where they first differed
633 string s1(Inchi1), s2(Inchi2);
634
635 if(s1.size()<s2.size())
636 s1.swap(s2);
637 string::size_type pos;
638 for(pos=0;pos<s1.size();++pos)
639 {
640 if(pos==s2.size() || s1[pos]!=s2[pos])
641 return s1[s1.rfind('/',pos)+1];
642 }
643 return 0;
644
645 /* //Remove anything after the end of the Inchi
646 string::size_type pos;
647 pos = s1.find_first_of(" \t\n");
648 if(pos!=string::npos)
649 s1.erase(pos);
650 pos = s2.find_first_of(" \t\n");
651 if(pos!=string::npos)
652 s2.erase(pos);
653
654 vector<string> layers1, layers2;
655 tokenize(layers1,s1,"/\n");
656 tokenize(layers2,s2,"/\n");
657 unsigned int i;
658 if(layers1.size()<layers2.size())
659 layers1.swap(layers2); //layers1 is the longest
660
661 for(i=1;i<layers2.size();++i)
662 {
663 if(layers1[i]!=layers2[i])
664 {
665 char ch = '+';
666 if(i>1) //not formula layer
667 ch=layers1[i][0];
668 return ch;
669 }
670 }
671 if(layers1.size()==layers2.size())
672 return 0;
673 else
674 return layers1[i][0];
675 */
676 }
677
InChIErrorMessage(const char ch)678 string InChIFormat::InChIErrorMessage(const char ch)
679 {
680 string s;
681 switch (ch)
682 {
683 case 0:
684 s = " are identical";
685 break;
686 case '+':
687 s = " have different formulae";
688 break;
689 case 'c':
690 s = " have different connection tables";
691 break;
692 case 'h':
693 s = " have different bond orders, or radical character";
694 break;
695 case 'q':
696 s = " have different charges";
697 break;
698 case 'p':
699 s = " have different numbers of attached protons";
700 break;
701 case 'b':
702 s = " have different double bond stereochemistry";
703 break;
704 case 'm':
705 case 't':
706 s = " have different sp3 stereochemistry";
707 break;
708 case 'i':
709 s = " have different isotopic composition";
710 break;
711 default:
712 s = " are different";
713 }
714 return s;
715 }
716
GetCommonAtom(OBBond * pb1,OBBond * pb2)717 OBAtom* InChIFormat::GetCommonAtom(OBBond* pb1, OBBond* pb2)
718 {
719 OBAtom* pa1 = pb1->GetBeginAtom();
720 if(pa1==pb2->GetBeginAtom() || pa1==pb2->GetEndAtom())
721 return pa1;
722 pa1 = pb1->GetEndAtom();
723 if(pa1==pb2->GetBeginAtom() || pa1==pb2->GetEndAtom())
724 return pa1;
725 return nullptr; //not adjacent bonds
726 }
727
728
729 //Returns pointer to InChI options string, which needs to be deleted with delete[]
730 //If there are no options returns an empty string
GetInChIOptions(OBConversion * pConv,bool Reading)731 char* InChIFormat::GetInChIOptions(OBConversion* pConv, bool Reading)
732 {
733 vector<string> optsvec; //the InChi options
734 /* In Standard InChI these are not used
735 if(!Reading && !pConv->IsOption("n"))
736 //without -xn option, the default is to write using these 'recommended' options
737 tokenize(optsvec, "FixedH RecMet SPXYZ SAsXYZ Newps Fb Fnud");
738 */
739 char* opts;
740 OBConversion::Option_type opttyp = Reading ? OBConversion::INOPTIONS : OBConversion::OUTOPTIONS;
741 const char* copts = pConv->IsOption("X", opttyp);
742 if(copts)
743 {
744 string tmp(copts); // GCC doesn't like passing string temporaries to functions
745 vector<string> useropts;
746 tokenize(useropts, tmp);
747 copy(useropts.begin(), useropts.end(), back_inserter(optsvec));
748 }
749
750 //Add a couple InChI options built in to OB
751 if(opttyp==OBConversion::OUTOPTIONS)
752 {
753 if(pConv->IsOption("F", opttyp))
754 {
755 string tmp2("FixedH");
756 optsvec.push_back(tmp2);
757 }
758 if(pConv->IsOption("M", opttyp))
759 {
760 string tmp2("RecMet");
761 optsvec.push_back(tmp2);
762 }
763 }
764
765
766 #ifdef WIN32
767 string ch(" /");
768 #else
769 string ch(" -");
770 #endif
771
772 string sopts;
773 for (unsigned int i = 0; i < optsvec.size(); ++i)
774 sopts += ch + optsvec[i];
775 opts = new char[strlen(sopts.c_str())+1]; //has to be char, not const char
776 return strcpy(opts, sopts.c_str());
777 opts = new char[1];
778 *opts = '\0';
779 return opts;
780 }
781
EditInchi(std::string & inchi,std::string & spec)782 bool InChIFormat::EditInchi(std::string& inchi, std::string& spec)
783 {
784 std::vector<std::string> vec;
785 std::vector<std::string>::iterator itr;
786 tokenize(vec, spec, " \t/");
787 for(itr=vec.begin();itr!=vec.end();++itr)
788 {
789 if(*itr=="formula")
790 {
791 std::string::size_type pos = inchi.find('/', inchi.find('/')+1); //2nd /
792 if(pos!=string::npos)
793 inchi.erase(pos);
794 }
795 else if(*itr=="connect")
796 RemoveLayer(inchi,"/h",true);
797 else if(*itr=="nochg")
798 {
799 RemoveLayer(inchi,"/p");
800 RemoveLayer(inchi,"/q");
801 }
802 else if(*itr=="nosp3")
803 {
804 RemoveLayer(inchi,"/t");
805 RemoveLayer(inchi,"/m");
806 RemoveLayer(inchi,"/s");
807 }
808 else if(*itr=="noEZ")
809 RemoveLayer(inchi,"/b");
810 else if(*itr=="noiso")
811 RemoveLayer(inchi,"/i");
812 else if(*itr=="nostereo")
813 {
814 RemoveLayer(inchi,"/t");
815 RemoveLayer(inchi,"/m");
816 RemoveLayer(inchi,"/s");
817 RemoveLayer(inchi,"/b");
818 }
819 else if(!(*itr).empty())
820 {
821 obErrorLog.ThrowError(__FUNCTION__,
822 spec + " not recognized as a truncation specification",obError, onceOnly);
823 return false;
824 }
825 }
826 return true;
827 }
828
RemoveLayer(std::string & inchi,const std::string & str,bool all)829 void InChIFormat::RemoveLayer (std::string& inchi, const std::string& str, bool all)
830 {
831 std::string::size_type pos = inchi.find(str);
832 if(pos!=string::npos)
833 inchi.erase(pos, (all ? string::npos : inchi.find('/', pos+1) - pos));
834 }
835
SaveInchi(OBMol * pmol,const std::string & s)836 void InChIFormat::SaveInchi(OBMol* pmol, const std::string& s)
837 {
838 OBPairData* dp = new OBPairData;
839 dp->SetAttribute("inchi");
840 dp->SetValue(s);
841 dp->SetOrigin(local);
842 pmol->SetData(dp);
843 }
844
845 //************************************************************************
846 InChICompareFormat theInChICompareFormat;
847
WriteMolecule(OBBase * pOb,OBConversion * pConv)848 bool InChICompareFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
849 {
850 pConv->AddOption("e",OBConversion::OUTOPTIONS);
851 pConv->AddOption("t",OBConversion::OUTOPTIONS);
852 return theInChIFormat.WriteMolecule(pOb,pConv);
853 }
854
855 //************************************************************************
856 InChIKeyFormat theInChIKeyFormat;
857
WriteMolecule(OBBase * pOb,OBConversion * pConv)858 bool InChIKeyFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
859 {
860 pConv->AddOption("K",OBConversion::OUTOPTIONS);
861 return theInChIFormat.WriteMolecule(pOb,pConv);
862 }
863
864
865 }//namespace OpenBabel
866
867