1 /**********************************************************************
2 Copyright (C) 2006 by Fredrik Wallner
3 Some portions Copyright (C) 2006-2007 by Geoffrey Hutchsion
4 Some portions Copyright (C) 2011 by Chris Morley
5 
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation version 2 of the License.
9 
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU General Public License for more details.
14 ***********************************************************************/
15 
16 #include <openbabel/babelconfig.h>
17 #include <openbabel/obmolecformat.h>
18 #include <openbabel/mol.h>
19 #include <openbabel/atom.h>
20 #include <openbabel/bond.h>
21 #include <openbabel/elements.h>
22 #include <openbabel/reactionfacade.h>
23 #include <openbabel/stereo/stereo.h>
24 #include <openbabel/obfunctions.h>
25 #include <openbabel/reaction.h>
26 #include <openbabel/tokenst.h>
27 #include <openbabel/alias.h>
28 #include <openbabel/text.h>
29 #include "chemdrawcdx.h"
30 
31 #include <iostream>
32 #include <fstream>
33 #include <sstream>
34 #include <map>
35 #include <list>
36 
37 
38 #if !defined(__CYGWIN__)
// Swap the two bytes of a 16-bit value.
static inline unsigned short bswap_16(unsigned short x) {
  const unsigned int high_byte = (x >> 8);
  const unsigned int low_byte  = (x & 0x00ffu);
  // The conversion back to unsigned short keeps only the low 16 bits.
  return static_cast<unsigned short>((low_byte << 8) | high_byte);
}
42 
// Reverse the byte order of a 32-bit value.
// Implemented purely with unsigned masks and shifts: shifting the
// (int-promoted) result of a 16-bit swap left by 16 could move a set bit
// into the sign bit of a signed int, which this form avoids.
static inline unsigned int bswap_32(unsigned int x) {
  return ((x & 0x000000ffu) << 24) |
         ((x & 0x0000ff00u) <<  8) |
         ((x & 0x00ff0000u) >>  8) |
         ((x & 0xff000000u) >> 24);
}
46 
bswap_64(unsigned long long x)47 static inline unsigned long long bswap_64(unsigned long long x) {
48   return (((unsigned long long)bswap_32(x&0xffffffffull))<<32) | (bswap_32(x>>32));
49 }
50 #endif
51 
52 // Macs -- need to use Apple macros to deal with Universal binaries correctly
// READ_INT16 / READ_INT32 read sizeof(data) raw bytes from `stream` into
// `data` and, on big-endian hosts, byte-swap the result with
// bswap_16 / bswap_32.
// Each macro is wrapped in do { } while(0) so that it expands to exactly one
// statement and is safe inside an unbraced if/else. Callers must terminate
// the invocation with a semicolon.
#ifdef __APPLE__
#include <machine/endian.h>
#if BYTE_ORDER == BIG_ENDIAN
#    define READ_INT16(stream,data) \
do { (stream).read ((char*)&(data), sizeof(data)); \
(data) = bswap_16 (data); } while(0)
#    define READ_INT32(stream,data) \
do { (stream).read ((char*)&(data), sizeof(data)); \
(data) = bswap_32 (data); } while(0)
#else // BYTE_ORDER == LITTLE_ENDIAN
#    define READ_INT16(stream,data) \
do { (stream).read ((char*)&(data), sizeof(data)); } while(0)
#    define READ_INT32(stream,data) \
do { (stream).read ((char*)&(data), sizeof(data)); } while(0)
#endif
#else

// Non-Apple systems
// defined in babelconfig.h by autoconf (portable to Solaris, BSD, Linux)
#ifdef WORDS_BIGENDIAN
#    define READ_INT16(stream,data) \
do { (stream).read ((char*)&(data), sizeof(data)); \
(data) = bswap_16 (data); } while(0)
#    define READ_INT32(stream,data) \
do { (stream).read ((char*)&(data), sizeof(data)); \
(data) = bswap_32 (data); } while(0)
#else
#    define READ_INT16(stream,data) \
do { (stream).read ((char*)&(data), sizeof(data)); } while(0)
#    define READ_INT32(stream,data) \
do { (stream).read ((char*)&(data), sizeof(data)); } while(0)
#endif
// end endian / bigendian issues (on non-Mac systems)
#endif
87 // end Apple/non-Apple systems
88 
89 using namespace std;
90 namespace OpenBabel
91 {
92 
93 //Class which traverse the tree in CDX binary files
94 class CDXReader
95 {
96 public:
97   CDXReader(std::istream& is);
98   CDXTag ReadNext(bool objectsOnly=false, int targetDepth=-2);
IgnoreObject()99   void IgnoreObject()          { ReadNext(true, GetDepth()-1); }
operator bool() const100   operator bool ()const        { return (bool)ifs; }
GetDepth() const101   int GetDepth()const          { return depth; }
GetLen() const102   int GetLen()const            { return _len;} //length of current property data
CurrentID() const103   CDXObjectID CurrentID()const { return ids.back(); }
104   stringstream& data(); //call this only once for each set of property data
105 
106   //Routines to display the structure of a cdx binary file
107   OBText* WriteTree(const std::string& filename, unsigned wtoptions);
108 private:
109   bool ParseEnums(std::map<CDXTag, std::string>& enummap, const std::string& filename);
110   std::string TagName(std::map<CDXTag, std::string>& enummap, CDXTag tag);
111 
112 private:
113   std::istream& ifs;
114   int depth;
115   std::vector<CDXObjectID> ids;
116   CDXObjectID _tempback;
117   std::string _buf;
118   UINT16 _len;
119   std::stringstream _ss;
120 };
121 
122 //**************************************************************
123 class ChemDrawBinaryXFormat : OBMoleculeFormat
124 {
125 public:
126   //Register this format type ID in the constructor
ChemDrawBinaryXFormat()127   ChemDrawBinaryXFormat()
128   {
129     OBConversion::RegisterFormat("cdx",this);
130   }
131 
Description()132   virtual const char* Description() //required
133   {
134     return
135       "ChemDraw binary format\n"
136       "Read only\n"
137       "The whole file is read in one call.\n"
138       "Note that a file may contain a mixture of reactions and\n"
139       "molecules.\n"
140 
141       "With the -ad option, a human-readable representation of the CDX tree\n"
142       "structure is output as an OBText object. Use textformat to view it::\n\n"
143 
144       "    obabel input.cdx -otext -ad\n\n"
145 
146       "Many reactions in CDX files are not fully specified with reaction data\n"
147       "structures, and may not be completely interpreted by this parser.\n\n"
148 
149       "Read Options, e.g. -am\n"
150       " m read molecules only; no reactions\n"
151       " d output CDX tree to OBText object\n"
152       " o display only objects in tree output\n";
153   }
154 
SpecificationURL()155   virtual const char* SpecificationURL()
156   {return "http://www.cambridgesoft.com/services/documentation/sdk/chemdraw/cdx/IntroCDX.htm";}
157 
GetMIMEType()158   virtual const char* GetMIMEType()
159   { return "chemical/x-cdx"; };
160 
Flags()161   virtual unsigned int Flags()
162   {
163     return READBINARY|NOTWRITABLE;
164   }
165 
166   ////////////////////////////////////////////////////
167   virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv);
168 
169 private:
170   enum graphicType {none, equilArrow};
171   bool        TopLevelParse(CDXReader& cdxr, OBConversion* pConv,CDXObjectID ContainingGroup);
172   bool        DoFragment(CDXReader& cdxr, OBMol* pmol);
173   bool        DoFragmentImpl(CDXReader& cdxr, OBMol* pmol,
174          map<CDXObjectID, unsigned>& atommap, map<OBBond*, OBStereo::BondDirection>& updown);
175   bool        DoReaction(CDXReader& cdxr, OBMol* pReact);
176   std::string DoText(CDXReader& cdxr);
177 
178   std::vector<OBMol*> LookupMol(CDXObjectID id);
179   graphicType         LookupGraphic(CDXObjectID id);
180   OBMol*              LookupInMolMap(CDXObjectID id);
181 
182 private:
183   bool readReactions;
184   static const bool objectsOnly = true;
185   std::map<CDXObjectID, graphicType> _graphicmap;
186   std::map<CDXObjectID, OBMol*> _molmap;
187   std::map<CDXObjectID, std::vector<CDXObjectID> > _groupmap;
188   // In case of chain A -> B -> C, B is both reactant and product
189   CDXObjectID _lastProdId;
190   typedef std::map<CDXObjectID, std::vector<CDXObjectID> >::iterator GroupMapIterator;
191   static const unsigned usedFlag = 1<<30;
192 };
193 
194 //******************************************************************
195   //Global instance of the format
196  ChemDrawBinaryXFormat theChemDrawBinaryXFormat;
197 //******************************************************************
198 
/* New CDX format
Each fragment goes into a new OBMol on the heap.
The CDX id and OBMol* are added to _molmap.
When a reaction is found, the reactant/product/agent CDX ids are looked up in _molmap
and added to an OBReaction (made by deleting pOb if it is an OBMol
and assigning pOb to a new OBReaction). The OBMol is marked as Used.
When the reaction is complete it is output via AddChemObject().
At the end, any OBMol in the map not marked as Used is output as an OBMol.
*/
208 
209 
ReadMolecule(OBBase * pOb,OBConversion * pConv)210 bool ChemDrawBinaryXFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv)
211 {
212   _molmap.clear();
213   _graphicmap.clear();
214   _groupmap.clear();
215   OBMol* pmol=nullptr;
216   bool ok = true;
217 
218   CDXReader cdxr(*pConv->GetInStream());
219   readReactions = pConv->IsOption("m", OBConversion::INOPTIONS) == nullptr;
220 
221   // Write CDX tree only if requested
222   if(pConv->IsOption("d",OBConversion::INOPTIONS))
223   {
224     unsigned wtoptions=0;
225     if(pConv->IsOption("o",OBConversion::INOPTIONS))
226       wtoptions |= 1; //display objects only
227     OBText* pText  = cdxr.WriteTree("chemdrawcdx.h", wtoptions);
228     if(pText)
229     {
230       pConv->AddChemObject(pText);
231       return true;
232     }
233     return false;
234   }
235 
236   // Normal reading of molecules and reactions
237   //Top level parse
238   while(cdxr)
239   {
240     if(!TopLevelParse(cdxr, pConv, 0))
241       return false;
242   }
243 
244   //At the end, output molecules that have not been used in a reaction
245   map<CDXObjectID, OBMol*>::iterator mapiter = _molmap.begin();
246   for(; mapiter!=_molmap.end(); ++mapiter)
247   {
248     pmol = mapiter->second;
249     if(!(pmol->GetFlags() & usedFlag) && strcmp(pmol->GetTitle(),"justplus"))
250     {
251       OBMol* ptmol = static_cast<OBMol*>(pmol->DoTransformations(
252                     pConv->GetOptions(OBConversion::GENOPTIONS),pConv));
253       if(!ptmol)
254         delete pmol;
255       else
256         if(!pConv->AddChemObject(ptmol))
257           return false; //error during writing
258     }
259   }
260 
261   return ok;
262 }
263 ///////////////////////////////////////////////////////////////////////
TopLevelParse(CDXReader & cdxr,OBConversion * pConv,CDXObjectID ContainingGroup)264 bool ChemDrawBinaryXFormat::TopLevelParse
265         (CDXReader& cdxr, OBConversion* pConv, CDXObjectID ContainingGroup)
266 {
267   bool ok = true;
268   CDXTag tag;
269   while((tag = cdxr.ReadNext(objectsOnly)))
270   {
271     if(tag == kCDXObj_Group)
272     {
273       CDXObjectID cid = cdxr.CurrentID();
274       vector<CDXObjectID> v;
275       _groupmap.insert(make_pair(cid,v)); //empty vector as yet
276       TopLevelParse(cdxr, pConv, cid );
277     }
278 
279     else if(tag==kCDXObj_Fragment)
280     {
281       OBMol* pmol = new OBMol;
282       //Save all molecules to the end
283       _molmap[cdxr.CurrentID()] = pmol;
284 
285       if(ContainingGroup)
286       {
287         // Add the id of this mol to the group's entry in _groupmap
288         GroupMapIterator gmapiter = _groupmap.find(ContainingGroup);
289         if(gmapiter!=_groupmap.end())
290           gmapiter->second.push_back(cdxr.CurrentID());
291       }
292       ok = DoFragment(cdxr, pmol);
293     }
294 
295     else if(tag == kCDXObj_ReactionStep && readReactions)
296     {
297       OBMol* pReact = new OBMol;
298       pReact->SetIsReaction();
299       ok = DoReaction(cdxr, pReact);
300       // Output OBReaction and continue
301       if(pReact)
302         if(!pConv->AddChemObject(pReact))
303           return false; //error during writing
304     }
305 
306     else if(ok && tag==kCDXObj_Graphic)
307     {
308       while( (tag = cdxr.ReadNext()) )
309       {
310         stringstream& ss = cdxr.data();
311         if(tag == kCDXProp_Arrow_Type)
312         {
313           char type1=0;
314           UINT16 type2=0;
315           if(cdxr.GetLen()==1)
316             ss.get(type1);
317           else
318             READ_INT16(ss,type2);
319           if(type1==kCDXArrowType_Equilibrium || type2==kCDXArrowType_Equilibrium)
320             _graphicmap[type1+type2] = equilArrow; //save in graphicmap
321         }
322       }
323     }
324   }
325   return true;
326 }
327 ///////////////////////////////////////////////////////////////////////
DoReaction(CDXReader & cdxr,OBMol * pReact)328 bool ChemDrawBinaryXFormat::DoReaction(CDXReader& cdxr, OBMol* pReact)
329 {
330   CDXTag tag;
331   CDXObjectID id;
332   OBReactionFacade facade(pReact);
333   while( (tag = cdxr.ReadNext()) )
334   {
335     if(tag ==	kCDXProp_ReactionStep_Reactants)
336     {
337       stringstream& ss = cdxr.data();
338       for(unsigned i=0;i<cdxr.GetLen()/4;++i)//for each reactant id
339       {
340         READ_INT32(ss,id);
341         vector<OBMol*> molvec = LookupMol(id); //id could be a group with several mols
342         for(unsigned i=0;i<molvec.size();++i)
343           if(strcmp(molvec[i]->GetTitle(),"justplus"))
344           {
345             facade.AddComponent(molvec[i], REACTANT);
346           }
347       }
348     }
349     else if(tag == kCDXProp_ReactionStep_Products)
350     {
351       stringstream& ss = cdxr.data();
352       for(unsigned i=0;i<cdxr.GetLen()/4;++i)//for each product id
353       {
354         READ_INT32(ss,id);
355         vector<OBMol*> molvec = LookupMol(id); //id could be a group with several mols
356         for(unsigned i=0;i<molvec.size();++i)
357           if(strcmp(molvec[i]->GetTitle(),"justplus"))
358           {
359             facade.AddComponent(molvec[i], PRODUCT);
360             _lastProdId = id;
361           }
362       }
363     }
364     else if(tag==kCDXProp_ReactionStep_Arrows)
365     {
366       READ_INT32(cdxr.data(),id);
367       //if(LookupGraphic(id)==equilArrow) // TODO? Store reversibility somehow?
368       //  pReact->SetReversible();
369     }
370   }
371   return true;
372 }
373 ///////////////////////////////////////////////////////////////////////
LookupMol(CDXObjectID id)374 vector<OBMol*> ChemDrawBinaryXFormat::LookupMol(CDXObjectID id)
375 {
376   vector<OBMol*> molvec;
377   //Check whether the id is that of a kCDXObj_Group
378   GroupMapIterator gmapiter;
379   gmapiter = _groupmap.find(id);
380   if(gmapiter != _groupmap.end())
381   {
382     for(unsigned i=0;i<gmapiter->second.size();++i)
383     {
384       OBMol* pmmol = LookupInMolMap(gmapiter->second[i]);
385       if(pmmol)
386         molvec.push_back(pmmol);
387     }
388   }
389   else
390   {
391     //id is not a group; it must be a fragment
392     OBMol* pmmol = LookupInMolMap(id);
393     if(pmmol)
394       molvec.push_back(pmmol);
395   }
396   return molvec;
397 }
398 
LookupInMolMap(CDXObjectID id)399 OBMol* ChemDrawBinaryXFormat::LookupInMolMap(CDXObjectID id)
400 {
401   std::map<CDXObjectID, OBMol*>::iterator mapiter;
402   mapiter = _molmap.find(id);
403   if(mapiter!=_molmap.end())
404   {
405     //Mark mol as used in a reaction, so that it will not be output independently
406     mapiter->second->SetFlags(mapiter->second->GetFlags() | usedFlag);
407     return mapiter->second;
408   }
409   else
410   {
411     stringstream ss;
412     ss << "Reactant or product mol not found id = " << hex << showbase << id;
413     obErrorLog.ThrowError(__FUNCTION__, ss.str(), obError);
414     return nullptr;
415   }
416 }
417 
418 ////////////////////////////////////////////////////////////////////////
LookupGraphic(CDXObjectID id)419 ChemDrawBinaryXFormat::graphicType ChemDrawBinaryXFormat::LookupGraphic(CDXObjectID id)
420 {
421   std::map<CDXObjectID, graphicType>::iterator mapiter;
422   mapiter = _graphicmap.find(id);
423   if(mapiter != _graphicmap.end())
424     return mapiter->second;
425   else
426     return none;
427 }
428 
429 ////////////////////////////////////////////////////////////////////////
DoFragment(CDXReader & cdxr,OBMol * pmol)430 bool ChemDrawBinaryXFormat::DoFragment(CDXReader& cdxr, OBMol* pmol)
431 {
432   map<OBBond*, OBStereo::BondDirection> updown;
433   pmol->SetDimension(2);
434   pmol->BeginModify();
435 
436   map<CDXObjectID, unsigned> atommap; //key = CDX id; value = OB atom idx
437 
438   //The inner workings of DoFragment,since Fragment elements can be nested
439   DoFragmentImpl(cdxr, pmol, atommap, updown);
440 
441   // use 2D coordinates + hash/wedge to determine stereochemistry
442   StereoFrom2D(pmol, &updown);
443 
444   pmol->EndModify();
445 
446   //Expand any aliases after molecule constructed
447   //Need to save aliases in list first and expand later
448   vector<OBAtom*> aliasatoms;
449   for(int idx=1; idx<=pmol->NumAtoms();++idx)
450   {
451     OBAtom* pAtom = pmol->GetAtom(idx);
452     AliasData* ad = dynamic_cast<AliasData*>(pAtom->GetData(AliasDataType));
453     if(ad && !ad->IsExpanded())
454       aliasatoms.push_back(pAtom);
455   }
456   for(vector<OBAtom*>::iterator vit=aliasatoms.begin();
457       vit!=aliasatoms.end(); ++vit)
458   {
459     int idx = (*vit)->GetIdx();
460     AliasData* ad = dynamic_cast<AliasData*>((*vit)->GetData(AliasDataType));
461     if(ad && !ad->IsExpanded())
462       ad->Expand(*pmol, idx); //Make chemically meaningful, if possible.
463   }
464   return true;
465 }
466 
DoFragmentImpl(CDXReader & cdxr,OBMol * pmol,map<CDXObjectID,unsigned> & atommap,map<OBBond *,OBStereo::BondDirection> & updown)467 bool ChemDrawBinaryXFormat::DoFragmentImpl(CDXReader& cdxr, OBMol* pmol,
468        map<CDXObjectID, unsigned>& atommap, map<OBBond*, OBStereo::BondDirection>& updown)
469 {
470   CDXTag tag;
471   std::vector<OBAtom*> handleImplicitCarbons;
472   while((tag = cdxr.ReadNext(objectsOnly)))
473   {
474     if(tag==kCDXObj_Node)
475     {
476       unsigned nodeID = cdxr.CurrentID();
477       bool isAlias=false, hasElement=false;
478       bool hasNumHs = false;
479       UINT16 atnum=-1, spin=0, numHs=0;
480       int x, y, charge=0, iso=0;
481       string aliastext;
482 
483       //Read all node properties
484       while( (tag = cdxr.ReadNext()) )
485       {
486         switch(tag)
487         {
488         case kCDXProp_Node_Type:
489           UINT16 type;
490           READ_INT16(cdxr.data(), type);
491           if(type==4 || type==5) //Nickname or fragment
492             isAlias = true;
493           break;
494         case kCDXProp_Node_Element:
495           READ_INT16(cdxr.data(), atnum);
496           hasElement = true;
497           break;
498         case kCDXProp_2DPosition:
499           {
500             stringstream& ss = cdxr.data();
501             READ_INT32(ss, y); //yes, this way round
502             READ_INT32(ss, x);
503           }
504             break;
505         case kCDXProp_Atom_Charge:
506           if(cdxr.GetLen()==1)
507             charge = cdxr.data().get();
508           else
509             READ_INT32(cdxr.data(), charge);
510           break;
511         case kCDXProp_Atom_Radical:
512           READ_INT16(cdxr.data(),spin);
513           break;
514         case kCDXProp_Atom_Isotope:
515           READ_INT16(cdxr.data(),iso);
516           break;
517         case kCDXProp_Atom_NumHydrogens:
518           READ_INT16(cdxr.data(), numHs);
519           hasNumHs = true;
520           break;
521         case kCDXProp_Atom_CIPStereochemistry:
522           break;
523         case kCDXObj_Text:
524           aliastext = DoText(cdxr);
525           if(aliastext=="+")
526           {
527             //This node is not an atom, but dangerous to delete
528             pmol->SetTitle("justplus");
529           }
530           break;
531         case kCDXObj_Fragment:
532         /* ignore fragment contained in node
533         if(isAlias)
534           {
535             unsigned Idxbefore = pmol->NumAtoms();
536             if(DoFragmentImpl(cdxr, pmol, atommap, updown))
537               return false;
538           }
539          */
540           //ignore the contents of this node
541           cdxr.IgnoreObject();
542           //cdxr.ReadNext(objectsOnly, cdxr.GetDepth()-1);
543           break;
544         default:
545           if(tag & kCDXTag_Object) //unhandled object
546             while(cdxr.ReadNext());
547         }
548       }
549       //All properties of Node have now been read
550       OBAtom* pAtom = pmol->NewAtom();
551       pAtom->SetVector(x*1.0e-6, -y*1.0e-6, 0); //inv y axis
552       atommap[nodeID] = pmol->NumAtoms();
553       if(isAlias || (!aliastext.empty() && atnum==0xffff))
554       {
555         //Treat text as an alias
556         pAtom->SetAtomicNum(0);
557         AliasData* ad = new AliasData();
558         ad->SetAlias(aliastext);
559         ad->SetOrigin(fileformatInput);
560         pAtom->SetData(ad);
561       }
562       else
563       {
564         if(atnum==0xffff)
565           atnum = 6; //atoms are C by default
566         pAtom->SetAtomicNum(atnum);
567         if (hasNumHs)
568           pAtom->SetImplicitHCount(numHs);
569         else if (atnum==6)
570           handleImplicitCarbons.push_back(pAtom);
571         pAtom->SetFormalCharge(charge);
572         pAtom->SetIsotope(iso);
573         pAtom->SetSpinMultiplicity(spin);
574       }
575     }
576 
577     else if(tag==kCDXObj_Bond)
578     {
579       CDXObjectID bgnID, endID;
580       int order=1, bgnIdx, endIdx ;
581       UINT16 stereo=0;
582 
583       while( (tag = cdxr.ReadNext()) )
584       {
585         switch(tag)
586         {
587         case kCDXProp_Bond_Begin:
588           READ_INT32(cdxr.data(), bgnID);
589           bgnIdx = atommap[bgnID];
590           break;
591         case kCDXProp_Bond_End:
592           READ_INT32(cdxr.data(), endID);
593           endIdx = atommap[endID];
594           break;
595         case kCDXProp_Bond_Order:
596           READ_INT16(cdxr.data(), order);
597           switch (order)
598           {
599           case 0xFFFF: // undefined, keep 1 for now
600             order = 1;
601           case 0x0001:
602           case 0x0002:
603             break;
604           case 0x0004:
605             order = 3;
606             break;
607           case 0x0080: // aromatic bond
608             order = 5;
609             break;
610           default: // other cases are just not supported, keep 1
611             order = 1;
612             break;
613           }
614           break;
615         case kCDXProp_Bond_Display:
616           READ_INT16(cdxr.data(), stereo);
617         break;
618         }
619       }
620 
621       if(!order || !bgnIdx || !endIdx)
622       {
623         obErrorLog.ThrowError(__FUNCTION__,"Incorrect bond", obError);
624         return false;
625       }
626       if(stereo==4 || stereo==7 || stereo==10 || stereo==12)
627         swap(bgnIdx, endIdx);
628       pmol->AddBond(bgnIdx, endIdx, order);
629       if(stereo)
630       {
631         OBBond* pBond = pmol->GetBond(pmol->NumBonds()-1);
632         if(stereo==3 || stereo==4)
633           pBond->SetHash();
634         else if(stereo==6 || stereo==7)
635           pBond->SetWedge();
636       }
637     }
638   }
639   // Handle 'implicit carbons' by adjusting their valence with
640   // implicit hydrognes
641   for(vector<OBAtom*>::iterator vit=handleImplicitCarbons.begin();
642       vit!=handleImplicitCarbons.end(); ++vit)
643     OBAtomAssignTypicalImplicitHydrogens(*vit);
644 
645   return true;
646 }
647 
DoText(CDXReader & cdxr)648 string ChemDrawBinaryXFormat::DoText(CDXReader& cdxr)
649 {
650   CDXTag tag;
651   string text;
652   while( (tag=cdxr.ReadNext()) )
653   {
654     stringstream& ss = cdxr.data();
655     switch(tag)
656     {
657     case kCDXProp_Text:
658       UINT16 nStyleRuns;
659       READ_INT16(ss,nStyleRuns);
660       ss.ignore(nStyleRuns*10);
661       ss >> text;
662     default:
663       if(tag & kCDXTag_Object) //unhandled object
664         while(cdxr.ReadNext());
665     }
666   }
667   return text;
668 }
669 
670 //****************************************************************
ReadNext(bool objectsOnly,int targetDepth)671 CDXTag CDXReader::ReadNext(bool objectsOnly, int targetDepth)
672 {
673   //ostringstream treestream;
674   CDXTag tag;
675   CDXObjectID id;
676 
677   while(ifs)
678   {
679     READ_INT16(ifs, tag);
680     if(tag==0)
681     {
682       if(depth==0)
683       {
684         ifs.setstate(ios::eofbit); //ignore everything after end of document
685         return 0; //end of document
686       }
687       --depth;
688       _tempback = ids.back(); //needed for WriteTree
689       ids.pop_back();
690       if(targetDepth<0 || depth == targetDepth)
691         return 0; //end of object
692     }
693     else if(tag & kCDXTag_Object)
694     {
695       READ_INT32(ifs, id);
696       ids.push_back(id);
697       ++depth;
698       if(targetDepth<0 || depth-1 == targetDepth)
699         return tag; //object
700     }
701     else
702     {
703       //property
704       READ_INT16(ifs, _len);
705 
706       if(objectsOnly)
707         ifs.ignore(_len);
708       else
709       {
710         //copy property data to buffer
711         char* p = new char[_len+1];
712         ifs.read(p, _len);
713         _buf.assign(p, _len);
714         delete[] p;
715         return tag; //property
716       }
717     }
718   }
719   return 0;
720 }
721 /////////////////////////////////////////////////////////////////////
722 
data()723 stringstream& CDXReader::data()
724 {
725   _ss.clear();
726   _ss.str(_buf);
727   return _ss;
728 }
729 /////////////////////////////////////////////////////////////////////
730 
CDXReader(std::istream & is)731 CDXReader::CDXReader(std::istream& is) : ifs(is), depth(0)
732 {
733   //ReadHeader
734   char buffer[kCDX_HeaderStringLen+1];
735   ifs.read(buffer,kCDX_HeaderStringLen);
736   buffer[kCDX_HeaderStringLen] = '\0';
737   if(strncmp(buffer, kCDX_HeaderString, kCDX_HeaderStringLen) == 0)
738     ifs.ignore(kCDX_HeaderLength - kCDX_HeaderStringLen);	// Discard rest of header.
739   else
740   {
741     obErrorLog.ThrowError(__FUNCTION__,"Invalid file, no ChemDraw Header",obError);
742     ifs.setstate(ios::eofbit);
743     throw;
744   }
745 }
746 //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
747 //Routines to display the structure of a cdx binary file
748 
WriteTree(const string & filename,unsigned wtoptions)749 OBText* CDXReader::WriteTree(const string& filename, unsigned wtoptions)
750 {
751   const char indentchar = '\t';
752   std::map<CDXTag, std::string> enummap;
753   ParseEnums(enummap, filename);
754 
755   stringstream tss;
756   tss << hex << showbase;
757 
758   while(*this)
759   {
760     CDXTag tag = ReadNext();
761     if(ifs.eof())
762       return new OBText(tss.str()); //normal exit
763     if(tag==0 && !(wtoptions &1))
764     {
765       //Object end
766       tss << string(depth,indentchar) << "ObjectEnd " << _tempback << endl;
767     }
768     else if(tag & kCDXTag_Object)
769     {
770       //Object
771       tss<<string(depth-1,indentchar) << "Object " << tag
772                    << TagName(enummap,tag) << " id=" << ids.back() << endl;
773     }
774     else
775     {
776       //Property
777       if(!(wtoptions &1))
778       {
779         stringstream ss;
780         ss << _len;
781         tss<<string(depth,indentchar) << "Property  "<< tag << TagName(enummap,tag)
782                      << " [" << ss.str() << " bytes] ";
783         for(unsigned i=0;i<_len;++i)
784         {
785           ss.str("");
786           ss.fill('0');
787           ss.width(8);
788           ss << hex << static_cast<unsigned>(_buf[i]) << dec;
789           tss << ss.str()[6] << ss.str()[7] << ' ';
790         }
791 
792         if(tag==0x700 || tag==kCDXProp_CreationProgram || tag==kCDXProp_CreationDate
793           || tag==kCDXProp_Name)
794         {
795           stringstream ss(_buf);
796           UINT16 nStyleRuns;
797           READ_INT16(ss, nStyleRuns);
798           tss << '\"';
799           for(unsigned i=2+nStyleRuns*10; i<_len; ++i)
800             tss << _buf[i];
801           tss << '\"';
802         }
803         tss << endl;
804       }
805     }
806   }
807   return nullptr; //error exit
808 }
809 
810 ///////////////////////////////////////////////////////////////////////
ParseEnums(map<CDXTag,string> & enummap,const string & filename)811 bool CDXReader::ParseEnums(map<CDXTag, string>& enummap, const string& filename)
812 {
813   ifstream ihs;
814   if(OpenDatafile(ihs, filename).empty())
815   {
816     obErrorLog.ThrowError(__FUNCTION__,
817       filename + " needs to be in the *data* directory when displaying the tree.\n" , obError);
818     return false;
819   }
820   ignore(ihs, "enum CDXDatumID");
821   string ln;
822   vector<string> vec;
823   stringstream ss;
824   CDXTag tag;
825   while(ihs)
826   {
827     getline(ihs, ln);
828     tokenize(vec, ln, " \t,{}");
829     if(vec.size()==0 || vec[0]=="//")
830       continue; //blank and comment lines
831     if(vec[0]==";") //line is }; end of enum
832       return true;
833     if(vec[0][0]!='k') //only collect enums starting with kCDX
834       continue;
835     int tagpos = (vec[1]=="=" && vec.size()>4) ? 4 : 2;
836     ss.str(vec[tagpos]);
837     ss.clear();
838     ss >> hex >> tag;
839     if(ss)
840     {
841       if(tag==0x0400 && vec[0]=="kCDXUser_TemporaryEnd")//special case
842         continue;
843       enummap[tag] = vec[0];
844     }
845   }
846   return false;
847 }
848 /////////////////////////////////////////////////////////////////////////
849 
TagName(map<CDXTag,string> & enummap,CDXTag tag)850 string CDXReader::TagName(map<CDXTag, string>& enummap, CDXTag tag)
851 {
852   string tagname;
853   if(!enummap.empty())
854   {
855     map<CDXTag, std::string>::iterator iter = enummap.find(tag);
856     if(iter!=enummap.end())
857     {
858       tagname=iter->second;
859       //Remove prefix, e.g. kCDXProp_
860       string::size_type pos = tagname.find('_');
861       if(pos!=string::npos)
862       {
863         tagname.erase(0,pos);
864         tagname[0] = ' ';
865       }
866     }
867   }
868   return tagname;
869 }
870 
871 } //namespace
872