1 /**********************************************************************
2 Copyright (C) 2006 by Fredrik Wallner
3 Some portions Copyright (C) 2006-2007 by Geoffrey Hutchsion
4 Some portions Copyright (C) 2011 by Chris Morley
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation version 2 of the License.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14 ***********************************************************************/
15
16 #include <openbabel/babelconfig.h>
17 #include <openbabel/obmolecformat.h>
18 #include <openbabel/mol.h>
19 #include <openbabel/atom.h>
20 #include <openbabel/bond.h>
21 #include <openbabel/elements.h>
22 #include <openbabel/reactionfacade.h>
23 #include <openbabel/stereo/stereo.h>
24 #include <openbabel/obfunctions.h>
25 #include <openbabel/reaction.h>
26 #include <openbabel/tokenst.h>
27 #include <openbabel/alias.h>
28 #include <openbabel/text.h>
29 #include "chemdrawcdx.h"
30
31 #include <iostream>
32 #include <fstream>
33 #include <sstream>
34 #include <map>
35 #include <list>
36
37
38 #if !defined(__CYGWIN__)
// Exchange the two bytes of a 16-bit value (0xAABB becomes 0xBBAA).
static inline unsigned short bswap_16(unsigned short x) {
  const unsigned int upper = static_cast<unsigned int>(x) >> 8;
  const unsigned int lower = static_cast<unsigned int>(x) << 8;
  // The conversion back to unsigned short discards everything above bit 15.
  return static_cast<unsigned short>(upper | lower);
}
42
// Reverse the byte order of a 32-bit value (0xAABBCCDD becomes 0xDDCCBBAA).
// All arithmetic is done on unsigned int, so no intermediate value is ever
// shifted into the sign bit of a promoted int.
static inline unsigned int bswap_32(unsigned int x) {
  return ((x & 0x000000ffu) << 24) |
         ((x & 0x0000ff00u) <<  8) |
         ((x & 0x00ff0000u) >>  8) |
         ((x & 0xff000000u) >> 24);
}
46
// Reverse the byte order of a 64-bit value.
static inline unsigned long long bswap_64(unsigned long long x) {
  unsigned long long swapped = 0;
  // Peel bytes off the low end of x and push them onto the low end of the
  // result, so the first byte taken ends up as the most significant one.
  for (int byteNo = 0; byteNo < 8; ++byteNo) {
    swapped = (swapped << 8) | (x & 0xffull);
    x >>= 8;
  }
  return swapped;
}
50 #endif
51
// READ_INT16(stream,data) / READ_INT32(stream,data)
//
// Read sizeof(data) raw bytes from `stream` into the lvalue `data`. On
// big-endian hosts the value is then byte-swapped with bswap_16/bswap_32;
// on little-endian hosts the bytes are used as they are.
//
// Every variant is wrapped in do { ... } while(0) so that it expands to
// exactly one statement. Without the wrapper the big-endian form is two
// statements, and in an unbraced "if(...) ...; else READ_INTxx(...);" the
// byte swap would run on both branches.
// `data` is evaluated more than once, so it must be a plain lvalue.

// Macs -- need to use Apple macros to deal with Universal binaries correctly
#ifdef __APPLE__
#include <machine/endian.h>
#if BYTE_ORDER == BIG_ENDIAN
#  define READ_INT16(stream,data) \
     do { \
       (stream).read ((char*)&(data), sizeof(data)); \
       (data) = bswap_16 (data); \
     } while(0)
#  define READ_INT32(stream,data) \
     do { \
       (stream).read ((char*)&(data), sizeof(data)); \
       (data) = bswap_32 (data); \
     } while(0)
#else // BYTE_ORDER == LITTLE_ENDIAN
#  define READ_INT16(stream,data) \
     do { (stream).read ((char*)&(data), sizeof(data)); } while(0)
#  define READ_INT32(stream,data) \
     do { (stream).read ((char*)&(data), sizeof(data)); } while(0)
#endif
#else

// Non-Apple systems
// defined in babelconfig.h by autoconf (portable to Solaris, BSD, Linux)
#ifdef WORDS_BIGENDIAN
#  define READ_INT16(stream,data) \
     do { \
       (stream).read ((char*)&(data), sizeof(data)); \
       (data) = bswap_16 (data); \
     } while(0)
#  define READ_INT32(stream,data) \
     do { \
       (stream).read ((char*)&(data), sizeof(data)); \
       (data) = bswap_32 (data); \
     } while(0)
#else
#  define READ_INT16(stream,data) \
     do { (stream).read ((char*)&(data), sizeof(data)); } while(0)
#  define READ_INT32(stream,data) \
     do { (stream).read ((char*)&(data), sizeof(data)); } while(0)
#endif
// end endian / bigendian issues (on non-Mac systems)
#endif
// end Apple/non-Apple systems
88
89 using namespace std;
90 namespace OpenBabel
91 {
92
93 //Class which traverse the tree in CDX binary files
94 class CDXReader
95 {
96 public:
97 CDXReader(std::istream& is);
98 CDXTag ReadNext(bool objectsOnly=false, int targetDepth=-2);
IgnoreObject()99 void IgnoreObject() { ReadNext(true, GetDepth()-1); }
operator bool() const100 operator bool ()const { return (bool)ifs; }
GetDepth() const101 int GetDepth()const { return depth; }
GetLen() const102 int GetLen()const { return _len;} //length of current property data
CurrentID() const103 CDXObjectID CurrentID()const { return ids.back(); }
104 stringstream& data(); //call this only once for each set of property data
105
106 //Routines to display the structure of a cdx binary file
107 OBText* WriteTree(const std::string& filename, unsigned wtoptions);
108 private:
109 bool ParseEnums(std::map<CDXTag, std::string>& enummap, const std::string& filename);
110 std::string TagName(std::map<CDXTag, std::string>& enummap, CDXTag tag);
111
112 private:
113 std::istream& ifs;
114 int depth;
115 std::vector<CDXObjectID> ids;
116 CDXObjectID _tempback;
117 std::string _buf;
118 UINT16 _len;
119 std::stringstream _ss;
120 };
121
122 //**************************************************************
123 class ChemDrawBinaryXFormat : OBMoleculeFormat
124 {
125 public:
126 //Register this format type ID in the constructor
ChemDrawBinaryXFormat()127 ChemDrawBinaryXFormat()
128 {
129 OBConversion::RegisterFormat("cdx",this);
130 }
131
Description()132 virtual const char* Description() //required
133 {
134 return
135 "ChemDraw binary format\n"
136 "Read only\n"
137 "The whole file is read in one call.\n"
138 "Note that a file may contain a mixture of reactions and\n"
139 "molecules.\n"
140
141 "With the -ad option, a human-readable representation of the CDX tree\n"
142 "structure is output as an OBText object. Use textformat to view it::\n\n"
143
144 " obabel input.cdx -otext -ad\n\n"
145
146 "Many reactions in CDX files are not fully specified with reaction data\n"
147 "structures, and may not be completely interpreted by this parser.\n\n"
148
149 "Read Options, e.g. -am\n"
150 " m read molecules only; no reactions\n"
151 " d output CDX tree to OBText object\n"
152 " o display only objects in tree output\n";
153 }
154
SpecificationURL()155 virtual const char* SpecificationURL()
156 {return "http://www.cambridgesoft.com/services/documentation/sdk/chemdraw/cdx/IntroCDX.htm";}
157
GetMIMEType()158 virtual const char* GetMIMEType()
159 { return "chemical/x-cdx"; };
160
Flags()161 virtual unsigned int Flags()
162 {
163 return READBINARY|NOTWRITABLE;
164 }
165
166 ////////////////////////////////////////////////////
167 virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv);
168
169 private:
170 enum graphicType {none, equilArrow};
171 bool TopLevelParse(CDXReader& cdxr, OBConversion* pConv,CDXObjectID ContainingGroup);
172 bool DoFragment(CDXReader& cdxr, OBMol* pmol);
173 bool DoFragmentImpl(CDXReader& cdxr, OBMol* pmol,
174 map<CDXObjectID, unsigned>& atommap, map<OBBond*, OBStereo::BondDirection>& updown);
175 bool DoReaction(CDXReader& cdxr, OBMol* pReact);
176 std::string DoText(CDXReader& cdxr);
177
178 std::vector<OBMol*> LookupMol(CDXObjectID id);
179 graphicType LookupGraphic(CDXObjectID id);
180 OBMol* LookupInMolMap(CDXObjectID id);
181
182 private:
183 bool readReactions;
184 static const bool objectsOnly = true;
185 std::map<CDXObjectID, graphicType> _graphicmap;
186 std::map<CDXObjectID, OBMol*> _molmap;
187 std::map<CDXObjectID, std::vector<CDXObjectID> > _groupmap;
188 // In case of chain A -> B -> C, B is both reactant and product
189 CDXObjectID _lastProdId;
190 typedef std::map<CDXObjectID, std::vector<CDXObjectID> >::iterator GroupMapIterator;
191 static const unsigned usedFlag = 1<<30;
192 };
193
194 //******************************************************************
195 //Global instance of the format
196 ChemDrawBinaryXFormat theChemDrawBinaryXFormat;
197 //******************************************************************
198
/*New CDXformat
Each fragment goes into a new OBMol on the heap.
The CDX id and OBMol* are added to _molmap.
When a reaction is found, the reactant/product/agent CDX ids are looked up in _molmap,
and added to an OBReaction (made by deleting pOb if it is an OBMol
and assigning pOb to a new OBReaction). The OBMol is marked as Used.
When the reaction is complete it is output via AddChemObject().
At the end, any OBMol in the map not marked as Used is output as an OBMol.
*/
208
209
ReadMolecule(OBBase * pOb,OBConversion * pConv)210 bool ChemDrawBinaryXFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv)
211 {
212 _molmap.clear();
213 _graphicmap.clear();
214 _groupmap.clear();
215 OBMol* pmol=nullptr;
216 bool ok = true;
217
218 CDXReader cdxr(*pConv->GetInStream());
219 readReactions = pConv->IsOption("m", OBConversion::INOPTIONS) == nullptr;
220
221 // Write CDX tree only if requested
222 if(pConv->IsOption("d",OBConversion::INOPTIONS))
223 {
224 unsigned wtoptions=0;
225 if(pConv->IsOption("o",OBConversion::INOPTIONS))
226 wtoptions |= 1; //display objects only
227 OBText* pText = cdxr.WriteTree("chemdrawcdx.h", wtoptions);
228 if(pText)
229 {
230 pConv->AddChemObject(pText);
231 return true;
232 }
233 return false;
234 }
235
236 // Normal reading of molecules and reactions
237 //Top level parse
238 while(cdxr)
239 {
240 if(!TopLevelParse(cdxr, pConv, 0))
241 return false;
242 }
243
244 //At the end, output molecules that have not been used in a reaction
245 map<CDXObjectID, OBMol*>::iterator mapiter = _molmap.begin();
246 for(; mapiter!=_molmap.end(); ++mapiter)
247 {
248 pmol = mapiter->second;
249 if(!(pmol->GetFlags() & usedFlag) && strcmp(pmol->GetTitle(),"justplus"))
250 {
251 OBMol* ptmol = static_cast<OBMol*>(pmol->DoTransformations(
252 pConv->GetOptions(OBConversion::GENOPTIONS),pConv));
253 if(!ptmol)
254 delete pmol;
255 else
256 if(!pConv->AddChemObject(ptmol))
257 return false; //error during writing
258 }
259 }
260
261 return ok;
262 }
263 ///////////////////////////////////////////////////////////////////////
TopLevelParse(CDXReader & cdxr,OBConversion * pConv,CDXObjectID ContainingGroup)264 bool ChemDrawBinaryXFormat::TopLevelParse
265 (CDXReader& cdxr, OBConversion* pConv, CDXObjectID ContainingGroup)
266 {
267 bool ok = true;
268 CDXTag tag;
269 while((tag = cdxr.ReadNext(objectsOnly)))
270 {
271 if(tag == kCDXObj_Group)
272 {
273 CDXObjectID cid = cdxr.CurrentID();
274 vector<CDXObjectID> v;
275 _groupmap.insert(make_pair(cid,v)); //empty vector as yet
276 TopLevelParse(cdxr, pConv, cid );
277 }
278
279 else if(tag==kCDXObj_Fragment)
280 {
281 OBMol* pmol = new OBMol;
282 //Save all molecules to the end
283 _molmap[cdxr.CurrentID()] = pmol;
284
285 if(ContainingGroup)
286 {
287 // Add the id of this mol to the group's entry in _groupmap
288 GroupMapIterator gmapiter = _groupmap.find(ContainingGroup);
289 if(gmapiter!=_groupmap.end())
290 gmapiter->second.push_back(cdxr.CurrentID());
291 }
292 ok = DoFragment(cdxr, pmol);
293 }
294
295 else if(tag == kCDXObj_ReactionStep && readReactions)
296 {
297 OBMol* pReact = new OBMol;
298 pReact->SetIsReaction();
299 ok = DoReaction(cdxr, pReact);
300 // Output OBReaction and continue
301 if(pReact)
302 if(!pConv->AddChemObject(pReact))
303 return false; //error during writing
304 }
305
306 else if(ok && tag==kCDXObj_Graphic)
307 {
308 while( (tag = cdxr.ReadNext()) )
309 {
310 stringstream& ss = cdxr.data();
311 if(tag == kCDXProp_Arrow_Type)
312 {
313 char type1=0;
314 UINT16 type2=0;
315 if(cdxr.GetLen()==1)
316 ss.get(type1);
317 else
318 READ_INT16(ss,type2);
319 if(type1==kCDXArrowType_Equilibrium || type2==kCDXArrowType_Equilibrium)
320 _graphicmap[type1+type2] = equilArrow; //save in graphicmap
321 }
322 }
323 }
324 }
325 return true;
326 }
327 ///////////////////////////////////////////////////////////////////////
DoReaction(CDXReader & cdxr,OBMol * pReact)328 bool ChemDrawBinaryXFormat::DoReaction(CDXReader& cdxr, OBMol* pReact)
329 {
330 CDXTag tag;
331 CDXObjectID id;
332 OBReactionFacade facade(pReact);
333 while( (tag = cdxr.ReadNext()) )
334 {
335 if(tag == kCDXProp_ReactionStep_Reactants)
336 {
337 stringstream& ss = cdxr.data();
338 for(unsigned i=0;i<cdxr.GetLen()/4;++i)//for each reactant id
339 {
340 READ_INT32(ss,id);
341 vector<OBMol*> molvec = LookupMol(id); //id could be a group with several mols
342 for(unsigned i=0;i<molvec.size();++i)
343 if(strcmp(molvec[i]->GetTitle(),"justplus"))
344 {
345 facade.AddComponent(molvec[i], REACTANT);
346 }
347 }
348 }
349 else if(tag == kCDXProp_ReactionStep_Products)
350 {
351 stringstream& ss = cdxr.data();
352 for(unsigned i=0;i<cdxr.GetLen()/4;++i)//for each product id
353 {
354 READ_INT32(ss,id);
355 vector<OBMol*> molvec = LookupMol(id); //id could be a group with several mols
356 for(unsigned i=0;i<molvec.size();++i)
357 if(strcmp(molvec[i]->GetTitle(),"justplus"))
358 {
359 facade.AddComponent(molvec[i], PRODUCT);
360 _lastProdId = id;
361 }
362 }
363 }
364 else if(tag==kCDXProp_ReactionStep_Arrows)
365 {
366 READ_INT32(cdxr.data(),id);
367 //if(LookupGraphic(id)==equilArrow) // TODO? Store reversibility somehow?
368 // pReact->SetReversible();
369 }
370 }
371 return true;
372 }
373 ///////////////////////////////////////////////////////////////////////
LookupMol(CDXObjectID id)374 vector<OBMol*> ChemDrawBinaryXFormat::LookupMol(CDXObjectID id)
375 {
376 vector<OBMol*> molvec;
377 //Check whether the id is that of a kCDXObj_Group
378 GroupMapIterator gmapiter;
379 gmapiter = _groupmap.find(id);
380 if(gmapiter != _groupmap.end())
381 {
382 for(unsigned i=0;i<gmapiter->second.size();++i)
383 {
384 OBMol* pmmol = LookupInMolMap(gmapiter->second[i]);
385 if(pmmol)
386 molvec.push_back(pmmol);
387 }
388 }
389 else
390 {
391 //id is not a group; it must be a fragment
392 OBMol* pmmol = LookupInMolMap(id);
393 if(pmmol)
394 molvec.push_back(pmmol);
395 }
396 return molvec;
397 }
398
LookupInMolMap(CDXObjectID id)399 OBMol* ChemDrawBinaryXFormat::LookupInMolMap(CDXObjectID id)
400 {
401 std::map<CDXObjectID, OBMol*>::iterator mapiter;
402 mapiter = _molmap.find(id);
403 if(mapiter!=_molmap.end())
404 {
405 //Mark mol as used in a reaction, so that it will not be output independently
406 mapiter->second->SetFlags(mapiter->second->GetFlags() | usedFlag);
407 return mapiter->second;
408 }
409 else
410 {
411 stringstream ss;
412 ss << "Reactant or product mol not found id = " << hex << showbase << id;
413 obErrorLog.ThrowError(__FUNCTION__, ss.str(), obError);
414 return nullptr;
415 }
416 }
417
418 ////////////////////////////////////////////////////////////////////////
LookupGraphic(CDXObjectID id)419 ChemDrawBinaryXFormat::graphicType ChemDrawBinaryXFormat::LookupGraphic(CDXObjectID id)
420 {
421 std::map<CDXObjectID, graphicType>::iterator mapiter;
422 mapiter = _graphicmap.find(id);
423 if(mapiter != _graphicmap.end())
424 return mapiter->second;
425 else
426 return none;
427 }
428
429 ////////////////////////////////////////////////////////////////////////
DoFragment(CDXReader & cdxr,OBMol * pmol)430 bool ChemDrawBinaryXFormat::DoFragment(CDXReader& cdxr, OBMol* pmol)
431 {
432 map<OBBond*, OBStereo::BondDirection> updown;
433 pmol->SetDimension(2);
434 pmol->BeginModify();
435
436 map<CDXObjectID, unsigned> atommap; //key = CDX id; value = OB atom idx
437
438 //The inner workings of DoFragment,since Fragment elements can be nested
439 DoFragmentImpl(cdxr, pmol, atommap, updown);
440
441 // use 2D coordinates + hash/wedge to determine stereochemistry
442 StereoFrom2D(pmol, &updown);
443
444 pmol->EndModify();
445
446 //Expand any aliases after molecule constructed
447 //Need to save aliases in list first and expand later
448 vector<OBAtom*> aliasatoms;
449 for(int idx=1; idx<=pmol->NumAtoms();++idx)
450 {
451 OBAtom* pAtom = pmol->GetAtom(idx);
452 AliasData* ad = dynamic_cast<AliasData*>(pAtom->GetData(AliasDataType));
453 if(ad && !ad->IsExpanded())
454 aliasatoms.push_back(pAtom);
455 }
456 for(vector<OBAtom*>::iterator vit=aliasatoms.begin();
457 vit!=aliasatoms.end(); ++vit)
458 {
459 int idx = (*vit)->GetIdx();
460 AliasData* ad = dynamic_cast<AliasData*>((*vit)->GetData(AliasDataType));
461 if(ad && !ad->IsExpanded())
462 ad->Expand(*pmol, idx); //Make chemically meaningful, if possible.
463 }
464 return true;
465 }
466
DoFragmentImpl(CDXReader & cdxr,OBMol * pmol,map<CDXObjectID,unsigned> & atommap,map<OBBond *,OBStereo::BondDirection> & updown)467 bool ChemDrawBinaryXFormat::DoFragmentImpl(CDXReader& cdxr, OBMol* pmol,
468 map<CDXObjectID, unsigned>& atommap, map<OBBond*, OBStereo::BondDirection>& updown)
469 {
470 CDXTag tag;
471 std::vector<OBAtom*> handleImplicitCarbons;
472 while((tag = cdxr.ReadNext(objectsOnly)))
473 {
474 if(tag==kCDXObj_Node)
475 {
476 unsigned nodeID = cdxr.CurrentID();
477 bool isAlias=false, hasElement=false;
478 bool hasNumHs = false;
479 UINT16 atnum=-1, spin=0, numHs=0;
480 int x, y, charge=0, iso=0;
481 string aliastext;
482
483 //Read all node properties
484 while( (tag = cdxr.ReadNext()) )
485 {
486 switch(tag)
487 {
488 case kCDXProp_Node_Type:
489 UINT16 type;
490 READ_INT16(cdxr.data(), type);
491 if(type==4 || type==5) //Nickname or fragment
492 isAlias = true;
493 break;
494 case kCDXProp_Node_Element:
495 READ_INT16(cdxr.data(), atnum);
496 hasElement = true;
497 break;
498 case kCDXProp_2DPosition:
499 {
500 stringstream& ss = cdxr.data();
501 READ_INT32(ss, y); //yes, this way round
502 READ_INT32(ss, x);
503 }
504 break;
505 case kCDXProp_Atom_Charge:
506 if(cdxr.GetLen()==1)
507 charge = cdxr.data().get();
508 else
509 READ_INT32(cdxr.data(), charge);
510 break;
511 case kCDXProp_Atom_Radical:
512 READ_INT16(cdxr.data(),spin);
513 break;
514 case kCDXProp_Atom_Isotope:
515 READ_INT16(cdxr.data(),iso);
516 break;
517 case kCDXProp_Atom_NumHydrogens:
518 READ_INT16(cdxr.data(), numHs);
519 hasNumHs = true;
520 break;
521 case kCDXProp_Atom_CIPStereochemistry:
522 break;
523 case kCDXObj_Text:
524 aliastext = DoText(cdxr);
525 if(aliastext=="+")
526 {
527 //This node is not an atom, but dangerous to delete
528 pmol->SetTitle("justplus");
529 }
530 break;
531 case kCDXObj_Fragment:
532 /* ignore fragment contained in node
533 if(isAlias)
534 {
535 unsigned Idxbefore = pmol->NumAtoms();
536 if(DoFragmentImpl(cdxr, pmol, atommap, updown))
537 return false;
538 }
539 */
540 //ignore the contents of this node
541 cdxr.IgnoreObject();
542 //cdxr.ReadNext(objectsOnly, cdxr.GetDepth()-1);
543 break;
544 default:
545 if(tag & kCDXTag_Object) //unhandled object
546 while(cdxr.ReadNext());
547 }
548 }
549 //All properties of Node have now been read
550 OBAtom* pAtom = pmol->NewAtom();
551 pAtom->SetVector(x*1.0e-6, -y*1.0e-6, 0); //inv y axis
552 atommap[nodeID] = pmol->NumAtoms();
553 if(isAlias || (!aliastext.empty() && atnum==0xffff))
554 {
555 //Treat text as an alias
556 pAtom->SetAtomicNum(0);
557 AliasData* ad = new AliasData();
558 ad->SetAlias(aliastext);
559 ad->SetOrigin(fileformatInput);
560 pAtom->SetData(ad);
561 }
562 else
563 {
564 if(atnum==0xffff)
565 atnum = 6; //atoms are C by default
566 pAtom->SetAtomicNum(atnum);
567 if (hasNumHs)
568 pAtom->SetImplicitHCount(numHs);
569 else if (atnum==6)
570 handleImplicitCarbons.push_back(pAtom);
571 pAtom->SetFormalCharge(charge);
572 pAtom->SetIsotope(iso);
573 pAtom->SetSpinMultiplicity(spin);
574 }
575 }
576
577 else if(tag==kCDXObj_Bond)
578 {
579 CDXObjectID bgnID, endID;
580 int order=1, bgnIdx, endIdx ;
581 UINT16 stereo=0;
582
583 while( (tag = cdxr.ReadNext()) )
584 {
585 switch(tag)
586 {
587 case kCDXProp_Bond_Begin:
588 READ_INT32(cdxr.data(), bgnID);
589 bgnIdx = atommap[bgnID];
590 break;
591 case kCDXProp_Bond_End:
592 READ_INT32(cdxr.data(), endID);
593 endIdx = atommap[endID];
594 break;
595 case kCDXProp_Bond_Order:
596 READ_INT16(cdxr.data(), order);
597 switch (order)
598 {
599 case 0xFFFF: // undefined, keep 1 for now
600 order = 1;
601 case 0x0001:
602 case 0x0002:
603 break;
604 case 0x0004:
605 order = 3;
606 break;
607 case 0x0080: // aromatic bond
608 order = 5;
609 break;
610 default: // other cases are just not supported, keep 1
611 order = 1;
612 break;
613 }
614 break;
615 case kCDXProp_Bond_Display:
616 READ_INT16(cdxr.data(), stereo);
617 break;
618 }
619 }
620
621 if(!order || !bgnIdx || !endIdx)
622 {
623 obErrorLog.ThrowError(__FUNCTION__,"Incorrect bond", obError);
624 return false;
625 }
626 if(stereo==4 || stereo==7 || stereo==10 || stereo==12)
627 swap(bgnIdx, endIdx);
628 pmol->AddBond(bgnIdx, endIdx, order);
629 if(stereo)
630 {
631 OBBond* pBond = pmol->GetBond(pmol->NumBonds()-1);
632 if(stereo==3 || stereo==4)
633 pBond->SetHash();
634 else if(stereo==6 || stereo==7)
635 pBond->SetWedge();
636 }
637 }
638 }
639 // Handle 'implicit carbons' by adjusting their valence with
640 // implicit hydrognes
641 for(vector<OBAtom*>::iterator vit=handleImplicitCarbons.begin();
642 vit!=handleImplicitCarbons.end(); ++vit)
643 OBAtomAssignTypicalImplicitHydrogens(*vit);
644
645 return true;
646 }
647
DoText(CDXReader & cdxr)648 string ChemDrawBinaryXFormat::DoText(CDXReader& cdxr)
649 {
650 CDXTag tag;
651 string text;
652 while( (tag=cdxr.ReadNext()) )
653 {
654 stringstream& ss = cdxr.data();
655 switch(tag)
656 {
657 case kCDXProp_Text:
658 UINT16 nStyleRuns;
659 READ_INT16(ss,nStyleRuns);
660 ss.ignore(nStyleRuns*10);
661 ss >> text;
662 default:
663 if(tag & kCDXTag_Object) //unhandled object
664 while(cdxr.ReadNext());
665 }
666 }
667 return text;
668 }
669
670 //****************************************************************
ReadNext(bool objectsOnly,int targetDepth)671 CDXTag CDXReader::ReadNext(bool objectsOnly, int targetDepth)
672 {
673 //ostringstream treestream;
674 CDXTag tag;
675 CDXObjectID id;
676
677 while(ifs)
678 {
679 READ_INT16(ifs, tag);
680 if(tag==0)
681 {
682 if(depth==0)
683 {
684 ifs.setstate(ios::eofbit); //ignore everything after end of document
685 return 0; //end of document
686 }
687 --depth;
688 _tempback = ids.back(); //needed for WriteTree
689 ids.pop_back();
690 if(targetDepth<0 || depth == targetDepth)
691 return 0; //end of object
692 }
693 else if(tag & kCDXTag_Object)
694 {
695 READ_INT32(ifs, id);
696 ids.push_back(id);
697 ++depth;
698 if(targetDepth<0 || depth-1 == targetDepth)
699 return tag; //object
700 }
701 else
702 {
703 //property
704 READ_INT16(ifs, _len);
705
706 if(objectsOnly)
707 ifs.ignore(_len);
708 else
709 {
710 //copy property data to buffer
711 char* p = new char[_len+1];
712 ifs.read(p, _len);
713 _buf.assign(p, _len);
714 delete[] p;
715 return tag; //property
716 }
717 }
718 }
719 return 0;
720 }
721 /////////////////////////////////////////////////////////////////////
722
data()723 stringstream& CDXReader::data()
724 {
725 _ss.clear();
726 _ss.str(_buf);
727 return _ss;
728 }
729 /////////////////////////////////////////////////////////////////////
730
CDXReader(std::istream & is)731 CDXReader::CDXReader(std::istream& is) : ifs(is), depth(0)
732 {
733 //ReadHeader
734 char buffer[kCDX_HeaderStringLen+1];
735 ifs.read(buffer,kCDX_HeaderStringLen);
736 buffer[kCDX_HeaderStringLen] = '\0';
737 if(strncmp(buffer, kCDX_HeaderString, kCDX_HeaderStringLen) == 0)
738 ifs.ignore(kCDX_HeaderLength - kCDX_HeaderStringLen); // Discard rest of header.
739 else
740 {
741 obErrorLog.ThrowError(__FUNCTION__,"Invalid file, no ChemDraw Header",obError);
742 ifs.setstate(ios::eofbit);
743 throw;
744 }
745 }
746 //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
747 //Routines to display the structure of a cdx binary file
748
WriteTree(const string & filename,unsigned wtoptions)749 OBText* CDXReader::WriteTree(const string& filename, unsigned wtoptions)
750 {
751 const char indentchar = '\t';
752 std::map<CDXTag, std::string> enummap;
753 ParseEnums(enummap, filename);
754
755 stringstream tss;
756 tss << hex << showbase;
757
758 while(*this)
759 {
760 CDXTag tag = ReadNext();
761 if(ifs.eof())
762 return new OBText(tss.str()); //normal exit
763 if(tag==0 && !(wtoptions &1))
764 {
765 //Object end
766 tss << string(depth,indentchar) << "ObjectEnd " << _tempback << endl;
767 }
768 else if(tag & kCDXTag_Object)
769 {
770 //Object
771 tss<<string(depth-1,indentchar) << "Object " << tag
772 << TagName(enummap,tag) << " id=" << ids.back() << endl;
773 }
774 else
775 {
776 //Property
777 if(!(wtoptions &1))
778 {
779 stringstream ss;
780 ss << _len;
781 tss<<string(depth,indentchar) << "Property "<< tag << TagName(enummap,tag)
782 << " [" << ss.str() << " bytes] ";
783 for(unsigned i=0;i<_len;++i)
784 {
785 ss.str("");
786 ss.fill('0');
787 ss.width(8);
788 ss << hex << static_cast<unsigned>(_buf[i]) << dec;
789 tss << ss.str()[6] << ss.str()[7] << ' ';
790 }
791
792 if(tag==0x700 || tag==kCDXProp_CreationProgram || tag==kCDXProp_CreationDate
793 || tag==kCDXProp_Name)
794 {
795 stringstream ss(_buf);
796 UINT16 nStyleRuns;
797 READ_INT16(ss, nStyleRuns);
798 tss << '\"';
799 for(unsigned i=2+nStyleRuns*10; i<_len; ++i)
800 tss << _buf[i];
801 tss << '\"';
802 }
803 tss << endl;
804 }
805 }
806 }
807 return nullptr; //error exit
808 }
809
810 ///////////////////////////////////////////////////////////////////////
ParseEnums(map<CDXTag,string> & enummap,const string & filename)811 bool CDXReader::ParseEnums(map<CDXTag, string>& enummap, const string& filename)
812 {
813 ifstream ihs;
814 if(OpenDatafile(ihs, filename).empty())
815 {
816 obErrorLog.ThrowError(__FUNCTION__,
817 filename + " needs to be in the *data* directory when displaying the tree.\n" , obError);
818 return false;
819 }
820 ignore(ihs, "enum CDXDatumID");
821 string ln;
822 vector<string> vec;
823 stringstream ss;
824 CDXTag tag;
825 while(ihs)
826 {
827 getline(ihs, ln);
828 tokenize(vec, ln, " \t,{}");
829 if(vec.size()==0 || vec[0]=="//")
830 continue; //blank and comment lines
831 if(vec[0]==";") //line is }; end of enum
832 return true;
833 if(vec[0][0]!='k') //only collect enums starting with kCDX
834 continue;
835 int tagpos = (vec[1]=="=" && vec.size()>4) ? 4 : 2;
836 ss.str(vec[tagpos]);
837 ss.clear();
838 ss >> hex >> tag;
839 if(ss)
840 {
841 if(tag==0x0400 && vec[0]=="kCDXUser_TemporaryEnd")//special case
842 continue;
843 enummap[tag] = vec[0];
844 }
845 }
846 return false;
847 }
848 /////////////////////////////////////////////////////////////////////////
849
TagName(map<CDXTag,string> & enummap,CDXTag tag)850 string CDXReader::TagName(map<CDXTag, string>& enummap, CDXTag tag)
851 {
852 string tagname;
853 if(!enummap.empty())
854 {
855 map<CDXTag, std::string>::iterator iter = enummap.find(tag);
856 if(iter!=enummap.end())
857 {
858 tagname=iter->second;
859 //Remove prefix, e.g. kCDXProp_
860 string::size_type pos = tagname.find('_');
861 if(pos!=string::npos)
862 {
863 tagname.erase(0,pos);
864 tagname[0] = ' ';
865 }
866 }
867 }
868 return tagname;
869 }
870
871 } //namespace
872