1 /* Copyright (C) 1997-2007 Egon Willighagen <egonw@users.sf.net> 2 * 3 * Contact: cdk-devel@lists.sourceforge.net 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU Lesser General Public License 7 * as published by the Free Software Foundation; either version 2.1 8 * of the License, or (at your option) any later version. 9 * All we ask is that proper credit is given for our work, which includes 10 * - but is not limited to - adding the above copyright notice to the beginning 11 * of your source code files, and to any copyright notice that you may distribute 12 * with programs based on this work. 13 * 14 * This program is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 * GNU Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public License 20 * along with this program; if not, write to the Free Software 21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 22 * 23 */ 24 package org.openscience.cdk.io.cml; 25 26 import java.util.ArrayList; 27 import java.util.HashMap; 28 import java.util.Hashtable; 29 import java.util.Iterator; 30 import java.util.List; 31 import java.util.Map; 32 import java.util.StringTokenizer; 33 34 import javax.vecmath.Point2d; 35 import javax.vecmath.Point3d; 36 import javax.vecmath.Vector3d; 37 38 import org.openscience.cdk.CDKConstants; 39 import org.openscience.cdk.dict.DictRef; 40 import org.openscience.cdk.geometry.CrystalGeometryTools; 41 import org.openscience.cdk.interfaces.IAtom; 42 import org.openscience.cdk.interfaces.IAtomContainer; 43 import org.openscience.cdk.interfaces.IAtomContainerSet; 44 import org.openscience.cdk.interfaces.IBond; 45 import org.openscience.cdk.interfaces.IBond.Order; 46 import org.openscience.cdk.interfaces.IChemFile; 47 import org.openscience.cdk.interfaces.IChemModel; 48 import org.openscience.cdk.interfaces.IChemSequence; 49 import org.openscience.cdk.interfaces.ICrystal; 50 import org.openscience.cdk.interfaces.IMonomer; 51 import org.openscience.cdk.interfaces.IPseudoAtom; 52 import org.openscience.cdk.interfaces.IReaction; 53 import org.openscience.cdk.interfaces.IReactionSet; 54 import org.openscience.cdk.interfaces.ISingleElectron; 55 import org.openscience.cdk.interfaces.IStrand; 56 import org.openscience.cdk.interfaces.ITetrahedralChirality.Stereo; 57 import org.openscience.cdk.stereo.TetrahedralChirality; 58 import org.openscience.cdk.tools.ILoggingTool; 59 import org.openscience.cdk.tools.LoggingToolFactory; 60 import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; 61 import org.openscience.cdk.tools.manipulator.BondManipulator; 62 import org.openscience.cdk.tools.periodictable.PeriodicTable; 63 import org.xml.sax.Attributes; 64 65 /** 66 * Core CML 1.x and 2.x elements are parsed by this class (see {@cdk.cite WIL01}). 67 * 68 * <p>Please file a bug report if this parser fails to parse 69 * a certain element or attribute value in a valid CML document. 70 * 71 * @cdk.module io 72 * @cdk.githash 73 * 74 * @author Egon Willighagen <egonw@sci.kun.nl> 75 **/ 76 public class CMLCoreModule implements ICMLModule { 77 78 protected ILoggingTool logger; 79 protected final String SYSTEMID = "CML-1999-05-15"; 80 // protected IChemicalDocumentObject cdo; 81 82 // data model to store things into 83 protected IChemFile currentChemFile; 84 85 protected IAtomContainer currentMolecule; 86 protected IAtomContainerSet currentMoleculeSet; 87 protected IChemModel currentChemModel; 88 protected IChemSequence currentChemSequence; 89 protected IReactionSet currentReactionSet; 90 protected IReaction currentReaction; 91 protected IAtom currentAtom; 92 protected IBond currentBond; 93 protected IStrand currentStrand; 94 protected IMonomer currentMonomer; 95 protected Map<String, IAtom> atomEnumeration; 96 protected List<String> moleculeCustomProperty; 97 98 // helper fields 99 protected int formulaCounter; 100 protected int atomCounter; 101 protected List<String> elsym; 102 protected List<String> eltitles; 103 protected List<String> elid; 104 protected List<String> formula; 105 protected List<String> formalCharges; 106 protected List<String> partialCharges; 107 protected List<String> isotope; 108 protected List<String> atomicNumbers; 109 protected List<String> exactMasses; 110 protected List<String> x3; 111 protected List<String> y3; 112 protected List<String> z3; 113 protected List<String> x2; 114 protected List<String> y2; 115 protected List<String> xfract; 116 protected List<String> yfract; 117 protected List<String> zfract; 118 protected List<String> hCounts; 119 protected List<String> atomParities; 120 protected List<String> parityARef1; 121 protected List<String> parityARef2; 122 protected List<String> parityARef3; 123 protected List<String> parityARef4; 124 protected List<String> atomDictRefs; 125 protected List<String> atomAromaticities; 126 protected List<String> spinMultiplicities; 127 protected List<String> occupancies; 128 protected Map<Integer, List<String>> atomCustomProperty; 129 protected boolean parityAtomsGiven; 130 protected boolean parityGiven; 131 132 protected int bondCounter; 133 protected List<String> bondid; 134 protected List<String> bondARef1; 135 protected List<String> bondARef2; 136 protected List<String> order; 137 protected List<String> bondStereo; 138 protected List<String> bondDictRefs; 139 protected List<String> bondElid; 140 protected List<Boolean> bondAromaticity; 141 protected Map<String, Map<String, String>> bondCustomProperty; 142 protected boolean stereoGiven; 143 protected String inchi; 144 protected int curRef; 145 protected int CurrentElement; 146 protected String BUILTIN; 147 protected String DICTREF; 148 protected String elementTitle; 149 protected String currentChars; 150 151 protected double[] unitcellparams; 152 protected int crystalScalar; 153 154 // private Vector3d aAxis; 155 // private Vector3d bAxis; 156 // private Vector3d cAxis; 157 boolean cartesianAxesSet = false; 158 CMLCoreModule(IChemFile chemFile)159 public CMLCoreModule(IChemFile chemFile) { 160 logger = LoggingToolFactory.createLoggingTool(CMLCoreModule.class); 161 this.currentChemFile = chemFile; 162 } 163 CMLCoreModule(ICMLModule conv)164 public CMLCoreModule(ICMLModule conv) { 165 logger = LoggingToolFactory.createLoggingTool(CMLCoreModule.class); 166 inherit(conv); 167 } 168 169 @Override inherit(ICMLModule convention)170 public void inherit(ICMLModule convention) { 171 if (convention instanceof CMLCoreModule) { 172 CMLCoreModule conv = (CMLCoreModule) convention; 173 174 // copy the data model 175 this.currentChemFile = conv.currentChemFile; 176 this.currentMolecule = conv.currentMolecule; 177 this.currentMoleculeSet = conv.currentMoleculeSet; 178 this.currentChemModel = conv.currentChemModel; 179 this.currentChemSequence = conv.currentChemSequence; 180 this.currentReactionSet = conv.currentReactionSet; 181 this.currentReaction = conv.currentReaction; 182 this.currentAtom = conv.currentAtom; 183 this.currentStrand = conv.currentStrand; 184 this.currentMonomer = conv.currentMonomer; 185 this.atomEnumeration = conv.atomEnumeration; 186 this.moleculeCustomProperty = conv.moleculeCustomProperty; 187 188 // copy the intermediate fields 189 this.logger = conv.logger; 190 this.BUILTIN = conv.BUILTIN; 191 this.atomCounter = conv.atomCounter; 192 this.formulaCounter = conv.formulaCounter; 193 this.elsym = conv.elsym; 194 this.eltitles = conv.eltitles; 195 this.elid = conv.elid; 196 this.formalCharges = conv.formalCharges; 197 this.partialCharges = conv.partialCharges; 198 this.isotope = conv.isotope; 199 this.atomicNumbers = conv.atomicNumbers; 200 this.exactMasses = conv.exactMasses; 201 this.x3 = conv.x3; 202 this.y3 = conv.y3; 203 this.z3 = conv.z3; 204 this.x2 = conv.x2; 205 this.y2 = conv.y2; 206 this.xfract = conv.xfract; 207 this.yfract = conv.yfract; 208 this.zfract = conv.zfract; 209 this.hCounts = conv.hCounts; 210 this.atomParities = conv.atomParities; 211 this.parityARef1 = conv.parityARef1; 212 this.parityARef2 = conv.parityARef2; 213 this.parityARef3 = conv.parityARef3; 214 this.parityARef4 = conv.parityARef4; 215 this.atomDictRefs = conv.atomDictRefs; 216 this.atomAromaticities = conv.atomAromaticities; 217 this.spinMultiplicities = conv.spinMultiplicities; 218 this.occupancies = conv.occupancies; 219 this.bondCounter = conv.bondCounter; 220 this.bondid = conv.bondid; 221 this.bondARef1 = conv.bondARef1; 222 this.bondARef2 = conv.bondARef2; 223 this.order = conv.order; 224 this.bondStereo = conv.bondStereo; 225 this.bondCustomProperty = conv.bondCustomProperty; 226 this.atomCustomProperty = conv.atomCustomProperty; 227 this.bondDictRefs = conv.bondDictRefs; 228 this.bondAromaticity = conv.bondAromaticity; 229 this.curRef = conv.curRef; 230 this.unitcellparams = conv.unitcellparams; 231 this.inchi = conv.inchi; 232 } else { 233 logger.warn("Cannot inherit information from module: ", convention.getClass().getName()); 234 } 235 } 236 237 @Override returnChemFile()238 public IChemFile returnChemFile() { 239 return currentChemFile; 240 } 241 242 /** 243 * Clean all data about parsed data. 244 */ newMolecule()245 protected void newMolecule() { 246 newMoleculeData(); 247 newAtomData(); 248 newBondData(); 249 newCrystalData(); 250 newFormulaData(); 251 } 252 253 /** 254 * Clean all data about the molecule itself. 255 */ newMoleculeData()256 protected void newMoleculeData() { 257 this.inchi = null; 258 } 259 260 /** 261 * Clean all data about read formulas. 262 */ newFormulaData()263 protected void newFormulaData() { 264 formulaCounter = 0; 265 formula = new ArrayList<String>(); 266 } 267 268 /** 269 * Clean all data about read atoms. 270 */ newAtomData()271 protected void newAtomData() { 272 atomCounter = 0; 273 elsym = new ArrayList<String>(); 274 elid = new ArrayList<String>(); 275 eltitles = new ArrayList<String>(); 276 formalCharges = new ArrayList<String>(); 277 partialCharges = new ArrayList<String>(); 278 isotope = new ArrayList<String>(); 279 atomicNumbers = new ArrayList<String>(); 280 exactMasses = new ArrayList<String>(); 281 x3 = new ArrayList<String>(); 282 y3 = new ArrayList<String>(); 283 z3 = new ArrayList<String>(); 284 x2 = new ArrayList<String>(); 285 y2 = new ArrayList<String>(); 286 xfract = new ArrayList<String>(); 287 yfract = new ArrayList<String>(); 288 zfract = new ArrayList<String>(); 289 hCounts = new ArrayList<String>(); 290 atomParities = new ArrayList<String>(); 291 parityARef1 = new ArrayList<String>(); 292 parityARef2 = new ArrayList<String>(); 293 parityARef3 = new ArrayList<String>(); 294 parityARef4 = new ArrayList<String>(); 295 atomAromaticities = new ArrayList<String>(); 296 atomDictRefs = new ArrayList<String>(); 297 spinMultiplicities = new ArrayList<String>(); 298 occupancies = new ArrayList<String>(); 299 atomCustomProperty = new HashMap<Integer, List<String>>(); 300 } 301 302 /** 303 * Clean all data about read bonds. 304 */ newBondData()305 protected void newBondData() { 306 bondCounter = 0; 307 bondid = new ArrayList<String>(); 308 bondARef1 = new ArrayList<String>(); 309 bondARef2 = new ArrayList<String>(); 310 order = new ArrayList<String>(); 311 bondStereo = new ArrayList<String>(); 312 bondCustomProperty = new Hashtable<String, Map<String, String>>(); 313 bondDictRefs = new ArrayList<String>(); 314 bondElid = new ArrayList<String>(); 315 bondAromaticity = new ArrayList<Boolean>(); 316 } 317 318 /** 319 * Clean all data about read bonds. 320 */ newCrystalData()321 protected void newCrystalData() { 322 unitcellparams = new double[6]; 323 cartesianAxesSet = false; 324 crystalScalar = 0; 325 // aAxis = new Vector3d(); 326 // bAxis = new Vector3d(); 327 // cAxis = new Vector3d(); 328 } 329 330 @Override startDocument()331 public void startDocument() { 332 logger.info("Start XML Doc"); 333 // cdo.startDocument(); 334 currentChemSequence = currentChemFile.getBuilder().newInstance(IChemSequence.class); 335 currentChemModel = currentChemFile.getBuilder().newInstance(IChemModel.class); 336 currentMoleculeSet = currentChemFile.getBuilder().newInstance(IAtomContainerSet.class); 337 currentMolecule = currentChemFile.getBuilder().newInstance(IAtomContainer.class); 338 atomEnumeration = new HashMap<String, IAtom>(); 339 moleculeCustomProperty = new ArrayList<String>(); 340 341 newMolecule(); 342 BUILTIN = ""; 343 curRef = 0; 344 } 345 346 @Override endDocument()347 public void endDocument() { 348 // cdo.endDocument(); 349 if (currentReactionSet != null && currentReactionSet.getReactionCount() == 0 && currentReaction != null) { 350 logger.debug("Adding reaction to ReactionSet"); 351 currentReactionSet.addReaction(currentReaction); 352 } 353 if (currentReactionSet != null && currentChemModel.getReactionSet() == null) { 354 logger.debug("Adding SOR to ChemModel"); 355 currentChemModel.setReactionSet(currentReactionSet); 356 } 357 if (currentMoleculeSet != null && currentMoleculeSet.getAtomContainerCount() != 0) { 358 logger.debug("Adding reaction to MoleculeSet"); 359 currentChemModel.setMoleculeSet(currentMoleculeSet); 360 } 361 if (currentChemSequence.getChemModelCount() == 0) { 362 logger.debug("Adding ChemModel to ChemSequence"); 363 currentChemSequence.addChemModel(currentChemModel); 364 } 365 if (currentChemFile.getChemSequenceCount() == 0) { 366 // assume there is one non-animation ChemSequence 367 // addChemSequence(currentChemSequence); 368 currentChemFile.addChemSequence(currentChemSequence); 369 } 370 371 logger.info("End XML Doc"); 372 } 373 374 @Override startElement(CMLStack xpath, String uri, String local, String raw, Attributes atts)375 public void startElement(CMLStack xpath, String uri, String local, String raw, Attributes atts) { 376 String name = local; 377 logger.debug("StartElement"); 378 currentChars = ""; 379 380 BUILTIN = ""; 381 DICTREF = ""; 382 383 for (int i = 0; i < atts.getLength(); i++) { 384 String qname = atts.getQName(i); 385 if (qname.equals("builtin")) { 386 BUILTIN = atts.getValue(i); 387 logger.debug(name, "->BUILTIN found: ", atts.getValue(i)); 388 } else if (qname.equals("dictRef")) { 389 DICTREF = atts.getValue(i); 390 logger.debug(name, "->DICTREF found: ", atts.getValue(i)); 391 } else if (qname.equals("title")) { 392 elementTitle = atts.getValue(i); 393 logger.debug(name, "->TITLE found: ", atts.getValue(i)); 394 } else { 395 logger.debug("Qname: ", qname); 396 } 397 } 398 399 if ("atom".equals(name)) { 400 atomCounter++; 401 for (int i = 0; i < atts.getLength(); i++) { 402 403 String att = atts.getQName(i); 404 String value = atts.getValue(i); 405 406 if (att.equals("id")) { // this is supported in CML 1.x 407 elid.add(value); 408 } // this is supported in CML 2.0 409 else if (att.equals("elementType")) { 410 elsym.add(value); 411 } // this is supported in CML 2.0 412 else if (att.equals("title")) { 413 eltitles.add(value); 414 } // this is supported in CML 2.0 415 else if (att.equals("x2")) { 416 x2.add(value); 417 } // this is supported in CML 2.0 418 else if (att.equals("xy2")) { 419 StringTokenizer tokenizer = new StringTokenizer(value); 420 x2.add(tokenizer.nextToken()); 421 y2.add(tokenizer.nextToken()); 422 } // this is supported in CML 2.0 423 else if (att.equals("xyzFract")) { 424 StringTokenizer tokenizer = new StringTokenizer(value); 425 xfract.add(tokenizer.nextToken()); 426 yfract.add(tokenizer.nextToken()); 427 zfract.add(tokenizer.nextToken()); 428 } // this is supported in CML 2.0 429 else if (att.equals("xyz3")) { 430 StringTokenizer tokenizer = new StringTokenizer(value); 431 x3.add(tokenizer.nextToken()); 432 y3.add(tokenizer.nextToken()); 433 z3.add(tokenizer.nextToken()); 434 } // this is supported in CML 2.0 435 else if (att.equals("y2")) { 436 y2.add(value); 437 } // this is supported in CML 2.0 438 else if (att.equals("x3")) { 439 x3.add(value); 440 } // this is supported in CML 2.0 441 else if (att.equals("y3")) { 442 y3.add(value); 443 } // this is supported in CML 2.0 444 else if (att.equals("z3")) { 445 z3.add(value); 446 } // this is supported in CML 2.0 447 else if (att.equals("xFract")) { 448 xfract.add(value); 449 } // this is supported in CML 2.0 450 else if (att.equals("yFract")) { 451 yfract.add(value); 452 } // this is supported in CML 2.0 453 else if (att.equals("zFract")) { 454 zfract.add(value); 455 } // this is supported in CML 2.0 456 else if (att.equals("formalCharge")) { 457 formalCharges.add(value); 458 } // this is supported in CML 2.0 459 else if (att.equals("hydrogenCount")) { 460 hCounts.add(value); 461 } else if (att.equals("isotopeNumber")) { 462 isotope.add(value); 463 } else if (att.equals("dictRef")) { 464 logger.debug("occupancy: " + value); 465 atomDictRefs.add(value); 466 } else if (att.equals("spinMultiplicity")) { 467 spinMultiplicities.add(value); 468 } else if (att.equals("occupancy")) { 469 occupancies.add(value); 470 } 471 472 else { 473 logger.warn("Unparsed attribute: " + att); 474 } 475 476 parityAtomsGiven = false; 477 parityGiven = false; 478 } 479 } else if ("atomArray".equals(name) && !xpath.endsWith("formula", "atomArray")) { 480 boolean atomsCounted = false; 481 for (int i = 0; i < atts.getLength(); i++) { 482 String att = atts.getQName(i); 483 int count = 0; 484 if (att.equals("atomID")) { 485 count = addArrayElementsTo(elid, atts.getValue(i)); 486 } else if (att.equals("elementType")) { 487 count = addArrayElementsTo(elsym, atts.getValue(i)); 488 } else if (att.equals("x2")) { 489 count = addArrayElementsTo(x2, atts.getValue(i)); 490 } else if (att.equals("y2")) { 491 count = addArrayElementsTo(y2, atts.getValue(i)); 492 } else if (att.equals("x3")) { 493 count = addArrayElementsTo(x3, atts.getValue(i)); 494 } else if (att.equals("y3")) { 495 count = addArrayElementsTo(y3, atts.getValue(i)); 496 } else if (att.equals("z3")) { 497 count = addArrayElementsTo(z3, atts.getValue(i)); 498 } else if (att.equals("xFract")) { 499 count = addArrayElementsTo(xfract, atts.getValue(i)); 500 } else if (att.equals("yFract")) { 501 count = addArrayElementsTo(yfract, atts.getValue(i)); 502 } else if (att.equals("zFract")) { 503 count = addArrayElementsTo(zfract, atts.getValue(i)); 504 } else { 505 logger.warn("Unparsed attribute: " + att); 506 } 507 if (!atomsCounted) { 508 atomCounter += count; 509 atomsCounted = true; 510 } 511 } 512 } else if ("atomParity".equals(name)) { 513 for (int i = 0; i < atts.getLength(); i++) { 514 String att = atts.getQName(i); 515 if (att.equals("atomRefs4") && !parityAtomsGiven) { 516 //Expect exactly four references 517 try { 518 StringTokenizer st = new StringTokenizer(atts.getValue(i)); 519 parityARef1.add((String) st.nextElement()); 520 parityARef2.add((String) st.nextElement()); 521 parityARef3.add((String) st.nextElement()); 522 parityARef4.add((String) st.nextElement()); 523 parityAtomsGiven = true; 524 } catch (Exception e) { 525 logger.error("Error in CML file: ", e.getMessage()); 526 logger.debug(e); 527 } 528 } 529 } 530 } else if ("bond".equals(name)) { 531 bondCounter++; 532 for (int i = 0; i < atts.getLength(); i++) { 533 String att = atts.getQName(i); 534 logger.debug("B2 ", att, "=", atts.getValue(i)); 535 536 if (att.equals("id")) { 537 bondid.add(atts.getValue(i)); 538 logger.debug("B3 ", bondid); 539 } else if (att.equals("atomRefs") || // this is CML 1.x support 540 att.equals("atomRefs2")) { // this is CML 2.0 support 541 542 // expect exactly two references 543 try { 544 StringTokenizer st = new StringTokenizer(atts.getValue(i)); 545 bondARef1.add((String) st.nextElement()); 546 bondARef2.add((String) st.nextElement()); 547 } catch (Exception e) { 548 logger.error("Error in CML file: ", e.getMessage()); 549 logger.debug(e); 550 } 551 } else if (att.equals("order")) { // this is CML 2.0 support 552 order.add(atts.getValue(i).trim()); 553 } else if (att.equals("dictRef")) { 554 bondDictRefs.add(atts.getValue(i).trim()); 555 } 556 } 557 558 stereoGiven = false; 559 curRef = 0; 560 } else if ("bondArray".equals(name)) { 561 boolean bondsCounted = false; 562 for (int i = 0; i < atts.getLength(); i++) { 563 String att = atts.getQName(i); 564 int count = 0; 565 if (att.equals("bondID")) { 566 count = addArrayElementsTo(bondid, atts.getValue(i)); 567 } else if (att.equals("atomRefs1")) { 568 count = addArrayElementsTo(bondARef1, atts.getValue(i)); 569 } else if (att.equals("atomRefs2")) { 570 count = addArrayElementsTo(bondARef2, atts.getValue(i)); 571 } else if (att.equals("atomRef1")) { 572 count = addArrayElementsTo(bondARef1, atts.getValue(i)); 573 } else if (att.equals("atomRef2")) { 574 count = addArrayElementsTo(bondARef2, atts.getValue(i)); 575 } else if (att.equals("order")) { 576 count = addArrayElementsTo(order, atts.getValue(i)); 577 } else { 578 logger.warn("Unparsed attribute: " + att); 579 } 580 if (!bondsCounted) { 581 bondCounter += count; 582 bondsCounted = true; 583 } 584 } 585 curRef = 0; 586 } else if ("bondStereo".equals(name)) { 587 for (int i = 0; i < atts.getLength(); i++) { 588 if (atts.getQName(i).equals("dictRef")) { 589 String value = atts.getValue(i); 590 if (value.startsWith("cml:") && value.length() > 4) { 591 bondStereo.add(value.substring(4)); 592 stereoGiven = true; 593 } 594 } 595 } 596 } else if ("bondType".equals(name)) { 597 for (int i = 0; i < atts.getLength(); i++) { 598 if (atts.getQName(i).equals("dictRef")) { 599 if (atts.getValue(i).equals("cdk:aromaticBond")) bondAromaticity.add(Boolean.TRUE); 600 } 601 } 602 } else if ("molecule".equals(name)) { 603 newMolecule(); 604 BUILTIN = ""; 605 // cdo.startObject("Molecule"); 606 if (currentChemModel == null) 607 currentChemModel = currentChemFile.getBuilder().newInstance(IChemModel.class); 608 if (currentMoleculeSet == null) 609 currentMoleculeSet = currentChemFile.getBuilder().newInstance(IAtomContainerSet.class); 610 currentMolecule = currentChemFile.getBuilder().newInstance(IAtomContainer.class); 611 for (int i = 0; i < atts.getLength(); i++) { 612 if (atts.getQName(i).equals("id")) { 613 // cdo.setObjectProperty("Molecule", "id", atts.getValue(i)); 614 currentMolecule.setID(atts.getValue(i)); 615 } else if (atts.getQName(i).equals("dictRef")) { 616 // cdo.setObjectProperty("Molecule", "dictRef", atts.getValue(i)); 617 currentMolecule.setProperty(new DictRef(DICTREF, atts.getValue(i)), atts.getValue(i)); 618 } 619 } 620 } else if ("crystal".equals(name)) { 621 newCrystalData(); 622 // cdo.startObject("Crystal"); 623 currentMolecule = currentChemFile.getBuilder().newInstance(ICrystal.class, currentMolecule); 624 for (int i = 0; i < atts.getLength(); i++) { 625 String att = atts.getQName(i); 626 if (att.equals("z")) { 627 // cdo.setObjectProperty("Crystal", "z", atts.getValue(i)); 628 ((ICrystal) currentMolecule).setZ(Integer.parseInt(atts.getValue(i))); 629 } 630 } 631 } else if ("symmetry".equals(name)) { 632 for (int i = 0; i < atts.getLength(); i++) { 633 String att = atts.getQName(i); 634 if (att.equals("spaceGroup")) { 635 // cdo.setObjectProperty("Crystal", "spacegroup", atts.getValue(i)); 636 ((ICrystal) currentMolecule).setSpaceGroup(atts.getValue(i)); 637 } 638 } 639 } else if ("identifier".equals(name)) { 640 if (atts.getValue("convention") != null && atts.getValue("convention").equals("iupac:inchi") 641 && atts.getValue("value") != null) { 642 // cdo.setObjectProperty("Molecule", "inchi", atts.getValue("value")); 643 currentMolecule.setProperty(CDKConstants.INCHI, atts.getValue("value")); 644 } 645 } else if ("scalar".equals(name)) { 646 if (xpath.endsWith("crystal", "scalar")) crystalScalar++; 647 } else if ("label".equals(name)) { 648 if (xpath.endsWith("atomType", "label")) { 649 // cdo.setObjectProperty("Atom", "atomTypeLabel", atts.getValue("value")); 650 currentAtom.setAtomTypeName(atts.getValue("value")); 651 } 652 } else if ("list".equals(name)) { 653 // cdo.startObject("MoleculeSet"); 654 if (DICTREF.equals("cdk:model")) { 655 currentChemModel = currentChemFile.getBuilder().newInstance(IChemModel.class); 656 // see if there is an ID attribute 657 for (int i = 0; i < atts.getLength(); i++) { 658 String att = atts.getQName(i); 659 if (att.equals("id")) { 660 currentChemModel.setID(atts.getValue(i)); 661 } 662 } 663 } else if (DICTREF.equals("cdk:moleculeSet")) { 664 currentMoleculeSet = currentChemFile.getBuilder().newInstance(IAtomContainerSet.class); 665 // see if there is an ID attribute 666 for (int i = 0; i < atts.getLength(); i++) { 667 String att = atts.getQName(i); 668 if (att.equals("id")) { 669 currentMoleculeSet.setID(atts.getValue(i)); 670 } 671 } 672 currentMolecule = currentChemFile.getBuilder().newInstance(IAtomContainer.class); 673 } else { 674 // the old default 675 currentMoleculeSet = currentChemFile.getBuilder().newInstance(IAtomContainerSet.class); 676 // see if there is an ID attribute 677 for (int i = 0; i < atts.getLength(); i++) { 678 String att = atts.getQName(i); 679 if (att.equals("id")) { 680 currentMoleculeSet.setID(atts.getValue(i)); 681 } 682 } 683 currentMolecule = currentChemFile.getBuilder().newInstance(IAtomContainer.class); 684 } 685 } else if ("formula".equals(name)) { 686 formulaCounter++; 687 for (int i = 0; i < atts.getLength(); i++) { 688 String att = atts.getQName(i); 689 String value = atts.getValue(i); 690 if (att.equals("concise")) { 691 formula.add(value); 692 } 693 } 694 } 695 } 696 697 @Override endElement(CMLStack xpath, String uri, String name, String raw)698 public void endElement(CMLStack xpath, String uri, String name, String raw) { 699 logger.debug("EndElement: ", name); 700 701 String cData = currentChars; 702 if ("bond".equals(name)) { 703 if (!stereoGiven) bondStereo.add(""); 704 if (bondCounter > bondDictRefs.size()) bondDictRefs.add(null); 705 if (bondCounter > bondAromaticity.size()) bondAromaticity.add(null); 706 } else if ("atom".equals(name)) { 707 if (atomCounter > eltitles.size()) { 708 eltitles.add(null); 709 } 710 if (atomCounter > hCounts.size()) { 711 hCounts.add(null); 712 } 713 if (atomCounter > atomDictRefs.size()) { 714 atomDictRefs.add(null); 715 } 716 if (atomCounter > atomAromaticities.size()) { 717 atomAromaticities.add(null); 718 } 719 if (atomCounter > isotope.size()) { 720 isotope.add(null); 721 } 722 if (atomCounter > atomicNumbers.size()) { 723 atomicNumbers.add(null); 724 } 725 if (atomCounter > exactMasses.size()) { 726 exactMasses.add(null); 727 } 728 if (atomCounter > spinMultiplicities.size()) { 729 spinMultiplicities.add(null); 730 } 731 if (atomCounter > occupancies.size()) { 732 occupancies.add(null); 733 } 734 if (atomCounter > formalCharges.size()) { 735 /* 736 * while strictly undefined, assume zero formal charge when no 737 * number is given 738 */ 739 formalCharges.add("0"); 740 } 741 if (!parityGiven) { 742 atomParities.add(""); 743 } 744 if (!parityAtomsGiven) { 745 parityARef1.add(""); 746 parityARef2.add(""); 747 parityARef3.add(""); 748 parityARef4.add(""); 749 } 750 /* 751 * It may happen that not all atoms have associated 2D or 3D 752 * coordinates. accept that 753 */ 754 if (atomCounter > x2.size() && x2.size() != 0) { 755 /* 756 * apparently, the previous atoms had atomic coordinates, add 757 * 'null' for this atom 758 */ 759 x2.add(null); 760 y2.add(null); 761 } 762 if (atomCounter > x3.size() && x3.size() != 0) { 763 /* 764 * apparently, the previous atoms had atomic coordinates, add 765 * 'null' for this atom 766 */ 767 x3.add(null); 768 y3.add(null); 769 z3.add(null); 770 } 771 772 if (atomCounter > xfract.size() && xfract.size() != 0) { 773 /* 774 * apparently, the previous atoms had atomic coordinates, add 775 * 'null' for this atom 776 */ 777 xfract.add(null); 778 yfract.add(null); 779 zfract.add(null); 780 } 781 } else if ("molecule".equals(name)) { 782 storeData(); 783 // cdo.endObject("Molecule"); 784 if (currentMolecule instanceof ICrystal) { 785 logger.debug("Adding crystal to chemModel"); 786 currentChemModel.setCrystal((ICrystal) currentMolecule); 787 currentChemSequence.addChemModel(currentChemModel); 788 } else if (currentMolecule instanceof IAtomContainer) { 789 logger.debug("Adding molecule to set"); 790 currentMoleculeSet.addAtomContainer(currentMolecule); 791 logger.debug("#mols in set: " + currentMoleculeSet.getAtomContainerCount()); 792 } 793 } else if ("crystal".equals(name)) { 794 if (crystalScalar > 0) { 795 // convert unit cell parameters to cartesians 796 Vector3d[] axes = CrystalGeometryTools.notionalToCartesian(unitcellparams[0], unitcellparams[1], 797 unitcellparams[2], unitcellparams[3], unitcellparams[4], unitcellparams[5]); 798 cartesianAxesSet = true; 799 // cdo.startObject("a-axis"); 800 // cdo.setObjectProperty("a-axis", "x", new Double(aAxis.x).toString()); 801 // cdo.setObjectProperty("a-axis", "y", new Double(aAxis.y).toString()); 802 // cdo.setObjectProperty("a-axis", "z", new Double(aAxis.z).toString()); 803 // cdo.endObject("a-axis"); 804 // cdo.startObject("b-axis"); 805 // cdo.setObjectProperty("b-axis", "x", new Double(bAxis.x).toString()); 806 // cdo.setObjectProperty("b-axis", "y", new Double(bAxis.y).toString()); 807 // cdo.setObjectProperty("b-axis", "z", new Double(bAxis.z).toString()); 808 // cdo.endObject("b-axis"); 809 // cdo.startObject("c-axis"); 810 // cdo.setObjectProperty("c-axis", "x", new Double(cAxis.x).toString()); 811 // cdo.setObjectProperty("c-axis", "y", new Double(cAxis.y).toString()); 812 // cdo.setObjectProperty("c-axis", "z", new Double(cAxis.z).toString()); 813 // cdo.endObject("c-axis"); 814 ((ICrystal) currentMolecule).setA(axes[0]); 815 ((ICrystal) currentMolecule).setB(axes[1]); 816 ((ICrystal) currentMolecule).setC(axes[2]); 817 } else { 818 logger.error("Could not find crystal unit cell parameters"); 819 } 820 // cdo.endObject("Crystal"); 821 } else if ("list".equals(name)) { 822 // cdo.endObject("MoleculeSet"); 823 // FIXME: I really should check the DICTREF, but there is currently 824 // no mechanism for storing these for use with endTag() :( 825 // So, instead, for now, just see if it already has done the setting 826 // to work around duplication 827 if (currentChemModel.getMoleculeSet() != currentMoleculeSet) { 828 currentChemModel.setMoleculeSet(currentMoleculeSet); 829 currentChemSequence.addChemModel(currentChemModel); 830 } 831 } else if ("coordinate3".equals(name)) { 832 if (BUILTIN.equals("xyz3")) { 833 logger.debug("New coord3 xyz3 found: ", currentChars); 834 835 try { 836 837 StringTokenizer st = new StringTokenizer(currentChars); 838 x3.add(st.nextToken()); 839 y3.add(st.nextToken()); 840 z3.add(st.nextToken()); 841 logger.debug("coord3 x3.length: ", x3.size()); 842 logger.debug("coord3 y3.length: ", y3.size()); 843 logger.debug("coord3 z3.length: ", z3.size()); 844 } catch (Exception exception) { 845 logger.error("CMLParsing error while setting coordinate3!"); 846 logger.debug(exception); 847 } 848 } else { 849 logger.warn("Unknown coordinate3 BUILTIN: " + BUILTIN); 850 } 851 } else if ("string".equals(name)) { 852 if (BUILTIN.equals("elementType")) { 853 logger.debug("Element: ", cData.trim()); 854 elsym.add(cData); 855 } else if (BUILTIN.equals("atomRef")) { 856 curRef++; 857 logger.debug("Bond: ref #", curRef); 858 859 if (curRef == 1) { 860 bondARef1.add(cData.trim()); 861 } else if (curRef == 2) { 862 bondARef2.add(cData.trim()); 863 } 864 } else if (BUILTIN.equals("order")) { 865 logger.debug("Bond: order ", cData.trim()); 866 order.add(cData.trim()); 867 } else if (BUILTIN.equals("formalCharge")) { 868 // NOTE: this combination is in violation of the CML DTD!!! 869 logger.warn("formalCharge BUILTIN accepted but violating CML DTD"); 870 logger.debug("Charge: ", cData.trim()); 871 String charge = cData.trim(); 872 if (charge.startsWith("+") && charge.length() > 1) { 873 charge = charge.substring(1); 874 } 875 formalCharges.add(charge); 876 } 877 } else if ("bondStereo".equals(name)) { 878 if (!currentChars.isEmpty() && !stereoGiven) { 879 bondStereo.add(currentChars); 880 stereoGiven = Boolean.TRUE; 881 } 882 } else if ("atomParity".equals(name)) { 883 if (!currentChars.isEmpty() && !parityGiven && parityAtomsGiven) { 884 atomParities.add(currentChars); 885 parityGiven = Boolean.TRUE; 886 } 887 } else if ("float".equals(name)) { 888 if (BUILTIN.equals("x3")) { 889 x3.add(cData.trim()); 890 } else if (BUILTIN.equals("y3")) { 891 y3.add(cData.trim()); 892 } else if (BUILTIN.equals("z3")) { 893 z3.add(cData.trim()); 894 } else if (BUILTIN.equals("x2")) { 895 x2.add(cData.trim()); 896 } else if (BUILTIN.equals("y2")) { 897 y2.add(cData.trim()); 898 } else if (BUILTIN.equals("order")) { 899 // NOTE: this combination is in violation of the CML DTD!!! 900 order.add(cData.trim()); 901 } else if (BUILTIN.equals("charge") || BUILTIN.equals("partialCharge")) { 902 partialCharges.add(cData.trim()); 903 } 904 } else if ("integer".equals(name)) { 905 if (BUILTIN.equals("formalCharge")) { 906 formalCharges.add(cData.trim()); 907 } 908 } else if ("coordinate2".equals(name)) { 909 if (BUILTIN.equals("xy2")) { 910 logger.debug("New coord2 xy2 found.", cData); 911 912 try { 913 914 StringTokenizer st = new StringTokenizer(cData); 915 x2.add(st.nextToken()); 916 y2.add(st.nextToken()); 917 } catch (Exception e) { 918 notify("CMLParsing error: " + e, SYSTEMID, 175, 1); 919 } 920 } 921 } else if ("stringArray".equals(name)) { 922 if (BUILTIN.equals("id") || BUILTIN.equals("atomId") || BUILTIN.equals("atomID")) { // invalid according to CML1 DTD but found in OpenBabel 1.x output 923 924 try { 925 boolean countAtoms = (atomCounter == 0) ? true : false; 926 StringTokenizer st = new StringTokenizer(cData); 927 928 while (st.hasMoreTokens()) { 929 if (countAtoms) { 930 atomCounter++; 931 } 932 String token = st.nextToken(); 933 logger.debug("StringArray (Token): ", token); 934 elid.add(token); 935 } 936 } catch (Exception e) { 937 notify("CMLParsing error: " + e, SYSTEMID, 186, 1); 938 } 939 } else if (BUILTIN.equals("elementType")) { 940 941 try { 942 boolean countAtoms = (atomCounter == 0) ? true : false; 943 StringTokenizer st = new StringTokenizer(cData); 944 945 while (st.hasMoreTokens()) { 946 if (countAtoms) { 947 atomCounter++; 948 } 949 elsym.add(st.nextToken()); 950 } 951 } catch (Exception e) { 952 notify("CMLParsing error: " + e, SYSTEMID, 194, 1); 953 } 954 } else if (BUILTIN.equals("atomRefs")) { 955 curRef++; 956 logger.debug("New atomRefs found: ", curRef); 957 958 try { 959 boolean countBonds = (bondCounter == 0) ? true : false; 960 StringTokenizer st = new StringTokenizer(cData); 961 962 while (st.hasMoreTokens()) { 963 if (countBonds) { 964 bondCounter++; 965 } 966 String token = st.nextToken(); 967 logger.debug("Token: ", token); 968 969 if (curRef == 1) { 970 bondARef1.add(token); 971 } else if (curRef == 2) { 972 bondARef2.add(token); 973 } 974 } 975 } catch (Exception e) { 976 notify("CMLParsing error: " + e, SYSTEMID, 194, 1); 977 } 978 } else if (BUILTIN.equals("atomRef")) { 979 curRef++; 980 logger.debug("New atomRef found: ", curRef); // this is CML1 stuff, we get things like: 981 /* 982 * <bondArray> <stringArray builtin="atomRef">a2 a2 a2 a2 a3 a3 983 * a4 a4 a5 a6 a7 a9</stringArray> <stringArray 984 * builtin="atomRef">a9 a11 a12 a13 a5 a4 a6 a9 a7 a8 a8 985 * a10</stringArray> <stringArray builtin="order">1 1 1 1 2 1 2 986 * 1 1 1 2 2</stringArray> </bondArray> 987 */ 988 989 try { 990 boolean countBonds = (bondCounter == 0) ? true : false; 991 StringTokenizer st = new StringTokenizer(cData); 992 993 while (st.hasMoreTokens()) { 994 if (countBonds) { 995 bondCounter++; 996 } 997 String token = st.nextToken(); 998 logger.debug("Token: ", token); 999 1000 if (curRef == 1) { 1001 bondARef1.add(token); 1002 } else if (curRef == 2) { 1003 bondARef2.add(token); 1004 } 1005 } 1006 } catch (Exception e) { 1007 notify("CMLParsing error: " + e, SYSTEMID, 194, 1); 1008 } 1009 } else if (BUILTIN.equals("order")) { 1010 logger.debug("New bond order found."); 1011 1012 try { 1013 1014 StringTokenizer st = new StringTokenizer(cData); 1015 1016 while (st.hasMoreTokens()) { 1017 1018 String token = st.nextToken(); 1019 logger.debug("Token: ", token); 1020 order.add(token); 1021 } 1022 } catch (Exception e) { 1023 notify("CMLParsing error: " + e, SYSTEMID, 194, 1); 1024 } 1025 } 1026 } else if ("integerArray".equals(name)) { 1027 logger.debug("IntegerArray: builtin = ", BUILTIN); 1028 1029 if (BUILTIN.equals("formalCharge")) { 1030 1031 try { 1032 1033 StringTokenizer st = new StringTokenizer(cData); 1034 1035 while (st.hasMoreTokens()) { 1036 1037 String token = st.nextToken(); 1038 logger.debug("Charge added: ", token); 1039 formalCharges.add(token); 1040 } 1041 } catch (Exception e) { 1042 notify("CMLParsing error: " + e, SYSTEMID, 205, 1); 1043 } 1044 } 1045 } else if ("scalar".equals(name)) { 1046 if (xpath.endsWith("crystal", "scalar")) { 1047 logger.debug("Going to set a crystal parameter: " + crystalScalar, " to ", cData); 1048 try { 1049 unitcellparams[crystalScalar - 1] = Double.parseDouble(cData.trim()); 1050 } catch (NumberFormatException exception) { 1051 logger.error("Content must a float: " + cData); 1052 } 1053 } else if (xpath.endsWith("bond", "scalar")) { 1054 if (DICTREF.equals("mdl:stereo")) { 1055 bondStereo.add(cData.trim()); 1056 stereoGiven = true; 1057 } else { 1058 Map<String, String> bp = bondCustomProperty.get(bondid.get(bondid.size() - 1)); 1059 if (bp == null) { 1060 bp = new Hashtable<String, String>(); 1061 bondCustomProperty.put(bondid.get(bondid.size() - 1), bp); 1062 } 1063 bp.put(elementTitle, cData.trim()); 1064 } 1065 } else if (xpath.endsWith("atom", "scalar")) { 1066 if (DICTREF.equals("cdk:partialCharge")) { 1067 partialCharges.add(cData.trim()); 1068 } else if (DICTREF.equals("cdk:atomicNumber")) { 1069 atomicNumbers.add(cData.trim()); 1070 } else if (DICTREF.equals("cdk:aromaticAtom")) { 1071 atomAromaticities.add(cData.trim()); 1072 } else if (DICTREF.equals("cdk:isotopicMass")) { 1073 exactMasses.add(cData.trim()); 1074 } else { 1075 if (atomCustomProperty.get(Integer.valueOf(atomCounter - 1)) == null) 1076 atomCustomProperty.put(Integer.valueOf(atomCounter - 1), new ArrayList<String>()); 1077 atomCustomProperty.get(Integer.valueOf(atomCounter - 1)).add(elementTitle); 1078 atomCustomProperty.get(Integer.valueOf(atomCounter - 1)).add(cData.trim()); 1079 } 1080 } else if (xpath.endsWith("molecule", "scalar")) { 1081 if (DICTREF.equals("pdb:id")) { 1082 // cdo.setObjectProperty("Molecule", DICTREF, cData); 1083 currentMolecule.setProperty(new DictRef(DICTREF, cData), cData); 1084 } else if (DICTREF.equals("cdk:molecularProperty")) { 1085 currentMolecule.setProperty(elementTitle, cData); 1086 } else { 1087 moleculeCustomProperty.add(elementTitle); 1088 moleculeCustomProperty.add(cData.trim()); 1089 } 1090 } else if (xpath.endsWith("reaction", "scalar")) { 1091 if (DICTREF.equals("cdk:reactionProperty")) { 1092 currentReaction.setProperty(elementTitle, cData); 1093 } 1094 } else { 1095 logger.warn("Ignoring scalar: " + xpath); 1096 } 1097 } else if ("floatArray".equals(name)) { 1098 if (BUILTIN.equals("x3")) { 1099 1100 try { 1101 1102 StringTokenizer st = new StringTokenizer(cData); 1103 1104 while (st.hasMoreTokens()) 1105 x3.add(st.nextToken()); 1106 } catch (Exception e) { 1107 notify("CMLParsing error: " + e, SYSTEMID, 205, 1); 1108 } 1109 } else if (BUILTIN.equals("y3")) { 1110 1111 try { 1112 1113 StringTokenizer st = new StringTokenizer(cData); 1114 1115 while (st.hasMoreTokens()) 1116 y3.add(st.nextToken()); 1117 } catch (Exception e) { 1118 notify("CMLParsing error: " + e, SYSTEMID, 213, 1); 1119 } 1120 } else if (BUILTIN.equals("z3")) { 1121 1122 try { 1123 1124 StringTokenizer st = new StringTokenizer(cData); 1125 1126 while (st.hasMoreTokens()) 1127 z3.add(st.nextToken()); 1128 } catch (Exception e) { 1129 notify("CMLParsing error: " + e, SYSTEMID, 221, 1); 1130 } 1131 } else if (BUILTIN.equals("x2")) { 1132 logger.debug("New floatArray found."); 1133 1134 try { 1135 1136 StringTokenizer st = new StringTokenizer(cData); 1137 1138 while (st.hasMoreTokens()) 1139 x2.add(st.nextToken()); 1140 } catch (Exception e) { 1141 notify("CMLParsing error: " + e, SYSTEMID, 205, 1); 1142 } 1143 } else if (BUILTIN.equals("y2")) { 1144 logger.debug("New floatArray found."); 1145 1146 try { 1147 1148 StringTokenizer st = new StringTokenizer(cData); 1149 1150 while (st.hasMoreTokens()) 1151 y2.add(st.nextToken()); 1152 } catch (Exception e) { 1153 notify("CMLParsing error: " + e, SYSTEMID, 454, 1); 1154 } 1155 } else if (BUILTIN.equals("partialCharge")) { 1156 logger.debug("New floatArray with partial charges found."); 1157 1158 try { 1159 1160 StringTokenizer st = new StringTokenizer(cData); 1161 1162 while (st.hasMoreTokens()) 1163 partialCharges.add(st.nextToken()); 1164 } catch (Exception e) { 1165 notify("CMLParsing error: " + e, SYSTEMID, 462, 1); 1166 } 1167 } 1168 } else if ("basic".equals(name)) { 1169 // assuming this is the child element of <identifier> 1170 this.inchi = cData; 1171 } else if ("name".equals(name)) { 1172 if (xpath.endsWith("molecule", "name")) { 1173 if (DICTREF.length() > 0) { 1174 // cdo.setObjectProperty("Molecule", DICTREF, cData); 1175 1176 currentMolecule.setProperty(new DictRef(DICTREF, cData), cData); 1177 } else { 1178 // cdo.setObjectProperty("Molecule", "Name", cData); 1179 currentMolecule.setTitle(cData); 1180 } 1181 } 1182 } else if ("formula".equals(name)) { 1183 currentMolecule.setProperty(CDKConstants.FORMULA, cData); 1184 } else { 1185 logger.debug("Skipping end element: " + name); 1186 } 1187 1188 currentChars = ""; 1189 BUILTIN = ""; 1190 elementTitle = ""; 1191 } 1192 1193 @Override characterData(CMLStack xpath, char[] ch, int start, int length)1194 public void characterData(CMLStack xpath, char[] ch, int start, int length) { 1195 currentChars = currentChars + new String(ch, start, length); 1196 logger.debug("CD: ", currentChars); 1197 } 1198 notify(String message, String systemId, int line, int column)1199 protected void notify(String message, String systemId, int line, int column) { 1200 logger.debug("Message: ", message); 1201 logger.debug("SystemId: ", systemId); 1202 logger.debug("Line: ", line); 1203 logger.debug("Column: ", column); 1204 } 1205 storeData()1206 protected void storeData() { 1207 if (inchi != null) { 1208 // cdo.setObjectProperty("Molecule", "inchi", inchi); 1209 currentMolecule.setProperty(CDKConstants.INCHI, inchi); 1210 } 1211 if (formula != null && formula.size() > 0) { 1212 currentMolecule.setProperty(CDKConstants.FORMULA, formula); 1213 } 1214 Iterator<String> customs = moleculeCustomProperty.iterator(); 1215 while (customs.hasNext()) { 1216 String x = customs.next(); 1217 String y = customs.next(); 1218 currentMolecule.setProperty(x, y); 1219 } 1220 storeAtomData(); 1221 newAtomData(); 1222 storeBondData(); 1223 newBondData(); 1224 convertCMLToCDKHydrogenCounts(); 1225 } 1226 convertCMLToCDKHydrogenCounts()1227 private void convertCMLToCDKHydrogenCounts() { 1228 for (IAtom atom : currentMolecule.atoms()) { 1229 if (atom.getImplicitHydrogenCount() != null) { 1230 int explicitHCount = AtomContainerManipulator.countExplicitHydrogens(currentMolecule, atom); 1231 if (explicitHCount != 0) { 1232 atom.setImplicitHydrogenCount(atom.getImplicitHydrogenCount() - explicitHCount); 1233 } 1234 } 1235 } 1236 } 1237 storeAtomData()1238 protected void storeAtomData() { 1239 logger.debug("No atoms: ", atomCounter); 1240 if (atomCounter == 0) { 1241 return; 1242 } 1243 1244 boolean hasID = false; 1245 boolean has3D = false; 1246 boolean has3Dfract = false; 1247 boolean has2D = false; 1248 boolean hasFormalCharge = false; 1249 boolean hasAtomAromaticities = false; 1250 boolean hasPartialCharge = false; 1251 boolean hasHCounts = false; 1252 boolean hasSymbols = false; 1253 boolean hasTitles = false; 1254 boolean hasIsotopes = false; 1255 boolean hasAtomicNumbers = false; 1256 boolean hasExactMasses = false; 1257 boolean hasDictRefs = false; 1258 boolean hasSpinMultiplicities = false; 1259 boolean hasAtomParities = false; 1260 boolean hasOccupancies = false; 1261 1262 if (elid.size() == atomCounter) { 1263 hasID = true; 1264 } else { 1265 logger.debug("No atom ids: " + elid.size(), " != " + atomCounter); 1266 } 1267 1268 if (elsym.size() == atomCounter) { 1269 hasSymbols = true; 1270 } else { 1271 logger.debug("No atom symbols: " + elsym.size(), " != " + atomCounter); 1272 } 1273 1274 if (eltitles.size() == atomCounter) { 1275 hasTitles = true; 1276 } else { 1277 logger.debug("No atom titles: " + eltitles.size(), " != " + atomCounter); 1278 } 1279 1280 if ((x3.size() == atomCounter) && (y3.size() == atomCounter) && (z3.size() == atomCounter)) { 1281 has3D = true; 1282 } else { 1283 logger.debug("No 3D info: " + x3.size(), " " + y3.size(), " " + z3.size(), " != " + atomCounter); 1284 } 1285 1286 if ((xfract.size() == atomCounter) && (yfract.size() == atomCounter) && (zfract.size() == atomCounter)) { 1287 has3Dfract = true; 1288 } else { 1289 logger.debug("No 3D fractional info: " + xfract.size(), " " + yfract.size(), " " + zfract.size(), " != " 1290 + atomCounter); 1291 } 1292 1293 if ((x2.size() == atomCounter) && (y2.size() == atomCounter)) { 1294 has2D = true; 1295 } else { 1296 logger.debug("No 2D info: " + x2.size(), " " + y2.size(), " != " + atomCounter); 1297 } 1298 1299 if (formalCharges.size() == atomCounter) { 1300 hasFormalCharge = true; 1301 } else { 1302 logger.debug("No formal Charge info: " + formalCharges.size(), " != " + atomCounter); 1303 } 1304 1305 if (atomAromaticities.size() == atomCounter) { 1306 hasAtomAromaticities = true; 1307 } else { 1308 logger.debug("No aromatic atom info: " + atomAromaticities.size(), " != " + atomCounter); 1309 } 1310 1311 if (partialCharges.size() == atomCounter) { 1312 hasPartialCharge = true; 1313 } else { 1314 logger.debug("No partial Charge info: " + partialCharges.size(), " != " + atomCounter); 1315 } 1316 1317 if (hCounts.size() == atomCounter) { 1318 hasHCounts = true; 1319 } else { 1320 logger.debug("No hydrogen Count info: " + hCounts.size(), " != " + atomCounter); 1321 } 1322 1323 if (spinMultiplicities.size() == atomCounter) { 1324 hasSpinMultiplicities = true; 1325 } else { 1326 logger.debug("No spinMultiplicity info: " + spinMultiplicities.size(), " != " + atomCounter); 1327 } 1328 1329 if (atomParities.size() == atomCounter) { 1330 hasAtomParities = true; 1331 } else { 1332 logger.debug("No atomParity info: " + spinMultiplicities.size(), " != " + atomCounter); 1333 } 1334 1335 if (occupancies.size() == atomCounter) { 1336 hasOccupancies = true; 1337 } else { 1338 logger.debug("No occupancy info: " + occupancies.size(), " != " + atomCounter); 1339 } 1340 1341 if (atomDictRefs.size() == atomCounter) { 1342 hasDictRefs = true; 1343 } else { 1344 logger.debug("No dictRef info: " + atomDictRefs.size(), " != " + atomCounter); 1345 } 1346 1347 if (isotope.size() == atomCounter) { 1348 hasIsotopes = true; 1349 } else { 1350 logger.debug("No isotope info: " + isotope.size(), " != " + atomCounter); 1351 } 1352 if (atomicNumbers.size() == atomCounter) { 1353 hasAtomicNumbers = true; 1354 } else { 1355 logger.debug("No atomicNumbers info: " + atomicNumbers.size(), " != " + atomCounter); 1356 } 1357 if (exactMasses.size() == atomCounter) { 1358 hasExactMasses = true; 1359 } else { 1360 logger.debug("No atomicNumbers info: " + atomicNumbers.size(), " != " + atomCounter); 1361 } 1362 1363 for (int i = 0; i < atomCounter; i++) { 1364 logger.info("Storing atom: ", i); 1365 // cdo.startObject("Atom"); 1366 currentAtom = currentChemFile.getBuilder().newInstance(IAtom.class, "H"); 1367 logger.debug("Atom # " + atomCounter); 1368 if (hasID) { 1369 // cdo.setObjectProperty("Atom", "id", (String)elid.get(i)); 1370 logger.debug("id: ", (String) elid.get(i)); 1371 currentAtom.setID((String) elid.get(i)); 1372 atomEnumeration.put((String) elid.get(i), currentAtom); 1373 } 1374 if (hasTitles) { 1375 if (hasSymbols) { 1376 String symbol = (String) elsym.get(i); 1377 if (symbol.equals("Du") || symbol.equals("Dummy")) { 1378 // cdo.setObjectProperty("PseudoAtom", "label", (String)eltitles.get(i)); 1379 if (!(currentAtom instanceof IPseudoAtom)) { 1380 currentAtom = currentChemFile.getBuilder().newInstance(IPseudoAtom.class, currentAtom); 1381 if (hasID) atomEnumeration.put((String) elid.get(i), currentAtom); 1382 } 1383 ((IPseudoAtom) currentAtom).setLabel((String) eltitles.get(i)); 1384 } else { 1385 // cdo.setObjectProperty("Atom", "title", (String)eltitles.get(i)); 1386 // FIXME: huh? 1387 if (eltitles.get(i) != null) 1388 currentAtom.setProperty(CDKConstants.TITLE, (String) eltitles.get(i)); 1389 } 1390 } else { 1391 // cdo.setObjectProperty("Atom", "title", (String)eltitles.get(i)); 1392 // FIXME: huh? 1393 if (eltitles.get(i) != null) currentAtom.setProperty(CDKConstants.TITLE, (String) eltitles.get(i)); 1394 } 1395 } 1396 1397 // store optional atom properties 1398 if (hasSymbols) { 1399 String symbol = (String) elsym.get(i); 1400 if (symbol.equals("Du") || symbol.equals("Dummy")) { 1401 symbol = "R"; 1402 } 1403 // cdo.setObjectProperty("Atom", "type", symbol); 1404 if (symbol.equals("R") && !(currentAtom instanceof IPseudoAtom)) { 1405 currentAtom = currentChemFile.getBuilder().newInstance(IPseudoAtom.class, currentAtom); 1406 ((IPseudoAtom) currentAtom).setLabel("R"); 1407 if (hasID) atomEnumeration.put((String) elid.get(i), currentAtom); 1408 } 1409 currentAtom.setSymbol(symbol); 1410 if (!hasAtomicNumbers || atomicNumbers.get(i) == null) 1411 currentAtom.setAtomicNumber(PeriodicTable.getAtomicNumber(symbol)); 1412 } 1413 1414 if (has3D) { 1415 // cdo.setObjectProperty("Atom", "x3", (String)x3.get(i)); 1416 // cdo.setObjectProperty("Atom", "y3", (String)y3.get(i)); 1417 // cdo.setObjectProperty("Atom", "z3", (String)z3.get(i)); 1418 if (x3.get(i) != null && y3.get(i) != null && z3.get(i) != null) { 1419 currentAtom.setPoint3d(new Point3d(Double.parseDouble((String) x3.get(i)), Double 1420 .parseDouble((String) y3.get(i)), Double.parseDouble((String) z3.get(i)))); 1421 } 1422 } 1423 1424 if (has3Dfract) { 1425 // ok, need to convert fractional into eucledian coordinates 1426 // cdo.setObjectProperty("Atom", "xFract", (String)xfract.get(i)); 1427 // cdo.setObjectProperty("Atom", "yFract", (String)yfract.get(i)); 1428 // cdo.setObjectProperty("Atom", "zFract", (String)zfract.get(i)); 1429 currentAtom.setFractionalPoint3d(new Point3d(Double.parseDouble((String) xfract.get(i)), Double 1430 .parseDouble((String) yfract.get(i)), Double.parseDouble((String) zfract.get(i)))); 1431 } 1432 1433 if (hasFormalCharge) { 1434 // cdo.setObjectProperty("Atom", "formalCharge", 1435 // (String)formalCharges.get(i)); 1436 currentAtom.setFormalCharge(Integer.parseInt((String) formalCharges.get(i))); 1437 } 1438 1439 if (hasAtomAromaticities) { 1440 if (atomAromaticities.get(i) != null) currentAtom.setFlag(CDKConstants.ISAROMATIC, true); 1441 } 1442 1443 if (hasPartialCharge) { 1444 logger.debug("Storing partial atomic charge..."); 1445 // cdo.setObjectProperty("Atom", "partialCharge", 1446 // (String)partialCharges.get(i)); 1447 currentAtom.setCharge(Double.parseDouble((String) partialCharges.get(i))); 1448 } 1449 1450 if (hasHCounts) { 1451 // cdo.setObjectProperty("Atom", "hydrogenCount", (String)hCounts.get(i)); 1452 // convertCMLToCDKHydrogenCounts() is called to update hydrogen counts when molecule is stored 1453 String hCount = hCounts.get(i); 1454 if (hCount != null) { 1455 currentAtom.setImplicitHydrogenCount(Integer.parseInt(hCount)); 1456 } else { 1457 currentAtom.setImplicitHydrogenCount((Integer) CDKConstants.UNSET); 1458 } 1459 } 1460 1461 if (has2D) { 1462 if (x2.get(i) != null && y2.get(i) != null) { 1463 // cdo.setObjectProperty("Atom", "x2", (String)x2.get(i)); 1464 // cdo.setObjectProperty("Atom", "y2", (String)y2.get(i)); 1465 currentAtom.setPoint2d(new Point2d(Double.parseDouble((String) x2.get(i)), Double 1466 .parseDouble((String) y2.get(i)))); 1467 } 1468 } 1469 1470 if (hasDictRefs) { 1471 // cdo.setObjectProperty("Atom", "dictRef", (String)atomDictRefs.get(i)); 1472 if (atomDictRefs.get(i) != null) 1473 currentAtom.setProperty("org.openscience.cdk.dict", (String) atomDictRefs.get(i)); 1474 } 1475 1476 if (hasSpinMultiplicities && spinMultiplicities.get(i) != null) { 1477 // cdo.setObjectProperty("Atom", "spinMultiplicity", (String)spinMultiplicities.get(i)); 1478 int unpairedElectrons = Integer.parseInt((String) spinMultiplicities.get(i)) - 1; 1479 for (int sm = 0; sm < unpairedElectrons; sm++) { 1480 currentMolecule.addSingleElectron(currentChemFile.getBuilder().newInstance(ISingleElectron.class, 1481 currentAtom)); 1482 } 1483 } 1484 1485 if (hasOccupancies && occupancies.get(i) != null) { 1486 // cdo.setObjectProperty("Atom", "occupanciy", (String)occupancies.get(i)); 1487 // FIXME: this has no ChemFileCDO equivalent, not even if spelled correctly 1488 } 1489 1490 if (hasIsotopes) { 1491 // cdo.setObjectProperty("Atom", "massNumber", (String)isotope.get(i)); 1492 if (isotope.get(i) != null) 1493 currentAtom.setMassNumber((int) Double.parseDouble((String) isotope.get(i))); 1494 } 1495 1496 if (hasAtomicNumbers) { 1497 if (atomicNumbers.get(i) != null) currentAtom.setAtomicNumber(Integer.parseInt(atomicNumbers.get(i))); 1498 } 1499 1500 if (hasExactMasses) { 1501 if (exactMasses.get(i) != null) currentAtom.setExactMass(Double.parseDouble(exactMasses.get(i))); 1502 } 1503 1504 if (atomCustomProperty.get(Integer.valueOf(i)) != null) { 1505 Iterator<String> it = atomCustomProperty.get(Integer.valueOf(i)).iterator(); 1506 while (it.hasNext()) { 1507 currentAtom.setProperty(it.next(), it.next()); 1508 } 1509 } 1510 1511 // cdo.endObject("Atom"); 1512 1513 currentMolecule.addAtom(currentAtom); 1514 } 1515 1516 for (int i = 0; i < atomCounter; i++) { 1517 if (hasAtomParities && 1518 atomParities.get(i) != null && 1519 !atomParities.get(i).isEmpty()) { 1520 IAtom ligandAtom1 = atomEnumeration.get(parityARef1.get(i)); 1521 IAtom ligandAtom2 = atomEnumeration.get(parityARef2.get(i)); 1522 IAtom ligandAtom3 = atomEnumeration.get(parityARef3.get(i)); 1523 IAtom ligandAtom4 = atomEnumeration.get(parityARef4.get(i)); 1524 IAtom[] ligandAtoms = new IAtom[]{ligandAtom1, ligandAtom2, ligandAtom3, ligandAtom4}; 1525 Stereo config; 1526 int parity = 0; 1527 try { 1528 parity = (int) Math.signum(Double.parseDouble(atomParities.get(i))); 1529 } catch (NumberFormatException ex) { 1530 // ignored 1531 } 1532 if (parity > 0) 1533 config = Stereo.CLOCKWISE; 1534 else if (parity < 0) 1535 config = Stereo.ANTI_CLOCKWISE; 1536 else { 1537 config = null; 1538 logger.warn("Cannot interpret stereo information, invalid parity: '" + atomParities.get(i) + "'"); 1539 } 1540 if (config != null) { 1541 TetrahedralChirality chirality = new TetrahedralChirality(currentMolecule.getAtom(i), ligandAtoms, config); 1542 currentMolecule.addStereoElement(chirality); 1543 } 1544 } 1545 } 1546 1547 if (elid.size() > 0) { 1548 // assume this is the current working list 1549 bondElid = elid; 1550 } 1551 } 1552 storeBondData()1553 protected void storeBondData() { 1554 logger.debug("Testing a1,a2,stereo,order = count: " + bondARef1.size(), "," + bondARef2.size(), "," 1555 + bondStereo.size(), "," + order.size(), "=" + bondCounter); 1556 1557 if ((bondARef1.size() == bondCounter) && (bondARef2.size() == bondCounter)) { 1558 logger.debug("About to add bond info..."); 1559 1560 Iterator<String> orders = order.iterator(); 1561 Iterator<String> ids = bondid.iterator(); 1562 Iterator<String> bar1s = bondARef1.iterator(); 1563 Iterator<String> bar2s = bondARef2.iterator(); 1564 Iterator<String> stereos = bondStereo.iterator(); 1565 Iterator<Boolean> aroms = bondAromaticity.iterator(); 1566 1567 while (bar1s.hasNext()) { 1568 // cdo.startObject("Bond"); 1569 // if (ids.hasNext()) { 1570 // cdo.setObjectProperty("Bond", "id", (String)ids.next()); 1571 // } 1572 // cdo.setObjectProperty("Bond", "atom1", 1573 // Integer.valueOf(bondElid.indexOf( 1574 // (String)bar1s.next())).toString()); 1575 // cdo.setObjectProperty("Bond", "atom2", 1576 // Integer.valueOf(bondElid.indexOf( 1577 // (String)bar2s.next())).toString()); 1578 IAtom a1 = (IAtom) atomEnumeration.get((String) bar1s.next()); 1579 IAtom a2 = (IAtom) atomEnumeration.get((String) bar2s.next()); 1580 currentBond = currentChemFile.getBuilder().newInstance(IBond.class, a1, a2); 1581 if (ids.hasNext()) { 1582 currentBond.setID((String) ids.next()); 1583 } 1584 1585 if (orders.hasNext()) { 1586 String bondOrder = (String) orders.next(); 1587 1588 if ("S".equals(bondOrder)) { 1589 // cdo.setObjectProperty("Bond", "order", "1"); 1590 currentBond.setOrder(Order.SINGLE); 1591 } else if ("D".equals(bondOrder)) { 1592 // cdo.setObjectProperty("Bond", "order", "2"); 1593 currentBond.setOrder(Order.DOUBLE); 1594 } else if ("T".equals(bondOrder)) { 1595 // cdo.setObjectProperty("Bond", "order", "3"); 1596 currentBond.setOrder(Order.TRIPLE); 1597 } else if ("A".equals(bondOrder)) { 1598 // cdo.setObjectProperty("Bond", "order", "1.5"); 1599 currentBond.setOrder(Order.SINGLE); 1600 currentBond.setFlag(CDKConstants.ISAROMATIC, true); 1601 } else { 1602 // cdo.setObjectProperty("Bond", "order", bondOrder); 1603 currentBond.setOrder(BondManipulator.createBondOrder(Double.parseDouble(bondOrder))); 1604 } 1605 } 1606 1607 if (stereos.hasNext()) { 1608 // cdo.setObjectProperty("Bond", "stereo", 1609 // (String)stereos.next()); 1610 String nextStereo = (String) stereos.next(); 1611 if ("H".equals(nextStereo)) { 1612 currentBond.setStereo(IBond.Stereo.DOWN); 1613 } else if ("W".equals(nextStereo)) { 1614 currentBond.setStereo(IBond.Stereo.UP); 1615 } else if (nextStereo != null && !nextStereo.isEmpty()) { 1616 logger.warn("Cannot interpret bond display information: '" + nextStereo + "'"); 1617 } 1618 } 1619 1620 if (aroms.hasNext()) { 1621 Object nextArom = aroms.next(); 1622 if (nextArom != null && ((boolean) nextArom)) { 1623 currentBond.setFlag(CDKConstants.ISAROMATIC, true); 1624 } 1625 } 1626 1627 if (currentBond.getID() != null) { 1628 Map<String, String> currentBondProperties = bondCustomProperty.get(currentBond.getID()); 1629 if (currentBondProperties != null) { 1630 Iterator<String> keys = currentBondProperties.keySet().iterator(); 1631 while (keys.hasNext()) { 1632 String key = keys.next(); 1633 currentBond.setProperty(key, currentBondProperties.get(key)); 1634 } 1635 bondCustomProperty.remove(currentBond.getID()); 1636 } 1637 } 1638 1639 // cdo.endObject("Bond"); 1640 currentMolecule.addBond(currentBond); 1641 } 1642 } 1643 } 1644 addArrayElementsTo(List<String> toAddto, String array)1645 protected int addArrayElementsTo(List<String> toAddto, String array) { 1646 StringTokenizer tokenizer = new StringTokenizer(array); 1647 int i = 0; 1648 while (tokenizer.hasMoreElements()) { 1649 toAddto.add(tokenizer.nextToken()); 1650 i++; 1651 } 1652 return i; 1653 } 1654 } 1655