1 /* Copyright (C) 1997-2007  Egon Willighagen <egonw@users.sf.net>
2  *
3  * Contact: cdk-devel@lists.sourceforge.net
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public License
7  * as published by the Free Software Foundation; either version 2.1
8  * of the License, or (at your option) any later version.
9  * All we ask is that proper credit is given for our work, which includes
10  * - but is not limited to - adding the above copyright notice to the beginning
11  * of your source code files, and to any copyright notice that you may distribute
12  * with programs based on this work.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
22  *
23  */
24 package org.openscience.cdk.io.cml;
25 
26 import java.util.ArrayList;
27 import java.util.HashMap;
28 import java.util.Hashtable;
29 import java.util.Iterator;
30 import java.util.List;
31 import java.util.Map;
32 import java.util.StringTokenizer;
33 
34 import javax.vecmath.Point2d;
35 import javax.vecmath.Point3d;
36 import javax.vecmath.Vector3d;
37 
38 import org.openscience.cdk.CDKConstants;
39 import org.openscience.cdk.dict.DictRef;
40 import org.openscience.cdk.geometry.CrystalGeometryTools;
41 import org.openscience.cdk.interfaces.IAtom;
42 import org.openscience.cdk.interfaces.IAtomContainer;
43 import org.openscience.cdk.interfaces.IAtomContainerSet;
44 import org.openscience.cdk.interfaces.IBond;
45 import org.openscience.cdk.interfaces.IBond.Order;
46 import org.openscience.cdk.interfaces.IChemFile;
47 import org.openscience.cdk.interfaces.IChemModel;
48 import org.openscience.cdk.interfaces.IChemSequence;
49 import org.openscience.cdk.interfaces.ICrystal;
50 import org.openscience.cdk.interfaces.IMonomer;
51 import org.openscience.cdk.interfaces.IPseudoAtom;
52 import org.openscience.cdk.interfaces.IReaction;
53 import org.openscience.cdk.interfaces.IReactionSet;
54 import org.openscience.cdk.interfaces.ISingleElectron;
55 import org.openscience.cdk.interfaces.IStrand;
56 import org.openscience.cdk.interfaces.ITetrahedralChirality.Stereo;
57 import org.openscience.cdk.stereo.TetrahedralChirality;
58 import org.openscience.cdk.tools.ILoggingTool;
59 import org.openscience.cdk.tools.LoggingToolFactory;
60 import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
61 import org.openscience.cdk.tools.manipulator.BondManipulator;
62 import org.openscience.cdk.tools.periodictable.PeriodicTable;
63 import org.xml.sax.Attributes;
64 
65 /**
66  * Core CML 1.x and 2.x elements are parsed by this class (see {@cdk.cite WIL01}).
67  *
68  * <p>Please file a bug report if this parser fails to parse
69  * a certain element or attribute value in a valid CML document.
70  *
71  * @cdk.module io
72  * @cdk.githash
73  *
74  * @author Egon Willighagen &lt;egonw@sci.kun.nl&gt;
75  **/
76 public class CMLCoreModule implements ICMLModule {
77 
78     protected ILoggingTool                     logger;
79     protected final String                     SYSTEMID         = "CML-1999-05-15";
80     //    protected IChemicalDocumentObject cdo;
81 
82     // data model to store things into
83     protected IChemFile                        currentChemFile;
84 
85     protected IAtomContainer                   currentMolecule;
86     protected IAtomContainerSet                currentMoleculeSet;
87     protected IChemModel                       currentChemModel;
88     protected IChemSequence                    currentChemSequence;
89     protected IReactionSet                     currentReactionSet;
90     protected IReaction                        currentReaction;
91     protected IAtom                            currentAtom;
92     protected IBond                            currentBond;
93     protected IStrand                          currentStrand;
94     protected IMonomer                         currentMonomer;
95     protected Map<String, IAtom>               atomEnumeration;
96     protected List<String>                     moleculeCustomProperty;
97 
98     // helper fields
99     protected int                              formulaCounter;
100     protected int                              atomCounter;
101     protected List<String>                     elsym;
102     protected List<String>                     eltitles;
103     protected List<String>                     elid;
104     protected List<String>                     formula;
105     protected List<String>                     formalCharges;
106     protected List<String>                     partialCharges;
107     protected List<String>                     isotope;
108     protected List<String>                     atomicNumbers;
109     protected List<String>                     exactMasses;
110     protected List<String>                     x3;
111     protected List<String>                     y3;
112     protected List<String>                     z3;
113     protected List<String>                     x2;
114     protected List<String>                     y2;
115     protected List<String>                     xfract;
116     protected List<String>                     yfract;
117     protected List<String>                     zfract;
118     protected List<String>                     hCounts;
119     protected List<String>                     atomParities;
120     protected List<String>                     parityARef1;
121     protected List<String>                     parityARef2;
122     protected List<String>                     parityARef3;
123     protected List<String>                     parityARef4;
124     protected List<String>                     atomDictRefs;
125     protected List<String>                     atomAromaticities;
126     protected List<String>                     spinMultiplicities;
127     protected List<String>                     occupancies;
128     protected Map<Integer, List<String>>       atomCustomProperty;
129     protected boolean                          parityAtomsGiven;
130     protected boolean                          parityGiven;
131 
132     protected int                              bondCounter;
133     protected List<String>                     bondid;
134     protected List<String>                     bondARef1;
135     protected List<String>                     bondARef2;
136     protected List<String>                     order;
137     protected List<String>                     bondStereo;
138     protected List<String>                     bondDictRefs;
139     protected List<String>                     bondElid;
140     protected List<Boolean>                    bondAromaticity;
141     protected Map<String, Map<String, String>> bondCustomProperty;
142     protected boolean                          stereoGiven;
143     protected String                           inchi;
144     protected int                              curRef;
145     protected int                              CurrentElement;
146     protected String                           BUILTIN;
147     protected String                           DICTREF;
148     protected String                           elementTitle;
149     protected String                           currentChars;
150 
151     protected double[]                         unitcellparams;
152     protected int                              crystalScalar;
153 
154     //    private Vector3d aAxis;
155     //    private Vector3d bAxis;
156     //    private Vector3d cAxis;
157     boolean                                    cartesianAxesSet = false;
158 
CMLCoreModule(IChemFile chemFile)159     public CMLCoreModule(IChemFile chemFile) {
160         logger = LoggingToolFactory.createLoggingTool(CMLCoreModule.class);
161         this.currentChemFile = chemFile;
162     }
163 
CMLCoreModule(ICMLModule conv)164     public CMLCoreModule(ICMLModule conv) {
165         logger = LoggingToolFactory.createLoggingTool(CMLCoreModule.class);
166         inherit(conv);
167     }
168 
169     @Override
inherit(ICMLModule convention)170     public void inherit(ICMLModule convention) {
171         if (convention instanceof CMLCoreModule) {
172             CMLCoreModule conv = (CMLCoreModule) convention;
173 
174             // copy the data model
175             this.currentChemFile = conv.currentChemFile;
176             this.currentMolecule = conv.currentMolecule;
177             this.currentMoleculeSet = conv.currentMoleculeSet;
178             this.currentChemModel = conv.currentChemModel;
179             this.currentChemSequence = conv.currentChemSequence;
180             this.currentReactionSet = conv.currentReactionSet;
181             this.currentReaction = conv.currentReaction;
182             this.currentAtom = conv.currentAtom;
183             this.currentStrand = conv.currentStrand;
184             this.currentMonomer = conv.currentMonomer;
185             this.atomEnumeration = conv.atomEnumeration;
186             this.moleculeCustomProperty = conv.moleculeCustomProperty;
187 
188             // copy the intermediate fields
189             this.logger = conv.logger;
190             this.BUILTIN = conv.BUILTIN;
191             this.atomCounter = conv.atomCounter;
192             this.formulaCounter = conv.formulaCounter;
193             this.elsym = conv.elsym;
194             this.eltitles = conv.eltitles;
195             this.elid = conv.elid;
196             this.formalCharges = conv.formalCharges;
197             this.partialCharges = conv.partialCharges;
198             this.isotope = conv.isotope;
199             this.atomicNumbers = conv.atomicNumbers;
200             this.exactMasses = conv.exactMasses;
201             this.x3 = conv.x3;
202             this.y3 = conv.y3;
203             this.z3 = conv.z3;
204             this.x2 = conv.x2;
205             this.y2 = conv.y2;
206             this.xfract = conv.xfract;
207             this.yfract = conv.yfract;
208             this.zfract = conv.zfract;
209             this.hCounts = conv.hCounts;
210             this.atomParities = conv.atomParities;
211             this.parityARef1 = conv.parityARef1;
212             this.parityARef2 = conv.parityARef2;
213             this.parityARef3 = conv.parityARef3;
214             this.parityARef4 = conv.parityARef4;
215             this.atomDictRefs = conv.atomDictRefs;
216             this.atomAromaticities = conv.atomAromaticities;
217             this.spinMultiplicities = conv.spinMultiplicities;
218             this.occupancies = conv.occupancies;
219             this.bondCounter = conv.bondCounter;
220             this.bondid = conv.bondid;
221             this.bondARef1 = conv.bondARef1;
222             this.bondARef2 = conv.bondARef2;
223             this.order = conv.order;
224             this.bondStereo = conv.bondStereo;
225             this.bondCustomProperty = conv.bondCustomProperty;
226             this.atomCustomProperty = conv.atomCustomProperty;
227             this.bondDictRefs = conv.bondDictRefs;
228             this.bondAromaticity = conv.bondAromaticity;
229             this.curRef = conv.curRef;
230             this.unitcellparams = conv.unitcellparams;
231             this.inchi = conv.inchi;
232         } else {
233             logger.warn("Cannot inherit information from module: ", convention.getClass().getName());
234         }
235     }
236 
237     @Override
returnChemFile()238     public IChemFile returnChemFile() {
239         return currentChemFile;
240     }
241 
242     /**
243      * Clean all data about parsed data.
244      */
newMolecule()245     protected void newMolecule() {
246         newMoleculeData();
247         newAtomData();
248         newBondData();
249         newCrystalData();
250         newFormulaData();
251     }
252 
253     /**
254      * Clean all data about the molecule itself.
255      */
newMoleculeData()256     protected void newMoleculeData() {
257         this.inchi = null;
258     }
259 
260     /**
261      * Clean all data about read formulas.
262      */
newFormulaData()263     protected void newFormulaData() {
264         formulaCounter = 0;
265         formula = new ArrayList<String>();
266     }
267 
268     /**
269      * Clean all data about read atoms.
270      */
newAtomData()271     protected void newAtomData() {
272         atomCounter = 0;
273         elsym = new ArrayList<String>();
274         elid = new ArrayList<String>();
275         eltitles = new ArrayList<String>();
276         formalCharges = new ArrayList<String>();
277         partialCharges = new ArrayList<String>();
278         isotope = new ArrayList<String>();
279         atomicNumbers = new ArrayList<String>();
280         exactMasses = new ArrayList<String>();
281         x3 = new ArrayList<String>();
282         y3 = new ArrayList<String>();
283         z3 = new ArrayList<String>();
284         x2 = new ArrayList<String>();
285         y2 = new ArrayList<String>();
286         xfract = new ArrayList<String>();
287         yfract = new ArrayList<String>();
288         zfract = new ArrayList<String>();
289         hCounts = new ArrayList<String>();
290         atomParities = new ArrayList<String>();
291         parityARef1 = new ArrayList<String>();
292         parityARef2 = new ArrayList<String>();
293         parityARef3 = new ArrayList<String>();
294         parityARef4 = new ArrayList<String>();
295         atomAromaticities = new ArrayList<String>();
296         atomDictRefs = new ArrayList<String>();
297         spinMultiplicities = new ArrayList<String>();
298         occupancies = new ArrayList<String>();
299         atomCustomProperty = new HashMap<Integer, List<String>>();
300     }
301 
302     /**
303      * Clean all data about read bonds.
304      */
newBondData()305     protected void newBondData() {
306         bondCounter = 0;
307         bondid = new ArrayList<String>();
308         bondARef1 = new ArrayList<String>();
309         bondARef2 = new ArrayList<String>();
310         order = new ArrayList<String>();
311         bondStereo = new ArrayList<String>();
312         bondCustomProperty = new Hashtable<String, Map<String, String>>();
313         bondDictRefs = new ArrayList<String>();
314         bondElid = new ArrayList<String>();
315         bondAromaticity = new ArrayList<Boolean>();
316     }
317 
318     /**
319      * Clean all data about read bonds.
320      */
newCrystalData()321     protected void newCrystalData() {
322         unitcellparams = new double[6];
323         cartesianAxesSet = false;
324         crystalScalar = 0;
325         //        aAxis = new Vector3d();
326         //        bAxis = new Vector3d();
327         //        cAxis = new Vector3d();
328     }
329 
330     @Override
startDocument()331     public void startDocument() {
332         logger.info("Start XML Doc");
333         // cdo.startDocument();
334         currentChemSequence = currentChemFile.getBuilder().newInstance(IChemSequence.class);
335         currentChemModel = currentChemFile.getBuilder().newInstance(IChemModel.class);
336         currentMoleculeSet = currentChemFile.getBuilder().newInstance(IAtomContainerSet.class);
337         currentMolecule = currentChemFile.getBuilder().newInstance(IAtomContainer.class);
338         atomEnumeration = new HashMap<String, IAtom>();
339         moleculeCustomProperty = new ArrayList<String>();
340 
341         newMolecule();
342         BUILTIN = "";
343         curRef = 0;
344     }
345 
346     @Override
endDocument()347     public void endDocument() {
348         //        cdo.endDocument();
349         if (currentReactionSet != null && currentReactionSet.getReactionCount() == 0 && currentReaction != null) {
350             logger.debug("Adding reaction to ReactionSet");
351             currentReactionSet.addReaction(currentReaction);
352         }
353         if (currentReactionSet != null && currentChemModel.getReactionSet() == null) {
354             logger.debug("Adding SOR to ChemModel");
355             currentChemModel.setReactionSet(currentReactionSet);
356         }
357         if (currentMoleculeSet != null && currentMoleculeSet.getAtomContainerCount() != 0) {
358             logger.debug("Adding reaction to MoleculeSet");
359             currentChemModel.setMoleculeSet(currentMoleculeSet);
360         }
361         if (currentChemSequence.getChemModelCount() == 0) {
362             logger.debug("Adding ChemModel to ChemSequence");
363             currentChemSequence.addChemModel(currentChemModel);
364         }
365         if (currentChemFile.getChemSequenceCount() == 0) {
366             // assume there is one non-animation ChemSequence
367             //    		addChemSequence(currentChemSequence);
368             currentChemFile.addChemSequence(currentChemSequence);
369         }
370 
371         logger.info("End XML Doc");
372     }
373 
374     @Override
startElement(CMLStack xpath, String uri, String local, String raw, Attributes atts)375     public void startElement(CMLStack xpath, String uri, String local, String raw, Attributes atts) {
376         String name = local;
377         logger.debug("StartElement");
378         currentChars = "";
379 
380         BUILTIN = "";
381         DICTREF = "";
382 
383         for (int i = 0; i < atts.getLength(); i++) {
384             String qname = atts.getQName(i);
385             if (qname.equals("builtin")) {
386                 BUILTIN = atts.getValue(i);
387                 logger.debug(name, "->BUILTIN found: ", atts.getValue(i));
388             } else if (qname.equals("dictRef")) {
389                 DICTREF = atts.getValue(i);
390                 logger.debug(name, "->DICTREF found: ", atts.getValue(i));
391             } else if (qname.equals("title")) {
392                 elementTitle = atts.getValue(i);
393                 logger.debug(name, "->TITLE found: ", atts.getValue(i));
394             } else {
395                 logger.debug("Qname: ", qname);
396             }
397         }
398 
399         if ("atom".equals(name)) {
400             atomCounter++;
401             for (int i = 0; i < atts.getLength(); i++) {
402 
403                 String att = atts.getQName(i);
404                 String value = atts.getValue(i);
405 
406                 if (att.equals("id")) { // this is supported in CML 1.x
407                     elid.add(value);
408                 } // this is supported in CML 2.0
409                 else if (att.equals("elementType")) {
410                     elsym.add(value);
411                 } // this is supported in CML 2.0
412                 else if (att.equals("title")) {
413                     eltitles.add(value);
414                 } // this is supported in CML 2.0
415                 else if (att.equals("x2")) {
416                     x2.add(value);
417                 } // this is supported in CML 2.0
418                 else if (att.equals("xy2")) {
419                     StringTokenizer tokenizer = new StringTokenizer(value);
420                     x2.add(tokenizer.nextToken());
421                     y2.add(tokenizer.nextToken());
422                 } // this is supported in CML 2.0
423                 else if (att.equals("xyzFract")) {
424                     StringTokenizer tokenizer = new StringTokenizer(value);
425                     xfract.add(tokenizer.nextToken());
426                     yfract.add(tokenizer.nextToken());
427                     zfract.add(tokenizer.nextToken());
428                 } // this is supported in CML 2.0
429                 else if (att.equals("xyz3")) {
430                     StringTokenizer tokenizer = new StringTokenizer(value);
431                     x3.add(tokenizer.nextToken());
432                     y3.add(tokenizer.nextToken());
433                     z3.add(tokenizer.nextToken());
434                 } // this is supported in CML 2.0
435                 else if (att.equals("y2")) {
436                     y2.add(value);
437                 } // this is supported in CML 2.0
438                 else if (att.equals("x3")) {
439                     x3.add(value);
440                 } // this is supported in CML 2.0
441                 else if (att.equals("y3")) {
442                     y3.add(value);
443                 } // this is supported in CML 2.0
444                 else if (att.equals("z3")) {
445                     z3.add(value);
446                 } // this is supported in CML 2.0
447                 else if (att.equals("xFract")) {
448                     xfract.add(value);
449                 } // this is supported in CML 2.0
450                 else if (att.equals("yFract")) {
451                     yfract.add(value);
452                 } // this is supported in CML 2.0
453                 else if (att.equals("zFract")) {
454                     zfract.add(value);
455                 } // this is supported in CML 2.0
456                 else if (att.equals("formalCharge")) {
457                     formalCharges.add(value);
458                 } // this is supported in CML 2.0
459                 else if (att.equals("hydrogenCount")) {
460                     hCounts.add(value);
461                 } else if (att.equals("isotopeNumber")) {
462                     isotope.add(value);
463                 } else if (att.equals("dictRef")) {
464                     logger.debug("occupancy: " + value);
465                     atomDictRefs.add(value);
466                 } else if (att.equals("spinMultiplicity")) {
467                     spinMultiplicities.add(value);
468                 } else if (att.equals("occupancy")) {
469                     occupancies.add(value);
470                 }
471 
472                 else {
473                     logger.warn("Unparsed attribute: " + att);
474                 }
475 
476                 parityAtomsGiven = false;
477                 parityGiven = false;
478             }
479         } else if ("atomArray".equals(name) && !xpath.endsWith("formula", "atomArray")) {
480             boolean atomsCounted = false;
481             for (int i = 0; i < atts.getLength(); i++) {
482                 String att = atts.getQName(i);
483                 int count = 0;
484                 if (att.equals("atomID")) {
485                     count = addArrayElementsTo(elid, atts.getValue(i));
486                 } else if (att.equals("elementType")) {
487                     count = addArrayElementsTo(elsym, atts.getValue(i));
488                 } else if (att.equals("x2")) {
489                     count = addArrayElementsTo(x2, atts.getValue(i));
490                 } else if (att.equals("y2")) {
491                     count = addArrayElementsTo(y2, atts.getValue(i));
492                 } else if (att.equals("x3")) {
493                     count = addArrayElementsTo(x3, atts.getValue(i));
494                 } else if (att.equals("y3")) {
495                     count = addArrayElementsTo(y3, atts.getValue(i));
496                 } else if (att.equals("z3")) {
497                     count = addArrayElementsTo(z3, atts.getValue(i));
498                 } else if (att.equals("xFract")) {
499                     count = addArrayElementsTo(xfract, atts.getValue(i));
500                 } else if (att.equals("yFract")) {
501                     count = addArrayElementsTo(yfract, atts.getValue(i));
502                 } else if (att.equals("zFract")) {
503                     count = addArrayElementsTo(zfract, atts.getValue(i));
504                 } else {
505                     logger.warn("Unparsed attribute: " + att);
506                 }
507                 if (!atomsCounted) {
508                     atomCounter += count;
509                     atomsCounted = true;
510                 }
511             }
512         } else if ("atomParity".equals(name)) {
513             for (int i = 0; i < atts.getLength(); i++) {
514                 String att = atts.getQName(i);
515                 if (att.equals("atomRefs4") && !parityAtomsGiven) {
516                     //Expect exactly four references
517                     try {
518                         StringTokenizer st = new StringTokenizer(atts.getValue(i));
519                         parityARef1.add((String) st.nextElement());
520                         parityARef2.add((String) st.nextElement());
521                         parityARef3.add((String) st.nextElement());
522                         parityARef4.add((String) st.nextElement());
523                         parityAtomsGiven = true;
524                     } catch (Exception e) {
525                         logger.error("Error in CML file: ", e.getMessage());
526                         logger.debug(e);
527                     }
528                 }
529             }
530         } else if ("bond".equals(name)) {
531             bondCounter++;
532             for (int i = 0; i < atts.getLength(); i++) {
533                 String att = atts.getQName(i);
534                 logger.debug("B2 ", att, "=", atts.getValue(i));
535 
536                 if (att.equals("id")) {
537                     bondid.add(atts.getValue(i));
538                     logger.debug("B3 ", bondid);
539                 } else if (att.equals("atomRefs") || // this is CML 1.x support
540                         att.equals("atomRefs2")) { // this is CML 2.0 support
541 
542                     // expect exactly two references
543                     try {
544                         StringTokenizer st = new StringTokenizer(atts.getValue(i));
545                         bondARef1.add((String) st.nextElement());
546                         bondARef2.add((String) st.nextElement());
547                     } catch (Exception e) {
548                         logger.error("Error in CML file: ", e.getMessage());
549                         logger.debug(e);
550                     }
551                 } else if (att.equals("order")) { // this is CML 2.0 support
552                     order.add(atts.getValue(i).trim());
553                 } else if (att.equals("dictRef")) {
554                     bondDictRefs.add(atts.getValue(i).trim());
555                 }
556             }
557 
558             stereoGiven = false;
559             curRef = 0;
560         } else if ("bondArray".equals(name)) {
561             boolean bondsCounted = false;
562             for (int i = 0; i < atts.getLength(); i++) {
563                 String att = atts.getQName(i);
564                 int count = 0;
565                 if (att.equals("bondID")) {
566                     count = addArrayElementsTo(bondid, atts.getValue(i));
567                 } else if (att.equals("atomRefs1")) {
568                     count = addArrayElementsTo(bondARef1, atts.getValue(i));
569                 } else if (att.equals("atomRefs2")) {
570                     count = addArrayElementsTo(bondARef2, atts.getValue(i));
571                 } else if (att.equals("atomRef1")) {
572                     count = addArrayElementsTo(bondARef1, atts.getValue(i));
573                 } else if (att.equals("atomRef2")) {
574                     count = addArrayElementsTo(bondARef2, atts.getValue(i));
575                 } else if (att.equals("order")) {
576                     count = addArrayElementsTo(order, atts.getValue(i));
577                 } else {
578                     logger.warn("Unparsed attribute: " + att);
579                 }
580                 if (!bondsCounted) {
581                     bondCounter += count;
582                     bondsCounted = true;
583                 }
584             }
585             curRef = 0;
586         } else if ("bondStereo".equals(name)) {
587             for (int i = 0; i < atts.getLength(); i++) {
588                 if (atts.getQName(i).equals("dictRef")) {
589                     String value = atts.getValue(i);
590                     if (value.startsWith("cml:") && value.length() > 4) {
591                         bondStereo.add(value.substring(4));
592                         stereoGiven = true;
593                     }
594                 }
595             }
596         } else if ("bondType".equals(name)) {
597             for (int i = 0; i < atts.getLength(); i++) {
598                 if (atts.getQName(i).equals("dictRef")) {
599                     if (atts.getValue(i).equals("cdk:aromaticBond")) bondAromaticity.add(Boolean.TRUE);
600                 }
601             }
602         } else if ("molecule".equals(name)) {
603             newMolecule();
604             BUILTIN = "";
605             //            cdo.startObject("Molecule");
606             if (currentChemModel == null)
607                 currentChemModel = currentChemFile.getBuilder().newInstance(IChemModel.class);
608             if (currentMoleculeSet == null)
609                 currentMoleculeSet = currentChemFile.getBuilder().newInstance(IAtomContainerSet.class);
610             currentMolecule = currentChemFile.getBuilder().newInstance(IAtomContainer.class);
611             for (int i = 0; i < atts.getLength(); i++) {
612                 if (atts.getQName(i).equals("id")) {
613                     //                    cdo.setObjectProperty("Molecule", "id", atts.getValue(i));
614                     currentMolecule.setID(atts.getValue(i));
615                 } else if (atts.getQName(i).equals("dictRef")) {
616                     //                	cdo.setObjectProperty("Molecule", "dictRef", atts.getValue(i));
617                     currentMolecule.setProperty(new DictRef(DICTREF, atts.getValue(i)), atts.getValue(i));
618                 }
619             }
620         } else if ("crystal".equals(name)) {
621             newCrystalData();
622             //            cdo.startObject("Crystal");
623             currentMolecule = currentChemFile.getBuilder().newInstance(ICrystal.class, currentMolecule);
624             for (int i = 0; i < atts.getLength(); i++) {
625                 String att = atts.getQName(i);
626                 if (att.equals("z")) {
627                     //                    cdo.setObjectProperty("Crystal", "z", atts.getValue(i));
628                     ((ICrystal) currentMolecule).setZ(Integer.parseInt(atts.getValue(i)));
629                 }
630             }
631         } else if ("symmetry".equals(name)) {
632             for (int i = 0; i < atts.getLength(); i++) {
633                 String att = atts.getQName(i);
634                 if (att.equals("spaceGroup")) {
635                     //                    cdo.setObjectProperty("Crystal", "spacegroup", atts.getValue(i));
636                     ((ICrystal) currentMolecule).setSpaceGroup(atts.getValue(i));
637                 }
638             }
639         } else if ("identifier".equals(name)) {
640             if (atts.getValue("convention") != null && atts.getValue("convention").equals("iupac:inchi")
641                     && atts.getValue("value") != null) {
642                 //                cdo.setObjectProperty("Molecule", "inchi", atts.getValue("value"));
643                 currentMolecule.setProperty(CDKConstants.INCHI, atts.getValue("value"));
644             }
645         } else if ("scalar".equals(name)) {
646             if (xpath.endsWith("crystal", "scalar")) crystalScalar++;
647         } else if ("label".equals(name)) {
648             if (xpath.endsWith("atomType", "label")) {
649                 //            	cdo.setObjectProperty("Atom", "atomTypeLabel", atts.getValue("value"));
650                 currentAtom.setAtomTypeName(atts.getValue("value"));
651             }
652         } else if ("list".equals(name)) {
653             //            cdo.startObject("MoleculeSet");
654             if (DICTREF.equals("cdk:model")) {
655                 currentChemModel = currentChemFile.getBuilder().newInstance(IChemModel.class);
656                 // see if there is an ID attribute
657                 for (int i = 0; i < atts.getLength(); i++) {
658                     String att = atts.getQName(i);
659                     if (att.equals("id")) {
660                         currentChemModel.setID(atts.getValue(i));
661                     }
662                 }
663             } else if (DICTREF.equals("cdk:moleculeSet")) {
664                 currentMoleculeSet = currentChemFile.getBuilder().newInstance(IAtomContainerSet.class);
665                 // see if there is an ID attribute
666                 for (int i = 0; i < atts.getLength(); i++) {
667                     String att = atts.getQName(i);
668                     if (att.equals("id")) {
669                         currentMoleculeSet.setID(atts.getValue(i));
670                     }
671                 }
672                 currentMolecule = currentChemFile.getBuilder().newInstance(IAtomContainer.class);
673             } else {
674                 // the old default
675                 currentMoleculeSet = currentChemFile.getBuilder().newInstance(IAtomContainerSet.class);
676                 // see if there is an ID attribute
677                 for (int i = 0; i < atts.getLength(); i++) {
678                     String att = atts.getQName(i);
679                     if (att.equals("id")) {
680                         currentMoleculeSet.setID(atts.getValue(i));
681                     }
682                 }
683                 currentMolecule = currentChemFile.getBuilder().newInstance(IAtomContainer.class);
684             }
685         } else if ("formula".equals(name)) {
686             formulaCounter++;
687             for (int i = 0; i < atts.getLength(); i++) {
688                 String att = atts.getQName(i);
689                 String value = atts.getValue(i);
690                 if (att.equals("concise")) {
691                     formula.add(value);
692                 }
693             }
694         }
695     }
696 
697     @Override
endElement(CMLStack xpath, String uri, String name, String raw)698     public void endElement(CMLStack xpath, String uri, String name, String raw) {
699         logger.debug("EndElement: ", name);
700 
701         String cData = currentChars;
702         if ("bond".equals(name)) {
703             if (!stereoGiven) bondStereo.add("");
704             if (bondCounter > bondDictRefs.size()) bondDictRefs.add(null);
705             if (bondCounter > bondAromaticity.size()) bondAromaticity.add(null);
706         } else if ("atom".equals(name)) {
707             if (atomCounter > eltitles.size()) {
708                 eltitles.add(null);
709             }
710             if (atomCounter > hCounts.size()) {
711                 hCounts.add(null);
712             }
713             if (atomCounter > atomDictRefs.size()) {
714                 atomDictRefs.add(null);
715             }
716             if (atomCounter > atomAromaticities.size()) {
717                 atomAromaticities.add(null);
718             }
719             if (atomCounter > isotope.size()) {
720                 isotope.add(null);
721             }
722             if (atomCounter > atomicNumbers.size()) {
723                 atomicNumbers.add(null);
724             }
725             if (atomCounter > exactMasses.size()) {
726                 exactMasses.add(null);
727             }
728             if (atomCounter > spinMultiplicities.size()) {
729                 spinMultiplicities.add(null);
730             }
731             if (atomCounter > occupancies.size()) {
732                 occupancies.add(null);
733             }
734             if (atomCounter > formalCharges.size()) {
735                 /*
736                  * while strictly undefined, assume zero formal charge when no
737                  * number is given
738                  */
739                 formalCharges.add("0");
740             }
741             if (!parityGiven) {
742                 atomParities.add("");
743             }
744             if (!parityAtomsGiven) {
745                 parityARef1.add("");
746                 parityARef2.add("");
747                 parityARef3.add("");
748                 parityARef4.add("");
749             }
750             /*
751              * It may happen that not all atoms have associated 2D or 3D
752              * coordinates. accept that
753              */
754             if (atomCounter > x2.size() && x2.size() != 0) {
755                 /*
756                  * apparently, the previous atoms had atomic coordinates, add
757                  * 'null' for this atom
758                  */
759                 x2.add(null);
760                 y2.add(null);
761             }
762             if (atomCounter > x3.size() && x3.size() != 0) {
763                 /*
764                  * apparently, the previous atoms had atomic coordinates, add
765                  * 'null' for this atom
766                  */
767                 x3.add(null);
768                 y3.add(null);
769                 z3.add(null);
770             }
771 
772             if (atomCounter > xfract.size() && xfract.size() != 0) {
773                 /*
774                  * apparently, the previous atoms had atomic coordinates, add
775                  * 'null' for this atom
776                  */
777                 xfract.add(null);
778                 yfract.add(null);
779                 zfract.add(null);
780             }
781         } else if ("molecule".equals(name)) {
782             storeData();
783             //            cdo.endObject("Molecule");
784             if (currentMolecule instanceof ICrystal) {
785                 logger.debug("Adding crystal to chemModel");
786                 currentChemModel.setCrystal((ICrystal) currentMolecule);
787                 currentChemSequence.addChemModel(currentChemModel);
788             } else if (currentMolecule instanceof IAtomContainer) {
789                 logger.debug("Adding molecule to set");
790                 currentMoleculeSet.addAtomContainer(currentMolecule);
791                 logger.debug("#mols in set: " + currentMoleculeSet.getAtomContainerCount());
792             }
793         } else if ("crystal".equals(name)) {
794             if (crystalScalar > 0) {
795                 // convert unit cell parameters to cartesians
796                 Vector3d[] axes = CrystalGeometryTools.notionalToCartesian(unitcellparams[0], unitcellparams[1],
797                         unitcellparams[2], unitcellparams[3], unitcellparams[4], unitcellparams[5]);
798                 cartesianAxesSet = true;
799                 //                cdo.startObject("a-axis");
800                 //                cdo.setObjectProperty("a-axis", "x", new Double(aAxis.x).toString());
801                 //                cdo.setObjectProperty("a-axis", "y", new Double(aAxis.y).toString());
802                 //                cdo.setObjectProperty("a-axis", "z", new Double(aAxis.z).toString());
803                 //                cdo.endObject("a-axis");
804                 //                cdo.startObject("b-axis");
805                 //                cdo.setObjectProperty("b-axis", "x", new Double(bAxis.x).toString());
806                 //                cdo.setObjectProperty("b-axis", "y", new Double(bAxis.y).toString());
807                 //                cdo.setObjectProperty("b-axis", "z", new Double(bAxis.z).toString());
808                 //                cdo.endObject("b-axis");
809                 //                cdo.startObject("c-axis");
810                 //                cdo.setObjectProperty("c-axis", "x", new Double(cAxis.x).toString());
811                 //                cdo.setObjectProperty("c-axis", "y", new Double(cAxis.y).toString());
812                 //                cdo.setObjectProperty("c-axis", "z", new Double(cAxis.z).toString());
813                 //                cdo.endObject("c-axis");
814                 ((ICrystal) currentMolecule).setA(axes[0]);
815                 ((ICrystal) currentMolecule).setB(axes[1]);
816                 ((ICrystal) currentMolecule).setC(axes[2]);
817             } else {
818                 logger.error("Could not find crystal unit cell parameters");
819             }
820             //            cdo.endObject("Crystal");
821         } else if ("list".equals(name)) {
822             //            cdo.endObject("MoleculeSet");
823             // FIXME: I really should check the DICTREF, but there is currently
824             // no mechanism for storing these for use with endTag() :(
825             // So, instead, for now, just see if it already has done the setting
826             // to work around duplication
827             if (currentChemModel.getMoleculeSet() != currentMoleculeSet) {
828                 currentChemModel.setMoleculeSet(currentMoleculeSet);
829                 currentChemSequence.addChemModel(currentChemModel);
830             }
831         } else if ("coordinate3".equals(name)) {
832             if (BUILTIN.equals("xyz3")) {
833                 logger.debug("New coord3 xyz3 found: ", currentChars);
834 
835                 try {
836 
837                     StringTokenizer st = new StringTokenizer(currentChars);
838                     x3.add(st.nextToken());
839                     y3.add(st.nextToken());
840                     z3.add(st.nextToken());
841                     logger.debug("coord3 x3.length: ", x3.size());
842                     logger.debug("coord3 y3.length: ", y3.size());
843                     logger.debug("coord3 z3.length: ", z3.size());
844                 } catch (Exception exception) {
845                     logger.error("CMLParsing error while setting coordinate3!");
846                     logger.debug(exception);
847                 }
848             } else {
849                 logger.warn("Unknown coordinate3 BUILTIN: " + BUILTIN);
850             }
851         } else if ("string".equals(name)) {
852             if (BUILTIN.equals("elementType")) {
853                 logger.debug("Element: ", cData.trim());
854                 elsym.add(cData);
855             } else if (BUILTIN.equals("atomRef")) {
856                 curRef++;
857                 logger.debug("Bond: ref #", curRef);
858 
859                 if (curRef == 1) {
860                     bondARef1.add(cData.trim());
861                 } else if (curRef == 2) {
862                     bondARef2.add(cData.trim());
863                 }
864             } else if (BUILTIN.equals("order")) {
865                 logger.debug("Bond: order ", cData.trim());
866                 order.add(cData.trim());
867             } else if (BUILTIN.equals("formalCharge")) {
868                 // NOTE: this combination is in violation of the CML DTD!!!
869                 logger.warn("formalCharge BUILTIN accepted but violating CML DTD");
870                 logger.debug("Charge: ", cData.trim());
871                 String charge = cData.trim();
872                 if (charge.startsWith("+") && charge.length() > 1) {
873                     charge = charge.substring(1);
874                 }
875                 formalCharges.add(charge);
876             }
877         } else if ("bondStereo".equals(name)) {
878             if (!currentChars.isEmpty() && !stereoGiven) {
879                 bondStereo.add(currentChars);
880                 stereoGiven = Boolean.TRUE;
881             }
882         } else if ("atomParity".equals(name)) {
883             if (!currentChars.isEmpty() && !parityGiven && parityAtomsGiven) {
884                 atomParities.add(currentChars);
885                 parityGiven = Boolean.TRUE;
886             }
887         } else if ("float".equals(name)) {
888             if (BUILTIN.equals("x3")) {
889                 x3.add(cData.trim());
890             } else if (BUILTIN.equals("y3")) {
891                 y3.add(cData.trim());
892             } else if (BUILTIN.equals("z3")) {
893                 z3.add(cData.trim());
894             } else if (BUILTIN.equals("x2")) {
895                 x2.add(cData.trim());
896             } else if (BUILTIN.equals("y2")) {
897                 y2.add(cData.trim());
898             } else if (BUILTIN.equals("order")) {
899                 // NOTE: this combination is in violation of the CML DTD!!!
900                 order.add(cData.trim());
901             } else if (BUILTIN.equals("charge") || BUILTIN.equals("partialCharge")) {
902                 partialCharges.add(cData.trim());
903             }
904         } else if ("integer".equals(name)) {
905             if (BUILTIN.equals("formalCharge")) {
906                 formalCharges.add(cData.trim());
907             }
908         } else if ("coordinate2".equals(name)) {
909             if (BUILTIN.equals("xy2")) {
910                 logger.debug("New coord2 xy2 found.", cData);
911 
912                 try {
913 
914                     StringTokenizer st = new StringTokenizer(cData);
915                     x2.add(st.nextToken());
916                     y2.add(st.nextToken());
917                 } catch (Exception e) {
918                     notify("CMLParsing error: " + e, SYSTEMID, 175, 1);
919                 }
920             }
921         } else if ("stringArray".equals(name)) {
922             if (BUILTIN.equals("id") || BUILTIN.equals("atomId") || BUILTIN.equals("atomID")) { // invalid according to CML1 DTD but found in OpenBabel 1.x output
923 
924                 try {
925                     boolean countAtoms = (atomCounter == 0) ? true : false;
926                     StringTokenizer st = new StringTokenizer(cData);
927 
928                     while (st.hasMoreTokens()) {
929                         if (countAtoms) {
930                             atomCounter++;
931                         }
932                         String token = st.nextToken();
933                         logger.debug("StringArray (Token): ", token);
934                         elid.add(token);
935                     }
936                 } catch (Exception e) {
937                     notify("CMLParsing error: " + e, SYSTEMID, 186, 1);
938                 }
939             } else if (BUILTIN.equals("elementType")) {
940 
941                 try {
942                     boolean countAtoms = (atomCounter == 0) ? true : false;
943                     StringTokenizer st = new StringTokenizer(cData);
944 
945                     while (st.hasMoreTokens()) {
946                         if (countAtoms) {
947                             atomCounter++;
948                         }
949                         elsym.add(st.nextToken());
950                     }
951                 } catch (Exception e) {
952                     notify("CMLParsing error: " + e, SYSTEMID, 194, 1);
953                 }
954             } else if (BUILTIN.equals("atomRefs")) {
955                 curRef++;
956                 logger.debug("New atomRefs found: ", curRef);
957 
958                 try {
959                     boolean countBonds = (bondCounter == 0) ? true : false;
960                     StringTokenizer st = new StringTokenizer(cData);
961 
962                     while (st.hasMoreTokens()) {
963                         if (countBonds) {
964                             bondCounter++;
965                         }
966                         String token = st.nextToken();
967                         logger.debug("Token: ", token);
968 
969                         if (curRef == 1) {
970                             bondARef1.add(token);
971                         } else if (curRef == 2) {
972                             bondARef2.add(token);
973                         }
974                     }
975                 } catch (Exception e) {
976                     notify("CMLParsing error: " + e, SYSTEMID, 194, 1);
977                 }
978             } else if (BUILTIN.equals("atomRef")) {
979                 curRef++;
980                 logger.debug("New atomRef found: ", curRef); // this is CML1 stuff, we get things like:
981                 /*
982                  * <bondArray> <stringArray builtin="atomRef">a2 a2 a2 a2 a3 a3
983                  * a4 a4 a5 a6 a7 a9</stringArray> <stringArray
984                  * builtin="atomRef">a9 a11 a12 a13 a5 a4 a6 a9 a7 a8 a8
985                  * a10</stringArray> <stringArray builtin="order">1 1 1 1 2 1 2
986                  * 1 1 1 2 2</stringArray> </bondArray>
987                  */
988 
989                 try {
990                     boolean countBonds = (bondCounter == 0) ? true : false;
991                     StringTokenizer st = new StringTokenizer(cData);
992 
993                     while (st.hasMoreTokens()) {
994                         if (countBonds) {
995                             bondCounter++;
996                         }
997                         String token = st.nextToken();
998                         logger.debug("Token: ", token);
999 
1000                         if (curRef == 1) {
1001                             bondARef1.add(token);
1002                         } else if (curRef == 2) {
1003                             bondARef2.add(token);
1004                         }
1005                     }
1006                 } catch (Exception e) {
1007                     notify("CMLParsing error: " + e, SYSTEMID, 194, 1);
1008                 }
1009             } else if (BUILTIN.equals("order")) {
1010                 logger.debug("New bond order found.");
1011 
1012                 try {
1013 
1014                     StringTokenizer st = new StringTokenizer(cData);
1015 
1016                     while (st.hasMoreTokens()) {
1017 
1018                         String token = st.nextToken();
1019                         logger.debug("Token: ", token);
1020                         order.add(token);
1021                     }
1022                 } catch (Exception e) {
1023                     notify("CMLParsing error: " + e, SYSTEMID, 194, 1);
1024                 }
1025             }
1026         } else if ("integerArray".equals(name)) {
1027             logger.debug("IntegerArray: builtin = ", BUILTIN);
1028 
1029             if (BUILTIN.equals("formalCharge")) {
1030 
1031                 try {
1032 
1033                     StringTokenizer st = new StringTokenizer(cData);
1034 
1035                     while (st.hasMoreTokens()) {
1036 
1037                         String token = st.nextToken();
1038                         logger.debug("Charge added: ", token);
1039                         formalCharges.add(token);
1040                     }
1041                 } catch (Exception e) {
1042                     notify("CMLParsing error: " + e, SYSTEMID, 205, 1);
1043                 }
1044             }
1045         } else if ("scalar".equals(name)) {
1046             if (xpath.endsWith("crystal", "scalar")) {
1047                 logger.debug("Going to set a crystal parameter: " + crystalScalar, " to ", cData);
1048                 try {
1049                     unitcellparams[crystalScalar - 1] = Double.parseDouble(cData.trim());
1050                 } catch (NumberFormatException exception) {
1051                     logger.error("Content must a float: " + cData);
1052                 }
1053             } else if (xpath.endsWith("bond", "scalar")) {
1054                 if (DICTREF.equals("mdl:stereo")) {
1055                     bondStereo.add(cData.trim());
1056                     stereoGiven = true;
1057                 } else {
1058                     Map<String, String> bp = bondCustomProperty.get(bondid.get(bondid.size() - 1));
1059                     if (bp == null) {
1060                         bp = new Hashtable<String, String>();
1061                         bondCustomProperty.put(bondid.get(bondid.size() - 1), bp);
1062                     }
1063                     bp.put(elementTitle, cData.trim());
1064                 }
1065             } else if (xpath.endsWith("atom", "scalar")) {
1066                 if (DICTREF.equals("cdk:partialCharge")) {
1067                     partialCharges.add(cData.trim());
1068                 } else if (DICTREF.equals("cdk:atomicNumber")) {
1069                     atomicNumbers.add(cData.trim());
1070                 } else if (DICTREF.equals("cdk:aromaticAtom")) {
1071                     atomAromaticities.add(cData.trim());
1072                 } else if (DICTREF.equals("cdk:isotopicMass")) {
1073                     exactMasses.add(cData.trim());
1074                 } else {
1075                     if (atomCustomProperty.get(Integer.valueOf(atomCounter - 1)) == null)
1076                         atomCustomProperty.put(Integer.valueOf(atomCounter - 1), new ArrayList<String>());
1077                     atomCustomProperty.get(Integer.valueOf(atomCounter - 1)).add(elementTitle);
1078                     atomCustomProperty.get(Integer.valueOf(atomCounter - 1)).add(cData.trim());
1079                 }
1080             } else if (xpath.endsWith("molecule", "scalar")) {
1081                 if (DICTREF.equals("pdb:id")) {
1082                     //                	cdo.setObjectProperty("Molecule", DICTREF, cData);
1083                     currentMolecule.setProperty(new DictRef(DICTREF, cData), cData);
1084                 } else if (DICTREF.equals("cdk:molecularProperty")) {
1085                     currentMolecule.setProperty(elementTitle, cData);
1086                 } else {
1087                     moleculeCustomProperty.add(elementTitle);
1088                     moleculeCustomProperty.add(cData.trim());
1089                 }
1090             } else if (xpath.endsWith("reaction", "scalar")) {
1091                 if (DICTREF.equals("cdk:reactionProperty")) {
1092                     currentReaction.setProperty(elementTitle, cData);
1093                 }
1094             } else {
1095                 logger.warn("Ignoring scalar: " + xpath);
1096             }
1097         } else if ("floatArray".equals(name)) {
1098             if (BUILTIN.equals("x3")) {
1099 
1100                 try {
1101 
1102                     StringTokenizer st = new StringTokenizer(cData);
1103 
1104                     while (st.hasMoreTokens())
1105                         x3.add(st.nextToken());
1106                 } catch (Exception e) {
1107                     notify("CMLParsing error: " + e, SYSTEMID, 205, 1);
1108                 }
1109             } else if (BUILTIN.equals("y3")) {
1110 
1111                 try {
1112 
1113                     StringTokenizer st = new StringTokenizer(cData);
1114 
1115                     while (st.hasMoreTokens())
1116                         y3.add(st.nextToken());
1117                 } catch (Exception e) {
1118                     notify("CMLParsing error: " + e, SYSTEMID, 213, 1);
1119                 }
1120             } else if (BUILTIN.equals("z3")) {
1121 
1122                 try {
1123 
1124                     StringTokenizer st = new StringTokenizer(cData);
1125 
1126                     while (st.hasMoreTokens())
1127                         z3.add(st.nextToken());
1128                 } catch (Exception e) {
1129                     notify("CMLParsing error: " + e, SYSTEMID, 221, 1);
1130                 }
1131             } else if (BUILTIN.equals("x2")) {
1132                 logger.debug("New floatArray found.");
1133 
1134                 try {
1135 
1136                     StringTokenizer st = new StringTokenizer(cData);
1137 
1138                     while (st.hasMoreTokens())
1139                         x2.add(st.nextToken());
1140                 } catch (Exception e) {
1141                     notify("CMLParsing error: " + e, SYSTEMID, 205, 1);
1142                 }
1143             } else if (BUILTIN.equals("y2")) {
1144                 logger.debug("New floatArray found.");
1145 
1146                 try {
1147 
1148                     StringTokenizer st = new StringTokenizer(cData);
1149 
1150                     while (st.hasMoreTokens())
1151                         y2.add(st.nextToken());
1152                 } catch (Exception e) {
1153                     notify("CMLParsing error: " + e, SYSTEMID, 454, 1);
1154                 }
1155             } else if (BUILTIN.equals("partialCharge")) {
1156                 logger.debug("New floatArray with partial charges found.");
1157 
1158                 try {
1159 
1160                     StringTokenizer st = new StringTokenizer(cData);
1161 
1162                     while (st.hasMoreTokens())
1163                         partialCharges.add(st.nextToken());
1164                 } catch (Exception e) {
1165                     notify("CMLParsing error: " + e, SYSTEMID, 462, 1);
1166                 }
1167             }
1168         } else if ("basic".equals(name)) {
1169             // assuming this is the child element of <identifier>
1170             this.inchi = cData;
1171         } else if ("name".equals(name)) {
1172             if (xpath.endsWith("molecule", "name")) {
1173                 if (DICTREF.length() > 0) {
1174                     //            		cdo.setObjectProperty("Molecule", DICTREF, cData);
1175 
1176                     currentMolecule.setProperty(new DictRef(DICTREF, cData), cData);
1177                 } else {
1178                     //            		cdo.setObjectProperty("Molecule", "Name", cData);
1179                     currentMolecule.setTitle(cData);
1180                 }
1181             }
1182         } else if ("formula".equals(name)) {
1183             currentMolecule.setProperty(CDKConstants.FORMULA, cData);
1184         } else {
1185             logger.debug("Skipping end element: " + name);
1186         }
1187 
1188         currentChars = "";
1189         BUILTIN = "";
1190         elementTitle = "";
1191     }
1192 
1193     @Override
characterData(CMLStack xpath, char[] ch, int start, int length)1194     public void characterData(CMLStack xpath, char[] ch, int start, int length) {
1195         currentChars = currentChars + new String(ch, start, length);
1196         logger.debug("CD: ", currentChars);
1197     }
1198 
notify(String message, String systemId, int line, int column)1199     protected void notify(String message, String systemId, int line, int column) {
1200         logger.debug("Message: ", message);
1201         logger.debug("SystemId: ", systemId);
1202         logger.debug("Line: ", line);
1203         logger.debug("Column: ", column);
1204     }
1205 
storeData()1206     protected void storeData() {
1207         if (inchi != null) {
1208             //            cdo.setObjectProperty("Molecule", "inchi", inchi);
1209             currentMolecule.setProperty(CDKConstants.INCHI, inchi);
1210         }
1211         if (formula != null && formula.size() > 0) {
1212             currentMolecule.setProperty(CDKConstants.FORMULA, formula);
1213         }
1214         Iterator<String> customs = moleculeCustomProperty.iterator();
1215         while (customs.hasNext()) {
1216             String x = customs.next();
1217             String y = customs.next();
1218             currentMolecule.setProperty(x, y);
1219         }
1220         storeAtomData();
1221         newAtomData();
1222         storeBondData();
1223         newBondData();
1224         convertCMLToCDKHydrogenCounts();
1225     }
1226 
convertCMLToCDKHydrogenCounts()1227     private void convertCMLToCDKHydrogenCounts() {
1228         for (IAtom atom : currentMolecule.atoms()) {
1229             if (atom.getImplicitHydrogenCount() != null) {
1230                 int explicitHCount = AtomContainerManipulator.countExplicitHydrogens(currentMolecule, atom);
1231                 if (explicitHCount != 0) {
1232                     atom.setImplicitHydrogenCount(atom.getImplicitHydrogenCount() - explicitHCount);
1233                 }
1234             }
1235         }
1236     }
1237 
storeAtomData()1238     protected void storeAtomData() {
1239         logger.debug("No atoms: ", atomCounter);
1240         if (atomCounter == 0) {
1241             return;
1242         }
1243 
1244         boolean hasID = false;
1245         boolean has3D = false;
1246         boolean has3Dfract = false;
1247         boolean has2D = false;
1248         boolean hasFormalCharge = false;
1249         boolean hasAtomAromaticities = false;
1250         boolean hasPartialCharge = false;
1251         boolean hasHCounts = false;
1252         boolean hasSymbols = false;
1253         boolean hasTitles = false;
1254         boolean hasIsotopes = false;
1255         boolean hasAtomicNumbers = false;
1256         boolean hasExactMasses = false;
1257         boolean hasDictRefs = false;
1258         boolean hasSpinMultiplicities = false;
1259         boolean hasAtomParities = false;
1260         boolean hasOccupancies = false;
1261 
1262         if (elid.size() == atomCounter) {
1263             hasID = true;
1264         } else {
1265             logger.debug("No atom ids: " + elid.size(), " != " + atomCounter);
1266         }
1267 
1268         if (elsym.size() == atomCounter) {
1269             hasSymbols = true;
1270         } else {
1271             logger.debug("No atom symbols: " + elsym.size(), " != " + atomCounter);
1272         }
1273 
1274         if (eltitles.size() == atomCounter) {
1275             hasTitles = true;
1276         } else {
1277             logger.debug("No atom titles: " + eltitles.size(), " != " + atomCounter);
1278         }
1279 
1280         if ((x3.size() == atomCounter) && (y3.size() == atomCounter) && (z3.size() == atomCounter)) {
1281             has3D = true;
1282         } else {
1283             logger.debug("No 3D info: " + x3.size(), " " + y3.size(), " " + z3.size(), " != " + atomCounter);
1284         }
1285 
1286         if ((xfract.size() == atomCounter) && (yfract.size() == atomCounter) && (zfract.size() == atomCounter)) {
1287             has3Dfract = true;
1288         } else {
1289             logger.debug("No 3D fractional info: " + xfract.size(), " " + yfract.size(), " " + zfract.size(), " != "
1290                     + atomCounter);
1291         }
1292 
1293         if ((x2.size() == atomCounter) && (y2.size() == atomCounter)) {
1294             has2D = true;
1295         } else {
1296             logger.debug("No 2D info: " + x2.size(), " " + y2.size(), " != " + atomCounter);
1297         }
1298 
1299         if (formalCharges.size() == atomCounter) {
1300             hasFormalCharge = true;
1301         } else {
1302             logger.debug("No formal Charge info: " + formalCharges.size(), " != " + atomCounter);
1303         }
1304 
1305         if (atomAromaticities.size() == atomCounter) {
1306             hasAtomAromaticities = true;
1307         } else {
1308             logger.debug("No aromatic atom info: " + atomAromaticities.size(), " != " + atomCounter);
1309         }
1310 
1311         if (partialCharges.size() == atomCounter) {
1312             hasPartialCharge = true;
1313         } else {
1314             logger.debug("No partial Charge info: " + partialCharges.size(), " != " + atomCounter);
1315         }
1316 
1317         if (hCounts.size() == atomCounter) {
1318             hasHCounts = true;
1319         } else {
1320             logger.debug("No hydrogen Count info: " + hCounts.size(), " != " + atomCounter);
1321         }
1322 
1323         if (spinMultiplicities.size() == atomCounter) {
1324             hasSpinMultiplicities = true;
1325         } else {
1326             logger.debug("No spinMultiplicity info: " + spinMultiplicities.size(), " != " + atomCounter);
1327         }
1328 
1329         if (atomParities.size() == atomCounter) {
1330             hasAtomParities = true;
1331         } else {
1332             logger.debug("No atomParity info: " + spinMultiplicities.size(), " != " + atomCounter);
1333         }
1334 
1335         if (occupancies.size() == atomCounter) {
1336             hasOccupancies = true;
1337         } else {
1338             logger.debug("No occupancy info: " + occupancies.size(), " != " + atomCounter);
1339         }
1340 
1341         if (atomDictRefs.size() == atomCounter) {
1342             hasDictRefs = true;
1343         } else {
1344             logger.debug("No dictRef info: " + atomDictRefs.size(), " != " + atomCounter);
1345         }
1346 
1347         if (isotope.size() == atomCounter) {
1348             hasIsotopes = true;
1349         } else {
1350             logger.debug("No isotope info: " + isotope.size(), " != " + atomCounter);
1351         }
1352         if (atomicNumbers.size() == atomCounter) {
1353             hasAtomicNumbers = true;
1354         } else {
1355             logger.debug("No atomicNumbers info: " + atomicNumbers.size(), " != " + atomCounter);
1356         }
1357         if (exactMasses.size() == atomCounter) {
1358             hasExactMasses = true;
1359         } else {
1360             logger.debug("No atomicNumbers info: " + atomicNumbers.size(), " != " + atomCounter);
1361         }
1362 
1363         for (int i = 0; i < atomCounter; i++) {
1364             logger.info("Storing atom: ", i);
1365             //            cdo.startObject("Atom");
1366             currentAtom = currentChemFile.getBuilder().newInstance(IAtom.class, "H");
1367             logger.debug("Atom # " + atomCounter);
1368             if (hasID) {
1369                 //                cdo.setObjectProperty("Atom", "id", (String)elid.get(i));
1370                 logger.debug("id: ", (String) elid.get(i));
1371                 currentAtom.setID((String) elid.get(i));
1372                 atomEnumeration.put((String) elid.get(i), currentAtom);
1373             }
1374             if (hasTitles) {
1375                 if (hasSymbols) {
1376                     String symbol = (String) elsym.get(i);
1377                     if (symbol.equals("Du") || symbol.equals("Dummy")) {
1378                         //                        cdo.setObjectProperty("PseudoAtom", "label", (String)eltitles.get(i));
1379                         if (!(currentAtom instanceof IPseudoAtom)) {
1380                             currentAtom = currentChemFile.getBuilder().newInstance(IPseudoAtom.class, currentAtom);
1381                             if (hasID) atomEnumeration.put((String) elid.get(i), currentAtom);
1382                         }
1383                         ((IPseudoAtom) currentAtom).setLabel((String) eltitles.get(i));
1384                     } else {
1385                         //                        cdo.setObjectProperty("Atom", "title", (String)eltitles.get(i));
1386                         // FIXME: huh?
1387                         if (eltitles.get(i) != null)
1388                             currentAtom.setProperty(CDKConstants.TITLE, (String) eltitles.get(i));
1389                     }
1390                 } else {
1391                     //                    cdo.setObjectProperty("Atom", "title", (String)eltitles.get(i));
1392                     // FIXME: huh?
1393                     if (eltitles.get(i) != null) currentAtom.setProperty(CDKConstants.TITLE, (String) eltitles.get(i));
1394                 }
1395             }
1396 
1397             // store optional atom properties
1398             if (hasSymbols) {
1399                 String symbol = (String) elsym.get(i);
1400                 if (symbol.equals("Du") || symbol.equals("Dummy")) {
1401                     symbol = "R";
1402                 }
1403                 //                cdo.setObjectProperty("Atom", "type", symbol);
1404                 if (symbol.equals("R") && !(currentAtom instanceof IPseudoAtom)) {
1405                     currentAtom = currentChemFile.getBuilder().newInstance(IPseudoAtom.class, currentAtom);
1406                     ((IPseudoAtom) currentAtom).setLabel("R");
1407                     if (hasID) atomEnumeration.put((String) elid.get(i), currentAtom);
1408                 }
1409                 currentAtom.setSymbol(symbol);
1410                 if (!hasAtomicNumbers || atomicNumbers.get(i) == null)
1411                     currentAtom.setAtomicNumber(PeriodicTable.getAtomicNumber(symbol));
1412             }
1413 
1414             if (has3D) {
1415                 //                cdo.setObjectProperty("Atom", "x3", (String)x3.get(i));
1416                 //                cdo.setObjectProperty("Atom", "y3", (String)y3.get(i));
1417                 //                cdo.setObjectProperty("Atom", "z3", (String)z3.get(i));
1418                 if (x3.get(i) != null && y3.get(i) != null && z3.get(i) != null) {
1419                     currentAtom.setPoint3d(new Point3d(Double.parseDouble((String) x3.get(i)), Double
1420                             .parseDouble((String) y3.get(i)), Double.parseDouble((String) z3.get(i))));
1421                 }
1422             }
1423 
1424             if (has3Dfract) {
                // ok, need to convert fractional into Euclidean coordinates
1426                 //                cdo.setObjectProperty("Atom", "xFract", (String)xfract.get(i));
1427                 //                cdo.setObjectProperty("Atom", "yFract", (String)yfract.get(i));
1428                 //                cdo.setObjectProperty("Atom", "zFract", (String)zfract.get(i));
1429                 currentAtom.setFractionalPoint3d(new Point3d(Double.parseDouble((String) xfract.get(i)), Double
1430                         .parseDouble((String) yfract.get(i)), Double.parseDouble((String) zfract.get(i))));
1431             }
1432 
1433             if (hasFormalCharge) {
1434                 //                cdo.setObjectProperty("Atom", "formalCharge",
1435                 //                                      (String)formalCharges.get(i));
1436                 currentAtom.setFormalCharge(Integer.parseInt((String) formalCharges.get(i)));
1437             }
1438 
1439             if (hasAtomAromaticities) {
1440                 if (atomAromaticities.get(i) != null) currentAtom.setFlag(CDKConstants.ISAROMATIC, true);
1441             }
1442 
1443             if (hasPartialCharge) {
1444                 logger.debug("Storing partial atomic charge...");
1445                 //                cdo.setObjectProperty("Atom", "partialCharge",
1446                 //                                      (String)partialCharges.get(i));
1447                 currentAtom.setCharge(Double.parseDouble((String) partialCharges.get(i)));
1448             }
1449 
1450             if (hasHCounts) {
1451                 //                cdo.setObjectProperty("Atom", "hydrogenCount", (String)hCounts.get(i));
1452                 // convertCMLToCDKHydrogenCounts() is called to update hydrogen counts when molecule is stored
1453                 String hCount = hCounts.get(i);
1454                 if (hCount != null) {
1455                     currentAtom.setImplicitHydrogenCount(Integer.parseInt(hCount));
1456                 } else {
1457                     currentAtom.setImplicitHydrogenCount((Integer) CDKConstants.UNSET);
1458                 }
1459             }
1460 
1461             if (has2D) {
1462                 if (x2.get(i) != null && y2.get(i) != null) {
1463                     //                    cdo.setObjectProperty("Atom", "x2", (String)x2.get(i));
1464                     //                    cdo.setObjectProperty("Atom", "y2", (String)y2.get(i));
1465                     currentAtom.setPoint2d(new Point2d(Double.parseDouble((String) x2.get(i)), Double
1466                             .parseDouble((String) y2.get(i))));
1467                 }
1468             }
1469 
1470             if (hasDictRefs) {
1471                 //                cdo.setObjectProperty("Atom", "dictRef", (String)atomDictRefs.get(i));
1472                 if (atomDictRefs.get(i) != null)
1473                     currentAtom.setProperty("org.openscience.cdk.dict", (String) atomDictRefs.get(i));
1474             }
1475 
1476             if (hasSpinMultiplicities && spinMultiplicities.get(i) != null) {
1477                 //                cdo.setObjectProperty("Atom", "spinMultiplicity", (String)spinMultiplicities.get(i));
1478                 int unpairedElectrons = Integer.parseInt((String) spinMultiplicities.get(i)) - 1;
1479                 for (int sm = 0; sm < unpairedElectrons; sm++) {
1480                     currentMolecule.addSingleElectron(currentChemFile.getBuilder().newInstance(ISingleElectron.class,
1481                             currentAtom));
1482                 }
1483             }
1484 
1485             if (hasOccupancies && occupancies.get(i) != null) {
1486                 //                cdo.setObjectProperty("Atom", "occupanciy", (String)occupancies.get(i));
1487                 // FIXME: this has no ChemFileCDO equivalent, not even if spelled correctly
1488             }
1489 
1490             if (hasIsotopes) {
1491                 //                cdo.setObjectProperty("Atom", "massNumber", (String)isotope.get(i));
1492                 if (isotope.get(i) != null)
1493                     currentAtom.setMassNumber((int) Double.parseDouble((String) isotope.get(i)));
1494             }
1495 
1496             if (hasAtomicNumbers) {
1497                 if (atomicNumbers.get(i) != null) currentAtom.setAtomicNumber(Integer.parseInt(atomicNumbers.get(i)));
1498             }
1499 
1500             if (hasExactMasses) {
1501                 if (exactMasses.get(i) != null) currentAtom.setExactMass(Double.parseDouble(exactMasses.get(i)));
1502             }
1503 
1504             if (atomCustomProperty.get(Integer.valueOf(i)) != null) {
1505                 Iterator<String> it = atomCustomProperty.get(Integer.valueOf(i)).iterator();
1506                 while (it.hasNext()) {
1507                     currentAtom.setProperty(it.next(), it.next());
1508                 }
1509             }
1510 
1511             //            cdo.endObject("Atom");
1512 
1513             currentMolecule.addAtom(currentAtom);
1514         }
1515 
1516         for (int i = 0; i < atomCounter; i++) {
1517             if (hasAtomParities &&
1518                 atomParities.get(i) != null &&
1519                 !atomParities.get(i).isEmpty()) {
1520                 IAtom ligandAtom1 = atomEnumeration.get(parityARef1.get(i));
1521                 IAtom ligandAtom2 = atomEnumeration.get(parityARef2.get(i));
1522                 IAtom ligandAtom3 = atomEnumeration.get(parityARef3.get(i));
1523                 IAtom ligandAtom4 = atomEnumeration.get(parityARef4.get(i));
1524                 IAtom[] ligandAtoms = new IAtom[]{ligandAtom1, ligandAtom2, ligandAtom3, ligandAtom4};
1525                 Stereo config;
1526                 int parity = 0;
1527                 try {
1528                     parity = (int) Math.signum(Double.parseDouble(atomParities.get(i)));
1529                 } catch (NumberFormatException ex) {
1530                     // ignored
1531                 }
1532                 if (parity > 0)
1533                     config = Stereo.CLOCKWISE;
1534                 else if (parity < 0)
1535                     config = Stereo.ANTI_CLOCKWISE;
1536                 else {
1537                     config = null;
1538                     logger.warn("Cannot interpret stereo information, invalid parity: '" + atomParities.get(i) + "'");
1539                 }
1540                 if (config != null) {
1541                     TetrahedralChirality chirality = new TetrahedralChirality(currentMolecule.getAtom(i), ligandAtoms, config);
1542                     currentMolecule.addStereoElement(chirality);
1543                 }
1544             }
1545         }
1546 
1547         if (elid.size() > 0) {
1548             // assume this is the current working list
1549             bondElid = elid;
1550         }
1551     }
1552 
storeBondData()1553     protected void storeBondData() {
1554         logger.debug("Testing a1,a2,stereo,order = count: " + bondARef1.size(), "," + bondARef2.size(), ","
1555                 + bondStereo.size(), "," + order.size(), "=" + bondCounter);
1556 
1557         if ((bondARef1.size() == bondCounter) && (bondARef2.size() == bondCounter)) {
1558             logger.debug("About to add bond info...");
1559 
1560             Iterator<String> orders = order.iterator();
1561             Iterator<String> ids = bondid.iterator();
1562             Iterator<String> bar1s = bondARef1.iterator();
1563             Iterator<String> bar2s = bondARef2.iterator();
1564             Iterator<String> stereos = bondStereo.iterator();
1565             Iterator<Boolean> aroms = bondAromaticity.iterator();
1566 
1567             while (bar1s.hasNext()) {
1568                 //                cdo.startObject("Bond");
1569                 //                if (ids.hasNext()) {
1570                 //                    cdo.setObjectProperty("Bond", "id", (String)ids.next());
1571                 //                }
1572                 //                cdo.setObjectProperty("Bond", "atom1",
1573                 //                                      Integer.valueOf(bondElid.indexOf(
1574                 //                                                          (String)bar1s.next())).toString());
1575                 //                cdo.setObjectProperty("Bond", "atom2",
1576                 //                                      Integer.valueOf(bondElid.indexOf(
1577                 //                                                          (String)bar2s.next())).toString());
1578                 IAtom a1 = (IAtom) atomEnumeration.get((String) bar1s.next());
1579                 IAtom a2 = (IAtom) atomEnumeration.get((String) bar2s.next());
1580                 currentBond = currentChemFile.getBuilder().newInstance(IBond.class, a1, a2);
1581                 if (ids.hasNext()) {
1582                     currentBond.setID((String) ids.next());
1583                 }
1584 
1585                 if (orders.hasNext()) {
1586                     String bondOrder = (String) orders.next();
1587 
1588                     if ("S".equals(bondOrder)) {
1589                         //                        cdo.setObjectProperty("Bond", "order", "1");
1590                         currentBond.setOrder(Order.SINGLE);
1591                     } else if ("D".equals(bondOrder)) {
1592                         //                        cdo.setObjectProperty("Bond", "order", "2");
1593                         currentBond.setOrder(Order.DOUBLE);
1594                     } else if ("T".equals(bondOrder)) {
1595                         //                        cdo.setObjectProperty("Bond", "order", "3");
1596                         currentBond.setOrder(Order.TRIPLE);
1597                     } else if ("A".equals(bondOrder)) {
1598                         //                        cdo.setObjectProperty("Bond", "order", "1.5");
1599                         currentBond.setOrder(Order.SINGLE);
1600                         currentBond.setFlag(CDKConstants.ISAROMATIC, true);
1601                     } else {
1602                         //                        cdo.setObjectProperty("Bond", "order", bondOrder);
1603                         currentBond.setOrder(BondManipulator.createBondOrder(Double.parseDouble(bondOrder)));
1604                     }
1605                 }
1606 
1607                 if (stereos.hasNext()) {
1608                     //                    cdo.setObjectProperty("Bond", "stereo",
1609                     //                                          (String)stereos.next());
1610                     String nextStereo = (String) stereos.next();
1611                     if ("H".equals(nextStereo)) {
1612                         currentBond.setStereo(IBond.Stereo.DOWN);
1613                     } else if ("W".equals(nextStereo)) {
1614                         currentBond.setStereo(IBond.Stereo.UP);
1615                     } else if (nextStereo != null && !nextStereo.isEmpty()) {
1616                         logger.warn("Cannot interpret bond display information: '" + nextStereo + "'");
1617                     }
1618                 }
1619 
1620                 if (aroms.hasNext()) {
1621                     Object nextArom = aroms.next();
1622                     if (nextArom != null && ((boolean) nextArom)) {
1623                         currentBond.setFlag(CDKConstants.ISAROMATIC, true);
1624                     }
1625                 }
1626 
1627                 if (currentBond.getID() != null) {
1628                     Map<String, String> currentBondProperties = bondCustomProperty.get(currentBond.getID());
1629                     if (currentBondProperties != null) {
1630                         Iterator<String> keys = currentBondProperties.keySet().iterator();
1631                         while (keys.hasNext()) {
1632                             String key = keys.next();
1633                             currentBond.setProperty(key, currentBondProperties.get(key));
1634                         }
1635                         bondCustomProperty.remove(currentBond.getID());
1636                     }
1637                 }
1638 
1639                 //                cdo.endObject("Bond");
1640                 currentMolecule.addBond(currentBond);
1641             }
1642         }
1643     }
1644 
addArrayElementsTo(List<String> toAddto, String array)1645     protected int addArrayElementsTo(List<String> toAddto, String array) {
1646         StringTokenizer tokenizer = new StringTokenizer(array);
1647         int i = 0;
1648         while (tokenizer.hasMoreElements()) {
1649             toAddto.add(tokenizer.nextToken());
1650             i++;
1651         }
1652         return i;
1653     }
1654 }
1655