1 /* $RCSfile$
2  * $Author: hansonr $
3  * $Date: 2006-03-15 07:52:29 -0600 (Wed, 15 Mar 2006) $
4  * $Revision: 4614 $
5  *
6  * Copyright (C) 2003-2005  Miguel, Jmol Development, www.jmol.org
7  *
8  * Contact: jmol-developers@lists.sf.net
9  *
10  *  This library is free software; you can redistribute it and/or
11  *  modify it under the terms of the GNU Lesser General Public
12  *  License as published by the Free Software Foundation; either
13  *  version 2.1 of the License, or (at your option) any later version.
14  *
15  *  This library is distributed in the hope that it will be useful,
16  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  *  Lesser General Public License for more details.
19  *
20  *  You should have received a copy of the GNU Lesser General Public
21  *  License along with this library; if not, write to the Free Software
22  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
23  */
24 
25 package org.jmol.adapter.readers.more;
26 
27 import javajs.util.PT;
28 
29 import org.jmol.adapter.smarter.Bond;
30 import org.jmol.adapter.smarter.Atom;
31 
32 import org.jmol.api.JmolAdapter;
33 
34 /**
35  * A minimal multi-file reader for TRIPOS SYBYL mol2 files.
36  *<p>
 * <a href='http://www.tripos.com/data/support/mol2.pdf'>
38  * http://www.tripos.com/data/support/mol2.pdf
39  * </a>
40  *
41  * see also http://www.tripos.com/mol2/atom_types.html
42  *
43  * PDB note:
44  *
 * Note that the mol2 format of PDB files is quite minimal. All we
 * get is the PDB atom name, coordinates, residue number, and residue name.
 * No chain terminator, no chain designator, no element symbol.
 *
 * Chains are inferred from resets in the residue numbering and are simply
 * labeled A B C D .... Z a b c d .... z
50  *
51  *<p>
52  */
53 
54 public class Mol2Reader extends ForceFieldReader {
55 
56   private int nAtoms = 0;
57   private int ac = 0;
58   private boolean isPDB = false;
59 
60   @Override
initializeReader()61   protected void initializeReader() throws Exception {
62     setUserAtomTypes();
63   }
64 
65   @Override
checkLine()66   public boolean checkLine() throws Exception {
67     if (line.equals("@<TRIPOS>MOLECULE")) {
68       if (!processMolecule()) {
69         return true;
70       }
71       continuing = !isLastModel(modelNumber);
72       return false;
73     }
74     if (line.length() != 0 && line.charAt(0) == '#') {
75       /*
76        * Comment lines (starting with '#' as per Tripos spec) may contain an
77        * inline Jmol script.
78        */
79       checkCurrentLineForScript();
80     }
81     return true;
82   }
83 
processMolecule()84   private boolean processMolecule() throws Exception {
85     /* 4-6 lines:
86      ZINC02211856
87      55    58     0     0     0
88      SMALL
89      USER_CHARGES
90      2-diethylamino-1-[2-(2-naphthyl)-4-quinolyl]-ethanol
91 
92      mol_name
93      num_atoms [num_bonds [num_subst [num_feat [num_sets]]]]
94      mol_type
95      charge_type
96      [status_bits
97      [mol_comment]]
98 
99      */
100 
101     isPDB = false;
102     String thisDataSetName = rd().trim();
103     if (!doGetModel(++modelNumber, thisDataSetName)) {
104       return false;
105     }
106 
107     lastSequenceNumber = Integer.MAX_VALUE;
108     chainID = 64; // 'A' - 1;
109     rd();
110     line += " 0 0 0 0 0 0";
111     ac = parseIntStr(line);
112     int bondCount = parseInt();
113     if (bondCount == 0)
114       asc.setNoAutoBond();
115     int resCount = parseInt();
116     rd();//mol_type
117     rd();//charge_type
118     //boolean iHaveCharges = (line.indexOf("NO_CHARGES") != 0);
119     //optional SYBYL status
120     if (rd() != null && (line.length() == 0 || line.charAt(0) != '@')) {
121       //optional comment -- but present if comment is present
122       if (rd() != null && line.length() != 0 && line.charAt(0) != '@') {
123         /* The MOLECULE's comment line may contain an inline Jmol script.
124             (But don't expect it to be applied just to this molecule/model/frame.)
125             Note: '#' is not needed here, but it is for general comments (out of the MOLECULE data structure),
126             so for consistency we'll allow both 'jmolscript:' as such or preceded by # (spaces are ignored).
127             Any comments written before the 'jmolscript:' will be preserved (and added to the model's title).
128         */
129         if (line.indexOf("jmolscript:") >= 0) {
130           checkCurrentLineForScript();
131           if (line.equals("#")) {
132             line = "";
133           }
134         }
135         if (line.length() != 0) {
136           thisDataSetName += ": " + line.trim();
137         }
138       }
139     }
140     newAtomSet(thisDataSetName);
141     while (line != null && !line.equals("@<TRIPOS>MOLECULE")) {
142       if (line.equals("@<TRIPOS>ATOM")) {
143         readAtoms(ac);
144         asc.setAtomSetName(thisDataSetName);
145       } else if (line.equals("@<TRIPOS>BOND")) {
146         readBonds(bondCount);
147       } else if (line.equals("@<TRIPOS>SUBSTRUCTURE")) {
148         readResInfo(resCount);
149       } else if (line.equals("@<TRIPOS>CRYSIN")) {
150         readCrystalInfo();
151       }
152       rd();
153     }
154     nAtoms += ac;
155     if (isPDB) {
156       setIsPDB();
157       setModelPDB(true);
158     }
159     applySymmetryAndSetTrajectory();
160     return true;
161   }
162 
163   private int lastSequenceNumber = Integer.MAX_VALUE;
164   private int chainID = 64; // 'A' - 1
165 
readAtoms(int ac)166   private void readAtoms(int ac) throws Exception {
167     //     1 Cs       0.0000   4.1230   0.0000   Cs        1 RES1   0.0000
168     //  1 C1          7.0053   11.3096   -1.5429 C.3       1 <0>        -0.1912
169     // free format, but no blank lines
170     if (ac == 0)
171       return;
172     int i0 = asc.ac;
173     for (int i = 0; i < ac; ++i) {
174       Atom atom = asc.addNewAtom();
175       String[] tokens = PT.getTokens(rd());
176       String atomType = tokens[5];
177       String name = tokens[1];// + '\0' + atomType;
178       int pt = atomType.indexOf(".");
179       if (pt >= 0) {
180         // accepts "." for "no atom type"
181         atom.elementSymbol = atomType.substring(0, pt);
182       } else {
183         atom.atomName = name;
184         atom.elementSymbol = atom.getElementSymbol();
185       }
186       atom.atomName = name + '\0' + atomType;
187       atom.set(parseFloatStr(tokens[2]), parseFloatStr(tokens[3]),
188           parseFloatStr(tokens[4]));
189       // apparently "NO_CHARGES" is not strictly enforced
190       //      if (iHaveCharges)
191       if (tokens.length > 6) {
192         atom.sequenceNumber = parseIntStr(tokens[6]);
193         if (atom.sequenceNumber < lastSequenceNumber) {
194           if (chainID == 90) //'Z'
195             chainID = 96;//'a' - 1;
196           chainID++;
197         }
198         lastSequenceNumber = atom.sequenceNumber;
199         setChainID(atom, "" + (char) chainID);
200       }
201       if (tokens.length > 7)
202         atom.group3 = tokens[7];
203       if (tokens.length > 8) {
204         atom.partialCharge = parseFloatStr(tokens[8]);
205         if (atom.partialCharge == (int) atom.partialCharge)
206           atom.formalCharge = (int) atom.partialCharge;
207       }
208     }
209 
210     // trying to guess if this is a PDB-type file
211 
212     Atom[] atoms = asc.atoms;
213 
214     // 1. Does the very first atom have a group name?
215 
216     String g3 = atoms[i0].group3;
217     if (g3 == null)
218       return;
219     boolean isPDB = false;
220 
221     // 2. If so, is there more than one kind of group?
222 
223     if (!g3.equals("UNK") && !g3.startsWith("RES")) {
224 
225       for (int i = asc.ac; --i >= i0;)
226         if (!g3.equals(atoms[asc.ac - 1].group3)) {
227           isPDB = true;
228           break;
229         }
230 
231       // 3. If so, is there an identifiable group name?
232 
233       if (isPDB) {
234         isPDB = false;
235         for (int i = asc.ac; --i >= i0;) {
236           int pt = getPDBGroupLength(atoms[i].group3);
237           if (pt == 0 || pt > 3)
238             break;
239           if (vwr.getJBR().isKnownPDBGroup(g3.substring(0, pt), Integer.MAX_VALUE)) {
240             isPDB = this.isPDB = true;
241             break;
242           }
243         }
244       }
245 
246     }
247 
248     // remove group3 entry if not PDB; fix if it is like THR13
249     for (int i = asc.ac; --i >= i0;) {
250       if (isPDB) {
251         g3 = atoms[i].group3;
252         g3 = g3.substring(0, getPDBGroupLength(g3));
253         atoms[i].isHetero = vwr.getJBR().isHetero(g3);
254       } else {
255         g3 = null;
256       }
257       atoms[i].group3 = g3;
258     }
259   }
260 
getPDBGroupLength(String g3)261   private int getPDBGroupLength(String g3) {
262     int pt0 = g3.length();
263     int pt = pt0;
264     while (--pt > 0 && Character.isDigit(g3.charAt(pt))) {
265       // continue
266     }
267     return ++pt;
268   }
269 
readBonds(int bondCount)270   private void readBonds(int bondCount) throws Exception {
271     //     6     1    42    1
272     // free format, but no blank lines
273     for (int i = 0; i < bondCount; ++i) {
274       String[] tokens = PT.getTokens(rd());
275       int atomIndex1 = parseIntStr(tokens[1]);
276       int atomIndex2 = parseIntStr(tokens[2]);
277       int order = parseIntStr(tokens[3]);
278       if (order == Integer.MIN_VALUE)
279         order = (tokens[3].equals("ar") ? JmolAdapter.ORDER_AROMATIC
280             : tokens[3].equals("am") ? 1 : JmolAdapter.ORDER_UNSPECIFIED);
281       asc.addBond(new Bond(nAtoms + atomIndex1 - 1, nAtoms
282           + atomIndex2 - 1, order));
283     }
284   }
285 
readResInfo(int resCount)286   private void readResInfo(int resCount) throws Exception {
287     // free format, but no blank lines
288     for (int i = 0; i < resCount; ++i) {
289       rd();
290       //to be determined -- not implemented
291     }
292   }
293 
readCrystalInfo()294   private void readCrystalInfo() throws Exception {
295     //    4.1230    4.1230    4.1230   90.0000   90.0000   90.0000   221     1
296     rd();
297     String[] tokens = getTokens();
298     if (tokens.length < 6)
299       return;
300     String name = "";
301     for (int i = 6; i < tokens.length; i++)
302       name += " " + tokens[i];
303     if (name == "")
304       name = " P1";
305     else
306       name += " *";
307     name = name.substring(1);
308     setSpaceGroupName(name);
309     if (ignoreFileUnitCell)
310       return;
311     for (int i = 0; i < 6; i++)
312       setUnitCellItem(i, parseFloatStr(tokens[i]));
313     Atom[] atoms = asc.atoms;
314     for (int i = 0; i < ac; ++i)
315       setAtomCoord(atoms[nAtoms + i]);
316   }
317 }
318