1 /* Copyright (C) 2004-2007  Rajarshi Guha <rajarshi@users.sourceforge.net>
2  *
3  *  Contact: cdk-devel@lists.sourceforge.net
4  *
5  *  This program is free software; you can redistribute it and/or
6  *  modify it under the terms of the GNU Lesser General Public License
7  *  as published by the Free Software Foundation; either version 2.1
8  *  of the License, or (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU Lesser General Public License for more details.
14  *
15  *  You should have received a copy of the GNU Lesser General Public License
16  *  along with this program; if not, write to the Free Software
17  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  */
19 
20 package org.openscience.cdk.qsar.descriptors.protein;
21 
22 import java.io.BufferedReader;
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.io.InputStreamReader;
26 import java.util.ArrayList;
27 import java.util.HashMap;
28 import java.util.Iterator;
29 import java.util.List;
30 import java.util.Map;
31 import java.util.Set;
32 
33 import org.openscience.cdk.exception.CDKException;
34 import org.openscience.cdk.interfaces.IAtomContainer;
35 import org.openscience.cdk.interfaces.IBioPolymer;
36 import org.openscience.cdk.interfaces.IMonomer;
37 import org.openscience.cdk.interfaces.IStrand;
38 import org.openscience.cdk.qsar.AbstractMolecularDescriptor;
39 import org.openscience.cdk.qsar.DescriptorSpecification;
40 import org.openscience.cdk.qsar.DescriptorValue;
41 import org.openscience.cdk.qsar.IMolecularDescriptor;
42 import org.openscience.cdk.qsar.result.DoubleArrayResult;
43 import org.openscience.cdk.qsar.result.DoubleArrayResultType;
44 import org.openscience.cdk.qsar.result.IDescriptorResult;
45 import org.openscience.cdk.tools.ILoggingTool;
46 import org.openscience.cdk.tools.LoggingToolFactory;
47 
48 /**
49  * An implementation of the TAE descriptors for amino acids.
50  *
51  * The TAE descriptors ({@cdk.cite BREN1995} {@cdk.cite BREN1997} {@cdk.cite WHITE2003})
52  * are derived from pre-calculated quantum mechanical parameters. This class
53  * uses the parameters for amino acids and thus evaluates a set of 147 descriptors for peptide
54  * sequences.
55  *
 * The class expects that it will be supplied an object which implements the {@link IBioPolymer} interface.
 * Ordinary AtomContainer objects are rejected: the returned descriptor value then holds NaN entries
 * together with the exception describing the problem.
58  *
59  * The descriptors are returned in the following order (see
60  * <a href="http://www.chem.rpi.edu/chemweb/recondoc/TAE.doc">here</a>
61  * for a detailed description of the individual descriptors):
62  * <pre>
63  * Energy Population VOLTAE SurfArea
64  * SIDel.Rho.N Del.Rho.NMin Del.Rho.NMax Del.Rho.NIA Del.Rho.NA1
65  * Del.Rho.NA2 Del.Rho.NA3 Del.Rho.NA4 Del.Rho.NA5 Del.Rho.NA6
66  * Del.Rho.NA7 Del.Rho.NA8 Del.Rho.NA9 Del.Rho.NA10 SIDel.K.N
67  * Del.K.Min Del.K.Max Del.K.IA Del.K.NA1 Del.K.NA2
68  * Del.K.NA3 Del.K.NA4 Del.K.NA5 Del.K.NA6 Del.K.NA7
69  * Del.K.NA8 Del.K.NA9 Del.K.NA10 SIK SIKMin
70  * SIKMax SIKIA SIKA1 SIKA2 SIKA3
71  * SIKA4 SIKA5 SIKA6 SIKA7 SIKA8
72  * SIKA9 SIKA10 SIDel.G.N Del.G.NMin Del.G.NMax
73  * Del.G.NIA Del.G.NA1 Del.G.NA2 Del.G.NA3 Del.G.NA4
74  * Del.G.NA5 Del.G.NA6 Del.G.NA7 Del.G.NA8 Del.G.NA9
75  * Del.G.NA10 SIG SIGMin SIGMax SIGIA
76  * SIGA1 SIGA2 SIGA3 SIGA4 SIGA5
77  * SIGA6 SIGA7 SIGA8 SIGA9 SIGA10
78  * SIEP SIEPMin SIEPMax SIEPIA SIEPA1
79  * SIEPA2 SIEPA3 SIEPA4 SIEPA5 SIEPA6
80  * SIEPA7 SIEPA8 SIEPA9 SIEPA10 EP1
81  * EP2 EP3 EP4 EP5 EP6
82  * EP7 EP8 EP9 EP10 PIPMin
83  * PIPMax PIPAvg PIP1 PIP2 PIP3
84  * PIP4 PIP5 PIP6 PIP7 PIP8
85  * PIP9 PIP10 PIP11 PIP12 PIP13
86  * PIP14 PIP15 PIP16 PIP17 PIP18
87  * PIP19 PIP20 Fuk FukMin FukMax
88  * Fuk1 Fuk2 Fuk3 Fuk4 Fuk5
89  * Fuk6 Fuk7 Fuk8 Fuk9 Fuk10
90  * Lapl LaplMin LaplMax Lapl1 Lapl2
91  * Lapl3 Lapl4 Lapl5 Lapl6 Lapl7
92  * Lapl8 Lapl9 Lapl10
93  * </pre>
94  *
95  * <table border="1"><caption>Parameters for this descriptor:</caption>
96  * <tr>
97  * <td>Name</td>
98  * <td>Default</td>
99  * <td>Description</td>
100  * </tr>
101  * <tr>
102  * <td></td>
103  * <td></td>
104  * <td>no parameters</td>
105  * </tr>
106  * </table>
107  *
108  * @author      Rajarshi Guha
109  * @cdk.created 2006-08-23
110  * @cdk.module  qsarprotein
111  * @cdk.githash
112  * @cdk.dictref qsar-descriptors:taeAminoAcid
113  * @see         IBioPolymer
114  */
115 public class TaeAminoAcidDescriptor extends AbstractMolecularDescriptor implements IMolecularDescriptor {
116 
117     private static ILoggingTool          logger    = LoggingToolFactory.createLoggingTool(TaeAminoAcidDescriptor.class);
118     private        Map<String, Double[]> taeParams = new HashMap<String, Double[]>();
119     private        int                   ndesc     = 147;
120 
121     private Map<String, String> nametrans = new HashMap<String, String>();
122 
getMonomers(IBioPolymer iBioPolymer)123     private List<IMonomer> getMonomers(IBioPolymer iBioPolymer) {
124         List<IMonomer> monomList = new ArrayList<IMonomer>();
125 
126         Map<String, IStrand> strands = iBioPolymer.getStrands();
127         Set<String> strandKeys = strands.keySet();
128         for (Iterator<String> iterator = strandKeys.iterator(); iterator.hasNext(); ) {
129             String key = iterator.next();
130             IStrand aStrand = strands.get(key);
131             Map<String, IMonomer> tmp = aStrand.getMonomers();
132             Set<String> keys = tmp.keySet();
133             for (Iterator<String> iterator1 = keys.iterator(); iterator1.hasNext(); ) {
134                 String o1 = iterator1.next();
135                 monomList.add(tmp.get(o1));
136             }
137         }
138 
139         return monomList;
140     }
141 
loadTAEParams()142     private void loadTAEParams() {
143         String filename = "org/openscience/cdk/qsar/descriptors/data/taepeptides.txt";
144         InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename);
145         if (ins == null) {
146             logger.debug("Could not load the TAE peptide parameter data file");
147             taeParams = null;
148             return;
149         }
150         try {
151             BufferedReader breader = new BufferedReader(new InputStreamReader(ins));
152             breader.readLine(); // throw away the header
153             for (int i = 0; i < 60; i++) {
154                 String line = breader.readLine();
155                 String[] components = line.split(",");
156                 if (components.length != (ndesc + 1))
157                     throw new CDKException("TAE peptide data table seems to be corrupt");
158                 String key = components[0].toLowerCase().trim();
159 
160                 Double[] data = new Double[ndesc];
161                 for (int j = 1; j < components.length; j++)
162                     data[j - 1] = new Double(components[j]);
163 
164                 taeParams.put(key, data);
165             }
166         } catch (IOException ioe) {
167             ioe.printStackTrace();
168             taeParams = null;
169             return;
170         } catch (CDKException e) {
171             e.printStackTrace();
172             taeParams = null;
173             return;
174         }
175 
176         logger.debug("Loaded " + taeParams.size() + " TAE parameters for amino acids");
177     }
178 
TaeAminoAcidDescriptor()179     public TaeAminoAcidDescriptor() {
180         nametrans.put("a", "ala");
181         nametrans.put("c", "cys");
182         nametrans.put("d", "asp");
183         nametrans.put("e", "glu");
184         nametrans.put("f", "phe");
185         nametrans.put("g", "gly");
186         nametrans.put("h", "his");
187         nametrans.put("i", "ile");
188         nametrans.put("k", "lys");
189         nametrans.put("l", "leu");
190         nametrans.put("m", "met");
191         nametrans.put("n", "asn");
192         nametrans.put("p", "pro");
193         nametrans.put("q", "gln");
194         nametrans.put("r", "arg");
195         nametrans.put("s", "ser");
196         nametrans.put("t", "thr");
197         nametrans.put("v", "val");
198         nametrans.put("w", "trp");
199         nametrans.put("y", "tyr");
200 
201         loadTAEParams();
202     }
203 
204     @Override
getSpecification()205     public DescriptorSpecification getSpecification() {
206         return new DescriptorSpecification(
207                 "http://www.blueobelisk.org/ontologies/chemoinformatics-algorithms/#taeAminoAcid", this.getClass()
208                                                                                                        .getName(), "The Chemistry Development Kit");
209     }
210 
211     /**
212      * Sets the parameters attribute of the TaeAminoAcidDescriptor object.
213      *
214      * @param params The new parameters value
215      * @throws org.openscience.cdk.exception.CDKException
216      *          Description of the Exception
217      */
218     @Override
setParameters(Object[] params)219     public void setParameters(Object[] params) throws CDKException {
220         // no parameters for this descriptor
221     }
222 
223     /**
224      * Gets the parameters attribute of the TaeAminoAcidDescriptor object.
225      *
226      * @return The parameters value
227      */
228     @Override
getParameters()229     public Object[] getParameters() {
230         // no parameters to return
231         return (null);
232     }
233 
234     @Override
getDescriptorNames()235     public String[] getDescriptorNames() {
236         String[] names = new String[ndesc];
237         for (int i = 0; i < names.length; i++)
238             names[i] = "TAE" + i;
239         return names;
240     }
241 
242     /**
243      * Gets the parameterNames attribute of the TaeAminOAcidDescriptor object.
244      *
245      * @return The parameterNames value
246      */
247     @Override
getParameterNames()248     public String[] getParameterNames() {
249         // no param names to return
250         return (null);
251     }
252 
253     /**
254      * Gets the parameterType attribute of the TaeAminoAcidDescriptor object.
255      *
256      * @param name Description of the Parameter
257      * @return The parameterType value
258      */
259     @Override
getParameterType(String name)260     public Object getParameterType(String name) {
261         return (null);
262     }
263 
getDummyDescriptorValue(Exception e)264     private DescriptorValue getDummyDescriptorValue(Exception e) {
265         int ndesc = getDescriptorNames().length;
266         DoubleArrayResult results = new DoubleArrayResult(ndesc);
267         for (int i = 0; i < ndesc; i++)
268             results.add(Double.NaN);
269         return new DescriptorValue(getSpecification(), getParameterNames(), getParameters(), results,
270                 getDescriptorNames(), e);
271     }
272 
273     /**
274      * Calculates the 147 TAE descriptors for amino acids.
275      *
276      * @param container Parameter is the atom container which should implement {@link IBioPolymer}.
277      * @return A DoubleArrayResult value representing the 147 TAE descriptors
278      */
279     @Override
calculate(IAtomContainer container)280     public DescriptorValue calculate(IAtomContainer container) {
281         if (taeParams == null) return getDummyDescriptorValue(new CDKException("TAE parameters were not initialized"));
282         if (!(container instanceof IBioPolymer))
283             return getDummyDescriptorValue(new CDKException("The molecule should be of type IBioPolymer"));
284 
285         IBioPolymer peptide = (IBioPolymer) container;
286 
287         // I assume that we get single letter names
288         //Collection aas = peptide.getMonomerNames();
289 
290         double[] desc = new double[ndesc];
291         for (int i = 0; i < ndesc; i++)
292             desc[i] = 0.0;
293 
294         List<IMonomer> monomers = getMonomers(peptide);
295 
296         for (Iterator<IMonomer> iterator = monomers.iterator(); iterator.hasNext();) {
297             IMonomer monomer = iterator.next();
298 
299             String o = monomer.getMonomerName();
300 
301             if (o.length() == 0) continue;
302 
303             String olc = String.valueOf(o.toLowerCase().charAt(0));
304             String tlc = (String) nametrans.get(olc);
305 
306             logger.debug("Converted " + olc + " to " + tlc);
307 
308             // get the params for this AA
309             Double[] params = (Double[]) taeParams.get(tlc);
310 
311             for (int i = 0; i < ndesc; i++)
312                 desc[i] += params[i];
313         }
314 
315         DoubleArrayResult retval = new DoubleArrayResult(ndesc);
316         for (int i = 0; i < ndesc; i++)
317             retval.add(desc[i]);
318 
319         return new DescriptorValue(getSpecification(), getParameterNames(), getParameters(), retval,
320                 getDescriptorNames());
321     }
322 
323     /**
324      * Returns the specific type of the DescriptorResult object.
325      *
326      * The return value from this method really indicates what type of result will
327      * be obtained from the {@link org.openscience.cdk.qsar.DescriptorValue} object. Note that the same result
328      * can be achieved by interrogating the {@link org.openscience.cdk.qsar.DescriptorValue} object; this method
329      * allows you to do the same thing, without actually calculating the descriptor.
330      *
331      * @return an object that implements the {@link org.openscience.cdk.qsar.result.IDescriptorResult} interface indicating
332      *         the actual type of values returned by the descriptor in the {@link org.openscience.cdk.qsar.DescriptorValue} object
333      */
334     @Override
getDescriptorResultType()335     public IDescriptorResult getDescriptorResultType() {
336         return new DoubleArrayResultType(147);
337     }
338 
339 }
340