/* Copyright (C) 2004-2007  Rajarshi Guha <rajarshi@users.sourceforge.net>
 *
 * Contact: cdk-devel@lists.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 */

package org.openscience.cdk.qsar.descriptors.protein;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBioPolymer;
import org.openscience.cdk.interfaces.IMonomer;
import org.openscience.cdk.interfaces.IStrand;
import org.openscience.cdk.qsar.AbstractMolecularDescriptor;
import org.openscience.cdk.qsar.DescriptorSpecification;
import org.openscience.cdk.qsar.DescriptorValue;
import org.openscience.cdk.qsar.IMolecularDescriptor;
import org.openscience.cdk.qsar.result.DoubleArrayResult;
import org.openscience.cdk.qsar.result.DoubleArrayResultType;
import org.openscience.cdk.qsar.result.IDescriptorResult;
import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;

/**
 * An implementation of the TAE descriptors for amino acids.
 *
 * The TAE descriptors ({@cdk.cite BREN1995} {@cdk.cite BREN1997} {@cdk.cite WHITE2003})
 * are derived from pre-calculated quantum mechanical parameters. This class
 * uses the parameters for amino acids and thus evaluates a set of 147 descriptors for peptide
 * sequences.
 *
 * The class expects that it will be supplied an object which implements the {@link IBioPolymer}. Thus ordinary
 * AtomContainer objects will result in an exception.
 *
 * The descriptors are returned in the following order (see
 * <a href="http://www.chem.rpi.edu/chemweb/recondoc/TAE.doc">here</a>
 * for a detailed description of the individual descriptors):
 * <pre>
 * Energy        Population    VOLTAE        SurfArea
 * SIDel.Rho.N   Del.Rho.NMin  Del.Rho.NMax  Del.Rho.NIA   Del.Rho.NA1
 * Del.Rho.NA2   Del.Rho.NA3   Del.Rho.NA4   Del.Rho.NA5   Del.Rho.NA6
 * Del.Rho.NA7   Del.Rho.NA8   Del.Rho.NA9   Del.Rho.NA10  SIDel.K.N
 * Del.K.Min     Del.K.Max     Del.K.IA      Del.K.NA1     Del.K.NA2
 * Del.K.NA3     Del.K.NA4     Del.K.NA5     Del.K.NA6     Del.K.NA7
 * Del.K.NA8     Del.K.NA9     Del.K.NA10    SIK           SIKMin
 * SIKMax        SIKIA         SIKA1         SIKA2         SIKA3
 * SIKA4         SIKA5         SIKA6         SIKA7         SIKA8
 * SIKA9         SIKA10        SIDel.G.N     Del.G.NMin    Del.G.NMax
 * Del.G.NIA     Del.G.NA1     Del.G.NA2     Del.G.NA3     Del.G.NA4
 * Del.G.NA5     Del.G.NA6     Del.G.NA7     Del.G.NA8     Del.G.NA9
 * Del.G.NA10    SIG           SIGMin        SIGMax        SIGIA
 * SIGA1         SIGA2         SIGA3         SIGA4         SIGA5
 * SIGA6         SIGA7         SIGA8         SIGA9         SIGA10
 * SIEP          SIEPMin       SIEPMax       SIEPIA        SIEPA1
 * SIEPA2        SIEPA3        SIEPA4        SIEPA5        SIEPA6
 * SIEPA7        SIEPA8        SIEPA9        SIEPA10       EP1
 * EP2           EP3           EP4           EP5           EP6
 * EP7           EP8           EP9           EP10          PIPMin
 * PIPMax        PIPAvg        PIP1          PIP2          PIP3
 * PIP4          PIP5          PIP6          PIP7          PIP8
 * PIP9          PIP10         PIP11         PIP12         PIP13
 * PIP14         PIP15         PIP16         PIP17         PIP18
 * PIP19         PIP20         Fuk           FukMin        FukMax
 * Fuk1          Fuk2          Fuk3          Fuk4          Fuk5
 * Fuk6          Fuk7          Fuk8          Fuk9          Fuk10
 * Lapl          LaplMin       LaplMax       Lapl1         Lapl2
 * Lapl3         Lapl4         Lapl5         Lapl6         Lapl7
 * Lapl8         Lapl9         Lapl10
 * </pre>
 *
 * <table border="1"><caption>Parameters for this descriptor:</caption>
 *   <tr>
 *     <td>Name</td>
 *     <td>Default</td>
 *     <td>Description</td>
 *   </tr>
 *   <tr>
 *     <td></td>
 *     <td></td>
 *     <td>no parameters</td>
 *   </tr>
 * </table>
 *
 * @author      Rajarshi Guha
 * @cdk.created 2006-08-23
 * @cdk.module  qsarprotein
 * @cdk.githash
 * @cdk.dictref qsar-descriptors:taeAminoAcid
 * @see         IBioPolymer
 */
public class TaeAminoAcidDescriptor extends AbstractMolecularDescriptor implements IMolecularDescriptor {

    private static final ILoggingTool logger = LoggingToolFactory.createLoggingTool(TaeAminoAcidDescriptor.class);

    /** Number of TAE descriptor values stored per amino acid and returned by this descriptor. */
    private static final int NDESC = 147;

    /** Upper bound on the number of data rows read from the parameter table (after the header line). */
    private static final int MAX_DATA_ROWS = 60;

    /**
     * Lower-cased residue key (first column of the data table) mapped to its {@link #NDESC} parameters.
     * Left as {@code null} when the table could not be loaded, which {@link #calculate} reports as an error.
     */
    private Map<String, double[]> taeParams;

    /** Lower-case one-letter amino acid code mapped to the three-letter code used as key in the table. */
    private final Map<String, String> nametrans = new HashMap<String, String>();

    /**
     * Collects the monomers of every strand of the supplied polymer into a single list.
     *
     * @param iBioPolymer the polymer whose strands are traversed
     * @return the monomers of all strands, in the iteration order of the underlying maps
     */
    private List<IMonomer> getMonomers(IBioPolymer iBioPolymer) {
        List<IMonomer> monomList = new ArrayList<IMonomer>();
        for (IStrand strand : iBioPolymer.getStrands().values()) {
            monomList.addAll(strand.getMonomers().values());
        }
        return monomList;
    }

    /**
     * Reads the TAE parameter table from the class path into {@link #taeParams}.
     *
     * The first line of the resource is discarded as a header. Each following line must contain a
     * residue name followed by {@link #NDESC} comma-separated numbers; at most {@link #MAX_DATA_ROWS}
     * such lines are read and reading stops early at end of file. If the resource is missing,
     * unreadable or malformed, {@link #taeParams} is set to {@code null}.
     */
    private void loadTAEParams() {
        String filename = "org/openscience/cdk/qsar/descriptors/data/taepeptides.txt";
        InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename);
        if (ins == null) {
            logger.debug("Could not load the TAE peptide parameter data file");
            taeParams = null;
            return;
        }

        // Build into a local map so that a failure half way through never leaves a partial table behind.
        Map<String, double[]> loaded = new HashMap<String, double[]>();
        try {
            // Explicit charset: do not depend on the platform default when decoding the resource.
            BufferedReader breader = new BufferedReader(new InputStreamReader(ins, "UTF-8"));
            breader.readLine(); // throw away the header
            for (int i = 0; i < MAX_DATA_ROWS; i++) {
                String line = breader.readLine();
                if (line == null) break; // fewer rows than the cap: stop instead of failing on null

                String[] components = line.split(",");
                if (components.length != (NDESC + 1))
                    throw new CDKException("TAE peptide data table seems to be corrupt");

                // Locale.ROOT keeps the key stable in locales with special casing rules (e.g. Turkish 'I').
                String key = components[0].toLowerCase(Locale.ROOT).trim();

                double[] data = new double[NDESC];
                for (int j = 1; j < components.length; j++)
                    data[j - 1] = Double.parseDouble(components[j]);

                loaded.put(key, data);
            }
        } catch (IOException ioe) {
            logger.error("Could not read the TAE peptide parameter data file: " + ioe.getMessage());
            logger.debug(ioe);
            taeParams = null;
            return;
        } catch (CDKException e) {
            logger.error(e.getMessage());
            logger.debug(e);
            taeParams = null;
            return;
        } catch (NumberFormatException nfe) {
            // A non-numeric cell means the table is unusable; treat it like any other corruption.
            logger.error("TAE peptide data table contains a malformed number: " + nfe.getMessage());
            logger.debug(nfe);
            taeParams = null;
            return;
        } finally {
            try {
                ins.close();
            } catch (IOException closeFailure) {
                // Nothing useful can be done here; the data (if any) has already been read.
                logger.debug(closeFailure);
            }
        }

        taeParams = loaded;
        logger.debug("Loaded " + taeParams.size() + " TAE parameters for amino acids");
    }

    /**
     * Creates the descriptor, registering the one-letter to three-letter amino acid code
     * translation and loading the TAE parameter table.
     */
    public TaeAminoAcidDescriptor() {
        nametrans.put("a", "ala");
        nametrans.put("c", "cys");
        nametrans.put("d", "asp");
        nametrans.put("e", "glu");
        nametrans.put("f", "phe");
        nametrans.put("g", "gly");
        nametrans.put("h", "his");
        nametrans.put("i", "ile");
        nametrans.put("k", "lys");
        nametrans.put("l", "leu");
        nametrans.put("m", "met");
        nametrans.put("n", "asn");
        nametrans.put("p", "pro");
        nametrans.put("q", "gln");
        nametrans.put("r", "arg");
        nametrans.put("s", "ser");
        nametrans.put("t", "thr");
        nametrans.put("v", "val");
        nametrans.put("w", "trp");
        nametrans.put("y", "tyr");

        loadTAEParams();
    }

    /**
     * Returns the specification (ontology reference, implementing class and vendor) of this descriptor.
     *
     * @return the descriptor specification
     */
    @Override
    public DescriptorSpecification getSpecification() {
        return new DescriptorSpecification(
                "http://www.blueobelisk.org/ontologies/chemoinformatics-algorithms/#taeAminoAcid", this.getClass()
                        .getName(), "The Chemistry Development Kit");
    }

    /**
     * Sets the parameters attribute of the TaeAminoAcidDescriptor object.
     *
     * This descriptor takes no parameters, so the argument is ignored.
     *
     * @param params The new parameters value
     * @throws org.openscience.cdk.exception.CDKException
     *          declared by the overridden method; never thrown by this implementation
     */
    @Override
    public void setParameters(Object[] params) throws CDKException {
        // no parameters for this descriptor
    }

    /**
     * Gets the parameters attribute of the TaeAminoAcidDescriptor object.
     *
     * @return always {@code null}, since this descriptor has no parameters
     */
    @Override
    public Object[] getParameters() {
        // no parameters to return
        return null;
    }

    /**
     * Returns the names of the individual descriptor values.
     *
     * @return an array of {@code "TAE0"} through {@code "TAE146"}
     */
    @Override
    public String[] getDescriptorNames() {
        String[] names = new String[NDESC];
        for (int i = 0; i < names.length; i++)
            names[i] = "TAE" + i;
        return names;
    }

    /**
     * Gets the parameterNames attribute of the TaeAminoAcidDescriptor object.
     *
     * @return always {@code null}, since this descriptor has no parameters
     */
    @Override
    public String[] getParameterNames() {
        // no param names to return
        return null;
    }

    /**
     * Gets the parameterType attribute of the TaeAminoAcidDescriptor object.
     *
     * @param name Description of the Parameter
     * @return always {@code null}, since this descriptor has no parameters
     */
    @Override
    public Object getParameterType(String name) {
        return null;
    }

    /**
     * Builds the result returned when the descriptor cannot be calculated: every value is
     * {@code NaN} and the supplied exception is attached to the {@link DescriptorValue}.
     *
     * @param e the reason the calculation failed
     * @return a descriptor value holding only {@code NaN} entries and the exception
     */
    private DescriptorValue getDummyDescriptorValue(Exception e) {
        DoubleArrayResult results = new DoubleArrayResult(NDESC);
        for (int i = 0; i < NDESC; i++)
            results.add(Double.NaN);
        return new DescriptorValue(getSpecification(), getParameterNames(), getParameters(), results,
                getDescriptorNames(), e);
    }

    /**
     * Calculates the 147 TAE descriptors for amino acids.
     *
     * Each descriptor is the sum, over all monomers of the polymer, of the tabulated value for the
     * amino acid identified by the first letter of the monomer name (interpreted as a one-letter
     * code, case-insensitively). Monomers with a missing or empty name are skipped.
     *
     * A result filled with {@code NaN} and carrying a {@link CDKException} is returned when the
     * parameter table was not loaded, when the container is not an {@link IBioPolymer}, or when a
     * monomer cannot be mapped to an entry of the parameter table.
     *
     * @param container Parameter is the atom container which should implement {@link IBioPolymer}.
     * @return A DoubleArrayResult value representing the 147 TAE descriptors
     */
    @Override
    public DescriptorValue calculate(IAtomContainer container) {
        if (taeParams == null)
            return getDummyDescriptorValue(new CDKException("TAE parameters were not initialized"));
        if (!(container instanceof IBioPolymer))
            return getDummyDescriptorValue(new CDKException("The molecule should be of type IBioPolymer"));

        IBioPolymer peptide = (IBioPolymer) container;

        // I assume that we get single letter names
        double[] desc = new double[NDESC]; // zero-initialised by the language

        for (IMonomer monomer : getMonomers(peptide)) {
            String name = monomer.getMonomerName();
            if (name == null || name.length() == 0) continue;

            // Locale.ROOT so that e.g. "I" maps to "i" regardless of the default locale.
            String olc = String.valueOf(name.toLowerCase(Locale.ROOT).charAt(0));
            String tlc = nametrans.get(olc);

            logger.debug("Converted " + olc + " to " + tlc);

            // get the params for this AA; report unknown residues instead of failing with an NPE
            double[] params = (tlc == null) ? null : taeParams.get(tlc);
            if (params == null)
                return getDummyDescriptorValue(new CDKException("No TAE parameters available for monomer '"
                        + name + "'"));

            for (int i = 0; i < NDESC; i++)
                desc[i] += params[i];
        }

        DoubleArrayResult retval = new DoubleArrayResult(NDESC);
        for (int i = 0; i < NDESC; i++)
            retval.add(desc[i]);

        return new DescriptorValue(getSpecification(), getParameterNames(), getParameters(), retval,
                getDescriptorNames());
    }

    /**
     * Returns the specific type of the DescriptorResult object.
     *
     * The return value from this method really indicates what type of result will
     * be obtained from the {@link org.openscience.cdk.qsar.DescriptorValue} object. Note that the same result
     * can be achieved by interrogating the {@link org.openscience.cdk.qsar.DescriptorValue} object; this method
     * allows you to do the same thing, without actually calculating the descriptor.
     *
     * @return an object that implements the {@link org.openscience.cdk.qsar.result.IDescriptorResult} interface indicating
     *         the actual type of values returned by the descriptor in the {@link org.openscience.cdk.qsar.DescriptorValue} object
     */
    @Override
    public IDescriptorResult getDescriptorResultType() {
        return new DoubleArrayResultType(NDESC);
    }

}