1 package uk.ac.cam.ch.wwmm.opsin; 2 3 import java.util.ArrayList; 4 import java.util.Collections; 5 import java.util.HashMap; 6 import java.util.List; 7 import java.util.Map; 8 import java.util.Set; 9 import java.util.regex.Matcher; 10 11 import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*; 12 13 /** 14 * An atom. Carries information about which fragment it is in, and an ID 15 * number and a list of bonds that it is involved. It may also have other information such as 16 * whether it has "spare valencies" due to unsaturation, its charge, locant labels, stereochemistry and notes 17 * 18 * @author ptc24 19 * @author dl387 20 * 21 */ 22 class Atom { 23 24 /**The (unique over the molecule) ID of the atom.*/ 25 private final int id; 26 27 /**The chemical element of the atom. */ 28 private ChemEl chemEl; 29 30 /**The locants that pertain to the atom.*/ 31 private final List<String> locants = new ArrayList<String>(2); 32 33 /**The formal charge on the atom.*/ 34 private int charge = 0; 35 36 /**The isotope of the atom. Null if not defined explicitly.*/ 37 private Integer isotope = null; 38 39 /** 40 * Holds the atomParity object associated with this object 41 * null by default 42 */ 43 private AtomParity atomParity = null; 44 45 /**The bonds that involve the atom*/ 46 private final List<Bond> bonds = new ArrayList<Bond>(4); 47 48 /**A map between PropertyKey s as declared here and useful atom properties, usually relating to some kind of special case. */ 49 @SuppressWarnings("rawtypes") 50 private final Map<PropertyKey, Object> properties = new HashMap<PropertyKey, Object>(); 51 /** A set of atoms that were equally plausible to perform functional replacement on */ 52 static final PropertyKey<Set<Atom>> AMBIGUOUS_ELEMENT_ASSIGNMENT = new PropertyKey<Set<Atom>>("ambiguousElementAssignment"); 53 /** The atom class which will be output when serialised to SMILES. Useful for distinguishing attachment points */ 54 static final PropertyKey<Integer> ATOM_CLASS = new PropertyKey<Integer>("atomClass"); 55 /** Used on wildcard atoms to indicate their meaning */ 56 static final PropertyKey<String> HOMOLOGY_GROUP = new PropertyKey<String>("homologyGroup"); 57 /** Used on wildcard atoms to indicate that they are a position variation bond */ 58 static final PropertyKey<List<Atom>> POSITION_VARIATION_BOND = new PropertyKey<List<Atom>>("positionVariationBond"); 59 /** The hydrogen count as set in the SMILES*/ 60 static final PropertyKey<Integer> SMILES_HYDROGEN_COUNT = new PropertyKey<Integer>("smilesHydrogenCount"); 61 /** The oxidation number as specified by Roman numerals in the name*/ 62 static final PropertyKey<Integer> OXIDATION_NUMBER = new PropertyKey<Integer>("oxidationNumber"); 63 /** Is this atom the carbon of an aldehyde? (however NOT formaldehyde)*/ 64 static final PropertyKey<Boolean> ISALDEHYDE = new PropertyKey<Boolean>("isAldehyde"); 65 /** Indicates that this atom is an anomeric atom in a cyclised carbohydrate*/ 66 static final PropertyKey<Boolean> ISANOMERIC = new PropertyKey<Boolean>("isAnomeric"); 67 /** Transient integer used to indicate traversal of fragments*/ 68 static final PropertyKey<Integer> VISITED = new PropertyKey<Integer>("visited"); 69 70 /**The fragment to which the atom belongs.*/ 71 private Fragment frag; 72 73 /** Whether an atom is part of a delocalised set of double bonds. A double bond in a kekule structure 74 * can be mapped to a single bond with this attribute set to true on both atoms that were in the double bond 75 * For example, benzene could be temporarily represented by six singly-bonded atoms, each with a set 76 * spare valency attribute , and later converted into a fully-specified valence structure.*/ 77 private boolean spareValency = false; 78 79 /**The total bond order of all bonds that are expected to be used for inter fragment bonding 80 * e.g. in butan-2-ylidene this would be 2 for the atom at position 2 and 0 for the other 3 */ 81 private int outValency = 0; 82 83 /** Null by default or set by the lambda convention.*/ 84 private Integer lambdaConventionValency; 85 86 /** Null by default or set by the SMILES builder*/ 87 private Integer minimumValency; 88 89 /** Can this atom have implicit hydrogen? True unless explicitly set otherwise otherwise*/ 90 private boolean implicitHydrogenAllowed = true; 91 92 /** This is modified by ium/ide/ylium/uide and is used to choose the appropriate valency for the atom*/ 93 private int protonsExplicitlyAddedOrRemoved = 0; 94 95 /** 96 * Takes same values as type in Fragment. Useful for discriminating suffix atoms from other atoms when a suffix is incorporated into another fragments 97 */ 98 private String type; 99 100 /** 101 * Is this atom in a ring. Default false. Set by the CycleDetector. 102 * Double bonds are only converted to spareValency if atom is in a ring 103 * Some suffixes have different meanings if an atom is part of a ring or not c.g. cyclohexanal vs ethanal 104 */ 105 private boolean atomIsInACycle = false; 106 107 /** 108 * Builds an Atom from scratch. 109 * GENERALLY EXCEPT FOR TESTING SHOULD NOT BE CALLED EXCEPT FROM THE FRAGMANAGER 110 * @param id The ID number, unique to the atom in the molecule being built 111 * @param chemlEl The chemical element 112 * @param frag the Fragment to contain the Atom 113 */ Atom(int id, ChemEl chemlEl, Fragment frag)114 Atom(int id, ChemEl chemlEl, Fragment frag) { 115 if (frag == null){ 116 throw new IllegalArgumentException("Atom is not in a fragment!"); 117 } 118 if (chemlEl == null){ 119 throw new IllegalArgumentException("Atom does not have an element!"); 120 } 121 this.frag = frag; 122 this.id = id; 123 this.chemEl = chemlEl; 124 this.type =frag.getType(); 125 } 126 127 /** Used to build a DUMMY atom. 128 * Does not have an id/frag/type as would be expected for a proper atom 129 * @param chemlEl The chemical element 130 */ Atom(ChemEl chemlEl)131 Atom(ChemEl chemlEl){ 132 this.chemEl = chemlEl; 133 this.id = 0; 134 } 135 136 /** 137 * Uses the lambdaConventionValency or if that is not available 138 * the default valency assuming this is >= the current valency 139 * If not then allowed the chemically sensible valencies of the atom are checked with the one that is closest and >= to the current valency 140 * being returned. If the valency has still not been determined the current valency i.e. assuming the atom to have 0 implicit hydrogen is returned. 141 * This is the correct behaviour for inorganics. For p block elements it means that OPSIN does not believe the atom to be in a valid valency (too high) 142 * 143 * if considerOutValency is true, the valency that will be used to form bonds using the outAtoms is 144 * taken into account i.e. if any radicals were used to form bonds 145 * @param considerOutValency 146 * @return 147 */ determineValency(boolean considerOutValency)148 int determineValency(boolean considerOutValency) { 149 if (lambdaConventionValency != null){ 150 return lambdaConventionValency + protonsExplicitlyAddedOrRemoved; 151 } 152 int currentValency = getIncomingValency(); 153 if (considerOutValency){ 154 currentValency += outValency; 155 } 156 Integer calculatedMinValency = minimumValency == null ? null : minimumValency + protonsExplicitlyAddedOrRemoved; 157 if (charge ==0 || protonsExplicitlyAddedOrRemoved != 0){ 158 Integer defaultValency = ValencyChecker.getDefaultValency(chemEl); 159 if (defaultValency != null){ 160 defaultValency += protonsExplicitlyAddedOrRemoved; 161 if (currentValency <= defaultValency && (calculatedMinValency == null || defaultValency >= calculatedMinValency)){ 162 return defaultValency; 163 } 164 } 165 } 166 Integer[] possibleValencies = ValencyChecker.getPossibleValencies(chemEl, charge); 167 if (possibleValencies != null) { 168 if (calculatedMinValency != null && calculatedMinValency >= currentValency){ 169 return calculatedMinValency; 170 } 171 for (Integer possibleValency : possibleValencies) { 172 if (calculatedMinValency != null && possibleValency < calculatedMinValency){ 173 continue; 174 } 175 if (currentValency <= possibleValency){ 176 return possibleValency; 177 } 178 } 179 } 180 if (calculatedMinValency != null && calculatedMinValency >= currentValency){ 181 return calculatedMinValency; 182 } 183 else{ 184 return currentValency; 185 } 186 } 187 188 /**Adds a locant to the Atom. Other locants are preserved. 189 * Also associates the locant with the atom in the parent fragments hash 190 * 191 * @param locant The new locant 192 */ addLocant(String locant)193 void addLocant(String locant) { 194 locants.add(locant); 195 frag.addMappingToAtomLocantMap(locant, this); 196 } 197 198 /**Replaces all existing locants with a new one. 199 * 200 * @param locant The new locant 201 */ replaceLocants(String locant)202 void replaceLocants(String locant) { 203 clearLocants(); 204 addLocant(locant); 205 } 206 removeLocant(String locantToRemove)207 void removeLocant(String locantToRemove) { 208 int locantArraySize = locants.size(); 209 for (int i = locantArraySize -1; i >=0 ; i--) { 210 if (locants.get(i).equals(locantToRemove)){ 211 locants.remove(i); 212 frag.removeMappingFromAtomLocantMap(locantToRemove); 213 } 214 } 215 } 216 217 /**Removes all locants from the Atom. 218 * 219 */ clearLocants()220 void clearLocants() { 221 for (int i = 0, l = locants.size(); i < l; i++) { 222 frag.removeMappingFromAtomLocantMap(locants.get(i)); 223 } 224 locants.clear(); 225 } 226 227 /** 228 * Removes only elementSymbolLocants: e.g. N, S', Se 229 */ removeElementSymbolLocants()230 void removeElementSymbolLocants() { 231 for (int i = locants.size() - 1; i >= 0; i--) { 232 String locant = locants.get(i); 233 if (MATCH_ELEMENT_SYMBOL_LOCANT.matcher(locant).matches()){ 234 frag.removeMappingFromAtomLocantMap(locant); 235 locants.remove(i); 236 } 237 } 238 } 239 240 /** 241 * Removes all locants other than elementSymbolLocants (e.g. N, S', Se) 242 * Hence removes numeric locants and greek locants 243 */ removeLocantsOtherThanElementSymbolLocants()244 void removeLocantsOtherThanElementSymbolLocants() { 245 for (int i = locants.size() - 1; i >= 0; i--) { 246 String locant = locants.get(i); 247 if (!MATCH_ELEMENT_SYMBOL_LOCANT.matcher(locant).matches()){ 248 frag.removeMappingFromAtomLocantMap(locant); 249 locants.remove(i); 250 } 251 } 252 } 253 254 /**Checks if the Atom has a given locant. 255 * 256 * @param locant The locant to test for 257 * @return true if it has, false if not 258 */ hasLocant(String locant)259 boolean hasLocant(String locant) { 260 if (locants.contains(locant)) { 261 return true; 262 } 263 Matcher m = MATCH_AMINOACID_STYLE_LOCANT.matcher(locant); 264 if (m.matches()){//e.g. N'5 265 if (chemEl.toString().equals(m.group(1))){//element symbol 266 if (!m.group(2).equals("") && (!hasLocant(m.group(1) +m.group(2)))){//has primes 267 return false;//must have exact locant e.g. N' 268 } 269 if (OpsinTools.depthFirstSearchForNonSuffixAtomWithLocant(this, m.group(3)) != null){ 270 return true; 271 } 272 } 273 } 274 return false; 275 } 276 277 /**Gets the first locant for the Atom. This may be the locant that was initially 278 * specified, or the most recent locant specified using replaceLocant, or first 279 * locant to be added since the last invocation of clearLocants. 280 * 281 * @return The locant, or null if there is no locant 282 */ getFirstLocant()283 String getFirstLocant() { 284 return locants.size() > 0 ? locants.get(0) : null; 285 } 286 287 /**Returns the array of locants containing all locants associated with the atom 288 * 289 * @return The list of locants (may be empty) 290 */ getLocants()291 List<String> getLocants() { 292 return Collections.unmodifiableList(locants); 293 } 294 295 /**Returns the subset of the locants which are element symbol locants e.g. N, S', Se 296 * 297 * @return The list of locants (may be empty) 298 */ getElementSymbolLocants()299 List<String> getElementSymbolLocants() { 300 List<String> elementSymbolLocants = new ArrayList<String>(1); 301 for (int i = 0, l = locants.size(); i < l; i++) { 302 String locant = locants.get(i); 303 if (MATCH_ELEMENT_SYMBOL_LOCANT.matcher(locant).matches()) { 304 elementSymbolLocants.add(locant); 305 } 306 } 307 return elementSymbolLocants; 308 } 309 setFrag(Fragment f)310 void setFrag(Fragment f) { 311 frag = f; 312 } 313 getFrag()314 Fragment getFrag() { 315 return frag; 316 } 317 318 /**Gets the ID of the atom. 319 * 320 * @return The ID of the atom 321 */ getID()322 int getID() { 323 return id; 324 } 325 326 /**Gets the chemical element corresponding to the element of the atom. 327 * 328 * @return The chemical element corresponding to the element of the atom 329 */ getElement()330 ChemEl getElement() { 331 return chemEl; 332 } 333 334 /**Sets the chemical element corresponding to the element of the atom. 335 * 336 * @param chemEl The chemical element corresponding to the element of the atom 337 */ setElement(ChemEl chemEl)338 void setElement(ChemEl chemEl) { 339 this.chemEl = chemEl; 340 } 341 342 /**Gets the formal charge on the atom. 343 * 344 * @return The formal charge on the atom 345 */ getCharge()346 int getCharge() { 347 return charge; 348 } 349 350 /**Modifies the charge of this atom by the amount given. This can be any integer 351 * The number of protons changed is noted so as to calculate the correct valency for the atom. This can be any integer. 352 * For example ide is the loss of a proton so is charge=-1, protons =-1 353 * @param charge 354 * @param protons 355 */ addChargeAndProtons(int charge, int protons)356 void addChargeAndProtons(int charge, int protons){ 357 this.charge += charge; 358 protonsExplicitlyAddedOrRemoved+=protons; 359 } 360 361 /**Sets the formal charge on the atom. 362 * NOTE: make sure to update protonsExplicitlyAddedOrRemoved if necessary 363 * 364 * @param c The formal charge on the atom 365 */ setCharge(int c)366 void setCharge(int c) { 367 charge = c; 368 } 369 370 /** 371 * Sets the formal charge and number of protonsExplicitlyAddedOrRemoved to 0 372 */ neutraliseCharge()373 void neutraliseCharge() { 374 charge = 0; 375 protonsExplicitlyAddedOrRemoved = 0; 376 } 377 378 /** 379 * Gets the mass number of the atom or null if not explicitly defined 380 * e.g. 3 for tritium 381 * @return 382 */ getIsotope()383 Integer getIsotope() { 384 return isotope; 385 } 386 387 /** 388 * Sets the mass number of the atom explicitly 389 * @param isotope 390 */ setIsotope(Integer isotope)391 void setIsotope(Integer isotope) { 392 if (isotope != null && isotope < chemEl.ATOMIC_NUM) { 393 throw new RuntimeException("Isotopic mass cannot be less than the element's number of protons: " + chemEl.toString() + " " + isotope + " < " + chemEl.ATOMIC_NUM ); 394 } 395 this.isotope = isotope; 396 } 397 398 /**Adds a bond to the atom 399 * 400 * @param b The bond to be added 401 */ addBond(Bond b)402 void addBond(Bond b) { 403 if (bonds.contains(b)){ 404 throw new IllegalArgumentException("Atom already has given bond (This is not allowed as this would give two bonds between the same atoms!)"); 405 } 406 bonds.add(b); 407 } 408 409 /**Removes a bond to the atom 410 * 411 * @param b The bond to be removed 412 * @return whether bond was present 413 */ removeBond(Bond b)414 boolean removeBond(Bond b) { 415 return bonds.remove(b); 416 } 417 418 /**Calculates the number of bonds connecting to the atom, excluding bonds to implicit 419 * hydrogens. Double bonds count as 420 * two bonds, etc. Eg ethene - both C's have an incoming valency of 2. 421 * 422 * @return Incoming Valency 423 */ getIncomingValency()424 int getIncomingValency() { 425 int v = 0; 426 for (int i = 0, l = bonds.size(); i < l; i++) { 427 v += bonds.get(i).getOrder(); 428 } 429 return v; 430 } 431 getProtonsExplicitlyAddedOrRemoved()432 int getProtonsExplicitlyAddedOrRemoved() { 433 return protonsExplicitlyAddedOrRemoved; 434 } 435 setProtonsExplicitlyAddedOrRemoved(int protonsExplicitlyAddedOrRemoved)436 void setProtonsExplicitlyAddedOrRemoved(int protonsExplicitlyAddedOrRemoved) { 437 this.protonsExplicitlyAddedOrRemoved = protonsExplicitlyAddedOrRemoved; 438 } 439 440 /**Does the atom have spare valency to form double bonds? 441 * 442 * @return true if atom has spare valency 443 */ hasSpareValency()444 boolean hasSpareValency() { 445 return spareValency; 446 } 447 448 /**Set whether an atom has spare valency 449 * 450 * @param sv The spare valency 451 */ setSpareValency(boolean sv)452 void setSpareValency(boolean sv) { 453 spareValency = sv; 454 } 455 456 /**Gets the total bond order of the bonds expected to be created from this atom for inter fragment bonding 457 * 458 * @return The outValency 459 */ getOutValency()460 int getOutValency() { 461 return outValency; 462 } 463 464 /**Adds to the total bond order of the bonds expected to be created from this atom for inter fragment bonding 465 * 466 * @param outV The outValency to be added 467 */ addOutValency(int outV)468 void addOutValency(int outV) { 469 outValency += outV; 470 } 471 getBonds()472 List<Bond> getBonds() { 473 return Collections.unmodifiableList(bonds); 474 } 475 getBondCount()476 int getBondCount() { 477 return bonds.size(); 478 } 479 480 /**Gets a list of atoms that connect to the atom 481 * 482 * @return The list of atoms connected to the atom 483 */ getAtomNeighbours()484 List<Atom> getAtomNeighbours(){ 485 int bondCount = bonds.size(); 486 List<Atom> results = new ArrayList<Atom>(bondCount); 487 for (int i = 0; i < bondCount; i++) { 488 results.add(bonds.get(i).getOtherAtom(this)); 489 } 490 return results; 491 } 492 getLambdaConventionValency()493 Integer getLambdaConventionValency() { 494 return lambdaConventionValency; 495 } 496 setLambdaConventionValency(Integer valency)497 void setLambdaConventionValency(Integer valency) { 498 this.lambdaConventionValency = valency; 499 } 500 getType()501 String getType() { 502 return type; 503 } 504 setType(String type)505 void setType(String type) { 506 this.type = type; 507 } 508 getAtomIsInACycle()509 boolean getAtomIsInACycle() { 510 return atomIsInACycle; 511 } 512 513 /** 514 * Sets whether atom is in a cycle, true if it is 515 * @param atomIsInACycle 516 */ setAtomIsInACycle(boolean atomIsInACycle)517 void setAtomIsInACycle(boolean atomIsInACycle) { 518 this.atomIsInACycle = atomIsInACycle; 519 } 520 getAtomParity()521 AtomParity getAtomParity() { 522 return atomParity; 523 } 524 setAtomParity(AtomParity atomParity)525 void setAtomParity(AtomParity atomParity) { 526 this.atomParity = atomParity; 527 } 528 setAtomParity(Atom[] atomRefs4, int parity)529 void setAtomParity(Atom[] atomRefs4, int parity) { 530 atomParity = new AtomParity(atomRefs4, parity); 531 } 532 getMinimumValency()533 Integer getMinimumValency() { 534 return minimumValency; 535 } 536 setMinimumValency(Integer minimumValency)537 void setMinimumValency(Integer minimumValency) { 538 this.minimumValency = minimumValency; 539 } 540 getImplicitHydrogenAllowed()541 boolean getImplicitHydrogenAllowed() { 542 return implicitHydrogenAllowed; 543 } 544 setImplicitHydrogenAllowed(boolean implicitHydrogenAllowed)545 void setImplicitHydrogenAllowed(boolean implicitHydrogenAllowed) { 546 this.implicitHydrogenAllowed = implicitHydrogenAllowed; 547 } 548 549 @SuppressWarnings("unchecked") getProperty(PropertyKey<T> propertyKey)550 <T> T getProperty(PropertyKey<T> propertyKey) { 551 return (T) properties.get(propertyKey); 552 } 553 setProperty(PropertyKey<T> propertyKey, T value)554 <T> void setProperty(PropertyKey<T> propertyKey, T value) { 555 properties.put(propertyKey, value); 556 } 557 558 /** 559 * Checks if the valency of this atom allows it to have the amount of spare valency that the atom currently has 560 * May reduce the spare valency on the atom to be consistent with the valency of the atom 561 * Does nothing if the atom has no spare valency 562 * @param takeIntoAccountExternalBonds 563 * @throws StructureBuildingException 564 */ ensureSVIsConsistantWithValency(boolean takeIntoAccountExternalBonds)565 void ensureSVIsConsistantWithValency(boolean takeIntoAccountExternalBonds) throws StructureBuildingException { 566 if (spareValency) { 567 Integer maxValency; 568 if (lambdaConventionValency != null) { 569 maxValency = lambdaConventionValency + protonsExplicitlyAddedOrRemoved; 570 } 571 else{ 572 Integer hwValency = ValencyChecker.getHWValency(chemEl); 573 if (hwValency == null) { 574 throw new StructureBuildingException(chemEl + " is not expected to be aromatic!"); 575 } 576 maxValency = hwValency + protonsExplicitlyAddedOrRemoved; 577 } 578 int maxSpareValency; 579 if (takeIntoAccountExternalBonds) { 580 maxSpareValency = maxValency - getIncomingValency() - outValency; 581 } 582 else{ 583 maxSpareValency = maxValency - frag.getIntraFragmentIncomingValency(this); 584 } 585 if (maxSpareValency < 1) { 586 setSpareValency(false); 587 } 588 } 589 } 590 591 /** 592 * Returns the the first bond in the atom's bond list or null if it has no bonds 593 * @return 594 */ getFirstBond()595 Bond getFirstBond() { 596 if (bonds.size() > 0){ 597 return bonds.get(0); 598 } 599 return null; 600 } 601 602 /**Gets the bond between this atom and a given atom 603 * 604 * @param a The atom to find a bond to 605 * @return The bond, or null if there is no bond 606 */ getBondToAtom(Atom a)607 Bond getBondToAtom(Atom a) { 608 for (int i = 0, l = bonds.size(); i < l; i++) { 609 Bond b = bonds.get(i); 610 if(b.getOtherAtom(this) == a){ 611 return b; 612 } 613 } 614 return null; 615 } 616 617 /**Gets the bond between this atom and a given atom, throwing if fails. 618 * 619 * @param a The atom to find a bond to 620 * @return The bond found 621 * @throws StructureBuildingException 622 */ getBondToAtomOrThrow(Atom a)623 Bond getBondToAtomOrThrow(Atom a) throws StructureBuildingException { 624 Bond b = getBondToAtom(a); 625 if(b == null){ 626 throw new StructureBuildingException("Couldn't find specified bond"); 627 } 628 return b; 629 } 630 } 631