1 package uk.ac.cam.ch.wwmm.opsin; 2 3 import java.util.ArrayDeque; 4 import java.util.ArrayList; 5 import java.util.Collection; 6 import java.util.Collections; 7 import java.util.Deque; 8 import java.util.HashMap; 9 import java.util.HashSet; 10 import java.util.List; 11 import java.util.Map; 12 import java.util.Map.Entry; 13 import java.util.Set; 14 import java.util.regex.Matcher; 15 import java.util.regex.Pattern; 16 17 import org.apache.log4j.Logger; 18 19 import uk.ac.cam.ch.wwmm.opsin.IsotopeSpecificationParser.IsotopeSpecification; 20 21 import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; 22 import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*; 23 24 /** 25 * Methods for processing the substitutive and additive operations that connect all the fragments together 26 * as well as indicated hydrogen/unsaturation/heteroatom replacement 27 * @author dl387 28 * 29 */ 30 class StructureBuildingMethods { 31 private static final Logger LOG = Logger.getLogger(StructureBuildingMethods.class); 32 private static final Pattern matchCompoundLocant =Pattern.compile("[\\[\\(\\{](\\d+[a-z]?'*)[\\]\\)\\}]"); 33 StructureBuildingMethods()34 private StructureBuildingMethods() {} 35 36 /** 37 * Resolves a word/bracket: 38 * Locanted attributes of words are resolved onto their group 39 * Locanted substitution is performed 40 * Connections involving multi radicals are processed 41 * Unlocanted attributes of words are resolved onto their group 42 * 43 * If word is a wordRule the function will instantly return 44 * 45 * @param state 46 * @param word 47 * @throws StructureBuildingException 48 */ resolveWordOrBracket(BuildState state, Element word)49 static void resolveWordOrBracket(BuildState state, Element word) throws StructureBuildingException { 50 if (word.getName().equals(WORDRULE_EL)){//already been resolved 51 return; 52 } 53 if (!word.getName().equals(WORD_EL) && !word.getName().equals(BRACKET_EL)){ 54 throw new StructureBuildingException("A word or bracket is the expected input"); 55 } 56 recursivelyResolveLocantedFeatures(state, word); 57 recursivelyResolveUnLocantedFeatures(state, word); 58 //TODO check all things that can substitute have outAtoms 59 //TOOD think whether you can avoid the need to have a cansubstitute function by only using appropriate group 60 List<Element> subsBracketsAndRoots = OpsinTools.getDescendantElementsWithTagNames(word, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); 61 for (Element subsBracketsAndRoot : subsBracketsAndRoots) { 62 if (subsBracketsAndRoot.getAttribute(MULTIPLIER_ATR) != null) { 63 throw new StructureBuildingException("Structure building problem: multiplier on :" + subsBracketsAndRoot.getName() + " was never used"); 64 } 65 } 66 List<Element> groups = OpsinTools.getDescendantElementsWithTagName(word, GROUP_EL); 67 for (int i = 0; i < groups.size(); i++) { 68 Element group = groups.get(i); 69 if (group.getAttribute(RESOLVED_ATR)==null && i != groups.size()-1){ 70 throw new StructureBuildingException("Structure building problem: Bond was not made from :" +group.getValue() + " but one should of been"); 71 } 72 } 73 } 74 75 /** 76 * Performs locanted attribute resolution 77 * then additive joining of fragments 78 * then locanted substitutive joining of fragments 79 * 80 * @param state 81 * @param word 82 * @throws StructureBuildingException 83 */ recursivelyResolveLocantedFeatures(BuildState state, Element word)84 static void recursivelyResolveLocantedFeatures(BuildState state, Element word) throws StructureBuildingException { 85 if (!word.getName().equals(WORD_EL) && !word.getName().equals(BRACKET_EL)){ 86 throw new StructureBuildingException("A word or bracket is the expected input"); 87 } 88 List<Element> subsBracketsAndRoots = OpsinTools.getChildElementsWithTagNames(word, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); 89 //substitution occurs left to right so by doing this right to left you ensure that any groups that will come into existence 90 //due to multipliers being expanded will be in existence 91 for (int i =subsBracketsAndRoots.size()-1; i>=0; i--) { 92 Element subBracketOrRoot = subsBracketsAndRoots.get(i); 93 if (subBracketOrRoot.getName().equals(BRACKET_EL)){ 94 recursivelyResolveLocantedFeatures(state,subBracketOrRoot); 95 if (potentiallyCanSubstitute(subBracketOrRoot)){ 96 performAdditiveOperations(state, subBracketOrRoot); 97 performLocantedSubstitutiveOperations(state, subBracketOrRoot); 98 } 99 } 100 else{ 101 resolveRootOrSubstituentLocanted(state, subBracketOrRoot); 102 } 103 } 104 } 105 106 /** 107 * Performs locanted attribute resolution 108 * then additive joining of fragments 109 * then locanted substitutive joining of fragments 110 * 111 * @param state 112 * @param word 113 * @throws StructureBuildingException 114 */ recursivelyResolveUnLocantedFeatures(BuildState state, Element word)115 static void recursivelyResolveUnLocantedFeatures(BuildState state, Element word) throws StructureBuildingException { 116 if (!word.getName().equals(WORD_EL) && !word.getName().equals(BRACKET_EL)){ 117 throw new StructureBuildingException("A word or bracket is the expected input"); 118 } 119 List<Element> subsBracketsAndRoots = OpsinTools.getChildElementsWithTagNames(word, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); 120 //substitution occurs left to right so by doing this right to left you ensure that any groups that will come into existence 121 //due to multipliers being expanded will be in existence 122 for (int i =subsBracketsAndRoots.size()-1; i>=0; i--) { 123 Element subBracketOrRoot = subsBracketsAndRoots.get(i); 124 if (subBracketOrRoot.getName().equals(BRACKET_EL)){ 125 recursivelyResolveUnLocantedFeatures(state,subBracketOrRoot); 126 if (potentiallyCanSubstitute(subBracketOrRoot)){ 127 performUnLocantedSubstitutiveOperations(state, subBracketOrRoot); 128 } 129 } 130 else{ 131 resolveRootOrSubstituentUnLocanted(state, subBracketOrRoot); 132 } 133 } 134 } 135 resolveRootOrSubstituentLocanted(BuildState state, Element subOrRoot)136 static void resolveRootOrSubstituentLocanted(BuildState state, Element subOrRoot) throws StructureBuildingException { 137 138 resolveLocantedFeatures(state, subOrRoot);//e.g. unsaturators, hydro groups and heteroatom replacement 139 140 boolean foundSomethingToSubstitute = potentiallyCanSubstitute(subOrRoot); 141 142 if (foundSomethingToSubstitute){ 143 performAdditiveOperations(state, subOrRoot);//e.g. ethylenediimino, oxyethylene (operations where two outAtoms are used to produce the bond and no locant is required as groups) 144 performLocantedSubstitutiveOperations(state, subOrRoot);//e.g. 2-methyltoluene 145 } 146 } 147 resolveRootOrSubstituentUnLocanted(BuildState state, Element subOrRoot)148 static void resolveRootOrSubstituentUnLocanted(BuildState state, Element subOrRoot) throws StructureBuildingException { 149 150 boolean foundSomethingToSubstitute = potentiallyCanSubstitute(subOrRoot); 151 152 resolveUnLocantedFeatures(state, subOrRoot);//e.g. unsaturators, hydro groups and heteroatom replacement 153 154 if (foundSomethingToSubstitute){ 155 performUnLocantedSubstitutiveOperations(state, subOrRoot);//e.g. tetramethylfuran 156 } 157 } 158 159 performLocantedSubstitutiveOperations(BuildState state, Element subBracketOrRoot)160 private static void performLocantedSubstitutiveOperations(BuildState state, Element subBracketOrRoot) throws StructureBuildingException { 161 Element group; 162 if (subBracketOrRoot.getName().equals(BRACKET_EL)) { 163 group = findRightMostGroupInBracket(subBracketOrRoot); 164 } 165 else{ 166 group = subBracketOrRoot.getFirstChildElement(GROUP_EL); 167 } 168 if (group.getAttribute(RESOLVED_ATR) != null) { 169 return; 170 } 171 Fragment frag = group.getFrag(); 172 if (frag.getOutAtomCount() >=1 && subBracketOrRoot.getAttribute(LOCANT_ATR) != null){ 173 String locantString = subBracketOrRoot.getAttributeValue(LOCANT_ATR); 174 if (frag.getOutAtomCount() >1){ 175 checkAndApplySpecialCaseWhereOutAtomsCanBeCombinedOrThrow(frag, group); 176 } 177 if (subBracketOrRoot.getAttribute(MULTIPLIER_ATR) != null) {//e.g. 1,2-diethyl 178 multiplyOutAndSubstitute(state, subBracketOrRoot); 179 } 180 else{ 181 Fragment parentFrag = findFragmentWithLocant(subBracketOrRoot, locantString); 182 if (parentFrag == null){ 183 String modifiedLocant = checkForBracketedPrimedLocantSpecialCase(subBracketOrRoot, locantString); 184 if (modifiedLocant != null){ 185 parentFrag = findFragmentWithLocant(subBracketOrRoot, modifiedLocant); 186 if (parentFrag != null){ 187 locantString = modifiedLocant; 188 } 189 } 190 } 191 if (parentFrag==null){ 192 throw new StructureBuildingException("Cannot find in scope fragment with atom with locant " + locantString + "."); 193 } 194 group.addAttribute(new Attribute(RESOLVED_ATR, "yes")); 195 Element groupToAttachTo = parentFrag.getTokenEl(); 196 if (groupToAttachTo.getAttribute(ACCEPTSADDITIVEBONDS_ATR) != null && 197 parentFrag.getOutAtomCount() > 0 && 198 groupToAttachTo.getAttribute(ISAMULTIRADICAL_ATR) != null && 199 parentFrag.getAtomByLocantOrThrow(locantString).getOutValency() > 0 && 200 frag.getOutAtom(0).getValency() == 1 && 201 parentFrag.getFirstAtom().equals(parentFrag.getAtomByLocantOrThrow(locantString))) { 202 //horrible special case to allow C-hydroxycarbonimidoyl and the like 203 //If additive nomenclature the first atom should be an out atom 204 joinFragmentsAdditively(state, frag, parentFrag); 205 } 206 else{ 207 Atom atomToSubstituteAt = parentFrag.getAtomByLocantOrThrow(locantString); 208 if (PHOSPHO_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR)) && frag.getOutAtom(0).getValency() == 1){ 209 if (atomToSubstituteAt.getElement() != ChemEl.O){ 210 for (Atom neighbour : atomToSubstituteAt.getAtomNeighbours()) { 211 if (neighbour.getElement() == ChemEl.O && 212 neighbour.getBondCount()==1 && 213 neighbour.getFirstBond().getOrder() == 1 && 214 neighbour.getOutValency() == 0 && 215 neighbour.getCharge() == 0){ 216 atomToSubstituteAt = neighbour; 217 break; 218 } 219 } 220 } 221 } 222 joinFragmentsSubstitutively(state, frag, atomToSubstituteAt); 223 } 224 } 225 } 226 } 227 performUnLocantedSubstitutiveOperations(BuildState state, Element subBracketOrRoot)228 private static void performUnLocantedSubstitutiveOperations(BuildState state, Element subBracketOrRoot) throws StructureBuildingException { 229 Element group; 230 if (subBracketOrRoot.getName().equals(BRACKET_EL)){ 231 group = findRightMostGroupInBracket(subBracketOrRoot); 232 } 233 else{ 234 group = subBracketOrRoot.getFirstChildElement(GROUP_EL); 235 } 236 if (group.getAttribute(RESOLVED_ATR) != null){ 237 return; 238 } 239 Fragment frag = group.getFrag(); 240 if (frag.getOutAtomCount() >= 1){ 241 if (subBracketOrRoot.getAttribute(LOCANT_ATR) != null){ 242 throw new RuntimeException("Substituent has an unused outAtom and has a locant but locanted substitution should already have been performed!"); 243 } 244 if (frag.getOutAtomCount() > 1){ 245 checkAndApplySpecialCaseWhereOutAtomsCanBeCombinedOrThrow(frag, group); 246 } 247 if (subBracketOrRoot.getAttribute(MULTIPLIER_ATR) != null) {//e.g. diethyl 248 multiplyOutAndSubstitute(state, subBracketOrRoot); 249 } 250 else{ 251 if (PERHALOGENO_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))) { 252 performPerHalogenoSubstitution(state, frag, subBracketOrRoot); 253 } 254 else{ 255 List<Atom> atomsToJoinTo = null; 256 if (PHOSPHO_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR)) && frag.getOutAtom(0).getValency() == 1){ 257 List<Fragment> possibleParents = findAlternativeFragments(subBracketOrRoot); 258 for (Fragment fragment : possibleParents) { 259 List<Atom> hydroxyAtoms = FragmentTools.findHydroxyGroups(fragment); 260 if (hydroxyAtoms.size() >= 1){ 261 atomsToJoinTo = hydroxyAtoms; 262 } 263 break; 264 } 265 } 266 if (atomsToJoinTo == null) { 267 atomsToJoinTo = findAtomsForSubstitution(subBracketOrRoot, 1, frag.getOutAtom(0).getValency()); 268 } 269 if (atomsToJoinTo == null){ 270 throw new StructureBuildingException("Unlocanted substitution failed: unable to find suitable atom to bond atom with id:" + frag.getOutAtom(0).getAtom().getID() + " to!"); 271 } 272 if (AmbiguityChecker.isSubstitutionAmbiguous(atomsToJoinTo, 1)) { 273 state.addIsAmbiguous("Connection of " + group.getValue() + " to " + atomsToJoinTo.get(0).getFrag().getTokenEl().getValue()); 274 } 275 joinFragmentsSubstitutively(state, frag, atomsToJoinTo.get(0)); 276 } 277 group.addAttribute(new Attribute(RESOLVED_ATR, "yes")); 278 } 279 } 280 } 281 282 /** 283 * Clones the perhalogenFrag sufficiently to replace all in scope hydrogen with halogens. 284 * The cloned fragments are merged into the perhalogenFrag 285 * @param state 286 * @param perhalogenFrag 287 * @param subBracketOrRoot 288 * @throws StructureBuildingException 289 */ performPerHalogenoSubstitution(BuildState state, Fragment perhalogenFrag, Element subBracketOrRoot)290 private static void performPerHalogenoSubstitution(BuildState state, Fragment perhalogenFrag, Element subBracketOrRoot) throws StructureBuildingException { 291 List<Fragment> fragmentsToAttachTo = findAlternativeFragments(subBracketOrRoot); 292 List<Atom> atomsToHalogenate = new ArrayList<Atom>(); 293 for (Fragment fragment : fragmentsToAttachTo) { 294 FragmentTools.convertSpareValenciesToDoubleBonds(fragment); 295 for (Atom atom : fragment.getAtomList()) { 296 int substitutableHydrogen = calculateSubstitutableHydrogenAtoms(atom); 297 if (substitutableHydrogen > 0 && FragmentTools.isCharacteristicAtom(atom)){ 298 continue; 299 } 300 for (int i = 0; i < substitutableHydrogen; i++) { 301 atomsToHalogenate.add(atom); 302 } 303 } 304 } 305 if (atomsToHalogenate.size() == 0){ 306 throw new RuntimeException("Failed to find any substitutable hydrogen to apply " + perhalogenFrag.getTokenEl().getValue() + " to!"); 307 } 308 List<Fragment> halogens = new ArrayList<Fragment>(); 309 halogens.add(perhalogenFrag); 310 for (int i = 0; i < atomsToHalogenate.size() - 1; i++) { 311 halogens.add(state.fragManager.copyFragment(perhalogenFrag)); 312 } 313 for (int i = 0; i < atomsToHalogenate.size(); i++) { 314 Fragment halogen = halogens.get(i); 315 Atom from = halogen.getOutAtom(0).getAtom(); 316 halogen.removeOutAtom(0); 317 state.fragManager.createBond(from, atomsToHalogenate.get(i), 1); 318 } 319 for (int i = 1; i < atomsToHalogenate.size(); i++) { 320 state.fragManager.incorporateFragment(halogens.get(i), perhalogenFrag); 321 } 322 } 323 324 /** 325 * Multiplies out groups/brakets and substitutes them. The attribute "locant" is checked for locants 326 * If it is present it should contain a comma separated list of locants 327 * The strategy employed is to clone subOrBracket and its associated fragments as many times as the multiplier attribute 328 * perform(Un)LocantedSubstitutiveOperations is then called with on each call a different clone (or the original element) being in position 329 * Hence bonding between the clones is impossible 330 * @param state 331 * @param subOrBracket 332 * @throws StructureBuildingException 333 */ multiplyOutAndSubstitute(BuildState state, Element subOrBracket)334 private static void multiplyOutAndSubstitute(BuildState state, Element subOrBracket) throws StructureBuildingException { 335 Attribute multiplierAtr = subOrBracket.getAttribute(MULTIPLIER_ATR); 336 int multiplier = Integer.parseInt(multiplierAtr.getValue()); 337 subOrBracket.removeAttribute(multiplierAtr); 338 String[] locants = null; 339 String locantsAtrValue = subOrBracket.getAttributeValue(LOCANT_ATR); 340 if (locantsAtrValue != null){ 341 locants = locantsAtrValue.split(","); 342 } 343 Element parentWordOrBracket = subOrBracket.getParent(); 344 int indexOfSubOrBracket = parentWordOrBracket.indexOf(subOrBracket); 345 subOrBracket.detach(); 346 347 List<Element> elementsNotToBeMultiplied = new ArrayList<Element>();//anything before the multiplier in the sub/bracket 348 Element multiplierEl = subOrBracket.getFirstChildElement(MULTIPLIER_EL); 349 if (multiplierEl == null){ 350 throw new RuntimeException("Multiplier not found where multiplier expected"); 351 } 352 for (int i = subOrBracket.indexOf(multiplierEl) -1 ; i >=0 ; i--) { 353 Element el = subOrBracket.getChild(i); 354 el.detach(); 355 elementsNotToBeMultiplied.add(el); 356 } 357 multiplierEl.detach(); 358 359 List<Element> multipliedElements = new ArrayList<Element>(); 360 for (int i = multiplier - 1; i >=0; i--) { 361 Element currentElement; 362 if (i != 0){ 363 currentElement = state.fragManager.cloneElement(state, subOrBracket, i); 364 addPrimesToLocantedStereochemistryElements(currentElement, StringTools.multiplyString("'", i));//Stereochemistry elements with locants will need to have their locants primed (stereochemistry is only processed after structure building) 365 } 366 else{ 367 currentElement = subOrBracket; 368 } 369 multipliedElements.add(currentElement); 370 if (locants != null){ 371 parentWordOrBracket.insertChild(currentElement, indexOfSubOrBracket); 372 currentElement.getAttribute(LOCANT_ATR).setValue(locants[i]); 373 performLocantedSubstitutiveOperations(state, currentElement); 374 currentElement.detach(); 375 } 376 } 377 if (locants == null) { 378 parentWordOrBracket.insertChild(multipliedElements.get(0), indexOfSubOrBracket); 379 performUnlocantedSubstitutiveOperations(state, multipliedElements); 380 multipliedElements.get(0).detach(); 381 } 382 for (Element multipliedElement : multipliedElements) {//attach all the multiplied subs/brackets 383 parentWordOrBracket.insertChild(multipliedElement, indexOfSubOrBracket); 384 } 385 for (Element el : elementsNotToBeMultiplied) {//re-add anything before multiplier to original subOrBracket 386 subOrBracket.insertChild(el, 0); 387 } 388 } 389 performUnlocantedSubstitutiveOperations(BuildState state, List<Element> multipliedElements)390 private static void performUnlocantedSubstitutiveOperations(BuildState state, List<Element> multipliedElements) throws StructureBuildingException { 391 int numOfSubstituents = multipliedElements.size(); 392 Element subBracketOrRoot = multipliedElements.get(0); 393 Element group; 394 if (subBracketOrRoot.getName().equals(BRACKET_EL)){ 395 group = findRightMostGroupInBracket(subBracketOrRoot); 396 } 397 else{ 398 group = subBracketOrRoot.getFirstChildElement(GROUP_EL); 399 } 400 Fragment frag = group.getFrag(); 401 if (frag.getOutAtomCount() >= 1){ 402 if (subBracketOrRoot.getAttribute(LOCANT_ATR) != null){ 403 throw new RuntimeException("Substituent has an unused outAtom and has a locant but locanted substitution should already been been performed!"); 404 } 405 if (PERHALOGENO_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))) { 406 throw new StructureBuildingException(group.getValue() + " cannot be multiplied"); 407 } 408 if (frag.getOutAtomCount() > 1){ 409 checkAndApplySpecialCaseWhereOutAtomsCanBeCombinedOrThrow(frag, group); 410 } 411 List<Atom> atomsToJoinTo = null; 412 if (PHOSPHO_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR)) && frag.getOutAtom(0).getValency() == 1){ 413 List<Fragment> possibleParents = findAlternativeFragments(subBracketOrRoot); 414 for (Fragment fragment : possibleParents) { 415 List<Atom> hydroxyAtoms = FragmentTools.findHydroxyGroups(fragment); 416 if (hydroxyAtoms.size() >= numOfSubstituents){ 417 atomsToJoinTo = hydroxyAtoms; 418 } 419 break; 420 } 421 } 422 if (atomsToJoinTo == null) { 423 atomsToJoinTo = findAtomsForSubstitution(subBracketOrRoot, numOfSubstituents, frag.getOutAtom(0).getValency()); 424 } 425 if (atomsToJoinTo == null) { 426 throw new StructureBuildingException("Unlocanted substitution failed: unable to find suitable atom to bond atom with id:" + frag.getOutAtom(0).getAtom().getID() + " to!"); 427 } 428 if (AmbiguityChecker.isSubstitutionAmbiguous(atomsToJoinTo, numOfSubstituents)) { 429 state.addIsAmbiguous("Connection of " + group.getValue() + " to " + atomsToJoinTo.get(0).getFrag().getTokenEl().getValue()); 430 List<Atom> atomsPreferredByEnvironment = AmbiguityChecker.useAtomEnvironmentsToGivePlausibleSubstitution(atomsToJoinTo, numOfSubstituents); 431 if (atomsPreferredByEnvironment != null) { 432 atomsToJoinTo = atomsPreferredByEnvironment; 433 } 434 } 435 436 joinFragmentsSubstitutively(state, frag, atomsToJoinTo.get(0)); 437 group.addAttribute(new Attribute(RESOLVED_ATR, "yes")); 438 439 for (int i = 1; i < numOfSubstituents; i++) { 440 subBracketOrRoot = multipliedElements.get(i); 441 if (subBracketOrRoot.getName().equals(BRACKET_EL)){ 442 group = findRightMostGroupInBracket(subBracketOrRoot); 443 } 444 else{ 445 group = subBracketOrRoot.getFirstChildElement(GROUP_EL); 446 } 447 frag = group.getFrag(); 448 if (frag.getOutAtomCount() > 1){//TODO do this prior to multiplication? 449 checkAndApplySpecialCaseWhereOutAtomsCanBeCombinedOrThrow(frag, group); 450 } 451 452 joinFragmentsSubstitutively(state, frag, atomsToJoinTo.get(i)); 453 group.addAttribute(new Attribute(RESOLVED_ATR, "yes")); 454 } 455 } 456 } 457 458 /** 459 * Adds locanted unsaturators, heteroatoms and hydrogen elements to the group within the sub or root 460 * @param state 461 * @param subOrRoot 462 * @throws StructureBuildingException 463 */ resolveLocantedFeatures(BuildState state, Element subOrRoot)464 static void resolveLocantedFeatures(BuildState state, Element subOrRoot) throws StructureBuildingException { 465 List<Element> groups = subOrRoot.getChildElements(GROUP_EL); 466 if (groups.size() != 1){ 467 throw new StructureBuildingException("Each sub or root should only have one group element. This indicates a bug in OPSIN"); 468 } 469 Element group = groups.get(0); 470 Fragment thisFrag = group.getFrag(); 471 472 List<Element> unsaturators = new ArrayList<Element>(); 473 List<Element> heteroatoms = new ArrayList<Element>(); 474 List<Element> hydrogenElements = new ArrayList<Element>(); 475 List<Element> subtractivePrefixElements = new ArrayList<Element>(); 476 List<Element> isotopeSpecifications = new ArrayList<Element>(); 477 478 List<Element> children =subOrRoot.getChildElements(); 479 for (Element currentEl : children) { 480 String elName =currentEl.getName(); 481 if (elName.equals(UNSATURATOR_EL)){ 482 unsaturators.add(currentEl); 483 } 484 else if (elName.equals(HETEROATOM_EL)){ 485 heteroatoms.add(currentEl); 486 } 487 else if (elName.equals(SUBTRACTIVEPREFIX_EL)){ 488 subtractivePrefixElements.add(currentEl); 489 } 490 else if (elName.equals(HYDRO_EL)){ 491 hydrogenElements.add(currentEl); 492 } 493 else if (elName.equals(INDICATEDHYDROGEN_EL)){ 494 hydrogenElements.add(currentEl); 495 } 496 else if (elName.equals(ADDEDHYDROGEN_EL)){ 497 hydrogenElements.add(currentEl); 498 } 499 else if (elName.equals(ISOTOPESPECIFICATION_EL)){ 500 isotopeSpecifications.add(currentEl); 501 } 502 } 503 /* 504 * Add locanted functionality 505 */ 506 507 List<Atom> atomsToDehydro = new ArrayList<Atom>(); 508 //locanted substitution can be assumed to be irrelevant to subtractive operations hence perform all subtractive operations now 509 Map<ChemEl, Integer> unlocantedSubtractivePrefixes = new HashMap<ChemEl, Integer>(); 510 511 for(int i = subtractivePrefixElements.size() -1; i >= 0; i--) { 512 Element subtractivePrefix = subtractivePrefixElements.get(i); 513 String type = subtractivePrefix.getAttributeValue(TYPE_ATR); 514 if (type.equals(DEOXY_TYPE_VAL)){ 515 String locant = subtractivePrefix.getAttributeValue(LOCANT_ATR); 516 ChemEl chemEl = ChemEl.valueOf(subtractivePrefix.getAttributeValue(VALUE_ATR)); 517 if (locant == null) { 518 Integer count = unlocantedSubtractivePrefixes.get(chemEl); 519 unlocantedSubtractivePrefixes.put(chemEl, count != null ? count + 1 : 1); 520 } 521 else { 522 applySubtractivePrefix(state, thisFrag, chemEl, locant); 523 } 524 } 525 else if (type.equals(ANHYDRO_TYPE_VAL)){ 526 applyAnhydroPrefix(state, thisFrag, subtractivePrefix); 527 } 528 else if (type.equals(DEHYDRO_TYPE_VAL)){ 529 String locant = subtractivePrefix.getAttributeValue(LOCANT_ATR); 530 if(locant != null) { 531 atomsToDehydro.add(thisFrag.getAtomByLocantOrThrow(locant)); 532 } 533 else{ 534 throw new StructureBuildingException("locants are assumed to be required for the use of dehydro to be unambiguous"); 535 } 536 } 537 else{ 538 throw new StructureBuildingException("OPSIN bug: Unexpected subtractive prefix type: " + type); 539 } 540 subtractivePrefix.detach(); 541 } 542 for (Entry<ChemEl, Integer> entry : unlocantedSubtractivePrefixes.entrySet()) { 543 applyUnlocantedSubtractivePrefixes(state, thisFrag, entry.getKey(), entry.getValue()); 544 } 545 546 if (atomsToDehydro.size() > 0){ 547 boolean isCarbohydrateDehydro = false; 548 if (group.getAttributeValue(TYPE_ATR).equals(CARBOHYDRATE_TYPE_VAL)){ 549 Set<Atom> uniquifiedDehydroAtoms = new HashSet<Atom>(atomsToDehydro); 550 if (uniquifiedDehydroAtoms.size()==atomsToDehydro.size()){//need to rule out case where dehydro is being used to form triple bonds on carbohydrates 551 isCarbohydrateDehydro = true; 552 } 553 } 554 if (isCarbohydrateDehydro){ 555 for (Atom a : atomsToDehydro) { 556 List<Atom> hydroxyAtoms = FragmentTools.findHydroxyLikeTerminalAtoms(a.getAtomNeighbours(), ChemEl.O); 557 if (hydroxyAtoms.size() > 0){ 558 hydroxyAtoms.get(0).getFirstBond().setOrder(2); 559 } 560 else{ 561 throw new StructureBuildingException("atom with locant " + a.getFirstLocant() + " did not have a hydroxy group to convert to a ketose"); 562 } 563 } 564 } 565 else{ 566 List<Atom> atomsToFormDoubleBonds = new ArrayList<Atom>(); 567 List<Atom> atomsToFormTripleBondsBetween = new ArrayList<Atom>();//dehydro on a double/aromatic bond forms a triple bond 568 569 for (Atom a : atomsToDehydro) { 570 if (!a.hasSpareValency()){ 571 a.setSpareValency(true); 572 atomsToFormDoubleBonds.add(a); 573 } 574 else{ 575 atomsToFormTripleBondsBetween.add(a); 576 } 577 } 578 579 for (Atom atom : atomsToFormDoubleBonds) {//check that all the dehydro-ed atoms are next to another atom with spare valency 580 boolean hasSpareValency =false; 581 for (Atom neighbour : atom.getAtomNeighbours()) { 582 if (neighbour.hasSpareValency()){ 583 hasSpareValency = true; 584 break; 585 } 586 } 587 if (!hasSpareValency){ 588 throw new StructureBuildingException("Unexpected use of dehydro; two adjacent atoms were not unsaturated such as to form a double bond"); 589 } 590 } 591 addDehydroInducedTripleBonds(atomsToFormTripleBondsBetween); 592 } 593 } 594 595 for(int i=hydrogenElements.size() -1;i >= 0;i--) { 596 Element hydrogen = hydrogenElements.get(i); 597 String locant = hydrogen.getAttributeValue(LOCANT_ATR); 598 if(locant != null) { 599 Atom a =thisFrag.getAtomByLocantOrThrow(locant); 600 if (a.hasSpareValency()){ 601 a.setSpareValency(false); 602 } 603 else{ 604 if (!acdNameSpiroIndicatedHydrogenBug(group, locant)){ 605 throw new StructureBuildingException("hydrogen addition at locant: " + locant +" was requested, but this atom is not unsaturated"); 606 } 607 } 608 hydrogenElements.remove(i); 609 hydrogen.detach(); 610 } 611 } 612 613 for(int i=unsaturators.size() -1;i >= 0;i--) { 614 Element unsaturator = unsaturators.get(i); 615 String locant = unsaturator.getAttributeValue(LOCANT_ATR); 616 int bondOrder = Integer.parseInt(unsaturator.getAttributeValue(VALUE_ATR)); 617 if(bondOrder <= 1) { 618 unsaturator.detach(); 619 continue; 620 } 621 if(locant != null) { 622 unsaturators.remove(unsaturator); 623 /* 624 * Is the locant a compound locant e.g. 1(6) 625 * This would indicate unsaturation between the atoms with locants 1 and 6 626 */ 627 Matcher matcher = matchCompoundLocant.matcher(locant); 628 if (matcher.find()) { 629 String compoundLocant = matcher.group(1); 630 locant = matcher.replaceAll(""); 631 FragmentTools.unsaturate(thisFrag.getAtomByLocantOrThrow(locant), compoundLocant, bondOrder, thisFrag); 632 } 633 else { 634 FragmentTools.unsaturate(thisFrag.getAtomByLocantOrThrow(locant), bondOrder, thisFrag); 635 } 636 unsaturator.detach(); 637 } 638 } 639 640 for(int i=heteroatoms.size() -1;i >= 0;i--) { 641 Element heteroatomEl = heteroatoms.get(i); 642 String locant = heteroatomEl.getAttributeValue(LOCANT_ATR); 643 if(locant != null) { 644 Atom heteroatom = state.fragManager.getHeteroatom(heteroatomEl.getAttributeValue(VALUE_ATR)); 645 Atom atomToBeReplaced =thisFrag.getAtomByLocantOrThrow(locant); 646 if (heteroatom.getElement() == atomToBeReplaced.getElement() && heteroatom.getCharge() == atomToBeReplaced.getCharge()){ 647 throw new StructureBuildingException("The replacement term " +heteroatomEl.getValue() +" was used on an atom that already is a " + heteroatom.getElement()); 648 } 649 state.fragManager.replaceAtomWithAtom(thisFrag.getAtomByLocantOrThrow(locant), heteroatom, true); 650 if (heteroatomEl.getAttribute(LAMBDA_ATR) != null){ 651 thisFrag.getAtomByLocantOrThrow(locant).setLambdaConventionValency(Integer.parseInt(heteroatomEl.getAttributeValue(LAMBDA_ATR))); 652 } 653 heteroatoms.remove(heteroatomEl); 654 heteroatomEl.detach(); 655 } 656 } 657 658 if (isotopeSpecifications.size() > 0) { 659 applyIsotopeSpecifications(state, thisFrag, isotopeSpecifications, true); 660 } 661 } 662 663 /** 664 * ACD/Name has a known bug where it produces names in which a suffixed saturated ring in a polycyclic spiro 665 * is treated as if it is unsaturated and hence has indicated hydrogens 666 * e.g. 1',3'-dihydro-2H,5H-spiro[imidazolidine-4,2'-indene]-2,5-dione 667 * @param group 668 * @param indicatedHydrogenLocant 669 * @return 670 */ acdNameSpiroIndicatedHydrogenBug(Element group, String indicatedHydrogenLocant)671 private static boolean acdNameSpiroIndicatedHydrogenBug(Element group, String indicatedHydrogenLocant) { 672 if (group.getValue().startsWith("spiro")) { 673 for (Element suffix : group.getParent().getChildElements(SUFFIX_EL)) { 674 String suffixLocant = suffix.getAttributeValue(LOCANT_ATR); 675 if (suffixLocant != null && suffixLocant.equals(indicatedHydrogenLocant)) { 676 LOG.debug("Indicated hydrogen at " + indicatedHydrogenLocant + " ignored. Known bug in generated IUPAC name"); 677 return true; 678 } 679 } 680 } 681 return false; 682 } 683 684 /** 685 * Removes a terminal atom of a particular element e.g. oxygen 686 * The locant specifies the atom adjacent to the atom to be removed 687 * Formally the atom is replaced by hydrogen, hence stereochemistry is intentionally preserved 688 * @param state 689 * @param fragment 690 * @param chemEl 691 * @param locant A locant or null 692 * @throws StructureBuildingException 693 */ applySubtractivePrefix(BuildState state, Fragment fragment, ChemEl chemEl, String locant)694 static void applySubtractivePrefix(BuildState state, Fragment fragment, ChemEl chemEl, String locant) throws StructureBuildingException { 695 Atom adjacentAtom = fragment.getAtomByLocantOrThrow(locant); 696 List<Atom> applicableTerminalAtoms = FragmentTools.findHydroxyLikeTerminalAtoms(adjacentAtom.getAtomNeighbours(), chemEl); 697 if (applicableTerminalAtoms.isEmpty()) { 698 throw new StructureBuildingException("Unable to find terminal atom of type: " + chemEl + " at locant "+ locant +" for subtractive nomenclature"); 699 } 700 Atom atomToRemove = applicableTerminalAtoms.get(0); 701 if (FragmentTools.isFunctionalAtom(atomToRemove)) {//This can occur with aminoglycosides where the anomeric OH is removed by deoxy 702 for (int i = 0, len = fragment.getFunctionalAtomCount(); i < len; i++) { 703 if (atomToRemove.equals(fragment.getFunctionalAtom(i).getAtom())) { 704 fragment.removeFunctionalAtom(i); 705 break; 706 } 707 } 708 fragment.addFunctionalAtom(atomToRemove.getFirstBond().getOtherAtom(atomToRemove)); 709 } 710 FragmentTools.removeTerminalAtom(state, atomToRemove); 711 } 712 713 /** 714 * Removes terminal atoms of a particular element e.g. oxygen 715 * The number to remove is decided by the count 716 * Formally the atom is replaced by hydrogen, hence stereochemistry is intentionally preserved 717 * @param state 718 * @param fragment 719 * @param chemEl 720 * @param count 721 * @throws StructureBuildingException 722 */ applyUnlocantedSubtractivePrefixes(BuildState state, Fragment fragment, ChemEl chemEl, int count)723 static void applyUnlocantedSubtractivePrefixes(BuildState state, Fragment fragment, ChemEl chemEl, int count) throws StructureBuildingException { 724 List<Atom> applicableTerminalAtoms = FragmentTools.findHydroxyLikeTerminalAtoms(fragment.getAtomList(), chemEl); 725 if (applicableTerminalAtoms.isEmpty() || applicableTerminalAtoms.size() < count) { 726 throw new StructureBuildingException("Unable to find terminal atom of type: " + chemEl + " for subtractive nomenclature"); 727 } 728 if (AmbiguityChecker.isSubstitutionAmbiguous(applicableTerminalAtoms, count)) { 729 state.addIsAmbiguous("Group to remove with subtractive prefix"); 730 } 731 for (int i = 0; i < count; i++) { 732 Atom atomToRemove = applicableTerminalAtoms.get(i); 733 if (FragmentTools.isFunctionalAtom(atomToRemove)) {//This can occur with aminoglycosides where the anomeric OH is removed by deoxy 734 for (int j = 0, len = fragment.getFunctionalAtomCount(); j < len; j++) { 735 if (atomToRemove.equals(fragment.getFunctionalAtom(j).getAtom())) { 736 fragment.removeFunctionalAtom(j); 737 break; 738 } 739 } 740 fragment.addFunctionalAtom(atomToRemove.getFirstBond().getOtherAtom(atomToRemove)); 741 } 742 FragmentTools.removeTerminalAtom(state, atomToRemove); 743 } 744 } 745 applyAnhydroPrefix(BuildState state, Fragment frag, Element subtractivePrefix)746 private static void applyAnhydroPrefix(BuildState state, Fragment frag, Element subtractivePrefix) throws StructureBuildingException { 747 ChemEl chemEl = ChemEl.valueOf(subtractivePrefix.getAttributeValue(VALUE_ATR)); 748 String locantStr = subtractivePrefix.getAttributeValue(LOCANT_ATR); 749 if (locantStr == null) { 750 throw new StructureBuildingException("Two locants are required before an anhydro prefix"); 751 } 752 String[] locants = locantStr.split(","); 753 Atom backBoneAtom1 = frag.getAtomByLocantOrThrow(locants[0]); 754 Atom backBoneAtom2 = frag.getAtomByLocantOrThrow(locants[1]); 755 List<Atom> applicableTerminalAtoms = FragmentTools.findHydroxyLikeTerminalAtoms(backBoneAtom1.getAtomNeighbours(), chemEl); 756 if (applicableTerminalAtoms.isEmpty()){ 757 throw new StructureBuildingException("Unable to find terminal atom of type: " + chemEl + " for subtractive nomenclature"); 758 } 759 FragmentTools.removeTerminalAtom(state, applicableTerminalAtoms.get(0)); 760 761 applicableTerminalAtoms = FragmentTools.findHydroxyLikeTerminalAtoms(backBoneAtom2.getAtomNeighbours(), chemEl); 762 if (applicableTerminalAtoms.isEmpty()){ 763 throw new StructureBuildingException("Unable to find terminal atom of type: " + chemEl + " for subtractive nomenclature"); 764 } 765 state.fragManager.createBond(backBoneAtom1, applicableTerminalAtoms.get(0), 1); 766 } 767 768 /** 769 * Attempts to form triple bond between the atoms in atomsToFormTripleBondsBetween 770 * Throws an exception if the list contains duplicates or atoms with no adjacent atom in the list 771 * @param atomsToFormTripleBondsBetween 772 * @throws StructureBuildingException 773 */ addDehydroInducedTripleBonds(List<Atom> atomsToFormTripleBondsBetween)774 private static void addDehydroInducedTripleBonds(List<Atom> atomsToFormTripleBondsBetween) throws StructureBuildingException { 775 if (atomsToFormTripleBondsBetween.size()>0){ 776 Set<Atom> atoms = new HashSet<Atom>(atomsToFormTripleBondsBetween); 777 if (atomsToFormTripleBondsBetween.size() != atoms.size()){ 778 throw new StructureBuildingException("locants specified for dehydro specify the same atom too many times"); 779 } 780 atomLoop: for (int i = atomsToFormTripleBondsBetween.size()-1; i >=0; i = i-2) {//two atoms will have a triple bond formed betwen them 781 Atom a = atomsToFormTripleBondsBetween.get(i); 782 List<Atom> neighbours = a.getAtomNeighbours(); 783 for (Atom neighbour : neighbours) { 784 if (atomsToFormTripleBondsBetween.contains(neighbour)){ 785 atomsToFormTripleBondsBetween.remove(i); 786 atomsToFormTripleBondsBetween.remove(neighbour); 787 Bond b = a.getBondToAtomOrThrow(neighbour); 788 b.setOrder(3); 789 a.setSpareValency(false); 790 neighbour.setSpareValency(false); 791 continue atomLoop; 792 } 793 } 794 throw new StructureBuildingException("dehydro indicated atom should form a triple bond but no adjacent atoms also had hydrogen removed!"); 795 } 796 } 797 } 798 799 /** 800 * Adds locanted unsaturators, heteroatoms and hydrogen elements to the group within the sub or root 801 * @param state 802 * @param subOrRoot 803 * @throws StructureBuildingException 804 */ resolveUnLocantedFeatures(BuildState state, Element subOrRoot)805 static void resolveUnLocantedFeatures(BuildState state, Element subOrRoot) throws StructureBuildingException { 806 List<Element> groups = subOrRoot.getChildElements(GROUP_EL); 807 if (groups.size() != 1){ 808 throw new StructureBuildingException("Each sub or root should only have one group element. This indicates a bug in OPSIN"); 809 } 810 Fragment frag = groups.get(0).getFrag(); 811 812 List<Integer> unsaturationBondOrders = new ArrayList<Integer>(); 813 List<Element> heteroatoms = new ArrayList<Element>(); 814 List<Element> hydrogenElements = new ArrayList<Element>(); 815 List<Element> isotopeSpecifications = new ArrayList<Element>(); 816 817 List<Element> children = subOrRoot.getChildElements(); 818 for (Element currentEl : children) { 819 String elName = currentEl.getName(); 820 if (elName.equals(UNSATURATOR_EL)) { 821 int bondOrder = Integer.parseInt(currentEl.getAttributeValue(VALUE_ATR)); 822 if (bondOrder > 1) { 823 unsaturationBondOrders.add(bondOrder); 824 } 825 currentEl.detach(); 826 } 827 else if (elName.equals(HETEROATOM_EL)){ 828 heteroatoms.add(currentEl); 829 currentEl.detach(); 830 } 831 else if (elName.equals(HYDRO_EL) || 832 elName.equals(INDICATEDHYDROGEN_EL) || 833 elName.equals(ADDEDHYDROGEN_EL)){ 834 hydrogenElements.add(currentEl); 835 currentEl.detach(); 836 } 837 else if (elName.equals(ISOTOPESPECIFICATION_EL)){ 838 isotopeSpecifications.add(currentEl); 839 } 840 } 841 842 if (hydrogenElements.size() > 0) { 843 applyUnlocantedHydro(state, frag, hydrogenElements); 844 } 845 846 if (unsaturationBondOrders.size() > 0){ 847 unsaturateBonds(state, frag, unsaturationBondOrders); 848 } 849 850 if (heteroatoms.size() > 0) { 851 applyUnlocantedHeteroatoms(state, frag, heteroatoms); 852 } 853 854 if (isotopeSpecifications.size() > 0) { 855 applyIsotopeSpecifications(state, frag, isotopeSpecifications, false); 856 } 857 858 if (frag.getOutAtomCount() > 0){//assign any outAtoms that have not been set to a specific atom to a specific atom 859 for (int i = 0, l = frag.getOutAtomCount(); i < l; i++) { 860 OutAtom outAtom = frag.getOutAtom(i); 861 if (!outAtom.isSetExplicitly()){ 862 outAtom.setAtom(findAtomForUnlocantedRadical(state, frag, outAtom)); 863 outAtom.setSetExplicitly(true); 864 } 865 } 866 } 867 } 868 applyUnlocantedHydro(BuildState state, Fragment frag, List<Element> hydrogenElements)869 private static void applyUnlocantedHydro(BuildState state, Fragment frag, List<Element> hydrogenElements) throws StructureBuildingException { 870 /* 871 * This function is not entirely straightforward as certain atoms definitely should have their spare valency reduced 872 * However names are not consistent as to whether they bother having the hydro tags do this! 873 * The atoms in atomsWithSV are in atom order those that can take a hydro element and then those that shouldn't really take a hydro element as its absence is unambiguous 874 */ 875 List<Atom> atomsAcceptingHydroPrefix = new ArrayList<Atom>(); 876 Set<Atom> atomsWhichImplicitlyHadTheirSVRemoved = new HashSet<Atom>(); 877 List<Atom> atomList = frag.getAtomList(); 878 for (Atom atom : atomList) { 879 if (atom.getType().equals(SUFFIX_TYPE_VAL)){ 880 continue; 881 } 882 atom.ensureSVIsConsistantWithValency(false);//doesn't take into account suffixes 883 if (atom.hasSpareValency()) { 884 atomsAcceptingHydroPrefix.add(atom); 885 //if we take into account suffixes is the SV removed 886 atom.ensureSVIsConsistantWithValency(true); 887 if (!atom.hasSpareValency()) { 888 atomsWhichImplicitlyHadTheirSVRemoved.add(atom); 889 } 890 } 891 } 892 893 int hydrogenElsCount = hydrogenElements.size(); 894 for (Element hydrogenElement : hydrogenElements) { 895 if (hydrogenElement.getValue().equals("perhydro")) { 896 if (hydrogenElsCount != 1){ 897 throw new StructureBuildingException("Unexpected indication of hydrogen when perhydro makes such indication redundnant"); 898 } 899 for (Atom atom : atomsAcceptingHydroPrefix) { 900 atom.setSpareValency(false); 901 } 902 return; 903 } 904 } 905 906 List<Atom> atomsWithDefiniteSV = new ArrayList<Atom>(); 907 List<Atom> otherAtomsThatCanHaveHydro = new ArrayList<Atom>(); 908 for(Atom a : atomsAcceptingHydroPrefix) { 909 if (atomsWhichImplicitlyHadTheirSVRemoved.contains(a)) { 910 otherAtomsThatCanHaveHydro.add(a); 911 } 912 else { 913 boolean canFormDoubleBond = false; 914 for(Atom aa : frag.getIntraFragmentAtomNeighbours(a)) { 915 if(aa.hasSpareValency()) { 916 canFormDoubleBond = true; 917 break; 918 } 919 } 920 if (canFormDoubleBond) { 921 atomsWithDefiniteSV.add(a); 922 } 923 else { 924 otherAtomsThatCanHaveHydro.add(a); 925 } 926 } 927 } 928 List<Atom> prioritisedAtomsAcceptingHydro = new ArrayList<Atom>(atomsWithDefiniteSV); 929 prioritisedAtomsAcceptingHydro.addAll(otherAtomsThatCanHaveHydro);//these end up at the end of the list 930 931 if (hydrogenElsCount > prioritisedAtomsAcceptingHydro.size()) { 932 throw new StructureBuildingException("Cannot find atom to add hydrogen to (" + 933 hydrogenElsCount + " hydrogens requested but only " + prioritisedAtomsAcceptingHydro.size() +" positions that can be hydrogenated)" ); 934 } 935 936 int svCountAfterRemoval = atomsWithDefiniteSV.size() - hydrogenElsCount; 937 if (svCountAfterRemoval > 1) { //ambiguity likely. If it's 1 then an atom will be implicitly hydrogenated 938 //NOTE: as hydrogens as added in pairs the unambiguous if one hydrogen is added and allow atoms are identical condition is unlikely to be ever satisfied 939 if (!(AmbiguityChecker.allAtomsEquivalent(atomsWithDefiniteSV) && 940 (hydrogenElsCount == 1 || hydrogenElsCount == atomsWithDefiniteSV.size() - 1))) { 941 state.addIsAmbiguous("Ambiguous choice of positions to add hydrogen to on " + frag.getTokenEl().getValue()); 942 } 943 } 944 945 for (int i = 0; i < hydrogenElsCount; i++) { 946 prioritisedAtomsAcceptingHydro.get(i).setSpareValency(false); 947 } 948 } 949 unsaturateBonds(BuildState state, Fragment frag, List<Integer> unsaturationBondOrders)950 private static void unsaturateBonds(BuildState state, Fragment frag, List<Integer> unsaturationBondOrders) throws StructureBuildingException { 951 int tripleBonds = 0; 952 int doublebonds = 0; 953 for (Integer bondOrder : unsaturationBondOrders) { 954 if (bondOrder == 3) { 955 tripleBonds++; 956 } 957 else if (bondOrder == 2) { 958 doublebonds++; 959 } 960 else { 961 throw new RuntimeException("Unexpected unsaturation bon order: " + bondOrder); 962 } 963 } 964 965 if (tripleBonds > 0) { 966 unsaturateBonds(state, frag, 3, tripleBonds); 967 } 968 if (doublebonds > 0) { 969 unsaturateBonds(state, frag, 2, doublebonds); 970 } 971 } 972 unsaturateBonds(BuildState state, Fragment frag, int bondOrder, int numToUnsaturate)973 private static void unsaturateBonds(BuildState state, Fragment frag, int bondOrder, int numToUnsaturate) throws StructureBuildingException { 974 List<Bond> bondsThatCouldBeUnsaturated = findBondsToUnSaturate(frag, bondOrder, false); 975 List<Bond> alternativeBondsThatCouldBeUnsaturated = Collections.emptyList(); 976 if (bondsThatCouldBeUnsaturated.size() < numToUnsaturate){ 977 bondsThatCouldBeUnsaturated = findBondsToUnSaturate(frag, bondOrder, true); 978 } 979 else { 980 alternativeBondsThatCouldBeUnsaturated = findAlternativeBondsToUnSaturate(frag, bondOrder, bondsThatCouldBeUnsaturated); 981 } 982 if (bondsThatCouldBeUnsaturated.size() < numToUnsaturate){ 983 throw new StructureBuildingException("Failed to find bond to change to a bond of order: " + bondOrder); 984 } 985 if (bondsThatCouldBeUnsaturated.size() > numToUnsaturate) { 986 //by convention cycloalkanes can have one unsaturation implicitly at the 1 locant 987 //terms like oxazoline are formally ambiguous but in practice the lowest locant is the one that will be intended (in this case 2-oxazoline) 988 if (!isCycloAlkaneSpecialCase(frag, numToUnsaturate, bondsThatCouldBeUnsaturated) && 989 !HANTZSCHWIDMAN_SUBTYPE_VAL.equals(frag.getSubType())) { 990 if (alternativeBondsThatCouldBeUnsaturated.size() >= numToUnsaturate) { 991 List<Bond> allBonds = new ArrayList<Bond>(bondsThatCouldBeUnsaturated); 992 allBonds.addAll(alternativeBondsThatCouldBeUnsaturated); 993 if (!(AmbiguityChecker.allBondsEquivalent(allBonds) && 994 numToUnsaturate == 1 )) { 995 state.addIsAmbiguous("Unsaturation of bonds of " + frag.getTokenEl().getValue()); 996 } 997 } 998 else { 999 if (!(AmbiguityChecker.allBondsEquivalent(bondsThatCouldBeUnsaturated) && 1000 (numToUnsaturate == 1 || numToUnsaturate == bondsThatCouldBeUnsaturated.size() - 1))){ 1001 state.addIsAmbiguous("Unsaturation of bonds of " + frag.getTokenEl().getValue()); 1002 } 1003 } 1004 } 1005 } 1006 for (int i = 0; i < numToUnsaturate; i++) { 1007 bondsThatCouldBeUnsaturated.get(i).setOrder(bondOrder); 1008 } 1009 } 1010 isCycloAlkaneSpecialCase(Fragment frag, int numToUnsaturate, List<Bond> bondsThatCouldBeUnsaturated)1011 private static boolean isCycloAlkaneSpecialCase(Fragment frag, int numToUnsaturate, List<Bond> bondsThatCouldBeUnsaturated) { 1012 if (numToUnsaturate == 1) { 1013 Bond b = bondsThatCouldBeUnsaturated.get(0); 1014 Atom a1 = b.getFromAtom(); 1015 Atom a2 = b.getToAtom(); 1016 if ((ALKANESTEM_SUBTYPE_VAL.equals(frag.getSubType()) || HETEROSTEM_SUBTYPE_VAL.equals(frag.getSubType())) && 1017 a1.getAtomIsInACycle() && a2.getAtomIsInACycle() && 1018 (a1.equals(frag.getFirstAtom()) || a2.equals(frag.getFirstAtom()))) { 1019 //mono unsaturated cyclo alkanes are unambiguous e.g. cyclohexene 1020 return true; 1021 } 1022 } 1023 return false; 1024 } 1025 isCycloAlkaneHeteroatomSpecialCase(Fragment frag, int numHeteroatoms, List<Atom> atomsThatCouldBeReplaced)1026 private static boolean isCycloAlkaneHeteroatomSpecialCase(Fragment frag, int numHeteroatoms, List<Atom> atomsThatCouldBeReplaced) { 1027 if (numHeteroatoms == 1) { 1028 if ((ALKANESTEM_SUBTYPE_VAL.equals(frag.getSubType()) || HETEROSTEM_SUBTYPE_VAL.equals(frag.getSubType())) && 1029 frag.getFirstAtom().getAtomIsInACycle() && atomsThatCouldBeReplaced.get(0).equals(frag.getFirstAtom())) { 1030 //single heteroatom implicitly goes to 1 position 1031 return true; 1032 } 1033 } 1034 return false; 1035 } 1036 1037 private static class HeteroAtomSmilesAndLambda { 1038 private final String smiles; 1039 private final String lambdaConvention; 1040 HeteroAtomSmilesAndLambda(String smiles, String lambdaConvention)1041 public HeteroAtomSmilesAndLambda(String smiles, String lambdaConvention) { 1042 this.smiles = smiles; 1043 this.lambdaConvention = lambdaConvention; 1044 } 1045 1046 @Override hashCode()1047 public int hashCode() { 1048 final int prime = 31; 1049 int result = 1; 1050 result = prime 1051 * result 1052 + ((lambdaConvention == null) ? 0 : lambdaConvention 1053 .hashCode()); 1054 result = prime * result 1055 + ((smiles == null) ? 0 : smiles.hashCode()); 1056 return result; 1057 } 1058 1059 @Override equals(Object obj)1060 public boolean equals(Object obj) { 1061 if (this == obj) 1062 return true; 1063 if (obj == null) 1064 return false; 1065 if (getClass() != obj.getClass()) 1066 return false; 1067 HeteroAtomSmilesAndLambda other = (HeteroAtomSmilesAndLambda) obj; 1068 if (lambdaConvention == null) { 1069 if (other.lambdaConvention != null) 1070 return false; 1071 } else if (!lambdaConvention.equals(other.lambdaConvention)) 1072 return false; 1073 if (smiles == null) { 1074 if (other.smiles != null) 1075 return false; 1076 } else if (!smiles.equals(other.smiles)) 1077 return false; 1078 return true; 1079 } 1080 1081 1082 } 1083 applyUnlocantedHeteroatoms(BuildState state, Fragment frag, List<Element> heteroatoms)1084 private static void applyUnlocantedHeteroatoms(BuildState state, Fragment frag, List<Element> heteroatoms) throws StructureBuildingException { 1085 Map<HeteroAtomSmilesAndLambda, Integer> heteroatomDescriptionToCount = new HashMap<HeteroAtomSmilesAndLambda, Integer>(); 1086 for (Element heteroatomEl : heteroatoms) { 1087 String smiles = heteroatomEl.getAttributeValue(VALUE_ATR); 1088 String lambdaConvention = heteroatomEl.getAttributeValue(LAMBDA_ATR); 1089 HeteroAtomSmilesAndLambda desc = new HeteroAtomSmilesAndLambda(smiles, lambdaConvention); 1090 Integer count = heteroatomDescriptionToCount.get(desc); 1091 heteroatomDescriptionToCount.put(desc, count != null ? count + 1 : 1); 1092 } 1093 List<Atom> atomlist = frag.getAtomList(); 1094 for (Entry<HeteroAtomSmilesAndLambda, Integer> entry : heteroatomDescriptionToCount.entrySet()) { 1095 HeteroAtomSmilesAndLambda desc = entry.getKey(); 1096 int replacementsRequired = entry.getValue(); 1097 Atom heteroatom = state.fragManager.getHeteroatom(desc.smiles); 1098 ChemEl heteroatomChemEl = heteroatom.getElement(); 1099 //finds an atom for which changing it to the specified heteroatom will not cause valency to be violated 1100 List<Atom> atomsThatCouldBeReplaced = new ArrayList<Atom>(); 1101 for (Atom atom : atomlist) { 1102 if (atom.getType().equals(SUFFIX_TYPE_VAL)) { 1103 continue; 1104 } 1105 if ((heteroatomChemEl.equals(atom.getElement()) && heteroatom.getCharge() == atom.getCharge())){ 1106 continue;//replacement would do nothing 1107 } 1108 if(atom.getElement() != ChemEl.C && heteroatomChemEl != ChemEl.C){ 1109 if (atom.getElement() == ChemEl.O && (heteroatomChemEl == ChemEl.S || heteroatomChemEl == ChemEl.Se || heteroatomChemEl == ChemEl.Te)) { 1110 //by special case allow replacement of oxygen by chalcogen 1111 } 1112 else{ 1113 //replacement of heteroatom by another heteroatom 1114 continue; 1115 } 1116 } 1117 if (ValencyChecker.checkValencyAvailableForReplacementByHeteroatom(atom, heteroatom)) { 1118 atomsThatCouldBeReplaced.add(atom); 1119 } 1120 } 1121 if (atomsThatCouldBeReplaced.size() < replacementsRequired){ 1122 throw new StructureBuildingException("Cannot find suitable atom for heteroatom replacement"); 1123 } 1124 1125 if (atomsThatCouldBeReplaced.size() > replacementsRequired && !isCycloAlkaneHeteroatomSpecialCase(frag, replacementsRequired, atomsThatCouldBeReplaced)) { 1126 if (!(AmbiguityChecker.allAtomsEquivalent(atomsThatCouldBeReplaced) && 1127 (replacementsRequired == 1 || replacementsRequired == atomsThatCouldBeReplaced.size() - 1))) { 1128 //by convention cycloalkanes can have one unsaturation implicitly at the 1 locant 1129 state.addIsAmbiguous("Heteroatom replacement on " + frag.getTokenEl().getValue()); 1130 } 1131 } 1132 1133 for (int i = 0; i < replacementsRequired; i++) { 1134 Atom atomToReplaceWithHeteroAtom = atomsThatCouldBeReplaced.get(i); 1135 state.fragManager.replaceAtomWithAtom(atomToReplaceWithHeteroAtom, heteroatom, true); 1136 if (desc.lambdaConvention != null) { 1137 atomToReplaceWithHeteroAtom.setLambdaConventionValency(Integer.parseInt(desc.lambdaConvention)); 1138 } 1139 } 1140 } 1141 } 1142 applyIsotopeSpecifications(BuildState state, Fragment frag, List<Element> isotopeSpecifications, boolean applyLocanted)1143 private static void applyIsotopeSpecifications(BuildState state, Fragment frag, List<Element> isotopeSpecifications, boolean applyLocanted) throws StructureBuildingException { 1144 for(int i = isotopeSpecifications.size() - 1; i >= 0; i--) { 1145 Element isotopeSpecification = isotopeSpecifications.get(i); 1146 IsotopeSpecification isotopeSpec = IsotopeSpecificationParser.parseIsotopeSpecification(isotopeSpecification); 1147 String[] locants = isotopeSpec.getLocants(); 1148 if(locants != null) { 1149 if (!applyLocanted) { 1150 continue; 1151 } 1152 } 1153 else if (applyLocanted) { 1154 continue; 1155 } 1156 1157 ChemEl chemEl = isotopeSpec.getChemEl(); 1158 int isotope = isotopeSpec.getIsotope(); 1159 if(locants != null) { 1160 if (chemEl == ChemEl.H) { 1161 for (int j = 0; j < locants.length; j++) { 1162 Atom atomWithHydrogenIsotope = frag.getAtomByLocantOrThrow(locants[j]); 1163 Atom hydrogen = state.fragManager.createAtom(isotopeSpec.getChemEl(), frag); 1164 hydrogen.setIsotope(isotope); 1165 state.fragManager.createBond(atomWithHydrogenIsotope, hydrogen, 1); 1166 } 1167 } 1168 else { 1169 for (int j = 0; j < locants.length; j++) { 1170 Atom atom = frag.getAtomByLocantOrThrow(locants[j]); 1171 if (chemEl != atom.getElement()) { 1172 throw new StructureBuildingException("The atom at locant: " + locants[j] + " was not a " + chemEl.toString() ); 1173 } 1174 atom.setIsotope(isotope); 1175 } 1176 } 1177 } 1178 else { 1179 int multiplier = isotopeSpec.getMultiplier(); 1180 if (chemEl == ChemEl.H) { 1181 List<Atom> parentAtomsToApplyTo = FragmentTools.findnAtomsForSubstitution(frag, multiplier, 1); 1182 if (parentAtomsToApplyTo == null){ 1183 throw new StructureBuildingException("Failed to find sufficient hydrogen atoms for unlocanted hydrogen isotope replacement"); 1184 } 1185 if (AmbiguityChecker.isSubstitutionAmbiguous(parentAtomsToApplyTo, multiplier)) { 1186 if (!casIsotopeAmbiguitySpecialCase(frag, parentAtomsToApplyTo, multiplier)) { 1187 state.addIsAmbiguous("Position of hydrogen isotope on " + frag.getTokenEl().getValue()); 1188 } 1189 } 1190 for (int j = 0; j < multiplier; j++) { 1191 Atom atomWithHydrogenIsotope = parentAtomsToApplyTo.get(j); 1192 Atom hydrogen = state.fragManager.createAtom(isotopeSpec.getChemEl(), frag); 1193 hydrogen.setIsotope(isotope); 1194 state.fragManager.createBond(atomWithHydrogenIsotope, hydrogen, 1); 1195 } 1196 } 1197 else { 1198 List<Atom> parentAtomsToApplyTo = new ArrayList<Atom>(); 1199 for (Atom atom : frag.getAtomList()) { 1200 if (atom.getElement() == chemEl) { 1201 parentAtomsToApplyTo.add(atom); 1202 } 1203 } 1204 if (parentAtomsToApplyTo.size() < multiplier) { 1205 throw new StructureBuildingException("Failed to find sufficient atoms for " + chemEl.toString() + " isotope replacement"); 1206 } 1207 if (AmbiguityChecker.isSubstitutionAmbiguous(parentAtomsToApplyTo, multiplier)) { 1208 state.addIsAmbiguous("Position of isotope on " + frag.getTokenEl().getValue()); 1209 } 1210 for (int j = 0; j < multiplier; j++) { 1211 parentAtomsToApplyTo.get(j).setIsotope(isotope); 1212 } 1213 } 1214 } 1215 isotopeSpecification.detach(); 1216 } 1217 } 1218 casIsotopeAmbiguitySpecialCase(Fragment frag, List<Atom> parentAtomsToApplyTo, int multiplier)1219 private static boolean casIsotopeAmbiguitySpecialCase(Fragment frag, List<Atom> parentAtomsToApplyTo, int multiplier) throws StructureBuildingException { 1220 if (multiplier !=1) { 1221 return false; 1222 } 1223 List<Atom> atoms = frag.getAtomList(); 1224 Atom firstAtom = atoms.get(0); 1225 if (!parentAtomsToApplyTo.get(0).equals(firstAtom)) { 1226 return false; 1227 } 1228 ChemEl firstAtomEl = firstAtom.getElement(); 1229 if (atoms.size() ==2) { 1230 if (firstAtomEl == atoms.get(1).getElement()) { 1231 //e.g. ethane 1232 return true; 1233 } 1234 } 1235 else { 1236 int intraFragValency = frag.getIntraFragmentIncomingValency(firstAtom); 1237 boolean spareValency = firstAtom.hasSpareValency(); 1238 if (firstAtom.getAtomIsInACycle()) { 1239 for (int i = 1; i < atoms.size(); i++) { 1240 Atom atom = atoms.get(i); 1241 if (atom.getElement() != firstAtomEl){ 1242 return false; 1243 } 1244 if (frag.getIntraFragmentIncomingValency(atom) != intraFragValency){ 1245 return false; 1246 } 1247 if (atom.hasSpareValency() != spareValency){ 1248 return false; 1249 } 1250 } 1251 //e.g. benzene 1252 return true; 1253 } 1254 } 1255 return false; 1256 } 1257 findAtomForUnlocantedRadical(BuildState state, Fragment frag, OutAtom outAtom)1258 static Atom findAtomForUnlocantedRadical(BuildState state, Fragment frag, OutAtom outAtom) throws StructureBuildingException { 1259 List<Atom> possibleAtoms = FragmentTools.findnAtomsForSubstitution(frag, outAtom.getAtom(), 1, outAtom.getValency(), true); 1260 if (possibleAtoms == null){ 1261 throw new StructureBuildingException("Failed to assign all unlocanted radicals to actual atoms without violating valency"); 1262 } 1263 if (!((ALKANESTEM_SUBTYPE_VAL.equals(frag.getSubType()) || HETEROSTEM_SUBTYPE_VAL.equals(frag.getSubType())) && possibleAtoms.get(0).equals(frag.getFirstAtom()))) { 1264 if (AmbiguityChecker.isSubstitutionAmbiguous(possibleAtoms, 1)) { 1265 state.addIsAmbiguous("Positioning of radical on: " + frag.getTokenEl().getValue()); 1266 } 1267 } 1268 return possibleAtoms.get(0); 1269 } 1270 1271 findAlternativeBondsToUnSaturate(Fragment frag, int bondOrder, Collection<Bond> bondsToIgnore)1272 private static List<Bond> findAlternativeBondsToUnSaturate(Fragment frag, int bondOrder, Collection<Bond> bondsToIgnore) { 1273 return findBondsToUnSaturate(frag, bondOrder, false, new HashSet<Bond>(bondsToIgnore)); 1274 } 1275 1276 /** 1277 * Finds bond within the fragment that can have their bondOrder increased to the specified bond order 1278 * Depending on the value of allowAdjacentUnsaturatedBonds adjacent higher bonds are prevented 1279 * @param frag 1280 * @param bondOrder 1281 * @param allowAdjacentUnsaturatedBonds 1282 * @return 1283 */ findBondsToUnSaturate(Fragment frag, int bondOrder, boolean allowAdjacentUnsaturatedBonds)1284 static List<Bond> findBondsToUnSaturate(Fragment frag, int bondOrder, boolean allowAdjacentUnsaturatedBonds) { 1285 return findBondsToUnSaturate(frag, bondOrder, allowAdjacentUnsaturatedBonds, Collections.<Bond>emptySet()); 1286 } 1287 findBondsToUnSaturate(Fragment frag, int bondOrder, boolean allowAdjacentUnsaturatedBonds, Set<Bond> bondsToIgnore)1288 private static List<Bond> findBondsToUnSaturate(Fragment frag, int bondOrder, boolean allowAdjacentUnsaturatedBonds, Set<Bond> bondsToIgnore) { 1289 List<Bond> bondsToUnsaturate = new ArrayList<Bond>(); 1290 mainLoop: for (Atom atom1 : frag.getAtomList()) { 1291 if (atom1.hasSpareValency() || SUFFIX_TYPE_VAL.equals(atom1.getType()) || atom1.getProperty(Atom.ISALDEHYDE) !=null) { 1292 continue; 1293 } 1294 List<Bond> bonds = atom1.getBonds(); 1295 int incomingValency = 0; 1296 for (Bond bond : bonds) { 1297 //don't place implicitly unsaturated bonds next to each other 1298 if (bond.getOrder() != 1 && !allowAdjacentUnsaturatedBonds) { 1299 continue mainLoop; 1300 } 1301 if (bondsToUnsaturate.contains(bond)) { 1302 if (!allowAdjacentUnsaturatedBonds) { 1303 continue mainLoop; 1304 } 1305 incomingValency += bondOrder; 1306 } 1307 else { 1308 incomingValency += bond.getOrder(); 1309 } 1310 } 1311 1312 Integer maxVal = getLambdaValencyOrHwValencyOrMaxValIfCharged(atom1); 1313 if(maxVal != null && (incomingValency + (bondOrder - 1) + atom1.getOutValency()) > maxVal) { 1314 continue; 1315 } 1316 bondLoop: for (Bond bond : bonds) { 1317 if (bond.getOrder() == 1 && !bondsToUnsaturate.contains(bond) && !bondsToIgnore.contains(bond)) { 1318 Atom atom2 = bond.getOtherAtom(atom1); 1319 if (frag.getAtomByID(atom2.getID()) != null) {//check other atom is actually in the fragment! 1320 if (atom2.hasSpareValency() || SUFFIX_TYPE_VAL.equals(atom2.getType()) || atom2.getProperty(Atom.ISALDEHYDE) !=null) { 1321 continue; 1322 } 1323 int incomingValency2 = 0; 1324 for (Bond bond2 : atom2.getBonds()) { 1325 //don't place implicitly unsaturated bonds next to each other 1326 if (bond2.getOrder() != 1 && !allowAdjacentUnsaturatedBonds) { 1327 continue bondLoop; 1328 } 1329 if (bondsToUnsaturate.contains(bond2)) { 1330 if (!allowAdjacentUnsaturatedBonds) { 1331 continue bondLoop; 1332 } 1333 incomingValency2 += bondOrder; 1334 } 1335 else { 1336 incomingValency2 += bond2.getOrder(); 1337 } 1338 } 1339 1340 Integer maxVal2 = getLambdaValencyOrHwValencyOrMaxValIfCharged(atom2); 1341 if(maxVal2 != null && (incomingValency2 + (bondOrder - 1) + atom2.getOutValency()) > maxVal2) { 1342 continue; 1343 } 1344 bondsToUnsaturate.add(bond); 1345 break bondLoop; 1346 } 1347 } 1348 } 1349 } 1350 return bondsToUnsaturate; 1351 } 1352 1353 1354 /** 1355 * Return the lambda convention derived valency + protons if set 1356 * Otherwise if charge is 0 returns {@link ValencyChecker#getHWValency(ChemEl)} 1357 * Otherwise return {@link ValencyChecker#getMaximumValency(ChemEl, int)} 1358 * Returns null if the maximum valency is not known 1359 * @param a 1360 * @return 1361 */ getLambdaValencyOrHwValencyOrMaxValIfCharged(Atom a)1362 static Integer getLambdaValencyOrHwValencyOrMaxValIfCharged(Atom a) { 1363 if (a.getLambdaConventionValency() != null) { 1364 return a.getLambdaConventionValency() + a.getProtonsExplicitlyAddedOrRemoved(); 1365 } 1366 else if (a.getCharge() == 0){ 1367 return ValencyChecker.getHWValency(a.getElement()); 1368 } 1369 else { 1370 return ValencyChecker.getMaximumValency(a.getElement(), a.getCharge()); 1371 } 1372 } 1373 performAdditiveOperations(BuildState state, Element subBracketOrRoot)1374 private static void performAdditiveOperations(BuildState state, Element subBracketOrRoot) throws StructureBuildingException { 1375 if (subBracketOrRoot.getAttribute(LOCANT_ATR) != null){//additive nomenclature does not employ locants 1376 return; 1377 } 1378 Element group; 1379 if (subBracketOrRoot.getName().equals(BRACKET_EL)){ 1380 group =findRightMostGroupInBracket(subBracketOrRoot); 1381 } 1382 else{ 1383 group =subBracketOrRoot.getFirstChildElement(GROUP_EL); 1384 } 1385 if (group.getAttribute(RESOLVED_ATR) != null){ 1386 return; 1387 } 1388 Fragment frag = group.getFrag(); 1389 int outAtomCount = frag.getOutAtomCount(); 1390 if (outAtomCount >=1){ 1391 if (subBracketOrRoot.getAttribute(MULTIPLIER_ATR) ==null){ 1392 Element nextSiblingEl = OpsinTools.getNextSibling(subBracketOrRoot); 1393 if (nextSiblingEl.getAttribute(MULTIPLIER_ATR) != null && 1394 (outAtomCount >= Integer.parseInt(nextSiblingEl.getAttributeValue(MULTIPLIER_ATR)) || //probably multiplicative nomenclature, should be as many outAtoms as the multiplier 1395 outAtomCount==1 && frag.getOutAtom(0).getValency()==Integer.parseInt(nextSiblingEl.getAttributeValue(MULTIPLIER_ATR))) && 1396 hasRootLikeOrMultiRadicalGroup(nextSiblingEl)){ 1397 if (outAtomCount==1){//special case e.g. 4,4'-(benzylidene)dianiline 1398 FragmentTools.splitOutAtomIntoValency1OutAtoms(frag.getOutAtom(0)); 1399 //special case where something like benzylidene is being used as if it meant benzdiyl for multiplicative nomenclature 1400 //this is allowed in the IUPAC 79 recommendations but not recommended in the current recommendations 1401 } 1402 performMultiplicativeOperations(state, group, nextSiblingEl); 1403 } 1404 else if (group.getAttribute(ISAMULTIRADICAL_ATR) != null){//additive nomenclature e.g. ethyleneoxy 1405 Fragment nextFrag = getNextInScopeMultiValentFragment(subBracketOrRoot); 1406 if (nextFrag != null){ 1407 Element nextMultiRadicalGroup = nextFrag.getTokenEl(); 1408 Element parentSubOrRoot = nextMultiRadicalGroup.getParent(); 1409 if (state.currentWordRule != WordRule.polymer){//imino does not behave like a substituent in polymers only as a linker 1410 if (nextMultiRadicalGroup.getAttribute(IMINOLIKE_ATR) != null){//imino/methylene can just act as normal substituents, should an additive bond really be made??? 1411 Fragment adjacentFrag = OpsinTools.getNextGroup(subBracketOrRoot).getFrag(); 1412 1413 if (nextFrag != adjacentFrag){//imino is not the absolute next frag 1414 if (potentiallyCanSubstitute(nextMultiRadicalGroup.getParent()) || potentiallyCanSubstitute(nextMultiRadicalGroup.getParent().getParent())){ 1415 return; 1416 } 1417 } 1418 } 1419 if (group.getAttribute(IMINOLIKE_ATR) != null && levelsToWordEl(group) > levelsToWordEl(nextMultiRadicalGroup)){ 1420 return;//e.g. imino substitutes ((chloroimino)ethylene)dibenzene 1421 } 1422 } 1423 if (parentSubOrRoot.getAttribute(MULTIPLIER_ATR) != null){ 1424 throw new StructureBuildingException("Attempted to form additive bond to a multiplied component"); 1425 } 1426 group.addAttribute(new Attribute(RESOLVED_ATR, "yes")); 1427 joinFragmentsAdditively(state, frag, nextFrag); 1428 } 1429 } 1430 else {//e.g. chlorocarbonyl or hydroxy(sulfanyl)phosphoryl 1431 List<Fragment> siblingFragments = findAlternativeFragments(subBracketOrRoot); 1432 if (siblingFragments.size()>0){ 1433 Fragment nextFrag = siblingFragments.get(siblingFragments.size()-1); 1434 Element nextGroup = nextFrag.getTokenEl(); 1435 if (nextGroup.getAttribute(ACCEPTSADDITIVEBONDS_ATR) != null && nextGroup.getAttribute(ISAMULTIRADICAL_ATR) != null && (nextFrag.getOutAtomCount()>1|| nextGroup.getAttribute(RESOLVED_ATR) != null && nextFrag.getOutAtomCount()>=1 )){ 1436 Atom toAtom = nextFrag.getOutAtom(0).getAtom(); 1437 if (calculateSubstitutableHydrogenAtoms(toAtom) ==0){ 1438 group.addAttribute(new Attribute(RESOLVED_ATR, "yes")); 1439 joinFragmentsAdditively(state, frag, nextFrag);//e.g. aminocarbonyl or aminothio 1440 } 1441 } 1442 if (group.getAttribute(RESOLVED_ATR)==null && siblingFragments.size()>1){ 1443 for (int i = 0; i< siblingFragments.size()-1; i++) { 1444 Fragment lastFrag = siblingFragments.get(i); 1445 Element lastGroup = lastFrag.getTokenEl(); 1446 if (lastGroup.getAttribute(ACCEPTSADDITIVEBONDS_ATR) != null && lastGroup.getAttribute(ISAMULTIRADICAL_ATR) != null && (lastFrag.getOutAtomCount()>1|| lastGroup.getAttribute(RESOLVED_ATR) != null && lastFrag.getOutAtomCount()>=1 )){ 1447 Atom toAtom = lastFrag.getOutAtom(0).getAtom(); 1448 if (calculateSubstitutableHydrogenAtoms(toAtom) ==0){ 1449 group.addAttribute(new Attribute(RESOLVED_ATR, "yes")); 1450 joinFragmentsAdditively(state, frag, lastFrag);//e.g. hydroxy(sulfanyl)phosphoryl 1451 } 1452 break; 1453 } 1454 1455 //loop may continue if lastFrag was in fact completely unsubstitutable e.g. hydroxy...phosphoryloxy. The oxy is unsubstituable as the phosphoryl will already have bonded to it 1456 if (FragmentTools.findSubstituableAtoms(lastFrag, frag.getOutAtom(outAtomCount - 1).getValency()).size() > 0) { 1457 break; 1458 } 1459 } 1460 } 1461 } 1462 } 1463 } 1464 else{// e.g. dimethoxyphosphoryl or bis(methylamino)phosphoryl 1465 List<Fragment> siblingFragments = findAlternativeFragments(subBracketOrRoot); 1466 if (siblingFragments.size()>0){ 1467 int multiplier = Integer.parseInt(subBracketOrRoot.getAttributeValue(MULTIPLIER_ATR)); 1468 Fragment nextFrag = siblingFragments.get(siblingFragments.size()-1); 1469 Element nextGroup = nextFrag.getTokenEl(); 1470 if (nextGroup.getAttribute(ACCEPTSADDITIVEBONDS_ATR) != null && nextGroup.getAttribute(ISAMULTIRADICAL_ATR) != null && (nextFrag.getOutAtomCount()>=multiplier|| nextGroup.getAttribute(RESOLVED_ATR) != null && nextFrag.getOutAtomCount()>=multiplier +1 )){ 1471 Atom toAtom = nextFrag.getOutAtom(0).getAtom(); 1472 if (calculateSubstitutableHydrogenAtoms(toAtom) ==0){ 1473 group.addAttribute(new Attribute(RESOLVED_ATR, "yes")); 1474 multiplyOutAndAdditivelyBond(state, subBracketOrRoot, nextFrag);//e.g.dihydroxyphosphoryl 1475 } 1476 } 1477 if (group.getAttribute(RESOLVED_ATR)==null && siblingFragments.size()>1){ 1478 for (int i = 0; i< siblingFragments.size()-1; i++) { 1479 Fragment lastFrag = siblingFragments.get(i); 1480 Element lastGroup = lastFrag.getTokenEl(); 1481 if (lastGroup.getAttribute(ACCEPTSADDITIVEBONDS_ATR) != null && lastGroup.getAttribute(ISAMULTIRADICAL_ATR) != null && (lastFrag.getOutAtomCount()>=multiplier|| lastGroup.getAttribute(RESOLVED_ATR) != null && lastFrag.getOutAtomCount()>=multiplier +1 )){ 1482 Atom toAtom = lastFrag.getOutAtom(0).getAtom(); 1483 if (calculateSubstitutableHydrogenAtoms(toAtom) ==0){ 1484 group.addAttribute(new Attribute(RESOLVED_ATR, "yes")); 1485 multiplyOutAndAdditivelyBond(state, subBracketOrRoot, lastFrag);//e.g. dihydroxyphosphoryloxy 1486 } 1487 break; 1488 } 1489 1490 //loop may continue if lastFrag was in fact completely unsubstitutable e.g. hydroxy...phosphoryloxy. The oxy is unsubstituable as the phosphoryl will already have bonded to it 1491 if (FragmentTools.findSubstituableAtoms(lastFrag, frag.getOutAtom(outAtomCount - 1).getValency()).size() > 0) { 1492 break; 1493 } 1494 } 1495 } 1496 } 1497 } 1498 } 1499 } 1500 1501 /** 1502 * Searches the input for something that either is a multiRadical or has no outAtoms i.e. not dimethyl 1503 * @param subBracketOrRoot 1504 * @return 1505 */ hasRootLikeOrMultiRadicalGroup(Element subBracketOrRoot)1506 private static boolean hasRootLikeOrMultiRadicalGroup(Element subBracketOrRoot) { 1507 List<Element> groups = OpsinTools.getDescendantElementsWithTagName(subBracketOrRoot, GROUP_EL); 1508 if (subBracketOrRoot.getAttribute(INLOCANTS_ATR) != null){ 1509 return true;// a terminus with specified inLocants 1510 } 1511 for (Element group : groups) { 1512 Fragment frag = group.getFrag(); 1513 int outAtomCount =frag.getOutAtomCount(); 1514 if (group.getAttribute(ISAMULTIRADICAL_ATR) != null){ 1515 if (outAtomCount >=1 ){ 1516 return true;//a multi radical 1517 } 1518 } 1519 else if (outAtomCount ==0 && group.getAttribute(RESOLVED_ATR)==null){ 1520 return true;// a terminus 1521 } 1522 } 1523 return false; 1524 } 1525 1526 /** 1527 * Multiply out subOrBracket and additively bond all substituents to the specified fragment 1528 * @param state 1529 * @param subOrBracket 1530 * @param fragToAdditivelyBondTo 1531 * @throws StructureBuildingException 1532 */ multiplyOutAndAdditivelyBond(BuildState state, Element subOrBracket, Fragment fragToAdditivelyBondTo)1533 private static void multiplyOutAndAdditivelyBond(BuildState state, Element subOrBracket, Fragment fragToAdditivelyBondTo) throws StructureBuildingException { 1534 int multiplier = Integer.parseInt(subOrBracket.getAttributeValue(MULTIPLIER_ATR)); 1535 subOrBracket.removeAttribute(subOrBracket.getAttribute(MULTIPLIER_ATR)); 1536 List<Element> clonedElements = new ArrayList<Element>(); 1537 List<Element> elementsNotToBeMultiplied = new ArrayList<Element>();//anything before the multiplier in the sub/bracket 1538 for (int i = multiplier -1; i >=0; i--) { 1539 Element currentElement; 1540 if (i != 0){ 1541 currentElement = state.fragManager.cloneElement(state, subOrBracket, i); 1542 addPrimesToLocantedStereochemistryElements(currentElement, StringTools.multiplyString("'", i));//Stereochemistry elements with locants will need to have their locants primed (stereochemistry is only processed after structure building) 1543 clonedElements.add(currentElement); 1544 } 1545 else{ 1546 currentElement = subOrBracket; 1547 Element multiplierEl = subOrBracket.getFirstChildElement(MULTIPLIER_EL); 1548 if (multiplierEl ==null){ 1549 throw new StructureBuildingException("Multiplier not found where multiplier expected"); 1550 } 1551 for (int j = subOrBracket.indexOf(multiplierEl) -1 ; j >=0 ; j--) { 1552 Element el = subOrBracket.getChild(j); 1553 el.detach(); 1554 elementsNotToBeMultiplied.add(el); 1555 } 1556 multiplierEl.detach(); 1557 } 1558 Element group; 1559 if (currentElement.getName().equals(BRACKET_EL)){ 1560 group = findRightMostGroupInBracket(currentElement); 1561 } 1562 else{ 1563 group = currentElement.getFirstChildElement(GROUP_EL); 1564 } 1565 Fragment frag = group.getFrag(); 1566 if (frag.getOutAtomCount() != 1 ){ 1567 throw new StructureBuildingException("Additive bond formation failure: Fragment expected to have one OutAtom in this case but had: "+ frag.getOutAtomCount()); 1568 } 1569 joinFragmentsAdditively(state, frag, fragToAdditivelyBondTo); 1570 } 1571 for (Element clone : clonedElements) {//make sure cloned substituents don't substitute onto each other! 1572 OpsinTools.insertAfter(subOrBracket, clone); 1573 } 1574 for (Element el : elementsNotToBeMultiplied) {//re-add anything before multiplier to original subOrBracket 1575 subOrBracket.insertChild(el, 0); 1576 } 1577 } 1578 1579 /** 1580 * Creates a build results from the input group for use as the input to the real performMultiplicativeOperations function 1581 * @param state 1582 * @param group 1583 * @param multipliedParent 1584 * @throws StructureBuildingException 1585 */ performMultiplicativeOperations(BuildState state, Element group, Element multipliedParent)1586 private static void performMultiplicativeOperations(BuildState state, Element group, Element multipliedParent) throws StructureBuildingException{ 1587 BuildResults multiRadicalBR = new BuildResults(group.getParent()); 1588 performMultiplicativeOperations(state, multiRadicalBR, multipliedParent); 1589 } 1590 performMultiplicativeOperations(BuildState state, BuildResults multiRadicalBR, Element multipliedParent)1591 private static void performMultiplicativeOperations(BuildState state, BuildResults multiRadicalBR, Element multipliedParent) throws StructureBuildingException { 1592 int multiplier = Integer.parseInt(multipliedParent.getAttributeValue(MULTIPLIER_ATR)); 1593 if (multiplier != multiRadicalBR.getOutAtomCount()){ 1594 if (multiRadicalBR.getOutAtomCount() == multiplier*2){ 1595 //TODO substituents like nitrilo can have their outatoms combined 1596 } 1597 if (multiplier != multiRadicalBR.getOutAtomCount()){ 1598 throw new StructureBuildingException("Multiplication bond formation failure: number of outAtoms disagree with multiplier(multiplier: " + multiplier + ", outAtom count: " + multiRadicalBR.getOutAtomCount()+ ")"); 1599 } 1600 } 1601 if (LOG.isTraceEnabled()){LOG.trace(multiplier +" multiplicative bonds to be formed");} 1602 multipliedParent.removeAttribute(multipliedParent.getAttribute(MULTIPLIER_ATR)); 1603 List<String> inLocants = null; 1604 String inLocantsString = multipliedParent.getAttributeValue(INLOCANTS_ATR); 1605 if (inLocantsString != null){//true for the root of a multiplicative name 1606 if (inLocantsString.equals(INLOCANTS_DEFAULT)){ 1607 inLocants = new ArrayList<String>(multiplier); 1608 for (int i = 0; i < multiplier; i++) { 1609 inLocants.add(INLOCANTS_DEFAULT); 1610 } 1611 } 1612 else{ 1613 inLocants = StringTools.arrayToList(inLocantsString.split(",")); 1614 if (inLocants.size() != multiplier){ 1615 throw new StructureBuildingException("Mismatch between multiplier and number of inLocants in multiplicative nomenclature"); 1616 } 1617 } 1618 } 1619 List<Element> clonedElements = new ArrayList<Element>(); 1620 BuildResults newBr = new BuildResults(); 1621 for (int i = multiplier -1; i >=0; i--) { 1622 Element multipliedElement; 1623 if (i != 0){ 1624 multipliedElement = state.fragManager.cloneElement(state, multipliedParent, i); 1625 addPrimesToLocantedStereochemistryElements(multipliedElement, StringTools.multiplyString("'", i));//Stereochemistry elements with locants will need to have their locants primed (stereochemistry is only processed after structure building) 1626 clonedElements.add(multipliedElement); 1627 } 1628 else{ 1629 multipliedElement = multipliedParent; 1630 } 1631 1632 //determine group that will be additively bonded to 1633 Element multipliedGroup; 1634 if (multipliedElement.getName().equals(BRACKET_EL)) { 1635 multipliedGroup = getFirstMultiValentGroup(multipliedElement); 1636 if (multipliedGroup == null){//root will not have a multivalent group 1637 List<Element> groups = OpsinTools.getDescendantElementsWithTagName(multipliedElement, GROUP_EL); 1638 if (inLocants == null){ 1639 throw new StructureBuildingException("OPSIN Bug? in locants must be specified for a multiplied root in multiplicative nomenclature"); 1640 } 1641 if (inLocants.get(0).equals(INLOCANTS_DEFAULT)){ 1642 multipliedGroup = groups.get(groups.size() - 1); 1643 } 1644 else{ 1645 groupLoop: for (int j = groups.size()-1; j >=0; j--) { 1646 Fragment possibleFrag = groups.get(j).getFrag(); 1647 for (String locant : inLocants) { 1648 if (possibleFrag.hasLocant(locant)){ 1649 multipliedGroup = groups.get(j); 1650 break groupLoop; 1651 } 1652 } 1653 } 1654 } 1655 if (multipliedGroup == null){ 1656 throw new StructureBuildingException("Locants for inAtoms on the root were either misassigned to the root or were invalid: " + inLocants.toString() +" could not be assigned!"); 1657 } 1658 } 1659 } 1660 else{ 1661 multipliedGroup = multipliedElement.getFirstChildElement(GROUP_EL); 1662 } 1663 Fragment multipliedFrag = multipliedGroup.getFrag(); 1664 1665 OutAtom multiRadicalOutAtom = multiRadicalBR.getOutAtom(i); 1666 Fragment multiRadicalFrag = multiRadicalOutAtom.getAtom().getFrag(); 1667 Element multiRadicalGroup = multiRadicalFrag.getTokenEl(); 1668 if (multiRadicalGroup.getAttribute(RESOLVED_ATR) == null){ 1669 resolveUnLocantedFeatures(state, multiRadicalGroup.getParent());//the addition of unlocanted unsaturators can effect the position of radicals e.g. diazenyl 1670 multiRadicalGroup.addAttribute(new Attribute(RESOLVED_ATR, "yes")); 1671 } 1672 1673 boolean substitutivelyBondedToRoot = false; 1674 if (inLocants != null) { 1675 Element rightMostGroup; 1676 if (multipliedElement.getName().equals(BRACKET_EL)) { 1677 rightMostGroup = findRightMostGroupInBracket(multipliedElement); 1678 } 1679 else{ 1680 rightMostGroup = multipliedElement.getFirstChildElement(GROUP_EL); 1681 } 1682 rightMostGroup.addAttribute(new Attribute(RESOLVED_ATR, "yes"));//this group will not be used further within this word but can in principle be a substituent e.g. methylenedisulfonyl dichloride 1683 if (multipliedGroup.getAttribute(ISAMULTIRADICAL_ATR) != null) {//e.g. methylenedisulfonyl dichloride 1684 if (!multipliedParent.getAttributeValue(INLOCANTS_ATR).equals(INLOCANTS_DEFAULT)) { 1685 throw new StructureBuildingException("inLocants should not be specified for a multiradical parent in multiplicative nomenclature"); 1686 } 1687 } 1688 else{ 1689 Atom from = multiRadicalOutAtom.getAtom(); 1690 int bondOrder = multiRadicalOutAtom.getValency(); 1691 //bonding will be substitutive rather additive as this is bonding to a root 1692 Atom atomToJoinTo = null; 1693 for (int j = inLocants.size() -1; j >=0; j--) { 1694 String locant = inLocants.get(j); 1695 if (locant.equals(INLOCANTS_DEFAULT)){//note that if one entry in inLocantArray is default then they all are "default" 1696 List<Atom> possibleAtoms = getPossibleAtomsForUnlocantedConnectionToMultipliedRoot(multipliedGroup, bondOrder, i); 1697 if (possibleAtoms.isEmpty()) { 1698 throw new StructureBuildingException("No suitable atom found for multiplicative operation"); 1699 } 1700 if (AmbiguityChecker.isSubstitutionAmbiguous(possibleAtoms, 1)) { 1701 state.addIsAmbiguous("Connection to multiplied group: " + multipliedGroup.getValue()); 1702 } 1703 atomToJoinTo = possibleAtoms.get(0); 1704 inLocants.remove(j); 1705 break; 1706 } 1707 else{ 1708 Atom inAtom = multipliedFrag.getAtomByLocant(locant); 1709 if (inAtom != null) { 1710 atomToJoinTo = inAtom; 1711 inLocants.remove(j); 1712 break; 1713 } 1714 } 1715 } 1716 if (atomToJoinTo == null){ 1717 throw new StructureBuildingException("Locants for inAtoms on the root were either misassigned to the root or were invalid: " + inLocants.toString() +" could not be assigned!"); 1718 } 1719 1720 if (!multiRadicalOutAtom.isSetExplicitly()) {//not set explicitly so may be an inappropriate atom 1721 from = findAtomForUnlocantedRadical(state, from.getFrag(), multiRadicalOutAtom); 1722 } 1723 multiRadicalFrag.removeOutAtom(multiRadicalOutAtom); 1724 1725 state.fragManager.createBond(from, atomToJoinTo, bondOrder); 1726 if (LOG.isTraceEnabled()){LOG.trace("Substitutively bonded (multiplicative to root) " + from.getID() + " (" + from.getFrag().getTokenEl().getValue() + ") " + atomToJoinTo.getID() + " (" + atomToJoinTo.getFrag().getTokenEl().getValue() + ")");} 1727 substitutivelyBondedToRoot = true; 1728 } 1729 } 1730 if (!substitutivelyBondedToRoot) { 1731 joinFragmentsAdditively(state, multiRadicalFrag, multipliedFrag); 1732 } 1733 if (multipliedElement.getName().equals(BRACKET_EL)) { 1734 recursivelyResolveUnLocantedFeatures(state, multipliedElement);//there may be outAtoms that are involved in unlocanted substitution, these can be safely used now e.g. ...bis((3-hydroxy-4-methoxyphenyl)methylene) where (3-hydroxy-4-methoxyphenyl)methylene is the currentElement 1735 } 1736 1737 if (inLocants == null) { 1738 //currentElement is not a root element. Need to build up a new BuildResults so as to call performMultiplicativeOperations again 1739 //at this stage an outAtom has been removed from the fragment within currentElement through an additive bond 1740 newBr.mergeBuildResults(new BuildResults(multipliedElement)); 1741 } 1742 } 1743 1744 if (newBr.getFragmentCount() == 1) { 1745 throw new StructureBuildingException("Multiplicative nomenclature cannot yield only one temporary terminal fragment"); 1746 } 1747 if (newBr.getFragmentCount() >= 2) { 1748 List<Element> siblings = OpsinTools.getNextSiblingsOfTypes(multipliedParent, new String[]{SUBSTITUENT_EL, BRACKET_EL, ROOT_EL}); 1749 if (siblings.size() == 0) { 1750 Element parentOfMultipliedEl = multipliedParent.getParent(); 1751 if (parentOfMultipliedEl.getName().equals(BRACKET_EL)) {//brackets are allowed 1752 siblings = OpsinTools.getNextSiblingsOfTypes(parentOfMultipliedEl, new String[]{SUBSTITUENT_EL, BRACKET_EL, ROOT_EL}); 1753 if (siblings.get(0).getAttribute(MULTIPLIER_ATR) == null) { 1754 throw new StructureBuildingException("Multiplier not found where multiplier was expected for succesful multiplicative nomenclature"); 1755 } 1756 performMultiplicativeOperations(state, newBr, siblings.get(0)); 1757 } 1758 else{ 1759 throw new StructureBuildingException("Could not find suitable element to continue multiplicative nomenclature"); 1760 } 1761 } 1762 else{ 1763 if (siblings.get(0).getAttribute(MULTIPLIER_ATR) == null) { 1764 throw new StructureBuildingException("Multiplier not found where multiplier was expected for successful multiplicative nomenclature"); 1765 } 1766 performMultiplicativeOperations(state, newBr, siblings.get(0)); 1767 } 1768 } 1769 1770 for (Element clone : clonedElements) {//only insert cloned substituents now so they don't substitute onto each other! 1771 OpsinTools.insertAfter(multipliedParent, clone); 1772 } 1773 } 1774 1775 /** 1776 * Applies special case to prefer the end of chains with the usableAsAJoiner attributes cf. p-phenylenedipropionic acid 1777 * Such cases will still be considered to be formally ambiguous 1778 * @param multipliedGroup 1779 * @param multipliedFrag 1780 * @param bondOrder 1781 * @param primesAdded 1782 * @return 1783 * @throws StructureBuildingException 1784 */ getPossibleAtomsForUnlocantedConnectionToMultipliedRoot(Element multipliedGroup, int bondOrder, int primesAdded)1785 private static List<Atom> getPossibleAtomsForUnlocantedConnectionToMultipliedRoot(Element multipliedGroup, int bondOrder, int primesAdded) throws StructureBuildingException { 1786 Fragment multipliedFrag = multipliedGroup.getFrag(); 1787 if ("yes".equals(multipliedGroup.getAttributeValue(USABLEASJOINER_ATR)) && multipliedFrag.getDefaultInAtom() == null) { 1788 Element previous = OpsinTools.getPrevious(multipliedGroup); 1789 if (previous != null && previous.getName().equals(MULTIPLIER_EL)){ 1790 String locant = getLocantOfEndOfChainIfGreaterThan1(multipliedFrag, primesAdded); 1791 if (locant != null) { 1792 Atom preferredAtom = multipliedFrag.getAtomByLocantOrThrow(locant); 1793 List<Atom> possibleAtoms = FragmentTools.findnAtomsForSubstitution(multipliedFrag.getAtomList(), preferredAtom, 1, bondOrder, true); 1794 if (possibleAtoms == null) { 1795 possibleAtoms = Collections.emptyList(); 1796 } 1797 return possibleAtoms; 1798 } 1799 } 1800 } 1801 return FragmentTools.findSubstituableAtoms(multipliedFrag, bondOrder); 1802 } 1803 getLocantOfEndOfChainIfGreaterThan1(Fragment frag, int primes)1804 private static String getLocantOfEndOfChainIfGreaterThan1(Fragment frag, int primes) { 1805 String primesStr = StringTools.multiplyString("'", primes); 1806 int length = 0; 1807 Atom next = frag.getAtomByLocant(Integer.toString(length + 1) + primesStr); 1808 Atom previous = null; 1809 while (next != null){ 1810 if (previous != null && previous.getBondToAtom(next) == null){ 1811 break; 1812 } 1813 length++; 1814 previous = next; 1815 next = frag.getAtomByLocant(Integer.toString(length + 1) + primesStr); 1816 } 1817 if (length > 1){ 1818 return Integer.toString(length) + primesStr; 1819 } 1820 return null; 1821 } 1822 1823 /** 1824 * Given a subsituent/bracket finds the next multi valent substituent/root that is in scope and hence its group 1825 * e.g. for oxy(dichloromethyl)methylene given oxy substituent the methylene group would be found 1826 * for oxy(dichloroethylene) given oxy substituent the ethylene group would be found 1827 * for oxy(carbonylimino) given oxy carbonyl would be found 1828 * @param substituentOrBracket 1829 * @return frag 1830 * @throws StructureBuildingException 1831 */ getNextInScopeMultiValentFragment(Element substituentOrBracket)1832 private static Fragment getNextInScopeMultiValentFragment(Element substituentOrBracket) throws StructureBuildingException { 1833 if (!substituentOrBracket.getName().equals(SUBSTITUENT_EL) && !substituentOrBracket.getName().equals(BRACKET_EL)){ 1834 throw new StructureBuildingException("Input to this function should be a substituent or bracket"); 1835 } 1836 if (substituentOrBracket.getParent()==null){ 1837 throw new StructureBuildingException("substituent did not have a parent!"); 1838 } 1839 Element parent = substituentOrBracket.getParent(); 1840 1841 List<Element> children = OpsinTools.getChildElementsWithTagNames(parent, new String[]{SUBSTITUENT_EL, BRACKET_EL, ROOT_EL});//will be returned in index order 1842 int indexOfSubstituent =parent.indexOf(substituentOrBracket); 1843 for (Element child : children) { 1844 if (parent.indexOf(child) <=indexOfSubstituent){//only want things after the input 1845 continue; 1846 } 1847 if (child.getAttribute(MULTIPLIER_ATR) != null){ 1848 continue; 1849 } 1850 List<Element> childDescendants; 1851 if (child.getName().equals(BRACKET_EL)){ 1852 childDescendants = OpsinTools.getDescendantElementsWithTagNames(child, new String[]{SUBSTITUENT_EL, ROOT_EL});//will be returned in depth-first order 1853 } 1854 else{ 1855 childDescendants =new ArrayList<Element>(); 1856 childDescendants.add(child); 1857 } 1858 for (Element descendantChild : childDescendants) { 1859 Element group = descendantChild.getFirstChildElement(GROUP_EL); 1860 if (group == null){ 1861 throw new StructureBuildingException("substituent/root is missing its group"); 1862 } 1863 Fragment possibleFrag = group.getFrag(); 1864 if (group.getAttribute(ISAMULTIRADICAL_ATR) != null && 1865 (possibleFrag.getOutAtomCount() >=2 || (possibleFrag.getOutAtomCount() >=1 && group.getAttribute(RESOLVED_ATR) != null ))){ 1866 return possibleFrag; 1867 } 1868 } 1869 } 1870 return null; 1871 } 1872 1873 /** 1874 * Given a bracket searches in a depth first manner for the first multi valent group 1875 * @param bracket 1876 * @return group 1877 * @throws StructureBuildingException 1878 */ getFirstMultiValentGroup(Element bracket)1879 private static Element getFirstMultiValentGroup(Element bracket) throws StructureBuildingException { 1880 if (!bracket.getName().equals(BRACKET_EL)){ 1881 throw new StructureBuildingException("Input to this function should be a bracket"); 1882 } 1883 1884 List<Element> groups = OpsinTools.getDescendantElementsWithTagName(bracket, GROUP_EL);//will be returned in index order 1885 for (Element group : groups) { 1886 Fragment possibleFrag = group.getFrag(); 1887 if (group.getAttribute(ISAMULTIRADICAL_ATR) != null && 1888 (possibleFrag.getOutAtomCount() >=2 || (possibleFrag.getOutAtomCount() >=1 && group.getAttribute(RESOLVED_ATR) != null ))){ 1889 return group; 1890 } 1891 } 1892 return null; 1893 } 1894 joinFragmentsAdditively(BuildState state, Fragment fragToBeJoined, Fragment parentFrag)1895 private static void joinFragmentsAdditively(BuildState state, Fragment fragToBeJoined, Fragment parentFrag) throws StructureBuildingException { 1896 Element elOfFragToBeJoined = fragToBeJoined.getTokenEl(); 1897 if (EPOXYLIKE_SUBTYPE_VAL.equals(elOfFragToBeJoined.getAttributeValue(SUBTYPE_ATR))){ 1898 for (int i = 0, l = fragToBeJoined.getOutAtomCount(); i < l; i++) { 1899 OutAtom outAtom = fragToBeJoined.getOutAtom(i); 1900 if (outAtom.getLocant() != null){ 1901 throw new StructureBuildingException("Inappropriate use of " + elOfFragToBeJoined.getValue()); 1902 } 1903 } 1904 } 1905 int outAtomCountOnFragToBeJoined = fragToBeJoined.getOutAtomCount(); 1906 if (outAtomCountOnFragToBeJoined ==0){ 1907 throw new StructureBuildingException("Additive bond formation failure: Fragment expected to have at least one OutAtom but had none"); 1908 } 1909 1910 if (parentFrag.getOutAtomCount() == 0){ 1911 throw new StructureBuildingException("Additive bond formation failure: Fragment expected to have at least one OutAtom but had none"); 1912 } 1913 OutAtom in = null; 1914 if (parentFrag.getOutAtomCount() > 1){ 1915 int firstOutAtomOrder = parentFrag.getOutAtom(0).getValency(); 1916 boolean unresolvedAmbiguity =false; 1917 for (int i = 1, l = parentFrag.getOutAtomCount(); i < l; i++) { 1918 OutAtom outAtom = parentFrag.getOutAtom(i); 1919 if (outAtom.getValency() != firstOutAtomOrder){ 1920 unresolvedAmbiguity =true; 1921 } 1922 } 1923 if (unresolvedAmbiguity){//not all outAtoms on parent equivalent 1924 firstOutAtomOrder = fragToBeJoined.getOutAtom(0).getValency(); 1925 unresolvedAmbiguity =false; 1926 for (int i = 1, l = fragToBeJoined.getOutAtomCount(); i < l; i++) { 1927 OutAtom outAtom = fragToBeJoined.getOutAtom(i); 1928 if (outAtom.getValency() != firstOutAtomOrder){ 1929 unresolvedAmbiguity =true; 1930 } 1931 } 1932 if (unresolvedAmbiguity && outAtomCountOnFragToBeJoined == 2){//not all outAtoms on frag to be joined are equivalent either! 1933 //Solves the specific case of 2,2'-[ethane-1,2-diylbis(azanylylidenemethanylylidene)]diphenol vs 2,2'-[ethane-1,2-diylidenebis(azanylylidenemethanylylidene)]bis(cyclohexan-1-ol) 1934 //but does not solve the general case as only a single look behind is performed. 1935 Element previousGroup = OpsinTools.getPreviousGroup(elOfFragToBeJoined); 1936 if (previousGroup != null){ 1937 Fragment previousFrag = previousGroup.getFrag(); 1938 if (previousFrag.getOutAtomCount() > 1){ 1939 int previousGroupFirstOutAtomOrder = previousFrag.getOutAtom(0).getValency(); 1940 unresolvedAmbiguity =false; 1941 for (int i = 1, l = previousFrag.getOutAtomCount(); i < l; i++) { 1942 OutAtom outAtom = previousFrag.getOutAtom(i); 1943 if (outAtom.getValency() != previousGroupFirstOutAtomOrder){ 1944 unresolvedAmbiguity =true; 1945 } 1946 } 1947 if (!unresolvedAmbiguity && previousGroupFirstOutAtomOrder==parentFrag.getOutAtom(0).getValency()){ 1948 for (int i = 1, l = parentFrag.getOutAtomCount(); i < l; i++) { 1949 OutAtom outAtom = parentFrag.getOutAtom(i); 1950 if (outAtom.getValency() != previousGroupFirstOutAtomOrder){ 1951 in = outAtom; 1952 break; 1953 } 1954 } 1955 } 1956 } 1957 } 1958 } 1959 else{ 1960 for (int i = 0, l = parentFrag.getOutAtomCount(); i < l; i++) { 1961 OutAtom outAtom = parentFrag.getOutAtom(i); 1962 if (outAtom.getValency()==firstOutAtomOrder){ 1963 in = outAtom; 1964 break; 1965 } 1966 } 1967 } 1968 } 1969 } 1970 if (in==null){ 1971 in = parentFrag.getOutAtom(0); 1972 } 1973 Atom to = in.getAtom(); 1974 int bondOrder = in.getValency(); 1975 if (!in.isSetExplicitly()){//not set explicitly so may be an inappropriate atom 1976 to = findAtomForUnlocantedRadical(state, to.getFrag(), in); 1977 } 1978 parentFrag.removeOutAtom(in); 1979 1980 OutAtom out =null; 1981 1982 for (int i =outAtomCountOnFragToBeJoined -1; i>=0; i--) { 1983 if (fragToBeJoined.getOutAtom(i).getValency() == bondOrder){ 1984 out = fragToBeJoined.getOutAtom(i); 1985 break; 1986 } 1987 } 1988 1989 if (out ==null){ 1990 if (outAtomCountOnFragToBeJoined >=bondOrder){//handles cases like nitrilo needing to be -N= (remove later outAtoms first as per usual) 1991 int valency =0; 1992 Atom lastOutAtom = fragToBeJoined.getOutAtom(outAtomCountOnFragToBeJoined -1).getAtom(); 1993 for (int i =outAtomCountOnFragToBeJoined -1; i >= 0; i--) { 1994 OutAtom nextOutAtom = fragToBeJoined.getOutAtom(i); 1995 if (nextOutAtom.getAtom() != lastOutAtom){ 1996 throw new StructureBuildingException("Additive bond formation failure: bond order disagreement"); 1997 } 1998 valency += nextOutAtom.getValency(); 1999 if (valency==bondOrder){ 2000 nextOutAtom.setValency(valency); 2001 out = nextOutAtom; 2002 break; 2003 } 2004 fragToBeJoined.removeOutAtom(nextOutAtom); 2005 } 2006 if (out==null){ 2007 throw new StructureBuildingException("Additive bond formation failure: bond order disagreement"); 2008 } 2009 } 2010 else{ 2011 throw new StructureBuildingException("Additive bond formation failure: bond order disagreement"); 2012 } 2013 } 2014 2015 Atom from = out.getAtom(); 2016 if (!out.isSetExplicitly()){//not set explicitly so may be an inappropriate atom 2017 from = findAtomForUnlocantedRadical(state, from.getFrag(), out); 2018 } 2019 fragToBeJoined.removeOutAtom(out); 2020 2021 state.fragManager.createBond(from, to, bondOrder); 2022 if (LOG.isTraceEnabled()){LOG.trace("Additively bonded " + from.getID() + " (" + from.getFrag().getTokenEl().getValue() + ") " + to.getID() + " (" + to.getFrag().getTokenEl().getValue() + ")" );} 2023 } 2024 joinFragmentsSubstitutively(BuildState state, Fragment fragToBeJoined, Atom atomToJoinTo)2025 private static void joinFragmentsSubstitutively(BuildState state, Fragment fragToBeJoined, Atom atomToJoinTo) throws StructureBuildingException { 2026 Element elOfFragToBeJoined = fragToBeJoined.getTokenEl(); 2027 if (EPOXYLIKE_SUBTYPE_VAL.equals(elOfFragToBeJoined.getAttributeValue(SUBTYPE_ATR))){ 2028 formEpoxide(state, fragToBeJoined, atomToJoinTo); 2029 return; 2030 } 2031 int outAtomCount = fragToBeJoined.getOutAtomCount(); 2032 if (outAtomCount >1){ 2033 throw new StructureBuildingException("Substitutive bond formation failure: Fragment expected to have one OutAtom but had: "+ outAtomCount); 2034 } 2035 if (outAtomCount ==0 ){ 2036 throw new StructureBuildingException("Substitutive bond formation failure: Fragment expected to have one OutAtom but had none"); 2037 } 2038 if (elOfFragToBeJoined.getAttribute(IMINOLIKE_ATR) != null){//special case for methylene/imino 2039 if (fragToBeJoined.getOutAtomCount()==1 && fragToBeJoined.getOutAtom(0).getValency()==1 ){ 2040 fragToBeJoined.getOutAtom(0).setValency(2); 2041 } 2042 } 2043 OutAtom out = fragToBeJoined.getOutAtom(0); 2044 Atom from = out.getAtom(); 2045 int bondOrder = out.getValency(); 2046 if (!out.isSetExplicitly()){//not set explicitly so may be an inappropriate atom 2047 List<Atom> possibleAtoms = FragmentTools.findnAtomsForSubstitution(fragToBeJoined.getAtomList(), from, 1, bondOrder, false); 2048 if (possibleAtoms == null){ 2049 throw new StructureBuildingException("Failed to assign all unlocanted radicals to actual atoms without violating valency"); 2050 } 2051 if (!((ALKANESTEM_SUBTYPE_VAL.equals(fragToBeJoined.getSubType()) || HETEROSTEM_SUBTYPE_VAL.equals(fragToBeJoined.getSubType())) && possibleAtoms.get(0).equals(fragToBeJoined.getFirstAtom()))) { 2052 if (AmbiguityChecker.isSubstitutionAmbiguous(possibleAtoms, 1)) { 2053 state.addIsAmbiguous("Positioning of radical on: " + fragToBeJoined.getTokenEl().getValue()); 2054 } 2055 } 2056 from = possibleAtoms.get(0); 2057 } 2058 fragToBeJoined.removeOutAtom(out); 2059 2060 state.fragManager.createBond(from, atomToJoinTo, bondOrder); 2061 if (LOG.isTraceEnabled()){LOG.trace("Substitutively bonded " + from.getID() + " (" + from.getFrag().getTokenEl().getValue() + ") " + atomToJoinTo.getID() + " (" + atomToJoinTo.getFrag().getTokenEl().getValue() + ")");} 2062 } 2063 2064 /** 2065 * Forms a bridge using the given fragment. 2066 * The bridgingFragment's outAtoms locants or a combination of the atomToJoinTo and a suitable atom 2067 * are used to decide what atoms to form the bridge between 2068 * @param state 2069 * @param bridgingFragment 2070 * @param atomToJoinTo 2071 * @return Atoms that the bridgingFragment attached to 2072 * @throws StructureBuildingException 2073 */ formEpoxide(BuildState state, Fragment bridgingFragment, Atom atomToJoinTo)2074 static Atom[] formEpoxide(BuildState state, Fragment bridgingFragment, Atom atomToJoinTo) throws StructureBuildingException { 2075 Fragment fragToJoinTo = atomToJoinTo.getFrag(); 2076 List<Atom> atomList = fragToJoinTo.getAtomList(); 2077 if (atomList.size()==1){ 2078 throw new StructureBuildingException("Epoxides must be formed between two different atoms"); 2079 } 2080 Atom firstAtomToJoinTo; 2081 if (bridgingFragment.getOutAtom(0).getLocant() != null){ 2082 firstAtomToJoinTo = fragToJoinTo.getAtomByLocantOrThrow(bridgingFragment.getOutAtom(0).getLocant()); 2083 } 2084 else{ 2085 firstAtomToJoinTo = atomToJoinTo; 2086 } 2087 OutAtom outAtom1 = bridgingFragment.getOutAtom(0); 2088 bridgingFragment.removeOutAtom(0); 2089 2090 //In epoxy chalcogenAtom1 will be chalcogenAtom2. Methylenedioxy is also handled by this method 2091 state.fragManager.createBond(outAtom1.getAtom(), firstAtomToJoinTo, outAtom1.getValency()); 2092 2093 Atom secondAtomToJoinTo; 2094 if (bridgingFragment.getOutAtom(0).getLocant() != null){ 2095 secondAtomToJoinTo = fragToJoinTo.getAtomByLocantOrThrow(bridgingFragment.getOutAtom(0).getLocant()); 2096 } 2097 else{ 2098 int index = atomList.indexOf(firstAtomToJoinTo); 2099 Atom preferredAtom = (index + 1 >= atomList.size()) ? atomList.get(index - 1) : atomList.get(index + 1); 2100 List<Atom> possibleSecondAtom = FragmentTools.findnAtomsForSubstitution(fragToJoinTo.getAtomList(), preferredAtom, 1, 1, true); 2101 if (possibleSecondAtom != null) { 2102 possibleSecondAtom.removeAll(Collections.singleton(firstAtomToJoinTo)); 2103 } 2104 if (possibleSecondAtom == null || possibleSecondAtom.size() == 0) { 2105 throw new StructureBuildingException("Unable to find suitable atom to form bridge"); 2106 } 2107 if (AmbiguityChecker.isSubstitutionAmbiguous(possibleSecondAtom, 1)) { 2108 state.addIsAmbiguous("Addition of bridge to: "+ fragToJoinTo.getTokenEl().getValue()); 2109 } 2110 secondAtomToJoinTo = possibleSecondAtom.get(0); 2111 } 2112 OutAtom outAtom2 = bridgingFragment.getOutAtom(0); 2113 bridgingFragment.removeOutAtom(0); 2114 if (outAtom1.getAtom().equals(outAtom2.getAtom()) && firstAtomToJoinTo == secondAtomToJoinTo){ 2115 throw new StructureBuildingException("Epoxides must be formed between two different atoms"); 2116 } 2117 int bondValency = outAtom2.getValency(); 2118 if (outAtom2.getAtom().hasSpareValency() && !secondAtomToJoinTo.hasSpareValency()) { 2119 //bridging groups like azeno are treated as aromatic so that it is not fixed as to which of the two bonds is the double bond 2120 //if connected to a saturated group though, one of them must be a double bond 2121 bondValency = 2; 2122 } 2123 state.fragManager.createBond(outAtom2.getAtom(), secondAtomToJoinTo, bondValency); 2124 CycleDetector.assignWhetherAtomsAreInCycles(bridgingFragment); 2125 return new Atom[]{firstAtomToJoinTo, secondAtomToJoinTo}; 2126 } 2127 2128 /** 2129 * Attempts to find an in-scope fragment capable of forming the given numberOfSubstitutions each with the given bondOrder 2130 * @param subOrBracket 2131 * @param numberOfSubstitutions 2132 * @param bondOrder 2133 * @return 2134 */ findAtomsForSubstitution(Element subOrBracket, int numberOfSubstitutions, int bondOrder)2135 private static List<Atom> findAtomsForSubstitution(Element subOrBracket, int numberOfSubstitutions, int bondOrder) { 2136 FindAlternativeGroupsResult results = findAlternativeGroups(subOrBracket); 2137 List<Atom> substitutableAtoms = findAtomsForSubstitution(results.groups, numberOfSubstitutions, bondOrder, true); 2138 if (substitutableAtoms != null) { 2139 return substitutableAtoms; 2140 } 2141 substitutableAtoms = findAtomsForSubstitution(results.groups, numberOfSubstitutions, bondOrder, false); 2142 if (substitutableAtoms != null) { 2143 return substitutableAtoms; 2144 } 2145 substitutableAtoms = findAtomsForSubstitution(results.groupsSubstitutionUnlikely, numberOfSubstitutions, bondOrder, true); 2146 if (substitutableAtoms != null) { 2147 return substitutableAtoms; 2148 } 2149 substitutableAtoms = findAtomsForSubstitution(results.groupsSubstitutionUnlikely, numberOfSubstitutions, bondOrder, false); 2150 return substitutableAtoms; 2151 } 2152 findAtomsForSubstitution(List<Element> possibleParents, int numberOfSubstitutions, int bondOrder, boolean preserveValency)2153 private static List<Atom> findAtomsForSubstitution(List<Element> possibleParents, int numberOfSubstitutions, int bondOrder, boolean preserveValency) { 2154 boolean rootHandled = false; 2155 for (int i = 0, l = possibleParents.size(); i < l; i++) { 2156 Element possibleParent = possibleParents.get(i); 2157 Fragment frag = possibleParent.getFrag(); 2158 List<Atom> substitutableAtoms; 2159 if (possibleParent.getParent().getName().equals(ROOT_EL)){//consider all root groups as if they were one 2160 if(rootHandled) { 2161 continue; 2162 } 2163 List<Atom> atoms = frag.getAtomList(); 2164 for (int j = i + 1; j < l; j++) { 2165 Element possibleOtherRoot = possibleParents.get(j); 2166 if (possibleOtherRoot.getParent().getName().equals(ROOT_EL)) { 2167 atoms.addAll(possibleOtherRoot.getFrag().getAtomList()); 2168 } 2169 } 2170 rootHandled = true; 2171 substitutableAtoms = FragmentTools.findnAtomsForSubstitution(atoms, frag.getDefaultInAtom(), numberOfSubstitutions, bondOrder, true, preserveValency); 2172 } 2173 else{ 2174 substitutableAtoms = FragmentTools.findnAtomsForSubstitution(frag.getAtomList(), frag.getDefaultInAtom(), numberOfSubstitutions, bondOrder, true, preserveValency); 2175 } 2176 if (substitutableAtoms != null){ 2177 return substitutableAtoms; 2178 } 2179 } 2180 return null; 2181 } 2182 2183 /** 2184 * Finds all the fragments accessible from the startingElement taking into account brackets 2185 * i.e. those that it is feasible that the group of the startingElement could substitute onto 2186 * @param startingElement 2187 * @return A list of fragments in the order to try them as possible parent fragments (for substitutive operations) 2188 */ findAlternativeFragments(Element startingElement)2189 static List<Fragment> findAlternativeFragments(Element startingElement) { 2190 List<Fragment> foundFragments = new ArrayList<Fragment>(); 2191 FindAlternativeGroupsResult results = findAlternativeGroups(startingElement); 2192 for (Element group : results.groups) { 2193 foundFragments.add(group.getFrag()); 2194 } 2195 for (Element group : results.groupsSubstitutionUnlikely) { 2196 foundFragments.add(group.getFrag()); 2197 } 2198 return foundFragments; 2199 } 2200 2201 /** 2202 * Finds all the groups accessible from the startingElement taking into account brackets 2203 * i.e. those that it is feasible that the group of the startingElement could substitute onto 2204 * (locanting onto bracketted groups is unlikely so these are kept seperate in the results object) 2205 * @param startingElement 2206 * @return An object containing the groups in the order to try them as possible parent groups (for substitutive operations) 2207 */ findAlternativeGroups(Element startingElement)2208 static FindAlternativeGroupsResult findAlternativeGroups(Element startingElement) { 2209 Deque<AlternativeGroupFinderState> stack = new ArrayDeque<AlternativeGroupFinderState>(); 2210 stack.add(new AlternativeGroupFinderState(startingElement.getParent(), false)); 2211 List<Element> groups = new ArrayList<Element>(); 2212 List<Element> groupsSubstitutionUnlikely = new ArrayList<Element>();//locanting into brackets is rarely the desired answer so keep these separate 2213 boolean doneFirstIteration = false;//check on index only done on first iteration to only get elements with an index greater than the starting element 2214 while (stack.size() > 0) { 2215 AlternativeGroupFinderState state = stack.removeLast(); 2216 Element currentElement = state.el; 2217 boolean substitutionUnlikely = state.substitutionUnlikely; 2218 if (currentElement.getName().equals(GROUP_EL)) { 2219 if (substitutionUnlikely) { 2220 groupsSubstitutionUnlikely.add(currentElement); 2221 } 2222 else { 2223 groups.add(currentElement); 2224 } 2225 continue; 2226 } 2227 List<Element> siblings = OpsinTools.getChildElementsWithTagNames(currentElement, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); 2228 2229 for (Element bracketOrSubOrRoot : siblings) { 2230 if (!doneFirstIteration && currentElement.indexOf(bracketOrSubOrRoot) <= currentElement.indexOf(startingElement)){ 2231 continue; 2232 } 2233 if (bracketOrSubOrRoot.getAttribute(MULTIPLIER_ATR) != null){ 2234 continue; 2235 } 2236 boolean substitutionUnlikelyForThisEl = substitutionUnlikely; 2237 if (bracketOrSubOrRoot.getName().equals(BRACKET_EL)){ 2238 if (!IMPLICIT_TYPE_VAL.equals(bracketOrSubOrRoot.getAttributeValue(TYPE_ATR))) { 2239 substitutionUnlikelyForThisEl = true; 2240 } 2241 stack.add(new AlternativeGroupFinderState(bracketOrSubOrRoot, substitutionUnlikelyForThisEl)); 2242 } 2243 else{ 2244 if (bracketOrSubOrRoot.getAttribute(LOCANT_ATR) != null) { 2245 substitutionUnlikelyForThisEl = true; 2246 } 2247 Element group = bracketOrSubOrRoot.getFirstChildElement(GROUP_EL); 2248 stack.add(new AlternativeGroupFinderState(group, substitutionUnlikelyForThisEl)); 2249 } 2250 } 2251 doneFirstIteration = true; 2252 } 2253 return new FindAlternativeGroupsResult(groups, groupsSubstitutionUnlikely); 2254 } 2255 2256 private static class AlternativeGroupFinderState { 2257 private final Element el; 2258 private final boolean substitutionUnlikely; 2259 AlternativeGroupFinderState(Element el, boolean substitutionUnlikely)2260 AlternativeGroupFinderState(Element el, boolean substitutionUnlikely) { 2261 this.el = el; 2262 this.substitutionUnlikely = substitutionUnlikely; 2263 } 2264 } 2265 2266 private static class FindAlternativeGroupsResult { 2267 private final List<Element> groups; 2268 private final List<Element> groupsSubstitutionUnlikely; 2269 FindAlternativeGroupsResult(List<Element> groups, List<Element> groupsSubstitutionUnlikely)2270 FindAlternativeGroupsResult(List<Element> groups, List<Element> groupsSubstitutionUnlikely) { 2271 this.groups = groups; 2272 this.groupsSubstitutionUnlikely = groupsSubstitutionUnlikely; 2273 } 2274 } 2275 2276 /** 2277 * Checks through the groups accessible from the currentElement taking into account brackets 2278 * i.e. those that it is feasible that the group of the currentElement could substitute onto 2279 * @param startingElement 2280 * @param locant: the locant string to check for the presence of 2281 * @return The fragment with the locant, or null 2282 * @throws StructureBuildingException 2283 */ findFragmentWithLocant(Element startingElement, String locant)2284 private static Fragment findFragmentWithLocant(Element startingElement, String locant) throws StructureBuildingException { 2285 Deque<Element> stack = new ArrayDeque<Element>(); 2286 stack.add(startingElement.getParent()); 2287 boolean doneFirstIteration = false;//check on index only done on first iteration to only get elements with an index greater than the starting element 2288 Fragment monoNuclearHydride = null;//e.g. methyl/methane - In this case no locant would be expected as unlocanted substitution is always unambiguous. Hence deprioritise 2289 while (stack.size() > 0) { 2290 Element currentElement = stack.removeLast(); 2291 if (currentElement.getName().equals(SUBSTITUENT_EL) || currentElement.getName().equals(ROOT_EL)) { 2292 Fragment groupFrag = currentElement.getFirstChildElement(GROUP_EL).getFrag(); 2293 if (monoNuclearHydride != null && currentElement.getAttribute(LOCANT_ATR) != null) {//It looks like all groups are locanting onto the monoNuclearHydride e.g. 1-oxo-1-phenyl-sulfanylidene 2294 return monoNuclearHydride; 2295 } 2296 if (groupFrag.hasLocant(locant)) { 2297 if (locant.equals("1") && groupFrag.getAtomCount() == 1) { 2298 if (monoNuclearHydride == null) { 2299 monoNuclearHydride = groupFrag; 2300 } 2301 } 2302 else{ 2303 return groupFrag; 2304 } 2305 } 2306 continue; 2307 } 2308 else if (monoNuclearHydride != null) { 2309 return monoNuclearHydride; 2310 } 2311 List<Element> siblings = OpsinTools.getChildElementsWithTagNames(currentElement, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); 2312 2313 List<Element> bracketted = new ArrayList<Element>(); 2314 if (!doneFirstIteration) {//on the first iteration, ignore elements before the starting element and favour the element directly after the starting element (conditions apply) 2315 int indexOfStartingEl = currentElement.indexOf(startingElement); 2316 Element substituentToTryFirst = null; 2317 for (Element bracketOrSubOrRoot : siblings) { 2318 int indexOfCurrentEl = currentElement.indexOf(bracketOrSubOrRoot); 2319 if (indexOfCurrentEl <= indexOfStartingEl) { 2320 continue; 2321 } 2322 if (bracketOrSubOrRoot.getAttribute(MULTIPLIER_ATR) != null) { 2323 continue; 2324 } 2325 2326 if (bracketOrSubOrRoot.getName().equals(BRACKET_EL)) { 2327 if (IMPLICIT_TYPE_VAL.equals(bracketOrSubOrRoot.getAttributeValue(TYPE_ATR)) && bracketOrSubOrRoot.getAttribute(LOCANT_EL) == null) { 2328 //treat implicit brackets without locants as if they are not there 2329 for (Element descendent : getChildrenIgnoringLocantlessImplicitBrackets(bracketOrSubOrRoot)) { 2330 if (descendent.getName().equals(BRACKET_EL)) { 2331 bracketted.add(descendent); 2332 } 2333 else { 2334 if (substituentToTryFirst == null && descendent.getAttribute(LOCANT_EL) == null && MATCH_NUMERIC_LOCANT.matcher(locant).matches()) { 2335 substituentToTryFirst = descendent; 2336 } 2337 else { 2338 stack.add(descendent); 2339 } 2340 } 2341 } 2342 } 2343 else { 2344 bracketted.add(bracketOrSubOrRoot); 2345 } 2346 } 2347 else { 2348 if (substituentToTryFirst == null && bracketOrSubOrRoot.getAttribute(LOCANT_EL) == null && MATCH_NUMERIC_LOCANT.matcher(locant).matches()) { 2349 substituentToTryFirst = bracketOrSubOrRoot; 2350 } 2351 else { 2352 stack.add(bracketOrSubOrRoot); 2353 } 2354 } 2355 } 2356 if (substituentToTryFirst != null) { 2357 stack.add(substituentToTryFirst); 2358 } 2359 doneFirstIteration = true; 2360 } 2361 else { 2362 for (Element bracketOrSubOrRoot : siblings) { 2363 if (bracketOrSubOrRoot.getAttribute(MULTIPLIER_ATR) != null) { 2364 continue; 2365 } 2366 if (bracketOrSubOrRoot.getName().equals(BRACKET_EL)) { 2367 if (IMPLICIT_TYPE_VAL.equals(bracketOrSubOrRoot.getAttributeValue(TYPE_ATR)) && bracketOrSubOrRoot.getAttribute(LOCANT_EL) == null) { 2368 //treat implicit brackets without locants as if they are not there 2369 for (Element descendent : getChildrenIgnoringLocantlessImplicitBrackets(bracketOrSubOrRoot)) { 2370 if (descendent.getName().equals(BRACKET_EL)) { 2371 bracketted.add(descendent); 2372 } 2373 else { 2374 stack.add(descendent); 2375 } 2376 } 2377 } 2378 else { 2379 bracketted.add(bracketOrSubOrRoot); 2380 } 2381 } 2382 else { 2383 stack.add(bracketOrSubOrRoot); 2384 } 2385 } 2386 } 2387 //locanting into brackets is rarely the desired answer so place at the bottom of the stack 2388 for (int i = bracketted.size() -1; i >=0; i--) { 2389 stack.addFirst(bracketted.get(i)); 2390 } 2391 } 2392 return monoNuclearHydride; 2393 } 2394 getChildrenIgnoringLocantlessImplicitBrackets(Element implicitBracket)2395 private static List<Element> getChildrenIgnoringLocantlessImplicitBrackets(Element implicitBracket) { 2396 List<Element> childrenAndImplicitBracketChildren = new ArrayList<Element>(); 2397 for (Element child : implicitBracket.getChildElements()) { 2398 if (child.getName().equals(BRACKET_EL) && IMPLICIT_TYPE_VAL.equals(child.getAttributeValue(TYPE_ATR)) && child.getAttribute(LOCANT_EL) == null) { 2399 childrenAndImplicitBracketChildren.addAll(getChildrenIgnoringLocantlessImplicitBrackets(child)); 2400 } 2401 else { 2402 childrenAndImplicitBracketChildren.add(child); 2403 } 2404 } 2405 return childrenAndImplicitBracketChildren; 2406 } 2407 findRightMostGroupInBracket(Element bracket)2408 static Element findRightMostGroupInBracket(Element bracket) { 2409 List<Element> subsBracketsAndRoots = OpsinTools.getChildElementsWithTagNames(bracket, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); 2410 Element lastSubsBracketOrRoot = subsBracketsAndRoots.get(subsBracketsAndRoots.size() - 1); 2411 while (lastSubsBracketOrRoot.getName().equals(BRACKET_EL)) { 2412 subsBracketsAndRoots = OpsinTools.getChildElementsWithTagNames(lastSubsBracketOrRoot, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); 2413 lastSubsBracketOrRoot = subsBracketsAndRoots.get(subsBracketsAndRoots.size() - 1); 2414 } 2415 return findRightMostGroupInSubOrRoot(lastSubsBracketOrRoot); 2416 } 2417 findRightMostGroupInSubBracketOrRoot(Element subBracketOrRoot)2418 static Element findRightMostGroupInSubBracketOrRoot(Element subBracketOrRoot) { 2419 if (subBracketOrRoot.getName().equals(BRACKET_EL)) { 2420 return findRightMostGroupInBracket(subBracketOrRoot); 2421 } 2422 else { 2423 return findRightMostGroupInSubOrRoot(subBracketOrRoot); 2424 } 2425 } 2426 findRightMostGroupInSubOrRoot(Element subOrRoot)2427 private static Element findRightMostGroupInSubOrRoot(Element subOrRoot) { 2428 for (int i = subOrRoot.getChildCount() - 1; i >= 0; i--) { 2429 Element el = subOrRoot.getChild(i); 2430 if (el.getName().equals(GROUP_EL)) { 2431 return el; 2432 } 2433 } 2434 return null; 2435 } 2436 potentiallyCanSubstitute(Element subBracketOrRoot)2437 private static boolean potentiallyCanSubstitute(Element subBracketOrRoot) { 2438 Element parent = subBracketOrRoot.getParent(); 2439 List<Element> children =parent.getChildElements(); 2440 for (int i = parent.indexOf(subBracketOrRoot) +1 ; i < children.size(); i++) { 2441 if (!children.get(i).getName().equals(HYPHEN_EL)){ 2442 return true; 2443 } 2444 } 2445 return false; 2446 } 2447 checkForBracketedPrimedLocantSpecialCase(Element subBracketOrRoot, String locantString)2448 static String checkForBracketedPrimedLocantSpecialCase(Element subBracketOrRoot, String locantString) { 2449 int terminalPrimes = StringTools.countTerminalPrimes(locantString); 2450 if (terminalPrimes > 0){ 2451 int brackettingDepth = 0; 2452 Element parent = subBracketOrRoot.getParent(); 2453 while (parent != null && parent.getName().equals(BRACKET_EL)){ 2454 if (!IMPLICIT_TYPE_VAL.equals(parent.getAttributeValue(TYPE_ATR))){ 2455 brackettingDepth++; 2456 } 2457 parent = parent.getParent(); 2458 } 2459 if (terminalPrimes == brackettingDepth){ 2460 return locantString.substring(0, locantString.length() - terminalPrimes); 2461 } 2462 } 2463 return null; 2464 } 2465 2466 /** 2467 * In cases such as methylenecyclohexane two outAtoms are combined to form a single outAtom with valency 2468 * equal to sum of the valency of the other outAtoms. 2469 * This is only allowed on substituents where all the outAtoms are on the same atom 2470 * @param frag 2471 * @param group 2472 * @throws StructureBuildingException 2473 */ checkAndApplySpecialCaseWhereOutAtomsCanBeCombinedOrThrow(Fragment frag, Element group)2474 private static void checkAndApplySpecialCaseWhereOutAtomsCanBeCombinedOrThrow(Fragment frag, Element group) throws StructureBuildingException { 2475 int outAtomCount = frag.getOutAtomCount(); 2476 if (outAtomCount <= 1) { 2477 return; 2478 } 2479 if (EPOXYLIKE_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){ 2480 return; 2481 } 2482 String groupValue = group.getValue(); 2483 if (groupValue.equals("oxy") || groupValue.equals("thio") || groupValue.equals("seleno") || groupValue.equals("telluro")){//always bivalent 2484 return; 2485 } 2486 //special case: all outAtoms on same atom e.g. methylenecyclohexane 2487 Atom firstOutAtom = frag.getOutAtom(0).getAtom(); 2488 int valencyOfOutAtom = 0; 2489 for (int i = outAtomCount - 1; i >=0 ; i--) {//remove all outAtoms and add one with the total valency of all those that have been removed 2490 OutAtom out = frag.getOutAtom(i); 2491 if (!out.getAtom().equals(firstOutAtom)){ 2492 throw new StructureBuildingException("Substitutive bond formation failure: Fragment expected to have one OutAtom but had: "+ outAtomCount); 2493 } 2494 valencyOfOutAtom += out.getValency(); 2495 frag.removeOutAtom(i); 2496 } 2497 frag.addOutAtom(firstOutAtom, valencyOfOutAtom, true); 2498 } 2499 2500 /** 2501 * Calculates the number of substitutable hydrogen by taking into account: 2502 * Specified valency if applicable, outAtoms and the lowest valency state that will satisfy these 2503 * e.g. thio has 2 outAtoms and no bonds hence -->2 outgoing, lowest stable valency = 2 hence no substitutable hydrogen 2504 * e.g. phosphonyl has 2 outAtoms and one double bond -->4 outgoing, lowest stable valency =5 hence 1 substitutable hydrogen 2505 * @param atom 2506 * @return 2507 */ calculateSubstitutableHydrogenAtoms(Atom atom)2508 static int calculateSubstitutableHydrogenAtoms(Atom atom) { 2509 if (!atom.getImplicitHydrogenAllowed()) { 2510 return 0; 2511 } 2512 int valency = atom.determineValency(true); 2513 int currentValency = atom.getIncomingValency() + atom.getOutValency(); 2514 int substitutableHydrogen = valency - currentValency; 2515 return substitutableHydrogen >= 0 ? substitutableHydrogen : 0; 2516 } 2517 2518 /** 2519 * Stereochemistry terms are assigned right at the end so that checks can be done on whether the indicated atom is in fact chiral. 2520 * In the process of multiplication locants are primed. This function adds the appropriate number of primes to any locanted stereochemistry locants 2521 * The primesString is the string containing the primes to add to each locant 2522 * @param subOrBracket 2523 * @param primesString 2524 */ addPrimesToLocantedStereochemistryElements(Element subOrBracket, String primesString)2525 private static void addPrimesToLocantedStereochemistryElements(Element subOrBracket, String primesString) { 2526 List<Element> stereoChemistryElements =OpsinTools.getDescendantElementsWithTagName(subOrBracket, STEREOCHEMISTRY_EL); 2527 for (Element stereoChemistryElement : stereoChemistryElements) { 2528 if (stereoChemistryElement.getAttribute(LOCANT_ATR) != null){ 2529 stereoChemistryElement.getAttribute(LOCANT_ATR).setValue(stereoChemistryElement.getAttributeValue(LOCANT_ATR) + primesString); 2530 } 2531 } 2532 } 2533 2534 /** 2535 * Calculates the number of times getParent() must be called to reach a word element 2536 * Returns null if element does not have an enclosing word element. 2537 * @param element 2538 * @return 2539 */ levelsToWordEl(Element element)2540 private static Integer levelsToWordEl(Element element) { 2541 int count =0; 2542 while (!element.getName().equals(WORD_EL)){ 2543 element = element.getParent(); 2544 if (element == null){ 2545 return null; 2546 } 2547 count++; 2548 } 2549 return count; 2550 } 2551 } 2552