1 package uk.ac.cam.ch.wwmm.opsin; 2 3 import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; 4 5 import java.util.ArrayList; 6 import java.util.Collections; 7 import java.util.Comparator; 8 import java.util.Iterator; 9 import java.util.LinkedHashSet; 10 import java.util.LinkedList; 11 import java.util.List; 12 import java.util.Set; 13 import java.util.regex.Pattern; 14 15 /** 16 * Methods for performing functional replacement 17 * @author dl387 18 * 19 */ 20 class FunctionalReplacement { 21 22 /** 23 * Sorts infix transformations by the number of acceptable inputs for the transformation. 24 * e.g. thio ends up towards the end of the list as it accepts both -O or =O whilst say imido only accepts =O 25 * @author dl387 26 * 27 */ 28 private static class SortInfixTransformations implements Comparator<String> { compare(String infixTransformation1, String infixTransformation2)29 public int compare(String infixTransformation1, String infixTransformation2) { 30 int allowedInputs1 = infixTransformation1.split(",").length; 31 int allowedInputs2 = infixTransformation2.split(",").length; 32 if (allowedInputs1 < allowedInputs2){//infixTransformation1 preferred 33 return -1; 34 } 35 if (allowedInputs1 > allowedInputs2){//infixTransformation2 preferred 36 return 1; 37 } 38 else{ 39 return 0; 40 } 41 } 42 } 43 private static enum PREFIX_REPLACEMENT_TYPE{ 44 chalcogen,//ambiguous 45 halideOrPseudoHalide,//only mean functional replacement when applied to non carboxylic acids 46 dedicatedFunctionalReplacementPrefix,//no ambiguity exists 47 hydrazono,//ambiguous, only applies to non carboxylic acid 48 peroxy//ambiguous, also applies to etheric oxygen 49 } 50 51 private static final Pattern matchChalcogenReplacement= Pattern.compile("thio|seleno|telluro"); 52 53 private final BuildState state; 54 FunctionalReplacement(BuildState state)55 FunctionalReplacement(BuildState state) { 56 this.state = state; 57 } 58 59 /** 60 * Applies the effects of acid replacing functional class nomenclature 61 * This must be performed early so that prefix/infix functional replacement is performed correctly 62 * and so that element symbol locants are assigned appropriately 63 * @param finalSubOrRootInWord 64 * @param word 65 * @throws ComponentGenerationException 66 * @throws StructureBuildingException 67 */ processAcidReplacingFunctionalClassNomenclature(Element finalSubOrRootInWord, Element word)68 void processAcidReplacingFunctionalClassNomenclature(Element finalSubOrRootInWord, Element word) throws ComponentGenerationException, StructureBuildingException { 69 Element wordRule = OpsinTools.getParentWordRule(word); 70 if (WordRule.valueOf(wordRule.getAttributeValue(WORDRULE_ATR)) == WordRule.acidReplacingFunctionalGroup){ 71 Element parentWordRule = word.getParent(); 72 if (parentWordRule.indexOf(word)==0){ 73 for (int i = 1, l = parentWordRule.getChildCount(); i < l ; i++) { 74 Element acidReplacingWord = parentWordRule.getChild(i); 75 if (!acidReplacingWord.getName().equals(WORD_EL)) { 76 throw new RuntimeException("OPSIN bug: problem with acidReplacingFunctionalGroup word rule"); 77 } 78 String type = acidReplacingWord.getAttributeValue(TYPE_ATR); 79 if (type.equals(WordType.full.toString())) { 80 //case where functionalTerm is substituted 81 //as words are processed from right to left in cases like phosphoric acid tri(ethylamide) this will be phosphoric acid ethylamide ethylamide ethylamide 82 processAcidReplacingFunctionalClassNomenclatureFullWord(finalSubOrRootInWord, acidReplacingWord); 83 } 84 else if (type.equals(WordType.functionalTerm.toString())) { 85 processAcidReplacingFunctionalClassNomenclatureFunctionalWord(finalSubOrRootInWord, acidReplacingWord); 86 } 87 else { 88 throw new RuntimeException("OPSIN bug: problem with acidReplacingFunctionalGroup word rule"); 89 } 90 } 91 } 92 } 93 } 94 95 /** 96 * Performs prefix functional replacement e.g. thio in thioacetic acid replaces an O with S 97 * Prefixes will present themselves as substituents. There is potential ambiguity between usage as a substituent 98 * and as a functional replacement term in some cases. If the substituent is deemed to indicate functional replacement 99 * it will be detached and its effects applied to the subsequent group 100 * 101 * The list of groups and substituents given to this method will be mutated in the process. 102 * 103 * For heterocyclic rings functional replacement should technically be limited to : 104 * pyran, morpholine, chromene, isochromene and xanthene, chromane and isochromane. 105 * but this is not currently enforced 106 * @param groups 107 * @param substituents 108 * @return boolean: has any functional replacement occurred 109 * @throws StructureBuildingException 110 * @throws ComponentGenerationException 111 */ processPrefixFunctionalReplacementNomenclature(List<Element> groups, List<Element> substituents)112 boolean processPrefixFunctionalReplacementNomenclature(List<Element> groups, List<Element> substituents) throws StructureBuildingException, ComponentGenerationException { 113 int originalNumberOfGroups = groups.size(); 114 for (int i = originalNumberOfGroups-1; i >=0; i--) { 115 Element group =groups.get(i); 116 String groupValue = group.getValue(); 117 PREFIX_REPLACEMENT_TYPE replacementType = null; 118 if (matchChalcogenReplacement.matcher(groupValue).matches() && !isChalcogenSubstituent(group) || groupValue.equals("thiono")){ 119 replacementType =PREFIX_REPLACEMENT_TYPE.chalcogen; 120 } 121 else if (HALIDEORPSEUDOHALIDE_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){ 122 replacementType =PREFIX_REPLACEMENT_TYPE.halideOrPseudoHalide; 123 } 124 else if (DEDICATEDFUNCTIONALREPLACEMENTPREFIX_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){ 125 replacementType =PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix; 126 } 127 else if (groupValue.equals("hydrazono")){ 128 replacementType =PREFIX_REPLACEMENT_TYPE.hydrazono; 129 } 130 else if (groupValue.equals("peroxy")){ 131 replacementType =PREFIX_REPLACEMENT_TYPE.peroxy; 132 } 133 if (replacementType != null) { 134 //need to check whether this is an instance of functional replacement by checking the substituent/root it is applying to 135 Element substituent = group.getParent(); 136 Element nextSubOrBracket = OpsinTools.getNextSibling(substituent); 137 if (nextSubOrBracket!=null && (nextSubOrBracket.getName().equals(ROOT_EL) || nextSubOrBracket.getName().equals(SUBSTITUENT_EL))){ 138 Element groupToBeModified = nextSubOrBracket.getFirstChildElement(GROUP_EL); 139 if (groupPrecededByElementThatBlocksPrefixReplacementInterpetation(groupToBeModified)) { 140 if (replacementType == PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix){ 141 throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue); 142 } 143 continue;//not 2,2'-thiodipyran 144 } 145 Element locantEl = null;//null unless a locant that agrees with the multiplier is present 146 Element multiplierEl = null; 147 int numberOfAtomsToReplace = 1;//the number of atoms to be functionally replaced, modified by a multiplier e.g. dithio 148 Element possibleMultiplier = OpsinTools.getPreviousSibling(group); 149 if (possibleMultiplier != null) { 150 Element possibleLocant; 151 if (possibleMultiplier.getName().equals(MULTIPLIER_EL)) { 152 numberOfAtomsToReplace = Integer.valueOf(possibleMultiplier.getAttributeValue(VALUE_ATR)); 153 possibleLocant = OpsinTools.getPreviousSibling(possibleMultiplier); 154 multiplierEl = possibleMultiplier; 155 } 156 else{ 157 possibleLocant = possibleMultiplier; 158 } 159 if (possibleLocant !=null && possibleLocant.getName().equals(LOCANT_EL) && possibleLocant.getAttribute(TYPE_ATR) == null) { 160 int numberOfLocants = possibleLocant.getValue().split(",").length; 161 if (numberOfLocants == numberOfAtomsToReplace){//locants and number of replacements agree 162 locantEl = possibleLocant; 163 } 164 else if (numberOfAtomsToReplace > 1) {//doesn't look like prefix functional replacement 165 if (replacementType == PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix){ 166 throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue); 167 } 168 continue; 169 } 170 } 171 } 172 173 int oxygenReplaced; 174 if (replacementType == PREFIX_REPLACEMENT_TYPE.chalcogen) { 175 oxygenReplaced = performChalcogenFunctionalReplacement(groupToBeModified, locantEl, numberOfAtomsToReplace, group.getAttributeValue(VALUE_ATR)); 176 } 177 else if (replacementType == PREFIX_REPLACEMENT_TYPE.peroxy) { 178 if (nextSubOrBracket.getName().equals(SUBSTITUENT_EL)) { 179 continue; 180 } 181 oxygenReplaced = performPeroxyFunctionalReplacement(groupToBeModified, locantEl, numberOfAtomsToReplace); 182 } 183 else if (replacementType == PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix){ 184 if (!groupToBeModified.getAttributeValue(TYPE_ATR).equals(NONCARBOXYLICACID_TYPE_VAL) 185 && !(groupToBeModified.getValue().equals("form") && groupValue.equals("imido"))){ 186 throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue); 187 } 188 oxygenReplaced = performFunctionalReplacementOnAcid(groupToBeModified, locantEl, numberOfAtomsToReplace, group.getAttributeValue(VALUE_ATR)); 189 if (oxygenReplaced==0){ 190 throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue); 191 } 192 } 193 else if (replacementType == PREFIX_REPLACEMENT_TYPE.hydrazono || replacementType == PREFIX_REPLACEMENT_TYPE.halideOrPseudoHalide){ 194 Fragment acidFrag = groupToBeModified.getFrag(); 195 if (!groupToBeModified.getAttributeValue(TYPE_ATR).equals(NONCARBOXYLICACID_TYPE_VAL) || 196 acidHasSufficientHydrogenForSubstitutionInterpretation(acidFrag, group.getFrag().getOutAtom(0).getValency(), locantEl)){ 197 //hydrazono replacement only applies to non carboxylic acids e.g. hydrazonooxalic acid 198 //need to be careful to note that something like chlorophosphonic acid isn't functional replacement 199 continue; 200 } 201 oxygenReplaced = performFunctionalReplacementOnAcid(groupToBeModified, locantEl, numberOfAtomsToReplace, group.getAttributeValue(VALUE_ATR)); 202 } 203 else{ 204 throw new StructureBuildingException("OPSIN bug: Unexpected prefix replacement type"); 205 } 206 if (oxygenReplaced>0){ 207 state.fragManager.removeFragment(group.getFrag()); 208 substituent.removeChild(group); 209 groups.remove(group); 210 List<Element> remainingChildren =substituent.getChildElements();//there may be a locant that should be moved 211 for (int j = remainingChildren.size()-1; j>=0; j--){ 212 Element child =substituent.getChild(j); 213 child.detach(); 214 nextSubOrBracket.insertChild(child, 0); 215 } 216 substituents.remove(substituent); 217 substituent.detach(); 218 if (oxygenReplaced>1){ 219 multiplierEl.detach(); 220 } 221 } 222 } 223 else if (replacementType == PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix){ 224 throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue); 225 } 226 } 227 } 228 return groups.size() != originalNumberOfGroups; 229 } 230 isChalcogenSubstituent(Element group)231 private boolean isChalcogenSubstituent(Element group) { 232 //Is this group followed by a hyphen and directly preceded by a substituent i.e. no multiplier/locant 233 //e.g. methylthio- 234 Element next = OpsinTools.getNextSibling(group); 235 if (next != null && next.getName().equals(HYPHEN_EL) && 236 OpsinTools.getPreviousSibling(group) == null) { 237 Element previousGroup = OpsinTools.getPreviousGroup(group); 238 if (previousGroup != null) { 239 //TODO We actually want to know if a carbon atom is the attachment point... but we don't know the attachment point locations at this point 240 Element suffix = OpsinTools.getNextSibling(previousGroup, SUFFIX_EL); 241 if (suffix == null || suffix.getFrag() == null) { 242 for (Atom a : previousGroup.getFrag()) { 243 if (a.getElement() == ChemEl.C) { 244 return true; 245 } 246 } 247 } 248 } 249 } 250 return false; 251 } 252 253 /** 254 * Currently prefix replacement terms must be directly adjacent to the groupToBeModified with an exception made 255 * for carbohydrate stereochemistry prefixes e.g. 'gluco' and for substractive prefixes e.g. 'deoxy' 256 * @param groupToBeModified 257 * @return 258 */ groupPrecededByElementThatBlocksPrefixReplacementInterpetation(Element groupToBeModified)259 private boolean groupPrecededByElementThatBlocksPrefixReplacementInterpetation(Element groupToBeModified) { 260 Element previous = OpsinTools.getPreviousSibling(groupToBeModified); 261 while (previous !=null && (previous.getName().equals(SUBTRACTIVEPREFIX_EL) 262 || (previous.getName().equals(STEREOCHEMISTRY_EL) && previous.getAttributeValue(TYPE_ATR).equals(CARBOHYDRATECONFIGURATIONPREFIX_TYPE_VAL)))){ 263 previous = OpsinTools.getPreviousSibling(previous); 264 } 265 return previous != null; 266 } 267 268 269 /* 270 * 271 */ 272 273 /** 274 * Performs functional replacement using infixes e.g. thio in ethanthioic acid replaces an O with S 275 * @param suffixFragments May be modified if a multiplier is determined to mean multiplication of a suffix, usually untouched 276 * @param suffixes The suffix elements May be modified if a multiplier is determined to mean multiplication of a suffix, usually untouched 277 * @throws StructureBuildingException 278 * @throws ComponentGenerationException 279 */ processInfixFunctionalReplacementNomenclature(List<Element> suffixes, List<Fragment> suffixFragments)280 void processInfixFunctionalReplacementNomenclature(List<Element> suffixes, List<Fragment> suffixFragments) throws StructureBuildingException, ComponentGenerationException { 281 for (int i = 0; i < suffixes.size(); i++) { 282 Element suffix = suffixes.get(i); 283 if (suffix.getAttribute(INFIX_ATR) != null){ 284 Fragment fragToApplyInfixTo = suffix.getFrag(); 285 Element possibleAcidGroup = OpsinTools.getPreviousSiblingIgnoringCertainElements(suffix, new String[]{MULTIPLIER_EL, INFIX_EL, SUFFIX_EL}); 286 if (possibleAcidGroup !=null && possibleAcidGroup.getName().equals(GROUP_EL) && 287 (possibleAcidGroup.getAttributeValue(TYPE_ATR).equals(NONCARBOXYLICACID_TYPE_VAL)|| possibleAcidGroup.getAttributeValue(TYPE_ATR).equals(CHALCOGENACIDSTEM_TYPE_VAL))){ 288 fragToApplyInfixTo = possibleAcidGroup.getFrag(); 289 } 290 if (fragToApplyInfixTo ==null){ 291 throw new ComponentGenerationException("infix has erroneously been assigned to a suffix which does not correspond to a suffix fragment. suffix: " + suffix.getValue()); 292 } 293 //e.g. =O:S,-O:S (which indicates replacing either a double or single bonded oxygen with S) 294 //This is semicolon delimited for each infix 295 List<String> infixTransformations = StringTools.arrayToList(suffix.getAttributeValue(INFIX_ATR).split(";")); 296 297 List<Atom> atomList =fragToApplyInfixTo.getAtomList(); 298 LinkedList<Atom> singleBondedOxygen = new LinkedList<Atom>(); 299 LinkedList<Atom> doubleBondedOxygen = new LinkedList<Atom>(); 300 populateTerminalSingleAndDoubleBondedOxygen(atomList, singleBondedOxygen, doubleBondedOxygen); 301 int oxygenAvailable = singleBondedOxygen.size() +doubleBondedOxygen.size(); 302 303 /* 304 * Modifies suffixes, suffixFragments, suffix and infixTransformations as appropriate 305 */ 306 disambiguateMultipliedInfixMeaning(suffixes, suffixFragments, suffix, infixTransformations, oxygenAvailable); 307 308 /* 309 * Sort infixTransformations so more specific transformations are performed first 310 * e.g. ethanthioimidic acid-->ethanimidthioic acid as imid can only apply to the double bonded oxygen 311 */ 312 Collections.sort(infixTransformations, new SortInfixTransformations()); 313 314 for (String infixTransformation : infixTransformations) { 315 String[] transformationArray = infixTransformation.split(":"); 316 if (transformationArray.length !=2){ 317 throw new StructureBuildingException("Atom to be replaced and replacement not specified correctly in infix: " + infixTransformation); 318 } 319 String[] transformations = transformationArray[0].split(","); 320 String replacementSMILES = transformationArray[1]; 321 boolean acceptDoubleBondedOxygen = false; 322 boolean acceptSingleBondedOxygen = false; 323 boolean nitrido =false; 324 for (String transformation : transformations) { 325 if (transformation.startsWith("=")){ 326 acceptDoubleBondedOxygen = true; 327 } 328 else if (transformation.startsWith("-")){ 329 acceptSingleBondedOxygen = true; 330 } 331 else if (transformation.startsWith("#")){ 332 nitrido =true; 333 } 334 else{ 335 throw new StructureBuildingException("Malformed infix transformation. Expected to start with either - or =. Transformation was: " +transformation); 336 } 337 if (transformation.length()<2 || transformation.charAt(1)!='O'){ 338 throw new StructureBuildingException("Only replacement by oxygen is supported. Check infix defintions"); 339 } 340 } 341 boolean infixAssignmentAmbiguous =false; 342 if ((acceptSingleBondedOxygen ||nitrido) && !acceptDoubleBondedOxygen){ 343 if (singleBondedOxygen.size() ==0){ 344 throw new StructureBuildingException("Cannot find single bonded oxygen for infix with SMILES: "+ replacementSMILES+ " to modify!"); 345 } 346 if (singleBondedOxygen.size() !=1){ 347 infixAssignmentAmbiguous=true; 348 } 349 } 350 if (!acceptSingleBondedOxygen && (acceptDoubleBondedOxygen | nitrido)){ 351 if (doubleBondedOxygen.size()==0){ 352 throw new StructureBuildingException("Cannot find double bonded oxygen for infix with SMILES: "+ replacementSMILES+ " to modify!"); 353 } 354 if (doubleBondedOxygen.size() != 1){ 355 infixAssignmentAmbiguous=true; 356 } 357 } 358 if (acceptSingleBondedOxygen && acceptDoubleBondedOxygen){ 359 if (oxygenAvailable ==0){ 360 throw new StructureBuildingException("Cannot find oxygen for infix with SMILES: "+ replacementSMILES+ " to modify!"); 361 } 362 if (oxygenAvailable !=1){ 363 infixAssignmentAmbiguous=true; 364 } 365 } 366 367 Set<Atom> ambiguousElementAtoms = new LinkedHashSet<Atom>(); 368 Atom atomToUse = null; 369 if ((acceptDoubleBondedOxygen || nitrido) && doubleBondedOxygen.size()>0 ){ 370 atomToUse = doubleBondedOxygen.removeFirst(); 371 } 372 else if (acceptSingleBondedOxygen && singleBondedOxygen.size()>0 ){ 373 atomToUse = singleBondedOxygen.removeFirst(); 374 } 375 else{ 376 throw new StructureBuildingException("Cannot find oxygen for infix with SMILES: "+ replacementSMILES+ " to modify!");//this would be a bug 377 } 378 Fragment replacementFrag = state.fragManager.buildSMILES(replacementSMILES, SUFFIX_TYPE_VAL, NONE_LABELS_VAL); 379 if (replacementFrag.getOutAtomCount()>0){//SMILES include an indication of the bond order the replacement fragment will have, this is not intended to be an outatom 380 replacementFrag.removeOutAtom(0); 381 } 382 Atom atomThatWillReplaceOxygen =replacementFrag.getFirstAtom(); 383 if (replacementFrag.getAtomCount()==1 && atomThatWillReplaceOxygen.getElement().isChalcogen()){ 384 atomThatWillReplaceOxygen.setCharge(atomToUse.getCharge()); 385 atomThatWillReplaceOxygen.setProtonsExplicitlyAddedOrRemoved(atomToUse.getProtonsExplicitlyAddedOrRemoved()); 386 } 387 removeOrMoveObsoleteFunctionalAtoms(atomToUse, replacementFrag);//also will move charge if necessary 388 moveObsoleteOutAtoms(atomToUse, replacementFrag);//if the replaced atom was an outatom the fragments outatom list need to be corrected 389 if (nitrido){ 390 atomToUse.getFirstBond().setOrder(3); 391 Atom removedHydroxy = singleBondedOxygen.removeFirst(); 392 state.fragManager.removeAtomAndAssociatedBonds(removedHydroxy); 393 removeAssociatedFunctionalAtom(removedHydroxy); 394 } 395 state.fragManager.incorporateFragment(replacementFrag, atomToUse.getFrag()); 396 state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(atomToUse, atomThatWillReplaceOxygen); 397 if (infixAssignmentAmbiguous){ 398 ambiguousElementAtoms.add(atomThatWillReplaceOxygen); 399 if (atomThatWillReplaceOxygen.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT)!=null){ 400 ambiguousElementAtoms.addAll(atomThatWillReplaceOxygen.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT)); 401 } 402 } 403 if (infixAssignmentAmbiguous){//record what atoms could have been replaced. Often this ambiguity is resolved later e.g. S-methyl ethanthioate 404 for (Atom a : doubleBondedOxygen) { 405 ambiguousElementAtoms.add(a); 406 if (a.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT)!=null){ 407 ambiguousElementAtoms.addAll(a.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT)); 408 } 409 } 410 for (Atom a : singleBondedOxygen) { 411 ambiguousElementAtoms.add(a); 412 if (a.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT)!=null){ 413 ambiguousElementAtoms.addAll(a.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT)); 414 } 415 } 416 for (Atom atom : ambiguousElementAtoms) { 417 atom.setProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT, ambiguousElementAtoms); 418 } 419 } 420 } 421 } 422 } 423 } 424 425 /* 426 * Functional class nomenclature 427 */ 428 429 /** 430 * Replaces the appropriate number of functional oxygen atoms with the corresponding fragment 431 * @param acidContainingRoot 432 * @param acidReplacingWord 433 * @throws ComponentGenerationException 434 * @throws StructureBuildingException 435 */ processAcidReplacingFunctionalClassNomenclatureFullWord(Element acidContainingRoot, Element acidReplacingWord)436 private void processAcidReplacingFunctionalClassNomenclatureFullWord(Element acidContainingRoot, Element acidReplacingWord) throws ComponentGenerationException, StructureBuildingException { 437 String locant = acidReplacingWord.getAttributeValue(LOCANT_ATR); 438 Element acidReplacingGroup = StructureBuildingMethods.findRightMostGroupInBracket(acidReplacingWord); 439 if (acidReplacingGroup ==null){ 440 throw new ComponentGenerationException("OPSIN bug: acid replacing group not found where one was expected for acidReplacingFunctionalGroup wordRule"); 441 } 442 String functionalGroupName = acidReplacingGroup.getValue(); 443 Fragment acidReplacingFrag = acidReplacingGroup.getFrag(); 444 if (acidReplacingGroup.getParent().getChildCount() != 1){ 445 throw new ComponentGenerationException("Unexpected qualifier to: " + functionalGroupName); 446 } 447 448 Element groupToBeModified = acidContainingRoot.getFirstChildElement(GROUP_EL); 449 List<Atom> oxygenAtoms = findFunctionalOxygenAtomsInApplicableSuffixes(groupToBeModified); 450 if (oxygenAtoms.size() == 0){ 451 oxygenAtoms = findFunctionalOxygenAtomsInGroup(groupToBeModified); 452 } 453 if (oxygenAtoms.size() == 0){ 454 List<Element> conjunctiveSuffixElements =OpsinTools.getNextSiblingsOfType(groupToBeModified, CONJUNCTIVESUFFIXGROUP_EL); 455 for (Element conjunctiveSuffixElement : conjunctiveSuffixElements) { 456 oxygenAtoms.addAll(findFunctionalOxygenAtomsInGroup(conjunctiveSuffixElement)); 457 } 458 } 459 if (oxygenAtoms.size() < 1){ 460 throw new ComponentGenerationException("Insufficient oxygen to replace with " + functionalGroupName +"s in " + acidContainingRoot.getFirstChildElement(GROUP_EL).getValue()); 461 } 462 463 boolean isAmide = functionalGroupName.equals("amide") || functionalGroupName.equals("amid"); 464 if (isAmide) { 465 if (acidReplacingFrag.getAtomCount()!=1){ 466 throw new ComponentGenerationException("OPSIN bug: " + functionalGroupName + " not found where expected"); 467 } 468 Atom amideNitrogen = acidReplacingFrag.getFirstAtom(); 469 amideNitrogen.neutraliseCharge(); 470 amideNitrogen.clearLocants(); 471 acidReplacingFrag.addMappingToAtomLocantMap("N", amideNitrogen); 472 } 473 Atom chosenOxygen = locant != null ? removeOxygenWithAppropriateLocant(oxygenAtoms, locant) : oxygenAtoms.get(0); 474 state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(chosenOxygen, acidReplacingFrag.getFirstAtom()); 475 removeAssociatedFunctionalAtom(chosenOxygen); 476 } 477 478 479 /** 480 * Replaces the appropriate number of functional oxygen atoms with the corresponding fragment 481 * @param acidContainingRoot 482 * @param functionalWord 483 * @throws ComponentGenerationException 484 * @throws StructureBuildingException 485 */ processAcidReplacingFunctionalClassNomenclatureFunctionalWord(Element acidContainingRoot, Element functionalWord)486 private void processAcidReplacingFunctionalClassNomenclatureFunctionalWord(Element acidContainingRoot, Element functionalWord) throws ComponentGenerationException, StructureBuildingException { 487 if (functionalWord !=null && functionalWord.getAttributeValue(TYPE_ATR).equals(WordType.functionalTerm.toString())){ 488 Element functionalTerm = functionalWord.getFirstChildElement(FUNCTIONALTERM_EL); 489 if (functionalTerm ==null){ 490 throw new ComponentGenerationException("OPSIN bug: functionalTerm word not found where one was expected for acidReplacingFunctionalGroup wordRule"); 491 } 492 Element acidReplacingGroup = functionalTerm.getFirstChildElement(FUNCTIONALGROUP_EL); 493 String functionalGroupName = acidReplacingGroup.getValue(); 494 Element possibleLocantOrMultiplier = OpsinTools.getPreviousSibling(acidReplacingGroup); 495 int numberOfAcidicHydroxysToReplace = 1; 496 String[] locants = null; 497 if (possibleLocantOrMultiplier != null){ 498 if (possibleLocantOrMultiplier.getName().equals(MULTIPLIER_EL)){ 499 numberOfAcidicHydroxysToReplace = Integer.parseInt(possibleLocantOrMultiplier.getAttributeValue(VALUE_ATR)); 500 possibleLocantOrMultiplier.detach(); 501 possibleLocantOrMultiplier = OpsinTools.getPreviousSibling(acidReplacingGroup); 502 } 503 if (possibleLocantOrMultiplier != null){ 504 if (possibleLocantOrMultiplier.getName().equals(LOCANT_EL)){ 505 locants = StringTools.removeDashIfPresent(possibleLocantOrMultiplier.getValue()).split(","); 506 possibleLocantOrMultiplier.detach(); 507 } 508 else { 509 throw new ComponentGenerationException("Unexpected qualifier to acidReplacingFunctionalGroup functionalTerm"); 510 } 511 } 512 } 513 if (functionalTerm.getChildCount() != 1){ 514 throw new ComponentGenerationException("Unexpected qualifier to acidReplacingFunctionalGroup functionalTerm"); 515 } 516 517 Element groupToBeModified = acidContainingRoot.getFirstChildElement(GROUP_EL); 518 List<Atom> oxygenAtoms = findFunctionalOxygenAtomsInApplicableSuffixes(groupToBeModified); 519 if (oxygenAtoms.size()==0) { 520 oxygenAtoms = findFunctionalOxygenAtomsInGroup(groupToBeModified); 521 } 522 if (oxygenAtoms.size()==0) { 523 List<Element> conjunctiveSuffixElements =OpsinTools.getNextSiblingsOfType(groupToBeModified, CONJUNCTIVESUFFIXGROUP_EL); 524 for (Element conjunctiveSuffixElement : conjunctiveSuffixElements) { 525 oxygenAtoms.addAll(findFunctionalOxygenAtomsInGroup(conjunctiveSuffixElement)); 526 } 527 } 528 if (numberOfAcidicHydroxysToReplace > oxygenAtoms.size()){ 529 throw new ComponentGenerationException("Insufficient oxygen to replace with nitrogen in " + acidContainingRoot.getFirstChildElement(GROUP_EL).getValue()); 530 } 531 boolean isAmide = functionalGroupName.equals("amide") || functionalGroupName.equals("amid"); 532 if (isAmide) { 533 for (int i = 0; i < numberOfAcidicHydroxysToReplace; i++) { 534 Atom functionalOxygenToReplace = locants != null ? removeOxygenWithAppropriateLocant(oxygenAtoms, locants[i]) : oxygenAtoms.get(i); 535 removeAssociatedFunctionalAtom(functionalOxygenToReplace); 536 functionalOxygenToReplace.setElement(ChemEl.N); 537 } 538 } 539 else{ 540 String groupValue = acidReplacingGroup.getAttributeValue(VALUE_ATR); 541 String labelsValue = acidReplacingGroup.getAttributeValue(LABELS_ATR); 542 Fragment acidReplacingFrag = state.fragManager.buildSMILES(groupValue, SUFFIX_TYPE_VAL, labelsValue != null ? labelsValue : NONE_LABELS_VAL); 543 Fragment acidFragment = groupToBeModified.getFrag(); 544 if (acidFragment.hasLocant("2")){//prefer numeric locants on group to those of replacing group 545 for (Atom atom : acidReplacingFrag.getAtomList()) { 546 atom.clearLocants(); 547 } 548 } 549 Atom firstFunctionalOxygenToReplace = locants != null ? removeOxygenWithAppropriateLocant(oxygenAtoms, locants[0]) : oxygenAtoms.get(0); 550 state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(firstFunctionalOxygenToReplace, acidReplacingFrag.getFirstAtom()); 551 removeAssociatedFunctionalAtom(firstFunctionalOxygenToReplace); 552 for (int i = 1; i < numberOfAcidicHydroxysToReplace; i++) { 553 Fragment clonedHydrazide = state.fragManager.copyAndRelabelFragment(acidReplacingFrag, i); 554 Atom functionalOxygenToReplace = locants != null ? removeOxygenWithAppropriateLocant(oxygenAtoms, locants[i]) : oxygenAtoms.get(i); 555 state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(functionalOxygenToReplace, clonedHydrazide.getFirstAtom()); 556 state.fragManager.incorporateFragment(clonedHydrazide, functionalOxygenToReplace.getFrag()); 557 removeAssociatedFunctionalAtom(functionalOxygenToReplace); 558 } 559 state.fragManager.incorporateFragment(acidReplacingFrag, firstFunctionalOxygenToReplace.getFrag()); 560 } 561 } 562 else{ 563 throw new ComponentGenerationException("amide word not found where expected, bug?"); 564 } 565 } 566 removeOxygenWithAppropriateLocant(List<Atom> oxygenAtoms, String locant)567 private Atom removeOxygenWithAppropriateLocant(List<Atom> oxygenAtoms, String locant) throws ComponentGenerationException { 568 for (Iterator<Atom> iterator = oxygenAtoms.iterator(); iterator.hasNext();) { 569 Atom atom = iterator.next(); 570 if (atom.hasLocant(locant)) { 571 iterator.remove(); 572 return atom; 573 } 574 } 575 //Look for the case whether the locant refers to the backbone 576 for (Iterator<Atom> iterator = oxygenAtoms.iterator(); iterator.hasNext();) { 577 Atom atom = iterator.next(); 578 if (OpsinTools.depthFirstSearchForNonSuffixAtomWithLocant(atom, locant) != null){ 579 iterator.remove(); 580 return atom; 581 } 582 } 583 throw new ComponentGenerationException("Failed to find acid group at locant: " + locant); 584 } 585 586 587 /* 588 * Prefix functional replacement nomenclature 589 */ 590 591 acidHasSufficientHydrogenForSubstitutionInterpretation(Fragment acidFrag, int hydrogenRequiredForSubstitutionInterpretation, Element locantEl)592 private boolean acidHasSufficientHydrogenForSubstitutionInterpretation(Fragment acidFrag, int hydrogenRequiredForSubstitutionInterpretation, Element locantEl) { 593 List<Atom> atomsThatWouldBeSubstituted = new ArrayList<Atom>(); 594 if (locantEl !=null){ 595 String[] possibleLocants = locantEl.getValue().split(","); 596 for (String locant : possibleLocants) { 597 Atom atomToBeSubstituted = acidFrag.getAtomByLocant(locant); 598 if (atomToBeSubstituted !=null){ 599 atomsThatWouldBeSubstituted.add(atomToBeSubstituted); 600 } 601 else{ 602 atomsThatWouldBeSubstituted.clear(); 603 atomsThatWouldBeSubstituted.add(acidFrag.getDefaultInAtomOrFirstAtom()); 604 break; 605 } 606 } 607 } 608 else{ 609 atomsThatWouldBeSubstituted.add(acidFrag.getDefaultInAtomOrFirstAtom()); 610 } 611 for (Atom atom : atomsThatWouldBeSubstituted) { 612 if (StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(atom) < hydrogenRequiredForSubstitutionInterpretation){ 613 return false;//insufficient hydrogens for substitution interpretation 614 } 615 } 616 return true; 617 } 618 619 /** 620 * Performs replacement of oxygen atoms by chalogen atoms 621 * If this is ambiguous e.g. thioacetate then Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT is populated 622 * @param groupToBeModified 623 * @param locantEl 624 * @param numberOfAtomsToReplace 625 * @param replacementSmiles 626 * @return 627 * @throws StructureBuildingException 628 */ performChalcogenFunctionalReplacement(Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace, String replacementSmiles)629 private int performChalcogenFunctionalReplacement(Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace, String replacementSmiles) throws StructureBuildingException { 630 List<Atom> oxygenAtoms = findOxygenAtomsInApplicableSuffixes(groupToBeModified); 631 if (oxygenAtoms.size() == 0) { 632 oxygenAtoms = findOxygenAtomsInGroup(groupToBeModified); 633 } 634 if (locantEl != null) {//locants are used to indicate replacement on trivial groups 635 List<Atom> oxygenWithAppropriateLocants = pickOxygensWithAppropriateLocants(locantEl, oxygenAtoms); 636 if(oxygenWithAppropriateLocants.size() < numberOfAtomsToReplace) { 637 numberOfAtomsToReplace = 1; 638 //e.g. -1-thioureidomethyl 639 } 640 else{ 641 locantEl.detach(); 642 oxygenAtoms = oxygenWithAppropriateLocants; 643 } 644 } 645 List<Atom> replaceableAtoms = new ArrayList<Atom>(); 646 if (replacementSmiles.startsWith("=")) { 647 //e.g. thiono 648 replacementSmiles = replacementSmiles.substring(1); 649 for (Atom oxygen : oxygenAtoms) { 650 int incomingValency = oxygen.getIncomingValency(); 651 int bondCount = oxygen.getBondCount(); 652 if (bondCount == 1 && incomingValency == 2) { 653 replaceableAtoms.add(oxygen); 654 } 655 } 656 } 657 else { 658 List<Atom> doubleBondedOxygen = new ArrayList<Atom>(); 659 List<Atom> singleBondedOxygen = new ArrayList<Atom>(); 660 List<Atom> ethericOxygen = new ArrayList<Atom>(); 661 for (Atom oxygen : oxygenAtoms) { 662 int incomingValency = oxygen.getIncomingValency(); 663 int bondCount = oxygen.getBondCount(); 664 if (bondCount == 1 && incomingValency ==2 ) { 665 doubleBondedOxygen.add(oxygen); 666 } 667 else if (bondCount == 1 && incomingValency == 1) { 668 singleBondedOxygen.add(oxygen); 669 } 670 else if (bondCount == 2 && incomingValency == 2) { 671 ethericOxygen.add(oxygen); 672 } 673 } 674 replaceableAtoms.addAll(doubleBondedOxygen); 675 replaceableAtoms.addAll(singleBondedOxygen); 676 replaceableAtoms.addAll(ethericOxygen); 677 } 678 679 int totalOxygen = replaceableAtoms.size(); 680 if (numberOfAtomsToReplace >1){ 681 if (totalOxygen < numberOfAtomsToReplace){ 682 numberOfAtomsToReplace=1; 683 } 684 } 685 686 int atomsReplaced =0; 687 if (totalOxygen >=numberOfAtomsToReplace){//check that there atleast as many oxygens as requested replacements 688 boolean prefixAssignmentAmbiguous =false; 689 Set<Atom> ambiguousElementAtoms = new LinkedHashSet<Atom>(); 690 if (totalOxygen != numberOfAtomsToReplace){ 691 prefixAssignmentAmbiguous=true; 692 } 693 694 for (Atom atomToReplace : replaceableAtoms) { 695 if (atomsReplaced == numberOfAtomsToReplace){ 696 ambiguousElementAtoms.add(atomToReplace); 697 continue; 698 } 699 else{ 700 state.fragManager.replaceAtomWithSmiles(atomToReplace, replacementSmiles); 701 if (prefixAssignmentAmbiguous){ 702 ambiguousElementAtoms.add(atomToReplace); 703 } 704 } 705 atomsReplaced++; 706 } 707 708 if (prefixAssignmentAmbiguous){//record what atoms could have been replaced. Often this ambiguity is resolved later e.g. S-methyl thioacetate 709 for (Atom atom : ambiguousElementAtoms) { 710 atom.setProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT, ambiguousElementAtoms); 711 } 712 } 713 } 714 return atomsReplaced; 715 } 716 717 718 /** 719 * Converts functional oxygen to peroxy e.g. peroxybenzoic acid 720 * Returns the number of oxygen replaced 721 * @param groupToBeModified 722 * @param locantEl 723 * @param numberOfAtomsToReplace 724 * @return 725 * @throws StructureBuildingException 726 */ performPeroxyFunctionalReplacement(Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace)727 private int performPeroxyFunctionalReplacement(Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace) throws StructureBuildingException { 728 List<Atom> oxygenAtoms = findFunctionalOxygenAtomsInApplicableSuffixes(groupToBeModified); 729 if (oxygenAtoms.size()==0){ 730 oxygenAtoms = findEthericOxygenAtomsInGroup(groupToBeModified); 731 oxygenAtoms.addAll(findFunctionalOxygenAtomsInGroup(groupToBeModified)); 732 } 733 if (locantEl !=null){ 734 List<Atom> oxygenWithAppropriateLocants = pickOxygensWithAppropriateLocants(locantEl, oxygenAtoms); 735 if(oxygenWithAppropriateLocants.size() < numberOfAtomsToReplace){ 736 numberOfAtomsToReplace =1; 737 } 738 else{ 739 locantEl.detach(); 740 oxygenAtoms = oxygenWithAppropriateLocants; 741 } 742 } 743 if (numberOfAtomsToReplace >1 && oxygenAtoms.size() < numberOfAtomsToReplace){ 744 numberOfAtomsToReplace=1; 745 } 746 int atomsReplaced = 0; 747 if (oxygenAtoms.size() >=numberOfAtomsToReplace){//check that there atleast as many oxygens as requested replacements 748 atomsReplaced = numberOfAtomsToReplace; 749 for (int j = 0; j < numberOfAtomsToReplace; j++) { 750 Atom oxygenToReplace = oxygenAtoms.get(j); 751 if (oxygenToReplace.getBondCount()==2){//etheric oxygen 752 Fragment newOxygen = state.fragManager.buildSMILES("O", SUFFIX_TYPE_VAL, NONE_LABELS_VAL); 753 Bond bondToRemove = oxygenToReplace.getFirstBond(); 754 Atom atomToAttachTo = bondToRemove.getFromAtom() == oxygenToReplace ? bondToRemove.getToAtom() : bondToRemove.getFromAtom(); 755 state.fragManager.createBond(atomToAttachTo, newOxygen.getFirstAtom(), 1); 756 state.fragManager.createBond(newOxygen.getFirstAtom(), oxygenToReplace, 1); 757 state.fragManager.removeBond(bondToRemove); 758 state.fragManager.incorporateFragment(newOxygen, groupToBeModified.getFrag()); 759 } 760 else{ 761 Fragment replacementFrag = state.fragManager.buildSMILES("OO", SUFFIX_TYPE_VAL, NONE_LABELS_VAL); 762 removeOrMoveObsoleteFunctionalAtoms(oxygenToReplace, replacementFrag); 763 state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(oxygenToReplace, replacementFrag.getFirstAtom()); 764 state.fragManager.incorporateFragment(replacementFrag, groupToBeModified.getFrag()); 765 } 766 } 767 } 768 return atomsReplaced; 769 } 770 771 /** 772 * Replaces double bonded oxygen and/or single bonded oxygen depending on the input SMILES 773 * SMILES with a valency 1 outAtom replace -O, SMILES with a valency 2 outAtom replace =O 774 * SMILES with a valency 3 outAtom replace -O and =O (nitrido) 775 * Returns the number of oxygen replaced 776 * @param groupToBeModified 777 * @param locantEl 778 * @param numberOfAtomsToReplace 779 * @param replacementSmiles 780 * @return 781 * @throws StructureBuildingException 782 */ performFunctionalReplacementOnAcid(Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace, String replacementSmiles)783 private int performFunctionalReplacementOnAcid(Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace, String replacementSmiles) throws StructureBuildingException { 784 int outValency; 785 if (replacementSmiles.startsWith("-")){ 786 outValency =1; 787 } 788 else if (replacementSmiles.startsWith("=")){ 789 outValency =2; 790 } 791 else if (replacementSmiles.startsWith("#")){ 792 outValency =3; 793 } 794 else{ 795 throw new StructureBuildingException("OPSIN bug: Unexpected valency on fragment for prefix functional replacement"); 796 } 797 replacementSmiles = replacementSmiles.substring(1); 798 List<Atom> oxygenAtoms = findOxygenAtomsInApplicableSuffixes(groupToBeModified); 799 if (oxygenAtoms.size()==0){ 800 oxygenAtoms = findOxygenAtomsInGroup(groupToBeModified); 801 } 802 if (locantEl !=null){//locants are used to indicate replacement on trivial groups 803 List<Atom> oxygenWithAppropriateLocants = pickOxygensWithAppropriateLocants(locantEl, oxygenAtoms); 804 List<Atom> singleBondedOxygen = new ArrayList<Atom>(); 805 List<Atom> terminalDoubleBondedOxygen = new ArrayList<Atom>(); 806 populateTerminalSingleAndDoubleBondedOxygen(oxygenWithAppropriateLocants, singleBondedOxygen, terminalDoubleBondedOxygen); 807 if (outValency ==1){ 808 oxygenWithAppropriateLocants.removeAll(terminalDoubleBondedOxygen); 809 } 810 else if (outValency ==2){ 811 oxygenWithAppropriateLocants.removeAll(singleBondedOxygen); 812 } 813 if(oxygenWithAppropriateLocants.size() < numberOfAtomsToReplace){ 814 numberOfAtomsToReplace =1; 815 //e.g. -1-thioureidomethyl 816 } 817 else{ 818 locantEl.detach(); 819 oxygenAtoms = oxygenWithAppropriateLocants; 820 } 821 } 822 List<Atom> singleBondedOxygen = new ArrayList<Atom>(); 823 List<Atom> terminalDoubleBondedOxygen = new ArrayList<Atom>(); 824 populateTerminalSingleAndDoubleBondedOxygen(oxygenAtoms, singleBondedOxygen, terminalDoubleBondedOxygen); 825 if (outValency ==1){ 826 oxygenAtoms.removeAll(terminalDoubleBondedOxygen); 827 } 828 else if (outValency ==2){ 829 oxygenAtoms.removeAll(singleBondedOxygen); 830 //favour bridging oxygen over double bonded oxygen c.f. imidodicarbonate 831 oxygenAtoms.removeAll(terminalDoubleBondedOxygen); 832 oxygenAtoms.addAll(terminalDoubleBondedOxygen); 833 } 834 else { 835 if (singleBondedOxygen.size()==0 || terminalDoubleBondedOxygen.size()==0){ 836 throw new StructureBuildingException("Both a -OH and =O are required for nitrido prefix functional replacement"); 837 } 838 oxygenAtoms.removeAll(singleBondedOxygen); 839 } 840 if (numberOfAtomsToReplace >1 && oxygenAtoms.size() < numberOfAtomsToReplace){ 841 numberOfAtomsToReplace=1; 842 } 843 844 int atomsReplaced =0; 845 if (oxygenAtoms.size() >=numberOfAtomsToReplace){//check that there atleast as many oxygens as requested replacements 846 for (Atom atomToReplace : oxygenAtoms) { 847 if (atomsReplaced == numberOfAtomsToReplace){ 848 continue; 849 } 850 else{ 851 Fragment replacementFrag = state.fragManager.buildSMILES(replacementSmiles, atomToReplace.getFrag().getTokenEl(), NONE_LABELS_VAL); 852 if (outValency ==3){//special case for nitrido 853 atomToReplace.getFirstBond().setOrder(3); 854 Atom removedHydroxy = singleBondedOxygen.remove(0); 855 state.fragManager.removeAtomAndAssociatedBonds(removedHydroxy); 856 removeAssociatedFunctionalAtom(removedHydroxy); 857 } 858 state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(atomToReplace, replacementFrag.getFirstAtom()); 859 if (outValency ==1){ 860 removeOrMoveObsoleteFunctionalAtoms(atomToReplace, replacementFrag); 861 } 862 moveObsoleteOutAtoms(atomToReplace, replacementFrag); 863 state.fragManager.incorporateFragment(replacementFrag, atomToReplace.getFrag()); 864 } 865 atomsReplaced++; 866 } 867 } 868 return atomsReplaced; 869 } 870 871 /* 872 * Infix functional replacement nomenclature 873 */ 874 875 /** 876 * This block handles infix multiplication. Unless brackets are provided this is ambiguous without knowledge of the suffix that is being modified 877 * For example butandithione could be intepreted as butandi(thione) or butan(dithi)one. 878 * Obviously the latter is wrong in this case but it is the correct interpretation for butandithiate 879 * @param suffixes 880 * @param suffixFragments 881 * @param suffix 882 * @param infixTransformations 883 * @param oxygenAvailable 884 * @throws ComponentGenerationException 885 * @throws StructureBuildingException 886 */ disambiguateMultipliedInfixMeaning(List<Element> suffixes, List<Fragment> suffixFragments,Element suffix, List<String> infixTransformations, int oxygenAvailable)887 private void disambiguateMultipliedInfixMeaning(List<Element> suffixes, 888 List<Fragment> suffixFragments,Element suffix, List<String> infixTransformations, int oxygenAvailable) 889 throws ComponentGenerationException, StructureBuildingException { 890 Element possibleInfix = OpsinTools.getPreviousSibling(suffix); 891 if (possibleInfix.getName().equals(INFIX_EL)){//the infix is only left when there was ambiguity 892 Element possibleMultiplier = OpsinTools.getPreviousSibling(possibleInfix); 893 if (possibleMultiplier.getName().equals(MULTIPLIER_EL)){ 894 int multiplierValue =Integer.parseInt(possibleMultiplier.getAttributeValue(VALUE_ATR)); 895 if (infixTransformations.size() + multiplierValue-1 <=oxygenAvailable){//multiplier means multiply the infix e.g. butandithiate 896 for (int j = 1; j < multiplierValue; j++) { 897 infixTransformations.add(0, infixTransformations.get(0)); 898 } 899 } 900 else{ 901 Element possibleLocant = OpsinTools.getPreviousSibling(possibleMultiplier); 902 String[] locants = null; 903 if (possibleLocant.getName().equals(LOCANT_EL)) { 904 locants = possibleLocant.getValue().split(","); 905 } 906 if (locants !=null){ 907 if (locants.length!=multiplierValue){ 908 throw new ComponentGenerationException("Multiplier/locant disagreement when multiplying infixed suffix"); 909 } 910 suffix.addAttribute(new Attribute(LOCANT_ATR, locants[0])); 911 } 912 suffix.addAttribute(new Attribute(MULTIPLIED_ATR, "multiplied")); 913 for (int j = 1; j < multiplierValue; j++) {//multiplier means multiply the infixed suffix e.g. butandithione 914 Element newSuffix = suffix.copy(); 915 Fragment newSuffixFrag = state.fragManager.copyFragment(suffix.getFrag()); 916 newSuffix.setFrag(newSuffixFrag); 917 suffixFragments.add(newSuffixFrag); 918 OpsinTools.insertAfter(suffix, newSuffix); 919 suffixes.add(newSuffix); 920 if (locants !=null){//assign locants if available 921 newSuffix.getAttribute(LOCANT_ATR).setValue(locants[j]); 922 } 923 } 924 if (locants!=null){ 925 possibleLocant.detach(); 926 } 927 } 928 possibleMultiplier.detach(); 929 possibleInfix.detach(); 930 } 931 else{ 932 throw new ComponentGenerationException("Multiplier expected in front of ambiguous infix"); 933 } 934 } 935 } 936 937 /* 938 * Convenience Methods 939 */ 940 941 /** 942 * Given an atom that is to be replaced by a functional replacement fragment 943 * determines whether this atom is a functional atom and, if it is, performs the following processes: 944 * The functionalAtom is removed. If the the replacement fragment is an atom of O/S/Se/Te or the 945 * the terminal atom of the fragment is a single bonded O/S/Se/Te a functionAom is added to this atom. 946 * @param atomToBeReplaced 947 * @param replacementFrag 948 */ removeOrMoveObsoleteFunctionalAtoms(Atom atomToBeReplaced, Fragment replacementFrag)949 private void removeOrMoveObsoleteFunctionalAtoms(Atom atomToBeReplaced, Fragment replacementFrag){ 950 List<Atom> replacementAtomList = replacementFrag.getAtomList(); 951 Fragment origFrag = atomToBeReplaced.getFrag(); 952 for (int i = origFrag.getFunctionalAtomCount() - 1; i >=0; i--) { 953 FunctionalAtom functionalAtom = origFrag.getFunctionalAtom(i); 954 if (atomToBeReplaced.equals(functionalAtom.getAtom())){ 955 atomToBeReplaced.getFrag().removeFunctionalAtom(i); 956 Atom terminalAtomOfReplacementFrag = replacementAtomList.get(replacementAtomList.size()-1); 957 if ((terminalAtomOfReplacementFrag.getIncomingValency() ==1 || replacementAtomList.size()==1)&& terminalAtomOfReplacementFrag.getElement().isChalcogen()){ 958 replacementFrag.addFunctionalAtom(terminalAtomOfReplacementFrag); 959 terminalAtomOfReplacementFrag.setCharge(atomToBeReplaced.getCharge()); 960 terminalAtomOfReplacementFrag.setProtonsExplicitlyAddedOrRemoved(atomToBeReplaced.getProtonsExplicitlyAddedOrRemoved()); 961 } 962 atomToBeReplaced.neutraliseCharge(); 963 } 964 } 965 } 966 967 /** 968 * Given an atom that is to be replaced by a functional replacement fragment 969 * determines whether this atom has outvalency and if it does removes the outatom from the atom's fragment 970 * and adds an outatom to the replacementFrag 971 * @param atomToBeReplaced 972 * @param replacementFrag 973 */ moveObsoleteOutAtoms(Atom atomToBeReplaced, Fragment replacementFrag)974 private void moveObsoleteOutAtoms(Atom atomToBeReplaced, Fragment replacementFrag){ 975 if (atomToBeReplaced.getOutValency() >0){//this is not known to occur in well formed IUPAC names but would occur in thioxy (as a suffix) 976 List<Atom> replacementAtomList = replacementFrag.getAtomList(); 977 Fragment origFrag = atomToBeReplaced.getFrag(); 978 for (int i = origFrag.getOutAtomCount() - 1; i >=0; i--) { 979 OutAtom outAtom = origFrag.getOutAtom(i); 980 if (atomToBeReplaced.equals(outAtom.getAtom())){ 981 atomToBeReplaced.getFrag().removeOutAtom(i); 982 Atom terminalAtomOfReplacementFrag = replacementAtomList.get(replacementAtomList.size()-1); 983 replacementFrag.addOutAtom(terminalAtomOfReplacementFrag, outAtom.getValency(), outAtom.isSetExplicitly()); 984 } 985 } 986 } 987 } 988 removeAssociatedFunctionalAtom(Atom atomWithFunctionalAtom)989 private void removeAssociatedFunctionalAtom(Atom atomWithFunctionalAtom) throws StructureBuildingException { 990 Fragment frag = atomWithFunctionalAtom.getFrag(); 991 for (int i = frag.getFunctionalAtomCount() - 1; i >=0; i--) { 992 FunctionalAtom functionalAtom = frag.getFunctionalAtom(i); 993 if (atomWithFunctionalAtom.equals(functionalAtom.getAtom())){ 994 atomWithFunctionalAtom.getFrag().removeFunctionalAtom(i); 995 return; 996 } 997 } 998 throw new StructureBuildingException("OPSIN bug: Unable to find associated functionalAtom"); 999 } 1000 1001 1002 /** 1003 * Returns the subset of oxygenAtoms that possess one of the locants in locantEl 1004 * Searches for locant on nearest non suffix atom in case of suffixes 1005 * @param locantEl 1006 * @param oxygenAtoms 1007 * @return 1008 */ pickOxygensWithAppropriateLocants(Element locantEl, List<Atom> oxygenAtoms)1009 private List<Atom> pickOxygensWithAppropriateLocants(Element locantEl, List<Atom> oxygenAtoms) { 1010 String[] possibleLocants = locantEl.getValue().split(","); 1011 boolean pLocantSpecialCase = allLocantsP(possibleLocants); 1012 List<Atom> oxygenWithAppropriateLocants = new ArrayList<Atom>(); 1013 for (Atom atom : oxygenAtoms) { 1014 List<String> atomlocants = atom.getLocants(); 1015 if (atomlocants.size() > 0) { 1016 for (String locantVal : possibleLocants) { 1017 if (atomlocants.contains(locantVal)) { 1018 oxygenWithAppropriateLocants.add(atom); 1019 break; 1020 } 1021 } 1022 } 1023 else if (pLocantSpecialCase) { 1024 for (Atom neighbour : atom.getAtomNeighbours()) { 1025 if (neighbour.getElement() == ChemEl.P) { 1026 oxygenWithAppropriateLocants.add(atom); 1027 break; 1028 } 1029 } 1030 } 1031 else { 1032 Atom atomWithNumericLocant = OpsinTools.depthFirstSearchForAtomWithNumericLocant(atom); 1033 if (atomWithNumericLocant != null) { 1034 List<String> atomWithNumericLocantLocants = atomWithNumericLocant.getLocants(); 1035 for (String locantVal : possibleLocants) { 1036 if (atomWithNumericLocantLocants.contains(locantVal)) { 1037 oxygenWithAppropriateLocants.add(atom); 1038 break; 1039 } 1040 } 1041 } 1042 } 1043 } 1044 return oxygenWithAppropriateLocants; 1045 } 1046 allLocantsP(String[] locants)1047 private boolean allLocantsP(String[] locants) { 1048 if (locants.length == 0) { 1049 return false; 1050 } 1051 for (String locant : locants) { 1052 if (!locant.equals("P")) { 1053 return false; 1054 } 1055 } 1056 return true; 1057 } 1058 1059 /** 1060 * Returns oxygen atoms in suffixes with functionalAtoms 1061 * @param groupToBeModified 1062 * @return 1063 */ findFunctionalOxygenAtomsInApplicableSuffixes(Element groupToBeModified)1064 private List<Atom> findFunctionalOxygenAtomsInApplicableSuffixes(Element groupToBeModified) { 1065 List<Element> suffixElements =OpsinTools.getNextSiblingsOfType(groupToBeModified, SUFFIX_EL); 1066 List<Atom> oxygenAtoms = new ArrayList<Atom>(); 1067 for (Element suffix : suffixElements) { 1068 Fragment suffixFrag = suffix.getFrag(); 1069 if (suffixFrag != null) {//null for non carboxylic acids 1070 for (int i = 0, l = suffixFrag.getFunctionalAtomCount(); i < l; i++) { 1071 Atom a = suffixFrag.getFunctionalAtom(i).getAtom(); 1072 if (a.getElement() == ChemEl.O) { 1073 oxygenAtoms.add(a); 1074 } 1075 } 1076 } 1077 } 1078 return oxygenAtoms; 1079 } 1080 1081 /** 1082 * Returns functional oxygen atoms in groupToBeModified 1083 * @param groupToBeModified 1084 * @return 1085 */ findFunctionalOxygenAtomsInGroup(Element groupToBeModified)1086 private List<Atom> findFunctionalOxygenAtomsInGroup(Element groupToBeModified) { 1087 List<Atom> oxygenAtoms = new ArrayList<Atom>(); 1088 Fragment frag = groupToBeModified.getFrag(); 1089 for (int i = 0, l = frag.getFunctionalAtomCount(); i < l; i++) { 1090 Atom a = frag.getFunctionalAtom(i).getAtom(); 1091 if (a.getElement() == ChemEl.O){ 1092 oxygenAtoms.add(a); 1093 } 1094 } 1095 return oxygenAtoms; 1096 } 1097 1098 1099 /** 1100 * Returns etheric oxygen atoms in groupToBeModified 1101 * @param groupToBeModified 1102 * @return 1103 */ findEthericOxygenAtomsInGroup(Element groupToBeModified)1104 private List<Atom> findEthericOxygenAtomsInGroup(Element groupToBeModified) { 1105 List<Atom> oxygenAtoms = new ArrayList<Atom>(); 1106 List<Atom> atomList = groupToBeModified.getFrag().getAtomList(); 1107 for (Atom a: atomList) { 1108 if (a.getElement() == ChemEl.O && a.getBondCount()==2 && a.getCharge()==0 && a.getIncomingValency()==2){ 1109 oxygenAtoms.add(a); 1110 } 1111 } 1112 return oxygenAtoms; 1113 } 1114 1115 1116 /** 1117 * Returns oxygen atoms in suffixes with functionalAtoms or acidStem suffixes or aldehyde suffixes (1979 C-531) 1118 * @param groupToBeModified 1119 * @return 1120 */ findOxygenAtomsInApplicableSuffixes(Element groupToBeModified)1121 private List<Atom> findOxygenAtomsInApplicableSuffixes(Element groupToBeModified) { 1122 List<Element> suffixElements =OpsinTools.getNextSiblingsOfType(groupToBeModified, SUFFIX_EL); 1123 List<Atom> oxygenAtoms = new ArrayList<Atom>(); 1124 for (Element suffix : suffixElements) { 1125 Fragment suffixFrag = suffix.getFrag(); 1126 if (suffixFrag != null) {//null for non carboxylic acids 1127 if (suffixFrag.getFunctionalAtomCount() > 0 || groupToBeModified.getAttributeValue(TYPE_ATR).equals(ACIDSTEM_TYPE_VAL) || suffix.getAttributeValue(VALUE_ATR).equals("aldehyde")) { 1128 List<Atom> atomList = suffixFrag.getAtomList(); 1129 for (Atom a : atomList) { 1130 if (a.getElement() == ChemEl.O) { 1131 oxygenAtoms.add(a); 1132 } 1133 } 1134 } 1135 } 1136 } 1137 return oxygenAtoms; 1138 } 1139 1140 /** 1141 * Returns oxygen atoms in groupToBeModified 1142 * @param groupToBeModified 1143 * @return 1144 */ findOxygenAtomsInGroup(Element groupToBeModified)1145 private List<Atom> findOxygenAtomsInGroup(Element groupToBeModified) { 1146 List<Atom> oxygenAtoms = new ArrayList<Atom>(); 1147 List<Atom> atomList = groupToBeModified.getFrag().getAtomList(); 1148 for (Atom a : atomList) { 1149 if (a.getElement() == ChemEl.O){ 1150 oxygenAtoms.add(a); 1151 } 1152 } 1153 return oxygenAtoms; 1154 } 1155 1156 populateTerminalSingleAndDoubleBondedOxygen(List<Atom> atomList, List<Atom> singleBondedOxygen, List<Atom> doubleBondedOxygen)1157 private void populateTerminalSingleAndDoubleBondedOxygen(List<Atom> atomList, List<Atom> singleBondedOxygen, List<Atom> doubleBondedOxygen) throws StructureBuildingException { 1158 for (Atom a : atomList) { 1159 if (a.getElement() == ChemEl.O){//find terminal oxygens 1160 if (a.getBondCount()==1){ 1161 int incomingValency = a.getIncomingValency(); 1162 if (incomingValency ==2){ 1163 doubleBondedOxygen.add(a); 1164 } 1165 else if (incomingValency ==1){ 1166 singleBondedOxygen.add(a); 1167 } 1168 else{ 1169 throw new StructureBuildingException("Unexpected bond order to oxygen; excepted 1 or 2 found: " +incomingValency); 1170 } 1171 1172 } 1173 } 1174 } 1175 } 1176 } 1177