1 package uk.ac.cam.ch.wwmm.opsin;
2 
3 import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*;
4 
5 import java.util.ArrayList;
6 import java.util.Collections;
7 import java.util.Comparator;
8 import java.util.Iterator;
9 import java.util.LinkedHashSet;
10 import java.util.LinkedList;
11 import java.util.List;
12 import java.util.Set;
13 import java.util.regex.Pattern;
14 
15 /**
16  * Methods for performing functional replacement
17  * @author dl387
18  *
19  */
20 class FunctionalReplacement {
21 
22 	/**
23 	 * Sorts infix transformations by the number of acceptable inputs for the transformation.
24 	 * e.g. thio ends up towards the end of the list as it accepts both -O or =O whilst say imido only accepts =O
25 	 * @author dl387
26 	 *
27 	 */
28 	private static class SortInfixTransformations implements Comparator<String> {
compare(String infixTransformation1, String infixTransformation2)29 		public int compare(String infixTransformation1, String infixTransformation2) {
30 			int allowedInputs1 = infixTransformation1.split(",").length;
31 			int allowedInputs2 = infixTransformation2.split(",").length;
32 			if (allowedInputs1 < allowedInputs2){//infixTransformation1 preferred
33 				return -1;
34 			}
35 			if (allowedInputs1 > allowedInputs2){//infixTransformation2 preferred
36 				return 1;
37 			}
38 			else{
39 				return 0;
40 			}
41 		}
42 	}
43 	private static enum PREFIX_REPLACEMENT_TYPE{
44 		chalcogen,//ambiguous
45 		halideOrPseudoHalide,//only mean functional replacement when applied to non carboxylic acids
46 		dedicatedFunctionalReplacementPrefix,//no ambiguity exists
47 		hydrazono,//ambiguous, only applies to non carboxylic acid
48 		peroxy//ambiguous, also applies to etheric oxygen
49 	}
50 
	/**
	 * Alternation of the chalcogen replacement prefixes thio, seleno and telluro.
	 * It is applied with matches() in this class, so the whole group value must be one of them
	 */
	private static final Pattern matchChalcogenReplacement= Pattern.compile("thio|seleno|telluro");

	/** The build state given to the constructor; its fragManager is used by this class to build, modify and remove fragments */
	private final BuildState state;
54 
	/**
	 * Creates a FunctionalReplacement that operates using the given build state
	 * @param state Stored and used by the functional replacement methods of this class
	 */
	FunctionalReplacement(BuildState state) {
		this.state = state;
	}
58 
59 	/**
60 	 * Applies the effects of acid replacing functional class nomenclature
61 	 * This must be performed early so that prefix/infix functional replacement is performed correctly
62 	 * and so that element symbol locants are assigned appropriately
63 	 * @param finalSubOrRootInWord
64 	 * @param word
65 	 * @throws ComponentGenerationException
66 	 * @throws StructureBuildingException
67 	 */
processAcidReplacingFunctionalClassNomenclature(Element finalSubOrRootInWord, Element word)68 	void processAcidReplacingFunctionalClassNomenclature(Element finalSubOrRootInWord, Element word) throws ComponentGenerationException, StructureBuildingException {
69 		Element wordRule = OpsinTools.getParentWordRule(word);
70 		if (WordRule.valueOf(wordRule.getAttributeValue(WORDRULE_ATR)) == WordRule.acidReplacingFunctionalGroup){
71 			Element parentWordRule = word.getParent();
72 			if (parentWordRule.indexOf(word)==0){
73 				for (int i = 1, l = parentWordRule.getChildCount(); i < l ; i++) {
74 					Element acidReplacingWord = parentWordRule.getChild(i);
75 					if (!acidReplacingWord.getName().equals(WORD_EL)) {
76 						throw new RuntimeException("OPSIN bug: problem with acidReplacingFunctionalGroup word rule");
77 					}
78 					String type = acidReplacingWord.getAttributeValue(TYPE_ATR);
79 					if (type.equals(WordType.full.toString())) {
80 						//case where functionalTerm is substituted
81 						//as words are processed from right to left in cases like phosphoric acid tri(ethylamide) this will be phosphoric acid ethylamide ethylamide ethylamide
82 						processAcidReplacingFunctionalClassNomenclatureFullWord(finalSubOrRootInWord, acidReplacingWord);
83 					}
84 					else if (type.equals(WordType.functionalTerm.toString())) {
85 						processAcidReplacingFunctionalClassNomenclatureFunctionalWord(finalSubOrRootInWord, acidReplacingWord);
86 					}
87 					else {
88 						throw new RuntimeException("OPSIN bug: problem with acidReplacingFunctionalGroup word rule");
89 					}
90 				}
91 			}
92 		}
93 	}
94 
95 	/**
96 	 * Performs prefix functional replacement e.g. thio in thioacetic acid replaces an O with S
97 	 * Prefixes will present themselves as substituents. There is potential ambiguity between usage as a substituent
98 	 * and as a functional replacement term in some cases. If the substituent is deemed to indicate functional replacement
99 	 * it will be detached and its effects applied to the subsequent group
100 	 *
101 	 * The list of groups and substituents given to this method will be mutated in the process.
102 	 *
103 	 * For heterocyclic rings functional replacement should technically be limited to :
104 	 * pyran, morpholine, chromene, isochromene and xanthene, chromane and isochromane.
105 	 * but this is not currently enforced
106 	 * @param groups
107 	 * @param substituents
108 	 * @return boolean: has any functional replacement occurred
109 	 * @throws StructureBuildingException
110 	 * @throws ComponentGenerationException
111 	 */
processPrefixFunctionalReplacementNomenclature(List<Element> groups, List<Element> substituents)112 	boolean processPrefixFunctionalReplacementNomenclature(List<Element> groups, List<Element> substituents) throws StructureBuildingException, ComponentGenerationException {
113 		int originalNumberOfGroups = groups.size();
114 		for (int i = originalNumberOfGroups-1; i >=0; i--) {
115 			Element group =groups.get(i);
116 			String groupValue = group.getValue();
117 			PREFIX_REPLACEMENT_TYPE replacementType = null;
118 			if (matchChalcogenReplacement.matcher(groupValue).matches() && !isChalcogenSubstituent(group) || groupValue.equals("thiono")){
119 				replacementType =PREFIX_REPLACEMENT_TYPE.chalcogen;
120 			}
121 			else if (HALIDEORPSEUDOHALIDE_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){
122 				replacementType =PREFIX_REPLACEMENT_TYPE.halideOrPseudoHalide;
123 			}
124 			else if (DEDICATEDFUNCTIONALREPLACEMENTPREFIX_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){
125 				replacementType =PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix;
126 			}
127 			else if (groupValue.equals("hydrazono")){
128 				replacementType =PREFIX_REPLACEMENT_TYPE.hydrazono;
129 			}
130 			else if (groupValue.equals("peroxy")){
131 				replacementType =PREFIX_REPLACEMENT_TYPE.peroxy;
132 			}
133 			if (replacementType != null) {
134 				//need to check whether this is an instance of functional replacement by checking the substituent/root it is applying to
135 				Element substituent = group.getParent();
136 				Element nextSubOrBracket = OpsinTools.getNextSibling(substituent);
137 				if (nextSubOrBracket!=null && (nextSubOrBracket.getName().equals(ROOT_EL) || nextSubOrBracket.getName().equals(SUBSTITUENT_EL))){
138 					Element groupToBeModified = nextSubOrBracket.getFirstChildElement(GROUP_EL);
139 					if (groupPrecededByElementThatBlocksPrefixReplacementInterpetation(groupToBeModified)) {
140 						if (replacementType == PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix){
141 							throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue);
142 						}
143 						continue;//not 2,2'-thiodipyran
144 					}
145 					Element locantEl = null;//null unless a locant that agrees with the multiplier is present
146 					Element multiplierEl = null;
147 					int numberOfAtomsToReplace = 1;//the number of atoms to be functionally replaced, modified by a multiplier e.g. dithio
148 					Element possibleMultiplier = OpsinTools.getPreviousSibling(group);
149 					if (possibleMultiplier != null) {
150 						Element possibleLocant;
151 						if (possibleMultiplier.getName().equals(MULTIPLIER_EL)) {
152 							numberOfAtomsToReplace = Integer.valueOf(possibleMultiplier.getAttributeValue(VALUE_ATR));
153 							possibleLocant = OpsinTools.getPreviousSibling(possibleMultiplier);
154 							multiplierEl = possibleMultiplier;
155 						}
156 						else{
157 							possibleLocant = possibleMultiplier;
158 						}
159 						if (possibleLocant !=null && possibleLocant.getName().equals(LOCANT_EL) && possibleLocant.getAttribute(TYPE_ATR) == null) {
160 							int numberOfLocants = possibleLocant.getValue().split(",").length;
161 							if (numberOfLocants == numberOfAtomsToReplace){//locants and number of replacements agree
162 								locantEl = possibleLocant;
163 							}
164 							else if (numberOfAtomsToReplace > 1) {//doesn't look like prefix functional replacement
165 								if (replacementType  == PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix){
166 									throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue);
167 								}
168 								continue;
169 							}
170 						}
171 					}
172 
173 					int oxygenReplaced;
174 					if (replacementType == PREFIX_REPLACEMENT_TYPE.chalcogen) {
175 						oxygenReplaced = performChalcogenFunctionalReplacement(groupToBeModified, locantEl, numberOfAtomsToReplace, group.getAttributeValue(VALUE_ATR));
176 					}
177 					else if (replacementType == PREFIX_REPLACEMENT_TYPE.peroxy) {
178 						if (nextSubOrBracket.getName().equals(SUBSTITUENT_EL)) {
179 							continue;
180 						}
181 						oxygenReplaced = performPeroxyFunctionalReplacement(groupToBeModified, locantEl, numberOfAtomsToReplace);
182 					}
183 					else if (replacementType == PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix){
184 						if (!groupToBeModified.getAttributeValue(TYPE_ATR).equals(NONCARBOXYLICACID_TYPE_VAL)
185 								&& !(groupToBeModified.getValue().equals("form") && groupValue.equals("imido"))){
186 							throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue);
187 						}
188 						oxygenReplaced = performFunctionalReplacementOnAcid(groupToBeModified, locantEl, numberOfAtomsToReplace, group.getAttributeValue(VALUE_ATR));
189 						if (oxygenReplaced==0){
190 							throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue);
191 						}
192 					}
193 					else if (replacementType == PREFIX_REPLACEMENT_TYPE.hydrazono || replacementType == PREFIX_REPLACEMENT_TYPE.halideOrPseudoHalide){
194 						Fragment acidFrag = groupToBeModified.getFrag();
195 						if (!groupToBeModified.getAttributeValue(TYPE_ATR).equals(NONCARBOXYLICACID_TYPE_VAL) ||
196 								acidHasSufficientHydrogenForSubstitutionInterpretation(acidFrag, group.getFrag().getOutAtom(0).getValency(), locantEl)){
197 							//hydrazono replacement only applies to non carboxylic acids e.g. hydrazonooxalic acid
198 							//need to be careful to note that something like chlorophosphonic acid isn't functional replacement
199 							continue;
200 						}
201 						oxygenReplaced = performFunctionalReplacementOnAcid(groupToBeModified, locantEl, numberOfAtomsToReplace, group.getAttributeValue(VALUE_ATR));
202 					}
203 					else{
204 						throw new StructureBuildingException("OPSIN bug: Unexpected prefix replacement type");
205 					}
206 					if (oxygenReplaced>0){
207 						state.fragManager.removeFragment(group.getFrag());
208 						substituent.removeChild(group);
209 						groups.remove(group);
210 						List<Element> remainingChildren =substituent.getChildElements();//there may be a locant that should be moved
211 						for (int j = remainingChildren.size()-1; j>=0; j--){
212 							Element child =substituent.getChild(j);
213 							child.detach();
214 							nextSubOrBracket.insertChild(child, 0);
215 						}
216 						substituents.remove(substituent);
217 						substituent.detach();
218 						if (oxygenReplaced>1){
219 							multiplierEl.detach();
220 						}
221 					}
222 				}
223 				else if (replacementType  == PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix){
224 					throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue);
225 				}
226 			}
227 		}
228 		return groups.size() != originalNumberOfGroups;
229 	}
230 
isChalcogenSubstituent(Element group)231 	private boolean isChalcogenSubstituent(Element group) {
232 		//Is this group followed by a hyphen and directly preceded by a substituent i.e. no multiplier/locant
233 		//e.g. methylthio-
234 		Element next = OpsinTools.getNextSibling(group);
235 		if (next != null && next.getName().equals(HYPHEN_EL) &&
236 				OpsinTools.getPreviousSibling(group) == null) {
237 			Element previousGroup = OpsinTools.getPreviousGroup(group);
238 			if (previousGroup != null) {
239 				//TODO We actually want to know if a carbon atom is the attachment point... but we don't know the attachment point locations at this point
240 				Element suffix = OpsinTools.getNextSibling(previousGroup, SUFFIX_EL);
241 				if (suffix == null || suffix.getFrag() == null) {
242 					for (Atom a : previousGroup.getFrag()) {
243 						if (a.getElement() == ChemEl.C) {
244 							return true;
245 						}
246 					}
247 				}
248 			}
249 		}
250 		return false;
251 	}
252 
253 	/**
254 	 * Currently prefix replacement terms must be directly adjacent to the groupToBeModified with an exception made
255 	 * for carbohydrate stereochemistry prefixes e.g. 'gluco' and for substractive prefixes e.g. 'deoxy'
256 	 * @param groupToBeModified
257 	 * @return
258 	 */
groupPrecededByElementThatBlocksPrefixReplacementInterpetation(Element groupToBeModified)259 	private boolean groupPrecededByElementThatBlocksPrefixReplacementInterpetation(Element groupToBeModified) {
260 		Element previous = OpsinTools.getPreviousSibling(groupToBeModified);
261 		while (previous !=null && (previous.getName().equals(SUBTRACTIVEPREFIX_EL)
262 				|| (previous.getName().equals(STEREOCHEMISTRY_EL) && previous.getAttributeValue(TYPE_ATR).equals(CARBOHYDRATECONFIGURATIONPREFIX_TYPE_VAL)))){
263 			previous = OpsinTools.getPreviousSibling(previous);
264 		}
265 		return previous != null;
266 	}
267 
268 
	/*
	 * Infix functional replacement nomenclature
	 */
272 
273 	/**
274 	 * Performs functional replacement using infixes e.g. thio in ethanthioic acid replaces an O with S
275 	 * @param suffixFragments May be modified if a multiplier is determined to mean multiplication of a suffix, usually untouched
276 	 * @param suffixes The suffix elements  May be modified if a multiplier is determined to mean multiplication of a suffix, usually untouched
277 	 * @throws StructureBuildingException
278 	 * @throws ComponentGenerationException
279 	 */
processInfixFunctionalReplacementNomenclature(List<Element> suffixes, List<Fragment> suffixFragments)280 	void processInfixFunctionalReplacementNomenclature(List<Element> suffixes, List<Fragment> suffixFragments) throws StructureBuildingException, ComponentGenerationException {
281 		for (int i = 0; i < suffixes.size(); i++) {
282 			Element suffix = suffixes.get(i);
283 			if (suffix.getAttribute(INFIX_ATR) != null){
284 				Fragment fragToApplyInfixTo = suffix.getFrag();
285 				Element possibleAcidGroup = OpsinTools.getPreviousSiblingIgnoringCertainElements(suffix, new String[]{MULTIPLIER_EL, INFIX_EL, SUFFIX_EL});
286 				if (possibleAcidGroup !=null && possibleAcidGroup.getName().equals(GROUP_EL) &&
287 						(possibleAcidGroup.getAttributeValue(TYPE_ATR).equals(NONCARBOXYLICACID_TYPE_VAL)|| possibleAcidGroup.getAttributeValue(TYPE_ATR).equals(CHALCOGENACIDSTEM_TYPE_VAL))){
288 					fragToApplyInfixTo = possibleAcidGroup.getFrag();
289 				}
290 				if (fragToApplyInfixTo ==null){
291 					throw new ComponentGenerationException("infix has erroneously been assigned to a suffix which does not correspond to a suffix fragment. suffix: " + suffix.getValue());
292 				}
293 				//e.g. =O:S,-O:S (which indicates replacing either a double or single bonded oxygen with S)
294 				//This is semicolon delimited for each infix
295 				List<String> infixTransformations = StringTools.arrayToList(suffix.getAttributeValue(INFIX_ATR).split(";"));
296 
297 				List<Atom> atomList =fragToApplyInfixTo.getAtomList();
298 				LinkedList<Atom> singleBondedOxygen = new LinkedList<Atom>();
299 				LinkedList<Atom> doubleBondedOxygen = new LinkedList<Atom>();
300 				populateTerminalSingleAndDoubleBondedOxygen(atomList, singleBondedOxygen, doubleBondedOxygen);
301 				int oxygenAvailable = singleBondedOxygen.size() +doubleBondedOxygen.size();
302 
303 				/*
304 				 * Modifies suffixes, suffixFragments, suffix and infixTransformations as appropriate
305 				 */
306 				disambiguateMultipliedInfixMeaning(suffixes, suffixFragments, suffix, infixTransformations, oxygenAvailable);
307 
308 				/*
309 				 * Sort infixTransformations so more specific transformations are performed first
310 				 * e.g. ethanthioimidic acid-->ethanimidthioic acid as imid can only apply to the double bonded oxygen
311 				 */
312 				Collections.sort(infixTransformations, new SortInfixTransformations());
313 
314 				for (String infixTransformation : infixTransformations) {
315 					String[] transformationArray = infixTransformation.split(":");
316 					if (transformationArray.length !=2){
317 						throw new StructureBuildingException("Atom to be replaced and replacement not specified correctly in infix: " + infixTransformation);
318 					}
319 					String[] transformations = transformationArray[0].split(",");
320 					String replacementSMILES = transformationArray[1];
321 					boolean acceptDoubleBondedOxygen = false;
322 					boolean acceptSingleBondedOxygen = false;
323 					boolean nitrido =false;
324 					for (String transformation : transformations) {
325 						if (transformation.startsWith("=")){
326 							acceptDoubleBondedOxygen = true;
327 						}
328 						else if (transformation.startsWith("-")){
329 							acceptSingleBondedOxygen = true;
330 						}
331 						else if (transformation.startsWith("#")){
332 							nitrido =true;
333 						}
334 						else{
335 							throw new StructureBuildingException("Malformed infix transformation. Expected to start with either - or =. Transformation was: " +transformation);
336 						}
337 						if (transformation.length()<2 || transformation.charAt(1)!='O'){
338 							throw new StructureBuildingException("Only replacement by oxygen is supported. Check infix defintions");
339 						}
340 					}
341 					boolean infixAssignmentAmbiguous =false;
342 					if ((acceptSingleBondedOxygen ||nitrido)  && !acceptDoubleBondedOxygen){
343 						if (singleBondedOxygen.size() ==0){
344 							throw new StructureBuildingException("Cannot find single bonded oxygen for infix with SMILES: "+ replacementSMILES+ " to modify!");
345 						}
346 						if (singleBondedOxygen.size() !=1){
347 							infixAssignmentAmbiguous=true;
348 						}
349 					}
350 					if (!acceptSingleBondedOxygen && (acceptDoubleBondedOxygen | nitrido)){
351 						if (doubleBondedOxygen.size()==0){
352 							throw new StructureBuildingException("Cannot find double bonded oxygen for infix with SMILES: "+ replacementSMILES+ " to modify!");
353 						}
354 						if (doubleBondedOxygen.size() != 1){
355 							infixAssignmentAmbiguous=true;
356 						}
357 					}
358 					if (acceptSingleBondedOxygen && acceptDoubleBondedOxygen){
359 						if (oxygenAvailable ==0){
360 							throw new StructureBuildingException("Cannot find oxygen for infix with SMILES: "+ replacementSMILES+ " to modify!");
361 						}
362 						if (oxygenAvailable !=1){
363 							infixAssignmentAmbiguous=true;
364 						}
365 					}
366 
367 					Set<Atom> ambiguousElementAtoms = new LinkedHashSet<Atom>();
368 					Atom atomToUse = null;
369 					if ((acceptDoubleBondedOxygen || nitrido) && doubleBondedOxygen.size()>0 ){
370 						atomToUse = doubleBondedOxygen.removeFirst();
371 					}
372 					else if (acceptSingleBondedOxygen && singleBondedOxygen.size()>0 ){
373 						atomToUse = singleBondedOxygen.removeFirst();
374 					}
375 					else{
376 						throw new StructureBuildingException("Cannot find oxygen for infix with SMILES: "+ replacementSMILES+ " to modify!");//this would be a bug
377 					}
378 					Fragment replacementFrag = state.fragManager.buildSMILES(replacementSMILES, SUFFIX_TYPE_VAL, NONE_LABELS_VAL);
379 					if (replacementFrag.getOutAtomCount()>0){//SMILES include an indication of the bond order the replacement fragment will have, this is not intended to be an outatom
380 						replacementFrag.removeOutAtom(0);
381 					}
382 					Atom atomThatWillReplaceOxygen =replacementFrag.getFirstAtom();
383 					if (replacementFrag.getAtomCount()==1 && atomThatWillReplaceOxygen.getElement().isChalcogen()){
384 						atomThatWillReplaceOxygen.setCharge(atomToUse.getCharge());
385 						atomThatWillReplaceOxygen.setProtonsExplicitlyAddedOrRemoved(atomToUse.getProtonsExplicitlyAddedOrRemoved());
386 					}
387 					removeOrMoveObsoleteFunctionalAtoms(atomToUse, replacementFrag);//also will move charge if necessary
388 					moveObsoleteOutAtoms(atomToUse, replacementFrag);//if the replaced atom was an outatom the fragments outatom list need to be corrected
389 					if (nitrido){
390 						atomToUse.getFirstBond().setOrder(3);
391 						Atom removedHydroxy = singleBondedOxygen.removeFirst();
392 						state.fragManager.removeAtomAndAssociatedBonds(removedHydroxy);
393 						removeAssociatedFunctionalAtom(removedHydroxy);
394 					}
395 					state.fragManager.incorporateFragment(replacementFrag, atomToUse.getFrag());
396 					state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(atomToUse, atomThatWillReplaceOxygen);
397 					if (infixAssignmentAmbiguous){
398 						ambiguousElementAtoms.add(atomThatWillReplaceOxygen);
399 						if (atomThatWillReplaceOxygen.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT)!=null){
400 							ambiguousElementAtoms.addAll(atomThatWillReplaceOxygen.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT));
401 						}
402 					}
403 					if (infixAssignmentAmbiguous){//record what atoms could have been replaced. Often this ambiguity is resolved later e.g. S-methyl ethanthioate
404 						for (Atom a : doubleBondedOxygen) {
405 							ambiguousElementAtoms.add(a);
406 							if (a.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT)!=null){
407 								ambiguousElementAtoms.addAll(a.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT));
408 							}
409 						}
410 						for (Atom a : singleBondedOxygen) {
411 							ambiguousElementAtoms.add(a);
412 							if (a.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT)!=null){
413 								ambiguousElementAtoms.addAll(a.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT));
414 							}
415 						}
416 						for (Atom atom : ambiguousElementAtoms) {
417 							atom.setProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT, ambiguousElementAtoms);
418 						}
419 					}
420 				}
421 			}
422 		}
423 	}
424 
425 	/*
426 	 * Functional class nomenclature
427 	 */
428 
429 	/**
430 	 * Replaces the appropriate number of functional oxygen atoms with the corresponding fragment
431 	 * @param acidContainingRoot
432 	 * @param acidReplacingWord
433 	 * @throws ComponentGenerationException
434 	 * @throws StructureBuildingException
435 	 */
processAcidReplacingFunctionalClassNomenclatureFullWord(Element acidContainingRoot, Element acidReplacingWord)436 	private void processAcidReplacingFunctionalClassNomenclatureFullWord(Element acidContainingRoot, Element acidReplacingWord) throws ComponentGenerationException, StructureBuildingException {
437 		String locant = acidReplacingWord.getAttributeValue(LOCANT_ATR);
438 		Element acidReplacingGroup = StructureBuildingMethods.findRightMostGroupInBracket(acidReplacingWord);
439 		if (acidReplacingGroup ==null){
440 			throw new ComponentGenerationException("OPSIN bug: acid replacing group not found where one was expected for acidReplacingFunctionalGroup wordRule");
441 		}
442 		String functionalGroupName = acidReplacingGroup.getValue();
443 		Fragment acidReplacingFrag = acidReplacingGroup.getFrag();
444 		if (acidReplacingGroup.getParent().getChildCount() != 1){
445 			throw new ComponentGenerationException("Unexpected qualifier to: " + functionalGroupName);
446 		}
447 
448 		Element groupToBeModified = acidContainingRoot.getFirstChildElement(GROUP_EL);
449 		List<Atom> oxygenAtoms = findFunctionalOxygenAtomsInApplicableSuffixes(groupToBeModified);
450 		if (oxygenAtoms.size() == 0){
451 			oxygenAtoms = findFunctionalOxygenAtomsInGroup(groupToBeModified);
452 		}
453 		if (oxygenAtoms.size() == 0){
454 			List<Element> conjunctiveSuffixElements =OpsinTools.getNextSiblingsOfType(groupToBeModified, CONJUNCTIVESUFFIXGROUP_EL);
455 			for (Element conjunctiveSuffixElement : conjunctiveSuffixElements) {
456 				oxygenAtoms.addAll(findFunctionalOxygenAtomsInGroup(conjunctiveSuffixElement));
457 			}
458 		}
459 		if (oxygenAtoms.size() < 1){
460 			throw new ComponentGenerationException("Insufficient oxygen to replace with " + functionalGroupName +"s in " + acidContainingRoot.getFirstChildElement(GROUP_EL).getValue());
461 		}
462 
463 		boolean isAmide = functionalGroupName.equals("amide") || functionalGroupName.equals("amid");
464 		if (isAmide) {
465 			if (acidReplacingFrag.getAtomCount()!=1){
466 				throw new ComponentGenerationException("OPSIN bug: " + functionalGroupName + " not found where expected");
467 			}
468 			Atom amideNitrogen = acidReplacingFrag.getFirstAtom();
469 			amideNitrogen.neutraliseCharge();
470 			amideNitrogen.clearLocants();
471 			acidReplacingFrag.addMappingToAtomLocantMap("N", amideNitrogen);
472 		}
473 		Atom chosenOxygen = locant != null ? removeOxygenWithAppropriateLocant(oxygenAtoms, locant) : oxygenAtoms.get(0);
474 		state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(chosenOxygen, acidReplacingFrag.getFirstAtom());
475 		removeAssociatedFunctionalAtom(chosenOxygen);
476 	}
477 
478 
479 	/**
480 	 * Replaces the appropriate number of functional oxygen atoms with the corresponding fragment
481 	 * @param acidContainingRoot
482 	 * @param functionalWord
483 	 * @throws ComponentGenerationException
484 	 * @throws StructureBuildingException
485 	 */
processAcidReplacingFunctionalClassNomenclatureFunctionalWord(Element acidContainingRoot, Element functionalWord)486 	private void processAcidReplacingFunctionalClassNomenclatureFunctionalWord(Element acidContainingRoot, Element functionalWord) throws ComponentGenerationException, StructureBuildingException {
487 		if (functionalWord !=null && functionalWord.getAttributeValue(TYPE_ATR).equals(WordType.functionalTerm.toString())){
488 			Element functionalTerm = functionalWord.getFirstChildElement(FUNCTIONALTERM_EL);
489 			if (functionalTerm ==null){
490 				throw new ComponentGenerationException("OPSIN bug: functionalTerm word not found where one was expected for acidReplacingFunctionalGroup wordRule");
491 			}
492 			Element acidReplacingGroup = functionalTerm.getFirstChildElement(FUNCTIONALGROUP_EL);
493 			String functionalGroupName = acidReplacingGroup.getValue();
494 			Element possibleLocantOrMultiplier = OpsinTools.getPreviousSibling(acidReplacingGroup);
495 			int numberOfAcidicHydroxysToReplace = 1;
496 			String[] locants = null;
497 			if (possibleLocantOrMultiplier != null){
498 				if (possibleLocantOrMultiplier.getName().equals(MULTIPLIER_EL)){
499 					numberOfAcidicHydroxysToReplace = Integer.parseInt(possibleLocantOrMultiplier.getAttributeValue(VALUE_ATR));
500 					possibleLocantOrMultiplier.detach();
501 					possibleLocantOrMultiplier = OpsinTools.getPreviousSibling(acidReplacingGroup);
502 				}
503 				if (possibleLocantOrMultiplier != null){
504 					if (possibleLocantOrMultiplier.getName().equals(LOCANT_EL)){
505 						locants = StringTools.removeDashIfPresent(possibleLocantOrMultiplier.getValue()).split(",");
506 						possibleLocantOrMultiplier.detach();
507 					}
508 					else {
509 						throw new ComponentGenerationException("Unexpected qualifier to acidReplacingFunctionalGroup functionalTerm");
510 					}
511 				}
512 			}
513 			if (functionalTerm.getChildCount() != 1){
514 				throw new ComponentGenerationException("Unexpected qualifier to acidReplacingFunctionalGroup functionalTerm");
515 			}
516 
517 			Element groupToBeModified = acidContainingRoot.getFirstChildElement(GROUP_EL);
518 			List<Atom> oxygenAtoms = findFunctionalOxygenAtomsInApplicableSuffixes(groupToBeModified);
519 			if (oxygenAtoms.size()==0) {
520 				oxygenAtoms = findFunctionalOxygenAtomsInGroup(groupToBeModified);
521 			}
522 			if (oxygenAtoms.size()==0) {
523 				List<Element> conjunctiveSuffixElements =OpsinTools.getNextSiblingsOfType(groupToBeModified, CONJUNCTIVESUFFIXGROUP_EL);
524 				for (Element conjunctiveSuffixElement : conjunctiveSuffixElements) {
525 					oxygenAtoms.addAll(findFunctionalOxygenAtomsInGroup(conjunctiveSuffixElement));
526 				}
527 			}
528 			if (numberOfAcidicHydroxysToReplace > oxygenAtoms.size()){
529 				throw new ComponentGenerationException("Insufficient oxygen to replace with nitrogen in " + acidContainingRoot.getFirstChildElement(GROUP_EL).getValue());
530 			}
531 			boolean isAmide = functionalGroupName.equals("amide") || functionalGroupName.equals("amid");
532 			if (isAmide) {
533 				for (int i = 0; i < numberOfAcidicHydroxysToReplace; i++) {
534 					Atom functionalOxygenToReplace = locants != null ? removeOxygenWithAppropriateLocant(oxygenAtoms, locants[i]) : oxygenAtoms.get(i);
535 					removeAssociatedFunctionalAtom(functionalOxygenToReplace);
536 					functionalOxygenToReplace.setElement(ChemEl.N);
537 				}
538 			}
539 			else{
540 				String groupValue = acidReplacingGroup.getAttributeValue(VALUE_ATR);
541 				String labelsValue = acidReplacingGroup.getAttributeValue(LABELS_ATR);
542 				Fragment acidReplacingFrag = state.fragManager.buildSMILES(groupValue, SUFFIX_TYPE_VAL, labelsValue != null ? labelsValue : NONE_LABELS_VAL);
543 				Fragment acidFragment = groupToBeModified.getFrag();
544 				if (acidFragment.hasLocant("2")){//prefer numeric locants on group to those of replacing group
545 					for (Atom atom : acidReplacingFrag.getAtomList()) {
546 						atom.clearLocants();
547 					}
548 				}
549 				Atom firstFunctionalOxygenToReplace = locants != null ? removeOxygenWithAppropriateLocant(oxygenAtoms, locants[0]) : oxygenAtoms.get(0);
550 				state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(firstFunctionalOxygenToReplace, acidReplacingFrag.getFirstAtom());
551 				removeAssociatedFunctionalAtom(firstFunctionalOxygenToReplace);
552 				for (int i = 1; i < numberOfAcidicHydroxysToReplace; i++) {
553 					Fragment clonedHydrazide = state.fragManager.copyAndRelabelFragment(acidReplacingFrag, i);
554 					Atom functionalOxygenToReplace = locants != null ? removeOxygenWithAppropriateLocant(oxygenAtoms, locants[i]) : oxygenAtoms.get(i);
555 					state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(functionalOxygenToReplace, clonedHydrazide.getFirstAtom());
556 					state.fragManager.incorporateFragment(clonedHydrazide, functionalOxygenToReplace.getFrag());
557 					removeAssociatedFunctionalAtom(functionalOxygenToReplace);
558 				}
559 				state.fragManager.incorporateFragment(acidReplacingFrag, firstFunctionalOxygenToReplace.getFrag());
560 			}
561 		}
562 		else{
563 			throw new ComponentGenerationException("amide word not found where expected, bug?");
564 		}
565 	}
566 
removeOxygenWithAppropriateLocant(List<Atom> oxygenAtoms, String locant)567 	private Atom removeOxygenWithAppropriateLocant(List<Atom> oxygenAtoms, String locant) throws ComponentGenerationException {
568 		for (Iterator<Atom> iterator = oxygenAtoms.iterator(); iterator.hasNext();) {
569 			Atom atom = iterator.next();
570 			if (atom.hasLocant(locant)) {
571 				iterator.remove();
572 				return atom;
573 			}
574 		}
575 		//Look for the case whether the locant refers to the backbone
576 		for (Iterator<Atom> iterator = oxygenAtoms.iterator(); iterator.hasNext();) {
577 			Atom atom = iterator.next();
578 			if (OpsinTools.depthFirstSearchForNonSuffixAtomWithLocant(atom, locant) != null){
579 				iterator.remove();
580 				return atom;
581 			}
582 		}
583 		throw new ComponentGenerationException("Failed to find acid group at locant: " + locant);
584 	}
585 
586 
587 	/*
588 	 * Prefix functional replacement nomenclature
589 	 */
590 
591 
acidHasSufficientHydrogenForSubstitutionInterpretation(Fragment acidFrag, int hydrogenRequiredForSubstitutionInterpretation, Element locantEl)592 	private boolean acidHasSufficientHydrogenForSubstitutionInterpretation(Fragment acidFrag, int hydrogenRequiredForSubstitutionInterpretation, Element locantEl) {
593 		List<Atom> atomsThatWouldBeSubstituted = new ArrayList<Atom>();
594 		if (locantEl !=null){
595 			String[] possibleLocants = locantEl.getValue().split(",");
596 			for (String locant : possibleLocants) {
597 				Atom atomToBeSubstituted = acidFrag.getAtomByLocant(locant);
598 				if (atomToBeSubstituted !=null){
599 					atomsThatWouldBeSubstituted.add(atomToBeSubstituted);
600 				}
601 				else{
602 					atomsThatWouldBeSubstituted.clear();
603 					atomsThatWouldBeSubstituted.add(acidFrag.getDefaultInAtomOrFirstAtom());
604 					break;
605 				}
606 			}
607 		}
608 		else{
609 			atomsThatWouldBeSubstituted.add(acidFrag.getDefaultInAtomOrFirstAtom());
610 		}
611 		for (Atom atom : atomsThatWouldBeSubstituted) {
612 			if (StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(atom) < hydrogenRequiredForSubstitutionInterpretation){
613 				return false;//insufficient hydrogens for substitution interpretation
614 			}
615 		}
616 		return true;
617 	}
618 
619 	/**
620 	 * Performs replacement of oxygen atoms by chalogen atoms
621 	 * If this is ambiguous e.g. thioacetate then Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT is populated
622 	 * @param groupToBeModified
623 	 * @param locantEl
624 	 * @param numberOfAtomsToReplace
625 	 * @param replacementSmiles
626 	 * @return
627 	 * @throws StructureBuildingException
628 	 */
performChalcogenFunctionalReplacement(Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace, String replacementSmiles)629 	private int performChalcogenFunctionalReplacement(Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace, String replacementSmiles) throws StructureBuildingException {
630 		List<Atom> oxygenAtoms = findOxygenAtomsInApplicableSuffixes(groupToBeModified);
631 		if (oxygenAtoms.size() == 0) {
632 			oxygenAtoms = findOxygenAtomsInGroup(groupToBeModified);
633 		}
634 		if (locantEl != null) {//locants are used to indicate replacement on trivial groups
635 			List<Atom> oxygenWithAppropriateLocants = pickOxygensWithAppropriateLocants(locantEl, oxygenAtoms);
636 			if(oxygenWithAppropriateLocants.size() < numberOfAtomsToReplace) {
637 				numberOfAtomsToReplace = 1;
638 				//e.g. -1-thioureidomethyl
639 			}
640 			else{
641 				locantEl.detach();
642 				oxygenAtoms = oxygenWithAppropriateLocants;
643 			}
644 		}
645 		List<Atom> replaceableAtoms = new ArrayList<Atom>();
646 		if (replacementSmiles.startsWith("=")) {
647 			//e.g. thiono
648 			replacementSmiles = replacementSmiles.substring(1);
649 			for (Atom oxygen : oxygenAtoms) {
650 				int incomingValency = oxygen.getIncomingValency();
651 				int bondCount = oxygen.getBondCount();
652 				if (bondCount == 1 && incomingValency == 2) {
653 					replaceableAtoms.add(oxygen);
654 				}
655 			}
656 		}
657 		else {
658 			List<Atom> doubleBondedOxygen = new ArrayList<Atom>();
659 			List<Atom> singleBondedOxygen = new ArrayList<Atom>();
660 			List<Atom> ethericOxygen = new ArrayList<Atom>();
661 			for (Atom oxygen : oxygenAtoms) {
662 				int incomingValency = oxygen.getIncomingValency();
663 				int bondCount = oxygen.getBondCount();
664 				if (bondCount == 1 && incomingValency ==2 ) {
665 					doubleBondedOxygen.add(oxygen);
666 				}
667 				else if (bondCount == 1 && incomingValency == 1) {
668 					singleBondedOxygen.add(oxygen);
669 				}
670 				else if (bondCount == 2 && incomingValency == 2) {
671 					ethericOxygen.add(oxygen);
672 				}
673 			}
674 			replaceableAtoms.addAll(doubleBondedOxygen);
675 			replaceableAtoms.addAll(singleBondedOxygen);
676 			replaceableAtoms.addAll(ethericOxygen);
677 		}
678 
679 		int totalOxygen = replaceableAtoms.size();
680 		if (numberOfAtomsToReplace >1){
681 			if (totalOxygen < numberOfAtomsToReplace){
682 				numberOfAtomsToReplace=1;
683 			}
684 		}
685 
686 		int atomsReplaced =0;
687 		if (totalOxygen >=numberOfAtomsToReplace){//check that there atleast as many oxygens as requested replacements
688 			boolean prefixAssignmentAmbiguous =false;
689 			Set<Atom> ambiguousElementAtoms = new LinkedHashSet<Atom>();
690 			if (totalOxygen != numberOfAtomsToReplace){
691 				prefixAssignmentAmbiguous=true;
692 			}
693 
694 			for (Atom atomToReplace : replaceableAtoms) {
695 				if (atomsReplaced == numberOfAtomsToReplace){
696 					ambiguousElementAtoms.add(atomToReplace);
697 					continue;
698 				}
699 				else{
700 					state.fragManager.replaceAtomWithSmiles(atomToReplace, replacementSmiles);
701 					if (prefixAssignmentAmbiguous){
702 						ambiguousElementAtoms.add(atomToReplace);
703 					}
704 				}
705 				atomsReplaced++;
706 			}
707 
708 			if (prefixAssignmentAmbiguous){//record what atoms could have been replaced. Often this ambiguity is resolved later e.g. S-methyl thioacetate
709 				for (Atom atom : ambiguousElementAtoms) {
710 					atom.setProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT, ambiguousElementAtoms);
711 				}
712 			}
713 		}
714 		return atomsReplaced;
715 	}
716 
717 
718 	/**
719 	 * Converts functional oxygen to peroxy e.g. peroxybenzoic acid
720 	 * Returns the number of oxygen replaced
721 	 * @param groupToBeModified
722 	 * @param locantEl
723 	 * @param numberOfAtomsToReplace
724 	 * @return
725 	 * @throws StructureBuildingException
726 	 */
performPeroxyFunctionalReplacement(Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace)727 	private int performPeroxyFunctionalReplacement(Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace) throws StructureBuildingException {
728 		List<Atom> oxygenAtoms = findFunctionalOxygenAtomsInApplicableSuffixes(groupToBeModified);
729 		if (oxygenAtoms.size()==0){
730 			oxygenAtoms = findEthericOxygenAtomsInGroup(groupToBeModified);
731 			oxygenAtoms.addAll(findFunctionalOxygenAtomsInGroup(groupToBeModified));
732 		}
733 		if (locantEl !=null){
734 			List<Atom> oxygenWithAppropriateLocants = pickOxygensWithAppropriateLocants(locantEl, oxygenAtoms);
735 			if(oxygenWithAppropriateLocants.size() < numberOfAtomsToReplace){
736 				numberOfAtomsToReplace =1;
737 			}
738 			else{
739 				locantEl.detach();
740 				oxygenAtoms = oxygenWithAppropriateLocants;
741 			}
742 		}
743 		if (numberOfAtomsToReplace >1 && oxygenAtoms.size() < numberOfAtomsToReplace){
744 			numberOfAtomsToReplace=1;
745 		}
746 		int atomsReplaced = 0;
747 		if (oxygenAtoms.size() >=numberOfAtomsToReplace){//check that there atleast as many oxygens as requested replacements
748 			atomsReplaced = numberOfAtomsToReplace;
749 			for (int j = 0; j < numberOfAtomsToReplace; j++) {
750 				Atom oxygenToReplace = oxygenAtoms.get(j);
751 				if (oxygenToReplace.getBondCount()==2){//etheric oxygen
752 					Fragment newOxygen = state.fragManager.buildSMILES("O", SUFFIX_TYPE_VAL, NONE_LABELS_VAL);
753 					Bond bondToRemove = oxygenToReplace.getFirstBond();
754 					Atom atomToAttachTo = bondToRemove.getFromAtom() == oxygenToReplace ?  bondToRemove.getToAtom() :  bondToRemove.getFromAtom();
755 					state.fragManager.createBond(atomToAttachTo, newOxygen.getFirstAtom(), 1);
756 					state.fragManager.createBond(newOxygen.getFirstAtom(), oxygenToReplace, 1);
757 					state.fragManager.removeBond(bondToRemove);
758 					state.fragManager.incorporateFragment(newOxygen, groupToBeModified.getFrag());
759 				}
760 				else{
761 					Fragment replacementFrag = state.fragManager.buildSMILES("OO", SUFFIX_TYPE_VAL, NONE_LABELS_VAL);
762 					removeOrMoveObsoleteFunctionalAtoms(oxygenToReplace, replacementFrag);
763 					state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(oxygenToReplace, replacementFrag.getFirstAtom());
764 					state.fragManager.incorporateFragment(replacementFrag, groupToBeModified.getFrag());
765 				}
766 			}
767 		}
768 		return atomsReplaced;
769 	}
770 
771 	/**
772 	 * Replaces double bonded oxygen and/or single bonded oxygen depending on the input SMILES
773 	 * SMILES with a valency 1 outAtom replace -O, SMILES with a valency 2 outAtom replace =O
774 	 * SMILES with a valency 3 outAtom replace -O and =O (nitrido)
775 	 * Returns the number of oxygen replaced
776 	 * @param groupToBeModified
777 	 * @param locantEl
778 	 * @param numberOfAtomsToReplace
779 	 * @param replacementSmiles
780      * @return
781 	 * @throws StructureBuildingException
782 	 */
performFunctionalReplacementOnAcid(Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace, String replacementSmiles)783 	private int performFunctionalReplacementOnAcid(Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace, String replacementSmiles) throws StructureBuildingException {
784 		int outValency;
785 		if (replacementSmiles.startsWith("-")){
786 			outValency =1;
787 		}
788 		else if (replacementSmiles.startsWith("=")){
789 			outValency =2;
790 		}
791 		else if (replacementSmiles.startsWith("#")){
792 			outValency =3;
793 		}
794 		else{
795 			throw new StructureBuildingException("OPSIN bug: Unexpected valency on fragment for prefix functional replacement");
796 		}
797 		replacementSmiles = replacementSmiles.substring(1);
798 		List<Atom> oxygenAtoms = findOxygenAtomsInApplicableSuffixes(groupToBeModified);
799 		if (oxygenAtoms.size()==0){
800 			oxygenAtoms = findOxygenAtomsInGroup(groupToBeModified);
801 		}
802 		if (locantEl !=null){//locants are used to indicate replacement on trivial groups
803 			List<Atom> oxygenWithAppropriateLocants = pickOxygensWithAppropriateLocants(locantEl, oxygenAtoms);
804 			List<Atom> singleBondedOxygen = new ArrayList<Atom>();
805 			List<Atom> terminalDoubleBondedOxygen = new ArrayList<Atom>();
806 			populateTerminalSingleAndDoubleBondedOxygen(oxygenWithAppropriateLocants, singleBondedOxygen, terminalDoubleBondedOxygen);
807 			if (outValency ==1){
808 				oxygenWithAppropriateLocants.removeAll(terminalDoubleBondedOxygen);
809 			}
810 			else if (outValency ==2){
811 				oxygenWithAppropriateLocants.removeAll(singleBondedOxygen);
812 			}
813 			if(oxygenWithAppropriateLocants.size() < numberOfAtomsToReplace){
814 				numberOfAtomsToReplace =1;
815 				//e.g. -1-thioureidomethyl
816 			}
817 			else{
818 				locantEl.detach();
819 				oxygenAtoms = oxygenWithAppropriateLocants;
820 			}
821 		}
822 		List<Atom> singleBondedOxygen = new ArrayList<Atom>();
823 		List<Atom> terminalDoubleBondedOxygen = new ArrayList<Atom>();
824 		populateTerminalSingleAndDoubleBondedOxygen(oxygenAtoms, singleBondedOxygen, terminalDoubleBondedOxygen);
825 		if (outValency ==1){
826 			oxygenAtoms.removeAll(terminalDoubleBondedOxygen);
827 		}
828 		else if (outValency ==2){
829 			oxygenAtoms.removeAll(singleBondedOxygen);
830 			//favour bridging oxygen over double bonded oxygen c.f. imidodicarbonate
831 			oxygenAtoms.removeAll(terminalDoubleBondedOxygen);
832 			oxygenAtoms.addAll(terminalDoubleBondedOxygen);
833 		}
834 		else {
835 			if (singleBondedOxygen.size()==0 || terminalDoubleBondedOxygen.size()==0){
836 				throw new StructureBuildingException("Both a -OH and =O are required for nitrido prefix functional replacement");
837 			}
838 			oxygenAtoms.removeAll(singleBondedOxygen);
839 		}
840 		if (numberOfAtomsToReplace >1 && oxygenAtoms.size() < numberOfAtomsToReplace){
841 			numberOfAtomsToReplace=1;
842 		}
843 
844 		int atomsReplaced =0;
845 		if (oxygenAtoms.size() >=numberOfAtomsToReplace){//check that there atleast as many oxygens as requested replacements
846 			for (Atom atomToReplace : oxygenAtoms) {
847 				if (atomsReplaced == numberOfAtomsToReplace){
848 					continue;
849 				}
850 				else{
851 					Fragment replacementFrag = state.fragManager.buildSMILES(replacementSmiles, atomToReplace.getFrag().getTokenEl(), NONE_LABELS_VAL);
852 					if (outValency ==3){//special case for nitrido
853 						atomToReplace.getFirstBond().setOrder(3);
854 						Atom removedHydroxy = singleBondedOxygen.remove(0);
855 						state.fragManager.removeAtomAndAssociatedBonds(removedHydroxy);
856 						removeAssociatedFunctionalAtom(removedHydroxy);
857 					}
858 					state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(atomToReplace, replacementFrag.getFirstAtom());
859 					if (outValency ==1){
860 						removeOrMoveObsoleteFunctionalAtoms(atomToReplace, replacementFrag);
861 					}
862 					moveObsoleteOutAtoms(atomToReplace, replacementFrag);
863 					state.fragManager.incorporateFragment(replacementFrag, atomToReplace.getFrag());
864 				}
865 				atomsReplaced++;
866 			}
867 		}
868 		return atomsReplaced;
869 	}
870 
871 	/*
872 	 * Infix functional replacement nomenclature
873 	 */
874 
875 	/**
876 	 * This block handles infix multiplication. Unless brackets are provided this is ambiguous without knowledge of the suffix that is being modified
877 	 * For example butandithione could be intepreted as butandi(thione) or butan(dithi)one.
878 	 * Obviously the latter is wrong in this case but it is the correct interpretation for butandithiate
879 	 * @param suffixes
880 	 * @param suffixFragments
881 	 * @param suffix
882 	 * @param infixTransformations
883 	 * @param oxygenAvailable
884 	 * @throws ComponentGenerationException
885 	 * @throws StructureBuildingException
886 	 */
disambiguateMultipliedInfixMeaning(List<Element> suffixes, List<Fragment> suffixFragments,Element suffix, List<String> infixTransformations, int oxygenAvailable)887 	private void disambiguateMultipliedInfixMeaning(List<Element> suffixes,
888 			List<Fragment> suffixFragments,Element suffix, List<String> infixTransformations, int oxygenAvailable)
889 			throws ComponentGenerationException, StructureBuildingException {
890 		Element possibleInfix = OpsinTools.getPreviousSibling(suffix);
891 		if (possibleInfix.getName().equals(INFIX_EL)){//the infix is only left when there was ambiguity
892 			Element possibleMultiplier = OpsinTools.getPreviousSibling(possibleInfix);
893 			if (possibleMultiplier.getName().equals(MULTIPLIER_EL)){
894 				int multiplierValue =Integer.parseInt(possibleMultiplier.getAttributeValue(VALUE_ATR));
895 				if (infixTransformations.size() + multiplierValue-1 <=oxygenAvailable){//multiplier means multiply the infix e.g. butandithiate
896 					for (int j = 1; j < multiplierValue; j++) {
897 						infixTransformations.add(0, infixTransformations.get(0));
898 					}
899 				}
900 				else{
901 					Element possibleLocant = OpsinTools.getPreviousSibling(possibleMultiplier);
902 					String[] locants = null;
903 					if (possibleLocant.getName().equals(LOCANT_EL)) {
904 						locants = possibleLocant.getValue().split(",");
905 					}
906 					if (locants !=null){
907 						if (locants.length!=multiplierValue){
908 							throw new ComponentGenerationException("Multiplier/locant disagreement when multiplying infixed suffix");
909 						}
910 					    suffix.addAttribute(new Attribute(LOCANT_ATR, locants[0]));
911 					}
912 					suffix.addAttribute(new Attribute(MULTIPLIED_ATR, "multiplied"));
913 					for (int j = 1; j < multiplierValue; j++) {//multiplier means multiply the infixed suffix e.g. butandithione
914 						Element newSuffix = suffix.copy();
915 						Fragment newSuffixFrag = state.fragManager.copyFragment(suffix.getFrag());
916 						newSuffix.setFrag(newSuffixFrag);
917 						suffixFragments.add(newSuffixFrag);
918 						OpsinTools.insertAfter(suffix, newSuffix);
919 						suffixes.add(newSuffix);
920 						if (locants !=null){//assign locants if available
921 							newSuffix.getAttribute(LOCANT_ATR).setValue(locants[j]);
922 						}
923 					}
924 					if (locants!=null){
925 						possibleLocant.detach();
926 					}
927 				}
928 				possibleMultiplier.detach();
929 				possibleInfix.detach();
930 			}
931 			else{
932 				throw new ComponentGenerationException("Multiplier expected in front of ambiguous infix");
933 			}
934 		}
935 	}
936 
937 	/*
938 	 * Convenience Methods
939 	 */
940 
941 	/**
942 	 * Given an atom that is to be replaced by a functional replacement fragment
943 	 * determines whether this atom is a functional atom and, if it is, performs the following processes:
944 	 * The functionalAtom is removed. If the the replacement fragment is an atom of O/S/Se/Te or the
945 	 * the terminal atom of the fragment is a single bonded O/S/Se/Te a functionAom is added to this atom.
946 	 * @param atomToBeReplaced
947 	 * @param replacementFrag
948 	 */
removeOrMoveObsoleteFunctionalAtoms(Atom atomToBeReplaced, Fragment replacementFrag)949 	private void removeOrMoveObsoleteFunctionalAtoms(Atom atomToBeReplaced, Fragment replacementFrag){
950 		List<Atom> replacementAtomList = replacementFrag.getAtomList();
951 		Fragment origFrag = atomToBeReplaced.getFrag();
952 		for (int i = origFrag.getFunctionalAtomCount() - 1; i >=0; i--) {
953 			FunctionalAtom functionalAtom = origFrag.getFunctionalAtom(i);
954 			if (atomToBeReplaced.equals(functionalAtom.getAtom())){
955 				atomToBeReplaced.getFrag().removeFunctionalAtom(i);
956 				Atom terminalAtomOfReplacementFrag = replacementAtomList.get(replacementAtomList.size()-1);
957 				if ((terminalAtomOfReplacementFrag.getIncomingValency() ==1 || replacementAtomList.size()==1)&& terminalAtomOfReplacementFrag.getElement().isChalcogen()){
958 					replacementFrag.addFunctionalAtom(terminalAtomOfReplacementFrag);
959 					terminalAtomOfReplacementFrag.setCharge(atomToBeReplaced.getCharge());
960 					terminalAtomOfReplacementFrag.setProtonsExplicitlyAddedOrRemoved(atomToBeReplaced.getProtonsExplicitlyAddedOrRemoved());
961 				}
962 				atomToBeReplaced.neutraliseCharge();
963 			}
964 		}
965 	}
966 
967 	/**
968 	 * Given an atom that is to be replaced by a functional replacement fragment
969 	 * determines whether this atom has outvalency and if it does removes the outatom from the atom's fragment
970 	 * and adds an outatom to the replacementFrag
971 	 * @param atomToBeReplaced
972 	 * @param replacementFrag
973 	 */
moveObsoleteOutAtoms(Atom atomToBeReplaced, Fragment replacementFrag)974 	private void moveObsoleteOutAtoms(Atom atomToBeReplaced, Fragment replacementFrag){
975 		if (atomToBeReplaced.getOutValency() >0){//this is not known to occur in well formed IUPAC names but would occur in thioxy (as a suffix)
976 			List<Atom> replacementAtomList = replacementFrag.getAtomList();
977 			Fragment origFrag = atomToBeReplaced.getFrag();
978 			for (int i = origFrag.getOutAtomCount() - 1; i >=0; i--) {
979 				OutAtom outAtom = origFrag.getOutAtom(i);
980 				if (atomToBeReplaced.equals(outAtom.getAtom())){
981 					atomToBeReplaced.getFrag().removeOutAtom(i);
982 					Atom terminalAtomOfReplacementFrag = replacementAtomList.get(replacementAtomList.size()-1);
983 					replacementFrag.addOutAtom(terminalAtomOfReplacementFrag, outAtom.getValency(), outAtom.isSetExplicitly());
984 				}
985 			}
986 		}
987 	}
988 
removeAssociatedFunctionalAtom(Atom atomWithFunctionalAtom)989 	private void removeAssociatedFunctionalAtom(Atom atomWithFunctionalAtom) throws StructureBuildingException {
990 		Fragment frag = atomWithFunctionalAtom.getFrag();
991 		for (int i = frag.getFunctionalAtomCount() - 1; i >=0; i--) {
992 			FunctionalAtom functionalAtom = frag.getFunctionalAtom(i);
993 			if (atomWithFunctionalAtom.equals(functionalAtom.getAtom())){
994 				atomWithFunctionalAtom.getFrag().removeFunctionalAtom(i);
995 				return;
996 			}
997 		}
998 		throw new StructureBuildingException("OPSIN bug: Unable to find associated functionalAtom");
999 	}
1000 
1001 
1002 	/**
1003 	 * Returns the subset of oxygenAtoms that possess one of the locants in locantEl
1004 	 * Searches for locant on nearest non suffix atom in case of suffixes
1005 	 * @param locantEl
1006 	 * @param oxygenAtoms
1007 	 * @return
1008 	 */
pickOxygensWithAppropriateLocants(Element locantEl, List<Atom> oxygenAtoms)1009 	private List<Atom> pickOxygensWithAppropriateLocants(Element locantEl, List<Atom> oxygenAtoms) {
1010 		String[] possibleLocants = locantEl.getValue().split(",");
1011 		boolean pLocantSpecialCase = allLocantsP(possibleLocants);
1012 		List<Atom> oxygenWithAppropriateLocants = new ArrayList<Atom>();
1013 		for (Atom atom : oxygenAtoms) {
1014 			List<String> atomlocants = atom.getLocants();
1015 			if (atomlocants.size() > 0) {
1016 				for (String locantVal : possibleLocants) {
1017 					if (atomlocants.contains(locantVal)) {
1018 						 oxygenWithAppropriateLocants.add(atom);
1019 						 break;
1020 					}
1021 				}
1022 			}
1023 			else if (pLocantSpecialCase) {
1024 				for (Atom neighbour : atom.getAtomNeighbours()) {
1025 					if (neighbour.getElement() == ChemEl.P) {
1026 						 oxygenWithAppropriateLocants.add(atom);
1027 						 break;
1028 					}
1029 				}
1030 			}
1031 			else {
1032 				Atom atomWithNumericLocant = OpsinTools.depthFirstSearchForAtomWithNumericLocant(atom);
1033 				if (atomWithNumericLocant != null) {
1034 					List<String> atomWithNumericLocantLocants = atomWithNumericLocant.getLocants();
1035 					for (String locantVal : possibleLocants) {
1036 						if (atomWithNumericLocantLocants.contains(locantVal)) {
1037 							 oxygenWithAppropriateLocants.add(atom);
1038 							 break;
1039 						}
1040 					}
1041 				}
1042 			}
1043 		}
1044 		return oxygenWithAppropriateLocants;
1045 	}
1046 
allLocantsP(String[] locants)1047 	private boolean allLocantsP(String[] locants) {
1048 		if (locants.length == 0) {
1049 			return false;
1050 		}
1051 		for (String locant : locants) {
1052 			if (!locant.equals("P")) {
1053 				return false;
1054 			}
1055 		}
1056 		return true;
1057 	}
1058 
1059 	/**
1060 	 * Returns oxygen atoms in suffixes with functionalAtoms
1061 	 * @param groupToBeModified
1062 	 * @return
1063 	 */
findFunctionalOxygenAtomsInApplicableSuffixes(Element groupToBeModified)1064 	private List<Atom> findFunctionalOxygenAtomsInApplicableSuffixes(Element groupToBeModified) {
1065 		List<Element> suffixElements =OpsinTools.getNextSiblingsOfType(groupToBeModified, SUFFIX_EL);
1066 		List<Atom> oxygenAtoms = new ArrayList<Atom>();
1067 		for (Element suffix : suffixElements) {
1068 			Fragment suffixFrag = suffix.getFrag();
1069 			if (suffixFrag != null) {//null for non carboxylic acids
1070 				for (int i = 0, l = suffixFrag.getFunctionalAtomCount(); i < l; i++) {
1071 					Atom a = suffixFrag.getFunctionalAtom(i).getAtom();
1072 					if (a.getElement() == ChemEl.O) {
1073 						oxygenAtoms.add(a);
1074 					}
1075 				}
1076 			}
1077 		}
1078 		return oxygenAtoms;
1079 	}
1080 
1081 	/**
1082 	 * Returns functional oxygen atoms in groupToBeModified
1083 	 * @param groupToBeModified
1084 	 * @return
1085 	 */
findFunctionalOxygenAtomsInGroup(Element groupToBeModified)1086 	private List<Atom> findFunctionalOxygenAtomsInGroup(Element groupToBeModified) {
1087 		List<Atom> oxygenAtoms = new ArrayList<Atom>();
1088 		Fragment frag = groupToBeModified.getFrag();
1089 		for (int i = 0, l = frag.getFunctionalAtomCount(); i < l; i++) {
1090 			Atom a = frag.getFunctionalAtom(i).getAtom();
1091 			if (a.getElement() == ChemEl.O){
1092 				oxygenAtoms.add(a);
1093 			}
1094 		}
1095 		return oxygenAtoms;
1096 	}
1097 
1098 
1099 	/**
1100 	 * Returns etheric oxygen atoms in groupToBeModified
1101 	 * @param groupToBeModified
1102 	 * @return
1103 	 */
findEthericOxygenAtomsInGroup(Element groupToBeModified)1104 	private List<Atom> findEthericOxygenAtomsInGroup(Element groupToBeModified) {
1105 		List<Atom> oxygenAtoms = new ArrayList<Atom>();
1106 		List<Atom> atomList = groupToBeModified.getFrag().getAtomList();
1107 		for (Atom a: atomList) {
1108 			if (a.getElement() == ChemEl.O && a.getBondCount()==2 && a.getCharge()==0 && a.getIncomingValency()==2){
1109 				oxygenAtoms.add(a);
1110 			}
1111 		}
1112 		return oxygenAtoms;
1113 	}
1114 
1115 
1116 	/**
1117 	 * Returns oxygen atoms in suffixes with functionalAtoms or acidStem suffixes or aldehyde suffixes (1979 C-531)
1118 	 * @param groupToBeModified
1119 	 * @return
1120 	 */
findOxygenAtomsInApplicableSuffixes(Element groupToBeModified)1121 	private List<Atom> findOxygenAtomsInApplicableSuffixes(Element groupToBeModified) {
1122 		List<Element> suffixElements =OpsinTools.getNextSiblingsOfType(groupToBeModified, SUFFIX_EL);
1123 		List<Atom> oxygenAtoms = new ArrayList<Atom>();
1124 		for (Element suffix : suffixElements) {
1125 			Fragment suffixFrag = suffix.getFrag();
1126 			if (suffixFrag != null) {//null for non carboxylic acids
1127 				if (suffixFrag.getFunctionalAtomCount() > 0 || groupToBeModified.getAttributeValue(TYPE_ATR).equals(ACIDSTEM_TYPE_VAL) || suffix.getAttributeValue(VALUE_ATR).equals("aldehyde")) {
1128 					List<Atom> atomList = suffixFrag.getAtomList();
1129 					for (Atom a : atomList) {
1130 						if (a.getElement() == ChemEl.O) {
1131 							oxygenAtoms.add(a);
1132 						}
1133 					}
1134 				}
1135 			}
1136 		}
1137 		return oxygenAtoms;
1138 	}
1139 
1140 	/**
1141 	 * Returns oxygen atoms in groupToBeModified
1142 	 * @param groupToBeModified
1143 	 * @return
1144 	 */
findOxygenAtomsInGroup(Element groupToBeModified)1145 	private List<Atom> findOxygenAtomsInGroup(Element groupToBeModified) {
1146 		List<Atom> oxygenAtoms = new ArrayList<Atom>();
1147 		List<Atom> atomList = groupToBeModified.getFrag().getAtomList();
1148 		for (Atom a : atomList) {
1149 			if (a.getElement() == ChemEl.O){
1150 				oxygenAtoms.add(a);
1151 			}
1152 		}
1153 		return oxygenAtoms;
1154 	}
1155 
1156 
populateTerminalSingleAndDoubleBondedOxygen(List<Atom> atomList, List<Atom> singleBondedOxygen, List<Atom> doubleBondedOxygen)1157 	private void populateTerminalSingleAndDoubleBondedOxygen(List<Atom> atomList, List<Atom> singleBondedOxygen, List<Atom> doubleBondedOxygen) throws StructureBuildingException {
1158 		for (Atom a : atomList) {
1159 			if (a.getElement() == ChemEl.O){//find terminal oxygens
1160 				if (a.getBondCount()==1){
1161 					int incomingValency = a.getIncomingValency();
1162 					if (incomingValency ==2){
1163 						doubleBondedOxygen.add(a);
1164 					}
1165 					else if (incomingValency ==1){
1166 						singleBondedOxygen.add(a);
1167 					}
1168 					else{
1169 						throw new StructureBuildingException("Unexpected bond order to oxygen; excepted 1 or 2 found: " +incomingValency);
1170 					}
1171 
1172 				}
1173 			}
1174 		}
1175 	}
1176 }
1177